path: root/contrib/libs/llvm12/lib/Transforms/Scalar
author     shadchin <shadchin@yandex-team.ru>            2022-02-10 16:44:30 +0300
committer  Daniil Cherednik <dcherednik@yandex-team.ru>   2022-02-10 16:44:30 +0300
commit     2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch)
tree       012bb94d777798f1f56ac1cec429509766d05181 /contrib/libs/llvm12/lib/Transforms/Scalar
parent     6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff)
download   ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Transforms/Scalar')
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/ADCE.cpp  4
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp  60
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/AnnotationRemarks.cpp  180
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/CallSiteSplitting.cpp  2
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/ConstantHoisting.cpp  16
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/ConstraintElimination.cpp  814
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp  448
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/DCE.cpp  32
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/DeadStoreElimination.cpp  1434
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/DivRemPairs.cpp  10
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/EarlyCSE.cpp  658
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/FlattenCFGPass.cpp  2
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/GVN.cpp  604
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/GVNHoist.cpp  1484
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/GVNSink.cpp  8
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/GuardWidening.cpp  10
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/IndVarSimplify.cpp  394
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp  150
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/InferAddressSpaces.cpp  228
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/InstSimplifyPass.cpp  4
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/JumpThreading.cpp  434
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/LICM.cpp  452
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/LoopDataPrefetch.cpp  2
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/LoopDeletion.cpp  178
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/LoopDistribute.cpp  16
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/LoopFlatten.cpp  1456
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/LoopFuse.cpp  546
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/LoopIdiomRecognize.cpp  916
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/LoopInterchange.cpp  206
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/LoopLoadElimination.cpp  82
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/LoopPassManager.cpp  548
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/LoopPredication.cpp  16
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/LoopRerollPass.cpp  102
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/LoopRotation.cpp  70
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/LoopSimplifyCFG.cpp  34
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/LoopSink.cpp  242
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/LoopStrengthReduce.cpp  216
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp  16
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/LoopUnrollPass.cpp  62
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/LoopUnswitch.cpp  564
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/LoopVersioningLICM.cpp  158
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp  10
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp  4
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp  454
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/MemCpyOptimizer.cpp  1194
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/MergeICmps.cpp  24
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/NaryReassociate.cpp  84
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/NewGVN.cpp  58
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/PlaceSafepoints.cpp  8
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/Reassociate.cpp  220
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/Reg2Mem.cpp  134
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp  258
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/SCCP.cpp  524
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/SROA.cpp  404
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/Scalar.cpp  44
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp  1896
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/Scalarizer.cpp  10
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp  126
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp  118
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/SimplifyCFGPass.cpp  256
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/Sink.cpp  60
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp  8
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/SpeculativeExecution.cpp  16
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp  138
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/StructurizeCFG.cpp  224
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/TailRecursionElimination.cpp  124
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/WarnMissedTransforms.cpp  6
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/Scalar/ya.make  24
68 files changed, 9642 insertions(+), 9642 deletions(-)
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/ADCE.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/ADCE.cpp
index ce4e5e575f..5f605b8ad4 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/ADCE.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/ADCE.cpp
@@ -325,7 +325,7 @@ void AggressiveDeadCodeElimination::initialize() {
bool AggressiveDeadCodeElimination::isAlwaysLive(Instruction &I) {
// TODO -- use llvm::isInstructionTriviallyDead
- if (I.isEHPad() || I.mayHaveSideEffects() || !I.willReturn()) {
+ if (I.isEHPad() || I.mayHaveSideEffects() || !I.willReturn()) {
// Skip any value profile instrumentation calls if they are
// instrumenting constants.
if (isInstrumentsConstant(I))
@@ -643,7 +643,7 @@ void AggressiveDeadCodeElimination::computeReversePostOrder() {
SmallPtrSet<BasicBlock*, 16> Visited;
unsigned PostOrder = 0;
for (auto &BB : F) {
- if (!succ_empty(&BB))
+ if (!succ_empty(&BB))
continue;
for (BasicBlock *Block : inverse_post_order_ext(&BB,Visited))
BlockInfo[Block].PostOrder = PostOrder++;
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index bccf94fc21..f57ee657c2 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -15,7 +15,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/InitializePasses.h"
#define AA_NAME "alignment-from-assumptions"
#define DEBUG_TYPE AA_NAME
@@ -204,33 +204,33 @@ static Align getNewAlignment(const SCEV *AASCEV, const SCEV *AlignSCEV,
}
bool AlignmentFromAssumptionsPass::extractAlignmentInfo(CallInst *I,
- unsigned Idx,
+ unsigned Idx,
Value *&AAPtr,
const SCEV *&AlignSCEV,
const SCEV *&OffSCEV) {
- Type *Int64Ty = Type::getInt64Ty(I->getContext());
- OperandBundleUse AlignOB = I->getOperandBundleAt(Idx);
- if (AlignOB.getTagName() != "align")
+ Type *Int64Ty = Type::getInt64Ty(I->getContext());
+ OperandBundleUse AlignOB = I->getOperandBundleAt(Idx);
+ if (AlignOB.getTagName() != "align")
return false;
- assert(AlignOB.Inputs.size() >= 2);
- AAPtr = AlignOB.Inputs[0].get();
- // TODO: Consider accumulating the offset to the base.
- AAPtr = AAPtr->stripPointerCastsSameRepresentation();
- AlignSCEV = SE->getSCEV(AlignOB.Inputs[1].get());
- AlignSCEV = SE->getTruncateOrZeroExtend(AlignSCEV, Int64Ty);
- if (AlignOB.Inputs.size() == 3)
- OffSCEV = SE->getSCEV(AlignOB.Inputs[2].get());
- else
+ assert(AlignOB.Inputs.size() >= 2);
+ AAPtr = AlignOB.Inputs[0].get();
+ // TODO: Consider accumulating the offset to the base.
+ AAPtr = AAPtr->stripPointerCastsSameRepresentation();
+ AlignSCEV = SE->getSCEV(AlignOB.Inputs[1].get());
+ AlignSCEV = SE->getTruncateOrZeroExtend(AlignSCEV, Int64Ty);
+ if (AlignOB.Inputs.size() == 3)
+ OffSCEV = SE->getSCEV(AlignOB.Inputs[2].get());
+ else
OffSCEV = SE->getZero(Int64Ty);
- OffSCEV = SE->getTruncateOrZeroExtend(OffSCEV, Int64Ty);
+ OffSCEV = SE->getTruncateOrZeroExtend(OffSCEV, Int64Ty);
return true;
}
-bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
- unsigned Idx) {
+bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
+ unsigned Idx) {
Value *AAPtr;
const SCEV *AlignSCEV, *OffSCEV;
- if (!extractAlignmentInfo(ACall, Idx, AAPtr, AlignSCEV, OffSCEV))
+ if (!extractAlignmentInfo(ACall, Idx, AAPtr, AlignSCEV, OffSCEV))
return false;
// Skip ConstantPointerNull and UndefValue. Assumptions on these shouldn't
@@ -254,8 +254,8 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
while (!WorkList.empty()) {
Instruction *J = WorkList.pop_back_val();
if (LoadInst *LI = dyn_cast<LoadInst>(J)) {
- if (!isValidAssumeForContext(ACall, J, DT))
- continue;
+ if (!isValidAssumeForContext(ACall, J, DT))
+ continue;
Align NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
LI->getPointerOperand(), SE);
if (NewAlignment > LI->getAlign()) {
@@ -263,8 +263,8 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
++NumLoadAlignChanged;
}
} else if (StoreInst *SI = dyn_cast<StoreInst>(J)) {
- if (!isValidAssumeForContext(ACall, J, DT))
- continue;
+ if (!isValidAssumeForContext(ACall, J, DT))
+ continue;
Align NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
SI->getPointerOperand(), SE);
if (NewAlignment > SI->getAlign()) {
@@ -272,8 +272,8 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
++NumStoreAlignChanged;
}
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(J)) {
- if (!isValidAssumeForContext(ACall, J, DT))
- continue;
+ if (!isValidAssumeForContext(ACall, J, DT))
+ continue;
Align NewDestAlignment =
getNewAlignment(AASCEV, AlignSCEV, OffSCEV, MI->getDest(), SE);
@@ -305,7 +305,7 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
Visited.insert(J);
for (User *UJ : J->users()) {
Instruction *K = cast<Instruction>(UJ);
- if (!Visited.count(K))
+ if (!Visited.count(K))
WorkList.push_back(K);
}
}
@@ -332,11 +332,11 @@ bool AlignmentFromAssumptionsPass::runImpl(Function &F, AssumptionCache &AC,
bool Changed = false;
for (auto &AssumeVH : AC.assumptions())
- if (AssumeVH) {
- CallInst *Call = cast<CallInst>(AssumeVH);
- for (unsigned Idx = 0; Idx < Call->getNumOperandBundles(); Idx++)
- Changed |= processAssumption(Call, Idx);
- }
+ if (AssumeVH) {
+ CallInst *Call = cast<CallInst>(AssumeVH);
+ for (unsigned Idx = 0; Idx < Call->getNumOperandBundles(); Idx++)
+ Changed |= processAssumption(Call, Idx);
+ }
return Changed;
}
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/AnnotationRemarks.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/AnnotationRemarks.cpp
index a02d88fe06..360c9b542e 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/AnnotationRemarks.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/AnnotationRemarks.cpp
@@ -1,90 +1,90 @@
-//===-- AnnotationRemarks.cpp - Generate remarks for annotated instrs. ----===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Generate remarks for instructions marked with !annotation.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
-#include "llvm/ADT/MapVector.h"
-#include "llvm/Analysis/OptimizationRemarkEmitter.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Transforms/Scalar.h"
-
-using namespace llvm;
-using namespace llvm::ore;
-
-#define DEBUG_TYPE "annotation-remarks"
-#define REMARK_PASS DEBUG_TYPE
-
-static void runImpl(Function &F) {
- if (!OptimizationRemarkEmitter::allowExtraAnalysis(F, REMARK_PASS))
- return;
-
- OptimizationRemarkEmitter ORE(&F);
- // For now, just generate a summary of the annotated instructions.
- MapVector<StringRef, unsigned> Mapping;
- for (Instruction &I : instructions(F)) {
- if (!I.hasMetadata(LLVMContext::MD_annotation))
- continue;
- for (const MDOperand &Op :
- I.getMetadata(LLVMContext::MD_annotation)->operands()) {
- auto Iter = Mapping.insert({cast<MDString>(Op.get())->getString(), 0});
- Iter.first->second++;
- }
- }
-
- Instruction *IP = &*F.begin()->begin();
- for (const auto &KV : Mapping)
- ORE.emit(OptimizationRemarkAnalysis(REMARK_PASS, "AnnotationSummary", IP)
- << "Annotated " << NV("count", KV.second) << " instructions with "
- << NV("type", KV.first));
-}
-
-namespace {
-
-struct AnnotationRemarksLegacy : public FunctionPass {
- static char ID;
-
- AnnotationRemarksLegacy() : FunctionPass(ID) {
- initializeAnnotationRemarksLegacyPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- runImpl(F);
- return false;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
-};
-
-} // end anonymous namespace
-
-char AnnotationRemarksLegacy::ID = 0;
-
-INITIALIZE_PASS_BEGIN(AnnotationRemarksLegacy, "annotation-remarks",
- "Annotation Remarks", false, false)
-INITIALIZE_PASS_END(AnnotationRemarksLegacy, "annotation-remarks",
- "Annotation Remarks", false, false)
-
-FunctionPass *llvm::createAnnotationRemarksLegacyPass() {
- return new AnnotationRemarksLegacy();
-}
-
-PreservedAnalyses AnnotationRemarksPass::run(Function &F,
- FunctionAnalysisManager &AM) {
- runImpl(F);
- return PreservedAnalyses::all();
-}
+//===-- AnnotationRemarks.cpp - Generate remarks for annotated instrs. ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generate remarks for instructions marked with !annotation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+using namespace llvm::ore;
+
+#define DEBUG_TYPE "annotation-remarks"
+#define REMARK_PASS DEBUG_TYPE
+
+static void runImpl(Function &F) {
+ if (!OptimizationRemarkEmitter::allowExtraAnalysis(F, REMARK_PASS))
+ return;
+
+ OptimizationRemarkEmitter ORE(&F);
+ // For now, just generate a summary of the annotated instructions.
+ MapVector<StringRef, unsigned> Mapping;
+ for (Instruction &I : instructions(F)) {
+ if (!I.hasMetadata(LLVMContext::MD_annotation))
+ continue;
+ for (const MDOperand &Op :
+ I.getMetadata(LLVMContext::MD_annotation)->operands()) {
+ auto Iter = Mapping.insert({cast<MDString>(Op.get())->getString(), 0});
+ Iter.first->second++;
+ }
+ }
+
+ Instruction *IP = &*F.begin()->begin();
+ for (const auto &KV : Mapping)
+ ORE.emit(OptimizationRemarkAnalysis(REMARK_PASS, "AnnotationSummary", IP)
+ << "Annotated " << NV("count", KV.second) << " instructions with "
+ << NV("type", KV.first));
+}
+
+namespace {
+
+struct AnnotationRemarksLegacy : public FunctionPass {
+ static char ID;
+
+ AnnotationRemarksLegacy() : FunctionPass(ID) {
+ initializeAnnotationRemarksLegacyPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ runImpl(F);
+ return false;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+};
+
+} // end anonymous namespace
+
+char AnnotationRemarksLegacy::ID = 0;
+
+INITIALIZE_PASS_BEGIN(AnnotationRemarksLegacy, "annotation-remarks",
+ "Annotation Remarks", false, false)
+INITIALIZE_PASS_END(AnnotationRemarksLegacy, "annotation-remarks",
+ "Annotation Remarks", false, false)
+
+FunctionPass *llvm::createAnnotationRemarksLegacyPass() {
+ return new AnnotationRemarksLegacy();
+}
+
+PreservedAnalyses AnnotationRemarksPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ runImpl(F);
+ return PreservedAnalyses::all();
+}
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/CallSiteSplitting.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/CallSiteSplitting.cpp
index 2eb94b721d..a9558f3f16 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -208,7 +208,7 @@ static bool canSplitCallSite(CallBase &CB, TargetTransformInfo &TTI) {
// instructions before the call is less then DuplicationThreshold. The
// instructions before the call will be duplicated in the split blocks and
// corresponding uses will be updated.
- InstructionCost Cost = 0;
+ InstructionCost Cost = 0;
for (auto &InstBeforeCall :
llvm::make_range(CallSiteBB->begin(), CB.getIterator())) {
Cost += TTI.getInstructionCost(&InstBeforeCall,
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/ConstantHoisting.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/ConstantHoisting.cpp
index fdab74fc94..29197218f2 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -366,9 +366,9 @@ void ConstantHoistingPass::collectConstantCandidates(
ConstInt->getValue(), ConstInt->getType(),
TargetTransformInfo::TCK_SizeAndLatency);
else
- Cost = TTI->getIntImmCostInst(
- Inst->getOpcode(), Idx, ConstInt->getValue(), ConstInt->getType(),
- TargetTransformInfo::TCK_SizeAndLatency, Inst);
+ Cost = TTI->getIntImmCostInst(
+ Inst->getOpcode(), Idx, ConstInt->getValue(), ConstInt->getType(),
+ TargetTransformInfo::TCK_SizeAndLatency, Inst);
// Ignore cheap integer constants.
if (Cost > TargetTransformInfo::TCC_Basic) {
@@ -418,9 +418,9 @@ void ConstantHoistingPass::collectConstantCandidates(
// usually lowered to a load from constant pool. Such operation is unlikely
// to be cheaper than compute it by <Base + Offset>, which can be lowered to
// an ADD instruction or folded into Load/Store instruction.
- int Cost =
- TTI->getIntImmCostInst(Instruction::Add, 1, Offset, PtrIntTy,
- TargetTransformInfo::TCK_SizeAndLatency, Inst);
+ int Cost =
+ TTI->getIntImmCostInst(Instruction::Add, 1, Offset, PtrIntTy,
+ TargetTransformInfo::TCK_SizeAndLatency, Inst);
ConstCandVecType &ExprCandVec = ConstGEPCandMap[BaseGV];
ConstCandMapType::iterator Itr;
bool Inserted;
@@ -951,7 +951,7 @@ bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
// base constant.
if (!ConstIntCandVec.empty())
findBaseConstants(nullptr);
- for (const auto &MapEntry : ConstGEPCandMap)
+ for (const auto &MapEntry : ConstGEPCandMap)
if (!MapEntry.second.empty())
findBaseConstants(MapEntry.first);
@@ -960,7 +960,7 @@ bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
bool MadeChange = false;
if (!ConstIntInfoVec.empty())
MadeChange = emitBaseConstants(nullptr);
- for (const auto &MapEntry : ConstGEPInfoMap)
+ for (const auto &MapEntry : ConstGEPInfoMap)
if (!MapEntry.second.empty())
MadeChange |= emitBaseConstants(MapEntry.first);
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/ConstraintElimination.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/ConstraintElimination.cpp
index 3b8af6f21c..e46462aa1f 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -1,407 +1,407 @@
-//===-- ConstraintElimination.cpp - Eliminate conds using constraints. ----===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Eliminate conditions based on constraints collected from dominating
-// conditions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Scalar/ConstraintElimination.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/ConstraintSystem.h"
-#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/PatternMatch.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/DebugCounter.h"
-#include "llvm/Transforms/Scalar.h"
-
-using namespace llvm;
-using namespace PatternMatch;
-
-#define DEBUG_TYPE "constraint-elimination"
-
-STATISTIC(NumCondsRemoved, "Number of instructions removed");
-DEBUG_COUNTER(EliminatedCounter, "conds-eliminated",
- "Controls which conditions are eliminated");
-
-static int64_t MaxConstraintValue = std::numeric_limits<int64_t>::max();
-
-// Decomposes \p V into a vector of pairs of the form { c, X } where c * X. The
-// sum of the pairs equals \p V. The first pair is the constant-factor and X
-// must be nullptr. If the expression cannot be decomposed, returns an empty
-// vector.
-static SmallVector<std::pair<int64_t, Value *>, 4> decompose(Value *V) {
- if (auto *CI = dyn_cast<ConstantInt>(V)) {
- if (CI->isNegative() || CI->uge(MaxConstraintValue))
- return {};
- return {{CI->getSExtValue(), nullptr}};
- }
- auto *GEP = dyn_cast<GetElementPtrInst>(V);
- if (GEP && GEP->getNumOperands() == 2) {
- if (isa<ConstantInt>(GEP->getOperand(GEP->getNumOperands() - 1))) {
- return {{cast<ConstantInt>(GEP->getOperand(GEP->getNumOperands() - 1))
- ->getSExtValue(),
- nullptr},
- {1, GEP->getPointerOperand()}};
- }
- Value *Op0;
- ConstantInt *CI;
- if (match(GEP->getOperand(GEP->getNumOperands() - 1),
- m_NUWShl(m_Value(Op0), m_ConstantInt(CI))))
- return {{0, nullptr},
- {1, GEP->getPointerOperand()},
- {std::pow(int64_t(2), CI->getSExtValue()), Op0}};
- if (match(GEP->getOperand(GEP->getNumOperands() - 1),
- m_ZExt(m_NUWShl(m_Value(Op0), m_ConstantInt(CI)))))
- return {{0, nullptr},
- {1, GEP->getPointerOperand()},
- {std::pow(int64_t(2), CI->getSExtValue()), Op0}};
-
- return {{0, nullptr},
- {1, GEP->getPointerOperand()},
- {1, GEP->getOperand(GEP->getNumOperands() - 1)}};
- }
-
- Value *Op0;
- Value *Op1;
- ConstantInt *CI;
- if (match(V, m_NUWAdd(m_Value(Op0), m_ConstantInt(CI))))
- return {{CI->getSExtValue(), nullptr}, {1, Op0}};
- if (match(V, m_NUWAdd(m_Value(Op0), m_Value(Op1))))
- return {{0, nullptr}, {1, Op0}, {1, Op1}};
-
- if (match(V, m_NUWSub(m_Value(Op0), m_ConstantInt(CI))))
- return {{-1 * CI->getSExtValue(), nullptr}, {1, Op0}};
- if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1))))
- return {{0, nullptr}, {1, Op0}, {1, Op1}};
-
- return {{0, nullptr}, {1, V}};
-}
-
-/// Turn a condition \p CmpI into a constraint vector, using indices from \p
-/// Value2Index. If \p ShouldAdd is true, new indices are added for values not
-/// yet in \p Value2Index.
-static SmallVector<int64_t, 8>
-getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
- DenseMap<Value *, unsigned> &Value2Index, bool ShouldAdd) {
- int64_t Offset1 = 0;
- int64_t Offset2 = 0;
-
- auto TryToGetIndex = [ShouldAdd,
- &Value2Index](Value *V) -> Optional<unsigned> {
- if (ShouldAdd) {
- Value2Index.insert({V, Value2Index.size() + 1});
- return Value2Index[V];
- }
- auto I = Value2Index.find(V);
- if (I == Value2Index.end())
- return None;
- return I->second;
- };
-
- if (Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_UGE)
- return getConstraint(CmpInst::getSwappedPredicate(Pred), Op1, Op0,
- Value2Index, ShouldAdd);
-
- // Only ULE and ULT predicates are supported at the moment.
- if (Pred != CmpInst::ICMP_ULE && Pred != CmpInst::ICMP_ULT)
- return {};
-
- auto ADec = decompose(Op0);
- auto BDec = decompose(Op1);
- // Skip if decomposing either of the values failed.
- if (ADec.empty() || BDec.empty())
- return {};
-
- // Skip trivial constraints without any variables.
- if (ADec.size() == 1 && BDec.size() == 1)
- return {};
-
- Offset1 = ADec[0].first;
- Offset2 = BDec[0].first;
- Offset1 *= -1;
-
- // Create iterator ranges that skip the constant-factor.
- auto VariablesA = make_range(std::next(ADec.begin()), ADec.end());
- auto VariablesB = make_range(std::next(BDec.begin()), BDec.end());
-
- // Check if each referenced value in the constraint is already in the system
- // or can be added (if ShouldAdd is true).
- for (const auto &KV :
- concat<std::pair<int64_t, Value *>>(VariablesA, VariablesB))
- if (!TryToGetIndex(KV.second))
- return {};
-
- // Build result constraint, by first adding all coefficients from A and then
- // subtracting all coefficients from B.
- SmallVector<int64_t, 8> R(Value2Index.size() + 1, 0);
- for (const auto &KV : VariablesA)
- R[Value2Index[KV.second]] += KV.first;
-
- for (const auto &KV : VariablesB)
- R[Value2Index[KV.second]] -= KV.first;
-
- R[0] = Offset1 + Offset2 + (Pred == CmpInst::ICMP_ULT ? -1 : 0);
- return R;
-}
-
-static SmallVector<int64_t, 8>
-getConstraint(CmpInst *Cmp, DenseMap<Value *, unsigned> &Value2Index,
- bool ShouldAdd) {
- return getConstraint(Cmp->getPredicate(), Cmp->getOperand(0),
- Cmp->getOperand(1), Value2Index, ShouldAdd);
-}
-
-namespace {
-/// Represents either a condition that holds on entry to a block or a basic
-/// block, with their respective Dominator DFS in and out numbers.
-struct ConstraintOrBlock {
- unsigned NumIn;
- unsigned NumOut;
- bool IsBlock;
- bool Not;
- union {
- BasicBlock *BB;
- CmpInst *Condition;
- };
-
- ConstraintOrBlock(DomTreeNode *DTN)
- : NumIn(DTN->getDFSNumIn()), NumOut(DTN->getDFSNumOut()), IsBlock(true),
- BB(DTN->getBlock()) {}
- ConstraintOrBlock(DomTreeNode *DTN, CmpInst *Condition, bool Not)
- : NumIn(DTN->getDFSNumIn()), NumOut(DTN->getDFSNumOut()), IsBlock(false),
- Not(Not), Condition(Condition) {}
-};
-
-struct StackEntry {
- unsigned NumIn;
- unsigned NumOut;
- CmpInst *Condition;
- bool IsNot;
-
- StackEntry(unsigned NumIn, unsigned NumOut, CmpInst *Condition, bool IsNot)
- : NumIn(NumIn), NumOut(NumOut), Condition(Condition), IsNot(IsNot) {}
-};
-} // namespace
-
-static bool eliminateConstraints(Function &F, DominatorTree &DT) {
- bool Changed = false;
- DT.updateDFSNumbers();
- ConstraintSystem CS;
-
- SmallVector<ConstraintOrBlock, 64> WorkList;
-
- // First, collect conditions implied by branches and blocks with their
- // Dominator DFS in and out numbers.
- for (BasicBlock &BB : F) {
- if (!DT.getNode(&BB))
- continue;
- WorkList.emplace_back(DT.getNode(&BB));
-
- auto *Br = dyn_cast<BranchInst>(BB.getTerminator());
- if (!Br || !Br->isConditional())
- continue;
-
- // If the condition is an OR of 2 compares and the false successor only has
- // the current block as predecessor, queue both negated conditions for the
- // false successor.
- Value *Op0, *Op1;
- if (match(Br->getCondition(), m_LogicalOr(m_Value(Op0), m_Value(Op1))) &&
- match(Op0, m_Cmp()) && match(Op1, m_Cmp())) {
- BasicBlock *FalseSuccessor = Br->getSuccessor(1);
- if (FalseSuccessor->getSinglePredecessor()) {
- WorkList.emplace_back(DT.getNode(FalseSuccessor), cast<CmpInst>(Op0),
- true);
- WorkList.emplace_back(DT.getNode(FalseSuccessor), cast<CmpInst>(Op1),
- true);
- }
- continue;
- }
-
- // If the condition is an AND of 2 compares and the true successor only has
- // the current block as predecessor, queue both conditions for the true
- // successor.
- if (match(Br->getCondition(), m_LogicalAnd(m_Value(Op0), m_Value(Op1))) &&
- match(Op0, m_Cmp()) && match(Op1, m_Cmp())) {
- BasicBlock *TrueSuccessor = Br->getSuccessor(0);
- if (TrueSuccessor->getSinglePredecessor()) {
- WorkList.emplace_back(DT.getNode(TrueSuccessor), cast<CmpInst>(Op0),
- false);
- WorkList.emplace_back(DT.getNode(TrueSuccessor), cast<CmpInst>(Op1),
- false);
- }
- continue;
- }
-
- auto *CmpI = dyn_cast<CmpInst>(Br->getCondition());
- if (!CmpI)
- continue;
- if (Br->getSuccessor(0)->getSinglePredecessor())
- WorkList.emplace_back(DT.getNode(Br->getSuccessor(0)), CmpI, false);
- if (Br->getSuccessor(1)->getSinglePredecessor())
- WorkList.emplace_back(DT.getNode(Br->getSuccessor(1)), CmpI, true);
- }
-
- // Next, sort worklist by dominance, so that dominating blocks and conditions
- // come before blocks and conditions dominated by them. If a block and a
- // condition have the same numbers, the condition comes before the block, as
- // it holds on entry to the block.
- sort(WorkList, [](const ConstraintOrBlock &A, const ConstraintOrBlock &B) {
- return std::tie(A.NumIn, A.IsBlock) < std::tie(B.NumIn, B.IsBlock);
- });
-
- // Finally, process ordered worklist and eliminate implied conditions.
- SmallVector<StackEntry, 16> DFSInStack;
- DenseMap<Value *, unsigned> Value2Index;
- for (ConstraintOrBlock &CB : WorkList) {
- // First, pop entries from the stack that are out-of-scope for CB. Remove
- // the corresponding entry from the constraint system.
- while (!DFSInStack.empty()) {
- auto &E = DFSInStack.back();
- LLVM_DEBUG(dbgs() << "Top of stack : " << E.NumIn << " " << E.NumOut
- << "\n");
- LLVM_DEBUG(dbgs() << "CB: " << CB.NumIn << " " << CB.NumOut << "\n");
- assert(E.NumIn <= CB.NumIn);
- if (CB.NumOut <= E.NumOut)
- break;
- LLVM_DEBUG(dbgs() << "Removing " << *E.Condition << " " << E.IsNot
- << "\n");
- DFSInStack.pop_back();
- CS.popLastConstraint();
- }
-
- LLVM_DEBUG({
- dbgs() << "Processing ";
- if (CB.IsBlock)
- dbgs() << *CB.BB;
- else
- dbgs() << *CB.Condition;
- dbgs() << "\n";
- });
-
- // For a block, check if any CmpInsts become known based on the current set
- // of constraints.
- if (CB.IsBlock) {
- for (Instruction &I : *CB.BB) {
- auto *Cmp = dyn_cast<CmpInst>(&I);
- if (!Cmp)
- continue;
- auto R = getConstraint(Cmp, Value2Index, false);
- if (R.empty() || R.size() == 1)
- continue;
- if (CS.isConditionImplied(R)) {
- if (!DebugCounter::shouldExecute(EliminatedCounter))
- continue;
-
- LLVM_DEBUG(dbgs() << "Condition " << *Cmp
- << " implied by dominating constraints\n");
- LLVM_DEBUG({
- for (auto &E : reverse(DFSInStack))
- dbgs() << " C " << *E.Condition << " " << E.IsNot << "\n";
- });
- Cmp->replaceAllUsesWith(
- ConstantInt::getTrue(F.getParent()->getContext()));
- NumCondsRemoved++;
- Changed = true;
- }
- if (CS.isConditionImplied(ConstraintSystem::negate(R))) {
- if (!DebugCounter::shouldExecute(EliminatedCounter))
- continue;
-
- LLVM_DEBUG(dbgs() << "Condition !" << *Cmp
- << " implied by dominating constraints\n");
- LLVM_DEBUG({
- for (auto &E : reverse(DFSInStack))
- dbgs() << " C " << *E.Condition << " " << E.IsNot << "\n";
- });
- Cmp->replaceAllUsesWith(
- ConstantInt::getFalse(F.getParent()->getContext()));
- NumCondsRemoved++;
- Changed = true;
- }
- }
- continue;
- }
-
- // Otherwise, add the condition to the system and stack, if we can transform
- // it into a constraint.
- auto R = getConstraint(CB.Condition, Value2Index, true);
- if (R.empty())
- continue;
-
- LLVM_DEBUG(dbgs() << "Adding " << *CB.Condition << " " << CB.Not << "\n");
- if (CB.Not)
- R = ConstraintSystem::negate(R);
-
- // If R has been added to the system, queue it for removal once it goes
- // out-of-scope.
- if (CS.addVariableRowFill(R))
- DFSInStack.emplace_back(CB.NumIn, CB.NumOut, CB.Condition, CB.Not);
- }
-
- return Changed;
-}
-
-PreservedAnalyses ConstraintEliminationPass::run(Function &F,
- FunctionAnalysisManager &AM) {
- auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
- if (!eliminateConstraints(F, DT))
- return PreservedAnalyses::all();
-
- PreservedAnalyses PA;
- PA.preserve<DominatorTreeAnalysis>();
- PA.preserve<GlobalsAA>();
- PA.preserveSet<CFGAnalyses>();
- return PA;
-}
-
-namespace {
-
-class ConstraintElimination : public FunctionPass {
-public:
- static char ID;
-
- ConstraintElimination() : FunctionPass(ID) {
- initializeConstraintEliminationPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- return eliminateConstraints(F, DT);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- }
-};
-
-} // end anonymous namespace
-
-char ConstraintElimination::ID = 0;
-
-INITIALIZE_PASS_BEGIN(ConstraintElimination, "constraint-elimination",
- "Constraint Elimination", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
-INITIALIZE_PASS_END(ConstraintElimination, "constraint-elimination",
- "Constraint Elimination", false, false)
-
-FunctionPass *llvm::createConstraintEliminationPass() {
- return new ConstraintElimination();
-}
+//===-- ConstraintElimination.cpp - Eliminate conds using constraints. ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Eliminate conditions based on constraints collected from dominating
+// conditions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/ConstraintElimination.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstraintSystem.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugCounter.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "constraint-elimination"
+
+STATISTIC(NumCondsRemoved, "Number of instructions removed");
+DEBUG_COUNTER(EliminatedCounter, "conds-eliminated",
+ "Controls which conditions are eliminated");
+
+static int64_t MaxConstraintValue = std::numeric_limits<int64_t>::max();
+
+// Decomposes \p V into a vector of pairs of the form { c, X } where c * X. The
+// sum of the pairs equals \p V. The first pair is the constant-factor and X
+// must be nullptr. If the expression cannot be decomposed, returns an empty
+// vector.
+static SmallVector<std::pair<int64_t, Value *>, 4> decompose(Value *V) {
+ if (auto *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->isNegative() || CI->uge(MaxConstraintValue))
+ return {};
+ return {{CI->getSExtValue(), nullptr}};
+ }
+ auto *GEP = dyn_cast<GetElementPtrInst>(V);
+ if (GEP && GEP->getNumOperands() == 2) {
+ if (isa<ConstantInt>(GEP->getOperand(GEP->getNumOperands() - 1))) {
+ return {{cast<ConstantInt>(GEP->getOperand(GEP->getNumOperands() - 1))
+ ->getSExtValue(),
+ nullptr},
+ {1, GEP->getPointerOperand()}};
+ }
+ Value *Op0;
+ ConstantInt *CI;
+ if (match(GEP->getOperand(GEP->getNumOperands() - 1),
+ m_NUWShl(m_Value(Op0), m_ConstantInt(CI))))
+ return {{0, nullptr},
+ {1, GEP->getPointerOperand()},
+ {std::pow(int64_t(2), CI->getSExtValue()), Op0}};
+ if (match(GEP->getOperand(GEP->getNumOperands() - 1),
+ m_ZExt(m_NUWShl(m_Value(Op0), m_ConstantInt(CI)))))
+ return {{0, nullptr},
+ {1, GEP->getPointerOperand()},
+ {std::pow(int64_t(2), CI->getSExtValue()), Op0}};
+
+ return {{0, nullptr},
+ {1, GEP->getPointerOperand()},
+ {1, GEP->getOperand(GEP->getNumOperands() - 1)}};
+ }
+
+ Value *Op0;
+ Value *Op1;
+ ConstantInt *CI;
+ if (match(V, m_NUWAdd(m_Value(Op0), m_ConstantInt(CI))))
+ return {{CI->getSExtValue(), nullptr}, {1, Op0}};
+ if (match(V, m_NUWAdd(m_Value(Op0), m_Value(Op1))))
+ return {{0, nullptr}, {1, Op0}, {1, Op1}};
+
+ if (match(V, m_NUWSub(m_Value(Op0), m_ConstantInt(CI))))
+ return {{-1 * CI->getSExtValue(), nullptr}, {1, Op0}};
+ if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1))))
+ return {{0, nullptr}, {1, Op0}, {1, Op1}};
+
+ return {{0, nullptr}, {1, V}};
+}
+
+/// Turn a condition \p CmpI into a constraint vector, using indices from \p
+/// Value2Index. If \p ShouldAdd is true, new indices are added for values not
+/// yet in \p Value2Index.
+static SmallVector<int64_t, 8>
+getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
+ DenseMap<Value *, unsigned> &Value2Index, bool ShouldAdd) {
+ int64_t Offset1 = 0;
+ int64_t Offset2 = 0;
+
+ auto TryToGetIndex = [ShouldAdd,
+ &Value2Index](Value *V) -> Optional<unsigned> {
+ if (ShouldAdd) {
+ Value2Index.insert({V, Value2Index.size() + 1});
+ return Value2Index[V];
+ }
+ auto I = Value2Index.find(V);
+ if (I == Value2Index.end())
+ return None;
+ return I->second;
+ };
+
+ if (Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_UGE)
+ return getConstraint(CmpInst::getSwappedPredicate(Pred), Op1, Op0,
+ Value2Index, ShouldAdd);
+
+ // Only ULE and ULT predicates are supported at the moment.
+ if (Pred != CmpInst::ICMP_ULE && Pred != CmpInst::ICMP_ULT)
+ return {};
+
+ auto ADec = decompose(Op0);
+ auto BDec = decompose(Op1);
+ // Skip if decomposing either of the values failed.
+ if (ADec.empty() || BDec.empty())
+ return {};
+
+ // Skip trivial constraints without any variables.
+ if (ADec.size() == 1 && BDec.size() == 1)
+ return {};
+
+ Offset1 = ADec[0].first;
+ Offset2 = BDec[0].first;
+ Offset1 *= -1;
+
+ // Create iterator ranges that skip the constant-factor.
+ auto VariablesA = make_range(std::next(ADec.begin()), ADec.end());
+ auto VariablesB = make_range(std::next(BDec.begin()), BDec.end());
+
+ // Check if each referenced value in the constraint is already in the system
+ // or can be added (if ShouldAdd is true).
+ for (const auto &KV :
+ concat<std::pair<int64_t, Value *>>(VariablesA, VariablesB))
+ if (!TryToGetIndex(KV.second))
+ return {};
+
+ // Build result constraint, by first adding all coefficients from A and then
+ // subtracting all coefficients from B.
+ SmallVector<int64_t, 8> R(Value2Index.size() + 1, 0);
+ for (const auto &KV : VariablesA)
+ R[Value2Index[KV.second]] += KV.first;
+
+ for (const auto &KV : VariablesB)
+ R[Value2Index[KV.second]] -= KV.first;
+
+ R[0] = Offset1 + Offset2 + (Pred == CmpInst::ICMP_ULT ? -1 : 0);
+ return R;
+}
+
+static SmallVector<int64_t, 8>
+getConstraint(CmpInst *Cmp, DenseMap<Value *, unsigned> &Value2Index,
+ bool ShouldAdd) {
+ return getConstraint(Cmp->getPredicate(), Cmp->getOperand(0),
+ Cmp->getOperand(1), Value2Index, ShouldAdd);
+}
+
+namespace {
+/// Represents either a condition that holds on entry to a block or a basic
+/// block, with their respective Dominator DFS in and out numbers.
+struct ConstraintOrBlock {
+ unsigned NumIn;
+ unsigned NumOut;
+ bool IsBlock;
+ bool Not;
+ union {
+ BasicBlock *BB;
+ CmpInst *Condition;
+ };
+
+ ConstraintOrBlock(DomTreeNode *DTN)
+ : NumIn(DTN->getDFSNumIn()), NumOut(DTN->getDFSNumOut()), IsBlock(true),
+ BB(DTN->getBlock()) {}
+ ConstraintOrBlock(DomTreeNode *DTN, CmpInst *Condition, bool Not)
+ : NumIn(DTN->getDFSNumIn()), NumOut(DTN->getDFSNumOut()), IsBlock(false),
+ Not(Not), Condition(Condition) {}
+};
+
+struct StackEntry {
+ unsigned NumIn;
+ unsigned NumOut;
+ CmpInst *Condition;
+ bool IsNot;
+
+ StackEntry(unsigned NumIn, unsigned NumOut, CmpInst *Condition, bool IsNot)
+ : NumIn(NumIn), NumOut(NumOut), Condition(Condition), IsNot(IsNot) {}
+};
+} // namespace
+
+static bool eliminateConstraints(Function &F, DominatorTree &DT) {
+ bool Changed = false;
+ DT.updateDFSNumbers();
+ ConstraintSystem CS;
+
+ SmallVector<ConstraintOrBlock, 64> WorkList;
+
+ // First, collect conditions implied by branches and blocks with their
+ // Dominator DFS in and out numbers.
+ for (BasicBlock &BB : F) {
+ if (!DT.getNode(&BB))
+ continue;
+ WorkList.emplace_back(DT.getNode(&BB));
+
+ auto *Br = dyn_cast<BranchInst>(BB.getTerminator());
+ if (!Br || !Br->isConditional())
+ continue;
+
+ // If the condition is an OR of 2 compares and the false successor only has
+ // the current block as predecessor, queue both negated conditions for the
+ // false successor.
+ Value *Op0, *Op1;
+ if (match(Br->getCondition(), m_LogicalOr(m_Value(Op0), m_Value(Op1))) &&
+ match(Op0, m_Cmp()) && match(Op1, m_Cmp())) {
+ BasicBlock *FalseSuccessor = Br->getSuccessor(1);
+ if (FalseSuccessor->getSinglePredecessor()) {
+ WorkList.emplace_back(DT.getNode(FalseSuccessor), cast<CmpInst>(Op0),
+ true);
+ WorkList.emplace_back(DT.getNode(FalseSuccessor), cast<CmpInst>(Op1),
+ true);
+ }
+ continue;
+ }
+
+ // If the condition is an AND of 2 compares and the true successor only has
+ // the current block as predecessor, queue both conditions for the true
+ // successor.
+ if (match(Br->getCondition(), m_LogicalAnd(m_Value(Op0), m_Value(Op1))) &&
+ match(Op0, m_Cmp()) && match(Op1, m_Cmp())) {
+ BasicBlock *TrueSuccessor = Br->getSuccessor(0);
+ if (TrueSuccessor->getSinglePredecessor()) {
+ WorkList.emplace_back(DT.getNode(TrueSuccessor), cast<CmpInst>(Op0),
+ false);
+ WorkList.emplace_back(DT.getNode(TrueSuccessor), cast<CmpInst>(Op1),
+ false);
+ }
+ continue;
+ }
+
+ auto *CmpI = dyn_cast<CmpInst>(Br->getCondition());
+ if (!CmpI)
+ continue;
+ if (Br->getSuccessor(0)->getSinglePredecessor())
+ WorkList.emplace_back(DT.getNode(Br->getSuccessor(0)), CmpI, false);
+ if (Br->getSuccessor(1)->getSinglePredecessor())
+ WorkList.emplace_back(DT.getNode(Br->getSuccessor(1)), CmpI, true);
+ }
+
+ // Next, sort worklist by dominance, so that dominating blocks and conditions
+ // come before blocks and conditions dominated by them. If a block and a
+ // condition have the same numbers, the condition comes before the block, as
+ // it holds on entry to the block.
+ sort(WorkList, [](const ConstraintOrBlock &A, const ConstraintOrBlock &B) {
+ return std::tie(A.NumIn, A.IsBlock) < std::tie(B.NumIn, B.IsBlock);
+ });
+
+ // Finally, process ordered worklist and eliminate implied conditions.
+ SmallVector<StackEntry, 16> DFSInStack;
+ DenseMap<Value *, unsigned> Value2Index;
+ for (ConstraintOrBlock &CB : WorkList) {
+ // First, pop entries from the stack that are out-of-scope for CB. Remove
+ // the corresponding entry from the constraint system.
+ while (!DFSInStack.empty()) {
+ auto &E = DFSInStack.back();
+ LLVM_DEBUG(dbgs() << "Top of stack : " << E.NumIn << " " << E.NumOut
+ << "\n");
+ LLVM_DEBUG(dbgs() << "CB: " << CB.NumIn << " " << CB.NumOut << "\n");
+ assert(E.NumIn <= CB.NumIn);
+ if (CB.NumOut <= E.NumOut)
+ break;
+ LLVM_DEBUG(dbgs() << "Removing " << *E.Condition << " " << E.IsNot
+ << "\n");
+ DFSInStack.pop_back();
+ CS.popLastConstraint();
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "Processing ";
+ if (CB.IsBlock)
+ dbgs() << *CB.BB;
+ else
+ dbgs() << *CB.Condition;
+ dbgs() << "\n";
+ });
+
+ // For a block, check if any CmpInsts become known based on the current set
+ // of constraints.
+ if (CB.IsBlock) {
+ for (Instruction &I : *CB.BB) {
+ auto *Cmp = dyn_cast<CmpInst>(&I);
+ if (!Cmp)
+ continue;
+ auto R = getConstraint(Cmp, Value2Index, false);
+ if (R.empty() || R.size() == 1)
+ continue;
+ if (CS.isConditionImplied(R)) {
+ if (!DebugCounter::shouldExecute(EliminatedCounter))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Condition " << *Cmp
+ << " implied by dominating constraints\n");
+ LLVM_DEBUG({
+ for (auto &E : reverse(DFSInStack))
+ dbgs() << " C " << *E.Condition << " " << E.IsNot << "\n";
+ });
+ Cmp->replaceAllUsesWith(
+ ConstantInt::getTrue(F.getParent()->getContext()));
+ NumCondsRemoved++;
+ Changed = true;
+ }
+ if (CS.isConditionImplied(ConstraintSystem::negate(R))) {
+ if (!DebugCounter::shouldExecute(EliminatedCounter))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Condition !" << *Cmp
+ << " implied by dominating constraints\n");
+ LLVM_DEBUG({
+ for (auto &E : reverse(DFSInStack))
+ dbgs() << " C " << *E.Condition << " " << E.IsNot << "\n";
+ });
+ Cmp->replaceAllUsesWith(
+ ConstantInt::getFalse(F.getParent()->getContext()));
+ NumCondsRemoved++;
+ Changed = true;
+ }
+ }
+ continue;
+ }
+
+ // Otherwise, add the condition to the system and stack, if we can transform
+ // it into a constraint.
+ auto R = getConstraint(CB.Condition, Value2Index, true);
+ if (R.empty())
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Adding " << *CB.Condition << " " << CB.Not << "\n");
+ if (CB.Not)
+ R = ConstraintSystem::negate(R);
+
+ // If R has been added to the system, queue it for removal once it goes
+ // out-of-scope.
+ if (CS.addVariableRowFill(R))
+ DFSInStack.emplace_back(CB.NumIn, CB.NumOut, CB.Condition, CB.Not);
+ }
+
+ return Changed;
+}
+
+PreservedAnalyses ConstraintEliminationPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ if (!eliminateConstraints(F, DT))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<GlobalsAA>();
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
+
+namespace {
+
+class ConstraintElimination : public FunctionPass {
+public:
+ static char ID;
+
+ ConstraintElimination() : FunctionPass(ID) {
+ initializeConstraintEliminationPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ return eliminateConstraints(F, DT);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
+};
+
+} // end anonymous namespace
+
+char ConstraintElimination::ID = 0;
+
+INITIALIZE_PASS_BEGIN(ConstraintElimination, "constraint-elimination",
+ "Constraint Elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
+INITIALIZE_PASS_END(ConstraintElimination, "constraint-elimination",
+ "Constraint Elimination", false, false)
+
+FunctionPass *llvm::createConstraintEliminationPass() {
+ return new ConstraintElimination();
+}
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index b671d68031..c6a0c3ee7d 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -58,11 +58,11 @@ STATISTIC(NumMemAccess, "Number of memory access targets propagated");
STATISTIC(NumCmps, "Number of comparisons propagated");
STATISTIC(NumReturns, "Number of return values propagated");
STATISTIC(NumDeadCases, "Number of switch cases removed");
-STATISTIC(NumSDivSRemsNarrowed,
- "Number of sdivs/srems whose width was decreased");
+STATISTIC(NumSDivSRemsNarrowed,
+ "Number of sdivs/srems whose width was decreased");
STATISTIC(NumSDivs, "Number of sdiv converted to udiv");
-STATISTIC(NumUDivURemsNarrowed,
- "Number of udivs/urems whose width was decreased");
+STATISTIC(NumUDivURemsNarrowed,
+ "Number of udivs/urems whose width was decreased");
STATISTIC(NumAShrs, "Number of ashr converted to lshr");
STATISTIC(NumSRems, "Number of srem converted to urem");
STATISTIC(NumSExt, "Number of sext converted to zext");
@@ -129,7 +129,7 @@ static bool processSelect(SelectInst *S, LazyValueInfo *LVI) {
if (S->getType()->isVectorTy()) return false;
if (isa<Constant>(S->getCondition())) return false;
- Constant *C = LVI->getConstant(S->getCondition(), S);
+ Constant *C = LVI->getConstant(S->getCondition(), S);
if (!C) return false;
ConstantInt *CI = dyn_cast<ConstantInt>(C);
@@ -286,7 +286,7 @@ static bool processMemAccess(Instruction *I, LazyValueInfo *LVI) {
if (isa<Constant>(Pointer)) return false;
- Constant *C = LVI->getConstant(Pointer, I);
+ Constant *C = LVI->getConstant(Pointer, I);
if (!C) return false;
++NumMemAccess;
@@ -305,8 +305,8 @@ static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
return false;
LazyValueInfo::Tristate Result =
- LVI->getPredicateAt(Cmp->getPredicate(), Op0, C, Cmp,
- /*UseBlockValue=*/true);
+ LVI->getPredicateAt(Cmp->getPredicate(), Op0, C, Cmp,
+ /*UseBlockValue=*/true);
if (Result == LazyValueInfo::Unknown)
return false;
@@ -342,9 +342,9 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
ConstantInt *Case = CI->getCaseValue();
- LazyValueInfo::Tristate State =
- LVI->getPredicateAt(CmpInst::ICMP_EQ, Cond, Case, I,
- /* UseBlockValue */ true);
+ LazyValueInfo::Tristate State =
+ LVI->getPredicateAt(CmpInst::ICMP_EQ, Cond, Case, I,
+ /* UseBlockValue */ true);
if (State == LazyValueInfo::False) {
// This case never fires - remove it.
@@ -388,8 +388,8 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
// See if we can prove that the given binary op intrinsic will not overflow.
static bool willNotOverflow(BinaryOpIntrinsic *BO, LazyValueInfo *LVI) {
- ConstantRange LRange = LVI->getConstantRange(BO->getLHS(), BO);
- ConstantRange RRange = LVI->getConstantRange(BO->getRHS(), BO);
+ ConstantRange LRange = LVI->getConstantRange(BO->getLHS(), BO);
+ ConstantRange RRange = LVI->getConstantRange(BO->getRHS(), BO);
ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
BO->getBinaryOp(), RRange, BO->getNoWrapKind());
return NWRegion.contains(LRange);
@@ -504,8 +504,8 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) {
}
}
- bool Changed = false;
-
+ bool Changed = false;
+
// Deopt bundle operands are intended to capture state with minimal
// perturbance of the code otherwise. If we can find a constant value for
// any such operand and remove a use of the original value, that's
@@ -520,16 +520,16 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) {
if (V->getType()->isVectorTy()) continue;
if (isa<Constant>(V)) continue;
- Constant *C = LVI->getConstant(V, &CB);
+ Constant *C = LVI->getConstant(V, &CB);
if (!C) continue;
U.set(C);
- Changed = true;
+ Changed = true;
}
}
- SmallVector<unsigned, 4> ArgNos;
- unsigned ArgNo = 0;
-
+ SmallVector<unsigned, 4> ArgNos;
+ unsigned ArgNo = 0;
+
for (Value *V : CB.args()) {
PointerType *Type = dyn_cast<PointerType>(V->getType());
// Try to mark pointer typed parameters as non-null. We skip the
@@ -547,7 +547,7 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) {
assert(ArgNo == CB.arg_size() && "sanity check");
if (ArgNos.empty())
- return Changed;
+ return Changed;
AttributeList AS = CB.getAttributes();
LLVMContext &Ctx = CB.getContext();
@@ -558,79 +558,79 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) {
return true;
}
-static bool isNonNegative(Value *V, LazyValueInfo *LVI, Instruction *CxtI) {
- Constant *Zero = ConstantInt::get(V->getType(), 0);
- auto Result = LVI->getPredicateAt(ICmpInst::ICMP_SGE, V, Zero, CxtI);
- return Result == LazyValueInfo::True;
-}
-
-static bool isNonPositive(Value *V, LazyValueInfo *LVI, Instruction *CxtI) {
- Constant *Zero = ConstantInt::get(V->getType(), 0);
- auto Result = LVI->getPredicateAt(ICmpInst::ICMP_SLE, V, Zero, CxtI);
- return Result == LazyValueInfo::True;
-}
-
-enum class Domain { NonNegative, NonPositive, Unknown };
-
-Domain getDomain(Value *V, LazyValueInfo *LVI, Instruction *CxtI) {
- if (isNonNegative(V, LVI, CxtI))
- return Domain::NonNegative;
- if (isNonPositive(V, LVI, CxtI))
- return Domain::NonPositive;
- return Domain::Unknown;
-}
-
-/// Try to shrink a sdiv/srem's width down to the smallest power of two that's
-/// sufficient to contain its operands.
-static bool narrowSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) {
- assert(Instr->getOpcode() == Instruction::SDiv ||
- Instr->getOpcode() == Instruction::SRem);
- if (Instr->getType()->isVectorTy())
- return false;
-
- // Find the smallest power of two bitwidth that's sufficient to hold Instr's
- // operands.
- unsigned OrigWidth = Instr->getType()->getIntegerBitWidth();
-
- // What is the smallest bit width that can accomodate the entire value ranges
- // of both of the operands?
- std::array<Optional<ConstantRange>, 2> CRs;
- unsigned MinSignedBits = 0;
- for (auto I : zip(Instr->operands(), CRs)) {
- std::get<1>(I) = LVI->getConstantRange(std::get<0>(I), Instr);
- MinSignedBits = std::max(std::get<1>(I)->getMinSignedBits(), MinSignedBits);
+static bool isNonNegative(Value *V, LazyValueInfo *LVI, Instruction *CxtI) {
+ Constant *Zero = ConstantInt::get(V->getType(), 0);
+ auto Result = LVI->getPredicateAt(ICmpInst::ICMP_SGE, V, Zero, CxtI);
+ return Result == LazyValueInfo::True;
+}
+
+static bool isNonPositive(Value *V, LazyValueInfo *LVI, Instruction *CxtI) {
+ Constant *Zero = ConstantInt::get(V->getType(), 0);
+ auto Result = LVI->getPredicateAt(ICmpInst::ICMP_SLE, V, Zero, CxtI);
+ return Result == LazyValueInfo::True;
+}
+
+enum class Domain { NonNegative, NonPositive, Unknown };
+
+Domain getDomain(Value *V, LazyValueInfo *LVI, Instruction *CxtI) {
+ if (isNonNegative(V, LVI, CxtI))
+ return Domain::NonNegative;
+ if (isNonPositive(V, LVI, CxtI))
+ return Domain::NonPositive;
+ return Domain::Unknown;
+}
+
+/// Try to shrink a sdiv/srem's width down to the smallest power of two that's
+/// sufficient to contain its operands.
+static bool narrowSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) {
+ assert(Instr->getOpcode() == Instruction::SDiv ||
+ Instr->getOpcode() == Instruction::SRem);
+ if (Instr->getType()->isVectorTy())
+ return false;
+
+ // Find the smallest power of two bitwidth that's sufficient to hold Instr's
+ // operands.
+ unsigned OrigWidth = Instr->getType()->getIntegerBitWidth();
+
+ // What is the smallest bit width that can accomodate the entire value ranges
+ // of both of the operands?
+ std::array<Optional<ConstantRange>, 2> CRs;
+ unsigned MinSignedBits = 0;
+ for (auto I : zip(Instr->operands(), CRs)) {
+ std::get<1>(I) = LVI->getConstantRange(std::get<0>(I), Instr);
+ MinSignedBits = std::max(std::get<1>(I)->getMinSignedBits(), MinSignedBits);
}
-
- // sdiv/srem is UB if divisor is -1 and divident is INT_MIN, so unless we can
- // prove that such a combination is impossible, we need to bump the bitwidth.
- if (CRs[1]->contains(APInt::getAllOnesValue(OrigWidth)) &&
- CRs[0]->contains(
- APInt::getSignedMinValue(MinSignedBits).sextOrSelf(OrigWidth)))
- ++MinSignedBits;
-
- // Don't shrink below 8 bits wide.
- unsigned NewWidth = std::max<unsigned>(PowerOf2Ceil(MinSignedBits), 8);
-
- // NewWidth might be greater than OrigWidth if OrigWidth is not a power of
- // two.
- if (NewWidth >= OrigWidth)
- return false;
-
- ++NumSDivSRemsNarrowed;
- IRBuilder<> B{Instr};
- auto *TruncTy = Type::getIntNTy(Instr->getContext(), NewWidth);
- auto *LHS = B.CreateTruncOrBitCast(Instr->getOperand(0), TruncTy,
- Instr->getName() + ".lhs.trunc");
- auto *RHS = B.CreateTruncOrBitCast(Instr->getOperand(1), TruncTy,
- Instr->getName() + ".rhs.trunc");
- auto *BO = B.CreateBinOp(Instr->getOpcode(), LHS, RHS, Instr->getName());
- auto *Sext = B.CreateSExt(BO, Instr->getType(), Instr->getName() + ".sext");
- if (auto *BinOp = dyn_cast<BinaryOperator>(BO))
- if (BinOp->getOpcode() == Instruction::SDiv)
- BinOp->setIsExact(Instr->isExact());
-
- Instr->replaceAllUsesWith(Sext);
- Instr->eraseFromParent();
+
+ // sdiv/srem is UB if divisor is -1 and divident is INT_MIN, so unless we can
+ // prove that such a combination is impossible, we need to bump the bitwidth.
+ if (CRs[1]->contains(APInt::getAllOnesValue(OrigWidth)) &&
+ CRs[0]->contains(
+ APInt::getSignedMinValue(MinSignedBits).sextOrSelf(OrigWidth)))
+ ++MinSignedBits;
+
+ // Don't shrink below 8 bits wide.
+ unsigned NewWidth = std::max<unsigned>(PowerOf2Ceil(MinSignedBits), 8);
+
+ // NewWidth might be greater than OrigWidth if OrigWidth is not a power of
+ // two.
+ if (NewWidth >= OrigWidth)
+ return false;
+
+ ++NumSDivSRemsNarrowed;
+ IRBuilder<> B{Instr};
+ auto *TruncTy = Type::getIntNTy(Instr->getContext(), NewWidth);
+ auto *LHS = B.CreateTruncOrBitCast(Instr->getOperand(0), TruncTy,
+ Instr->getName() + ".lhs.trunc");
+ auto *RHS = B.CreateTruncOrBitCast(Instr->getOperand(1), TruncTy,
+ Instr->getName() + ".rhs.trunc");
+ auto *BO = B.CreateBinOp(Instr->getOpcode(), LHS, RHS, Instr->getName());
+ auto *Sext = B.CreateSExt(BO, Instr->getType(), Instr->getName() + ".sext");
+ if (auto *BinOp = dyn_cast<BinaryOperator>(BO))
+ if (BinOp->getOpcode() == Instruction::SDiv)
+ BinOp->setIsExact(Instr->isExact());
+
+ Instr->replaceAllUsesWith(Sext);
+ Instr->eraseFromParent();
return true;
}
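As a rough standalone sketch of the width choice made above (the helper name pickNarrowedWidth is invented for illustration, and C++20 std::bit_ceil stands in for llvm::PowerOf2Ceil):

#include <algorithm>
#include <bit>
#include <cstdio>

// Round the minimum signed bit-width up to a power of two, never below 8
// bits; narrowing only pays off if that is strictly smaller than the
// original type, otherwise the original width is kept.
static unsigned pickNarrowedWidth(unsigned MinSignedBits, unsigned OrigWidth) {
  unsigned NewWidth = std::max(std::bit_ceil(MinSignedBits), 8u);
  return NewWidth < OrigWidth ? NewWidth : OrigWidth;
}

int main() {
  std::printf("%u\n", pickNarrowedWidth(13, 64)); // operands fit in 13 signed bits -> 16
  std::printf("%u\n", pickNarrowedWidth(33, 64)); // 33 bits rounds up to 64 -> no narrowing
}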
@@ -644,23 +644,23 @@ static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) {
// Find the smallest power of two bitwidth that's sufficient to hold Instr's
// operands.
-
- // What is the smallest bit width that can accommodate the entire value ranges
- // of both of the operands?
- unsigned MaxActiveBits = 0;
+
+ // What is the smallest bit width that can accommodate the entire value ranges
+ // of both of the operands?
+ unsigned MaxActiveBits = 0;
for (Value *Operand : Instr->operands()) {
- ConstantRange CR = LVI->getConstantRange(Operand, Instr);
- MaxActiveBits = std::max(CR.getActiveBits(), MaxActiveBits);
+ ConstantRange CR = LVI->getConstantRange(Operand, Instr);
+ MaxActiveBits = std::max(CR.getActiveBits(), MaxActiveBits);
}
// Don't shrink below 8 bits wide.
- unsigned NewWidth = std::max<unsigned>(PowerOf2Ceil(MaxActiveBits), 8);
-
+ unsigned NewWidth = std::max<unsigned>(PowerOf2Ceil(MaxActiveBits), 8);
+
// NewWidth might be greater than OrigWidth if OrigWidth is not a power of
// two.
- if (NewWidth >= Instr->getType()->getIntegerBitWidth())
+ if (NewWidth >= Instr->getType()->getIntegerBitWidth())
return false;
- ++NumUDivURemsNarrowed;
+ ++NumUDivURemsNarrowed;
IRBuilder<> B{Instr};
auto *TruncTy = Type::getIntNTy(Instr->getContext(), NewWidth);
auto *LHS = B.CreateTruncOrBitCast(Instr->getOperand(0), TruncTy,
@@ -679,135 +679,135 @@ static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) {
}
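The unsigned variant in processUDivOrURem above keys off the active bits of the ranges rather than signed bits. A minimal sketch of that computation (pickUnsignedWidth and its bound parameters are invented for illustration; std::bit_width and std::bit_ceil stand in for APInt::getActiveBits and PowerOf2Ceil):

#include <algorithm>
#include <bit>
#include <cassert>

// Given the largest possible value of each operand, compute the narrowed
// power-of-two width (never below 8 bits) a udiv/urem could use.
static unsigned pickUnsignedWidth(unsigned MaxLHS, unsigned MaxRHS) {
  unsigned MaxActiveBits =
      std::max<unsigned>(std::bit_width(MaxLHS), std::bit_width(MaxRHS));
  return std::max(std::bit_ceil(MaxActiveBits), 8u);
}

int main() {
  assert(pickUnsignedWidth(1000, 60000) == 16); // 16 active bits -> i16
  assert(pickUnsignedWidth(3, 7) == 8);         // tiny ranges still round up to i8
}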
static bool processSRem(BinaryOperator *SDI, LazyValueInfo *LVI) {
- assert(SDI->getOpcode() == Instruction::SRem);
- if (SDI->getType()->isVectorTy())
+ assert(SDI->getOpcode() == Instruction::SRem);
+ if (SDI->getType()->isVectorTy())
return false;
- struct Operand {
- Value *V;
- Domain D;
- };
- std::array<Operand, 2> Ops;
-
- for (const auto I : zip(Ops, SDI->operands())) {
- Operand &Op = std::get<0>(I);
- Op.V = std::get<1>(I);
- Op.D = getDomain(Op.V, LVI, SDI);
- if (Op.D == Domain::Unknown)
- return false;
- }
-
- // We know domains of both of the operands!
+ struct Operand {
+ Value *V;
+ Domain D;
+ };
+ std::array<Operand, 2> Ops;
+
+ for (const auto I : zip(Ops, SDI->operands())) {
+ Operand &Op = std::get<0>(I);
+ Op.V = std::get<1>(I);
+ Op.D = getDomain(Op.V, LVI, SDI);
+ if (Op.D == Domain::Unknown)
+ return false;
+ }
+
+ // We know domains of both of the operands!
++NumSRems;
-
- // We need operands to be non-negative, so negate each one that isn't.
- for (Operand &Op : Ops) {
- if (Op.D == Domain::NonNegative)
- continue;
- auto *BO =
- BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", SDI);
- BO->setDebugLoc(SDI->getDebugLoc());
- Op.V = BO;
- }
-
- auto *URem =
- BinaryOperator::CreateURem(Ops[0].V, Ops[1].V, SDI->getName(), SDI);
- URem->setDebugLoc(SDI->getDebugLoc());
-
- Value *Res = URem;
-
- // If the dividend was non-positive, we need to negate the result.
- if (Ops[0].D == Domain::NonPositive)
- Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", SDI);
-
- SDI->replaceAllUsesWith(Res);
+
+ // We need operands to be non-negative, so negate each one that isn't.
+ for (Operand &Op : Ops) {
+ if (Op.D == Domain::NonNegative)
+ continue;
+ auto *BO =
+ BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", SDI);
+ BO->setDebugLoc(SDI->getDebugLoc());
+ Op.V = BO;
+ }
+
+ auto *URem =
+ BinaryOperator::CreateURem(Ops[0].V, Ops[1].V, SDI->getName(), SDI);
+ URem->setDebugLoc(SDI->getDebugLoc());
+
+ Value *Res = URem;
+
+ // If the dividend was non-positive, we need to negate the result.
+ if (Ops[0].D == Domain::NonPositive)
+ Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", SDI);
+
+ SDI->replaceAllUsesWith(Res);
SDI->eraseFromParent();
- // Try to simplify our new urem.
- processUDivOrURem(URem, LVI);
+ // Try to simplify our new urem.
+ processUDivOrURem(URem, LVI);
return true;
}
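The rewrite above relies on the identity that, once both operand signs are known, a truncating signed remainder can be computed with an unsigned remainder on the magnitudes. A small self-contained check of that identity (sremViaUrem is an invented name; the pass itself of course works on IR, not on int32_t):

#include <cassert>
#include <cstdint>

// Negate operands into the non-negative domain, take an unsigned remainder,
// then negate the result iff the dividend was negative.
static int32_t sremViaUrem(int32_t a, int32_t b) {
  uint32_t ua = a < 0 ? 0u - uint32_t(a) : uint32_t(a);
  uint32_t ub = b < 0 ? 0u - uint32_t(b) : uint32_t(b);
  uint32_t r = ua % ub;                    // the urem the pass emits
  return a < 0 ? -int32_t(r) : int32_t(r); // extra negate for a non-positive dividend
}

int main() {
  assert(sremViaUrem(-7, 3) == -7 % 3);   // -1
  assert(sremViaUrem(7, -3) == 7 % -3);   //  1
  assert(sremViaUrem(-7, -3) == -7 % -3); // -1
}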
/// See if LazyValueInfo's ability to exploit edge conditions or range
-/// information is sufficient to prove the signs of both operands of this SDiv.
-/// If this is the case, replace the SDiv with a UDiv. Even for local
+/// information is sufficient to prove the signs of both operands of this SDiv.
+/// If this is the case, replace the SDiv with a UDiv. Even for local
/// conditions, this can sometimes prove conditions instcombine can't by
/// exploiting range information.
static bool processSDiv(BinaryOperator *SDI, LazyValueInfo *LVI) {
- assert(SDI->getOpcode() == Instruction::SDiv);
- if (SDI->getType()->isVectorTy())
+ assert(SDI->getOpcode() == Instruction::SDiv);
+ if (SDI->getType()->isVectorTy())
return false;
- struct Operand {
- Value *V;
- Domain D;
- };
- std::array<Operand, 2> Ops;
-
- for (const auto I : zip(Ops, SDI->operands())) {
- Operand &Op = std::get<0>(I);
- Op.V = std::get<1>(I);
- Op.D = getDomain(Op.V, LVI, SDI);
- if (Op.D == Domain::Unknown)
- return false;
- }
-
- // We know domains of both of the operands!
+ struct Operand {
+ Value *V;
+ Domain D;
+ };
+ std::array<Operand, 2> Ops;
+
+ for (const auto I : zip(Ops, SDI->operands())) {
+ Operand &Op = std::get<0>(I);
+ Op.V = std::get<1>(I);
+ Op.D = getDomain(Op.V, LVI, SDI);
+ if (Op.D == Domain::Unknown)
+ return false;
+ }
+
+ // We know domains of both of the operands!
++NumSDivs;
-
- // We need operands to be non-negative, so negate each one that isn't.
- for (Operand &Op : Ops) {
- if (Op.D == Domain::NonNegative)
- continue;
- auto *BO =
- BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", SDI);
- BO->setDebugLoc(SDI->getDebugLoc());
- Op.V = BO;
- }
-
- auto *UDiv =
- BinaryOperator::CreateUDiv(Ops[0].V, Ops[1].V, SDI->getName(), SDI);
- UDiv->setDebugLoc(SDI->getDebugLoc());
- UDiv->setIsExact(SDI->isExact());
-
- Value *Res = UDiv;
-
- // If the operands had two different domains, we need to negate the result.
- if (Ops[0].D != Ops[1].D)
- Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", SDI);
-
- SDI->replaceAllUsesWith(Res);
+
+ // We need operands to be non-negative, so negate each one that isn't.
+ for (Operand &Op : Ops) {
+ if (Op.D == Domain::NonNegative)
+ continue;
+ auto *BO =
+ BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", SDI);
+ BO->setDebugLoc(SDI->getDebugLoc());
+ Op.V = BO;
+ }
+
+ auto *UDiv =
+ BinaryOperator::CreateUDiv(Ops[0].V, Ops[1].V, SDI->getName(), SDI);
+ UDiv->setDebugLoc(SDI->getDebugLoc());
+ UDiv->setIsExact(SDI->isExact());
+
+ Value *Res = UDiv;
+
+ // If the operands had two different domains, we need to negate the result.
+ if (Ops[0].D != Ops[1].D)
+ Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", SDI);
+
+ SDI->replaceAllUsesWith(Res);
SDI->eraseFromParent();
// Try to simplify our new udiv.
- processUDivOrURem(UDiv, LVI);
+ processUDivOrURem(UDiv, LVI);
return true;
}
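processSDiv uses the analogous identity for division, with the extra twist that the quotient is negated when the operand signs differ. Again a hypothetical scalar check, not the pass itself:

#include <cassert>
#include <cstdint>

// Divide the magnitudes unsigned, then negate the quotient iff exactly one
// operand was negative.
static int32_t sdivViaUdiv(int32_t a, int32_t b) {
  uint32_t ua = a < 0 ? 0u - uint32_t(a) : uint32_t(a);
  uint32_t ub = b < 0 ? 0u - uint32_t(b) : uint32_t(b);
  uint32_t q = ua / ub;                             // the udiv the pass emits
  return (a < 0) != (b < 0) ? -int32_t(q) : int32_t(q);
}

int main() {
  assert(sdivViaUdiv(-7, 3) == -7 / 3);   // -2
  assert(sdivViaUdiv(7, -3) == 7 / -3);   // -2
  assert(sdivViaUdiv(-7, -3) == -7 / -3); //  2
}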
-static bool processSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) {
- assert(Instr->getOpcode() == Instruction::SDiv ||
- Instr->getOpcode() == Instruction::SRem);
- if (Instr->getType()->isVectorTy())
- return false;
-
- if (Instr->getOpcode() == Instruction::SDiv)
- if (processSDiv(Instr, LVI))
- return true;
-
- if (Instr->getOpcode() == Instruction::SRem)
- if (processSRem(Instr, LVI))
- return true;
-
- return narrowSDivOrSRem(Instr, LVI);
-}
-
+static bool processSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) {
+ assert(Instr->getOpcode() == Instruction::SDiv ||
+ Instr->getOpcode() == Instruction::SRem);
+ if (Instr->getType()->isVectorTy())
+ return false;
+
+ if (Instr->getOpcode() == Instruction::SDiv)
+ if (processSDiv(Instr, LVI))
+ return true;
+
+ if (Instr->getOpcode() == Instruction::SRem)
+ if (processSRem(Instr, LVI))
+ return true;
+
+ return narrowSDivOrSRem(Instr, LVI);
+}
+
static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) {
if (SDI->getType()->isVectorTy())
return false;
- if (!isNonNegative(SDI->getOperand(0), LVI, SDI))
+ if (!isNonNegative(SDI->getOperand(0), LVI, SDI))
return false;
++NumAShrs;
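The remainder of processAShr is cut off by the next hunk, but the guard shown here is what makes the ashr-to-lshr replacement it presumably performs sound: the two shifts agree on non-negative values. A quick scalar spot-check of that fact:

#include <cassert>
#include <cstdint>

int main() {
  // For values proven non-negative, arithmetic and logical right shift agree.
  for (int32_t v : {0, 1, 5, 12345, INT32_MAX})
    for (unsigned s = 0; s < 31; ++s)
      assert((v >> s) == int32_t(uint32_t(v) >> s));
}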
@@ -827,7 +827,7 @@ static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) {
Value *Base = SDI->getOperand(0);
- if (!isNonNegative(Base, LVI, SDI))
+ if (!isNonNegative(Base, LVI, SDI))
return false;
++NumSExt;
@@ -858,8 +858,8 @@ static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) {
Value *LHS = BinOp->getOperand(0);
Value *RHS = BinOp->getOperand(1);
- ConstantRange LRange = LVI->getConstantRange(LHS, BinOp);
- ConstantRange RRange = LVI->getConstantRange(RHS, BinOp);
+ ConstantRange LRange = LVI->getConstantRange(LHS, BinOp);
+ ConstantRange RRange = LVI->getConstantRange(RHS, BinOp);
bool Changed = false;
bool NewNUW = false, NewNSW = false;
@@ -895,7 +895,7 @@ static bool processAnd(BinaryOperator *BinOp, LazyValueInfo *LVI) {
// We can only replace the AND with LHS based on range info if the range does
// not include undef.
ConstantRange LRange =
- LVI->getConstantRange(LHS, BinOp, /*UndefAllowed=*/false);
+ LVI->getConstantRange(LHS, BinOp, /*UndefAllowed=*/false);
if (!LRange.getUnsignedMax().ule(RHS->getValue()))
return false;
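The check above encodes a simple fact: if every value the LHS can take already fits under the constant mask (and the range excludes undef, as the comment notes), the and cannot change it and can be replaced by the LHS. A scalar illustration under that assumption:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Mask = 0xFF;
  for (uint32_t x = 0; x <= 15; ++x) // LHS known to lie in [0, 15]
    assert((x & Mask) == x);         // so "x & 0xFF" is a no-op
}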
@@ -907,7 +907,7 @@ static bool processAnd(BinaryOperator *BinOp, LazyValueInfo *LVI) {
static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) {
- if (Constant *C = LVI->getConstant(V, At))
+ if (Constant *C = LVI->getConstant(V, At))
return C;
// TODO: The following really should be sunk inside LVI's core algorithm, or
@@ -962,7 +962,7 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT,
break;
case Instruction::SRem:
case Instruction::SDiv:
- BBChanged |= processSDivOrSRem(cast<BinaryOperator>(II), LVI);
+ BBChanged |= processSDivOrSRem(cast<BinaryOperator>(II), LVI);
break;
case Instruction::UDiv:
case Instruction::URem:
@@ -1031,18 +1031,18 @@ CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) {
bool Changed = runImpl(F, LVI, DT, getBestSimplifyQuery(AM, F));
PreservedAnalyses PA;
- if (!Changed) {
- PA = PreservedAnalyses::all();
- } else {
- PA.preserve<GlobalsAA>();
- PA.preserve<DominatorTreeAnalysis>();
- PA.preserve<LazyValueAnalysis>();
- }
-
- // Keeping LVI alive is expensive, both because it uses a lot of memory, and
- // because invalidating values in LVI is expensive. While CVP does preserve
- // LVI, we know that passes after JumpThreading+CVP will not need the result
- // of this analysis, so we forcefully discard it early.
- PA.abandon<LazyValueAnalysis>();
+ if (!Changed) {
+ PA = PreservedAnalyses::all();
+ } else {
+ PA.preserve<GlobalsAA>();
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<LazyValueAnalysis>();
+ }
+
+ // Keeping LVI alive is expensive, both because it uses a lot of memory, and
+ // because invalidating values in LVI is expensive. While CVP does preserve
+ // LVI, we know that passes after JumpThreading+CVP will not need the result
+ // of this analysis, so we forcefully discard it early.
+ PA.abandon<LazyValueAnalysis>();
return PA;
}
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/DCE.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/DCE.cpp
index d55adf7c2d..5826d9dc96 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/DCE.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/DCE.cpp
@@ -69,18 +69,18 @@ Pass *llvm::createRedundantDbgInstEliminationPass() {
return new RedundantDbgInstElimination();
}
-PreservedAnalyses
-RedundantDbgInstEliminationPass::run(Function &F, FunctionAnalysisManager &AM) {
- bool Changed = false;
- for (auto &BB : F)
- Changed |= RemoveRedundantDbgInstrs(&BB);
- if (!Changed)
- return PreservedAnalyses::all();
- PreservedAnalyses PA;
- PA.preserveSet<CFGAnalyses>();
- return PA;
-}
-
+PreservedAnalyses
+RedundantDbgInstEliminationPass::run(Function &F, FunctionAnalysisManager &AM) {
+ bool Changed = false;
+ for (auto &BB : F)
+ Changed |= RemoveRedundantDbgInstrs(&BB);
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
+
//===--------------------------------------------------------------------===//
// DeadCodeElimination pass implementation
//
@@ -143,7 +143,7 @@ static bool eliminateDeadCode(Function &F, TargetLibraryInfo *TLI) {
}
PreservedAnalyses DCEPass::run(Function &F, FunctionAnalysisManager &AM) {
- if (!eliminateDeadCode(F, &AM.getResult<TargetLibraryAnalysis>(F)))
+ if (!eliminateDeadCode(F, &AM.getResult<TargetLibraryAnalysis>(F)))
return PreservedAnalyses::all();
PreservedAnalyses PA;
@@ -162,14 +162,14 @@ struct DCELegacyPass : public FunctionPass {
if (skipFunction(F))
return false;
- TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
return eliminateDeadCode(F, TLI);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.setPreservesCFG();
}
};
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 2979225c60..e57b1d974b 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -87,10 +87,10 @@ STATISTIC(NumModifiedStores, "Number of stores modified");
STATISTIC(NumCFGChecks, "Number of stores modified");
STATISTIC(NumCFGTries, "Number of stores modified");
STATISTIC(NumCFGSuccess, "Number of stores modified");
-STATISTIC(NumGetDomMemoryDefPassed,
- "Number of times a valid candidate is returned from getDomMemoryDef");
-STATISTIC(NumDomMemDefChecks,
- "Number iterations check for reads in getDomMemoryDef");
+STATISTIC(NumGetDomMemoryDefPassed,
+ "Number of times a valid candidate is returned from getDomMemoryDef");
+STATISTIC(NumDomMemDefChecks,
+ "Number iterations check for reads in getDomMemoryDef");
DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
"Controls which MemoryDefs are eliminated.");
@@ -106,42 +106,42 @@ EnablePartialStoreMerging("enable-dse-partial-store-merging",
cl::desc("Enable partial store merging in DSE"));
static cl::opt<bool>
- EnableMemorySSA("enable-dse-memoryssa", cl::init(true), cl::Hidden,
+ EnableMemorySSA("enable-dse-memoryssa", cl::init(true), cl::Hidden,
cl::desc("Use the new MemorySSA-backed DSE."));
static cl::opt<unsigned>
- MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
+ MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
cl::desc("The number of memory instructions to scan for "
"dead store elimination (default = 100)"));
-static cl::opt<unsigned> MemorySSAUpwardsStepLimit(
- "dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
- cl::desc("The maximum number of steps while walking upwards to find "
- "MemoryDefs that may be killed (default = 90)"));
-
-static cl::opt<unsigned> MemorySSAPartialStoreLimit(
- "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
- cl::desc("The maximum number candidates that only partially overwrite the "
- "killing MemoryDef to consider"
- " (default = 5)"));
-
+static cl::opt<unsigned> MemorySSAUpwardsStepLimit(
+ "dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
+ cl::desc("The maximum number of steps while walking upwards to find "
+ "MemoryDefs that may be killed (default = 90)"));
+
+static cl::opt<unsigned> MemorySSAPartialStoreLimit(
+ "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
+ cl::desc("The maximum number candidates that only partially overwrite the "
+ "killing MemoryDef to consider"
+ " (default = 5)"));
+
static cl::opt<unsigned> MemorySSADefsPerBlockLimit(
"dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
cl::desc("The number of MemoryDefs we consider as candidates to eliminated "
"other stores per basic block (default = 5000)"));
-static cl::opt<unsigned> MemorySSASameBBStepCost(
- "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
- cl::desc(
- "The cost of a step in the same basic block as the killing MemoryDef"
- "(default = 1)"));
-
-static cl::opt<unsigned>
- MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
- cl::Hidden,
- cl::desc("The cost of a step in a different basic "
- "block than the killing MemoryDef"
- "(default = 5)"));
-
+static cl::opt<unsigned> MemorySSASameBBStepCost(
+ "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
+ cl::desc(
+ "The cost of a step in the same basic block as the killing MemoryDef"
+ "(default = 1)"));
+
+static cl::opt<unsigned>
+ MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
+ cl::Hidden,
+ cl::desc("The cost of a step in a different basic "
+ "block than the killing MemoryDef"
+ "(default = 5)"));
+
static cl::opt<unsigned> MemorySSAPathCheckLimit(
"dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,
cl::desc("The maximum number of blocks to check when trying to prove that "
@@ -229,13 +229,13 @@ static bool hasAnalyzableMemoryWrite(Instruction *I,
case Intrinsic::memset:
case Intrinsic::memmove:
case Intrinsic::memcpy:
- case Intrinsic::memcpy_inline:
+ case Intrinsic::memcpy_inline:
case Intrinsic::memcpy_element_unordered_atomic:
case Intrinsic::memmove_element_unordered_atomic:
case Intrinsic::memset_element_unordered_atomic:
case Intrinsic::init_trampoline:
case Intrinsic::lifetime_end:
- case Intrinsic::masked_store:
+ case Intrinsic::masked_store:
return true;
}
}
@@ -259,23 +259,23 @@ static bool hasAnalyzableMemoryWrite(Instruction *I,
/// Return a Location stored to by the specified instruction. If isRemovable
/// returns true, this function and getLocForRead completely describe the memory
/// operations for this instruction.
-static MemoryLocation getLocForWrite(Instruction *Inst,
- const TargetLibraryInfo &TLI) {
+static MemoryLocation getLocForWrite(Instruction *Inst,
+ const TargetLibraryInfo &TLI) {
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
return MemoryLocation::get(SI);
- // memcpy/memmove/memset.
- if (auto *MI = dyn_cast<AnyMemIntrinsic>(Inst))
- return MemoryLocation::getForDest(MI);
+ // memcpy/memmove/memset.
+ if (auto *MI = dyn_cast<AnyMemIntrinsic>(Inst))
+ return MemoryLocation::getForDest(MI);
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
switch (II->getIntrinsicID()) {
default:
return MemoryLocation(); // Unhandled intrinsic.
case Intrinsic::init_trampoline:
- return MemoryLocation::getAfter(II->getArgOperand(0));
- case Intrinsic::masked_store:
- return MemoryLocation::getForArgument(II, 1, TLI);
+ return MemoryLocation::getAfter(II->getArgOperand(0));
+ case Intrinsic::masked_store:
+ return MemoryLocation::getForArgument(II, 1, TLI);
case Intrinsic::lifetime_end: {
uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
return MemoryLocation(II->getArgOperand(1), Len);
@@ -285,7 +285,7 @@ static MemoryLocation getLocForWrite(Instruction *Inst,
if (auto *CB = dyn_cast<CallBase>(Inst))
// All the supported TLI functions so far happen to have dest as their
// first argument.
- return MemoryLocation::getAfter(CB->getArgOperand(0));
+ return MemoryLocation::getAfter(CB->getArgOperand(0));
return MemoryLocation();
}
@@ -322,13 +322,13 @@ static bool isRemovable(Instruction *I) {
case Intrinsic::memset:
case Intrinsic::memmove:
case Intrinsic::memcpy:
- case Intrinsic::memcpy_inline:
+ case Intrinsic::memcpy_inline:
// Don't remove volatile memory intrinsics.
return !cast<MemIntrinsic>(II)->isVolatile();
case Intrinsic::memcpy_element_unordered_atomic:
case Intrinsic::memmove_element_unordered_atomic:
case Intrinsic::memset_element_unordered_atomic:
- case Intrinsic::masked_store:
+ case Intrinsic::masked_store:
return true;
}
}
@@ -374,10 +374,10 @@ static bool isShortenableAtTheBeginning(Instruction *I) {
}
/// Return the pointer that is being written to.
-static Value *getStoredPointerOperand(Instruction *I,
- const TargetLibraryInfo &TLI) {
+static Value *getStoredPointerOperand(Instruction *I,
+ const TargetLibraryInfo &TLI) {
//TODO: factor this to reuse getLocForWrite
- MemoryLocation Loc = getLocForWrite(I, TLI);
+ MemoryLocation Loc = getLocForWrite(I, TLI);
assert(Loc.Ptr &&
"unable to find pointer written for analyzable instruction?");
// TODO: most APIs don't expect const Value *
@@ -403,59 +403,59 @@ enum OverwriteResult {
OW_Complete,
OW_End,
OW_PartialEarlierWithFullLater,
- OW_MaybePartial,
+ OW_MaybePartial,
OW_Unknown
};
} // end anonymous namespace
-/// Check if two instructions are masked stores that completely
-/// overwrite one another. More specifically, \p Later has to
-/// overwrite \p Earlier.
-template <typename AATy>
-static OverwriteResult isMaskedStoreOverwrite(const Instruction *Later,
- const Instruction *Earlier,
- AATy &AA) {
- const auto *IIL = dyn_cast<IntrinsicInst>(Later);
- const auto *IIE = dyn_cast<IntrinsicInst>(Earlier);
- if (IIL == nullptr || IIE == nullptr)
- return OW_Unknown;
- if (IIL->getIntrinsicID() != Intrinsic::masked_store ||
- IIE->getIntrinsicID() != Intrinsic::masked_store)
- return OW_Unknown;
- // Pointers.
- Value *LP = IIL->getArgOperand(1)->stripPointerCasts();
- Value *EP = IIE->getArgOperand(1)->stripPointerCasts();
- if (LP != EP && !AA.isMustAlias(LP, EP))
- return OW_Unknown;
- // Masks.
- // TODO: check that Later's mask is a superset of the Earlier's mask.
- if (IIL->getArgOperand(3) != IIE->getArgOperand(3))
- return OW_Unknown;
- return OW_Complete;
-}
-
-/// Return 'OW_Complete' if a store to the 'Later' location (by \p LaterI
-/// instruction) completely overwrites a store to the 'Earlier' location.
-/// (by \p EarlierI instruction).
-/// Return OW_MaybePartial if \p Later does not completely overwrite
-/// \p Earlier, but they both write to the same underlying object. In that
-/// case, use isPartialOverwrite to check if \p Later partially overwrites
-/// \p Earlier. Returns 'OW_Unknown' if nothing can be determined.
-template <typename AATy>
-static OverwriteResult
-isOverwrite(const Instruction *LaterI, const Instruction *EarlierI,
- const MemoryLocation &Later, const MemoryLocation &Earlier,
- const DataLayout &DL, const TargetLibraryInfo &TLI,
- int64_t &EarlierOff, int64_t &LaterOff, AATy &AA,
- const Function *F) {
+/// Check if two instructions are masked stores that completely
+/// overwrite one another. More specifically, \p Later has to
+/// overwrite \p Earlier.
+template <typename AATy>
+static OverwriteResult isMaskedStoreOverwrite(const Instruction *Later,
+ const Instruction *Earlier,
+ AATy &AA) {
+ const auto *IIL = dyn_cast<IntrinsicInst>(Later);
+ const auto *IIE = dyn_cast<IntrinsicInst>(Earlier);
+ if (IIL == nullptr || IIE == nullptr)
+ return OW_Unknown;
+ if (IIL->getIntrinsicID() != Intrinsic::masked_store ||
+ IIE->getIntrinsicID() != Intrinsic::masked_store)
+ return OW_Unknown;
+ // Pointers.
+ Value *LP = IIL->getArgOperand(1)->stripPointerCasts();
+ Value *EP = IIE->getArgOperand(1)->stripPointerCasts();
+ if (LP != EP && !AA.isMustAlias(LP, EP))
+ return OW_Unknown;
+ // Masks.
+ // TODO: check that Later's mask is a superset of the Earlier's mask.
+ if (IIL->getArgOperand(3) != IIE->getArgOperand(3))
+ return OW_Unknown;
+ return OW_Complete;
+}
+
+/// Return 'OW_Complete' if a store to the 'Later' location (by \p LaterI
+/// instruction) completely overwrites a store to the 'Earlier' location.
+/// (by \p EarlierI instruction).
+/// Return OW_MaybePartial if \p Later does not completely overwrite
+/// \p Earlier, but they both write to the same underlying object. In that
+/// case, use isPartialOverwrite to check if \p Later partially overwrites
+/// \p Earlier. Returns 'OW_Unknown' if nothing can be determined.
+template <typename AATy>
+static OverwriteResult
+isOverwrite(const Instruction *LaterI, const Instruction *EarlierI,
+ const MemoryLocation &Later, const MemoryLocation &Earlier,
+ const DataLayout &DL, const TargetLibraryInfo &TLI,
+ int64_t &EarlierOff, int64_t &LaterOff, AATy &AA,
+ const Function *F) {
// FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
// get imprecise values here, though (except for unknown sizes).
- if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) {
- // Masked stores have imprecise locations, but we can reason about them
- // to some extent.
- return isMaskedStoreOverwrite(LaterI, EarlierI, AA);
- }
+ if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) {
+ // Masked stores have imprecise locations, but we can reason about them
+ // to some extent.
+ return isMaskedStoreOverwrite(LaterI, EarlierI, AA);
+ }
const uint64_t LaterSize = Later.Size.getValue();
const uint64_t EarlierSize = Earlier.Size.getValue();
@@ -474,7 +474,7 @@ isOverwrite(const Instruction *LaterI, const Instruction *EarlierI,
// Check to see if the later store is to the entire object (either a global,
// an alloca, or a byval/inalloca argument). If so, then it clearly
// overwrites any other store to the same object.
- const Value *UO1 = getUnderlyingObject(P1), *UO2 = getUnderlyingObject(P2);
+ const Value *UO1 = getUnderlyingObject(P1), *UO2 = getUnderlyingObject(P2);
// If we can't resolve the same pointers to the same object, then we can't
// analyze them at all.
@@ -499,59 +499,59 @@ isOverwrite(const Instruction *LaterI, const Instruction *EarlierI,
if (BP1 != BP2)
return OW_Unknown;
- // The later access completely overlaps the earlier store if and only if
- // both start and end of the earlier one is "inside" the later one:
- // |<->|--earlier--|<->|
- // |-------later-------|
- // Accesses may overlap if and only if start of one of them is "inside"
- // another one:
- // |<->|--earlier--|<----->|
- // |-------later-------|
- // OR
- // |----- earlier -----|
- // |<->|---later---|<----->|
+ // The later access completely overlaps the earlier store if and only if
+ // both start and end of the earlier one is "inside" the later one:
+ // |<->|--earlier--|<->|
+ // |-------later-------|
+ // Accesses may overlap if and only if start of one of them is "inside"
+ // another one:
+ // |<->|--earlier--|<----->|
+ // |-------later-------|
+ // OR
+ // |----- earlier -----|
+ // |<->|---later---|<----->|
//
// We have to be careful here as *Off is signed while *.Size is unsigned.
- // Check if the earlier access starts "not before" the later one.
- if (EarlierOff >= LaterOff) {
- // If the earlier access ends "not after" the later access then the earlier
- // one is completely overwritten by the later one.
- if (uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize)
- return OW_Complete;
- // If start of the earlier access is "before" end of the later access then
- // accesses overlap.
- else if ((uint64_t)(EarlierOff - LaterOff) < LaterSize)
- return OW_MaybePartial;
- }
- // If start of the later access is "before" end of the earlier access then
- // accesses overlap.
- else if ((uint64_t)(LaterOff - EarlierOff) < EarlierSize) {
- return OW_MaybePartial;
- }
-
- // Can reach here only if accesses are known not to overlap. There is no
- // dedicated code to indicate no overlap so signal "unknown".
- return OW_Unknown;
-}
-
-/// Return 'OW_Complete' if a store to the 'Later' location completely
-/// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the
-/// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the
-/// beginning of the 'Earlier' location is overwritten by 'Later'.
-/// 'OW_PartialEarlierWithFullLater' means that an earlier (big) store was
-/// overwritten by a later (smaller) store which doesn't write outside the big
-/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
-/// NOTE: This function must only be called if both \p Later and \p Earlier
-/// write to the same underlying object with valid \p EarlierOff and \p
-/// LaterOff.
-static OverwriteResult isPartialOverwrite(const MemoryLocation &Later,
- const MemoryLocation &Earlier,
- int64_t EarlierOff, int64_t LaterOff,
- Instruction *DepWrite,
- InstOverlapIntervalsTy &IOL) {
- const uint64_t LaterSize = Later.Size.getValue();
- const uint64_t EarlierSize = Earlier.Size.getValue();
+ // Check if the earlier access starts "not before" the later one.
+ if (EarlierOff >= LaterOff) {
+ // If the earlier access ends "not after" the later access then the earlier
+ // one is completely overwritten by the later one.
+ if (uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize)
+ return OW_Complete;
+ // If start of the earlier access is "before" end of the later access then
+ // accesses overlap.
+ else if ((uint64_t)(EarlierOff - LaterOff) < LaterSize)
+ return OW_MaybePartial;
+ }
+ // If start of the later access is "before" end of the earlier access then
+ // accesses overlap.
+ else if ((uint64_t)(LaterOff - EarlierOff) < EarlierSize) {
+ return OW_MaybePartial;
+ }
+
+ // Can reach here only if accesses are known not to overlap. There is no
+ // dedicated code to indicate no overlap so signal "unknown".
+ return OW_Unknown;
+}
+
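The interval diagrams above boil down to a comparison of offsets and sizes once both accesses are known to share a base. A standalone sketch of just that final classification (the enum and function names are invented; the real code also handles unknown sizes, differing bases, and whole-object overwrites):

#include <cassert>
#include <cstdint>

enum class OW { Complete, MaybePartial, Unknown };

// Offsets are relative to the common underlying object; sizes are in bytes.
static OW classify(int64_t EarlierOff, uint64_t EarlierSize,
                   int64_t LaterOff, uint64_t LaterSize) {
  if (EarlierOff >= LaterOff) {
    if (uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize)
      return OW::Complete;     // earlier lies entirely inside later
    if (uint64_t(EarlierOff - LaterOff) < LaterSize)
      return OW::MaybePartial; // earlier starts inside later but runs past it
  } else if (uint64_t(LaterOff - EarlierOff) < EarlierSize) {
    return OW::MaybePartial;   // later starts inside earlier
  }
  return OW::Unknown;          // provably disjoint; no dedicated result code
}

int main() {
  assert(classify(4, 4, 0, 16) == OW::Complete);     // [4,8) inside [0,16)
  assert(classify(0, 16, 4, 4) == OW::MaybePartial); // later inside earlier
  assert(classify(16, 4, 0, 8) == OW::Unknown);      // disjoint
}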
+/// Return 'OW_Complete' if a store to the 'Later' location completely
+/// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the
+/// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the
+/// beginning of the 'Earlier' location is overwritten by 'Later'.
+/// 'OW_PartialEarlierWithFullLater' means that an earlier (big) store was
+/// overwritten by a later (smaller) store which doesn't write outside the big
+/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
+/// NOTE: This function must only be called if both \p Later and \p Earlier
+/// write to the same underlying object with valid \p EarlierOff and \p
+/// LaterOff.
+static OverwriteResult isPartialOverwrite(const MemoryLocation &Later,
+ const MemoryLocation &Earlier,
+ int64_t EarlierOff, int64_t LaterOff,
+ Instruction *DepWrite,
+ InstOverlapIntervalsTy &IOL) {
+ const uint64_t LaterSize = Later.Size.getValue();
+ const uint64_t EarlierSize = Earlier.Size.getValue();
// We may now overlap, although the overlap is not complete. There might also
// be other incomplete overlaps, and together, they might cover the complete
// earlier write.
@@ -718,10 +718,10 @@ static bool isPossibleSelfRead(Instruction *Inst,
/// modified between the first and the second instruction.
/// Precondition: Second instruction must be dominated by the first
/// instruction.
-template <typename AATy>
-static bool
-memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, AATy &AA,
- const DataLayout &DL, DominatorTree *DT) {
+template <typename AATy>
+static bool
+memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, AATy &AA,
+ const DataLayout &DL, DominatorTree *DT) {
// Do a backwards scan through the CFG from SecondI to FirstI. Look for
// instructions which can modify the memory location accessed by SecondI.
//
@@ -770,7 +770,7 @@ memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, AATy &AA,
for (; BI != EI; ++BI) {
Instruction *I = &*BI;
if (I->mayWriteToMemory() && I != SecondI)
- if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
+ if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
return false;
}
if (B != FirstBB) {
@@ -826,7 +826,7 @@ static bool handleFree(CallInst *F, AliasAnalysis *AA,
MapVector<Instruction *, bool> &ThrowableInst) {
bool MadeChange = false;
- MemoryLocation Loc = MemoryLocation::getAfter(F->getOperand(0));
+ MemoryLocation Loc = MemoryLocation::getAfter(F->getOperand(0));
SmallVector<BasicBlock *, 16> Blocks;
Blocks.push_back(F->getParent());
@@ -844,7 +844,7 @@ static bool handleFree(CallInst *F, AliasAnalysis *AA,
break;
Value *DepPointer =
- getUnderlyingObject(getStoredPointerOperand(Dependency, *TLI));
+ getUnderlyingObject(getStoredPointerOperand(Dependency, *TLI));
// Check for aliasing.
if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
@@ -884,7 +884,7 @@ static void removeAccessedObjects(const MemoryLocation &LoadedLoc,
const DataLayout &DL, AliasAnalysis *AA,
const TargetLibraryInfo *TLI,
const Function *F) {
- const Value *UnderlyingPointer = getUnderlyingObject(LoadedLoc.Ptr);
+ const Value *UnderlyingPointer = getUnderlyingObject(LoadedLoc.Ptr);
// A constant can't be in the dead pointer set.
if (isa<Constant>(UnderlyingPointer))
@@ -937,7 +937,7 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
// Treat byval or inalloca arguments the same, stores to them are dead at the
// end of the function.
for (Argument &AI : BB.getParent()->args())
- if (AI.hasPassPointeeByValueCopyAttr())
+ if (AI.hasPassPointeeByValueCopyAttr())
DeadStackObjects.insert(&AI);
const DataLayout &DL = BB.getModule()->getDataLayout();
@@ -950,7 +950,7 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
if (hasAnalyzableMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) {
// See through pointer-to-pointer bitcasts
SmallVector<const Value *, 4> Pointers;
- getUnderlyingObjects(getStoredPointerOperand(&*BBI, *TLI), Pointers);
+ getUnderlyingObjects(getStoredPointerOperand(&*BBI, *TLI), Pointers);
// Stores to stack values are valid candidates for removal.
bool AllDead = true;
@@ -1069,8 +1069,8 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
}
static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierOffset,
- uint64_t &EarlierSize, int64_t LaterOffset,
- uint64_t LaterSize, bool IsOverwriteEnd) {
+ uint64_t &EarlierSize, int64_t LaterOffset,
+ uint64_t LaterSize, bool IsOverwriteEnd) {
// TODO: base this on the target vector size so that if the earlier
 // store was too small to get vector writes anyway then it's likely
// a good idea to shorten it
@@ -1125,23 +1125,23 @@ static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierOffset,
static bool tryToShortenEnd(Instruction *EarlierWrite,
OverlapIntervalsTy &IntervalMap,
- int64_t &EarlierStart, uint64_t &EarlierSize) {
+ int64_t &EarlierStart, uint64_t &EarlierSize) {
if (IntervalMap.empty() || !isShortenableAtTheEnd(EarlierWrite))
return false;
OverlapIntervalsTy::iterator OII = --IntervalMap.end();
int64_t LaterStart = OII->second;
- uint64_t LaterSize = OII->first - LaterStart;
-
- assert(OII->first - LaterStart >= 0 && "Size expected to be positive");
-
- if (LaterStart > EarlierStart &&
- // Note: "LaterStart - EarlierStart" is known to be positive due to
- // preceding check.
- (uint64_t)(LaterStart - EarlierStart) < EarlierSize &&
- // Note: "EarlierSize - (uint64_t)(LaterStart - EarlierStart)" is known to
- // be non negative due to preceding checks.
- LaterSize >= EarlierSize - (uint64_t)(LaterStart - EarlierStart)) {
+ uint64_t LaterSize = OII->first - LaterStart;
+
+ assert(OII->first - LaterStart >= 0 && "Size expected to be positive");
+
+ if (LaterStart > EarlierStart &&
+ // Note: "LaterStart - EarlierStart" is known to be positive due to
+ // preceding check.
+ (uint64_t)(LaterStart - EarlierStart) < EarlierSize &&
+ // Note: "EarlierSize - (uint64_t)(LaterStart - EarlierStart)" is known to
+ // be non negative due to preceding checks.
+ LaterSize >= EarlierSize - (uint64_t)(LaterStart - EarlierStart)) {
if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
LaterSize, true)) {
IntervalMap.erase(OII);
@@ -1153,23 +1153,23 @@ static bool tryToShortenEnd(Instruction *EarlierWrite,
static bool tryToShortenBegin(Instruction *EarlierWrite,
OverlapIntervalsTy &IntervalMap,
- int64_t &EarlierStart, uint64_t &EarlierSize) {
+ int64_t &EarlierStart, uint64_t &EarlierSize) {
if (IntervalMap.empty() || !isShortenableAtTheBeginning(EarlierWrite))
return false;
OverlapIntervalsTy::iterator OII = IntervalMap.begin();
int64_t LaterStart = OII->second;
- uint64_t LaterSize = OII->first - LaterStart;
-
- assert(OII->first - LaterStart >= 0 && "Size expected to be positive");
-
- if (LaterStart <= EarlierStart &&
- // Note: "EarlierStart - LaterStart" is known to be non negative due to
- // preceding check.
- LaterSize > (uint64_t)(EarlierStart - LaterStart)) {
- // Note: "LaterSize - (uint64_t)(EarlierStart - LaterStart)" is known to be
- // positive due to preceding checks.
- assert(LaterSize - (uint64_t)(EarlierStart - LaterStart) < EarlierSize &&
+ uint64_t LaterSize = OII->first - LaterStart;
+
+ assert(OII->first - LaterStart >= 0 && "Size expected to be positive");
+
+ if (LaterStart <= EarlierStart &&
+ // Note: "EarlierStart - LaterStart" is known to be non negative due to
+ // preceding check.
+ LaterSize > (uint64_t)(EarlierStart - LaterStart)) {
+ // Note: "LaterSize - (uint64_t)(EarlierStart - LaterStart)" is known to be
+ // positive due to preceding checks.
+ assert(LaterSize - (uint64_t)(EarlierStart - LaterStart) < EarlierSize &&
"Should have been handled as OW_Complete");
if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
LaterSize, false)) {
@@ -1180,18 +1180,18 @@ static bool tryToShortenBegin(Instruction *EarlierWrite,
return false;
}
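The arithmetic in tryToShortenEnd/tryToShortenBegin above decides whether a later store covers the whole tail (or head) of an earlier one, so the earlier store can be truncated. A hypothetical sketch of the end-shortening condition only, ignoring the alignment handling tryToShorten itself performs:

#include <cassert>
#include <cstdint>
#include <optional>

// If the later store covers the entire tail of the earlier store starting at
// LaterStart, the earlier store can keep only the bytes before LaterStart.
static std::optional<uint64_t> shortenedEndSize(int64_t EarlierStart,
                                                uint64_t EarlierSize,
                                                int64_t LaterStart,
                                                uint64_t LaterSize) {
  if (LaterStart > EarlierStart &&
      uint64_t(LaterStart - EarlierStart) < EarlierSize &&
      LaterSize >= EarlierSize - uint64_t(LaterStart - EarlierStart))
    return uint64_t(LaterStart - EarlierStart);
  return std::nullopt;
}

int main() {
  auto Kept = shortenedEndSize(0, 32, 16, 32); // earlier [0,32), later [16,48)
  assert(Kept && *Kept == 16);                 // keep only [0,16)
  assert(!shortenedEndSize(0, 32, 16, 8));     // tail not fully covered
}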
-static bool removePartiallyOverlappedStores(const DataLayout &DL,
- InstOverlapIntervalsTy &IOL,
- const TargetLibraryInfo &TLI) {
+static bool removePartiallyOverlappedStores(const DataLayout &DL,
+ InstOverlapIntervalsTy &IOL,
+ const TargetLibraryInfo &TLI) {
bool Changed = false;
for (auto OI : IOL) {
Instruction *EarlierWrite = OI.first;
- MemoryLocation Loc = getLocForWrite(EarlierWrite, TLI);
+ MemoryLocation Loc = getLocForWrite(EarlierWrite, TLI);
assert(isRemovable(EarlierWrite) && "Expect only removable instruction");
const Value *Ptr = Loc.Ptr->stripPointerCasts();
int64_t EarlierStart = 0;
- uint64_t EarlierSize = Loc.Size.getValue();
+ uint64_t EarlierSize = Loc.Size.getValue();
GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL);
OverlapIntervalsTy &IntervalMap = OI.second;
Changed |=
@@ -1221,7 +1221,7 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) {
if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
isRemovable(SI) &&
- memoryIsNotModifiedBetween(DepLoad, SI, *AA, DL, DT)) {
+ memoryIsNotModifiedBetween(DepLoad, SI, *AA, DL, DT)) {
LLVM_DEBUG(
dbgs() << "DSE: Remove Store Of Load from same pointer:\n LOAD: "
@@ -1237,10 +1237,10 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
Constant *StoredConstant = dyn_cast<Constant>(SI->getValueOperand());
if (StoredConstant && StoredConstant->isNullValue() && isRemovable(SI)) {
Instruction *UnderlyingPointer =
- dyn_cast<Instruction>(getUnderlyingObject(SI->getPointerOperand()));
+ dyn_cast<Instruction>(getUnderlyingObject(SI->getPointerOperand()));
if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) &&
- memoryIsNotModifiedBetween(UnderlyingPointer, SI, *AA, DL, DT)) {
+ memoryIsNotModifiedBetween(UnderlyingPointer, SI, *AA, DL, DT)) {
LLVM_DEBUG(
dbgs() << "DSE: Remove null store to the calloc'ed object:\n DEAD: "
<< *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n');
@@ -1253,10 +1253,10 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
return false;
}
-template <typename AATy>
-static Constant *tryToMergePartialOverlappingStores(
- StoreInst *Earlier, StoreInst *Later, int64_t InstWriteOffset,
- int64_t DepWriteOffset, const DataLayout &DL, AATy &AA, DominatorTree *DT) {
+template <typename AATy>
+static Constant *tryToMergePartialOverlappingStores(
+ StoreInst *Earlier, StoreInst *Later, int64_t InstWriteOffset,
+ int64_t DepWriteOffset, const DataLayout &DL, AATy &AA, DominatorTree *DT) {
if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) &&
DL.typeSizeEqualsStoreSize(Earlier->getValueOperand()->getType()) &&
@@ -1347,7 +1347,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
continue;
// Figure out what location is being stored to.
- MemoryLocation Loc = getLocForWrite(Inst, *TLI);
+ MemoryLocation Loc = getLocForWrite(Inst, *TLI);
// If we didn't get a useful location, fail.
if (!Loc.Ptr)
@@ -1371,7 +1371,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
Instruction *DepWrite = InstDep.getInst();
if (!hasAnalyzableMemoryWrite(DepWrite, *TLI))
break;
- MemoryLocation DepLoc = getLocForWrite(DepWrite, *TLI);
+ MemoryLocation DepLoc = getLocForWrite(DepWrite, *TLI);
// If we didn't get a useful location, or if it isn't a size, bail out.
if (!DepLoc.Ptr)
break;
@@ -1391,7 +1391,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
// to it is dead along the unwind edge. Otherwise, we need to preserve
// the store.
if (LastThrowing && DepWrite->comesBefore(LastThrowing)) {
- const Value *Underlying = getUnderlyingObject(DepLoc.Ptr);
+ const Value *Underlying = getUnderlyingObject(DepLoc.Ptr);
bool IsStoreDeadOnUnwind = isa<AllocaInst>(Underlying);
if (!IsStoreDeadOnUnwind) {
// We're looking for a call to an allocation function
@@ -1413,13 +1413,13 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
if (isRemovable(DepWrite) &&
!isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
int64_t InstWriteOffset, DepWriteOffset;
- OverwriteResult OR = isOverwrite(Inst, DepWrite, Loc, DepLoc, DL, *TLI,
- DepWriteOffset, InstWriteOffset, *AA,
+ OverwriteResult OR = isOverwrite(Inst, DepWrite, Loc, DepLoc, DL, *TLI,
+ DepWriteOffset, InstWriteOffset, *AA,
BB.getParent());
- if (OR == OW_MaybePartial)
- OR = isPartialOverwrite(Loc, DepLoc, DepWriteOffset, InstWriteOffset,
- DepWrite, IOL);
-
+ if (OR == OW_MaybePartial)
+ OR = isPartialOverwrite(Loc, DepLoc, DepWriteOffset, InstWriteOffset,
+ DepWrite, IOL);
+
if (OR == OW_Complete) {
LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DepWrite
<< "\n KILLER: " << *Inst << '\n');
@@ -1440,8 +1440,8 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
"when partial-overwrite "
"tracking is enabled");
// The overwrite result is known, so these must be known, too.
- uint64_t EarlierSize = DepLoc.Size.getValue();
- uint64_t LaterSize = Loc.Size.getValue();
+ uint64_t EarlierSize = DepLoc.Size.getValue();
+ uint64_t LaterSize = Loc.Size.getValue();
bool IsOverwriteEnd = (OR == OW_End);
MadeChange |= tryToShorten(DepWrite, DepWriteOffset, EarlierSize,
InstWriteOffset, LaterSize, IsOverwriteEnd);
@@ -1450,7 +1450,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
auto *Earlier = dyn_cast<StoreInst>(DepWrite);
auto *Later = dyn_cast<StoreInst>(Inst);
if (Constant *C = tryToMergePartialOverlappingStores(
- Earlier, Later, InstWriteOffset, DepWriteOffset, DL, *AA,
+ Earlier, Later, InstWriteOffset, DepWriteOffset, DL, *AA,
DT)) {
auto *SI = new StoreInst(
C, Earlier->getPointerOperand(), false, Earlier->getAlign(),
@@ -1497,7 +1497,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
}
if (EnablePartialOverwriteTracking)
- MadeChange |= removePartiallyOverlappedStores(DL, IOL, *TLI);
+ MadeChange |= removePartiallyOverlappedStores(DL, IOL, *TLI);
// If this block ends in a return, unwind, or unreachable, all allocas are
// dead at its end, which means stores to them are also dead.
@@ -1531,21 +1531,21 @@ namespace {
// in between both MemoryDefs. A bit more concretely:
//
// For all MemoryDefs StartDef:
-// 1. Get the next dominating clobbering MemoryDef (EarlierAccess) by walking
+// 1. Get the next dominating clobbering MemoryDef (EarlierAccess) by walking
// upwards.
-// 2. Check that there are no reads between EarlierAccess and the StartDef by
-// checking all uses starting at EarlierAccess and walking until we see
-// StartDef.
-// 3. For each found CurrentDef, check that:
-// 1. There are no barrier instructions between CurrentDef and StartDef (like
+// 2. Check that there are no reads between EarlierAccess and the StartDef by
+// checking all uses starting at EarlierAccess and walking until we see
+// StartDef.
+// 3. For each found CurrentDef, check that:
+// 1. There are no barrier instructions between CurrentDef and StartDef (like
// throws or stores with ordering constraints).
-// 2. StartDef is executed whenever CurrentDef is executed.
-// 3. StartDef completely overwrites CurrentDef.
-// 4. Erase CurrentDef from the function and MemorySSA.
+// 2. StartDef is executed whenever CurrentDef is executed.
+// 3. StartDef completely overwrites CurrentDef.
+// 4. Erase CurrentDef from the function and MemorySSA.
-// Returns true if \p I is an intrinsic that does not read or write memory.
-bool isNoopIntrinsic(Instruction *I) {
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+// Returns true if \p I is an intrinsic that does not read or write memory.
+bool isNoopIntrinsic(Instruction *I) {
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
@@ -1588,7 +1588,7 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
return true;
// Skip intrinsics that do not really read or modify memory.
- if (isNoopIntrinsic(D->getMemoryInst()))
+ if (isNoopIntrinsic(D->getMemoryInst()))
return true;
return false;
@@ -1597,21 +1597,21 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
struct DSEState {
Function &F;
AliasAnalysis &AA;
-
- /// The single BatchAA instance that is used to cache AA queries. It will
- /// not be invalidated over the whole run. This is safe, because:
- /// 1. Only memory writes are removed, so the alias cache for memory
- /// locations remains valid.
- /// 2. No new instructions are added (only instructions removed), so cached
- /// information for a deleted value cannot be accessed by a re-used new
- /// value pointer.
- BatchAAResults BatchAA;
-
+
+ /// The single BatchAA instance that is used to cache AA queries. It will
+ /// not be invalidated over the whole run. This is safe, because:
+ /// 1. Only memory writes are removed, so the alias cache for memory
+ /// locations remains valid.
+ /// 2. No new instructions are added (only instructions removed), so cached
+ /// information for a deleted value cannot be accessed by a re-used new
+ /// value pointer.
+ BatchAAResults BatchAA;
+
MemorySSA &MSSA;
DominatorTree &DT;
PostDominatorTree &PDT;
const TargetLibraryInfo &TLI;
- const DataLayout &DL;
+ const DataLayout &DL;
// All MemoryDefs that potentially could kill other MemDefs.
SmallVector<MemoryDef *, 64> MemDefs;
@@ -1619,11 +1619,11 @@ struct DSEState {
SmallPtrSet<MemoryAccess *, 4> SkipStores;
// Keep track of all of the objects that are invisible to the caller before
// the function returns.
- // SmallPtrSet<const Value *, 16> InvisibleToCallerBeforeRet;
- DenseMap<const Value *, bool> InvisibleToCallerBeforeRet;
+ // SmallPtrSet<const Value *, 16> InvisibleToCallerBeforeRet;
+ DenseMap<const Value *, bool> InvisibleToCallerBeforeRet;
// Keep track of all of the objects that are invisible to the caller after
// the function returns.
- DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
+ DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
// Keep track of blocks with throwing instructions not modeled in MemorySSA.
SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
// Post-order numbers for each basic block. Used to figure out if memory
@@ -1636,8 +1636,8 @@ struct DSEState {
DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
PostDominatorTree &PDT, const TargetLibraryInfo &TLI)
- : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI),
- DL(F.getParent()->getDataLayout()) {}
+ : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI),
+ DL(F.getParent()->getDataLayout()) {}
static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
DominatorTree &DT, PostDominatorTree &PDT,
@@ -1663,48 +1663,48 @@ struct DSEState {
// Treat byval or inalloca arguments the same as Allocas, stores to them are
// dead at the end of the function.
for (Argument &AI : F.args())
- if (AI.hasPassPointeeByValueCopyAttr()) {
+ if (AI.hasPassPointeeByValueCopyAttr()) {
// For byval, the caller doesn't know the address of the allocation.
if (AI.hasByValAttr())
- State.InvisibleToCallerBeforeRet.insert({&AI, true});
- State.InvisibleToCallerAfterRet.insert({&AI, true});
+ State.InvisibleToCallerBeforeRet.insert({&AI, true});
+ State.InvisibleToCallerAfterRet.insert({&AI, true});
}
return State;
}
- bool isInvisibleToCallerAfterRet(const Value *V) {
- if (isa<AllocaInst>(V))
- return true;
- auto I = InvisibleToCallerAfterRet.insert({V, false});
- if (I.second) {
- if (!isInvisibleToCallerBeforeRet(V)) {
- I.first->second = false;
- } else {
- auto *Inst = dyn_cast<Instruction>(V);
- if (Inst && isAllocLikeFn(Inst, &TLI))
- I.first->second = !PointerMayBeCaptured(V, true, false);
- }
- }
- return I.first->second;
- }
-
- bool isInvisibleToCallerBeforeRet(const Value *V) {
- if (isa<AllocaInst>(V))
- return true;
- auto I = InvisibleToCallerBeforeRet.insert({V, false});
- if (I.second) {
- auto *Inst = dyn_cast<Instruction>(V);
- if (Inst && isAllocLikeFn(Inst, &TLI))
- // NOTE: This could be made more precise by PointerMayBeCapturedBefore
- // with the killing MemoryDef. But we refrain from doing so for now to
- // limit compile-time and this does not cause any changes to the number
- // of stores removed on a large test set in practice.
- I.first->second = !PointerMayBeCaptured(V, false, true);
- }
- return I.first->second;
- }
-
+ bool isInvisibleToCallerAfterRet(const Value *V) {
+ if (isa<AllocaInst>(V))
+ return true;
+ auto I = InvisibleToCallerAfterRet.insert({V, false});
+ if (I.second) {
+ if (!isInvisibleToCallerBeforeRet(V)) {
+ I.first->second = false;
+ } else {
+ auto *Inst = dyn_cast<Instruction>(V);
+ if (Inst && isAllocLikeFn(Inst, &TLI))
+ I.first->second = !PointerMayBeCaptured(V, true, false);
+ }
+ }
+ return I.first->second;
+ }
+
+ bool isInvisibleToCallerBeforeRet(const Value *V) {
+ if (isa<AllocaInst>(V))
+ return true;
+ auto I = InvisibleToCallerBeforeRet.insert({V, false});
+ if (I.second) {
+ auto *Inst = dyn_cast<Instruction>(V);
+ if (Inst && isAllocLikeFn(Inst, &TLI))
+ // NOTE: This could be made more precise by PointerMayBeCapturedBefore
+ // with the killing MemoryDef. But we refrain from doing so for now to
+ // limit compile-time and this does not cause any changes to the number
+ // of stores removed on a large test set in practice.
+ I.first->second = !PointerMayBeCaptured(V, false, true);
+ }
+ return I.first->second;
+ }
+
Optional<MemoryLocation> getLocForWriteEx(Instruction *I) const {
if (!I->mayWriteToMemory())
return None;
@@ -1713,11 +1713,11 @@ struct DSEState {
return {MemoryLocation::getForDest(MTI)};
if (auto *CB = dyn_cast<CallBase>(I)) {
- // If the functions may write to memory we do not know about, bail out.
- if (!CB->onlyAccessesArgMemory() &&
- !CB->onlyAccessesInaccessibleMemOrArgMem())
- return None;
-
+ // If the functions may write to memory we do not know about, bail out.
+ if (!CB->onlyAccessesArgMemory() &&
+ !CB->onlyAccessesInaccessibleMemOrArgMem())
+ return None;
+
LibFunc LF;
if (TLI.getLibFunc(*CB, LF) && TLI.has(LF)) {
switch (LF) {
@@ -1725,29 +1725,29 @@ struct DSEState {
case LibFunc_strncpy:
case LibFunc_strcat:
case LibFunc_strncat:
- return {MemoryLocation::getAfter(CB->getArgOperand(0))};
+ return {MemoryLocation::getAfter(CB->getArgOperand(0))};
default:
break;
}
}
- switch (CB->getIntrinsicID()) {
- case Intrinsic::init_trampoline:
- return {MemoryLocation::getAfter(CB->getArgOperand(0))};
- case Intrinsic::masked_store:
- return {MemoryLocation::getForArgument(CB, 1, TLI)};
- default:
- break;
- }
+ switch (CB->getIntrinsicID()) {
+ case Intrinsic::init_trampoline:
+ return {MemoryLocation::getAfter(CB->getArgOperand(0))};
+ case Intrinsic::masked_store:
+ return {MemoryLocation::getForArgument(CB, 1, TLI)};
+ default:
+ break;
+ }
return None;
}
return MemoryLocation::getOrNone(I);
}
- /// Returns true if \p UseInst completely overwrites \p DefLoc
- /// (stored by \p DefInst).
- bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
- Instruction *UseInst) {
+ /// Returns true if \p UseInst completely overwrites \p DefLoc
+ /// (stored by \p DefInst).
+ bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
+ Instruction *UseInst) {
// UseInst has a MemoryDef associated in MemorySSA. It's possible for a
// MemoryDef to not write to memory, e.g. a volatile load is modeled as a
// MemoryDef.
@@ -1759,10 +1759,10 @@ struct DSEState {
return false;
int64_t InstWriteOffset, DepWriteOffset;
- if (auto CC = getLocForWriteEx(UseInst))
- return isOverwrite(UseInst, DefInst, *CC, DefLoc, DL, TLI, DepWriteOffset,
- InstWriteOffset, BatchAA, &F) == OW_Complete;
- return false;
+ if (auto CC = getLocForWriteEx(UseInst))
+ return isOverwrite(UseInst, DefInst, *CC, DefLoc, DL, TLI, DepWriteOffset,
+ InstWriteOffset, BatchAA, &F) == OW_Complete;
+ return false;
}
/// Returns true if \p Def is not read before returning from the function.
@@ -1793,12 +1793,12 @@ struct DSEState {
}
MemoryAccess *UseAccess = WorkList[I];
- // Simply adding the users of MemoryPhi to the worklist is not enough,
- // because we might miss read clobbers in different iterations of a loop,
- // for example.
- // TODO: Add support for phi translation to handle the loop case.
- if (isa<MemoryPhi>(UseAccess))
- return false;
+ // Simply adding the users of MemoryPhi to the worklist is not enough,
+ // because we might miss read clobbers in different iterations of a loop,
+ // for example.
+ // TODO: Add support for phi translation to handle the loop case.
+ if (isa<MemoryPhi>(UseAccess))
+ return false;
// TODO: Checking for aliasing is expensive. Consider reducing the amount
// of times this is called and/or caching it.
@@ -1827,8 +1827,8 @@ struct DSEState {
if (auto *CB = dyn_cast<CallBase>(I)) {
if (isFreeCall(I, &TLI))
- return {std::make_pair(MemoryLocation::getAfter(CB->getArgOperand(0)),
- true)};
+ return {std::make_pair(MemoryLocation::getAfter(CB->getArgOperand(0)),
+ true)};
}
return None;
@@ -1842,10 +1842,10 @@ struct DSEState {
isFreeCall(I, &TLI);
}
- /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
- /// instruction \p AccessI.
- bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
- Instruction *MaybeTerm) {
+ /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
+ /// instruction \p AccessI.
+ bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
+ Instruction *MaybeTerm) {
Optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
getLocForTerminator(MaybeTerm);
@@ -1854,31 +1854,31 @@ struct DSEState {
// If the terminator is a free-like call, all accesses to the underlying
// object can be considered terminated.
- if (getUnderlyingObject(Loc.Ptr) !=
- getUnderlyingObject(MaybeTermLoc->first.Ptr))
- return false;
-
- auto TermLoc = MaybeTermLoc->first;
+ if (getUnderlyingObject(Loc.Ptr) !=
+ getUnderlyingObject(MaybeTermLoc->first.Ptr))
+ return false;
+
+ auto TermLoc = MaybeTermLoc->first;
if (MaybeTermLoc->second) {
- const Value *LocUO = getUnderlyingObject(Loc.Ptr);
- return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
+ const Value *LocUO = getUnderlyingObject(Loc.Ptr);
+ return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
}
- int64_t InstWriteOffset, DepWriteOffset;
- return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, DL, TLI,
- DepWriteOffset, InstWriteOffset, BatchAA,
- &F) == OW_Complete;
+ int64_t InstWriteOffset, DepWriteOffset;
+ return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, DL, TLI,
+ DepWriteOffset, InstWriteOffset, BatchAA,
+ &F) == OW_Complete;
}
// Returns true if \p Use may read from \p DefLoc.
- bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
- if (isNoopIntrinsic(UseInst))
- return false;
-
- // Monotonic or weaker atomic stores can be re-ordered and do not need to be
- // treated as read clobber.
- if (auto SI = dyn_cast<StoreInst>(UseInst))
- return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);
-
+ bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
+ if (isNoopIntrinsic(UseInst))
+ return false;
+
+ // Monotonic or weaker atomic stores can be re-ordered and do not need to be
+ // treated as read clobber.
+ if (auto SI = dyn_cast<StoreInst>(UseInst))
+ return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);
+
if (!UseInst->mayReadFromMemory())
return false;
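// Illustrative sketch, not part of the patch above: the ordering test behind
// the "monotonic or weaker atomic stores ... do not need to be treated as read
// clobber" rule in isReadClobber. The enum is a simplified stand-in for
// llvm::AtomicOrdering, ranked only as far as this check needs.
#include <cassert>

enum class Ordering { NotAtomic, Unordered, Monotonic, Release, SequentiallyConsistent };

// A store only has to be treated as a read clobber if its ordering is strictly
// stronger than monotonic, i.e. (per the comment above) it cannot simply be
// reordered past the store that DSE wants to remove.
static bool isStrongerThanMonotonic(Ordering O) {
  return static_cast<int>(O) > static_cast<int>(Ordering::Monotonic);
}

int main() {
  assert(!isStrongerThanMonotonic(Ordering::Monotonic)); // not a read clobber
  assert(isStrongerThanMonotonic(Ordering::Release));    // conservatively a read clobber
  return 0;
}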
@@ -1886,246 +1886,246 @@ struct DSEState {
if (CB->onlyAccessesInaccessibleMemory())
return false;
- // NOTE: For calls, the number of stores removed could be slightly improved
-    // by using AA.callCapturesBefore(UseInst, DefLoc, &DT), but that proved to
- // be expensive compared to the benefits in practice. For now, avoid more
- // expensive analysis to limit compile-time.
- return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
+ // NOTE: For calls, the number of stores removed could be slightly improved
+    // by using AA.callCapturesBefore(UseInst, DefLoc, &DT), but that proved to
+ // be expensive compared to the benefits in practice. For now, avoid more
+ // expensive analysis to limit compile-time.
+ return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
}
- /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
- /// loop. In particular, this guarantees that it only references a single
- /// MemoryLocation during execution of the containing function.
- bool IsGuaranteedLoopInvariant(Value *Ptr) {
- auto IsGuaranteedLoopInvariantBase = [this](Value *Ptr) {
- Ptr = Ptr->stripPointerCasts();
- if (auto *I = dyn_cast<Instruction>(Ptr)) {
- if (isa<AllocaInst>(Ptr))
- return true;
-
- if (isAllocLikeFn(I, &TLI))
- return true;
-
- return false;
- }
- return true;
- };
-
- Ptr = Ptr->stripPointerCasts();
- if (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
- return IsGuaranteedLoopInvariantBase(GEP->getPointerOperand()) &&
- GEP->hasAllConstantIndices();
- }
- return IsGuaranteedLoopInvariantBase(Ptr);
- }
-
- // Find a MemoryDef writing to \p DefLoc and dominating \p StartAccess, with
- // no read access between them or on any other path to a function exit block
- // if \p DefLoc is not accessible after the function returns. If there is no
- // such MemoryDef, return None. The returned value may not (completely)
- // overwrite \p DefLoc. Currently we bail out when we encounter an aliasing
- // MemoryUse (read).
+ /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
+ /// loop. In particular, this guarantees that it only references a single
+ /// MemoryLocation during execution of the containing function.
+ bool IsGuaranteedLoopInvariant(Value *Ptr) {
+ auto IsGuaranteedLoopInvariantBase = [this](Value *Ptr) {
+ Ptr = Ptr->stripPointerCasts();
+ if (auto *I = dyn_cast<Instruction>(Ptr)) {
+ if (isa<AllocaInst>(Ptr))
+ return true;
+
+ if (isAllocLikeFn(I, &TLI))
+ return true;
+
+ return false;
+ }
+ return true;
+ };
+
+ Ptr = Ptr->stripPointerCasts();
+ if (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
+ return IsGuaranteedLoopInvariantBase(GEP->getPointerOperand()) &&
+ GEP->hasAllConstantIndices();
+ }
+ return IsGuaranteedLoopInvariantBase(Ptr);
+ }
+
+ // Find a MemoryDef writing to \p DefLoc and dominating \p StartAccess, with
+ // no read access between them or on any other path to a function exit block
+ // if \p DefLoc is not accessible after the function returns. If there is no
+ // such MemoryDef, return None. The returned value may not (completely)
+ // overwrite \p DefLoc. Currently we bail out when we encounter an aliasing
+ // MemoryUse (read).
Optional<MemoryAccess *>
- getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
- const MemoryLocation &DefLoc, const Value *DefUO,
- unsigned &ScanLimit, unsigned &WalkerStepLimit,
- bool IsMemTerm, unsigned &PartialLimit) {
- if (ScanLimit == 0 || WalkerStepLimit == 0) {
- LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
- return None;
- }
-
- MemoryAccess *Current = StartAccess;
- Instruction *KillingI = KillingDef->getMemoryInst();
+ getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
+ const MemoryLocation &DefLoc, const Value *DefUO,
+ unsigned &ScanLimit, unsigned &WalkerStepLimit,
+ bool IsMemTerm, unsigned &PartialLimit) {
+ if (ScanLimit == 0 || WalkerStepLimit == 0) {
+ LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
+ return None;
+ }
+
+ MemoryAccess *Current = StartAccess;
+ Instruction *KillingI = KillingDef->getMemoryInst();
bool StepAgain;
- LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
-
- // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
- Optional<MemoryLocation> CurrentLoc;
+ LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
+
+ // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
+ Optional<MemoryLocation> CurrentLoc;
do {
StepAgain = false;
- LLVM_DEBUG({
- dbgs() << " visiting " << *Current;
- if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
- dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
- << ")";
- dbgs() << "\n";
- });
-
+ LLVM_DEBUG({
+ dbgs() << " visiting " << *Current;
+ if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
+ dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
+ << ")";
+ dbgs() << "\n";
+ });
+
// Reached TOP.
- if (MSSA.isLiveOnEntryDef(Current)) {
- LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
+ if (MSSA.isLiveOnEntryDef(Current)) {
+ LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
return None;
- }
-
- // Cost of a step. Accesses in the same block are more likely to be valid
- // candidates for elimination, hence consider them cheaper.
- unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
- ? MemorySSASameBBStepCost
- : MemorySSAOtherBBStepCost;
- if (WalkerStepLimit <= StepCost) {
- LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
- return None;
- }
- WalkerStepLimit -= StepCost;
-
- // Return for MemoryPhis. They cannot be eliminated directly and the
- // caller is responsible for traversing them.
+ }
+
+ // Cost of a step. Accesses in the same block are more likely to be valid
+ // candidates for elimination, hence consider them cheaper.
+ unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
+ ? MemorySSASameBBStepCost
+ : MemorySSAOtherBBStepCost;
+ if (WalkerStepLimit <= StepCost) {
+ LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
+ return None;
+ }
+ WalkerStepLimit -= StepCost;
+
+ // Return for MemoryPhis. They cannot be eliminated directly and the
+ // caller is responsible for traversing them.
if (isa<MemoryPhi>(Current)) {
- LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
- return Current;
- }
-
- // Below, check if CurrentDef is a valid candidate to be eliminated by
- // KillingDef. If it is not, check the next candidate.
- MemoryDef *CurrentDef = cast<MemoryDef>(Current);
- Instruction *CurrentI = CurrentDef->getMemoryInst();
-
- if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(DefUO))) {
- StepAgain = true;
- Current = CurrentDef->getDefiningAccess();
- continue;
- }
-
- // Before we try to remove anything, check for any extra throwing
- // instructions that block us from DSEing
- if (mayThrowBetween(KillingI, CurrentI, DefUO)) {
- LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
- return None;
- }
-
- // Check for anything that looks like it will be a barrier to further
- // removal
- if (isDSEBarrier(DefUO, CurrentI)) {
- LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
- return None;
+ LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
+ return Current;
}
-
-      // If Current is known to be on a path that reads DefLoc or is a read
- // clobber, bail out, as the path is not profitable. We skip this check
- // for intrinsic calls, because the code knows how to handle memcpy
- // intrinsics.
- if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(DefLoc, CurrentI))
- return None;
-
- // Quick check if there are direct uses that are read-clobbers.
- if (any_of(Current->uses(), [this, &DefLoc, StartAccess](Use &U) {
- if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
- return !MSSA.dominates(StartAccess, UseOrDef) &&
- isReadClobber(DefLoc, UseOrDef->getMemoryInst());
- return false;
- })) {
- LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
+
+ // Below, check if CurrentDef is a valid candidate to be eliminated by
+ // KillingDef. If it is not, check the next candidate.
+ MemoryDef *CurrentDef = cast<MemoryDef>(Current);
+ Instruction *CurrentI = CurrentDef->getMemoryInst();
+
+ if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(DefUO))) {
+ StepAgain = true;
+ Current = CurrentDef->getDefiningAccess();
+ continue;
+ }
+
+ // Before we try to remove anything, check for any extra throwing
+ // instructions that block us from DSEing
+ if (mayThrowBetween(KillingI, CurrentI, DefUO)) {
+ LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
return None;
- }
-
- // If Current cannot be analyzed or is not removable, check the next
- // candidate.
- if (!hasAnalyzableMemoryWrite(CurrentI, TLI) || !isRemovable(CurrentI)) {
+ }
+
+ // Check for anything that looks like it will be a barrier to further
+ // removal
+ if (isDSEBarrier(DefUO, CurrentI)) {
+ LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
+ return None;
+ }
+
+      // If Current is known to be on a path that reads DefLoc or is a read
+ // clobber, bail out, as the path is not profitable. We skip this check
+ // for intrinsic calls, because the code knows how to handle memcpy
+ // intrinsics.
+ if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(DefLoc, CurrentI))
+ return None;
+
+ // Quick check if there are direct uses that are read-clobbers.
+ if (any_of(Current->uses(), [this, &DefLoc, StartAccess](Use &U) {
+ if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
+ return !MSSA.dominates(StartAccess, UseOrDef) &&
+ isReadClobber(DefLoc, UseOrDef->getMemoryInst());
+ return false;
+ })) {
+ LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
+ return None;
+ }
+
+ // If Current cannot be analyzed or is not removable, check the next
+ // candidate.
+ if (!hasAnalyzableMemoryWrite(CurrentI, TLI) || !isRemovable(CurrentI)) {
StepAgain = true;
- Current = CurrentDef->getDefiningAccess();
- continue;
+ Current = CurrentDef->getDefiningAccess();
+ continue;
}
- // If Current does not have an analyzable write location, skip it
- CurrentLoc = getLocForWriteEx(CurrentI);
- if (!CurrentLoc) {
- StepAgain = true;
- Current = CurrentDef->getDefiningAccess();
- continue;
- }
-
- // AliasAnalysis does not account for loops. Limit elimination to
- // candidates for which we can guarantee they always store to the same
- // memory location and not multiple locations in a loop.
- if (Current->getBlock() != KillingDef->getBlock() &&
- !IsGuaranteedLoopInvariant(const_cast<Value *>(CurrentLoc->Ptr))) {
- StepAgain = true;
- Current = CurrentDef->getDefiningAccess();
- WalkerStepLimit -= 1;
- continue;
- }
-
- if (IsMemTerm) {
- // If the killing def is a memory terminator (e.g. lifetime.end), check
-        // the next candidate if Current does not write the same
- // underlying object as the terminator.
- if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
- StepAgain = true;
- Current = CurrentDef->getDefiningAccess();
- }
- continue;
- } else {
- int64_t InstWriteOffset, DepWriteOffset;
- auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc, DL, TLI,
- DepWriteOffset, InstWriteOffset, BatchAA, &F);
- // If Current does not write to the same object as KillingDef, check
- // the next candidate.
- if (OR == OW_Unknown) {
- StepAgain = true;
- Current = CurrentDef->getDefiningAccess();
- } else if (OR == OW_MaybePartial) {
- // If KillingDef only partially overwrites Current, check the next
- // candidate if the partial step limit is exceeded. This aggressively
- // limits the number of candidates for partial store elimination,
- // which are less likely to be removable in the end.
- if (PartialLimit <= 1) {
- StepAgain = true;
- Current = CurrentDef->getDefiningAccess();
- WalkerStepLimit -= 1;
- continue;
- }
- PartialLimit -= 1;
- }
- }
+ // If Current does not have an analyzable write location, skip it
+ CurrentLoc = getLocForWriteEx(CurrentI);
+ if (!CurrentLoc) {
+ StepAgain = true;
+ Current = CurrentDef->getDefiningAccess();
+ continue;
+ }
+
+ // AliasAnalysis does not account for loops. Limit elimination to
+ // candidates for which we can guarantee they always store to the same
+ // memory location and not multiple locations in a loop.
+ if (Current->getBlock() != KillingDef->getBlock() &&
+ !IsGuaranteedLoopInvariant(const_cast<Value *>(CurrentLoc->Ptr))) {
+ StepAgain = true;
+ Current = CurrentDef->getDefiningAccess();
+ WalkerStepLimit -= 1;
+ continue;
+ }
+
+ if (IsMemTerm) {
+ // If the killing def is a memory terminator (e.g. lifetime.end), check
+        // the next candidate if Current does not write the same
+ // underlying object as the terminator.
+ if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
+ StepAgain = true;
+ Current = CurrentDef->getDefiningAccess();
+ }
+ continue;
+ } else {
+ int64_t InstWriteOffset, DepWriteOffset;
+ auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc, DL, TLI,
+ DepWriteOffset, InstWriteOffset, BatchAA, &F);
+ // If Current does not write to the same object as KillingDef, check
+ // the next candidate.
+ if (OR == OW_Unknown) {
+ StepAgain = true;
+ Current = CurrentDef->getDefiningAccess();
+ } else if (OR == OW_MaybePartial) {
+ // If KillingDef only partially overwrites Current, check the next
+ // candidate if the partial step limit is exceeded. This aggressively
+ // limits the number of candidates for partial store elimination,
+ // which are less likely to be removable in the end.
+ if (PartialLimit <= 1) {
+ StepAgain = true;
+ Current = CurrentDef->getDefiningAccess();
+ WalkerStepLimit -= 1;
+ continue;
+ }
+ PartialLimit -= 1;
+ }
+ }
} while (StepAgain);
// Accesses to objects accessible after the function returns can only be
// eliminated if the access is killed along all paths to the exit. Collect
// the blocks with killing (=completely overwriting MemoryDefs) and check if
- // they cover all paths from EarlierAccess to any function exit.
- SmallPtrSet<Instruction *, 16> KillingDefs;
- KillingDefs.insert(KillingDef->getMemoryInst());
- MemoryAccess *EarlierAccess = Current;
- Instruction *EarlierMemInst =
- cast<MemoryDef>(EarlierAccess)->getMemoryInst();
- LLVM_DEBUG(dbgs() << " Checking for reads of " << *EarlierAccess << " ("
- << *EarlierMemInst << ")\n");
+ // they cover all paths from EarlierAccess to any function exit.
+ SmallPtrSet<Instruction *, 16> KillingDefs;
+ KillingDefs.insert(KillingDef->getMemoryInst());
+ MemoryAccess *EarlierAccess = Current;
+ Instruction *EarlierMemInst =
+ cast<MemoryDef>(EarlierAccess)->getMemoryInst();
+ LLVM_DEBUG(dbgs() << " Checking for reads of " << *EarlierAccess << " ("
+ << *EarlierMemInst << ")\n");
SmallSetVector<MemoryAccess *, 32> WorkList;
auto PushMemUses = [&WorkList](MemoryAccess *Acc) {
for (Use &U : Acc->uses())
WorkList.insert(cast<MemoryAccess>(U.getUser()));
};
- PushMemUses(EarlierAccess);
-
- // Optimistically collect all accesses for reads. If we do not find any
- // read clobbers, add them to the cache.
- SmallPtrSet<MemoryAccess *, 16> KnownNoReads;
- if (!EarlierMemInst->mayReadFromMemory())
- KnownNoReads.insert(EarlierAccess);
- // Check if EarlierDef may be read.
+ PushMemUses(EarlierAccess);
+
+ // Optimistically collect all accesses for reads. If we do not find any
+ // read clobbers, add them to the cache.
+ SmallPtrSet<MemoryAccess *, 16> KnownNoReads;
+ if (!EarlierMemInst->mayReadFromMemory())
+ KnownNoReads.insert(EarlierAccess);
+ // Check if EarlierDef may be read.
for (unsigned I = 0; I < WorkList.size(); I++) {
MemoryAccess *UseAccess = WorkList[I];
LLVM_DEBUG(dbgs() << " " << *UseAccess);
- // Bail out if the number of accesses to check exceeds the scan limit.
- if (ScanLimit < (WorkList.size() - I)) {
+ // Bail out if the number of accesses to check exceeds the scan limit.
+ if (ScanLimit < (WorkList.size() - I)) {
LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
return None;
}
- --ScanLimit;
- NumDomMemDefChecks++;
- KnownNoReads.insert(UseAccess);
+ --ScanLimit;
+ NumDomMemDefChecks++;
+ KnownNoReads.insert(UseAccess);
if (isa<MemoryPhi>(UseAccess)) {
- if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
- return DT.properlyDominates(KI->getParent(),
- UseAccess->getBlock());
- })) {
- LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
- continue;
- }
+ if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
+ return DT.properlyDominates(KI->getParent(),
+ UseAccess->getBlock());
+ })) {
+ LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
+ continue;
+ }
LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
PushMemUses(UseAccess);
continue;
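// Illustrative sketch, not part of the patch above: the budget pattern used by
// the use scan above (ScanLimit) and by the upward walk in getDomMemoryDef
// (WalkerStepLimit). Every element still to be visited is charged against the
// remaining budget, and the analysis gives up conservatively once the budget
// cannot cover the rest of the worklist. The names and limits here are invented.
#include <cassert>
#include <vector>

// Returns true if Pred holds for every element, or false (meaning "give up")
// once the remaining budget is smaller than the number of unvisited elements.
template <typename T, typename Fn>
static bool allWithinBudget(const std::vector<T> &Worklist, unsigned Budget, Fn Pred) {
  for (size_t I = 0; I < Worklist.size(); ++I) {
    if (Budget < Worklist.size() - I)
      return false;                              // hit the scan limit
    --Budget;
    if (!Pred(Worklist[I]))
      return false;
  }
  return true;
}

int main() {
  std::vector<int> Uses{1, 2, 3, 4};
  assert(allWithinBudget(Uses, 8, [](int V) { return V > 0; }));
  assert(!allWithinBudget(Uses, 2, [](int V) { return V > 0; })); // budget too small
  return 0;
}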
@@ -2134,45 +2134,45 @@ struct DSEState {
Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
- if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
- return DT.dominates(KI, UseInst);
- })) {
- LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
+ if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
+ return DT.dominates(KI, UseInst);
+ })) {
+ LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
continue;
}
      // A memory terminator kills all preceding MemoryDefs and all succeeding
      // MemoryAccesses. We do not have to check its users.
- if (isMemTerminator(*CurrentLoc, EarlierMemInst, UseInst)) {
- LLVM_DEBUG(
- dbgs()
- << " ... skipping, memterminator invalidates following accesses\n");
+ if (isMemTerminator(*CurrentLoc, EarlierMemInst, UseInst)) {
+ LLVM_DEBUG(
+ dbgs()
+ << " ... skipping, memterminator invalidates following accesses\n");
continue;
- }
-
- if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
- LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
- PushMemUses(UseAccess);
- continue;
- }
-
- if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(DefUO)) {
- LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
- return None;
- }
-
+ }
+
+ if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
+ LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
+ PushMemUses(UseAccess);
+ continue;
+ }
+
+ if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(DefUO)) {
+ LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
+ return None;
+ }
+
// Uses which may read the original MemoryDef mean we cannot eliminate the
// original MD. Stop walk.
- if (isReadClobber(*CurrentLoc, UseInst)) {
+ if (isReadClobber(*CurrentLoc, UseInst)) {
LLVM_DEBUG(dbgs() << " ... found read clobber\n");
return None;
}
- // For the KillingDef and EarlierAccess we only have to check if it reads
- // the memory location.
+ // For the KillingDef and EarlierAccess we only have to check if it reads
+ // the memory location.
// TODO: It would probably be better to check for self-reads before
// calling the function.
- if (KillingDef == UseAccess || EarlierAccess == UseAccess) {
+ if (KillingDef == UseAccess || EarlierAccess == UseAccess) {
LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
continue;
}
@@ -2181,23 +2181,23 @@ struct DSEState {
// the original location. Otherwise we have to check uses of *all*
// MemoryDefs we discover, including non-aliasing ones. Otherwise we might
// miss cases like the following
- // 1 = Def(LoE) ; <----- EarlierDef stores [0,1]
+ // 1 = Def(LoE) ; <----- EarlierDef stores [0,1]
// 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
// Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
// (The Use points to the *first* Def it may alias)
// 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
// stores [0,1]
if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
- if (isCompleteOverwrite(*CurrentLoc, EarlierMemInst, UseInst)) {
- if (!isInvisibleToCallerAfterRet(DefUO) &&
- UseAccess != EarlierAccess) {
+ if (isCompleteOverwrite(*CurrentLoc, EarlierMemInst, UseInst)) {
+ if (!isInvisibleToCallerAfterRet(DefUO) &&
+ UseAccess != EarlierAccess) {
BasicBlock *MaybeKillingBlock = UseInst->getParent();
if (PostOrderNumbers.find(MaybeKillingBlock)->second <
- PostOrderNumbers.find(EarlierAccess->getBlock())->second) {
+ PostOrderNumbers.find(EarlierAccess->getBlock())->second) {
- LLVM_DEBUG(dbgs()
- << " ... found killing def " << *UseInst << "\n");
- KillingDefs.insert(UseInst);
+ LLVM_DEBUG(dbgs()
+ << " ... found killing def " << *UseInst << "\n");
+ KillingDefs.insert(UseInst);
}
}
} else
@@ -2206,15 +2206,15 @@ struct DSEState {
}
// For accesses to locations visible after the function returns, make sure
- // that the location is killed (=overwritten) along all paths from
- // EarlierAccess to the exit.
- if (!isInvisibleToCallerAfterRet(DefUO)) {
- SmallPtrSet<BasicBlock *, 16> KillingBlocks;
- for (Instruction *KD : KillingDefs)
- KillingBlocks.insert(KD->getParent());
+ // that the location is killed (=overwritten) along all paths from
+ // EarlierAccess to the exit.
+ if (!isInvisibleToCallerAfterRet(DefUO)) {
+ SmallPtrSet<BasicBlock *, 16> KillingBlocks;
+ for (Instruction *KD : KillingDefs)
+ KillingBlocks.insert(KD->getParent());
assert(!KillingBlocks.empty() &&
"Expected at least a single killing block");
-
+
// Find the common post-dominator of all killing blocks.
BasicBlock *CommonPred = *KillingBlocks.begin();
for (auto I = std::next(KillingBlocks.begin()), E = KillingBlocks.end();
@@ -2225,17 +2225,17 @@ struct DSEState {
}
// If CommonPred is in the set of killing blocks, just check if it
- // post-dominates EarlierAccess.
+ // post-dominates EarlierAccess.
if (KillingBlocks.count(CommonPred)) {
- if (PDT.dominates(CommonPred, EarlierAccess->getBlock()))
- return {EarlierAccess};
+ if (PDT.dominates(CommonPred, EarlierAccess->getBlock()))
+ return {EarlierAccess};
return None;
}
- // If the common post-dominator does not post-dominate EarlierAccess,
- // there is a path from EarlierAccess to an exit not going through a
- // killing block.
- if (PDT.dominates(CommonPred, EarlierAccess->getBlock())) {
+ // If the common post-dominator does not post-dominate EarlierAccess,
+ // there is a path from EarlierAccess to an exit not going through a
+ // killing block.
+ if (PDT.dominates(CommonPred, EarlierAccess->getBlock())) {
SetVector<BasicBlock *> WorkList;
// If CommonPred is null, there are multiple exits from the function.
@@ -2248,17 +2248,17 @@ struct DSEState {
NumCFGTries++;
// Check if all paths starting from an exit node go through one of the
- // killing blocks before reaching EarlierAccess.
+ // killing blocks before reaching EarlierAccess.
for (unsigned I = 0; I < WorkList.size(); I++) {
NumCFGChecks++;
BasicBlock *Current = WorkList[I];
if (KillingBlocks.count(Current))
continue;
- if (Current == EarlierAccess->getBlock())
+ if (Current == EarlierAccess->getBlock())
return None;
- // EarlierAccess is reachable from the entry, so we don't have to
- // explore unreachable blocks further.
+ // EarlierAccess is reachable from the entry, so we don't have to
+ // explore unreachable blocks further.
if (!DT.isReachableFromEntry(Current))
continue;
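// Illustrative sketch, not part of the patch above: the path check performed
// in this hunk, on a toy CFG. Starting from the function exits, predecessors
// are walked; if the block of EarlierAccess is reached before any killing
// block, some path to an exit still observes the earlier store, so it must be
// kept. Block and the helper name are invented stand-ins for BasicBlock and
// the in-line loop above.
#include <cassert>
#include <set>
#include <vector>

struct Block {
  std::vector<const Block *> Preds;
};

static bool killedOnAllPathsToExit(const Block *EarlierBlock,
                                   const std::set<const Block *> &KillingBlocks,
                                   const std::vector<const Block *> &Exits) {
  std::vector<const Block *> Worklist(Exits.begin(), Exits.end());
  std::set<const Block *> Visited(Exits.begin(), Exits.end());
  for (size_t I = 0; I < Worklist.size(); ++I) {
    const Block *Cur = Worklist[I];
    if (KillingBlocks.count(Cur))
      continue;                          // this path is covered by a killing store
    if (Cur == EarlierBlock)
      return false;                      // reached the earlier store unkilled
    for (const Block *P : Cur->Preds)
      if (Visited.insert(P).second)
        Worklist.push_back(P);
  }
  return true;
}

int main() {
  // Earlier -> Kill -> Exit1, and Earlier -> Exit2 (no kill on that path).
  Block Earlier, Kill, Exit1, Exit2;
  Kill.Preds = {&Earlier};
  Exit1.Preds = {&Kill};
  Exit2.Preds = {&Earlier};
  assert(killedOnAllPathsToExit(&Earlier, {&Kill}, {&Exit1}));
  assert(!killedOnAllPathsToExit(&Earlier, {&Kill}, {&Exit1, &Exit2}));
  return 0;
}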
@@ -2269,14 +2269,14 @@ struct DSEState {
return None;
}
NumCFGSuccess++;
- return {EarlierAccess};
+ return {EarlierAccess};
}
return None;
}
- // No aliasing MemoryUses of EarlierAccess found, EarlierAccess is
- // potentially dead.
- return {EarlierAccess};
+ // No aliasing MemoryUses of EarlierAccess found, EarlierAccess is
+ // potentially dead.
+ return {EarlierAccess};
}
// Delete dead memory defs
@@ -2321,11 +2321,11 @@ struct DSEState {
  // checks extra maythrows (those that aren't MemoryDefs). MemoryDefs that may
// throw are handled during the walk from one def to the next.
bool mayThrowBetween(Instruction *SI, Instruction *NI,
- const Value *SILocUnd) {
+ const Value *SILocUnd) {
// First see if we can ignore it by using the fact that SI is an
// alloca/alloca like object that is not visible to the caller during
// execution of the function.
- if (SILocUnd && isInvisibleToCallerBeforeRet(SILocUnd))
+ if (SILocUnd && isInvisibleToCallerBeforeRet(SILocUnd))
return false;
if (SI->getParent() == NI->getParent())
@@ -2338,10 +2338,10 @@ struct DSEState {
// * A memory instruction that may throw and \p SI accesses a non-stack
// object.
  // * Atomic stores stronger than monotonic.
- bool isDSEBarrier(const Value *SILocUnd, Instruction *NI) {
+ bool isDSEBarrier(const Value *SILocUnd, Instruction *NI) {
    // If NI may throw, it acts as a barrier unless the store being killed is
    // to an alloca/alloca-like object that does not escape.
- if (NI->mayThrow() && !isInvisibleToCallerBeforeRet(SILocUnd))
+ if (NI->mayThrow() && !isInvisibleToCallerBeforeRet(SILocUnd))
return true;
// If NI is an atomic load/store stronger than monotonic, do not try to
@@ -2351,11 +2351,11 @@ struct DSEState {
return isStrongerThanMonotonic(LI->getOrdering());
if (auto *SI = dyn_cast<StoreInst>(NI))
return isStrongerThanMonotonic(SI->getOrdering());
- if (auto *ARMW = dyn_cast<AtomicRMWInst>(NI))
- return isStrongerThanMonotonic(ARMW->getOrdering());
- if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(NI))
- return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
- isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
+ if (auto *ARMW = dyn_cast<AtomicRMWInst>(NI))
+ return isStrongerThanMonotonic(ARMW->getOrdering());
+ if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(NI))
+ return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
+ isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
llvm_unreachable("other instructions should be skipped in MemorySSA");
}
return false;
@@ -2370,31 +2370,31 @@ struct DSEState {
<< "Trying to eliminate MemoryDefs at the end of the function\n");
for (int I = MemDefs.size() - 1; I >= 0; I--) {
MemoryDef *Def = MemDefs[I];
- if (SkipStores.contains(Def) || !isRemovable(Def->getMemoryInst()))
- continue;
-
- Instruction *DefI = Def->getMemoryInst();
- SmallVector<const Value *, 4> Pointers;
- auto DefLoc = getLocForWriteEx(DefI);
- if (!DefLoc)
- continue;
-
- // NOTE: Currently eliminating writes at the end of a function is limited
- // to MemoryDefs with a single underlying object, to save compile-time. In
- // practice it appears the case with multiple underlying objects is very
- // uncommon. If it turns out to be important, we can use
- // getUnderlyingObjects here instead.
- const Value *UO = getUnderlyingObject(DefLoc->Ptr);
- if (!UO || !isInvisibleToCallerAfterRet(UO))
+ if (SkipStores.contains(Def) || !isRemovable(Def->getMemoryInst()))
continue;
+ Instruction *DefI = Def->getMemoryInst();
+ SmallVector<const Value *, 4> Pointers;
+ auto DefLoc = getLocForWriteEx(DefI);
+ if (!DefLoc)
+ continue;
+
+ // NOTE: Currently eliminating writes at the end of a function is limited
+ // to MemoryDefs with a single underlying object, to save compile-time. In
+ // practice it appears the case with multiple underlying objects is very
+ // uncommon. If it turns out to be important, we can use
+ // getUnderlyingObjects here instead.
+ const Value *UO = getUnderlyingObject(DefLoc->Ptr);
+ if (!UO || !isInvisibleToCallerAfterRet(UO))
+ continue;
+
if (isWriteAtEndOfFunction(Def)) {
// See through pointer-to-pointer bitcasts
LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
"of the function\n");
- deleteDeadInstruction(DefI);
- ++NumFastStores;
- MadeChange = true;
+ deleteDeadInstruction(DefI);
+ ++NumFastStores;
+ MadeChange = true;
}
}
return MadeChange;
@@ -2402,53 +2402,53 @@ struct DSEState {
/// \returns true if \p Def is a no-op store, either because it
/// directly stores back a loaded value or stores zero to a calloced object.
- bool storeIsNoop(MemoryDef *Def, const MemoryLocation &DefLoc,
- const Value *DefUO) {
+ bool storeIsNoop(MemoryDef *Def, const MemoryLocation &DefLoc,
+ const Value *DefUO) {
StoreInst *Store = dyn_cast<StoreInst>(Def->getMemoryInst());
if (!Store)
return false;
if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
if (LoadI->getPointerOperand() == Store->getOperand(1)) {
- // Get the defining access for the load.
+ // Get the defining access for the load.
auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
- // Fast path: the defining accesses are the same.
- if (LoadAccess == Def->getDefiningAccess())
- return true;
-
- // Look through phi accesses. Recursively scan all phi accesses by
- // adding them to a worklist. Bail when we run into a memory def that
- // does not match LoadAccess.
- SetVector<MemoryAccess *> ToCheck;
- MemoryAccess *Current =
- MSSA.getWalker()->getClobberingMemoryAccess(Def);
- // We don't want to bail when we run into the store memory def. But,
- // the phi access may point to it. So, pretend like we've already
- // checked it.
- ToCheck.insert(Def);
- ToCheck.insert(Current);
- // Start at current (1) to simulate already having checked Def.
- for (unsigned I = 1; I < ToCheck.size(); ++I) {
- Current = ToCheck[I];
- if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
- // Check all the operands.
- for (auto &Use : PhiAccess->incoming_values())
- ToCheck.insert(cast<MemoryAccess>(&Use));
- continue;
- }
-
- // If we found a memory def, bail. This happens when we have an
- // unrelated write in between an otherwise noop store.
- assert(isa<MemoryDef>(Current) &&
- "Only MemoryDefs should reach here.");
- // TODO: Skip no alias MemoryDefs that have no aliasing reads.
- // We are searching for the definition of the store's destination.
- // So, if that is the same definition as the load, then this is a
- // noop. Otherwise, fail.
- if (LoadAccess != Current)
- return false;
- }
- return true;
+ // Fast path: the defining accesses are the same.
+ if (LoadAccess == Def->getDefiningAccess())
+ return true;
+
+ // Look through phi accesses. Recursively scan all phi accesses by
+ // adding them to a worklist. Bail when we run into a memory def that
+ // does not match LoadAccess.
+ SetVector<MemoryAccess *> ToCheck;
+ MemoryAccess *Current =
+ MSSA.getWalker()->getClobberingMemoryAccess(Def);
+ // We don't want to bail when we run into the store memory def. But,
+ // the phi access may point to it. So, pretend like we've already
+ // checked it.
+ ToCheck.insert(Def);
+ ToCheck.insert(Current);
+ // Start at current (1) to simulate already having checked Def.
+ for (unsigned I = 1; I < ToCheck.size(); ++I) {
+ Current = ToCheck[I];
+ if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
+ // Check all the operands.
+ for (auto &Use : PhiAccess->incoming_values())
+ ToCheck.insert(cast<MemoryAccess>(&Use));
+ continue;
+ }
+
+ // If we found a memory def, bail. This happens when we have an
+ // unrelated write in between an otherwise noop store.
+ assert(isa<MemoryDef>(Current) &&
+ "Only MemoryDefs should reach here.");
+ // TODO: Skip no alias MemoryDefs that have no aliasing reads.
+ // We are searching for the definition of the store's destination.
+ // So, if that is the same definition as the load, then this is a
+ // noop. Otherwise, fail.
+ if (LoadAccess != Current)
+ return false;
+ }
+ return true;
}
}
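// Illustrative sketch, not part of the patch above: the phi walk used by
// storeIsNoop, on a toy memory-SSA-like graph. A store that writes a loaded
// value back to the load's address is a no-op only if every non-phi definition
// reachable (through phis) from the store's clobbering access is the load's
// own defining access; any other MemoryDef means an unrelated write intervened.
// Access and the function name are invented stand-ins; the real walk also
// pre-seeds the store's own MemoryDef, as the code above notes.
#include <cassert>
#include <set>
#include <vector>

struct Access {
  bool IsPhi = false;
  std::vector<const Access *> Incoming; // phi operands, only used when IsPhi
};

static bool storeWritesBackLoadedValue(const Access *LoadDef,
                                       const Access *StoreClobber) {
  std::vector<const Access *> Worklist{StoreClobber};
  std::set<const Access *> Seen{StoreClobber};
  for (size_t I = 0; I < Worklist.size(); ++I) {
    const Access *Cur = Worklist[I];
    if (Cur->IsPhi) {
      for (const Access *In : Cur->Incoming)   // check all phi operands
        if (Seen.insert(In).second)
          Worklist.push_back(In);
      continue;
    }
    if (Cur != LoadDef)                        // unrelated write in between
      return false;
  }
  return true;
}

int main() {
  Access Def;                                  // the load's defining access
  Access Phi;
  Phi.IsPhi = true;
  Phi.Incoming = {&Def, &Def};
  assert(storeWritesBackLoadedValue(&Def, &Phi));   // only the load's def reaches the store
  Access Other;                                // an unrelated MemoryDef
  Phi.Incoming = {&Def, &Other};
  assert(!storeWritesBackLoadedValue(&Def, &Phi));
  return 0;
}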
@@ -2482,7 +2482,7 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
continue;
Instruction *SI = KillingDef->getMemoryInst();
- Optional<MemoryLocation> MaybeSILoc;
+ Optional<MemoryLocation> MaybeSILoc;
if (State.isMemTerminatorInst(SI))
MaybeSILoc = State.getLocForTerminator(SI).map(
[](const std::pair<MemoryLocation, bool> &P) { return P.first; });
@@ -2496,23 +2496,23 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
}
MemoryLocation SILoc = *MaybeSILoc;
assert(SILoc.Ptr && "SILoc should not be null");
- const Value *SILocUnd = getUnderlyingObject(SILoc.Ptr);
+ const Value *SILocUnd = getUnderlyingObject(SILoc.Ptr);
MemoryAccess *Current = KillingDef;
LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
<< *KillingDef << " (" << *SI << ")\n");
- unsigned ScanLimit = MemorySSAScanLimit;
- unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
- unsigned PartialLimit = MemorySSAPartialStoreLimit;
+ unsigned ScanLimit = MemorySSAScanLimit;
+ unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
+ unsigned PartialLimit = MemorySSAPartialStoreLimit;
// Worklist of MemoryAccesses that may be killed by KillingDef.
SetVector<MemoryAccess *> ToCheck;
- if (SILocUnd)
- ToCheck.insert(KillingDef->getDefiningAccess());
-
- bool Shortend = false;
- bool IsMemTerm = State.isMemTerminatorInst(SI);
+ if (SILocUnd)
+ ToCheck.insert(KillingDef->getDefiningAccess());
+
+ bool Shortend = false;
+ bool IsMemTerm = State.isMemTerminatorInst(SI);
// Check if MemoryAccesses in the worklist are killed by KillingDef.
for (unsigned I = 0; I < ToCheck.size(); I++) {
Current = ToCheck[I];
@@ -2520,22 +2520,22 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
continue;
Optional<MemoryAccess *> Next = State.getDomMemoryDef(
- KillingDef, Current, SILoc, SILocUnd, ScanLimit, WalkerStepLimit,
- IsMemTerm, PartialLimit);
+ KillingDef, Current, SILoc, SILocUnd, ScanLimit, WalkerStepLimit,
+ IsMemTerm, PartialLimit);
if (!Next) {
LLVM_DEBUG(dbgs() << " finished walk\n");
continue;
}
- MemoryAccess *EarlierAccess = *Next;
- LLVM_DEBUG(dbgs() << " Checking if we can kill " << *EarlierAccess);
- if (isa<MemoryPhi>(EarlierAccess)) {
+ MemoryAccess *EarlierAccess = *Next;
+ LLVM_DEBUG(dbgs() << " Checking if we can kill " << *EarlierAccess);
+ if (isa<MemoryPhi>(EarlierAccess)) {
LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
- for (Value *V : cast<MemoryPhi>(EarlierAccess)->incoming_values()) {
+ for (Value *V : cast<MemoryPhi>(EarlierAccess)->incoming_values()) {
MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
BasicBlock *IncomingBlock = IncomingAccess->getBlock();
- BasicBlock *PhiBlock = EarlierAccess->getBlock();
+ BasicBlock *PhiBlock = EarlierAccess->getBlock();
// We only consider incoming MemoryAccesses that come before the
// MemoryPhi. Otherwise we could discover candidates that do not
@@ -2546,20 +2546,20 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
}
continue;
}
- auto *NextDef = cast<MemoryDef>(EarlierAccess);
+ auto *NextDef = cast<MemoryDef>(EarlierAccess);
Instruction *NI = NextDef->getMemoryInst();
LLVM_DEBUG(dbgs() << " (" << *NI << ")\n");
ToCheck.insert(NextDef->getDefiningAccess());
- NumGetDomMemoryDefPassed++;
+ NumGetDomMemoryDefPassed++;
if (!DebugCounter::shouldExecute(MemorySSACounter))
continue;
MemoryLocation NILoc = *State.getLocForWriteEx(NI);
- if (IsMemTerm) {
- const Value *NIUnd = getUnderlyingObject(NILoc.Ptr);
- if (SILocUnd != NIUnd)
+ if (IsMemTerm) {
+ const Value *NIUnd = getUnderlyingObject(NILoc.Ptr);
+ if (SILocUnd != NIUnd)
continue;
LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI
<< "\n KILLER: " << *SI << '\n');
@@ -2569,43 +2569,43 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
} else {
// Check if NI overwrites SI.
int64_t InstWriteOffset, DepWriteOffset;
- OverwriteResult OR =
- isOverwrite(SI, NI, SILoc, NILoc, State.DL, TLI, DepWriteOffset,
- InstWriteOffset, State.BatchAA, &F);
- if (OR == OW_MaybePartial) {
- auto Iter = State.IOLs.insert(
- std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
- NI->getParent(), InstOverlapIntervalsTy()));
- auto &IOL = Iter.first->second;
- OR = isPartialOverwrite(SILoc, NILoc, DepWriteOffset, InstWriteOffset,
- NI, IOL);
- }
+ OverwriteResult OR =
+ isOverwrite(SI, NI, SILoc, NILoc, State.DL, TLI, DepWriteOffset,
+ InstWriteOffset, State.BatchAA, &F);
+ if (OR == OW_MaybePartial) {
+ auto Iter = State.IOLs.insert(
+ std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
+ NI->getParent(), InstOverlapIntervalsTy()));
+ auto &IOL = Iter.first->second;
+ OR = isPartialOverwrite(SILoc, NILoc, DepWriteOffset, InstWriteOffset,
+ NI, IOL);
+ }
if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
auto *Earlier = dyn_cast<StoreInst>(NI);
auto *Later = dyn_cast<StoreInst>(SI);
- // We are re-using tryToMergePartialOverlappingStores, which requires
-        // Earlier to dominate Later.
-        // TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
- if (Earlier && Later && DT.dominates(Earlier, Later)) {
- if (Constant *Merged = tryToMergePartialOverlappingStores(
- Earlier, Later, InstWriteOffset, DepWriteOffset, State.DL,
- State.BatchAA, &DT)) {
-
- // Update stored value of earlier store to merged constant.
- Earlier->setOperand(0, Merged);
- ++NumModifiedStores;
- MadeChange = true;
-
- Shortend = true;
- // Remove later store and remove any outstanding overlap intervals
- // for the updated store.
- State.deleteDeadInstruction(Later);
- auto I = State.IOLs.find(Earlier->getParent());
- if (I != State.IOLs.end())
- I->second.erase(Earlier);
- break;
- }
+ // We are re-using tryToMergePartialOverlappingStores, which requires
+        // Earlier to dominate Later.
+        // TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
+ if (Earlier && Later && DT.dominates(Earlier, Later)) {
+ if (Constant *Merged = tryToMergePartialOverlappingStores(
+ Earlier, Later, InstWriteOffset, DepWriteOffset, State.DL,
+ State.BatchAA, &DT)) {
+
+ // Update stored value of earlier store to merged constant.
+ Earlier->setOperand(0, Merged);
+ ++NumModifiedStores;
+ MadeChange = true;
+
+ Shortend = true;
+ // Remove later store and remove any outstanding overlap intervals
+ // for the updated store.
+ State.deleteDeadInstruction(Later);
+ auto I = State.IOLs.find(Earlier->getParent());
+ if (I != State.IOLs.end())
+ I->second.erase(Earlier);
+ break;
+ }
}
}
@@ -2618,21 +2618,21 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
}
}
}
-
- // Check if the store is a no-op.
- if (!Shortend && isRemovable(SI) &&
- State.storeIsNoop(KillingDef, SILoc, SILocUnd)) {
- LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *SI << '\n');
- State.deleteDeadInstruction(SI);
- NumRedundantStores++;
- MadeChange = true;
- continue;
- }
+
+ // Check if the store is a no-op.
+ if (!Shortend && isRemovable(SI) &&
+ State.storeIsNoop(KillingDef, SILoc, SILocUnd)) {
+ LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *SI << '\n');
+ State.deleteDeadInstruction(SI);
+ NumRedundantStores++;
+ MadeChange = true;
+ continue;
+ }
}
if (EnablePartialOverwriteTracking)
for (auto &KV : State.IOLs)
- MadeChange |= removePartiallyOverlappedStores(State.DL, KV.second, TLI);
+ MadeChange |= removePartiallyOverlappedStores(State.DL, KV.second, TLI);
MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
return MadeChange;
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/DivRemPairs.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/DivRemPairs.cpp
index 3c6c444d66..10cf0580f8 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/DivRemPairs.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/DivRemPairs.cpp
@@ -151,8 +151,8 @@ static DivRemWorklistTy getWorklist(Function &F) {
// rare than division.
for (auto &RemPair : RemMap) {
// Find the matching division instruction from the division map.
- auto It = DivMap.find(RemPair.first);
- if (It == DivMap.end())
+ auto It = DivMap.find(RemPair.first);
+ if (It == DivMap.end())
continue;
// We have a matching pair of div/rem instructions.
@@ -160,7 +160,7 @@ static DivRemWorklistTy getWorklist(Function &F) {
Instruction *RemInst = RemPair.second;
// Place it in the worklist.
- Worklist.emplace_back(It->second, RemInst);
+ Worklist.emplace_back(It->second, RemInst);
}
return Worklist;
@@ -315,14 +315,14 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
// %rem = sub %x, %mul // %rem = undef - undef = undef
// If X is not frozen, %rem becomes undef after transformation.
// TODO: We need a undef-specific checking function in ValueTracking
- if (!isGuaranteedNotToBeUndefOrPoison(X, nullptr, DivInst, &DT)) {
+ if (!isGuaranteedNotToBeUndefOrPoison(X, nullptr, DivInst, &DT)) {
auto *FrX = new FreezeInst(X, X->getName() + ".frozen", DivInst);
DivInst->setOperand(0, FrX);
Sub->setOperand(0, FrX);
}
// Same for Y. If X = 1 and Y = (undef | 1), %rem in src is either 1 or 0,
// but %rem in tgt can be one of many integer values.
- if (!isGuaranteedNotToBeUndefOrPoison(Y, nullptr, DivInst, &DT)) {
+ if (!isGuaranteedNotToBeUndefOrPoison(Y, nullptr, DivInst, &DT)) {
auto *FrY = new FreezeInst(Y, Y->getName() + ".frozen", DivInst);
DivInst->setOperand(1, FrY);
Mul->setOperand(1, FrY);
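// Illustrative sketch, not part of the patch above: the integer identity the
// div/rem expansion in this pass relies on, X % Y == X - (X / Y) * Y for
// Y != 0 (and no overflow). The freeze handling above exists because an undef
// or poison X/Y in IR can make the recomputed remainder diverge from a real
// srem/urem; plain C++ has no undef, so only the identity itself is exercised.
#include <cassert>
#include <cstdint>

static int64_t expandedRem(int64_t X, int64_t Y) {
  int64_t Quot = X / Y; // the paired division result gets reused ...
  return X - Quot * Y;  // ... so the remainder costs only a mul and a sub
}

int main() {
  for (int64_t X : {-7, -1, 0, 1, 7, 42})
    for (int64_t Y : {-3, -1, 1, 3, 5})
      assert(expandedRem(X, Y) == X % Y);
  return 0;
}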
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/EarlyCSE.cpp
index 180a82917f..dc144ff173 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -154,7 +154,7 @@ static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
std::swap(A, B);
}
- // Match canonical forms of min/max. We are not using ValueTracking's
+ // Match canonical forms of min/max. We are not using ValueTracking's
// more powerful matchSelectPattern() because it may rely on instruction flags
// such as "nsw". That would be incompatible with the current hashing
// mechanism that may remove flags to increase the likelihood of CSE.
@@ -176,11 +176,11 @@ static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
case CmpInst::ICMP_ULT: Flavor = SPF_UMIN; break;
case CmpInst::ICMP_SGT: Flavor = SPF_SMAX; break;
case CmpInst::ICMP_SLT: Flavor = SPF_SMIN; break;
- // Non-strict inequalities.
- case CmpInst::ICMP_ULE: Flavor = SPF_UMIN; break;
- case CmpInst::ICMP_UGE: Flavor = SPF_UMAX; break;
- case CmpInst::ICMP_SLE: Flavor = SPF_SMIN; break;
- case CmpInst::ICMP_SGE: Flavor = SPF_SMAX; break;
+ // Non-strict inequalities.
+ case CmpInst::ICMP_ULE: Flavor = SPF_UMIN; break;
+ case CmpInst::ICMP_UGE: Flavor = SPF_UMAX; break;
+ case CmpInst::ICMP_SLE: Flavor = SPF_SMIN; break;
+ case CmpInst::ICMP_SGE: Flavor = SPF_SMAX; break;
default: break;
}
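// Illustrative sketch, not part of the patch above: the predicate-to-flavor
// mapping this hunk extends. Strict and non-strict forms of the same
// inequality yield the same min/max flavor, so "a < b ? a : b" and
// "a <= b ? a : b" are hashed and compared as the same UMIN/SMIN. The enums
// are simplified stand-ins for CmpInst::Predicate and SelectPatternFlavor.
#include <cassert>

enum class Pred { UGT, ULT, SGT, SLT, UGE, ULE, SGE, SLE };
enum class Flavor { UMAX, UMIN, SMAX, SMIN, Unknown };

static Flavor flavorFor(Pred P) {
  switch (P) {
  case Pred::UGT: case Pred::UGE: return Flavor::UMAX;
  case Pred::ULT: case Pred::ULE: return Flavor::UMIN;
  case Pred::SGT: case Pred::SGE: return Flavor::SMAX;
  case Pred::SLT: case Pred::SLE: return Flavor::SMIN;
  }
  return Flavor::Unknown;
}

int main() {
  assert(flavorFor(Pred::ULE) == Flavor::UMIN && flavorFor(Pred::ULT) == Flavor::UMIN);
  assert(flavorFor(Pred::SGE) == Flavor::SMAX && flavorFor(Pred::SGT) == Flavor::SMAX);
  return 0;
}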
@@ -219,7 +219,7 @@ static unsigned getHashValueImpl(SimpleValue Val) {
SelectPatternFlavor SPF;
Value *Cond, *A, *B;
if (matchSelectWithOptionalNotCond(Inst, Cond, A, B, SPF)) {
- // Hash min/max (cmp + select) to allow for commuted operands.
+ // Hash min/max (cmp + select) to allow for commuted operands.
// Min/max may also have non-canonical compare predicate (eg, the compare for
// smin may use 'sgt' rather than 'slt'), and non-canonical operands in the
// compare.
@@ -269,17 +269,17 @@ static unsigned getHashValueImpl(SimpleValue Val) {
isa<FreezeInst>(Inst)) &&
"Invalid/unknown instruction");
- // Handle intrinsics with commutative operands.
- // TODO: Extend this to handle intrinsics with >2 operands where the 1st
- // 2 operands are commutative.
- auto *II = dyn_cast<IntrinsicInst>(Inst);
- if (II && II->isCommutative() && II->getNumArgOperands() == 2) {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- if (LHS > RHS)
- std::swap(LHS, RHS);
- return hash_combine(II->getOpcode(), LHS, RHS);
- }
-
+ // Handle intrinsics with commutative operands.
+ // TODO: Extend this to handle intrinsics with >2 operands where the 1st
+ // 2 operands are commutative.
+ auto *II = dyn_cast<IntrinsicInst>(Inst);
+ if (II && II->isCommutative() && II->getNumArgOperands() == 2) {
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ if (LHS > RHS)
+ std::swap(LHS, RHS);
+ return hash_combine(II->getOpcode(), LHS, RHS);
+ }
+
// Mix in the opcode.
return hash_combine(
Inst->getOpcode(),
@@ -332,16 +332,16 @@ static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
}
- // TODO: Extend this for >2 args by matching the trailing N-2 args.
- auto *LII = dyn_cast<IntrinsicInst>(LHSI);
- auto *RII = dyn_cast<IntrinsicInst>(RHSI);
- if (LII && RII && LII->getIntrinsicID() == RII->getIntrinsicID() &&
- LII->isCommutative() && LII->getNumArgOperands() == 2) {
- return LII->getArgOperand(0) == RII->getArgOperand(1) &&
- LII->getArgOperand(1) == RII->getArgOperand(0);
- }
-
- // Min/max can occur with commuted operands, non-canonical predicates,
+ // TODO: Extend this for >2 args by matching the trailing N-2 args.
+ auto *LII = dyn_cast<IntrinsicInst>(LHSI);
+ auto *RII = dyn_cast<IntrinsicInst>(RHSI);
+ if (LII && RII && LII->getIntrinsicID() == RII->getIntrinsicID() &&
+ LII->isCommutative() && LII->getNumArgOperands() == 2) {
+ return LII->getArgOperand(0) == RII->getArgOperand(1) &&
+ LII->getArgOperand(1) == RII->getArgOperand(0);
+ }
+
+ // Min/max can occur with commuted operands, non-canonical predicates,
// and/or non-canonical operands.
// Selects can be non-trivially equivalent via inverted conditions and swaps.
SelectPatternFlavor LSPF, RSPF;
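// Illustrative sketch, not part of the patch above: why the commuted-operand
// handling for two-operand commutative intrinsics keeps hashing and equality
// consistent, which is what CSE needs (equal keys must hash equally).
// hashCommutative canonicalizes the operand order before hashing, mirroring
// the std::swap in getHashValueImpl; equalsCommutative accepts the swapped
// order, mirroring the check added to isEqualImpl. std::hash stands in for
// LLVM's hash_combine and raw pointers stand in for Value *.
#include <cassert>
#include <cstddef>
#include <functional>
#include <utility>

static size_t hashCommutative(unsigned Opcode, const void *LHS, const void *RHS) {
  if (LHS > RHS)
    std::swap(LHS, RHS);                  // canonical operand order
  size_t H = std::hash<unsigned>{}(Opcode);
  H = H * 31 + std::hash<const void *>{}(LHS);
  H = H * 31 + std::hash<const void *>{}(RHS);
  return H;
}

static bool equalsCommutative(const void *LA, const void *RA,
                              const void *LB, const void *RB) {
  return (LA == LB && RA == RB) || (LA == RB && RA == LB);
}

int main() {
  int A, B;
  assert(equalsCommutative(&A, &B, &B, &A));
  assert(hashCommutative(7, &A, &B) == hashCommutative(7, &B, &A));
  return 0;
}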
@@ -372,7 +372,7 @@ static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
// This intentionally does NOT handle patterns with a double-negation in
// the sense of not + not, because doing so could result in values
// comparing
- // as equal that hash differently in the min/max cases like:
+ // as equal that hash differently in the min/max cases like:
// select (cmp slt, X, Y), X, Y <--> select (not (not (cmp slt, X, Y))), X, Y
// ^ hashes as min ^ would not hash as min
// In the context of the EarlyCSE pass, however, such cases never reach
@@ -627,11 +627,11 @@ private:
StackNode &operator=(const StackNode &) = delete;
// Accessors.
- unsigned currentGeneration() const { return CurrentGeneration; }
- unsigned childGeneration() const { return ChildGeneration; }
+ unsigned currentGeneration() const { return CurrentGeneration; }
+ unsigned childGeneration() const { return ChildGeneration; }
void childGeneration(unsigned generation) { ChildGeneration = generation; }
DomTreeNode *node() { return Node; }
- DomTreeNode::const_iterator childIter() const { return ChildIter; }
+ DomTreeNode::const_iterator childIter() const { return ChildIter; }
DomTreeNode *nextChild() {
DomTreeNode *child = *ChildIter;
@@ -639,8 +639,8 @@ private:
return child;
}
- DomTreeNode::const_iterator end() const { return EndIter; }
- bool isProcessed() const { return Processed; }
+ DomTreeNode::const_iterator end() const { return EndIter; }
+ bool isProcessed() const { return Processed; }
void process() { Processed = true; }
private:
@@ -659,60 +659,60 @@ private:
public:
ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
: Inst(Inst) {
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- IntrID = II->getIntrinsicID();
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ IntrID = II->getIntrinsicID();
if (TTI.getTgtMemIntrinsic(II, Info))
- return;
- if (isHandledNonTargetIntrinsic(IntrID)) {
- switch (IntrID) {
- case Intrinsic::masked_load:
- Info.PtrVal = Inst->getOperand(0);
- Info.MatchingId = Intrinsic::masked_load;
- Info.ReadMem = true;
- Info.WriteMem = false;
- Info.IsVolatile = false;
- break;
- case Intrinsic::masked_store:
- Info.PtrVal = Inst->getOperand(1);
- // Use the ID of masked load as the "matching id". This will
- // prevent matching non-masked loads/stores with masked ones
- // (which could be done), but at the moment, the code here
- // does not support matching intrinsics with non-intrinsics,
- // so keep the MatchingIds specific to masked instructions
- // for now (TODO).
- Info.MatchingId = Intrinsic::masked_load;
- Info.ReadMem = false;
- Info.WriteMem = true;
- Info.IsVolatile = false;
- break;
- }
- }
- }
+ return;
+ if (isHandledNonTargetIntrinsic(IntrID)) {
+ switch (IntrID) {
+ case Intrinsic::masked_load:
+ Info.PtrVal = Inst->getOperand(0);
+ Info.MatchingId = Intrinsic::masked_load;
+ Info.ReadMem = true;
+ Info.WriteMem = false;
+ Info.IsVolatile = false;
+ break;
+ case Intrinsic::masked_store:
+ Info.PtrVal = Inst->getOperand(1);
+ // Use the ID of masked load as the "matching id". This will
+ // prevent matching non-masked loads/stores with masked ones
+ // (which could be done), but at the moment, the code here
+ // does not support matching intrinsics with non-intrinsics,
+ // so keep the MatchingIds specific to masked instructions
+ // for now (TODO).
+ Info.MatchingId = Intrinsic::masked_load;
+ Info.ReadMem = false;
+ Info.WriteMem = true;
+ Info.IsVolatile = false;
+ break;
+ }
+ }
+ }
}
- Instruction *get() { return Inst; }
- const Instruction *get() const { return Inst; }
-
+ Instruction *get() { return Inst; }
+ const Instruction *get() const { return Inst; }
+
bool isLoad() const {
- if (IntrID != 0)
- return Info.ReadMem;
+ if (IntrID != 0)
+ return Info.ReadMem;
return isa<LoadInst>(Inst);
}
bool isStore() const {
- if (IntrID != 0)
- return Info.WriteMem;
+ if (IntrID != 0)
+ return Info.WriteMem;
return isa<StoreInst>(Inst);
}
bool isAtomic() const {
- if (IntrID != 0)
+ if (IntrID != 0)
return Info.Ordering != AtomicOrdering::NotAtomic;
return Inst->isAtomic();
}
bool isUnordered() const {
- if (IntrID != 0)
+ if (IntrID != 0)
return Info.isUnordered();
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
@@ -725,7 +725,7 @@ private:
}
bool isVolatile() const {
- if (IntrID != 0)
+ if (IntrID != 0)
return Info.IsVolatile;
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
@@ -750,85 +750,85 @@ private:
// field in the MemIntrinsicInfo structure. That field contains
// non-negative values only.
int getMatchingId() const {
- if (IntrID != 0)
- return Info.MatchingId;
+ if (IntrID != 0)
+ return Info.MatchingId;
return -1;
}
Value *getPointerOperand() const {
- if (IntrID != 0)
- return Info.PtrVal;
+ if (IntrID != 0)
+ return Info.PtrVal;
return getLoadStorePointerOperand(Inst);
}
bool mayReadFromMemory() const {
- if (IntrID != 0)
- return Info.ReadMem;
+ if (IntrID != 0)
+ return Info.ReadMem;
return Inst->mayReadFromMemory();
}
bool mayWriteToMemory() const {
- if (IntrID != 0)
- return Info.WriteMem;
+ if (IntrID != 0)
+ return Info.WriteMem;
return Inst->mayWriteToMemory();
}
private:
- Intrinsic::ID IntrID = 0;
+ Intrinsic::ID IntrID = 0;
MemIntrinsicInfo Info;
Instruction *Inst;
};
- // This function is to prevent accidentally passing a non-target
- // intrinsic ID to TargetTransformInfo.
- static bool isHandledNonTargetIntrinsic(Intrinsic::ID ID) {
- switch (ID) {
- case Intrinsic::masked_load:
- case Intrinsic::masked_store:
- return true;
- }
- return false;
- }
- static bool isHandledNonTargetIntrinsic(const Value *V) {
- if (auto *II = dyn_cast<IntrinsicInst>(V))
- return isHandledNonTargetIntrinsic(II->getIntrinsicID());
- return false;
- }
-
+ // This function is to prevent accidentally passing a non-target
+ // intrinsic ID to TargetTransformInfo.
+ static bool isHandledNonTargetIntrinsic(Intrinsic::ID ID) {
+ switch (ID) {
+ case Intrinsic::masked_load:
+ case Intrinsic::masked_store:
+ return true;
+ }
+ return false;
+ }
+ static bool isHandledNonTargetIntrinsic(const Value *V) {
+ if (auto *II = dyn_cast<IntrinsicInst>(V))
+ return isHandledNonTargetIntrinsic(II->getIntrinsicID());
+ return false;
+ }
+
bool processNode(DomTreeNode *Node);
bool handleBranchCondition(Instruction *CondInst, const BranchInst *BI,
const BasicBlock *BB, const BasicBlock *Pred);
- Value *getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
- unsigned CurrentGeneration);
-
- bool overridingStores(const ParseMemoryInst &Earlier,
- const ParseMemoryInst &Later);
-
+ Value *getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
+ unsigned CurrentGeneration);
+
+ bool overridingStores(const ParseMemoryInst &Earlier,
+ const ParseMemoryInst &Later);
+
Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
if (auto *LI = dyn_cast<LoadInst>(Inst))
return LI;
if (auto *SI = dyn_cast<StoreInst>(Inst))
return SI->getValueOperand();
assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
- auto *II = cast<IntrinsicInst>(Inst);
- if (isHandledNonTargetIntrinsic(II->getIntrinsicID()))
- return getOrCreateResultNonTargetMemIntrinsic(II, ExpectedType);
- return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);
- }
-
- Value *getOrCreateResultNonTargetMemIntrinsic(IntrinsicInst *II,
- Type *ExpectedType) const {
- switch (II->getIntrinsicID()) {
- case Intrinsic::masked_load:
- return II;
- case Intrinsic::masked_store:
- return II->getOperand(0);
- }
- return nullptr;
+ auto *II = cast<IntrinsicInst>(Inst);
+ if (isHandledNonTargetIntrinsic(II->getIntrinsicID()))
+ return getOrCreateResultNonTargetMemIntrinsic(II, ExpectedType);
+ return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);
}
+ Value *getOrCreateResultNonTargetMemIntrinsic(IntrinsicInst *II,
+ Type *ExpectedType) const {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::masked_load:
+ return II;
+ case Intrinsic::masked_store:
+ return II->getOperand(0);
+ }
+ return nullptr;
+ }
+
/// Return true if the instruction is known to only operate on memory
/// provably invariant in the given "generation".
bool isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt);
@@ -836,101 +836,101 @@ private:
bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
Instruction *EarlierInst, Instruction *LaterInst);
- bool isNonTargetIntrinsicMatch(const IntrinsicInst *Earlier,
- const IntrinsicInst *Later) {
- auto IsSubmask = [](const Value *Mask0, const Value *Mask1) {
- // Is Mask0 a submask of Mask1?
- if (Mask0 == Mask1)
- return true;
- if (isa<UndefValue>(Mask0) || isa<UndefValue>(Mask1))
- return false;
- auto *Vec0 = dyn_cast<ConstantVector>(Mask0);
- auto *Vec1 = dyn_cast<ConstantVector>(Mask1);
- if (!Vec0 || !Vec1)
- return false;
- assert(Vec0->getType() == Vec1->getType() &&
- "Masks should have the same type");
- for (int i = 0, e = Vec0->getNumOperands(); i != e; ++i) {
- Constant *Elem0 = Vec0->getOperand(i);
- Constant *Elem1 = Vec1->getOperand(i);
- auto *Int0 = dyn_cast<ConstantInt>(Elem0);
- if (Int0 && Int0->isZero())
- continue;
- auto *Int1 = dyn_cast<ConstantInt>(Elem1);
- if (Int1 && !Int1->isZero())
- continue;
- if (isa<UndefValue>(Elem0) || isa<UndefValue>(Elem1))
- return false;
- if (Elem0 == Elem1)
- continue;
- return false;
- }
- return true;
- };
- auto PtrOp = [](const IntrinsicInst *II) {
- if (II->getIntrinsicID() == Intrinsic::masked_load)
- return II->getOperand(0);
- if (II->getIntrinsicID() == Intrinsic::masked_store)
- return II->getOperand(1);
- llvm_unreachable("Unexpected IntrinsicInst");
- };
- auto MaskOp = [](const IntrinsicInst *II) {
- if (II->getIntrinsicID() == Intrinsic::masked_load)
- return II->getOperand(2);
- if (II->getIntrinsicID() == Intrinsic::masked_store)
- return II->getOperand(3);
- llvm_unreachable("Unexpected IntrinsicInst");
- };
- auto ThruOp = [](const IntrinsicInst *II) {
- if (II->getIntrinsicID() == Intrinsic::masked_load)
- return II->getOperand(3);
- llvm_unreachable("Unexpected IntrinsicInst");
- };
-
- if (PtrOp(Earlier) != PtrOp(Later))
- return false;
-
- Intrinsic::ID IDE = Earlier->getIntrinsicID();
- Intrinsic::ID IDL = Later->getIntrinsicID();
- // We could really use specific intrinsic classes for masked loads
- // and stores in IntrinsicInst.h.
- if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_load) {
- // Trying to replace later masked load with the earlier one.
- // Check that the pointers are the same, and
- // - masks and pass-throughs are the same, or
- // - replacee's pass-through is "undef" and replacer's mask is a
- // super-set of the replacee's mask.
- if (MaskOp(Earlier) == MaskOp(Later) && ThruOp(Earlier) == ThruOp(Later))
- return true;
- if (!isa<UndefValue>(ThruOp(Later)))
- return false;
- return IsSubmask(MaskOp(Later), MaskOp(Earlier));
- }
- if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_load) {
- // Trying to replace a load of a stored value with the store's value.
- // Check that the pointers are the same, and
- // - load's mask is a subset of store's mask, and
- // - load's pass-through is "undef".
- if (!IsSubmask(MaskOp(Later), MaskOp(Earlier)))
- return false;
- return isa<UndefValue>(ThruOp(Later));
- }
- if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_store) {
- // Trying to remove a store of the loaded value.
- // Check that the pointers are the same, and
- // - store's mask is a subset of the load's mask.
- return IsSubmask(MaskOp(Later), MaskOp(Earlier));
- }
- if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_store) {
- // Trying to remove a dead store (earlier).
- // Check that the pointers are the same,
- // - the to-be-removed store's mask is a subset of the other store's
- // mask.
- return IsSubmask(MaskOp(Earlier), MaskOp(Later));
- }
- return false;
- }
-
+ bool isNonTargetIntrinsicMatch(const IntrinsicInst *Earlier,
+ const IntrinsicInst *Later) {
+ auto IsSubmask = [](const Value *Mask0, const Value *Mask1) {
+ // Is Mask0 a submask of Mask1?
+ if (Mask0 == Mask1)
+ return true;
+ if (isa<UndefValue>(Mask0) || isa<UndefValue>(Mask1))
+ return false;
+ auto *Vec0 = dyn_cast<ConstantVector>(Mask0);
+ auto *Vec1 = dyn_cast<ConstantVector>(Mask1);
+ if (!Vec0 || !Vec1)
+ return false;
+ assert(Vec0->getType() == Vec1->getType() &&
+ "Masks should have the same type");
+ for (int i = 0, e = Vec0->getNumOperands(); i != e; ++i) {
+ Constant *Elem0 = Vec0->getOperand(i);
+ Constant *Elem1 = Vec1->getOperand(i);
+ auto *Int0 = dyn_cast<ConstantInt>(Elem0);
+ if (Int0 && Int0->isZero())
+ continue;
+ auto *Int1 = dyn_cast<ConstantInt>(Elem1);
+ if (Int1 && !Int1->isZero())
+ continue;
+ if (isa<UndefValue>(Elem0) || isa<UndefValue>(Elem1))
+ return false;
+ if (Elem0 == Elem1)
+ continue;
+ return false;
+ }
+ return true;
+ };
+ auto PtrOp = [](const IntrinsicInst *II) {
+ if (II->getIntrinsicID() == Intrinsic::masked_load)
+ return II->getOperand(0);
+ if (II->getIntrinsicID() == Intrinsic::masked_store)
+ return II->getOperand(1);
+ llvm_unreachable("Unexpected IntrinsicInst");
+ };
+ auto MaskOp = [](const IntrinsicInst *II) {
+ if (II->getIntrinsicID() == Intrinsic::masked_load)
+ return II->getOperand(2);
+ if (II->getIntrinsicID() == Intrinsic::masked_store)
+ return II->getOperand(3);
+ llvm_unreachable("Unexpected IntrinsicInst");
+ };
+ auto ThruOp = [](const IntrinsicInst *II) {
+ if (II->getIntrinsicID() == Intrinsic::masked_load)
+ return II->getOperand(3);
+ llvm_unreachable("Unexpected IntrinsicInst");
+ };
+
+ if (PtrOp(Earlier) != PtrOp(Later))
+ return false;
+
+ Intrinsic::ID IDE = Earlier->getIntrinsicID();
+ Intrinsic::ID IDL = Later->getIntrinsicID();
+ // We could really use specific intrinsic classes for masked loads
+ // and stores in IntrinsicInst.h.
+ if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_load) {
+ // Trying to replace later masked load with the earlier one.
+ // Check that the pointers are the same, and
+ // - masks and pass-throughs are the same, or
+ // - replacee's pass-through is "undef" and replacer's mask is a
+ // super-set of the replacee's mask.
+ if (MaskOp(Earlier) == MaskOp(Later) && ThruOp(Earlier) == ThruOp(Later))
+ return true;
+ if (!isa<UndefValue>(ThruOp(Later)))
+ return false;
+ return IsSubmask(MaskOp(Later), MaskOp(Earlier));
+ }
+ if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_load) {
+ // Trying to replace a load of a stored value with the store's value.
+ // Check that the pointers are the same, and
+ // - load's mask is a subset of store's mask, and
+ // - load's pass-through is "undef".
+ if (!IsSubmask(MaskOp(Later), MaskOp(Earlier)))
+ return false;
+ return isa<UndefValue>(ThruOp(Later));
+ }
+ if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_store) {
+ // Trying to remove a store of the loaded value.
+ // Check that the pointers are the same, and
+ // - store's mask is a subset of the load's mask.
+ return IsSubmask(MaskOp(Later), MaskOp(Earlier));
+ }
+ if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_store) {
+ // Trying to remove a dead store (earlier).
+ // Check that the pointers are the same,
+ // - the to-be-removed store's mask is a subset of the other store's
+ // mask.
+ return IsSubmask(MaskOp(Earlier), MaskOp(Later));
+ }
+ return false;
+ }
+
void removeMSSA(Instruction &Inst) {
if (!MSSA)
return;
@@ -1033,14 +1033,14 @@ bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
auto *TorF = (BI->getSuccessor(0) == BB)
? ConstantInt::getTrue(BB->getContext())
: ConstantInt::getFalse(BB->getContext());
- auto MatchBinOp = [](Instruction *I, unsigned Opcode, Value *&LHS,
- Value *&RHS) {
- if (Opcode == Instruction::And &&
- match(I, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
- return true;
- else if (Opcode == Instruction::Or &&
- match(I, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
- return true;
+ auto MatchBinOp = [](Instruction *I, unsigned Opcode, Value *&LHS,
+ Value *&RHS) {
+ if (Opcode == Instruction::And &&
+ match(I, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
+ return true;
+ else if (Opcode == Instruction::Or &&
+ match(I, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
+ return true;
return false;
};
// If the condition is AND operation, we can propagate its operands into the
@@ -1071,9 +1071,9 @@ bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
}
}
- Value *LHS, *RHS;
- if (MatchBinOp(Curr, PropagateOpcode, LHS, RHS))
- for (auto &Op : { LHS, RHS })
+ Value *LHS, *RHS;
+ if (MatchBinOp(Curr, PropagateOpcode, LHS, RHS))
+ for (auto &Op : { LHS, RHS })
if (Instruction *OPI = dyn_cast<Instruction>(Op))
if (SimpleValue::canHandle(OPI) && Visited.insert(OPI).second)
WorkList.push_back(OPI);
@@ -1082,86 +1082,86 @@ bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
return MadeChanges;
}
-Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
- unsigned CurrentGeneration) {
- if (InVal.DefInst == nullptr)
- return nullptr;
- if (InVal.MatchingId != MemInst.getMatchingId())
- return nullptr;
- // We don't yet handle removing loads with ordering of any kind.
- if (MemInst.isVolatile() || !MemInst.isUnordered())
- return nullptr;
- // We can't replace an atomic load with one which isn't also atomic.
- if (MemInst.isLoad() && !InVal.IsAtomic && MemInst.isAtomic())
- return nullptr;
- // The value V returned from this function is used differently depending
- // on whether MemInst is a load or a store. If it's a load, we will replace
-  // MemInst with V; if it's a store, we will check if V is the same as the
- // available value.
- bool MemInstMatching = !MemInst.isLoad();
- Instruction *Matching = MemInstMatching ? MemInst.get() : InVal.DefInst;
- Instruction *Other = MemInstMatching ? InVal.DefInst : MemInst.get();
-
- // For stores check the result values before checking memory generation
- // (otherwise isSameMemGeneration may crash).
- Value *Result = MemInst.isStore()
- ? getOrCreateResult(Matching, Other->getType())
- : nullptr;
- if (MemInst.isStore() && InVal.DefInst != Result)
- return nullptr;
-
- // Deal with non-target memory intrinsics.
- bool MatchingNTI = isHandledNonTargetIntrinsic(Matching);
- bool OtherNTI = isHandledNonTargetIntrinsic(Other);
- if (OtherNTI != MatchingNTI)
- return nullptr;
- if (OtherNTI && MatchingNTI) {
- if (!isNonTargetIntrinsicMatch(cast<IntrinsicInst>(InVal.DefInst),
- cast<IntrinsicInst>(MemInst.get())))
- return nullptr;
- }
-
- if (!isOperatingOnInvariantMemAt(MemInst.get(), InVal.Generation) &&
- !isSameMemGeneration(InVal.Generation, CurrentGeneration, InVal.DefInst,
- MemInst.get()))
- return nullptr;
-
- if (!Result)
- Result = getOrCreateResult(Matching, Other->getType());
- return Result;
-}
-
-bool EarlyCSE::overridingStores(const ParseMemoryInst &Earlier,
- const ParseMemoryInst &Later) {
- // Can we remove Earlier store because of Later store?
-
- assert(Earlier.isUnordered() && !Earlier.isVolatile() &&
- "Violated invariant");
- if (Earlier.getPointerOperand() != Later.getPointerOperand())
- return false;
- if (Earlier.getMatchingId() != Later.getMatchingId())
- return false;
- // At the moment, we don't remove ordered stores, but do remove
- // unordered atomic stores. There's no special requirement (for
- // unordered atomics) about removing atomic stores only in favor of
- // other atomic stores since we were going to execute the non-atomic
- // one anyway and the atomic one might never have become visible.
- if (!Earlier.isUnordered() || !Later.isUnordered())
- return false;
-
- // Deal with non-target memory intrinsics.
- bool ENTI = isHandledNonTargetIntrinsic(Earlier.get());
- bool LNTI = isHandledNonTargetIntrinsic(Later.get());
- if (ENTI && LNTI)
- return isNonTargetIntrinsicMatch(cast<IntrinsicInst>(Earlier.get()),
- cast<IntrinsicInst>(Later.get()));
-
- // Because of the check above, at least one of them is false.
- // For now disallow matching intrinsics with non-intrinsics,
- // so assume that the stores match if neither is an intrinsic.
- return ENTI == LNTI;
-}
-
+Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
+ unsigned CurrentGeneration) {
+ if (InVal.DefInst == nullptr)
+ return nullptr;
+ if (InVal.MatchingId != MemInst.getMatchingId())
+ return nullptr;
+ // We don't yet handle removing loads with ordering of any kind.
+ if (MemInst.isVolatile() || !MemInst.isUnordered())
+ return nullptr;
+ // We can't replace an atomic load with one which isn't also atomic.
+ if (MemInst.isLoad() && !InVal.IsAtomic && MemInst.isAtomic())
+ return nullptr;
+ // The value V returned from this function is used differently depending
+ // on whether MemInst is a load or a store. If it's a load, we will replace
+  // MemInst with V; if it's a store, we will check if V is the same as the
+ // available value.
+ bool MemInstMatching = !MemInst.isLoad();
+ Instruction *Matching = MemInstMatching ? MemInst.get() : InVal.DefInst;
+ Instruction *Other = MemInstMatching ? InVal.DefInst : MemInst.get();
+
+ // For stores check the result values before checking memory generation
+ // (otherwise isSameMemGeneration may crash).
+ Value *Result = MemInst.isStore()
+ ? getOrCreateResult(Matching, Other->getType())
+ : nullptr;
+ if (MemInst.isStore() && InVal.DefInst != Result)
+ return nullptr;
+
+ // Deal with non-target memory intrinsics.
+ bool MatchingNTI = isHandledNonTargetIntrinsic(Matching);
+ bool OtherNTI = isHandledNonTargetIntrinsic(Other);
+ if (OtherNTI != MatchingNTI)
+ return nullptr;
+ if (OtherNTI && MatchingNTI) {
+ if (!isNonTargetIntrinsicMatch(cast<IntrinsicInst>(InVal.DefInst),
+ cast<IntrinsicInst>(MemInst.get())))
+ return nullptr;
+ }
+
+ if (!isOperatingOnInvariantMemAt(MemInst.get(), InVal.Generation) &&
+ !isSameMemGeneration(InVal.Generation, CurrentGeneration, InVal.DefInst,
+ MemInst.get()))
+ return nullptr;
+
+ if (!Result)
+ Result = getOrCreateResult(Matching, Other->getType());
+ return Result;
+}
+
+bool EarlyCSE::overridingStores(const ParseMemoryInst &Earlier,
+ const ParseMemoryInst &Later) {
+ // Can we remove Earlier store because of Later store?
+
+ assert(Earlier.isUnordered() && !Earlier.isVolatile() &&
+ "Violated invariant");
+ if (Earlier.getPointerOperand() != Later.getPointerOperand())
+ return false;
+ if (Earlier.getMatchingId() != Later.getMatchingId())
+ return false;
+ // At the moment, we don't remove ordered stores, but do remove
+ // unordered atomic stores. There's no special requirement (for
+ // unordered atomics) about removing atomic stores only in favor of
+ // other atomic stores since we were going to execute the non-atomic
+ // one anyway and the atomic one might never have become visible.
+ if (!Earlier.isUnordered() || !Later.isUnordered())
+ return false;
+
+ // Deal with non-target memory intrinsics.
+ bool ENTI = isHandledNonTargetIntrinsic(Earlier.get());
+ bool LNTI = isHandledNonTargetIntrinsic(Later.get());
+ if (ENTI && LNTI)
+ return isNonTargetIntrinsicMatch(cast<IntrinsicInst>(Earlier.get()),
+ cast<IntrinsicInst>(Later.get()));
+
+ // Because of the check above, at least one of them is false.
+ // For now disallow matching intrinsics with non-intrinsics,
+ // so assume that the stores match if neither is an intrinsic.
+ return ENTI == LNTI;
+}
+
bool EarlyCSE::processNode(DomTreeNode *Node) {
bool Changed = false;
BasicBlock *BB = Node->getBlock();
@@ -1232,14 +1232,14 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
continue;
}
- // Likewise, noalias intrinsics don't actually write.
- if (match(&Inst,
- m_Intrinsic<Intrinsic::experimental_noalias_scope_decl>())) {
- LLVM_DEBUG(dbgs() << "EarlyCSE skipping noalias intrinsic: " << Inst
- << '\n');
- continue;
- }
-
+ // Likewise, noalias intrinsics don't actually write.
+ if (match(&Inst,
+ m_Intrinsic<Intrinsic::experimental_noalias_scope_decl>())) {
+ LLVM_DEBUG(dbgs() << "EarlyCSE skipping noalias intrinsic: " << Inst
+ << '\n');
+ continue;
+ }
+
// Skip sideeffect intrinsics, for the same reason as assume intrinsics.
if (match(&Inst, m_Intrinsic<Intrinsic::sideeffect>())) {
LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << Inst << '\n');
@@ -1386,21 +1386,21 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// we can assume the current load loads the same value as the dominating
// load.
LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
- if (Value *Op = getMatchingValue(InVal, MemInst, CurrentGeneration)) {
- LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << Inst
- << " to: " << *InVal.DefInst << '\n');
- if (!DebugCounter::shouldExecute(CSECounter)) {
- LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
+ if (Value *Op = getMatchingValue(InVal, MemInst, CurrentGeneration)) {
+ LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << Inst
+ << " to: " << *InVal.DefInst << '\n');
+ if (!DebugCounter::shouldExecute(CSECounter)) {
+ LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
continue;
}
- if (!Inst.use_empty())
- Inst.replaceAllUsesWith(Op);
- salvageKnowledge(&Inst, &AC);
- removeMSSA(Inst);
- Inst.eraseFromParent();
- Changed = true;
- ++NumCSELoad;
- continue;
+ if (!Inst.use_empty())
+ Inst.replaceAllUsesWith(Op);
+ salvageKnowledge(&Inst, &AC);
+ removeMSSA(Inst);
+ Inst.eraseFromParent();
+ Changed = true;
+ ++NumCSELoad;
+ continue;
}
// Otherwise, remember that we have this instruction.
@@ -1470,7 +1470,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
if (MemInst.isValid() && MemInst.isStore()) {
LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
if (InVal.DefInst &&
- InVal.DefInst == getMatchingValue(InVal, MemInst, CurrentGeneration)) {
+ InVal.DefInst == getMatchingValue(InVal, MemInst, CurrentGeneration)) {
// It is okay to have a LastStore to a different pointer here if MemorySSA
// tells us that the load and store are from the same memory generation.
// In that case, LastStore should keep its present value since we're
@@ -1506,7 +1506,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// We do a trivial form of DSE if there are two stores to the same
// location with no intervening loads. Delete the earlier store.
if (LastStore) {
- if (overridingStores(ParseMemoryInst(LastStore, TTI), MemInst)) {
+ if (overridingStores(ParseMemoryInst(LastStore, TTI), MemInst)) {
LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
<< " due to: " << Inst << '\n');
if (!DebugCounter::shouldExecute(CSECounter)) {
@@ -1667,7 +1667,7 @@ public:
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
if (UseMemorySSA) {
- AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<MemorySSAWrapperPass>();
AU.addPreserved<MemorySSAWrapperPass>();
}
@@ -1709,7 +1709,7 @@ INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
"Early CSE w/ MemorySSA", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
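
The EarlyCSE changes above reduce every masked load/store pairing to one question: is one constant lane mask a submask of the other? For fully constant masks with no undef lanes, that relation is plain per-lane implication. The sketch below is a minimal standalone model of that check (plain C++, no LLVM dependency; the isSubmask helper and the sample masks are illustrative only, and the undef-element handling of the real IsSubmask lambda is deliberately omitted):

// Minimal model of the submask relation used by
// EarlyCSE::isNonTargetIntrinsicMatch, restricted to fully constant
// <N x i1> masks with no undef lanes.
#include <cassert>
#include <vector>

// True when every lane enabled in Sub is also enabled in Super.
static bool isSubmask(const std::vector<bool> &Sub,
                      const std::vector<bool> &Super) {
  assert(Sub.size() == Super.size() && "masks must have the same lane count");
  for (size_t I = 0, E = Sub.size(); I != E; ++I)
    if (Sub[I] && !Super[I])
      return false; // Sub touches a lane that Super does not cover.
  return true;
}

int main() {
  std::vector<bool> StoreMask = {true, true, false, true};
  std::vector<bool> LoadMask  = {true, false, false, true};

  // masked_store followed by masked_load of the same pointer: the load can be
  // forwarded from the stored value only if every lane it reads was written,
  // i.e. the load's mask is a submask of the store's mask.
  assert(isSubmask(LoadMask, StoreMask));

  // The converse fails: the store writes a lane the load never reads.
  assert(!isSubmask(StoreMask, LoadMask));
  return 0;
}
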
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/FlattenCFGPass.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/FlattenCFGPass.cpp
index e54a270fb2..ab88f253c6 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/FlattenCFGPass.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -12,7 +12,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/GVN.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/GVN.cpp
index c6b6d75aef..a0e7dec90f 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/GVN.cpp
@@ -26,7 +26,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
@@ -36,8 +36,8 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/Analysis/MemorySSA.h"
-#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -99,33 +99,33 @@ STATISTIC(NumGVNSimpl, "Number of instructions simplified");
STATISTIC(NumGVNEqProp, "Number of equalities propagated");
STATISTIC(NumPRELoad, "Number of loads PRE'd");
-STATISTIC(IsValueFullyAvailableInBlockNumSpeculationsMax,
- "Number of blocks speculated as available in "
- "IsValueFullyAvailableInBlock(), max");
-STATISTIC(MaxBBSpeculationCutoffReachedTimes,
-          "Number of times we reached gvn-max-block-speculations cut-off "
- "preventing further exploration");
-
+STATISTIC(IsValueFullyAvailableInBlockNumSpeculationsMax,
+ "Number of blocks speculated as available in "
+ "IsValueFullyAvailableInBlock(), max");
+STATISTIC(MaxBBSpeculationCutoffReachedTimes,
+          "Number of times we reached gvn-max-block-speculations cut-off "
+ "preventing further exploration");
+
static cl::opt<bool> GVNEnablePRE("enable-pre", cl::init(true), cl::Hidden);
static cl::opt<bool> GVNEnableLoadPRE("enable-load-pre", cl::init(true));
static cl::opt<bool> GVNEnableLoadInLoopPRE("enable-load-in-loop-pre",
cl::init(true));
-static cl::opt<bool>
-GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre",
- cl::init(true));
+static cl::opt<bool>
+GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre",
+ cl::init(true));
static cl::opt<bool> GVNEnableMemDep("enable-gvn-memdep", cl::init(true));
static cl::opt<uint32_t> MaxNumDeps(
"gvn-max-num-deps", cl::Hidden, cl::init(100), cl::ZeroOrMore,
cl::desc("Max number of dependences to attempt Load PRE (default = 100)"));
-// This is based on IsValueFullyAvailableInBlockNumSpeculationsMax stat.
-static cl::opt<uint32_t> MaxBBSpeculations(
- "gvn-max-block-speculations", cl::Hidden, cl::init(600), cl::ZeroOrMore,
- cl::desc("Max number of blocks we're willing to speculate on (and recurse "
- "into) when deducing if a value is fully available or not in GVN "
- "(default = 600)"));
-
+// This is based on IsValueFullyAvailableInBlockNumSpeculationsMax stat.
+static cl::opt<uint32_t> MaxBBSpeculations(
+ "gvn-max-block-speculations", cl::Hidden, cl::init(600), cl::ZeroOrMore,
+ cl::desc("Max number of blocks we're willing to speculate on (and recurse "
+ "into) when deducing if a value is fully available or not in GVN "
+ "(default = 600)"));
+
struct llvm::GVN::Expression {
uint32_t opcode;
bool commutative = false;
@@ -295,9 +295,9 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
if (I->isCommutative()) {
// Ensure that commutative instructions that only differ by a permutation
// of their operands get the same value number by sorting the operand value
-    // numbers. Since the commutative operands are the first two, it is more
+    // numbers. Since the commutative operands are the first two, it is more
// efficient to sort by hand rather than using, say, std::sort.
- assert(I->getNumOperands() >= 2 && "Unsupported commutative instruction!");
+ assert(I->getNumOperands() >= 2 && "Unsupported commutative instruction!");
if (e.varargs[0] > e.varargs[1])
std::swap(e.varargs[0], e.varargs[1]);
e.commutative = true;
@@ -366,7 +366,7 @@ GVN::Expression GVN::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
OI != OE; ++OI)
e.varargs.push_back(lookupOrAdd(*OI));
- append_range(e.varargs, EI->indices());
+ append_range(e.varargs, EI->indices());
return e;
}
@@ -410,12 +410,12 @@ uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
}
if (local_dep.isDef()) {
-    // For masked load/store intrinsics, the local_dep may actually be
- // a normal load or store instruction.
- CallInst *local_cdep = dyn_cast<CallInst>(local_dep.getInst());
+    // For masked load/store intrinsics, the local_dep may actually be
+ // a normal load or store instruction.
+ CallInst *local_cdep = dyn_cast<CallInst>(local_dep.getInst());
- if (!local_cdep ||
- local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
+ if (!local_cdep ||
+ local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
}
@@ -640,11 +640,11 @@ bool GVN::isLoadInLoopPREEnabled() const {
return Options.AllowLoadInLoopPRE.getValueOr(GVNEnableLoadInLoopPRE);
}
-bool GVN::isLoadPRESplitBackedgeEnabled() const {
- return Options.AllowLoadPRESplitBackedge.getValueOr(
- GVNEnableSplitBackedgeInLoadPRE);
-}
-
+bool GVN::isLoadPRESplitBackedgeEnabled() const {
+ return Options.AllowLoadPRESplitBackedge.getValueOr(
+ GVNEnableSplitBackedgeInLoadPRE);
+}
+
bool GVN::isMemDepEnabled() const {
return Options.AllowMemDep.getValueOr(GVNEnableMemDep);
}
@@ -661,18 +661,18 @@ PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) {
auto *MemDep =
isMemDepEnabled() ? &AM.getResult<MemoryDependenceAnalysis>(F) : nullptr;
auto *LI = AM.getCachedResult<LoopAnalysis>(F);
- auto *MSSA = AM.getCachedResult<MemorySSAAnalysis>(F);
+ auto *MSSA = AM.getCachedResult<MemorySSAAnalysis>(F);
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
- bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE,
- MSSA ? &MSSA->getMSSA() : nullptr);
+ bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE,
+ MSSA ? &MSSA->getMSSA() : nullptr);
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<GlobalsAA>();
PA.preserve<TargetLibraryAnalysis>();
- if (MSSA)
- PA.preserve<MemorySSAAnalysis>();
+ if (MSSA)
+ PA.preserve<MemorySSAAnalysis>();
if (LI)
PA.preserve<LoopAnalysis>();
return PA;
@@ -690,18 +690,18 @@ LLVM_DUMP_METHOD void GVN::dump(DenseMap<uint32_t, Value*>& d) const {
}
#endif
-enum class AvailabilityState : char {
- /// We know the block *is not* fully available. This is a fixpoint.
- Unavailable = 0,
- /// We know the block *is* fully available. This is a fixpoint.
- Available = 1,
- /// We do not know whether the block is fully available or not,
- /// but we are currently speculating that it will be.
- /// If it would have turned out that the block was, in fact, not fully
- /// available, this would have been cleaned up into an Unavailable.
- SpeculativelyAvailable = 2,
-};
-
+enum class AvailabilityState : char {
+ /// We know the block *is not* fully available. This is a fixpoint.
+ Unavailable = 0,
+ /// We know the block *is* fully available. This is a fixpoint.
+ Available = 1,
+ /// We do not know whether the block is fully available or not,
+ /// but we are currently speculating that it will be.
+ /// If it would have turned out that the block was, in fact, not fully
+ /// available, this would have been cleaned up into an Unavailable.
+ SpeculativelyAvailable = 2,
+};
+
/// Return true if we can prove that the value
/// we're analyzing is fully available in the specified block. As we go, keep
/// track of which blocks we know are fully alive in FullyAvailableBlocks. This
@@ -710,118 +710,118 @@ enum class AvailabilityState : char {
/// 1) we know the block *is* fully available.
/// 2) we do not know whether the block is fully available or not, but we are
/// currently speculating that it will be.
-static bool IsValueFullyAvailableInBlock(
- BasicBlock *BB,
- DenseMap<BasicBlock *, AvailabilityState> &FullyAvailableBlocks) {
- SmallVector<BasicBlock *, 32> Worklist;
- Optional<BasicBlock *> UnavailableBB;
-
- // The number of times we didn't find an entry for a block in a map and
-  // optimistically inserted an entry marking the block as speculatively available.
- unsigned NumNewNewSpeculativelyAvailableBBs = 0;
-
-#ifndef NDEBUG
- SmallSet<BasicBlock *, 32> NewSpeculativelyAvailableBBs;
- SmallVector<BasicBlock *, 32> AvailableBBs;
-#endif
-
- Worklist.emplace_back(BB);
- while (!Worklist.empty()) {
- BasicBlock *CurrBB = Worklist.pop_back_val(); // LIFO - depth-first!
- // Optimistically assume that the block is Speculatively Available and check
- // to see if we already know about this block in one lookup.
- std::pair<DenseMap<BasicBlock *, AvailabilityState>::iterator, bool> IV =
- FullyAvailableBlocks.try_emplace(
- CurrBB, AvailabilityState::SpeculativelyAvailable);
- AvailabilityState &State = IV.first->second;
-
- // Did the entry already exist for this block?
- if (!IV.second) {
- if (State == AvailabilityState::Unavailable) {
- UnavailableBB = CurrBB;
- break; // Backpropagate unavailability info.
- }
-
-#ifndef NDEBUG
- AvailableBBs.emplace_back(CurrBB);
-#endif
- continue; // Don't recurse further, but continue processing worklist.
- }
-
- // No entry found for block.
- ++NumNewNewSpeculativelyAvailableBBs;
- bool OutOfBudget = NumNewNewSpeculativelyAvailableBBs > MaxBBSpeculations;
-
- // If we have exhausted our budget, mark this block as unavailable.
- // Also, if this block has no predecessors, the value isn't live-in here.
- if (OutOfBudget || pred_empty(CurrBB)) {
- MaxBBSpeculationCutoffReachedTimes += (int)OutOfBudget;
- State = AvailabilityState::Unavailable;
- UnavailableBB = CurrBB;
- break; // Backpropagate unavailability info.
- }
-
- // Tentatively consider this block as speculatively available.
-#ifndef NDEBUG
- NewSpeculativelyAvailableBBs.insert(CurrBB);
-#endif
- // And further recurse into block's predecessors, in depth-first order!
- Worklist.append(pred_begin(CurrBB), pred_end(CurrBB));
- }
-
-#if LLVM_ENABLE_STATS
- IsValueFullyAvailableInBlockNumSpeculationsMax.updateMax(
- NumNewNewSpeculativelyAvailableBBs);
-#endif
-
- // If the block isn't marked as fixpoint yet
- // (the Unavailable and Available states are fixpoints)
- auto MarkAsFixpointAndEnqueueSuccessors =
- [&](BasicBlock *BB, AvailabilityState FixpointState) {
- auto It = FullyAvailableBlocks.find(BB);
- if (It == FullyAvailableBlocks.end())
- return; // Never queried this block, leave as-is.
- switch (AvailabilityState &State = It->second) {
- case AvailabilityState::Unavailable:
- case AvailabilityState::Available:
- return; // Don't backpropagate further, continue processing worklist.
- case AvailabilityState::SpeculativelyAvailable: // Fix it!
- State = FixpointState;
-#ifndef NDEBUG
- assert(NewSpeculativelyAvailableBBs.erase(BB) &&
- "Found a speculatively available successor leftover?");
-#endif
- // Queue successors for further processing.
- Worklist.append(succ_begin(BB), succ_end(BB));
- return;
- }
- };
-
- if (UnavailableBB) {
- // Okay, we have encountered an unavailable block.
- // Mark speculatively available blocks reachable from UnavailableBB as
- // unavailable as well. Paths are terminated when they reach blocks not in
- // FullyAvailableBlocks or they are not marked as speculatively available.
- Worklist.clear();
- Worklist.append(succ_begin(*UnavailableBB), succ_end(*UnavailableBB));
- while (!Worklist.empty())
- MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
- AvailabilityState::Unavailable);
- }
-
-#ifndef NDEBUG
- Worklist.clear();
- for (BasicBlock *AvailableBB : AvailableBBs)
- Worklist.append(succ_begin(AvailableBB), succ_end(AvailableBB));
- while (!Worklist.empty())
- MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
- AvailabilityState::Available);
-
- assert(NewSpeculativelyAvailableBBs.empty() &&
- "Must have fixed all the new speculatively available blocks.");
-#endif
-
- return !UnavailableBB;
+static bool IsValueFullyAvailableInBlock(
+ BasicBlock *BB,
+ DenseMap<BasicBlock *, AvailabilityState> &FullyAvailableBlocks) {
+ SmallVector<BasicBlock *, 32> Worklist;
+ Optional<BasicBlock *> UnavailableBB;
+
+ // The number of times we didn't find an entry for a block in a map and
+  // optimistically inserted an entry marking the block as speculatively available.
+ unsigned NumNewNewSpeculativelyAvailableBBs = 0;
+
+#ifndef NDEBUG
+ SmallSet<BasicBlock *, 32> NewSpeculativelyAvailableBBs;
+ SmallVector<BasicBlock *, 32> AvailableBBs;
+#endif
+
+ Worklist.emplace_back(BB);
+ while (!Worklist.empty()) {
+ BasicBlock *CurrBB = Worklist.pop_back_val(); // LIFO - depth-first!
+ // Optimistically assume that the block is Speculatively Available and check
+ // to see if we already know about this block in one lookup.
+ std::pair<DenseMap<BasicBlock *, AvailabilityState>::iterator, bool> IV =
+ FullyAvailableBlocks.try_emplace(
+ CurrBB, AvailabilityState::SpeculativelyAvailable);
+ AvailabilityState &State = IV.first->second;
+
+ // Did the entry already exist for this block?
+ if (!IV.second) {
+ if (State == AvailabilityState::Unavailable) {
+ UnavailableBB = CurrBB;
+ break; // Backpropagate unavailability info.
+ }
+
+#ifndef NDEBUG
+ AvailableBBs.emplace_back(CurrBB);
+#endif
+ continue; // Don't recurse further, but continue processing worklist.
+ }
+
+ // No entry found for block.
+ ++NumNewNewSpeculativelyAvailableBBs;
+ bool OutOfBudget = NumNewNewSpeculativelyAvailableBBs > MaxBBSpeculations;
+
+ // If we have exhausted our budget, mark this block as unavailable.
+ // Also, if this block has no predecessors, the value isn't live-in here.
+ if (OutOfBudget || pred_empty(CurrBB)) {
+ MaxBBSpeculationCutoffReachedTimes += (int)OutOfBudget;
+ State = AvailabilityState::Unavailable;
+ UnavailableBB = CurrBB;
+ break; // Backpropagate unavailability info.
+ }
+
+ // Tentatively consider this block as speculatively available.
+#ifndef NDEBUG
+ NewSpeculativelyAvailableBBs.insert(CurrBB);
+#endif
+ // And further recurse into block's predecessors, in depth-first order!
+ Worklist.append(pred_begin(CurrBB), pred_end(CurrBB));
+ }
+
+#if LLVM_ENABLE_STATS
+ IsValueFullyAvailableInBlockNumSpeculationsMax.updateMax(
+ NumNewNewSpeculativelyAvailableBBs);
+#endif
+
+ // If the block isn't marked as fixpoint yet
+ // (the Unavailable and Available states are fixpoints)
+ auto MarkAsFixpointAndEnqueueSuccessors =
+ [&](BasicBlock *BB, AvailabilityState FixpointState) {
+ auto It = FullyAvailableBlocks.find(BB);
+ if (It == FullyAvailableBlocks.end())
+ return; // Never queried this block, leave as-is.
+ switch (AvailabilityState &State = It->second) {
+ case AvailabilityState::Unavailable:
+ case AvailabilityState::Available:
+ return; // Don't backpropagate further, continue processing worklist.
+ case AvailabilityState::SpeculativelyAvailable: // Fix it!
+ State = FixpointState;
+#ifndef NDEBUG
+ assert(NewSpeculativelyAvailableBBs.erase(BB) &&
+ "Found a speculatively available successor leftover?");
+#endif
+ // Queue successors for further processing.
+ Worklist.append(succ_begin(BB), succ_end(BB));
+ return;
+ }
+ };
+
+ if (UnavailableBB) {
+ // Okay, we have encountered an unavailable block.
+ // Mark speculatively available blocks reachable from UnavailableBB as
+ // unavailable as well. Paths are terminated when they reach blocks not in
+ // FullyAvailableBlocks or they are not marked as speculatively available.
+ Worklist.clear();
+ Worklist.append(succ_begin(*UnavailableBB), succ_end(*UnavailableBB));
+ while (!Worklist.empty())
+ MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
+ AvailabilityState::Unavailable);
+ }
+
+#ifndef NDEBUG
+ Worklist.clear();
+ for (BasicBlock *AvailableBB : AvailableBBs)
+ Worklist.append(succ_begin(AvailableBB), succ_end(AvailableBB));
+ while (!Worklist.empty())
+ MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
+ AvailabilityState::Available);
+
+ assert(NewSpeculativelyAvailableBBs.empty() &&
+ "Must have fixed all the new speculatively available blocks.");
+#endif
+
+ return !UnavailableBB;
}
/// Given a set of loads specified by ValuesPerBlock,
@@ -1040,7 +1040,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
// Reject loads and stores that are to the same address but are of
-      // different types if we have to. If the stored value is convertible to
+      // different types if we have to. If the stored value is convertible to
// the loaded value, we can reuse it.
if (!canCoerceMustAliasedValueToLoad(S->getValueOperand(), LI->getType(),
DL))
@@ -1155,9 +1155,9 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// because if the index is out of bounds we should deoptimize rather than
// access the array.
// Check that there is no guard in this block above our instruction.
- bool MustEnsureSafetyOfSpeculativeExecution =
- ICF->isDominatedByICFIFromSameBlock(LI);
-
+ bool MustEnsureSafetyOfSpeculativeExecution =
+ ICF->isDominatedByICFIFromSameBlock(LI);
+
while (TmpBB->getSinglePredecessor()) {
TmpBB = TmpBB->getSinglePredecessor();
if (TmpBB == LoadBB) // Infinite (unreachable) loop.
@@ -1174,8 +1174,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
return false;
// Check that there is no implicit control flow in a block above.
- MustEnsureSafetyOfSpeculativeExecution =
- MustEnsureSafetyOfSpeculativeExecution || ICF->hasICF(TmpBB);
+ MustEnsureSafetyOfSpeculativeExecution =
+ MustEnsureSafetyOfSpeculativeExecution || ICF->hasICF(TmpBB);
}
assert(TmpBB);
@@ -1184,11 +1184,11 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// Check to see how many predecessors have the loaded value fully
// available.
MapVector<BasicBlock *, Value *> PredLoads;
- DenseMap<BasicBlock *, AvailabilityState> FullyAvailableBlocks;
+ DenseMap<BasicBlock *, AvailabilityState> FullyAvailableBlocks;
for (const AvailableValueInBlock &AV : ValuesPerBlock)
- FullyAvailableBlocks[AV.BB] = AvailabilityState::Available;
+ FullyAvailableBlocks[AV.BB] = AvailabilityState::Available;
for (BasicBlock *UnavailableBB : UnavailableBlocks)
- FullyAvailableBlocks[UnavailableBB] = AvailabilityState::Unavailable;
+ FullyAvailableBlocks[UnavailableBB] = AvailabilityState::Unavailable;
SmallVector<BasicBlock *, 4> CriticalEdgePred;
for (BasicBlock *Pred : predecessors(LoadBB)) {
@@ -1201,7 +1201,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
return false;
}
- if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
+ if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
continue;
}
@@ -1228,16 +1228,16 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
return false;
}
- // Do not split backedge as it will break the canonical loop form.
- if (!isLoadPRESplitBackedgeEnabled())
- if (DT->dominates(LoadBB, Pred)) {
- LLVM_DEBUG(
- dbgs()
- << "COULD NOT PRE LOAD BECAUSE OF A BACKEDGE CRITICAL EDGE '"
- << Pred->getName() << "': " << *LI << '\n');
- return false;
- }
-
+ // Do not split backedge as it will break the canonical loop form.
+ if (!isLoadPRESplitBackedgeEnabled())
+ if (DT->dominates(LoadBB, Pred)) {
+ LLVM_DEBUG(
+ dbgs()
+ << "COULD NOT PRE LOAD BECAUSE OF A BACKEDGE CRITICAL EDGE '"
+ << Pred->getName() << "': " << *LI << '\n');
+ return false;
+ }
+
CriticalEdgePred.push_back(Pred);
} else {
// Only add the predecessors that will not be split for now.
@@ -1257,17 +1257,17 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
if (NumUnavailablePreds != 1)
return false;
-  // Now we know where we will insert the load. We must ensure that it is safe
-  // to speculatively execute the load at that point.
- if (MustEnsureSafetyOfSpeculativeExecution) {
- if (CriticalEdgePred.size())
- if (!isSafeToSpeculativelyExecute(LI, LoadBB->getFirstNonPHI(), DT))
- return false;
- for (auto &PL : PredLoads)
- if (!isSafeToSpeculativelyExecute(LI, PL.first->getTerminator(), DT))
- return false;
- }
-
+  // Now we know where we will insert the load. We must ensure that it is safe
+  // to speculatively execute the load at that point.
+ if (MustEnsureSafetyOfSpeculativeExecution) {
+ if (CriticalEdgePred.size())
+ if (!isSafeToSpeculativelyExecute(LI, LoadBB->getFirstNonPHI(), DT))
+ return false;
+ for (auto &PL : PredLoads)
+ if (!isSafeToSpeculativelyExecute(LI, PL.first->getTerminator(), DT))
+ return false;
+ }
+
// Split critical edges, and update the unavailable predecessors accordingly.
for (BasicBlock *OrigPred : CriticalEdgePred) {
BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);
@@ -1349,7 +1349,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// Instructions that have been inserted in predecessor(s) to materialize
// the load address do not retain their original debug locations. Doing
// so could lead to confusing (but correct) source attributions.
- I->updateLocationAfterHoist();
+ I->updateLocationAfterHoist();
// FIXME: We really _ought_ to insert these value numbers into their
// parent's availability map. However, in doing so, we risk getting into
@@ -1367,22 +1367,22 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
LI->getAlign(), LI->getOrdering(), LI->getSyncScopeID(),
UnavailablePred->getTerminator());
NewLoad->setDebugLoc(LI->getDebugLoc());
- if (MSSAU) {
- auto *MSSA = MSSAU->getMemorySSA();
- // Get the defining access of the original load or use the load if it is a
- // MemoryDef (e.g. because it is volatile). The inserted loads are
- // guaranteed to load from the same definition.
- auto *LIAcc = MSSA->getMemoryAccess(LI);
- auto *DefiningAcc =
- isa<MemoryDef>(LIAcc) ? LIAcc : LIAcc->getDefiningAccess();
- auto *NewAccess = MSSAU->createMemoryAccessInBB(
- NewLoad, DefiningAcc, NewLoad->getParent(),
- MemorySSA::BeforeTerminator);
- if (auto *NewDef = dyn_cast<MemoryDef>(NewAccess))
- MSSAU->insertDef(NewDef, /*RenameUses=*/true);
- else
- MSSAU->insertUse(cast<MemoryUse>(NewAccess), /*RenameUses=*/true);
- }
+ if (MSSAU) {
+ auto *MSSA = MSSAU->getMemorySSA();
+ // Get the defining access of the original load or use the load if it is a
+ // MemoryDef (e.g. because it is volatile). The inserted loads are
+ // guaranteed to load from the same definition.
+ auto *LIAcc = MSSA->getMemoryAccess(LI);
+ auto *DefiningAcc =
+ isa<MemoryDef>(LIAcc) ? LIAcc : LIAcc->getDefiningAccess();
+ auto *NewAccess = MSSAU->createMemoryAccessInBB(
+ NewLoad, DefiningAcc, NewLoad->getParent(),
+ MemorySSA::BeforeTerminator);
+ if (auto *NewDef = dyn_cast<MemoryDef>(NewAccess))
+ MSSAU->insertDef(NewDef, /*RenameUses=*/true);
+ else
+ MSSAU->insertUse(cast<MemoryUse>(NewAccess), /*RenameUses=*/true);
+ }
// Transfer the old load's AA tags to the new load.
AAMDNodes Tags;
@@ -1470,14 +1470,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
return false;
}
- bool Changed = false;
+ bool Changed = false;
// If this load follows a GEP, see if we can PRE the indices before analyzing.
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0))) {
for (GetElementPtrInst::op_iterator OI = GEP->idx_begin(),
OE = GEP->idx_end();
OI != OE; ++OI)
if (Instruction *I = dyn_cast<Instruction>(OI->get()))
- Changed |= performScalarPRE(I);
+ Changed |= performScalarPRE(I);
}
// Step 2: Analyze the availability of the load
@@ -1488,7 +1488,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// If we have no predecessors that produce a known value for this load, exit
// early.
if (ValuesPerBlock.empty())
- return Changed;
+ return Changed;
// Step 3: Eliminate fully redundancy.
//
@@ -1520,12 +1520,12 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// Step 4: Eliminate partial redundancy.
if (!isPREEnabled() || !isLoadPREEnabled())
- return Changed;
+ return Changed;
if (!isLoadInLoopPREEnabled() && this->LI &&
this->LI->getLoopFor(LI->getParent()))
- return Changed;
+ return Changed;
- return Changed || PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks);
+ return Changed || PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks);
}
static bool impliesEquivalanceIfTrue(CmpInst* Cmp) {
@@ -1600,40 +1600,40 @@ bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) {
// Insert a new store to null instruction before the load to indicate that
     // this code is not reachable. FIXME: We could insert an unreachable
// instruction directly because we can modify the CFG.
- auto *NewS = new StoreInst(UndefValue::get(Int8Ty),
- Constant::getNullValue(Int8Ty->getPointerTo()),
- IntrinsicI);
- if (MSSAU) {
- const MemoryUseOrDef *FirstNonDom = nullptr;
- const auto *AL =
- MSSAU->getMemorySSA()->getBlockAccesses(IntrinsicI->getParent());
-
- // If there are accesses in the current basic block, find the first one
- // that does not come before NewS. The new memory access is inserted
- // after the found access or before the terminator if no such access is
- // found.
- if (AL) {
- for (auto &Acc : *AL) {
- if (auto *Current = dyn_cast<MemoryUseOrDef>(&Acc))
- if (!Current->getMemoryInst()->comesBefore(NewS)) {
- FirstNonDom = Current;
- break;
- }
- }
- }
-
-    // This added store is to null, so it will never be executed and we can
- // just use the LiveOnEntry def as defining access.
- auto *NewDef =
- FirstNonDom ? MSSAU->createMemoryAccessBefore(
- NewS, MSSAU->getMemorySSA()->getLiveOnEntryDef(),
- const_cast<MemoryUseOrDef *>(FirstNonDom))
- : MSSAU->createMemoryAccessInBB(
- NewS, MSSAU->getMemorySSA()->getLiveOnEntryDef(),
- NewS->getParent(), MemorySSA::BeforeTerminator);
-
- MSSAU->insertDef(cast<MemoryDef>(NewDef), /*RenameUses=*/false);
- }
+ auto *NewS = new StoreInst(UndefValue::get(Int8Ty),
+ Constant::getNullValue(Int8Ty->getPointerTo()),
+ IntrinsicI);
+ if (MSSAU) {
+ const MemoryUseOrDef *FirstNonDom = nullptr;
+ const auto *AL =
+ MSSAU->getMemorySSA()->getBlockAccesses(IntrinsicI->getParent());
+
+ // If there are accesses in the current basic block, find the first one
+ // that does not come before NewS. The new memory access is inserted
+ // after the found access or before the terminator if no such access is
+ // found.
+ if (AL) {
+ for (auto &Acc : *AL) {
+ if (auto *Current = dyn_cast<MemoryUseOrDef>(&Acc))
+ if (!Current->getMemoryInst()->comesBefore(NewS)) {
+ FirstNonDom = Current;
+ break;
+ }
+ }
+ }
+
+    // This added store is to null, so it will never be executed and we can
+ // just use the LiveOnEntry def as defining access.
+ auto *NewDef =
+ FirstNonDom ? MSSAU->createMemoryAccessBefore(
+ NewS, MSSAU->getMemorySSA()->getLiveOnEntryDef(),
+ const_cast<MemoryUseOrDef *>(FirstNonDom))
+ : MSSAU->createMemoryAccessInBB(
+ NewS, MSSAU->getMemorySSA()->getLiveOnEntryDef(),
+ NewS->getParent(), MemorySSA::BeforeTerminator);
+
+ MSSAU->insertDef(cast<MemoryDef>(NewDef), /*RenameUses=*/false);
+ }
}
if (isAssumeWithEmptyBundle(*IntrinsicI))
markInstructionForDeletion(IntrinsicI);
@@ -1661,11 +1661,11 @@ bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) {
// br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true
ReplaceOperandsWithMap[V] = True;
- // Similarly, after assume(!NotV) we know that NotV == false.
- Value *NotV;
- if (match(V, m_Not(m_Value(NotV))))
- ReplaceOperandsWithMap[NotV] = ConstantInt::getFalse(V->getContext());
-
+ // Similarly, after assume(!NotV) we know that NotV == false.
+ Value *NotV;
+ if (match(V, m_Not(m_Value(NotV))))
+ ReplaceOperandsWithMap[NotV] = ConstantInt::getFalse(V->getContext());
+
// If we find an equality fact, canonicalize all dominated uses in this block
   // to one of the two values. We heuristically choose the "oldest" of the
// two where age is determined by value number. (Note that propagateEquality
@@ -1772,8 +1772,8 @@ bool GVN::processLoad(LoadInst *L) {
// Replace the load!
patchAndReplaceAllUsesWith(L, AvailableValue);
markInstructionForDeletion(L);
- if (MSSAU)
- MSSAU->removeMemoryAccess(L);
+ if (MSSAU)
+ MSSAU->removeMemoryAccess(L);
++NumGVNLoad;
reportLoadElim(L, AvailableValue, ORE);
// Tell MDA to rexamine the reused pointer since we might have more
@@ -1895,7 +1895,7 @@ uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
}
if (Exp.commutative) {
- assert(Exp.varargs.size() >= 2 && "Unsupported commutative instruction!");
+ assert(Exp.varargs.size() >= 2 && "Unsupported commutative instruction!");
if (Exp.varargs[0] > Exp.varargs[1]) {
std::swap(Exp.varargs[0], Exp.varargs[1]);
uint32_t Opcode = Exp.opcode >> 8;
@@ -1918,8 +1918,8 @@ uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
/// again.
void GVN::ValueTable::eraseTranslateCacheEntry(uint32_t Num,
const BasicBlock &CurrBlock) {
- for (const BasicBlock *Pred : predecessors(&CurrBlock))
- PhiTranslateTable.erase({Num, Pred});
+ for (const BasicBlock *Pred : predecessors(&CurrBlock))
+ PhiTranslateTable.erase({Num, Pred});
}
// In order to find a leader for a given value number at a
@@ -2083,8 +2083,8 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
// If "A && B" is known true then both A and B are known true. If "A || B"
// is known false then both A and B are known false.
Value *A, *B;
- if ((isKnownTrue && match(LHS, m_LogicalAnd(m_Value(A), m_Value(B)))) ||
- (isKnownFalse && match(LHS, m_LogicalOr(m_Value(A), m_Value(B))))) {
+ if ((isKnownTrue && match(LHS, m_LogicalAnd(m_Value(A), m_Value(B)))) ||
+ (isKnownFalse && match(LHS, m_LogicalOr(m_Value(A), m_Value(B))))) {
Worklist.push_back(std::make_pair(A, RHS));
Worklist.push_back(std::make_pair(B, RHS));
continue;
@@ -2286,7 +2286,7 @@ bool GVN::processInstruction(Instruction *I) {
bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
const TargetLibraryInfo &RunTLI, AAResults &RunAA,
MemoryDependenceResults *RunMD, LoopInfo *LI,
- OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) {
+ OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) {
AC = &RunAC;
DT = &RunDT;
VN.setDomTree(DT);
@@ -2299,8 +2299,8 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
VN.setMemDep(MD);
ORE = RunORE;
InvalidBlockRPONumbers = true;
- MemorySSAUpdater Updater(MSSA);
- MSSAU = MSSA ? &Updater : nullptr;
+ MemorySSAUpdater Updater(MSSA);
+ MSSAU = MSSA ? &Updater : nullptr;
bool Changed = false;
bool ShouldContinue = true;
@@ -2311,7 +2311,7 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
BasicBlock *BB = &*FI++;
- bool removedBlock = MergeBlockIntoPredecessor(BB, &DTU, LI, MSSAU, MD);
+ bool removedBlock = MergeBlockIntoPredecessor(BB, &DTU, LI, MSSAU, MD);
if (removedBlock)
++NumGVNBlocks;
@@ -2347,9 +2347,9 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
// iteration.
DeadBlocks.clear();
- if (MSSA && VerifyMemorySSA)
- MSSA->verifyMemorySSA();
-
+ if (MSSA && VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+
return Changed;
}
@@ -2390,8 +2390,8 @@ bool GVN::processBlock(BasicBlock *BB) {
salvageKnowledge(I, AC);
salvageDebugInfo(*I);
if (MD) MD->removeInstruction(I);
- if (MSSAU)
- MSSAU->removeMemoryAccess(I);
+ if (MSSAU)
+ MSSAU->removeMemoryAccess(I);
LLVM_DEBUG(verifyRemoved(I));
ICF->removeInstruction(I);
I->eraseFromParent();
@@ -2479,14 +2479,14 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
if (isa<GetElementPtrInst>(CurInst))
return false;
- if (auto *CallB = dyn_cast<CallBase>(CurInst)) {
- // We don't currently value number ANY inline asm calls.
+ if (auto *CallB = dyn_cast<CallBase>(CurInst)) {
+ // We don't currently value number ANY inline asm calls.
if (CallB->isInlineAsm())
return false;
- // Don't do PRE on convergent calls.
- if (CallB->isConvergent())
- return false;
- }
+ // Don't do PRE on convergent calls.
+ if (CallB->isConvergent())
+ return false;
+ }
uint32_t ValNo = VN.lookup(CurInst);
@@ -2626,8 +2626,8 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
LLVM_DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
if (MD)
MD->removeInstruction(CurInst);
- if (MSSAU)
- MSSAU->removeMemoryAccess(CurInst);
+ if (MSSAU)
+ MSSAU->removeMemoryAccess(CurInst);
LLVM_DEBUG(verifyRemoved(CurInst));
// FIXME: Intended to be markInstructionForDeletion(CurInst), but it causes
// some assertion failures.
@@ -2672,12 +2672,12 @@ BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
// possible.
BasicBlock *BB = SplitCriticalEdge(
Pred, Succ,
- CriticalEdgeSplittingOptions(DT, LI, MSSAU).unsetPreserveLoopSimplify());
- if (BB) {
- if (MD)
- MD->invalidateCachedPredecessors();
- InvalidBlockRPONumbers = true;
- }
+ CriticalEdgeSplittingOptions(DT, LI, MSSAU).unsetPreserveLoopSimplify());
+ if (BB) {
+ if (MD)
+ MD->invalidateCachedPredecessors();
+ InvalidBlockRPONumbers = true;
+ }
return BB;
}
@@ -2686,20 +2686,20 @@ BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
bool GVN::splitCriticalEdges() {
if (toSplit.empty())
return false;
-
- bool Changed = false;
+
+ bool Changed = false;
do {
std::pair<Instruction *, unsigned> Edge = toSplit.pop_back_val();
- Changed |= SplitCriticalEdge(Edge.first, Edge.second,
- CriticalEdgeSplittingOptions(DT, LI, MSSAU)) !=
- nullptr;
+ Changed |= SplitCriticalEdge(Edge.first, Edge.second,
+ CriticalEdgeSplittingOptions(DT, LI, MSSAU)) !=
+ nullptr;
} while (!toSplit.empty());
- if (Changed) {
- if (MD)
- MD->invalidateCachedPredecessors();
- InvalidBlockRPONumbers = true;
- }
- return Changed;
+ if (Changed) {
+ if (MD)
+ MD->invalidateCachedPredecessors();
+ InvalidBlockRPONumbers = true;
+ }
+ return Changed;
}
/// Executes one iteration of GVN
@@ -2803,12 +2803,12 @@ void GVN::addDeadBlock(BasicBlock *BB) {
// First, split the critical edges. This might also create additional blocks
// to preserve LoopSimplify form and adjust edges accordingly.
- SmallVector<BasicBlock *, 4> Preds(predecessors(B));
+ SmallVector<BasicBlock *, 4> Preds(predecessors(B));
for (BasicBlock *P : Preds) {
if (!DeadBlocks.count(P))
continue;
- if (llvm::is_contained(successors(P), B) &&
+ if (llvm::is_contained(successors(P), B) &&
isCriticalEdge(P->getTerminator(), B)) {
if (BasicBlock *S = splitCriticalEdges(P, B))
DeadBlocks.insert(P = S);
@@ -2893,7 +2893,7 @@ public:
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
- auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
+ auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
return Impl.runImpl(
F, getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
@@ -2903,8 +2903,8 @@ public:
? &getAnalysis<MemoryDependenceWrapperPass>().getMemDep()
: nullptr,
LIWP ? &LIWP->getLoopInfo() : nullptr,
- &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(),
- MSSAWP ? &MSSAWP->getMSSA() : nullptr);
+ &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(),
+ MSSAWP ? &MSSAWP->getMSSA() : nullptr);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -2920,7 +2920,7 @@ public:
AU.addPreserved<TargetLibraryInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
}
private:
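
The GVN hunks above replace the old recursive availability check with the worklist walk in IsValueFullyAvailableInBlock: unknown blocks are optimistically tagged SpeculativelyAvailable, predecessors are explored depth-first, and gvn-max-block-speculations (default 600) caps how many new speculations are allowed. The following standalone sketch models that walk under simplifying assumptions (an illustrative integer-id CFG instead of LLVM BasicBlocks, and without the later fix-up that rewrites speculative tags back to a true fixpoint):

// Simplified model of GVN's three-state availability walk.
#include <cassert>
#include <unordered_map>
#include <vector>

enum class Avail { Unavailable, Available, Speculative };

struct Block {
  std::vector<int> Preds; // predecessor block ids
};

// Returns true when the value is available on every path into BB. Known
// facts are seeded in State; unknown blocks are optimistically marked
// Speculative and explored through their predecessors, depth-first.
static bool fullyAvailable(int BB, const std::vector<Block> &CFG,
                           std::unordered_map<int, Avail> &State,
                           unsigned Budget = 600) {
  std::vector<int> Worklist{BB};
  unsigned NewSpeculations = 0;
  while (!Worklist.empty()) {
    int Curr = Worklist.back();
    Worklist.pop_back();
    auto Inserted = State.try_emplace(Curr, Avail::Speculative);
    if (!Inserted.second) {
      if (Inserted.first->second == Avail::Unavailable)
        return false; // Some path into BB lacks the value.
      continue;       // Available or already speculated: nothing more to do.
    }
    // Freshly speculated block: give up when over budget, or when the value
    // cannot be live-in because the block has no predecessors.
    if (++NewSpeculations > Budget || CFG[Curr].Preds.empty()) {
      Inserted.first->second = Avail::Unavailable;
      return false;
    }
    for (int P : CFG[Curr].Preds)
      Worklist.push_back(P);
  }
  return true; // Every path bottomed out in an Available block.
}

int main() {
  // CFG: 0 -> 1 -> 3 and 2 -> 3; the value is produced in blocks 0 and 2.
  std::vector<Block> CFG(4);
  CFG[1].Preds = {0};
  CFG[3].Preds = {1, 2};
  std::unordered_map<int, Avail> State{{0, Avail::Available},
                                       {2, Avail::Available}};
  assert(fullyAvailable(3, CFG, State));
  return 0;
}
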
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/GVNHoist.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/GVNHoist.cpp
index 8d0bd56749..14f438c2c8 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/GVNHoist.cpp
@@ -242,14 +242,14 @@ public:
};
static void combineKnownMetadata(Instruction *ReplInst, Instruction *I) {
- static const unsigned KnownIDs[] = {LLVMContext::MD_tbaa,
- LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias,
- LLVMContext::MD_range,
- LLVMContext::MD_fpmath,
- LLVMContext::MD_invariant_load,
- LLVMContext::MD_invariant_group,
- LLVMContext::MD_access_group};
+ static const unsigned KnownIDs[] = {LLVMContext::MD_tbaa,
+ LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias,
+ LLVMContext::MD_range,
+ LLVMContext::MD_fpmath,
+ LLVMContext::MD_invariant_load,
+ LLVMContext::MD_invariant_group,
+ LLVMContext::MD_access_group};
combineMetadata(ReplInst, I, KnownIDs, true);
}
@@ -263,7 +263,7 @@ public:
: DT(DT), PDT(PDT), AA(AA), MD(MD), MSSA(MSSA),
MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {}
- bool run(Function &F);
+ bool run(Function &F);
// Copied from NewGVN.cpp
// This function provides global ranking of operations so that we can place
@@ -271,7 +271,7 @@ public:
// for a complete ordering, as constants all have the same rank. However,
// generally, we will simplify an operation with all constants so that it
// doesn't matter what order they appear in.
- unsigned int rank(const Value *V) const;
+ unsigned int rank(const Value *V) const;
private:
GVN::ValueTable VN;
@@ -291,7 +291,7 @@ private:
enum InsKind { Unknown, Scalar, Load, Store };
   // Return true when there is exception handling in BB.
- bool hasEH(const BasicBlock *BB);
+ bool hasEH(const BasicBlock *BB);
// Return true when I1 appears before I2 in the instructions of BB.
bool firstInBB(const Instruction *I1, const Instruction *I2) {
@@ -304,10 +304,10 @@ private:
// Return true when there are memory uses of Def in BB.
bool hasMemoryUse(const Instruction *NewPt, MemoryDef *Def,
- const BasicBlock *BB);
+ const BasicBlock *BB);
bool hasEHhelper(const BasicBlock *BB, const BasicBlock *SrcBB,
- int &NBBsOnAllPaths);
+ int &NBBsOnAllPaths);
   // Return true when there is exception handling or there are loads of memory Def
// between Def and NewPt. This function is only called for stores: Def is
@@ -317,19 +317,19 @@ private:
   // return true when the counter NBBsOnAllPaths reaches 0, except when it is
// initialized to -1 which is unlimited.
bool hasEHOrLoadsOnPath(const Instruction *NewPt, MemoryDef *Def,
- int &NBBsOnAllPaths);
+ int &NBBsOnAllPaths);
   // Return true when there is exception handling between HoistPt and BB.
// Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and
// return true when the counter NBBsOnAllPaths reaches 0, except when it is
// initialized to -1 which is unlimited.
bool hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *SrcBB,
- int &NBBsOnAllPaths);
+ int &NBBsOnAllPaths);
// Return true when it is safe to hoist a memory load or store U from OldPt
// to NewPt.
bool safeToHoistLdSt(const Instruction *NewPt, const Instruction *OldPt,
- MemoryUseOrDef *U, InsKind K, int &NBBsOnAllPaths);
+ MemoryUseOrDef *U, InsKind K, int &NBBsOnAllPaths);
// Return true when it is safe to hoist scalar instructions from all blocks in
// WL to HoistBB.
@@ -352,21 +352,21 @@ private:
   // Returns the edge via which an instruction in BB will get the values.
// Returns true when the values are flowing out to each edge.
- bool valueAnticipable(CHIArgs C, Instruction *TI) const;
+ bool valueAnticipable(CHIArgs C, Instruction *TI) const;
// Check if it is safe to hoist values tracked by CHI in the range
// [Begin, End) and accumulate them in Safe.
void checkSafety(CHIArgs C, BasicBlock *BB, InsKind K,
- SmallVectorImpl<CHIArg> &Safe);
+ SmallVectorImpl<CHIArg> &Safe);
using RenameStackType = DenseMap<VNType, SmallVector<Instruction *, 2>>;
// Push all the VNs corresponding to BB into RenameStack.
void fillRenameStack(BasicBlock *BB, InValuesType &ValueBBs,
- RenameStackType &RenameStack);
+ RenameStackType &RenameStack);
void fillChiArgs(BasicBlock *BB, OutValuesType &CHIBBs,
- RenameStackType &RenameStack);
+ RenameStackType &RenameStack);
// Walk the post-dominator tree top-down and use a stack for each value to
// store the last value you see. When you hit a CHI from a given edge, the
@@ -396,7 +396,7 @@ private:
// they form a list of anticipable values. OutValues contains CHIs
// corresponding to each basic block.
void findHoistableCandidates(OutValuesType &CHIBBs, InsKind K,
- HoistingPointList &HPL);
+ HoistingPointList &HPL);
   // Compute insertion points for each value that can be fully anticipated at
// a dominator. HPL contains all such values.
@@ -454,14 +454,14 @@ private:
}
// Insert empty CHI node for this VN. This is used to factor out
// basic blocks where the ANTIC can potentially change.
- CHIArg EmptyChi = {VN, nullptr, nullptr};
- for (auto *IDFBB : IDFBlocks) {
+ CHIArg EmptyChi = {VN, nullptr, nullptr};
+ for (auto *IDFBB : IDFBlocks) {
for (unsigned i = 0; i < V.size(); ++i) {
- // Ignore spurious PDFs.
- if (DT->properlyDominates(IDFBB, V[i]->getParent())) {
- OutValue[IDFBB].push_back(EmptyChi);
- LLVM_DEBUG(dbgs() << "\nInserting a CHI for BB: "
- << IDFBB->getName() << ", for Insn: " << *V[i]);
+ // Ignore spurious PDFs.
+ if (DT->properlyDominates(IDFBB, V[i]->getParent())) {
+ OutValue[IDFBB].push_back(EmptyChi);
+ LLVM_DEBUG(dbgs() << "\nInserting a CHI for BB: "
+ << IDFBB->getName() << ", for Insn: " << *V[i]);
}
}
}
@@ -479,755 +479,755 @@ private:
// a load without hoisting its access function. So before hoisting any
// expression, make sure that all its operands are available at insert point.
bool allOperandsAvailable(const Instruction *I,
- const BasicBlock *HoistPt) const;
+ const BasicBlock *HoistPt) const;
// Same as allOperandsAvailable with recursive check for GEP operands.
bool allGepOperandsAvailable(const Instruction *I,
- const BasicBlock *HoistPt) const;
+ const BasicBlock *HoistPt) const;
// Make all operands of the GEP available.
void makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt,
const SmallVecInsn &InstructionsToHoist,
- Instruction *Gep) const;
-
- void updateAlignment(Instruction *I, Instruction *Repl);
-
- // Remove all the instructions in Candidates and replace their usage with
- // Repl. Returns the number of instructions removed.
- unsigned rauw(const SmallVecInsn &Candidates, Instruction *Repl,
- MemoryUseOrDef *NewMemAcc);
-
- // Replace all Memory PHI usage with NewMemAcc.
- void raMPHIuw(MemoryUseOrDef *NewMemAcc);
-
- // Remove all other instructions and replace them with Repl.
- unsigned removeAndReplace(const SmallVecInsn &Candidates, Instruction *Repl,
- BasicBlock *DestBB, bool MoveAccess);
-
- // In the case Repl is a load or a store, we make all their GEPs
- // available: GEPs are not hoisted by default to avoid the address
- // computations to be hoisted without the associated load or store.
- bool makeGepOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt,
- const SmallVecInsn &InstructionsToHoist) const;
-
- std::pair<unsigned, unsigned> hoist(HoistingPointList &HPL);
-
- // Hoist all expressions. Returns Number of scalars hoisted
- // and number of non-scalars hoisted.
- std::pair<unsigned, unsigned> hoistExpressions(Function &F);
-};
-
-class GVNHoistLegacyPass : public FunctionPass {
-public:
- static char ID;
-
- GVNHoistLegacyPass() : FunctionPass(ID) {
- initializeGVNHoistLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
- auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
- auto &MD = getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
- auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
-
- GVNHoist G(&DT, &PDT, &AA, &MD, &MSSA);
- return G.run(F);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<PostDominatorTreeWrapperPass>();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<MemoryDependenceWrapperPass>();
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
-};
-
-bool GVNHoist::run(Function &F) {
- NumFuncArgs = F.arg_size();
- VN.setDomTree(DT);
- VN.setAliasAnalysis(AA);
- VN.setMemDep(MD);
- bool Res = false;
- // Perform DFS Numbering of instructions.
- unsigned BBI = 0;
- for (const BasicBlock *BB : depth_first(&F.getEntryBlock())) {
- DFSNumber[BB] = ++BBI;
- unsigned I = 0;
- for (auto &Inst : *BB)
- DFSNumber[&Inst] = ++I;
- }
-
- int ChainLength = 0;
-
- // FIXME: use lazy evaluation of VN to avoid the fix-point computation.
- while (true) {
- if (MaxChainLength != -1 && ++ChainLength >= MaxChainLength)
- return Res;
-
- auto HoistStat = hoistExpressions(F);
- if (HoistStat.first + HoistStat.second == 0)
- return Res;
-
- if (HoistStat.second > 0)
- // To address a limitation of the current GVN, we need to rerun the
- // hoisting after we hoisted loads or stores in order to be able to
- // hoist all scalars dependent on the hoisted ld/st.
- VN.clear();
-
- Res = true;
- }
-
- return Res;
-}
-
-unsigned int GVNHoist::rank(const Value *V) const {
- // Prefer constants to undef to anything else
- // Undef is a constant, have to check it first.
- // Prefer smaller constants to constantexprs
- if (isa<ConstantExpr>(V))
- return 2;
- if (isa<UndefValue>(V))
- return 1;
- if (isa<Constant>(V))
- return 0;
- else if (auto *A = dyn_cast<Argument>(V))
- return 3 + A->getArgNo();
-
- // Need to shift the instruction DFS by number of arguments + 3 to account
- // for the constant and argument ranking above.
- auto Result = DFSNumber.lookup(V);
- if (Result > 0)
- return 4 + NumFuncArgs + Result;
- // Unreachable or something else, just return a really large number.
- return ~0;
-}
-
-bool GVNHoist::hasEH(const BasicBlock *BB) {
- auto It = BBSideEffects.find(BB);
- if (It != BBSideEffects.end())
- return It->second;
-
- if (BB->isEHPad() || BB->hasAddressTaken()) {
- BBSideEffects[BB] = true;
- return true;
- }
-
- if (BB->getTerminator()->mayThrow()) {
- BBSideEffects[BB] = true;
- return true;
- }
-
- BBSideEffects[BB] = false;
- return false;
-}
-
-bool GVNHoist::hasMemoryUse(const Instruction *NewPt, MemoryDef *Def,
- const BasicBlock *BB) {
- const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB);
- if (!Acc)
- return false;
-
- Instruction *OldPt = Def->getMemoryInst();
- const BasicBlock *OldBB = OldPt->getParent();
- const BasicBlock *NewBB = NewPt->getParent();
- bool ReachedNewPt = false;
-
- for (const MemoryAccess &MA : *Acc)
- if (const MemoryUse *MU = dyn_cast<MemoryUse>(&MA)) {
- Instruction *Insn = MU->getMemoryInst();
-
- // Do not check whether MU aliases Def when MU occurs after OldPt.
- if (BB == OldBB && firstInBB(OldPt, Insn))
- break;
-
- // Do not check whether MU aliases Def when MU occurs before NewPt.
- if (BB == NewBB) {
- if (!ReachedNewPt) {
- if (firstInBB(Insn, NewPt))
- continue;
- ReachedNewPt = true;
- }
+ Instruction *Gep) const;
+
+ void updateAlignment(Instruction *I, Instruction *Repl);
+
+ // Remove all the instructions in Candidates and replace their usage with
+ // Repl. Returns the number of instructions removed.
+ unsigned rauw(const SmallVecInsn &Candidates, Instruction *Repl,
+ MemoryUseOrDef *NewMemAcc);
+
+ // Replace all Memory PHI usage with NewMemAcc.
+ void raMPHIuw(MemoryUseOrDef *NewMemAcc);
+
+ // Remove all other instructions and replace them with Repl.
+ unsigned removeAndReplace(const SmallVecInsn &Candidates, Instruction *Repl,
+ BasicBlock *DestBB, bool MoveAccess);
+
+ // In the case Repl is a load or a store, we make all their GEPs
+ // available: GEPs are not hoisted by default to avoid the address
+ // computations to be hoisted without the associated load or store.
+ bool makeGepOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt,
+ const SmallVecInsn &InstructionsToHoist) const;
+
+ std::pair<unsigned, unsigned> hoist(HoistingPointList &HPL);
+
+ // Hoist all expressions. Returns Number of scalars hoisted
+ // and number of non-scalars hoisted.
+ std::pair<unsigned, unsigned> hoistExpressions(Function &F);
+};
+
+class GVNHoistLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ GVNHoistLegacyPass() : FunctionPass(ID) {
+ initializeGVNHoistLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+ auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ auto &MD = getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
+ auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
+
+ GVNHoist G(&DT, &PDT, &AA, &MD, &MSSA);
+ return G.run(F);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTreeWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<MemoryDependenceWrapperPass>();
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
+
+bool GVNHoist::run(Function &F) {
+ NumFuncArgs = F.arg_size();
+ VN.setDomTree(DT);
+ VN.setAliasAnalysis(AA);
+ VN.setMemDep(MD);
+ bool Res = false;
+ // Perform DFS Numbering of instructions.
+ unsigned BBI = 0;
+ for (const BasicBlock *BB : depth_first(&F.getEntryBlock())) {
+ DFSNumber[BB] = ++BBI;
+ unsigned I = 0;
+ for (auto &Inst : *BB)
+ DFSNumber[&Inst] = ++I;
+ }
+
+ int ChainLength = 0;
+
+ // FIXME: use lazy evaluation of VN to avoid the fix-point computation.
+ while (true) {
+ if (MaxChainLength != -1 && ++ChainLength >= MaxChainLength)
+ return Res;
+
+ auto HoistStat = hoistExpressions(F);
+ if (HoistStat.first + HoistStat.second == 0)
+ return Res;
+
+ if (HoistStat.second > 0)
+ // To address a limitation of the current GVN, we need to rerun the
+ // hoisting after we hoisted loads or stores in order to be able to
+ // hoist all scalars dependent on the hoisted ld/st.
+ VN.clear();
+
+ Res = true;
+ }
+
+ return Res;
+}
+
+unsigned int GVNHoist::rank(const Value *V) const {
+ // Prefer constants to undef to anything else
+ // Undef is a constant, have to check it first.
+ // Prefer smaller constants to constantexprs
+ if (isa<ConstantExpr>(V))
+ return 2;
+ if (isa<UndefValue>(V))
+ return 1;
+ if (isa<Constant>(V))
+ return 0;
+ else if (auto *A = dyn_cast<Argument>(V))
+ return 3 + A->getArgNo();
+
+ // Need to shift the instruction DFS by number of arguments + 3 to account
+ // for the constant and argument ranking above.
+ auto Result = DFSNumber.lookup(V);
+ if (Result > 0)
+ return 4 + NumFuncArgs + Result;
+ // Unreachable or something else, just return a really large number.
+ return ~0;
+}
+
+bool GVNHoist::hasEH(const BasicBlock *BB) {
+ auto It = BBSideEffects.find(BB);
+ if (It != BBSideEffects.end())
+ return It->second;
+
+ if (BB->isEHPad() || BB->hasAddressTaken()) {
+ BBSideEffects[BB] = true;
+ return true;
+ }
+
+ if (BB->getTerminator()->mayThrow()) {
+ BBSideEffects[BB] = true;
+ return true;
+ }
+
+ BBSideEffects[BB] = false;
+ return false;
+}
+
+bool GVNHoist::hasMemoryUse(const Instruction *NewPt, MemoryDef *Def,
+ const BasicBlock *BB) {
+ const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB);
+ if (!Acc)
+ return false;
+
+ Instruction *OldPt = Def->getMemoryInst();
+ const BasicBlock *OldBB = OldPt->getParent();
+ const BasicBlock *NewBB = NewPt->getParent();
+ bool ReachedNewPt = false;
+
+ for (const MemoryAccess &MA : *Acc)
+ if (const MemoryUse *MU = dyn_cast<MemoryUse>(&MA)) {
+ Instruction *Insn = MU->getMemoryInst();
+
+ // Do not check whether MU aliases Def when MU occurs after OldPt.
+ if (BB == OldBB && firstInBB(OldPt, Insn))
+ break;
+
+ // Do not check whether MU aliases Def when MU occurs before NewPt.
+ if (BB == NewBB) {
+ if (!ReachedNewPt) {
+ if (firstInBB(Insn, NewPt))
+ continue;
+ ReachedNewPt = true;
+ }
}
- if (MemorySSAUtil::defClobbersUseOrDef(Def, MU, *AA))
- return true;
+ if (MemorySSAUtil::defClobbersUseOrDef(Def, MU, *AA))
+ return true;
+ }
+
+ return false;
+}
+
+bool GVNHoist::hasEHhelper(const BasicBlock *BB, const BasicBlock *SrcBB,
+ int &NBBsOnAllPaths) {
+ // Stop walk once the limit is reached.
+ if (NBBsOnAllPaths == 0)
+ return true;
+
+ // Impossible to hoist with exceptions on the path.
+ if (hasEH(BB))
+ return true;
+
+  // No instruction after a HoistBarrier in a basic block is ever selected
+  // for hoisting, so instructions that were selected within a basic block
+  // containing a hoist barrier can still be hoisted.
+ if ((BB != SrcBB) && HoistBarrier.count(BB))
+ return true;
+
+ return false;
+}
+
+bool GVNHoist::hasEHOrLoadsOnPath(const Instruction *NewPt, MemoryDef *Def,
+ int &NBBsOnAllPaths) {
+ const BasicBlock *NewBB = NewPt->getParent();
+ const BasicBlock *OldBB = Def->getBlock();
+ assert(DT->dominates(NewBB, OldBB) && "invalid path");
+ assert(DT->dominates(Def->getDefiningAccess()->getBlock(), NewBB) &&
+ "def does not dominate new hoisting point");
+
+ // Walk all basic blocks reachable in depth-first iteration on the inverse
+ // CFG from OldBB to NewBB. These blocks are all the blocks that may be
+ // executed between the execution of NewBB and OldBB. Hoisting an expression
+ // from OldBB into NewBB has to be safe on all execution paths.
+ for (auto I = idf_begin(OldBB), E = idf_end(OldBB); I != E;) {
+ const BasicBlock *BB = *I;
+ if (BB == NewBB) {
+ // Stop traversal when reaching HoistPt.
+ I.skipChildren();
+ continue;
}
- return false;
-}
-
-bool GVNHoist::hasEHhelper(const BasicBlock *BB, const BasicBlock *SrcBB,
- int &NBBsOnAllPaths) {
- // Stop walk once the limit is reached.
- if (NBBsOnAllPaths == 0)
- return true;
-
- // Impossible to hoist with exceptions on the path.
- if (hasEH(BB))
- return true;
-
-  // No instruction after a HoistBarrier in a basic block is ever selected
-  // for hoisting, so instructions that were selected within a basic block
-  // containing a hoist barrier can still be hoisted.
- if ((BB != SrcBB) && HoistBarrier.count(BB))
- return true;
-
- return false;
-}
-
-bool GVNHoist::hasEHOrLoadsOnPath(const Instruction *NewPt, MemoryDef *Def,
- int &NBBsOnAllPaths) {
- const BasicBlock *NewBB = NewPt->getParent();
- const BasicBlock *OldBB = Def->getBlock();
- assert(DT->dominates(NewBB, OldBB) && "invalid path");
- assert(DT->dominates(Def->getDefiningAccess()->getBlock(), NewBB) &&
- "def does not dominate new hoisting point");
-
- // Walk all basic blocks reachable in depth-first iteration on the inverse
- // CFG from OldBB to NewBB. These blocks are all the blocks that may be
- // executed between the execution of NewBB and OldBB. Hoisting an expression
- // from OldBB into NewBB has to be safe on all execution paths.
- for (auto I = idf_begin(OldBB), E = idf_end(OldBB); I != E;) {
- const BasicBlock *BB = *I;
- if (BB == NewBB) {
- // Stop traversal when reaching HoistPt.
- I.skipChildren();
- continue;
- }
-
- if (hasEHhelper(BB, OldBB, NBBsOnAllPaths))
- return true;
-
- // Check that we do not move a store past loads.
- if (hasMemoryUse(NewPt, Def, BB))
- return true;
-
- // -1 is unlimited number of blocks on all paths.
- if (NBBsOnAllPaths != -1)
- --NBBsOnAllPaths;
-
- ++I;
+ if (hasEHhelper(BB, OldBB, NBBsOnAllPaths))
+ return true;
+
+ // Check that we do not move a store past loads.
+ if (hasMemoryUse(NewPt, Def, BB))
+ return true;
+
+ // -1 is unlimited number of blocks on all paths.
+ if (NBBsOnAllPaths != -1)
+ --NBBsOnAllPaths;
+
+ ++I;
}
- return false;
-}
-
-bool GVNHoist::hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *SrcBB,
- int &NBBsOnAllPaths) {
- assert(DT->dominates(HoistPt, SrcBB) && "Invalid path");
-
- // Walk all basic blocks reachable in depth-first iteration on
- // the inverse CFG from BBInsn to NewHoistPt. These blocks are all the
- // blocks that may be executed between the execution of NewHoistPt and
- // BBInsn. Hoisting an expression from BBInsn into NewHoistPt has to be safe
- // on all execution paths.
- for (auto I = idf_begin(SrcBB), E = idf_end(SrcBB); I != E;) {
- const BasicBlock *BB = *I;
- if (BB == HoistPt) {
- // Stop traversal when reaching NewHoistPt.
- I.skipChildren();
- continue;
+ return false;
+}
+
+bool GVNHoist::hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *SrcBB,
+ int &NBBsOnAllPaths) {
+ assert(DT->dominates(HoistPt, SrcBB) && "Invalid path");
+
+ // Walk all basic blocks reachable in depth-first iteration on
+ // the inverse CFG from BBInsn to NewHoistPt. These blocks are all the
+ // blocks that may be executed between the execution of NewHoistPt and
+ // BBInsn. Hoisting an expression from BBInsn into NewHoistPt has to be safe
+ // on all execution paths.
+ for (auto I = idf_begin(SrcBB), E = idf_end(SrcBB); I != E;) {
+ const BasicBlock *BB = *I;
+ if (BB == HoistPt) {
+ // Stop traversal when reaching NewHoistPt.
+ I.skipChildren();
+ continue;
}
-
- if (hasEHhelper(BB, SrcBB, NBBsOnAllPaths))
- return true;
-
- // -1 is unlimited number of blocks on all paths.
- if (NBBsOnAllPaths != -1)
- --NBBsOnAllPaths;
-
- ++I;
- }
-
- return false;
-}
-
-bool GVNHoist::safeToHoistLdSt(const Instruction *NewPt,
- const Instruction *OldPt, MemoryUseOrDef *U,
- GVNHoist::InsKind K, int &NBBsOnAllPaths) {
- // In place hoisting is safe.
- if (NewPt == OldPt)
- return true;
-
- const BasicBlock *NewBB = NewPt->getParent();
- const BasicBlock *OldBB = OldPt->getParent();
- const BasicBlock *UBB = U->getBlock();
-
- // Check for dependences on the Memory SSA.
- MemoryAccess *D = U->getDefiningAccess();
- BasicBlock *DBB = D->getBlock();
- if (DT->properlyDominates(NewBB, DBB))
- // Cannot move the load or store to NewBB above its definition in DBB.
- return false;
-
- if (NewBB == DBB && !MSSA->isLiveOnEntryDef(D))
- if (auto *UD = dyn_cast<MemoryUseOrDef>(D))
- if (!firstInBB(UD->getMemoryInst(), NewPt))
- // Cannot move the load or store to NewPt above its definition in D.
- return false;
-
- // Check for unsafe hoistings due to side effects.
- if (K == InsKind::Store) {
- if (hasEHOrLoadsOnPath(NewPt, cast<MemoryDef>(U), NBBsOnAllPaths))
- return false;
- } else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths))
- return false;
-
- if (UBB == NewBB) {
- if (DT->properlyDominates(DBB, NewBB))
- return true;
- assert(UBB == DBB);
- assert(MSSA->locallyDominates(D, U));
+
+ if (hasEHhelper(BB, SrcBB, NBBsOnAllPaths))
+ return true;
+
+ // -1 is unlimited number of blocks on all paths.
+ if (NBBsOnAllPaths != -1)
+ --NBBsOnAllPaths;
+
+ ++I;
}
- // No side effects: it is safe to hoist.
- return true;
-}
-
-bool GVNHoist::valueAnticipable(CHIArgs C, Instruction *TI) const {
- if (TI->getNumSuccessors() > (unsigned)size(C))
- return false; // Not enough args in this CHI.
-
- for (auto CHI : C) {
- // Find if all the edges have values flowing out of BB.
- if (!llvm::is_contained(successors(TI), CHI.Dest))
- return false;
+ return false;
+}
+
+bool GVNHoist::safeToHoistLdSt(const Instruction *NewPt,
+ const Instruction *OldPt, MemoryUseOrDef *U,
+ GVNHoist::InsKind K, int &NBBsOnAllPaths) {
+ // In place hoisting is safe.
+ if (NewPt == OldPt)
+ return true;
+
+ const BasicBlock *NewBB = NewPt->getParent();
+ const BasicBlock *OldBB = OldPt->getParent();
+ const BasicBlock *UBB = U->getBlock();
+
+ // Check for dependences on the Memory SSA.
+ MemoryAccess *D = U->getDefiningAccess();
+ BasicBlock *DBB = D->getBlock();
+ if (DT->properlyDominates(NewBB, DBB))
+ // Cannot move the load or store to NewBB above its definition in DBB.
+ return false;
+
+ if (NewBB == DBB && !MSSA->isLiveOnEntryDef(D))
+ if (auto *UD = dyn_cast<MemoryUseOrDef>(D))
+ if (!firstInBB(UD->getMemoryInst(), NewPt))
+ // Cannot move the load or store to NewPt above its definition in D.
+ return false;
+
+ // Check for unsafe hoistings due to side effects.
+ if (K == InsKind::Store) {
+ if (hasEHOrLoadsOnPath(NewPt, cast<MemoryDef>(U), NBBsOnAllPaths))
+ return false;
+ } else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths))
+ return false;
+
+ if (UBB == NewBB) {
+ if (DT->properlyDominates(DBB, NewBB))
+ return true;
+ assert(UBB == DBB);
+ assert(MSSA->locallyDominates(D, U));
}
- return true;
-}
-void GVNHoist::checkSafety(CHIArgs C, BasicBlock *BB, GVNHoist::InsKind K,
- SmallVectorImpl<CHIArg> &Safe) {
- int NumBBsOnAllPaths = MaxNumberOfBBSInPath;
- for (auto CHI : C) {
- Instruction *Insn = CHI.I;
- if (!Insn) // No instruction was inserted in this CHI.
- continue;
- if (K == InsKind::Scalar) {
- if (safeToHoistScalar(BB, Insn->getParent(), NumBBsOnAllPaths))
- Safe.push_back(CHI);
- } else {
- auto *T = BB->getTerminator();
- if (MemoryUseOrDef *UD = MSSA->getMemoryAccess(Insn))
- if (safeToHoistLdSt(T, Insn, UD, K, NumBBsOnAllPaths))
- Safe.push_back(CHI);
+ // No side effects: it is safe to hoist.
+ return true;
+}
+
+bool GVNHoist::valueAnticipable(CHIArgs C, Instruction *TI) const {
+ if (TI->getNumSuccessors() > (unsigned)size(C))
+ return false; // Not enough args in this CHI.
+
+ for (auto CHI : C) {
+ // Find if all the edges have values flowing out of BB.
+ if (!llvm::is_contained(successors(TI), CHI.Dest))
+ return false;
+ }
+ return true;
+}
+
+void GVNHoist::checkSafety(CHIArgs C, BasicBlock *BB, GVNHoist::InsKind K,
+ SmallVectorImpl<CHIArg> &Safe) {
+ int NumBBsOnAllPaths = MaxNumberOfBBSInPath;
+ for (auto CHI : C) {
+ Instruction *Insn = CHI.I;
+ if (!Insn) // No instruction was inserted in this CHI.
+ continue;
+ if (K == InsKind::Scalar) {
+ if (safeToHoistScalar(BB, Insn->getParent(), NumBBsOnAllPaths))
+ Safe.push_back(CHI);
+ } else {
+ auto *T = BB->getTerminator();
+ if (MemoryUseOrDef *UD = MSSA->getMemoryAccess(Insn))
+ if (safeToHoistLdSt(T, Insn, UD, K, NumBBsOnAllPaths))
+ Safe.push_back(CHI);
}
}
-}
-
-void GVNHoist::fillRenameStack(BasicBlock *BB, InValuesType &ValueBBs,
- GVNHoist::RenameStackType &RenameStack) {
- auto it1 = ValueBBs.find(BB);
- if (it1 != ValueBBs.end()) {
- // Iterate in reverse order to keep lower ranked values on the top.
- for (std::pair<VNType, Instruction *> &VI : reverse(it1->second)) {
- // Get the value of instruction I
- LLVM_DEBUG(dbgs() << "\nPushing on stack: " << *VI.second);
- RenameStack[VI.first].push_back(VI.second);
+}
+
+void GVNHoist::fillRenameStack(BasicBlock *BB, InValuesType &ValueBBs,
+ GVNHoist::RenameStackType &RenameStack) {
+ auto it1 = ValueBBs.find(BB);
+ if (it1 != ValueBBs.end()) {
+ // Iterate in reverse order to keep lower ranked values on the top.
+ for (std::pair<VNType, Instruction *> &VI : reverse(it1->second)) {
+ // Get the value of instruction I
+ LLVM_DEBUG(dbgs() << "\nPushing on stack: " << *VI.second);
+ RenameStack[VI.first].push_back(VI.second);
}
+ }
+}
+
+void GVNHoist::fillChiArgs(BasicBlock *BB, OutValuesType &CHIBBs,
+ GVNHoist::RenameStackType &RenameStack) {
+ // For each *predecessor* (because Post-DOM) of BB check if it has a CHI
+ for (auto Pred : predecessors(BB)) {
+ auto P = CHIBBs.find(Pred);
+ if (P == CHIBBs.end()) {
+ continue;
+ }
+ LLVM_DEBUG(dbgs() << "\nLooking at CHIs in: " << Pred->getName(););
+ // A CHI is found (BB -> Pred is an edge in the CFG)
+ // Pop the stack until Top(V) = Ve.
+ auto &VCHI = P->second;
+ for (auto It = VCHI.begin(), E = VCHI.end(); It != E;) {
+ CHIArg &C = *It;
+ if (!C.Dest) {
+ auto si = RenameStack.find(C.VN);
+ // The Basic Block where CHI is must dominate the value we want to
+ // track in a CHI. In the PDom walk, there can be values in the
+ // stack which are not control dependent e.g., nested loop.
+ if (si != RenameStack.end() && si->second.size() &&
+ DT->properlyDominates(Pred, si->second.back()->getParent())) {
+ C.Dest = BB; // Assign the edge
+ C.I = si->second.pop_back_val(); // Assign the argument
+ LLVM_DEBUG(dbgs()
+ << "\nCHI Inserted in BB: " << C.Dest->getName() << *C.I
+ << ", VN: " << C.VN.first << ", " << C.VN.second);
+ }
+ // Move to next CHI of a different value
+ It = std::find_if(It, VCHI.end(), [It](CHIArg &A) { return A != *It; });
+ } else
+ ++It;
+ }
+ }
+}
+
+void GVNHoist::findHoistableCandidates(OutValuesType &CHIBBs,
+ GVNHoist::InsKind K,
+ HoistingPointList &HPL) {
+ auto cmpVN = [](const CHIArg &A, const CHIArg &B) { return A.VN < B.VN; };
+
+ // CHIArgs now have the outgoing values, so check for anticipability and
+ // accumulate hoistable candidates in HPL.
+ for (std::pair<BasicBlock *, SmallVector<CHIArg, 2>> &A : CHIBBs) {
+ BasicBlock *BB = A.first;
+ SmallVectorImpl<CHIArg> &CHIs = A.second;
+ // Vector of PHIs contains PHIs for different instructions.
+ // Sort the args according to their VNs, such that identical
+ // instructions are together.
+ llvm::stable_sort(CHIs, cmpVN);
+ auto TI = BB->getTerminator();
+ auto B = CHIs.begin();
+ // [PreIt, PHIIt) form a range of CHIs which have identical VNs.
+ auto PHIIt = llvm::find_if(CHIs, [B](CHIArg &A) { return A != *B; });
+ auto PrevIt = CHIs.begin();
+ while (PrevIt != PHIIt) {
+ // Collect values which satisfy safety checks.
+ SmallVector<CHIArg, 2> Safe;
+ // We check for safety first because there might be multiple values in
+ // the same path, some of which are not safe to be hoisted, but overall
+ // each edge has at least one value which can be hoisted, making the
+ // value anticipable along that path.
+ checkSafety(make_range(PrevIt, PHIIt), BB, K, Safe);
+
+ // List of safe values should be anticipable at TI.
+ if (valueAnticipable(make_range(Safe.begin(), Safe.end()), TI)) {
+ HPL.push_back({BB, SmallVecInsn()});
+ SmallVecInsn &V = HPL.back().second;
+ for (auto B : Safe)
+ V.push_back(B.I);
+ }
+
+ // Check other VNs
+ PrevIt = PHIIt;
+ PHIIt = std::find_if(PrevIt, CHIs.end(),
+ [PrevIt](CHIArg &A) { return A != *PrevIt; });
+ }
}
-}
-
-void GVNHoist::fillChiArgs(BasicBlock *BB, OutValuesType &CHIBBs,
- GVNHoist::RenameStackType &RenameStack) {
- // For each *predecessor* (because Post-DOM) of BB check if it has a CHI
- for (auto Pred : predecessors(BB)) {
- auto P = CHIBBs.find(Pred);
- if (P == CHIBBs.end()) {
- continue;
- }
- LLVM_DEBUG(dbgs() << "\nLooking at CHIs in: " << Pred->getName(););
- // A CHI is found (BB -> Pred is an edge in the CFG)
- // Pop the stack until Top(V) = Ve.
- auto &VCHI = P->second;
- for (auto It = VCHI.begin(), E = VCHI.end(); It != E;) {
- CHIArg &C = *It;
- if (!C.Dest) {
- auto si = RenameStack.find(C.VN);
- // The Basic Block where CHI is must dominate the value we want to
- // track in a CHI. In the PDom walk, there can be values in the
- // stack which are not control dependent e.g., nested loop.
- if (si != RenameStack.end() && si->second.size() &&
- DT->properlyDominates(Pred, si->second.back()->getParent())) {
- C.Dest = BB; // Assign the edge
- C.I = si->second.pop_back_val(); // Assign the argument
- LLVM_DEBUG(dbgs()
- << "\nCHI Inserted in BB: " << C.Dest->getName() << *C.I
- << ", VN: " << C.VN.first << ", " << C.VN.second);
- }
- // Move to next CHI of a different value
- It = std::find_if(It, VCHI.end(), [It](CHIArg &A) { return A != *It; });
- } else
- ++It;
- }
- }
-}
-
-void GVNHoist::findHoistableCandidates(OutValuesType &CHIBBs,
- GVNHoist::InsKind K,
- HoistingPointList &HPL) {
- auto cmpVN = [](const CHIArg &A, const CHIArg &B) { return A.VN < B.VN; };
-
- // CHIArgs now have the outgoing values, so check for anticipability and
- // accumulate hoistable candidates in HPL.
- for (std::pair<BasicBlock *, SmallVector<CHIArg, 2>> &A : CHIBBs) {
- BasicBlock *BB = A.first;
- SmallVectorImpl<CHIArg> &CHIs = A.second;
- // Vector of PHIs contains PHIs for different instructions.
- // Sort the args according to their VNs, such that identical
- // instructions are together.
- llvm::stable_sort(CHIs, cmpVN);
- auto TI = BB->getTerminator();
- auto B = CHIs.begin();
- // [PreIt, PHIIt) form a range of CHIs which have identical VNs.
- auto PHIIt = llvm::find_if(CHIs, [B](CHIArg &A) { return A != *B; });
- auto PrevIt = CHIs.begin();
- while (PrevIt != PHIIt) {
- // Collect values which satisfy safety checks.
- SmallVector<CHIArg, 2> Safe;
- // We check for safety first because there might be multiple values in
- // the same path, some of which are not safe to be hoisted, but overall
- // each edge has at least one value which can be hoisted, making the
- // value anticipable along that path.
- checkSafety(make_range(PrevIt, PHIIt), BB, K, Safe);
-
- // List of safe values should be anticipable at TI.
- if (valueAnticipable(make_range(Safe.begin(), Safe.end()), TI)) {
- HPL.push_back({BB, SmallVecInsn()});
- SmallVecInsn &V = HPL.back().second;
- for (auto B : Safe)
- V.push_back(B.I);
- }
-
- // Check other VNs
- PrevIt = PHIIt;
- PHIIt = std::find_if(PrevIt, CHIs.end(),
- [PrevIt](CHIArg &A) { return A != *PrevIt; });
- }
- }
-}
-
-bool GVNHoist::allOperandsAvailable(const Instruction *I,
- const BasicBlock *HoistPt) const {
- for (const Use &Op : I->operands())
- if (const auto *Inst = dyn_cast<Instruction>(&Op))
- if (!DT->dominates(Inst->getParent(), HoistPt))
- return false;
-
- return true;
-}
-
-bool GVNHoist::allGepOperandsAvailable(const Instruction *I,
- const BasicBlock *HoistPt) const {
- for (const Use &Op : I->operands())
- if (const auto *Inst = dyn_cast<Instruction>(&Op))
- if (!DT->dominates(Inst->getParent(), HoistPt)) {
- if (const GetElementPtrInst *GepOp =
- dyn_cast<GetElementPtrInst>(Inst)) {
- if (!allGepOperandsAvailable(GepOp, HoistPt))
+}
+
+bool GVNHoist::allOperandsAvailable(const Instruction *I,
+ const BasicBlock *HoistPt) const {
+ for (const Use &Op : I->operands())
+ if (const auto *Inst = dyn_cast<Instruction>(&Op))
+ if (!DT->dominates(Inst->getParent(), HoistPt))
+ return false;
+
+ return true;
+}
+
+bool GVNHoist::allGepOperandsAvailable(const Instruction *I,
+ const BasicBlock *HoistPt) const {
+ for (const Use &Op : I->operands())
+ if (const auto *Inst = dyn_cast<Instruction>(&Op))
+ if (!DT->dominates(Inst->getParent(), HoistPt)) {
+ if (const GetElementPtrInst *GepOp =
+ dyn_cast<GetElementPtrInst>(Inst)) {
+ if (!allGepOperandsAvailable(GepOp, HoistPt))
return false;
- // Gep is available if all operands of GepOp are available.
- } else {
- // Gep is not available if it has operands other than GEPs that are
- // defined in blocks not dominating HoistPt.
+ // Gep is available if all operands of GepOp are available.
+ } else {
+ // Gep is not available if it has operands other than GEPs that are
+ // defined in blocks not dominating HoistPt.
return false;
- }
+ }
}
- return true;
-}
-
-void GVNHoist::makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt,
- const SmallVecInsn &InstructionsToHoist,
- Instruction *Gep) const {
- assert(allGepOperandsAvailable(Gep, HoistPt) && "GEP operands not available");
-
- Instruction *ClonedGep = Gep->clone();
- for (unsigned i = 0, e = Gep->getNumOperands(); i != e; ++i)
- if (Instruction *Op = dyn_cast<Instruction>(Gep->getOperand(i))) {
- // Check whether the operand is already available.
- if (DT->dominates(Op->getParent(), HoistPt))
- continue;
-
- // As a GEP can refer to other GEPs, recursively make all the operands
- // of this GEP available at HoistPt.
- if (GetElementPtrInst *GepOp = dyn_cast<GetElementPtrInst>(Op))
- makeGepsAvailable(ClonedGep, HoistPt, InstructionsToHoist, GepOp);
+ return true;
+}
+
+void GVNHoist::makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt,
+ const SmallVecInsn &InstructionsToHoist,
+ Instruction *Gep) const {
+ assert(allGepOperandsAvailable(Gep, HoistPt) && "GEP operands not available");
+
+ Instruction *ClonedGep = Gep->clone();
+ for (unsigned i = 0, e = Gep->getNumOperands(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(Gep->getOperand(i))) {
+ // Check whether the operand is already available.
+ if (DT->dominates(Op->getParent(), HoistPt))
+ continue;
+
+ // As a GEP can refer to other GEPs, recursively make all the operands
+ // of this GEP available at HoistPt.
+ if (GetElementPtrInst *GepOp = dyn_cast<GetElementPtrInst>(Op))
+ makeGepsAvailable(ClonedGep, HoistPt, InstructionsToHoist, GepOp);
}
- // Copy Gep and replace its uses in Repl with ClonedGep.
- ClonedGep->insertBefore(HoistPt->getTerminator());
-
- // Conservatively discard any optimization hints, they may differ on the
- // other paths.
- ClonedGep->dropUnknownNonDebugMetadata();
-
- // If we have optimization hints which agree with each other along different
- // paths, preserve them.
- for (const Instruction *OtherInst : InstructionsToHoist) {
- const GetElementPtrInst *OtherGep;
- if (auto *OtherLd = dyn_cast<LoadInst>(OtherInst))
- OtherGep = cast<GetElementPtrInst>(OtherLd->getPointerOperand());
- else
- OtherGep = cast<GetElementPtrInst>(
- cast<StoreInst>(OtherInst)->getPointerOperand());
- ClonedGep->andIRFlags(OtherGep);
- }
-
- // Replace uses of Gep with ClonedGep in Repl.
- Repl->replaceUsesOfWith(Gep, ClonedGep);
-}
-
-void GVNHoist::updateAlignment(Instruction *I, Instruction *Repl) {
- if (auto *ReplacementLoad = dyn_cast<LoadInst>(Repl)) {
- ReplacementLoad->setAlignment(
- std::min(ReplacementLoad->getAlign(), cast<LoadInst>(I)->getAlign()));
- ++NumLoadsRemoved;
- } else if (auto *ReplacementStore = dyn_cast<StoreInst>(Repl)) {
- ReplacementStore->setAlignment(
- std::min(ReplacementStore->getAlign(), cast<StoreInst>(I)->getAlign()));
- ++NumStoresRemoved;
- } else if (auto *ReplacementAlloca = dyn_cast<AllocaInst>(Repl)) {
- ReplacementAlloca->setAlignment(std::max(ReplacementAlloca->getAlign(),
- cast<AllocaInst>(I)->getAlign()));
- } else if (isa<CallInst>(Repl)) {
- ++NumCallsRemoved;
+ // Copy Gep and replace its uses in Repl with ClonedGep.
+ ClonedGep->insertBefore(HoistPt->getTerminator());
+
+ // Conservatively discard any optimization hints, they may differ on the
+ // other paths.
+ ClonedGep->dropUnknownNonDebugMetadata();
+
+ // If we have optimization hints which agree with each other along different
+ // paths, preserve them.
+ for (const Instruction *OtherInst : InstructionsToHoist) {
+ const GetElementPtrInst *OtherGep;
+ if (auto *OtherLd = dyn_cast<LoadInst>(OtherInst))
+ OtherGep = cast<GetElementPtrInst>(OtherLd->getPointerOperand());
+ else
+ OtherGep = cast<GetElementPtrInst>(
+ cast<StoreInst>(OtherInst)->getPointerOperand());
+ ClonedGep->andIRFlags(OtherGep);
+ }
+
+ // Replace uses of Gep with ClonedGep in Repl.
+ Repl->replaceUsesOfWith(Gep, ClonedGep);
+}
+
+void GVNHoist::updateAlignment(Instruction *I, Instruction *Repl) {
+ if (auto *ReplacementLoad = dyn_cast<LoadInst>(Repl)) {
+ ReplacementLoad->setAlignment(
+ std::min(ReplacementLoad->getAlign(), cast<LoadInst>(I)->getAlign()));
+ ++NumLoadsRemoved;
+ } else if (auto *ReplacementStore = dyn_cast<StoreInst>(Repl)) {
+ ReplacementStore->setAlignment(
+ std::min(ReplacementStore->getAlign(), cast<StoreInst>(I)->getAlign()));
+ ++NumStoresRemoved;
+ } else if (auto *ReplacementAlloca = dyn_cast<AllocaInst>(Repl)) {
+ ReplacementAlloca->setAlignment(std::max(ReplacementAlloca->getAlign(),
+ cast<AllocaInst>(I)->getAlign()));
+ } else if (isa<CallInst>(Repl)) {
+ ++NumCallsRemoved;
}
-}
-
-unsigned GVNHoist::rauw(const SmallVecInsn &Candidates, Instruction *Repl,
- MemoryUseOrDef *NewMemAcc) {
- unsigned NR = 0;
- for (Instruction *I : Candidates) {
- if (I != Repl) {
- ++NR;
- updateAlignment(I, Repl);
- if (NewMemAcc) {
- // Update the uses of the old MSSA access with NewMemAcc.
- MemoryAccess *OldMA = MSSA->getMemoryAccess(I);
- OldMA->replaceAllUsesWith(NewMemAcc);
- MSSAUpdater->removeMemoryAccess(OldMA);
- }
-
- Repl->andIRFlags(I);
- combineKnownMetadata(Repl, I);
- I->replaceAllUsesWith(Repl);
- // Also invalidate the Alias Analysis cache.
- MD->removeInstruction(I);
- I->eraseFromParent();
+}
+
+unsigned GVNHoist::rauw(const SmallVecInsn &Candidates, Instruction *Repl,
+ MemoryUseOrDef *NewMemAcc) {
+ unsigned NR = 0;
+ for (Instruction *I : Candidates) {
+ if (I != Repl) {
+ ++NR;
+ updateAlignment(I, Repl);
+ if (NewMemAcc) {
+ // Update the uses of the old MSSA access with NewMemAcc.
+ MemoryAccess *OldMA = MSSA->getMemoryAccess(I);
+ OldMA->replaceAllUsesWith(NewMemAcc);
+ MSSAUpdater->removeMemoryAccess(OldMA);
+ }
+
+ Repl->andIRFlags(I);
+ combineKnownMetadata(Repl, I);
+ I->replaceAllUsesWith(Repl);
+ // Also invalidate the Alias Analysis cache.
+ MD->removeInstruction(I);
+ I->eraseFromParent();
+ }
+ }
+ return NR;
+}
+
+void GVNHoist::raMPHIuw(MemoryUseOrDef *NewMemAcc) {
+ SmallPtrSet<MemoryPhi *, 4> UsePhis;
+ for (User *U : NewMemAcc->users())
+ if (MemoryPhi *Phi = dyn_cast<MemoryPhi>(U))
+ UsePhis.insert(Phi);
+
+ for (MemoryPhi *Phi : UsePhis) {
+ auto In = Phi->incoming_values();
+ if (llvm::all_of(In, [&](Use &U) { return U == NewMemAcc; })) {
+ Phi->replaceAllUsesWith(NewMemAcc);
+ MSSAUpdater->removeMemoryAccess(Phi);
+ }
+ }
+}
+
+unsigned GVNHoist::removeAndReplace(const SmallVecInsn &Candidates,
+ Instruction *Repl, BasicBlock *DestBB,
+ bool MoveAccess) {
+ MemoryUseOrDef *NewMemAcc = MSSA->getMemoryAccess(Repl);
+ if (MoveAccess && NewMemAcc) {
+ // The definition of this ld/st will not change: ld/st hoisting is
+ // legal when the ld/st is not moved past its current definition.
+ MSSAUpdater->moveToPlace(NewMemAcc, DestBB, MemorySSA::BeforeTerminator);
+ }
+
+ // Replace all other instructions with Repl with memory access NewMemAcc.
+ unsigned NR = rauw(Candidates, Repl, NewMemAcc);
+
+ // Remove MemorySSA phi nodes with the same arguments.
+ if (NewMemAcc)
+ raMPHIuw(NewMemAcc);
+ return NR;
+}
+
+bool GVNHoist::makeGepOperandsAvailable(
+ Instruction *Repl, BasicBlock *HoistPt,
+ const SmallVecInsn &InstructionsToHoist) const {
+ // Check whether the GEP of a ld/st can be synthesized at HoistPt.
+ GetElementPtrInst *Gep = nullptr;
+ Instruction *Val = nullptr;
+ if (auto *Ld = dyn_cast<LoadInst>(Repl)) {
+ Gep = dyn_cast<GetElementPtrInst>(Ld->getPointerOperand());
+ } else if (auto *St = dyn_cast<StoreInst>(Repl)) {
+ Gep = dyn_cast<GetElementPtrInst>(St->getPointerOperand());
+ Val = dyn_cast<Instruction>(St->getValueOperand());
+ // Check that the stored value is available.
+ if (Val) {
+ if (isa<GetElementPtrInst>(Val)) {
+ // Check whether we can compute the GEP at HoistPt.
+ if (!allGepOperandsAvailable(Val, HoistPt))
+ return false;
+ } else if (!DT->dominates(Val->getParent(), HoistPt))
+ return false;
}
- }
- return NR;
-}
-
-void GVNHoist::raMPHIuw(MemoryUseOrDef *NewMemAcc) {
- SmallPtrSet<MemoryPhi *, 4> UsePhis;
- for (User *U : NewMemAcc->users())
- if (MemoryPhi *Phi = dyn_cast<MemoryPhi>(U))
- UsePhis.insert(Phi);
-
- for (MemoryPhi *Phi : UsePhis) {
- auto In = Phi->incoming_values();
- if (llvm::all_of(In, [&](Use &U) { return U == NewMemAcc; })) {
- Phi->replaceAllUsesWith(NewMemAcc);
- MSSAUpdater->removeMemoryAccess(Phi);
- }
- }
-}
-
-unsigned GVNHoist::removeAndReplace(const SmallVecInsn &Candidates,
- Instruction *Repl, BasicBlock *DestBB,
- bool MoveAccess) {
- MemoryUseOrDef *NewMemAcc = MSSA->getMemoryAccess(Repl);
- if (MoveAccess && NewMemAcc) {
- // The definition of this ld/st will not change: ld/st hoisting is
- // legal when the ld/st is not moved past its current definition.
- MSSAUpdater->moveToPlace(NewMemAcc, DestBB, MemorySSA::BeforeTerminator);
- }
-
- // Replace all other instructions with Repl with memory access NewMemAcc.
- unsigned NR = rauw(Candidates, Repl, NewMemAcc);
-
- // Remove MemorySSA phi nodes with the same arguments.
- if (NewMemAcc)
- raMPHIuw(NewMemAcc);
- return NR;
-}
-
-bool GVNHoist::makeGepOperandsAvailable(
- Instruction *Repl, BasicBlock *HoistPt,
- const SmallVecInsn &InstructionsToHoist) const {
- // Check whether the GEP of a ld/st can be synthesized at HoistPt.
- GetElementPtrInst *Gep = nullptr;
- Instruction *Val = nullptr;
- if (auto *Ld = dyn_cast<LoadInst>(Repl)) {
- Gep = dyn_cast<GetElementPtrInst>(Ld->getPointerOperand());
- } else if (auto *St = dyn_cast<StoreInst>(Repl)) {
- Gep = dyn_cast<GetElementPtrInst>(St->getPointerOperand());
- Val = dyn_cast<Instruction>(St->getValueOperand());
- // Check that the stored value is available.
- if (Val) {
- if (isa<GetElementPtrInst>(Val)) {
- // Check whether we can compute the GEP at HoistPt.
- if (!allGepOperandsAvailable(Val, HoistPt))
- return false;
- } else if (!DT->dominates(Val->getParent(), HoistPt))
- return false;
- }
- }
-
- // Check whether we can compute the Gep at HoistPt.
- if (!Gep || !allGepOperandsAvailable(Gep, HoistPt))
- return false;
-
- makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Gep);
-
- if (Val && isa<GetElementPtrInst>(Val))
- makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Val);
-
- return true;
-}
-
-std::pair<unsigned, unsigned> GVNHoist::hoist(HoistingPointList &HPL) {
- unsigned NI = 0, NL = 0, NS = 0, NC = 0, NR = 0;
- for (const HoistingPointInfo &HP : HPL) {
- // Find out whether we already have one of the instructions in HoistPt,
- // in which case we do not have to move it.
- BasicBlock *DestBB = HP.first;
- const SmallVecInsn &InstructionsToHoist = HP.second;
- Instruction *Repl = nullptr;
- for (Instruction *I : InstructionsToHoist)
- if (I->getParent() == DestBB)
- // If there are two instructions in HoistPt to be hoisted in place:
- // update Repl to be the first one, such that we can rename the uses
- // of the second based on the first.
- if (!Repl || firstInBB(I, Repl))
- Repl = I;
-
- // Keep track of whether we moved the instruction so we know whether we
- // should move the MemoryAccess.
- bool MoveAccess = true;
- if (Repl) {
- // Repl is already in HoistPt: it remains in place.
- assert(allOperandsAvailable(Repl, DestBB) &&
- "instruction depends on operands that are not available");
- MoveAccess = false;
- } else {
- // When we do not find Repl in HoistPt, select the first in the list
- // and move it to HoistPt.
- Repl = InstructionsToHoist.front();
-
- // We can move Repl in HoistPt only when all operands are available.
- // The order in which hoistings are done may influence the availability
- // of operands.
- if (!allOperandsAvailable(Repl, DestBB)) {
- // When HoistingGeps there is nothing more we can do to make the
- // operands available: just continue.
- if (HoistingGeps)
- continue;
-
- // When not HoistingGeps we need to copy the GEPs.
- if (!makeGepOperandsAvailable(Repl, DestBB, InstructionsToHoist))
- continue;
+ }
+
+ // Check whether we can compute the Gep at HoistPt.
+ if (!Gep || !allGepOperandsAvailable(Gep, HoistPt))
+ return false;
+
+ makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Gep);
+
+ if (Val && isa<GetElementPtrInst>(Val))
+ makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Val);
+
+ return true;
+}
+
+std::pair<unsigned, unsigned> GVNHoist::hoist(HoistingPointList &HPL) {
+ unsigned NI = 0, NL = 0, NS = 0, NC = 0, NR = 0;
+ for (const HoistingPointInfo &HP : HPL) {
+ // Find out whether we already have one of the instructions in HoistPt,
+ // in which case we do not have to move it.
+ BasicBlock *DestBB = HP.first;
+ const SmallVecInsn &InstructionsToHoist = HP.second;
+ Instruction *Repl = nullptr;
+ for (Instruction *I : InstructionsToHoist)
+ if (I->getParent() == DestBB)
+ // If there are two instructions in HoistPt to be hoisted in place:
+ // update Repl to be the first one, such that we can rename the uses
+ // of the second based on the first.
+ if (!Repl || firstInBB(I, Repl))
+ Repl = I;
+
+ // Keep track of whether we moved the instruction so we know whether we
+ // should move the MemoryAccess.
+ bool MoveAccess = true;
+ if (Repl) {
+ // Repl is already in HoistPt: it remains in place.
+ assert(allOperandsAvailable(Repl, DestBB) &&
+ "instruction depends on operands that are not available");
+ MoveAccess = false;
+ } else {
+ // When we do not find Repl in HoistPt, select the first in the list
+ // and move it to HoistPt.
+ Repl = InstructionsToHoist.front();
+
+ // We can move Repl in HoistPt only when all operands are available.
+ // The order in which hoistings are done may influence the availability
+ // of operands.
+ if (!allOperandsAvailable(Repl, DestBB)) {
+ // When HoistingGeps there is nothing more we can do to make the
+ // operands available: just continue.
+ if (HoistingGeps)
+ continue;
+
+ // When not HoistingGeps we need to copy the GEPs.
+ if (!makeGepOperandsAvailable(Repl, DestBB, InstructionsToHoist))
+ continue;
}
-
- // Move the instruction at the end of HoistPt.
- Instruction *Last = DestBB->getTerminator();
- MD->removeInstruction(Repl);
- Repl->moveBefore(Last);
-
- DFSNumber[Repl] = DFSNumber[Last]++;
+
+ // Move the instruction at the end of HoistPt.
+ Instruction *Last = DestBB->getTerminator();
+ MD->removeInstruction(Repl);
+ Repl->moveBefore(Last);
+
+ DFSNumber[Repl] = DFSNumber[Last]++;
}
- NR += removeAndReplace(InstructionsToHoist, Repl, DestBB, MoveAccess);
-
- if (isa<LoadInst>(Repl))
- ++NL;
- else if (isa<StoreInst>(Repl))
- ++NS;
- else if (isa<CallInst>(Repl))
- ++NC;
- else // Scalar
- ++NI;
+ NR += removeAndReplace(InstructionsToHoist, Repl, DestBB, MoveAccess);
+
+ if (isa<LoadInst>(Repl))
+ ++NL;
+ else if (isa<StoreInst>(Repl))
+ ++NS;
+ else if (isa<CallInst>(Repl))
+ ++NC;
+ else // Scalar
+ ++NI;
}
- if (MSSA && VerifyMemorySSA)
- MSSA->verifyMemorySSA();
-
- NumHoisted += NL + NS + NC + NI;
- NumRemoved += NR;
- NumLoadsHoisted += NL;
- NumStoresHoisted += NS;
- NumCallsHoisted += NC;
- return {NI, NL + NC + NS};
-}
-
-std::pair<unsigned, unsigned> GVNHoist::hoistExpressions(Function &F) {
- InsnInfo II;
- LoadInfo LI;
- StoreInfo SI;
- CallInfo CI;
- for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
- int InstructionNb = 0;
- for (Instruction &I1 : *BB) {
- // If I1 cannot guarantee progress, subsequent instructions
- // in BB cannot be hoisted anyways.
- if (!isGuaranteedToTransferExecutionToSuccessor(&I1)) {
- HoistBarrier.insert(BB);
- break;
- }
- // Only hoist the first instructions in BB up to MaxDepthInBB. Hoisting
- // deeper may increase the register pressure and compilation time.
- if (MaxDepthInBB != -1 && InstructionNb++ >= MaxDepthInBB)
- break;
-
- // Do not value number terminator instructions.
- if (I1.isTerminator())
- break;
-
- if (auto *Load = dyn_cast<LoadInst>(&I1))
- LI.insert(Load, VN);
- else if (auto *Store = dyn_cast<StoreInst>(&I1))
- SI.insert(Store, VN);
- else if (auto *Call = dyn_cast<CallInst>(&I1)) {
- if (auto *Intr = dyn_cast<IntrinsicInst>(Call)) {
- if (isa<DbgInfoIntrinsic>(Intr) ||
- Intr->getIntrinsicID() == Intrinsic::assume ||
- Intr->getIntrinsicID() == Intrinsic::sideeffect)
- continue;
- }
- if (Call->mayHaveSideEffects())
- break;
-
- if (Call->isConvergent())
- break;
-
- CI.insert(Call, VN);
- } else if (HoistingGeps || !isa<GetElementPtrInst>(&I1))
- // Do not hoist scalars past calls that may write to memory because
- // that could result in spills later. geps are handled separately.
- // TODO: We can relax this for targets like AArch64 as they have more
- // registers than X86.
- II.insert(&I1, VN);
- }
+ if (MSSA && VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+
+ NumHoisted += NL + NS + NC + NI;
+ NumRemoved += NR;
+ NumLoadsHoisted += NL;
+ NumStoresHoisted += NS;
+ NumCallsHoisted += NC;
+ return {NI, NL + NC + NS};
+}
+
+std::pair<unsigned, unsigned> GVNHoist::hoistExpressions(Function &F) {
+ InsnInfo II;
+ LoadInfo LI;
+ StoreInfo SI;
+ CallInfo CI;
+ for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
+ int InstructionNb = 0;
+ for (Instruction &I1 : *BB) {
+ // If I1 cannot guarantee progress, subsequent instructions
+ // in BB cannot be hoisted anyways.
+ if (!isGuaranteedToTransferExecutionToSuccessor(&I1)) {
+ HoistBarrier.insert(BB);
+ break;
+ }
+ // Only hoist the first instructions in BB up to MaxDepthInBB. Hoisting
+ // deeper may increase the register pressure and compilation time.
+ if (MaxDepthInBB != -1 && InstructionNb++ >= MaxDepthInBB)
+ break;
+
+ // Do not value number terminator instructions.
+ if (I1.isTerminator())
+ break;
+
+ if (auto *Load = dyn_cast<LoadInst>(&I1))
+ LI.insert(Load, VN);
+ else if (auto *Store = dyn_cast<StoreInst>(&I1))
+ SI.insert(Store, VN);
+ else if (auto *Call = dyn_cast<CallInst>(&I1)) {
+ if (auto *Intr = dyn_cast<IntrinsicInst>(Call)) {
+ if (isa<DbgInfoIntrinsic>(Intr) ||
+ Intr->getIntrinsicID() == Intrinsic::assume ||
+ Intr->getIntrinsicID() == Intrinsic::sideeffect)
+ continue;
+ }
+ if (Call->mayHaveSideEffects())
+ break;
+
+ if (Call->isConvergent())
+ break;
+
+ CI.insert(Call, VN);
+ } else if (HoistingGeps || !isa<GetElementPtrInst>(&I1))
+ // Do not hoist scalars past calls that may write to memory because
+ // that could result in spills later. geps are handled separately.
+ // TODO: We can relax this for targets like AArch64 as they have more
+ // registers than X86.
+ II.insert(&I1, VN);
+ }
}
- HoistingPointList HPL;
- computeInsertionPoints(II.getVNTable(), HPL, InsKind::Scalar);
- computeInsertionPoints(LI.getVNTable(), HPL, InsKind::Load);
- computeInsertionPoints(SI.getVNTable(), HPL, InsKind::Store);
- computeInsertionPoints(CI.getScalarVNTable(), HPL, InsKind::Scalar);
- computeInsertionPoints(CI.getLoadVNTable(), HPL, InsKind::Load);
- computeInsertionPoints(CI.getStoreVNTable(), HPL, InsKind::Store);
- return hoist(HPL);
-}
-
+ HoistingPointList HPL;
+ computeInsertionPoints(II.getVNTable(), HPL, InsKind::Scalar);
+ computeInsertionPoints(LI.getVNTable(), HPL, InsKind::Load);
+ computeInsertionPoints(SI.getVNTable(), HPL, InsKind::Store);
+ computeInsertionPoints(CI.getScalarVNTable(), HPL, InsKind::Scalar);
+ computeInsertionPoints(CI.getLoadVNTable(), HPL, InsKind::Load);
+ computeInsertionPoints(CI.getStoreVNTable(), HPL, InsKind::Store);
+ return hoist(HPL);
+}
+
} // end namespace llvm
PreservedAnalyses GVNHoistPass::run(Function &F, FunctionAnalysisManager &AM) {
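
For readers following the GVNHoist hunk above, here is a minimal standalone sketch of the two driver ideas it restores: the rank() ordering that prefers constants over undef, undef over constant expressions, then arguments, then instructions by DFS number, and the fixed-point loop in run() that re-runs hoisting until nothing moves or the chain-length cap is reached. SimpleValue, hoistOnce, and runToFixedPoint are illustrative stand-ins, not LLVM APIs and not part of this patch.

#include <utility>

// Illustrative stand-in for the value kinds distinguished by GVNHoist::rank().
enum class Kind { Constant, Undef, ConstantExpr, Argument, Instruction };

struct SimpleValue {
  Kind K;
  unsigned ArgNo = 0; // used when K == Kind::Argument
  unsigned DFS = 0;   // used when K == Kind::Instruction
};

// Mirrors the preference order in GVNHoist::rank(): lower rank is preferred.
// (The real pass additionally returns ~0 for instructions with no DFS number,
// i.e. unreachable ones.)
unsigned rank(const SimpleValue &V, unsigned NumFuncArgs) {
  switch (V.K) {
  case Kind::Constant:     return 0;
  case Kind::Undef:        return 1;
  case Kind::ConstantExpr: return 2;
  case Kind::Argument:     return 3 + V.ArgNo;
  case Kind::Instruction:  return 4 + NumFuncArgs + V.DFS;
  }
  return ~0u;
}

// Skeleton of the fixed-point loop in GVNHoist::run(); hoistOnce() stands in
// for hoistExpressions() and returns {#scalars, #loads+stores} hoisted.
bool runToFixedPoint(int MaxChainLength,
                     std::pair<unsigned, unsigned> (*hoistOnce)()) {
  bool Changed = false;
  int ChainLength = 0;
  while (true) {
    if (MaxChainLength != -1 && ++ChainLength >= MaxChainLength)
      return Changed;
    std::pair<unsigned, unsigned> Stat = hoistOnce();
    if (Stat.first + Stat.second == 0)
      return Changed;
    // In the real pass, hoisting loads/stores triggers VN.clear() so that
    // scalars depending on the hoisted ld/st can be hoisted next iteration.
    Changed = true;
  }
}
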
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/GVNSink.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/GVNSink.cpp
index aef927ab65..35ad503e23 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/GVNSink.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/GVNSink.cpp
@@ -158,7 +158,7 @@ public:
void restrictToBlocks(SmallSetVector<BasicBlock *, 4> &Blocks) {
for (auto II = Insts.begin(); II != Insts.end();) {
- if (!llvm::is_contained(Blocks, (*II)->getParent())) {
+ if (!llvm::is_contained(Blocks, (*II)->getParent())) {
ActiveBlocks.remove((*II)->getParent());
II = Insts.erase(II);
} else {
@@ -276,7 +276,7 @@ public:
auto VI = Values.begin();
while (BI != Blocks.end()) {
assert(VI != Values.end());
- if (!llvm::is_contained(NewBlocks, *BI)) {
+ if (!llvm::is_contained(NewBlocks, *BI)) {
BI = Blocks.erase(BI);
VI = Values.erase(VI);
} else {
@@ -692,7 +692,7 @@ Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
ModelledPHI NewPHI(NewInsts, ActivePreds);
// Does sinking this instruction render previous PHIs redundant?
- if (NeededPHIs.erase(NewPHI))
+ if (NeededPHIs.erase(NewPHI))
RecomputePHIContents = true;
if (RecomputePHIContents) {
@@ -754,7 +754,7 @@ Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
Cand.NumMemoryInsts = MemoryInstNum;
Cand.NumBlocks = ActivePreds.size();
Cand.NumPHIs = NeededPHIs.size();
- append_range(Cand.Blocks, ActivePreds);
+ append_range(Cand.Blocks, ActivePreds);
return Cand;
}
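
The GVNSink hunks above revolve around one in-place filtering pattern: walk a container, drop the elements whose parent block is no longer in the allowed set, and only advance the iterator when nothing was erased. A simplified standalone sketch of that pattern follows; Inst and Block are hypothetical stand-ins, std::find plays the role of llvm::is_contained, and the ActiveBlocks bookkeeping of the real restrictToBlocks is omitted.

#include <algorithm>
#include <vector>

struct Block {};
struct Inst { Block *Parent; };

// Erase every instruction whose parent block is not in Allowed. erase()
// returns the iterator to the next element, so we only ++It when keeping.
void restrictToBlocks(std::vector<Inst *> &Insts,
                      const std::vector<Block *> &Allowed) {
  for (auto It = Insts.begin(); It != Insts.end();) {
    bool Kept = std::find(Allowed.begin(), Allowed.end(), (*It)->Parent) !=
                Allowed.end(); // plays the role of llvm::is_contained
    if (!Kept)
      It = Insts.erase(It);
    else
      ++It;
  }
}
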
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/GuardWidening.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/GuardWidening.cpp
index 61eb4ce0ed..80e644fc4f 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/GuardWidening.cpp
@@ -347,8 +347,8 @@ bool GuardWideningImpl::eliminateInstrViaWidening(
const auto &GuardsInCurBB = GuardsInBlock.find(CurBB)->second;
auto I = GuardsInCurBB.begin();
- auto E = Instr->getParent() == CurBB ? find(GuardsInCurBB, Instr)
- : GuardsInCurBB.end();
+ auto E = Instr->getParent() == CurBB ? find(GuardsInCurBB, Instr)
+ : GuardsInCurBB.end();
#ifndef NDEBUG
{
@@ -665,12 +665,12 @@ bool GuardWideningImpl::combineRangeChecks(
};
copy_if(Checks, std::back_inserter(CurrentChecks), IsCurrentCheck);
- erase_if(Checks, IsCurrentCheck);
+ erase_if(Checks, IsCurrentCheck);
assert(CurrentChecks.size() != 0 && "We know we have at least one!");
if (CurrentChecks.size() < 3) {
- llvm::append_range(RangeChecksOut, CurrentChecks);
+ llvm::append_range(RangeChecksOut, CurrentChecks);
continue;
}
@@ -698,7 +698,7 @@ bool GuardWideningImpl::combineRangeChecks(
return (HighOffset - RC.getOffsetValue()).ult(MaxDiff);
};
- if (MaxDiff.isMinValue() || !all_of(drop_begin(CurrentChecks), OffsetOK))
+ if (MaxDiff.isMinValue() || !all_of(drop_begin(CurrentChecks), OffsetOK))
return false;
// We have a series of f+1 checks as:
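
The GuardWidening hunk above ends by requiring that every range check after the first satisfy an offset predicate, written as all_of(drop_begin(CurrentChecks), OffsetOK). A standalone sketch of that drop-the-first/check-the-rest shape follows; the uint64_t offsets and the within-MaxDiff predicate are simplified stand-ins for the APInt arithmetic of the real pass, not its exact logic.

#include <algorithm>
#include <cstdint>
#include <vector>

// Returns true when every offset after the first lies within MaxDiff of the
// first one. Mirrors the shape of all_of(drop_begin(Checks), OffsetOK); the
// predicate itself is a simplified placeholder, not GuardWidening's.
bool offsetsAreClose(const std::vector<uint64_t> &SortedOffsets,
                     uint64_t MaxDiff) {
  if (SortedOffsets.empty() || MaxDiff == 0) // analogue of MaxDiff.isMinValue()
    return false;
  uint64_t Low = SortedOffsets.front();
  return std::all_of(SortedOffsets.begin() + 1, // "drop_begin": skip the first
                     SortedOffsets.end(),
                     [&](uint64_t O) { return O - Low < MaxDiff; });
}
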
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/IndVarSimplify.cpp
index ae1fff0fa8..29c45e83b9 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -131,10 +131,10 @@ static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
cl::desc("Predicate conditions in read only loops"));
-static cl::opt<bool>
-AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
- cl::desc("Allow widening of indvars to eliminate s/zext"));
-
+static cl::opt<bool>
+AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
+ cl::desc("Allow widening of indvars to eliminate s/zext"));
+
namespace {
struct RewritePhi;
@@ -149,7 +149,7 @@ class IndVarSimplify {
std::unique_ptr<MemorySSAUpdater> MSSAU;
SmallVector<WeakTrackingVH, 16> DeadInsts;
- bool WidenIndVars;
+ bool WidenIndVars;
bool handleFloatingPointIV(Loop *L, PHINode *PH);
bool rewriteNonIntegerIVs(Loop *L);
@@ -172,9 +172,9 @@ class IndVarSimplify {
public:
IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
const DataLayout &DL, TargetLibraryInfo *TLI,
- TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
- : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
- WidenIndVars(WidenIndVars) {
+ TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
+ : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
+ WidenIndVars(WidenIndVars) {
if (MSSA)
MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
}
@@ -508,8 +508,8 @@ bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
-static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
- ScalarEvolution *SE,
+static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
+ ScalarEvolution *SE,
const TargetTransformInfo *TTI) {
bool IsSigned = Cast->getOpcode() == Instruction::SExt;
if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
@@ -631,18 +631,18 @@ bool IndVarSimplify::simplifyAndExtend(Loop *L,
}
} while(!LoopPhis.empty());
- // Continue if we disallowed widening.
- if (!WidenIndVars)
- continue;
-
+ // Continue if we disallowed widening.
+ if (!WidenIndVars)
+ continue;
+
for (; !WideIVs.empty(); WideIVs.pop_back()) {
- unsigned ElimExt;
- unsigned Widened;
- if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
- DT, DeadInsts, ElimExt, Widened,
- HasGuards, UsePostIncrementRanges)) {
- NumElimExt += ElimExt;
- NumWidened += Widened;
+ unsigned ElimExt;
+ unsigned Widened;
+ if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
+ DT, DeadInsts, ElimExt, Widened,
+ HasGuards, UsePostIncrementRanges)) {
+ NumElimExt += ElimExt;
+ NumWidened += Widened;
Changed = true;
LoopPhis.push_back(WidePhi);
}
@@ -785,7 +785,7 @@ static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
// If we can't analyze propagation through this instruction, just skip it
// and transitive users. Safe as false is a conservative result.
- if (!propagatesPoison(cast<Operator>(I)) && I != Root)
+ if (!propagatesPoison(cast<Operator>(I)) && I != Root)
continue;
if (KnownPoison.insert(I).second)
@@ -1290,116 +1290,116 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
return MadeAnyChanges;
}
-static void replaceExitCond(BranchInst *BI, Value *NewCond,
- SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
- auto *OldCond = BI->getCondition();
- BI->setCondition(NewCond);
- if (OldCond->use_empty())
- DeadInsts.emplace_back(OldCond);
-}
-
-static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
- SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
- BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
- bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
- auto *OldCond = BI->getCondition();
- auto *NewCond =
- ConstantInt::get(OldCond->getType(), IsTaken ? ExitIfTrue : !ExitIfTrue);
- replaceExitCond(BI, NewCond, DeadInsts);
-}
-
-static void replaceWithInvariantCond(
- const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
- const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
- SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
- BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
- Rewriter.setInsertPoint(BI);
- auto *LHSV = Rewriter.expandCodeFor(InvariantLHS);
- auto *RHSV = Rewriter.expandCodeFor(InvariantRHS);
- bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
- if (ExitIfTrue)
- InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
- IRBuilder<> Builder(BI);
- auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV,
- BI->getCondition()->getName());
- replaceExitCond(BI, NewCond, DeadInsts);
-}
-
-static bool optimizeLoopExitWithUnknownExitCount(
- const Loop *L, BranchInst *BI, BasicBlock *ExitingBB,
- const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
- ScalarEvolution *SE, SCEVExpander &Rewriter,
- SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
- ICmpInst::Predicate Pred;
- Value *LHS, *RHS;
- using namespace PatternMatch;
- BasicBlock *TrueSucc, *FalseSucc;
- if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
- m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
- return false;
-
- assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
- "Not a loop exit!");
-
- // 'LHS pred RHS' should now mean that we stay in loop.
- if (L->contains(FalseSucc))
- Pred = CmpInst::getInversePredicate(Pred);
-
- // If we are proving loop exit, invert the predicate.
- if (Inverted)
- Pred = CmpInst::getInversePredicate(Pred);
-
- const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
- const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
- // Can we prove it to be trivially true?
- if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) {
- foldExit(L, ExitingBB, Inverted, DeadInsts);
- return true;
- }
- // Further logic works for non-inverted condition only.
- if (Inverted)
- return false;
-
- auto *ARTy = LHSS->getType();
- auto *MaxIterTy = MaxIter->getType();
- // If possible, adjust types.
- if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
- MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
- else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
- const SCEV *MinusOne = SE->getMinusOne(ARTy);
- auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
- if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
- MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
- }
-
- if (SkipLastIter) {
- const SCEV *One = SE->getOne(MaxIter->getType());
- MaxIter = SE->getMinusSCEV(MaxIter, One);
+static void replaceExitCond(BranchInst *BI, Value *NewCond,
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
+ auto *OldCond = BI->getCondition();
+ BI->setCondition(NewCond);
+ if (OldCond->use_empty())
+ DeadInsts.emplace_back(OldCond);
+}
+
+static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
+ BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
+ bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
+ auto *OldCond = BI->getCondition();
+ auto *NewCond =
+ ConstantInt::get(OldCond->getType(), IsTaken ? ExitIfTrue : !ExitIfTrue);
+ replaceExitCond(BI, NewCond, DeadInsts);
+}
+
+static void replaceWithInvariantCond(
+ const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
+ const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
+ BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
+ Rewriter.setInsertPoint(BI);
+ auto *LHSV = Rewriter.expandCodeFor(InvariantLHS);
+ auto *RHSV = Rewriter.expandCodeFor(InvariantRHS);
+ bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
+ if (ExitIfTrue)
+ InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
+ IRBuilder<> Builder(BI);
+ auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV,
+ BI->getCondition()->getName());
+ replaceExitCond(BI, NewCond, DeadInsts);
+}
+
+static bool optimizeLoopExitWithUnknownExitCount(
+ const Loop *L, BranchInst *BI, BasicBlock *ExitingBB,
+ const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
+ ScalarEvolution *SE, SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
+ ICmpInst::Predicate Pred;
+ Value *LHS, *RHS;
+ using namespace PatternMatch;
+ BasicBlock *TrueSucc, *FalseSucc;
+ if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
+ m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
+ return false;
+
+ assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
+ "Not a loop exit!");
+
+ // 'LHS pred RHS' should now mean that we stay in loop.
+ if (L->contains(FalseSucc))
+ Pred = CmpInst::getInversePredicate(Pred);
+
+ // If we are proving loop exit, invert the predicate.
+ if (Inverted)
+ Pred = CmpInst::getInversePredicate(Pred);
+
+ const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
+ const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
+ // Can we prove it to be trivially true?
+ if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) {
+ foldExit(L, ExitingBB, Inverted, DeadInsts);
+ return true;
}
-
- // Check if there is a loop-invariant predicate equivalent to our check.
- auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
- L, BI, MaxIter);
- if (!LIP)
- return false;
-
- // Can we prove it to be trivially true?
- if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
- foldExit(L, ExitingBB, Inverted, DeadInsts);
- else
- replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS,
- Rewriter, DeadInsts);
-
- return true;
+ // Further logic works for non-inverted condition only.
+ if (Inverted)
+ return false;
+
+ auto *ARTy = LHSS->getType();
+ auto *MaxIterTy = MaxIter->getType();
+ // If possible, adjust types.
+ if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
+ MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
+ else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
+ const SCEV *MinusOne = SE->getMinusOne(ARTy);
+ auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
+ if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
+ MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
+ }
+
+ if (SkipLastIter) {
+ const SCEV *One = SE->getOne(MaxIter->getType());
+ MaxIter = SE->getMinusSCEV(MaxIter, One);
+ }
+
+ // Check if there is a loop-invariant predicate equivalent to our check.
+ auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
+ L, BI, MaxIter);
+ if (!LIP)
+ return false;
+
+ // Can we prove it to be trivially true?
+ if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
+ foldExit(L, ExitingBB, Inverted, DeadInsts);
+ else
+ replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS,
+ Rewriter, DeadInsts);
+
+ return true;
}
bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
SmallVector<BasicBlock*, 16> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
- // Remove all exits which aren't both rewriteable and execute on every
- // iteration.
- llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
+ // Remove all exits which aren't both rewriteable and execute on every
+ // iteration.
+ llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
// If our exitting block exits multiple loops, we can only rewrite the
// innermost one. Otherwise, we're changing how many times the innermost
// loop runs before it exits.
@@ -1415,10 +1415,10 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
if (isa<Constant>(BI->getCondition()))
return true;
- // Likewise, the loop latch must be dominated by the exiting BB.
- if (!DT->dominates(ExitingBB, L->getLoopLatch()))
+ // Likewise, the loop latch must be dominated by the exiting BB.
+ if (!DT->dominates(ExitingBB, L->getLoopLatch()))
return true;
-
+
return false;
});
@@ -1426,25 +1426,25 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
return false;
// Get a symbolic upper bound on the loop backedge taken count.
- const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
+ const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(MaxExitCount))
return false;
- // Visit our exit blocks in order of dominance. We know from the fact that
- // all exits must dominate the latch, so there is a total dominance order
- // between them.
- llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
+ // Visit our exit blocks in order of dominance. We know from the fact that
+ // all exits must dominate the latch, so there is a total dominance order
+ // between them.
+ llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
// std::sort sorts in ascending order, so we want the inverse of
// the normal dominance relation.
if (A == B) return false;
- if (DT->properlyDominates(A, B))
- return true;
- else {
- assert(DT->properlyDominates(B, A) &&
- "expected total dominance order!");
- return false;
- }
- });
+ if (DT->properlyDominates(A, B))
+ return true;
+ else {
+ assert(DT->properlyDominates(B, A) &&
+ "expected total dominance order!");
+ return false;
+ }
+ });
#ifdef ASSERT
for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
@@ -1452,56 +1452,56 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
#endif
bool Changed = false;
- bool SkipLastIter = false;
+ bool SkipLastIter = false;
SmallSet<const SCEV*, 8> DominatingExitCounts;
for (BasicBlock *ExitingBB : ExitingBlocks) {
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
- if (isa<SCEVCouldNotCompute>(ExitCount)) {
- // Okay, we do not know the exit count here. Can we at least prove that it
- // will remain the same within iteration space?
- auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
- auto OptimizeCond = [&](bool Inverted, bool SkipLastIter) {
- return optimizeLoopExitWithUnknownExitCount(
- L, BI, ExitingBB, MaxExitCount, Inverted, SkipLastIter, SE,
- Rewriter, DeadInsts);
- };
-
- // TODO: We might have proved that we can skip the last iteration for
- // this check. In this case, we only want to check the condition on the
- // pre-last iteration (MaxExitCount - 1). However, there is a nasty
- // corner case:
- //
- // for (i = len; i != 0; i--) { ... check (i ult X) ... }
- //
- // If we could not prove that len != 0, then we also could not prove that
- // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
- // OptimizeCond will likely not prove anything for it, even if it could
- // prove the same fact for len.
- //
- // As a temporary solution, we query both last and pre-last iterations in
- // hope that we will be able to prove triviality for at least one of
- // them. We can stop querying MaxExitCount for this case once SCEV
- // understands that (MaxExitCount - 1) will not overflow here.
- if (OptimizeCond(false, false) || OptimizeCond(true, false))
- Changed = true;
- else if (SkipLastIter)
- if (OptimizeCond(false, true) || OptimizeCond(true, true))
- Changed = true;
- continue;
- }
-
- if (MaxExitCount == ExitCount)
- // If the loop has more than 1 iteration, all further checks will be
- // executed 1 iteration less.
- SkipLastIter = true;
-
+ if (isa<SCEVCouldNotCompute>(ExitCount)) {
+ // Okay, we do not know the exit count here. Can we at least prove that it
+ // will remain the same within iteration space?
+ auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
+ auto OptimizeCond = [&](bool Inverted, bool SkipLastIter) {
+ return optimizeLoopExitWithUnknownExitCount(
+ L, BI, ExitingBB, MaxExitCount, Inverted, SkipLastIter, SE,
+ Rewriter, DeadInsts);
+ };
+
+ // TODO: We might have proved that we can skip the last iteration for
+ // this check. In this case, we only want to check the condition on the
+ // pre-last iteration (MaxExitCount - 1). However, there is a nasty
+ // corner case:
+ //
+ // for (i = len; i != 0; i--) { ... check (i ult X) ... }
+ //
+ // If we could not prove that len != 0, then we also could not prove that
+ // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
+ // OptimizeCond will likely not prove anything for it, even if it could
+ // prove the same fact for len.
+ //
+ // As a temporary solution, we query both last and pre-last iterations in
+ // hope that we will be able to prove triviality for at least one of
+ // them. We can stop querying MaxExitCount for this case once SCEV
+ // understands that (MaxExitCount - 1) will not overflow here.
+ if (OptimizeCond(false, false) || OptimizeCond(true, false))
+ Changed = true;
+ else if (SkipLastIter)
+ if (OptimizeCond(false, true) || OptimizeCond(true, true))
+ Changed = true;
+ continue;
+ }
+
+ if (MaxExitCount == ExitCount)
+ // If the loop has more than 1 iteration, all further checks will be
+ // executed 1 iteration less.
+ SkipLastIter = true;
+
// If we know we'd exit on the first iteration, rewrite the exit to
// reflect this. This does not imply the loop must exit through this
// exit; there may be an earlier one taken on the first iteration.
// TODO: Given we know the backedge can't be taken, we should go ahead
// and break it. Or at least, kill all the header phis and simplify.
if (ExitCount->isZero()) {
- foldExit(L, ExitingBB, true, DeadInsts);
+ foldExit(L, ExitingBB, true, DeadInsts);
Changed = true;
continue;
}
@@ -1523,7 +1523,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
// one?
if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
MaxExitCount, ExitCount)) {
- foldExit(L, ExitingBB, false, DeadInsts);
+ foldExit(L, ExitingBB, false, DeadInsts);
Changed = true;
continue;
}
@@ -1533,7 +1533,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
// exiting iteration, but (from the visit order) strictly follows another
// which does the same and is thus dead.
if (!DominatingExitCounts.insert(ExitCount).second) {
- foldExit(L, ExitingBB, false, DeadInsts);
+ foldExit(L, ExitingBB, false, DeadInsts);
Changed = true;
continue;
}
@@ -1789,9 +1789,9 @@ bool IndVarSimplify::run(Loop *L) {
if (optimizeLoopExits(L, Rewriter)) {
Changed = true;
// Given we've changed exit counts, notify SCEV
- // Some nested loops may share same folded exit basic block,
- // thus we need to notify top most loop.
- SE->forgetTopmostLoop(L);
+ // Some nested loops may share same folded exit basic block,
+ // thus we need to notify top most loop.
+ SE->forgetTopmostLoop(L);
}
// Try to form loop invariant tests for loop exits by changing how many
@@ -1868,15 +1868,15 @@ bool IndVarSimplify::run(Loop *L) {
// Now that we're done iterating through lists, clean up any instructions
// which are now dead.
- while (!DeadInsts.empty()) {
- Value *V = DeadInsts.pop_back_val();
-
- if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
- Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
- else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
+ while (!DeadInsts.empty()) {
+ Value *V = DeadInsts.pop_back_val();
+
+ if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
+ Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
+ else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
Changed |=
RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
- }
+ }
// The Rewriter may not be used from this point on.
@@ -1926,8 +1926,8 @@ PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
Function *F = L.getHeader()->getParent();
const DataLayout &DL = F->getParent()->getDataLayout();
- IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
- WidenIndVars && AllowIVWidening);
+ IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
+ WidenIndVars && AllowIVWidening);
if (!IVS.run(&L))
return PreservedAnalyses::all();
@@ -1964,7 +1964,7 @@ struct IndVarSimplifyLegacyPass : public LoopPass {
if (MSSAAnalysis)
MSSA = &MSSAAnalysis->getMSSA();
- IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
+ IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
return IVS.run(L);
}
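Not part of the patch: a minimal standalone C++ sketch of the unsigned-wrap corner case described in the optimizeLoopExits TODO comment above, i.e. why the pass queries both the last and the pre-last iteration. The variable names are invented for the illustration.

#include <cstdint>
#include <cstdio>
#include <limits>

int main() {
  // Model of the corner case: the symbolic trip count 'len' cannot be proven
  // non-zero, so the pre-last iteration count (len - 1) may wrap to
  // UINT64_MAX, and a fact proven for 'len' cannot simply be restated for it.
  std::uint64_t len = 0;            // unproven-to-be-nonzero trip count
  std::uint64_t preLast = len - 1;  // wraps: 0 - 1 == UINT64_MAX
  std::printf("len = %llu, len - 1 = %llu\n",
              (unsigned long long)len, (unsigned long long)preLast);
  // This is why OptimizeCond is tried with SkipLastIter = false first and the
  // SkipLastIter variants only afterwards: the query against MaxExitCount can
  // succeed even when MaxExitCount - 1 is unusable.
  return preLast == std::numeric_limits<std::uint64_t>::max() ? 0 : 1;
}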
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index 6e09dec198..321f44932a 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -52,7 +52,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -113,9 +113,9 @@ static cl::opt<bool> PrintRangeChecks("irce-print-range-checks", cl::Hidden,
static cl::opt<bool> SkipProfitabilityChecks("irce-skip-profitability-checks",
cl::Hidden, cl::init(false));
-static cl::opt<unsigned> MinRuntimeIterations("irce-min-runtime-iterations",
- cl::Hidden, cl::init(10));
-
+static cl::opt<unsigned> MinRuntimeIterations("irce-min-runtime-iterations",
+ cl::Hidden, cl::init(10));
+
static cl::opt<bool> AllowUnsignedLatchCondition("irce-allow-unsigned-latch",
cl::Hidden, cl::init(true));
@@ -228,27 +228,27 @@ public:
SmallVectorImpl<InductiveRangeCheck> &Checks);
};
-struct LoopStructure;
-
+struct LoopStructure;
+
class InductiveRangeCheckElimination {
ScalarEvolution &SE;
BranchProbabilityInfo *BPI;
DominatorTree &DT;
LoopInfo &LI;
- using GetBFIFunc =
- llvm::Optional<llvm::function_ref<llvm::BlockFrequencyInfo &()> >;
- GetBFIFunc GetBFI;
-
- // Returns true if it is profitable to do a transform basing on estimation of
- // number of iterations.
- bool isProfitableToTransform(const Loop &L, LoopStructure &LS);
-
+ using GetBFIFunc =
+ llvm::Optional<llvm::function_ref<llvm::BlockFrequencyInfo &()> >;
+ GetBFIFunc GetBFI;
+
+ // Returns true if it is profitable to do a transform basing on estimation of
+ // number of iterations.
+ bool isProfitableToTransform(const Loop &L, LoopStructure &LS);
+
public:
InductiveRangeCheckElimination(ScalarEvolution &SE,
BranchProbabilityInfo *BPI, DominatorTree &DT,
- LoopInfo &LI, GetBFIFunc GetBFI = None)
- : SE(SE), BPI(BPI), DT(DT), LI(LI), GetBFI(GetBFI) {}
+ LoopInfo &LI, GetBFIFunc GetBFI = None)
+ : SE(SE), BPI(BPI), DT(DT), LI(LI), GetBFI(GetBFI) {}
bool run(Loop *L, function_ref<void(Loop *, bool)> LPMAddNewLoop);
};
@@ -505,8 +505,8 @@ struct LoopStructure {
return Result;
}
- static Optional<LoopStructure> parseLoopStructure(ScalarEvolution &, Loop &,
- const char *&);
+ static Optional<LoopStructure> parseLoopStructure(ScalarEvolution &, Loop &,
+ const char *&);
};
/// This class is used to constrain loops to run within a given iteration space.
@@ -750,7 +750,7 @@ static bool isSafeIncreasingBound(const SCEV *Start,
}
Optional<LoopStructure>
-LoopStructure::parseLoopStructure(ScalarEvolution &SE, Loop &L,
+LoopStructure::parseLoopStructure(ScalarEvolution &SE, Loop &L,
const char *&FailureReason) {
if (!L.isLoopSimplifyForm()) {
FailureReason = "loop not in LoopSimplify form";
@@ -1768,25 +1768,25 @@ PreservedAnalyses IRCEPass::run(Function &F, FunctionAnalysisManager &AM) {
auto &BPI = AM.getResult<BranchProbabilityAnalysis>(F);
LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
- // Get BFI analysis result on demand. Please note that modification of
- // CFG invalidates this analysis and we should handle it.
- auto getBFI = [&F, &AM ]()->BlockFrequencyInfo & {
- return AM.getResult<BlockFrequencyAnalysis>(F);
- };
- InductiveRangeCheckElimination IRCE(SE, &BPI, DT, LI, { getBFI });
+ // Get BFI analysis result on demand. Please note that modification of
+ // CFG invalidates this analysis and we should handle it.
+ auto getBFI = [&F, &AM ]()->BlockFrequencyInfo & {
+ return AM.getResult<BlockFrequencyAnalysis>(F);
+ };
+ InductiveRangeCheckElimination IRCE(SE, &BPI, DT, LI, { getBFI });
bool Changed = false;
- {
- bool CFGChanged = false;
- for (const auto &L : LI) {
- CFGChanged |= simplifyLoop(L, &DT, &LI, &SE, nullptr, nullptr,
- /*PreserveLCSSA=*/false);
- Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
- }
- Changed |= CFGChanged;
-
- if (CFGChanged && !SkipProfitabilityChecks)
- AM.invalidate<BlockFrequencyAnalysis>(F);
+ {
+ bool CFGChanged = false;
+ for (const auto &L : LI) {
+ CFGChanged |= simplifyLoop(L, &DT, &LI, &SE, nullptr, nullptr,
+ /*PreserveLCSSA=*/false);
+ Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
+ }
+ Changed |= CFGChanged;
+
+ if (CFGChanged && !SkipProfitabilityChecks)
+ AM.invalidate<BlockFrequencyAnalysis>(F);
}
SmallPriorityWorklist<Loop *, 4> Worklist;
@@ -1798,11 +1798,11 @@ PreservedAnalyses IRCEPass::run(Function &F, FunctionAnalysisManager &AM) {
while (!Worklist.empty()) {
Loop *L = Worklist.pop_back_val();
- if (IRCE.run(L, LPMAddNewLoop)) {
- Changed = true;
- if (!SkipProfitabilityChecks)
- AM.invalidate<BlockFrequencyAnalysis>(F);
- }
+ if (IRCE.run(L, LPMAddNewLoop)) {
+ Changed = true;
+ if (!SkipProfitabilityChecks)
+ AM.invalidate<BlockFrequencyAnalysis>(F);
+ }
}
if (!Changed)
@@ -1843,37 +1843,37 @@ bool IRCELegacyPass::runOnFunction(Function &F) {
return Changed;
}
-bool
-InductiveRangeCheckElimination::isProfitableToTransform(const Loop &L,
- LoopStructure &LS) {
- if (SkipProfitabilityChecks)
- return true;
- if (GetBFI.hasValue()) {
- BlockFrequencyInfo &BFI = (*GetBFI)();
- uint64_t hFreq = BFI.getBlockFreq(LS.Header).getFrequency();
- uint64_t phFreq = BFI.getBlockFreq(L.getLoopPreheader()).getFrequency();
- if (phFreq != 0 && hFreq != 0 && (hFreq / phFreq < MinRuntimeIterations)) {
- LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "
- << "the estimated number of iterations basing on "
- "frequency info is " << (hFreq / phFreq) << "\n";);
- return false;
- }
- return true;
- }
-
- if (!BPI)
- return true;
- BranchProbability ExitProbability =
- BPI->getEdgeProbability(LS.Latch, LS.LatchBrExitIdx);
- if (ExitProbability > BranchProbability(1, MinRuntimeIterations)) {
- LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "
- << "the exit probability is too big " << ExitProbability
- << "\n";);
- return false;
- }
- return true;
-}
-
+bool
+InductiveRangeCheckElimination::isProfitableToTransform(const Loop &L,
+ LoopStructure &LS) {
+ if (SkipProfitabilityChecks)
+ return true;
+ if (GetBFI.hasValue()) {
+ BlockFrequencyInfo &BFI = (*GetBFI)();
+ uint64_t hFreq = BFI.getBlockFreq(LS.Header).getFrequency();
+ uint64_t phFreq = BFI.getBlockFreq(L.getLoopPreheader()).getFrequency();
+ if (phFreq != 0 && hFreq != 0 && (hFreq / phFreq < MinRuntimeIterations)) {
+ LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "
+ << "the estimated number of iterations basing on "
+ "frequency info is " << (hFreq / phFreq) << "\n";);
+ return false;
+ }
+ return true;
+ }
+
+ if (!BPI)
+ return true;
+ BranchProbability ExitProbability =
+ BPI->getEdgeProbability(LS.Latch, LS.LatchBrExitIdx);
+ if (ExitProbability > BranchProbability(1, MinRuntimeIterations)) {
+ LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "
+ << "the exit probability is too big " << ExitProbability
+ << "\n";);
+ return false;
+ }
+ return true;
+}
+
bool InductiveRangeCheckElimination::run(
Loop *L, function_ref<void(Loop *, bool)> LPMAddNewLoop) {
if (L->getBlocks().size() >= LoopSizeCutoff) {
@@ -1913,15 +1913,15 @@ bool InductiveRangeCheckElimination::run(
const char *FailureReason = nullptr;
Optional<LoopStructure> MaybeLoopStructure =
- LoopStructure::parseLoopStructure(SE, *L, FailureReason);
+ LoopStructure::parseLoopStructure(SE, *L, FailureReason);
if (!MaybeLoopStructure.hasValue()) {
LLVM_DEBUG(dbgs() << "irce: could not parse loop structure: "
<< FailureReason << "\n";);
return false;
}
LoopStructure LS = MaybeLoopStructure.getValue();
- if (!isProfitableToTransform(*L, LS))
- return false;
+ if (!isProfitableToTransform(*L, LS))
+ return false;
const SCEVAddRecExpr *IndVar =
cast<SCEVAddRecExpr>(SE.getMinusSCEV(SE.getSCEV(LS.IndVarBase), SE.getSCEV(LS.IndVarStep)));
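Not part of the patch: a small self-contained sketch of the two profitability checks re-annotated in isProfitableToTransform above, the header/preheader block-frequency ratio and the latch exit probability, both measured against the irce-min-runtime-iterations threshold (default 10). The function and variable names are made up for the sketch.

#include <cstdint>
#include <cstdio>

// Simplified model, not the LLVM implementation.
static bool profitableByFrequency(std::uint64_t headerFreq,
                                  std::uint64_t preheaderFreq,
                                  unsigned minRuntimeIterations) {
  if (headerFreq == 0 || preheaderFreq == 0)
    return true;  // no usable estimate, so do not block the transform
  // headerFreq / preheaderFreq approximates the average trip count.
  return headerFreq / preheaderFreq >= minRuntimeIterations;
}

static bool profitableByExitProbability(unsigned exitNumer, unsigned exitDenom,
                                        unsigned minRuntimeIterations) {
  // An exit probability above 1/minRuntimeIterations means the loop is
  // expected to run fewer than minRuntimeIterations iterations.
  return (std::uint64_t)exitNumer * minRuntimeIterations <= exitDenom;
}

int main() {
  // A loop whose header runs about 50 times per preheader entry clears the
  // default threshold of 10; an exit probability of 1/4 does not.
  std::printf("frequency check: %d\n", profitableByFrequency(5000, 100, 10));
  std::printf("probability check: %d\n", profitableByExitProbability(1, 4, 10));
  return 0;
}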
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/InferAddressSpaces.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 332eb10ac1..9127f3c2e0 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -88,7 +88,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
+#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
@@ -109,7 +109,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Operator.h"
-#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
@@ -164,16 +164,16 @@ public:
}
bool runOnFunction(Function &F) override;
-};
-
-class InferAddressSpacesImpl {
- const TargetTransformInfo *TTI = nullptr;
- const DataLayout *DL = nullptr;
-
- /// Target specific address space which uses of should be replaced if
- /// possible.
- unsigned FlatAddrSpace = 0;
-
+};
+
+class InferAddressSpacesImpl {
+ const TargetTransformInfo *TTI = nullptr;
+ const DataLayout *DL = nullptr;
+
+ /// Target specific address space which uses of should be replaced if
+ /// possible.
+ unsigned FlatAddrSpace = 0;
+
// Returns the new address space of V if updated; otherwise, returns None.
Optional<unsigned>
updateAddressSpace(const Value &V,
@@ -215,11 +215,11 @@ class InferAddressSpacesImpl {
const ValueToValueMapTy &ValueWithNewAddrSpace,
SmallVectorImpl<const Use *> *UndefUsesToFix) const;
unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) const;
-
-public:
- InferAddressSpacesImpl(const TargetTransformInfo *TTI, unsigned FlatAddrSpace)
- : TTI(TTI), FlatAddrSpace(FlatAddrSpace) {}
- bool run(Function &F);
+
+public:
+ InferAddressSpacesImpl(const TargetTransformInfo *TTI, unsigned FlatAddrSpace)
+ : TTI(TTI), FlatAddrSpace(FlatAddrSpace) {}
+ bool run(Function &F);
};
} // end anonymous namespace
@@ -295,8 +295,8 @@ static bool isAddressExpression(const Value &V, const DataLayout &DL,
case Instruction::IntToPtr:
return isNoopPtrIntCastPair(Op, DL, TTI);
default:
- // That value is an address expression if it has an assumed address space.
- return TTI->getAssumedAddrSpace(&V) != UninitializedAddressSpace;
+ // That value is an address expression if it has an assumed address space.
+ return TTI->getAssumedAddrSpace(&V) != UninitializedAddressSpace;
}
}
@@ -335,9 +335,9 @@ getPointerOperands(const Value &V, const DataLayout &DL,
}
}
-bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
- Value *OldV,
- Value *NewV) const {
+bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
+ Value *OldV,
+ Value *NewV) const {
Module *M = II->getParent()->getParent()->getParent();
switch (II->getIntrinsicID()) {
@@ -364,7 +364,7 @@ bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
}
}
-void InferAddressSpacesImpl::collectRewritableIntrinsicOperands(
+void InferAddressSpacesImpl::collectRewritableIntrinsicOperands(
IntrinsicInst *II, PostorderStackTy &PostorderStack,
DenseSet<Value *> &Visited) const {
auto IID = II->getIntrinsicID();
@@ -389,7 +389,7 @@ void InferAddressSpacesImpl::collectRewritableIntrinsicOperands(
// Returns all flat address expressions in function F. The elements are
// If V is an unvisited flat address expression, appends V to PostorderStack
// and marks it as visited.
-void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack(
+void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack(
Value *V, PostorderStackTy &PostorderStack,
DenseSet<Value *> &Visited) const {
assert(V->getType()->isPointerTy());
@@ -404,8 +404,8 @@ void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack(
return;
}
- if (V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
- isAddressExpression(*V, *DL, TTI)) {
+ if (V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
+ isAddressExpression(*V, *DL, TTI)) {
if (Visited.insert(V).second) {
PostorderStack.emplace_back(V, false);
@@ -423,7 +423,7 @@ void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack(
// Returns all flat address expressions in function F. The elements are ordered
// ordered in postorder.
std::vector<WeakTrackingVH>
-InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
+InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
// This function implements a non-recursive postorder traversal of a partial
// use-def graph of function F.
PostorderStackTy PostorderStack;
@@ -488,12 +488,12 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
}
// Otherwise, adds its operands to the stack and explores them.
PostorderStack.back().setInt(true);
- // Skip values with an assumed address space.
- if (TTI->getAssumedAddrSpace(TopVal) == UninitializedAddressSpace) {
- for (Value *PtrOperand : getPointerOperands(*TopVal, *DL, TTI)) {
- appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
- Visited);
- }
+ // Skip values with an assumed address space.
+ if (TTI->getAssumedAddrSpace(TopVal) == UninitializedAddressSpace) {
+ for (Value *PtrOperand : getPointerOperands(*TopVal, *DL, TTI)) {
+ appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
+ Visited);
+ }
}
}
return Postorder;
@@ -533,7 +533,7 @@ static Value *operandWithNewAddressSpaceOrCreateUndef(
//
// This may also return nullptr in the case the instruction could not be
// rewritten.
-Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
+Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
Instruction *I, unsigned NewAddrSpace,
const ValueToValueMapTy &ValueWithNewAddrSpace,
SmallVectorImpl<const Use *> *UndefUsesToFix) const {
@@ -568,16 +568,16 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
return nullptr;
}
- unsigned AS = TTI->getAssumedAddrSpace(I);
- if (AS != UninitializedAddressSpace) {
- // For the assumed address space, insert an `addrspacecast` to make that
- // explicit.
- auto *NewPtrTy = I->getType()->getPointerElementType()->getPointerTo(AS);
- auto *NewI = new AddrSpaceCastInst(I, NewPtrTy);
- NewI->insertAfter(I);
- return NewI;
- }
-
+ unsigned AS = TTI->getAssumedAddrSpace(I);
+ if (AS != UninitializedAddressSpace) {
+ // For the assumed address space, insert an `addrspacecast` to make that
+ // explicit.
+ auto *NewPtrTy = I->getType()->getPointerElementType()->getPointerTo(AS);
+ auto *NewI = new AddrSpaceCastInst(I, NewPtrTy);
+ NewI->insertAfter(I);
+ return NewI;
+ }
+
// Computes the converted pointer operands.
SmallVector<Value *, 4> NewPointerOperands;
for (const Use &OperandUse : I->operands()) {
@@ -606,7 +606,7 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
GEP->getSourceElementType(), NewPointerOperands[0],
- SmallVector<Value *, 4>(GEP->indices()));
+ SmallVector<Value *, 4>(GEP->indices()));
NewGEP->setIsInBounds(GEP->isInBounds());
return NewGEP;
}
@@ -718,13 +718,13 @@ static Value *cloneConstantExprWithNewAddressSpace(
// expression whose address space needs to be modified, in postorder.
//
// See cloneInstructionWithNewAddressSpace for the meaning of UndefUsesToFix.
-Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
- Value *V, unsigned NewAddrSpace,
- const ValueToValueMapTy &ValueWithNewAddrSpace,
- SmallVectorImpl<const Use *> *UndefUsesToFix) const {
+Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
+ Value *V, unsigned NewAddrSpace,
+ const ValueToValueMapTy &ValueWithNewAddrSpace,
+ SmallVectorImpl<const Use *> *UndefUsesToFix) const {
// All values in Postorder are flat address expressions.
- assert(V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
- isAddressExpression(*V, *DL, TTI));
+ assert(V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
+ isAddressExpression(*V, *DL, TTI));
if (Instruction *I = dyn_cast<Instruction>(V)) {
Value *NewV = cloneInstructionWithNewAddressSpace(
@@ -744,8 +744,8 @@ Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
// Defines the join operation on the address space lattice (see the file header
// comments).
-unsigned InferAddressSpacesImpl::joinAddressSpaces(unsigned AS1,
- unsigned AS2) const {
+unsigned InferAddressSpacesImpl::joinAddressSpaces(unsigned AS1,
+ unsigned AS2) const {
if (AS1 == FlatAddrSpace || AS2 == FlatAddrSpace)
return FlatAddrSpace;
@@ -758,7 +758,7 @@ unsigned InferAddressSpacesImpl::joinAddressSpaces(unsigned AS1,
return (AS1 == AS2) ? AS1 : FlatAddrSpace;
}
-bool InferAddressSpacesImpl::run(Function &F) {
+bool InferAddressSpacesImpl::run(Function &F) {
DL = &F.getParent()->getDataLayout();
if (AssumeDefaultIsFlatAddressSpace)
@@ -785,7 +785,7 @@ bool InferAddressSpacesImpl::run(Function &F) {
// Constants need to be tracked through RAUW to handle cases with nested
// constant expressions, so wrap values in WeakTrackingVH.
-void InferAddressSpacesImpl::inferAddressSpaces(
+void InferAddressSpacesImpl::inferAddressSpaces(
ArrayRef<WeakTrackingVH> Postorder,
ValueToAddrSpaceMapTy *InferredAddrSpace) const {
SetVector<Value *> Worklist(Postorder.begin(), Postorder.end());
@@ -829,7 +829,7 @@ void InferAddressSpacesImpl::inferAddressSpaces(
}
}
-Optional<unsigned> InferAddressSpacesImpl::updateAddressSpace(
+Optional<unsigned> InferAddressSpacesImpl::updateAddressSpace(
const Value &V, const ValueToAddrSpaceMapTy &InferredAddrSpace) const {
assert(InferredAddrSpace.count(&V));
@@ -867,24 +867,24 @@ Optional<unsigned> InferAddressSpacesImpl::updateAddressSpace(
else
NewAS = joinAddressSpaces(Src0AS, Src1AS);
} else {
- unsigned AS = TTI->getAssumedAddrSpace(&V);
- if (AS != UninitializedAddressSpace) {
- // Use the assumed address space directly.
- NewAS = AS;
- } else {
- // Otherwise, infer the address space from its pointer operands.
- for (Value *PtrOperand : getPointerOperands(V, *DL, TTI)) {
- auto I = InferredAddrSpace.find(PtrOperand);
- unsigned OperandAS =
- I != InferredAddrSpace.end()
- ? I->second
- : PtrOperand->getType()->getPointerAddressSpace();
-
- // join(flat, *) = flat. So we can break if NewAS is already flat.
- NewAS = joinAddressSpaces(NewAS, OperandAS);
- if (NewAS == FlatAddrSpace)
- break;
- }
+ unsigned AS = TTI->getAssumedAddrSpace(&V);
+ if (AS != UninitializedAddressSpace) {
+ // Use the assumed address space directly.
+ NewAS = AS;
+ } else {
+ // Otherwise, infer the address space from its pointer operands.
+ for (Value *PtrOperand : getPointerOperands(V, *DL, TTI)) {
+ auto I = InferredAddrSpace.find(PtrOperand);
+ unsigned OperandAS =
+ I != InferredAddrSpace.end()
+ ? I->second
+ : PtrOperand->getType()->getPointerAddressSpace();
+
+ // join(flat, *) = flat. So we can break if NewAS is already flat.
+ NewAS = joinAddressSpaces(NewAS, OperandAS);
+ if (NewAS == FlatAddrSpace)
+ break;
+ }
}
}
@@ -975,8 +975,8 @@ static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV,
// \p returns true if it is OK to change the address space of constant \p C with
// a ConstantExpr addrspacecast.
-bool InferAddressSpacesImpl::isSafeToCastConstAddrSpace(Constant *C,
- unsigned NewAS) const {
+bool InferAddressSpacesImpl::isSafeToCastConstAddrSpace(Constant *C,
+ unsigned NewAS) const {
assert(NewAS != UninitializedAddressSpace);
unsigned SrcAS = C->getType()->getPointerAddressSpace();
@@ -1015,7 +1015,7 @@ static Value::use_iterator skipToNextUser(Value::use_iterator I,
return I;
}
-bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
+bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
// For each address expression to be modified, creates a clone of it with its
@@ -1026,12 +1026,12 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
SmallVector<const Use *, 32> UndefUsesToFix;
for (Value* V : Postorder) {
unsigned NewAddrSpace = InferredAddrSpace.lookup(V);
-
- // In some degenerate cases (e.g. invalid IR in unreachable code), we may
- // not even infer the value to have its original address space.
- if (NewAddrSpace == UninitializedAddressSpace)
- continue;
-
+
+ // In some degenerate cases (e.g. invalid IR in unreachable code), we may
+ // not even infer the value to have its original address space.
+ if (NewAddrSpace == UninitializedAddressSpace)
+ continue;
+
if (V->getType()->getPointerAddressSpace() != NewAddrSpace) {
Value *New = cloneValueWithNewAddressSpace(
V, NewAddrSpace, ValueWithNewAddrSpace, &UndefUsesToFix);
@@ -1097,9 +1097,9 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
}
User *CurUser = U.getUser();
- // Skip if the current user is the new value itself.
- if (CurUser == NewV)
- continue;
+ // Skip if the current user is the new value itself.
+ if (CurUser == NewV)
+ continue;
// Handle more complex cases like intrinsic that need to be remangled.
if (auto *MI = dyn_cast<MemIntrinsic>(CurUser)) {
if (!MI->isVolatile() && handleMemIntrinsicPtrUse(MI, V, NewV))
@@ -1186,34 +1186,34 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
return true;
}
-bool InferAddressSpaces::runOnFunction(Function &F) {
- if (skipFunction(F))
- return false;
-
- return InferAddressSpacesImpl(
- &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F),
- FlatAddrSpace)
- .run(F);
-}
-
+bool InferAddressSpaces::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ return InferAddressSpacesImpl(
+ &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F),
+ FlatAddrSpace)
+ .run(F);
+}
+
FunctionPass *llvm::createInferAddressSpacesPass(unsigned AddressSpace) {
return new InferAddressSpaces(AddressSpace);
}
-
-InferAddressSpacesPass::InferAddressSpacesPass()
- : FlatAddrSpace(UninitializedAddressSpace) {}
-InferAddressSpacesPass::InferAddressSpacesPass(unsigned AddressSpace)
- : FlatAddrSpace(AddressSpace) {}
-
-PreservedAnalyses InferAddressSpacesPass::run(Function &F,
- FunctionAnalysisManager &AM) {
- bool Changed =
- InferAddressSpacesImpl(&AM.getResult<TargetIRAnalysis>(F), FlatAddrSpace)
- .run(F);
- if (Changed) {
- PreservedAnalyses PA;
- PA.preserveSet<CFGAnalyses>();
- return PA;
- }
- return PreservedAnalyses::all();
-}
+
+InferAddressSpacesPass::InferAddressSpacesPass()
+ : FlatAddrSpace(UninitializedAddressSpace) {}
+InferAddressSpacesPass::InferAddressSpacesPass(unsigned AddressSpace)
+ : FlatAddrSpace(AddressSpace) {}
+
+PreservedAnalyses InferAddressSpacesPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ bool Changed =
+ InferAddressSpacesImpl(&AM.getResult<TargetIRAnalysis>(F), FlatAddrSpace)
+ .run(F);
+ if (Changed) {
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+ }
+ return PreservedAnalyses::all();
+}
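Not part of the patch: a standalone model of the address-space join lattice used by the InferAddressSpaces code above, where join(flat, anything) is flat, the uninitialized state acts as the identity, and two distinct specific spaces generalize to flat. The constants below are placeholders; in the real pass FlatAddrSpace is target-dependent and UninitializedAddressSpace is ~0u.

#include <cassert>
#include <cstdio>

static const unsigned kFlat = 0;            // placeholder flat address space
static const unsigned kUninitialized = ~0u; // placeholder "not yet inferred"

static unsigned joinAddressSpaces(unsigned a, unsigned b) {
  if (a == kFlat || b == kFlat)
    return kFlat;              // join(flat, *) = flat
  if (a == kUninitialized)
    return b;                  // uninitialized is the identity element
  if (b == kUninitialized)
    return a;
  return a == b ? a : kFlat;   // distinct specific spaces generalize to flat
}

int main() {
  // A value whose pointer operands both live in address space 3 stays in 3;
  // mixing spaces 3 and 5 falls back to the flat address space.
  unsigned same = joinAddressSpaces(joinAddressSpaces(kUninitialized, 3), 3);
  unsigned mixed = joinAddressSpaces(3, 5);
  std::printf("same = %u, mixed = %u\n", same, mixed);
  assert(same == 3 && mixed == kFlat);
  return 0;
}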
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/InstSimplifyPass.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/InstSimplifyPass.cpp
index c11d2e4c1d..aeb83643b6 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/InstSimplifyPass.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/InstSimplifyPass.cpp
@@ -20,10 +20,10 @@
#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/Local.h"
-
+
using namespace llvm;
#define DEBUG_TYPE "instsimplify"
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/JumpThreading.cpp
index 10b08b4e22..3e86ad4c14 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/JumpThreading.cpp
@@ -32,7 +32,7 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -105,11 +105,11 @@ static cl::opt<bool> PrintLVIAfterJumpThreading(
cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false),
cl::Hidden);
-static cl::opt<bool> JumpThreadingFreezeSelectCond(
- "jump-threading-freeze-select-cond",
- cl::desc("Freeze the condition when unfolding select"), cl::init(false),
- cl::Hidden);
-
+static cl::opt<bool> JumpThreadingFreezeSelectCond(
+ "jump-threading-freeze-select-cond",
+ cl::desc("Freeze the condition when unfolding select"), cl::init(false),
+ cl::Hidden);
+
static cl::opt<bool> ThreadAcrossLoopHeaders(
"jump-threading-across-loop-headers",
cl::desc("Allow JumpThreading to thread across loop headers, for testing"),
@@ -139,8 +139,8 @@ namespace {
public:
static char ID; // Pass identification
- JumpThreading(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1)
- : FunctionPass(ID), Impl(InsertFreezeWhenUnfoldingSelect, T) {
+ JumpThreading(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1)
+ : FunctionPass(ID), Impl(InsertFreezeWhenUnfoldingSelect, T) {
initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
}
@@ -154,7 +154,7 @@ namespace {
AU.addPreserved<LazyValueInfoWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
}
void releaseMemory() override { Impl.releaseMemory(); }
@@ -174,12 +174,12 @@ INITIALIZE_PASS_END(JumpThreading, "jump-threading",
"Jump Threading", false, false)
// Public interface to the Jump Threading pass
-FunctionPass *llvm::createJumpThreadingPass(bool InsertFr, int Threshold) {
- return new JumpThreading(InsertFr, Threshold);
+FunctionPass *llvm::createJumpThreadingPass(bool InsertFr, int Threshold) {
+ return new JumpThreading(InsertFr, Threshold);
}
-JumpThreadingPass::JumpThreadingPass(bool InsertFr, int T) {
- InsertFreezeWhenUnfoldingSelect = JumpThreadingFreezeSelectCond | InsertFr;
+JumpThreadingPass::JumpThreadingPass(bool InsertFr, int T) {
+ InsertFreezeWhenUnfoldingSelect = JumpThreadingFreezeSelectCond | InsertFr;
DefaultBBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
}
@@ -313,10 +313,10 @@ static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) {
bool JumpThreading::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
- auto TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- // Jump Threading has no sense for the targets with divergent CF
- if (TTI->hasBranchDivergence())
- return false;
+ auto TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ // Jump Threading has no sense for the targets with divergent CF
+ if (TTI->hasBranchDivergence())
+ return false;
auto TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
@@ -341,10 +341,10 @@ bool JumpThreading::runOnFunction(Function &F) {
PreservedAnalyses JumpThreadingPass::run(Function &F,
FunctionAnalysisManager &AM) {
- auto &TTI = AM.getResult<TargetIRAnalysis>(F);
- // Jump Threading has no sense for the targets with divergent CF
- if (TTI.hasBranchDivergence())
- return PreservedAnalyses::all();
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ // Jump Threading has no sense for the targets with divergent CF
+ if (TTI.hasBranchDivergence())
+ return PreservedAnalyses::all();
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &LVI = AM.getResult<LazyValueAnalysis>(F);
@@ -362,11 +362,11 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
bool Changed = runImpl(F, &TLI, &LVI, &AA, &DTU, F.hasProfileData(),
std::move(BFI), std::move(BPI));
- if (PrintLVIAfterJumpThreading) {
- dbgs() << "LVI for function '" << F.getName() << "':\n";
- LVI.printLVI(F, DTU.getDomTree(), dbgs());
- }
-
+ if (PrintLVIAfterJumpThreading) {
+ dbgs() << "LVI for function '" << F.getName() << "':\n";
+ LVI.printLVI(F, DTU.getDomTree(), dbgs());
+ }
+
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
@@ -419,7 +419,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
Unreachable.insert(&BB);
if (!ThreadAcrossLoopHeaders)
- findLoopHeaders(F);
+ findLoopHeaders(F);
bool EverChanged = false;
bool Changed;
@@ -428,7 +428,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
for (auto &BB : F) {
if (Unreachable.count(&BB))
continue;
- while (processBlock(&BB)) // Thread all of the branches we can over BB.
+ while (processBlock(&BB)) // Thread all of the branches we can over BB.
Changed = true;
// Jump threading may have introduced redundant debug values into BB
@@ -443,7 +443,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
continue;
if (pred_empty(&BB)) {
- // When processBlock makes BB unreachable it doesn't bother to fix up
+ // When processBlock makes BB unreachable it doesn't bother to fix up
// the instructions in it. We must remove BB to prevent invalid IR.
LLVM_DEBUG(dbgs() << " JT: Deleting dead block '" << BB.getName()
<< "' with terminator: " << *BB.getTerminator()
@@ -455,7 +455,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
continue;
}
- // processBlock doesn't thread BBs with unconditional TIs. However, if BB
+ // processBlock doesn't thread BBs with unconditional TIs. However, if BB
// is "almost empty", we attempt to merge BB with its sole successor.
auto *BI = dyn_cast<BranchInst>(BB.getTerminator());
if (BI && BI->isUnconditional()) {
@@ -489,7 +489,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
// at the end of block. RAUW unconditionally replaces all uses
// including the guards/assumes themselves and the uses before the
// guard/assume.
-static void replaceFoldableUses(Instruction *Cond, Value *ToVal) {
+static void replaceFoldableUses(Instruction *Cond, Value *ToVal) {
assert(Cond->getType() == ToVal->getType());
auto *BB = Cond->getParent();
// We can unconditionally replace all uses in non-local blocks (i.e. uses
@@ -553,18 +553,18 @@ static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
// Debugger intrinsics don't incur code size.
if (isa<DbgInfoIntrinsic>(I)) continue;
- // Pseudo-probes don't incur code size.
- if (isa<PseudoProbeInst>(I))
- continue;
-
+ // Pseudo-probes don't incur code size.
+ if (isa<PseudoProbeInst>(I))
+ continue;
+
// If this is a pointer->pointer bitcast, it is free.
if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
continue;
- // Freeze instruction is free, too.
- if (isa<FreezeInst>(I))
- continue;
-
+ // Freeze instruction is free, too.
+ if (isa<FreezeInst>(I))
+ continue;
+
// Bail out if this instruction gives back a token type, it is not possible
// to duplicate it if it is used outside this BB.
if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))
@@ -592,7 +592,7 @@ static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
return Size > Bonus ? Size - Bonus : 0;
}
-/// findLoopHeaders - We do not want jump threading to turn proper loop
+/// findLoopHeaders - We do not want jump threading to turn proper loop
/// structures into irreducible loops. Doing this breaks up the loop nesting
/// hierarchy and pessimizes later transformations. To prevent this from
/// happening, we first have to find the loop headers. Here we approximate this
@@ -606,7 +606,7 @@ static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
/// within the loop (forming a nested loop). This simple analysis is not rich
/// enough to track all of these properties and keep it up-to-date as the CFG
/// mutates, so we don't allow any of these transformations.
-void JumpThreadingPass::findLoopHeaders(Function &F) {
+void JumpThreadingPass::findLoopHeaders(Function &F) {
SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
FindFunctionBackedges(F, Edges);
@@ -633,13 +633,13 @@ static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
return dyn_cast<ConstantInt>(Val);
}
-/// computeValueKnownInPredecessors - Given a basic block BB and a value V, see
+/// computeValueKnownInPredecessors - Given a basic block BB and a value V, see
/// if we can infer that the value is a known ConstantInt/BlockAddress or undef
/// in any of our predecessors. If so, return the known list of value and pred
/// BB in the result vector.
///
/// This returns true if there were any known values.
-bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
+bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
Value *V, BasicBlock *BB, PredValueInfo &Result,
ConstantPreference Preference, DenseSet<Value *> &RecursionSet,
Instruction *CxtI) {
@@ -704,10 +704,10 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
return !Result.empty();
}
- // Handle Cast instructions.
+ // Handle Cast instructions.
if (CastInst *CI = dyn_cast<CastInst>(I)) {
Value *Source = CI->getOperand(0);
- computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
+ computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
RecursionSet, CxtI);
if (Result.empty())
return false;
@@ -719,18 +719,18 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
return true;
}
- if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
- Value *Source = FI->getOperand(0);
- computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
- RecursionSet, CxtI);
-
- erase_if(Result, [](auto &Pair) {
- return !isGuaranteedNotToBeUndefOrPoison(Pair.first);
- });
-
- return !Result.empty();
- }
-
+ if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
+ Value *Source = FI->getOperand(0);
+ computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
+ RecursionSet, CxtI);
+
+ erase_if(Result, [](auto &Pair) {
+ return !isGuaranteedNotToBeUndefOrPoison(Pair.first);
+ });
+
+ return !Result.empty();
+ }
+
// Handle some boolean conditions.
if (I->getType()->getPrimitiveSizeInBits() == 1) {
assert(Preference == WantInteger && "One-bit non-integer type?");
@@ -740,9 +740,9 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
I->getOpcode() == Instruction::And) {
PredValueInfoTy LHSVals, RHSVals;
- computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
+ computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
WantInteger, RecursionSet, CxtI);
- computeValueKnownInPredecessorsImpl(I->getOperand(1), BB, RHSVals,
+ computeValueKnownInPredecessorsImpl(I->getOperand(1), BB, RHSVals,
WantInteger, RecursionSet, CxtI);
if (LHSVals.empty() && RHSVals.empty())
@@ -778,7 +778,7 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
if (I->getOpcode() == Instruction::Xor &&
isa<ConstantInt>(I->getOperand(1)) &&
cast<ConstantInt>(I->getOperand(1))->isOne()) {
- computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, Result,
+ computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, Result,
WantInteger, RecursionSet, CxtI);
if (Result.empty())
return false;
@@ -796,7 +796,7 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
&& "A binary operator creating a block address?");
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
PredValueInfoTy LHSVals;
- computeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals,
+ computeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals,
WantInteger, RecursionSet, CxtI);
// Try to use constant folding to simplify the binary operator.
@@ -930,7 +930,7 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
// Try to find a constant value for the LHS of a comparison,
// and evaluate it statically if we can.
PredValueInfoTy LHSVals;
- computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
+ computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
WantInteger, RecursionSet, CxtI);
for (const auto &LHSVal : LHSVals) {
@@ -951,7 +951,7 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
PredValueInfoTy Conds;
if ((TrueVal || FalseVal) &&
- computeValueKnownInPredecessorsImpl(SI->getCondition(), BB, Conds,
+ computeValueKnownInPredecessorsImpl(SI->getCondition(), BB, Conds,
WantInteger, RecursionSet, CxtI)) {
for (auto &C : Conds) {
Constant *Cond = C.first;
@@ -979,8 +979,8 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
}
// If all else fails, see if LVI can figure out a constant value for us.
- assert(CxtI->getParent() == BB && "CxtI should be in BB");
- Constant *CI = LVI->getConstant(V, CxtI);
+ assert(CxtI->getParent() == BB && "CxtI should be in BB");
+ Constant *CI = LVI->getConstant(V, CxtI);
if (Constant *KC = getKnownConstant(CI, Preference)) {
for (BasicBlock *Pred : predecessors(BB))
Result.emplace_back(KC, Pred);
@@ -994,7 +994,7 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
///
/// Since we can pick an arbitrary destination, we pick the successor with the
/// fewest predecessors. This should reduce the in-degree of the others.
-static unsigned getBestDestForJumpOnUndef(BasicBlock *BB) {
+static unsigned getBestDestForJumpOnUndef(BasicBlock *BB) {
Instruction *BBTerm = BB->getTerminator();
unsigned MinSucc = 0;
BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
@@ -1022,9 +1022,9 @@ static bool hasAddressTakenAndUsed(BasicBlock *BB) {
return !BA->use_empty();
}
-/// processBlock - If there are any predecessors whose control can be threaded
+/// processBlock - If there are any predecessors whose control can be threaded
/// through to a successor, transform them now.
-bool JumpThreadingPass::processBlock(BasicBlock *BB) {
+bool JumpThreadingPass::processBlock(BasicBlock *BB) {
// If the block is trivially dead, just return and let the caller nuke it.
// This simplifies other transformations.
if (DTU->isBBPendingDeletion(BB) ||
@@ -1035,14 +1035,14 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) {
// successor, merge the blocks. This encourages recursive jump threading
// because now the condition in this block can be threaded through
// predecessors of our predecessor block.
- if (maybeMergeBasicBlockIntoOnlyPred(BB))
+ if (maybeMergeBasicBlockIntoOnlyPred(BB))
return true;
- if (tryToUnfoldSelectInCurrBB(BB))
+ if (tryToUnfoldSelectInCurrBB(BB))
return true;
// Look if we can propagate guards to predecessors.
- if (HasGuards && processGuards(BB))
+ if (HasGuards && processGuards(BB))
return true;
// What kind of constant we're looking for.
@@ -1067,9 +1067,9 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) {
return false; // Must be an invoke or callbr.
}
- // Keep track if we constant folded the condition in this invocation.
- bool ConstantFolded = false;
-
+ // Keep track if we constant folded the condition in this invocation.
+ bool ConstantFolded = false;
+
// Run constant folding to see if we can reduce the condition to a simple
// constant.
if (Instruction *I = dyn_cast<Instruction>(Condition)) {
@@ -1080,16 +1080,16 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) {
if (isInstructionTriviallyDead(I, TLI))
I->eraseFromParent();
Condition = SimpleVal;
- ConstantFolded = true;
+ ConstantFolded = true;
}
}
- // If the terminator is branching on an undef or freeze undef, we can pick any
- // of the successors to branch to. Let getBestDestForJumpOnUndef decide.
- auto *FI = dyn_cast<FreezeInst>(Condition);
- if (isa<UndefValue>(Condition) ||
- (FI && isa<UndefValue>(FI->getOperand(0)) && FI->hasOneUse())) {
- unsigned BestSucc = getBestDestForJumpOnUndef(BB);
+ // If the terminator is branching on an undef or freeze undef, we can pick any
+ // of the successors to branch to. Let getBestDestForJumpOnUndef decide.
+ auto *FI = dyn_cast<FreezeInst>(Condition);
+ if (isa<UndefValue>(Condition) ||
+ (FI && isa<UndefValue>(FI->getOperand(0)) && FI->hasOneUse())) {
+ unsigned BestSucc = getBestDestForJumpOnUndef(BB);
std::vector<DominatorTree::UpdateType> Updates;
// Fold the branch/switch.
@@ -1107,8 +1107,8 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) {
BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
BBTerm->eraseFromParent();
DTU->applyUpdatesPermissive(Updates);
- if (FI)
- FI->eraseFromParent();
+ if (FI)
+ FI->eraseFromParent();
return true;
}
@@ -1121,8 +1121,8 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) {
<< '\n');
++NumFolds;
ConstantFoldTerminator(BB, true, nullptr, DTU);
- if (HasProfileData)
- BPI->eraseBlock(BB);
+ if (HasProfileData)
+ BPI->eraseBlock(BB);
return true;
}
@@ -1131,9 +1131,9 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) {
// All the rest of our checks depend on the condition being an instruction.
if (!CondInst) {
// FIXME: Unify this with code below.
- if (processThreadableEdges(Condition, BB, Preference, Terminator))
+ if (processThreadableEdges(Condition, BB, Preference, Terminator))
return true;
- return ConstantFolded;
+ return ConstantFolded;
}
if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
@@ -1174,24 +1174,24 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) {
auto *CI = Ret == LazyValueInfo::True ?
ConstantInt::getTrue(CondCmp->getType()) :
ConstantInt::getFalse(CondCmp->getType());
- replaceFoldableUses(CondCmp, CI);
+ replaceFoldableUses(CondCmp, CI);
}
DTU->applyUpdatesPermissive(
{{DominatorTree::Delete, BB, ToRemoveSucc}});
- if (HasProfileData)
- BPI->eraseBlock(BB);
+ if (HasProfileData)
+ BPI->eraseBlock(BB);
return true;
}
// We did not manage to simplify this branch, try to see whether
// CondCmp depends on a known phi-select pattern.
- if (tryToUnfoldSelect(CondCmp, BB))
+ if (tryToUnfoldSelect(CondCmp, BB))
return true;
}
}
if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
- if (tryToUnfoldSelect(SI, BB))
+ if (tryToUnfoldSelect(SI, BB))
return true;
// Check for some cases that are worth simplifying. Right now we want to look
@@ -1199,11 +1199,11 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) {
// we see one, check to see if it's partially redundant. If so, insert a PHI
// which can then be used to thread the values.
Value *SimplifyValue = CondInst;
-
- if (auto *FI = dyn_cast<FreezeInst>(SimplifyValue))
- // Look into freeze's operand
- SimplifyValue = FI->getOperand(0);
-
+
+ if (auto *FI = dyn_cast<FreezeInst>(SimplifyValue))
+ // Look into freeze's operand
+ SimplifyValue = FI->getOperand(0);
+
if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
if (isa<Constant>(CondCmp->getOperand(1)))
SimplifyValue = CondCmp->getOperand(0);
@@ -1211,7 +1211,7 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) {
// TODO: There are other places where load PRE would be profitable, such as
// more complex comparisons.
if (LoadInst *LoadI = dyn_cast<LoadInst>(SimplifyValue))
- if (simplifyPartiallyRedundantLoad(LoadI))
+ if (simplifyPartiallyRedundantLoad(LoadI))
return true;
// Before threading, try to propagate profile data backwards:
@@ -1222,32 +1222,32 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) {
// Handle a variety of cases where we are branching on something derived from
// a PHI node in the current block. If we can prove that any predecessors
// compute a predictable value based on a PHI node, thread those predecessors.
- if (processThreadableEdges(CondInst, BB, Preference, Terminator))
+ if (processThreadableEdges(CondInst, BB, Preference, Terminator))
return true;
- // If this is an otherwise-unfoldable branch on a phi node or freeze(phi) in
- // the current block, see if we can simplify.
- PHINode *PN = dyn_cast<PHINode>(
- isa<FreezeInst>(CondInst) ? cast<FreezeInst>(CondInst)->getOperand(0)
- : CondInst);
-
- if (PN && PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
- return processBranchOnPHI(PN);
+ // If this is an otherwise-unfoldable branch on a phi node or freeze(phi) in
+ // the current block, see if we can simplify.
+ PHINode *PN = dyn_cast<PHINode>(
+ isa<FreezeInst>(CondInst) ? cast<FreezeInst>(CondInst)->getOperand(0)
+ : CondInst);
+ if (PN && PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
+ return processBranchOnPHI(PN);
+
// If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
if (CondInst->getOpcode() == Instruction::Xor &&
CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
- return processBranchOnXOR(cast<BinaryOperator>(CondInst));
+ return processBranchOnXOR(cast<BinaryOperator>(CondInst));
// Search for a stronger dominating condition that can be used to simplify a
// conditional branch leaving BB.
- if (processImpliedCondition(BB))
+ if (processImpliedCondition(BB))
return true;
return false;
}
-bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) {
+bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) {
auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
if (!BI || !BI->isConditional())
return false;
@@ -1277,8 +1277,8 @@ bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) {
UncondBI->setDebugLoc(BI->getDebugLoc());
BI->eraseFromParent();
DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, RemoveSucc}});
- if (HasProfileData)
- BPI->eraseBlock(BB);
+ if (HasProfileData)
+ BPI->eraseBlock(BB);
return true;
}
CurrentBB = CurrentPred;
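
As a reader aid for the hunk above: processImpliedCondition looks for a dominating branch whose condition implies the branch condition in BB, then folds the implied branch. A hand-written, source-level C++ sketch of the effect (function names invented, not LLVM code or a test case):

// before: the inner test is implied by the dominating outer test.
int beforeImplied(int x) {
  if (x > 10) {
    if (x > 5)      // always true here: x > 10 implies x > 5
      return 1;
    return 2;       // dead on this path
  }
  return 0;
}

// after: the implied branch becomes an unconditional jump to its taken
// successor, mirroring the rewrite the hunk above performs on the IR.
int afterImplied(int x) {
  if (x > 10)
    return 1;
  return 0;
}
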
@@ -1296,11 +1296,11 @@ static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB) {
return false;
}
-/// simplifyPartiallyRedundantLoad - If LoadI is an obviously partially
+/// simplifyPartiallyRedundantLoad - If LoadI is an obviously partially
/// redundant load instruction, eliminate it by replacing it with a PHI node.
/// This is an important optimization that encourages jump threading, and needs
/// to be run interlaced with other jump threading tasks.
-bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
+bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
// Don't hack volatile and ordered loads.
if (!LoadI->isUnordered()) return false;
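
The doc comment above describes load PRE driven by jump threading. A rough source-level illustration (hand-written C++; the names and the before/after shapes are illustrative, not taken from LLVM's tests):

// before: the load of *p at the join is redundant on the path that has
// already loaded it.
int beforeLoadPRE(const int *p, bool c) {
  int t = 0;
  if (c)
    t = *p;          // load available on this predecessor
  return t + *p;     // partially redundant load at the join
}

// after: the load is duplicated into the predecessor where it was missing,
// and the join uses a merged (PHI-like) value instead of reloading.
int afterLoadPRE(const int *p, bool c) {
  int t, v;
  if (c) {
    t = *p;
    v = t;           // reuse the value already loaded on this path
  } else {
    t = 0;
    v = *p;          // load inserted on the previously unavailable path
  }
  return t + v;
}
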
@@ -1470,7 +1470,7 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
}
// Split them out to their own block.
- UnavailablePred = splitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split");
+ UnavailablePred = splitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split");
}
// If the value isn't available in all predecessors, then there will be
@@ -1534,11 +1534,11 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
return true;
}
-/// findMostPopularDest - The specified list contains multiple possible
+/// findMostPopularDest - The specified list contains multiple possible
/// threadable destinations. Pick the one that occurs the most frequently in
/// the list.
static BasicBlock *
-findMostPopularDest(BasicBlock *BB,
+findMostPopularDest(BasicBlock *BB,
const SmallVectorImpl<std::pair<BasicBlock *,
BasicBlock *>> &PredToDestList) {
assert(!PredToDestList.empty());
@@ -1573,7 +1573,7 @@ findMostPopularDest(BasicBlock *BB,
// Try to evaluate the value of V when the control flows from PredPredBB to
// BB->getSinglePredecessor() and then on to BB.
-Constant *JumpThreadingPass::evaluateOnPredecessorEdge(BasicBlock *BB,
+Constant *JumpThreadingPass::evaluateOnPredecessorEdge(BasicBlock *BB,
BasicBlock *PredPredBB,
Value *V) {
BasicBlock *PredBB = BB->getSinglePredecessor();
@@ -1600,9 +1600,9 @@ Constant *JumpThreadingPass::evaluateOnPredecessorEdge(BasicBlock *BB,
if (CmpInst *CondCmp = dyn_cast<CmpInst>(V)) {
if (CondCmp->getParent() == BB) {
Constant *Op0 =
- evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(0));
+ evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(0));
Constant *Op1 =
- evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(1));
+ evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(1));
if (Op0 && Op1) {
return ConstantExpr::getCompare(CondCmp->getPredicate(), Op0, Op1);
}
@@ -1613,7 +1613,7 @@ Constant *JumpThreadingPass::evaluateOnPredecessorEdge(BasicBlock *BB,
return nullptr;
}
-bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB,
+bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB,
ConstantPreference Preference,
Instruction *CxtI) {
// If threading this would thread across a loop header, don't even try to
@@ -1622,15 +1622,15 @@ bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB,
return false;
PredValueInfoTy PredValues;
- if (!computeValueKnownInPredecessors(Cond, BB, PredValues, Preference,
+ if (!computeValueKnownInPredecessors(Cond, BB, PredValues, Preference,
CxtI)) {
// We don't have known values in predecessors. See if we can thread through
// BB and its sole predecessor.
- return maybethreadThroughTwoBasicBlocks(BB, Cond);
+ return maybethreadThroughTwoBasicBlocks(BB, Cond);
}
assert(!PredValues.empty() &&
- "computeValueKnownInPredecessors returned true with no values");
+ "computeValueKnownInPredecessors returned true with no values");
LLVM_DEBUG(dbgs() << "IN BB: " << *BB;
for (const auto &PredValue : PredValues) {
@@ -1722,8 +1722,8 @@ bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB,
BranchInst::Create(OnlyDest, Term);
Term->eraseFromParent();
DTU->applyUpdatesPermissive(Updates);
- if (HasProfileData)
- BPI->eraseBlock(BB);
+ if (HasProfileData)
+ BPI->eraseBlock(BB);
// If the condition is now dead due to the removal of the old terminator,
// erase it.
@@ -1739,7 +1739,7 @@ bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB,
// guard/assume.
else if (OnlyVal && OnlyVal != MultipleVal &&
CondInst->getParent() == BB)
- replaceFoldableUses(CondInst, OnlyVal);
+ replaceFoldableUses(CondInst, OnlyVal);
}
return true;
}
@@ -1752,18 +1752,18 @@ bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB,
BasicBlock *MostPopularDest = OnlyDest;
if (MostPopularDest == MultipleDestSentinel) {
- // Remove any loop headers from the Dest list, threadEdge conservatively
+ // Remove any loop headers from the Dest list, threadEdge conservatively
     // won't process them, but we might have other destinations that are eligible
     // and that we still want to process.
erase_if(PredToDestList,
[&](const std::pair<BasicBlock *, BasicBlock *> &PredToDest) {
- return LoopHeaders.contains(PredToDest.second);
+ return LoopHeaders.contains(PredToDest.second);
});
if (PredToDestList.empty())
return false;
- MostPopularDest = findMostPopularDest(BB, PredToDestList);
+ MostPopularDest = findMostPopularDest(BB, PredToDestList);
}
// Now that we know what the most popular destination is, factor all
@@ -1785,16 +1785,16 @@ bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB,
// the destination that these predecessors should get to.
if (!MostPopularDest)
MostPopularDest = BB->getTerminator()->
- getSuccessor(getBestDestForJumpOnUndef(BB));
+ getSuccessor(getBestDestForJumpOnUndef(BB));
// Ok, try to thread it!
- return tryThreadEdge(BB, PredsToFactor, MostPopularDest);
+ return tryThreadEdge(BB, PredsToFactor, MostPopularDest);
}
-/// processBranchOnPHI - We have an otherwise unthreadable conditional branch on
-/// a PHI node (or freeze PHI) in the current block. See if there are any
-/// simplifications we can do based on inputs to the phi node.
-bool JumpThreadingPass::processBranchOnPHI(PHINode *PN) {
+/// processBranchOnPHI - We have an otherwise unthreadable conditional branch on
+/// a PHI node (or freeze PHI) in the current block. See if there are any
+/// simplifications we can do based on inputs to the phi node.
+bool JumpThreadingPass::processBranchOnPHI(PHINode *PN) {
BasicBlock *BB = PN->getParent();
// TODO: We could make use of this to do it once for blocks with common PHI
@@ -1806,16 +1806,16 @@ bool JumpThreadingPass::processBranchOnPHI(PHINode *PN) {
// *duplicate* the conditional branch into that block in order to further
// encourage jump threading and to eliminate cases where we have branch on a
// phi of an icmp (branch on icmp is much better).
- // This is still beneficial when a frozen phi is used as the branch condition
- // because it allows CodeGenPrepare to further canonicalize br(freeze(icmp))
- // to br(icmp(freeze ...)).
+ // This is still beneficial when a frozen phi is used as the branch condition
+ // because it allows CodeGenPrepare to further canonicalize br(freeze(icmp))
+ // to br(icmp(freeze ...)).
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *PredBB = PN->getIncomingBlock(i);
if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
if (PredBr->isUnconditional()) {
PredBBs[0] = PredBB;
// Try to duplicate BB into PredBB.
- if (duplicateCondBranchOnPHIIntoPred(BB, PredBBs))
+ if (duplicateCondBranchOnPHIIntoPred(BB, PredBBs))
return true;
}
}
@@ -1823,10 +1823,10 @@ bool JumpThreadingPass::processBranchOnPHI(PHINode *PN) {
return false;
}
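
The comment above explains why the conditional branch is duplicated into unconditional predecessors: it turns a branch on a phi of an icmp into a branch on the icmp itself. A hedged source-level sketch (invented names, plain C++, not LLVM code):

// before: the join block branches on a flag that is really a phi of a
// compare and a constant.
int beforePhiBranch(int a, int b, bool x) {
  bool p = x ? (a < b) : false;
  if (p) return 1;
  return 0;
}

// after duplicating the branch into each predecessor: the constant-false
// input folds away and the other copy branches directly on the compare.
int afterPhiBranch(int a, int b, bool x) {
  if (x) {
    if (a < b) return 1;   // branch on the compare itself
    return 0;
  }
  return 0;                // folded constant-false arm
}
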
-/// processBranchOnXOR - We have an otherwise unthreadable conditional branch on
+/// processBranchOnXOR - We have an otherwise unthreadable conditional branch on
/// a xor instruction in the current block. See if there are any
/// simplifications we can do based on inputs to the xor.
-bool JumpThreadingPass::processBranchOnXOR(BinaryOperator *BO) {
+bool JumpThreadingPass::processBranchOnXOR(BinaryOperator *BO) {
BasicBlock *BB = BO->getParent();
// If either the LHS or RHS of the xor is a constant, don't do this
@@ -1864,17 +1864,17 @@ bool JumpThreadingPass::processBranchOnXOR(BinaryOperator *BO) {
PredValueInfoTy XorOpValues;
bool isLHS = true;
- if (!computeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
+ if (!computeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
WantInteger, BO)) {
assert(XorOpValues.empty());
- if (!computeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
+ if (!computeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
WantInteger, BO))
return false;
isLHS = false;
}
assert(!XorOpValues.empty() &&
- "computeValueKnownInPredecessors returned true with no values");
+ "computeValueKnownInPredecessors returned true with no values");
// Scan the information to see which is most popular: true or false. The
// predecessors can be of the set true, false, or undef.
@@ -1935,13 +1935,13 @@ bool JumpThreadingPass::processBranchOnXOR(BinaryOperator *BO) {
return false;
// Try to duplicate BB into PredBB.
- return duplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
+ return duplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
}
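
For the xor case handled above, a similar hand-written sketch (illustrative C++ only): once the xor operand fed by the phi is known to be a constant in each predecessor, the duplicated copies simplify to branches on the remaining operand.

// before: the branch condition is (p ^ invert), where p is a known
// constant in every predecessor.
int beforeXorBranch(bool invert, bool x) {
  bool p = x ? true : false;
  if (p ^ invert) return 1;
  return 0;
}

// after duplicating the xor + branch into the predecessors:
int afterXorBranch(bool invert, bool x) {
  if (x) {
    if (!invert) return 1;   // true ^ invert
    return 0;
  }
  if (invert) return 1;      // false ^ invert
  return 0;
}
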
-/// addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
+/// addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
/// predecessor to the PHIBB block. If it has PHI nodes, add entries for
/// NewPred using the entries from OldPred (suitably mapped).
-static void addPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
+static void addPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
BasicBlock *OldPred,
BasicBlock *NewPred,
DenseMap<Instruction*, Value*> &ValueMap) {
@@ -1962,7 +1962,7 @@ static void addPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
}
/// Merge basic block BB into its sole predecessor if possible.
-bool JumpThreadingPass::maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB) {
+bool JumpThreadingPass::maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB) {
BasicBlock *SinglePred = BB->getSinglePredecessor();
if (!SinglePred)
return false;
@@ -2013,7 +2013,7 @@ bool JumpThreadingPass::maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB) {
/// Update the SSA form. NewBB contains instructions that are copied from BB.
/// ValueMapping maps old values in BB to new ones in NewBB.
-void JumpThreadingPass::updateSSA(
+void JumpThreadingPass::updateSSA(
BasicBlock *BB, BasicBlock *NewBB,
DenseMap<Instruction *, Value *> &ValueMapping) {
// If there were values defined in BB that are used outside the block, then we
@@ -2059,7 +2059,7 @@ void JumpThreadingPass::updateSSA(
/// arguments that come from PredBB. Return the map from the variables in the
/// source basic block to the variables in the newly created basic block.
DenseMap<Instruction *, Value *>
-JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
+JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
BasicBlock::iterator BE, BasicBlock *NewBB,
BasicBlock *PredBB) {
// We are going to have to map operands from the source basic block to the new
@@ -2076,15 +2076,15 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
ValueMapping[PN] = NewPN;
}
- // Clone noalias scope declarations in the threaded block. When threading a
-  // loop exit, we would otherwise end up with two identical scope declarations
- // visible at the same time.
- SmallVector<MDNode *> NoAliasScopes;
- DenseMap<MDNode *, MDNode *> ClonedScopes;
- LLVMContext &Context = PredBB->getContext();
- identifyNoAliasScopesToClone(BI, BE, NoAliasScopes);
- cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context);
-
+ // Clone noalias scope declarations in the threaded block. When threading a
+  // loop exit, we would otherwise end up with two identical scope declarations
+ // visible at the same time.
+ SmallVector<MDNode *> NoAliasScopes;
+ DenseMap<MDNode *, MDNode *> ClonedScopes;
+ LLVMContext &Context = PredBB->getContext();
+ identifyNoAliasScopesToClone(BI, BE, NoAliasScopes);
+ cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context);
+
// Clone the non-phi instructions of the source basic block into NewBB,
// keeping track of the mapping and using it to remap operands in the cloned
// instructions.
@@ -2093,7 +2093,7 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
New->setName(BI->getName());
NewBB->getInstList().push_back(New);
ValueMapping[&*BI] = New;
- adaptNoAliasScopes(New, ClonedScopes, Context);
+ adaptNoAliasScopes(New, ClonedScopes, Context);
// Remap operands to patch up intra-block references.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
@@ -2108,7 +2108,7 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
}
/// Attempt to thread through two successive basic blocks.
-bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB,
+bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB,
Value *Cond) {
// Consider:
//
@@ -2177,7 +2177,7 @@ bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB,
BasicBlock *OnePred = nullptr;
for (BasicBlock *P : predecessors(PredBB)) {
if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(
- evaluateOnPredecessorEdge(BB, P, Cond))) {
+ evaluateOnPredecessorEdge(BB, P, Cond))) {
if (CI->isZero()) {
ZeroCount++;
ZeroPred = P;
@@ -2208,7 +2208,7 @@ bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB,
}
// If threading this would thread across a loop header, don't thread the edge.
- // See the comments above findLoopHeaders for justifications and caveats.
+ // See the comments above findLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
LLVM_DEBUG({
bool BBIsHeader = LoopHeaders.count(BB);
@@ -2241,11 +2241,11 @@ bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB,
}
// Now we are ready to duplicate PredBB.
- threadThroughTwoBasicBlocks(PredPredBB, PredBB, BB, SuccBB);
+ threadThroughTwoBasicBlocks(PredPredBB, PredBB, BB, SuccBB);
return true;
}
-void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
+void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
BasicBlock *PredBB,
BasicBlock *BB,
BasicBlock *SuccBB) {
@@ -2271,12 +2271,12 @@ void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
// copy of the block 'NewBB'. If there are PHI nodes in PredBB, evaluate them
// to account for entry from PredPredBB.
DenseMap<Instruction *, Value *> ValueMapping =
- cloneInstructions(PredBB->begin(), PredBB->end(), NewBB, PredPredBB);
-
- // Copy the edge probabilities from PredBB to NewBB.
- if (HasProfileData)
- BPI->copyEdgeProbabilities(PredBB, NewBB);
+ cloneInstructions(PredBB->begin(), PredBB->end(), NewBB, PredPredBB);
+ // Copy the edge probabilities from PredBB to NewBB.
+ if (HasProfileData)
+ BPI->copyEdgeProbabilities(PredBB, NewBB);
+
// Update the terminator of PredPredBB to jump to NewBB instead of PredBB.
// This eliminates predecessors from PredPredBB, which requires us to simplify
// any PHI nodes in PredBB.
@@ -2287,9 +2287,9 @@ void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
PredPredTerm->setSuccessor(i, NewBB);
}
- addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(0), PredBB, NewBB,
+ addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(0), PredBB, NewBB,
ValueMapping);
- addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(1), PredBB, NewBB,
+ addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(1), PredBB, NewBB,
ValueMapping);
DTU->applyUpdatesPermissive(
@@ -2298,7 +2298,7 @@ void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
{DominatorTree::Insert, PredPredBB, NewBB},
{DominatorTree::Delete, PredPredBB, PredBB}});
- updateSSA(PredBB, NewBB, ValueMapping);
+ updateSSA(PredBB, NewBB, ValueMapping);
// Clean up things like PHI nodes with single operands, dead instructions,
// etc.
@@ -2307,11 +2307,11 @@ void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
SmallVector<BasicBlock *, 1> PredsToFactor;
PredsToFactor.push_back(NewBB);
- threadEdge(BB, PredsToFactor, SuccBB);
+ threadEdge(BB, PredsToFactor, SuccBB);
}
-/// tryThreadEdge - Thread an edge if it's safe and profitable to do so.
-bool JumpThreadingPass::tryThreadEdge(
+/// tryThreadEdge - Thread an edge if it's safe and profitable to do so.
+bool JumpThreadingPass::tryThreadEdge(
BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
BasicBlock *SuccBB) {
// If threading to the same block as we come from, we would infinite loop.
@@ -2322,7 +2322,7 @@ bool JumpThreadingPass::tryThreadEdge(
}
// If threading this would thread across a loop header, don't thread the edge.
- // See the comments above findLoopHeaders for justifications and caveats.
+ // See the comments above findLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
LLVM_DEBUG({
bool BBIsHeader = LoopHeaders.count(BB);
@@ -2343,14 +2343,14 @@ bool JumpThreadingPass::tryThreadEdge(
return false;
}
- threadEdge(BB, PredBBs, SuccBB);
+ threadEdge(BB, PredBBs, SuccBB);
return true;
}
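
tryThreadEdge and threadEdge below implement the classic transformation; as a reminder of what it buys at the source level, here is a hand-written example (not an LLVM test): the predecessor that sets the flag is known to take the true side of the second branch, so its edge is threaded straight to that target and the re-test disappears on that path.

// before
int beforeThread(int x) {
  bool flag = false;
  if (x > 0)
    flag = true;
  if (flag) return 1;
  return 0;
}

// after
int afterThread(int x) {
  if (x > 0)
    return 1;   // threaded: this predecessor jumps directly to the target
  return 0;
}
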
-/// threadEdge - We have decided that it is safe and profitable to factor the
+/// threadEdge - We have decided that it is safe and profitable to factor the
/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
/// across BB. Transform the IR to reflect this change.
-void JumpThreadingPass::threadEdge(BasicBlock *BB,
+void JumpThreadingPass::threadEdge(BasicBlock *BB,
const SmallVectorImpl<BasicBlock *> &PredBBs,
BasicBlock *SuccBB) {
assert(SuccBB != BB && "Don't create an infinite loop");
@@ -2365,7 +2365,7 @@ void JumpThreadingPass::threadEdge(BasicBlock *BB,
else {
LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
+ PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
}
// And finally, do it!
@@ -2389,7 +2389,7 @@ void JumpThreadingPass::threadEdge(BasicBlock *BB,
// Copy all the instructions from BB to NewBB except the terminator.
DenseMap<Instruction *, Value *> ValueMapping =
- cloneInstructions(BB->begin(), std::prev(BB->end()), NewBB, PredBB);
+ cloneInstructions(BB->begin(), std::prev(BB->end()), NewBB, PredBB);
// We didn't copy the terminator from BB over to NewBB, because there is now
// an unconditional jump to SuccBB. Insert the unconditional jump.
@@ -2398,7 +2398,7 @@ void JumpThreadingPass::threadEdge(BasicBlock *BB,
// Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
// PHI nodes for NewBB now.
- addPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
+ addPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
// Update the terminator of PredBB to jump to NewBB instead of BB. This
// eliminates predecessors from BB, which requires us to simplify any PHI
@@ -2415,7 +2415,7 @@ void JumpThreadingPass::threadEdge(BasicBlock *BB,
{DominatorTree::Insert, PredBB, NewBB},
{DominatorTree::Delete, PredBB, BB}});
- updateSSA(BB, NewBB, ValueMapping);
+ updateSSA(BB, NewBB, ValueMapping);
// At this point, the IR is fully up to date and consistent. Do a quick scan
// over the new instructions and zap any that are constants or dead. This
@@ -2423,7 +2423,7 @@ void JumpThreadingPass::threadEdge(BasicBlock *BB,
SimplifyInstructionsInBlock(NewBB, TLI);
// Update the edge weight from BB to SuccBB, which should be less than before.
- updateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB);
+ updateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB);
// Threaded an edge!
++NumThreads;
@@ -2432,7 +2432,7 @@ void JumpThreadingPass::threadEdge(BasicBlock *BB,
/// Create a new basic block that will be the predecessor of BB and successor of
/// all blocks in Preds. When profile data is available, update the frequency of
/// this new block.
-BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB,
+BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB,
ArrayRef<BasicBlock *> Preds,
const char *Suffix) {
SmallVector<BasicBlock *, 2> NewBBs;
@@ -2493,7 +2493,7 @@ bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
/// Update the block frequency of BB and branch weight and the metadata on the
/// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -
/// Freq(PredBB->BB) / Freq(BB->SuccBB).
-void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
+void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
BasicBlock *BB,
BasicBlock *NewBB,
BasicBlock *SuccBB) {
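
The scaling formula mentioned in the comment above is easy to misread, so here is a toy calculation with made-up frequencies (standalone C++, not part of the pass):

#include <cstdio>

int main() {
  // invented numbers: Freq(PredBB->BB) = 30, Freq(BB->SuccBB) = 120
  double freqPredToBB = 30.0;
  double freqBBToSucc = 120.0;
  // the remaining BB->SuccBB weight is scaled by 1 - 30/120 = 0.75
  double scale = 1.0 - freqPredToBB / freqBBToSucc;
  std::printf("scale = %.2f\n", scale);
  return 0;
}
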
@@ -2585,18 +2585,18 @@ void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
}
}
-/// duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
+/// duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
/// to BB which contains an i1 PHI node and a conditional branch on that PHI.
/// If we can duplicate the contents of BB up into PredBB do so now, this
/// improves the odds that the branch will be on an analyzable instruction like
/// a compare.
-bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
+bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs) {
assert(!PredBBs.empty() && "Can't handle an empty set");
// If BB is a loop header, then duplicating this block outside the loop would
// cause us to transform this into an irreducible loop, don't do this.
- // See the comments above findLoopHeaders for justifications and caveats.
+ // See the comments above findLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB)) {
LLVM_DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()
<< "' into predecessor block '" << PredBBs[0]->getName()
@@ -2620,7 +2620,7 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
else {
LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
+ PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
}
Updates.push_back({DominatorTree::Delete, PredBB, BB});
@@ -2692,12 +2692,12 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
// Check to see if the targets of the branch had PHI nodes. If so, we need to
// add entries to the PHI nodes for branch from PredBB now.
BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
- addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
+ addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
ValueMapping);
- addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
+ addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
ValueMapping);
- updateSSA(BB, PredBB, ValueMapping);
+ updateSSA(BB, PredBB, ValueMapping);
// PredBB no longer jumps to BB, remove entries in the PHI node for the edge
// that we nuked.
@@ -2705,8 +2705,8 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
// Remove the unconditional branch at the end of the PredBB block.
OldPredBranch->eraseFromParent();
- if (HasProfileData)
- BPI->copyEdgeProbabilities(BB, PredBB);
+ if (HasProfileData)
+ BPI->copyEdgeProbabilities(BB, PredBB);
DTU->applyUpdatesPermissive(Updates);
++NumDupes;
@@ -2718,7 +2718,7 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
// a PHI node in BB. SI has no other use.
// A new basic block, NewBB, is created and SI is converted to compare and
// conditional branch. SI is erased from parent.
-void JumpThreadingPass::unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB,
+void JumpThreadingPass::unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB,
SelectInst *SI, PHINode *SIUse,
unsigned Idx) {
// Expand the select.
@@ -2753,7 +2753,7 @@ void JumpThreadingPass::unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB,
Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
}
-bool JumpThreadingPass::tryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB) {
+bool JumpThreadingPass::tryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB) {
PHINode *CondPHI = dyn_cast<PHINode>(SI->getCondition());
if (!CondPHI || CondPHI->getParent() != BB)
@@ -2765,7 +2765,7 @@ bool JumpThreadingPass::tryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB) {
    // The second and third conditions can potentially be relaxed. Currently
// the conditions help to simplify the code and allow us to reuse existing
- // code, developed for tryToUnfoldSelect(CmpInst *, BasicBlock *)
+ // code, developed for tryToUnfoldSelect(CmpInst *, BasicBlock *)
if (!PredSI || PredSI->getParent() != Pred || !PredSI->hasOneUse())
continue;
@@ -2773,13 +2773,13 @@ bool JumpThreadingPass::tryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB) {
if (!PredTerm || !PredTerm->isUnconditional())
continue;
- unfoldSelectInstr(Pred, BB, PredSI, CondPHI, I);
+ unfoldSelectInstr(Pred, BB, PredSI, CondPHI, I);
return true;
}
return false;
}
-/// tryToUnfoldSelect - Look for blocks of the form
+/// tryToUnfoldSelect - Look for blocks of the form
/// bb1:
/// %a = select
/// br bb2
@@ -2791,7 +2791,7 @@ bool JumpThreadingPass::tryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB) {
///
/// And expand the select into a branch structure if one of its arms allows %c
/// to be folded. This later enables threading from bb1 over bb2.
-bool JumpThreadingPass::tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
+bool JumpThreadingPass::tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
PHINode *CondLHS = dyn_cast<PHINode>(CondCmp->getOperand(0));
Constant *CondRHS = cast<Constant>(CondCmp->getOperand(1));
@@ -2825,14 +2825,14 @@ bool JumpThreadingPass::tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
if ((LHSFolds != LazyValueInfo::Unknown ||
RHSFolds != LazyValueInfo::Unknown) &&
LHSFolds != RHSFolds) {
- unfoldSelectInstr(Pred, BB, SI, CondLHS, I);
+ unfoldSelectInstr(Pred, BB, SI, CondLHS, I);
return true;
}
}
return false;
}
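
For the bb1/bb2 pattern documented above, a source-level sketch of the unfolding (hand-written C++, names invented): once the select is expanded into its own branch, the constant arm lets the compare fold, and threading can route that path directly to its final destination.

// before: the compare in the join block depends on a select from one
// predecessor.
int beforeUnfold(bool c, int b) {
  int p = c ? 0 : b;       // select feeding the phi
  if (p > 10) return 1;    // compare on the phi
  return 0;
}

// after unfolding the select into a branch:
int afterUnfold(bool c, int b) {
  if (c) return 0;         // 0 > 10 folds to false
  if (b > 10) return 1;
  return 0;
}
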
-/// tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the
+/// tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the
/// same BB in the form
/// bb:
/// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
@@ -2852,14 +2852,14 @@ bool JumpThreadingPass::tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
/// select if the associated PHI has at least one constant. If the unfolded
/// select is not jump-threaded, it will be folded again in the later
/// optimizations.
-bool JumpThreadingPass::tryToUnfoldSelectInCurrBB(BasicBlock *BB) {
- // This transform would reduce the quality of msan diagnostics.
+bool JumpThreadingPass::tryToUnfoldSelectInCurrBB(BasicBlock *BB) {
+ // This transform would reduce the quality of msan diagnostics.
// Disable this transform under MemorySanitizer.
if (BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory))
return false;
// If threading this would thread across a loop header, don't thread the edge.
- // See the comments above findLoopHeaders for justifications and caveats.
+ // See the comments above findLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB))
return false;
@@ -2902,12 +2902,12 @@ bool JumpThreadingPass::tryToUnfoldSelectInCurrBB(BasicBlock *BB) {
if (!SI)
continue;
// Expand the select.
- Value *Cond = SI->getCondition();
- if (InsertFreezeWhenUnfoldingSelect &&
- !isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI,
- &DTU->getDomTree()))
- Cond = new FreezeInst(Cond, "cond.fr", SI);
- Instruction *Term = SplitBlockAndInsertIfThen(Cond, SI, false);
+ Value *Cond = SI->getCondition();
+ if (InsertFreezeWhenUnfoldingSelect &&
+ !isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI,
+ &DTU->getDomTree()))
+ Cond = new FreezeInst(Cond, "cond.fr", SI);
+ Instruction *Term = SplitBlockAndInsertIfThen(Cond, SI, false);
BasicBlock *SplitBB = SI->getParent();
BasicBlock *NewBB = Term->getParent();
PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI);
@@ -2951,7 +2951,7 @@ bool JumpThreadingPass::tryToUnfoldSelectInCurrBB(BasicBlock *BB) {
/// And cond either implies condGuard or !condGuard. In this case all the
/// instructions before the guard can be duplicated in both branches, and the
/// guard is then threaded to one of them.
-bool JumpThreadingPass::processGuards(BasicBlock *BB) {
+bool JumpThreadingPass::processGuards(BasicBlock *BB) {
using namespace PatternMatch;
// We only want to deal with two predecessors.
@@ -2976,7 +2976,7 @@ bool JumpThreadingPass::processGuards(BasicBlock *BB) {
if (auto *BI = dyn_cast<BranchInst>(Parent->getTerminator()))
for (auto &I : *BB)
- if (isGuard(&I) && threadGuard(BB, cast<IntrinsicInst>(&I), BI))
+ if (isGuard(&I) && threadGuard(BB, cast<IntrinsicInst>(&I), BI))
return true;
return false;
@@ -2985,7 +2985,7 @@ bool JumpThreadingPass::processGuards(BasicBlock *BB) {
/// Try to propagate the guard from BB which is the lower block of a diamond
/// to one of its branches, in case if diamond's condition implies guard's
/// condition.
-bool JumpThreadingPass::threadGuard(BasicBlock *BB, IntrinsicInst *Guard,
+bool JumpThreadingPass::threadGuard(BasicBlock *BB, IntrinsicInst *Guard,
BranchInst *BI) {
assert(BI->getNumSuccessors() == 2 && "Wrong number of successors?");
assert(BI->isConditional() && "Unconditional branch has 2 successors?");
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LICM.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LICM.cpp
index d2b4ba296f..6db37000d4 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LICM.cpp
@@ -12,13 +12,13 @@
// safe. This pass also promotes must-aliased memory locations in the loop to
// live in registers, thus hoisting and sinking "invariant" loads and stores.
//
-// Hoisting operations out of loops is a canonicalization transform. It
-// enables and simplifies subsequent optimizations in the middle-end.
-// Rematerialization of hoisted instructions to reduce register pressure is the
-// responsibility of the back-end, which has more accurate information about
-// register pressure and also handles other optimizations than LICM that
-// increase live-ranges.
-//
+// Hoisting operations out of loops is a canonicalization transform. It
+// enables and simplifies subsequent optimizations in the middle-end.
+// Rematerialization of hoisted instructions to reduce register pressure is the
+// responsibility of the back-end, which has more accurate information about
+// register pressure and also handles other optimizations than LICM that
+// increase live-ranges.
+//
// This pass uses alias analysis for two purposes:
//
// 1. Moving loop invariant loads and calls out of loops. If we can determine
@@ -42,12 +42,12 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/GuardUtils.h"
-#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
@@ -107,11 +107,11 @@ static cl::opt<bool> ControlFlowHoisting(
"licm-control-flow-hoisting", cl::Hidden, cl::init(false),
cl::desc("Enable control flow (and PHI) hoisting in LICM"));
-static cl::opt<unsigned> HoistSinkColdnessThreshold(
- "licm-coldness-threshold", cl::Hidden, cl::init(4),
- cl::desc("Relative coldness Threshold of hoisting/sinking destination "
- "block for LICM to be considered beneficial"));
-
+static cl::opt<unsigned> HoistSinkColdnessThreshold(
+ "licm-coldness-threshold", cl::Hidden, cl::init(4),
+ cl::desc("Relative coldness Threshold of hoisting/sinking destination "
+ "block for LICM to be considered beneficial"));
+
static cl::opt<uint32_t> MaxNumUsesTraversed(
"licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
cl::desc("Max num uses visited for identifying load "
@@ -157,9 +157,9 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
OptimizationRemarkEmitter *ORE);
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
- BlockFrequencyInfo *BFI, const Loop *CurLoop,
- ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
- OptimizationRemarkEmitter *ORE);
+ BlockFrequencyInfo *BFI, const Loop *CurLoop,
+ ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
+ OptimizationRemarkEmitter *ORE);
static bool isSafeToExecuteUnconditionally(Instruction &Inst,
const DominatorTree *DT,
const Loop *CurLoop,
@@ -170,10 +170,10 @@ static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
AliasSetTracker *CurAST, Loop *CurLoop,
AAResults *AA);
static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
- Loop *CurLoop, Instruction &I,
+ Loop *CurLoop, Instruction &I,
SinkAndHoistLICMFlags &Flags);
-static bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA,
- MemoryUse &MU);
+static bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA,
+ MemoryUse &MU);
static Instruction *cloneInstructionInExitBlock(
Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);
@@ -188,8 +188,8 @@ static void moveInstructionBefore(Instruction &I, Instruction &Dest,
namespace {
struct LoopInvariantCodeMotion {
bool runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
- BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI,
- TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA,
+ BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI,
+ TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA,
OptimizationRemarkEmitter *ORE);
LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
@@ -221,30 +221,30 @@ struct LegacyLICMPass : public LoopPass {
if (skipLoop(L))
return false;
- LLVM_DEBUG(dbgs() << "Perform LICM on Loop with header at block "
- << L->getHeader()->getNameOrAsOperand() << "\n");
-
+ LLVM_DEBUG(dbgs() << "Perform LICM on Loop with header at block "
+ << L->getHeader()->getNameOrAsOperand() << "\n");
+
auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
MemorySSA *MSSA = EnableMSSALoopDependency
? (&getAnalysis<MemorySSAWrapperPass>().getMSSA())
: nullptr;
- bool hasProfileData = L->getHeader()->getParent()->hasProfileData();
- BlockFrequencyInfo *BFI =
- hasProfileData ? &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI()
- : nullptr;
+ bool hasProfileData = L->getHeader()->getParent()->hasProfileData();
+ BlockFrequencyInfo *BFI =
+ hasProfileData ? &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI()
+ : nullptr;
// For the old PM, we can't use OptimizationRemarkEmitter as an analysis
- // pass. Function analyses need to be preserved across loop transformations
+ // pass. Function analyses need to be preserved across loop transformations
// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
- return LICM.runOnLoop(
- L, &getAnalysis<AAResultsWrapperPass>().getAAResults(),
- &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
- &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), BFI,
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
- *L->getHeader()->getParent()),
- &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
- *L->getHeader()->getParent()),
- SE ? &SE->getSE() : nullptr, MSSA, &ORE);
+ return LICM.runOnLoop(
+ L, &getAnalysis<AAResultsWrapperPass>().getAAResults(),
+ &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
+ &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), BFI,
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
+ *L->getHeader()->getParent()),
+ &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+ *L->getHeader()->getParent()),
+ SE ? &SE->getSE() : nullptr, MSSA, &ORE);
}
/// This transformation requires natural loop information & requires that
@@ -260,9 +260,9 @@ struct LegacyLICMPass : public LoopPass {
}
AU.addRequired<TargetTransformInfoWrapperPass>();
getLoopAnalysisUsage(AU);
- LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
- AU.addPreserved<LazyBlockFrequencyInfoPass>();
- AU.addPreserved<LazyBranchProbabilityInfoPass>();
+ LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
+ AU.addPreserved<LazyBlockFrequencyInfoPass>();
+ AU.addPreserved<LazyBranchProbabilityInfoPass>();
}
private:
@@ -278,8 +278,8 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
- if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI,
- &AR.SE, AR.MSSA, &ORE))
+ if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI,
+ &AR.SE, AR.MSSA, &ORE))
return PreservedAnalyses::all();
auto PA = getLoopPassPreservedAnalyses();
@@ -299,7 +299,7 @@ INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LazyBFIPass)
+INITIALIZE_PASS_DEPENDENCY(LazyBFIPass)
INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
false)
@@ -309,42 +309,42 @@ Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,
return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
}
-llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop *L,
- MemorySSA *MSSA)
- : SinkAndHoistLICMFlags(SetLicmMssaOptCap, SetLicmMssaNoAccForPromotionCap,
- IsSink, L, MSSA) {}
-
-llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(
- unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
- Loop *L, MemorySSA *MSSA)
- : LicmMssaOptCap(LicmMssaOptCap),
- LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
- IsSink(IsSink) {
- assert(((L != nullptr) == (MSSA != nullptr)) &&
- "Unexpected values for SinkAndHoistLICMFlags");
- if (!MSSA)
- return;
-
- unsigned AccessCapCount = 0;
- for (auto *BB : L->getBlocks())
- if (const auto *Accesses = MSSA->getBlockAccesses(BB))
- for (const auto &MA : *Accesses) {
- (void)MA;
- ++AccessCapCount;
- if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
- NoOfMemAccTooLarge = true;
- return;
- }
- }
-}
-
+llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop *L,
+ MemorySSA *MSSA)
+ : SinkAndHoistLICMFlags(SetLicmMssaOptCap, SetLicmMssaNoAccForPromotionCap,
+ IsSink, L, MSSA) {}
+
+llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(
+ unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
+ Loop *L, MemorySSA *MSSA)
+ : LicmMssaOptCap(LicmMssaOptCap),
+ LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
+ IsSink(IsSink) {
+ assert(((L != nullptr) == (MSSA != nullptr)) &&
+ "Unexpected values for SinkAndHoistLICMFlags");
+ if (!MSSA)
+ return;
+
+ unsigned AccessCapCount = 0;
+ for (auto *BB : L->getBlocks())
+ if (const auto *Accesses = MSSA->getBlockAccesses(BB))
+ for (const auto &MA : *Accesses) {
+ (void)MA;
+ ++AccessCapCount;
+ if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
+ NoOfMemAccTooLarge = true;
+ return;
+ }
+ }
+}
+
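
Before runOnLoop below, a textbook source-level example of the hoisting the file header describes (hand-written, not an LLVM test; a multiply is used instead of a load so the example is trivially safe to speculate):

// before: a loop-invariant computation is redone on every iteration.
int beforeLICM(int a, int b, int n) {
  int sum = 0;
  for (int i = 0; i < n; ++i)
    sum += a * b;
  return sum;
}

// after: the invariant multiply is hoisted into the preheader.
int afterLICM(int a, int b, int n) {
  int inv = a * b;
  int sum = 0;
  for (int i = 0; i < n; ++i)
    sum += inv;
  return sum;
}
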
/// Hoist expressions out of the specified loop. Note, alias info for inner
/// loop is not preserved so it is not a good idea to run LICM multiple
/// times on one loop.
bool LoopInvariantCodeMotion::runOnLoop(
Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
- BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
- ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE) {
+ BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
+ ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE) {
bool Changed = false;
assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
@@ -357,18 +357,18 @@ bool LoopInvariantCodeMotion::runOnLoop(
std::unique_ptr<AliasSetTracker> CurAST;
std::unique_ptr<MemorySSAUpdater> MSSAU;
- std::unique_ptr<SinkAndHoistLICMFlags> Flags;
+ std::unique_ptr<SinkAndHoistLICMFlags> Flags;
if (!MSSA) {
LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
CurAST = collectAliasInfoForLoop(L, LI, AA);
- Flags = std::make_unique<SinkAndHoistLICMFlags>(
- LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*IsSink=*/true);
+ Flags = std::make_unique<SinkAndHoistLICMFlags>(
+ LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*IsSink=*/true);
} else {
LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n");
MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
- Flags = std::make_unique<SinkAndHoistLICMFlags>(
- LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*IsSink=*/true, L, MSSA);
+ Flags = std::make_unique<SinkAndHoistLICMFlags>(
+ LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*IsSink=*/true, L, MSSA);
}
// Get the preheader block to move instructions into...
@@ -388,14 +388,14 @@ bool LoopInvariantCodeMotion::runOnLoop(
// us to sink instructions in one pass, without iteration. After sinking
// instructions, we perform another pass to hoist them out of the loop.
if (L->hasDedicatedExits())
- Changed |=
- sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L,
- CurAST.get(), MSSAU.get(), &SafetyInfo, *Flags.get(), ORE);
- Flags->setIsSink(false);
+ Changed |=
+ sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L,
+ CurAST.get(), MSSAU.get(), &SafetyInfo, *Flags.get(), ORE);
+ Flags->setIsSink(false);
if (Preheader)
- Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
- CurAST.get(), MSSAU.get(), SE, &SafetyInfo,
- *Flags.get(), ORE);
+ Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
+ CurAST.get(), MSSAU.get(), SE, &SafetyInfo,
+ *Flags.get(), ORE);
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.
@@ -405,7 +405,7 @@ bool LoopInvariantCodeMotion::runOnLoop(
// preheader for SSA updater, so also avoid sinking when no preheader
// is available.
if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
- !Flags->tooManyMemoryAccesses()) {
+ !Flags->tooManyMemoryAccesses()) {
// Figure out the loop exits and their insertion points
SmallVector<BasicBlock *, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
@@ -474,7 +474,7 @@ bool LoopInvariantCodeMotion::runOnLoop(
// specifically moving instructions across the loop boundary and so it is
// especially in need of sanity checking here.
assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
- assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) &&
+ assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) &&
"Parent loop not left in LCSSA form after LICM!");
if (MSSAU.get() && VerifyMemorySSA)
@@ -491,10 +491,10 @@ bool LoopInvariantCodeMotion::runOnLoop(
/// definitions, allowing us to sink a loop body in one pass without iteration.
///
bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
- DominatorTree *DT, BlockFrequencyInfo *BFI,
- TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
- Loop *CurLoop, AliasSetTracker *CurAST,
- MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
+ DominatorTree *DT, BlockFrequencyInfo *BFI,
+ TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
+ Loop *CurLoop, AliasSetTracker *CurAST,
+ MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags,
OptimizationRemarkEmitter *ORE) {
@@ -543,7 +543,7 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
ORE)) {
- if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) {
+ if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) {
if (!FreeInLoop) {
++II;
salvageDebugInfo(I);
@@ -627,7 +627,7 @@ public:
else if (!TrueDestSucc.empty()) {
Function *F = TrueDest->getParent();
auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
- auto It = llvm::find_if(*F, IsSucc);
+ auto It = llvm::find_if(*F, IsSucc);
assert(It != F->end() && "Could not find successor in function");
CommonSucc = &*It;
}
@@ -695,15 +695,15 @@ public:
return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
Pair.first->getSuccessor(1) == BB);
};
- auto It = llvm::find_if(HoistableBranches, HasBBAsSuccessor);
+ auto It = llvm::find_if(HoistableBranches, HasBBAsSuccessor);
// If not involved in a pending branch, hoist to preheader
BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
if (It == HoistableBranches.end()) {
- LLVM_DEBUG(dbgs() << "LICM using "
- << InitialPreheader->getNameOrAsOperand()
- << " as hoist destination for "
- << BB->getNameOrAsOperand() << "\n");
+ LLVM_DEBUG(dbgs() << "LICM using "
+ << InitialPreheader->getNameOrAsOperand()
+ << " as hoist destination for "
+ << BB->getNameOrAsOperand() << "\n");
HoistDestinationMap[BB] = InitialPreheader;
return InitialPreheader;
}
@@ -788,43 +788,43 @@ public:
};
} // namespace
-// Hoisting/sinking an instruction out of a loop isn't always beneficial. It's
-// only worthwhile if the destination block is actually colder than the current
-// block.
-static bool worthSinkOrHoistInst(Instruction &I, BasicBlock *DstBlock,
- OptimizationRemarkEmitter *ORE,
- BlockFrequencyInfo *BFI) {
- // Check block frequency only when runtime profile is available
- // to avoid pathological cases. With static profile, lean towards
-  // hoisting because it helps canonicalize the loop for the vectorizer.
- if (!DstBlock->getParent()->hasProfileData())
- return true;
-
- if (!HoistSinkColdnessThreshold || !BFI)
- return true;
-
- BasicBlock *SrcBlock = I.getParent();
- if (BFI->getBlockFreq(DstBlock).getFrequency() / HoistSinkColdnessThreshold >
- BFI->getBlockFreq(SrcBlock).getFrequency()) {
- ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "SinkHoistInst", &I)
- << "failed to sink or hoist instruction because containing block "
- "has lower frequency than destination block";
- });
- return false;
- }
-
- return true;
-}
-
+// Hoisting/sinking an instruction out of a loop isn't always beneficial. It's
+// only worthwhile if the destination block is actually colder than the current
+// block.
+static bool worthSinkOrHoistInst(Instruction &I, BasicBlock *DstBlock,
+ OptimizationRemarkEmitter *ORE,
+ BlockFrequencyInfo *BFI) {
+ // Check block frequency only when runtime profile is available
+ // to avoid pathological cases. With static profile, lean towards
+  // hoisting because it helps canonicalize the loop for the vectorizer.
+ if (!DstBlock->getParent()->hasProfileData())
+ return true;
+
+ if (!HoistSinkColdnessThreshold || !BFI)
+ return true;
+
+ BasicBlock *SrcBlock = I.getParent();
+ if (BFI->getBlockFreq(DstBlock).getFrequency() / HoistSinkColdnessThreshold >
+ BFI->getBlockFreq(SrcBlock).getFrequency()) {
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "SinkHoistInst", &I)
+ << "failed to sink or hoist instruction because containing block "
+ "has lower frequency than destination block";
+ });
+ return false;
+ }
+
+ return true;
+}
+
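
A minimal standalone sketch of the frequency check above, assuming the default licm-coldness-threshold of 4 (the helper name and the numbers are invented; this is not LICM's API):

#include <cstdint>
#include <cstdio>

// mirrors the rejection test above: give up when
// Freq(Dst) / Threshold > Freq(Src), i.e. the destination is much hotter.
bool worthMoving(uint64_t dstFreq, uint64_t srcFreq, unsigned threshold = 4) {
  return dstFreq / threshold <= srcFreq;
}

int main() {
  std::printf("%d\n", worthMoving(300, 100));   // 75 <= 100 -> move
  std::printf("%d\n", worthMoving(1000, 100));  // 250 > 100 -> keep in place
  return 0;
}
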
/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
/// uses, allowing us to hoist a loop body in one pass without iteration.
///
bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
- DominatorTree *DT, BlockFrequencyInfo *BFI,
- TargetLibraryInfo *TLI, Loop *CurLoop,
+ DominatorTree *DT, BlockFrequencyInfo *BFI,
+ TargetLibraryInfo *TLI, Loop *CurLoop,
AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
ScalarEvolution *SE, ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags,
@@ -875,15 +875,15 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
// Try hoisting the instruction out to the preheader. We can only do
// this if all of the operands of the instruction are loop invariant and
- // if it is safe to hoist the instruction. We also check block frequency
- // to make sure instruction only gets hoisted into colder blocks.
+ // if it is safe to hoist the instruction. We also check block frequency
+ // to make sure instruction only gets hoisted into colder blocks.
// TODO: It may be safe to hoist if we are hoisting to a conditional block
// and we have accurately duplicated the control flow from the loop header
// to that block.
if (CurLoop->hasLoopInvariantOperands(&I) &&
canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
ORE) &&
- worthSinkOrHoistInst(I, CurLoop->getLoopPreheader(), ORE, BFI) &&
+ worthSinkOrHoistInst(I, CurLoop->getLoopPreheader(), ORE, BFI) &&
isSafeToExecuteUnconditionally(
I, DT, CurLoop, SafetyInfo, ORE,
CurLoop->getLoopPreheader()->getTerminator())) {
@@ -982,7 +982,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
HoistPoint = Dominator->getTerminator();
}
LLVM_DEBUG(dbgs() << "LICM rehoisting to "
- << HoistPoint->getParent()->getNameOrAsOperand()
+ << HoistPoint->getParent()->getNameOrAsOperand()
<< ": " << *I << "\n");
moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU, SE);
HoistPoint = I;
@@ -1014,20 +1014,20 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
Loop *CurLoop) {
Value *Addr = LI->getOperand(0);
const DataLayout &DL = LI->getModule()->getDataLayout();
- const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
-
- // It is not currently possible for clang to generate an invariant.start
- // intrinsic with scalable vector types because we don't support thread local
- // sizeless types and we don't permit sizeless types in structs or classes.
- // Furthermore, even if support is added for this in future the intrinsic
- // itself is defined to have a size of -1 for variable sized objects. This
- // makes it impossible to verify if the intrinsic envelops our region of
- // interest. For example, both <vscale x 32 x i8> and <vscale x 16 x i8>
- // types would have a -1 parameter, but the former is clearly double the size
- // of the latter.
- if (LocSizeInBits.isScalable())
- return false;
-
+ const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
+
+ // It is not currently possible for clang to generate an invariant.start
+ // intrinsic with scalable vector types because we don't support thread local
+ // sizeless types and we don't permit sizeless types in structs or classes.
+ // Furthermore, even if support is added for this in future the intrinsic
+ // itself is defined to have a size of -1 for variable sized objects. This
+ // makes it impossible to verify if the intrinsic envelops our region of
+ // interest. For example, both <vscale x 32 x i8> and <vscale x 16 x i8>
+ // types would have a -1 parameter, but the former is clearly double the size
+ // of the latter.
+ if (LocSizeInBits.isScalable())
+ return false;
+
// if the type is i8 addrspace(x)*, we know this is the type of
// llvm.invariant.start operand
auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
@@ -1056,17 +1056,17 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
!II->use_empty())
continue;
- ConstantInt *InvariantSize = cast<ConstantInt>(II->getArgOperand(0));
- // The intrinsic supports having a -1 argument for variable sized objects
- // so we should check for that here.
- if (InvariantSize->isNegative())
- continue;
- uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * 8;
+ ConstantInt *InvariantSize = cast<ConstantInt>(II->getArgOperand(0));
+ // The intrinsic supports having a -1 argument for variable sized objects
+ // so we should check for that here.
+ if (InvariantSize->isNegative())
+ continue;
+ uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * 8;
// Confirm the invariant.start location size contains the load operand size
// in bits. Also, the invariant.start should dominate the load, and we
// should not hoist the load out of a loop that contains this dominating
// invariant.start.
- if (LocSizeInBits.getFixedSize() <= InvariantSizeInBits &&
+ if (LocSizeInBits.getFixedSize() <= InvariantSizeInBits &&
DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
return true;
}
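
A small numeric sketch of the size check performed above (standalone C++ with invented values; the real code works on ConstantInt and TypeSize):

#include <cstdint>
#include <cstdio>

int main() {
  int64_t invariantSizeBytes = 8;   // llvm.invariant.start size argument
  uint64_t locSizeInBits = 32;      // e.g. a load of i32
  // a negative size means a variable-sized object and is skipped, matching
  // the isNegative() check above.
  bool covered = invariantSizeBytes >= 0 &&
                 locSizeInBits <= uint64_t(invariantSizeBytes) * 8;
  std::printf("covered = %d\n", covered);       // 32 <= 64 -> 1
  return 0;
}
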
@@ -1131,9 +1131,9 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
bool TargetExecutesOncePerLoop,
SinkAndHoistLICMFlags *Flags,
OptimizationRemarkEmitter *ORE) {
- assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
- "Either AliasSetTracker or MemorySSA should be initialized.");
-
+ assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
+ "Either AliasSetTracker or MemorySSA should be initialized.");
+
// If we don't understand the instruction, bail early.
if (!isHoistableAndSinkableInst(I))
return false;
@@ -1167,7 +1167,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
CurLoop, AA);
else
Invalidated = pointerInvalidatedByLoopWithMSSA(
- MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, I, *Flags);
+ MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, I, *Flags);
// Check loop-invariant address because this may also be a sinkable load
// whose address is not necessarily loop-invariant.
if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
@@ -1188,13 +1188,13 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
if (CI->mayThrow())
return false;
- // Convergent attribute has been used on operations that involve
- // inter-thread communication whose results are implicitly affected by the
- // enclosing control flows. It is not safe to hoist or sink such operations
- // across control flow.
- if (CI->isConvergent())
- return false;
-
+ // Convergent attribute has been used on operations that involve
+ // inter-thread communication whose results are implicitly affected by the
+ // enclosing control flows. It is not safe to hoist or sink such operations
+ // across control flow.
+ if (CI->isConvergent())
+ return false;
+
using namespace PatternMatch;
if (match(CI, m_Intrinsic<Intrinsic::assume>()))
// Assumes don't actually alias anything or throw
@@ -1219,10 +1219,10 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
bool Invalidated;
if (CurAST)
Invalidated = pointerInvalidatedByLoop(
- MemoryLocation::getBeforeOrAfter(Op), CurAST, CurLoop, AA);
+ MemoryLocation::getBeforeOrAfter(Op), CurAST, CurLoop, AA);
else
Invalidated = pointerInvalidatedByLoopWithMSSA(
- MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, I,
+ MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, I,
*Flags);
if (Invalidated)
return false;
@@ -1282,9 +1282,9 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
} else { // MSSAU
if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
return true;
- // If there are more accesses than the Promotion cap or no "quota" to
- // check clobber, then give up as we're not walking a list that long.
- if (Flags->tooManyMemoryAccesses() || Flags->tooManyClobberingCalls())
+ // If there are more accesses than the Promotion cap or no "quota" to
+ // check clobber, then give up as we're not walking a list that long.
+ if (Flags->tooManyMemoryAccesses() || Flags->tooManyClobberingCalls())
return false;
// If there are interfering Uses (i.e. their defining access is in the
// loop), or ordered loads (stored as Defs!), don't move this store.
@@ -1304,7 +1304,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
// Uses may point to an access outside the loop, as getClobbering
// checks the previous iteration when walking the backedge.
// FIXME: More precise: no Uses that alias SI.
- if (!Flags->getIsSink() && !MSSA->dominates(SIMD, MU))
+ if (!Flags->getIsSink() && !MSSA->dominates(SIMD, MU))
return false;
} else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
@@ -1324,7 +1324,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
}
}
auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
- Flags->incrementClobberingCalls();
+ Flags->incrementClobberingCalls();
// If there are no clobbering Defs in the loop, store is safe to hoist.
return MSSA->isLiveOnEntryDef(Source) ||
!CurLoop->contains(Source->getBlock());
@@ -1624,9 +1624,9 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
/// position, and may either delete it or move it to outside of the loop.
///
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
- BlockFrequencyInfo *BFI, const Loop *CurLoop,
- ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
- OptimizationRemarkEmitter *ORE) {
+ BlockFrequencyInfo *BFI, const Loop *CurLoop,
+ ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
+ OptimizationRemarkEmitter *ORE) {
LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
@@ -1702,10 +1702,10 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
// If this instruction is only used outside of the loop, then all users are
// PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
// the instruction.
- // First check if I is worth sinking for all uses. Sink only when it is worthwhile
- // across all uses.
+ // First check if I is worth sinking for all uses. Sink only when it is worthwhile
+ // across all uses.
SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end());
- SmallVector<PHINode *, 8> ExitPNs;
+ SmallVector<PHINode *, 8> ExitPNs;
for (auto *UI : Users) {
auto *User = cast<Instruction>(UI);
@@ -1715,15 +1715,15 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
PHINode *PN = cast<PHINode>(User);
assert(ExitBlockSet.count(PN->getParent()) &&
"The LCSSA PHI is not in an exit block!");
- if (!worthSinkOrHoistInst(I, PN->getParent(), ORE, BFI)) {
- return Changed;
- }
-
- ExitPNs.push_back(PN);
- }
-
- for (auto *PN : ExitPNs) {
-
+ if (!worthSinkOrHoistInst(I, PN->getParent(), ORE, BFI)) {
+ return Changed;
+ }
+
+ ExitPNs.push_back(PN);
+ }
+
+ for (auto *PN : ExitPNs) {
+
// The PHI must be trivially replaceable.
Instruction *New = sinkThroughTriviallyReplaceablePHI(
PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
@@ -1741,8 +1741,8 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
OptimizationRemarkEmitter *ORE) {
- LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getNameOrAsOperand() << ": "
- << I << "\n");
+ LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getNameOrAsOperand() << ": "
+ << I << "\n");
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
<< ore::NV("Inst", &I);
@@ -1766,7 +1766,7 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
// Move the new node to the destination block, before its terminator.
moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU, SE);
- I.updateLocationAfterHoist();
+ I.updateLocationAfterHoist();
if (isa<LoadInst>(I))
++NumMovedLoads;
@@ -1812,7 +1812,7 @@ class LoopPromoter : public LoadAndStorePromoter {
SmallVectorImpl<Instruction *> &LoopInsertPts;
SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
PredIteratorCache &PredCache;
- AliasSetTracker *AST;
+ AliasSetTracker *AST;
MemorySSAUpdater *MSSAU;
LoopInfo &LI;
DebugLoc DL;
@@ -1842,7 +1842,7 @@ public:
SmallVectorImpl<BasicBlock *> &LEB,
SmallVectorImpl<Instruction *> &LIP,
SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
- AliasSetTracker *ast, MemorySSAUpdater *MSSAU, LoopInfo &li,
+ AliasSetTracker *ast, MemorySSAUpdater *MSSAU, LoopInfo &li,
DebugLoc dl, int alignment, bool UnorderedAtomic,
const AAMDNodes &AATags, ICFLoopSafetyInfo &SafetyInfo)
: LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
@@ -1899,13 +1899,13 @@ public:
void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
// Update alias analysis.
- if (AST)
- AST->copyValue(LI, V);
+ if (AST)
+ AST->copyValue(LI, V);
}
void instructionDeleted(Instruction *I) const override {
SafetyInfo.removeInstruction(I);
- if (AST)
- AST->deleteValue(I);
+ if (AST)
+ AST->deleteValue(I);
if (MSSAU)
MSSAU->removeMemoryAccess(I);
}
@@ -1951,7 +1951,7 @@ bool llvm::promoteLoopAccessesToScalars(
ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) {
// Verify inputs.
assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
- SafetyInfo != nullptr &&
+ SafetyInfo != nullptr &&
"Unexpected Input to promoteLoopAccessesToScalars");
Value *SomePtr = *PointerMustAliases.begin();
@@ -2016,7 +2016,7 @@ bool llvm::promoteLoopAccessesToScalars(
// we have to prove that the store is dead along the unwind edge. We do
// this by proving that the caller can't have a reference to the object
// after return and thus can't possibly load from the object.
- Value *Object = getUnderlyingObject(SomePtr);
+ Value *Object = getUnderlyingObject(SomePtr);
if (!isKnownNonEscaping(Object, TLI))
return false;
// Subtlety: Alloca's aren't visible to callers, but *are* potentially
@@ -2148,7 +2148,7 @@ bool llvm::promoteLoopAccessesToScalars(
if (IsKnownThreadLocalObject)
SafeToInsertStore = true;
else {
- Value *Object = getUnderlyingObject(SomePtr);
+ Value *Object = getUnderlyingObject(SomePtr);
SafeToInsertStore =
(isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) &&
!PointerMayBeCaptured(Object, true, true);
@@ -2179,7 +2179,7 @@ bool llvm::promoteLoopAccessesToScalars(
SmallVector<PHINode *, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
- InsertPts, MSSAInsertPts, PIC, CurAST, MSSAU, *LI, DL,
+ InsertPts, MSSAInsertPts, PIC, CurAST, MSSAU, *LI, DL,
Alignment.value(), SawUnorderedAtomic, AATags,
*SafetyInfo);
@@ -2294,18 +2294,18 @@ static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
return false;
}
-bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
- Loop *CurLoop, Instruction &I,
- SinkAndHoistLICMFlags &Flags) {
+bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
+ Loop *CurLoop, Instruction &I,
+ SinkAndHoistLICMFlags &Flags) {
// For hoisting, use the walker to determine safety
- if (!Flags.getIsSink()) {
+ if (!Flags.getIsSink()) {
MemoryAccess *Source;
// See declaration of SetLicmMssaOptCap for usage details.
- if (Flags.tooManyClobberingCalls())
+ if (Flags.tooManyClobberingCalls())
Source = MU->getDefiningAccess();
else {
Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
- Flags.incrementClobberingCalls();
+ Flags.incrementClobberingCalls();
}
return !MSSA->isLiveOnEntryDef(Source) &&
CurLoop->contains(Source->getBlock());
@@ -2328,28 +2328,28 @@ bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
// FIXME: Increase precision: Safe to sink if Use post dominates the Def;
// needs PostDominatorTreeAnalysis.
// FIXME: More precise: no Defs that alias this Use.
- if (Flags.tooManyMemoryAccesses())
+ if (Flags.tooManyMemoryAccesses())
return true;
for (auto *BB : CurLoop->getBlocks())
- if (pointerInvalidatedByBlockWithMSSA(*BB, *MSSA, *MU))
- return true;
- // When sinking, the source block may not be part of the loop so check it.
- if (!CurLoop->contains(&I))
- return pointerInvalidatedByBlockWithMSSA(*I.getParent(), *MSSA, *MU);
-
- return false;
-}
-
-bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA,
- MemoryUse &MU) {
- if (const auto *Accesses = MSSA.getBlockDefs(&BB))
- for (const auto &MA : *Accesses)
- if (const auto *MD = dyn_cast<MemoryDef>(&MA))
- if (MU.getBlock() != MD->getBlock() || !MSSA.locallyDominates(MD, &MU))
- return true;
+ if (pointerInvalidatedByBlockWithMSSA(*BB, *MSSA, *MU))
+ return true;
+ // When sinking, the source block may not be part of the loop so check it.
+ if (!CurLoop->contains(&I))
+ return pointerInvalidatedByBlockWithMSSA(*I.getParent(), *MSSA, *MU);
+
return false;
}
+bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA,
+ MemoryUse &MU) {
+ if (const auto *Accesses = MSSA.getBlockDefs(&BB))
+ for (const auto &MA : *Accesses)
+ if (const auto *MD = dyn_cast<MemoryDef>(&MA))
+ if (MU.getBlock() != MD->getBlock() || !MSSA.locallyDominates(MD, &MU))
+ return true;
+ return false;
+}
+
/// Little predicate that returns true if the specified basic block is in
/// a subloop of the current one, not the current one itself.
///
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopDataPrefetch.cpp
index 45cdcb2f37..1b6d3484bf 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -271,7 +271,7 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
bool MadeChange = false;
// Only prefetch in the inner-most loop
- if (!L->isInnermost())
+ if (!L->isInnermost())
return MadeChange;
SmallPtrSet<const Value *, 32> EphValues;
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopDeletion.cpp
index 1266c93316..3f896ef191 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -26,7 +26,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
-
+
using namespace llvm;
#define DEBUG_TYPE "loop-delete"
@@ -39,14 +39,14 @@ enum class LoopDeletionResult {
Deleted,
};
-static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B) {
- if (A == LoopDeletionResult::Deleted || B == LoopDeletionResult::Deleted)
- return LoopDeletionResult::Deleted;
- if (A == LoopDeletionResult::Modified || B == LoopDeletionResult::Modified)
- return LoopDeletionResult::Modified;
- return LoopDeletionResult::Unmodified;
-}
-
+static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B) {
+ if (A == LoopDeletionResult::Deleted || B == LoopDeletionResult::Deleted)
+ return LoopDeletionResult::Deleted;
+ if (A == LoopDeletionResult::Modified || B == LoopDeletionResult::Modified)
+ return LoopDeletionResult::Modified;
+ return LoopDeletionResult::Unmodified;
+}
+
/// Determines if a loop is dead.
///
/// This assumes that we've already checked for unique exit and exiting blocks,
@@ -62,28 +62,28 @@ static bool isLoopDead(Loop *L, ScalarEvolution &SE,
// of the loop.
bool AllEntriesInvariant = true;
bool AllOutgoingValuesSame = true;
- if (!L->hasNoExitBlocks()) {
- for (PHINode &P : ExitBlock->phis()) {
- Value *incoming = P.getIncomingValueForBlock(ExitingBlocks[0]);
-
- // Make sure all exiting blocks produce the same incoming value for the
- // block. If there are different incoming values for different exiting
- // blocks, then it is impossible to statically determine which value
- // should be used.
- AllOutgoingValuesSame =
- all_of(makeArrayRef(ExitingBlocks).slice(1), [&](BasicBlock *BB) {
- return incoming == P.getIncomingValueForBlock(BB);
- });
-
- if (!AllOutgoingValuesSame)
- break;
-
- if (Instruction *I = dyn_cast<Instruction>(incoming))
- if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator())) {
- AllEntriesInvariant = false;
- break;
- }
- }
+ if (!L->hasNoExitBlocks()) {
+ for (PHINode &P : ExitBlock->phis()) {
+ Value *incoming = P.getIncomingValueForBlock(ExitingBlocks[0]);
+
+ // Make sure all exiting blocks produce the same incoming value for the
+ // block. If there are different incoming values for different exiting
+ // blocks, then it is impossible to statically determine which value
+ // should be used.
+ AllOutgoingValuesSame =
+ all_of(makeArrayRef(ExitingBlocks).slice(1), [&](BasicBlock *BB) {
+ return incoming == P.getIncomingValueForBlock(BB);
+ });
+
+ if (!AllOutgoingValuesSame)
+ break;
+
+ if (Instruction *I = dyn_cast<Instruction>(incoming))
+ if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator())) {
+ AllEntriesInvariant = false;
+ break;
+ }
+ }
}
if (Changed)
@@ -96,9 +96,9 @@ static bool isLoopDead(Loop *L, ScalarEvolution &SE,
// This includes instructions that could write to memory, and loads that are
// marked volatile.
for (auto &I : L->blocks())
- if (any_of(*I, [](Instruction &I) {
- return I.mayHaveSideEffects() && !I.isDroppable();
- }))
+ if (any_of(*I, [](Instruction &I) {
+ return I.mayHaveSideEffects() && !I.isDroppable();
+ }))
return false;
return true;
}
@@ -135,33 +135,33 @@ static bool isLoopNeverExecuted(Loop *L) {
return true;
}
-/// If we can prove the backedge is untaken, remove it. This destroys the
-/// loop, but leaves the (now trivially loop invariant) control flow and
-/// side effects (if any) in place.
-static LoopDeletionResult
-breakBackedgeIfNotTaken(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
- LoopInfo &LI, MemorySSA *MSSA,
- OptimizationRemarkEmitter &ORE) {
- assert(L->isLCSSAForm(DT) && "Expected LCSSA!");
-
- if (!L->getLoopLatch())
- return LoopDeletionResult::Unmodified;
-
- auto *BTC = SE.getBackedgeTakenCount(L);
- if (!BTC->isZero())
- return LoopDeletionResult::Unmodified;
-
- breakLoopBackedge(L, DT, SE, LI, MSSA);
- return LoopDeletionResult::Deleted;
-}
-
+/// If we can prove the backedge is untaken, remove it. This destroys the
+/// loop, but leaves the (now trivially loop invariant) control flow and
+/// side effects (if any) in place.
+static LoopDeletionResult
+breakBackedgeIfNotTaken(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
+ LoopInfo &LI, MemorySSA *MSSA,
+ OptimizationRemarkEmitter &ORE) {
+ assert(L->isLCSSAForm(DT) && "Expected LCSSA!");
+
+ if (!L->getLoopLatch())
+ return LoopDeletionResult::Unmodified;
+
+ auto *BTC = SE.getBackedgeTakenCount(L);
+ if (!BTC->isZero())
+ return LoopDeletionResult::Unmodified;
+
+ breakLoopBackedge(L, DT, SE, LI, MSSA);
+ return LoopDeletionResult::Deleted;
+}
+
/// Remove a loop if it is dead.
///
-/// A loop is considered dead either if it does not impact the observable
-/// behavior of the program other than finite running time, or if it is
-/// required to make progress by an attribute such as 'mustprogress' or
-/// 'llvm.loop.mustprogress' and does not make any. This may remove
-/// infinite loops that have been required to make progress.
+/// A loop is considered dead either if it does not impact the observable
+/// behavior of the program other than finite running time, or if it is
+/// required to make progress by an attribute such as 'mustprogress' or
+/// 'llvm.loop.mustprogress' and does not make any. This may remove
+/// infinite loops that have been required to make progress.
///
/// This entire process relies pretty heavily on LoopSimplify form and LCSSA in
/// order to make various safety checks work.
@@ -190,10 +190,10 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT,
if (ExitBlock && isLoopNeverExecuted(L)) {
LLVM_DEBUG(dbgs() << "Loop is proven to never execute, delete it!");
- // We need to forget the loop before setting the incoming values of the exit
- // phis to undef, so we properly invalidate the SCEV expressions for those
- // phis.
- SE.forgetLoop(L);
+ // We need to forget the loop before setting the incoming values of the exit
+ // phis to undef, so we properly invalidate the SCEV expressions for those
+ // phis.
+ SE.forgetLoop(L);
// Set incoming value to undef for phi nodes in the exit block.
for (PHINode &P : ExitBlock->phis()) {
std::fill(P.incoming_values().begin(), P.incoming_values().end(),
@@ -214,12 +214,12 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT,
SmallVector<BasicBlock *, 4> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
- // We require that the loop has at most one exit block. Otherwise, we'd be in
- // the situation of needing to be able to solve statically which exit block
- // will be branched to, or trying to preserve the branching logic in a loop
- // invariant manner.
- if (!ExitBlock && !L->hasNoExitBlocks()) {
- LLVM_DEBUG(dbgs() << "Deletion requires at most one exit block.\n");
+ // We require that the loop has at most one exit block. Otherwise, we'd be in
+ // the situation of needing to be able to solve statically which exit block
+ // will be branched to, or trying to preserve the branching logic in a loop
+ // invariant manner.
+ if (!ExitBlock && !L->hasNoExitBlocks()) {
+ LLVM_DEBUG(dbgs() << "Deletion requires at most one exit block.\n");
return LoopDeletionResult::Unmodified;
}
// Finally, we have to check that the loop really is dead.
@@ -230,13 +230,13 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT,
: LoopDeletionResult::Unmodified;
}
- // Don't remove loops for which we can't solve the trip count unless the loop
- // was required to make progress but has been determined to be dead.
+ // Don't remove loops for which we can't solve the trip count unless the loop
+ // was required to make progress but has been determined to be dead.
const SCEV *S = SE.getConstantMaxBackedgeTakenCount(L);
- if (isa<SCEVCouldNotCompute>(S) &&
- !L->getHeader()->getParent()->mustProgress() && !hasMustProgress(L)) {
- LLVM_DEBUG(dbgs() << "Could not compute SCEV MaxBackedgeTakenCount and was "
- "not required to make progress.\n");
+ if (isa<SCEVCouldNotCompute>(S) &&
+ !L->getHeader()->getParent()->mustProgress() && !hasMustProgress(L)) {
+ LLVM_DEBUG(dbgs() << "Could not compute SCEV MaxBackedgeTakenCount and was "
+ "not required to make progress.\n");
return Changed ? LoopDeletionResult::Modified
: LoopDeletionResult::Unmodified;
}
@@ -265,14 +265,14 @@ PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM,
// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
auto Result = deleteLoopIfDead(&L, AR.DT, AR.SE, AR.LI, AR.MSSA, ORE);
-
- // If we can prove the backedge isn't taken, just break it and be done. This
- // leaves the loop structure in place which means it can handle dispatching
- // to the right exit based on whatever loop invariant structure remains.
- if (Result != LoopDeletionResult::Deleted)
- Result = merge(Result, breakBackedgeIfNotTaken(&L, AR.DT, AR.SE, AR.LI,
- AR.MSSA, ORE));
-
+
+ // If we can prove the backedge isn't taken, just break it and be done. This
+ // leaves the loop structure in place which means it can handle dispatching
+ // to the right exit based on whatever loop invariant structure remains.
+ if (Result != LoopDeletionResult::Deleted)
+ Result = merge(Result, breakBackedgeIfNotTaken(&L, AR.DT, AR.SE, AR.LI,
+ AR.MSSA, ORE));
+
if (Result == LoopDeletionResult::Unmodified)
return PreservedAnalyses::all();
@@ -332,12 +332,12 @@ bool LoopDeletionLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
LoopDeletionResult Result = deleteLoopIfDead(L, DT, SE, LI, MSSA, ORE);
- // If we can prove the backedge isn't taken, just break it and be done. This
- // leaves the loop structure in place which means it can handle dispatching
- // to the right exit based on whatever loop invariant structure remains.
- if (Result != LoopDeletionResult::Deleted)
- Result = merge(Result, breakBackedgeIfNotTaken(L, DT, SE, LI, MSSA, ORE));
-
+ // If we can prove the backedge isn't taken, just break it and be done. This
+ // leaves the loop structure in place which means it can handle dispatching
+ // to the right exit based on whatever loop invariant structure remains.
+ if (Result != LoopDeletionResult::Deleted)
+ Result = merge(Result, breakBackedgeIfNotTaken(L, DT, SE, LI, MSSA, ORE));
+
if (Result == LoopDeletionResult::Deleted)
LPM.markLoopAsDeleted(*L);
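As a hedged illustration of the breakBackedgeIfNotTaken path above (source-level shape only, not LLVM API): a loop whose backedge-taken count SCEV proves to be zero is not dead, because its body runs once with side effects, but its backedge can be removed while that single execution stays in place.

#include <cstdio>

static void runOnce(int x) { std::printf("%d\n", x); }

void example() {
  // After the first iteration i == 1, the latch test i != 1 fails, so the
  // branch back to the header is never taken: backedge-taken count is 0.
  for (int i = 0; i != 1; ++i)
    runOnce(i);
}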
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopDistribute.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopDistribute.cpp
index 1bd2529891..0d467540e3 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -663,20 +663,20 @@ public:
/// Try to distribute an inner-most loop.
bool processLoop(std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
- assert(L->isInnermost() && "Only process inner loops.");
+ assert(L->isInnermost() && "Only process inner loops.");
LLVM_DEBUG(dbgs() << "\nLDist: In \""
<< L->getHeader()->getParent()->getName()
<< "\" checking " << *L << "\n");
- // Having a single exit block implies there's also one exiting block.
+ // Having a single exit block implies there's also one exiting block.
if (!L->getExitBlock())
return fail("MultipleExitBlocks", "multiple exit blocks");
if (!L->isLoopSimplifyForm())
return fail("NotLoopSimplifyForm",
"loop is not in loop-simplify form");
- if (!L->isRotatedForm())
- return fail("NotBottomTested", "loop is not bottom tested");
+ if (!L->isRotatedForm())
+ return fail("NotBottomTested", "loop is not bottom tested");
BasicBlock *PH = L->getLoopPreheader();
@@ -815,7 +815,7 @@ public:
LLVM_DEBUG(dbgs() << "\nPointers:\n");
LLVM_DEBUG(LAI->getRuntimePointerChecking()->printChecks(dbgs(), Checks));
- LoopVersioning LVer(*LAI, Checks, L, LI, DT, SE);
+ LoopVersioning LVer(*LAI, Checks, L, LI, DT, SE);
LVer.versionLoop(DefsUsedOutside);
LVer.annotateLoopWithNoAlias();
@@ -981,7 +981,7 @@ static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
for (Loop *TopLevelLoop : *LI)
for (Loop *L : depth_first(TopLevelLoop))
// We only handle inner-most loops.
- if (L->isInnermost())
+ if (L->isInnermost())
Worklist.push_back(L);
// Now walk the identified inner loops.
@@ -1057,8 +1057,8 @@ PreservedAnalyses LoopDistributePass::run(Function &F,
auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
std::function<const LoopAccessInfo &(Loop &)> GetLAA =
[&](Loop &L) -> const LoopAccessInfo & {
- LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
- TLI, TTI, nullptr, nullptr};
+ LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
+ TLI, TTI, nullptr, nullptr};
return LAM.getResult<LoopAccessAnalysis>(L, AR);
};
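A sketch of what the NotBottomTested bail-out above refers to, shown at the source level; the actual isRotatedForm() check runs on the IR after loop rotation, so these functions only illustrate the two shapes:

// Top-tested: the exit test sits at the top of the loop, not in the latch.
void topTested(int *a, int n) {
  int i = 0;
  while (i < n) {
    a[i] = 0;
    ++i;
  }
}

// Bottom-tested (rotated): a guard followed by a do-while, exit test in the latch.
void bottomTested(int *a, int n) {
  if (0 < n) {
    int i = 0;
    do {
      a[i] = 0;
      ++i;
    } while (i < n);
  }
}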
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopFlatten.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopFlatten.cpp
index aaff68436c..f7639dd02e 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopFlatten.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopFlatten.cpp
@@ -1,728 +1,728 @@
-//===- LoopFlatten.cpp - Loop flattening pass------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass flattens pairs of nested loops into a single loop.
-//
-// The intention is to optimise loop nests like this, which together access an
-// array linearly:
-// for (int i = 0; i < N; ++i)
-// for (int j = 0; j < M; ++j)
-// f(A[i*M+j]);
-// into one loop:
-// for (int i = 0; i < (N*M); ++i)
-// f(A[i]);
-//
-// It can also flatten loops where the induction variables are not used in the
-// loop. This is only worth doing if the induction variables are only used in an
-// expression like i*M+j. If they had any other uses, we would have to insert a
-// div/mod to reconstruct the original values, so this wouldn't be profitable.
-//
-// We also need to prove that N*M will not overflow.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Scalar/LoopFlatten.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/OptimizationRemarkEmitter.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/PatternMatch.h"
-#include "llvm/IR/Verifier.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/LoopUtils.h"
-#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
-#include "llvm/Transforms/Utils/SimplifyIndVar.h"
-
-#define DEBUG_TYPE "loop-flatten"
-
-using namespace llvm;
-using namespace llvm::PatternMatch;
-
-static cl::opt<unsigned> RepeatedInstructionThreshold(
- "loop-flatten-cost-threshold", cl::Hidden, cl::init(2),
- cl::desc("Limit on the cost of instructions that can be repeated due to "
- "loop flattening"));
-
-static cl::opt<bool>
- AssumeNoOverflow("loop-flatten-assume-no-overflow", cl::Hidden,
- cl::init(false),
- cl::desc("Assume that the product of the two iteration "
- "limits will never overflow"));
-
-static cl::opt<bool>
- WidenIV("loop-flatten-widen-iv", cl::Hidden,
- cl::init(true),
- cl::desc("Widen the loop induction variables, if possible, so "
- "overflow checks won't reject flattening"));
-
-struct FlattenInfo {
- Loop *OuterLoop = nullptr;
- Loop *InnerLoop = nullptr;
- PHINode *InnerInductionPHI = nullptr;
- PHINode *OuterInductionPHI = nullptr;
- Value *InnerLimit = nullptr;
- Value *OuterLimit = nullptr;
- BinaryOperator *InnerIncrement = nullptr;
- BinaryOperator *OuterIncrement = nullptr;
- BranchInst *InnerBranch = nullptr;
- BranchInst *OuterBranch = nullptr;
- SmallPtrSet<Value *, 4> LinearIVUses;
- SmallPtrSet<PHINode *, 4> InnerPHIsToTransform;
-
- // Whether this holds the flatten info before or after widening.
- bool Widened = false;
-
- FlattenInfo(Loop *OL, Loop *IL) : OuterLoop(OL), InnerLoop(IL) {};
-};
-
-// Finds the induction variable, increment and limit for a simple loop that we
-// can flatten.
-static bool findLoopComponents(
- Loop *L, SmallPtrSetImpl<Instruction *> &IterationInstructions,
- PHINode *&InductionPHI, Value *&Limit, BinaryOperator *&Increment,
- BranchInst *&BackBranch, ScalarEvolution *SE) {
- LLVM_DEBUG(dbgs() << "Finding components of loop: " << L->getName() << "\n");
-
- if (!L->isLoopSimplifyForm()) {
- LLVM_DEBUG(dbgs() << "Loop is not in normal form\n");
- return false;
- }
-
- // There must be exactly one exiting block, and it must be the same as the
- // latch.
- BasicBlock *Latch = L->getLoopLatch();
- if (L->getExitingBlock() != Latch) {
- LLVM_DEBUG(dbgs() << "Exiting and latch block are different\n");
- return false;
- }
- // Latch block must end in a conditional branch.
- BackBranch = dyn_cast<BranchInst>(Latch->getTerminator());
- if (!BackBranch || !BackBranch->isConditional()) {
- LLVM_DEBUG(dbgs() << "Could not find back-branch\n");
- return false;
- }
- IterationInstructions.insert(BackBranch);
- LLVM_DEBUG(dbgs() << "Found back branch: "; BackBranch->dump());
- bool ContinueOnTrue = L->contains(BackBranch->getSuccessor(0));
-
- // Find the induction PHI. If there is no induction PHI, we can't do the
- // transformation. TODO: could other variables trigger this? Do we have to
- // search for the best one?
- InductionPHI = nullptr;
- for (PHINode &PHI : L->getHeader()->phis()) {
- InductionDescriptor ID;
- if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID)) {
- InductionPHI = &PHI;
- LLVM_DEBUG(dbgs() << "Found induction PHI: "; InductionPHI->dump());
- break;
- }
- }
- if (!InductionPHI) {
- LLVM_DEBUG(dbgs() << "Could not find induction PHI\n");
- return false;
- }
-
- auto IsValidPredicate = [&](ICmpInst::Predicate Pred) {
- if (ContinueOnTrue)
- return Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_ULT;
- else
- return Pred == CmpInst::ICMP_EQ;
- };
-
- // Find Compare and make sure it is valid
- ICmpInst *Compare = dyn_cast<ICmpInst>(BackBranch->getCondition());
- if (!Compare || !IsValidPredicate(Compare->getUnsignedPredicate()) ||
- Compare->hasNUsesOrMore(2)) {
- LLVM_DEBUG(dbgs() << "Could not find valid comparison\n");
- return false;
- }
- IterationInstructions.insert(Compare);
- LLVM_DEBUG(dbgs() << "Found comparison: "; Compare->dump());
-
- // Find increment and limit from the compare
- Increment = nullptr;
- if (match(Compare->getOperand(0),
- m_c_Add(m_Specific(InductionPHI), m_ConstantInt<1>()))) {
- Increment = dyn_cast<BinaryOperator>(Compare->getOperand(0));
- Limit = Compare->getOperand(1);
- } else if (Compare->getUnsignedPredicate() == CmpInst::ICMP_NE &&
- match(Compare->getOperand(1),
- m_c_Add(m_Specific(InductionPHI), m_ConstantInt<1>()))) {
- Increment = dyn_cast<BinaryOperator>(Compare->getOperand(1));
- Limit = Compare->getOperand(0);
- }
- if (!Increment || Increment->hasNUsesOrMore(3)) {
- LLVM_DEBUG(dbgs() << "Cound not find valid increment\n");
- return false;
- }
- IterationInstructions.insert(Increment);
- LLVM_DEBUG(dbgs() << "Found increment: "; Increment->dump());
- LLVM_DEBUG(dbgs() << "Found limit: "; Limit->dump());
-
- assert(InductionPHI->getNumIncomingValues() == 2);
- assert(InductionPHI->getIncomingValueForBlock(Latch) == Increment &&
- "PHI value is not increment inst");
-
- auto *CI = dyn_cast<ConstantInt>(
- InductionPHI->getIncomingValueForBlock(L->getLoopPreheader()));
- if (!CI || !CI->isZero()) {
- LLVM_DEBUG(dbgs() << "PHI value is not zero: "; CI->dump());
- return false;
- }
-
- LLVM_DEBUG(dbgs() << "Successfully found all loop components\n");
- return true;
-}
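// Illustrative sketch, not part of the pass: the canonical shape that
// findLoopComponents accepts. The IV starts at 0, is incremented by 1 in the
// latch, and the compare against the limit is the only exit test.
static void canonicalShape(int *a, unsigned limit) {
  for (unsigned i = 0; i != limit; ++i) // InductionPHI = i, Increment = i + 1,
    a[i] = 0;                           // Limit = limit, BackBranch = latch br
}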
-
-static bool checkPHIs(struct FlattenInfo &FI,
- const TargetTransformInfo *TTI) {
- // All PHIs in the inner and outer headers must either be:
- // - The induction PHI, which we are going to rewrite as one induction in
- // the new loop. This is already checked by findLoopComponents.
- // - An outer header PHI with all incoming values from outside the loop.
- // LoopSimplify guarantees we have a pre-header, so we don't need to
- // worry about that here.
- // - Pairs of PHIs in the inner and outer headers, which implement a
- // loop-carried dependency that will still be valid in the new loop. To
- // be valid, this variable must be modified only in the inner loop.
-
- // The set of PHI nodes in the outer loop header that we know will still be
- // valid after the transformation. These will not need to be modified (with
- // the exception of the induction variable), but we do need to check that
- // there are no unsafe PHI nodes.
- SmallPtrSet<PHINode *, 4> SafeOuterPHIs;
- SafeOuterPHIs.insert(FI.OuterInductionPHI);
-
- // Check that all PHI nodes in the inner loop header match one of the valid
- // patterns.
- for (PHINode &InnerPHI : FI.InnerLoop->getHeader()->phis()) {
- // The induction PHIs break these rules, and that's OK because we treat
- // them specially when doing the transformation.
- if (&InnerPHI == FI.InnerInductionPHI)
- continue;
-
- // Each inner loop PHI node must have two incoming values/blocks - one
- // from the pre-header, and one from the latch.
- assert(InnerPHI.getNumIncomingValues() == 2);
- Value *PreHeaderValue =
- InnerPHI.getIncomingValueForBlock(FI.InnerLoop->getLoopPreheader());
- Value *LatchValue =
- InnerPHI.getIncomingValueForBlock(FI.InnerLoop->getLoopLatch());
-
- // The incoming value from the outer loop must be the PHI node in the
- // outer loop header, with no modifications made in the top of the outer
- // loop.
- PHINode *OuterPHI = dyn_cast<PHINode>(PreHeaderValue);
- if (!OuterPHI || OuterPHI->getParent() != FI.OuterLoop->getHeader()) {
- LLVM_DEBUG(dbgs() << "value modified in top of outer loop\n");
- return false;
- }
-
- // The other incoming value must come from the inner loop, without any
- // modifications in the tail end of the outer loop. We are in LCSSA form,
- // so this will actually be a PHI in the inner loop's exit block, which
- // only uses values from inside the inner loop.
- PHINode *LCSSAPHI = dyn_cast<PHINode>(
- OuterPHI->getIncomingValueForBlock(FI.OuterLoop->getLoopLatch()));
- if (!LCSSAPHI) {
- LLVM_DEBUG(dbgs() << "could not find LCSSA PHI\n");
- return false;
- }
-
- // The value used by the LCSSA PHI must be the same one that the inner
- // loop's PHI uses.
- if (LCSSAPHI->hasConstantValue() != LatchValue) {
- LLVM_DEBUG(
- dbgs() << "LCSSA PHI incoming value does not match latch value\n");
- return false;
- }
-
- LLVM_DEBUG(dbgs() << "PHI pair is safe:\n");
- LLVM_DEBUG(dbgs() << " Inner: "; InnerPHI.dump());
- LLVM_DEBUG(dbgs() << " Outer: "; OuterPHI->dump());
- SafeOuterPHIs.insert(OuterPHI);
- FI.InnerPHIsToTransform.insert(&InnerPHI);
- }
-
- for (PHINode &OuterPHI : FI.OuterLoop->getHeader()->phis()) {
- if (!SafeOuterPHIs.count(&OuterPHI)) {
- LLVM_DEBUG(dbgs() << "found unsafe PHI in outer loop: "; OuterPHI.dump());
- return false;
- }
- }
-
- LLVM_DEBUG(dbgs() << "checkPHIs: OK\n");
- return true;
-}
-
-static bool
-checkOuterLoopInsts(struct FlattenInfo &FI,
- SmallPtrSetImpl<Instruction *> &IterationInstructions,
- const TargetTransformInfo *TTI) {
- // Check for instructions in the outer but not inner loop. If any of these
- // have side-effects then this transformation is not legal, and if there is
- // a significant amount of code here which can't be optimised out then it's
- // not profitable (as these instructions would get executed for each
- // iteration of the inner loop).
- unsigned RepeatedInstrCost = 0;
- for (auto *B : FI.OuterLoop->getBlocks()) {
- if (FI.InnerLoop->contains(B))
- continue;
-
- for (auto &I : *B) {
- if (!isa<PHINode>(&I) && !I.isTerminator() &&
- !isSafeToSpeculativelyExecute(&I)) {
- LLVM_DEBUG(dbgs() << "Cannot flatten because instruction may have "
- "side effects: ";
- I.dump());
- return false;
- }
- // The execution count of the outer loop's iteration instructions
- // (increment, compare and branch) will be increased, but the
- // equivalent instructions will be removed from the inner loop, so
- // they make a net difference of zero.
- if (IterationInstructions.count(&I))
- continue;
- // The unconditional branch to the inner loop's header will turn into
- // a fall-through, so adds no cost.
- BranchInst *Br = dyn_cast<BranchInst>(&I);
- if (Br && Br->isUnconditional() &&
- Br->getSuccessor(0) == FI.InnerLoop->getHeader())
- continue;
- // Multiplies of the outer iteration variable and inner iteration
- // count will be optimised out.
- if (match(&I, m_c_Mul(m_Specific(FI.OuterInductionPHI),
- m_Specific(FI.InnerLimit))))
- continue;
- int Cost = TTI->getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
- LLVM_DEBUG(dbgs() << "Cost " << Cost << ": "; I.dump());
- RepeatedInstrCost += Cost;
- }
- }
-
- LLVM_DEBUG(dbgs() << "Cost of instructions that will be repeated: "
- << RepeatedInstrCost << "\n");
- // Bail out if flattening the loops would cause instructions in the outer
- // loop but not in the inner loop to be executed extra times.
- if (RepeatedInstrCost > RepeatedInstructionThreshold) {
- LLVM_DEBUG(dbgs() << "checkOuterLoopInsts: not profitable, bailing.\n");
- return false;
- }
-
- LLVM_DEBUG(dbgs() << "checkOuterLoopInsts: OK\n");
- return true;
-}
-
-static bool checkIVUsers(struct FlattenInfo &FI) {
- // We require all uses of both induction variables to match this pattern:
- //
- // (OuterPHI * InnerLimit) + InnerPHI
- //
- // Any uses of the induction variables not matching that pattern would
- // require a div/mod to reconstruct in the flattened loop, so the
- // transformation wouldn't be profitable.
-
- Value *InnerLimit = FI.InnerLimit;
- if (FI.Widened &&
- (isa<SExtInst>(InnerLimit) || isa<ZExtInst>(InnerLimit)))
- InnerLimit = cast<Instruction>(InnerLimit)->getOperand(0);
-
- // Check that all uses of the inner loop's induction variable match the
- // expected pattern, recording the uses of the outer IV.
- SmallPtrSet<Value *, 4> ValidOuterPHIUses;
- for (User *U : FI.InnerInductionPHI->users()) {
- if (U == FI.InnerIncrement)
- continue;
-
- // After widening the IVs, a trunc instruction might have been introduced, so
- // look through truncs.
- if (isa<TruncInst>(U)) {
- if (!U->hasOneUse())
- return false;
- U = *U->user_begin();
- }
-
- LLVM_DEBUG(dbgs() << "Found use of inner induction variable: "; U->dump());
-
- Value *MatchedMul;
- Value *MatchedItCount;
- bool IsAdd = match(U, m_c_Add(m_Specific(FI.InnerInductionPHI),
- m_Value(MatchedMul))) &&
- match(MatchedMul, m_c_Mul(m_Specific(FI.OuterInductionPHI),
- m_Value(MatchedItCount)));
-
- // Matches the same pattern as above, except it also looks for truncs
- // on the phi, which can be the result of widening the induction variables.
- bool IsAddTrunc = match(U, m_c_Add(m_Trunc(m_Specific(FI.InnerInductionPHI)),
- m_Value(MatchedMul))) &&
- match(MatchedMul,
- m_c_Mul(m_Trunc(m_Specific(FI.OuterInductionPHI)),
- m_Value(MatchedItCount)));
-
- if ((IsAdd || IsAddTrunc) && MatchedItCount == InnerLimit) {
- LLVM_DEBUG(dbgs() << "Use is optimisable\n");
- ValidOuterPHIUses.insert(MatchedMul);
- FI.LinearIVUses.insert(U);
- } else {
- LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n");
- return false;
- }
- }
-
- // Check that there are no uses of the outer IV other than the ones found
- // as part of the pattern above.
- for (User *U : FI.OuterInductionPHI->users()) {
- if (U == FI.OuterIncrement)
- continue;
-
- auto IsValidOuterPHIUses = [&] (User *U) -> bool {
- LLVM_DEBUG(dbgs() << "Found use of outer induction variable: "; U->dump());
- if (!ValidOuterPHIUses.count(U)) {
- LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n");
- return false;
- }
- LLVM_DEBUG(dbgs() << "Use is optimisable\n");
- return true;
- };
-
- if (auto *V = dyn_cast<TruncInst>(U)) {
- for (auto *K : V->users()) {
- if (!IsValidOuterPHIUses(K))
- return false;
- }
- continue;
- }
-
- if (!IsValidOuterPHIUses(U))
- return false;
- }
-
- LLVM_DEBUG(dbgs() << "checkIVUsers: OK\n";
- dbgs() << "Found " << FI.LinearIVUses.size()
- << " value(s) that can be replaced:\n";
- for (Value *V : FI.LinearIVUses) {
- dbgs() << " ";
- V->dump();
- });
- return true;
-}
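// Illustrative sketch, not part of the pass: checkIVUsers only accepts IV
// uses of the form OuterPHI * InnerLimit + InnerPHI.
static void flattenable(int *a, unsigned N, unsigned M) {
  for (unsigned i = 0; i < N; ++i)
    for (unsigned j = 0; j < M; ++j)
      a[i * M + j] = 0;       // i*M + j: the whole index becomes the single IV
}

static void notFlattenable(int *a, unsigned N, unsigned M) {
  for (unsigned i = 0; i < N; ++i)
    for (unsigned j = 0; j < M; ++j)
      a[i * M + j] = i + j;   // separate uses of i and j would need a div/mod
}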
-
-// Return an OverflowResult dependent on whether overflow of the multiplication of
-// InnerLimit and OuterLimit can be assumed not to happen.
-static OverflowResult checkOverflow(struct FlattenInfo &FI,
- DominatorTree *DT, AssumptionCache *AC) {
- Function *F = FI.OuterLoop->getHeader()->getParent();
- const DataLayout &DL = F->getParent()->getDataLayout();
-
- // For debugging/testing.
- if (AssumeNoOverflow)
- return OverflowResult::NeverOverflows;
-
- // Check if the multiply could not overflow due to known ranges of the
- // input values.
- OverflowResult OR = computeOverflowForUnsignedMul(
- FI.InnerLimit, FI.OuterLimit, DL, AC,
- FI.OuterLoop->getLoopPreheader()->getTerminator(), DT);
- if (OR != OverflowResult::MayOverflow)
- return OR;
-
- for (Value *V : FI.LinearIVUses) {
- for (Value *U : V->users()) {
- if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
- // The IV is used as the operand of a GEP, and the IV is at least as
- // wide as the address space of the GEP. In this case, the GEP would
- // wrap around the address space before the IV increment wraps, which
- // would be UB.
- if (GEP->isInBounds() &&
- V->getType()->getIntegerBitWidth() >=
- DL.getPointerTypeSizeInBits(GEP->getType())) {
- LLVM_DEBUG(
- dbgs() << "use of linear IV would be UB if overflow occurred: ";
- GEP->dump());
- return OverflowResult::NeverOverflows;
- }
- }
- }
- }
-
- return OverflowResult::MayOverflow;
-}
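// Illustrative sketch of why this overflow check matters: with 32-bit IVs and
// N = M = 100000 the product is 10^10, which wraps modulo 2^32, so a 32-bit
// flattened trip count would be wrong unless the IVs are widened or the input
// ranges are known to be small enough.
#include <cstdint>
static_assert(100000ull * 100000ull > UINT32_MAX,
              "N*M does not fit in 32 bits");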
-
-static bool CanFlattenLoopPair(struct FlattenInfo &FI, DominatorTree *DT,
- LoopInfo *LI, ScalarEvolution *SE,
- AssumptionCache *AC, const TargetTransformInfo *TTI) {
- SmallPtrSet<Instruction *, 8> IterationInstructions;
- if (!findLoopComponents(FI.InnerLoop, IterationInstructions, FI.InnerInductionPHI,
- FI.InnerLimit, FI.InnerIncrement, FI.InnerBranch, SE))
- return false;
- if (!findLoopComponents(FI.OuterLoop, IterationInstructions, FI.OuterInductionPHI,
- FI.OuterLimit, FI.OuterIncrement, FI.OuterBranch, SE))
- return false;
-
- // Both of the loop limit values must be invariant in the outer loop
- // (non-instructions are all inherently invariant).
- if (!FI.OuterLoop->isLoopInvariant(FI.InnerLimit)) {
- LLVM_DEBUG(dbgs() << "inner loop limit not invariant\n");
- return false;
- }
- if (!FI.OuterLoop->isLoopInvariant(FI.OuterLimit)) {
- LLVM_DEBUG(dbgs() << "outer loop limit not invariant\n");
- return false;
- }
-
- if (!checkPHIs(FI, TTI))
- return false;
-
- // FIXME: it should be possible to handle different types correctly.
- if (FI.InnerInductionPHI->getType() != FI.OuterInductionPHI->getType())
- return false;
-
- if (!checkOuterLoopInsts(FI, IterationInstructions, TTI))
- return false;
-
- // Find the values in the loop that can be replaced with the linearized
- // induction variable, and check that there are no other uses of the inner
- // or outer induction variable. If there were, we could still do this
- // transformation, but we'd have to insert a div/mod to calculate the
- // original IVs, so it wouldn't be profitable.
- if (!checkIVUsers(FI))
- return false;
-
- LLVM_DEBUG(dbgs() << "CanFlattenLoopPair: OK\n");
- return true;
-}
-
-static bool DoFlattenLoopPair(struct FlattenInfo &FI, DominatorTree *DT,
- LoopInfo *LI, ScalarEvolution *SE,
- AssumptionCache *AC,
- const TargetTransformInfo *TTI) {
- Function *F = FI.OuterLoop->getHeader()->getParent();
- LLVM_DEBUG(dbgs() << "Checks all passed, doing the transformation\n");
- {
- using namespace ore;
- OptimizationRemark Remark(DEBUG_TYPE, "Flattened", FI.InnerLoop->getStartLoc(),
- FI.InnerLoop->getHeader());
- OptimizationRemarkEmitter ORE(F);
- Remark << "Flattened into outer loop";
- ORE.emit(Remark);
- }
-
- Value *NewTripCount =
- BinaryOperator::CreateMul(FI.InnerLimit, FI.OuterLimit, "flatten.tripcount",
- FI.OuterLoop->getLoopPreheader()->getTerminator());
- LLVM_DEBUG(dbgs() << "Created new trip count in preheader: ";
- NewTripCount->dump());
-
- // Fix up PHI nodes that take values from the inner loop back-edge, which
- // we are about to remove.
- FI.InnerInductionPHI->removeIncomingValue(FI.InnerLoop->getLoopLatch());
-
- // The old PHIs will be optimised away later, but for now we can't leave
- // them in an invalid state, so we update them here too.
- for (PHINode *PHI : FI.InnerPHIsToTransform)
- PHI->removeIncomingValue(FI.InnerLoop->getLoopLatch());
-
- // Modify the trip count of the outer loop to be the product of the two
- // trip counts.
- cast<User>(FI.OuterBranch->getCondition())->setOperand(1, NewTripCount);
-
- // Replace the inner loop backedge with an unconditional branch to the exit.
- BasicBlock *InnerExitBlock = FI.InnerLoop->getExitBlock();
- BasicBlock *InnerExitingBlock = FI.InnerLoop->getExitingBlock();
- InnerExitingBlock->getTerminator()->eraseFromParent();
- BranchInst::Create(InnerExitBlock, InnerExitingBlock);
- DT->deleteEdge(InnerExitingBlock, FI.InnerLoop->getHeader());
-
- // Replace all uses of the polynomial calculated from the two induction
- // variables with the one new one.
- IRBuilder<> Builder(FI.OuterInductionPHI->getParent()->getTerminator());
- for (Value *V : FI.LinearIVUses) {
- Value *OuterValue = FI.OuterInductionPHI;
- if (FI.Widened)
- OuterValue = Builder.CreateTrunc(FI.OuterInductionPHI, V->getType(),
- "flatten.trunciv");
-
- LLVM_DEBUG(dbgs() << "Replacing: "; V->dump();
- dbgs() << "with: "; OuterValue->dump());
- V->replaceAllUsesWith(OuterValue);
- }
-
- // Tell LoopInfo, SCEV and the pass manager that the inner loop has been
- // deleted, and that any information they have about the outer loop is invalidated.
- SE->forgetLoop(FI.OuterLoop);
- SE->forgetLoop(FI.InnerLoop);
- LI->erase(FI.InnerLoop);
- return true;
-}
-
-static bool CanWidenIV(struct FlattenInfo &FI, DominatorTree *DT,
- LoopInfo *LI, ScalarEvolution *SE,
- AssumptionCache *AC, const TargetTransformInfo *TTI) {
- if (!WidenIV) {
- LLVM_DEBUG(dbgs() << "Widening the IVs is disabled\n");
- return false;
- }
-
- LLVM_DEBUG(dbgs() << "Try widening the IVs\n");
- Module *M = FI.InnerLoop->getHeader()->getParent()->getParent();
- auto &DL = M->getDataLayout();
- auto *InnerType = FI.InnerInductionPHI->getType();
- auto *OuterType = FI.OuterInductionPHI->getType();
- unsigned MaxLegalSize = DL.getLargestLegalIntTypeSizeInBits();
- auto *MaxLegalType = DL.getLargestLegalIntType(M->getContext());
-
- // If both induction types are less than the maximum legal integer width,
- // promote both to the widest type available so we know calculating
- // (OuterLimit * InnerLimit) as the new trip count is safe.
- if (InnerType != OuterType ||
- InnerType->getScalarSizeInBits() >= MaxLegalSize ||
- MaxLegalType->getScalarSizeInBits() < InnerType->getScalarSizeInBits() * 2) {
- LLVM_DEBUG(dbgs() << "Can't widen the IV\n");
- return false;
- }
-
- SCEVExpander Rewriter(*SE, DL, "loopflatten");
- SmallVector<WideIVInfo, 2> WideIVs;
- SmallVector<WeakTrackingVH, 4> DeadInsts;
- WideIVs.push_back( {FI.InnerInductionPHI, MaxLegalType, false });
- WideIVs.push_back( {FI.OuterInductionPHI, MaxLegalType, false });
- unsigned ElimExt;
- unsigned Widened;
-
- for (unsigned i = 0; i < WideIVs.size(); i++) {
- PHINode *WidePhi = createWideIV(WideIVs[i], LI, SE, Rewriter, DT, DeadInsts,
- ElimExt, Widened, true /* HasGuards */,
- true /* UsePostIncrementRanges */);
- if (!WidePhi)
- return false;
- LLVM_DEBUG(dbgs() << "Created wide phi: "; WidePhi->dump());
- LLVM_DEBUG(dbgs() << "Deleting old phi: "; WideIVs[i].NarrowIV->dump());
- RecursivelyDeleteDeadPHINode(WideIVs[i].NarrowIV);
- }
- // After widening, rediscover all the loop components.
- assert(Widened && "Widenend IV expected");
- FI.Widened = true;
- return CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
-}
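// Illustrative sketch, not part of the pass: the payoff of widening. Once
// both IVs are promoted to the largest legal integer type (typically i64),
// the flattened trip count N*M is computed exactly, so no overflow check or
// loop versioning is needed.
#include <cstdint>
static uint64_t flatTripCount(uint32_t N, uint32_t M) {
  return uint64_t(N) * uint64_t(M); // a 32x32-bit product always fits in 64 bits
}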
-
-static bool FlattenLoopPair(struct FlattenInfo &FI, DominatorTree *DT,
- LoopInfo *LI, ScalarEvolution *SE,
- AssumptionCache *AC,
- const TargetTransformInfo *TTI) {
- LLVM_DEBUG(
- dbgs() << "Loop flattening running on outer loop "
- << FI.OuterLoop->getHeader()->getName() << " and inner loop "
- << FI.InnerLoop->getHeader()->getName() << " in "
- << FI.OuterLoop->getHeader()->getParent()->getName() << "\n");
-
- if (!CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI))
- return false;
-
- // Check if we can widen the induction variables to avoid overflow checks.
- if (CanWidenIV(FI, DT, LI, SE, AC, TTI))
- return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
-
- // Check if the new iteration variable might overflow. In this case, we
- // need to version the loop, and select the original version at runtime if
- // the iteration space is too large.
- // TODO: We currently don't version the loop.
- OverflowResult OR = checkOverflow(FI, DT, AC);
- if (OR == OverflowResult::AlwaysOverflowsHigh ||
- OR == OverflowResult::AlwaysOverflowsLow) {
- LLVM_DEBUG(dbgs() << "Multiply would always overflow, so not profitable\n");
- return false;
- } else if (OR == OverflowResult::MayOverflow) {
- LLVM_DEBUG(dbgs() << "Multiply might overflow, not flattening\n");
- return false;
- }
-
- LLVM_DEBUG(dbgs() << "Multiply cannot overflow, modifying loop in-place\n");
- return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
-}
-
-bool Flatten(DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE,
- AssumptionCache *AC, TargetTransformInfo *TTI) {
- bool Changed = false;
- for (auto *InnerLoop : LI->getLoopsInPreorder()) {
- auto *OuterLoop = InnerLoop->getParentLoop();
- if (!OuterLoop)
- continue;
- struct FlattenInfo FI(OuterLoop, InnerLoop);
- Changed |= FlattenLoopPair(FI, DT, LI, SE, AC, TTI);
- }
- return Changed;
-}
-
-PreservedAnalyses LoopFlattenPass::run(Function &F,
- FunctionAnalysisManager &AM) {
- auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
- auto *LI = &AM.getResult<LoopAnalysis>(F);
- auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
- auto *AC = &AM.getResult<AssumptionAnalysis>(F);
- auto *TTI = &AM.getResult<TargetIRAnalysis>(F);
-
- if (!Flatten(DT, LI, SE, AC, TTI))
- return PreservedAnalyses::all();
-
- PreservedAnalyses PA;
- PA.preserveSet<CFGAnalyses>();
- return PA;
-}
-
-namespace {
-class LoopFlattenLegacyPass : public FunctionPass {
-public:
- static char ID; // Pass ID, replacement for typeid
- LoopFlattenLegacyPass() : FunctionPass(ID) {
- initializeLoopFlattenLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- // Possibly flatten loop L into its child.
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- getLoopAnalysisUsage(AU);
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addPreserved<TargetTransformInfoWrapperPass>();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addPreserved<AssumptionCacheTracker>();
- }
-};
-} // namespace
-
-char LoopFlattenLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_END(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops",
- false, false)
-
-FunctionPass *llvm::createLoopFlattenPass() { return new LoopFlattenLegacyPass(); }
-
-bool LoopFlattenLegacyPass::runOnFunction(Function &F) {
- ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- auto &TTIP = getAnalysis<TargetTransformInfoWrapperPass>();
- auto *TTI = &TTIP.getTTI(F);
- auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- return Flatten(DT, LI, SE, AC, TTI);
-}
+//===- LoopFlatten.cpp - Loop flattening pass------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass flattens pairs of nested loops into a single loop.
+//
+// The intention is to optimise loop nests like this, which together access an
+// array linearly:
+// for (int i = 0; i < N; ++i)
+// for (int j = 0; j < M; ++j)
+// f(A[i*M+j]);
+// into one loop:
+// for (int i = 0; i < (N*M); ++i)
+// f(A[i]);
+//
+// It can also flatten loops where the induction variables are not used in the
+// loop. This is only worth doing if the induction variables are only used in an
+// expression like i*M+j. If they had any other uses, we would have to insert a
+// div/mod to reconstruct the original values, so this wouldn't be profitable.
+//
+// We also need to prove that N*M will not overflow.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/LoopFlatten.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
+
+#define DEBUG_TYPE "loop-flatten"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+static cl::opt<unsigned> RepeatedInstructionThreshold(
+ "loop-flatten-cost-threshold", cl::Hidden, cl::init(2),
+ cl::desc("Limit on the cost of instructions that can be repeated due to "
+ "loop flattening"));
+
+static cl::opt<bool>
+ AssumeNoOverflow("loop-flatten-assume-no-overflow", cl::Hidden,
+ cl::init(false),
+ cl::desc("Assume that the product of the two iteration "
+ "limits will never overflow"));
+
+static cl::opt<bool>
+ WidenIV("loop-flatten-widen-iv", cl::Hidden,
+ cl::init(true),
+ cl::desc("Widen the loop induction variables, if possible, so "
+ "overflow checks won't reject flattening"));
+
+struct FlattenInfo {
+ Loop *OuterLoop = nullptr;
+ Loop *InnerLoop = nullptr;
+ PHINode *InnerInductionPHI = nullptr;
+ PHINode *OuterInductionPHI = nullptr;
+ Value *InnerLimit = nullptr;
+ Value *OuterLimit = nullptr;
+ BinaryOperator *InnerIncrement = nullptr;
+ BinaryOperator *OuterIncrement = nullptr;
+ BranchInst *InnerBranch = nullptr;
+ BranchInst *OuterBranch = nullptr;
+ SmallPtrSet<Value *, 4> LinearIVUses;
+ SmallPtrSet<PHINode *, 4> InnerPHIsToTransform;
+
+ // Whether this holds the flatten info before or after widening.
+ bool Widened = false;
+
+ FlattenInfo(Loop *OL, Loop *IL) : OuterLoop(OL), InnerLoop(IL) {};
+};
+
+// Finds the induction variable, increment and limit for a simple loop that we
+// can flatten.
+static bool findLoopComponents(
+ Loop *L, SmallPtrSetImpl<Instruction *> &IterationInstructions,
+ PHINode *&InductionPHI, Value *&Limit, BinaryOperator *&Increment,
+ BranchInst *&BackBranch, ScalarEvolution *SE) {
+ LLVM_DEBUG(dbgs() << "Finding components of loop: " << L->getName() << "\n");
+
+ if (!L->isLoopSimplifyForm()) {
+ LLVM_DEBUG(dbgs() << "Loop is not in normal form\n");
+ return false;
+ }
+
+ // There must be exactly one exiting block, and it must be the same as the
+ // latch.
+ BasicBlock *Latch = L->getLoopLatch();
+ if (L->getExitingBlock() != Latch) {
+ LLVM_DEBUG(dbgs() << "Exiting and latch block are different\n");
+ return false;
+ }
+ // Latch block must end in a conditional branch.
+ BackBranch = dyn_cast<BranchInst>(Latch->getTerminator());
+ if (!BackBranch || !BackBranch->isConditional()) {
+ LLVM_DEBUG(dbgs() << "Could not find back-branch\n");
+ return false;
+ }
+ IterationInstructions.insert(BackBranch);
+ LLVM_DEBUG(dbgs() << "Found back branch: "; BackBranch->dump());
+ bool ContinueOnTrue = L->contains(BackBranch->getSuccessor(0));
+
+ // Find the induction PHI. If there is no induction PHI, we can't do the
+ // transformation. TODO: could other variables trigger this? Do we have to
+ // search for the best one?
+ InductionPHI = nullptr;
+ for (PHINode &PHI : L->getHeader()->phis()) {
+ InductionDescriptor ID;
+ if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID)) {
+ InductionPHI = &PHI;
+ LLVM_DEBUG(dbgs() << "Found induction PHI: "; InductionPHI->dump());
+ break;
+ }
+ }
+ if (!InductionPHI) {
+ LLVM_DEBUG(dbgs() << "Could not find induction PHI\n");
+ return false;
+ }
+
+ auto IsValidPredicate = [&](ICmpInst::Predicate Pred) {
+ if (ContinueOnTrue)
+ return Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_ULT;
+ else
+ return Pred == CmpInst::ICMP_EQ;
+ };
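+ // For example (hypothetical IR, not produced by this pass), a latch ending
+ // in "%c = icmp ult i32 %inc, %n" followed by
+ // "br i1 %c, label %header, label %exit" continues on true and is accepted
+ // via ICMP_ULT, whereas ICMP_EQ is only accepted when the true edge leaves
+ // the loop.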
+
+ // Find Compare and make sure it is valid
+ ICmpInst *Compare = dyn_cast<ICmpInst>(BackBranch->getCondition());
+ if (!Compare || !IsValidPredicate(Compare->getUnsignedPredicate()) ||
+ Compare->hasNUsesOrMore(2)) {
+ LLVM_DEBUG(dbgs() << "Could not find valid comparison\n");
+ return false;
+ }
+ IterationInstructions.insert(Compare);
+ LLVM_DEBUG(dbgs() << "Found comparison: "; Compare->dump());
+
+ // Find increment and limit from the compare
+ Increment = nullptr;
+ if (match(Compare->getOperand(0),
+ m_c_Add(m_Specific(InductionPHI), m_ConstantInt<1>()))) {
+ Increment = dyn_cast<BinaryOperator>(Compare->getOperand(0));
+ Limit = Compare->getOperand(1);
+ } else if (Compare->getUnsignedPredicate() == CmpInst::ICMP_NE &&
+ match(Compare->getOperand(1),
+ m_c_Add(m_Specific(InductionPHI), m_ConstantInt<1>()))) {
+ Increment = dyn_cast<BinaryOperator>(Compare->getOperand(1));
+ Limit = Compare->getOperand(0);
+ }
+ if (!Increment || Increment->hasNUsesOrMore(3)) {
+ LLVM_DEBUG(dbgs() << "Cound not find valid increment\n");
+ return false;
+ }
+ IterationInstructions.insert(Increment);
+ LLVM_DEBUG(dbgs() << "Found increment: "; Increment->dump());
+ LLVM_DEBUG(dbgs() << "Found limit: "; Limit->dump());
+
+ assert(InductionPHI->getNumIncomingValues() == 2);
+ assert(InductionPHI->getIncomingValueForBlock(Latch) == Increment &&
+ "PHI value is not increment inst");
+
+ Value *Start = InductionPHI->getIncomingValueForBlock(L->getLoopPreheader());
+ auto *CI = dyn_cast<ConstantInt>(Start);
+ if (!CI || !CI->isZero()) {
+ LLVM_DEBUG(dbgs() << "PHI value is not a constant zero: "; Start->dump());
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "Successfully found all loop components\n");
+ return true;
+}
+
+static bool checkPHIs(struct FlattenInfo &FI,
+ const TargetTransformInfo *TTI) {
+ // All PHIs in the inner and outer headers must either be:
+ // - The induction PHI, which we are going to rewrite as one induction in
+ // the new loop. This is already checked by findLoopComponents.
+ // - An outer header PHI with all incoming values from outside the loop.
+ // LoopSimplify guarantees we have a pre-header, so we don't need to
+ // worry about that here.
+ // - Pairs of PHIs in the inner and outer headers, which implement a
+ // loop-carried dependency that will still be valid in the new loop. To
+ // be valid, this variable must be modified only in the inner loop.
+
+ // The set of PHI nodes in the outer loop header that we know will still be
+ // valid after the transformation. These will not need to be modified (with
+ // the exception of the induction variable), but we do need to check that
+ // there are no unsafe PHI nodes.
+ SmallPtrSet<PHINode *, 4> SafeOuterPHIs;
+ SafeOuterPHIs.insert(FI.OuterInductionPHI);
+
+ // Check that all PHI nodes in the inner loop header match one of the valid
+ // patterns.
+ for (PHINode &InnerPHI : FI.InnerLoop->getHeader()->phis()) {
+ // The induction PHIs break these rules, and that's OK because we treat
+ // them specially when doing the transformation.
+ if (&InnerPHI == FI.InnerInductionPHI)
+ continue;
+
+ // Each inner loop PHI node must have two incoming values/blocks - one
+ // from the pre-header, and one from the latch.
+ assert(InnerPHI.getNumIncomingValues() == 2);
+ Value *PreHeaderValue =
+ InnerPHI.getIncomingValueForBlock(FI.InnerLoop->getLoopPreheader());
+ Value *LatchValue =
+ InnerPHI.getIncomingValueForBlock(FI.InnerLoop->getLoopLatch());
+
+ // The incoming value from the outer loop must be the PHI node in the
+ // outer loop header, with no modifications made in the top of the outer
+ // loop.
+ PHINode *OuterPHI = dyn_cast<PHINode>(PreHeaderValue);
+ if (!OuterPHI || OuterPHI->getParent() != FI.OuterLoop->getHeader()) {
+ LLVM_DEBUG(dbgs() << "value modified in top of outer loop\n");
+ return false;
+ }
+
+ // The other incoming value must come from the inner loop, without any
+ // modifications in the tail end of the outer loop. We are in LCSSA form,
+ // so this will actually be a PHI in the inner loop's exit block, which
+ // only uses values from inside the inner loop.
+ PHINode *LCSSAPHI = dyn_cast<PHINode>(
+ OuterPHI->getIncomingValueForBlock(FI.OuterLoop->getLoopLatch()));
+ if (!LCSSAPHI) {
+ LLVM_DEBUG(dbgs() << "could not find LCSSA PHI\n");
+ return false;
+ }
+
+ // The value used by the LCSSA PHI must be the same one that the inner
+ // loop's PHI uses.
+ if (LCSSAPHI->hasConstantValue() != LatchValue) {
+ LLVM_DEBUG(
+ dbgs() << "LCSSA PHI incoming value does not match latch value\n");
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "PHI pair is safe:\n");
+ LLVM_DEBUG(dbgs() << " Inner: "; InnerPHI.dump());
+ LLVM_DEBUG(dbgs() << " Outer: "; OuterPHI->dump());
+ SafeOuterPHIs.insert(OuterPHI);
+ FI.InnerPHIsToTransform.insert(&InnerPHI);
+ }
+
+ for (PHINode &OuterPHI : FI.OuterLoop->getHeader()->phis()) {
+ if (!SafeOuterPHIs.count(&OuterPHI)) {
+ LLVM_DEBUG(dbgs() << "found unsafe PHI in outer loop: "; OuterPHI.dump());
+ return false;
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "checkPHIs: OK\n");
+ return true;
+}
+
+static bool
+checkOuterLoopInsts(struct FlattenInfo &FI,
+ SmallPtrSetImpl<Instruction *> &IterationInstructions,
+ const TargetTransformInfo *TTI) {
+ // Check for instructions in the outer but not the inner loop. If any of
+ // these have side-effects then this transformation is not legal, and if
+ // there is a significant amount of code here which can't be optimised out
+ // then it's not profitable (as these instructions would get executed on
+ // each iteration of the inner loop).
+ unsigned RepeatedInstrCost = 0;
+ for (auto *B : FI.OuterLoop->getBlocks()) {
+ if (FI.InnerLoop->contains(B))
+ continue;
+
+ for (auto &I : *B) {
+ if (!isa<PHINode>(&I) && !I.isTerminator() &&
+ !isSafeToSpeculativelyExecute(&I)) {
+ LLVM_DEBUG(dbgs() << "Cannot flatten because instruction may have "
+ "side effects: ";
+ I.dump());
+ return false;
+ }
+ // The execution count of the outer loop's iteration instructions
+ // (increment, compare and branch) will be increased, but the
+ // equivalent instructions will be removed from the inner loop, so
+ // they make a net difference of zero.
+ if (IterationInstructions.count(&I))
+ continue;
+ // The unconditional branch to the inner loop's header will turn into
+ // a fall-through, so it adds no cost.
+ BranchInst *Br = dyn_cast<BranchInst>(&I);
+ if (Br && Br->isUnconditional() &&
+ Br->getSuccessor(0) == FI.InnerLoop->getHeader())
+ continue;
+ // Multiplies of the outer iteration variable and inner iteration
+ // count will be optimised out.
+ if (match(&I, m_c_Mul(m_Specific(FI.OuterInductionPHI),
+ m_Specific(FI.InnerLimit))))
+ continue;
+ int Cost = TTI->getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
+ LLVM_DEBUG(dbgs() << "Cost " << Cost << ": "; I.dump());
+ RepeatedInstrCost += Cost;
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Cost of instructions that will be repeated: "
+ << RepeatedInstrCost << "\n");
+ // Bail out if flattening the loops would cause instructions in the outer
+ // loop but not in the inner loop to be executed extra times.
+ if (RepeatedInstrCost > RepeatedInstructionThreshold) {
+ LLVM_DEBUG(dbgs() << "checkOuterLoopInsts: not profitable, bailing.\n");
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "checkOuterLoopInsts: OK\n");
+ return true;
+}
+
+static bool checkIVUsers(struct FlattenInfo &FI) {
+ // We require all uses of both induction variables to match this pattern:
+ //
+ // (OuterPHI * InnerLimit) + InnerPHI
+ //
+ // Any uses of the induction variables not matching that pattern would
+ // require a div/mod to reconstruct in the flattened loop, so the
+ // transformation wouldn't be profitable.
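+ //
+ // For example (illustrative IR with invented names), a use such as
+ //   %flat = add i32 %mul, %inner.iv   ; %mul = mul i32 %outer.iv, %limit
+ // can later be replaced wholesale by the flattened IV, whereas a stray use
+ // of %inner.iv on its own would need a urem against the inner limit to be
+ // reconstructed, so checkIVUsers rejects it.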
+
+ Value *InnerLimit = FI.InnerLimit;
+ if (FI.Widened &&
+ (isa<SExtInst>(InnerLimit) || isa<ZExtInst>(InnerLimit)))
+ InnerLimit = cast<Instruction>(InnerLimit)->getOperand(0);
+
+ // Check that all uses of the inner loop's induction variable match the
+ // expected pattern, recording the uses of the outer IV.
+ SmallPtrSet<Value *, 4> ValidOuterPHIUses;
+ for (User *U : FI.InnerInductionPHI->users()) {
+ if (U == FI.InnerIncrement)
+ continue;
+
+ // After widening the IVs, a trunc instruction might have been introduced, so
+ // look through truncs.
+ if (isa<TruncInst>(U)) {
+ if (!U->hasOneUse())
+ return false;
+ U = *U->user_begin();
+ }
+
+ LLVM_DEBUG(dbgs() << "Found use of inner induction variable: "; U->dump());
+
+ Value *MatchedMul;
+ Value *MatchedItCount;
+ bool IsAdd = match(U, m_c_Add(m_Specific(FI.InnerInductionPHI),
+ m_Value(MatchedMul))) &&
+ match(MatchedMul, m_c_Mul(m_Specific(FI.OuterInductionPHI),
+ m_Value(MatchedItCount)));
+
+ // Matches the same pattern as above, except it also looks for truncs
+ // on the phi, which can be the result of widening the induction variables.
+ bool IsAddTrunc = match(U, m_c_Add(m_Trunc(m_Specific(FI.InnerInductionPHI)),
+ m_Value(MatchedMul))) &&
+ match(MatchedMul,
+ m_c_Mul(m_Trunc(m_Specific(FI.OuterInductionPHI)),
+ m_Value(MatchedItCount)));
+
+ if ((IsAdd || IsAddTrunc) && MatchedItCount == InnerLimit) {
+ LLVM_DEBUG(dbgs() << "Use is optimisable\n");
+ ValidOuterPHIUses.insert(MatchedMul);
+ FI.LinearIVUses.insert(U);
+ } else {
+ LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n");
+ return false;
+ }
+ }
+
+ // Check that there are no uses of the outer IV other than the ones found
+ // as part of the pattern above.
+ for (User *U : FI.OuterInductionPHI->users()) {
+ if (U == FI.OuterIncrement)
+ continue;
+
+ auto IsValidOuterPHIUses = [&] (User *U) -> bool {
+ LLVM_DEBUG(dbgs() << "Found use of outer induction variable: "; U->dump());
+ if (!ValidOuterPHIUses.count(U)) {
+ LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n");
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << "Use is optimisable\n");
+ return true;
+ };
+
+ if (auto *V = dyn_cast<TruncInst>(U)) {
+ for (auto *K : V->users()) {
+ if (!IsValidOuterPHIUses(K))
+ return false;
+ }
+ continue;
+ }
+
+ if (!IsValidOuterPHIUses(U))
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "checkIVUsers: OK\n";
+ dbgs() << "Found " << FI.LinearIVUses.size()
+ << " value(s) that can be replaced:\n";
+ for (Value *V : FI.LinearIVUses) {
+ dbgs() << " ";
+ V->dump();
+ });
+ return true;
+}
+
+// Return an OverflowResult indicating whether the multiplication of
+// InnerLimit and OuterLimit can be assumed not to overflow.
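+// For instance (an illustrative case, not an exhaustive rule), if value
+// tracking shows both i32 limits fit in 16 bits, their 32-bit product cannot
+// wrap and NeverOverflows is returned.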
+static OverflowResult checkOverflow(struct FlattenInfo &FI,
+ DominatorTree *DT, AssumptionCache *AC) {
+ Function *F = FI.OuterLoop->getHeader()->getParent();
+ const DataLayout &DL = F->getParent()->getDataLayout();
+
+ // For debugging/testing.
+ if (AssumeNoOverflow)
+ return OverflowResult::NeverOverflows;
+
+ // Check if the multiply could not overflow due to known ranges of the
+ // input values.
+ OverflowResult OR = computeOverflowForUnsignedMul(
+ FI.InnerLimit, FI.OuterLimit, DL, AC,
+ FI.OuterLoop->getLoopPreheader()->getTerminator(), DT);
+ if (OR != OverflowResult::MayOverflow)
+ return OR;
+
+ for (Value *V : FI.LinearIVUses) {
+ for (Value *U : V->users()) {
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ // The IV is used as the operand of a GEP, and the IV is at least as
+ // wide as the address space of the GEP. In this case, the GEP would
+ // wrap around the address space before the IV increment wraps, which
+ // would be UB.
+ if (GEP->isInBounds() &&
+ V->getType()->getIntegerBitWidth() >=
+ DL.getPointerTypeSizeInBits(GEP->getType())) {
+ LLVM_DEBUG(
+ dbgs() << "use of linear IV would be UB if overflow occurred: ";
+ GEP->dump());
+ return OverflowResult::NeverOverflows;
+ }
+ }
+ }
+ }
+
+ return OverflowResult::MayOverflow;
+}
+
+static bool CanFlattenLoopPair(struct FlattenInfo &FI, DominatorTree *DT,
+ LoopInfo *LI, ScalarEvolution *SE,
+ AssumptionCache *AC, const TargetTransformInfo *TTI) {
+ SmallPtrSet<Instruction *, 8> IterationInstructions;
+ if (!findLoopComponents(FI.InnerLoop, IterationInstructions, FI.InnerInductionPHI,
+ FI.InnerLimit, FI.InnerIncrement, FI.InnerBranch, SE))
+ return false;
+ if (!findLoopComponents(FI.OuterLoop, IterationInstructions, FI.OuterInductionPHI,
+ FI.OuterLimit, FI.OuterIncrement, FI.OuterBranch, SE))
+ return false;
+
+ // Both of the loop limit values must be invariant in the outer loop
+ // (non-instructions are all inherently invariant).
+ if (!FI.OuterLoop->isLoopInvariant(FI.InnerLimit)) {
+ LLVM_DEBUG(dbgs() << "inner loop limit not invariant\n");
+ return false;
+ }
+ if (!FI.OuterLoop->isLoopInvariant(FI.OuterLimit)) {
+ LLVM_DEBUG(dbgs() << "outer loop limit not invariant\n");
+ return false;
+ }
+
+ if (!checkPHIs(FI, TTI))
+ return false;
+
+ // FIXME: it should be possible to handle different types correctly.
+ if (FI.InnerInductionPHI->getType() != FI.OuterInductionPHI->getType())
+ return false;
+
+ if (!checkOuterLoopInsts(FI, IterationInstructions, TTI))
+ return false;
+
+ // Find the values in the loop that can be replaced with the linearized
+ // induction variable, and check that there are no other uses of the inner
+ // or outer induction variable. If there were, we could still do this
+ // transformation, but we'd have to insert a div/mod to calculate the
+ // original IVs, so it wouldn't be profitable.
+ if (!checkIVUsers(FI))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "CanFlattenLoopPair: OK\n");
+ return true;
+}
+
+static bool DoFlattenLoopPair(struct FlattenInfo &FI, DominatorTree *DT,
+ LoopInfo *LI, ScalarEvolution *SE,
+ AssumptionCache *AC,
+ const TargetTransformInfo *TTI) {
+ Function *F = FI.OuterLoop->getHeader()->getParent();
+ LLVM_DEBUG(dbgs() << "Checks all passed, doing the transformation\n");
+ {
+ using namespace ore;
+ OptimizationRemark Remark(DEBUG_TYPE, "Flattened", FI.InnerLoop->getStartLoc(),
+ FI.InnerLoop->getHeader());
+ OptimizationRemarkEmitter ORE(F);
+ Remark << "Flattened into outer loop";
+ ORE.emit(Remark);
+ }
+
+ Value *NewTripCount =
+ BinaryOperator::CreateMul(FI.InnerLimit, FI.OuterLimit, "flatten.tripcount",
+ FI.OuterLoop->getLoopPreheader()->getTerminator());
+ LLVM_DEBUG(dbgs() << "Created new trip count in preheader: ";
+ NewTripCount->dump());
+
+ // Fix up PHI nodes that take values from the inner loop back-edge, which
+ // we are about to remove.
+ FI.InnerInductionPHI->removeIncomingValue(FI.InnerLoop->getLoopLatch());
+
+ // The old PHIs will be optimised away later, but for now we can't leave
+ // them in an invalid state, so we update them here too.
+ for (PHINode *PHI : FI.InnerPHIsToTransform)
+ PHI->removeIncomingValue(FI.InnerLoop->getLoopLatch());
+
+ // Modify the trip count of the outer loop to be the product of the two
+ // trip counts.
+ cast<User>(FI.OuterBranch->getCondition())->setOperand(1, NewTripCount);
+
+ // Replace the inner loop backedge with an unconditional branch to the exit.
+ BasicBlock *InnerExitBlock = FI.InnerLoop->getExitBlock();
+ BasicBlock *InnerExitingBlock = FI.InnerLoop->getExitingBlock();
+ InnerExitingBlock->getTerminator()->eraseFromParent();
+ BranchInst::Create(InnerExitBlock, InnerExitingBlock);
+ DT->deleteEdge(InnerExitingBlock, FI.InnerLoop->getHeader());
+
+ // Replace all uses of the polynomial calculated from the two induction
+ // variables with the single new induction variable.
+ IRBuilder<> Builder(FI.OuterInductionPHI->getParent()->getTerminator());
+ for (Value *V : FI.LinearIVUses) {
+ Value *OuterValue = FI.OuterInductionPHI;
+ if (FI.Widened)
+ OuterValue = Builder.CreateTrunc(FI.OuterInductionPHI, V->getType(),
+ "flatten.trunciv");
+
+ LLVM_DEBUG(dbgs() << "Replacing: "; V->dump();
+ dbgs() << "with: "; OuterValue->dump());
+ V->replaceAllUsesWith(OuterValue);
+ }
+
+ // Tell LoopInfo, SCEV and the pass manager that the inner loop has been
+ // deleted, and that any information they have about the outer loop is
+ // invalidated.
+ SE->forgetLoop(FI.OuterLoop);
+ SE->forgetLoop(FI.InnerLoop);
+ LI->erase(FI.InnerLoop);
+ return true;
+}
+
+static bool CanWidenIV(struct FlattenInfo &FI, DominatorTree *DT,
+ LoopInfo *LI, ScalarEvolution *SE,
+ AssumptionCache *AC, const TargetTransformInfo *TTI) {
+ if (!WidenIV) {
+ LLVM_DEBUG(dbgs() << "Widening the IVs is disabled\n");
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "Try widening the IVs\n");
+ Module *M = FI.InnerLoop->getHeader()->getParent()->getParent();
+ auto &DL = M->getDataLayout();
+ auto *InnerType = FI.InnerInductionPHI->getType();
+ auto *OuterType = FI.OuterInductionPHI->getType();
+ unsigned MaxLegalSize = DL.getLargestLegalIntTypeSizeInBits();
+ auto *MaxLegalType = DL.getLargestLegalIntType(M->getContext());
+
+ // If both induction types are less than the maximum legal integer width,
+ // promote both to the widest type available so we know calculating
+ // (OuterLimit * InnerLimit) as the new trip count is safe.
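+ // For instance (illustrative, target-dependent), two i32 IVs on a target
+ // whose largest legal integer type is i64 are both widened to i64, so the
+ // i64 product of the two original i32 limits cannot wrap.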
+ if (InnerType != OuterType ||
+ InnerType->getScalarSizeInBits() >= MaxLegalSize ||
+ MaxLegalType->getScalarSizeInBits() < InnerType->getScalarSizeInBits() * 2) {
+ LLVM_DEBUG(dbgs() << "Can't widen the IV\n");
+ return false;
+ }
+
+ SCEVExpander Rewriter(*SE, DL, "loopflatten");
+ SmallVector<WideIVInfo, 2> WideIVs;
+ SmallVector<WeakTrackingVH, 4> DeadInsts;
+ WideIVs.push_back( {FI.InnerInductionPHI, MaxLegalType, false });
+ WideIVs.push_back( {FI.OuterInductionPHI, MaxLegalType, false });
+ unsigned ElimExt;
+ unsigned Widened;
+
+ for (unsigned i = 0; i < WideIVs.size(); i++) {
+ PHINode *WidePhi = createWideIV(WideIVs[i], LI, SE, Rewriter, DT, DeadInsts,
+ ElimExt, Widened, true /* HasGuards */,
+ true /* UsePostIncrementRanges */);
+ if (!WidePhi)
+ return false;
+ LLVM_DEBUG(dbgs() << "Created wide phi: "; WidePhi->dump());
+ LLVM_DEBUG(dbgs() << "Deleting old phi: "; WideIVs[i].NarrowIV->dump());
+ RecursivelyDeleteDeadPHINode(WideIVs[i].NarrowIV);
+ }
+ // After widening, rediscover all the loop components.
+ assert(Widened && "Widenend IV expected");
+ FI.Widened = true;
+ return CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
+}
+
+static bool FlattenLoopPair(struct FlattenInfo &FI, DominatorTree *DT,
+ LoopInfo *LI, ScalarEvolution *SE,
+ AssumptionCache *AC,
+ const TargetTransformInfo *TTI) {
+ LLVM_DEBUG(
+ dbgs() << "Loop flattening running on outer loop "
+ << FI.OuterLoop->getHeader()->getName() << " and inner loop "
+ << FI.InnerLoop->getHeader()->getName() << " in "
+ << FI.OuterLoop->getHeader()->getParent()->getName() << "\n");
+
+ if (!CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI))
+ return false;
+
+ // Check if we can widen the induction variables to avoid overflow checks.
+ if (CanWidenIV(FI, DT, LI, SE, AC, TTI))
+ return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
+
+ // Check if the new iteration variable might overflow. In this case, we
+ // need to version the loop, and select the original version at runtime if
+ // the iteration space is too large.
+ // TODO: We currently don't version the loop.
+ OverflowResult OR = checkOverflow(FI, DT, AC);
+ if (OR == OverflowResult::AlwaysOverflowsHigh ||
+ OR == OverflowResult::AlwaysOverflowsLow) {
+ LLVM_DEBUG(dbgs() << "Multiply would always overflow, so not profitable\n");
+ return false;
+ } else if (OR == OverflowResult::MayOverflow) {
+ LLVM_DEBUG(dbgs() << "Multiply might overflow, not flattening\n");
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "Multiply cannot overflow, modifying loop in-place\n");
+ return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
+}
+
+bool Flatten(DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE,
+ AssumptionCache *AC, TargetTransformInfo *TTI) {
+ bool Changed = false;
+ for (auto *InnerLoop : LI->getLoopsInPreorder()) {
+ auto *OuterLoop = InnerLoop->getParentLoop();
+ if (!OuterLoop)
+ continue;
+ struct FlattenInfo FI(OuterLoop, InnerLoop);
+ Changed |= FlattenLoopPair(FI, DT, LI, SE, AC, TTI);
+ }
+ return Changed;
+}
+
+PreservedAnalyses LoopFlattenPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ auto *LI = &AM.getResult<LoopAnalysis>(F);
+ auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
+ auto *AC = &AM.getResult<AssumptionAnalysis>(F);
+ auto *TTI = &AM.getResult<TargetIRAnalysis>(F);
+
+ if (!Flatten(DT, LI, SE, AC, TTI))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
+
+namespace {
+class LoopFlattenLegacyPass : public FunctionPass {
+public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopFlattenLegacyPass() : FunctionPass(ID) {
+ initializeLoopFlattenLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ // Possibly flatten each eligible loop nest in the function.
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ getLoopAnalysisUsage(AU);
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<TargetTransformInfoWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addPreserved<AssumptionCacheTracker>();
+ }
+};
+} // namespace
+
+char LoopFlattenLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_END(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops",
+ false, false)
+
+FunctionPass *llvm::createLoopFlattenPass() { return new LoopFlattenLegacyPass(); }
+
+bool LoopFlattenLegacyPass::runOnFunction(Function &F) {
+ ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ auto &TTIP = getAnalysis<TargetTransformInfoWrapperPass>();
+ auto *TTI = &TTIP.getTTI(F);
+ auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ return Flatten(DT, LI, SE, AC, TTI);
+}
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopFuse.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopFuse.cpp
index b5f8dfa9aa..8131b7060a 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopFuse.cpp
@@ -46,7 +46,7 @@
#include "llvm/Transforms/Scalar/LoopFuse.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -54,7 +54,7 @@
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
@@ -66,7 +66,7 @@
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CodeMoverUtils.h"
-#include "llvm/Transforms/Utils/LoopPeel.h"
+#include "llvm/Transforms/Utils/LoopPeel.h"
using namespace llvm;
@@ -117,11 +117,11 @@ static cl::opt<FusionDependenceAnalysisChoice> FusionDependenceAnalysis(
"Use all available analyses")),
cl::Hidden, cl::init(FUSION_DEPENDENCE_ANALYSIS_ALL), cl::ZeroOrMore);
-static cl::opt<unsigned> FusionPeelMaxCount(
- "loop-fusion-peel-max-count", cl::init(0), cl::Hidden,
- cl::desc("Max number of iterations to be peeled from a loop, such that "
- "fusion can take place"));
-
+static cl::opt<unsigned> FusionPeelMaxCount(
+ "loop-fusion-peel-max-count", cl::init(0), cl::Hidden,
+ cl::desc("Max number of iterations to be peeled from a loop, such that "
+ "fusion can take place"));
+
#ifndef NDEBUG
static cl::opt<bool>
VerboseFusionDebugging("loop-fusion-verbose-debug",
@@ -165,12 +165,12 @@ struct FusionCandidate {
bool Valid;
/// Guard branch of the loop, if it exists
BranchInst *GuardBranch;
- /// Peeling Paramaters of the Loop.
- TTI::PeelingPreferences PP;
- /// Can you Peel this Loop?
- bool AbleToPeel;
- /// Has this loop been Peeled
- bool Peeled;
+ /// Peeling Parameters of the Loop.
+ TTI::PeelingPreferences PP;
+ /// Can you Peel this Loop?
+ bool AbleToPeel;
+ /// Has this loop been Peeled
+ bool Peeled;
/// Dominator and PostDominator trees are needed for the
/// FusionCandidateCompare function, required by FusionCandidateSet to
@@ -182,13 +182,13 @@ struct FusionCandidate {
OptimizationRemarkEmitter &ORE;
FusionCandidate(Loop *L, const DominatorTree *DT,
- const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE,
- TTI::PeelingPreferences PP)
+ const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE,
+ TTI::PeelingPreferences PP)
: Preheader(L->getLoopPreheader()), Header(L->getHeader()),
ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()),
Latch(L->getLoopLatch()), L(L), Valid(true),
- GuardBranch(L->getLoopGuardBranch()), PP(PP), AbleToPeel(canPeel(L)),
- Peeled(false), DT(DT), PDT(PDT), ORE(ORE) {
+ GuardBranch(L->getLoopGuardBranch()), PP(PP), AbleToPeel(canPeel(L)),
+ Peeled(false), DT(DT), PDT(PDT), ORE(ORE) {
// Walk over all blocks in the loop and check for conditions that may
// prevent fusion. For each block, walk over all instructions and collect
@@ -259,17 +259,17 @@ struct FusionCandidate {
return Preheader;
}
- /// After Peeling the loop is modified quite a bit, hence all of the Blocks
- /// need to be updated accordingly.
- void updateAfterPeeling() {
- Preheader = L->getLoopPreheader();
- Header = L->getHeader();
- ExitingBlock = L->getExitingBlock();
- ExitBlock = L->getExitBlock();
- Latch = L->getLoopLatch();
- verify();
- }
-
+ /// After Peeling the loop is modified quite a bit, hence all of the Blocks
+ /// need to be updated accordingly.
+ void updateAfterPeeling() {
+ Preheader = L->getLoopPreheader();
+ Header = L->getHeader();
+ ExitingBlock = L->getExitingBlock();
+ ExitBlock = L->getExitBlock();
+ Latch = L->getLoopLatch();
+ verify();
+ }
+
/// Given a guarded loop, get the successor of the guard that is not in the
/// loop.
///
@@ -281,8 +281,8 @@ struct FusionCandidate {
assert(GuardBranch && "Only valid on guarded loops.");
assert(GuardBranch->isConditional() &&
"Expecting guard to be a conditional branch.");
- if (Peeled)
- return GuardBranch->getSuccessor(1);
+ if (Peeled)
+ return GuardBranch->getSuccessor(1);
return (GuardBranch->getSuccessor(0) == Preheader)
? GuardBranch->getSuccessor(1)
: GuardBranch->getSuccessor(0);
@@ -544,17 +544,17 @@ private:
ScalarEvolution &SE;
PostDominatorTree &PDT;
OptimizationRemarkEmitter &ORE;
- AssumptionCache &AC;
-
- const TargetTransformInfo &TTI;
+ AssumptionCache &AC;
+ const TargetTransformInfo &TTI;
+
public:
LoopFuser(LoopInfo &LI, DominatorTree &DT, DependenceInfo &DI,
ScalarEvolution &SE, PostDominatorTree &PDT,
- OptimizationRemarkEmitter &ORE, const DataLayout &DL,
- AssumptionCache &AC, const TargetTransformInfo &TTI)
+ OptimizationRemarkEmitter &ORE, const DataLayout &DL,
+ AssumptionCache &AC, const TargetTransformInfo &TTI)
: LDT(LI), DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy), LI(LI),
- DT(DT), DI(DI), SE(SE), PDT(PDT), ORE(ORE), AC(AC), TTI(TTI) {}
+ DT(DT), DI(DI), SE(SE), PDT(PDT), ORE(ORE), AC(AC), TTI(TTI) {}
/// This is the main entry point for loop fusion. It will traverse the
/// specified function and collect candidate loops to fuse, starting at the
@@ -639,9 +639,9 @@ private:
/// Flow Equivalent sets, sorted by dominance.
void collectFusionCandidates(const LoopVector &LV) {
for (Loop *L : LV) {
- TTI::PeelingPreferences PP =
- gatherPeelingPreferences(L, SE, TTI, None, None);
- FusionCandidate CurrCand(L, &DT, &PDT, ORE, PP);
+ TTI::PeelingPreferences PP =
+ gatherPeelingPreferences(L, SE, TTI, None, None);
+ FusionCandidate CurrCand(L, &DT, &PDT, ORE, PP);
if (!CurrCand.isEligibleForFusion(SE))
continue;
@@ -691,135 +691,135 @@ private:
/// Determine if two fusion candidates have the same trip count (i.e., they
/// execute the same number of iterations).
///
- /// This function will return a pair of values. The first is a boolean,
- /// stating whether or not the two candidates are known at compile time to
- /// have the same TripCount. The second is the difference in the two
- /// TripCounts. This information can be used later to determine whether or not
- /// peeling can be performed on either one of the candiates.
- std::pair<bool, Optional<unsigned>>
- haveIdenticalTripCounts(const FusionCandidate &FC0,
- const FusionCandidate &FC1) const {
-
+ /// This function will return a pair of values. The first is a boolean,
+ /// stating whether or not the two candidates are known at compile time to
+ /// have the same TripCount. The second is the difference in the two
+ /// TripCounts. This information can be used later to determine whether or not
+ /// peeling can be performed on either one of the candidates.
+ std::pair<bool, Optional<unsigned>>
+ haveIdenticalTripCounts(const FusionCandidate &FC0,
+ const FusionCandidate &FC1) const {
+
const SCEV *TripCount0 = SE.getBackedgeTakenCount(FC0.L);
if (isa<SCEVCouldNotCompute>(TripCount0)) {
UncomputableTripCount++;
LLVM_DEBUG(dbgs() << "Trip count of first loop could not be computed!");
- return {false, None};
+ return {false, None};
}
const SCEV *TripCount1 = SE.getBackedgeTakenCount(FC1.L);
if (isa<SCEVCouldNotCompute>(TripCount1)) {
UncomputableTripCount++;
LLVM_DEBUG(dbgs() << "Trip count of second loop could not be computed!");
- return {false, None};
+ return {false, None};
}
-
+
LLVM_DEBUG(dbgs() << "\tTrip counts: " << *TripCount0 << " & "
<< *TripCount1 << " are "
<< (TripCount0 == TripCount1 ? "identical" : "different")
<< "\n");
- if (TripCount0 == TripCount1)
- return {true, 0};
-
- LLVM_DEBUG(dbgs() << "The loops do not have the same tripcount, "
- "determining the difference between trip counts\n");
-
- // Currently only considering loops with a single exit point
- // and a non-constant trip count.
- const unsigned TC0 = SE.getSmallConstantTripCount(FC0.L);
- const unsigned TC1 = SE.getSmallConstantTripCount(FC1.L);
-
- // If any of the tripcounts are zero that means that loop(s) do not have
- // a single exit or a constant tripcount.
- if (TC0 == 0 || TC1 == 0) {
- LLVM_DEBUG(dbgs() << "Loop(s) do not have a single exit point or do not "
- "have a constant number of iterations. Peeling "
- "is not benefical\n");
- return {false, None};
- }
-
- Optional<unsigned> Difference = None;
- int Diff = TC0 - TC1;
-
- if (Diff > 0)
- Difference = Diff;
- else {
- LLVM_DEBUG(
- dbgs() << "Difference is less than 0. FC1 (second loop) has more "
- "iterations than the first one. Currently not supported\n");
- }
-
- LLVM_DEBUG(dbgs() << "Difference in loop trip count is: " << Difference
- << "\n");
-
- return {false, Difference};
- }
-
- void peelFusionCandidate(FusionCandidate &FC0, const FusionCandidate &FC1,
- unsigned PeelCount) {
- assert(FC0.AbleToPeel && "Should be able to peel loop");
-
- LLVM_DEBUG(dbgs() << "Attempting to peel first " << PeelCount
- << " iterations of the first loop. \n");
-
- FC0.Peeled = peelLoop(FC0.L, PeelCount, &LI, &SE, &DT, &AC, true);
- if (FC0.Peeled) {
- LLVM_DEBUG(dbgs() << "Done Peeling\n");
-
-#ifndef NDEBUG
- auto IdenticalTripCount = haveIdenticalTripCounts(FC0, FC1);
-
- assert(IdenticalTripCount.first && *IdenticalTripCount.second == 0 &&
- "Loops should have identical trip counts after peeling");
-#endif
-
- FC0.PP.PeelCount += PeelCount;
-
- // Peeling does not update the PDT
- PDT.recalculate(*FC0.Preheader->getParent());
-
- FC0.updateAfterPeeling();
-
- // In this case the iterations of the loop are constant, so the first
- // loop will execute completely (will not jump from one of
- // the peeled blocks to the second loop). Here we are updating the
- // branch conditions of each of the peeled blocks, such that it will
- // branch to its successor which is not the preheader of the second loop
- // in the case of unguarded loops, or the succesors of the exit block of
- // the first loop otherwise. Doing this update will ensure that the entry
- // block of the first loop dominates the entry block of the second loop.
- BasicBlock *BB =
- FC0.GuardBranch ? FC0.ExitBlock->getUniqueSuccessor() : FC1.Preheader;
- if (BB) {
- SmallVector<DominatorTree::UpdateType, 8> TreeUpdates;
- SmallVector<Instruction *, 8> WorkList;
- for (BasicBlock *Pred : predecessors(BB)) {
- if (Pred != FC0.ExitBlock) {
- WorkList.emplace_back(Pred->getTerminator());
- TreeUpdates.emplace_back(
- DominatorTree::UpdateType(DominatorTree::Delete, Pred, BB));
- }
- }
- // Cannot modify the predecessors inside the above loop as it will cause
- // the iterators to be nullptrs, causing memory errors.
- for (Instruction *CurrentBranch: WorkList) {
- BasicBlock *Succ = CurrentBranch->getSuccessor(0);
- if (Succ == BB)
- Succ = CurrentBranch->getSuccessor(1);
- ReplaceInstWithInst(CurrentBranch, BranchInst::Create(Succ));
- }
-
- DTU.applyUpdates(TreeUpdates);
- DTU.flush();
- }
- LLVM_DEBUG(
- dbgs() << "Sucessfully peeled " << FC0.PP.PeelCount
- << " iterations from the first loop.\n"
- "Both Loops have the same number of iterations now.\n");
- }
+ if (TripCount0 == TripCount1)
+ return {true, 0};
+
+ LLVM_DEBUG(dbgs() << "The loops do not have the same tripcount, "
+ "determining the difference between trip counts\n");
+
+ // Currently only considering loops with a single exit point
+ // and a non-constant trip count.
+ const unsigned TC0 = SE.getSmallConstantTripCount(FC0.L);
+ const unsigned TC1 = SE.getSmallConstantTripCount(FC1.L);
+
+ // If any of the tripcounts are zero that means that loop(s) do not have
+ // a single exit or a constant tripcount.
+ if (TC0 == 0 || TC1 == 0) {
+ LLVM_DEBUG(dbgs() << "Loop(s) do not have a single exit point or do not "
+ "have a constant number of iterations. Peeling "
+ "is not benefical\n");
+ return {false, None};
+ }
+
+ Optional<unsigned> Difference = None;
+ int Diff = TC0 - TC1;
+
+ if (Diff > 0)
+ Difference = Diff;
+ else {
+ LLVM_DEBUG(
+ dbgs() << "Difference is less than 0. FC1 (second loop) has more "
+ "iterations than the first one. Currently not supported\n");
+ }
+
+ LLVM_DEBUG(dbgs() << "Difference in loop trip count is: " << Difference
+ << "\n");
+
+ return {false, Difference};
}
+ void peelFusionCandidate(FusionCandidate &FC0, const FusionCandidate &FC1,
+ unsigned PeelCount) {
+ assert(FC0.AbleToPeel && "Should be able to peel loop");
+
+ LLVM_DEBUG(dbgs() << "Attempting to peel first " << PeelCount
+ << " iterations of the first loop. \n");
+
+ FC0.Peeled = peelLoop(FC0.L, PeelCount, &LI, &SE, &DT, &AC, true);
+ if (FC0.Peeled) {
+ LLVM_DEBUG(dbgs() << "Done Peeling\n");
+
+#ifndef NDEBUG
+ auto IdenticalTripCount = haveIdenticalTripCounts(FC0, FC1);
+
+ assert(IdenticalTripCount.first && *IdenticalTripCount.second == 0 &&
+ "Loops should have identical trip counts after peeling");
+#endif
+
+ FC0.PP.PeelCount += PeelCount;
+
+ // Peeling does not update the PDT
+ PDT.recalculate(*FC0.Preheader->getParent());
+
+ FC0.updateAfterPeeling();
+
+ // In this case the iterations of the loop are constant, so the first
+ // loop will execute completely (will not jump from one of
+ // the peeled blocks to the second loop). Here we are updating the
+ // branch conditions of each of the peeled blocks, such that it will
+ // branch to its successor which is not the preheader of the second loop
+ // in the case of unguarded loops, or the successors of the exit block of
+ // the first loop otherwise. Doing this update will ensure that the entry
+ // block of the first loop dominates the entry block of the second loop.
+ BasicBlock *BB =
+ FC0.GuardBranch ? FC0.ExitBlock->getUniqueSuccessor() : FC1.Preheader;
+ if (BB) {
+ SmallVector<DominatorTree::UpdateType, 8> TreeUpdates;
+ SmallVector<Instruction *, 8> WorkList;
+ for (BasicBlock *Pred : predecessors(BB)) {
+ if (Pred != FC0.ExitBlock) {
+ WorkList.emplace_back(Pred->getTerminator());
+ TreeUpdates.emplace_back(
+ DominatorTree::UpdateType(DominatorTree::Delete, Pred, BB));
+ }
+ }
+ // Cannot modify the predecessors inside the above loop as doing so
+ // invalidates the iterators, causing memory errors.
+ for (Instruction *CurrentBranch: WorkList) {
+ BasicBlock *Succ = CurrentBranch->getSuccessor(0);
+ if (Succ == BB)
+ Succ = CurrentBranch->getSuccessor(1);
+ ReplaceInstWithInst(CurrentBranch, BranchInst::Create(Succ));
+ }
+
+ DTU.applyUpdates(TreeUpdates);
+ DTU.flush();
+ }
+ LLVM_DEBUG(
+ dbgs() << "Sucessfully peeled " << FC0.PP.PeelCount
+ << " iterations from the first loop.\n"
+ "Both Loops have the same number of iterations now.\n");
+ }
+ }
+
/// Walk each set of control flow equivalent fusion candidates and attempt to
/// fuse them. This does a single linear traversal of all candidates in the
/// set. The conditions for legal fusion are checked at this point. If a pair
@@ -851,32 +851,32 @@ private:
FC0->verify();
FC1->verify();
- // Check if the candidates have identical tripcounts (first value of
- // pair), and if not check the difference in the tripcounts between
- // the loops (second value of pair). The difference is not equal to
- // None iff the loops iterate a constant number of times, and have a
- // single exit.
- std::pair<bool, Optional<unsigned>> IdenticalTripCountRes =
- haveIdenticalTripCounts(*FC0, *FC1);
- bool SameTripCount = IdenticalTripCountRes.first;
- Optional<unsigned> TCDifference = IdenticalTripCountRes.second;
-
- // Here we are checking that FC0 (the first loop) can be peeled, and
- // both loops have different tripcounts.
- if (FC0->AbleToPeel && !SameTripCount && TCDifference) {
- if (*TCDifference > FusionPeelMaxCount) {
- LLVM_DEBUG(dbgs()
- << "Difference in loop trip counts: " << *TCDifference
- << " is greater than maximum peel count specificed: "
- << FusionPeelMaxCount << "\n");
- } else {
- // Dependent on peeling being performed on the first loop, and
- // assuming all other conditions for fusion return true.
- SameTripCount = true;
- }
- }
-
- if (!SameTripCount) {
+ // Check if the candidates have identical tripcounts (first value of
+ // pair), and if not check the difference in the tripcounts between
+ // the loops (second value of pair). The difference is not equal to
+ // None iff the loops iterate a constant number of times, and have a
+ // single exit.
+ std::pair<bool, Optional<unsigned>> IdenticalTripCountRes =
+ haveIdenticalTripCounts(*FC0, *FC1);
+ bool SameTripCount = IdenticalTripCountRes.first;
+ Optional<unsigned> TCDifference = IdenticalTripCountRes.second;
+
+ // Here we are checking that FC0 (the first loop) can be peeled, and
+ // both loops have different tripcounts.
+ if (FC0->AbleToPeel && !SameTripCount && TCDifference) {
+ if (*TCDifference > FusionPeelMaxCount) {
+ LLVM_DEBUG(dbgs()
+ << "Difference in loop trip counts: " << *TCDifference
+ << " is greater than maximum peel count specificed: "
+ << FusionPeelMaxCount << "\n");
+ } else {
+ // Dependent on peeling being performed on the first loop, and
+ // assuming all other conditions for fusion return true.
+ SameTripCount = true;
+ }
+ }
+
+ if (!SameTripCount) {
LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip "
"counts. Not fusing.\n");
reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
@@ -894,7 +894,7 @@ private:
// Ensure that FC0 and FC1 have identical guards.
// If one (or both) are not guarded, this check is not necessary.
if (FC0->GuardBranch && FC1->GuardBranch &&
- !haveIdenticalGuards(*FC0, *FC1) && !TCDifference) {
+ !haveIdenticalGuards(*FC0, *FC1) && !TCDifference) {
LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical "
"guards. Not Fusing.\n");
reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
@@ -963,23 +963,23 @@ private:
LLVM_DEBUG(dbgs() << "\tFusion is performed: " << *FC0 << " and "
<< *FC1 << "\n");
- FusionCandidate FC0Copy = *FC0;
- // Peel the loop after determining that fusion is legal. The Loops
- // will still be safe to fuse after the peeling is performed.
- bool Peel = TCDifference && *TCDifference > 0;
- if (Peel)
- peelFusionCandidate(FC0Copy, *FC1, *TCDifference);
-
+ FusionCandidate FC0Copy = *FC0;
+ // Peel the loop after determining that fusion is legal. The Loops
+ // will still be safe to fuse after the peeling is performed.
+ bool Peel = TCDifference && *TCDifference > 0;
+ if (Peel)
+ peelFusionCandidate(FC0Copy, *FC1, *TCDifference);
+
// Report fusion to the Optimization Remarks.
// Note this needs to be done *before* performFusion because
// performFusion will change the original loops, making it not
// possible to identify them after fusion is complete.
- reportLoopFusion<OptimizationRemark>((Peel ? FC0Copy : *FC0), *FC1,
- FuseCounter);
+ reportLoopFusion<OptimizationRemark>((Peel ? FC0Copy : *FC0), *FC1,
+ FuseCounter);
- FusionCandidate FusedCand(
- performFusion((Peel ? FC0Copy : *FC0), *FC1), &DT, &PDT, ORE,
- FC0Copy.PP);
+ FusionCandidate FusedCand(
+ performFusion((Peel ? FC0Copy : *FC0), *FC1), &DT, &PDT, ORE,
+ FC0Copy.PP);
FusedCand.verify();
assert(FusedCand.isEligibleForFusion(SE) &&
"Fused candidate should be eligible for fusion!");
@@ -1256,17 +1256,17 @@ private:
return (FC1.GuardBranch->getSuccessor(1) == FC1.Preheader);
}
- /// Modify the latch branch of FC to be unconditional since successors of the
- /// branch are the same.
+ /// Modify the latch branch of FC to be unconditional since successors of the
+ /// branch are the same.
void simplifyLatchBranch(const FusionCandidate &FC) const {
BranchInst *FCLatchBranch = dyn_cast<BranchInst>(FC.Latch->getTerminator());
if (FCLatchBranch) {
assert(FCLatchBranch->isConditional() &&
FCLatchBranch->getSuccessor(0) == FCLatchBranch->getSuccessor(1) &&
"Expecting the two successors of FCLatchBranch to be the same");
- BranchInst *NewBranch =
- BranchInst::Create(FCLatchBranch->getSuccessor(0));
- ReplaceInstWithInst(FCLatchBranch, NewBranch);
+ BranchInst *NewBranch =
+ BranchInst::Create(FCLatchBranch->getSuccessor(0));
+ ReplaceInstWithInst(FCLatchBranch, NewBranch);
}
}
@@ -1326,8 +1326,8 @@ private:
if (FC0.GuardBranch)
return fuseGuardedLoops(FC0, FC1);
- assert(FC1.Preheader ==
- (FC0.Peeled ? FC0.ExitBlock->getUniqueSuccessor() : FC0.ExitBlock));
+ assert(FC1.Preheader ==
+ (FC0.Peeled ? FC0.ExitBlock->getUniqueSuccessor() : FC0.ExitBlock));
assert(FC1.Preheader->size() == 1 &&
FC1.Preheader->getSingleSuccessor() == FC1.Header);
@@ -1369,30 +1369,30 @@ private:
// to FC1.Header? I think this is basically what the three sequences are
// trying to accomplish; however, doing this directly in the CFG may mean
// the DT/PDT becomes invalid
- if (!FC0.Peeled) {
- FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC1.Preheader,
- FC1.Header);
- TreeUpdates.emplace_back(DominatorTree::UpdateType(
- DominatorTree::Delete, FC0.ExitingBlock, FC1.Preheader));
- TreeUpdates.emplace_back(DominatorTree::UpdateType(
- DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
- } else {
- TreeUpdates.emplace_back(DominatorTree::UpdateType(
- DominatorTree::Delete, FC0.ExitBlock, FC1.Preheader));
-
- // Remove the ExitBlock of the first Loop (also not needed)
- FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC0.ExitBlock,
- FC1.Header);
- TreeUpdates.emplace_back(DominatorTree::UpdateType(
- DominatorTree::Delete, FC0.ExitingBlock, FC0.ExitBlock));
- FC0.ExitBlock->getTerminator()->eraseFromParent();
- TreeUpdates.emplace_back(DominatorTree::UpdateType(
- DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
- new UnreachableInst(FC0.ExitBlock->getContext(), FC0.ExitBlock);
- }
-
+ if (!FC0.Peeled) {
+ FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC1.Preheader,
+ FC1.Header);
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Delete, FC0.ExitingBlock, FC1.Preheader));
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
+ } else {
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Delete, FC0.ExitBlock, FC1.Preheader));
+
+ // Remove the ExitBlock of the first Loop (also not needed)
+ FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC0.ExitBlock,
+ FC1.Header);
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Delete, FC0.ExitingBlock, FC0.ExitBlock));
+ FC0.ExitBlock->getTerminator()->eraseFromParent();
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
+ new UnreachableInst(FC0.ExitBlock->getContext(), FC0.ExitBlock);
+ }
+
// The pre-header of L1 is not necessary anymore.
- assert(pred_empty(FC1.Preheader));
+ assert(pred_empty(FC1.Preheader));
FC1.Preheader->getTerminator()->eraseFromParent();
new UnreachableInst(FC1.Preheader->getContext(), FC1.Preheader);
TreeUpdates.emplace_back(DominatorTree::UpdateType(
@@ -1433,7 +1433,7 @@ private:
FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
- // Modify the latch branch of FC0 to be unconditional as both successors of
+ // Modify the latch branch of FC0 to be unconditional as both successors of
// the branch are the same.
simplifyLatchBranch(FC0);
@@ -1455,11 +1455,11 @@ private:
LI.removeBlock(FC1.Preheader);
DTU.deleteBB(FC1.Preheader);
- if (FC0.Peeled) {
- LI.removeBlock(FC0.ExitBlock);
- DTU.deleteBB(FC0.ExitBlock);
- }
-
+ if (FC0.Peeled) {
+ LI.removeBlock(FC0.ExitBlock);
+ DTU.deleteBB(FC0.ExitBlock);
+ }
+
DTU.flush();
// Is there a way to keep SE up-to-date so we don't need to forget the loops
@@ -1474,7 +1474,7 @@ private:
mergeLatch(FC0, FC1);
// Merge the loops.
- SmallVector<BasicBlock *, 8> Blocks(FC1.L->blocks());
+ SmallVector<BasicBlock *, 8> Blocks(FC1.L->blocks());
for (BasicBlock *BB : Blocks) {
FC0.L->addBlockEntry(BB);
FC1.L->removeBlockFromLoop(BB);
@@ -1482,7 +1482,7 @@ private:
continue;
LI.changeLoopFor(BB, FC0.L);
}
- while (!FC1.L->isInnermost()) {
+ while (!FC1.L->isInnermost()) {
const auto &ChildLoopIt = FC1.L->begin();
Loop *ChildLoop = *ChildLoopIt;
FC1.L->removeChildLoop(ChildLoopIt);
@@ -1555,15 +1555,15 @@ private:
BasicBlock *FC1GuardBlock = FC1.GuardBranch->getParent();
BasicBlock *FC0NonLoopBlock = FC0.getNonLoopBlock();
BasicBlock *FC1NonLoopBlock = FC1.getNonLoopBlock();
- BasicBlock *FC0ExitBlockSuccessor = FC0.ExitBlock->getUniqueSuccessor();
+ BasicBlock *FC0ExitBlockSuccessor = FC0.ExitBlock->getUniqueSuccessor();
// Move instructions from the exit block of FC0 to the beginning of the exit
- // block of FC1, in the case that the FC0 loop has not been peeled. In the
- // case that FC0 loop is peeled, then move the instructions of the successor
- // of the FC0 Exit block to the beginning of the exit block of FC1.
- moveInstructionsToTheBeginning(
- (FC0.Peeled ? *FC0ExitBlockSuccessor : *FC0.ExitBlock), *FC1.ExitBlock,
- DT, PDT, DI);
+ // block of FC1, in the case that the FC0 loop has not been peeled. In the
+ // case that FC0 loop is peeled, then move the instructions of the successor
+ // of the FC0 Exit block to the beginning of the exit block of FC1.
+ moveInstructionsToTheBeginning(
+ (FC0.Peeled ? *FC0ExitBlockSuccessor : *FC0.ExitBlock), *FC1.ExitBlock,
+ DT, PDT, DI);
// Move instructions from the guard block of FC1 to the end of the guard
// block of FC0.
@@ -1584,9 +1584,9 @@ private:
FC1NonLoopBlock->replacePhiUsesWith(FC1GuardBlock, FC0GuardBlock);
FC0.GuardBranch->replaceUsesOfWith(FC0NonLoopBlock, FC1NonLoopBlock);
- BasicBlock *BBToUpdate = FC0.Peeled ? FC0ExitBlockSuccessor : FC0.ExitBlock;
- BBToUpdate->getTerminator()->replaceUsesOfWith(FC1GuardBlock, FC1.Header);
-
+ BasicBlock *BBToUpdate = FC0.Peeled ? FC0ExitBlockSuccessor : FC0.ExitBlock;
+ BBToUpdate->getTerminator()->replaceUsesOfWith(FC1GuardBlock, FC1.Header);
+
// The guard of FC1 is not necessary anymore.
FC1.GuardBranch->eraseFromParent();
new UnreachableInst(FC1GuardBlock->getContext(), FC1GuardBlock);
@@ -1600,18 +1600,18 @@ private:
TreeUpdates.emplace_back(DominatorTree::UpdateType(
DominatorTree::Insert, FC0GuardBlock, FC1NonLoopBlock));
- if (FC0.Peeled) {
- // Remove the Block after the ExitBlock of FC0
- TreeUpdates.emplace_back(DominatorTree::UpdateType(
- DominatorTree::Delete, FC0ExitBlockSuccessor, FC1GuardBlock));
- FC0ExitBlockSuccessor->getTerminator()->eraseFromParent();
- new UnreachableInst(FC0ExitBlockSuccessor->getContext(),
- FC0ExitBlockSuccessor);
- }
-
- assert(pred_empty(FC1GuardBlock) &&
+ if (FC0.Peeled) {
+ // Remove the Block after the ExitBlock of FC0
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Delete, FC0ExitBlockSuccessor, FC1GuardBlock));
+ FC0ExitBlockSuccessor->getTerminator()->eraseFromParent();
+ new UnreachableInst(FC0ExitBlockSuccessor->getContext(),
+ FC0ExitBlockSuccessor);
+ }
+
+ assert(pred_empty(FC1GuardBlock) &&
"Expecting guard block to have no predecessors");
- assert(succ_empty(FC1GuardBlock) &&
+ assert(succ_empty(FC1GuardBlock) &&
"Expecting guard block to have no successors");
// Remember the phi nodes originally in the header of FC0 in order to rewire
@@ -1665,13 +1665,13 @@ private:
// TODO: In the future, we can handle non-empty exit blocks my merging any
// instructions from FC0 exit block into FC1 exit block prior to removing
// the block.
- assert(pred_empty(FC0.ExitBlock) && "Expecting exit block to be empty");
+ assert(pred_empty(FC0.ExitBlock) && "Expecting exit block to be empty");
FC0.ExitBlock->getTerminator()->eraseFromParent();
new UnreachableInst(FC0.ExitBlock->getContext(), FC0.ExitBlock);
// Remove FC1 Preheader
// The pre-header of L1 is not necessary anymore.
- assert(pred_empty(FC1.Preheader));
+ assert(pred_empty(FC1.Preheader));
FC1.Preheader->getTerminator()->eraseFromParent();
new UnreachableInst(FC1.Preheader->getContext(), FC1.Preheader);
TreeUpdates.emplace_back(DominatorTree::UpdateType(
@@ -1714,7 +1714,7 @@ private:
FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
- // Modify the latch branch of FC0 to be unconditional as both successors of
+ // Modify the latch branch of FC0 to be unconditional as both successors of
// the branch are the same.
simplifyLatchBranch(FC0);
@@ -1734,8 +1734,8 @@ private:
// All done
// Apply the updates to the Dominator Tree and cleanup.
- assert(succ_empty(FC1GuardBlock) && "FC1GuardBlock has successors!!");
- assert(pred_empty(FC1GuardBlock) && "FC1GuardBlock has predecessors!!");
+ assert(succ_empty(FC1GuardBlock) && "FC1GuardBlock has successors!!");
+ assert(pred_empty(FC1GuardBlock) && "FC1GuardBlock has predecessors!!");
// Update DT/PDT
DTU.applyUpdates(TreeUpdates);
@@ -1743,10 +1743,10 @@ private:
LI.removeBlock(FC1GuardBlock);
LI.removeBlock(FC1.Preheader);
LI.removeBlock(FC0.ExitBlock);
- if (FC0.Peeled) {
- LI.removeBlock(FC0ExitBlockSuccessor);
- DTU.deleteBB(FC0ExitBlockSuccessor);
- }
+ if (FC0.Peeled) {
+ LI.removeBlock(FC0ExitBlockSuccessor);
+ DTU.deleteBB(FC0ExitBlockSuccessor);
+ }
DTU.deleteBB(FC1GuardBlock);
DTU.deleteBB(FC1.Preheader);
DTU.deleteBB(FC0.ExitBlock);
@@ -1764,7 +1764,7 @@ private:
mergeLatch(FC0, FC1);
// Merge the loops.
- SmallVector<BasicBlock *, 8> Blocks(FC1.L->blocks());
+ SmallVector<BasicBlock *, 8> Blocks(FC1.L->blocks());
for (BasicBlock *BB : Blocks) {
FC0.L->addBlockEntry(BB);
FC1.L->removeBlockFromLoop(BB);
@@ -1772,7 +1772,7 @@ private:
continue;
LI.changeLoopFor(BB, FC0.L);
}
- while (!FC1.L->isInnermost()) {
+ while (!FC1.L->isInnermost()) {
const auto &ChildLoopIt = FC1.L->begin();
Loop *ChildLoop = *ChildLoopIt;
FC1.L->removeChildLoop(ChildLoopIt);
@@ -1812,8 +1812,8 @@ struct LoopFuseLegacy : public FunctionPass {
AU.addRequired<PostDominatorTreeWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
AU.addRequired<DependenceAnalysisWrapperPass>();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
@@ -1830,12 +1830,12 @@ struct LoopFuseLegacy : public FunctionPass {
auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
- auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- const TargetTransformInfo &TTI =
- getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- const DataLayout &DL = F.getParent()->getDataLayout();
+ auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ const TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ const DataLayout &DL = F.getParent()->getDataLayout();
- LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL, AC, TTI);
+ LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL, AC, TTI);
return LF.fuseLoops(F);
}
};
@@ -1848,11 +1848,11 @@ PreservedAnalyses LoopFusePass::run(Function &F, FunctionAnalysisManager &AM) {
auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
- auto &AC = AM.getResult<AssumptionAnalysis>(F);
- const TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
- const DataLayout &DL = F.getParent()->getDataLayout();
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ const TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
+ const DataLayout &DL = F.getParent()->getDataLayout();
- LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL, AC, TTI);
+ LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL, AC, TTI);
bool Changed = LF.fuseLoops(F);
if (!Changed)
return PreservedAnalyses::all();
@@ -1875,8 +1875,8 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoopFuseLegacy, "loop-fusion", "Loop Fusion", false, false)
FunctionPass *llvm::createLoopFusePass() { return new LoopFuseLegacy(); }
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 8064c02e2b..e60c95b7be 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -47,7 +47,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/CmpInstAnalysis.h"
+#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
@@ -80,7 +80,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -108,33 +108,33 @@ using namespace llvm;
STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
-STATISTIC(
- NumShiftUntilBitTest,
- "Number of uncountable loops recognized as 'shift until bitttest' idiom");
-
-bool DisableLIRP::All;
-static cl::opt<bool, true>
- DisableLIRPAll("disable-" DEBUG_TYPE "-all",
- cl::desc("Options to disable Loop Idiom Recognize Pass."),
- cl::location(DisableLIRP::All), cl::init(false),
- cl::ReallyHidden);
-
-bool DisableLIRP::Memset;
-static cl::opt<bool, true>
- DisableLIRPMemset("disable-" DEBUG_TYPE "-memset",
- cl::desc("Proceed with loop idiom recognize pass, but do "
- "not convert loop(s) to memset."),
- cl::location(DisableLIRP::Memset), cl::init(false),
- cl::ReallyHidden);
-
-bool DisableLIRP::Memcpy;
-static cl::opt<bool, true>
- DisableLIRPMemcpy("disable-" DEBUG_TYPE "-memcpy",
- cl::desc("Proceed with loop idiom recognize pass, but do "
- "not convert loop(s) to memcpy."),
- cl::location(DisableLIRP::Memcpy), cl::init(false),
- cl::ReallyHidden);
-
+STATISTIC(
+ NumShiftUntilBitTest,
+ "Number of uncountable loops recognized as 'shift until bitttest' idiom");
+
+bool DisableLIRP::All;
+static cl::opt<bool, true>
+ DisableLIRPAll("disable-" DEBUG_TYPE "-all",
+ cl::desc("Options to disable Loop Idiom Recognize Pass."),
+ cl::location(DisableLIRP::All), cl::init(false),
+ cl::ReallyHidden);
+
+bool DisableLIRP::Memset;
+static cl::opt<bool, true>
+ DisableLIRPMemset("disable-" DEBUG_TYPE "-memset",
+ cl::desc("Proceed with loop idiom recognize pass, but do "
+ "not convert loop(s) to memset."),
+ cl::location(DisableLIRP::Memset), cl::init(false),
+ cl::ReallyHidden);
+
+bool DisableLIRP::Memcpy;
+static cl::opt<bool, true>
+ DisableLIRPMemcpy("disable-" DEBUG_TYPE "-memcpy",
+ cl::desc("Proceed with loop idiom recognize pass, but do "
+ "not convert loop(s) to memcpy."),
+ cl::location(DisableLIRP::Memcpy), cl::init(false),
+ cl::ReallyHidden);
+
static cl::opt<bool> UseLIRCodeSizeHeurs(
"use-lir-code-size-heurs",
cl::desc("Use loop idiom recognition code size heuristics when compiling"
@@ -232,8 +232,8 @@ private:
const DebugLoc &DL, bool ZeroCheck,
bool IsCntPhiUsedOutsideLoop);
- bool recognizeShiftUntilBitTest();
-
+ bool recognizeShiftUntilBitTest();
+
/// @}
};
@@ -247,9 +247,9 @@ public:
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override {
- if (DisableLIRP::All)
- return false;
-
+ if (DisableLIRP::All)
+ return false;
+
if (skipLoop(L))
return false;
@@ -295,9 +295,9 @@ char LoopIdiomRecognizeLegacyPass::ID = 0;
PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &) {
- if (DisableLIRP::All)
- return PreservedAnalyses::all();
-
+ if (DisableLIRP::All)
+ return PreservedAnalyses::all();
+
const auto *DL = &L.getHeader()->getModule()->getDataLayout();
// For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
@@ -469,17 +469,17 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
Value *StoredVal = SI->getValueOperand();
Value *StorePtr = SI->getPointerOperand();
-  // Don't convert stores of non-integral pointer types to memsets (which store
- // integers).
- if (DL->isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
- return LegalStoreKind::None;
-
+  // Don't convert stores of non-integral pointer types to memsets (which store
+ // integers).
+ if (DL->isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
+ return LegalStoreKind::None;
+
// Reject stores that are so large that they overflow an unsigned.
- // When storing out scalable vectors we bail out for now, since the code
- // below currently only works for constant strides.
- TypeSize SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
- if (SizeInBits.isScalable() || (SizeInBits.getFixedSize() & 7) ||
- (SizeInBits.getFixedSize() >> 32) != 0)
+ // When storing out scalable vectors we bail out for now, since the code
+ // below currently only works for constant strides.
+ TypeSize SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
+ if (SizeInBits.isScalable() || (SizeInBits.getFixedSize() & 7) ||
+ (SizeInBits.getFixedSize() >> 32) != 0)
return LegalStoreKind::None;
// See if the pointer expression is an AddRec like {base,+,1} on the current
@@ -508,13 +508,13 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
// If we're allowed to form a memset, and the stored value would be
// acceptable for memset, use it.
- if (!UnorderedAtomic && HasMemset && SplatValue && !DisableLIRP::Memset &&
+ if (!UnorderedAtomic && HasMemset && SplatValue && !DisableLIRP::Memset &&
// Verify that the stored value is loop invariant. If not, we can't
// promote the memset.
CurLoop->isLoopInvariant(SplatValue)) {
// It looks like we can use SplatValue.
return LegalStoreKind::Memset;
- } else if (!UnorderedAtomic && HasMemsetPattern && !DisableLIRP::Memset &&
+ } else if (!UnorderedAtomic && HasMemsetPattern && !DisableLIRP::Memset &&
// Don't create memset_pattern16s with address spaces.
StorePtr->getType()->getPointerAddressSpace() == 0 &&
(PatternValue = getMemSetPatternValue(StoredVal, DL))) {
@@ -523,7 +523,7 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
}
// Otherwise, see if the store can be turned into a memcpy.
- if (HasMemcpy && !DisableLIRP::Memcpy) {
+ if (HasMemcpy && !DisableLIRP::Memcpy) {
// Check to see if the stride matches the size of the store. If so, then we
// know that every byte is touched in the loop.
APInt Stride = getStoreStride(StoreEv);
@@ -578,12 +578,12 @@ void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
break;
case LegalStoreKind::Memset: {
// Find the base pointer.
- Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
+ Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
StoreRefsForMemset[Ptr].push_back(SI);
} break;
case LegalStoreKind::MemsetPattern: {
// Find the base pointer.
- Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
+ Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
StoreRefsForMemsetPattern[Ptr].push_back(SI);
} break;
case LegalStoreKind::Memcpy:
@@ -851,7 +851,7 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
// Get the location that may be stored across the loop. Since the access is
// strided positively through memory, we say that the modified location starts
// at the pointer and has infinite size.
- LocationSize AccessSize = LocationSize::afterPointer();
+ LocationSize AccessSize = LocationSize::afterPointer();
// If the loop iterates a fixed number of times, we can refine the access size
// to be exactly the size of the memset, which is (BECount+1)*StoreSize
@@ -903,8 +903,8 @@ static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr,
// If we're going to need to zero extend the BE count, check if we can add
// one to it prior to zero extending without overflow. Provided this is safe,
// it allows better simplification of the +1.
- if (DL->getTypeSizeInBits(BECount->getType()).getFixedSize() <
- DL->getTypeSizeInBits(IntPtr).getFixedSize() &&
+ if (DL->getTypeSizeInBits(BECount->getType()).getFixedSize() <
+ DL->getTypeSizeInBits(IntPtr).getFixedSize() &&
SE->isLoopEntryGuardedByCond(
CurLoop, ICmpInst::ICMP_NE, BECount,
SE->getNegativeSCEV(SE->getOne(BECount->getType())))) {
@@ -947,12 +947,12 @@ bool LoopIdiomRecognize::processLoopStridedStore(
BasicBlock *Preheader = CurLoop->getLoopPreheader();
IRBuilder<> Builder(Preheader->getTerminator());
SCEVExpander Expander(*SE, *DL, "loop-idiom");
- SCEVExpanderCleaner ExpCleaner(Expander, *DT);
+ SCEVExpanderCleaner ExpCleaner(Expander, *DT);
Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
Type *IntIdxTy = DL->getIndexType(DestPtr->getType());
- bool Changed = false;
+ bool Changed = false;
const SCEV *Start = Ev->getStart();
// Handle negative strided loops.
if (NegStride)
@@ -961,7 +961,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
// TODO: ideally we should still be able to generate memset if SCEV expander
// is taught to generate the dependencies at the latest point.
if (!isSafeToExpand(Start, *SE))
- return Changed;
+ return Changed;
// Okay, we have a strided store "p[i]" of a splattable value. We can turn
// this into a memset in the loop preheader now if we want. However, this
@@ -970,22 +970,22 @@ bool LoopIdiomRecognize::processLoopStridedStore(
// base pointer and checking the region.
Value *BasePtr =
Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
-
- // From here on out, conservatively report to the pass manager that we've
- // changed the IR, even if we later clean up these added instructions. There
- // may be structural differences e.g. in the order of use lists not accounted
- // for in just a textual dump of the IR. This is written as a variable, even
- // though statically all the places this dominates could be replaced with
- // 'true', with the hope that anyone trying to be clever / "more precise" with
- // the return value will read this comment, and leave them alone.
- Changed = true;
-
+
+ // From here on out, conservatively report to the pass manager that we've
+ // changed the IR, even if we later clean up these added instructions. There
+ // may be structural differences e.g. in the order of use lists not accounted
+ // for in just a textual dump of the IR. This is written as a variable, even
+ // though statically all the places this dominates could be replaced with
+ // 'true', with the hope that anyone trying to be clever / "more precise" with
+ // the return value will read this comment, and leave them alone.
+ Changed = true;
+
if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount,
- StoreSize, *AA, Stores))
- return Changed;
+ StoreSize, *AA, Stores))
+ return Changed;
if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset))
- return Changed;
+ return Changed;
// Okay, everything looks good, insert the memset.
@@ -995,7 +995,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
// TODO: ideally we should still be able to generate memset if SCEV expander
// is taught to generate the dependencies at the latest point.
if (!isSafeToExpand(NumBytesS, *SE))
- return Changed;
+ return Changed;
Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
@@ -1054,7 +1054,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
++NumMemSet;
- ExpCleaner.markResultUsed();
+ ExpCleaner.markResultUsed();
return true;
}
@@ -1088,9 +1088,9 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
IRBuilder<> Builder(Preheader->getTerminator());
SCEVExpander Expander(*SE, *DL, "loop-idiom");
- SCEVExpanderCleaner ExpCleaner(Expander, *DT);
+ SCEVExpanderCleaner ExpCleaner(Expander, *DT);
- bool Changed = false;
+ bool Changed = false;
const SCEV *StrStart = StoreEv->getStart();
unsigned StrAS = SI->getPointerAddressSpace();
Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS));
@@ -1108,20 +1108,20 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
Value *StoreBasePtr = Expander.expandCodeFor(
StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
- // From here on out, conservatively report to the pass manager that we've
- // changed the IR, even if we later clean up these added instructions. There
- // may be structural differences e.g. in the order of use lists not accounted
- // for in just a textual dump of the IR. This is written as a variable, even
- // though statically all the places this dominates could be replaced with
- // 'true', with the hope that anyone trying to be clever / "more precise" with
- // the return value will read this comment, and leave them alone.
- Changed = true;
-
+ // From here on out, conservatively report to the pass manager that we've
+ // changed the IR, even if we later clean up these added instructions. There
+ // may be structural differences e.g. in the order of use lists not accounted
+ // for in just a textual dump of the IR. This is written as a variable, even
+ // though statically all the places this dominates could be replaced with
+ // 'true', with the hope that anyone trying to be clever / "more precise" with
+ // the return value will read this comment, and leave them alone.
+ Changed = true;
+
SmallPtrSet<Instruction *, 1> Stores;
Stores.insert(SI);
if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
StoreSize, *AA, Stores))
- return Changed;
+ return Changed;
const SCEV *LdStart = LoadEv->getStart();
unsigned LdAS = LI->getPointerAddressSpace();
@@ -1137,10 +1137,10 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
StoreSize, *AA, Stores))
- return Changed;
+ return Changed;
if (avoidLIRForMultiBlockLoop())
- return Changed;
+ return Changed;
// Okay, everything is safe, we can transform this!
@@ -1163,14 +1163,14 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
const Align StoreAlign = SI->getAlign();
const Align LoadAlign = LI->getAlign();
if (StoreAlign < StoreSize || LoadAlign < StoreSize)
- return Changed;
+ return Changed;
// If the element.atomic memcpy is not lowered into explicit
// loads/stores later, then it will be lowered into an element-size
// specific lib call. If the lib call doesn't exist for our store size, then
// we shouldn't generate the memcpy.
if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
- return Changed;
+ return Changed;
// Create the call.
// Note that unordered atomic loads/stores are *required* by the spec to
@@ -1208,7 +1208,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
++NumMemCpy;
- ExpCleaner.markResultUsed();
+ ExpCleaner.markResultUsed();
return true;
}
@@ -1218,7 +1218,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
bool IsLoopMemset) {
if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) {
- if (CurLoop->isOutermost() && (!IsMemset || !IsLoopMemset)) {
+ if (CurLoop->isOutermost() && (!IsMemset || !IsLoopMemset)) {
LLVM_DEBUG(dbgs() << " " << CurLoop->getHeader()->getParent()->getName()
<< " : LIR " << (IsMemset ? "Memset" : "Memcpy")
<< " avoided: multi-block top-level loop\n");
@@ -1235,8 +1235,8 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
<< "] Noncountable Loop %"
<< CurLoop->getHeader()->getName() << "\n");
- return recognizePopcount() || recognizeAndInsertFFS() ||
- recognizeShiftUntilBitTest();
+ return recognizePopcount() || recognizeAndInsertFFS() ||
+ recognizeShiftUntilBitTest();
}
/// Check if the given conditional branch is based on the comparison between
@@ -1483,7 +1483,7 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
return false;
   // step 4: Find the instruction which counts the CTLZ: cnt.next = cnt + 1
- // or cnt.next = cnt + -1.
+ // or cnt.next = cnt + -1.
// TODO: We can skip the step. If loop trip count is known (CTLZ),
// then all uses of "cnt.next" could be optimized to the trip count
// plus "cnt0". Currently it is not optimized.
@@ -1497,7 +1497,7 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
continue;
ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
- if (!Inc || (!Inc->isOne() && !Inc->isMinusOne()))
+ if (!Inc || (!Inc->isOne() && !Inc->isMinusOne()))
continue;
PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
@@ -1728,11 +1728,11 @@ void LoopIdiomRecognize::transformLoopToCountable(
Builder.SetCurrentDebugLocation(DL);
// Count = BitWidth - CTLZ(InitX);
- // NewCount = Count;
+ // NewCount = Count;
// If there are uses of CntPhi create:
- // NewCount = BitWidth - CTLZ(InitX >> 1);
- // Count = NewCount + 1;
- Value *InitXNext;
+ // NewCount = BitWidth - CTLZ(InitX >> 1);
+ // Count = NewCount + 1;
+ Value *InitXNext;
if (IsCntPhiUsedOutsideLoop) {
if (DefX->getOpcode() == Instruction::AShr)
InitXNext =
@@ -1747,31 +1747,31 @@ void LoopIdiomRecognize::transformLoopToCountable(
llvm_unreachable("Unexpected opcode!");
} else
InitXNext = InitX;
- Value *FFS = createFFSIntrinsic(Builder, InitXNext, DL, ZeroCheck, IntrinID);
- Value *Count = Builder.CreateSub(
- ConstantInt::get(FFS->getType(), FFS->getType()->getIntegerBitWidth()),
+ Value *FFS = createFFSIntrinsic(Builder, InitXNext, DL, ZeroCheck, IntrinID);
+ Value *Count = Builder.CreateSub(
+ ConstantInt::get(FFS->getType(), FFS->getType()->getIntegerBitWidth()),
FFS);
- Value *NewCount = Count;
+ Value *NewCount = Count;
if (IsCntPhiUsedOutsideLoop) {
- NewCount = Count;
- Count = Builder.CreateAdd(Count, ConstantInt::get(Count->getType(), 1));
+ NewCount = Count;
+ Count = Builder.CreateAdd(Count, ConstantInt::get(Count->getType(), 1));
}
- NewCount = Builder.CreateZExtOrTrunc(NewCount,
- cast<IntegerType>(CntInst->getType()));
+ NewCount = Builder.CreateZExtOrTrunc(NewCount,
+ cast<IntegerType>(CntInst->getType()));
Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader);
- if (cast<ConstantInt>(CntInst->getOperand(1))->isOne()) {
- // If the counter was being incremented in the loop, add NewCount to the
- // counter's initial value, but only if the initial value is not zero.
- ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
- if (!InitConst || !InitConst->isZero())
- NewCount = Builder.CreateAdd(NewCount, CntInitVal);
- } else {
- // If the count was being decremented in the loop, subtract NewCount from
- // the counter's initial value.
- NewCount = Builder.CreateSub(CntInitVal, NewCount);
- }
+ if (cast<ConstantInt>(CntInst->getOperand(1))->isOne()) {
+ // If the counter was being incremented in the loop, add NewCount to the
+ // counter's initial value, but only if the initial value is not zero.
+ ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
+ if (!InitConst || !InitConst->isZero())
+ NewCount = Builder.CreateAdd(NewCount, CntInitVal);
+ } else {
+ // If the count was being decremented in the loop, subtract NewCount from
+ // the counter's initial value.
+ NewCount = Builder.CreateSub(CntInitVal, NewCount);
+ }
// Step 2: Insert new IV and loop condition:
// loop:
@@ -1919,343 +1919,343 @@ void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
// loop. The loop would otherwise not be deleted even if it becomes empty.
SE->forgetLoop(CurLoop);
}
-
-/// Match loop-invariant value.
-template <typename SubPattern_t> struct match_LoopInvariant {
- SubPattern_t SubPattern;
- const Loop *L;
-
- match_LoopInvariant(const SubPattern_t &SP, const Loop *L)
- : SubPattern(SP), L(L) {}
-
- template <typename ITy> bool match(ITy *V) {
- return L->isLoopInvariant(V) && SubPattern.match(V);
- }
-};
-
-/// Matches if the value is loop-invariant.
-template <typename Ty>
-inline match_LoopInvariant<Ty> m_LoopInvariant(const Ty &M, const Loop *L) {
- return match_LoopInvariant<Ty>(M, L);
-}
-
-/// Return true if the idiom is detected in the loop.
-///
-/// The core idiom we are trying to detect is:
-/// \code
-/// entry:
-/// <...>
-/// %bitmask = shl i32 1, %bitpos
-/// br label %loop
-///
-/// loop:
-/// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
-/// %x.curr.bitmasked = and i32 %x.curr, %bitmask
-/// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
-/// %x.next = shl i32 %x.curr, 1
-/// <...>
-/// br i1 %x.curr.isbitunset, label %loop, label %end
-///
-/// end:
-/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
-/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
-/// <...>
-/// \endcode
-static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX,
- Value *&BitMask, Value *&BitPos,
- Value *&CurrX, Instruction *&NextX) {
- LLVM_DEBUG(dbgs() << DEBUG_TYPE
- " Performing shift-until-bittest idiom detection.\n");
-
- // Give up if the loop has multiple blocks or multiple backedges.
- if (CurLoop->getNumBlocks() != 1 || CurLoop->getNumBackEdges() != 1) {
- LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad block/backedge count.\n");
- return false;
- }
-
- BasicBlock *LoopHeaderBB = CurLoop->getHeader();
- BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
- assert(LoopPreheaderBB && "There is always a loop preheader.");
-
- using namespace PatternMatch;
-
- // Step 1: Check if the loop backedge is in desirable form.
-
- ICmpInst::Predicate Pred;
- Value *CmpLHS, *CmpRHS;
- BasicBlock *TrueBB, *FalseBB;
- if (!match(LoopHeaderBB->getTerminator(),
- m_Br(m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)),
- m_BasicBlock(TrueBB), m_BasicBlock(FalseBB)))) {
- LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge structure.\n");
- return false;
- }
-
- // Step 2: Check if the backedge's condition is in desirable form.
-
- auto MatchVariableBitMask = [&]() {
- return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) &&
- match(CmpLHS,
- m_c_And(m_Value(CurrX),
- m_CombineAnd(
- m_Value(BitMask),
- m_LoopInvariant(m_Shl(m_One(), m_Value(BitPos)),
- CurLoop))));
- };
- auto MatchConstantBitMask = [&]() {
- return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) &&
- match(CmpLHS, m_And(m_Value(CurrX),
- m_CombineAnd(m_Value(BitMask), m_Power2()))) &&
- (BitPos = ConstantExpr::getExactLogBase2(cast<Constant>(BitMask)));
- };
- auto MatchDecomposableConstantBitMask = [&]() {
- APInt Mask;
- return llvm::decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, CurrX, Mask) &&
- ICmpInst::isEquality(Pred) && Mask.isPowerOf2() &&
- (BitMask = ConstantInt::get(CurrX->getType(), Mask)) &&
- (BitPos = ConstantInt::get(CurrX->getType(), Mask.logBase2()));
- };
-
- if (!MatchVariableBitMask() && !MatchConstantBitMask() &&
- !MatchDecomposableConstantBitMask()) {
- LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge comparison.\n");
- return false;
- }
-
- // Step 3: Check if the recurrence is in desirable form.
- auto *CurrXPN = dyn_cast<PHINode>(CurrX);
- if (!CurrXPN || CurrXPN->getParent() != LoopHeaderBB) {
- LLVM_DEBUG(dbgs() << DEBUG_TYPE " Not an expected PHI node.\n");
- return false;
- }
-
- BaseX = CurrXPN->getIncomingValueForBlock(LoopPreheaderBB);
- NextX =
- dyn_cast<Instruction>(CurrXPN->getIncomingValueForBlock(LoopHeaderBB));
-
- if (!NextX || !match(NextX, m_Shl(m_Specific(CurrX), m_One()))) {
- // FIXME: support right-shift?
- LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad recurrence.\n");
- return false;
- }
-
- // Step 4: Check if the backedge's destinations are in desirable form.
-
- assert(ICmpInst::isEquality(Pred) &&
- "Should only get equality predicates here.");
-
- // cmp-br is commutative, so canonicalize to a single variant.
- if (Pred != ICmpInst::Predicate::ICMP_EQ) {
- Pred = ICmpInst::getInversePredicate(Pred);
- std::swap(TrueBB, FalseBB);
- }
-
- // We expect to exit loop when comparison yields false,
- // so when it yields true we should branch back to loop header.
- if (TrueBB != LoopHeaderBB) {
- LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge flow.\n");
- return false;
- }
-
- // Okay, idiom checks out.
- return true;
-}
-
-/// Look for the following loop:
-/// \code
-/// entry:
-/// <...>
-/// %bitmask = shl i32 1, %bitpos
-/// br label %loop
-///
-/// loop:
-/// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
-/// %x.curr.bitmasked = and i32 %x.curr, %bitmask
-/// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
-/// %x.next = shl i32 %x.curr, 1
-/// <...>
-/// br i1 %x.curr.isbitunset, label %loop, label %end
-///
-/// end:
-/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
-/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
-/// <...>
-/// \endcode
-///
-/// And transform it into:
-/// \code
-/// entry:
-/// %bitmask = shl i32 1, %bitpos
-/// %lowbitmask = add i32 %bitmask, -1
-/// %mask = or i32 %lowbitmask, %bitmask
-/// %x.masked = and i32 %x, %mask
-/// %x.masked.numleadingzeros = call i32 @llvm.ctlz.i32(i32 %x.masked,
-/// i1 true)
-/// %x.masked.numactivebits = sub i32 32, %x.masked.numleadingzeros
-/// %x.masked.leadingonepos = add i32 %x.masked.numactivebits, -1
-/// %backedgetakencount = sub i32 %bitpos, %x.masked.leadingonepos
-/// %tripcount = add i32 %backedgetakencount, 1
-/// %x.curr = shl i32 %x, %backedgetakencount
-/// %x.next = shl i32 %x, %tripcount
-/// br label %loop
-///
-/// loop:
-/// %loop.iv = phi i32 [ 0, %entry ], [ %loop.iv.next, %loop ]
-/// %loop.iv.next = add nuw i32 %loop.iv, 1
-/// %loop.ivcheck = icmp eq i32 %loop.iv.next, %tripcount
-/// <...>
-/// br i1 %loop.ivcheck, label %end, label %loop
-///
-/// end:
-/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
-/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
-/// <...>
-/// \endcode
-bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
- bool MadeChange = false;
-
- Value *X, *BitMask, *BitPos, *XCurr;
- Instruction *XNext;
- if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr,
- XNext)) {
- LLVM_DEBUG(dbgs() << DEBUG_TYPE
- " shift-until-bittest idiom detection failed.\n");
- return MadeChange;
- }
- LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom detected!\n");
-
- // Ok, it is the idiom we were looking for, we *could* transform this loop,
- // but is it profitable to transform?
-
- BasicBlock *LoopHeaderBB = CurLoop->getHeader();
- BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
- assert(LoopPreheaderBB && "There is always a loop preheader.");
-
- BasicBlock *SuccessorBB = CurLoop->getExitBlock();
-  assert(SuccessorBB && "There is only a single successor.");
-
- IRBuilder<> Builder(LoopPreheaderBB->getTerminator());
- Builder.SetCurrentDebugLocation(cast<Instruction>(XCurr)->getDebugLoc());
-
- Intrinsic::ID IntrID = Intrinsic::ctlz;
- Type *Ty = X->getType();
-
- TargetTransformInfo::TargetCostKind CostKind =
- TargetTransformInfo::TCK_SizeAndLatency;
-
-  // The rewrite is considered to be unprofitable if and only if the
- // intrinsic/shift we'll use are not cheap. Note that we are okay with *just*
- // making the loop countable, even if nothing else changes.
- IntrinsicCostAttributes Attrs(
- IntrID, Ty, {UndefValue::get(Ty), /*is_zero_undef=*/Builder.getTrue()});
- int Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind);
- if (Cost > TargetTransformInfo::TCC_Basic) {
- LLVM_DEBUG(dbgs() << DEBUG_TYPE
- " Intrinsic is too costly, not beneficial\n");
- return MadeChange;
- }
- if (TTI->getArithmeticInstrCost(Instruction::Shl, Ty, CostKind) >
- TargetTransformInfo::TCC_Basic) {
- LLVM_DEBUG(dbgs() << DEBUG_TYPE " Shift is too costly, not beneficial\n");
- return MadeChange;
- }
-
- // Ok, transform appears worthwhile.
- MadeChange = true;
-
- // Step 1: Compute the loop trip count.
-
- Value *LowBitMask = Builder.CreateAdd(BitMask, Constant::getAllOnesValue(Ty),
- BitPos->getName() + ".lowbitmask");
- Value *Mask =
- Builder.CreateOr(LowBitMask, BitMask, BitPos->getName() + ".mask");
- Value *XMasked = Builder.CreateAnd(X, Mask, X->getName() + ".masked");
- CallInst *XMaskedNumLeadingZeros = Builder.CreateIntrinsic(
- IntrID, Ty, {XMasked, /*is_zero_undef=*/Builder.getTrue()},
- /*FMFSource=*/nullptr, XMasked->getName() + ".numleadingzeros");
- Value *XMaskedNumActiveBits = Builder.CreateSub(
- ConstantInt::get(Ty, Ty->getScalarSizeInBits()), XMaskedNumLeadingZeros,
- XMasked->getName() + ".numactivebits");
- Value *XMaskedLeadingOnePos =
- Builder.CreateAdd(XMaskedNumActiveBits, Constant::getAllOnesValue(Ty),
- XMasked->getName() + ".leadingonepos");
-
- Value *LoopBackedgeTakenCount = Builder.CreateSub(
- BitPos, XMaskedLeadingOnePos, CurLoop->getName() + ".backedgetakencount");
- // We know loop's backedge-taken count, but what's loop's trip count?
-  // Note that NUW is always safe, while NSW is only safe for bitwidths != 2.
- Value *LoopTripCount =
- Builder.CreateNUWAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1),
- CurLoop->getName() + ".tripcount");
-
- // Step 2: Compute the recurrence's final value without a loop.
-
- // NewX is always safe to compute, because `LoopBackedgeTakenCount`
- // will always be smaller than `bitwidth(X)`, i.e. we never get poison.
- Value *NewX = Builder.CreateShl(X, LoopBackedgeTakenCount);
- NewX->takeName(XCurr);
- if (auto *I = dyn_cast<Instruction>(NewX))
- I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true);
-
- Value *NewXNext;
- // Rewriting XNext is more complicated, however, because `X << LoopTripCount`
- // will be poison iff `LoopTripCount == bitwidth(X)` (which will happen
- // iff `BitPos` is `bitwidth(x) - 1` and `X` is `1`). So unless we know
- // that isn't the case, we'll need to emit an alternative, safe IR.
- if (XNext->hasNoSignedWrap() || XNext->hasNoUnsignedWrap() ||
- PatternMatch::match(
- BitPos, PatternMatch::m_SpecificInt_ICMP(
- ICmpInst::ICMP_NE, APInt(Ty->getScalarSizeInBits(),
- Ty->getScalarSizeInBits() - 1))))
- NewXNext = Builder.CreateShl(X, LoopTripCount);
- else {
-    // Otherwise, just additionally shift by one. It's the smallest solution;
- // alternatively, we could check that NewX is INT_MIN (or BitPos is )
- // and select 0 instead.
- NewXNext = Builder.CreateShl(NewX, ConstantInt::get(Ty, 1));
- }
-
- NewXNext->takeName(XNext);
- if (auto *I = dyn_cast<Instruction>(NewXNext))
- I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true);
-
-  // Step 3: Adjust the successor basic block to receive the computed
- // recurrence's final value instead of the recurrence itself.
-
- XCurr->replaceUsesOutsideBlock(NewX, LoopHeaderBB);
- XNext->replaceUsesOutsideBlock(NewXNext, LoopHeaderBB);
-
- // Step 4: Rewrite the loop into a countable form, with canonical IV.
-
- // The new canonical induction variable.
- Builder.SetInsertPoint(&LoopHeaderBB->front());
- auto *IV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv");
-
- // The induction itself.
-  // Note that NUW is always safe, while NSW is only safe for bitwidths != 2.
- Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
- auto *IVNext = Builder.CreateNUWAdd(IV, ConstantInt::get(Ty, 1),
- IV->getName() + ".next");
-
- // The loop trip count check.
- auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
- CurLoop->getName() + ".ivcheck");
- Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
- LoopHeaderBB->getTerminator()->eraseFromParent();
-
- // Populate the IV PHI.
- IV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB);
- IV->addIncoming(IVNext, LoopHeaderBB);
-
- // Step 5: Forget the "non-computable" trip-count SCEV associated with the
- // loop. The loop would otherwise not be deleted even if it becomes empty.
-
- SE->forgetLoop(CurLoop);
-
- // Other passes will take care of actually deleting the loop if possible.
-
- LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom optimized!\n");
-
- ++NumShiftUntilBitTest;
- return MadeChange;
-}
+
+/// Match loop-invariant value.
+template <typename SubPattern_t> struct match_LoopInvariant {
+ SubPattern_t SubPattern;
+ const Loop *L;
+
+ match_LoopInvariant(const SubPattern_t &SP, const Loop *L)
+ : SubPattern(SP), L(L) {}
+
+ template <typename ITy> bool match(ITy *V) {
+ return L->isLoopInvariant(V) && SubPattern.match(V);
+ }
+};
+
+/// Matches if the value is loop-invariant.
+template <typename Ty>
+inline match_LoopInvariant<Ty> m_LoopInvariant(const Ty &M, const Loop *L) {
+ return match_LoopInvariant<Ty>(M, L);
+}
+
+/// Return true if the idiom is detected in the loop.
+///
+/// The core idiom we are trying to detect is:
+/// \code
+/// entry:
+/// <...>
+/// %bitmask = shl i32 1, %bitpos
+/// br label %loop
+///
+/// loop:
+/// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
+/// %x.curr.bitmasked = and i32 %x.curr, %bitmask
+/// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
+/// %x.next = shl i32 %x.curr, 1
+/// <...>
+/// br i1 %x.curr.isbitunset, label %loop, label %end
+///
+/// end:
+/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
+/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
+/// <...>
+/// \endcode
+static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX,
+ Value *&BitMask, Value *&BitPos,
+ Value *&CurrX, Instruction *&NextX) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE
+ " Performing shift-until-bittest idiom detection.\n");
+
+ // Give up if the loop has multiple blocks or multiple backedges.
+ if (CurLoop->getNumBlocks() != 1 || CurLoop->getNumBackEdges() != 1) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad block/backedge count.\n");
+ return false;
+ }
+
+ BasicBlock *LoopHeaderBB = CurLoop->getHeader();
+ BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
+ assert(LoopPreheaderBB && "There is always a loop preheader.");
+
+ using namespace PatternMatch;
+
+ // Step 1: Check if the loop backedge is in desirable form.
+
+ ICmpInst::Predicate Pred;
+ Value *CmpLHS, *CmpRHS;
+ BasicBlock *TrueBB, *FalseBB;
+ if (!match(LoopHeaderBB->getTerminator(),
+ m_Br(m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)),
+ m_BasicBlock(TrueBB), m_BasicBlock(FalseBB)))) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge structure.\n");
+ return false;
+ }
+
+ // Step 2: Check if the backedge's condition is in desirable form.
+
+ auto MatchVariableBitMask = [&]() {
+ return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) &&
+ match(CmpLHS,
+ m_c_And(m_Value(CurrX),
+ m_CombineAnd(
+ m_Value(BitMask),
+ m_LoopInvariant(m_Shl(m_One(), m_Value(BitPos)),
+ CurLoop))));
+ };
+ auto MatchConstantBitMask = [&]() {
+ return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) &&
+ match(CmpLHS, m_And(m_Value(CurrX),
+ m_CombineAnd(m_Value(BitMask), m_Power2()))) &&
+ (BitPos = ConstantExpr::getExactLogBase2(cast<Constant>(BitMask)));
+ };
+ auto MatchDecomposableConstantBitMask = [&]() {
+ APInt Mask;
+ return llvm::decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, CurrX, Mask) &&
+ ICmpInst::isEquality(Pred) && Mask.isPowerOf2() &&
+ (BitMask = ConstantInt::get(CurrX->getType(), Mask)) &&
+ (BitPos = ConstantInt::get(CurrX->getType(), Mask.logBase2()));
+ };
+
+ if (!MatchVariableBitMask() && !MatchConstantBitMask() &&
+ !MatchDecomposableConstantBitMask()) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge comparison.\n");
+ return false;
+ }
+
+ // Step 3: Check if the recurrence is in desirable form.
+ auto *CurrXPN = dyn_cast<PHINode>(CurrX);
+ if (!CurrXPN || CurrXPN->getParent() != LoopHeaderBB) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " Not an expected PHI node.\n");
+ return false;
+ }
+
+ BaseX = CurrXPN->getIncomingValueForBlock(LoopPreheaderBB);
+ NextX =
+ dyn_cast<Instruction>(CurrXPN->getIncomingValueForBlock(LoopHeaderBB));
+
+ if (!NextX || !match(NextX, m_Shl(m_Specific(CurrX), m_One()))) {
+ // FIXME: support right-shift?
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad recurrence.\n");
+ return false;
+ }
+
+ // Step 4: Check if the backedge's destinations are in desirable form.
+
+ assert(ICmpInst::isEquality(Pred) &&
+ "Should only get equality predicates here.");
+
+ // cmp-br is commutative, so canonicalize to a single variant.
+ if (Pred != ICmpInst::Predicate::ICMP_EQ) {
+ Pred = ICmpInst::getInversePredicate(Pred);
+ std::swap(TrueBB, FalseBB);
+ }
+
+ // We expect to exit loop when comparison yields false,
+ // so when it yields true we should branch back to loop header.
+ if (TrueBB != LoopHeaderBB) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge flow.\n");
+ return false;
+ }
+
+ // Okay, idiom checks out.
+ return true;
+}
+
+/// Look for the following loop:
+/// \code
+/// entry:
+/// <...>
+/// %bitmask = shl i32 1, %bitpos
+/// br label %loop
+///
+/// loop:
+/// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
+/// %x.curr.bitmasked = and i32 %x.curr, %bitmask
+/// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
+/// %x.next = shl i32 %x.curr, 1
+/// <...>
+/// br i1 %x.curr.isbitunset, label %loop, label %end
+///
+/// end:
+/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
+/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
+/// <...>
+/// \endcode
+///
+/// And transform it into:
+/// \code
+/// entry:
+/// %bitmask = shl i32 1, %bitpos
+/// %lowbitmask = add i32 %bitmask, -1
+/// %mask = or i32 %lowbitmask, %bitmask
+/// %x.masked = and i32 %x, %mask
+/// %x.masked.numleadingzeros = call i32 @llvm.ctlz.i32(i32 %x.masked,
+/// i1 true)
+/// %x.masked.numactivebits = sub i32 32, %x.masked.numleadingzeros
+/// %x.masked.leadingonepos = add i32 %x.masked.numactivebits, -1
+/// %backedgetakencount = sub i32 %bitpos, %x.masked.leadingonepos
+/// %tripcount = add i32 %backedgetakencount, 1
+/// %x.curr = shl i32 %x, %backedgetakencount
+/// %x.next = shl i32 %x, %tripcount
+/// br label %loop
+///
+/// loop:
+/// %loop.iv = phi i32 [ 0, %entry ], [ %loop.iv.next, %loop ]
+/// %loop.iv.next = add nuw i32 %loop.iv, 1
+/// %loop.ivcheck = icmp eq i32 %loop.iv.next, %tripcount
+/// <...>
+/// br i1 %loop.ivcheck, label %end, label %loop
+///
+/// end:
+/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
+/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
+/// <...>
+/// \endcode
+bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
+ bool MadeChange = false;
+
+ Value *X, *BitMask, *BitPos, *XCurr;
+ Instruction *XNext;
+ if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr,
+ XNext)) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE
+ " shift-until-bittest idiom detection failed.\n");
+ return MadeChange;
+ }
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom detected!\n");
+
+ // Ok, it is the idiom we were looking for, we *could* transform this loop,
+ // but is it profitable to transform?
+
+ BasicBlock *LoopHeaderBB = CurLoop->getHeader();
+ BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
+ assert(LoopPreheaderBB && "There is always a loop preheader.");
+
+ BasicBlock *SuccessorBB = CurLoop->getExitBlock();
+  assert(SuccessorBB && "There is only a single successor.");
+
+ IRBuilder<> Builder(LoopPreheaderBB->getTerminator());
+ Builder.SetCurrentDebugLocation(cast<Instruction>(XCurr)->getDebugLoc());
+
+ Intrinsic::ID IntrID = Intrinsic::ctlz;
+ Type *Ty = X->getType();
+
+ TargetTransformInfo::TargetCostKind CostKind =
+ TargetTransformInfo::TCK_SizeAndLatency;
+
+  // The rewrite is considered to be unprofitable if and only if the
+ // intrinsic/shift we'll use are not cheap. Note that we are okay with *just*
+ // making the loop countable, even if nothing else changes.
+ IntrinsicCostAttributes Attrs(
+ IntrID, Ty, {UndefValue::get(Ty), /*is_zero_undef=*/Builder.getTrue()});
+ int Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind);
+ if (Cost > TargetTransformInfo::TCC_Basic) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE
+ " Intrinsic is too costly, not beneficial\n");
+ return MadeChange;
+ }
+ if (TTI->getArithmeticInstrCost(Instruction::Shl, Ty, CostKind) >
+ TargetTransformInfo::TCC_Basic) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " Shift is too costly, not beneficial\n");
+ return MadeChange;
+ }
+
+ // Ok, transform appears worthwhile.
+ MadeChange = true;
+
+ // Step 1: Compute the loop trip count.
+
+ Value *LowBitMask = Builder.CreateAdd(BitMask, Constant::getAllOnesValue(Ty),
+ BitPos->getName() + ".lowbitmask");
+ Value *Mask =
+ Builder.CreateOr(LowBitMask, BitMask, BitPos->getName() + ".mask");
+ Value *XMasked = Builder.CreateAnd(X, Mask, X->getName() + ".masked");
+ CallInst *XMaskedNumLeadingZeros = Builder.CreateIntrinsic(
+ IntrID, Ty, {XMasked, /*is_zero_undef=*/Builder.getTrue()},
+ /*FMFSource=*/nullptr, XMasked->getName() + ".numleadingzeros");
+ Value *XMaskedNumActiveBits = Builder.CreateSub(
+ ConstantInt::get(Ty, Ty->getScalarSizeInBits()), XMaskedNumLeadingZeros,
+ XMasked->getName() + ".numactivebits");
+ Value *XMaskedLeadingOnePos =
+ Builder.CreateAdd(XMaskedNumActiveBits, Constant::getAllOnesValue(Ty),
+ XMasked->getName() + ".leadingonepos");
+
+ Value *LoopBackedgeTakenCount = Builder.CreateSub(
+ BitPos, XMaskedLeadingOnePos, CurLoop->getName() + ".backedgetakencount");
+ // We know loop's backedge-taken count, but what's loop's trip count?
+  // Note that NUW is always safe, while NSW is only safe for bitwidths != 2.
+ Value *LoopTripCount =
+ Builder.CreateNUWAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1),
+ CurLoop->getName() + ".tripcount");
+
+ // Step 2: Compute the recurrence's final value without a loop.
+
+ // NewX is always safe to compute, because `LoopBackedgeTakenCount`
+ // will always be smaller than `bitwidth(X)`, i.e. we never get poison.
+ Value *NewX = Builder.CreateShl(X, LoopBackedgeTakenCount);
+ NewX->takeName(XCurr);
+ if (auto *I = dyn_cast<Instruction>(NewX))
+ I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true);
+
+ Value *NewXNext;
+ // Rewriting XNext is more complicated, however, because `X << LoopTripCount`
+ // will be poison iff `LoopTripCount == bitwidth(X)` (which will happen
+ // iff `BitPos` is `bitwidth(x) - 1` and `X` is `1`). So unless we know
+ // that isn't the case, we'll need to emit an alternative, safe IR.
+ if (XNext->hasNoSignedWrap() || XNext->hasNoUnsignedWrap() ||
+ PatternMatch::match(
+ BitPos, PatternMatch::m_SpecificInt_ICMP(
+ ICmpInst::ICMP_NE, APInt(Ty->getScalarSizeInBits(),
+ Ty->getScalarSizeInBits() - 1))))
+ NewXNext = Builder.CreateShl(X, LoopTripCount);
+ else {
+    // Otherwise, just additionally shift by one. It's the smallest solution;
+ // alternatively, we could check that NewX is INT_MIN (or BitPos is )
+ // and select 0 instead.
+ NewXNext = Builder.CreateShl(NewX, ConstantInt::get(Ty, 1));
+ }
+
+ NewXNext->takeName(XNext);
+ if (auto *I = dyn_cast<Instruction>(NewXNext))
+ I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true);
+
+  // Step 3: Adjust the successor basic block to receive the computed
+ // recurrence's final value instead of the recurrence itself.
+
+ XCurr->replaceUsesOutsideBlock(NewX, LoopHeaderBB);
+ XNext->replaceUsesOutsideBlock(NewXNext, LoopHeaderBB);
+
+ // Step 4: Rewrite the loop into a countable form, with canonical IV.
+
+ // The new canonical induction variable.
+ Builder.SetInsertPoint(&LoopHeaderBB->front());
+ auto *IV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv");
+
+ // The induction itself.
+  // Note that NUW is always safe, while NSW is only safe for bitwidths != 2.
+ Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
+ auto *IVNext = Builder.CreateNUWAdd(IV, ConstantInt::get(Ty, 1),
+ IV->getName() + ".next");
+
+ // The loop trip count check.
+ auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
+ CurLoop->getName() + ".ivcheck");
+ Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
+ LoopHeaderBB->getTerminator()->eraseFromParent();
+
+ // Populate the IV PHI.
+ IV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB);
+ IV->addIncoming(IVNext, LoopHeaderBB);
+
+ // Step 5: Forget the "non-computable" trip-count SCEV associated with the
+ // loop. The loop would otherwise not be deleted even if it becomes empty.
+
+ SE->forgetLoop(CurLoop);
+
+ // Other passes will take care of actually deleting the loop if possible.
+
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom optimized!\n");
+
+ ++NumShiftUntilBitTest;
+ return MadeChange;
+}
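The doc comments above give the shift-until-bittest idiom and its rewrite at the IR level. As a rough C++-level companion (hypothetical, not taken from the LLVM sources or tests), the same loop and the closed form the transformation materializes in the preheader look like this; __builtin_clz is a GCC/Clang builtin standing in for the ctlz intrinsic, and, mirroring the is_zero_undef=true flag, the closed form assumes the masked value is non-zero (i.e. X has some set bit at or below BitPos, otherwise the original loop never terminates).

// The uncountable loop: shift X left until bit BitPos becomes set.
unsigned shiftUntilBitSet(unsigned X, unsigned BitPos) {
  unsigned BitMask = 1u << BitPos;
  while ((X & BitMask) == 0)
    X <<= 1;
  return X; // corresponds to %x.curr at the loop exit
}

// The closed form: the backedge-taken count is BitPos minus the position of
// the highest set bit of X at or below BitPos.
unsigned shiftUntilBitSetClosedForm(unsigned X, unsigned BitPos) {
  unsigned BitMask = 1u << BitPos;
  unsigned Mask = BitMask | (BitMask - 1);        // bits 0..BitPos
  unsigned Masked = X & Mask;                     // assumed non-zero
  unsigned NumActiveBits = 32u - __builtin_clz(Masked);
  unsigned BackedgeTaken = BitPos - (NumActiveBits - 1);
  return X << BackedgeTaken;                      // %x.curr
}

The trip count is BackedgeTaken + 1, and %x.next is X shifted by that amount, with the extra care taken in the code above for the case where the shift amount equals the bit width.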
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopInterchange.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopInterchange.cpp
index d9dbc0deb4..4f8809275f 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar/LoopInterchange.h"
+#include "llvm/Transforms/Scalar/LoopInterchange.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -28,7 +28,7 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
@@ -429,7 +429,7 @@ private:
const LoopInterchangeLegality &LIL;
};
-struct LoopInterchange {
+struct LoopInterchange {
ScalarEvolution *SE = nullptr;
LoopInfo *LI = nullptr;
DependenceInfo *DI = nullptr;
@@ -438,12 +438,12 @@ struct LoopInterchange {
/// Interface to emit optimization remarks.
OptimizationRemarkEmitter *ORE;
- LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
- DominatorTree *DT, OptimizationRemarkEmitter *ORE)
- : SE(SE), LI(LI), DI(DI), DT(DT), ORE(ORE) {}
+ LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
+ DominatorTree *DT, OptimizationRemarkEmitter *ORE)
+ : SE(SE), LI(LI), DI(DI), DT(DT), ORE(ORE) {}
- bool run(Loop *L) {
- if (L->getParentLoop())
+ bool run(Loop *L) {
+ if (L->getParentLoop())
return false;
return processLoopList(populateWorklist(*L));
@@ -452,7 +452,7 @@ struct LoopInterchange {
bool isComputableLoopNest(LoopVector LoopList) {
for (Loop *L : LoopList) {
const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
- if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
+ if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
return false;
}
@@ -611,13 +611,13 @@ bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
containsUnsafeInstructions(OuterLoopLatch))
return false;
- // Also make sure the inner loop preheader does not contain any unsafe
- // instructions. Note that all instructions in the preheader will be moved to
- // the outer loop header when interchanging.
- if (InnerLoopPreHeader != OuterLoopHeader &&
- containsUnsafeInstructions(InnerLoopPreHeader))
- return false;
-
+ // Also make sure the inner loop preheader does not contain any unsafe
+ // instructions. Note that all instructions in the preheader will be moved to
+ // the outer loop header when interchanging.
+ if (InnerLoopPreHeader != OuterLoopHeader &&
+ containsUnsafeInstructions(InnerLoopPreHeader))
+ return false;
+
LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
// We have a perfect loop nest.
return true;
@@ -661,10 +661,10 @@ static Value *followLCSSA(Value *SV) {
// Check V's users to see if it is involved in a reduction in L.
static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
- // Reduction variables cannot be constants.
- if (isa<Constant>(V))
- return nullptr;
-
+ // Reduction variables cannot be constants.
+ if (isa<Constant>(V))
+ return nullptr;
+
for (Value *User : V->users()) {
if (PHINode *PHI = dyn_cast<PHINode>(User)) {
if (PHI->getNumIncomingValues() == 1)
@@ -705,7 +705,7 @@ bool LoopInterchangeLegality::findInductionAndReductions(
Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
if (!InnerRedPhi ||
- !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
+ !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
LLVM_DEBUG(
dbgs()
<< "Failed to recognize PHI as an induction or reduction.\n");
@@ -1042,10 +1042,10 @@ int LoopInterchangeProfitability::getInstrOrderCost() {
bool FoundInnerInduction = false;
bool FoundOuterInduction = false;
for (unsigned i = 0; i < NumOp; ++i) {
- // Skip operands that are not SCEV-able.
- if (!SE->isSCEVable(GEP->getOperand(i)->getType()))
- continue;
-
+ // Skip operands that are not SCEV-able.
+ if (!SE->isSCEVable(GEP->getOperand(i)->getType()))
+ continue;
+
const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
if (!AR)
@@ -1190,7 +1190,7 @@ void LoopInterchangeTransform::restructureLoops(
removeChildLoop(NewInner, NewOuter);
LI->changeTopLevelLoop(NewInner, NewOuter);
}
- while (!NewOuter->isInnermost())
+ while (!NewOuter->isInnermost())
NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
NewOuter->addChildLoop(NewInner);
@@ -1306,21 +1306,21 @@ bool LoopInterchangeTransform::transform() {
LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
}
- // Instructions in the original inner loop preheader may depend on values
- // defined in the outer loop header. Move them there, because the original
- // inner loop preheader will become the entry into the interchanged loop nest.
- // Currently we move all instructions and rely on LICM to move invariant
- // instructions outside the loop nest.
- BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
- BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
- if (InnerLoopPreHeader != OuterLoopHeader) {
- SmallPtrSet<Instruction *, 4> NeedsMoving;
- for (Instruction &I :
- make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
- std::prev(InnerLoopPreHeader->end()))))
- I.moveBefore(OuterLoopHeader->getTerminator());
- }
-
+ // Instructions in the original inner loop preheader may depend on values
+ // defined in the outer loop header. Move them there, because the original
+ // inner loop preheader will become the entry into the interchanged loop nest.
+ // Currently we move all instructions and rely on LICM to move invariant
+ // instructions outside the loop nest.
+ BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+ BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
+ if (InnerLoopPreHeader != OuterLoopHeader) {
+ SmallPtrSet<Instruction *, 4> NeedsMoving;
+ for (Instruction &I :
+ make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
+ std::prev(InnerLoopPreHeader->end()))))
+ I.moveBefore(OuterLoopHeader->getTerminator());
+ }
+
Transformed |= adjustLoopLinks();
if (!Transformed) {
LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n");
@@ -1537,7 +1537,7 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
// The outer loop header might or might not branch to the outer latch.
// We are guaranteed to branch to the inner loop preheader.
- if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch))
+ if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch))
updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates,
/*MustUpdateOnce=*/false);
updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
@@ -1584,9 +1584,9 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
// Now update the reduction PHIs in the inner and outer loop headers.
SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
- for (PHINode &PHI : drop_begin(InnerLoopHeader->phis()))
+ for (PHINode &PHI : drop_begin(InnerLoopHeader->phis()))
InnerLoopPHIs.push_back(cast<PHINode>(&PHI));
- for (PHINode &PHI : drop_begin(OuterLoopHeader->phis()))
+ for (PHINode &PHI : drop_begin(OuterLoopHeader->phis()))
OuterLoopPHIs.push_back(cast<PHINode>(&PHI));
auto &OuterInnerReductions = LIL.getOuterInnerReductions();
@@ -1610,17 +1610,17 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
- // Values defined in the outer loop header could be used in the inner loop
- // latch. In that case, we need to create LCSSA phis for them, because after
- // interchanging they will be defined in the new inner loop and used in the
- // new outer loop.
- IRBuilder<> Builder(OuterLoopHeader->getContext());
- SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
- for (Instruction &I :
- make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
- MayNeedLCSSAPhis.push_back(&I);
- formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE, Builder);
-
+ // Values defined in the outer loop header could be used in the inner loop
+ // latch. In that case, we need to create LCSSA phis for them, because after
+ // interchanging they will be defined in the new inner loop and used in the
+ // new outer loop.
+ IRBuilder<> Builder(OuterLoopHeader->getContext());
+ SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
+ for (Instruction &I :
+ make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
+ MayNeedLCSSAPhis.push_back(&I);
+ formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE, Builder);
+
return true;
}
@@ -1638,58 +1638,58 @@ bool LoopInterchangeTransform::adjustLoopLinks() {
return Changed;
}
-/// Main LoopInterchange Pass.
-struct LoopInterchangeLegacyPass : public LoopPass {
- static char ID;
-
- LoopInterchangeLegacyPass() : LoopPass(ID) {
- initializeLoopInterchangeLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DependenceAnalysisWrapperPass>();
- AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
-
- getLoopAnalysisUsage(AU);
- }
-
- bool runOnLoop(Loop *L, LPPassManager &LPM) override {
- if (skipLoop(L))
- return false;
-
- auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto *DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
- auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
-
- return LoopInterchange(SE, LI, DI, DT, ORE).run(L);
- }
-};
-
-char LoopInterchangeLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(LoopInterchangeLegacyPass, "loop-interchange",
+/// Main LoopInterchange Pass.
+struct LoopInterchangeLegacyPass : public LoopPass {
+ static char ID;
+
+ LoopInterchangeLegacyPass() : LoopPass(ID) {
+ initializeLoopInterchangeLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DependenceAnalysisWrapperPass>();
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+
+ getLoopAnalysisUsage(AU);
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override {
+ if (skipLoop(L))
+ return false;
+
+ auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto *DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
+ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+
+ return LoopInterchange(SE, LI, DI, DT, ORE).run(L);
+ }
+};
+
+char LoopInterchangeLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(LoopInterchangeLegacyPass, "loop-interchange",
"Interchanges loops for cache reuse", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
-INITIALIZE_PASS_END(LoopInterchangeLegacyPass, "loop-interchange",
+INITIALIZE_PASS_END(LoopInterchangeLegacyPass, "loop-interchange",
"Interchanges loops for cache reuse", false, false)
-Pass *llvm::createLoopInterchangePass() {
- return new LoopInterchangeLegacyPass();
-}
-
-PreservedAnalyses LoopInterchangePass::run(Loop &L, LoopAnalysisManager &AM,
- LoopStandardAnalysisResults &AR,
- LPMUpdater &U) {
- Function &F = *L.getHeader()->getParent();
-
- DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
- OptimizationRemarkEmitter ORE(&F);
- if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &ORE).run(&L))
- return PreservedAnalyses::all();
- return getLoopPassPreservedAnalyses();
-}
+Pass *llvm::createLoopInterchangePass() {
+ return new LoopInterchangeLegacyPass();
+}
+
+PreservedAnalyses LoopInterchangePass::run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &U) {
+ Function &F = *L.getHeader()->getParent();
+
+ DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
+ OptimizationRemarkEmitter ORE(&F);
+ if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &ORE).run(&L))
+ return PreservedAnalyses::all();
+ return getLoopPassPreservedAnalyses();
+}
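For orientation, here is a hypothetical C++ sketch (not taken from the LLVM sources or tests) of the cache-reuse motivation behind the pass registered above: interchanging the two loops turns a large-stride inner access into a contiguous one.

// Before interchange: the inner loop walks down a column, so each iteration
// touches a different cache line of A (stride of 256 ints).
long sumColumnMajor(const int A[256][256]) {
  long Sum = 0;
  for (int j = 0; j < 256; ++j)
    for (int i = 0; i < 256; ++i)
      Sum += A[i][j];
  return Sum;
}

// After interchange: the inner loop walks along a row, so consecutive
// iterations stay within the same cache line.
long sumRowMajor(const int A[256][256]) {
  long Sum = 0;
  for (int i = 0; i < 256; ++i)
    for (int j = 0; j < 256; ++j)
      Sum += A[i][j];
  return Sum;
}

Legality is trivial here because the reduction into Sum is the only loop-carried dependence; the tightlyNested and findInductionAndReductions checks above exist to establish the same properties for less obvious nests.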
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopLoadElimination.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopLoadElimination.cpp
index 058612149a..0d3f053e1e 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -55,7 +55,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/Utils/LoopSimplify.h"
+#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
@@ -308,8 +308,8 @@ public:
/// We need a check if one is a pointer for a candidate load and the other is
/// a pointer for a possibly intervening store.
bool needsChecking(unsigned PtrIdx1, unsigned PtrIdx2,
- const SmallPtrSetImpl<Value *> &PtrsWrittenOnFwdingPath,
- const SmallPtrSetImpl<Value *> &CandLoadPtrs) {
+ const SmallPtrSetImpl<Value *> &PtrsWrittenOnFwdingPath,
+ const SmallPtrSetImpl<Value *> &CandLoadPtrs) {
Value *Ptr1 =
LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx1).PointerValue;
Value *Ptr2 =
@@ -384,9 +384,9 @@ public:
findPointersWrittenOnForwardingPath(Candidates);
// Collect the pointers of the candidate loads.
- SmallPtrSet<Value *, 4> CandLoadPtrs;
- for (const auto &Candidate : Candidates)
- CandLoadPtrs.insert(Candidate.getLoadPtr());
+ SmallPtrSet<Value *, 4> CandLoadPtrs;
+ for (const auto &Candidate : Candidates)
+ CandLoadPtrs.insert(Candidate.getLoadPtr());
const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks();
SmallVector<RuntimePointerCheck, 4> Checks;
@@ -505,16 +505,16 @@ public:
if (!Cand.isDependenceDistanceOfOne(PSE, L))
continue;
- assert(isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Load->getPointerOperand())) &&
- "Loading from something other than indvar?");
- assert(
- isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Store->getPointerOperand())) &&
- "Storing to something other than indvar?");
-
- Candidates.push_back(Cand);
+ assert(isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Load->getPointerOperand())) &&
+ "Loading from something other than indvar?");
+ assert(
+ isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Store->getPointerOperand())) &&
+ "Storing to something other than indvar?");
+
+ Candidates.push_back(Cand);
LLVM_DEBUG(
dbgs()
- << Candidates.size()
+ << Candidates.size()
<< ". Valid store-to-load forwarding across the loop backedge\n");
}
if (Candidates.empty())
@@ -563,19 +563,19 @@ public:
// Point of no-return, start the transformation. First, version the loop
// if necessary.
- LoopVersioning LV(LAI, Checks, L, LI, DT, PSE.getSE());
+ LoopVersioning LV(LAI, Checks, L, LI, DT, PSE.getSE());
LV.versionLoop();
-
- // After versioning, some of the candidates' pointers could stop being
- // SCEVAddRecs. We need to filter them out.
- auto NoLongerGoodCandidate = [this](
- const StoreToLoadForwardingCandidate &Cand) {
- return !isa<SCEVAddRecExpr>(
- PSE.getSCEV(Cand.Load->getPointerOperand())) ||
- !isa<SCEVAddRecExpr>(
- PSE.getSCEV(Cand.Store->getPointerOperand()));
- };
- llvm::erase_if(Candidates, NoLongerGoodCandidate);
+
+ // After versioning, some of the candidates' pointers could stop being
+ // SCEVAddRecs. We need to filter them out.
+ auto NoLongerGoodCandidate = [this](
+ const StoreToLoadForwardingCandidate &Cand) {
+ return !isa<SCEVAddRecExpr>(
+ PSE.getSCEV(Cand.Load->getPointerOperand())) ||
+ !isa<SCEVAddRecExpr>(
+ PSE.getSCEV(Cand.Store->getPointerOperand()));
+ };
+ llvm::erase_if(Candidates, NoLongerGoodCandidate);
}
// Next, propagate the value stored by the store to the users of the load.
@@ -584,7 +584,7 @@ public:
"storeforward");
for (const auto &Cand : Candidates)
propagateStoredValueToLoadUsers(Cand, SEE);
- NumLoopLoadEliminted += Candidates.size();
+ NumLoopLoadEliminted += Candidates.size();
return true;
}
@@ -610,7 +610,7 @@ private:
static bool
eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT,
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
- ScalarEvolution *SE, AssumptionCache *AC,
+ ScalarEvolution *SE, AssumptionCache *AC,
function_ref<const LoopAccessInfo &(Loop &)> GetLAI) {
// Build up a worklist of inner-loops to transform to avoid iterator
// invalidation.
@@ -619,21 +619,21 @@ eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT,
// which merely optimizes the use of loads in a loop.
SmallVector<Loop *, 8> Worklist;
- bool Changed = false;
-
+ bool Changed = false;
+
for (Loop *TopLevelLoop : LI)
- for (Loop *L : depth_first(TopLevelLoop)) {
- Changed |= simplifyLoop(L, &DT, &LI, SE, AC, /*MSSAU*/ nullptr, false);
+ for (Loop *L : depth_first(TopLevelLoop)) {
+ Changed |= simplifyLoop(L, &DT, &LI, SE, AC, /*MSSAU*/ nullptr, false);
// We only handle inner-most loops.
- if (L->isInnermost())
+ if (L->isInnermost())
Worklist.push_back(L);
- }
+ }
// Now walk the identified inner loops.
for (Loop *L : Worklist) {
- // Match historical behavior
- if (!L->isRotatedForm() || !L->getExitingBlock())
- continue;
+ // Match historical behavior
+ if (!L->isRotatedForm() || !L->getExitingBlock())
+ continue;
// The actual work is performed by LoadEliminationForLoop.
LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT, BFI, PSI);
Changed |= LEL.processLoop();
@@ -667,7 +667,7 @@ public:
// Process each loop nest in the function.
return eliminateLoadsAcrossLoops(
- F, LI, DT, BFI, PSI, /*SE*/ nullptr, /*AC*/ nullptr,
+ F, LI, DT, BFI, PSI, /*SE*/ nullptr, /*AC*/ nullptr,
[&LAA](Loop &L) -> const LoopAccessInfo & { return LAA.getInfo(&L); });
}
@@ -724,9 +724,9 @@ PreservedAnalyses LoopLoadEliminationPass::run(Function &F,
auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
bool Changed = eliminateLoadsAcrossLoops(
- F, LI, DT, BFI, PSI, &SE, &AC, [&](Loop &L) -> const LoopAccessInfo & {
- LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
- TLI, TTI, nullptr, MSSA};
+ F, LI, DT, BFI, PSI, &SE, &AC, [&](Loop &L) -> const LoopAccessInfo & {
+ LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
+ TLI, TTI, nullptr, MSSA};
return LAM.getResult<LoopAccessAnalysis>(L, AR);
});
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopPassManager.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopPassManager.cpp
index 3fe8e72591..13330c1c80 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -6,14 +6,14 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/MemorySSA.h"
-#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Support/TimeProfiler.h"
using namespace llvm;
@@ -30,133 +30,133 @@ PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &,
if (DebugLogging)
dbgs() << "Starting Loop pass manager run.\n";
- // Runs loop-nest passes only when the current loop is a top-level one.
- PreservedAnalyses PA = (L.isOutermost() && !LoopNestPasses.empty())
- ? runWithLoopNestPasses(L, AM, AR, U)
- : runWithoutLoopNestPasses(L, AM, AR, U);
-
- // Invalidation for the current loop should be handled above, and other loop
- // analysis results shouldn't be impacted by runs over this loop. Therefore,
- // the remaining analysis results in the AnalysisManager are preserved. We
- // mark this with a set so that we don't need to inspect each one
- // individually.
- // FIXME: This isn't correct! This loop and all nested loops' analyses should
- // be preserved, but unrolling should invalidate the parent loop's analyses.
- PA.preserveSet<AllAnalysesOn<Loop>>();
-
- if (DebugLogging)
- dbgs() << "Finished Loop pass manager run.\n";
-
- return PA;
-}
-
-// Run both loop passes and loop-nest passes on top-level loop \p L.
-PreservedAnalyses
-LoopPassManager::runWithLoopNestPasses(Loop &L, LoopAnalysisManager &AM,
- LoopStandardAnalysisResults &AR,
- LPMUpdater &U) {
- assert(L.isOutermost() &&
- "Loop-nest passes should only run on top-level loops.");
- PreservedAnalyses PA = PreservedAnalyses::all();
-
+ // Runs loop-nest passes only when the current loop is a top-level one.
+ PreservedAnalyses PA = (L.isOutermost() && !LoopNestPasses.empty())
+ ? runWithLoopNestPasses(L, AM, AR, U)
+ : runWithoutLoopNestPasses(L, AM, AR, U);
+
+ // Invalidation for the current loop should be handled above, and other loop
+ // analysis results shouldn't be impacted by runs over this loop. Therefore,
+ // the remaining analysis results in the AnalysisManager are preserved. We
+ // mark this with a set so that we don't need to inspect each one
+ // individually.
+ // FIXME: This isn't correct! This loop and all nested loops' analyses should
+ // be preserved, but unrolling should invalidate the parent loop's analyses.
+ PA.preserveSet<AllAnalysesOn<Loop>>();
+
+ if (DebugLogging)
+ dbgs() << "Finished Loop pass manager run.\n";
+
+ return PA;
+}
+
+// Run both loop passes and loop-nest passes on top-level loop \p L.
+PreservedAnalyses
+LoopPassManager::runWithLoopNestPasses(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &U) {
+ assert(L.isOutermost() &&
+ "Loop-nest passes should only run on top-level loops.");
+ PreservedAnalyses PA = PreservedAnalyses::all();
+
// Request PassInstrumentation from analysis manager, will use it to run
// instrumenting callbacks for the passes later.
PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(L, AR);
- unsigned LoopPassIndex = 0, LoopNestPassIndex = 0;
-
- // `LoopNestPtr` points to the `LoopNest` object for the current top-level
- // loop and `IsLoopNestPtrValid` indicates whether the pointer is still valid.
- // The `LoopNest` object will have to be re-constructed if the pointer is
- // invalid when encountering a loop-nest pass.
- std::unique_ptr<LoopNest> LoopNestPtr;
- bool IsLoopNestPtrValid = false;
-
- for (size_t I = 0, E = IsLoopNestPass.size(); I != E; ++I) {
- Optional<PreservedAnalyses> PassPA;
- if (!IsLoopNestPass[I]) {
- // The `I`-th pass is a loop pass.
- auto &Pass = LoopPasses[LoopPassIndex++];
- PassPA = runSinglePass(L, Pass, AM, AR, U, PI);
- } else {
- // The `I`-th pass is a loop-nest pass.
- auto &Pass = LoopNestPasses[LoopNestPassIndex++];
-
- // If the loop-nest object calculated before is no longer valid,
- // re-calculate it here before running the loop-nest pass.
- if (!IsLoopNestPtrValid) {
- LoopNestPtr = LoopNest::getLoopNest(L, AR.SE);
- IsLoopNestPtrValid = true;
- }
- PassPA = runSinglePass(*LoopNestPtr, Pass, AM, AR, U, PI);
+ unsigned LoopPassIndex = 0, LoopNestPassIndex = 0;
+
+ // `LoopNestPtr` points to the `LoopNest` object for the current top-level
+ // loop and `IsLoopNestPtrValid` indicates whether the pointer is still valid.
+ // The `LoopNest` object will have to be re-constructed if the pointer is
+ // invalid when encountering a loop-nest pass.
+ std::unique_ptr<LoopNest> LoopNestPtr;
+ bool IsLoopNestPtrValid = false;
+
+ for (size_t I = 0, E = IsLoopNestPass.size(); I != E; ++I) {
+ Optional<PreservedAnalyses> PassPA;
+ if (!IsLoopNestPass[I]) {
+ // The `I`-th pass is a loop pass.
+ auto &Pass = LoopPasses[LoopPassIndex++];
+ PassPA = runSinglePass(L, Pass, AM, AR, U, PI);
+ } else {
+ // The `I`-th pass is a loop-nest pass.
+ auto &Pass = LoopNestPasses[LoopNestPassIndex++];
+
+ // If the loop-nest object calculated before is no longer valid,
+ // re-calculate it here before running the loop-nest pass.
+ if (!IsLoopNestPtrValid) {
+ LoopNestPtr = LoopNest::getLoopNest(L, AR.SE);
+ IsLoopNestPtrValid = true;
+ }
+ PassPA = runSinglePass(*LoopNestPtr, Pass, AM, AR, U, PI);
}
- // `PassPA` is `None` means that the before-pass callbacks in
- // `PassInstrumentation` return false. The pass does not run in this case,
- // so we can skip the following procedure.
- if (!PassPA)
- continue;
-
- // If the loop was deleted, abort the run and return to the outer walk.
- if (U.skipCurrentLoop()) {
- PA.intersect(std::move(*PassPA));
- break;
- }
-
- // Update the analysis manager as each pass runs and potentially
- // invalidates analyses.
- AM.invalidate(L, *PassPA);
-
- // Finally, we intersect the final preserved analyses to compute the
- // aggregate preserved set for this pass manager.
- PA.intersect(std::move(*PassPA));
-
- // Check if the current pass preserved the loop-nest object or not.
- IsLoopNestPtrValid &= PassPA->getChecker<LoopNestAnalysis>().preserved();
-
- // FIXME: Historically, the pass managers all called the LLVM context's
- // yield function here. We don't have a generic way to acquire the
- // context and it isn't yet clear what the right pattern is for yielding
- // in the new pass manager so it is currently omitted.
- // ...getContext().yield();
- }
- return PA;
-}
-
-// Run all loop passes on loop \p L. Loop-nest passes don't run either because
-// \p L is not a top-level one or simply because there are no loop-nest passes
-// in the pass manager at all.
-PreservedAnalyses
-LoopPassManager::runWithoutLoopNestPasses(Loop &L, LoopAnalysisManager &AM,
- LoopStandardAnalysisResults &AR,
- LPMUpdater &U) {
- PreservedAnalyses PA = PreservedAnalyses::all();
-
- // Request PassInstrumentation from analysis manager, will use it to run
- // instrumenting callbacks for the passes later.
- PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(L, AR);
- for (auto &Pass : LoopPasses) {
- Optional<PreservedAnalyses> PassPA = runSinglePass(L, Pass, AM, AR, U, PI);
-
- // `PassPA` is `None` means that the before-pass callbacks in
- // `PassInstrumentation` return false. The pass does not run in this case,
- // so we can skip the following procedure.
- if (!PassPA)
- continue;
+ // `PassPA` is `None` means that the before-pass callbacks in
+ // `PassInstrumentation` return false. The pass does not run in this case,
+ // so we can skip the following procedure.
+ if (!PassPA)
+ continue;
// If the loop was deleted, abort the run and return to the outer walk.
if (U.skipCurrentLoop()) {
- PA.intersect(std::move(*PassPA));
+ PA.intersect(std::move(*PassPA));
break;
}
+ // Update the analysis manager as each pass runs and potentially
+ // invalidates analyses.
+ AM.invalidate(L, *PassPA);
+
+ // Finally, we intersect the final preserved analyses to compute the
+ // aggregate preserved set for this pass manager.
+ PA.intersect(std::move(*PassPA));
+
+ // Check if the current pass preserved the loop-nest object or not.
+ IsLoopNestPtrValid &= PassPA->getChecker<LoopNestAnalysis>().preserved();
+
+ // FIXME: Historically, the pass managers all called the LLVM context's
+ // yield function here. We don't have a generic way to acquire the
+ // context and it isn't yet clear what the right pattern is for yielding
+ // in the new pass manager so it is currently omitted.
+ // ...getContext().yield();
+ }
+ return PA;
+}
+
+// Run all loop passes on loop \p L. Loop-nest passes don't run either because
+// \p L is not a top-level one or simply because there are no loop-nest passes
+// in the pass manager at all.
+PreservedAnalyses
+LoopPassManager::runWithoutLoopNestPasses(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &U) {
+ PreservedAnalyses PA = PreservedAnalyses::all();
+
+ // Request PassInstrumentation from analysis manager, will use it to run
+ // instrumenting callbacks for the passes later.
+ PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(L, AR);
+ for (auto &Pass : LoopPasses) {
+ Optional<PreservedAnalyses> PassPA = runSinglePass(L, Pass, AM, AR, U, PI);
+
+ // `PassPA` is `None` means that the before-pass callbacks in
+ // `PassInstrumentation` return false. The pass does not run in this case,
+ // so we can skip the following procedure.
+ if (!PassPA)
+ continue;
+
+ // If the loop was deleted, abort the run and return to the outer walk.
+ if (U.skipCurrentLoop()) {
+ PA.intersect(std::move(*PassPA));
+ break;
+ }
+
// Update the analysis manager as each pass runs and potentially
// invalidates analyses.
- AM.invalidate(L, *PassPA);
+ AM.invalidate(L, *PassPA);
// Finally, we intersect the final preserved analyses to compute the
// aggregate preserved set for this pass manager.
- PA.intersect(std::move(*PassPA));
+ PA.intersect(std::move(*PassPA));
// FIXME: Historically, the pass managers all called the LLVM context's
// yield function here. We don't have a generic way to acquire the
@@ -164,162 +164,162 @@ LoopPassManager::runWithoutLoopNestPasses(Loop &L, LoopAnalysisManager &AM,
// in the new pass manager so it is currently omitted.
// ...getContext().yield();
}
- return PA;
-}
-} // namespace llvm
-
-PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
- FunctionAnalysisManager &AM) {
- // Before we even compute any loop analyses, first run a miniature function
- // pass pipeline to put loops into their canonical form. Note that we can
- // directly build up function analyses after this as the function pass
- // manager handles all the invalidation at that layer.
- PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(F);
-
- PreservedAnalyses PA = PreservedAnalyses::all();
- // Check the PassInstrumentation's BeforePass callbacks before running the
- // canonicalization pipeline.
- if (PI.runBeforePass<Function>(LoopCanonicalizationFPM, F)) {
- PA = LoopCanonicalizationFPM.run(F, AM);
- PI.runAfterPass<Function>(LoopCanonicalizationFPM, F, PA);
- }
-
- // Get the loop structure for this function
- LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
-
- // If there are no loops, there is nothing to do here.
- if (LI.empty())
- return PA;
-
- // Get the analysis results needed by loop passes.
- MemorySSA *MSSA =
- UseMemorySSA ? (&AM.getResult<MemorySSAAnalysis>(F).getMSSA()) : nullptr;
- BlockFrequencyInfo *BFI = UseBlockFrequencyInfo && F.hasProfileData()
- ? (&AM.getResult<BlockFrequencyAnalysis>(F))
- : nullptr;
- LoopStandardAnalysisResults LAR = {AM.getResult<AAManager>(F),
- AM.getResult<AssumptionAnalysis>(F),
- AM.getResult<DominatorTreeAnalysis>(F),
- AM.getResult<LoopAnalysis>(F),
- AM.getResult<ScalarEvolutionAnalysis>(F),
- AM.getResult<TargetLibraryAnalysis>(F),
- AM.getResult<TargetIRAnalysis>(F),
- BFI,
- MSSA};
-
- // Setup the loop analysis manager from its proxy. It is important that
- // this is only done when there are loops to process and we have built the
- // LoopStandardAnalysisResults object. The loop analyses cached in this
- // manager have access to those analysis results and so it must invalidate
- // itself when they go away.
- auto &LAMFP = AM.getResult<LoopAnalysisManagerFunctionProxy>(F);
- if (UseMemorySSA)
- LAMFP.markMSSAUsed();
- LoopAnalysisManager &LAM = LAMFP.getManager();
-
- // A postorder worklist of loops to process.
- SmallPriorityWorklist<Loop *, 4> Worklist;
-
- // Register the worklist and loop analysis manager so that loop passes can
- // update them when they mutate the loop nest structure.
- LPMUpdater Updater(Worklist, LAM, LoopNestMode);
-
- // Add the loop nests in the reverse order of LoopInfo. See method
- // declaration.
- if (!LoopNestMode) {
- appendLoopsToWorklist(LI, Worklist);
- } else {
- for (Loop *L : LI)
- Worklist.insert(L);
- }
-
-#ifndef NDEBUG
- PI.pushBeforeNonSkippedPassCallback([&LAR, &LI](StringRef PassID, Any IR) {
- if (isSpecialPass(PassID, {"PassManager"}))
- return;
- assert(any_isa<const Loop *>(IR) || any_isa<const LoopNest *>(IR));
- const Loop *L = any_isa<const Loop *>(IR)
- ? any_cast<const Loop *>(IR)
- : &any_cast<const LoopNest *>(IR)->getOutermostLoop();
- assert(L && "Loop should be valid for printing");
-
- // Verify the loop structure and LCSSA form before visiting the loop.
- L->verifyLoop();
- assert(L->isRecursivelyLCSSAForm(LAR.DT, LI) &&
- "Loops must remain in LCSSA form!");
- });
-#endif
-
- do {
- Loop *L = Worklist.pop_back_val();
- assert(!(LoopNestMode && L->getParentLoop()) &&
- "L should be a top-level loop in loop-nest mode.");
-
- // Reset the update structure for this loop.
- Updater.CurrentL = L;
- Updater.SkipCurrentLoop = false;
-
-#ifndef NDEBUG
- // Save a parent loop pointer for asserts.
- Updater.ParentL = L->getParentLoop();
-#endif
- // Check the PassInstrumentation's BeforePass callbacks before running the
- // pass, skip its execution completely if asked to (callback returns
- // false).
- if (!PI.runBeforePass<Loop>(*Pass, *L))
- continue;
-
- PreservedAnalyses PassPA;
- {
- TimeTraceScope TimeScope(Pass->name());
- PassPA = Pass->run(*L, LAM, LAR, Updater);
- }
-
- // Do not pass deleted Loop into the instrumentation.
- if (Updater.skipCurrentLoop())
- PI.runAfterPassInvalidated<Loop>(*Pass, PassPA);
- else
- PI.runAfterPass<Loop>(*Pass, *L, PassPA);
-
- // FIXME: We should verify the set of analyses relevant to Loop passes
- // are preserved.
-
- // If the loop hasn't been deleted, we need to handle invalidation here.
- if (!Updater.skipCurrentLoop())
- // We know that the loop pass couldn't have invalidated any other
- // loop's analyses (that's the contract of a loop pass), so directly
- // handle the loop analysis manager's invalidation here.
- LAM.invalidate(*L, PassPA);
-
- // Then intersect the preserved set so that invalidation of module
- // analyses will eventually occur when the module pass completes.
- PA.intersect(std::move(PassPA));
- } while (!Worklist.empty());
-
-#ifndef NDEBUG
- PI.popBeforeNonSkippedPassCallback();
-#endif
-
- // By definition we preserve the proxy. We also preserve all analyses on
- // Loops. This precludes *any* invalidation of loop analyses by the proxy,
- // but that's OK because we've taken care to invalidate analyses in the
- // loop analysis manager incrementally above.
- PA.preserveSet<AllAnalysesOn<Loop>>();
- PA.preserve<LoopAnalysisManagerFunctionProxy>();
- // We also preserve the set of standard analyses.
- PA.preserve<DominatorTreeAnalysis>();
- PA.preserve<LoopAnalysis>();
- PA.preserve<ScalarEvolutionAnalysis>();
- if (UseBlockFrequencyInfo && F.hasProfileData())
- PA.preserve<BlockFrequencyAnalysis>();
- if (UseMemorySSA)
- PA.preserve<MemorySSAAnalysis>();
- // FIXME: What we really want to do here is preserve an AA category, but
- // that concept doesn't exist yet.
- PA.preserve<AAManager>();
- PA.preserve<BasicAA>();
- PA.preserve<GlobalsAA>();
- PA.preserve<SCEVAA>();
+ return PA;
+}
+} // namespace llvm
+
+PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ // Before we even compute any loop analyses, first run a miniature function
+ // pass pipeline to put loops into their canonical form. Note that we can
+ // directly build up function analyses after this as the function pass
+ // manager handles all the invalidation at that layer.
+ PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(F);
+
+ PreservedAnalyses PA = PreservedAnalyses::all();
+ // Check the PassInstrumentation's BeforePass callbacks before running the
+ // canonicalization pipeline.
+ if (PI.runBeforePass<Function>(LoopCanonicalizationFPM, F)) {
+ PA = LoopCanonicalizationFPM.run(F, AM);
+ PI.runAfterPass<Function>(LoopCanonicalizationFPM, F, PA);
+ }
+
+ // Get the loop structure for this function
+ LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
+
+ // If there are no loops, there is nothing to do here.
+ if (LI.empty())
+ return PA;
+
+ // Get the analysis results needed by loop passes.
+ MemorySSA *MSSA =
+ UseMemorySSA ? (&AM.getResult<MemorySSAAnalysis>(F).getMSSA()) : nullptr;
+ BlockFrequencyInfo *BFI = UseBlockFrequencyInfo && F.hasProfileData()
+ ? (&AM.getResult<BlockFrequencyAnalysis>(F))
+ : nullptr;
+ LoopStandardAnalysisResults LAR = {AM.getResult<AAManager>(F),
+ AM.getResult<AssumptionAnalysis>(F),
+ AM.getResult<DominatorTreeAnalysis>(F),
+ AM.getResult<LoopAnalysis>(F),
+ AM.getResult<ScalarEvolutionAnalysis>(F),
+ AM.getResult<TargetLibraryAnalysis>(F),
+ AM.getResult<TargetIRAnalysis>(F),
+ BFI,
+ MSSA};
+
+ // Setup the loop analysis manager from its proxy. It is important that
+ // this is only done when there are loops to process and we have built the
+ // LoopStandardAnalysisResults object. The loop analyses cached in this
+ // manager have access to those analysis results and so it must invalidate
+ // itself when they go away.
+ auto &LAMFP = AM.getResult<LoopAnalysisManagerFunctionProxy>(F);
+ if (UseMemorySSA)
+ LAMFP.markMSSAUsed();
+ LoopAnalysisManager &LAM = LAMFP.getManager();
+
+ // A postorder worklist of loops to process.
+ SmallPriorityWorklist<Loop *, 4> Worklist;
+
+ // Register the worklist and loop analysis manager so that loop passes can
+ // update them when they mutate the loop nest structure.
+ LPMUpdater Updater(Worklist, LAM, LoopNestMode);
+
+ // Add the loop nests in the reverse order of LoopInfo. See method
+ // declaration.
+ if (!LoopNestMode) {
+ appendLoopsToWorklist(LI, Worklist);
+ } else {
+ for (Loop *L : LI)
+ Worklist.insert(L);
+ }
+
+#ifndef NDEBUG
+ PI.pushBeforeNonSkippedPassCallback([&LAR, &LI](StringRef PassID, Any IR) {
+ if (isSpecialPass(PassID, {"PassManager"}))
+ return;
+ assert(any_isa<const Loop *>(IR) || any_isa<const LoopNest *>(IR));
+ const Loop *L = any_isa<const Loop *>(IR)
+ ? any_cast<const Loop *>(IR)
+ : &any_cast<const LoopNest *>(IR)->getOutermostLoop();
+ assert(L && "Loop should be valid for printing");
+
+ // Verify the loop structure and LCSSA form before visiting the loop.
+ L->verifyLoop();
+ assert(L->isRecursivelyLCSSAForm(LAR.DT, LI) &&
+ "Loops must remain in LCSSA form!");
+ });
+#endif
+
+ do {
+ Loop *L = Worklist.pop_back_val();
+ assert(!(LoopNestMode && L->getParentLoop()) &&
+ "L should be a top-level loop in loop-nest mode.");
+
+ // Reset the update structure for this loop.
+ Updater.CurrentL = L;
+ Updater.SkipCurrentLoop = false;
+
+#ifndef NDEBUG
+ // Save a parent loop pointer for asserts.
+ Updater.ParentL = L->getParentLoop();
+#endif
+ // Check the PassInstrumentation's BeforePass callbacks before running the
+ // pass, skip its execution completely if asked to (callback returns
+ // false).
+ if (!PI.runBeforePass<Loop>(*Pass, *L))
+ continue;
+
+ PreservedAnalyses PassPA;
+ {
+ TimeTraceScope TimeScope(Pass->name());
+ PassPA = Pass->run(*L, LAM, LAR, Updater);
+ }
+
+ // Do not pass deleted Loop into the instrumentation.
+ if (Updater.skipCurrentLoop())
+ PI.runAfterPassInvalidated<Loop>(*Pass, PassPA);
+ else
+ PI.runAfterPass<Loop>(*Pass, *L, PassPA);
+
+ // FIXME: We should verify the set of analyses relevant to Loop passes
+ // are preserved.
+
+ // If the loop hasn't been deleted, we need to handle invalidation here.
+ if (!Updater.skipCurrentLoop())
+ // We know that the loop pass couldn't have invalidated any other
+ // loop's analyses (that's the contract of a loop pass), so directly
+ // handle the loop analysis manager's invalidation here.
+ LAM.invalidate(*L, PassPA);
+
+ // Then intersect the preserved set so that invalidation of module
+ // analyses will eventually occur when the module pass completes.
+ PA.intersect(std::move(PassPA));
+ } while (!Worklist.empty());
+
+#ifndef NDEBUG
+ PI.popBeforeNonSkippedPassCallback();
+#endif
+
+ // By definition we preserve the proxy. We also preserve all analyses on
+ // Loops. This precludes *any* invalidation of loop analyses by the proxy,
+ // but that's OK because we've taken care to invalidate analyses in the
+ // loop analysis manager incrementally above.
+ PA.preserveSet<AllAnalysesOn<Loop>>();
+ PA.preserve<LoopAnalysisManagerFunctionProxy>();
+ // We also preserve the set of standard analyses.
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<LoopAnalysis>();
+ PA.preserve<ScalarEvolutionAnalysis>();
+ if (UseBlockFrequencyInfo && F.hasProfileData())
+ PA.preserve<BlockFrequencyAnalysis>();
+ if (UseMemorySSA)
+ PA.preserve<MemorySSAAnalysis>();
+ // FIXME: What we really want to do here is preserve an AA category, but
+ // that concept doesn't exist yet.
+ PA.preserve<AAManager>();
+ PA.preserve<BasicAA>();
+ PA.preserve<GlobalsAA>();
+ PA.preserve<SCEVAA>();
return PA;
}
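
Annotation (not part of the patch): to make the control flow in FunctionToLoopPassAdaptor::run and runWithoutLoopNestPasses concrete, here is the smallest loop pass this manager could drive. The class name is hypothetical; only the run() signature follows the LLVM 12 loop-pass contract shown in this file. Because it mutates nothing, it returns PreservedAnalyses::all(), so the AM.invalidate(L, *PassPA) and PA.intersect(...) steps above are no-ops for it.

    #include "llvm/Support/raw_ostream.h"
    #include "llvm/Transforms/Scalar/LoopPassManager.h"

    namespace {
    struct PrintLoopDepthPass : llvm::PassInfoMixin<PrintLoopDepthPass> {
      llvm::PreservedAnalyses run(llvm::Loop &L, llvm::LoopAnalysisManager &AM,
                                  llvm::LoopStandardAnalysisResults &AR,
                                  llvm::LPMUpdater &U) {
        llvm::errs() << "loop at depth " << L.getLoopDepth() << " with "
                     << L.getNumBlocks() << " blocks\n";
        // Analysis-only pass: nothing is mutated, so every cached loop
        // analysis stays valid and the manager keeps its aggregate set intact.
        return llvm::PreservedAnalyses::all();
      }
    };
    } // namespace

It would be scheduled with LPM.addPass(PrintLoopDepthPass()), exactly like the in-tree loop passes touched elsewhere in this commit.
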
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopPredication.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopPredication.cpp
index 4f97641e20..e46c3d64e6 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopPredication.cpp
@@ -362,7 +362,7 @@ PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM,
// For the new PM, we also can't use BranchProbabilityInfo as an analysis
// pass. Function analyses need to be preserved across loop transformations
// but BPI is not preserved, hence a newly built one is needed.
- BranchProbabilityInfo BPI(*F, AR.LI, &AR.TLI, &AR.DT, nullptr);
+ BranchProbabilityInfo BPI(*F, AR.LI, &AR.TLI, &AR.DT, nullptr);
LoopPredication LP(&AR.AA, &AR.DT, &AR.SE, &AR.LI, &BPI);
if (!LP.runOnLoop(&L))
return PreservedAnalyses::all();
@@ -439,8 +439,8 @@ static bool isSafeToTruncateWideIVType(const DataLayout &DL,
Type *RangeCheckType) {
if (!EnableIVTruncation)
return false;
- assert(DL.getTypeSizeInBits(LatchCheck.IV->getType()).getFixedSize() >
- DL.getTypeSizeInBits(RangeCheckType).getFixedSize() &&
+ assert(DL.getTypeSizeInBits(LatchCheck.IV->getType()).getFixedSize() >
+ DL.getTypeSizeInBits(RangeCheckType).getFixedSize() &&
"Expected latch check IV type to be larger than range check operand "
"type!");
// The start and end values of the IV should be known. This is to guarantee
@@ -454,13 +454,13 @@ static bool isSafeToTruncateWideIVType(const DataLayout &DL,
// LatchEnd = 2, rangeCheckType = i32. If it's not a monotonic predicate, the
// IV wraps around, and the truncation of the IV would lose the range of
// iterations between 2^32 and 2^64.
- if (!SE.getMonotonicPredicateType(LatchCheck.IV, LatchCheck.Pred))
+ if (!SE.getMonotonicPredicateType(LatchCheck.IV, LatchCheck.Pred))
return false;
// The active bits should be less than the bits in the RangeCheckType. This
// guarantees that truncating the latch check to RangeCheckType is a safe
// operation.
- auto RangeCheckTypeBitSize =
- DL.getTypeSizeInBits(RangeCheckType).getFixedSize();
+ auto RangeCheckTypeBitSize =
+ DL.getTypeSizeInBits(RangeCheckType).getFixedSize();
return Start->getAPInt().getActiveBits() < RangeCheckTypeBitSize &&
Limit->getAPInt().getActiveBits() < RangeCheckTypeBitSize;
}
@@ -477,8 +477,8 @@ static Optional<LoopICmp> generateLoopLatchCheck(const DataLayout &DL,
if (RangeCheckType == LatchType)
return LatchCheck;
// For now, bail out if latch type is narrower than range type.
- if (DL.getTypeSizeInBits(LatchType).getFixedSize() <
- DL.getTypeSizeInBits(RangeCheckType).getFixedSize())
+ if (DL.getTypeSizeInBits(LatchType).getFixedSize() <
+ DL.getTypeSizeInBits(RangeCheckType).getFixedSize())
return None;
if (!isSafeToTruncateWideIVType(DL, SE, LatchCheck, RangeCheckType))
return None;
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopRerollPass.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopRerollPass.cpp
index 65a6205f03..18caeabaca 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -50,7 +50,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/LoopReroll.h"
+#include "llvm/Transforms/Scalar/LoopReroll.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -162,12 +162,12 @@ namespace {
IL_End
};
- class LoopRerollLegacyPass : public LoopPass {
+ class LoopRerollLegacyPass : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopRerollLegacyPass() : LoopPass(ID) {
- initializeLoopRerollLegacyPassPass(*PassRegistry::getPassRegistry());
+ LoopRerollLegacyPass() : LoopPass(ID) {
+ initializeLoopRerollLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override;
@@ -176,16 +176,16 @@ namespace {
AU.addRequired<TargetLibraryInfoWrapperPass>();
getLoopAnalysisUsage(AU);
}
- };
-
- class LoopReroll {
- public:
- LoopReroll(AliasAnalysis *AA, LoopInfo *LI, ScalarEvolution *SE,
- TargetLibraryInfo *TLI, DominatorTree *DT, bool PreserveLCSSA)
- : AA(AA), LI(LI), SE(SE), TLI(TLI), DT(DT),
- PreserveLCSSA(PreserveLCSSA) {}
- bool runOnLoop(Loop *L);
-
+ };
+
+ class LoopReroll {
+ public:
+ LoopReroll(AliasAnalysis *AA, LoopInfo *LI, ScalarEvolution *SE,
+ TargetLibraryInfo *TLI, DominatorTree *DT, bool PreserveLCSSA)
+ : AA(AA), LI(LI), SE(SE), TLI(TLI), DT(DT),
+ PreserveLCSSA(PreserveLCSSA) {}
+ bool runOnLoop(Loop *L);
+
protected:
AliasAnalysis *AA;
LoopInfo *LI;
@@ -494,16 +494,16 @@ namespace {
} // end anonymous namespace
-char LoopRerollLegacyPass::ID = 0;
+char LoopRerollLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopRerollLegacyPass, "loop-reroll", "Reroll loops",
- false, false)
+INITIALIZE_PASS_BEGIN(LoopRerollLegacyPass, "loop-reroll", "Reroll loops",
+ false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(LoopRerollLegacyPass, "loop-reroll", "Reroll loops", false,
- false)
+INITIALIZE_PASS_END(LoopRerollLegacyPass, "loop-reroll", "Reroll loops", false,
+ false)
-Pass *llvm::createLoopRerollPass() { return new LoopRerollLegacyPass; }
+Pass *llvm::createLoopRerollPass() { return new LoopRerollLegacyPass; }
// Returns true if the provided instruction is used outside the given loop.
// This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in
@@ -1081,12 +1081,12 @@ bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &Po
DenseSet<Instruction*> V;
collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V);
for (auto *I : V) {
- if (I->mayHaveSideEffects()) {
- LLVM_DEBUG(dbgs() << "LRR: Aborting - "
- << "An instruction which does not belong to any root "
- << "sets must not have side effects: " << *I);
- return false;
- }
+ if (I->mayHaveSideEffects()) {
+ LLVM_DEBUG(dbgs() << "LRR: Aborting - "
+ << "An instruction which does not belong to any root "
+ << "sets must not have side effects: " << *I);
+ return false;
+ }
Uses[I].set(IL_All);
}
@@ -1102,7 +1102,7 @@ LoopReroll::DAGRootTracker::nextInstr(int Val, UsesTy &In,
UsesTy::iterator *StartI) {
UsesTy::iterator I = StartI ? *StartI : In.begin();
while (I != In.end() && (I->second.test(Val) == 0 ||
- Exclude.contains(I->first)))
+ Exclude.contains(I->first)))
++I;
return I;
}
@@ -1660,7 +1660,7 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
return true;
}
-bool LoopReroll::runOnLoop(Loop *L) {
+bool LoopReroll::runOnLoop(Loop *L) {
BasicBlock *Header = L->getHeader();
LLVM_DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() << "] Loop %"
<< Header->getName() << " (" << L->getNumBlocks()
@@ -1709,26 +1709,26 @@ bool LoopReroll::runOnLoop(Loop *L) {
return Changed;
}
-
-bool LoopRerollLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
- if (skipLoop(L))
- return false;
-
- auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
- *L->getHeader()->getParent());
- auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
-
- return LoopReroll(AA, LI, SE, TLI, DT, PreserveLCSSA).runOnLoop(L);
-}
-
-PreservedAnalyses LoopRerollPass::run(Loop &L, LoopAnalysisManager &AM,
- LoopStandardAnalysisResults &AR,
- LPMUpdater &U) {
- return LoopReroll(&AR.AA, &AR.LI, &AR.SE, &AR.TLI, &AR.DT, true).runOnLoop(&L)
- ? getLoopPassPreservedAnalyses()
- : PreservedAnalyses::all();
-}
+
+bool LoopRerollLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (skipLoop(L))
+ return false;
+
+ auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
+ *L->getHeader()->getParent());
+ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+
+ return LoopReroll(AA, LI, SE, TLI, DT, PreserveLCSSA).runOnLoop(L);
+}
+
+PreservedAnalyses LoopRerollPass::run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &U) {
+ return LoopReroll(&AR.AA, &AR.LI, &AR.SE, &AR.TLI, &AR.DT, true).runOnLoop(&L)
+ ? getLoopPassPreservedAnalyses()
+ : PreservedAnalyses::all();
+}
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopRotation.cpp
index ad1cfc68ec..252668e1d0 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopRotation.cpp
@@ -12,7 +12,7 @@
#include "llvm/Transforms/Scalar/LoopRotation.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
@@ -34,35 +34,35 @@ static cl::opt<unsigned> DefaultRotationThreshold(
"rotation-max-header-size", cl::init(16), cl::Hidden,
cl::desc("The default maximum header size for automatic loop rotation"));
-static cl::opt<bool> PrepareForLTOOption(
- "rotation-prepare-for-lto", cl::init(false), cl::Hidden,
- cl::desc("Run loop-rotation in the prepare-for-lto stage. This option "
- "should be used for testing only."));
-
-LoopRotatePass::LoopRotatePass(bool EnableHeaderDuplication, bool PrepareForLTO)
- : EnableHeaderDuplication(EnableHeaderDuplication),
- PrepareForLTO(PrepareForLTO) {}
+static cl::opt<bool> PrepareForLTOOption(
+ "rotation-prepare-for-lto", cl::init(false), cl::Hidden,
+ cl::desc("Run loop-rotation in the prepare-for-lto stage. This option "
+ "should be used for testing only."));
+LoopRotatePass::LoopRotatePass(bool EnableHeaderDuplication, bool PrepareForLTO)
+ : EnableHeaderDuplication(EnableHeaderDuplication),
+ PrepareForLTO(PrepareForLTO) {}
+
PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &) {
- // Vectorization requires loop-rotation. Use default threshold for loops the
- // user explicitly marked for vectorization, even when header duplication is
- // disabled.
- int Threshold = EnableHeaderDuplication ||
- hasVectorizeTransformation(&L) == TM_ForcedByUser
- ? DefaultRotationThreshold
- : 0;
+ // Vectorization requires loop-rotation. Use default threshold for loops the
+ // user explicitly marked for vectorization, even when header duplication is
+ // disabled.
+ int Threshold = EnableHeaderDuplication ||
+ hasVectorizeTransformation(&L) == TM_ForcedByUser
+ ? DefaultRotationThreshold
+ : 0;
const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
const SimplifyQuery SQ = getBestSimplifyQuery(AR, DL);
Optional<MemorySSAUpdater> MSSAU;
if (AR.MSSA)
MSSAU = MemorySSAUpdater(AR.MSSA);
- bool Changed =
- LoopRotation(&L, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, SQ, false,
- Threshold, false, PrepareForLTO || PrepareForLTOOption);
+ bool Changed =
+ LoopRotation(&L, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE,
+ MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, SQ, false,
+ Threshold, false, PrepareForLTO || PrepareForLTOOption);
if (!Changed)
return PreservedAnalyses::all();
@@ -80,13 +80,13 @@ namespace {
class LoopRotateLegacyPass : public LoopPass {
unsigned MaxHeaderSize;
- bool PrepareForLTO;
+ bool PrepareForLTO;
public:
static char ID; // Pass ID, replacement for typeid
- LoopRotateLegacyPass(int SpecifiedMaxHeaderSize = -1,
- bool PrepareForLTO = false)
- : LoopPass(ID), PrepareForLTO(PrepareForLTO) {
+ LoopRotateLegacyPass(int SpecifiedMaxHeaderSize = -1,
+ bool PrepareForLTO = false)
+ : LoopPass(ID), PrepareForLTO(PrepareForLTO) {
initializeLoopRotateLegacyPassPass(*PassRegistry::getPassRegistry());
if (SpecifiedMaxHeaderSize == -1)
MaxHeaderSize = DefaultRotationThreshold;
@@ -122,17 +122,17 @@ public:
if (MSSAA)
MSSAU = MemorySSAUpdater(&MSSAA->getMSSA());
}
- // Vectorization requires loop-rotation. Use default threshold for loops the
- // user explicitly marked for vectorization, even when header duplication is
- // disabled.
- int Threshold = hasVectorizeTransformation(L) == TM_ForcedByUser
- ? DefaultRotationThreshold
- : MaxHeaderSize;
-
+ // Vectorization requires loop-rotation. Use default threshold for loops the
+ // user explicitly marked for vectorization, even when header duplication is
+ // disabled.
+ int Threshold = hasVectorizeTransformation(L) == TM_ForcedByUser
+ ? DefaultRotationThreshold
+ : MaxHeaderSize;
+
return LoopRotation(L, LI, TTI, AC, &DT, &SE,
MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, SQ,
- false, Threshold, false,
- PrepareForLTO || PrepareForLTOOption);
+ false, Threshold, false,
+ PrepareForLTO || PrepareForLTOOption);
}
};
} // end namespace
@@ -147,6 +147,6 @@ INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops", false,
false)
-Pass *llvm::createLoopRotatePass(int MaxHeaderSize, bool PrepareForLTO) {
- return new LoopRotateLegacyPass(MaxHeaderSize, PrepareForLTO);
+Pass *llvm::createLoopRotatePass(int MaxHeaderSize, bool PrepareForLTO) {
+ return new LoopRotateLegacyPass(MaxHeaderSize, PrepareForLTO);
}
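
Annotation (not part of the patch): a hedged usage sketch of the LoopRotatePass constructor restored above, under the assumption that the LLVM 12 loop adaptor API is used as in the in-tree pipelines. With header duplication off, only loops the user explicitly marked for vectorization fall back to DefaultRotationThreshold, per the Threshold selection in LoopRotatePass::run().

    #include "llvm/Transforms/Scalar/LoopPassManager.h"
    #include "llvm/Transforms/Scalar/LoopRotation.h"

    using namespace llvm;

    static FunctionPassManager makeRotatePipeline(bool PrepareForLTO) {
      LoopPassManager LPM;
      // EnableHeaderDuplication = false: rotate only cheap headers, unless
      // the loop carries user-forced vectorization metadata.
      LPM.addPass(
          LoopRotatePass(/*EnableHeaderDuplication=*/false, PrepareForLTO));
      FunctionPassManager FPM;
      // MemorySSA is optional for rotation (AR.MSSA may be null), so the
      // adaptor is created without requesting it.
      FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM)));
      return FPM;
    }
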
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index cc6d112208..17f99ff2a9 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -365,20 +365,20 @@ private:
unsigned DummyIdx = 1;
for (BasicBlock *BB : DeadExitBlocks) {
- // Eliminate all Phis and LandingPads from dead exits.
- // TODO: Consider removing all instructions in this dead block.
- SmallVector<Instruction *, 4> DeadInstructions;
+ // Eliminate all Phis and LandingPads from dead exits.
+ // TODO: Consider removing all instructions in this dead block.
+ SmallVector<Instruction *, 4> DeadInstructions;
for (auto &PN : BB->phis())
- DeadInstructions.push_back(&PN);
-
- if (auto *LandingPad = dyn_cast<LandingPadInst>(BB->getFirstNonPHI()))
- DeadInstructions.emplace_back(LandingPad);
-
- for (Instruction *I : DeadInstructions) {
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
- I->eraseFromParent();
+ DeadInstructions.push_back(&PN);
+
+ if (auto *LandingPad = dyn_cast<LandingPadInst>(BB->getFirstNonPHI()))
+ DeadInstructions.emplace_back(LandingPad);
+
+ for (Instruction *I : DeadInstructions) {
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->eraseFromParent();
}
-
+
assert(DummyIdx != 0 && "Too many dead exits!");
DummySwitch->addCase(Builder.getInt32(DummyIdx++), BB);
DTUpdates.push_back({DominatorTree::Insert, Preheader, BB});
@@ -415,9 +415,9 @@ private:
assert(FixLCSSALoop && "Should be a loop!");
// We need all DT updates to be done before forming LCSSA.
if (MSSAU)
- MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
- else
- DTU.applyUpdates(DTUpdates);
+ MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
+ else
+ DTU.applyUpdates(DTUpdates);
DTUpdates.clear();
formLCSSARecursively(*FixLCSSALoop, DT, &LI, &SE);
}
@@ -425,7 +425,7 @@ private:
if (MSSAU) {
// Clear all updates now. Facilitates deletes that follow.
- MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
+ MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
DTUpdates.clear();
if (VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
@@ -451,7 +451,7 @@ private:
if (LI.isLoopHeader(BB)) {
assert(LI.getLoopFor(BB) != &L && "Attempt to remove current loop!");
Loop *DL = LI.getLoopFor(BB);
- if (!DL->isOutermost()) {
+ if (!DL->isOutermost()) {
for (auto *PL = DL->getParentLoop(); PL; PL = PL->getParentLoop())
for (auto *BB : DL->getBlocks())
PL->removeBlockFromLoop(BB);
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopSink.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopSink.cpp
index 47698fdde6..0296b12878 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopSink.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopSink.cpp
@@ -39,8 +39,8 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/MemorySSA.h"
-#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/IR/Dominators.h"
@@ -69,14 +69,14 @@ static cl::opt<unsigned> MaxNumberOfUseBBsForSinking(
"max-uses-for-sinking", cl::Hidden, cl::init(30),
cl::desc("Do not sink instructions that have too many uses."));
-static cl::opt<bool> EnableMSSAInLoopSink(
- "enable-mssa-in-loop-sink", cl::Hidden, cl::init(true),
- cl::desc("Enable MemorySSA for LoopSink in new pass manager"));
-
-static cl::opt<bool> EnableMSSAInLegacyLoopSink(
- "enable-mssa-in-legacy-loop-sink", cl::Hidden, cl::init(false),
- cl::desc("Enable MemorySSA for LoopSink in legacy pass manager"));
-
+static cl::opt<bool> EnableMSSAInLoopSink(
+ "enable-mssa-in-loop-sink", cl::Hidden, cl::init(true),
+ cl::desc("Enable MemorySSA for LoopSink in new pass manager"));
+
+static cl::opt<bool> EnableMSSAInLegacyLoopSink(
+ "enable-mssa-in-legacy-loop-sink", cl::Hidden, cl::init(false),
+ cl::desc("Enable MemorySSA for LoopSink in legacy pass manager"));
+
/// Return adjusted total frequency of \p BBs.
///
/// * If there is only one BB, sinking instruction will not introduce code
@@ -182,10 +182,10 @@ findBBsToSinkInto(const Loop &L, const SmallPtrSetImpl<BasicBlock *> &UseBBs,
// sinking is successful.
// \p LoopBlockNumber is used to sort the insertion blocks to ensure
// determinism.
-static bool sinkInstruction(
- Loop &L, Instruction &I, const SmallVectorImpl<BasicBlock *> &ColdLoopBBs,
- const SmallDenseMap<BasicBlock *, int, 16> &LoopBlockNumber, LoopInfo &LI,
- DominatorTree &DT, BlockFrequencyInfo &BFI, MemorySSAUpdater *MSSAU) {
+static bool sinkInstruction(
+ Loop &L, Instruction &I, const SmallVectorImpl<BasicBlock *> &ColdLoopBBs,
+ const SmallDenseMap<BasicBlock *, int, 16> &LoopBlockNumber, LoopInfo &LI,
+ DominatorTree &DT, BlockFrequencyInfo &BFI, MemorySSAUpdater *MSSAU) {
// Compute the set of blocks in loop L which contain a use of I.
SmallPtrSet<BasicBlock *, 2> BBs;
for (auto &U : I.uses()) {
@@ -222,7 +222,7 @@ static bool sinkInstruction(
// of the loop block numbers as iterating the set doesn't give a useful
// order. No need to stable sort as the block numbers are a total ordering.
SmallVector<BasicBlock *, 2> SortedBBsToSinkInto;
- llvm::append_range(SortedBBsToSinkInto, BBsToSinkInto);
+ llvm::append_range(SortedBBsToSinkInto, BBsToSinkInto);
llvm::sort(SortedBBsToSinkInto, [&](BasicBlock *A, BasicBlock *B) {
return LoopBlockNumber.find(A)->second < LoopBlockNumber.find(B)->second;
});
@@ -238,21 +238,21 @@ static bool sinkInstruction(
Instruction *IC = I.clone();
IC->setName(I.getName());
IC->insertBefore(&*N->getFirstInsertionPt());
-
- if (MSSAU && MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
- // Create a new MemoryAccess and let MemorySSA set its defining access.
- MemoryAccess *NewMemAcc =
- MSSAU->createMemoryAccessInBB(IC, nullptr, N, MemorySSA::Beginning);
- if (NewMemAcc) {
- if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
- MSSAU->insertDef(MemDef, /*RenameUses=*/true);
- else {
- auto *MemUse = cast<MemoryUse>(NewMemAcc);
- MSSAU->insertUse(MemUse, /*RenameUses=*/true);
- }
- }
- }
-
+
+ if (MSSAU && MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
+ // Create a new MemoryAccess and let MemorySSA set its defining access.
+ MemoryAccess *NewMemAcc =
+ MSSAU->createMemoryAccessInBB(IC, nullptr, N, MemorySSA::Beginning);
+ if (NewMemAcc) {
+ if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
+ MSSAU->insertDef(MemDef, /*RenameUses=*/true);
+ else {
+ auto *MemUse = cast<MemoryUse>(NewMemAcc);
+ MSSAU->insertUse(MemUse, /*RenameUses=*/true);
+ }
+ }
+ }
+
// Replaces uses of I with IC in N
I.replaceUsesWithIf(IC, [N](Use &U) {
return cast<Instruction>(U.getUser())->getParent() == N;
@@ -267,11 +267,11 @@ static bool sinkInstruction(
NumLoopSunk++;
I.moveBefore(&*MoveBB->getFirstInsertionPt());
- if (MSSAU)
- if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
- MSSAU->getMemorySSA()->getMemoryAccess(&I)))
- MSSAU->moveToPlace(OldMemAcc, MoveBB, MemorySSA::Beginning);
-
+ if (MSSAU)
+ if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
+ MSSAU->getMemorySSA()->getMemoryAccess(&I)))
+ MSSAU->moveToPlace(OldMemAcc, MoveBB, MemorySSA::Beginning);
+
return true;
}
@@ -280,14 +280,14 @@ static bool sinkInstruction(
static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
DominatorTree &DT,
BlockFrequencyInfo &BFI,
- ScalarEvolution *SE,
- AliasSetTracker *CurAST,
- MemorySSA *MSSA) {
+ ScalarEvolution *SE,
+ AliasSetTracker *CurAST,
+ MemorySSA *MSSA) {
BasicBlock *Preheader = L.getLoopPreheader();
- assert(Preheader && "Expected loop to have preheader");
+ assert(Preheader && "Expected loop to have preheader");
- assert(Preheader->getParent()->hasProfileData() &&
- "Unexpected call when profile data unavailable.");
+ assert(Preheader->getParent()->hasProfileData() &&
+ "Unexpected call when profile data unavailable.");
const BlockFrequency PreheaderFreq = BFI.getBlockFreq(Preheader);
// If there are no basic blocks with lower frequency than the preheader then
@@ -298,14 +298,14 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
}))
return false;
- std::unique_ptr<MemorySSAUpdater> MSSAU;
- std::unique_ptr<SinkAndHoistLICMFlags> LICMFlags;
- if (MSSA) {
- MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
- LICMFlags =
- std::make_unique<SinkAndHoistLICMFlags>(/*IsSink=*/true, &L, MSSA);
- }
-
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ std::unique_ptr<SinkAndHoistLICMFlags> LICMFlags;
+ if (MSSA) {
+ MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
+ LICMFlags =
+ std::make_unique<SinkAndHoistLICMFlags>(/*IsSink=*/true, &L, MSSA);
+ }
+
bool Changed = false;
// Sort loop's basic blocks by frequency
@@ -329,11 +329,11 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
// No need to check for instruction's operands are loop invariant.
assert(L.hasLoopInvariantOperands(I) &&
"Insts in a loop's preheader should have loop invariant operands!");
- if (!canSinkOrHoistInst(*I, &AA, &DT, &L, CurAST, MSSAU.get(), false,
- LICMFlags.get()))
+ if (!canSinkOrHoistInst(*I, &AA, &DT, &L, CurAST, MSSAU.get(), false,
+ LICMFlags.get()))
continue;
- if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI,
- MSSAU.get()))
+ if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI,
+ MSSAU.get()))
Changed = true;
}
@@ -342,13 +342,13 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
return Changed;
}
-static void computeAliasSet(Loop &L, BasicBlock &Preheader,
- AliasSetTracker &CurAST) {
- for (BasicBlock *BB : L.blocks())
- CurAST.add(*BB);
- CurAST.add(Preheader);
-}
-
+static void computeAliasSet(Loop &L, BasicBlock &Preheader,
+ AliasSetTracker &CurAST) {
+ for (BasicBlock *BB : L.blocks())
+ CurAST.add(*BB);
+ CurAST.add(Preheader);
+}
+
PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
// Nothing to do if there are no loops.
@@ -359,10 +359,10 @@ PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
- MemorySSA *MSSA = EnableMSSAInLoopSink
- ? &FAM.getResult<MemorySSAAnalysis>(F).getMSSA()
- : nullptr;
-
+ MemorySSA *MSSA = EnableMSSAInLoopSink
+ ? &FAM.getResult<MemorySSAAnalysis>(F).getMSSA()
+ : nullptr;
+
// We want to do a postorder walk over the loops. Since loops are a tree this
// is equivalent to a reversed preorder walk and preorder is easy to compute
// without recursion. Since we reverse the preorder, we will visit siblings
@@ -374,27 +374,27 @@ PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
do {
Loop &L = *PreorderLoops.pop_back_val();
- BasicBlock *Preheader = L.getLoopPreheader();
- if (!Preheader)
- continue;
-
- // Enable LoopSink only when runtime profile is available.
- // With static profile, the sinking decision may be sub-optimal.
- if (!Preheader->getParent()->hasProfileData())
- continue;
-
- std::unique_ptr<AliasSetTracker> CurAST;
- if (!EnableMSSAInLoopSink) {
- CurAST = std::make_unique<AliasSetTracker>(AA);
- computeAliasSet(L, *Preheader, *CurAST.get());
- }
-
+ BasicBlock *Preheader = L.getLoopPreheader();
+ if (!Preheader)
+ continue;
+
+ // Enable LoopSink only when runtime profile is available.
+ // With static profile, the sinking decision may be sub-optimal.
+ if (!Preheader->getParent()->hasProfileData())
+ continue;
+
+ std::unique_ptr<AliasSetTracker> CurAST;
+ if (!EnableMSSAInLoopSink) {
+ CurAST = std::make_unique<AliasSetTracker>(AA);
+ computeAliasSet(L, *Preheader, *CurAST.get());
+ }
+
// Note that we don't pass SCEV here because it is only used to invalidate
// loops in SCEV and we don't preserve (or request) SCEV at all making that
// unnecessary.
Changed |= sinkLoopInvariantInstructions(L, AA, LI, DT, BFI,
- /*ScalarEvolution*/ nullptr,
- CurAST.get(), MSSA);
+ /*ScalarEvolution*/ nullptr,
+ CurAST.get(), MSSA);
} while (!PreorderLoops.empty());
if (!Changed)
@@ -402,14 +402,14 @@ PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
-
- if (MSSA) {
- PA.preserve<MemorySSAAnalysis>();
-
- if (VerifyMemorySSA)
- MSSA->verifyMemorySSA();
- }
-
+
+ if (MSSA) {
+ PA.preserve<MemorySSAAnalysis>();
+
+ if (VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+ }
+
return PA;
}
@@ -424,46 +424,46 @@ struct LegacyLoopSinkPass : public LoopPass {
if (skipLoop(L))
return false;
- BasicBlock *Preheader = L->getLoopPreheader();
- if (!Preheader)
- return false;
-
- // Enable LoopSink only when runtime profile is available.
- // With static profile, the sinking decision may be sub-optimal.
- if (!Preheader->getParent()->hasProfileData())
- return false;
-
- AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader)
+ return false;
+
+ // Enable LoopSink only when runtime profile is available.
+ // With static profile, the sinking decision may be sub-optimal.
+ if (!Preheader->getParent()->hasProfileData())
+ return false;
+
+ AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
- std::unique_ptr<AliasSetTracker> CurAST;
- MemorySSA *MSSA = nullptr;
- if (EnableMSSAInLegacyLoopSink)
- MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
- else {
- CurAST = std::make_unique<AliasSetTracker>(AA);
- computeAliasSet(*L, *Preheader, *CurAST.get());
- }
-
- bool Changed = sinkLoopInvariantInstructions(
- *L, AA, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
+ std::unique_ptr<AliasSetTracker> CurAST;
+ MemorySSA *MSSA = nullptr;
+ if (EnableMSSAInLegacyLoopSink)
+ MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ else {
+ CurAST = std::make_unique<AliasSetTracker>(AA);
+ computeAliasSet(*L, *Preheader, *CurAST.get());
+ }
+
+ bool Changed = sinkLoopInvariantInstructions(
+ *L, AA, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(),
- SE ? &SE->getSE() : nullptr, CurAST.get(), MSSA);
-
- if (MSSA && VerifyMemorySSA)
- MSSA->verifyMemorySSA();
-
- return Changed;
+ SE ? &SE->getSE() : nullptr, CurAST.get(), MSSA);
+
+ if (MSSA && VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+
+ return Changed;
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<BlockFrequencyInfoWrapperPass>();
getLoopAnalysisUsage(AU);
- if (EnableMSSAInLegacyLoopSink) {
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- }
+ if (EnableMSSAInLegacyLoopSink) {
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
+ }
}
};
}
@@ -473,7 +473,7 @@ INITIALIZE_PASS_BEGIN(LegacyLoopSinkPass, "loop-sink", "Loop Sink", false,
false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(LegacyLoopSinkPass, "loop-sink", "Loop Sink", false, false)
Pass *llvm::createLoopSinkPass() { return new LegacyLoopSinkPass(); }
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 5dec9b5420..2b2f30340a 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -75,13 +75,13 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionNormalization.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalValue.h"
@@ -424,7 +424,7 @@ static void DoInitialMatch(const SCEV *S, Loop *L,
// Handle a multiplication by -1 (negation) if it didn't fold.
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
if (Mul->getOperand(0)->isAllOnesValue()) {
- SmallVector<const SCEV *, 4> Ops(drop_begin(Mul->operands()));
+ SmallVector<const SCEV *, 4> Ops(drop_begin(Mul->operands()));
const SCEV *NewMul = SE.getMulExpr(Ops);
SmallVector<const SCEV *, 4> MyGood;
@@ -485,10 +485,10 @@ bool Formula::isCanonical(const Loop &L) const {
// If ScaledReg is not a recurrent expr, or it is but its loop is not current
// loop, meanwhile BaseRegs contains a recurrent expr reg related with current
// loop, we want to swap the reg in BaseRegs with ScaledReg.
- auto I = find_if(BaseRegs, [&](const SCEV *S) {
- return isa<const SCEVAddRecExpr>(S) &&
- (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
- });
+ auto I = find_if(BaseRegs, [&](const SCEV *S) {
+ return isa<const SCEVAddRecExpr>(S) &&
+ (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
+ });
return I == BaseRegs.end();
}
@@ -507,7 +507,7 @@ void Formula::canonicalize(const Loop &L) {
// Keep the invariant sum in BaseRegs and one of the variant sum in ScaledReg.
if (!ScaledReg) {
- ScaledReg = BaseRegs.pop_back_val();
+ ScaledReg = BaseRegs.pop_back_val();
Scale = 1;
}
@@ -516,10 +516,10 @@ void Formula::canonicalize(const Loop &L) {
// reg with ScaledReg.
const SCEVAddRecExpr *SAR = dyn_cast<const SCEVAddRecExpr>(ScaledReg);
if (!SAR || SAR->getLoop() != &L) {
- auto I = find_if(BaseRegs, [&](const SCEV *S) {
- return isa<const SCEVAddRecExpr>(S) &&
- (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
- });
+ auto I = find_if(BaseRegs, [&](const SCEV *S) {
+ return isa<const SCEVAddRecExpr>(S) &&
+ (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
+ });
if (I != BaseRegs.end())
std::swap(ScaledReg, *I);
}
@@ -752,13 +752,13 @@ static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
return C->getValue()->getSExtValue();
}
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- SmallVector<const SCEV *, 8> NewOps(Add->operands());
+ SmallVector<const SCEV *, 8> NewOps(Add->operands());
int64_t Result = ExtractImmediate(NewOps.front(), SE);
if (Result != 0)
S = SE.getAddExpr(NewOps);
return Result;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
- SmallVector<const SCEV *, 8> NewOps(AR->operands());
+ SmallVector<const SCEV *, 8> NewOps(AR->operands());
int64_t Result = ExtractImmediate(NewOps.front(), SE);
if (Result != 0)
S = SE.getAddRecExpr(NewOps, AR->getLoop(),
@@ -778,13 +778,13 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
return GV;
}
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- SmallVector<const SCEV *, 8> NewOps(Add->operands());
+ SmallVector<const SCEV *, 8> NewOps(Add->operands());
GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
if (Result)
S = SE.getAddExpr(NewOps);
return Result;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
- SmallVector<const SCEV *, 8> NewOps(AR->operands());
+ SmallVector<const SCEV *, 8> NewOps(AR->operands());
GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
if (Result)
S = SE.getAddRecExpr(NewOps, AR->getLoop(),
@@ -934,8 +934,8 @@ static bool isHighCostExpansion(const SCEV *S,
case scSignExtend:
return isHighCostExpansion(cast<SCEVSignExtendExpr>(S)->getOperand(),
Processed, SE);
- default:
- break;
+ default:
+ break;
}
if (!Processed.insert(S).second)
@@ -1211,7 +1211,7 @@ static unsigned getSetupCost(const SCEV *Reg, unsigned Depth) {
return 0;
if (const auto *S = dyn_cast<SCEVAddRecExpr>(Reg))
return getSetupCost(S->getStart(), Depth - 1);
- if (auto S = dyn_cast<SCEVIntegralCastExpr>(Reg))
+ if (auto S = dyn_cast<SCEVIntegralCastExpr>(Reg))
return getSetupCost(S->getOperand(), Depth - 1);
if (auto S = dyn_cast<SCEVNAryExpr>(Reg))
return std::accumulate(S->op_begin(), S->op_end(), 0,
@@ -2787,7 +2787,7 @@ static const SCEV *getExprBase(const SCEV *S) {
case scAddRecExpr:
return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
}
- llvm_unreachable("Unknown SCEV kind!");
+ llvm_unreachable("Unknown SCEV kind!");
}
/// Return true if the chain increment is profitable to expand into a loop
@@ -3402,7 +3402,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
Worklist.append(N->op_begin(), N->op_end());
- else if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(S))
+ else if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(S))
Worklist.push_back(C->getOperand());
else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
Worklist.push_back(D->getLHS());
@@ -3835,14 +3835,14 @@ void LSRInstance::GenerateConstantOffsetsImpl(
F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
return;
- if (IsScaledReg) {
+ if (IsScaledReg) {
F.ScaledReg = G;
- } else {
+ } else {
F.BaseRegs[Idx] = G;
-      // We may generate a non-canonical Formula if G is a recurrent expr reg
-      // related to the current loop while F.ScaledReg is not.
- F.canonicalize(*L);
- }
+      // We may generate a non-canonical Formula if G is a recurrent expr reg
+      // related to the current loop while F.ScaledReg is not.
+ F.canonicalize(*L);
+ }
(void)InsertFormula(LU, LUIdx, F);
}
@@ -5383,11 +5383,11 @@ void LSRInstance::RewriteForPHI(
// Split the critical edge.
BasicBlock *NewBB = nullptr;
if (!Parent->isLandingPad()) {
- NewBB =
- SplitCriticalEdge(BB, Parent,
- CriticalEdgeSplittingOptions(&DT, &LI, MSSAU)
- .setMergeIdenticalEdges()
- .setKeepOneInputPHIs());
+ NewBB =
+ SplitCriticalEdge(BB, Parent,
+ CriticalEdgeSplittingOptions(&DT, &LI, MSSAU)
+ .setMergeIdenticalEdges()
+ .setKeepOneInputPHIs());
} else {
SmallVector<BasicBlock*, 2> NewBBs;
SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DT, &LI);
@@ -5520,8 +5520,8 @@ void LSRInstance::ImplementSolution(
// we can remove them after we are done working.
SmallVector<WeakTrackingVH, 16> DeadInsts;
- SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), "lsr",
- false);
+ SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), "lsr",
+ false);
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
@@ -5620,19 +5620,19 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
if (IU.empty()) return;
// Skip nested loops until we can model them better with formulae.
- if (!L->isInnermost()) {
+ if (!L->isInnermost()) {
LLVM_DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
return;
}
// Start collecting data and preparing for the solver.
-  // If the number of registers is not the major cost, we cannot benefit from
-  // the current profitable chain optimization, which is based on the number
-  // of registers.
-  // FIXME: add profitable chain optimization for other kinds of major cost,
-  // for example the number of instructions.
- if (TTI.isNumRegsMajorCostOfLSR() || StressIVChain)
- CollectChains();
+  // If the number of registers is not the major cost, we cannot benefit from
+  // the current profitable chain optimization, which is based on the number
+  // of registers.
+  // FIXME: add profitable chain optimization for other kinds of major cost,
+  // for example the number of instructions.
+ if (TTI.isNumRegsMajorCostOfLSR() || StressIVChain)
+ CollectChains();
CollectInterestingTypesAndFactors();
CollectFixupsAndInitialFormulae();
CollectLoopInvariantFixupsAndFormulae();
@@ -5772,63 +5772,63 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<MemorySSAWrapperPass>();
}
-using EqualValues = SmallVector<std::tuple<WeakVH, int64_t, DIExpression *>, 4>;
-using EqualValuesMap = DenseMap<DbgValueInst *, EqualValues>;
-
-static void DbgGatherEqualValues(Loop *L, ScalarEvolution &SE,
- EqualValuesMap &DbgValueToEqualSet) {
- for (auto &B : L->getBlocks()) {
- for (auto &I : *B) {
- auto DVI = dyn_cast<DbgValueInst>(&I);
- if (!DVI)
- continue;
- auto V = DVI->getVariableLocation();
- if (!V || !SE.isSCEVable(V->getType()))
- continue;
- auto DbgValueSCEV = SE.getSCEV(V);
- EqualValues EqSet;
- for (PHINode &Phi : L->getHeader()->phis()) {
- if (V->getType() != Phi.getType())
- continue;
- if (!SE.isSCEVable(Phi.getType()))
- continue;
- auto PhiSCEV = SE.getSCEV(&Phi);
- Optional<APInt> Offset =
- SE.computeConstantDifference(DbgValueSCEV, PhiSCEV);
- if (Offset && Offset->getMinSignedBits() <= 64)
- EqSet.emplace_back(std::make_tuple(
- &Phi, Offset.getValue().getSExtValue(), DVI->getExpression()));
- }
- DbgValueToEqualSet[DVI] = std::move(EqSet);
- }
- }
-}
-
-static void DbgApplyEqualValues(EqualValuesMap &DbgValueToEqualSet) {
- for (auto A : DbgValueToEqualSet) {
- auto DVI = A.first;
- // Only update those that are now undef.
- if (!isa_and_nonnull<UndefValue>(DVI->getVariableLocation()))
- continue;
- for (auto EV : A.second) {
- auto V = std::get<WeakVH>(EV);
- if (!V)
- continue;
- auto DbgDIExpr = std::get<DIExpression *>(EV);
- auto Offset = std::get<int64_t>(EV);
- auto &Ctx = DVI->getContext();
- DVI->setOperand(0, MetadataAsValue::get(Ctx, ValueAsMetadata::get(V)));
- if (Offset) {
- SmallVector<uint64_t, 8> Ops;
- DIExpression::appendOffset(Ops, Offset);
- DbgDIExpr = DIExpression::prependOpcodes(DbgDIExpr, Ops, true);
- }
- DVI->setOperand(2, MetadataAsValue::get(Ctx, DbgDIExpr));
- break;
- }
- }
-}
-
+using EqualValues = SmallVector<std::tuple<WeakVH, int64_t, DIExpression *>, 4>;
+using EqualValuesMap = DenseMap<DbgValueInst *, EqualValues>;
+
+static void DbgGatherEqualValues(Loop *L, ScalarEvolution &SE,
+ EqualValuesMap &DbgValueToEqualSet) {
+ for (auto &B : L->getBlocks()) {
+ for (auto &I : *B) {
+ auto DVI = dyn_cast<DbgValueInst>(&I);
+ if (!DVI)
+ continue;
+ auto V = DVI->getVariableLocation();
+ if (!V || !SE.isSCEVable(V->getType()))
+ continue;
+ auto DbgValueSCEV = SE.getSCEV(V);
+ EqualValues EqSet;
+ for (PHINode &Phi : L->getHeader()->phis()) {
+ if (V->getType() != Phi.getType())
+ continue;
+ if (!SE.isSCEVable(Phi.getType()))
+ continue;
+ auto PhiSCEV = SE.getSCEV(&Phi);
+ Optional<APInt> Offset =
+ SE.computeConstantDifference(DbgValueSCEV, PhiSCEV);
+ if (Offset && Offset->getMinSignedBits() <= 64)
+ EqSet.emplace_back(std::make_tuple(
+ &Phi, Offset.getValue().getSExtValue(), DVI->getExpression()));
+ }
+ DbgValueToEqualSet[DVI] = std::move(EqSet);
+ }
+ }
+}
+
+static void DbgApplyEqualValues(EqualValuesMap &DbgValueToEqualSet) {
+ for (auto A : DbgValueToEqualSet) {
+ auto DVI = A.first;
+ // Only update those that are now undef.
+ if (!isa_and_nonnull<UndefValue>(DVI->getVariableLocation()))
+ continue;
+ for (auto EV : A.second) {
+ auto V = std::get<WeakVH>(EV);
+ if (!V)
+ continue;
+ auto DbgDIExpr = std::get<DIExpression *>(EV);
+ auto Offset = std::get<int64_t>(EV);
+ auto &Ctx = DVI->getContext();
+ DVI->setOperand(0, MetadataAsValue::get(Ctx, ValueAsMetadata::get(V)));
+ if (Offset) {
+ SmallVector<uint64_t, 8> Ops;
+ DIExpression::appendOffset(Ops, Offset);
+ DbgDIExpr = DIExpression::prependOpcodes(DbgDIExpr, Ops, true);
+ }
+ DVI->setOperand(2, MetadataAsValue::get(Ctx, DbgDIExpr));
+ break;
+ }
+ }
+}
+
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
DominatorTree &DT, LoopInfo &LI,
const TargetTransformInfo &TTI,
@@ -5844,17 +5844,17 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
Changed |=
LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get()).getChanged();
- // Debug preservation - before we start removing anything create equivalence
- // sets for the llvm.dbg.value intrinsics.
- EqualValuesMap DbgValueToEqualSet;
- DbgGatherEqualValues(L, SE, DbgValueToEqualSet);
-
+ // Debug preservation - before we start removing anything create equivalence
+ // sets for the llvm.dbg.value intrinsics.
+ EqualValuesMap DbgValueToEqualSet;
+ DbgGatherEqualValues(L, SE, DbgValueToEqualSet);
+
// Remove any extra phis created by processing inner loops.
Changed |= DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
if (EnablePhiElim && L->isLoopSimplifyForm()) {
SmallVector<WeakTrackingVH, 16> DeadInsts;
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
- SCEVExpander Rewriter(SE, DL, "lsr", false);
+ SCEVExpander Rewriter(SE, DL, "lsr", false);
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
@@ -5866,9 +5866,9 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
}
}
-
- DbgApplyEqualValues(DbgValueToEqualSet);
-
+
+ DbgApplyEqualValues(DbgValueToEqualSet);
+
return Changed;
}
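
The LoopStrengthReduce hunk above re-adds the debug-value preservation helpers, which record, for each llvm.dbg.value, a set of (value handle, constant offset, expression) tuples and later rewrite the intrinsic from the first handle that is still alive. A rough sketch of that bookkeeping with placeholder types (the real code keys a DenseMap by DbgValueInst* and uses WeakVH, DIExpression::appendOffset and prependOpcodes) could look like:

#include <cstdint>
#include <tuple>
#include <vector>

// Placeholder handle and expression types for illustration only.
using ValueHandle = int *;
struct ExprStub { std::vector<int64_t> OffsetOps; };

using EqualValue = std::tuple<ValueHandle, int64_t, ExprStub *>;

// Rewrite a now-undef debug location from the first equivalent value that is
// still alive, adding its constant offset to the expression.
bool applyFirstLiveEquivalent(const std::vector<EqualValue> &Set,
                              ValueHandle &Location, ExprStub &Expr) {
  for (const EqualValue &EV : Set) {
    ValueHandle V = std::get<ValueHandle>(EV);
    if (!V)
      continue; // the candidate value was deleted; try the next one
    Location = V;
    if (int64_t Offset = std::get<int64_t>(EV))
      Expr.OffsetOps.push_back(Offset); // stand-in for appendOffset
    return true;
  }
  return false;
}
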
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index 495906e1a7..d65e9dd059 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -41,7 +41,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/LoopPeel.h"
+#include "llvm/Transforms/Utils/LoopPeel.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
@@ -288,13 +288,13 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
None, None, None, None, None);
TargetTransformInfo::PeelingPreferences PP =
gatherPeelingPreferences(L, SE, TTI, None, None);
-
- TransformationMode EnableMode = hasUnrollAndJamTransformation(L);
- if (EnableMode & TM_Disable)
- return LoopUnrollResult::Unmodified;
- if (EnableMode & TM_ForcedByUser)
- UP.UnrollAndJam = true;
-
+
+ TransformationMode EnableMode = hasUnrollAndJamTransformation(L);
+ if (EnableMode & TM_Disable)
+ return LoopUnrollResult::Unmodified;
+ if (EnableMode & TM_ForcedByUser)
+ UP.UnrollAndJam = true;
+
if (AllowUnrollAndJam.getNumOccurrences() > 0)
UP.UnrollAndJam = AllowUnrollAndJam;
if (UnrollAndJamThreshold.getNumOccurrences() > 0)
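
The unroll-and-jam hunk above gates the transform on hasUnrollAndJamTransformation: loop metadata can disable it outright or force it past the cost model. A small illustrative sketch of that flag logic follows; the enum encoding below is an assumption chosen so the bit tests work, not a copy of the real TransformationMode definition in LLVM's loop utilities.

// Illustrative encoding only.
enum TransformationMode : unsigned {
  TM_Unspecified = 0,
  TM_Enable = 1u << 0,  // apply the transform regardless of the cost model
  TM_Disable = 1u << 1, // never apply the transform to this loop
  TM_Force = 1u << 2,   // modifier bit: the request came from the user
  TM_ForcedByUser = TM_Enable | TM_Force,
  TM_SuppressedByUser = TM_Disable | TM_Force,
};

// Mirror of the gate in tryToUnrollAndJamLoop: metadata can veto the
// transform outright or force it past the profitability heuristics.
bool shouldTryUnrollAndJam(TransformationMode Mode, bool &ForcedByUser) {
  if (Mode & TM_Disable)
    return false;
  ForcedByUser = (Mode & TM_ForcedByUser) == TM_ForcedByUser;
  return true;
}
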
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 1b974576a3..de36dce3a0 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -56,7 +56,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/Utils/LoopPeel.h"
+#include "llvm/Transforms/Utils/LoopPeel.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
@@ -76,19 +76,19 @@ using namespace llvm;
cl::opt<bool> llvm::ForgetSCEVInLoopUnroll(
"forget-scev-loop-unroll", cl::init(false), cl::Hidden,
cl::desc("Forget everything in SCEV when doing LoopUnroll, instead of just"
- " the current top-most loop. This is sometimes preferred to reduce"
+ " the current top-most loop. This is sometimes preferred to reduce"
" compile time."));
static cl::opt<unsigned>
UnrollThreshold("unroll-threshold", cl::Hidden,
cl::desc("The cost threshold for loop unrolling"));
-static cl::opt<unsigned>
- UnrollOptSizeThreshold(
- "unroll-optsize-threshold", cl::init(0), cl::Hidden,
- cl::desc("The cost threshold for loop unrolling when optimizing for "
- "size"));
-
+static cl::opt<unsigned>
+ UnrollOptSizeThreshold(
+ "unroll-optsize-threshold", cl::init(0), cl::Hidden,
+ cl::desc("The cost threshold for loop unrolling when optimizing for "
+ "size"));
+
static cl::opt<unsigned> UnrollPartialThreshold(
"unroll-partial-threshold", cl::Hidden,
cl::desc("The cost threshold for partial loop unrolling"));
@@ -194,9 +194,9 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.Threshold =
OptLevel > 2 ? UnrollThresholdAggressive : UnrollThresholdDefault;
UP.MaxPercentThresholdBoost = 400;
- UP.OptSizeThreshold = UnrollOptSizeThreshold;
+ UP.OptSizeThreshold = UnrollOptSizeThreshold;
UP.PartialThreshold = 150;
- UP.PartialOptSizeThreshold = UnrollOptSizeThreshold;
+ UP.PartialOptSizeThreshold = UnrollOptSizeThreshold;
UP.Count = 0;
UP.DefaultUnrollRuntimeCount = 8;
UP.MaxCount = std::numeric_limits<unsigned>::max();
@@ -218,10 +218,10 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
// Apply size attributes
bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
- // Let unroll hints / pragmas take precedence over PGSO.
- (hasUnrollTransformation(L) != TM_ForcedByUser &&
- llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
- PGSOQueryType::IRPass));
+ // Let unroll hints / pragmas take precedence over PGSO.
+ (hasUnrollTransformation(L) != TM_ForcedByUser &&
+ llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
+ PGSOQueryType::IRPass));
if (OptForSize) {
UP.Threshold = UP.OptSizeThreshold;
UP.PartialThreshold = UP.PartialOptSizeThreshold;
@@ -347,7 +347,7 @@ static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
// Only analyze inner loops. We can't properly estimate cost of nested loops
// and we won't visit inner loops again anyway.
- if (!L->isInnermost())
+ if (!L->isInnermost())
return None;
// Don't simulate loops with a big or unknown tripcount
@@ -389,10 +389,10 @@ static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
assert(CostWorklist.empty() && "Must start with an empty cost list");
assert(PHIUsedList.empty() && "Must start with an empty phi used list");
CostWorklist.push_back(&RootI);
- TargetTransformInfo::TargetCostKind CostKind =
- RootI.getFunction()->hasMinSize() ?
- TargetTransformInfo::TCK_CodeSize :
- TargetTransformInfo::TCK_SizeAndLatency;
+ TargetTransformInfo::TargetCostKind CostKind =
+ RootI.getFunction()->hasMinSize() ?
+ TargetTransformInfo::TCK_CodeSize :
+ TargetTransformInfo::TCK_SizeAndLatency;
for (;; --Iteration) {
do {
Instruction *I = CostWorklist.pop_back_val();
@@ -433,7 +433,7 @@ static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
// First accumulate the cost of this instruction.
if (!Cost.IsFree) {
- UnrolledCost += TTI.getUserCost(I, CostKind);
+ UnrolledCost += TTI.getUserCost(I, CostKind);
LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "
<< Iteration << "): ");
LLVM_DEBUG(I->dump());
@@ -473,9 +473,9 @@ static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
- TargetTransformInfo::TargetCostKind CostKind =
- L->getHeader()->getParent()->hasMinSize() ?
- TargetTransformInfo::TCK_CodeSize : TargetTransformInfo::TCK_SizeAndLatency;
+ TargetTransformInfo::TargetCostKind CostKind =
+ L->getHeader()->getParent()->hasMinSize() ?
+ TargetTransformInfo::TCK_CodeSize : TargetTransformInfo::TCK_SizeAndLatency;
// Simulate execution of each iteration of the loop counting instructions,
// which would be simplified.
// Since the same load will take different values on different iterations,
@@ -529,7 +529,7 @@ static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
// Track this instruction's expected baseline cost when executing the
// rolled loop form.
- RolledDynamicCost += TTI.getUserCost(&I, CostKind);
+ RolledDynamicCost += TTI.getUserCost(&I, CostKind);
// Visit the instruction to analyze its loop cost after unrolling,
// and if the visitor returns true, mark the instruction as free after
@@ -851,7 +851,7 @@ bool llvm::computeUnrollCount(
}
// 4th priority is loop peeling.
- computePeelCount(L, LoopSize, PP, TripCount, SE, UP.Threshold);
+ computePeelCount(L, LoopSize, PP, TripCount, SE, UP.Threshold);
if (PP.PeelCount) {
UP.Runtime = false;
UP.Count = 1;
@@ -1043,7 +1043,7 @@ static LoopUnrollResult tryToUnrollLoop(
return LoopUnrollResult::Unmodified;
}
- // When automatic unrolling is disabled, do not unroll unless overridden for
+ // When automatic unrolling is disabled, do not unroll unless overridden for
// this loop.
if (OnlyWhenForced && !(TM & TM_Enable))
return LoopUnrollResult::Unmodified;
@@ -1057,7 +1057,7 @@ static LoopUnrollResult tryToUnrollLoop(
ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
ProvidedFullUnrollMaxCount);
TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences(
- L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, true);
+ L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, true);
// Exit early if unrolling is disabled. For OptForSize, we pick the loop size
// as threshold later on.
@@ -1105,7 +1105,7 @@ static LoopUnrollResult tryToUnrollLoop(
// If the loop contains a convergent operation, the prelude we'd add
// to do the first few instructions before we hit the unrolled loop
// is unsafe -- it adds a control-flow dependency to the convergent
- // operation. Therefore restrict remainder loop (try unrolling without).
+ // operation. Therefore restrict remainder loop (try unrolling without).
//
// TODO: This is quite conservative. In practice, convergent_op()
// is likely to be called unconditionally in the loop. In this
@@ -1301,7 +1301,7 @@ Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
bool ForgetAllSCEV) {
return createLoopUnrollPass(OptLevel, OnlyWhenForced, ForgetAllSCEV, -1, -1,
- 0, 0, 0, 1);
+ 0, 0, 0, 1);
}
PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
@@ -1329,7 +1329,7 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
OnlyWhenForced, ForgetSCEV, /*Count*/ None,
/*Threshold*/ None, /*AllowPartial*/ false,
/*Runtime*/ false, /*UpperBound*/ false,
- /*AllowPeeling*/ true,
+ /*AllowPeeling*/ true,
/*AllowProfileBasedPeeling*/ false,
/*FullUnrollMaxCount*/ None) !=
LoopUnrollResult::Unmodified;
@@ -1371,7 +1371,7 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
}
// Otherwise erase the loop from the list if it was in the old loops.
- return OldLoops.contains(SibLoop);
+ return OldLoops.contains(SibLoop);
});
Updater.addSiblingLoops(SibLoops);
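
In the LoopUnrollPass hunks above, analyzeLoopUnrollCost chooses a cost kind from Function::hasMinSize() (TCK_CodeSize for minsize functions, TCK_SizeAndLatency otherwise) and then accumulates two totals per instruction: the simulated unrolled cost, skipping instructions that fold away, and the rolled-loop baseline. A simplified sketch of that accumulation, using placeholder instruction records instead of TTI.getUserCost, is:

#include <cstdint>
#include <vector>

enum class CostKind { CodeSize, SizeAndLatency };

struct InstCost {
  uint64_t Size = 0;
  uint64_t Latency = 0;
  bool FoldsAway = false; // simplified away when the trip count is known
};

// Placeholder for TTI.getUserCost(I, CostKind).
uint64_t userCost(const InstCost &I, CostKind K) {
  return K == CostKind::CodeSize ? I.Size : I.Size + I.Latency;
}

// Accumulate the two totals the analysis compares: the cost of the simulated
// unrolled body (free instructions skipped) and the rolled-loop baseline.
void accumulateIteration(const std::vector<InstCost> &Body, CostKind K,
                         uint64_t &UnrolledCost, uint64_t &RolledDynamicCost) {
  for (const InstCost &I : Body) {
    RolledDynamicCost += userCost(I, K);
    if (!I.FoldsAway)
      UnrolledCost += userCost(I, K);
  }
}
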
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopUnswitch.cpp
index 822a786fc7..843be6cbb9 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -32,7 +32,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
@@ -99,12 +99,12 @@ static cl::opt<unsigned>
Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
cl::init(100), cl::Hidden);
-static cl::opt<unsigned>
- MSSAThreshold("loop-unswitch-memoryssa-threshold",
- cl::desc("Max number of memory uses to explore during "
- "partial unswitching analysis"),
- cl::init(100), cl::Hidden);
-
+static cl::opt<unsigned>
+ MSSAThreshold("loop-unswitch-memoryssa-threshold",
+ cl::desc("Max number of memory uses to explore during "
+ "partial unswitching analysis"),
+ cl::init(100), cl::Hidden);
+
namespace {
class LUAnalysisCache {
@@ -191,7 +191,7 @@ namespace {
Loop *CurrentLoop = nullptr;
DominatorTree *DT = nullptr;
MemorySSA *MSSA = nullptr;
- AAResults *AA = nullptr;
+ AAResults *AA = nullptr;
std::unique_ptr<MemorySSAUpdater> MSSAU;
BasicBlock *LoopHeader = nullptr;
BasicBlock *LoopPreheader = nullptr;
@@ -225,10 +225,10 @@ namespace {
/// loop preheaders be inserted into the CFG.
///
void getAnalysisUsage(AnalysisUsage &AU) const override {
- // Lazy BFI and BPI are marked as preserved here so Loop Unswitching
- // can remain part of the same loop pass as LICM
- AU.addPreserved<LazyBlockFrequencyInfoPass>();
- AU.addPreserved<LazyBranchProbabilityInfoPass>();
+ // Lazy BFI and BPI are marked as preserved here so Loop Unswitching
+ // can remain part of the same loop pass as LICM
+ AU.addPreserved<LazyBlockFrequencyInfoPass>();
+ AU.addPreserved<LazyBranchProbabilityInfoPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
if (EnableMSSALoopDependency) {
@@ -256,22 +256,22 @@ namespace {
bool tryTrivialLoopUnswitch(bool &Changed);
bool unswitchIfProfitable(Value *LoopCond, Constant *Val,
- Instruction *TI = nullptr,
- ArrayRef<Instruction *> ToDuplicate = {});
+ Instruction *TI = nullptr,
+ ArrayRef<Instruction *> ToDuplicate = {});
void unswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
BasicBlock *ExitBlock, Instruction *TI);
void unswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L,
- Instruction *TI,
- ArrayRef<Instruction *> ToDuplicate = {});
+ Instruction *TI,
+ ArrayRef<Instruction *> ToDuplicate = {});
void rewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
Constant *Val, bool IsEqual);
- void
- emitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
- BasicBlock *TrueDest, BasicBlock *FalseDest,
- BranchInst *OldBranch, Instruction *TI,
- ArrayRef<Instruction *> ToDuplicate = {});
+ void
+ emitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
+ BasicBlock *TrueDest, BasicBlock *FalseDest,
+ BranchInst *OldBranch, Instruction *TI,
+ ArrayRef<Instruction *> ToDuplicate = {});
void simplifyCode(std::vector<Instruction *> &Worklist, Loop *L);
@@ -538,7 +538,7 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPMRef) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
LPM = &LPMRef;
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
if (EnableMSSALoopDependency) {
MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
@@ -640,145 +640,145 @@ static bool equalityPropUnSafe(Value &LoopCond) {
return false;
}
-/// Check if the loop header has a conditional branch that is not
-/// loop-invariant, because it involves load instructions. If all paths from
-/// either the true or false successor to the header or loop exits do not
-/// modify the memory feeding the condition, perform 'partial unswitching'. That
-/// is, duplicate the instructions feeding the condition in the pre-header. Then
-/// unswitch on the duplicated condition. The condition is now known in the
-/// unswitched version for the 'invariant' path through the original loop.
-///
-/// If the branch condition of the header is partially invariant, return a pair
-/// containing the instructions to duplicate and a boolean Constant to update
-/// the condition in the loops created for the true or false successors.
-static std::pair<SmallVector<Instruction *, 4>, Constant *>
-hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) {
- SmallVector<Instruction *, 4> ToDuplicate;
-
- auto *TI = dyn_cast<BranchInst>(L->getHeader()->getTerminator());
- if (!TI || !TI->isConditional())
- return {};
-
- auto *CondI = dyn_cast<CmpInst>(TI->getCondition());
- // The case with the condition outside the loop should already be handled
- // earlier.
- if (!CondI || !L->contains(CondI))
- return {};
-
- ToDuplicate.push_back(CondI);
-
- SmallVector<Value *, 4> WorkList;
- WorkList.append(CondI->op_begin(), CondI->op_end());
-
- SmallVector<MemoryAccess *, 4> AccessesToCheck;
- SmallVector<MemoryLocation, 4> AccessedLocs;
- while (!WorkList.empty()) {
- Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val());
- if (!I || !L->contains(I))
- continue;
-
- // TODO: support additional instructions.
- if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I))
- return {};
-
- // Do not duplicate volatile and atomic loads.
- if (auto *LI = dyn_cast<LoadInst>(I))
- if (LI->isVolatile() || LI->isAtomic())
- return {};
-
- ToDuplicate.push_back(I);
- if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
- if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
- // Queue the defining access to check for alias checks.
- AccessesToCheck.push_back(MemUse->getDefiningAccess());
- AccessedLocs.push_back(MemoryLocation::get(I));
- } else {
- // MemoryDefs may clobber the location or may be atomic memory
- // operations. Bail out.
- return {};
- }
- }
- WorkList.append(I->op_begin(), I->op_end());
- }
-
- if (ToDuplicate.size() <= 1)
- return {};
-
- auto HasNoClobbersOnPath =
- [L, AA, &AccessedLocs](BasicBlock *Succ, BasicBlock *Header,
- SmallVector<MemoryAccess *, 4> AccessesToCheck) {
-        // First, collect all blocks in the loop that are on a path from Succ
- // to the header.
- SmallVector<BasicBlock *, 4> WorkList;
- WorkList.push_back(Succ);
- WorkList.push_back(Header);
- SmallPtrSet<BasicBlock *, 4> Seen;
- Seen.insert(Header);
- while (!WorkList.empty()) {
- BasicBlock *Current = WorkList.pop_back_val();
- if (!L->contains(Current))
- continue;
- const auto &SeenIns = Seen.insert(Current);
- if (!SeenIns.second)
- continue;
-
- WorkList.append(succ_begin(Current), succ_end(Current));
- }
-
- // Require at least 2 blocks on a path through the loop. This skips
- // paths that directly exit the loop.
- if (Seen.size() < 2)
- return false;
-
- // Next, check if there are any MemoryDefs that are on the path through
- // the loop (in the Seen set) and they may-alias any of the locations in
- // AccessedLocs. If that is the case, they may modify the condition and
- // partial unswitching is not possible.
- SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
- while (!AccessesToCheck.empty()) {
- MemoryAccess *Current = AccessesToCheck.pop_back_val();
- auto SeenI = SeenAccesses.insert(Current);
- if (!SeenI.second || !Seen.contains(Current->getBlock()))
- continue;
-
- // Bail out if exceeded the threshold.
- if (SeenAccesses.size() >= MSSAThreshold)
- return false;
-
- // MemoryUse are read-only accesses.
- if (isa<MemoryUse>(Current))
- continue;
-
-        // For a MemoryDef, check if it aliases any of the locations feeding
- // the original condition.
- if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
- if (any_of(AccessedLocs, [AA, CurrentDef](MemoryLocation &Loc) {
- return isModSet(
- AA->getModRefInfo(CurrentDef->getMemoryInst(), Loc));
- }))
- return false;
- }
-
- for (Use &U : Current->uses())
- AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
- }
-
- return true;
- };
-
- // If we branch to the same successor, partial unswitching will not be
- // beneficial.
- if (TI->getSuccessor(0) == TI->getSuccessor(1))
- return {};
-
- if (HasNoClobbersOnPath(TI->getSuccessor(0), L->getHeader(), AccessesToCheck))
- return {ToDuplicate, ConstantInt::getTrue(TI->getContext())};
- if (HasNoClobbersOnPath(TI->getSuccessor(1), L->getHeader(), AccessesToCheck))
- return {ToDuplicate, ConstantInt::getFalse(TI->getContext())};
-
- return {};
-}
-
+/// Check if the loop header has a conditional branch that is not
+/// loop-invariant, because it involves load instructions. If all paths from
+/// either the true or false successor to the header or loop exits do not
+/// modify the memory feeding the condition, perform 'partial unswitching'. That
+/// is, duplicate the instructions feeding the condition in the pre-header. Then
+/// unswitch on the duplicated condition. The condition is now known in the
+/// unswitched version for the 'invariant' path through the original loop.
+///
+/// If the branch condition of the header is partially invariant, return a pair
+/// containing the instructions to duplicate and a boolean Constant to update
+/// the condition in the loops created for the true or false successors.
+static std::pair<SmallVector<Instruction *, 4>, Constant *>
+hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) {
+ SmallVector<Instruction *, 4> ToDuplicate;
+
+ auto *TI = dyn_cast<BranchInst>(L->getHeader()->getTerminator());
+ if (!TI || !TI->isConditional())
+ return {};
+
+ auto *CondI = dyn_cast<CmpInst>(TI->getCondition());
+ // The case with the condition outside the loop should already be handled
+ // earlier.
+ if (!CondI || !L->contains(CondI))
+ return {};
+
+ ToDuplicate.push_back(CondI);
+
+ SmallVector<Value *, 4> WorkList;
+ WorkList.append(CondI->op_begin(), CondI->op_end());
+
+ SmallVector<MemoryAccess *, 4> AccessesToCheck;
+ SmallVector<MemoryLocation, 4> AccessedLocs;
+ while (!WorkList.empty()) {
+ Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val());
+ if (!I || !L->contains(I))
+ continue;
+
+ // TODO: support additional instructions.
+ if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I))
+ return {};
+
+ // Do not duplicate volatile and atomic loads.
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ if (LI->isVolatile() || LI->isAtomic())
+ return {};
+
+ ToDuplicate.push_back(I);
+ if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
+ if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
+ // Queue the defining access to check for alias checks.
+ AccessesToCheck.push_back(MemUse->getDefiningAccess());
+ AccessedLocs.push_back(MemoryLocation::get(I));
+ } else {
+ // MemoryDefs may clobber the location or may be atomic memory
+ // operations. Bail out.
+ return {};
+ }
+ }
+ WorkList.append(I->op_begin(), I->op_end());
+ }
+
+ if (ToDuplicate.size() <= 1)
+ return {};
+
+ auto HasNoClobbersOnPath =
+ [L, AA, &AccessedLocs](BasicBlock *Succ, BasicBlock *Header,
+ SmallVector<MemoryAccess *, 4> AccessesToCheck) {
+        // First, collect all blocks in the loop that are on a path from Succ
+ // to the header.
+ SmallVector<BasicBlock *, 4> WorkList;
+ WorkList.push_back(Succ);
+ WorkList.push_back(Header);
+ SmallPtrSet<BasicBlock *, 4> Seen;
+ Seen.insert(Header);
+ while (!WorkList.empty()) {
+ BasicBlock *Current = WorkList.pop_back_val();
+ if (!L->contains(Current))
+ continue;
+ const auto &SeenIns = Seen.insert(Current);
+ if (!SeenIns.second)
+ continue;
+
+ WorkList.append(succ_begin(Current), succ_end(Current));
+ }
+
+ // Require at least 2 blocks on a path through the loop. This skips
+ // paths that directly exit the loop.
+ if (Seen.size() < 2)
+ return false;
+
+ // Next, check if there are any MemoryDefs that are on the path through
+ // the loop (in the Seen set) and they may-alias any of the locations in
+ // AccessedLocs. If that is the case, they may modify the condition and
+ // partial unswitching is not possible.
+ SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
+ while (!AccessesToCheck.empty()) {
+ MemoryAccess *Current = AccessesToCheck.pop_back_val();
+ auto SeenI = SeenAccesses.insert(Current);
+ if (!SeenI.second || !Seen.contains(Current->getBlock()))
+ continue;
+
+ // Bail out if exceeded the threshold.
+ if (SeenAccesses.size() >= MSSAThreshold)
+ return false;
+
+ // MemoryUse are read-only accesses.
+ if (isa<MemoryUse>(Current))
+ continue;
+
+        // For a MemoryDef, check if it aliases any of the locations feeding
+ // the original condition.
+ if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
+ if (any_of(AccessedLocs, [AA, CurrentDef](MemoryLocation &Loc) {
+ return isModSet(
+ AA->getModRefInfo(CurrentDef->getMemoryInst(), Loc));
+ }))
+ return false;
+ }
+
+ for (Use &U : Current->uses())
+ AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
+ }
+
+ return true;
+ };
+
+ // If we branch to the same successor, partial unswitching will not be
+ // beneficial.
+ if (TI->getSuccessor(0) == TI->getSuccessor(1))
+ return {};
+
+ if (HasNoClobbersOnPath(TI->getSuccessor(0), L->getHeader(), AccessesToCheck))
+ return {ToDuplicate, ConstantInt::getTrue(TI->getContext())};
+ if (HasNoClobbersOnPath(TI->getSuccessor(1), L->getHeader(), AccessesToCheck))
+ return {ToDuplicate, ConstantInt::getFalse(TI->getContext())};
+
+ return {};
+}
+
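
The added hasPartialIVCondition above combines two walks: a CFG walk that collects the blocks lying on a path from one successor back to the header, and a MemorySSA walk that rejects the candidate if any MemoryDef inside those blocks may clobber the loaded locations. A rough stand-alone sketch of the first walk, with a plain stand-in type instead of BasicBlock, might be:

#include <set>
#include <vector>

struct BlockStub { std::vector<BlockStub *> Succs; };

// Collect every block reachable from Succ without leaving the loop, plus the
// header itself; this mirrors the Seen set built by HasNoClobbersOnPath.
std::set<BlockStub *> blocksOnPathToHeader(BlockStub *Succ, BlockStub *Header,
                                           const std::set<BlockStub *> &Loop) {
  std::set<BlockStub *> Seen{Header};
  std::vector<BlockStub *> WorkList{Succ};
  while (!WorkList.empty()) {
    BlockStub *Current = WorkList.back();
    WorkList.pop_back();
    if (!Loop.count(Current))
      continue; // stepped outside the loop; that path simply exits
    if (!Seen.insert(Current).second)
      continue; // already visited
    for (BlockStub *S : Current->Succs)
      WorkList.push_back(S);
  }
  return Seen;
}

If fewer than two blocks end up in the set, the path leaves the loop immediately and partial unswitching is not worthwhile, which is what the Seen.size() < 2 check above enforces.
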
/// Do actual work and unswitch loop if possible and profitable.
bool LoopUnswitch::processCurrentLoop() {
bool Changed = false;
@@ -816,7 +816,7 @@ bool LoopUnswitch::processCurrentLoop() {
// FIXME: Use Function::hasOptSize().
if (OptimizeForSize ||
LoopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize))
- return Changed;
+ return Changed;
// Run through the instructions in the loop, keeping track of three things:
//
@@ -840,10 +840,10 @@ bool LoopUnswitch::processCurrentLoop() {
if (!CB)
continue;
if (CB->isConvergent())
- return Changed;
+ return Changed;
if (auto *II = dyn_cast<InvokeInst>(&I))
if (!II->getUnwindDest()->canSplitPredecessors())
- return Changed;
+ return Changed;
if (auto *II = dyn_cast<IntrinsicInst>(&I))
if (II->getIntrinsicID() == Intrinsic::experimental_guard)
Guards.push_back(II);
@@ -978,28 +978,28 @@ bool LoopUnswitch::processCurrentLoop() {
}
}
}
-
-  // Check if there is a header condition that is invariant along the path from
- // either the true or false successors to the header. This allows unswitching
- // conditions depending on memory accesses, if there's a path not clobbering
- // the memory locations. Check if this transform has been disabled using
- // metadata, to avoid unswitching the same loop multiple times.
- if (MSSA &&
- !findOptionMDForLoop(CurrentLoop, "llvm.loop.unswitch.partial.disable")) {
- auto ToDuplicate = hasPartialIVCondition(CurrentLoop, *MSSA, AA);
- if (!ToDuplicate.first.empty()) {
- LLVM_DEBUG(dbgs() << "loop-unswitch: Found partially invariant condition "
- << *ToDuplicate.first[0] << "\n");
- ++NumBranches;
- unswitchIfProfitable(ToDuplicate.first[0], ToDuplicate.second,
- CurrentLoop->getHeader()->getTerminator(),
- ToDuplicate.first);
-
- RedoLoop = false;
- return true;
- }
- }
-
+
+  // Check if there is a header condition that is invariant along the path from
+ // either the true or false successors to the header. This allows unswitching
+ // conditions depending on memory accesses, if there's a path not clobbering
+ // the memory locations. Check if this transform has been disabled using
+ // metadata, to avoid unswitching the same loop multiple times.
+ if (MSSA &&
+ !findOptionMDForLoop(CurrentLoop, "llvm.loop.unswitch.partial.disable")) {
+ auto ToDuplicate = hasPartialIVCondition(CurrentLoop, *MSSA, AA);
+ if (!ToDuplicate.first.empty()) {
+ LLVM_DEBUG(dbgs() << "loop-unswitch: Found partially invariant condition "
+ << *ToDuplicate.first[0] << "\n");
+ ++NumBranches;
+ unswitchIfProfitable(ToDuplicate.first[0], ToDuplicate.second,
+ CurrentLoop->getHeader()->getTerminator(),
+ ToDuplicate.first);
+
+ RedoLoop = false;
+ return true;
+ }
+ }
+
return Changed;
}
@@ -1057,8 +1057,8 @@ static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
/// simplify the loop. If we decide that this is profitable,
/// unswitch the loop, reprocess the pieces, then return true.
bool LoopUnswitch::unswitchIfProfitable(Value *LoopCond, Constant *Val,
- Instruction *TI,
- ArrayRef<Instruction *> ToDuplicate) {
+ Instruction *TI,
+ ArrayRef<Instruction *> ToDuplicate) {
// Check to see if it would be profitable to unswitch current loop.
if (!BranchesInfo.costAllowsUnswitching()) {
LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
@@ -1078,69 +1078,69 @@ bool LoopUnswitch::unswitchIfProfitable(Value *LoopCond, Constant *Val,
return false;
}
- unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI, ToDuplicate);
+ unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI, ToDuplicate);
return true;
}
/// Emit a conditional branch on two values if LIC == Val, branch to TrueDst,
/// otherwise branch to FalseDest. Insert the code immediately before OldBranch
/// and remove (but not erase!) it from the function.
-void LoopUnswitch::emitPreheaderBranchOnCondition(
- Value *LIC, Constant *Val, BasicBlock *TrueDest, BasicBlock *FalseDest,
- BranchInst *OldBranch, Instruction *TI,
- ArrayRef<Instruction *> ToDuplicate) {
+void LoopUnswitch::emitPreheaderBranchOnCondition(
+ Value *LIC, Constant *Val, BasicBlock *TrueDest, BasicBlock *FalseDest,
+ BranchInst *OldBranch, Instruction *TI,
+ ArrayRef<Instruction *> ToDuplicate) {
assert(OldBranch->isUnconditional() && "Preheader is not split correctly");
assert(TrueDest != FalseDest && "Branch targets should be different");
-
+
// Insert a conditional branch on LIC to the two preheaders. The original
// code is the true version and the new code is the false version.
Value *BranchVal = LIC;
bool Swapped = false;
-
- if (!ToDuplicate.empty()) {
- ValueToValueMapTy Old2New;
- for (Instruction *I : reverse(ToDuplicate)) {
- auto *New = I->clone();
- New->insertBefore(OldBranch);
- RemapInstruction(New, Old2New,
- RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
- Old2New[I] = New;
-
- if (MSSAU) {
- MemorySSA *MSSA = MSSAU->getMemorySSA();
- auto *MemA = dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(I));
- if (!MemA)
- continue;
-
- Loop *L = LI->getLoopFor(I->getParent());
- auto *DefiningAccess = MemA->getDefiningAccess();
- // Get the first defining access before the loop.
- while (L->contains(DefiningAccess->getBlock())) {
- // If the defining access is a MemoryPhi, get the incoming
- // value for the pre-header as defining access.
- if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess)) {
- DefiningAccess =
- MemPhi->getIncomingValueForBlock(L->getLoopPreheader());
- } else {
- DefiningAccess =
- cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
- }
- }
- MSSAU->createMemoryAccessInBB(New, DefiningAccess, New->getParent(),
- MemorySSA::BeforeTerminator);
- }
- }
- BranchVal = Old2New[ToDuplicate[0]];
- } else {
-
- if (!isa<ConstantInt>(Val) ||
- Val->getType() != Type::getInt1Ty(LIC->getContext()))
- BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val);
- else if (Val != ConstantInt::getTrue(Val->getContext())) {
- // We want to enter the new loop when the condition is true.
- std::swap(TrueDest, FalseDest);
- Swapped = true;
- }
+
+ if (!ToDuplicate.empty()) {
+ ValueToValueMapTy Old2New;
+ for (Instruction *I : reverse(ToDuplicate)) {
+ auto *New = I->clone();
+ New->insertBefore(OldBranch);
+ RemapInstruction(New, Old2New,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ Old2New[I] = New;
+
+ if (MSSAU) {
+ MemorySSA *MSSA = MSSAU->getMemorySSA();
+ auto *MemA = dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(I));
+ if (!MemA)
+ continue;
+
+ Loop *L = LI->getLoopFor(I->getParent());
+ auto *DefiningAccess = MemA->getDefiningAccess();
+ // Get the first defining access before the loop.
+ while (L->contains(DefiningAccess->getBlock())) {
+ // If the defining access is a MemoryPhi, get the incoming
+ // value for the pre-header as defining access.
+ if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess)) {
+ DefiningAccess =
+ MemPhi->getIncomingValueForBlock(L->getLoopPreheader());
+ } else {
+ DefiningAccess =
+ cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
+ }
+ }
+ MSSAU->createMemoryAccessInBB(New, DefiningAccess, New->getParent(),
+ MemorySSA::BeforeTerminator);
+ }
+ }
+ BranchVal = Old2New[ToDuplicate[0]];
+ } else {
+
+ if (!isa<ConstantInt>(Val) ||
+ Val->getType() != Type::getInt1Ty(LIC->getContext()))
+ BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val);
+ else if (Val != ConstantInt::getTrue(Val->getContext())) {
+ // We want to enter the new loop when the condition is true.
+ std::swap(TrueDest, FalseDest);
+ Swapped = true;
+ }
}
// Old branch will be removed, so save its parent and successor to update the
@@ -1173,9 +1173,9 @@ void LoopUnswitch::emitPreheaderBranchOnCondition(
}
if (MSSAU)
- MSSAU->applyUpdates(Updates, *DT, /*UpdateDT=*/true);
- else
- DT->applyUpdates(Updates);
+ MSSAU->applyUpdates(Updates, *DT, /*UpdateDT=*/true);
+ else
+ DT->applyUpdates(Updates);
}
// If either edge is critical, split it. This helps preserve LoopSimplify
@@ -1424,9 +1424,9 @@ void LoopUnswitch::splitExitEdges(
/// We determined that the loop is profitable to unswitch when LIC equal Val.
/// Split it into loop versions and test the condition outside of either loop.
/// Return the loops created as Out1/Out2.
-void LoopUnswitch::unswitchNontrivialCondition(
- Value *LIC, Constant *Val, Loop *L, Instruction *TI,
- ArrayRef<Instruction *> ToDuplicate) {
+void LoopUnswitch::unswitchNontrivialCondition(
+ Value *LIC, Constant *Val, Loop *L, Instruction *TI,
+ ArrayRef<Instruction *> ToDuplicate) {
Function *F = LoopHeader->getParent();
LLVM_DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
<< LoopHeader->getName() << " [" << L->getBlocks().size()
@@ -1451,7 +1451,7 @@ void LoopUnswitch::unswitchNontrivialCondition(
LoopBlocks.push_back(NewPreheader);
// We want the loop to come after the preheader, but before the exit blocks.
- llvm::append_range(LoopBlocks, L->blocks());
+ llvm::append_range(LoopBlocks, L->blocks());
SmallVector<BasicBlock*, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
@@ -1465,7 +1465,7 @@ void LoopUnswitch::unswitchNontrivialCondition(
L->getUniqueExitBlocks(ExitBlocks);
// Add exit blocks to the loop blocks.
- llvm::append_range(LoopBlocks, ExitBlocks);
+ llvm::append_range(LoopBlocks, ExitBlocks);
// Next step, clone all of the basic blocks that make up the loop (including
// the loop preheader and exit blocks), keeping track of the mapping between
@@ -1558,7 +1558,7 @@ void LoopUnswitch::unswitchNontrivialCondition(
// Emit the new branch that selects between the two versions of this loop.
emitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR,
- TI, ToDuplicate);
+ TI, ToDuplicate);
if (MSSAU) {
// Update MemoryPhis in Exit blocks.
MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMap, *DT);
@@ -1580,39 +1580,39 @@ void LoopUnswitch::unswitchNontrivialCondition(
// iteration.
WeakTrackingVH LICHandle(LIC);
- if (ToDuplicate.empty()) {
- // Now we rewrite the original code to know that the condition is true and
- // the new code to know that the condition is false.
- rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/false);
-
- // It's possible that simplifying one loop could cause the other to be
-    // changed to another value or a constant. If it's a constant, don't
- // simplify it.
- if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop &&
- LICHandle && !isa<Constant>(LICHandle))
- rewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val,
- /*IsEqual=*/true);
- } else {
- // Partial unswitching. Update the condition in the right loop with the
- // constant.
- auto *CC = cast<ConstantInt>(Val);
- if (CC->isOneValue()) {
- rewriteLoopBodyWithConditionConstant(NewLoop, VMap[LIC], Val,
- /*IsEqual=*/true);
- } else
- rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/true);
-
- // Mark the new loop as partially unswitched, to avoid unswitching on the
- // same condition again.
- auto &Context = NewLoop->getHeader()->getContext();
- MDNode *DisableUnswitchMD = MDNode::get(
- Context, MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
- MDNode *NewLoopID = makePostTransformationMetadata(
- Context, L->getLoopID(), {"llvm.loop.unswitch.partial"},
- {DisableUnswitchMD});
- NewLoop->setLoopID(NewLoopID);
- }
-
+ if (ToDuplicate.empty()) {
+ // Now we rewrite the original code to know that the condition is true and
+ // the new code to know that the condition is false.
+ rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/false);
+
+ // It's possible that simplifying one loop could cause the other to be
+    // changed to another value or a constant. If it's a constant, don't
+ // simplify it.
+ if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop &&
+ LICHandle && !isa<Constant>(LICHandle))
+ rewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val,
+ /*IsEqual=*/true);
+ } else {
+ // Partial unswitching. Update the condition in the right loop with the
+ // constant.
+ auto *CC = cast<ConstantInt>(Val);
+ if (CC->isOneValue()) {
+ rewriteLoopBodyWithConditionConstant(NewLoop, VMap[LIC], Val,
+ /*IsEqual=*/true);
+ } else
+ rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/true);
+
+ // Mark the new loop as partially unswitched, to avoid unswitching on the
+ // same condition again.
+ auto &Context = NewLoop->getHeader()->getContext();
+ MDNode *DisableUnswitchMD = MDNode::get(
+ Context, MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
+ MDNode *NewLoopID = makePostTransformationMetadata(
+ Context, L->getLoopID(), {"llvm.loop.unswitch.partial"},
+ {DisableUnswitchMD});
+ NewLoop->setLoopID(NewLoopID);
+ }
+
if (MSSA && VerifyMemorySSA)
MSSA->verifyMemorySSA();
}
@@ -1620,7 +1620,7 @@ void LoopUnswitch::unswitchNontrivialCondition(
/// Remove all instances of I from the worklist vector specified.
static void removeFromWorklist(Instruction *I,
std::vector<Instruction *> &Worklist) {
- llvm::erase_value(Worklist, I);
+ llvm::erase_value(Worklist, I);
}
/// When we find that I really equals V, remove I from the
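
The partial-unswitching path in processCurrentLoop above guards against repeated work by checking for the llvm.loop.unswitch.partial.disable option before analyzing, and unswitchNontrivialCondition attaches that option to the cloned loop afterwards. A toy sketch of the check-then-mark pattern, with a string set standing in for loop metadata (the real code uses findOptionMDForLoop and makePostTransformationMetadata), looks like:

#include <set>
#include <string>

struct LoopStub { std::set<std::string> Options; };

bool mayPartiallyUnswitch(const LoopStub &L) {
  return !L.Options.count("llvm.loop.unswitch.partial.disable");
}

void markPartiallyUnswitched(LoopStub &NewLoop) {
  // Tag the cloned loop so a later run of the pass leaves it alone.
  NewLoop.Options.insert("llvm.loop.unswitch.partial.disable");
}
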
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index 2ff1e84807..b1a41e0c9d 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -59,7 +59,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
+#include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -115,18 +115,18 @@ static cl::opt<unsigned> LVLoopDepthThreshold(
namespace {
-struct LoopVersioningLICMLegacyPass : public LoopPass {
+struct LoopVersioningLICMLegacyPass : public LoopPass {
static char ID;
- LoopVersioningLICMLegacyPass() : LoopPass(ID) {
- initializeLoopVersioningLICMLegacyPassPass(
- *PassRegistry::getPassRegistry());
+ LoopVersioningLICMLegacyPass() : LoopPass(ID) {
+ initializeLoopVersioningLICMLegacyPassPass(
+ *PassRegistry::getPassRegistry());
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override;
- StringRef getPassName() const override { return "Loop Versioning for LICM"; }
-
+ StringRef getPassName() const override { return "Loop Versioning for LICM"; }
+
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<AAResultsWrapperPass>();
@@ -140,22 +140,22 @@ struct LoopVersioningLICMLegacyPass : public LoopPass {
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
}
-};
-
-struct LoopVersioningLICM {
- // We don't explicitly pass in LoopAccessInfo to the constructor since the
- // loop versioning might return early due to instructions that are not safe
- // for versioning. By passing the proxy instead the construction of
- // LoopAccessInfo will take place only when it's necessary.
- LoopVersioningLICM(AliasAnalysis *AA, ScalarEvolution *SE,
- OptimizationRemarkEmitter *ORE,
- function_ref<const LoopAccessInfo &(Loop *)> GetLAI)
- : AA(AA), SE(SE), GetLAI(GetLAI),
- LoopDepthThreshold(LVLoopDepthThreshold),
- InvariantThreshold(LVInvarThreshold), ORE(ORE) {}
-
- bool runOnLoop(Loop *L, LoopInfo *LI, DominatorTree *DT);
-
+};
+
+struct LoopVersioningLICM {
+ // We don't explicitly pass in LoopAccessInfo to the constructor since the
+ // loop versioning might return early due to instructions that are not safe
+ // for versioning. By passing the proxy instead the construction of
+ // LoopAccessInfo will take place only when it's necessary.
+ LoopVersioningLICM(AliasAnalysis *AA, ScalarEvolution *SE,
+ OptimizationRemarkEmitter *ORE,
+ function_ref<const LoopAccessInfo &(Loop *)> GetLAI)
+ : AA(AA), SE(SE), GetLAI(GetLAI),
+ LoopDepthThreshold(LVLoopDepthThreshold),
+ InvariantThreshold(LVInvarThreshold), ORE(ORE) {}
+
+ bool runOnLoop(Loop *L, LoopInfo *LI, DominatorTree *DT);
+
void reset() {
AA = nullptr;
SE = nullptr;
@@ -186,9 +186,9 @@ private:
// Current Loop's LoopAccessInfo
const LoopAccessInfo *LAI = nullptr;
- // Proxy for retrieving LoopAccessInfo.
- function_ref<const LoopAccessInfo &(Loop *)> GetLAI;
-
+ // Proxy for retrieving LoopAccessInfo.
+ function_ref<const LoopAccessInfo &(Loop *)> GetLAI;
+
// The current loop we are working on.
Loop *CurLoop = nullptr;
@@ -267,7 +267,7 @@ bool LoopVersioningLICM::legalLoopStructure() {
// We need to be able to compute the loop trip count in order
// to generate the bound checks.
const SCEV *ExitCount = SE->getBackedgeTakenCount(CurLoop);
- if (isa<SCEVCouldNotCompute>(ExitCount)) {
+ if (isa<SCEVCouldNotCompute>(ExitCount)) {
     LLVM_DEBUG(dbgs() << " loop does not have a trip count\n");
return false;
}
@@ -414,8 +414,8 @@ bool LoopVersioningLICM::legalLoopInstructions() {
return false;
}
}
- // Get LoopAccessInfo from current loop via the proxy.
- LAI = &GetLAI(CurLoop);
+ // Get LoopAccessInfo from current loop via the proxy.
+ LAI = &GetLAI(CurLoop);
// Check LoopAccessInfo for need of runtime check.
if (LAI->getRuntimePointerChecking()->getChecks().empty()) {
LLVM_DEBUG(dbgs() << " LAA: Runtime check not found !!\n");
@@ -554,7 +554,7 @@ void LoopVersioningLICM::setNoAliasToLoop(Loop *VerLoop) {
MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("LVDomain");
StringRef Name = "LVAliasScope";
MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
- SmallVector<Metadata *, 4> Scopes{NewScope}, NoAliases{NewScope};
+ SmallVector<Metadata *, 4> Scopes{NewScope}, NoAliases{NewScope};
// Iterate over each instruction of loop.
// set no-alias for all load & store instructions.
for (auto *Block : CurLoop->getBlocks()) {
@@ -576,25 +576,25 @@ void LoopVersioningLICM::setNoAliasToLoop(Loop *VerLoop) {
}
}
-bool LoopVersioningLICMLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
- if (skipLoop(L))
- return false;
-
- AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- OptimizationRemarkEmitter *ORE =
- &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
- LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-
- auto GetLAI = [&](Loop *L) -> const LoopAccessInfo & {
- return getAnalysis<LoopAccessLegacyAnalysis>().getInfo(L);
- };
-
- return LoopVersioningLICM(AA, SE, ORE, GetLAI).runOnLoop(L, LI, DT);
-}
-
-bool LoopVersioningLICM::runOnLoop(Loop *L, LoopInfo *LI, DominatorTree *DT) {
+bool LoopVersioningLICMLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (skipLoop(L))
+ return false;
+
+ AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ OptimizationRemarkEmitter *ORE =
+ &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+ LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ auto GetLAI = [&](Loop *L) -> const LoopAccessInfo & {
+ return getAnalysis<LoopAccessLegacyAnalysis>().getInfo(L);
+ };
+
+ return LoopVersioningLICM(AA, SE, ORE, GetLAI).runOnLoop(L, LI, DT);
+}
+
+bool LoopVersioningLICM::runOnLoop(Loop *L, LoopInfo *LI, DominatorTree *DT) {
   // This will automatically release all resources held by the current
// LoopVersioningLICM object.
AutoResetter Resetter(*this);
@@ -622,8 +622,8 @@ bool LoopVersioningLICM::runOnLoop(Loop *L, LoopInfo *LI, DominatorTree *DT) {
// Do loop versioning.
// Create memcheck for memory accessed inside loop.
// Clone original loop, and set blocks properly.
- LoopVersioning LVer(*LAI, LAI->getRuntimePointerChecking()->getChecks(),
- CurLoop, LI, DT, SE);
+ LoopVersioning LVer(*LAI, LAI->getRuntimePointerChecking()->getChecks(),
+ CurLoop, LI, DT, SE);
LVer.versionLoop();
// Set Loop Versioning metaData for original loop.
addStringMetadataToLoop(LVer.getNonVersionedLoop(), LICMVersioningMetaData);
@@ -641,9 +641,9 @@ bool LoopVersioningLICM::runOnLoop(Loop *L, LoopInfo *LI, DominatorTree *DT) {
return Changed;
}
-char LoopVersioningLICMLegacyPass::ID = 0;
+char LoopVersioningLICMLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopVersioningLICMLegacyPass, "loop-versioning-licm",
+INITIALIZE_PASS_BEGIN(LoopVersioningLICMLegacyPass, "loop-versioning-licm",
"Loop Versioning For LICM", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
@@ -654,31 +654,31 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
-INITIALIZE_PASS_END(LoopVersioningLICMLegacyPass, "loop-versioning-licm",
+INITIALIZE_PASS_END(LoopVersioningLICMLegacyPass, "loop-versioning-licm",
"Loop Versioning For LICM", false, false)
-Pass *llvm::createLoopVersioningLICMPass() {
- return new LoopVersioningLICMLegacyPass();
-}
-
-namespace llvm {
-
-PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM,
- LoopStandardAnalysisResults &LAR,
- LPMUpdater &U) {
- AliasAnalysis *AA = &LAR.AA;
- ScalarEvolution *SE = &LAR.SE;
- DominatorTree *DT = &LAR.DT;
- LoopInfo *LI = &LAR.LI;
- const Function *F = L.getHeader()->getParent();
- OptimizationRemarkEmitter ORE(F);
-
- auto GetLAI = [&](Loop *L) -> const LoopAccessInfo & {
- return AM.getResult<LoopAccessAnalysis>(*L, LAR);
- };
-
- if (!LoopVersioningLICM(AA, SE, &ORE, GetLAI).runOnLoop(&L, LI, DT))
- return PreservedAnalyses::all();
- return getLoopPassPreservedAnalyses();
-}
-} // namespace llvm
+Pass *llvm::createLoopVersioningLICMPass() {
+ return new LoopVersioningLICMLegacyPass();
+}
+
+namespace llvm {
+
+PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &LAR,
+ LPMUpdater &U) {
+ AliasAnalysis *AA = &LAR.AA;
+ ScalarEvolution *SE = &LAR.SE;
+ DominatorTree *DT = &LAR.DT;
+ LoopInfo *LI = &LAR.LI;
+ const Function *F = L.getHeader()->getParent();
+ OptimizationRemarkEmitter ORE(F);
+
+ auto GetLAI = [&](Loop *L) -> const LoopAccessInfo & {
+ return AM.getResult<LoopAccessAnalysis>(*L, LAR);
+ };
+
+ if (!LoopVersioningLICM(AA, SE, &ORE, GetLAI).runOnLoop(&L, LI, DT))
+ return PreservedAnalyses::all();
+ return getLoopPassPreservedAnalyses();
+}
+} // namespace llvm
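As a point of reference for the pass above, the following plain C++ sketch (not LLVM IR, and not part of this change) shows the shape of code LoopVersioningLICM is after: a runtime memcheck guards a versioned copy of the loop in which the possibly-aliasing load becomes provably invariant and can be hoisted, while the original loop is kept as a fallback. The function name and the overlap test are illustrative assumptions.

#include <cstddef>
#include <cstdint>

// Hypothetical example: 's' may alias 'dst' or 'src', so *s cannot normally
// be hoisted out of the loop.
void scaleInto(float *dst, const float *src, const float *s, std::size_t n) {
  auto addr = [](const void *p) { return reinterpret_cast<std::uintptr_t>(p); };
  // Runtime memcheck, analogous to LAI's runtime pointer checks.
  bool noAlias =
      (addr(s) + sizeof(float) <= addr(dst) || addr(dst + n) <= addr(s)) &&
      (addr(s) + sizeof(float) <= addr(src) || addr(src + n) <= addr(s));
  if (noAlias) {
    // Versioned loop: *s is invariant here, so it is hoisted; this is the
    // loop that receives the no-alias scope metadata set by setNoAliasToLoop.
    float k = *s;
    for (std::size_t i = 0; i < n; ++i)
      dst[i] = src[i] * k;
  } else {
    // Original loop, kept unchanged as the conservative fallback.
    for (std::size_t i = 0; i < n; ++i)
      dst[i] = src[i] * *s;
  }
}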
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
index bb30c48127..c17c903dd2 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
@@ -43,10 +43,10 @@ STATISTIC(ObjectSizeIntrinsicsHandled,
"Number of 'objectsize' intrinsic calls handled");
static Value *lowerIsConstantIntrinsic(IntrinsicInst *II) {
- if (auto *C = dyn_cast<Constant>(II->getOperand(0)))
- if (C->isManifestConstant())
- return ConstantInt::getTrue(II->getType());
- return ConstantInt::getFalse(II->getType());
+ if (auto *C = dyn_cast<Constant>(II->getOperand(0)))
+ if (C->isManifestConstant())
+ return ConstantInt::getTrue(II->getType());
+ return ConstantInt::getFalse(II->getType());
}
static bool replaceConditionalBranchesOnConstant(Instruction *II,
@@ -78,7 +78,7 @@ static bool replaceConditionalBranchesOnConstant(Instruction *II,
Other->removePredecessor(Source);
BI->eraseFromParent();
BranchInst::Create(Target, Source);
- if (pred_empty(Other))
+ if (pred_empty(Other))
HasDeadBlocks = true;
}
}
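To see what lowerIsConstantIntrinsic and replaceConditionalBranchesOnConstant act on, a minimal source-level sketch follows; it assumes the GCC/Clang __builtin_constant_p builtin, which front ends lower to llvm.is.constant.*, and is illustrative rather than taken from this change.

#include <cstdio>

// __builtin_constant_p(x) becomes llvm.is.constant.*; for a runtime argument
// the pass folds it to false, and the guarded block is then pruned as a
// predecessor-less dead block by replaceConditionalBranchesOnConstant.
static long scale(long x) {
  if (__builtin_constant_p(x) && x == 0)
    return 0;            // unreachable once the intrinsic folds to false
  return x * 3;
}

int main() {
  std::printf("%ld\n", scale(14)); // prints 42
  return 0;
}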
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index da13075dfe..98b6adee87 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -46,10 +46,10 @@ STATISTIC(ExpectIntrinsicsHandled,
// 'select' instructions. It may be worthwhile to hoist these values to some
// shared space, so they can be used directly by other passes.
-cl::opt<uint32_t> llvm::LikelyBranchWeight(
+cl::opt<uint32_t> llvm::LikelyBranchWeight(
"likely-branch-weight", cl::Hidden, cl::init(2000),
cl::desc("Weight of the branch likely to be taken (default = 2000)"));
-cl::opt<uint32_t> llvm::UnlikelyBranchWeight(
+cl::opt<uint32_t> llvm::UnlikelyBranchWeight(
"unlikely-branch-weight", cl::Hidden, cl::init(1),
cl::desc("Weight of the branch unlikely to be taken (default = 1)"));
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 8e251ca940..d9f8c9f83d 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -42,8 +42,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/LoopUtils.h"
-#include "llvm/Transforms/Utils/MatrixUtils.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/MatrixUtils.h"
using namespace llvm;
using namespace PatternMatch;
@@ -63,9 +63,9 @@ static cl::opt<unsigned> TileSize(
"fuse-matrix-tile-size", cl::init(4), cl::Hidden,
cl::desc(
"Tile size for matrix instruction fusion using square-shaped tiles."));
-static cl::opt<bool> TileUseLoops("fuse-matrix-use-loops", cl::init(false),
- cl::Hidden,
- cl::desc("Generate loop nest for tiling."));
+static cl::opt<bool> TileUseLoops("fuse-matrix-use-loops", cl::init(false),
+ cl::Hidden,
+ cl::desc("Generate loop nest for tiling."));
static cl::opt<bool> ForceFusion(
"force-fuse-matrix", cl::init(false), cl::Hidden,
cl::desc("Force matrix instruction fusion even if not profitable."));
@@ -187,10 +187,10 @@ class LowerMatrixIntrinsics {
Function &Func;
const DataLayout &DL;
const TargetTransformInfo &TTI;
- AliasAnalysis *AA;
- DominatorTree *DT;
- LoopInfo *LI;
- OptimizationRemarkEmitter *ORE;
+ AliasAnalysis *AA;
+ DominatorTree *DT;
+ LoopInfo *LI;
+ OptimizationRemarkEmitter *ORE;
/// Contains estimates of the number of operations (loads, stores, compute) required to lower a matrix operation.
struct OpInfoTy {
@@ -246,7 +246,7 @@ class LowerMatrixIntrinsics {
void setVector(unsigned i, Value *V) { Vectors[i] = V; }
- Type *getElementType() const { return getVectorTy()->getElementType(); }
+ Type *getElementType() const { return getVectorTy()->getElementType(); }
unsigned getNumVectors() const {
if (isColumnMajor())
@@ -276,7 +276,7 @@ class LowerMatrixIntrinsics {
return getVectorTy();
}
- VectorType *getVectorTy() const {
+ VectorType *getVectorTy() const {
return cast<VectorType>(Vectors[0]->getType());
}
@@ -335,7 +335,7 @@ class LowerMatrixIntrinsics {
IRBuilder<> &Builder) const {
Value *Vec = isColumnMajor() ? getColumn(J) : getRow(I);
return Builder.CreateShuffleVector(
- Vec, createSequentialMask(isColumnMajor() ? I : J, NumElts, 0),
+ Vec, createSequentialMask(isColumnMajor() ? I : J, NumElts, 0),
"block");
}
};
@@ -397,8 +397,8 @@ class LowerMatrixIntrinsics {
public:
LowerMatrixIntrinsics(Function &F, TargetTransformInfo &TTI,
- AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI,
- OptimizationRemarkEmitter *ORE)
+ AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI,
+ OptimizationRemarkEmitter *ORE)
: Func(F), DL(F.getParent()->getDataLayout()), TTI(TTI), AA(AA), DT(DT),
LI(LI), ORE(ORE) {}
@@ -450,7 +450,7 @@ public:
MaskStart < cast<FixedVectorType>(VType)->getNumElements();
MaskStart += SI.getStride()) {
Value *V = Builder.CreateShuffleVector(
- MatrixVal, createSequentialMask(MaskStart, SI.getStride(), 0),
+ MatrixVal, createSequentialMask(MaskStart, SI.getStride(), 0),
"split");
SplitVecs.push_back(V);
}
@@ -488,7 +488,7 @@ public:
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::FMul: // Scalar multiply.
- case Instruction::FNeg:
+ case Instruction::FNeg:
case Instruction::Add:
case Instruction::Mul:
case Instruction::Sub:
@@ -531,7 +531,7 @@ public:
// list.
LLVM_DEBUG(dbgs() << "Forward-propagate shapes:\n");
while (!WorkList.empty()) {
- Instruction *Inst = WorkList.pop_back_val();
+ Instruction *Inst = WorkList.pop_back_val();
// New entry, set the value and insert operands
bool Propagate = false;
@@ -601,7 +601,7 @@ public:
// worklist.
LLVM_DEBUG(dbgs() << "Backward-propagate shapes:\n");
while (!WorkList.empty()) {
- Value *V = WorkList.pop_back_val();
+ Value *V = WorkList.pop_back_val();
size_t BeforeProcessingV = WorkList.size();
if (!isa<Instruction>(V))
@@ -723,18 +723,18 @@ public:
Value *Op2;
if (auto *BinOp = dyn_cast<BinaryOperator>(Inst))
Changed |= VisitBinaryOperator(BinOp);
- if (auto *UnOp = dyn_cast<UnaryOperator>(Inst))
- Changed |= VisitUnaryOperator(UnOp);
+ if (auto *UnOp = dyn_cast<UnaryOperator>(Inst))
+ Changed |= VisitUnaryOperator(UnOp);
if (match(Inst, m_Load(m_Value(Op1))))
Changed |= VisitLoad(cast<LoadInst>(Inst), Op1, Builder);
else if (match(Inst, m_Store(m_Value(Op1), m_Value(Op2))))
Changed |= VisitStore(cast<StoreInst>(Inst), Op1, Op2, Builder);
}
- if (ORE) {
- RemarkGenerator RemarkGen(Inst2ColumnMatrix, *ORE, Func);
- RemarkGen.emitRemarks();
- }
+ if (ORE) {
+ RemarkGenerator RemarkGen(Inst2ColumnMatrix, *ORE, Func);
+ RemarkGen.emitRemarks();
+ }
for (Instruction *Inst : reverse(ToRemove))
Inst->eraseFromParent();
@@ -941,7 +941,7 @@ public:
assert(NumElts >= BlockNumElts && "Too few elements for current block");
Block = Builder.CreateShuffleVector(
- Block, createSequentialMask(0, BlockNumElts, NumElts - BlockNumElts));
+ Block, createSequentialMask(0, BlockNumElts, NumElts - BlockNumElts));
// If Col is 7 long and I is 2 and BlockNumElts is 2 the mask is: 0, 1, 7,
// 8, 4, 5, 6
@@ -1089,7 +1089,7 @@ public:
MemoryLocation StoreLoc = MemoryLocation::get(Store);
MemoryLocation LoadLoc = MemoryLocation::get(Load);
- AliasResult LdAliased = AA->alias(LoadLoc, StoreLoc);
+ AliasResult LdAliased = AA->alias(LoadLoc, StoreLoc);
// If we can statically determine noalias we're good.
if (!LdAliased)
@@ -1105,17 +1105,17 @@ public:
// as we adjust Check0 and Check1's branches.
SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
for (BasicBlock *Succ : successors(Check0))
- DTUpdates.push_back({DT->Delete, Check0, Succ});
+ DTUpdates.push_back({DT->Delete, Check0, Succ});
- BasicBlock *Check1 =
- SplitBlock(MatMul->getParent(), MatMul, (DomTreeUpdater *)nullptr, LI,
- nullptr, "alias_cont");
+ BasicBlock *Check1 =
+ SplitBlock(MatMul->getParent(), MatMul, (DomTreeUpdater *)nullptr, LI,
+ nullptr, "alias_cont");
BasicBlock *Copy =
- SplitBlock(MatMul->getParent(), MatMul, (DomTreeUpdater *)nullptr, LI,
- nullptr, "copy");
- BasicBlock *Fusion =
- SplitBlock(MatMul->getParent(), MatMul, (DomTreeUpdater *)nullptr, LI,
- nullptr, "no_alias");
+ SplitBlock(MatMul->getParent(), MatMul, (DomTreeUpdater *)nullptr, LI,
+ nullptr, "copy");
+ BasicBlock *Fusion =
+ SplitBlock(MatMul->getParent(), MatMul, (DomTreeUpdater *)nullptr, LI,
+ nullptr, "no_alias");
// Check if the loaded memory location begins before the end of the store
// location. If the condition holds, they might overlap, otherwise they are
@@ -1159,11 +1159,11 @@ public:
PHI->addIncoming(NewLd, Copy);
// Adjust DT.
- DTUpdates.push_back({DT->Insert, Check0, Check1});
- DTUpdates.push_back({DT->Insert, Check0, Fusion});
- DTUpdates.push_back({DT->Insert, Check1, Copy});
- DTUpdates.push_back({DT->Insert, Check1, Fusion});
- DT->applyUpdates(DTUpdates);
+ DTUpdates.push_back({DT->Insert, Check0, Check1});
+ DTUpdates.push_back({DT->Insert, Check0, Fusion});
+ DTUpdates.push_back({DT->Insert, Check1, Copy});
+ DTUpdates.push_back({DT->Insert, Check1, Fusion});
+ DT->applyUpdates(DTUpdates);
return PHI;
}
@@ -1209,63 +1209,63 @@ public:
return Res;
}
- void createTiledLoops(CallInst *MatMul, Value *LPtr, ShapeInfo LShape,
- Value *RPtr, ShapeInfo RShape, StoreInst *Store,
- bool AllowContract) {
- auto *EltType = cast<VectorType>(MatMul->getType())->getElementType();
-
- // Create the main tiling loop nest.
- TileInfo TI(LShape.NumRows, RShape.NumColumns, LShape.NumColumns, TileSize);
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
- Instruction *InsertI = cast<Instruction>(MatMul);
- BasicBlock *Start = InsertI->getParent();
- BasicBlock *End =
- SplitBlock(InsertI->getParent(), InsertI, DT, LI, nullptr, "continue");
- IRBuilder<> Builder(MatMul);
- BasicBlock *InnerBody = TI.CreateTiledLoops(Start, End, Builder, DTU, *LI);
-
- Type *TileVecTy =
- FixedVectorType::get(MatMul->getType()->getScalarType(), TileSize);
- MatrixTy TileResult;
- // Insert in the inner loop header.
- Builder.SetInsertPoint(TI.InnerLoopHeader->getTerminator());
- // Create PHI nodes for the result columns to accumulate across iterations.
- SmallVector<PHINode *, 4> ColumnPhis;
- for (unsigned I = 0; I < TileSize; I++) {
- auto *Phi = Builder.CreatePHI(TileVecTy, 2, "result.vec." + Twine(I));
- Phi->addIncoming(ConstantAggregateZero::get(TileVecTy),
- TI.RowLoopHeader->getSingleSuccessor());
- TileResult.addVector(Phi);
- ColumnPhis.push_back(Phi);
- }
-
- // Insert in the inner loop body, which computes
- // Res += Load(CurrentRow, K) * Load(K, CurrentColumn)
- Builder.SetInsertPoint(InnerBody->getTerminator());
- // Load tiles of the operands.
- MatrixTy A = loadMatrix(LPtr, {}, false, LShape, TI.CurrentRow, TI.CurrentK,
- {TileSize, TileSize}, EltType, Builder);
- MatrixTy B = loadMatrix(RPtr, {}, false, RShape, TI.CurrentK, TI.CurrentCol,
- {TileSize, TileSize}, EltType, Builder);
- emitMatrixMultiply(TileResult, A, B, AllowContract, Builder, true);
- // Store result after the inner loop is done.
- Builder.SetInsertPoint(TI.RowLoopLatch->getTerminator());
- storeMatrix(TileResult, Store->getPointerOperand(), Store->getAlign(),
- Store->isVolatile(), {LShape.NumRows, RShape.NumColumns},
- TI.CurrentRow, TI.CurrentCol, EltType, Builder);
-
- for (unsigned I = 0; I < TileResult.getNumVectors(); I++)
- ColumnPhis[I]->addIncoming(TileResult.getVector(I), TI.InnerLoopLatch);
-
- // Force unrolling of a few iterations of the inner loop, to make sure there
- // is enough work per iteration.
- // FIXME: The unroller should make this decision directly instead, but
- // currently the cost-model is not up to the task.
- unsigned InnerLoopUnrollCount = std::min(10u, LShape.NumColumns / TileSize);
- addStringMetadataToLoop(LI->getLoopFor(TI.InnerLoopHeader),
- "llvm.loop.unroll.count", InnerLoopUnrollCount);
- }
-
+ void createTiledLoops(CallInst *MatMul, Value *LPtr, ShapeInfo LShape,
+ Value *RPtr, ShapeInfo RShape, StoreInst *Store,
+ bool AllowContract) {
+ auto *EltType = cast<VectorType>(MatMul->getType())->getElementType();
+
+ // Create the main tiling loop nest.
+ TileInfo TI(LShape.NumRows, RShape.NumColumns, LShape.NumColumns, TileSize);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+ Instruction *InsertI = cast<Instruction>(MatMul);
+ BasicBlock *Start = InsertI->getParent();
+ BasicBlock *End =
+ SplitBlock(InsertI->getParent(), InsertI, DT, LI, nullptr, "continue");
+ IRBuilder<> Builder(MatMul);
+ BasicBlock *InnerBody = TI.CreateTiledLoops(Start, End, Builder, DTU, *LI);
+
+ Type *TileVecTy =
+ FixedVectorType::get(MatMul->getType()->getScalarType(), TileSize);
+ MatrixTy TileResult;
+ // Insert in the inner loop header.
+ Builder.SetInsertPoint(TI.InnerLoopHeader->getTerminator());
+ // Create PHI nodes for the result columns to accumulate across iterations.
+ SmallVector<PHINode *, 4> ColumnPhis;
+ for (unsigned I = 0; I < TileSize; I++) {
+ auto *Phi = Builder.CreatePHI(TileVecTy, 2, "result.vec." + Twine(I));
+ Phi->addIncoming(ConstantAggregateZero::get(TileVecTy),
+ TI.RowLoopHeader->getSingleSuccessor());
+ TileResult.addVector(Phi);
+ ColumnPhis.push_back(Phi);
+ }
+
+ // Insert in the inner loop body, which computes
+ // Res += Load(CurrentRow, K) * Load(K, CurrentColumn)
+ Builder.SetInsertPoint(InnerBody->getTerminator());
+ // Load tiles of the operands.
+ MatrixTy A = loadMatrix(LPtr, {}, false, LShape, TI.CurrentRow, TI.CurrentK,
+ {TileSize, TileSize}, EltType, Builder);
+ MatrixTy B = loadMatrix(RPtr, {}, false, RShape, TI.CurrentK, TI.CurrentCol,
+ {TileSize, TileSize}, EltType, Builder);
+ emitMatrixMultiply(TileResult, A, B, AllowContract, Builder, true);
+ // Store result after the inner loop is done.
+ Builder.SetInsertPoint(TI.RowLoopLatch->getTerminator());
+ storeMatrix(TileResult, Store->getPointerOperand(), Store->getAlign(),
+ Store->isVolatile(), {LShape.NumRows, RShape.NumColumns},
+ TI.CurrentRow, TI.CurrentCol, EltType, Builder);
+
+ for (unsigned I = 0; I < TileResult.getNumVectors(); I++)
+ ColumnPhis[I]->addIncoming(TileResult.getVector(I), TI.InnerLoopLatch);
+
+ // Force unrolling of a few iterations of the inner loop, to make sure there
+ // is enough work per iteration.
+ // FIXME: The unroller should make this decision directly instead, but
+ // currently the cost-model is not up to the task.
+ unsigned InnerLoopUnrollCount = std::min(10u, LShape.NumColumns / TileSize);
+ addStringMetadataToLoop(LI->getLoopFor(TI.InnerLoopHeader),
+ "llvm.loop.unroll.count", InnerLoopUnrollCount);
+ }
+
void emitSIMDTiling(CallInst *MatMul, LoadInst *LoadOp0, LoadInst *LoadOp1,
StoreInst *Store,
SmallPtrSetImpl<Instruction *> &FusedInsts) {
@@ -1288,34 +1288,34 @@ public:
bool AllowContract = AllowContractEnabled || (isa<FPMathOperator>(MatMul) &&
MatMul->hasAllowContract());
- if (TileUseLoops && (R % TileSize == 0 && C % TileSize == 0))
- createTiledLoops(MatMul, APtr, LShape, BPtr, RShape, Store,
- AllowContract);
- else {
- IRBuilder<> Builder(Store);
- for (unsigned J = 0; J < C; J += TileSize)
- for (unsigned I = 0; I < R; I += TileSize) {
- const unsigned TileR = std::min(R - I, unsigned(TileSize));
- const unsigned TileC = std::min(C - J, unsigned(TileSize));
- MatrixTy Res = getZeroMatrix(EltType, TileR, TileC);
-
- for (unsigned K = 0; K < M; K += TileSize) {
- const unsigned TileM = std::min(M - K, unsigned(TileSize));
- MatrixTy A =
- loadMatrix(APtr, LoadOp0->getAlign(), LoadOp0->isVolatile(),
- LShape, Builder.getInt64(I), Builder.getInt64(K),
- {TileR, TileM}, EltType, Builder);
- MatrixTy B =
- loadMatrix(BPtr, LoadOp1->getAlign(), LoadOp1->isVolatile(),
- RShape, Builder.getInt64(K), Builder.getInt64(J),
- {TileM, TileC}, EltType, Builder);
- emitMatrixMultiply(Res, A, B, AllowContract, Builder, true);
- }
- storeMatrix(Res, CPtr, Store->getAlign(), Store->isVolatile(), {R, M},
- Builder.getInt64(I), Builder.getInt64(J), EltType,
- Builder);
+ if (TileUseLoops && (R % TileSize == 0 && C % TileSize == 0))
+ createTiledLoops(MatMul, APtr, LShape, BPtr, RShape, Store,
+ AllowContract);
+ else {
+ IRBuilder<> Builder(Store);
+ for (unsigned J = 0; J < C; J += TileSize)
+ for (unsigned I = 0; I < R; I += TileSize) {
+ const unsigned TileR = std::min(R - I, unsigned(TileSize));
+ const unsigned TileC = std::min(C - J, unsigned(TileSize));
+ MatrixTy Res = getZeroMatrix(EltType, TileR, TileC);
+
+ for (unsigned K = 0; K < M; K += TileSize) {
+ const unsigned TileM = std::min(M - K, unsigned(TileSize));
+ MatrixTy A =
+ loadMatrix(APtr, LoadOp0->getAlign(), LoadOp0->isVolatile(),
+ LShape, Builder.getInt64(I), Builder.getInt64(K),
+ {TileR, TileM}, EltType, Builder);
+ MatrixTy B =
+ loadMatrix(BPtr, LoadOp1->getAlign(), LoadOp1->isVolatile(),
+ RShape, Builder.getInt64(K), Builder.getInt64(J),
+ {TileM, TileC}, EltType, Builder);
+ emitMatrixMultiply(Res, A, B, AllowContract, Builder, true);
+ }
+ storeMatrix(Res, CPtr, Store->getAlign(), Store->isVolatile(), {R, M},
+ Builder.getInt64(I), Builder.getInt64(J), EltType,
+ Builder);
}
- }
+ }
// Mark eliminated instructions as fused and remove them.
FusedInsts.insert(Store);
@@ -1342,11 +1342,11 @@ public:
void LowerMatrixMultiplyFused(CallInst *MatMul,
SmallPtrSetImpl<Instruction *> &FusedInsts) {
if (!FuseMatrix || !MatMul->hasOneUse() ||
- MatrixLayout != MatrixLayoutTy::ColumnMajor || !DT)
+ MatrixLayout != MatrixLayoutTy::ColumnMajor || !DT)
return;
- assert(AA && LI && "Analyses should be available");
-
+ assert(AA && LI && "Analyses should be available");
+
auto *LoadOp0 = dyn_cast<LoadInst>(MatMul->getOperand(0));
auto *LoadOp1 = dyn_cast<LoadInst>(MatMul->getOperand(1));
auto *Store = dyn_cast<StoreInst>(*MatMul->user_begin());
@@ -1355,7 +1355,7 @@ public:
// we create invalid IR.
// FIXME: See if we can hoist the store address computation.
auto *AddrI = dyn_cast<Instruction>(Store->getOperand(1));
- if (AddrI && (!DT->dominates(AddrI, MatMul)))
+ if (AddrI && (!DT->dominates(AddrI, MatMul)))
return;
emitSIMDTiling(MatMul, LoadOp0, LoadOp1, Store, FusedInsts);
@@ -1372,8 +1372,8 @@ public:
const MatrixTy &Lhs = getMatrix(MatMul->getArgOperand(0), LShape, Builder);
const MatrixTy &Rhs = getMatrix(MatMul->getArgOperand(1), RShape, Builder);
- assert(Lhs.getElementType() == Rhs.getElementType() &&
- "Matrix multiply argument element types do not match.");
+ assert(Lhs.getElementType() == Rhs.getElementType() &&
+ "Matrix multiply argument element types do not match.");
const unsigned R = LShape.NumRows;
const unsigned C = RShape.NumColumns;
@@ -1381,8 +1381,8 @@ public:
// Initialize the output
MatrixTy Result(R, C, EltType);
- assert(Lhs.getElementType() == Result.getElementType() &&
- "Matrix multiply result element type does not match arguments.");
+ assert(Lhs.getElementType() == Result.getElementType() &&
+ "Matrix multiply result element type does not match arguments.");
bool AllowContract = AllowContractEnabled || (isa<FPMathOperator>(MatMul) &&
MatMul->hasAllowContract());
@@ -1500,40 +1500,40 @@ public:
return true;
}
- /// Lower unary operators, if shape information is available.
- bool VisitUnaryOperator(UnaryOperator *Inst) {
- auto I = ShapeMap.find(Inst);
- if (I == ShapeMap.end())
- return false;
-
- Value *Op = Inst->getOperand(0);
-
- IRBuilder<> Builder(Inst);
- ShapeInfo &Shape = I->second;
-
- MatrixTy Result;
- MatrixTy M = getMatrix(Op, Shape, Builder);
-
- // Helper to perform unary op on vectors.
- auto BuildVectorOp = [&Builder, Inst](Value *Op) {
- switch (Inst->getOpcode()) {
- case Instruction::FNeg:
- return Builder.CreateFNeg(Op);
- default:
- llvm_unreachable("Unsupported unary operator for matrix");
- }
- };
-
- for (unsigned I = 0; I < Shape.getNumVectors(); ++I)
- Result.addVector(BuildVectorOp(M.getVector(I)));
-
- finalizeLowering(Inst,
- Result.addNumComputeOps(getNumOps(Result.getVectorTy()) *
- Result.getNumVectors()),
- Builder);
- return true;
- }
-
+ /// Lower unary operators, if shape information is available.
+ bool VisitUnaryOperator(UnaryOperator *Inst) {
+ auto I = ShapeMap.find(Inst);
+ if (I == ShapeMap.end())
+ return false;
+
+ Value *Op = Inst->getOperand(0);
+
+ IRBuilder<> Builder(Inst);
+ ShapeInfo &Shape = I->second;
+
+ MatrixTy Result;
+ MatrixTy M = getMatrix(Op, Shape, Builder);
+
+ // Helper to perform unary op on vectors.
+ auto BuildVectorOp = [&Builder, Inst](Value *Op) {
+ switch (Inst->getOpcode()) {
+ case Instruction::FNeg:
+ return Builder.CreateFNeg(Op);
+ default:
+ llvm_unreachable("Unsupported unary operator for matrix");
+ }
+ };
+
+ for (unsigned I = 0; I < Shape.getNumVectors(); ++I)
+ Result.addVector(BuildVectorOp(M.getVector(I)));
+
+ finalizeLowering(Inst,
+ Result.addNumComputeOps(getNumOps(Result.getVectorTy()) *
+ Result.getNumVectors()),
+ Builder);
+ return true;
+ }
+
/// Helper to linearize a matrix expression tree into a string. Currently
/// matrix expressions are linearized by starting at an expression leaf and
/// linearizing bottom up.
@@ -1598,7 +1598,7 @@ public:
if (Value *Ptr = getPointerOperand(V))
return getUnderlyingObjectThroughLoads(Ptr);
else if (V->getType()->isPointerTy())
- return getUnderlyingObject(V);
+ return getUnderlyingObject(V);
return V;
}
@@ -1634,7 +1634,7 @@ public:
write(StringRef(Intrinsic::getName(II->getIntrinsicID(), {}))
.drop_front(StringRef("llvm.matrix.").size()));
write(".");
- std::string Tmp;
+ std::string Tmp;
raw_string_ostream SS(Tmp);
switch (II->getIntrinsicID()) {
@@ -1972,25 +1972,25 @@ public:
PreservedAnalyses LowerMatrixIntrinsicsPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
- OptimizationRemarkEmitter *ORE = nullptr;
- AAResults *AA = nullptr;
- DominatorTree *DT = nullptr;
- LoopInfo *LI = nullptr;
-
- if (!Minimal) {
- ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
- AA = &AM.getResult<AAManager>(F);
- DT = &AM.getResult<DominatorTreeAnalysis>(F);
- LI = &AM.getResult<LoopAnalysis>(F);
- }
-
+ OptimizationRemarkEmitter *ORE = nullptr;
+ AAResults *AA = nullptr;
+ DominatorTree *DT = nullptr;
+ LoopInfo *LI = nullptr;
+
+ if (!Minimal) {
+ ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ AA = &AM.getResult<AAManager>(F);
+ DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ LI = &AM.getResult<LoopAnalysis>(F);
+ }
+
LowerMatrixIntrinsics LMT(F, TTI, AA, DT, LI, ORE);
if (LMT.Visit()) {
PreservedAnalyses PA;
- if (!Minimal) {
- PA.preserve<LoopAnalysis>();
- PA.preserve<DominatorTreeAnalysis>();
- }
+ if (!Minimal) {
+ PA.preserve<LoopAnalysis>();
+ PA.preserve<DominatorTreeAnalysis>();
+ }
return PA;
}
return PreservedAnalyses::all();
@@ -2013,7 +2013,7 @@ public:
auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- LowerMatrixIntrinsics LMT(F, TTI, &AA, &DT, &LI, &ORE);
+ LowerMatrixIntrinsics LMT(F, TTI, &AA, &DT, &LI, &ORE);
bool C = LMT.Visit();
return C;
}
@@ -2044,45 +2044,45 @@ INITIALIZE_PASS_END(LowerMatrixIntrinsicsLegacyPass, DEBUG_TYPE, pass_name,
Pass *llvm::createLowerMatrixIntrinsicsPass() {
return new LowerMatrixIntrinsicsLegacyPass();
}
-
-namespace {
-
-/// A lightweight version of the matrix lowering pass that only requires TTI.
-/// Advanced features that require DT, AA, or ORE (such as tiling) are disabled. This
-/// is used to lower matrix intrinsics if the main lowering pass is not run, for
-/// example with -O0.
-class LowerMatrixIntrinsicsMinimalLegacyPass : public FunctionPass {
-public:
- static char ID;
-
- LowerMatrixIntrinsicsMinimalLegacyPass() : FunctionPass(ID) {
- initializeLowerMatrixIntrinsicsMinimalLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- LowerMatrixIntrinsics LMT(F, TTI, nullptr, nullptr, nullptr, nullptr);
- bool C = LMT.Visit();
- return C;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.setPreservesCFG();
- }
-};
-} // namespace
-
-static const char pass_name_minimal[] = "Lower the matrix intrinsics (minimal)";
-char LowerMatrixIntrinsicsMinimalLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(LowerMatrixIntrinsicsMinimalLegacyPass,
- "lower-matrix-intrinsics-minimal", pass_name_minimal,
- false, false)
-INITIALIZE_PASS_END(LowerMatrixIntrinsicsMinimalLegacyPass,
- "lower-matrix-intrinsics-minimal", pass_name_minimal, false,
- false)
-
-Pass *llvm::createLowerMatrixIntrinsicsMinimalPass() {
- return new LowerMatrixIntrinsicsMinimalLegacyPass();
-}
+
+namespace {
+
+/// A lightweight version of the matrix lowering pass that only requires TTI.
+/// Advanced features that require DT, AA, or ORE (such as tiling) are disabled. This
+/// is used to lower matrix intrinsics if the main lowering pass is not run, for
+/// example with -O0.
+class LowerMatrixIntrinsicsMinimalLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ LowerMatrixIntrinsicsMinimalLegacyPass() : FunctionPass(ID) {
+ initializeLowerMatrixIntrinsicsMinimalLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ LowerMatrixIntrinsics LMT(F, TTI, nullptr, nullptr, nullptr, nullptr);
+ bool C = LMT.Visit();
+ return C;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.setPreservesCFG();
+ }
+};
+} // namespace
+
+static const char pass_name_minimal[] = "Lower the matrix intrinsics (minimal)";
+char LowerMatrixIntrinsicsMinimalLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LowerMatrixIntrinsicsMinimalLegacyPass,
+ "lower-matrix-intrinsics-minimal", pass_name_minimal,
+ false, false)
+INITIALIZE_PASS_END(LowerMatrixIntrinsicsMinimalLegacyPass,
+ "lower-matrix-intrinsics-minimal", pass_name_minimal, false,
+ false)
+
+Pass *llvm::createLowerMatrixIntrinsicsMinimalPass() {
+ return new LowerMatrixIntrinsicsMinimalLegacyPass();
+}
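For orientation, a plain C++ sketch of the tiling scheme that createTiledLoops / emitSIMDTiling generate for a column-major (R x M) * (M x C) multiply. The fixed tile size and the scalar inner loops are simplifying assumptions; the pass actually emits vectorized IR and, in the loop-based path, adds llvm.loop.unroll.count metadata as shown above.

#include <algorithm>
#include <vector>

constexpr unsigned TileDim = 4; // stand-in for the fuse-matrix-tile-size option

// Res (R x C) += A (R x M) * B (M x C); all matrices column-major and assumed
// to be sized accordingly, Res zero-initialized by the caller.
void tiledMatMul(const std::vector<float> &A, const std::vector<float> &B,
                 std::vector<float> &Res, unsigned R, unsigned M, unsigned C) {
  for (unsigned J = 0; J < C; J += TileDim)
    for (unsigned I = 0; I < R; I += TileDim) {
      const unsigned TileR = std::min(R - I, TileDim);
      const unsigned TileC = std::min(C - J, TileDim);
      for (unsigned K = 0; K < M; K += TileDim) {
        const unsigned TileM = std::min(M - K, TileDim);
        // Accumulate the (TileR x TileC) block of Res from the matching tiles
        // of A and B; the pass keeps this block in vector registers.
        for (unsigned j = 0; j < TileC; ++j)
          for (unsigned i = 0; i < TileR; ++i)
            for (unsigned k = 0; k < TileM; ++k)
              Res[(J + j) * R + (I + i)] +=
                  A[(K + k) * R + (I + i)] * B[(J + j) * M + (K + k)];
      }
    }
}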
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index a4e695497f..c5ef74e869 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -21,11 +21,11 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/Analysis/MemorySSA.h"
-#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
@@ -67,15 +67,15 @@ using namespace llvm;
#define DEBUG_TYPE "memcpyopt"
-static cl::opt<bool>
- EnableMemorySSA("enable-memcpyopt-memoryssa", cl::init(false), cl::Hidden,
- cl::desc("Use MemorySSA-backed MemCpyOpt."));
-
+static cl::opt<bool>
+ EnableMemorySSA("enable-memcpyopt-memoryssa", cl::init(false), cl::Hidden,
+ cl::desc("Use MemorySSA-backed MemCpyOpt."));
+
STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
-STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
+STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
namespace {
@@ -279,17 +279,17 @@ private:
AU.setPreservesCFG();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
- if (!EnableMemorySSA)
- AU.addRequired<MemoryDependenceWrapperPass>();
+ if (!EnableMemorySSA)
+ AU.addRequired<MemoryDependenceWrapperPass>();
AU.addPreserved<MemoryDependenceWrapperPass>();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addPreserved<AAResultsWrapperPass>();
- if (EnableMemorySSA)
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ if (EnableMemorySSA)
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
}
};
@@ -311,56 +311,56 @@ INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
false, false)
-// Check that V is either not accessible by the caller, or unwinding cannot
-// occur between Start and End.
-static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
- Instruction *End) {
- assert(Start->getParent() == End->getParent() && "Must be in same block");
- if (!Start->getFunction()->doesNotThrow() &&
- !isa<AllocaInst>(getUnderlyingObject(V))) {
- for (const Instruction &I :
- make_range(Start->getIterator(), End->getIterator())) {
- if (I.mayThrow())
- return true;
- }
- }
- return false;
-}
-
-void MemCpyOptPass::eraseInstruction(Instruction *I) {
- if (MSSAU)
- MSSAU->removeMemoryAccess(I);
- if (MD)
- MD->removeInstruction(I);
- I->eraseFromParent();
-}
-
-// Check for mod or ref of Loc between Start and End, excluding both boundaries.
-// Start and End must be in the same block
-static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc,
- const MemoryUseOrDef *Start,
- const MemoryUseOrDef *End) {
- assert(Start->getBlock() == End->getBlock() && "Only local supported");
- for (const MemoryAccess &MA :
- make_range(++Start->getIterator(), End->getIterator())) {
- if (isModOrRefSet(AA.getModRefInfo(cast<MemoryUseOrDef>(MA).getMemoryInst(),
- Loc)))
- return true;
- }
- return false;
-}
-
-// Check for mod of Loc between Start and End, excluding both boundaries.
-// Start and End can be in different blocks.
-static bool writtenBetween(MemorySSA *MSSA, MemoryLocation Loc,
- const MemoryUseOrDef *Start,
- const MemoryUseOrDef *End) {
- // TODO: Only walk until we hit Start.
- MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
- End->getDefiningAccess(), Loc);
- return !MSSA->dominates(Clobber, Start);
-}
-
+// Check that V is either not accessible by the caller, or unwinding cannot
+// occur between Start and End.
+static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
+ Instruction *End) {
+ assert(Start->getParent() == End->getParent() && "Must be in same block");
+ if (!Start->getFunction()->doesNotThrow() &&
+ !isa<AllocaInst>(getUnderlyingObject(V))) {
+ for (const Instruction &I :
+ make_range(Start->getIterator(), End->getIterator())) {
+ if (I.mayThrow())
+ return true;
+ }
+ }
+ return false;
+}
+
+void MemCpyOptPass::eraseInstruction(Instruction *I) {
+ if (MSSAU)
+ MSSAU->removeMemoryAccess(I);
+ if (MD)
+ MD->removeInstruction(I);
+ I->eraseFromParent();
+}
+
+// Check for mod or ref of Loc between Start and End, excluding both boundaries.
+// Start and End must be in the same block
+static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc,
+ const MemoryUseOrDef *Start,
+ const MemoryUseOrDef *End) {
+ assert(Start->getBlock() == End->getBlock() && "Only local supported");
+ for (const MemoryAccess &MA :
+ make_range(++Start->getIterator(), End->getIterator())) {
+ if (isModOrRefSet(AA.getModRefInfo(cast<MemoryUseOrDef>(MA).getMemoryInst(),
+ Loc)))
+ return true;
+ }
+ return false;
+}
+
+// Check for mod of Loc between Start and End, excluding both boundaries.
+// Start and End can be in different blocks.
+static bool writtenBetween(MemorySSA *MSSA, MemoryLocation Loc,
+ const MemoryUseOrDef *Start,
+ const MemoryUseOrDef *End) {
+ // TODO: Only walk until we hit Start.
+ MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+ End->getDefiningAccess(), Loc);
+ return !MSSA->dominates(Clobber, Start);
+}
+
/// When scanning forward over instructions, we look for some other patterns to
/// fold away. In particular, this looks for stores to neighboring locations of
/// memory. If it sees enough consecutive ones, it attempts to merge them
@@ -377,27 +377,27 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
MemsetRanges Ranges(DL);
BasicBlock::iterator BI(StartInst);
-
- // Keeps track of the last memory use or def before the insertion point for
- // the new memset. The new MemoryDef for the inserted memsets will be inserted
- // after MemInsertPoint. It points to either LastMemDef or to the last user
- // before the insertion point of the memset, if there are any such users.
- MemoryUseOrDef *MemInsertPoint = nullptr;
- // Keeps track of the last MemoryDef between StartInst and the insertion point
- // for the new memset. This will become the defining access of the inserted
- // memsets.
- MemoryDef *LastMemDef = nullptr;
+
+ // Keeps track of the last memory use or def before the insertion point for
+ // the new memset. The new MemoryDef for the inserted memsets will be inserted
+ // after MemInsertPoint. It points to either LastMemDef or to the last user
+ // before the insertion point of the memset, if there are any such users.
+ MemoryUseOrDef *MemInsertPoint = nullptr;
+ // Keeps track of the last MemoryDef between StartInst and the insertion point
+ // for the new memset. This will become the defining access of the inserted
+ // memsets.
+ MemoryDef *LastMemDef = nullptr;
for (++BI; !BI->isTerminator(); ++BI) {
- if (MSSAU) {
- auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
- MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
- if (CurrentAcc) {
- MemInsertPoint = CurrentAcc;
- if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc))
- LastMemDef = CurrentDef;
- }
- }
-
+ if (MSSAU) {
+ auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
+ MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
+ if (CurrentAcc) {
+ MemInsertPoint = CurrentAcc;
+ if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc))
+ LastMemDef = CurrentDef;
+ }
+ }
+
if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
// If the instruction is readnone, ignore it, otherwise bail out. We
// don't even allow readonly here because we don't want something like:
@@ -411,15 +411,15 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
// If this is a store, see if we can merge it in.
if (!NextStore->isSimple()) break;
- Value *StoredVal = NextStore->getValueOperand();
-
- // Don't convert stores of non-integral pointer types to memsets (which
- // stores integers).
- if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
- break;
-
+ Value *StoredVal = NextStore->getValueOperand();
+
+ // Don't convert stores of non-integral pointer types to memsets (which
+ // stores integers).
+ if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
+ break;
+
// Check to see if this stored value is of the same byte-splattable value.
- Value *StoredByte = isBytewiseValue(StoredVal, DL);
+ Value *StoredByte = isBytewiseValue(StoredVal, DL);
if (isa<UndefValue>(ByteVal) && StoredByte)
ByteVal = StoredByte;
if (ByteVal != StoredByte)
@@ -486,24 +486,24 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
if (!Range.TheStores.empty())
AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
- if (MSSAU) {
- assert(LastMemDef && MemInsertPoint &&
- "Both LastMemDef and MemInsertPoint need to be set");
- auto *NewDef =
- cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
- ? MSSAU->createMemoryAccessBefore(
- AMemSet, LastMemDef, MemInsertPoint)
- : MSSAU->createMemoryAccessAfter(
- AMemSet, LastMemDef, MemInsertPoint));
- MSSAU->insertDef(NewDef, /*RenameUses=*/true);
- LastMemDef = NewDef;
- MemInsertPoint = NewDef;
- }
-
+ if (MSSAU) {
+ assert(LastMemDef && MemInsertPoint &&
+ "Both LastMemDef and MemInsertPoint need to be set");
+ auto *NewDef =
+ cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
+ ? MSSAU->createMemoryAccessBefore(
+ AMemSet, LastMemDef, MemInsertPoint)
+ : MSSAU->createMemoryAccessAfter(
+ AMemSet, LastMemDef, MemInsertPoint));
+ MSSAU->insertDef(NewDef, /*RenameUses=*/true);
+ LastMemDef = NewDef;
+ MemInsertPoint = NewDef;
+ }
+
// Zap all the stores.
- for (Instruction *SI : Range.TheStores)
- eraseInstruction(SI);
-
+ for (Instruction *SI : Range.TheStores)
+ eraseInstruction(SI);
+
++NumMemSetInfer;
}
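The effect of the merging logic above is easiest to see at the source level; a schematic sketch (illustrative, not taken from this change):

#include <cstring>

// Before: four adjacent byte stores of the same splattable value.
void clearHeaderManual(unsigned char *p) {
  p[0] = 0; p[1] = 0; p[2] = 0; p[3] = 0;
}

// After tryMergingIntoMemset (conceptually): one inferred memset, and the
// individual stores are erased via eraseInstruction(), which also keeps
// MemorySSA / MemDep in sync as in the code above.
void clearHeaderMerged(unsigned char *p) {
  std::memset(p, 0, 4);
}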
@@ -514,10 +514,10 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
// It will lift the store and its arguments, plus anything that
// may alias with these.
// The method returns true if it was successful.
-bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
+bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
// If the store alias this position, early bail out.
MemoryLocation StoreLoc = MemoryLocation::get(SI);
- if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
+ if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
return false;
// Keep track of the arguments of all instruction we plan to lift
@@ -528,7 +528,7 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
Args.insert(Ptr);
// Instruction to lift before P.
- SmallVector<Instruction *, 8> ToLift{SI};
+ SmallVector<Instruction *, 8> ToLift{SI};
// Memory locations of lifted instructions.
SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
@@ -541,24 +541,24 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
auto *C = &*I;
- // Make sure hoisting does not perform a store that was not guaranteed to
- // happen.
- if (!isGuaranteedToTransferExecutionToSuccessor(C))
- return false;
-
- bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, None));
+ // Make sure hoisting does not perform a store that was not guaranteed to
+ // happen.
+ if (!isGuaranteedToTransferExecutionToSuccessor(C))
+ return false;
+ bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, None));
+
bool NeedLift = false;
if (Args.erase(C))
NeedLift = true;
else if (MayAlias) {
- NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
- return isModOrRefSet(AA->getModRefInfo(C, ML));
+ NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
+ return isModOrRefSet(AA->getModRefInfo(C, ML));
});
if (!NeedLift)
- NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
- return isModOrRefSet(AA->getModRefInfo(C, Call));
+ NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
+ return isModOrRefSet(AA->getModRefInfo(C, Call));
});
}
@@ -568,18 +568,18 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
if (MayAlias) {
// Since LI is implicitly moved downwards past the lifted instructions,
// none of them may modify its source.
- if (isModSet(AA->getModRefInfo(C, LoadLoc)))
+ if (isModSet(AA->getModRefInfo(C, LoadLoc)))
return false;
else if (const auto *Call = dyn_cast<CallBase>(C)) {
// If we can't lift this before P, it's game over.
- if (isModOrRefSet(AA->getModRefInfo(P, Call)))
+ if (isModOrRefSet(AA->getModRefInfo(P, Call)))
return false;
Calls.push_back(Call);
} else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
// If we can't lift this before P, it's game over.
auto ML = MemoryLocation::get(C);
- if (isModOrRefSet(AA->getModRefInfo(P, ML)))
+ if (isModOrRefSet(AA->getModRefInfo(P, ML)))
return false;
MemLocs.push_back(ML);
@@ -599,40 +599,40 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
}
}
- // Find MSSA insertion point. Normally P will always have a corresponding
- // memory access before which we can insert. However, with non-standard AA
- // pipelines, there may be a mismatch between AA and MSSA, in which case we
- // will scan for a memory access before P. In either case, we know for sure
- // that at least the load will have a memory access.
- // TODO: Simplify this once P will be determined by MSSA, in which case the
- // discrepancy can no longer occur.
- MemoryUseOrDef *MemInsertPoint = nullptr;
- if (MSSAU) {
- if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
- MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
- } else {
- const Instruction *ConstP = P;
- for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
- ++LI->getReverseIterator())) {
- if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
- MemInsertPoint = MA;
- break;
- }
- }
- }
- }
-
- // We made it, we need to lift.
+ // Find MSSA insertion point. Normally P will always have a corresponding
+ // memory access before which we can insert. However, with non-standard AA
+ // pipelines, there may be a mismatch between AA and MSSA, in which case we
+ // will scan for a memory access before P. In either case, we know for sure
+ // that at least the load will have a memory access.
+ // TODO: Simplify this once P will be determined by MSSA, in which case the
+ // discrepancy can no longer occur.
+ MemoryUseOrDef *MemInsertPoint = nullptr;
+ if (MSSAU) {
+ if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
+ MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
+ } else {
+ const Instruction *ConstP = P;
+ for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
+ ++LI->getReverseIterator())) {
+ if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
+ MemInsertPoint = MA;
+ break;
+ }
+ }
+ }
+ }
+
+ // We made it, we need to lift.
for (auto *I : llvm::reverse(ToLift)) {
LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
I->moveBefore(P);
- if (MSSAU) {
- assert(MemInsertPoint && "Must have found insert point");
- if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
- MSSAU->moveAfter(MA, MemInsertPoint);
- MemInsertPoint = MA;
- }
- }
+ if (MSSAU) {
+ assert(MemInsertPoint && "Must have found insert point");
+ if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
+ MSSAU->moveAfter(MA, MemInsertPoint);
+ MemInsertPoint = MA;
+ }
+ }
}
return true;
@@ -652,15 +652,15 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
const DataLayout &DL = SI->getModule()->getDataLayout();
- Value *StoredVal = SI->getValueOperand();
-
- // Not all the transforms below are correct for non-integral pointers, bail
- // until we've audited the individual pieces.
- if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
- return false;
-
+ Value *StoredVal = SI->getValueOperand();
+
+ // Not all the transforms below are correct for non-integral pointers, bail
+ // until we've audited the individual pieces.
+ if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
+ return false;
+
// Load to store forwarding can be interpreted as memcpy.
- if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
if (LI->isSimple() && LI->hasOneUse() &&
LI->getParent() == SI->getParent()) {
@@ -672,10 +672,10 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// the memory we load from in between the load and the store. If
// such an instruction is found, we try to promote there instead
// of at the store position.
- // TODO: Can use MSSA for this.
+ // TODO: Can use MSSA for this.
Instruction *P = SI;
for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
- if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
+ if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
P = &I;
break;
}
@@ -686,7 +686,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// position if nothing aliases the store memory after this and the store
// destination is not in the range.
if (P && P != SI) {
- if (!moveUp(SI, P, LI))
+ if (!moveUp(SI, P, LI))
P = nullptr;
}
@@ -697,7 +697,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// memmove must be used to preserve semantics. If not, memcpy can
// be used.
bool UseMemMove = false;
- if (!AA->isNoAlias(MemoryLocation::get(SI), LoadLoc))
+ if (!AA->isNoAlias(MemoryLocation::get(SI), LoadLoc))
UseMemMove = true;
uint64_t Size = DL.getTypeStoreSize(T);
@@ -716,16 +716,16 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
<< *M << "\n");
- if (MSSAU) {
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
- auto *NewAccess =
- MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- }
-
- eraseInstruction(SI);
- eraseInstruction(LI);
+ if (MSSAU) {
+ auto *LastDef =
+ cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
+ auto *NewAccess =
+ MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
+ }
+
+ eraseInstruction(SI);
+ eraseInstruction(LI);
++NumMemCpyInstr;
// Make sure we do not invalidate the iterator.
@@ -738,49 +738,49 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// happen to be using a load-store pair to implement it, rather than
// a memcpy.
CallInst *C = nullptr;
- if (EnableMemorySSA) {
- if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
- MSSA->getWalker()->getClobberingMemoryAccess(LI))) {
- // The load must post-dominate the call. Limit to the same block for now.
- // TODO: Support non-local call-slot optimization?
- if (LoadClobber->getBlock() == SI->getParent())
- C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
- }
- } else {
- MemDepResult ldep = MD->getDependency(LI);
- if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
- C = dyn_cast<CallInst>(ldep.getInst());
- }
+ if (EnableMemorySSA) {
+ if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
+ MSSA->getWalker()->getClobberingMemoryAccess(LI))) {
+ // The load must post-dominate the call. Limit to the same block for now.
+ // TODO: Support non-local call-slot optimization?
+ if (LoadClobber->getBlock() == SI->getParent())
+ C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
+ }
+ } else {
+ MemDepResult ldep = MD->getDependency(LI);
+ if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
+ C = dyn_cast<CallInst>(ldep.getInst());
+ }
if (C) {
// Check that nothing touches the dest of the "copy" between
// the call and the store.
MemoryLocation StoreLoc = MemoryLocation::get(SI);
- if (EnableMemorySSA) {
- if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C),
- MSSA->getMemoryAccess(SI)))
+ if (EnableMemorySSA) {
+ if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C),
+ MSSA->getMemoryAccess(SI)))
C = nullptr;
- } else {
- for (BasicBlock::iterator I = --SI->getIterator(),
- E = C->getIterator();
- I != E; --I) {
- if (isModOrRefSet(AA->getModRefInfo(&*I, StoreLoc))) {
- C = nullptr;
- break;
- }
+ } else {
+ for (BasicBlock::iterator I = --SI->getIterator(),
+ E = C->getIterator();
+ I != E; --I) {
+ if (isModOrRefSet(AA->getModRefInfo(&*I, StoreLoc))) {
+ C = nullptr;
+ break;
+ }
}
}
}
if (C) {
bool changed = performCallSlotOptzn(
- LI, SI, SI->getPointerOperand()->stripPointerCasts(),
+ LI, SI, SI->getPointerOperand()->stripPointerCasts(),
LI->getPointerOperand()->stripPointerCasts(),
DL.getTypeStoreSize(SI->getOperand(0)->getType()),
commonAlignment(SI->getAlign(), LI->getAlign()), C);
if (changed) {
- eraseInstruction(SI);
- eraseInstruction(LI);
+ eraseInstruction(SI);
+ eraseInstruction(LI);
++NumMemCpyInstr;
return true;
}
@@ -814,15 +814,15 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
- if (MSSAU) {
- assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI)));
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
- auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- }
-
- eraseInstruction(SI);
+ if (MSSAU) {
+ assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI)));
+ auto *LastDef =
+ cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
+ auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
+ }
+
+ eraseInstruction(SI);
NumMemSetInfer++;
// Make sure we do not invalidate the iterator.
@@ -849,8 +849,8 @@ bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
/// Takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
-bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
- Instruction *cpyStore, Value *cpyDest,
+bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
+ Instruction *cpyStore, Value *cpyDest,
Value *cpySrc, uint64_t cpyLen,
Align cpyAlign, CallInst *C) {
// The general transformation to keep in mind is
@@ -881,7 +881,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
if (!srcArraySize)
return false;
- const DataLayout &DL = cpyLoad->getModule()->getDataLayout();
+ const DataLayout &DL = cpyLoad->getModule()->getDataLayout();
uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
srcArraySize->getZExtValue();
@@ -891,25 +891,25 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
// Check that accessing the first srcSize bytes of dest will not cause a
// trap. Otherwise the transform is invalid since it might cause a trap
// to occur earlier than it otherwise would.
- if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpyLen),
- DL, C, DT))
- return false;
-
- // Make sure that nothing can observe cpyDest being written early. There are
- // a number of cases to consider:
- // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
- // the transform.
- // 2. C itself may not access cpyDest (prior to the transform). This is
- // checked further below.
- // 3. If cpyDest is accessible to the caller of this function (potentially
- // captured and not based on an alloca), we need to ensure that we cannot
- // unwind between C and cpyStore. This is checked here.
- // 4. If cpyDest is potentially captured, there may be accesses to it from
- // another thread. In this case, we need to check that cpyStore is
- // guaranteed to be executed if C is. As it is a non-atomic access, it
- // renders accesses from other threads undefined.
- // TODO: This is currently not checked.
- if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore))
+ if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpyLen),
+ DL, C, DT))
+ return false;
+
+ // Make sure that nothing can observe cpyDest being written early. There are
+ // a number of cases to consider:
+ // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
+ // the transform.
+ // 2. C itself may not access cpyDest (prior to the transform). This is
+ // checked further below.
+ // 3. If cpyDest is accessible to the caller of this function (potentially
+ // captured and not based on an alloca), we need to ensure that we cannot
+ // unwind between C and cpyStore. This is checked here.
+ // 4. If cpyDest is potentially captured, there may be accesses to it from
+ // another thread. In this case, we need to check that cpyStore is
+ // guaranteed to be executed if C is. As it is a non-atomic access, it
+ // renders accesses from other threads undefined.
+ // TODO: This is currently not checked.
+ if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore))
return false;
// Check that dest points to memory that is at least as aligned as src.
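A source-level sketch of the call slot optimization whose preconditions are enumerated above (names and the struct are hypothetical): instead of letting the callee fill a temporary that is then memcpy'd into the destination, the destination itself is passed to the call and the copy disappears.

#include <cstring>

struct Blob { char bytes[64]; };

// Assumed to fully initialize *out (the 0x2a fill is just a stand-in).
void produce(Blob *out) { std::memset(out, 0x2a, sizeof(Blob)); }

// Before: the call writes into a temporary, which is then copied to 'dest'.
void fillViaTemp(Blob *dest) {
  Blob tmp;
  produce(&tmp);
  std::memcpy(dest, &tmp, sizeof(Blob));
}

// After performCallSlotOptzn (conceptually): the call writes straight into
// 'dest', legal only when dest is dereferenceable for the whole copy, cannot
// be observed between the call and the copy (including via unwinding), and
// the call itself does not otherwise access dest.
void fillDirect(Blob *dest) {
  produce(dest);
}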
@@ -924,26 +924,26 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
// guarantees that it holds only undefined values when passed in (so the final
// memcpy can be dropped), that it is not read or written between the call and
// the memcpy, and that writing beyond the end of it is undefined.
- SmallVector<User *, 8> srcUseList(srcAlloca->users());
+ SmallVector<User *, 8> srcUseList(srcAlloca->users());
while (!srcUseList.empty()) {
User *U = srcUseList.pop_back_val();
if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
- append_range(srcUseList, U->users());
+ append_range(srcUseList, U->users());
continue;
}
if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) {
if (!G->hasAllZeroIndices())
return false;
- append_range(srcUseList, U->users());
+ append_range(srcUseList, U->users());
continue;
}
if (const IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U))
if (IT->isLifetimeStartOrEnd())
continue;
- if (U != C && U != cpyLoad)
+ if (U != C && U != cpyLoad)
return false;
}
@@ -955,24 +955,24 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
// Since we're changing the parameter to the callsite, we need to make sure
// that what would be the new parameter dominates the callsite.
- if (!DT->dominates(cpyDest, C)) {
- // Support moving a constant index GEP before the call.
- auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
- if (GEP && GEP->hasAllConstantIndices() &&
- DT->dominates(GEP->getPointerOperand(), C))
- GEP->moveBefore(C);
- else
+ if (!DT->dominates(cpyDest, C)) {
+ // Support moving a constant index GEP before the call.
+ auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
+ if (GEP && GEP->hasAllConstantIndices() &&
+ DT->dominates(GEP->getPointerOperand(), C))
+ GEP->moveBefore(C);
+ else
return false;
- }
+ }
// In addition to knowing that the call does not access src in some
// unexpected manner, for example via a global, which we deduce from
// the use analysis, we also need to know that it does not sneakily
// access dest. We rely on AA to figure this out for us.
- ModRefInfo MR = AA->getModRefInfo(C, cpyDest, LocationSize::precise(srcSize));
+ ModRefInfo MR = AA->getModRefInfo(C, cpyDest, LocationSize::precise(srcSize));
// If necessary, perform additional analysis.
if (isModOrRefSet(MR))
- MR = AA->callCapturesBefore(C, cpyDest, LocationSize::precise(srcSize), DT);
+ MR = AA->callCapturesBefore(C, cpyDest, LocationSize::precise(srcSize), DT);
if (isModOrRefSet(MR))
return false;
@@ -1014,8 +1014,8 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
// Drop any cached information about the call, because we may have changed
// its dependence information by changing its parameter.
- if (MD)
- MD->removeInstruction(C);
+ if (MD)
+ MD->removeInstruction(C);
// Update AA metadata
// FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
@@ -1024,9 +1024,9 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
LLVMContext::MD_noalias,
LLVMContext::MD_invariant_group,
LLVMContext::MD_access_group};
- combineMetadata(C, cpyLoad, KnownIDs, true);
+ combineMetadata(C, cpyLoad, KnownIDs, true);
- ++NumCallSlot;
+ ++NumCallSlot;
return true;
}
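Editorial aside, not part of the diff: a minimal C++ sketch of the call-slot pattern this function rewrites. `Blob`, `produce`, and `caller` are invented names used only for exposition; the rewrite itself is shown in the trailing comment.

#include <cstring>

struct Blob { char bytes[64]; };
void produce(Blob *out);                    // assumed to fully initialize *out

void caller(Blob *dest) {
  Blob tmp;                                 // srcAlloca: dead once copied out
  produce(&tmp);                            // the call C writes into tmp
  std::memcpy(dest, &tmp, sizeof tmp);      // cpyStore copies tmp into dest
  // Under the conditions checked above, the temporary can be bypassed and the
  // call can write straight into the destination:  produce(dest);
}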
@@ -1063,28 +1063,28 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
//
// TODO: If the code between M and MDep is transparent to the destination "c",
// then we could still perform the xform by moving M up to the first memcpy.
- if (EnableMemorySSA) {
- // TODO: It would be sufficient to check the MDep source up to the memcpy
- // size of M, rather than MDep.
- if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
- MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
- return false;
- } else {
- // NOTE: This is conservative, it will stop on any read from the source loc,
- // not just the defining memcpy.
- MemDepResult SourceDep =
- MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
- M->getIterator(), M->getParent());
- if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
- return false;
- }
+ if (EnableMemorySSA) {
+ // TODO: It would be sufficient to check the MDep source up to the memcpy
+ // size of M, rather than MDep.
+ if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
+ MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
+ return false;
+ } else {
+ // NOTE: This is conservative, it will stop on any read from the source loc,
+ // not just the defining memcpy.
+ MemDepResult SourceDep =
+ MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
+ M->getIterator(), M->getParent());
+ if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
+ return false;
+ }
// If the dest of the second might alias the source of the first, then the
// source and dest might overlap. We still want to eliminate the intermediate
// value, but we have to generate a memmove instead of memcpy.
bool UseMemMove = false;
- if (!AA->isNoAlias(MemoryLocation::getForDest(M),
- MemoryLocation::getForSource(MDep)))
+ if (!AA->isNoAlias(MemoryLocation::getForDest(M),
+ MemoryLocation::getForSource(MDep)))
UseMemMove = true;
// If all checks passed, then we can transform M.
@@ -1094,25 +1094,25 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
// TODO: Is this worth it if we're creating a less aligned memcpy? For
// example we could be moving from movaps -> movq on x86.
IRBuilder<> Builder(M);
- Instruction *NewM;
+ Instruction *NewM;
if (UseMemMove)
- NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
- MDep->getRawSource(), MDep->getSourceAlign(),
- M->getLength(), M->isVolatile());
+ NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
+ MDep->getRawSource(), MDep->getSourceAlign(),
+ M->getLength(), M->isVolatile());
else
- NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
- MDep->getRawSource(), MDep->getSourceAlign(),
- M->getLength(), M->isVolatile());
-
- if (MSSAU) {
- assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
- auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
- auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- }
-
+ NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
+ MDep->getRawSource(), MDep->getSourceAlign(),
+ M->getLength(), M->isVolatile());
+
+ if (MSSAU) {
+ assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
+ auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
+ auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
+ }
+
// Remove the instruction we're replacing.
- eraseInstruction(M);
+ eraseInstruction(M);
++NumMemCpyInstr;
return true;
}
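Editorial aside, not part of the diff: a hedged sketch of the memcpy-memcpy dependence handled here, with invented names; the rewrite appears in the comments.

#include <cstring>

void forwardCopy(char *a, char *b, char *c, std::size_t n) {
  std::memcpy(b, a, n);                     // MDep: b <- a
  std::memcpy(c, b, n);                     // M: reads exactly what MDep wrote
  // If nothing writes a[0..n) between the two copies, M can read from 'a'
  // directly:  std::memcpy(c, a, n);  and if 'c' may alias 'a', the rewrite
  // uses memmove instead of memcpy.
}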
@@ -1137,41 +1137,41 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
if (MemSet->getDest() != MemCpy->getDest())
return false;
- // Check that src and dst of the memcpy aren't the same. While memcpy
- // operands cannot partially overlap, exact equality is allowed.
- if (!AA->isNoAlias(MemoryLocation(MemCpy->getSource(),
- LocationSize::precise(1)),
- MemoryLocation(MemCpy->getDest(),
- LocationSize::precise(1))))
+ // Check that src and dst of the memcpy aren't the same. While memcpy
+ // operands cannot partially overlap, exact equality is allowed.
+ if (!AA->isNoAlias(MemoryLocation(MemCpy->getSource(),
+ LocationSize::precise(1)),
+ MemoryLocation(MemCpy->getDest(),
+ LocationSize::precise(1))))
return false;
- if (EnableMemorySSA) {
- // We know that dst up to src_size is not written. We now need to make sure
- // that dst up to dst_size is not accessed. (If we did not move the memset,
- // checking for reads would be sufficient.)
- if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet),
- MSSA->getMemoryAccess(MemSet),
- MSSA->getMemoryAccess(MemCpy))) {
- return false;
- }
- } else {
- // We have already checked that dst up to src_size is not accessed. We
- // need to make sure that there are no accesses up to dst_size either.
- MemDepResult DstDepInfo = MD->getPointerDependencyFrom(
- MemoryLocation::getForDest(MemSet), false, MemCpy->getIterator(),
- MemCpy->getParent());
- if (DstDepInfo.getInst() != MemSet)
- return false;
- }
-
+ if (EnableMemorySSA) {
+ // We know that dst up to src_size is not written. We now need to make sure
+ // that dst up to dst_size is not accessed. (If we did not move the memset,
+ // checking for reads would be sufficient.)
+ if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet),
+ MSSA->getMemoryAccess(MemSet),
+ MSSA->getMemoryAccess(MemCpy))) {
+ return false;
+ }
+ } else {
+ // We have already checked that dst up to src_size is not accessed. We
+ // need to make sure that there are no accesses up to dst_size either.
+ MemDepResult DstDepInfo = MD->getPointerDependencyFrom(
+ MemoryLocation::getForDest(MemSet), false, MemCpy->getIterator(),
+ MemCpy->getParent());
+ if (DstDepInfo.getInst() != MemSet)
+ return false;
+ }
+
// Use the same i8* dest as the memcpy, killing the memset dest if different.
Value *Dest = MemCpy->getRawDest();
Value *DestSize = MemSet->getLength();
Value *SrcSize = MemCpy->getLength();
- if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
- return false;
-
+ if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
+ return false;
+
// By default, create an unaligned memset.
unsigned Align = 1;
// If Dest is aligned, and SrcSize is constant, use the minimum alignment
@@ -1197,25 +1197,25 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
Value *MemsetLen = Builder.CreateSelect(
Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
- Instruction *NewMemSet = Builder.CreateMemSet(
+ Instruction *NewMemSet = Builder.CreateMemSet(
Builder.CreateGEP(Dest->getType()->getPointerElementType(), Dest,
SrcSize),
MemSet->getOperand(1), MemsetLen, MaybeAlign(Align));
- if (MSSAU) {
- assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
- "MemCpy must be a MemoryDef");
- // The new memset is inserted after the memcpy, but it is known that its
- // defining access is the memset about to be removed which immediately
- // precedes the memcpy.
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
- auto *NewAccess = MSSAU->createMemoryAccessBefore(
- NewMemSet, LastDef->getDefiningAccess(), LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- }
-
- eraseInstruction(MemSet);
+ if (MSSAU) {
+ assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
+ "MemCpy must be a MemoryDef");
+ // The new memset is inserted after the memcpy, but it is known that its
+ // defining access is the memset about to be removed which immediately
+ // precedes the memcpy.
+ auto *LastDef =
+ cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
+ auto *NewAccess = MSSAU->createMemoryAccessBefore(
+ NewMemSet, LastDef->getDefiningAccess(), LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
+ }
+
+ eraseInstruction(MemSet);
return true;
}
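Editorial aside, not part of the diff: a small C++ sketch of the memset + smaller-memcpy shape this function shrinks. Names are invented; src_size <= dst_size is assumed.

#include <cstring>

void zeroThenFillPrefix(char *dst, const char *src,
                        std::size_t dst_size, std::size_t src_size) {
  std::memset(dst, 0, dst_size);            // MemSet covers the whole buffer
  std::memcpy(dst, src, src_size);          // MemCpy overwrites the prefix
  // The pass keeps the memcpy and shrinks the memset to the uncopied tail:
  //   std::memcpy(dst, src, src_size);
  //   std::memset(dst + src_size, 0, dst_size - src_size);
}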
@@ -1234,24 +1234,24 @@ static bool hasUndefContents(Instruction *I, ConstantInt *Size) {
return false;
}
-static bool hasUndefContentsMSSA(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
- MemoryDef *Def, ConstantInt *Size) {
- if (MSSA->isLiveOnEntryDef(Def))
- return isa<AllocaInst>(getUnderlyingObject(V));
-
- if (IntrinsicInst *II =
- dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
- if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
- ConstantInt *LTSize = cast<ConstantInt>(II->getArgOperand(0));
- if (AA->isMustAlias(V, II->getArgOperand(1)) &&
- LTSize->getZExtValue() >= Size->getZExtValue())
- return true;
- }
- }
-
- return false;
-}
-
+static bool hasUndefContentsMSSA(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
+ MemoryDef *Def, ConstantInt *Size) {
+ if (MSSA->isLiveOnEntryDef(Def))
+ return isa<AllocaInst>(getUnderlyingObject(V));
+
+ if (IntrinsicInst *II =
+ dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
+ ConstantInt *LTSize = cast<ConstantInt>(II->getArgOperand(0));
+ if (AA->isMustAlias(V, II->getArgOperand(1)) &&
+ LTSize->getZExtValue() >= Size->getZExtValue())
+ return true;
+ }
+ }
+
+ return false;
+}
+
/// Transform memcpy to memset when its source was just memset.
/// In other words, turn:
/// \code
@@ -1270,7 +1270,7 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
MemSetInst *MemSet) {
// Make sure that memcpy(..., memset(...), ...), that is we are memsetting and
// memcpying from the same address. Otherwise it is hard to reason about.
- if (!AA->isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
+ if (!AA->isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
return false;
// A known memset size is required.
@@ -1287,37 +1287,37 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
// interested in the bytes from MemSetSize..CopySize here, but as we can't
// easily represent this location, we use the full 0..CopySize range.
MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
- bool CanReduceSize = false;
- if (EnableMemorySSA) {
- MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
- MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
- MemSetAccess->getDefiningAccess(), MemCpyLoc);
- if (auto *MD = dyn_cast<MemoryDef>(Clobber))
- if (hasUndefContentsMSSA(MSSA, AA, MemCpy->getSource(), MD, CopySize))
- CanReduceSize = true;
- } else {
- MemDepResult DepInfo = MD->getPointerDependencyFrom(
- MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent());
- if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize))
- CanReduceSize = true;
- }
-
- if (!CanReduceSize)
+ bool CanReduceSize = false;
+ if (EnableMemorySSA) {
+ MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
+ MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+ MemSetAccess->getDefiningAccess(), MemCpyLoc);
+ if (auto *MD = dyn_cast<MemoryDef>(Clobber))
+ if (hasUndefContentsMSSA(MSSA, AA, MemCpy->getSource(), MD, CopySize))
+ CanReduceSize = true;
+ } else {
+ MemDepResult DepInfo = MD->getPointerDependencyFrom(
+ MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent());
+ if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize))
+ CanReduceSize = true;
+ }
+
+ if (!CanReduceSize)
return false;
- CopySize = MemSetSize;
+ CopySize = MemSetSize;
}
IRBuilder<> Builder(MemCpy);
- Instruction *NewM =
- Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
- CopySize, MaybeAlign(MemCpy->getDestAlignment()));
- if (MSSAU) {
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
- auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- }
-
+ Instruction *NewM =
+ Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
+ CopySize, MaybeAlign(MemCpy->getDestAlignment()));
+ if (MSSAU) {
+ auto *LastDef =
+ cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
+ auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
+ }
+
return true;
}
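Editorial aside, not part of the diff: an illustrative C++ view of the memcpy-from-memset case, with invented names.

#include <cstring>

void copyOutOfMemset(char *dst, char *scratch, std::size_t n) {
  std::memset(scratch, 0xAB, n);            // source bytes are all 0xAB
  std::memcpy(dst, scratch, n);             // copy reads only memset'd bytes
  // Every copied byte equals the memset value, so the copy itself becomes
  //   std::memset(dst, 0xAB, n);
}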
@@ -1333,7 +1333,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
// If the source and destination of the memcpy are the same, then zap it.
if (M->getSource() == M->getDest()) {
++BBI;
- eraseInstruction(M);
+ eraseInstruction(M);
return true;
}
@@ -1343,157 +1343,157 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
M->getModule()->getDataLayout())) {
IRBuilder<> Builder(M);
- Instruction *NewM =
- Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
- MaybeAlign(M->getDestAlignment()), false);
- if (MSSAU) {
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
- auto *NewAccess =
- MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- }
-
- eraseInstruction(M);
+ Instruction *NewM =
+ Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
+ MaybeAlign(M->getDestAlignment()), false);
+ if (MSSAU) {
+ auto *LastDef =
+ cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
+ auto *NewAccess =
+ MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
+ }
+
+ eraseInstruction(M);
++NumCpyToSet;
return true;
}
- if (EnableMemorySSA) {
- MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
- MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA);
- MemoryLocation DestLoc = MemoryLocation::getForDest(M);
- const MemoryAccess *DestClobber =
- MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc);
-
- // Try to turn a partially redundant memset + memcpy into
- // memcpy + smaller memset. We don't need the memcpy size for this.
-    // The memcpy must post-dom the memset, so limit this to the same basic
- // block. A non-local generalization is likely not worthwhile.
- if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
- if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
- if (DestClobber->getBlock() == M->getParent())
- if (processMemSetMemCpyDependence(M, MDep))
- return true;
-
- // The optimizations after this point require the memcpy size.
- ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
- if (!CopySize) return false;
-
- MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
- AnyClobber, MemoryLocation::getForSource(M));
-
- // There are four possible optimizations we can do for memcpy:
- // a) memcpy-memcpy xform which exposes redundance for DSE.
- // b) call-memcpy xform for return slot optimization.
- // c) memcpy from freshly alloca'd space or space that has just started
- // its lifetime copies undefined data, and we can therefore eliminate
- // the memcpy in favor of the data that was already at the destination.
- // d) memcpy from a just-memset'd source can be turned into memset.
- if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
- if (Instruction *MI = MD->getMemoryInst()) {
- if (auto *C = dyn_cast<CallInst>(MI)) {
- // The memcpy must post-dom the call. Limit to the same block for now.
- // Additionally, we need to ensure that there are no accesses to dest
- // between the call and the memcpy. Accesses to src will be checked
- // by performCallSlotOptzn().
- // TODO: Support non-local call-slot optimization?
- if (C->getParent() == M->getParent() &&
- !accessedBetween(*AA, DestLoc, MD, MA)) {
- // FIXME: Can we pass in either of dest/src alignment here instead
- // of conservatively taking the minimum?
- Align Alignment = std::min(M->getDestAlign().valueOrOne(),
- M->getSourceAlign().valueOrOne());
- if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
- CopySize->getZExtValue(), Alignment, C)) {
- LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
- << " call: " << *C << "\n"
- << " memcpy: " << *M << "\n");
- eraseInstruction(M);
- ++NumMemCpyInstr;
- return true;
- }
- }
- }
- if (auto *MDep = dyn_cast<MemCpyInst>(MI))
- return processMemCpyMemCpyDependence(M, MDep);
- if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
- if (performMemCpyToMemSetOptzn(M, MDep)) {
- LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
- eraseInstruction(M);
- ++NumCpyToSet;
- return true;
- }
- }
- }
-
- if (hasUndefContentsMSSA(MSSA, AA, M->getSource(), MD, CopySize)) {
- LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
- eraseInstruction(M);
- ++NumMemCpyInstr;
+ if (EnableMemorySSA) {
+ MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
+ MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA);
+ MemoryLocation DestLoc = MemoryLocation::getForDest(M);
+ const MemoryAccess *DestClobber =
+ MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc);
+
+ // Try to turn a partially redundant memset + memcpy into
+ // memcpy + smaller memset. We don't need the memcpy size for this.
+    // The memcpy must post-dom the memset, so limit this to the same basic
+ // block. A non-local generalization is likely not worthwhile.
+ if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
+ if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
+ if (DestClobber->getBlock() == M->getParent())
+ if (processMemSetMemCpyDependence(M, MDep))
+ return true;
+
+ // The optimizations after this point require the memcpy size.
+ ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
+ if (!CopySize) return false;
+
+ MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
+ AnyClobber, MemoryLocation::getForSource(M));
+
+ // There are four possible optimizations we can do for memcpy:
+ // a) memcpy-memcpy xform which exposes redundance for DSE.
+ // b) call-memcpy xform for return slot optimization.
+ // c) memcpy from freshly alloca'd space or space that has just started
+ // its lifetime copies undefined data, and we can therefore eliminate
+ // the memcpy in favor of the data that was already at the destination.
+ // d) memcpy from a just-memset'd source can be turned into memset.
+ if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
+ if (Instruction *MI = MD->getMemoryInst()) {
+ if (auto *C = dyn_cast<CallInst>(MI)) {
+ // The memcpy must post-dom the call. Limit to the same block for now.
+ // Additionally, we need to ensure that there are no accesses to dest
+ // between the call and the memcpy. Accesses to src will be checked
+ // by performCallSlotOptzn().
+ // TODO: Support non-local call-slot optimization?
+ if (C->getParent() == M->getParent() &&
+ !accessedBetween(*AA, DestLoc, MD, MA)) {
+ // FIXME: Can we pass in either of dest/src alignment here instead
+ // of conservatively taking the minimum?
+ Align Alignment = std::min(M->getDestAlign().valueOrOne(),
+ M->getSourceAlign().valueOrOne());
+ if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
+ CopySize->getZExtValue(), Alignment, C)) {
+ LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
+ << " call: " << *C << "\n"
+ << " memcpy: " << *M << "\n");
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
+ return true;
+ }
+ }
+ }
+ if (auto *MDep = dyn_cast<MemCpyInst>(MI))
+ return processMemCpyMemCpyDependence(M, MDep);
+ if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
+ if (performMemCpyToMemSetOptzn(M, MDep)) {
+ LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
+ eraseInstruction(M);
+ ++NumCpyToSet;
+ return true;
+ }
+ }
+ }
+
+ if (hasUndefContentsMSSA(MSSA, AA, M->getSource(), MD, CopySize)) {
+ LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
return true;
}
}
- } else {
- MemDepResult DepInfo = MD->getDependency(M);
-
- // Try to turn a partially redundant memset + memcpy into
- // memcpy + smaller memset. We don't need the memcpy size for this.
- if (DepInfo.isClobber())
- if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst()))
- if (processMemSetMemCpyDependence(M, MDep))
- return true;
-
- // The optimizations after this point require the memcpy size.
- ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
- if (!CopySize) return false;
-
- // There are four possible optimizations we can do for memcpy:
- // a) memcpy-memcpy xform which exposes redundance for DSE.
- // b) call-memcpy xform for return slot optimization.
- // c) memcpy from freshly alloca'd space or space that has just started
- // its lifetime copies undefined data, and we can therefore eliminate
- // the memcpy in favor of the data that was already at the destination.
- // d) memcpy from a just-memset'd source can be turned into memset.
- if (DepInfo.isClobber()) {
- if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
- // FIXME: Can we pass in either of dest/src alignment here instead
- // of conservatively taking the minimum?
- Align Alignment = std::min(M->getDestAlign().valueOrOne(),
- M->getSourceAlign().valueOrOne());
- if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
- CopySize->getZExtValue(), Alignment, C)) {
- eraseInstruction(M);
- ++NumMemCpyInstr;
- return true;
- }
- }
+ } else {
+ MemDepResult DepInfo = MD->getDependency(M);
+
+ // Try to turn a partially redundant memset + memcpy into
+ // memcpy + smaller memset. We don't need the memcpy size for this.
+ if (DepInfo.isClobber())
+ if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst()))
+ if (processMemSetMemCpyDependence(M, MDep))
+ return true;
+
+ // The optimizations after this point require the memcpy size.
+ ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
+ if (!CopySize) return false;
+
+ // There are four possible optimizations we can do for memcpy:
+ // a) memcpy-memcpy xform which exposes redundance for DSE.
+ // b) call-memcpy xform for return slot optimization.
+ // c) memcpy from freshly alloca'd space or space that has just started
+ // its lifetime copies undefined data, and we can therefore eliminate
+ // the memcpy in favor of the data that was already at the destination.
+ // d) memcpy from a just-memset'd source can be turned into memset.
+ if (DepInfo.isClobber()) {
+ if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
+ // FIXME: Can we pass in either of dest/src alignment here instead
+ // of conservatively taking the minimum?
+ Align Alignment = std::min(M->getDestAlign().valueOrOne(),
+ M->getSourceAlign().valueOrOne());
+ if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
+ CopySize->getZExtValue(), Alignment, C)) {
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
+ return true;
+ }
+ }
}
- MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
- MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
- SrcLoc, true, M->getIterator(), M->getParent());
-
- if (SrcDepInfo.isClobber()) {
- if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
- return processMemCpyMemCpyDependence(M, MDep);
- } else if (SrcDepInfo.isDef()) {
- if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) {
- eraseInstruction(M);
- ++NumMemCpyInstr;
+ MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
+ MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
+ SrcLoc, true, M->getIterator(), M->getParent());
+
+ if (SrcDepInfo.isClobber()) {
+ if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
+ return processMemCpyMemCpyDependence(M, MDep);
+ } else if (SrcDepInfo.isDef()) {
+ if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) {
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
return true;
}
- }
-
- if (SrcDepInfo.isClobber())
- if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst()))
- if (performMemCpyToMemSetOptzn(M, MDep)) {
- eraseInstruction(M);
- ++NumCpyToSet;
- return true;
- }
- }
-
+ }
+
+ if (SrcDepInfo.isClobber())
+ if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst()))
+ if (performMemCpyToMemSetOptzn(M, MDep)) {
+ eraseInstruction(M);
+ ++NumCpyToSet;
+ return true;
+ }
+ }
+
return false;
}
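Editorial aside, not part of the diff: a rough C++ picture of case (c) from the list above, a copy whose source holds only undefined data. Names are invented and n <= sizeof scratch is assumed.

#include <cstring>

void copyOfUndef(char *dst, std::size_t n) {  // assumes n <= sizeof scratch
  char scratch[128];                          // never written: contents undef
  // Case (c): the copy only moves undefined data, so the pass deletes it.
  std::memcpy(dst, scratch, n);
}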
@@ -1504,8 +1504,8 @@ bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
return false;
// See if the pointers alias.
- if (!AA->isNoAlias(MemoryLocation::getForDest(M),
- MemoryLocation::getForSource(M)))
+ if (!AA->isNoAlias(MemoryLocation::getForDest(M),
+ MemoryLocation::getForSource(M)))
return false;
LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
@@ -1518,13 +1518,13 @@ bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
Intrinsic::memcpy, ArgTys));
- // For MemorySSA nothing really changes (except that memcpy may imply stricter
- // aliasing guarantees).
-
+ // For MemorySSA nothing really changes (except that memcpy may imply stricter
+ // aliasing guarantees).
+
// MemDep may have over conservative information about this instruction, just
// conservatively flush it from the cache.
- if (MD)
- MD->removeInstruction(M);
+ if (MD)
+ MD->removeInstruction(M);
++NumMoveToCpy;
return true;
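Editorial aside, not part of the diff: the memmove-to-memcpy relaxation in miniature, with invented names.

#include <cstring>

void moveBetweenDisjointBuffers(char *dst, const char *src, std::size_t n) {
  // If AA proves [dst, dst+n) and [src, src+n) cannot overlap, the memmove
  // is relaxed to the cheaper, more optimizable memcpy:
  std::memmove(dst, src, n);                // becomes std::memcpy(dst, src, n)
}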
@@ -1537,21 +1537,21 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
Value *ByValArg = CB.getArgOperand(ArgNo);
Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
- MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
- MemCpyInst *MDep = nullptr;
- if (EnableMemorySSA) {
- MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
- MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
- CallAccess->getDefiningAccess(), Loc);
- if (auto *MD = dyn_cast<MemoryDef>(Clobber))
- MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
- } else {
- MemDepResult DepInfo = MD->getPointerDependencyFrom(
- Loc, true, CB.getIterator(), CB.getParent());
- if (!DepInfo.isClobber())
- return false;
- MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
- }
+ MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
+ MemCpyInst *MDep = nullptr;
+ if (EnableMemorySSA) {
+ MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
+ MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+ CallAccess->getDefiningAccess(), Loc);
+ if (auto *MD = dyn_cast<MemoryDef>(Clobber))
+ MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
+ } else {
+ MemDepResult DepInfo = MD->getPointerDependencyFrom(
+ Loc, true, CB.getIterator(), CB.getParent());
+ if (!DepInfo.isClobber())
+ return false;
+ MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
+ }
// If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
// a memcpy, see if we can byval from the source of the memcpy instead of the
@@ -1574,8 +1574,8 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
// source of the memcpy to the alignment we need. If we fail, we bail out.
MaybeAlign MemDepAlign = MDep->getSourceAlign();
if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
- getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
- DT) < *ByValAlign)
+ getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
+ DT) < *ByValAlign)
return false;
// The address space of the memcpy source must match the byval argument
@@ -1589,19 +1589,19 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
// *b = 42;
// foo(*a)
// It would be invalid to transform the second memcpy into foo(*b).
- if (EnableMemorySSA) {
- if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
- MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
- return false;
- } else {
- // NOTE: This is conservative, it will stop on any read from the source loc,
- // not just the defining memcpy.
- MemDepResult SourceDep = MD->getPointerDependencyFrom(
- MemoryLocation::getForSource(MDep), false,
- CB.getIterator(), MDep->getParent());
- if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
- return false;
- }
+ if (EnableMemorySSA) {
+ if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
+ MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
+ return false;
+ } else {
+ // NOTE: This is conservative, it will stop on any read from the source loc,
+ // not just the defining memcpy.
+ MemDepResult SourceDep = MD->getPointerDependencyFrom(
+ MemoryLocation::getForSource(MDep), false,
+ CB.getIterator(), MDep->getParent());
+ if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
+ return false;
+ }
Value *TmpCast = MDep->getSource();
if (MDep->getSource()->getType() != ByValArg->getType()) {
@@ -1632,7 +1632,7 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) {
// instruction in a BB can't be dominated by a later instruction in the
// same BB (which is a scenario that can happen for an unreachable BB that
// has itself as a predecessor).
- if (!DT->isReachableFromEntry(&BB))
+ if (!DT->isReachableFromEntry(&BB))
continue;
for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
@@ -1668,43 +1668,43 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) {
}
PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
- auto *MD = !EnableMemorySSA ? &AM.getResult<MemoryDependenceAnalysis>(F)
- : AM.getCachedResult<MemoryDependenceAnalysis>(F);
+ auto *MD = !EnableMemorySSA ? &AM.getResult<MemoryDependenceAnalysis>(F)
+ : AM.getCachedResult<MemoryDependenceAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
- auto *AA = &AM.getResult<AAManager>(F);
- auto *AC = &AM.getResult<AssumptionAnalysis>(F);
- auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
- auto *MSSA = EnableMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F)
- : AM.getCachedResult<MemorySSAAnalysis>(F);
-
- bool MadeChange =
- runImpl(F, MD, &TLI, AA, AC, DT, MSSA ? &MSSA->getMSSA() : nullptr);
+ auto *AA = &AM.getResult<AAManager>(F);
+ auto *AC = &AM.getResult<AssumptionAnalysis>(F);
+ auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ auto *MSSA = EnableMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F)
+ : AM.getCachedResult<MemorySSAAnalysis>(F);
+
+ bool MadeChange =
+ runImpl(F, MD, &TLI, AA, AC, DT, MSSA ? &MSSA->getMSSA() : nullptr);
if (!MadeChange)
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
PA.preserve<GlobalsAA>();
- if (MD)
- PA.preserve<MemoryDependenceAnalysis>();
- if (MSSA)
- PA.preserve<MemorySSAAnalysis>();
+ if (MD)
+ PA.preserve<MemoryDependenceAnalysis>();
+ if (MSSA)
+ PA.preserve<MemorySSAAnalysis>();
return PA;
}
-bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_,
- TargetLibraryInfo *TLI_, AliasAnalysis *AA_,
- AssumptionCache *AC_, DominatorTree *DT_,
- MemorySSA *MSSA_) {
+bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_,
+ TargetLibraryInfo *TLI_, AliasAnalysis *AA_,
+ AssumptionCache *AC_, DominatorTree *DT_,
+ MemorySSA *MSSA_) {
bool MadeChange = false;
MD = MD_;
TLI = TLI_;
- AA = AA_;
- AC = AC_;
- DT = DT_;
- MSSA = MSSA_;
- MemorySSAUpdater MSSAU_(MSSA_);
- MSSAU = MSSA_ ? &MSSAU_ : nullptr;
+ AA = AA_;
+ AC = AC_;
+ DT = DT_;
+ MSSA = MSSA_;
+ MemorySSAUpdater MSSAU_(MSSA_);
+ MSSAU = MSSA_ ? &MSSAU_ : nullptr;
// If we don't have at least memset and memcpy, there is little point of doing
// anything here. These are required by a freestanding implementation, so if
// even they are disabled, there is no point in trying hard.
@@ -1717,9 +1717,9 @@ bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_,
MadeChange = true;
}
- if (MSSA_ && VerifyMemorySSA)
- MSSA_->verifyMemorySSA();
-
+ if (MSSA_ && VerifyMemorySSA)
+ MSSA_->verifyMemorySSA();
+
MD = nullptr;
return MadeChange;
}
@@ -1729,17 +1729,17 @@ bool MemCpyOptLegacyPass::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
- auto *MDWP = !EnableMemorySSA
- ? &getAnalysis<MemoryDependenceWrapperPass>()
- : getAnalysisIfAvailable<MemoryDependenceWrapperPass>();
+ auto *MDWP = !EnableMemorySSA
+ ? &getAnalysis<MemoryDependenceWrapperPass>()
+ : getAnalysisIfAvailable<MemoryDependenceWrapperPass>();
auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto *MSSAWP = EnableMemorySSA
- ? &getAnalysis<MemorySSAWrapperPass>()
- : getAnalysisIfAvailable<MemorySSAWrapperPass>();
-
- return Impl.runImpl(F, MDWP ? & MDWP->getMemDep() : nullptr, TLI, AA, AC, DT,
- MSSAWP ? &MSSAWP->getMSSA() : nullptr);
+ auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto *MSSAWP = EnableMemorySSA
+ ? &getAnalysis<MemorySSAWrapperPass>()
+ : getAnalysisIfAvailable<MemorySSAWrapperPass>();
+
+ return Impl.runImpl(F, MDWP ? & MDWP->getMemDep() : nullptr, TLI, AA, AC, DT,
+ MSSAWP ? &MSSAWP->getMSSA() : nullptr);
}
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/MergeICmps.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/MergeICmps.cpp
index 7f8b75ac88..5389d41e62 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/MergeICmps.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/MergeICmps.cpp
@@ -372,7 +372,7 @@ BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block,
} else {
// In this case, we expect a constant incoming value (the comparison is
// chained).
- const auto *const Const = cast<ConstantInt>(Val);
+ const auto *const Const = cast<ConstantInt>(Val);
LLVM_DEBUG(dbgs() << "const\n");
if (!Const->isZero()) return {};
LLVM_DEBUG(dbgs() << "false\n");
@@ -624,17 +624,17 @@ static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
Value *IsEqual = nullptr;
LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons -> "
<< BB->getName() << "\n");
-
- // If there is one block that requires splitting, we do it now, i.e.
- // just before we know we will collapse the chain. The instructions
- // can be executed before any of the instructions in the chain.
- const auto ToSplit = llvm::find_if(
- Comparisons, [](const BCECmpBlock &B) { return B.RequireSplit; });
- if (ToSplit != Comparisons.end()) {
- LLVM_DEBUG(dbgs() << "Splitting non_BCE work to header\n");
- ToSplit->split(BB, AA);
- }
-
+
+ // If there is one block that requires splitting, we do it now, i.e.
+ // just before we know we will collapse the chain. The instructions
+ // can be executed before any of the instructions in the chain.
+ const auto ToSplit = llvm::find_if(
+ Comparisons, [](const BCECmpBlock &B) { return B.RequireSplit; });
+ if (ToSplit != Comparisons.end()) {
+ LLVM_DEBUG(dbgs() << "Splitting non_BCE work to header\n");
+ ToSplit->split(BB, AA);
+ }
+
if (Comparisons.size() == 1) {
LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n");
Value *const LhsLoad =
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/NaryReassociate.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/NaryReassociate.cpp
index 32bb62129e..dd2830026c 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -219,27 +219,27 @@ bool NaryReassociatePass::doOneIteration(Function &F) {
// Process the basic blocks in a depth first traversal of the dominator
// tree. This order ensures that all bases of a candidate are in Candidates
// when we process it.
- SmallVector<WeakTrackingVH, 16> DeadInsts;
+ SmallVector<WeakTrackingVH, 16> DeadInsts;
for (const auto Node : depth_first(DT)) {
BasicBlock *BB = Node->getBlock();
for (auto I = BB->begin(); I != BB->end(); ++I) {
- Instruction *OrigI = &*I;
- const SCEV *OrigSCEV = nullptr;
- if (Instruction *NewI = tryReassociate(OrigI, OrigSCEV)) {
- Changed = true;
- OrigI->replaceAllUsesWith(NewI);
-
- // Add 'OrigI' to the list of dead instructions.
- DeadInsts.push_back(WeakTrackingVH(OrigI));
- // Add the rewritten instruction to SeenExprs; the original
- // instruction is deleted.
- const SCEV *NewSCEV = SE->getSCEV(NewI);
- SeenExprs[NewSCEV].push_back(WeakTrackingVH(NewI));
-
+ Instruction *OrigI = &*I;
+ const SCEV *OrigSCEV = nullptr;
+ if (Instruction *NewI = tryReassociate(OrigI, OrigSCEV)) {
+ Changed = true;
+ OrigI->replaceAllUsesWith(NewI);
+
+ // Add 'OrigI' to the list of dead instructions.
+ DeadInsts.push_back(WeakTrackingVH(OrigI));
+ // Add the rewritten instruction to SeenExprs; the original
+ // instruction is deleted.
+ const SCEV *NewSCEV = SE->getSCEV(NewI);
+ SeenExprs[NewSCEV].push_back(WeakTrackingVH(NewI));
+
// Ideally, NewSCEV should equal OldSCEV because tryReassociate(I)
// is equivalent to I. However, ScalarEvolution::getSCEV may
- // weaken nsw causing NewSCEV not to equal OldSCEV. For example,
- // suppose we reassociate
+ // weaken nsw causing NewSCEV not to equal OldSCEV. For example,
+ // suppose we reassociate
// I = &a[sext(i +nsw j)] // assuming sizeof(a[0]) = 4
// to
// NewI = &a[sext(i)] + sext(j).
@@ -253,47 +253,47 @@ bool NaryReassociatePass::doOneIteration(Function &F) {
// equivalence, we add I to SeenExprs[OldSCEV] as well so that we can
// map both SCEV before and after tryReassociate(I) to I.
//
- // This improvement is exercised in @reassociate_gep_nsw in
- // nary-gep.ll.
- if (NewSCEV != OrigSCEV)
- SeenExprs[OrigSCEV].push_back(WeakTrackingVH(NewI));
- } else if (OrigSCEV)
- SeenExprs[OrigSCEV].push_back(WeakTrackingVH(OrigI));
+ // This improvement is exercised in @reassociate_gep_nsw in
+ // nary-gep.ll.
+ if (NewSCEV != OrigSCEV)
+ SeenExprs[OrigSCEV].push_back(WeakTrackingVH(NewI));
+ } else if (OrigSCEV)
+ SeenExprs[OrigSCEV].push_back(WeakTrackingVH(OrigI));
}
}
- // Delete all dead instructions from 'DeadInsts'.
- // Please note ScalarEvolution is updated along the way.
- RecursivelyDeleteTriviallyDeadInstructionsPermissive(
- DeadInsts, TLI, nullptr, [this](Value *V) { SE->forgetValue(V); });
-
+ // Delete all dead instructions from 'DeadInsts'.
+ // Please note ScalarEvolution is updated along the way.
+ RecursivelyDeleteTriviallyDeadInstructionsPermissive(
+ DeadInsts, TLI, nullptr, [this](Value *V) { SE->forgetValue(V); });
+
return Changed;
}
-Instruction *NaryReassociatePass::tryReassociate(Instruction * I,
- const SCEV *&OrigSCEV) {
-
- if (!SE->isSCEVable(I->getType()))
- return nullptr;
-
+Instruction *NaryReassociatePass::tryReassociate(Instruction * I,
+ const SCEV *&OrigSCEV) {
+
+ if (!SE->isSCEVable(I->getType()))
+ return nullptr;
+
switch (I->getOpcode()) {
case Instruction::Add:
case Instruction::Mul:
- OrigSCEV = SE->getSCEV(I);
+ OrigSCEV = SE->getSCEV(I);
return tryReassociateBinaryOp(cast<BinaryOperator>(I));
case Instruction::GetElementPtr:
- OrigSCEV = SE->getSCEV(I);
+ OrigSCEV = SE->getSCEV(I);
return tryReassociateGEP(cast<GetElementPtrInst>(I));
default:
- return nullptr;
+ return nullptr;
}
-
- llvm_unreachable("should not be reached");
- return nullptr;
+
+ llvm_unreachable("should not be reached");
+ return nullptr;
}
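Editorial aside, not part of the diff: a hedged sketch of the redundancy n-ary reassociation exposes; `use` and `naryExample` are invented names.

void use(int);                              // invented sink to keep values live

void naryExample(int a, int b) {
  int t = a + b;                            // recorded in SeenExprs
  int u = (a + 2) + b;                      // reassociated to t + 2, reusing
  use(t);                                   // the already-computed a + b
  use(u);
}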
static bool isGEPFoldable(GetElementPtrInst *GEP,
const TargetTransformInfo *TTI) {
- SmallVector<const Value *, 4> Indices(GEP->indices());
+ SmallVector<const Value *, 4> Indices(GEP->indices());
return TTI->getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
Indices) == TargetTransformInfo::TCC_Free;
}
@@ -369,8 +369,8 @@ NaryReassociatePass::tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
// Replace the I-th index with LHS.
IndexExprs[I] = SE->getSCEV(LHS);
if (isKnownNonNegative(LHS, *DL, 0, AC, GEP, DT) &&
- DL->getTypeSizeInBits(LHS->getType()).getFixedSize() <
- DL->getTypeSizeInBits(GEP->getOperand(I)->getType()).getFixedSize()) {
+ DL->getTypeSizeInBits(LHS->getType()).getFixedSize() <
+ DL->getTypeSizeInBits(GEP->getOperand(I)->getType()).getFixedSize()) {
// Zero-extend LHS if it is non-negative. InstCombine canonicalizes sext to
// zext if the source operand is proved non-negative. We should do that
// consistently so that CandidateExpr more likely appears before. See
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/NewGVN.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/NewGVN.cpp
index 281d47c862..7638b0fba4 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/NewGVN.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/NewGVN.cpp
@@ -662,8 +662,8 @@ public:
const DataLayout &DL)
: F(F), DT(DT), TLI(TLI), AA(AA), MSSA(MSSA), AC(AC), DL(DL),
PredInfo(std::make_unique<PredicateInfo>(F, *DT, *AC)),
- SQ(DL, TLI, DT, AC, /*CtxI=*/nullptr, /*UseInstrInfo=*/false,
- /*CanUseUndef=*/false) {}
+ SQ(DL, TLI, DT, AC, /*CtxI=*/nullptr, /*UseInstrInfo=*/false,
+ /*CanUseUndef=*/false) {}
bool runGVN();
@@ -1248,7 +1248,7 @@ const UnknownExpression *NewGVN::createUnknownExpression(Instruction *I) const {
const CallExpression *
NewGVN::createCallExpression(CallInst *CI, const MemoryAccess *MA) const {
// FIXME: Add operand bundles for calls.
- // FIXME: Allow commutative matching for intrinsics.
+ // FIXME: Allow commutative matching for intrinsics.
auto *E =
new (ExpressionAllocator) CallExpression(CI->getNumOperands(), CI, MA);
setBasicExpressionInfo(CI, E);
@@ -1535,39 +1535,39 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const {
LLVM_DEBUG(dbgs() << "Found predicate info from instruction !\n");
- const Optional<PredicateConstraint> &Constraint = PI->getConstraint();
- if (!Constraint)
+ const Optional<PredicateConstraint> &Constraint = PI->getConstraint();
+ if (!Constraint)
return nullptr;
- CmpInst::Predicate Predicate = Constraint->Predicate;
- Value *CmpOp0 = I->getOperand(0);
- Value *CmpOp1 = Constraint->OtherOp;
+ CmpInst::Predicate Predicate = Constraint->Predicate;
+ Value *CmpOp0 = I->getOperand(0);
+ Value *CmpOp1 = Constraint->OtherOp;
- Value *FirstOp = lookupOperandLeader(CmpOp0);
- Value *SecondOp = lookupOperandLeader(CmpOp1);
- Value *AdditionallyUsedValue = CmpOp0;
+ Value *FirstOp = lookupOperandLeader(CmpOp0);
+ Value *SecondOp = lookupOperandLeader(CmpOp1);
+ Value *AdditionallyUsedValue = CmpOp0;
// Sort the ops.
if (shouldSwapOperands(FirstOp, SecondOp)) {
std::swap(FirstOp, SecondOp);
- Predicate = CmpInst::getSwappedPredicate(Predicate);
- AdditionallyUsedValue = CmpOp1;
+ Predicate = CmpInst::getSwappedPredicate(Predicate);
+ AdditionallyUsedValue = CmpOp1;
}
- if (Predicate == CmpInst::ICMP_EQ) {
- addPredicateUsers(PI, I);
- addAdditionalUsers(AdditionallyUsedValue, I);
- return createVariableOrConstant(FirstOp);
+ if (Predicate == CmpInst::ICMP_EQ) {
+ addPredicateUsers(PI, I);
+ addAdditionalUsers(AdditionallyUsedValue, I);
+ return createVariableOrConstant(FirstOp);
}
-
- // Handle the special case of floating point.
- if (Predicate == CmpInst::FCMP_OEQ && isa<ConstantFP>(FirstOp) &&
- !cast<ConstantFP>(FirstOp)->isZero()) {
- addPredicateUsers(PI, I);
- addAdditionalUsers(AdditionallyUsedValue, I);
- return createConstantExpression(cast<Constant>(FirstOp));
+
+ // Handle the special case of floating point.
+ if (Predicate == CmpInst::FCMP_OEQ && isa<ConstantFP>(FirstOp) &&
+ !cast<ConstantFP>(FirstOp)->isZero()) {
+ addPredicateUsers(PI, I);
+ addAdditionalUsers(AdditionallyUsedValue, I);
+ return createConstantExpression(cast<Constant>(FirstOp));
}
-
+
return nullptr;
}
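Editorial aside, not part of the diff: an illustrative C++ view of the ICMP_EQ case handled above, where predicate info lets uses in the guarded arm be replaced by the compared-against value. The function name is invented.

int predicatedFold(int x) {
  if (x == 42)
    return x + 1;                           // in this arm predicate info pins
                                            // x to 42, so the add folds to 43
  return x;
}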
@@ -2876,7 +2876,7 @@ void NewGVN::cleanupTables() {
}
while (!TempInst.empty()) {
- auto *I = TempInst.pop_back_val();
+ auto *I = TempInst.pop_back_val();
I->deleteValue();
}
@@ -3371,9 +3371,9 @@ bool NewGVN::runGVN() {
for (auto &B : RPOT) {
auto *Node = DT->getNode(B);
if (Node->getNumChildren() > 1)
- llvm::sort(*Node, [&](const DomTreeNode *A, const DomTreeNode *B) {
- return RPOOrdering[A] < RPOOrdering[B];
- });
+ llvm::sort(*Node, [&](const DomTreeNode *A, const DomTreeNode *B) {
+ return RPOOrdering[A] < RPOOrdering[B];
+ });
}
// Now a standard depth first ordering of the domtree is equivalent to RPO.
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/PlaceSafepoints.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/PlaceSafepoints.cpp
index a110f7d5c2..9ee2e77af0 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -243,7 +243,7 @@ static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE,
BasicBlock *Pred) {
// A conservative bound on the loop as a whole.
const SCEV *MaxTrips = SE->getConstantMaxBackedgeTakenCount(L);
- if (!isa<SCEVCouldNotCompute>(MaxTrips) &&
+ if (!isa<SCEVCouldNotCompute>(MaxTrips) &&
SE->getUnsignedRange(MaxTrips).getUnsignedMax().isIntN(
CountedLoopTripWidth))
return true;
@@ -255,7 +255,7 @@ static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE,
// This returns an exact expression only. TODO: We really only need an
// upper bound here, but SE doesn't expose that.
const SCEV *MaxExec = SE->getExitCount(L, Pred);
- if (!isa<SCEVCouldNotCompute>(MaxExec) &&
+ if (!isa<SCEVCouldNotCompute>(MaxExec) &&
SE->getUnsignedRange(MaxExec).getUnsignedMax().isIntN(
CountedLoopTripWidth))
return true;
@@ -435,7 +435,7 @@ static Instruction *findLocationForEntrySafepoint(Function &F,
return Cursor;
}
-const char GCSafepointPollName[] = "gc.safepoint_poll";
+const char GCSafepointPollName[] = "gc.safepoint_poll";
static bool isGCSafepointPoll(Function &F) {
return F.getName().equals(GCSafepointPollName);
@@ -589,7 +589,7 @@ bool PlaceSafepoints::runOnFunction(Function &F) {
for (Instruction *PollLocation : PollsNeeded) {
std::vector<CallBase *> RuntimeCalls;
InsertSafepointPoll(PollLocation, RuntimeCalls, TLI);
- llvm::append_range(ParsePointNeeded, RuntimeCalls);
+ llvm::append_range(ParsePointNeeded, RuntimeCalls);
}
return Modified;
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/Reassociate.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/Reassociate.cpp
index dffeb7cc22..e4c9424aee 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/Reassociate.cpp
@@ -920,100 +920,100 @@ static Value *NegateValue(Value *V, Instruction *BI,
return NewNeg;
}
-// See if this `or` looks like a load widening reduction, i.e. that it
-// consists of `or`/`shl`/`zext`/`load` nodes only. Note that we don't
-// ensure that the pattern is *really* a load widening reduction,
-// we do not ensure that it can really be replaced with a widened load,
-// only that it mostly looks like one.
-static bool isLoadCombineCandidate(Instruction *Or) {
- SmallVector<Instruction *, 8> Worklist;
- SmallSet<Instruction *, 8> Visited;
-
- auto Enqueue = [&](Value *V) {
- auto *I = dyn_cast<Instruction>(V);
- // Each node of an `or` reduction must be an instruction,
- if (!I)
- return false; // Node is certainly not part of an `or` load reduction.
- // Only process instructions we have never processed before.
- if (Visited.insert(I).second)
- Worklist.emplace_back(I);
- return true; // Will need to look at parent nodes.
- };
-
- if (!Enqueue(Or))
- return false; // Not an `or` reduction pattern.
-
- while (!Worklist.empty()) {
- auto *I = Worklist.pop_back_val();
-
- // Okay, which instruction is this node?
- switch (I->getOpcode()) {
- case Instruction::Or:
-      // Got an `or` node. That's fine, just recurse into its operands.
- for (Value *Op : I->operands())
- if (!Enqueue(Op))
- return false; // Not an `or` reduction pattern.
- continue;
-
- case Instruction::Shl:
- case Instruction::ZExt:
- // `shl`/`zext` nodes are fine, just recurse into their base operand.
- if (!Enqueue(I->getOperand(0)))
- return false; // Not an `or` reduction pattern.
- continue;
-
- case Instruction::Load:
- // Perfect, `load` node means we've reached an edge of the graph.
- continue;
-
- default: // Unknown node.
- return false; // Not an `or` reduction pattern.
- }
- }
-
- return true;
-}
-
-/// Return true if it may be profitable to convert this (X|Y) into (X+Y).
-static bool ShouldConvertOrWithNoCommonBitsToAdd(Instruction *Or) {
- // Don't bother to convert this up unless either the LHS is an associable add
- // or subtract or mul or if this is only used by one of the above.
- // This is only a compile-time improvement, it is not needed for correctness!
- auto isInteresting = [](Value *V) {
- for (auto Op : {Instruction::Add, Instruction::Sub, Instruction::Mul})
- if (isReassociableOp(V, Op))
- return true;
- return false;
- };
-
- if (any_of(Or->operands(), isInteresting))
- return true;
-
- Value *VB = Or->user_back();
- if (Or->hasOneUse() && isInteresting(VB))
- return true;
-
- return false;
-}
-
-/// If we have (X|Y), and iff X and Y have no common bits set,
-/// transform this into (X+Y) to allow arithmetics reassociation.
-static BinaryOperator *ConvertOrWithNoCommonBitsToAdd(Instruction *Or) {
- // Convert an or into an add.
- BinaryOperator *New =
- CreateAdd(Or->getOperand(0), Or->getOperand(1), "", Or, Or);
- New->setHasNoSignedWrap();
- New->setHasNoUnsignedWrap();
- New->takeName(Or);
-
- // Everyone now refers to the add instruction.
- Or->replaceAllUsesWith(New);
- New->setDebugLoc(Or->getDebugLoc());
-
- LLVM_DEBUG(dbgs() << "Converted or into an add: " << *New << '\n');
- return New;
-}
-
+// See if this `or` looks like a load widening reduction, i.e. that it
+// consists of `or`/`shl`/`zext`/`load` nodes only. Note that we don't
+// ensure that the pattern is *really* a load widening reduction,
+// we do not ensure that it can really be replaced with a widened load,
+// only that it mostly looks like one.
+static bool isLoadCombineCandidate(Instruction *Or) {
+ SmallVector<Instruction *, 8> Worklist;
+ SmallSet<Instruction *, 8> Visited;
+
+ auto Enqueue = [&](Value *V) {
+ auto *I = dyn_cast<Instruction>(V);
+ // Each node of an `or` reduction must be an instruction,
+ if (!I)
+ return false; // Node is certainly not part of an `or` load reduction.
+ // Only process instructions we have never processed before.
+ if (Visited.insert(I).second)
+ Worklist.emplace_back(I);
+ return true; // Will need to look at parent nodes.
+ };
+
+ if (!Enqueue(Or))
+ return false; // Not an `or` reduction pattern.
+
+ while (!Worklist.empty()) {
+ auto *I = Worklist.pop_back_val();
+
+ // Okay, which instruction is this node?
+ switch (I->getOpcode()) {
+ case Instruction::Or:
+      // Got an `or` node. That's fine, just recurse into its operands.
+ for (Value *Op : I->operands())
+ if (!Enqueue(Op))
+ return false; // Not an `or` reduction pattern.
+ continue;
+
+ case Instruction::Shl:
+ case Instruction::ZExt:
+ // `shl`/`zext` nodes are fine, just recurse into their base operand.
+ if (!Enqueue(I->getOperand(0)))
+ return false; // Not an `or` reduction pattern.
+ continue;
+
+ case Instruction::Load:
+ // Perfect, `load` node means we've reached an edge of the graph.
+ continue;
+
+ default: // Unknown node.
+ return false; // Not an `or` reduction pattern.
+ }
+ }
+
+ return true;
+}
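Editorial aside, not part of the diff: a small example of the or/shl/zext/load shape this check screens for, so reassociation does not break a likely load-widening. The function name is invented.

#include <cstdint>

std::uint32_t load32LE(const unsigned char *p) {
  return (std::uint32_t)p[0] |
         ((std::uint32_t)p[1] << 8) |
         ((std::uint32_t)p[2] << 16) |
         ((std::uint32_t)p[3] << 24);       // or/shl/zext/load chain a later
                                            // pass may widen into one load
}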
+
+/// Return true if it may be profitable to convert this (X|Y) into (X+Y).
+static bool ShouldConvertOrWithNoCommonBitsToAdd(Instruction *Or) {
+ // Don't bother to convert this up unless either the LHS is an associable add
+ // or subtract or mul or if this is only used by one of the above.
+ // This is only a compile-time improvement, it is not needed for correctness!
+ auto isInteresting = [](Value *V) {
+ for (auto Op : {Instruction::Add, Instruction::Sub, Instruction::Mul})
+ if (isReassociableOp(V, Op))
+ return true;
+ return false;
+ };
+
+ if (any_of(Or->operands(), isInteresting))
+ return true;
+
+ Value *VB = Or->user_back();
+ if (Or->hasOneUse() && isInteresting(VB))
+ return true;
+
+ return false;
+}
+
+/// If we have (X|Y), and iff X and Y have no common bits set,
+/// transform this into (X+Y) to allow arithmetics reassociation.
+static BinaryOperator *ConvertOrWithNoCommonBitsToAdd(Instruction *Or) {
+ // Convert an or into an add.
+ BinaryOperator *New =
+ CreateAdd(Or->getOperand(0), Or->getOperand(1), "", Or, Or);
+ New->setHasNoSignedWrap();
+ New->setHasNoUnsignedWrap();
+ New->takeName(Or);
+
+ // Everyone now refers to the add instruction.
+ Or->replaceAllUsesWith(New);
+ New->setDebugLoc(Or->getDebugLoc());
+
+ LLVM_DEBUG(dbgs() << "Converted or into an add: " << *New << '\n');
+ return New;
+}
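Editorial aside, not part of the diff: a minimal sketch of an `or` whose operands provably share no set bits, which this helper turns into an add. The function name is invented.

unsigned packHalves(unsigned hi, unsigned lo) {
  unsigned x = hi << 16;                    // low 16 bits known zero
  unsigned y = lo & 0xFFFFu;                // high 16 bits known zero
  return x | y;                             // operands share no common bits,
                                            // so the or becomes x + y (nuw nsw)
}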
+
/// Return true if we should break up this subtract of X-Y into (X + -Y).
static bool ShouldBreakUpSubtract(Instruction *Sub) {
// If this is a negation, we can't split it up!
@@ -1128,7 +1128,7 @@ static Value *EmitAddTreeOfValues(Instruction *I,
SmallVectorImpl<WeakTrackingVH> &Ops) {
if (Ops.size() == 1) return Ops.back();
- Value *V1 = Ops.pop_back_val();
+ Value *V1 = Ops.pop_back_val();
Value *V2 = EmitAddTreeOfValues(I, Ops);
return CreateAdd(V2, V1, "reass.add", I, I);
}
@@ -1992,7 +1992,7 @@ Value *ReassociatePass::OptimizeExpression(BinaryOperator *I,
void ReassociatePass::RecursivelyEraseDeadInsts(Instruction *I,
OrderedSet &Insts) {
assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
- SmallVector<Value *, 4> Ops(I->operands());
+ SmallVector<Value *, 4> Ops(I->operands());
ValueRankMap.erase(I);
Insts.remove(I);
RedoInsts.remove(I);
@@ -2009,7 +2009,7 @@ void ReassociatePass::EraseInst(Instruction *I) {
assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
LLVM_DEBUG(dbgs() << "Erasing dead inst: "; I->dump());
- SmallVector<Value *, 8> Ops(I->operands());
+ SmallVector<Value *, 8> Ops(I->operands());
// Erase the dead instruction.
ValueRankMap.erase(I);
RedoInsts.remove(I);
@@ -2209,19 +2209,19 @@ void ReassociatePass::OptimizeInst(Instruction *I) {
if (I->getType()->isIntegerTy(1))
return;
- // If this is a bitwise or instruction of operands
- // with no common bits set, convert it to X+Y.
- if (I->getOpcode() == Instruction::Or &&
- ShouldConvertOrWithNoCommonBitsToAdd(I) && !isLoadCombineCandidate(I) &&
- haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1),
- I->getModule()->getDataLayout(), /*AC=*/nullptr, I,
- /*DT=*/nullptr)) {
- Instruction *NI = ConvertOrWithNoCommonBitsToAdd(I);
- RedoInsts.insert(I);
- MadeChange = true;
- I = NI;
- }
-
+ // If this is a bitwise or instruction of operands
+ // with no common bits set, convert it to X+Y.
+ if (I->getOpcode() == Instruction::Or &&
+ ShouldConvertOrWithNoCommonBitsToAdd(I) && !isLoadCombineCandidate(I) &&
+ haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1),
+ I->getModule()->getDataLayout(), /*AC=*/nullptr, I,
+ /*DT=*/nullptr)) {
+ Instruction *NI = ConvertOrWithNoCommonBitsToAdd(I);
+ RedoInsts.insert(I);
+ MadeChange = true;
+ I = NI;
+ }
+
// If this is a subtract instruction which is not already in negate form,
// see if we can convert it to X+-Y.
if (I->getOpcode() == Instruction::Sub) {
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/Reg2Mem.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/Reg2Mem.cpp
index a49b9ad3f6..fef2f84a63 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -15,23 +15,23 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar/Reg2Mem.h"
+#include "llvm/Transforms/Scalar/Reg2Mem.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <list>
using namespace llvm;
@@ -41,17 +41,17 @@ using namespace llvm;
STATISTIC(NumRegsDemoted, "Number of registers demoted");
STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted");
-static bool valueEscapes(const Instruction &Inst) {
- const BasicBlock *BB = Inst.getParent();
- for (const User *U : Inst.users()) {
- const Instruction *UI = cast<Instruction>(U);
- if (UI->getParent() != BB || isa<PHINode>(UI))
- return true;
- }
- return false;
+static bool valueEscapes(const Instruction &Inst) {
+ const BasicBlock *BB = Inst.getParent();
+ for (const User *U : Inst.users()) {
+ const Instruction *UI = cast<Instruction>(U);
+ if (UI->getParent() != BB || isa<PHINode>(UI))
+ return true;
+ }
+ return false;
}
-static bool runPass(Function &F) {
+static bool runPass(Function &F) {
// Insert all new allocas into entry block.
BasicBlock *BBEntry = &F.getEntryBlock();
assert(pred_empty(BBEntry) &&
@@ -70,72 +70,72 @@ static bool runPass(Function &F) {
// Find the escaped instructions. But don't create stack slots for
// allocas in entry block.
std::list<Instruction*> WorkList;
- for (Instruction &I : instructions(F))
- if (!(isa<AllocaInst>(I) && I.getParent() == BBEntry) && valueEscapes(I))
- WorkList.push_front(&I);
+ for (Instruction &I : instructions(F))
+ if (!(isa<AllocaInst>(I) && I.getParent() == BBEntry) && valueEscapes(I))
+ WorkList.push_front(&I);
// Demote escaped instructions
NumRegsDemoted += WorkList.size();
- for (Instruction *I : WorkList)
- DemoteRegToStack(*I, false, AllocaInsertionPoint);
+ for (Instruction *I : WorkList)
+ DemoteRegToStack(*I, false, AllocaInsertionPoint);
WorkList.clear();
// Find all phi's
- for (BasicBlock &BB : F)
- for (auto &Phi : BB.phis())
- WorkList.push_front(&Phi);
+ for (BasicBlock &BB : F)
+ for (auto &Phi : BB.phis())
+ WorkList.push_front(&Phi);
// Demote phi nodes
NumPhisDemoted += WorkList.size();
- for (Instruction *I : WorkList)
- DemotePHIToStack(cast<PHINode>(I), AllocaInsertionPoint);
+ for (Instruction *I : WorkList)
+ DemotePHIToStack(cast<PHINode>(I), AllocaInsertionPoint);
return true;
}
-PreservedAnalyses RegToMemPass::run(Function &F, FunctionAnalysisManager &AM) {
- auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
- auto *LI = &AM.getResult<LoopAnalysis>(F);
- unsigned N = SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI));
- bool Changed = runPass(F);
- if (N == 0 && !Changed)
- return PreservedAnalyses::all();
- PreservedAnalyses PA;
- PA.preserve<DominatorTreeAnalysis>();
- PA.preserve<LoopAnalysis>();
- return PA;
-}
-
-namespace {
-struct RegToMemLegacy : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- RegToMemLegacy() : FunctionPass(ID) {
- initializeRegToMemLegacyPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequiredID(BreakCriticalEdgesID);
- AU.addPreservedID(BreakCriticalEdgesID);
- }
-
- bool runOnFunction(Function &F) override {
- if (F.isDeclaration() || skipFunction(F))
- return false;
- return runPass(F);
- }
-};
-} // namespace
-
-char RegToMemLegacy::ID = 0;
-INITIALIZE_PASS_BEGIN(RegToMemLegacy, "reg2mem",
- "Demote all values to stack slots", false, false)
-INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
-INITIALIZE_PASS_END(RegToMemLegacy, "reg2mem",
- "Demote all values to stack slots", false, false)
-
+PreservedAnalyses RegToMemPass::run(Function &F, FunctionAnalysisManager &AM) {
+ auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ auto *LI = &AM.getResult<LoopAnalysis>(F);
+ unsigned N = SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI));
+ bool Changed = runPass(F);
+ if (N == 0 && !Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<LoopAnalysis>();
+ return PA;
+}
+
+namespace {
+struct RegToMemLegacy : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ RegToMemLegacy() : FunctionPass(ID) {
+ initializeRegToMemLegacyPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequiredID(BreakCriticalEdgesID);
+ AU.addPreservedID(BreakCriticalEdgesID);
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (F.isDeclaration() || skipFunction(F))
+ return false;
+ return runPass(F);
+ }
+};
+} // namespace
+
+char RegToMemLegacy::ID = 0;
+INITIALIZE_PASS_BEGIN(RegToMemLegacy, "reg2mem",
+ "Demote all values to stack slots", false, false)
+INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
+INITIALIZE_PASS_END(RegToMemLegacy, "reg2mem",
+ "Demote all values to stack slots", false, false)
+
// createDemoteRegisterToMemory - Provide an entry point to create this pass.
-char &llvm::DemoteRegisterToMemoryID = RegToMemLegacy::ID;
+char &llvm::DemoteRegisterToMemoryID = RegToMemLegacy::ID;
FunctionPass *llvm::createDemoteRegisterToMemoryPass() {
- return new RegToMemLegacy();
+ return new RegToMemLegacy();
}
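Reg2Mem, diffed above, splits critical edges and then demotes to stack slots every instruction whose value escapes its defining basic block, plus every PHI node; as the hunk's comment notes, allocas already in the entry block are skipped rather than given fresh stack slots. The sketch below restates the escape test in self-contained C++ with invented types and names; it does not use the LLVM API.

#include <cassert>
#include <string>
#include <vector>

// A value "escapes" when some user lives in a different block or is a PHI
// node; exactly those values get demoted to memory by the pass.
struct FakeInst {
  std::string Block;                   // label of the containing basic block
  bool IsPhi = false;
  std::vector<const FakeInst *> Users; // instructions that use this value
};

static bool valueEscapes(const FakeInst &Inst) {
  for (const FakeInst *User : Inst.Users)
    if (User->Block != Inst.Block || User->IsPhi)
      return true;
  return false;
}

int main() {
  FakeInst Def, UseSame, UseOther;
  Def.Block = "entry";
  UseSame.Block = "entry";
  UseOther.Block = "exit";
  Def.Users = {&UseSame};
  assert(!valueEscapes(Def)); // used only in its own block: stays in a register
  Def.Users.push_back(&UseOther);
  assert(valueEscapes(Def));  // used elsewhere: demoted to a stack slot
  return 0;
}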
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index b7830555bf..ee39ffa000 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -1487,7 +1487,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
uint32_t NumPatchBytes = 0;
uint32_t Flags = uint32_t(StatepointFlags::None);
- SmallVector<Value *, 8> CallArgs(Call->args());
+ SmallVector<Value *, 8> CallArgs(Call->args());
Optional<ArrayRef<Use>> DeoptArgs;
if (auto Bundle = Call->getOperandBundle(LLVMContext::OB_deopt))
DeoptArgs = Bundle->Inputs;
@@ -1520,8 +1520,8 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
Value *CallTarget = Call->getCalledOperand();
if (Function *F = dyn_cast<Function>(CallTarget)) {
- auto IID = F->getIntrinsicID();
- if (IID == Intrinsic::experimental_deoptimize) {
+ auto IID = F->getIntrinsicID();
+ if (IID == Intrinsic::experimental_deoptimize) {
// Calls to llvm.experimental.deoptimize are lowered to calls to the
// __llvm_deoptimize symbol. We want to resolve this now, since the
// verifier does not allow taking the address of an intrinsic function.
@@ -1541,101 +1541,101 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
.getCallee();
IsDeoptimize = true;
- } else if (IID == Intrinsic::memcpy_element_unordered_atomic ||
- IID == Intrinsic::memmove_element_unordered_atomic) {
- // Unordered atomic memcpy and memmove intrinsics which are not explicitly
- // marked as "gc-leaf-function" should be lowered in a GC parseable way.
- // Specifically, these calls should be lowered to the
- // __llvm_{memcpy|memmove}_element_unordered_atomic_safepoint symbols.
- // Similarly to __llvm_deoptimize we want to resolve this now, since the
- // verifier does not allow taking the address of an intrinsic function.
- //
- // Moreover we need to shuffle the arguments for the call in order to
- // accommodate GC. The underlying source and destination objects might be
- // relocated during copy operation should the GC occur. To relocate the
- // derived source and destination pointers the implementation of the
- // intrinsic should know the corresponding base pointers.
- //
- // To make the base pointers available pass them explicitly as arguments:
- // memcpy(dest_derived, source_derived, ...) =>
- // memcpy(dest_base, dest_offset, source_base, source_offset, ...)
- auto &Context = Call->getContext();
- auto &DL = Call->getModule()->getDataLayout();
- auto GetBaseAndOffset = [&](Value *Derived) {
- assert(Result.PointerToBase.count(Derived));
- unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
- unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace);
- Value *Base = Result.PointerToBase.find(Derived)->second;
- Value *Base_int = Builder.CreatePtrToInt(
- Base, Type::getIntNTy(Context, IntPtrSize));
- Value *Derived_int = Builder.CreatePtrToInt(
- Derived, Type::getIntNTy(Context, IntPtrSize));
- return std::make_pair(Base, Builder.CreateSub(Derived_int, Base_int));
- };
-
- auto *Dest = CallArgs[0];
- Value *DestBase, *DestOffset;
- std::tie(DestBase, DestOffset) = GetBaseAndOffset(Dest);
-
- auto *Source = CallArgs[1];
- Value *SourceBase, *SourceOffset;
- std::tie(SourceBase, SourceOffset) = GetBaseAndOffset(Source);
-
- auto *LengthInBytes = CallArgs[2];
- auto *ElementSizeCI = cast<ConstantInt>(CallArgs[3]);
-
- CallArgs.clear();
- CallArgs.push_back(DestBase);
- CallArgs.push_back(DestOffset);
- CallArgs.push_back(SourceBase);
- CallArgs.push_back(SourceOffset);
- CallArgs.push_back(LengthInBytes);
-
- SmallVector<Type *, 8> DomainTy;
- for (Value *Arg : CallArgs)
- DomainTy.push_back(Arg->getType());
- auto *FTy = FunctionType::get(Type::getVoidTy(F->getContext()), DomainTy,
- /* isVarArg = */ false);
-
- auto GetFunctionName = [](Intrinsic::ID IID, ConstantInt *ElementSizeCI) {
- uint64_t ElementSize = ElementSizeCI->getZExtValue();
- if (IID == Intrinsic::memcpy_element_unordered_atomic) {
- switch (ElementSize) {
- case 1:
- return "__llvm_memcpy_element_unordered_atomic_safepoint_1";
- case 2:
- return "__llvm_memcpy_element_unordered_atomic_safepoint_2";
- case 4:
- return "__llvm_memcpy_element_unordered_atomic_safepoint_4";
- case 8:
- return "__llvm_memcpy_element_unordered_atomic_safepoint_8";
- case 16:
- return "__llvm_memcpy_element_unordered_atomic_safepoint_16";
- default:
- llvm_unreachable("unexpected element size!");
- }
- }
- assert(IID == Intrinsic::memmove_element_unordered_atomic);
- switch (ElementSize) {
- case 1:
- return "__llvm_memmove_element_unordered_atomic_safepoint_1";
- case 2:
- return "__llvm_memmove_element_unordered_atomic_safepoint_2";
- case 4:
- return "__llvm_memmove_element_unordered_atomic_safepoint_4";
- case 8:
- return "__llvm_memmove_element_unordered_atomic_safepoint_8";
- case 16:
- return "__llvm_memmove_element_unordered_atomic_safepoint_16";
- default:
- llvm_unreachable("unexpected element size!");
- }
- };
-
- CallTarget =
- F->getParent()
- ->getOrInsertFunction(GetFunctionName(IID, ElementSizeCI), FTy)
- .getCallee();
+ } else if (IID == Intrinsic::memcpy_element_unordered_atomic ||
+ IID == Intrinsic::memmove_element_unordered_atomic) {
+ // Unordered atomic memcpy and memmove intrinsics which are not explicitly
+ // marked as "gc-leaf-function" should be lowered in a GC parseable way.
+ // Specifically, these calls should be lowered to the
+ // __llvm_{memcpy|memmove}_element_unordered_atomic_safepoint symbols.
+ // Similarly to __llvm_deoptimize we want to resolve this now, since the
+ // verifier does not allow taking the address of an intrinsic function.
+ //
+ // Moreover we need to shuffle the arguments for the call in order to
+ // accommodate GC. The underlying source and destination objects might be
+ // relocated during copy operation should the GC occur. To relocate the
+ // derived source and destination pointers the implementation of the
+ // intrinsic should know the corresponding base pointers.
+ //
+ // To make the base pointers available pass them explicitly as arguments:
+ // memcpy(dest_derived, source_derived, ...) =>
+ // memcpy(dest_base, dest_offset, source_base, source_offset, ...)
+ auto &Context = Call->getContext();
+ auto &DL = Call->getModule()->getDataLayout();
+ auto GetBaseAndOffset = [&](Value *Derived) {
+ assert(Result.PointerToBase.count(Derived));
+ unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
+ unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace);
+ Value *Base = Result.PointerToBase.find(Derived)->second;
+ Value *Base_int = Builder.CreatePtrToInt(
+ Base, Type::getIntNTy(Context, IntPtrSize));
+ Value *Derived_int = Builder.CreatePtrToInt(
+ Derived, Type::getIntNTy(Context, IntPtrSize));
+ return std::make_pair(Base, Builder.CreateSub(Derived_int, Base_int));
+ };
+
+ auto *Dest = CallArgs[0];
+ Value *DestBase, *DestOffset;
+ std::tie(DestBase, DestOffset) = GetBaseAndOffset(Dest);
+
+ auto *Source = CallArgs[1];
+ Value *SourceBase, *SourceOffset;
+ std::tie(SourceBase, SourceOffset) = GetBaseAndOffset(Source);
+
+ auto *LengthInBytes = CallArgs[2];
+ auto *ElementSizeCI = cast<ConstantInt>(CallArgs[3]);
+
+ CallArgs.clear();
+ CallArgs.push_back(DestBase);
+ CallArgs.push_back(DestOffset);
+ CallArgs.push_back(SourceBase);
+ CallArgs.push_back(SourceOffset);
+ CallArgs.push_back(LengthInBytes);
+
+ SmallVector<Type *, 8> DomainTy;
+ for (Value *Arg : CallArgs)
+ DomainTy.push_back(Arg->getType());
+ auto *FTy = FunctionType::get(Type::getVoidTy(F->getContext()), DomainTy,
+ /* isVarArg = */ false);
+
+ auto GetFunctionName = [](Intrinsic::ID IID, ConstantInt *ElementSizeCI) {
+ uint64_t ElementSize = ElementSizeCI->getZExtValue();
+ if (IID == Intrinsic::memcpy_element_unordered_atomic) {
+ switch (ElementSize) {
+ case 1:
+ return "__llvm_memcpy_element_unordered_atomic_safepoint_1";
+ case 2:
+ return "__llvm_memcpy_element_unordered_atomic_safepoint_2";
+ case 4:
+ return "__llvm_memcpy_element_unordered_atomic_safepoint_4";
+ case 8:
+ return "__llvm_memcpy_element_unordered_atomic_safepoint_8";
+ case 16:
+ return "__llvm_memcpy_element_unordered_atomic_safepoint_16";
+ default:
+ llvm_unreachable("unexpected element size!");
+ }
+ }
+ assert(IID == Intrinsic::memmove_element_unordered_atomic);
+ switch (ElementSize) {
+ case 1:
+ return "__llvm_memmove_element_unordered_atomic_safepoint_1";
+ case 2:
+ return "__llvm_memmove_element_unordered_atomic_safepoint_2";
+ case 4:
+ return "__llvm_memmove_element_unordered_atomic_safepoint_4";
+ case 8:
+ return "__llvm_memmove_element_unordered_atomic_safepoint_8";
+ case 16:
+ return "__llvm_memmove_element_unordered_atomic_safepoint_16";
+ default:
+ llvm_unreachable("unexpected element size!");
+ }
+ };
+
+ CallTarget =
+ F->getParent()
+ ->getOrInsertFunction(GetFunctionName(IID, ElementSizeCI), FTy)
+ .getCallee();
}
}
@@ -2036,7 +2036,7 @@ static void relocationViaAlloca(
/// tests in ways which make them less useful in testing fused safepoints.
template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) {
SmallSet<T, 8> Seen;
- erase_if(Vec, [&](const T &V) { return !Seen.insert(V).second; });
+ erase_if(Vec, [&](const T &V) { return !Seen.insert(V).second; });
}
/// Insert holders so that each Value is obviously live through the entire
@@ -2108,10 +2108,10 @@ static Value* findRematerializableChainToBasePointer(
// Helper function for the "rematerializeLiveValues". Compute cost of the use
// chain we are going to rematerialize.
-static InstructionCost
-chainToBasePointerCost(SmallVectorImpl<Instruction *> &Chain,
+static InstructionCost
+chainToBasePointerCost(SmallVectorImpl<Instruction *> &Chain,
TargetTransformInfo &TTI) {
- InstructionCost Cost = 0;
+ InstructionCost Cost = 0;
for (Instruction *Instr : Chain) {
if (CastInst *CI = dyn_cast<CastInst>(Instr)) {
@@ -2120,8 +2120,8 @@ chainToBasePointerCost(SmallVectorImpl<Instruction *> &Chain,
Type *SrcTy = CI->getOperand(0)->getType();
Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy,
- TTI::getCastContextHint(CI),
- TargetTransformInfo::TCK_SizeAndLatency, CI);
+ TTI::getCastContextHint(CI),
+ TargetTransformInfo::TCK_SizeAndLatency, CI);
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
// Cost of the address calculation
@@ -2218,7 +2218,7 @@ static void rematerializeLiveValues(CallBase *Call,
assert(Info.LiveSet.count(AlternateRootPhi));
}
// Compute cost of this chain
- InstructionCost Cost = chainToBasePointerCost(ChainToBase, TTI);
+ InstructionCost Cost = chainToBasePointerCost(ChainToBase, TTI);
// TODO: We can also account for cases when we will be able to remove some
// of the rematerialized values by later optimization passes. I.e if
// we rematerialized several intersecting chains. Or if original values
@@ -2499,7 +2499,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// That Value* no longer exists and we need to use the new gc_result.
// Thankfully, the live set is embedded in the statepoint (and updated), so
// we just grab that.
- llvm::append_range(Live, Info.StatepointToken->gc_args());
+ llvm::append_range(Live, Info.StatepointToken->gc_args());
#ifndef NDEBUG
// Do some basic sanity checks on our liveness results before performing
// relocation. Relocation can and will turn mistakes in liveness results
@@ -2675,27 +2675,27 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
assert(shouldRewriteStatepointsIn(F) && "mismatch in rewrite decision");
auto NeedsRewrite = [&TLI](Instruction &I) {
- if (const auto *Call = dyn_cast<CallBase>(&I)) {
- if (isa<GCStatepointInst>(Call))
- return false;
- if (callsGCLeafFunction(Call, TLI))
- return false;
-
- // Normally it's up to the frontend to make sure that non-leaf calls also
- // have proper deopt state if it is required. We make an exception for
- // element atomic memcpy/memmove intrinsics here. Unlike other intrinsics
- // these are non-leaf by default. They might be generated by the optimizer
- // which doesn't know how to produce a proper deopt state. So if we see a
- // non-leaf memcpy/memmove without deopt state just treat it as a leaf
- // copy and don't produce a statepoint.
- if (!AllowStatepointWithNoDeoptInfo &&
- !Call->getOperandBundle(LLVMContext::OB_deopt)) {
- assert((isa<AtomicMemCpyInst>(Call) || isa<AtomicMemMoveInst>(Call)) &&
- "Don't expect any other calls here!");
- return false;
- }
- return true;
- }
+ if (const auto *Call = dyn_cast<CallBase>(&I)) {
+ if (isa<GCStatepointInst>(Call))
+ return false;
+ if (callsGCLeafFunction(Call, TLI))
+ return false;
+
+ // Normally it's up to the frontend to make sure that non-leaf calls also
+ // have proper deopt state if it is required. We make an exception for
+ // element atomic memcpy/memmove intrinsics here. Unlike other intrinsics
+ // these are non-leaf by default. They might be generated by the optimizer
+ // which doesn't know how to produce a proper deopt state. So if we see a
+ // non-leaf memcpy/memmove without deopt state just treat it as a leaf
+ // copy and don't produce a statepoint.
+ if (!AllowStatepointWithNoDeoptInfo &&
+ !Call->getOperandBundle(LLVMContext::OB_deopt)) {
+ assert((isa<AtomicMemCpyInst>(Call) || isa<AtomicMemMoveInst>(Call)) &&
+ "Don't expect any other calls here!");
+ return false;
+ }
+ return true;
+ }
return false;
};
@@ -2733,8 +2733,8 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
// of liveness sets for no good reason. It may be harder to do this post
// insertion since relocations and base phis can confuse things.
for (BasicBlock &BB : F)
- if (BB.getUniquePredecessor())
- MadeChange |= FoldSingleEntryPHINodes(&BB);
+ if (BB.getUniquePredecessor())
+ MadeChange |= FoldSingleEntryPHINodes(&BB);
// Before we start introducing relocations, we want to tweak the IR a bit to
// avoid unfortunate code generation effects. The main example is that we
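The RewriteStatepointsForGC hunk above lowers unordered-atomic element-wise memcpy/memmove calls that need a safepoint to dedicated runtime symbols, one per element size, and rewrites each pointer argument into a (base, offset) pair so the callee knows the base pointers and can re-derive the element pointers if a relocation happens mid-copy. The standalone sketch below mirrors only the symbol-name selection from that hunk; the helper name is the example's own.

#include <cstdint>
#include <stdexcept>
#include <string>

// One safepoint-aware entry point exists per element size (1, 2, 4, 8 or 16
// bytes), for memcpy and for memmove, matching the switches in the hunk.
static std::string safepointCopySymbol(bool IsMemmove, uint64_t ElementSize) {
  const std::string Prefix =
      IsMemmove ? "__llvm_memmove_element_unordered_atomic_safepoint_"
                : "__llvm_memcpy_element_unordered_atomic_safepoint_";
  switch (ElementSize) {
  case 1: case 2: case 4: case 8: case 16:
    return Prefix + std::to_string(ElementSize);
  default:
    throw std::invalid_argument("unexpected element size");
  }
}

int main() {
  return safepointCopySymbol(/*IsMemmove=*/false, 8) ==
                 "__llvm_memcpy_element_unordered_atomic_safepoint_8"
             ? 0
             : 1;
}

The argument shuffle in the hunk, (dest, src, len) becoming (dest_base, dest_offset, src_base, src_offset, len), serves the same purpose: the explicit bases are what the runtime needs in order to relocate the derived source and destination pointers.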
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/SCCP.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/SCCP.cpp
index 8feed9e9eb..97a5040300 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/SCCP.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/SCCP.cpp
@@ -23,7 +23,7 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -34,7 +34,7 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueLattice.h"
#include "llvm/Analysis/ValueLatticeUtils.h"
-#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -105,7 +105,7 @@ bool isConstant(const ValueLatticeElement &LV) {
// ValueLatticeElement::isOverdefined() and is intended to be used in the
// transition to ValueLatticeElement.
bool isOverdefined(const ValueLatticeElement &LV) {
- return !LV.isUnknownOrUndef() && !isConstant(LV);
+ return !LV.isUnknownOrUndef() && !isConstant(LV);
}
//===----------------------------------------------------------------------===//
@@ -234,7 +234,7 @@ public:
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
TrackedMultipleRetVals.insert(
std::make_pair(std::make_pair(F, i), ValueLatticeElement()));
- } else if (!F->getReturnType()->isVoidTy())
+ } else if (!F->getReturnType()->isVoidTy())
TrackedRetVals.insert(std::make_pair(F, ValueLatticeElement()));
}
@@ -276,7 +276,7 @@ public:
// isEdgeFeasible - Return true if the control flow edge from the 'From' basic
// block to the 'To' basic block is currently feasible.
- bool isEdgeFeasible(BasicBlock *From, BasicBlock *To) const;
+ bool isEdgeFeasible(BasicBlock *From, BasicBlock *To) const;
std::vector<ValueLatticeElement> getStructLatticeValueFor(Value *V) const {
std::vector<ValueLatticeElement> StructValues;
@@ -542,14 +542,14 @@ private:
auto Iter = AdditionalUsers.find(I);
if (Iter != AdditionalUsers.end()) {
- // Copy additional users before notifying them of changes, because new
- // users may be added, potentially invalidating the iterator.
- SmallVector<Instruction *, 2> ToNotify;
+ // Copy additional users before notifying them of changes, because new
+ // users may be added, potentially invalidating the iterator.
+ SmallVector<Instruction *, 2> ToNotify;
for (User *U : Iter->second)
if (auto *UI = dyn_cast<Instruction>(U))
- ToNotify.push_back(UI);
- for (Instruction *UI : ToNotify)
- OperandChangedState(UI);
+ ToNotify.push_back(UI);
+ for (Instruction *UI : ToNotify)
+ OperandChangedState(UI);
}
}
void handleCallOverdefined(CallBase &CB);
@@ -654,30 +654,30 @@ void SCCPSolver::getFeasibleSuccessors(Instruction &TI,
Succs[0] = true;
return;
}
- const ValueLatticeElement &SCValue = getValueState(SI->getCondition());
- if (ConstantInt *CI = getConstantInt(SCValue)) {
- Succs[SI->findCaseValue(CI)->getSuccessorIndex()] = true;
+ const ValueLatticeElement &SCValue = getValueState(SI->getCondition());
+ if (ConstantInt *CI = getConstantInt(SCValue)) {
+ Succs[SI->findCaseValue(CI)->getSuccessorIndex()] = true;
+ return;
+ }
+
+ // TODO: Switch on undef is UB. Stop passing false once the rest of LLVM
+ // is ready.
+ if (SCValue.isConstantRange(/*UndefAllowed=*/false)) {
+ const ConstantRange &Range = SCValue.getConstantRange();
+ for (const auto &Case : SI->cases()) {
+ const APInt &CaseValue = Case.getCaseValue()->getValue();
+ if (Range.contains(CaseValue))
+ Succs[Case.getSuccessorIndex()] = true;
+ }
+
+ // TODO: Determine whether default case is reachable.
+ Succs[SI->case_default()->getSuccessorIndex()] = true;
return;
}
- // TODO: Switch on undef is UB. Stop passing false once the rest of LLVM
- // is ready.
- if (SCValue.isConstantRange(/*UndefAllowed=*/false)) {
- const ConstantRange &Range = SCValue.getConstantRange();
- for (const auto &Case : SI->cases()) {
- const APInt &CaseValue = Case.getCaseValue()->getValue();
- if (Range.contains(CaseValue))
- Succs[Case.getSuccessorIndex()] = true;
- }
-
- // TODO: Determine whether default case is reachable.
- Succs[SI->case_default()->getSuccessorIndex()] = true;
- return;
- }
-
- // Overdefined or unknown condition? All destinations are executable!
- if (!SCValue.isUnknownOrUndef())
- Succs.assign(TI.getNumSuccessors(), true);
+ // Overdefined or unknown condition? All destinations are executable!
+ if (!SCValue.isUnknownOrUndef())
+ Succs.assign(TI.getNumSuccessors(), true);
return;
}
@@ -723,7 +723,7 @@ void SCCPSolver::getFeasibleSuccessors(Instruction &TI,
// isEdgeFeasible - Return true if the control flow edge from the 'From' basic
// block to the 'To' basic block is currently feasible.
-bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) const {
+bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) const {
// Check if we've called markEdgeExecutable on the edge yet. (We could
// be more aggressive and try to consider edges which haven't been marked
// yet, but there isn't any need.)
@@ -848,16 +848,16 @@ void SCCPSolver::visitCastInst(CastInst &I) {
auto &LV = getValueState(&I);
ConstantRange OpRange = OpSt.getConstantRange();
Type *DestTy = I.getDestTy();
- // Vectors where all elements have the same known constant range are treated
- // as a single constant range in the lattice. When bitcasting such vectors,
- // there is a mis-match between the width of the lattice value (single
- // constant range) and the original operands (vector). Go to overdefined in
- // that case.
- if (I.getOpcode() == Instruction::BitCast &&
- I.getOperand(0)->getType()->isVectorTy() &&
- OpRange.getBitWidth() < DL.getTypeSizeInBits(DestTy))
- return (void)markOverdefined(&I);
-
+ // Vectors where all elements have the same known constant range are treated
+ // as a single constant range in the lattice. When bitcasting such vectors,
+ // there is a mis-match between the width of the lattice value (single
+ // constant range) and the original operands (vector). Go to overdefined in
+ // that case.
+ if (I.getOpcode() == Instruction::BitCast &&
+ I.getOperand(0)->getType()->isVectorTy() &&
+ OpRange.getBitWidth() < DL.getTypeSizeInBits(DestTy))
+ return (void)markOverdefined(&I);
+
ConstantRange Res =
OpRange.castOp(I.getOpcode(), DL.getTypeSizeInBits(DestTy));
mergeInValue(LV, &I, ValueLatticeElement::getRange(Res));
@@ -1138,9 +1138,9 @@ static ValueLatticeElement getValueFromMetadata(const Instruction *I) {
if (I->getType()->isIntegerTy())
return ValueLatticeElement::getRange(
getConstantRangeFromMetadata(*Ranges));
- if (I->hasMetadata(LLVMContext::MD_nonnull))
- return ValueLatticeElement::getNot(
- ConstantPointerNull::get(cast<PointerType>(I->getType())));
+ if (I->hasMetadata(LLVMContext::MD_nonnull))
+ return ValueLatticeElement::getNot(
+ ConstantPointerNull::get(cast<PointerType>(I->getType())));
return ValueLatticeElement::getOverdefined();
}
@@ -1293,33 +1293,33 @@ void SCCPSolver::handleCallResult(CallBase &CB) {
auto *PI = getPredicateInfoFor(&CB);
assert(PI && "Missing predicate info for ssa.copy");
- const Optional<PredicateConstraint> &Constraint = PI->getConstraint();
- if (!Constraint) {
+ const Optional<PredicateConstraint> &Constraint = PI->getConstraint();
+ if (!Constraint) {
mergeInValue(ValueState[&CB], &CB, CopyOfVal);
return;
}
- CmpInst::Predicate Pred = Constraint->Predicate;
- Value *OtherOp = Constraint->OtherOp;
+ CmpInst::Predicate Pred = Constraint->Predicate;
+ Value *OtherOp = Constraint->OtherOp;
- // Wait until OtherOp is resolved.
- if (getValueState(OtherOp).isUnknown()) {
- addAdditionalUser(OtherOp, &CB);
+ // Wait until OtherOp is resolved.
+ if (getValueState(OtherOp).isUnknown()) {
+ addAdditionalUser(OtherOp, &CB);
return;
}
-      // TODO: Actually flip MayIncludeUndef for the created range to false,
- // once most places in the optimizer respect the branches on
- // undef/poison are UB rule. The reason why the new range cannot be
- // undef is as follows below:
- // The new range is based on a branch condition. That guarantees that
- // neither of the compare operands can be undef in the branch targets,
- // unless we have conditions that are always true/false (e.g. icmp ule
- // i32, %a, i32_max). For the latter overdefined/empty range will be
- // inferred, but the branch will get folded accordingly anyways.
- bool MayIncludeUndef = !isa<PredicateAssume>(PI);
-
- ValueLatticeElement CondVal = getValueState(OtherOp);
+      // TODO: Actually flip MayIncludeUndef for the created range to false,
+ // once most places in the optimizer respect the branches on
+ // undef/poison are UB rule. The reason why the new range cannot be
+ // undef is as follows below:
+ // The new range is based on a branch condition. That guarantees that
+ // neither of the compare operands can be undef in the branch targets,
+ // unless we have conditions that are always true/false (e.g. icmp ule
+ // i32, %a, i32_max). For the latter overdefined/empty range will be
+ // inferred, but the branch will get folded accordingly anyways.
+ bool MayIncludeUndef = !isa<PredicateAssume>(PI);
+
+ ValueLatticeElement CondVal = getValueState(OtherOp);
ValueLatticeElement &IV = ValueState[&CB];
if (CondVal.isConstantRange() || CopyOfVal.isConstantRange()) {
auto ImposedCR =
@@ -1343,47 +1343,47 @@ void SCCPSolver::handleCallResult(CallBase &CB) {
if (!CopyOfCR.contains(NewCR) && CopyOfCR.getSingleMissingElement())
NewCR = CopyOfCR;
- addAdditionalUser(OtherOp, &CB);
+ addAdditionalUser(OtherOp, &CB);
mergeInValue(
IV, &CB,
- ValueLatticeElement::getRange(NewCR, MayIncludeUndef));
+ ValueLatticeElement::getRange(NewCR, MayIncludeUndef));
return;
} else if (Pred == CmpInst::ICMP_EQ && CondVal.isConstant()) {
// For non-integer values or integer constant expressions, only
// propagate equal constants.
- addAdditionalUser(OtherOp, &CB);
+ addAdditionalUser(OtherOp, &CB);
mergeInValue(IV, &CB, CondVal);
return;
- } else if (Pred == CmpInst::ICMP_NE && CondVal.isConstant() &&
- !MayIncludeUndef) {
- // Propagate inequalities.
- addAdditionalUser(OtherOp, &CB);
- mergeInValue(IV, &CB,
- ValueLatticeElement::getNot(CondVal.getConstant()));
- return;
+ } else if (Pred == CmpInst::ICMP_NE && CondVal.isConstant() &&
+ !MayIncludeUndef) {
+ // Propagate inequalities.
+ addAdditionalUser(OtherOp, &CB);
+ mergeInValue(IV, &CB,
+ ValueLatticeElement::getNot(CondVal.getConstant()));
+ return;
}
return (void)mergeInValue(IV, &CB, CopyOfVal);
}
-
- if (ConstantRange::isIntrinsicSupported(II->getIntrinsicID())) {
- // Compute result range for intrinsics supported by ConstantRange.
- // Do this even if we don't know a range for all operands, as we may
- // still know something about the result range, e.g. of abs(x).
- SmallVector<ConstantRange, 2> OpRanges;
- for (Value *Op : II->args()) {
- const ValueLatticeElement &State = getValueState(Op);
- if (State.isConstantRange())
- OpRanges.push_back(State.getConstantRange());
- else
- OpRanges.push_back(
- ConstantRange::getFull(Op->getType()->getScalarSizeInBits()));
- }
-
- ConstantRange Result =
- ConstantRange::intrinsic(II->getIntrinsicID(), OpRanges);
- return (void)mergeInValue(II, ValueLatticeElement::getRange(Result));
- }
+
+ if (ConstantRange::isIntrinsicSupported(II->getIntrinsicID())) {
+ // Compute result range for intrinsics supported by ConstantRange.
+ // Do this even if we don't know a range for all operands, as we may
+ // still know something about the result range, e.g. of abs(x).
+ SmallVector<ConstantRange, 2> OpRanges;
+ for (Value *Op : II->args()) {
+ const ValueLatticeElement &State = getValueState(Op);
+ if (State.isConstantRange())
+ OpRanges.push_back(State.getConstantRange());
+ else
+ OpRanges.push_back(
+ ConstantRange::getFull(Op->getType()->getScalarSizeInBits()));
+ }
+
+ ConstantRange Result =
+ ConstantRange::intrinsic(II->getIntrinsicID(), OpRanges);
+ return (void)mergeInValue(II, ValueLatticeElement::getRange(Result));
+ }
}
// The common case is that we aren't tracking the callee, either because we
@@ -1453,7 +1453,7 @@ void SCCPSolver::Solve() {
// Process the basic block work list.
while (!BBWorkList.empty()) {
- BasicBlock *BB = BBWorkList.pop_back_val();
+ BasicBlock *BB = BBWorkList.pop_back_val();
LLVM_DEBUG(dbgs() << "\nPopped off BBWL: " << *BB << '\n');
@@ -1481,7 +1481,7 @@ void SCCPSolver::Solve() {
/// This scan also checks for values that use undefs. It conservatively marks
/// them as overdefined.
bool SCCPSolver::ResolvedUndefsIn(Function &F) {
- bool MadeChange = false;
+ bool MadeChange = false;
for (BasicBlock &BB : F) {
if (!BBExecutable.count(&BB))
continue;
@@ -1507,10 +1507,10 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// more precise than this but it isn't worth bothering.
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
ValueLatticeElement &LV = getStructValueState(&I, i);
- if (LV.isUnknownOrUndef()) {
+ if (LV.isUnknownOrUndef()) {
markOverdefined(LV, &I);
- MadeChange = true;
- }
+ MadeChange = true;
+ }
}
continue;
}
@@ -1537,7 +1537,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
}
markOverdefined(&I);
- MadeChange = true;
+ MadeChange = true;
}
// Check to see if we have a branch or switch on an undefined value. If so
@@ -1554,8 +1554,8 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
if (isa<UndefValue>(BI->getCondition())) {
BI->setCondition(ConstantInt::getFalse(BI->getContext()));
markEdgeExecutable(&BB, TI->getSuccessor(1));
- MadeChange = true;
- continue;
+ MadeChange = true;
+ continue;
}
// Otherwise, it is a branch on a symbolic value which is currently
@@ -1564,7 +1564,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// FIXME: Distinguish between dead code and an LLVM "undef" value.
BasicBlock *DefaultSuccessor = TI->getSuccessor(1);
if (markEdgeExecutable(&BB, DefaultSuccessor))
- MadeChange = true;
+ MadeChange = true;
continue;
}
@@ -1583,8 +1583,8 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
if (isa<UndefValue>(IBR->getAddress())) {
IBR->setAddress(BlockAddress::get(IBR->getSuccessor(0)));
markEdgeExecutable(&BB, IBR->getSuccessor(0));
- MadeChange = true;
- continue;
+ MadeChange = true;
+ continue;
}
// Otherwise, it is a branch on a symbolic value which is currently
@@ -1594,7 +1594,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// we can assume the branch has undefined behavior instead.
BasicBlock *DefaultSuccessor = IBR->getSuccessor(0);
if (markEdgeExecutable(&BB, DefaultSuccessor))
- MadeChange = true;
+ MadeChange = true;
continue;
}
@@ -1609,8 +1609,8 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
if (isa<UndefValue>(SI->getCondition())) {
SI->setCondition(SI->case_begin()->getCaseValue());
markEdgeExecutable(&BB, SI->case_begin()->getCaseSuccessor());
- MadeChange = true;
- continue;
+ MadeChange = true;
+ continue;
}
// Otherwise, it is a branch on a symbolic value which is currently
@@ -1619,13 +1619,13 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// FIXME: Distinguish between dead code and an LLVM "undef" value.
BasicBlock *DefaultSuccessor = SI->case_begin()->getCaseSuccessor();
if (markEdgeExecutable(&BB, DefaultSuccessor))
- MadeChange = true;
+ MadeChange = true;
continue;
}
}
- return MadeChange;
+ return MadeChange;
}
static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) {
@@ -1747,7 +1747,7 @@ static bool runSCCP(Function &F, const DataLayout &DL,
LLVM_DEBUG(dbgs() << " BasicBlock Dead:" << BB);
++NumDeadBlocks;
- NumInstRemoved += removeAllNonTerminatorAndEHPadInstructions(&BB).first;
+ NumInstRemoved += removeAllNonTerminatorAndEHPadInstructions(&BB).first;
MadeChanges = true;
continue;
@@ -1870,68 +1870,68 @@ static void findReturnsToZap(Function &F,
}
}
-static bool removeNonFeasibleEdges(const SCCPSolver &Solver, BasicBlock *BB,
- DomTreeUpdater &DTU) {
- SmallPtrSet<BasicBlock *, 8> FeasibleSuccessors;
- bool HasNonFeasibleEdges = false;
- for (BasicBlock *Succ : successors(BB)) {
- if (Solver.isEdgeFeasible(BB, Succ))
- FeasibleSuccessors.insert(Succ);
- else
- HasNonFeasibleEdges = true;
- }
-
- // All edges feasible, nothing to do.
- if (!HasNonFeasibleEdges)
- return false;
-
- // SCCP can only determine non-feasible edges for br, switch and indirectbr.
- Instruction *TI = BB->getTerminator();
- assert((isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
- isa<IndirectBrInst>(TI)) &&
- "Terminator must be a br, switch or indirectbr");
-
- if (FeasibleSuccessors.size() == 1) {
- // Replace with an unconditional branch to the only feasible successor.
- BasicBlock *OnlyFeasibleSuccessor = *FeasibleSuccessors.begin();
- SmallVector<DominatorTree::UpdateType, 8> Updates;
- bool HaveSeenOnlyFeasibleSuccessor = false;
- for (BasicBlock *Succ : successors(BB)) {
- if (Succ == OnlyFeasibleSuccessor && !HaveSeenOnlyFeasibleSuccessor) {
- // Don't remove the edge to the only feasible successor the first time
- // we see it. We still do need to remove any multi-edges to it though.
- HaveSeenOnlyFeasibleSuccessor = true;
- continue;
- }
-
- Succ->removePredecessor(BB);
- Updates.push_back({DominatorTree::Delete, BB, Succ});
+static bool removeNonFeasibleEdges(const SCCPSolver &Solver, BasicBlock *BB,
+ DomTreeUpdater &DTU) {
+ SmallPtrSet<BasicBlock *, 8> FeasibleSuccessors;
+ bool HasNonFeasibleEdges = false;
+ for (BasicBlock *Succ : successors(BB)) {
+ if (Solver.isEdgeFeasible(BB, Succ))
+ FeasibleSuccessors.insert(Succ);
+ else
+ HasNonFeasibleEdges = true;
+ }
+
+ // All edges feasible, nothing to do.
+ if (!HasNonFeasibleEdges)
+ return false;
+
+ // SCCP can only determine non-feasible edges for br, switch and indirectbr.
+ Instruction *TI = BB->getTerminator();
+ assert((isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
+ isa<IndirectBrInst>(TI)) &&
+ "Terminator must be a br, switch or indirectbr");
+
+ if (FeasibleSuccessors.size() == 1) {
+ // Replace with an unconditional branch to the only feasible successor.
+ BasicBlock *OnlyFeasibleSuccessor = *FeasibleSuccessors.begin();
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+ bool HaveSeenOnlyFeasibleSuccessor = false;
+ for (BasicBlock *Succ : successors(BB)) {
+ if (Succ == OnlyFeasibleSuccessor && !HaveSeenOnlyFeasibleSuccessor) {
+ // Don't remove the edge to the only feasible successor the first time
+ // we see it. We still do need to remove any multi-edges to it though.
+ HaveSeenOnlyFeasibleSuccessor = true;
+ continue;
+ }
+
+ Succ->removePredecessor(BB);
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
}
-
- BranchInst::Create(OnlyFeasibleSuccessor, BB);
- TI->eraseFromParent();
- DTU.applyUpdatesPermissive(Updates);
- } else if (FeasibleSuccessors.size() > 1) {
- SwitchInstProfUpdateWrapper SI(*cast<SwitchInst>(TI));
- SmallVector<DominatorTree::UpdateType, 8> Updates;
- for (auto CI = SI->case_begin(); CI != SI->case_end();) {
- if (FeasibleSuccessors.contains(CI->getCaseSuccessor())) {
- ++CI;
- continue;
- }
-
- BasicBlock *Succ = CI->getCaseSuccessor();
- Succ->removePredecessor(BB);
- Updates.push_back({DominatorTree::Delete, BB, Succ});
- SI.removeCase(CI);
- // Don't increment CI, as we removed a case.
+
+ BranchInst::Create(OnlyFeasibleSuccessor, BB);
+ TI->eraseFromParent();
+ DTU.applyUpdatesPermissive(Updates);
+ } else if (FeasibleSuccessors.size() > 1) {
+ SwitchInstProfUpdateWrapper SI(*cast<SwitchInst>(TI));
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+ for (auto CI = SI->case_begin(); CI != SI->case_end();) {
+ if (FeasibleSuccessors.contains(CI->getCaseSuccessor())) {
+ ++CI;
+ continue;
+ }
+
+ BasicBlock *Succ = CI->getCaseSuccessor();
+ Succ->removePredecessor(BB);
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
+ SI.removeCase(CI);
+ // Don't increment CI, as we removed a case.
}
-
- DTU.applyUpdatesPermissive(Updates);
+
+ DTU.applyUpdatesPermissive(Updates);
} else {
- llvm_unreachable("Must have at least one feasible successor");
+ llvm_unreachable("Must have at least one feasible successor");
}
- return true;
+ return true;
}
bool llvm::runIPSCCP(
@@ -1983,12 +1983,12 @@ bool llvm::runIPSCCP(
while (ResolvedUndefs) {
LLVM_DEBUG(dbgs() << "RESOLVING UNDEFS\n");
ResolvedUndefs = false;
- for (Function &F : M) {
- if (Solver.ResolvedUndefsIn(F))
+ for (Function &F : M) {
+ if (Solver.ResolvedUndefsIn(F))
ResolvedUndefs = true;
- }
- if (ResolvedUndefs)
- Solver.Solve();
+ }
+ if (ResolvedUndefs)
+ Solver.Solve();
}
bool MadeChanges = false;
@@ -2002,35 +2002,35 @@ bool llvm::runIPSCCP(
SmallVector<BasicBlock *, 512> BlocksToErase;
- if (Solver.isBlockExecutable(&F.front())) {
- bool ReplacedPointerArg = false;
- for (Argument &Arg : F.args()) {
- if (!Arg.use_empty() && tryToReplaceWithConstant(Solver, &Arg)) {
- ReplacedPointerArg |= Arg.getType()->isPointerTy();
+ if (Solver.isBlockExecutable(&F.front())) {
+ bool ReplacedPointerArg = false;
+ for (Argument &Arg : F.args()) {
+ if (!Arg.use_empty() && tryToReplaceWithConstant(Solver, &Arg)) {
+ ReplacedPointerArg |= Arg.getType()->isPointerTy();
++IPNumArgsElimed;
}
}
- // If we replaced an argument, the argmemonly and
- // inaccessiblemem_or_argmemonly attributes do not hold any longer. Remove
- // them from both the function and callsites.
- if (ReplacedPointerArg) {
- AttrBuilder AttributesToRemove;
- AttributesToRemove.addAttribute(Attribute::ArgMemOnly);
- AttributesToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly);
- F.removeAttributes(AttributeList::FunctionIndex, AttributesToRemove);
-
- for (User *U : F.users()) {
- auto *CB = dyn_cast<CallBase>(U);
- if (!CB || CB->getCalledFunction() != &F)
- continue;
-
- CB->removeAttributes(AttributeList::FunctionIndex,
- AttributesToRemove);
- }
- }
- }
-
+ // If we replaced an argument, the argmemonly and
+ // inaccessiblemem_or_argmemonly attributes do not hold any longer. Remove
+ // them from both the function and callsites.
+ if (ReplacedPointerArg) {
+ AttrBuilder AttributesToRemove;
+ AttributesToRemove.addAttribute(Attribute::ArgMemOnly);
+ AttributesToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly);
+ F.removeAttributes(AttributeList::FunctionIndex, AttributesToRemove);
+
+ for (User *U : F.users()) {
+ auto *CB = dyn_cast<CallBase>(U);
+ if (!CB || CB->getCalledFunction() != &F)
+ continue;
+
+ CB->removeAttributes(AttributeList::FunctionIndex,
+ AttributesToRemove);
+ }
+ }
+ }
+
SmallPtrSet<Value *, 32> InsertedValues;
for (BasicBlock &BB : F) {
if (!Solver.isBlockExecutable(&BB)) {
@@ -2063,10 +2063,10 @@ bool llvm::runIPSCCP(
/*UseLLVMTrap=*/false,
/*PreserveLCSSA=*/false, &DTU);
- for (BasicBlock &BB : F)
- MadeChanges |= removeNonFeasibleEdges(Solver, &BB, DTU);
+ for (BasicBlock &BB : F)
+ MadeChanges |= removeNonFeasibleEdges(Solver, &BB, DTU);
- for (BasicBlock *DeadBB : BlocksToErase)
+ for (BasicBlock *DeadBB : BlocksToErase)
DTU.deleteBB(DeadBB);
for (BasicBlock &BB : F) {
@@ -2099,47 +2099,47 @@ bool llvm::runIPSCCP(
for (const auto &I : Solver.getTrackedRetVals()) {
Function *F = I.first;
- const ValueLatticeElement &ReturnValue = I.second;
-
- // If there is a known constant range for the return value, add !range
- // metadata to the function's call sites.
- if (ReturnValue.isConstantRange() &&
- !ReturnValue.getConstantRange().isSingleElement()) {
- // Do not add range metadata if the return value may include undef.
- if (ReturnValue.isConstantRangeIncludingUndef())
- continue;
-
- auto &CR = ReturnValue.getConstantRange();
- for (User *User : F->users()) {
- auto *CB = dyn_cast<CallBase>(User);
- if (!CB || CB->getCalledFunction() != F)
- continue;
-
- // Limit to cases where the return value is guaranteed to be neither
- // poison nor undef. Poison will be outside any range and currently
- // values outside of the specified range cause immediate undefined
- // behavior.
- if (!isGuaranteedNotToBeUndefOrPoison(CB, nullptr, CB))
- continue;
-
- // Do not touch existing metadata for now.
- // TODO: We should be able to take the intersection of the existing
- // metadata and the inferred range.
- if (CB->getMetadata(LLVMContext::MD_range))
- continue;
-
- LLVMContext &Context = CB->getParent()->getContext();
- Metadata *RangeMD[] = {
- ConstantAsMetadata::get(ConstantInt::get(Context, CR.getLower())),
- ConstantAsMetadata::get(ConstantInt::get(Context, CR.getUpper()))};
- CB->setMetadata(LLVMContext::MD_range, MDNode::get(Context, RangeMD));
- }
+ const ValueLatticeElement &ReturnValue = I.second;
+
+ // If there is a known constant range for the return value, add !range
+ // metadata to the function's call sites.
+ if (ReturnValue.isConstantRange() &&
+ !ReturnValue.getConstantRange().isSingleElement()) {
+ // Do not add range metadata if the return value may include undef.
+ if (ReturnValue.isConstantRangeIncludingUndef())
+ continue;
+
+ auto &CR = ReturnValue.getConstantRange();
+ for (User *User : F->users()) {
+ auto *CB = dyn_cast<CallBase>(User);
+ if (!CB || CB->getCalledFunction() != F)
+ continue;
+
+ // Limit to cases where the return value is guaranteed to be neither
+ // poison nor undef. Poison will be outside any range and currently
+ // values outside of the specified range cause immediate undefined
+ // behavior.
+ if (!isGuaranteedNotToBeUndefOrPoison(CB, nullptr, CB))
+ continue;
+
+ // Do not touch existing metadata for now.
+ // TODO: We should be able to take the intersection of the existing
+ // metadata and the inferred range.
+ if (CB->getMetadata(LLVMContext::MD_range))
+ continue;
+
+ LLVMContext &Context = CB->getParent()->getContext();
+ Metadata *RangeMD[] = {
+ ConstantAsMetadata::get(ConstantInt::get(Context, CR.getLower())),
+ ConstantAsMetadata::get(ConstantInt::get(Context, CR.getUpper()))};
+ CB->setMetadata(LLVMContext::MD_range, MDNode::get(Context, RangeMD));
+ }
continue;
- }
- if (F->getReturnType()->isVoidTy())
- continue;
- if (isConstant(ReturnValue) || ReturnValue.isUnknownOrUndef())
- findReturnsToZap(*F, ReturnsToZap, Solver);
+ }
+ if (F->getReturnType()->isVoidTy())
+ continue;
+ if (isConstant(ReturnValue) || ReturnValue.isUnknownOrUndef())
+ findReturnsToZap(*F, ReturnsToZap, Solver);
}
for (auto F : Solver.getMRVFunctionsTracked()) {
@@ -2151,29 +2151,29 @@ bool llvm::runIPSCCP(
}
// Zap all returns which we've identified as zap to change.
- SmallSetVector<Function *, 8> FuncZappedReturn;
+ SmallSetVector<Function *, 8> FuncZappedReturn;
for (unsigned i = 0, e = ReturnsToZap.size(); i != e; ++i) {
Function *F = ReturnsToZap[i]->getParent()->getParent();
ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType()));
- // Record all functions that are zapped.
- FuncZappedReturn.insert(F);
- }
-
- // Remove the returned attribute for zapped functions and the
- // corresponding call sites.
- for (Function *F : FuncZappedReturn) {
- for (Argument &A : F->args())
- F->removeParamAttr(A.getArgNo(), Attribute::Returned);
- for (Use &U : F->uses()) {
- // Skip over blockaddr users.
- if (isa<BlockAddress>(U.getUser()))
- continue;
- CallBase *CB = cast<CallBase>(U.getUser());
- for (Use &Arg : CB->args())
- CB->removeParamAttr(CB->getArgOperandNo(&Arg), Attribute::Returned);
- }
- }
-
+ // Record all functions that are zapped.
+ FuncZappedReturn.insert(F);
+ }
+
+ // Remove the returned attribute for zapped functions and the
+ // corresponding call sites.
+ for (Function *F : FuncZappedReturn) {
+ for (Argument &A : F->args())
+ F->removeParamAttr(A.getArgNo(), Attribute::Returned);
+ for (Use &U : F->uses()) {
+ // Skip over blockaddr users.
+ if (isa<BlockAddress>(U.getUser()))
+ continue;
+ CallBase *CB = cast<CallBase>(U.getUser());
+ for (Use &Arg : CB->args())
+ CB->removeParamAttr(CB->getArgOperandNo(&Arg), Attribute::Returned);
+ }
+ }
+
// If we inferred constant or undef values for globals variables, we can
// delete the global and any stores that remain to it.
for (auto &I : make_early_inc_range(Solver.getTrackedGlobals())) {
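A recurring pattern in the SCCP changes above is snapshotting a user list before notifying those users of a lattice change: the notification callback may register additional users, which would invalidate the container being walked. A plain-C++ sketch of the pattern, with invented names and no LLVM types, follows.

#include <cstdio>
#include <functional>
#include <map>
#include <vector>

// Copy the user list first; the callback may then append to Users (even for
// the same key) while we safely iterate the snapshot instead of the original.
static void notifyAdditionalUsers(std::map<int, std::vector<int>> &Users,
                                  int ChangedValue,
                                  const std::function<void(int)> &OnChanged) {
  auto It = Users.find(ChangedValue);
  if (It == Users.end())
    return;
  std::vector<int> ToNotify(It->second); // snapshot before running callbacks
  for (int User : ToNotify)
    OnChanged(User);
}

int main() {
  std::map<int, std::vector<int>> Users{{1, {10, 11}}};
  notifyAdditionalUsers(Users, 1, [&Users](int User) {
    std::printf("re-evaluating user %d\n", User);
    Users[1].push_back(User + 100); // growing the list mid-notification is safe
  });
  return 0;
}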
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/SROA.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/SROA.cpp
index af510f1a84..587c9e89d3 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/SROA.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/SROA.cpp
@@ -268,11 +268,11 @@ public:
/// Access the dead users for this alloca.
ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; }
- /// Access Uses that should be dropped if the alloca is promotable.
- ArrayRef<Use *> getDeadUsesIfPromotable() const {
- return DeadUseIfPromotable;
- }
-
+ /// Access Uses that should be dropped if the alloca is promotable.
+ ArrayRef<Use *> getDeadUsesIfPromotable() const {
+ return DeadUseIfPromotable;
+ }
+
/// Access the dead operands referring to this alloca.
///
  /// These are operands which cannot actually be used to refer to the
@@ -327,9 +327,9 @@ private:
/// they come from outside of the allocated space.
SmallVector<Instruction *, 8> DeadUsers;
- /// Uses which will become dead if can promote the alloca.
- SmallVector<Use *, 8> DeadUseIfPromotable;
-
+ /// Uses which will become dead if can promote the alloca.
+ SmallVector<Use *, 8> DeadUseIfPromotable;
+
/// Operands which will become dead if we rewrite the alloca.
///
/// These are operands that in their particular use can be replaced with
@@ -467,8 +467,8 @@ class AllocaSlices::partition_iterator
// Remove the uses which have ended in the prior partition. This
// cannot change the max split slice end because we just checked that
// the prior partition ended prior to that max.
- llvm::erase_if(P.SplitTails,
- [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
+ llvm::erase_if(P.SplitTails,
+ [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
assert(llvm::any_of(P.SplitTails,
[&](Slice *S) {
return S->endOffset() == MaxSplitSliceEndOffset;
@@ -784,9 +784,9 @@ private:
LI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
return PI.setAborted(&LI);
- if (isa<ScalableVectorType>(LI.getType()))
- return PI.setAborted(&LI);
-
+ if (isa<ScalableVectorType>(LI.getType()))
+ return PI.setAborted(&LI);
+
uint64_t Size = DL.getTypeStoreSize(LI.getType()).getFixedSize();
return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
}
@@ -802,9 +802,9 @@ private:
SI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
return PI.setAborted(&SI);
- if (isa<ScalableVectorType>(ValOp->getType()))
- return PI.setAborted(&SI);
-
+ if (isa<ScalableVectorType>(ValOp->getType()))
+ return PI.setAborted(&SI);
+
uint64_t Size = DL.getTypeStoreSize(ValOp->getType()).getFixedSize();
// If this memory access can be shown to *statically* extend outside the
@@ -930,11 +930,11 @@ private:
// FIXME: What about debug intrinsics? This matches old behavior, but
// doesn't make sense.
void visitIntrinsicInst(IntrinsicInst &II) {
- if (II.isDroppable()) {
- AS.DeadUseIfPromotable.push_back(U);
- return;
- }
-
+ if (II.isDroppable()) {
+ AS.DeadUseIfPromotable.push_back(U);
+ return;
+ }
+
if (!IsOffsetKnown)
return PI.setAborted(&II);
@@ -1072,11 +1072,11 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
return;
}
- llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
+ llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
// Sort the uses. This arranges for the offsets to be in ascending order,
// and the sizes to be in descending order.
- llvm::stable_sort(Slices);
+ llvm::stable_sort(Slices);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1122,9 +1122,9 @@ LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
/// Walk the range of a partitioning looking for a common type to cover this
/// sequence of slices.
-static std::pair<Type *, IntegerType *>
-findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E,
- uint64_t EndOffset) {
+static std::pair<Type *, IntegerType *>
+findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E,
+ uint64_t EndOffset) {
Type *Ty = nullptr;
bool TyIsCommon = true;
IntegerType *ITy = nullptr;
@@ -1168,7 +1168,7 @@ findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E,
Ty = UserTy;
}
- return {TyIsCommon ? Ty : nullptr, ITy};
+ return {TyIsCommon ? Ty : nullptr, ITy};
}
/// PHI instructions that use an alloca and are subsequently loaded can be
@@ -1392,8 +1392,8 @@ static void speculateSelectInstLoads(SelectInst &SI) {
/// This will return the BasePtr if that is valid, or build a new GEP
/// instruction using the IRBuilder if GEP-ing is needed.
static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
- SmallVectorImpl<Value *> &Indices,
- const Twine &NamePrefix) {
+ SmallVectorImpl<Value *> &Indices,
+ const Twine &NamePrefix) {
if (Indices.empty())
return BasePtr;
@@ -1418,7 +1418,7 @@ static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
Value *BasePtr, Type *Ty, Type *TargetTy,
SmallVectorImpl<Value *> &Indices,
- const Twine &NamePrefix) {
+ const Twine &NamePrefix) {
if (Ty == TargetTy)
return buildGEP(IRB, BasePtr, Indices, NamePrefix);
@@ -1463,7 +1463,7 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
Value *Ptr, Type *Ty, APInt &Offset,
Type *TargetTy,
SmallVectorImpl<Value *> &Indices,
- const Twine &NamePrefix) {
+ const Twine &NamePrefix) {
if (Offset == 0)
return getNaturalGEPWithType(IRB, DL, Ptr, Ty, TargetTy, Indices,
NamePrefix);
@@ -1538,7 +1538,7 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
Value *Ptr, APInt Offset, Type *TargetTy,
SmallVectorImpl<Value *> &Indices,
- const Twine &NamePrefix) {
+ const Twine &NamePrefix) {
PointerType *Ty = cast<PointerType>(Ptr->getType());
// Don't consider any GEPs through an i8* as natural unless the TargetTy is
@@ -1549,8 +1549,8 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
Type *ElementTy = Ty->getElementType();
if (!ElementTy->isSized())
return nullptr; // We can't GEP through an unsized element.
- if (isa<ScalableVectorType>(ElementTy))
- return nullptr;
+ if (isa<ScalableVectorType>(ElementTy))
+ return nullptr;
APInt ElementSize(Offset.getBitWidth(),
DL.getTypeAllocSize(ElementTy).getFixedSize());
if (ElementSize == 0)
@@ -1579,8 +1579,8 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
/// a single GEP as possible, thus making each GEP more independent of the
/// surrounding code.
static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
- APInt Offset, Type *PointerTy,
- const Twine &NamePrefix) {
+ APInt Offset, Type *PointerTy,
+ const Twine &NamePrefix) {
// Even though we don't look through PHI nodes, we could be called on an
// instruction in an unreachable block, which may be on a cycle.
SmallPtrSet<Value *, 4> Visited;
@@ -1842,7 +1842,7 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
if (!S.isSplittable())
return false; // Skip any unsplittable intrinsics.
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
- if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
+ if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
return false;
} else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
// Disable vector promotion when there are loads or stores of an FCA.
@@ -1926,9 +1926,9 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
// do that until all the backends are known to produce good code for all
// integer vector types.
if (!HaveCommonEltTy) {
- llvm::erase_if(CandidateTys, [](VectorType *VTy) {
- return !VTy->getElementType()->isIntegerTy();
- });
+ llvm::erase_if(CandidateTys, [](VectorType *VTy) {
+ return !VTy->getElementType()->isIntegerTy();
+ });
// If there were no integer vector types, give up.
if (CandidateTys.empty())
@@ -2072,7 +2072,7 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
if (!S.isSplittable())
return false; // Skip any unsplittable intrinsics.
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
- if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
+ if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
return false;
} else {
return false;
@@ -2113,7 +2113,7 @@ static bool isIntegerWideningViable(Partition &P, Type *AllocaTy,
// that we cover the alloca.
// FIXME: We shouldn't consider split slices that happen to start in the
// partition here...
- bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits);
+ bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits);
for (const Slice &S : P)
if (!isIntegerWideningViableForSlice(S, P.beginOffset(), AllocaTy, DL,
@@ -2206,7 +2206,7 @@ static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex,
Mask.reserve(NumElements);
for (unsigned i = BeginIndex; i != EndIndex; ++i)
Mask.push_back(i);
- V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");
+ V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");
LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
return V;
}
@@ -2239,22 +2239,22 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
// use a shuffle vector to widen it with undef elements, and then
// a second shuffle vector to select between the loaded vector and the
// incoming vector.
- SmallVector<int, 8> Mask;
+ SmallVector<int, 8> Mask;
Mask.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
if (i >= BeginIndex && i < EndIndex)
- Mask.push_back(i - BeginIndex);
+ Mask.push_back(i - BeginIndex);
else
- Mask.push_back(-1);
- V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");
+ Mask.push_back(-1);
+ V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");
LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
- SmallVector<Constant *, 8> Mask2;
- Mask2.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
+ SmallVector<Constant *, 8> Mask2;
+ Mask2.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
- Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
+ Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
- V = IRB.CreateSelect(ConstantVector::get(Mask2), V, Old, Name + "blend");
+ V = IRB.CreateSelect(ConstantVector::get(Mask2), V, Old, Name + "blend");
LLVM_DEBUG(dbgs() << " blend: " << *V << "\n");
return V;
@@ -2458,7 +2458,7 @@ private:
void deleteIfTriviallyDead(Value *V) {
Instruction *I = cast<Instruction>(V);
if (isInstructionTriviallyDead(I))
- Pass.DeadInsts.push_back(I);
+ Pass.DeadInsts.push_back(I);
}
Value *rewriteVectorizedLoadInst() {
@@ -2524,7 +2524,7 @@ private:
NewAI.getAlign(), LI.isVolatile(),
LI.getName());
if (AATags)
- NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
if (LI.isVolatile())
NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
if (NewLI->isAtomic())
@@ -2563,7 +2563,7 @@ private:
IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
getSliceAlign(), LI.isVolatile(), LI.getName());
if (AATags)
- NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
if (LI.isVolatile())
NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
@@ -2598,7 +2598,7 @@ private:
LI.replaceAllUsesWith(V);
}
- Pass.DeadInsts.push_back(&LI);
+ Pass.DeadInsts.push_back(&LI);
deleteIfTriviallyDead(OldOp);
LLVM_DEBUG(dbgs() << " to: " << *V << "\n");
return !LI.isVolatile() && !IsPtrAdjusted;
@@ -2626,8 +2626,8 @@ private:
}
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
if (AATags)
- Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
- Pass.DeadInsts.push_back(&SI);
+ Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ Pass.DeadInsts.push_back(&SI);
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
return true;
@@ -2650,8 +2650,8 @@ private:
Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
- Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
- Pass.DeadInsts.push_back(&SI);
+ Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ Pass.DeadInsts.push_back(&SI);
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
return true;
}
@@ -2720,12 +2720,12 @@ private:
NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
- NewSI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ NewSI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
if (SI.isVolatile())
NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
if (NewSI->isAtomic())
NewSI->setAlignment(SI.getAlign());
- Pass.DeadInsts.push_back(&SI);
+ Pass.DeadInsts.push_back(&SI);
deleteIfTriviallyDead(OldOp);
LLVM_DEBUG(dbgs() << " to: " << *NewSI << "\n");
@@ -2786,11 +2786,11 @@ private:
}
// Record this instruction for deletion.
- Pass.DeadInsts.push_back(&II);
+ Pass.DeadInsts.push_back(&II);
Type *AllocaTy = NewAI.getAllocatedType();
Type *ScalarTy = AllocaTy->getScalarType();
-
+
const bool CanContinue = [&]() {
if (VecTy || IntTy)
return true;
@@ -2816,7 +2816,7 @@ private:
getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
MaybeAlign(getSliceAlign()), II.isVolatile());
if (AATags)
- New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
return false;
}
@@ -2885,7 +2885,7 @@ private:
StoreInst *New =
IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign(), II.isVolatile());
if (AATags)
- New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
return !II.isVolatile();
}
@@ -2956,7 +2956,7 @@ private:
return false;
}
// Record this instruction for deletion.
- Pass.DeadInsts.push_back(&II);
+ Pass.DeadInsts.push_back(&II);
// Strip all inbounds GEPs and pointer casts to try to dig out any root
// alloca that should be re-examined after rewriting this instruction.
@@ -3006,7 +3006,7 @@ private:
CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
Size, II.isVolatile());
if (AATags)
- New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
return false;
}
@@ -3060,7 +3060,7 @@ private:
LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
II.isVolatile(), "copyload");
if (AATags)
- Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
Src = Load;
}
@@ -3080,27 +3080,27 @@ private:
StoreInst *Store = cast<StoreInst>(
IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
if (AATags)
- Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
return !II.isVolatile();
}
bool visitIntrinsicInst(IntrinsicInst &II) {
- assert((II.isLifetimeStartOrEnd() || II.isDroppable()) &&
- "Unexpected intrinsic!");
+ assert((II.isLifetimeStartOrEnd() || II.isDroppable()) &&
+ "Unexpected intrinsic!");
LLVM_DEBUG(dbgs() << " original: " << II << "\n");
// Record this instruction for deletion.
- Pass.DeadInsts.push_back(&II);
-
- if (II.isDroppable()) {
- assert(II.getIntrinsicID() == Intrinsic::assume && "Expected assume");
- // TODO For now we forget assumed information, this can be improved.
- OldPtr->dropDroppableUsesIn(II);
- return true;
- }
-
- assert(II.getArgOperand(1) == OldPtr);
+ Pass.DeadInsts.push_back(&II);
+
+ if (II.isDroppable()) {
+ assert(II.getIntrinsicID() == Intrinsic::assume && "Expected assume");
+ // TODO For now we forget assumed information, this can be improved.
+ OldPtr->dropDroppableUsesIn(II);
+ return true;
+ }
+
+ assert(II.getArgOperand(1) == OldPtr);
// Lifetime intrinsics are only promotable if they cover the whole alloca.
// Therefore, we drop lifetime intrinsics which don't cover the whole
// alloca.
@@ -3381,13 +3381,13 @@ private:
IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
LoadInst *Load =
IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");
-
- APInt Offset(
- DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
- if (AATags &&
- GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
- Load->setAAMetadata(AATags.shift(Offset.getZExtValue()));
-
+
+ APInt Offset(
+ DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
+ if (AATags &&
+ GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
+ Load->setAAMetadata(AATags.shift(Offset.getZExtValue()));
+
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
LLVM_DEBUG(dbgs() << " to: " << *Load << "\n");
}
@@ -3433,13 +3433,13 @@ private:
IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
StoreInst *Store =
IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);
-
- APInt Offset(
- DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
- if (AATags &&
- GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
- Store->setAAMetadata(AATags.shift(Offset.getZExtValue()));
-
+
+ APInt Offset(
+ DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
+ if (AATags &&
+ GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
+ Store->setAAMetadata(AATags.shift(Offset.getZExtValue()));
+
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
}
};
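For readers skimming the re-indented hunks below, which touch SROA's folding of GEPs whose pointer operand is a select or PHI, the select case condenses to the sketch that follows. It assumes a plain IRBuilder and the naming conventions of the surrounding code (GEPI, Index, IsInBounds); it illustrates the transformation, it is not the pass's exact routine.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Sketch: rewrite `gep (select %c, %p, %q), idx...` into
// `select %c, (gep %p, idx...), (gep %q, idx...)`, mirroring the fold the
// hunks below perform. LLVM 12 still provides the pointer-typed two-argument
// CreateGEP/CreateInBoundsGEP overloads used here and in the pass itself.
static Value *foldGEPOfSelectSketch(GetElementPtrInst &GEPI, SelectInst &Sel,
                                    IRBuilder<> &Builder) {
  SmallVector<Value *, 4> Index(GEPI.indices());
  bool IsInBounds = GEPI.isInBounds();

  auto MakeGEP = [&](Value *Ptr) -> Value * {
    return IsInBounds
               ? Builder.CreateInBoundsGEP(Ptr, Index,
                                           Ptr->getName() + ".sroa.gep")
               : Builder.CreateGEP(Ptr, Index, Ptr->getName() + ".sroa.gep");
  };

  Value *NTrue = MakeGEP(Sel.getTrueValue());
  Value *NFalse = MakeGEP(Sel.getFalseValue());
  Value *NSel = Builder.CreateSelect(Sel.getCondition(), NTrue, NFalse,
                                     Sel.getName() + ".sroa.sel");
  // The pass replaces the original GEP with the new select; deleting the now
  // dead GEP is left to its dead-instruction machinery.
  GEPI.replaceAllUsesWith(NSel);
  return NSel;
}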
@@ -3485,7 +3485,7 @@ private:
<< "\n " << GEPI);
IRBuilderTy Builder(&GEPI);
- SmallVector<Value *, 4> Index(GEPI.indices());
+ SmallVector<Value *, 4> Index(GEPI.indices());
bool IsInBounds = GEPI.isInBounds();
Value *True = Sel->getTrueValue();
@@ -3539,27 +3539,27 @@ private:
<< "\n " << GEPI
<< "\n to: ");
- SmallVector<Value *, 4> Index(GEPI.indices());
+ SmallVector<Value *, 4> Index(GEPI.indices());
bool IsInBounds = GEPI.isInBounds();
IRBuilderTy PHIBuilder(GEPI.getParent()->getFirstNonPHI());
PHINode *NewPN = PHIBuilder.CreatePHI(GEPI.getType(),
PHI->getNumIncomingValues(),
PHI->getName() + ".sroa.phi");
for (unsigned I = 0, E = PHI->getNumIncomingValues(); I != E; ++I) {
- BasicBlock *B = PHI->getIncomingBlock(I);
- Value *NewVal = nullptr;
- int Idx = NewPN->getBasicBlockIndex(B);
- if (Idx >= 0) {
- NewVal = NewPN->getIncomingValue(Idx);
- } else {
- Instruction *In = cast<Instruction>(PHI->getIncomingValue(I));
-
- IRBuilderTy B(In->getParent(), std::next(In->getIterator()));
- NewVal = IsInBounds
- ? B.CreateInBoundsGEP(In, Index, In->getName() + ".sroa.gep")
- : B.CreateGEP(In, Index, In->getName() + ".sroa.gep");
- }
- NewPN->addIncoming(NewVal, B);
+ BasicBlock *B = PHI->getIncomingBlock(I);
+ Value *NewVal = nullptr;
+ int Idx = NewPN->getBasicBlockIndex(B);
+ if (Idx >= 0) {
+ NewVal = NewPN->getIncomingValue(Idx);
+ } else {
+ Instruction *In = cast<Instruction>(PHI->getIncomingValue(I));
+
+ IRBuilderTy B(In->getParent(), std::next(In->getIterator()));
+ NewVal = IsInBounds
+ ? B.CreateInBoundsGEP(In, Index, In->getName() + ".sroa.gep")
+ : B.CreateGEP(In, Index, In->getName() + ".sroa.gep");
+ }
+ NewPN->addIncoming(NewVal, B);
}
Visited.erase(&GEPI);
@@ -3901,53 +3901,53 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
// such loads and stores, we can only pre-split them if their splits exactly
// match relative to their starting offset. We have to verify this prior to
// any rewriting.
- llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
- // Lookup the load we are storing in our map of split
- // offsets.
- auto *LI = cast<LoadInst>(SI->getValueOperand());
- // If it was completely unsplittable, then we're done,
- // and this store can't be pre-split.
- if (UnsplittableLoads.count(LI))
- return true;
-
- auto LoadOffsetsI = SplitOffsetsMap.find(LI);
- if (LoadOffsetsI == SplitOffsetsMap.end())
- return false; // Unrelated loads are definitely safe.
- auto &LoadOffsets = LoadOffsetsI->second;
-
- // Now lookup the store's offsets.
- auto &StoreOffsets = SplitOffsetsMap[SI];
-
- // If the relative offsets of each split in the load and
- // store match exactly, then we can split them and we
- // don't need to remove them here.
- if (LoadOffsets.Splits == StoreOffsets.Splits)
- return false;
-
- LLVM_DEBUG(dbgs() << " Mismatched splits for load and store:\n"
- << " " << *LI << "\n"
- << " " << *SI << "\n");
-
- // We've found a store and load that we need to split
- // with mismatched relative splits. Just give up on them
- // and remove both instructions from our list of
- // candidates.
- UnsplittableLoads.insert(LI);
- return true;
- });
+ llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
+ // Lookup the load we are storing in our map of split
+ // offsets.
+ auto *LI = cast<LoadInst>(SI->getValueOperand());
+ // If it was completely unsplittable, then we're done,
+ // and this store can't be pre-split.
+ if (UnsplittableLoads.count(LI))
+ return true;
+
+ auto LoadOffsetsI = SplitOffsetsMap.find(LI);
+ if (LoadOffsetsI == SplitOffsetsMap.end())
+ return false; // Unrelated loads are definitely safe.
+ auto &LoadOffsets = LoadOffsetsI->second;
+
+ // Now lookup the store's offsets.
+ auto &StoreOffsets = SplitOffsetsMap[SI];
+
+ // If the relative offsets of each split in the load and
+ // store match exactly, then we can split them and we
+ // don't need to remove them here.
+ if (LoadOffsets.Splits == StoreOffsets.Splits)
+ return false;
+
+ LLVM_DEBUG(dbgs() << " Mismatched splits for load and store:\n"
+ << " " << *LI << "\n"
+ << " " << *SI << "\n");
+
+ // We've found a store and load that we need to split
+ // with mismatched relative splits. Just give up on them
+ // and remove both instructions from our list of
+ // candidates.
+ UnsplittableLoads.insert(LI);
+ return true;
+ });
// Now we have to go *back* through all the stores, because a later store may
// have caused an earlier store's load to become unsplittable and if it is
// unsplittable for the later store, then we can't rely on it being split in
// the earlier store either.
- llvm::erase_if(Stores, [&UnsplittableLoads](StoreInst *SI) {
- auto *LI = cast<LoadInst>(SI->getValueOperand());
- return UnsplittableLoads.count(LI);
- });
+ llvm::erase_if(Stores, [&UnsplittableLoads](StoreInst *SI) {
+ auto *LI = cast<LoadInst>(SI->getValueOperand());
+ return UnsplittableLoads.count(LI);
+ });
// Once we've established all the loads that can't be split for some reason,
// filter any that made it into our list out.
- llvm::erase_if(Loads, [&UnsplittableLoads](LoadInst *LI) {
- return UnsplittableLoads.count(LI);
- });
+ llvm::erase_if(Loads, [&UnsplittableLoads](LoadInst *LI) {
+ return UnsplittableLoads.count(LI);
+ });
// If no loads or stores are left, there is no pre-splitting to be done for
// this alloca.
@@ -4084,7 +4084,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
}
// Mark the original store as dead.
- DeadInsts.push_back(SI);
+ DeadInsts.push_back(SI);
}
// Save the split loads if there are deferred stores among the users.
@@ -4092,7 +4092,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));
// Mark the original load as dead and kill the original slice.
- DeadInsts.push_back(LI);
+ DeadInsts.push_back(LI);
Offsets.S->kill();
}
@@ -4214,14 +4214,14 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
// trivial CSE, including instcombine.
if (LI->hasOneUse()) {
assert(*LI->user_begin() == SI && "Single use isn't this store!");
- DeadInsts.push_back(LI);
+ DeadInsts.push_back(LI);
}
- DeadInsts.push_back(SI);
+ DeadInsts.push_back(SI);
Offsets.S->kill();
}
// Remove the killed slices that have been pre-split.
- llvm::erase_if(AS, [](const Slice &S) { return S.isDead(); });
+ llvm::erase_if(AS, [](const Slice &S) { return S.isDead(); });
// Insert our new slices. This will sort and merge them into the sorted
// sequence.
@@ -4235,9 +4235,9 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
// Finally, don't try to promote any allocas that now require re-splitting.
// They have already been added to the worklist above.
- llvm::erase_if(PromotableAllocas, [&](AllocaInst *AI) {
- return ResplitPromotableAllocas.count(AI);
- });
+ llvm::erase_if(PromotableAllocas, [&](AllocaInst *AI) {
+ return ResplitPromotableAllocas.count(AI);
+ });
return true;
}
@@ -4259,21 +4259,21 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
// or an i8 array of an appropriate size.
Type *SliceTy = nullptr;
const DataLayout &DL = AI.getModule()->getDataLayout();
- std::pair<Type *, IntegerType *> CommonUseTy =
- findCommonType(P.begin(), P.end(), P.endOffset());
- // Do all uses operate on the same type?
- if (CommonUseTy.first)
- if (DL.getTypeAllocSize(CommonUseTy.first).getFixedSize() >= P.size())
- SliceTy = CommonUseTy.first;
- // If not, can we find an appropriate subtype in the original allocated type?
+ std::pair<Type *, IntegerType *> CommonUseTy =
+ findCommonType(P.begin(), P.end(), P.endOffset());
+ // Do all uses operate on the same type?
+ if (CommonUseTy.first)
+ if (DL.getTypeAllocSize(CommonUseTy.first).getFixedSize() >= P.size())
+ SliceTy = CommonUseTy.first;
+ // If not, can we find an appropriate subtype in the original allocated type?
if (!SliceTy)
if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
P.beginOffset(), P.size()))
SliceTy = TypePartitionTy;
- // If still not, can we use the largest bitwidth integer type used?
- if (!SliceTy && CommonUseTy.second)
- if (DL.getTypeAllocSize(CommonUseTy.second).getFixedSize() >= P.size())
- SliceTy = CommonUseTy.second;
+ // If still not, can we use the largest bitwidth integer type used?
+ if (!SliceTy && CommonUseTy.second)
+ if (DL.getTypeAllocSize(CommonUseTy.second).getFixedSize() >= P.size())
+ SliceTy = CommonUseTy.second;
if ((!SliceTy || (SliceTy->isArrayTy() &&
SliceTy->getArrayElementType()->isIntegerTy())) &&
DL.isLegalInteger(P.size() * 8))
@@ -4363,13 +4363,13 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
}
if (Promotable) {
- for (Use *U : AS.getDeadUsesIfPromotable()) {
- auto *OldInst = dyn_cast<Instruction>(U->get());
- Value::dropDroppableUse(*U);
- if (OldInst)
- if (isInstructionTriviallyDead(OldInst))
- DeadInsts.push_back(OldInst);
- }
+ for (Use *U : AS.getDeadUsesIfPromotable()) {
+ auto *OldInst = dyn_cast<Instruction>(U->get());
+ Value::dropDroppableUse(*U);
+ if (OldInst)
+ if (isInstructionTriviallyDead(OldInst))
+ DeadInsts.push_back(OldInst);
+ }
if (PHIUsers.empty() && SelectUsers.empty()) {
// Promote the alloca.
PromotableAllocas.push_back(NewAI);
@@ -4504,8 +4504,8 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
// Migrate debug information from the old alloca to the new alloca(s)
// and the individual partitions.
TinyPtrVector<DbgVariableIntrinsic *> DbgDeclares = FindDbgAddrUses(&AI);
- for (DbgVariableIntrinsic *DbgDeclare : DbgDeclares) {
- auto *Expr = DbgDeclare->getExpression();
+ for (DbgVariableIntrinsic *DbgDeclare : DbgDeclares) {
+ auto *Expr = DbgDeclare->getExpression();
DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
uint64_t AllocaSize =
DL.getTypeSizeInBits(AI.getAllocatedType()).getFixedSize();
@@ -4536,7 +4536,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
}
// The alloca may be larger than the variable.
- auto VarSize = DbgDeclare->getVariable()->getSizeInBits();
+ auto VarSize = DbgDeclare->getVariable()->getSizeInBits();
if (VarSize) {
if (Size > *VarSize)
Size = *VarSize;
@@ -4554,21 +4554,21 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
}
}
- // Remove any existing intrinsics on the new alloca describing
- // the variable fragment.
- for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(Fragment.Alloca)) {
- auto SameVariableFragment = [](const DbgVariableIntrinsic *LHS,
- const DbgVariableIntrinsic *RHS) {
- return LHS->getVariable() == RHS->getVariable() &&
- LHS->getDebugLoc()->getInlinedAt() ==
- RHS->getDebugLoc()->getInlinedAt();
- };
- if (SameVariableFragment(OldDII, DbgDeclare))
- OldDII->eraseFromParent();
- }
-
- DIB.insertDeclare(Fragment.Alloca, DbgDeclare->getVariable(), FragmentExpr,
- DbgDeclare->getDebugLoc(), &AI);
+ // Remove any existing intrinsics on the new alloca describing
+ // the variable fragment.
+ for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(Fragment.Alloca)) {
+ auto SameVariableFragment = [](const DbgVariableIntrinsic *LHS,
+ const DbgVariableIntrinsic *RHS) {
+ return LHS->getVariable() == RHS->getVariable() &&
+ LHS->getDebugLoc()->getInlinedAt() ==
+ RHS->getDebugLoc()->getInlinedAt();
+ };
+ if (SameVariableFragment(OldDII, DbgDeclare))
+ OldDII->eraseFromParent();
+ }
+
+ DIB.insertDeclare(Fragment.Alloca, DbgDeclare->getVariable(), FragmentExpr,
+ DbgDeclare->getDebugLoc(), &AI);
}
}
return Changed;
@@ -4585,7 +4585,7 @@ void SROA::clobberUse(Use &U) {
// minimal.
if (Instruction *OldI = dyn_cast<Instruction>(OldV))
if (isInstructionTriviallyDead(OldI)) {
- DeadInsts.push_back(OldI);
+ DeadInsts.push_back(OldI);
}
}
@@ -4634,7 +4634,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
DeadUser->replaceAllUsesWith(UndefValue::get(DeadUser->getType()));
// And mark it for deletion.
- DeadInsts.push_back(DeadUser);
+ DeadInsts.push_back(DeadUser);
Changed = true;
}
for (Use *DeadOp : AS.getDeadOperands()) {
@@ -4672,8 +4672,8 @@ bool SROA::deleteDeadInstructions(
SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
bool Changed = false;
while (!DeadInsts.empty()) {
- Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
- if (!I) continue;
+ Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
+ if (!I) continue;
LLVM_DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
// If the instruction is an alloca, find the possible dbg.declare connected
@@ -4692,7 +4692,7 @@ bool SROA::deleteDeadInstructions(
// Zero out the operand and see if it becomes trivially dead.
Operand = nullptr;
if (isInstructionTriviallyDead(U))
- DeadInsts.push_back(U);
+ DeadInsts.push_back(U);
}
++NumDeleted;
@@ -4755,7 +4755,7 @@ PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT,
auto IsInSet = [&](AllocaInst *AI) { return DeletedAllocas.count(AI); };
Worklist.remove_if(IsInSet);
PostPromotionWorklist.remove_if(IsInSet);
- llvm::erase_if(PromotableAllocas, IsInSet);
+ llvm::erase_if(PromotableAllocas, IsInSet);
DeletedAllocas.clear();
}
}
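Many of the SROA.cpp hunks above only re-indent calls to llvm::erase_if, the predicate-based container filter from llvm/ADT/STLExtras.h. As a standalone reminder of what that helper does, here is a minimal sketch of the same erase-remove idiom over std::vector; the helper name and the sample data are illustrative only.

#include <algorithm>
#include <vector>

// Minimal stand-in for llvm::erase_if: remove every element for which the
// predicate returns true, in a single pass over the container.
template <typename T, typename Pred>
static void erase_if_sketch(std::vector<T> &C, Pred P) {
  C.erase(std::remove_if(C.begin(), C.end(), P), C.end());
}

int main() {
  std::vector<int> Slices = {1, 2, 3, 4, 5, 6};
  // Analogous to the `llvm::erase_if(Stores, ...)` / `erase_if(Loads, ...)`
  // filtering above: drop the "dead" (here: even) entries.
  erase_if_sketch(Slices, [](int V) { return V % 2 == 0; });
  return 0; // Slices now holds {1, 3, 5}.
}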
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/Scalar.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/Scalar.cpp
index dba3dba24e..c897888295 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/Scalar.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/Scalar.cpp
@@ -34,12 +34,12 @@ using namespace llvm;
/// ScalarOpts library.
void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeADCELegacyPassPass(Registry);
- initializeAnnotationRemarksLegacyPass(Registry);
+ initializeAnnotationRemarksLegacyPass(Registry);
initializeBDCELegacyPassPass(Registry);
initializeAlignmentFromAssumptionsPass(Registry);
initializeCallSiteSplittingLegacyPassPass(Registry);
initializeConstantHoistingLegacyPassPass(Registry);
- initializeConstraintEliminationPass(Registry);
+ initializeConstraintEliminationPass(Registry);
initializeCorrelatedValuePropagationPass(Registry);
initializeDCELegacyPassPass(Registry);
initializeDivRemPairsLegacyPassPass(Registry);
@@ -67,24 +67,24 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLoopDeletionLegacyPassPass(Registry);
initializeLoopAccessLegacyAnalysisPass(Registry);
initializeLoopInstSimplifyLegacyPassPass(Registry);
- initializeLoopInterchangeLegacyPassPass(Registry);
- initializeLoopFlattenLegacyPassPass(Registry);
+ initializeLoopInterchangeLegacyPassPass(Registry);
+ initializeLoopFlattenLegacyPassPass(Registry);
initializeLoopPredicationLegacyPassPass(Registry);
initializeLoopRotateLegacyPassPass(Registry);
initializeLoopStrengthReducePass(Registry);
- initializeLoopRerollLegacyPassPass(Registry);
+ initializeLoopRerollLegacyPassPass(Registry);
initializeLoopUnrollPass(Registry);
initializeLoopUnrollAndJamPass(Registry);
initializeLoopUnswitchPass(Registry);
initializeWarnMissedTransformationsLegacyPass(Registry);
- initializeLoopVersioningLICMLegacyPassPass(Registry);
+ initializeLoopVersioningLICMLegacyPassPass(Registry);
initializeLoopIdiomRecognizeLegacyPassPass(Registry);
initializeLowerAtomicLegacyPassPass(Registry);
initializeLowerConstantIntrinsicsPass(Registry);
initializeLowerExpectIntrinsicPass(Registry);
initializeLowerGuardIntrinsicLegacyPassPass(Registry);
initializeLowerMatrixIntrinsicsLegacyPassPass(Registry);
- initializeLowerMatrixIntrinsicsMinimalLegacyPassPass(Registry);
+ initializeLowerMatrixIntrinsicsMinimalLegacyPassPass(Registry);
initializeLowerWidenableConditionLegacyPassPass(Registry);
initializeMemCpyOptLegacyPassPass(Registry);
initializeMergeICmpsLegacyPassPass(Registry);
@@ -93,26 +93,26 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializePartiallyInlineLibCallsLegacyPassPass(Registry);
initializeReassociateLegacyPassPass(Registry);
initializeRedundantDbgInstEliminationPass(Registry);
- initializeRegToMemLegacyPass(Registry);
+ initializeRegToMemLegacyPass(Registry);
initializeRewriteStatepointsForGCLegacyPassPass(Registry);
- initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry);
+ initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry);
initializeSCCPLegacyPassPass(Registry);
initializeSROALegacyPassPass(Registry);
initializeCFGSimplifyPassPass(Registry);
- initializeStructurizeCFGLegacyPassPass(Registry);
+ initializeStructurizeCFGLegacyPassPass(Registry);
initializeSimpleLoopUnswitchLegacyPassPass(Registry);
initializeSinkingLegacyPassPass(Registry);
initializeTailCallElimPass(Registry);
- initializeSeparateConstOffsetFromGEPLegacyPassPass(Registry);
+ initializeSeparateConstOffsetFromGEPLegacyPassPass(Registry);
initializeSpeculativeExecutionLegacyPassPass(Registry);
- initializeStraightLineStrengthReduceLegacyPassPass(Registry);
+ initializeStraightLineStrengthReduceLegacyPassPass(Registry);
initializePlaceBackedgeSafepointsImplPass(Registry);
initializePlaceSafepointsPass(Registry);
initializeFloat2IntLegacyPassPass(Registry);
initializeLoopDistributeLegacyPass(Registry);
initializeLoopLoadEliminationPass(Registry);
initializeLoopSimplifyCFGLegacyPassPass(Registry);
- initializeLoopVersioningLegacyPassPass(Registry);
+ initializeLoopVersioningLegacyPassPass(Registry);
initializeEntryExitInstrumenterPass(Registry);
initializePostInlineEntryExitInstrumenterPass(Registry);
}
@@ -142,7 +142,7 @@ void LLVMAddAlignmentFromAssumptionsPass(LLVMPassManagerRef PM) {
}
void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createCFGSimplificationPass());
+ unwrap(PM)->add(createCFGSimplificationPass());
}
void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM) {
@@ -169,10 +169,10 @@ void LLVMAddIndVarSimplifyPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createIndVarSimplifyPass());
}
-void LLVMAddInstructionSimplifyPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createInstSimplifyLegacyPass());
-}
-
+void LLVMAddInstructionSimplifyPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createInstSimplifyLegacyPass());
+}
+
void LLVMAddJumpThreadingPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createJumpThreadingPass());
}
@@ -189,10 +189,10 @@ void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopDeletionPass());
}
-void LLVMAddLoopFlattenPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createLoopFlattenPass());
-}
-
+void LLVMAddLoopFlattenPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopFlattenPass());
+}
+
void LLVMAddLoopIdiomPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopIdiomPass());
}
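The Scalar.cpp hunks above move the C bindings (LLVMAddInstructionSimplifyPass, LLVMAddLoopFlattenPass, and friends) that expose these legacy passes through the LLVM-C API. A hedged sketch of how client code drives them, assuming the LLVM 12 headers from this tree, looks like this:

#include "llvm-c/Core.h"
#include "llvm-c/Transforms/Scalar.h"

int main() {
  LLVMContextRef Ctx = LLVMContextCreate();
  // An empty module is enough to show the plumbing; real clients would parse
  // or build IR here first.
  LLVMModuleRef M = LLVMModuleCreateWithNameInContext("demo", Ctx);

  LLVMPassManagerRef PM = LLVMCreatePassManager();
  LLVMAddCFGSimplificationPass(PM);   // wraps createCFGSimplificationPass()
  LLVMAddInstructionSimplifyPass(PM); // wraps createInstSimplifyLegacyPass()
  LLVMAddLoopFlattenPass(PM);         // wraps createLoopFlattenPass()

  LLVMRunPassManager(PM, M); // run the legacy pipeline over the module

  LLVMDisposePassManager(PM);
  LLVMDisposeModule(M);
  LLVMContextDispose(Ctx);
  return 0;
}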
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
index afa2d1bc79..c8da464a3b 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
@@ -1,948 +1,948 @@
-//===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===//
-// intrinsics
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass replaces masked memory intrinsics - when unsupported by the target
-// - with a chain of basic blocks, that deal with the elements one-by-one if the
-// appropriate mask bit is set.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Transforms/Scalar.h"
-#include <algorithm>
-#include <cassert>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "scalarize-masked-mem-intrin"
-
-namespace {
-
-class ScalarizeMaskedMemIntrinLegacyPass : public FunctionPass {
-public:
- static char ID; // Pass identification, replacement for typeid
-
- explicit ScalarizeMaskedMemIntrinLegacyPass() : FunctionPass(ID) {
- initializeScalarizeMaskedMemIntrinLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-
- StringRef getPassName() const override {
- return "Scalarize Masked Memory Intrinsics";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetTransformInfoWrapperPass>();
- }
-};
-
-} // end anonymous namespace
-
-static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
- const TargetTransformInfo &TTI, const DataLayout &DL);
-static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
- const TargetTransformInfo &TTI,
- const DataLayout &DL);
-
-char ScalarizeMaskedMemIntrinLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
- "Scalarize unsupported masked memory intrinsics", false,
- false)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
- "Scalarize unsupported masked memory intrinsics", false,
- false)
-
-FunctionPass *llvm::createScalarizeMaskedMemIntrinLegacyPass() {
- return new ScalarizeMaskedMemIntrinLegacyPass();
-}
-
-static bool isConstantIntVector(Value *Mask) {
- Constant *C = dyn_cast<Constant>(Mask);
- if (!C)
- return false;
-
- unsigned NumElts = cast<FixedVectorType>(Mask->getType())->getNumElements();
- for (unsigned i = 0; i != NumElts; ++i) {
- Constant *CElt = C->getAggregateElement(i);
- if (!CElt || !isa<ConstantInt>(CElt))
- return false;
- }
-
- return true;
-}
-
-// Translate a masked load intrinsic like
-// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
-// <16 x i1> %mask, <16 x i32> %passthru)
-// to a chain of basic blocks, with loading element one-by-one if
-// the appropriate mask bit is set
-//
-// %1 = bitcast i8* %addr to i32*
-// %2 = extractelement <16 x i1> %mask, i32 0
-// br i1 %2, label %cond.load, label %else
-//
-// cond.load: ; preds = %0
-// %3 = getelementptr i32* %1, i32 0
-// %4 = load i32* %3
-// %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0
-// br label %else
-//
-// else: ; preds = %0, %cond.load
-// %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ undef, %0 ]
-// %6 = extractelement <16 x i1> %mask, i32 1
-// br i1 %6, label %cond.load1, label %else2
-//
-// cond.load1: ; preds = %else
-// %7 = getelementptr i32* %1, i32 1
-// %8 = load i32* %7
-// %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1
-// br label %else2
-//
-// else2: ; preds = %else, %cond.load1
-// %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ]
-// %10 = extractelement <16 x i1> %mask, i32 2
-// br i1 %10, label %cond.load4, label %else5
-//
-static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) {
- Value *Ptr = CI->getArgOperand(0);
- Value *Alignment = CI->getArgOperand(1);
- Value *Mask = CI->getArgOperand(2);
- Value *Src0 = CI->getArgOperand(3);
-
- const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
- VectorType *VecType = cast<FixedVectorType>(CI->getType());
-
- Type *EltTy = VecType->getElementType();
-
- IRBuilder<> Builder(CI->getContext());
- Instruction *InsertPt = CI;
- BasicBlock *IfBlock = CI->getParent();
-
- Builder.SetInsertPoint(InsertPt);
- Builder.SetCurrentDebugLocation(CI->getDebugLoc());
-
- // Short-cut if the mask is all-true.
- if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
- Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal);
- CI->replaceAllUsesWith(NewI);
- CI->eraseFromParent();
- return;
- }
-
- // Adjust alignment for the scalar instruction.
- const Align AdjustedAlignVal =
- commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
- // Bitcast %addr from i8* to EltTy*
- Type *NewPtrType =
- EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
- Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
- unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
-
- // The result vector
- Value *VResult = Src0;
-
- if (isConstantIntVector(Mask)) {
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
- continue;
- Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
- LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
- VResult = Builder.CreateInsertElement(VResult, Load, Idx);
- }
- CI->replaceAllUsesWith(VResult);
- CI->eraseFromParent();
- return;
- }
-
- // If the mask is not v1i1, use scalar bit test operations. This generates
- // better results on X86 at least.
- Value *SclrMask;
- if (VectorWidth != 1) {
- Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
- SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
- }
-
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- // Fill the "else" block, created in the previous iteration
- //
- // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
- // %mask_1 = and i16 %scalar_mask, i32 1 << Idx
- // %cond = icmp ne i16 %mask_1, 0
- // br i1 %mask_1, label %cond.load, label %else
- //
- Value *Predicate;
- if (VectorWidth != 1) {
- Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
- Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
- Builder.getIntN(VectorWidth, 0));
- } else {
- Predicate = Builder.CreateExtractElement(Mask, Idx);
- }
-
- // Create "cond" block
- //
- // %EltAddr = getelementptr i32* %1, i32 0
- // %Elt = load i32* %EltAddr
- // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
- //
- BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(),
- "cond.load");
- Builder.SetInsertPoint(InsertPt);
-
- Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
- LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
- Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
-
- // Create "else" block, fill it in the next iteration
- BasicBlock *NewIfBlock =
- CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
- Builder.SetInsertPoint(InsertPt);
- Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
- OldBr->eraseFromParent();
- BasicBlock *PrevIfBlock = IfBlock;
- IfBlock = NewIfBlock;
-
- // Create the phi to join the new and previous value.
- PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
- Phi->addIncoming(NewVResult, CondBlock);
- Phi->addIncoming(VResult, PrevIfBlock);
- VResult = Phi;
- }
-
- CI->replaceAllUsesWith(VResult);
- CI->eraseFromParent();
-
- ModifiedDT = true;
-}
-
-// Translate a masked store intrinsic, like
-// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
-// <16 x i1> %mask)
-// to a chain of basic blocks, that stores element one-by-one if
-// the appropriate mask bit is set
-//
-// %1 = bitcast i8* %addr to i32*
-// %2 = extractelement <16 x i1> %mask, i32 0
-// br i1 %2, label %cond.store, label %else
-//
-// cond.store: ; preds = %0
-// %3 = extractelement <16 x i32> %val, i32 0
-// %4 = getelementptr i32* %1, i32 0
-// store i32 %3, i32* %4
-// br label %else
-//
-// else: ; preds = %0, %cond.store
-// %5 = extractelement <16 x i1> %mask, i32 1
-// br i1 %5, label %cond.store1, label %else2
-//
-// cond.store1: ; preds = %else
-// %6 = extractelement <16 x i32> %val, i32 1
-// %7 = getelementptr i32* %1, i32 1
-// store i32 %6, i32* %7
-// br label %else2
-// . . .
-static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) {
- Value *Src = CI->getArgOperand(0);
- Value *Ptr = CI->getArgOperand(1);
- Value *Alignment = CI->getArgOperand(2);
- Value *Mask = CI->getArgOperand(3);
-
- const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
- auto *VecType = cast<VectorType>(Src->getType());
-
- Type *EltTy = VecType->getElementType();
-
- IRBuilder<> Builder(CI->getContext());
- Instruction *InsertPt = CI;
- BasicBlock *IfBlock = CI->getParent();
- Builder.SetInsertPoint(InsertPt);
- Builder.SetCurrentDebugLocation(CI->getDebugLoc());
-
- // Short-cut if the mask is all-true.
- if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
- Builder.CreateAlignedStore(Src, Ptr, AlignVal);
- CI->eraseFromParent();
- return;
- }
-
- // Adjust alignment for the scalar instruction.
- const Align AdjustedAlignVal =
- commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
- // Bitcast %addr from i8* to EltTy*
- Type *NewPtrType =
- EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
- Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
- unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
-
- if (isConstantIntVector(Mask)) {
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
- continue;
- Value *OneElt = Builder.CreateExtractElement(Src, Idx);
- Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
- Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
- }
- CI->eraseFromParent();
- return;
- }
-
- // If the mask is not v1i1, use scalar bit test operations. This generates
- // better results on X86 at least.
- Value *SclrMask;
- if (VectorWidth != 1) {
- Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
- SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
- }
-
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- // Fill the "else" block, created in the previous iteration
- //
- // %mask_1 = and i16 %scalar_mask, i32 1 << Idx
- // %cond = icmp ne i16 %mask_1, 0
- // br i1 %mask_1, label %cond.store, label %else
- //
- Value *Predicate;
- if (VectorWidth != 1) {
- Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
- Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
- Builder.getIntN(VectorWidth, 0));
- } else {
- Predicate = Builder.CreateExtractElement(Mask, Idx);
- }
-
- // Create "cond" block
- //
- // %OneElt = extractelement <16 x i32> %Src, i32 Idx
- // %EltAddr = getelementptr i32* %1, i32 0
- // %store i32 %OneElt, i32* %EltAddr
- //
- BasicBlock *CondBlock =
- IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
- Builder.SetInsertPoint(InsertPt);
-
- Value *OneElt = Builder.CreateExtractElement(Src, Idx);
- Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
- Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
-
- // Create "else" block, fill it in the next iteration
- BasicBlock *NewIfBlock =
- CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
- Builder.SetInsertPoint(InsertPt);
- Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
- OldBr->eraseFromParent();
- IfBlock = NewIfBlock;
- }
- CI->eraseFromParent();
-
- ModifiedDT = true;
-}
-
-// Translate a masked gather intrinsic like
-// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
-// <16 x i1> %Mask, <16 x i32> %Src)
-// to a chain of basic blocks, with loading element one-by-one if
-// the appropriate mask bit is set
-//
-// %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
-// %Mask0 = extractelement <16 x i1> %Mask, i32 0
-// br i1 %Mask0, label %cond.load, label %else
-//
-// cond.load:
-// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
-// %Load0 = load i32, i32* %Ptr0, align 4
-// %Res0 = insertelement <16 x i32> undef, i32 %Load0, i32 0
-// br label %else
-//
-// else:
-// %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [undef, %0]
-// %Mask1 = extractelement <16 x i1> %Mask, i32 1
-// br i1 %Mask1, label %cond.load1, label %else2
-//
-// cond.load1:
-// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
-// %Load1 = load i32, i32* %Ptr1, align 4
-// %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1
-// br label %else2
-// . . .
-// %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
-// ret <16 x i32> %Result
-static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) {
- Value *Ptrs = CI->getArgOperand(0);
- Value *Alignment = CI->getArgOperand(1);
- Value *Mask = CI->getArgOperand(2);
- Value *Src0 = CI->getArgOperand(3);
-
- auto *VecType = cast<FixedVectorType>(CI->getType());
- Type *EltTy = VecType->getElementType();
-
- IRBuilder<> Builder(CI->getContext());
- Instruction *InsertPt = CI;
- BasicBlock *IfBlock = CI->getParent();
- Builder.SetInsertPoint(InsertPt);
- MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
-
- Builder.SetCurrentDebugLocation(CI->getDebugLoc());
-
- // The result vector
- Value *VResult = Src0;
- unsigned VectorWidth = VecType->getNumElements();
-
- // Shorten the way if the mask is a vector of constants.
- if (isConstantIntVector(Mask)) {
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
- continue;
- Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
- LoadInst *Load =
- Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
- VResult =
- Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
- }
- CI->replaceAllUsesWith(VResult);
- CI->eraseFromParent();
- return;
- }
-
- // If the mask is not v1i1, use scalar bit test operations. This generates
- // better results on X86 at least.
- Value *SclrMask;
- if (VectorWidth != 1) {
- Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
- SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
- }
-
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- // Fill the "else" block, created in the previous iteration
- //
- // %Mask1 = and i16 %scalar_mask, i32 1 << Idx
- // %cond = icmp ne i16 %mask_1, 0
- // br i1 %Mask1, label %cond.load, label %else
- //
-
- Value *Predicate;
- if (VectorWidth != 1) {
- Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
- Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
- Builder.getIntN(VectorWidth, 0));
- } else {
- Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
- }
-
- // Create "cond" block
- //
- // %EltAddr = getelementptr i32* %1, i32 0
- // %Elt = load i32* %EltAddr
- // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
- //
- BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
- Builder.SetInsertPoint(InsertPt);
-
- Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
- LoadInst *Load =
- Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
- Value *NewVResult =
- Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
-
- // Create "else" block, fill it in the next iteration
- BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
- Builder.SetInsertPoint(InsertPt);
- Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
- OldBr->eraseFromParent();
- BasicBlock *PrevIfBlock = IfBlock;
- IfBlock = NewIfBlock;
-
- PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
- Phi->addIncoming(NewVResult, CondBlock);
- Phi->addIncoming(VResult, PrevIfBlock);
- VResult = Phi;
- }
-
- CI->replaceAllUsesWith(VResult);
- CI->eraseFromParent();
-
- ModifiedDT = true;
-}
-
-// Translate a masked scatter intrinsic, like
-// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4,
-// <16 x i1> %Mask)
-// to a chain of basic blocks, that stores element one-by-one if
-// the appropriate mask bit is set.
-//
-// %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
-// %Mask0 = extractelement <16 x i1> %Mask, i32 0
-// br i1 %Mask0, label %cond.store, label %else
-//
-// cond.store:
-// %Elt0 = extractelement <16 x i32> %Src, i32 0
-// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
-// store i32 %Elt0, i32* %Ptr0, align 4
-// br label %else
-//
-// else:
-// %Mask1 = extractelement <16 x i1> %Mask, i32 1
-// br i1 %Mask1, label %cond.store1, label %else2
-//
-// cond.store1:
-// %Elt1 = extractelement <16 x i32> %Src, i32 1
-// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
-// store i32 %Elt1, i32* %Ptr1, align 4
-// br label %else2
-// . . .
-static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) {
- Value *Src = CI->getArgOperand(0);
- Value *Ptrs = CI->getArgOperand(1);
- Value *Alignment = CI->getArgOperand(2);
- Value *Mask = CI->getArgOperand(3);
-
- auto *SrcFVTy = cast<FixedVectorType>(Src->getType());
-
- assert(
- isa<VectorType>(Ptrs->getType()) &&
- isa<PointerType>(cast<VectorType>(Ptrs->getType())->getElementType()) &&
- "Vector of pointers is expected in masked scatter intrinsic");
-
- IRBuilder<> Builder(CI->getContext());
- Instruction *InsertPt = CI;
- BasicBlock *IfBlock = CI->getParent();
- Builder.SetInsertPoint(InsertPt);
- Builder.SetCurrentDebugLocation(CI->getDebugLoc());
-
- MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
- unsigned VectorWidth = SrcFVTy->getNumElements();
-
- // Shorten the way if the mask is a vector of constants.
- if (isConstantIntVector(Mask)) {
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
- continue;
- Value *OneElt =
- Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
- Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
- Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
- }
- CI->eraseFromParent();
- return;
- }
-
- // If the mask is not v1i1, use scalar bit test operations. This generates
- // better results on X86 at least.
- Value *SclrMask;
- if (VectorWidth != 1) {
- Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
- SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
- }
-
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- // Fill the "else" block, created in the previous iteration
- //
- // %Mask1 = and i16 %scalar_mask, i32 1 << Idx
- // %cond = icmp ne i16 %mask_1, 0
- // br i1 %Mask1, label %cond.store, label %else
- //
- Value *Predicate;
- if (VectorWidth != 1) {
- Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
- Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
- Builder.getIntN(VectorWidth, 0));
- } else {
- Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
- }
-
- // Create "cond" block
- //
- // %Elt1 = extractelement <16 x i32> %Src, i32 1
- // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
- // %store i32 %Elt1, i32* %Ptr1
- //
- BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
- Builder.SetInsertPoint(InsertPt);
-
- Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
- Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
- Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
-
- // Create "else" block, fill it in the next iteration
- BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
- Builder.SetInsertPoint(InsertPt);
- Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
- OldBr->eraseFromParent();
- IfBlock = NewIfBlock;
- }
- CI->eraseFromParent();
-
- ModifiedDT = true;
-}
-
-static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) {
- Value *Ptr = CI->getArgOperand(0);
- Value *Mask = CI->getArgOperand(1);
- Value *PassThru = CI->getArgOperand(2);
-
- auto *VecType = cast<FixedVectorType>(CI->getType());
-
- Type *EltTy = VecType->getElementType();
-
- IRBuilder<> Builder(CI->getContext());
- Instruction *InsertPt = CI;
- BasicBlock *IfBlock = CI->getParent();
-
- Builder.SetInsertPoint(InsertPt);
- Builder.SetCurrentDebugLocation(CI->getDebugLoc());
-
- unsigned VectorWidth = VecType->getNumElements();
-
- // The result vector
- Value *VResult = PassThru;
-
- // Shorten the way if the mask is a vector of constants.
- // Create a build_vector pattern, with loads/undefs as necessary and then
- // shuffle blend with the pass through value.
- if (isConstantIntVector(Mask)) {
- unsigned MemIndex = 0;
- VResult = UndefValue::get(VecType);
- SmallVector<int, 16> ShuffleMask(VectorWidth, UndefMaskElem);
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- Value *InsertElt;
- if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) {
- InsertElt = UndefValue::get(EltTy);
- ShuffleMask[Idx] = Idx + VectorWidth;
- } else {
- Value *NewPtr =
- Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
- InsertElt = Builder.CreateAlignedLoad(EltTy, NewPtr, Align(1),
- "Load" + Twine(Idx));
- ShuffleMask[Idx] = Idx;
- ++MemIndex;
- }
- VResult = Builder.CreateInsertElement(VResult, InsertElt, Idx,
- "Res" + Twine(Idx));
- }
- VResult = Builder.CreateShuffleVector(VResult, PassThru, ShuffleMask);
- CI->replaceAllUsesWith(VResult);
- CI->eraseFromParent();
- return;
- }
-
- // If the mask is not v1i1, use scalar bit test operations. This generates
- // better results on X86 at least.
- Value *SclrMask;
- if (VectorWidth != 1) {
- Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
- SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
- }
-
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- // Fill the "else" block, created in the previous iteration
- //
- // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
- // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
- // br i1 %mask_1, label %cond.load, label %else
- //
-
- Value *Predicate;
- if (VectorWidth != 1) {
- Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
- Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
- Builder.getIntN(VectorWidth, 0));
- } else {
- Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
- }
-
- // Create "cond" block
- //
- // %EltAddr = getelementptr i32* %1, i32 0
- // %Elt = load i32* %EltAddr
- // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
- //
- BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(),
- "cond.load");
- Builder.SetInsertPoint(InsertPt);
-
- LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, Align(1));
- Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
-
- // Move the pointer if there are more blocks to come.
- Value *NewPtr;
- if ((Idx + 1) != VectorWidth)
- NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
-
- // Create "else" block, fill it in the next iteration
- BasicBlock *NewIfBlock =
- CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
- Builder.SetInsertPoint(InsertPt);
- Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
- OldBr->eraseFromParent();
- BasicBlock *PrevIfBlock = IfBlock;
- IfBlock = NewIfBlock;
-
- // Create the phi to join the new and previous value.
- PHINode *ResultPhi = Builder.CreatePHI(VecType, 2, "res.phi.else");
- ResultPhi->addIncoming(NewVResult, CondBlock);
- ResultPhi->addIncoming(VResult, PrevIfBlock);
- VResult = ResultPhi;
-
- // Add a PHI for the pointer if this isn't the last iteration.
- if ((Idx + 1) != VectorWidth) {
- PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
- PtrPhi->addIncoming(NewPtr, CondBlock);
- PtrPhi->addIncoming(Ptr, PrevIfBlock);
- Ptr = PtrPhi;
- }
- }
-
- CI->replaceAllUsesWith(VResult);
- CI->eraseFromParent();
-
- ModifiedDT = true;
-}
-
-static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) {
- Value *Src = CI->getArgOperand(0);
- Value *Ptr = CI->getArgOperand(1);
- Value *Mask = CI->getArgOperand(2);
-
- auto *VecType = cast<FixedVectorType>(Src->getType());
-
- IRBuilder<> Builder(CI->getContext());
- Instruction *InsertPt = CI;
- BasicBlock *IfBlock = CI->getParent();
-
- Builder.SetInsertPoint(InsertPt);
- Builder.SetCurrentDebugLocation(CI->getDebugLoc());
-
- Type *EltTy = VecType->getElementType();
-
- unsigned VectorWidth = VecType->getNumElements();
-
- // Shorten the way if the mask is a vector of constants.
- if (isConstantIntVector(Mask)) {
- unsigned MemIndex = 0;
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
- continue;
- Value *OneElt =
- Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
- Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
- Builder.CreateAlignedStore(OneElt, NewPtr, Align(1));
- ++MemIndex;
- }
- CI->eraseFromParent();
- return;
- }
-
- // If the mask is not v1i1, use scalar bit test operations. This generates
- // better results on X86 at least.
- Value *SclrMask;
- if (VectorWidth != 1) {
- Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
- SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
- }
-
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- // Fill the "else" block, created in the previous iteration
- //
- // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
- // br i1 %mask_1, label %cond.store, label %else
- //
- Value *Predicate;
- if (VectorWidth != 1) {
- Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
- Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
- Builder.getIntN(VectorWidth, 0));
- } else {
- Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
- }
-
- // Create "cond" block
- //
- // %OneElt = extractelement <16 x i32> %Src, i32 Idx
- // %EltAddr = getelementptr i32* %1, i32 0
- // %store i32 %OneElt, i32* %EltAddr
- //
- BasicBlock *CondBlock =
- IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
- Builder.SetInsertPoint(InsertPt);
-
- Value *OneElt = Builder.CreateExtractElement(Src, Idx);
- Builder.CreateAlignedStore(OneElt, Ptr, Align(1));
-
- // Move the pointer if there are more blocks to come.
- Value *NewPtr;
- if ((Idx + 1) != VectorWidth)
- NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
-
- // Create "else" block, fill it in the next iteration
- BasicBlock *NewIfBlock =
- CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
- Builder.SetInsertPoint(InsertPt);
- Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
- OldBr->eraseFromParent();
- BasicBlock *PrevIfBlock = IfBlock;
- IfBlock = NewIfBlock;
-
- // Add a PHI for the pointer if this isn't the last iteration.
- if ((Idx + 1) != VectorWidth) {
- PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
- PtrPhi->addIncoming(NewPtr, CondBlock);
- PtrPhi->addIncoming(Ptr, PrevIfBlock);
- Ptr = PtrPhi;
- }
- }
- CI->eraseFromParent();
-
- ModifiedDT = true;
-}
-
-static bool runImpl(Function &F, const TargetTransformInfo &TTI) {
- bool EverMadeChange = false;
- bool MadeChange = true;
- auto &DL = F.getParent()->getDataLayout();
- while (MadeChange) {
- MadeChange = false;
- for (Function::iterator I = F.begin(); I != F.end();) {
- BasicBlock *BB = &*I++;
- bool ModifiedDTOnIteration = false;
- MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration, TTI, DL);
-
- // Restart BB iteration if the dominator tree of the Function was changed
- if (ModifiedDTOnIteration)
- break;
- }
-
- EverMadeChange |= MadeChange;
- }
- return EverMadeChange;
-}
-
-bool ScalarizeMaskedMemIntrinLegacyPass::runOnFunction(Function &F) {
- auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- return runImpl(F, TTI);
-}
-
-PreservedAnalyses
-ScalarizeMaskedMemIntrinPass::run(Function &F, FunctionAnalysisManager &AM) {
- auto &TTI = AM.getResult<TargetIRAnalysis>(F);
- if (!runImpl(F, TTI))
- return PreservedAnalyses::all();
- PreservedAnalyses PA;
- PA.preserve<TargetIRAnalysis>();
- return PA;
-}
-
-static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
- const TargetTransformInfo &TTI,
- const DataLayout &DL) {
- bool MadeChange = false;
-
- BasicBlock::iterator CurInstIterator = BB.begin();
- while (CurInstIterator != BB.end()) {
- if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++))
- MadeChange |= optimizeCallInst(CI, ModifiedDT, TTI, DL);
- if (ModifiedDT)
- return true;
- }
-
- return MadeChange;
-}
-
-static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
- const TargetTransformInfo &TTI,
- const DataLayout &DL) {
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
- if (II) {
- // The scalarization code below does not work for scalable vectors.
- if (isa<ScalableVectorType>(II->getType()) ||
- any_of(II->arg_operands(),
- [](Value *V) { return isa<ScalableVectorType>(V->getType()); }))
- return false;
-
- switch (II->getIntrinsicID()) {
- default:
- break;
- case Intrinsic::masked_load:
- // Scalarize unsupported vector masked load
- if (TTI.isLegalMaskedLoad(
- CI->getType(),
- cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue()))
- return false;
- scalarizeMaskedLoad(CI, ModifiedDT);
- return true;
- case Intrinsic::masked_store:
- if (TTI.isLegalMaskedStore(
- CI->getArgOperand(0)->getType(),
- cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue()))
- return false;
- scalarizeMaskedStore(CI, ModifiedDT);
- return true;
- case Intrinsic::masked_gather: {
- unsigned AlignmentInt =
- cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
- Type *LoadTy = CI->getType();
- Align Alignment =
- DL.getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), LoadTy);
- if (TTI.isLegalMaskedGather(LoadTy, Alignment))
- return false;
- scalarizeMaskedGather(CI, ModifiedDT);
- return true;
- }
- case Intrinsic::masked_scatter: {
- unsigned AlignmentInt =
- cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
- Type *StoreTy = CI->getArgOperand(0)->getType();
- Align Alignment =
- DL.getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), StoreTy);
- if (TTI.isLegalMaskedScatter(StoreTy, Alignment))
- return false;
- scalarizeMaskedScatter(CI, ModifiedDT);
- return true;
- }
- case Intrinsic::masked_expandload:
- if (TTI.isLegalMaskedExpandLoad(CI->getType()))
- return false;
- scalarizeMaskedExpandLoad(CI, ModifiedDT);
- return true;
- case Intrinsic::masked_compressstore:
- if (TTI.isLegalMaskedCompressStore(CI->getArgOperand(0)->getType()))
- return false;
- scalarizeMaskedCompressStore(CI, ModifiedDT);
- return true;
- }
- }
-
- return false;
-}
+//===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===//
+//                                    intrinsics
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass replaces masked memory intrinsics that are unsupported by the
+// target with a chain of basic blocks that handle the elements one by one
+// when the corresponding mask bit is set.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Transforms/Scalar.h"
+#include <algorithm>
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "scalarize-masked-mem-intrin"
+
+namespace {
+
+class ScalarizeMaskedMemIntrinLegacyPass : public FunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ explicit ScalarizeMaskedMemIntrinLegacyPass() : FunctionPass(ID) {
+ initializeScalarizeMaskedMemIntrinLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ StringRef getPassName() const override {
+ return "Scalarize Masked Memory Intrinsics";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ }
+};
+
+} // end anonymous namespace
+
+static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
+ const TargetTransformInfo &TTI, const DataLayout &DL);
+static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
+ const TargetTransformInfo &TTI,
+ const DataLayout &DL);
+
+char ScalarizeMaskedMemIntrinLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
+ "Scalarize unsupported masked memory intrinsics", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
+ "Scalarize unsupported masked memory intrinsics", false,
+ false)
+
+FunctionPass *llvm::createScalarizeMaskedMemIntrinLegacyPass() {
+ return new ScalarizeMaskedMemIntrinLegacyPass();
+}
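As context for the registration boilerplate above, a minimal sketch (not part of this patch) of how the legacy pass is typically scheduled from client code; the wrapper function name and the Module `M` are assumptions of the example, while the pass-manager calls are the standard legacy-PM API.

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Scalar.h"

// Run ScalarizeMaskedMemIntrin (legacy PM flavour) over every function.
static void scheduleScalarizeMaskedMemIntrin(llvm::Module &M) {
  llvm::legacy::FunctionPassManager FPM(&M);
  FPM.add(llvm::createScalarizeMaskedMemIntrinLegacyPass());
  FPM.doInitialization();
  for (llvm::Function &F : M)
    FPM.run(F);
  FPM.doFinalization();
}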
+
+static bool isConstantIntVector(Value *Mask) {
+ Constant *C = dyn_cast<Constant>(Mask);
+ if (!C)
+ return false;
+
+ unsigned NumElts = cast<FixedVectorType>(Mask->getType())->getNumElements();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *CElt = C->getAggregateElement(i);
+ if (!CElt || !isa<ConstantInt>(CElt))
+ return false;
+ }
+
+ return true;
+}
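A small illustrative sketch of what this helper accepts; the function name is invented for the example, everything else is the standard Constant API.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"

// Builds the <4 x i1> mask <true, false, true, true>. Every lane is a
// ConstantInt, so isConstantIntVector() returns true; a mask with an undef
// (or otherwise non-ConstantInt) lane makes it return false.
static llvm::Constant *makeExampleMask(llvm::LLVMContext &Ctx) {
  llvm::SmallVector<llvm::Constant *, 4> Lanes = {
      llvm::ConstantInt::getTrue(Ctx), llvm::ConstantInt::getFalse(Ctx),
      llvm::ConstantInt::getTrue(Ctx), llvm::ConstantInt::getTrue(Ctx)};
  return llvm::ConstantVector::get(Lanes);
}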
+
+// Translate a masked load intrinsic like
+// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
+// <16 x i1> %mask, <16 x i32> %passthru)
+// to a chain of basic blocks that load the elements one by one when
+// the corresponding mask bit is set
+//
+// %1 = bitcast i8* %addr to i32*
+// %2 = extractelement <16 x i1> %mask, i32 0
+// br i1 %2, label %cond.load, label %else
+//
+// cond.load: ; preds = %0
+// %3 = getelementptr i32* %1, i32 0
+// %4 = load i32* %3
+// %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0
+// br label %else
+//
+// else: ; preds = %0, %cond.load
+// %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ undef, %0 ]
+// %6 = extractelement <16 x i1> %mask, i32 1
+// br i1 %6, label %cond.load1, label %else2
+//
+// cond.load1: ; preds = %else
+// %7 = getelementptr i32* %1, i32 1
+// %8 = load i32* %7
+// %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1
+// br label %else2
+//
+// else2: ; preds = %else, %cond.load1
+// %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ]
+// %10 = extractelement <16 x i1> %mask, i32 2
+// br i1 %10, label %cond.load4, label %else5
+//
+static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) {
+ Value *Ptr = CI->getArgOperand(0);
+ Value *Alignment = CI->getArgOperand(1);
+ Value *Mask = CI->getArgOperand(2);
+ Value *Src0 = CI->getArgOperand(3);
+
+ const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
+ VectorType *VecType = cast<FixedVectorType>(CI->getType());
+
+ Type *EltTy = VecType->getElementType();
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+
+ Builder.SetInsertPoint(InsertPt);
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ // Short-cut if the mask is all-true.
+ if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
+ Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal);
+ CI->replaceAllUsesWith(NewI);
+ CI->eraseFromParent();
+ return;
+ }
+
+ // Adjust alignment for the scalar instruction.
+ const Align AdjustedAlignVal =
+ commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
+ // Bitcast %addr from i8* to EltTy*
+ Type *NewPtrType =
+ EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
+ Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
+ unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
+
+ // The result vector
+ Value *VResult = Src0;
+
+ if (isConstantIntVector(Mask)) {
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
+ continue;
+ Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
+ LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
+ VResult = Builder.CreateInsertElement(VResult, Load, Idx);
+ }
+ CI->replaceAllUsesWith(VResult);
+ CI->eraseFromParent();
+ return;
+ }
+
+ // If the mask is not v1i1, use scalar bit test operations. This generates
+ // better results on X86 at least.
+ Value *SclrMask;
+ if (VectorWidth != 1) {
+ Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
+ SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
+ }
+
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
+ // %mask_1 = and i16 %scalar_mask, i32 1 << Idx
+ // %cond = icmp ne i16 %mask_1, 0
+ // br i1 %mask_1, label %cond.load, label %else
+ //
+ Value *Predicate;
+ if (VectorWidth != 1) {
+ Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
+ Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
+ Builder.getIntN(VectorWidth, 0));
+ } else {
+ Predicate = Builder.CreateExtractElement(Mask, Idx);
+ }
+
+ // Create "cond" block
+ //
+ // %EltAddr = getelementptr i32* %1, i32 0
+ // %Elt = load i32* %EltAddr
+ // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
+ //
+ BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(),
+ "cond.load");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
+ LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
+ Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock =
+ CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
+ OldBr->eraseFromParent();
+ BasicBlock *PrevIfBlock = IfBlock;
+ IfBlock = NewIfBlock;
+
+ // Create the phi to join the new and previous value.
+ PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+ Phi->addIncoming(NewVResult, CondBlock);
+ Phi->addIncoming(VResult, PrevIfBlock);
+ VResult = Phi;
+ }
+
+ CI->replaceAllUsesWith(VResult);
+ CI->eraseFromParent();
+
+ ModifiedDT = true;
+}
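The interesting piece of the variable-mask path is the scalar bit test; restated in isolation as a sketch (the helper name is invented, the parameters mirror the loop above):

#include "llvm/ADT/APInt.h"
#include "llvm/IR/IRBuilder.h"

// Reinterpret the <N x i1> mask as an iN integer and test bit Idx; the result
// is the i1 condition that guards the per-lane "cond.load" block.
static llvm::Value *emitLaneBitTest(llvm::IRBuilder<> &Builder,
                                    llvm::Value *Mask, unsigned VectorWidth,
                                    unsigned Idx) {
  llvm::Value *SclrMask =
      Builder.CreateBitCast(Mask, Builder.getIntNTy(VectorWidth), "scalar_mask");
  llvm::Value *Bit =
      Builder.getInt(llvm::APInt::getOneBitSet(VectorWidth, Idx));
  return Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Bit),
                              Builder.getIntN(VectorWidth, 0));
}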
+
+// Translate a masked store intrinsic, like
+// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
+// <16 x i1> %mask)
+// to a chain of basic blocks that store the elements one by one when
+// the corresponding mask bit is set
+//
+// %1 = bitcast i8* %addr to i32*
+// %2 = extractelement <16 x i1> %mask, i32 0
+// br i1 %2, label %cond.store, label %else
+//
+// cond.store: ; preds = %0
+// %3 = extractelement <16 x i32> %val, i32 0
+// %4 = getelementptr i32* %1, i32 0
+// store i32 %3, i32* %4
+// br label %else
+//
+// else: ; preds = %0, %cond.store
+// %5 = extractelement <16 x i1> %mask, i32 1
+// br i1 %5, label %cond.store1, label %else2
+//
+// cond.store1: ; preds = %else
+// %6 = extractelement <16 x i32> %val, i32 1
+// %7 = getelementptr i32* %1, i32 1
+// store i32 %6, i32* %7
+// br label %else2
+// . . .
+static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) {
+ Value *Src = CI->getArgOperand(0);
+ Value *Ptr = CI->getArgOperand(1);
+ Value *Alignment = CI->getArgOperand(2);
+ Value *Mask = CI->getArgOperand(3);
+
+ const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
+ auto *VecType = cast<VectorType>(Src->getType());
+
+ Type *EltTy = VecType->getElementType();
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+ Builder.SetInsertPoint(InsertPt);
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ // Short-cut if the mask is all-true.
+ if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
+ Builder.CreateAlignedStore(Src, Ptr, AlignVal);
+ CI->eraseFromParent();
+ return;
+ }
+
+ // Adjust alignment for the scalar instruction.
+ const Align AdjustedAlignVal =
+ commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
+ // Bitcast %addr from i8* to EltTy*
+ Type *NewPtrType =
+ EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
+ Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
+ unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
+
+ if (isConstantIntVector(Mask)) {
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
+ continue;
+ Value *OneElt = Builder.CreateExtractElement(Src, Idx);
+ Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
+ Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
+ }
+ CI->eraseFromParent();
+ return;
+ }
+
+ // If the mask is not v1i1, use scalar bit test operations. This generates
+ // better results on X86 at least.
+ Value *SclrMask;
+ if (VectorWidth != 1) {
+ Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
+ SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
+ }
+
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %mask_1 = and i16 %scalar_mask, i32 1 << Idx
+ // %cond = icmp ne i16 %mask_1, 0
+ // br i1 %mask_1, label %cond.store, label %else
+ //
+ Value *Predicate;
+ if (VectorWidth != 1) {
+ Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
+ Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
+ Builder.getIntN(VectorWidth, 0));
+ } else {
+ Predicate = Builder.CreateExtractElement(Mask, Idx);
+ }
+
+ // Create "cond" block
+ //
+ // %OneElt = extractelement <16 x i32> %Src, i32 Idx
+ // %EltAddr = getelementptr i32* %1, i32 0
+ // %store i32 %OneElt, i32* %EltAddr
+ //
+ BasicBlock *CondBlock =
+ IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value *OneElt = Builder.CreateExtractElement(Src, Idx);
+ Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
+ Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock =
+ CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
+ OldBr->eraseFromParent();
+ IfBlock = NewIfBlock;
+ }
+ CI->eraseFromParent();
+
+ ModifiedDT = true;
+}
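To make the constant-mask fast path above concrete (an illustrative sketch, not taken from a test): for a <4 x i32> store with mask <true, false, true, false> and alignment 4, no control flow is emitted and the call lowers roughly to:

//   %base = bitcast <4 x i32>* %p to i32*
//   %e0   = extractelement <4 x i32> %val, i64 0
//   %p0   = getelementptr inbounds i32, i32* %base, i32 0
//   store i32 %e0, i32* %p0, align 4
//   %e2   = extractelement <4 x i32> %val, i64 2
//   %p2   = getelementptr inbounds i32, i32* %base, i32 2
//   store i32 %e2, i32* %p2, align 4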
+
+// Translate a masked gather intrinsic like
+// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
+// <16 x i1> %Mask, <16 x i32> %Src)
+// to a chain of basic blocks that load the elements one by one when
+// the corresponding mask bit is set
+//
+// %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
+// %Mask0 = extractelement <16 x i1> %Mask, i32 0
+// br i1 %Mask0, label %cond.load, label %else
+//
+// cond.load:
+// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
+// %Load0 = load i32, i32* %Ptr0, align 4
+// %Res0 = insertelement <16 x i32> undef, i32 %Load0, i32 0
+// br label %else
+//
+// else:
+// %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [undef, %0]
+// %Mask1 = extractelement <16 x i1> %Mask, i32 1
+// br i1 %Mask1, label %cond.load1, label %else2
+//
+// cond.load1:
+// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+// %Load1 = load i32, i32* %Ptr1, align 4
+// %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1
+// br label %else2
+// . . .
+// %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
+// ret <16 x i32> %Result
+static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) {
+ Value *Ptrs = CI->getArgOperand(0);
+ Value *Alignment = CI->getArgOperand(1);
+ Value *Mask = CI->getArgOperand(2);
+ Value *Src0 = CI->getArgOperand(3);
+
+ auto *VecType = cast<FixedVectorType>(CI->getType());
+ Type *EltTy = VecType->getElementType();
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+ Builder.SetInsertPoint(InsertPt);
+ MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
+
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ // The result vector
+ Value *VResult = Src0;
+ unsigned VectorWidth = VecType->getNumElements();
+
+ // Take the fast path if the mask is a vector of constants.
+ if (isConstantIntVector(Mask)) {
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
+ continue;
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
+ LoadInst *Load =
+ Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
+ VResult =
+ Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
+ }
+ CI->replaceAllUsesWith(VResult);
+ CI->eraseFromParent();
+ return;
+ }
+
+ // If the mask is not v1i1, use scalar bit test operations. This generates
+ // better results on X86 at least.
+ Value *SclrMask;
+ if (VectorWidth != 1) {
+ Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
+ SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
+ }
+
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %Mask1 = and i16 %scalar_mask, i32 1 << Idx
+ // %cond = icmp ne i16 %mask_1, 0
+ // br i1 %Mask1, label %cond.load, label %else
+ //
+
+ Value *Predicate;
+ if (VectorWidth != 1) {
+ Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
+ Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
+ Builder.getIntN(VectorWidth, 0));
+ } else {
+ Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
+ }
+
+ // Create "cond" block
+ //
+ // %EltAddr = getelementptr i32* %1, i32 0
+ // %Elt = load i32* %EltAddr
+ // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
+ //
+ BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
+ LoadInst *Load =
+ Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
+ Value *NewVResult =
+ Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
+ OldBr->eraseFromParent();
+ BasicBlock *PrevIfBlock = IfBlock;
+ IfBlock = NewIfBlock;
+
+ PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+ Phi->addIncoming(NewVResult, CondBlock);
+ Phi->addIncoming(VResult, PrevIfBlock);
+ VResult = Phi;
+ }
+
+ CI->replaceAllUsesWith(VResult);
+ CI->eraseFromParent();
+
+ ModifiedDT = true;
+}
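One detail worth spelling out (illustrative, using the %Src/%Ptrs names from the header comment and a 4-lane example): because VResult is seeded with the pass-through %Src, no trailing select is needed. In the constant-mask path an enabled lane becomes

//   %Ptr1  = extractelement <4 x i32*> %Ptrs, i64 1
//   %Load1 = load i32, i32* %Ptr1, align 4
//   %Res1  = insertelement <4 x i32> %Src, i32 %Load1, i64 1

while a disabled lane simply keeps the value already present in the result vector.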
+
+// Translate a masked scatter intrinsic, like
+// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4,
+// <16 x i1> %Mask)
+// to a chain of basic blocks that store the elements one by one when
+// the corresponding mask bit is set.
+//
+// %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
+// %Mask0 = extractelement <16 x i1> %Mask, i32 0
+// br i1 %Mask0, label %cond.store, label %else
+//
+// cond.store:
+// %Elt0 = extractelement <16 x i32> %Src, i32 0
+// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
+// store i32 %Elt0, i32* %Ptr0, align 4
+// br label %else
+//
+// else:
+// %Mask1 = extractelement <16 x i1> %Mask, i32 1
+// br i1 %Mask1, label %cond.store1, label %else2
+//
+// cond.store1:
+// %Elt1 = extractelement <16 x i32> %Src, i32 1
+// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+// store i32 %Elt1, i32* %Ptr1, align 4
+// br label %else2
+// . . .
+static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) {
+ Value *Src = CI->getArgOperand(0);
+ Value *Ptrs = CI->getArgOperand(1);
+ Value *Alignment = CI->getArgOperand(2);
+ Value *Mask = CI->getArgOperand(3);
+
+ auto *SrcFVTy = cast<FixedVectorType>(Src->getType());
+
+ assert(
+ isa<VectorType>(Ptrs->getType()) &&
+ isa<PointerType>(cast<VectorType>(Ptrs->getType())->getElementType()) &&
+ "Vector of pointers is expected in masked scatter intrinsic");
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+ Builder.SetInsertPoint(InsertPt);
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
+ unsigned VectorWidth = SrcFVTy->getNumElements();
+
+ // Take the fast path if the mask is a vector of constants.
+ if (isConstantIntVector(Mask)) {
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
+ continue;
+ Value *OneElt =
+ Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
+ Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
+ }
+ CI->eraseFromParent();
+ return;
+ }
+
+ // If the mask is not v1i1, use scalar bit test operations. This generates
+ // better results on X86 at least.
+ Value *SclrMask;
+ if (VectorWidth != 1) {
+ Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
+ SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
+ }
+
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %Mask1 = and i16 %scalar_mask, i32 1 << Idx
+ // %cond = icmp ne i16 %mask_1, 0
+ // br i1 %Mask1, label %cond.store, label %else
+ //
+ Value *Predicate;
+ if (VectorWidth != 1) {
+ Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
+ Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
+ Builder.getIntN(VectorWidth, 0));
+ } else {
+ Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
+ }
+
+ // Create "cond" block
+ //
+ // %Elt1 = extractelement <16 x i32> %Src, i32 1
+ // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+ // %store i32 %Elt1, i32* %Ptr1
+ //
+ BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
+ Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
+ OldBr->eraseFromParent();
+ IfBlock = NewIfBlock;
+ }
+ CI->eraseFromParent();
+
+ ModifiedDT = true;
+}
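A concrete instance of the constant-mask branch above (illustrative): scattering <4 x i32> %Src with mask <true, false, false, true> becomes two plain stores through the extracted lane pointers:

//   %Elt0 = extractelement <4 x i32> %Src, i64 0
//   %Ptr0 = extractelement <4 x i32*> %Ptrs, i64 0
//   store i32 %Elt0, i32* %Ptr0, align 4
//   %Elt3 = extractelement <4 x i32> %Src, i64 3
//   %Ptr3 = extractelement <4 x i32*> %Ptrs, i64 3
//   store i32 %Elt3, i32* %Ptr3, align 4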
+
+static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) {
+ Value *Ptr = CI->getArgOperand(0);
+ Value *Mask = CI->getArgOperand(1);
+ Value *PassThru = CI->getArgOperand(2);
+
+ auto *VecType = cast<FixedVectorType>(CI->getType());
+
+ Type *EltTy = VecType->getElementType();
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+
+ Builder.SetInsertPoint(InsertPt);
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ unsigned VectorWidth = VecType->getNumElements();
+
+ // The result vector
+ Value *VResult = PassThru;
+
+ // Take the fast path if the mask is a vector of constants: build a vector
+ // of the loaded elements (undef in the disabled lanes) and then
+ // shuffle-blend it with the pass-through value.
+ if (isConstantIntVector(Mask)) {
+ unsigned MemIndex = 0;
+ VResult = UndefValue::get(VecType);
+ SmallVector<int, 16> ShuffleMask(VectorWidth, UndefMaskElem);
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ Value *InsertElt;
+ if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) {
+ InsertElt = UndefValue::get(EltTy);
+ ShuffleMask[Idx] = Idx + VectorWidth;
+ } else {
+ Value *NewPtr =
+ Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
+ InsertElt = Builder.CreateAlignedLoad(EltTy, NewPtr, Align(1),
+ "Load" + Twine(Idx));
+ ShuffleMask[Idx] = Idx;
+ ++MemIndex;
+ }
+ VResult = Builder.CreateInsertElement(VResult, InsertElt, Idx,
+ "Res" + Twine(Idx));
+ }
+ VResult = Builder.CreateShuffleVector(VResult, PassThru, ShuffleMask);
+ CI->replaceAllUsesWith(VResult);
+ CI->eraseFromParent();
+ return;
+ }
+
+ // If the mask is not v1i1, use scalar bit test operations. This generates
+ // better results on X86 at least.
+ Value *SclrMask;
+ if (VectorWidth != 1) {
+ Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
+ SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
+ }
+
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
+ // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+ // br i1 %mask_1, label %cond.load, label %else
+ //
+
+ Value *Predicate;
+ if (VectorWidth != 1) {
+ Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
+ Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
+ Builder.getIntN(VectorWidth, 0));
+ } else {
+ Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
+ }
+
+ // Create "cond" block
+ //
+ // %EltAddr = getelementptr i32* %1, i32 0
+ // %Elt = load i32* %EltAddr
+ // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
+ //
+ BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(),
+ "cond.load");
+ Builder.SetInsertPoint(InsertPt);
+
+ LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, Align(1));
+ Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
+
+ // Move the pointer if there are more blocks to come.
+ Value *NewPtr;
+ if ((Idx + 1) != VectorWidth)
+ NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock =
+ CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
+ OldBr->eraseFromParent();
+ BasicBlock *PrevIfBlock = IfBlock;
+ IfBlock = NewIfBlock;
+
+ // Create the phi to join the new and previous value.
+ PHINode *ResultPhi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+ ResultPhi->addIncoming(NewVResult, CondBlock);
+ ResultPhi->addIncoming(VResult, PrevIfBlock);
+ VResult = ResultPhi;
+
+ // Add a PHI for the pointer if this isn't the last iteration.
+ if ((Idx + 1) != VectorWidth) {
+ PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
+ PtrPhi->addIncoming(NewPtr, CondBlock);
+ PtrPhi->addIncoming(Ptr, PrevIfBlock);
+ Ptr = PtrPhi;
+ }
+ }
+
+ CI->replaceAllUsesWith(VResult);
+ CI->eraseFromParent();
+
+ ModifiedDT = true;
+}
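A worked example of the constant-mask branch above (illustrative; GEP names abbreviated): expanding-load <4 x i32> with mask <true, false, true, true> reads three consecutive memory slots and blends them with the pass-through via a shuffle whose disabled lane indexes the second operand:

//   %Load0 = load i32, i32* %ptr, align 1            ; slot 0 -> lane 0
//   %p1    = getelementptr inbounds i32, i32* %ptr, i32 1
//   %Load2 = load i32, i32* %p1, align 1             ; slot 1 -> lane 2
//   %p2    = getelementptr inbounds i32, i32* %ptr, i32 2
//   %Load3 = load i32, i32* %p2, align 1             ; slot 2 -> lane 3
//   %res   = shufflevector <4 x i32> %vec, <4 x i32> %PassThru,
//                          <4 x i32> <i32 0, i32 5, i32 2, i32 3>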
+
+static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) {
+ Value *Src = CI->getArgOperand(0);
+ Value *Ptr = CI->getArgOperand(1);
+ Value *Mask = CI->getArgOperand(2);
+
+ auto *VecType = cast<FixedVectorType>(Src->getType());
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+
+ Builder.SetInsertPoint(InsertPt);
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ Type *EltTy = VecType->getElementType();
+
+ unsigned VectorWidth = VecType->getNumElements();
+
+ // Take the fast path if the mask is a vector of constants.
+ if (isConstantIntVector(Mask)) {
+ unsigned MemIndex = 0;
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
+ continue;
+ Value *OneElt =
+ Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
+ Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
+ Builder.CreateAlignedStore(OneElt, NewPtr, Align(1));
+ ++MemIndex;
+ }
+ CI->eraseFromParent();
+ return;
+ }
+
+ // If the mask is not v1i1, use scalar bit test operations. This generates
+ // better results on X86 at least.
+ Value *SclrMask;
+ if (VectorWidth != 1) {
+ Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
+ SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
+ }
+
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+ // br i1 %mask_1, label %cond.store, label %else
+ //
+ Value *Predicate;
+ if (VectorWidth != 1) {
+ Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
+ Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
+ Builder.getIntN(VectorWidth, 0));
+ } else {
+ Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
+ }
+
+ // Create "cond" block
+ //
+ // %OneElt = extractelement <16 x i32> %Src, i32 Idx
+ // %EltAddr = getelementptr i32* %1, i32 0
+ // %store i32 %OneElt, i32* %EltAddr
+ //
+ BasicBlock *CondBlock =
+ IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value *OneElt = Builder.CreateExtractElement(Src, Idx);
+ Builder.CreateAlignedStore(OneElt, Ptr, Align(1));
+
+ // Move the pointer if there are more blocks to come.
+ Value *NewPtr;
+ if ((Idx + 1) != VectorWidth)
+ NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock =
+ CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
+ OldBr->eraseFromParent();
+ BasicBlock *PrevIfBlock = IfBlock;
+ IfBlock = NewIfBlock;
+
+ // Add a PHI for the pointer if this isn't the last iteration.
+ if ((Idx + 1) != VectorWidth) {
+ PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
+ PtrPhi->addIncoming(NewPtr, CondBlock);
+ PtrPhi->addIncoming(Ptr, PrevIfBlock);
+ Ptr = PtrPhi;
+ }
+ }
+ CI->eraseFromParent();
+
+ ModifiedDT = true;
+}
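And the mirror image for compress-store (illustrative, matching the constant-mask branch above; GEP names abbreviated): with mask <true, false, true, true>, the three enabled lanes of %Src are written to consecutive memory slots:

//   %Elt0 = extractelement <4 x i32> %Src, i64 0
//   store i32 %Elt0, i32* %ptr, align 1                    ; slot 0
//   %Elt2 = extractelement <4 x i32> %Src, i64 2
//   %p1   = getelementptr inbounds i32, i32* %ptr, i32 1
//   store i32 %Elt2, i32* %p1, align 1                     ; slot 1
//   %Elt3 = extractelement <4 x i32> %Src, i64 3
//   %p2   = getelementptr inbounds i32, i32* %ptr, i32 2
//   store i32 %Elt3, i32* %p2, align 1                     ; slot 2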
+
+static bool runImpl(Function &F, const TargetTransformInfo &TTI) {
+ bool EverMadeChange = false;
+ bool MadeChange = true;
+ auto &DL = F.getParent()->getDataLayout();
+ while (MadeChange) {
+ MadeChange = false;
+ for (Function::iterator I = F.begin(); I != F.end();) {
+ BasicBlock *BB = &*I++;
+ bool ModifiedDTOnIteration = false;
+ MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration, TTI, DL);
+
+ // Restart BB iteration if the dominator tree of the Function was changed
+ if (ModifiedDTOnIteration)
+ break;
+ }
+
+ EverMadeChange |= MadeChange;
+ }
+ return EverMadeChange;
+}
+
+bool ScalarizeMaskedMemIntrinLegacyPass::runOnFunction(Function &F) {
+ auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ return runImpl(F, TTI);
+}
+
+PreservedAnalyses
+ScalarizeMaskedMemIntrinPass::run(Function &F, FunctionAnalysisManager &AM) {
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ if (!runImpl(F, TTI))
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<TargetIRAnalysis>();
+ return PA;
+}
+
+static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
+ const TargetTransformInfo &TTI,
+ const DataLayout &DL) {
+ bool MadeChange = false;
+
+ BasicBlock::iterator CurInstIterator = BB.begin();
+ while (CurInstIterator != BB.end()) {
+ if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++))
+ MadeChange |= optimizeCallInst(CI, ModifiedDT, TTI, DL);
+ if (ModifiedDT)
+ return true;
+ }
+
+ return MadeChange;
+}
+
+static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
+ const TargetTransformInfo &TTI,
+ const DataLayout &DL) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
+ if (II) {
+ // The scalarization code below does not work for scalable vectors.
+ if (isa<ScalableVectorType>(II->getType()) ||
+ any_of(II->arg_operands(),
+ [](Value *V) { return isa<ScalableVectorType>(V->getType()); }))
+ return false;
+
+ switch (II->getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::masked_load:
+ // Scalarize unsupported vector masked load
+ if (TTI.isLegalMaskedLoad(
+ CI->getType(),
+ cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue()))
+ return false;
+ scalarizeMaskedLoad(CI, ModifiedDT);
+ return true;
+ case Intrinsic::masked_store:
+ if (TTI.isLegalMaskedStore(
+ CI->getArgOperand(0)->getType(),
+ cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue()))
+ return false;
+ scalarizeMaskedStore(CI, ModifiedDT);
+ return true;
+ case Intrinsic::masked_gather: {
+ unsigned AlignmentInt =
+ cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+ Type *LoadTy = CI->getType();
+ Align Alignment =
+ DL.getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), LoadTy);
+ if (TTI.isLegalMaskedGather(LoadTy, Alignment))
+ return false;
+ scalarizeMaskedGather(CI, ModifiedDT);
+ return true;
+ }
+ case Intrinsic::masked_scatter: {
+ unsigned AlignmentInt =
+ cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+ Type *StoreTy = CI->getArgOperand(0)->getType();
+ Align Alignment =
+ DL.getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), StoreTy);
+ if (TTI.isLegalMaskedScatter(StoreTy, Alignment))
+ return false;
+ scalarizeMaskedScatter(CI, ModifiedDT);
+ return true;
+ }
+ case Intrinsic::masked_expandload:
+ if (TTI.isLegalMaskedExpandLoad(CI->getType()))
+ return false;
+ scalarizeMaskedExpandLoad(CI, ModifiedDT);
+ return true;
+ case Intrinsic::masked_compressstore:
+ if (TTI.isLegalMaskedCompressStore(CI->getArgOperand(0)->getType()))
+ return false;
+ scalarizeMaskedCompressStore(CI, ModifiedDT);
+ return true;
+ }
+ }
+
+ return false;
+}
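To make the dispatch above concrete (hypothetical target behaviour): whether a call is rewritten at all is decided purely by the TTI legality hooks. For example, given

//   %g = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(
//            <4 x float*> %ptrs, i32 4, <4 x i1> %m, <4 x float> %pass)

if TTI.isLegalMaskedGather returns false for <4 x float> at align 4, the call is expanded by scalarizeMaskedGather() above; otherwise it is left untouched for the backend to select.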
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/Scalarizer.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/Scalarizer.cpp
index c95984fe19..130793abff 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/Scalarizer.cpp
@@ -398,8 +398,8 @@ void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV) {
continue;
Instruction *Old = cast<Instruction>(V);
- if (isa<Instruction>(CV[I]))
- CV[I]->takeName(Old);
+ if (isa<Instruction>(CV[I]))
+ CV[I]->takeName(Old);
Old->replaceAllUsesWith(CV[I]);
PotentiallyDeadInstrs.emplace_back(Old);
}
@@ -733,7 +733,7 @@ bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
auto *MidTy = FixedVectorType::get(SrcVT->getElementType(), FanIn);
unsigned Op0I = 0;
for (unsigned ResI = 0; ResI < DstNumElems; ++ResI) {
- Value *V = PoisonValue::get(MidTy);
+ Value *V = PoisonValue::get(MidTy);
for (unsigned MidI = 0; MidI < FanIn; ++MidI)
V = Builder.CreateInsertElement(V, Op0[Op0I++], Builder.getInt32(MidI),
BCI.getName() + ".i" + Twine(ResI)
@@ -932,7 +932,7 @@ bool ScalarizerVisitor::finish() {
if (!Op->use_empty()) {
// The value is still needed, so recreate it using a series of
// InsertElements.
- Value *Res = PoisonValue::get(Op->getType());
+ Value *Res = PoisonValue::get(Op->getType());
if (auto *Ty = dyn_cast<VectorType>(Op->getType())) {
BasicBlock *BB = Op->getParent();
unsigned Count = cast<FixedVectorType>(Ty)->getNumElements();
@@ -942,7 +942,7 @@ bool ScalarizerVisitor::finish() {
for (unsigned I = 0; I < Count; ++I)
Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I),
Op->getName() + ".upto" + Twine(I));
- Res->takeName(Op);
+ Res->takeName(Op);
} else {
assert(CV.size() == 1 && Op->getType() == CV[0]->getType());
Res = CV[0];
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index f216956406..c63a069193 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -155,7 +155,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
+#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
@@ -178,7 +178,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
@@ -344,14 +344,14 @@ private:
/// A pass that tries to split every GEP in the function into a variadic
/// base and a constant offset. It is a FunctionPass because searching for the
/// constant offset may inspect other basic blocks.
-class SeparateConstOffsetFromGEPLegacyPass : public FunctionPass {
+class SeparateConstOffsetFromGEPLegacyPass : public FunctionPass {
public:
static char ID;
- SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false)
+ SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false)
: FunctionPass(ID), LowerGEP(LowerGEP) {
- initializeSeparateConstOffsetFromGEPLegacyPassPass(
- *PassRegistry::getPassRegistry());
+ initializeSeparateConstOffsetFromGEPLegacyPassPass(
+ *PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -366,23 +366,23 @@ public:
bool runOnFunction(Function &F) override;
private:
- bool LowerGEP;
-};
-
-/// A pass that tries to split every GEP in the function into a variadic
-/// base and a constant offset. It is a FunctionPass because searching for the
-/// constant offset may inspect other basic blocks.
-class SeparateConstOffsetFromGEP {
-public:
- SeparateConstOffsetFromGEP(
- DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI,
- TargetLibraryInfo *TLI,
- function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP)
- : DT(DT), SE(SE), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP) {}
-
- bool run(Function &F);
-
-private:
+ bool LowerGEP;
+};
+
+/// A pass that tries to split every GEP in the function into a variadic
+/// base and a constant offset. It is a FunctionPass because searching for the
+/// constant offset may inspect other basic blocks.
+class SeparateConstOffsetFromGEP {
+public:
+ SeparateConstOffsetFromGEP(
+ DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI,
+ TargetLibraryInfo *TLI,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP)
+ : DT(DT), SE(SE), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP) {}
+
+ bool run(Function &F);
+
+private:
/// Tries to split the given GEP into a variadic base and a constant offset,
/// and returns true if the splitting succeeds.
bool splitGEP(GetElementPtrInst *GEP);
@@ -467,8 +467,8 @@ private:
ScalarEvolution *SE;
LoopInfo *LI;
TargetLibraryInfo *TLI;
- // Retrieved lazily since not always used.
- function_ref<TargetTransformInfo &(Function &)> GetTTI;
+ // Retrieved lazily since not always used.
+ function_ref<TargetTransformInfo &(Function &)> GetTTI;
/// Whether to lower a GEP with multiple indices into arithmetic operations or
/// multiple GEPs with a single index.
@@ -480,10 +480,10 @@ private:
} // end anonymous namespace
-char SeparateConstOffsetFromGEPLegacyPass::ID = 0;
+char SeparateConstOffsetFromGEPLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(
- SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
+ SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
"Split GEPs to a variadic base and a constant offset for better CSE", false,
false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
@@ -492,12 +492,12 @@ INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
- SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
+ SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
"Split GEPs to a variadic base and a constant offset for better CSE", false,
false)
FunctionPass *llvm::createSeparateConstOffsetFromGEPPass(bool LowerGEP) {
- return new SeparateConstOffsetFromGEPLegacyPass(LowerGEP);
+ return new SeparateConstOffsetFromGEPLegacyPass(LowerGEP);
}
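For orientation, a rough sketch of the split this pass performs (illustrative only; the exact output depends on LowerGEP and on whether the accumulated offset is a multiple of the element size):

//   %idx = add i64 %x, 5
//   %p   = getelementptr inbounds float, float* %a, i64 %idx
//
// is rewritten so the constant part becomes a trailing GEP of its own (or raw
// byte arithmetic when LowerGEP is set), letting CSE and LICM share the
// variadic part across GEPs that differ only in the constant, roughly:
//
//   %base = getelementptr float, float* %a, i64 %x
//   %p    = getelementptr inbounds float, float* %base, i64 5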
bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
@@ -902,8 +902,8 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
// If we created a GEP with constant index, and the base is loop invariant,
// then we swap the first one with it, so LICM can move constant GEP out
// later.
- auto *FirstGEP = dyn_cast_or_null<GetElementPtrInst>(FirstResult);
- auto *SecondGEP = dyn_cast<GetElementPtrInst>(ResultPtr);
+ auto *FirstGEP = dyn_cast_or_null<GetElementPtrInst>(FirstResult);
+ auto *SecondGEP = dyn_cast<GetElementPtrInst>(ResultPtr);
if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L))
swapGEPOperand(FirstGEP, SecondGEP);
@@ -978,7 +978,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
if (!NeedsExtraction)
return Changed;
- TargetTransformInfo &TTI = GetTTI(*GEP->getFunction());
+ TargetTransformInfo &TTI = GetTTI(*GEP->getFunction());
// If LowerGEP is disabled, before really splitting the GEP, check whether the
// backend supports the addressing mode we are about to produce. If no, this
@@ -1143,25 +1143,25 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
return true;
}
-bool SeparateConstOffsetFromGEPLegacyPass::runOnFunction(Function &F) {
+bool SeparateConstOffsetFromGEPLegacyPass::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
- auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
- return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- };
- SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP);
- return Impl.run(F);
-}
-
-bool SeparateConstOffsetFromGEP::run(Function &F) {
+ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
+ return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ };
+ SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP);
+ return Impl.run(F);
+}
+
+bool SeparateConstOffsetFromGEP::run(Function &F) {
if (DisableSeparateConstOffsetFromGEP)
return false;
- DL = &F.getParent()->getDataLayout();
+ DL = &F.getParent()->getDataLayout();
bool Changed = false;
for (BasicBlock &B : F) {
for (BasicBlock::iterator I = B.begin(), IE = B.end(); I != IE;)
@@ -1368,20 +1368,20 @@ void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,
} else
First->setIsInBounds(true);
}
-
-PreservedAnalyses
-SeparateConstOffsetFromGEPPass::run(Function &F, FunctionAnalysisManager &AM) {
- auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
- auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
- auto *LI = &AM.getResult<LoopAnalysis>(F);
- auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
- auto GetTTI = [&AM](Function &F) -> TargetTransformInfo & {
- return AM.getResult<TargetIRAnalysis>(F);
- };
- SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP);
- if (!Impl.run(F))
- return PreservedAnalyses::all();
- PreservedAnalyses PA;
- PA.preserveSet<CFGAnalyses>();
- return PA;
-}
+
+PreservedAnalyses
+SeparateConstOffsetFromGEPPass::run(Function &F, FunctionAnalysisManager &AM) {
+ auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
+ auto *LI = &AM.getResult<LoopAnalysis>(F);
+ auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
+ auto GetTTI = [&AM](Function &F) -> TargetTransformInfo & {
+ return AM.getResult<TargetIRAnalysis>(F);
+ };
+ SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP);
+ if (!Impl.run(F))
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 9d3c8d0f37..8318870308 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -26,14 +26,14 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
-#include "llvm/Analysis/MustExecute.h"
-#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/MustExecute.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
@@ -51,7 +51,7 @@
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
@@ -96,11 +96,11 @@ static cl::opt<bool> UnswitchGuards(
"simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
cl::desc("If enabled, simple loop unswitching will also consider "
"llvm.experimental.guard intrinsics as unswitch candidates."));
-static cl::opt<bool> DropNonTrivialImplicitNullChecks(
- "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
- cl::init(false), cl::Hidden,
- cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
- "null checks to save time analyzing if we can keep it."));
+static cl::opt<bool> DropNonTrivialImplicitNullChecks(
+ "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
+ cl::init(false), cl::Hidden,
+ cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
+ "null checks to save time analyzing if we can keep it."));
/// Collect all of the loop invariant input values transitively used by the
/// homogeneous instruction graph from a given root.
@@ -692,9 +692,9 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
// successor.
BasicBlock *CommonSuccBB = nullptr;
if (SI.getNumCases() > 0 &&
- all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
- return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
- }))
+ all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
+ return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
+ }))
CommonSuccBB = SI.case_begin()->getCaseSuccessor();
if (!DefaultExitBB) {
// If we're not unswitching the default, we need it to match any cases to
@@ -855,11 +855,11 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
}
if (MSSAU) {
- MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
+ MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
if (VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
- } else {
- DT.applyUpdates(DTUpdates);
+ } else {
+ DT.applyUpdates(DTUpdates);
}
assert(DT.verify(DominatorTree::VerificationLevel::Fast));
@@ -1140,22 +1140,22 @@ static BasicBlock *buildClonedLoopBlocks(
// Replace the cloned branch with an unconditional branch to the cloned
// unswitched successor.
auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
- Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
- // Trivial Simplification. If Terminator is a conditional branch and
- // condition becomes dead - erase it.
- Value *ClonedConditionToErase = nullptr;
- if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
- ClonedConditionToErase = BI->getCondition();
- else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
- ClonedConditionToErase = SI->getCondition();
-
- ClonedTerminator->eraseFromParent();
+ Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
+ // Trivial Simplification. If Terminator is a conditional branch and
+ // condition becomes dead - erase it.
+ Value *ClonedConditionToErase = nullptr;
+ if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
+ ClonedConditionToErase = BI->getCondition();
+ else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
+ ClonedConditionToErase = SI->getCondition();
+
+ ClonedTerminator->eraseFromParent();
BranchInst::Create(ClonedSuccBB, ClonedParentBB);
- if (ClonedConditionToErase)
- RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
- MSSAU);
-
+ if (ClonedConditionToErase)
+ RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
+ MSSAU);
+
// If there are duplicate entries in the PHI nodes because of multiple edges
// to the unswitched successor, we need to nuke all but one as we replaced it
// with a direct branch.
@@ -1214,7 +1214,7 @@ static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
LI.addTopLevelLoop(ClonedRootL);
AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
- if (OrigRootL.isInnermost())
+ if (OrigRootL.isInnermost())
return ClonedRootL;
// If we have a nest, we can quickly clone the entire loop nest using an
@@ -2090,23 +2090,23 @@ static void unswitchNontrivialInvariants(
DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU);
}
- // Drop metadata if we may break its semantics by moving this instr into the
- // split block.
- if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
- if (DropNonTrivialImplicitNullChecks)
- // Do not spend time trying to understand if we can keep it, just drop it
- // to save compile time.
- TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
- else {
- // It is only legal to preserve make.implicit metadata if we are
- // guaranteed not to reach an implicit null check after following this branch.
- ICFLoopSafetyInfo SafetyInfo;
- SafetyInfo.computeLoopSafetyInfo(&L);
- if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
- TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
- }
- }
-
+ // Drop metadata if we may break its semantics by moving this instr into the
+ // split block.
+ if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
+ if (DropNonTrivialImplicitNullChecks)
+ // Do not spend time trying to understand if we can keep it, just drop it
+ // to save compile time.
+ TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
+ else {
+ // It is only legal to preserve make.implicit metadata if we are
+ // guaranteed not to reach an implicit null check after following this branch.
+ ICFLoopSafetyInfo SafetyInfo;
+ SafetyInfo.computeLoopSafetyInfo(&L);
+ if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
+ TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
+ }
+ }
+
// The stitching of the branched code back together depends on whether we're
// doing full unswitching or not with the exception that we always want to
// nuke the initial terminator placed in the split block.
@@ -2353,12 +2353,12 @@ static void unswitchNontrivialInvariants(
for (Loop *UpdatedL :
llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
UpdateLoop(*UpdatedL);
- if (UpdatedL->isOutermost())
+ if (UpdatedL->isOutermost())
OuterExitL = nullptr;
}
if (IsStillLoop) {
UpdateLoop(L);
- if (L.isOutermost())
+ if (L.isOutermost())
OuterExitL = nullptr;
}
@@ -2706,10 +2706,10 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
// (convergent, noduplicate, or cross-basic-block tokens).
// FIXME: We might be able to safely handle some of these in non-duplicated
// regions.
- TargetTransformInfo::TargetCostKind CostKind =
- L.getHeader()->getParent()->hasMinSize()
- ? TargetTransformInfo::TCK_CodeSize
- : TargetTransformInfo::TCK_SizeAndLatency;
+ TargetTransformInfo::TargetCostKind CostKind =
+ L.getHeader()->getParent()->hasMinSize()
+ ? TargetTransformInfo::TCK_CodeSize
+ : TargetTransformInfo::TCK_SizeAndLatency;
int LoopCost = 0;
for (auto *BB : L.blocks()) {
int Cost = 0;
@@ -2723,7 +2723,7 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
if (CB->isConvergent() || CB->cannotDuplicate())
return false;
- Cost += TTI.getUserCost(&I, CostKind);
+ Cost += TTI.getUserCost(&I, CostKind);
}
assert(Cost >= 0 && "Must not have negative costs!");
LoopCost += Cost;
@@ -2904,10 +2904,10 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
if (!NonTrivial && !EnableNonTrivialUnswitch)
return false;
- // Skip non-trivial unswitching for optsize functions.
- if (L.getHeader()->getParent()->hasOptSize())
- return false;
-
+ // Skip non-trivial unswitching for optsize functions.
+ if (L.getHeader()->getParent()->hasOptSize())
+ return false;
+
// For non-trivial unswitching, because it often creates new loops, we rely on
// the pass manager to iterate on the loops rather than trying to immediately
// reach a fixed point. There is no substantial advantage to iterating
@@ -2920,7 +2920,7 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
return true;
// No other opportunities to unswitch.
- return false;
+ return false;
}
PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 38e7109ead..7fdd5c659d 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -25,25 +25,25 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
+#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
#include <utility>
using namespace llvm;
@@ -65,10 +65,10 @@ static cl::opt<bool> UserForwardSwitchCond(
"forward-switch-cond", cl::Hidden, cl::init(false),
cl::desc("Forward switch condition to phi ops (default = false)"));
-static cl::opt<bool> UserHoistCommonInsts(
- "hoist-common-insts", cl::Hidden, cl::init(false),
- cl::desc("hoist common instructions (default = false)"));
-
+static cl::opt<bool> UserHoistCommonInsts(
+ "hoist-common-insts", cl::Hidden, cl::init(false),
+ cl::desc("hoist common instructions (default = false)"));
+
static cl::opt<bool> UserSinkCommonInsts(
"sink-common-insts", cl::Hidden, cl::init(false),
cl::desc("Sink common instructions (default = false)"));
@@ -78,18 +78,18 @@ STATISTIC(NumSimpl, "Number of blocks simplified");
/// If we have more than one empty (other than phi node) return blocks,
/// merge them together to promote recursive block merging.
-static bool mergeEmptyReturnBlocks(Function &F, DomTreeUpdater *DTU) {
+static bool mergeEmptyReturnBlocks(Function &F, DomTreeUpdater *DTU) {
bool Changed = false;
- std::vector<DominatorTree::UpdateType> Updates;
- SmallVector<BasicBlock *, 8> DeadBlocks;
-
+ std::vector<DominatorTree::UpdateType> Updates;
+ SmallVector<BasicBlock *, 8> DeadBlocks;
+
BasicBlock *RetBlock = nullptr;
// Scan all the blocks in the function, looking for empty return blocks.
- for (BasicBlock &BB : make_early_inc_range(F)) {
- if (DTU && DTU->isBBPendingDeletion(&BB))
- continue;
+ for (BasicBlock &BB : make_early_inc_range(F)) {
+ if (DTU && DTU->isBBPendingDeletion(&BB))
+ continue;
// Only look at return blocks.
ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator());
@@ -140,18 +140,18 @@ static bool mergeEmptyReturnBlocks(Function &F, DomTreeUpdater *DTU) {
if (Ret->getNumOperands() == 0 ||
Ret->getOperand(0) ==
cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0)) {
- // All predecessors of BB should now branch to RetBlock instead.
- if (DTU) {
- for (auto *Predecessor : predecessors(&BB)) {
- // But, iff Predecessor already branches to RetBlock,
- // don't (re-)add DomTree edge, because it already exists.
- if (!is_contained(successors(Predecessor), RetBlock))
- Updates.push_back({DominatorTree::Insert, Predecessor, RetBlock});
- Updates.push_back({DominatorTree::Delete, Predecessor, &BB});
- }
- }
+ // All predecessors of BB should now branch to RetBlock instead.
+ if (DTU) {
+ for (auto *Predecessor : predecessors(&BB)) {
+ // But, iff Predecessor already branches to RetBlock,
+ // don't (re-)add DomTree edge, because it already exists.
+ if (!is_contained(successors(Predecessor), RetBlock))
+ Updates.push_back({DominatorTree::Insert, Predecessor, RetBlock});
+ Updates.push_back({DominatorTree::Delete, Predecessor, &BB});
+ }
+ }
BB.replaceAllUsesWith(RetBlock);
- DeadBlocks.emplace_back(&BB);
+ DeadBlocks.emplace_back(&BB);
continue;
}
@@ -175,55 +175,55 @@ static bool mergeEmptyReturnBlocks(Function &F, DomTreeUpdater *DTU) {
RetBlockPHI->addIncoming(Ret->getOperand(0), &BB);
BB.getTerminator()->eraseFromParent();
BranchInst::Create(RetBlock, &BB);
- if (DTU)
- Updates.push_back({DominatorTree::Insert, &BB, RetBlock});
- }
-
- if (DTU) {
- DTU->applyUpdates(Updates);
- for (auto *BB : DeadBlocks)
- DTU->deleteBB(BB);
- } else {
- for (auto *BB : DeadBlocks)
- BB->eraseFromParent();
+ if (DTU)
+ Updates.push_back({DominatorTree::Insert, &BB, RetBlock});
}
+ if (DTU) {
+ DTU->applyUpdates(Updates);
+ for (auto *BB : DeadBlocks)
+ DTU->deleteBB(BB);
+ } else {
+ for (auto *BB : DeadBlocks)
+ BB->eraseFromParent();
+ }
+
return Changed;
}
/// Call SimplifyCFG on all the blocks in the function,
/// iterating until no more changes are made.
static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
- DomTreeUpdater *DTU,
+ DomTreeUpdater *DTU,
const SimplifyCFGOptions &Options) {
bool Changed = false;
bool LocalChange = true;
SmallVector<std::pair<const BasicBlock *, const BasicBlock *>, 32> Edges;
FindFunctionBackedges(F, Edges);
- SmallPtrSet<BasicBlock *, 16> UniqueLoopHeaders;
+ SmallPtrSet<BasicBlock *, 16> UniqueLoopHeaders;
for (unsigned i = 0, e = Edges.size(); i != e; ++i)
- UniqueLoopHeaders.insert(const_cast<BasicBlock *>(Edges[i].second));
-
- SmallVector<WeakVH, 16> LoopHeaders(UniqueLoopHeaders.begin(),
- UniqueLoopHeaders.end());
+ UniqueLoopHeaders.insert(const_cast<BasicBlock *>(Edges[i].second));
+ SmallVector<WeakVH, 16> LoopHeaders(UniqueLoopHeaders.begin(),
+ UniqueLoopHeaders.end());
+
while (LocalChange) {
LocalChange = false;
// Loop over all of the basic blocks and remove them if they are unneeded.
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
- BasicBlock &BB = *BBIt++;
- if (DTU) {
- assert(
- !DTU->isBBPendingDeletion(&BB) &&
- "Should not end up trying to simplify blocks marked for removal.");
- // Make sure that the advanced iterator does not point at the blocks
- // that are marked for removal, skip over all such blocks.
- while (BBIt != F.end() && DTU->isBBPendingDeletion(&*BBIt))
- ++BBIt;
- }
- if (simplifyCFG(&BB, TTI, DTU, Options, LoopHeaders)) {
+ BasicBlock &BB = *BBIt++;
+ if (DTU) {
+ assert(
+ !DTU->isBBPendingDeletion(&BB) &&
+ "Should not end up trying to simplify blocks marked for removal.");
+ // Make sure that the advanced iterator does not point at the blocks
+ // that are marked for removal, skip over all such blocks.
+ while (BBIt != F.end() && DTU->isBBPendingDeletion(&*BBIt))
+ ++BBIt;
+ }
+ if (simplifyCFG(&BB, TTI, DTU, Options, LoopHeaders)) {
LocalChange = true;
++NumSimpl;
}
@@ -233,15 +233,15 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
return Changed;
}
-static bool simplifyFunctionCFGImpl(Function &F, const TargetTransformInfo &TTI,
- DominatorTree *DT,
- const SimplifyCFGOptions &Options) {
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
-
- bool EverChanged = removeUnreachableBlocks(F, DT ? &DTU : nullptr);
- EverChanged |= mergeEmptyReturnBlocks(F, DT ? &DTU : nullptr);
- EverChanged |= iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options);
+static bool simplifyFunctionCFGImpl(Function &F, const TargetTransformInfo &TTI,
+ DominatorTree *DT,
+ const SimplifyCFGOptions &Options) {
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ bool EverChanged = removeUnreachableBlocks(F, DT ? &DTU : nullptr);
+ EverChanged |= mergeEmptyReturnBlocks(F, DT ? &DTU : nullptr);
+ EverChanged |= iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options);
+
// If neither pass changed anything, we're done.
if (!EverChanged) return false;
@@ -250,75 +250,75 @@ static bool simplifyFunctionCFGImpl(Function &F, const TargetTransformInfo &TTI,
// iterate between the two optimizations. We structure the code like this to
// avoid rerunning iterativelySimplifyCFG if the second pass of
// removeUnreachableBlocks doesn't do anything.
- if (!removeUnreachableBlocks(F, DT ? &DTU : nullptr))
+ if (!removeUnreachableBlocks(F, DT ? &DTU : nullptr))
return true;
do {
- EverChanged = iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options);
- EverChanged |= removeUnreachableBlocks(F, DT ? &DTU : nullptr);
+ EverChanged = iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options);
+ EverChanged |= removeUnreachableBlocks(F, DT ? &DTU : nullptr);
} while (EverChanged);
return true;
}
-static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI,
- DominatorTree *DT,
- const SimplifyCFGOptions &Options) {
- assert((!RequireAndPreserveDomTree ||
- (DT && DT->verify(DominatorTree::VerificationLevel::Full))) &&
- "Original domtree is invalid?");
-
- bool Changed = simplifyFunctionCFGImpl(F, TTI, DT, Options);
-
- assert((!RequireAndPreserveDomTree ||
- (DT && DT->verify(DominatorTree::VerificationLevel::Full))) &&
- "Failed to maintain validity of domtree!");
-
- return Changed;
-}
-
+static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI,
+ DominatorTree *DT,
+ const SimplifyCFGOptions &Options) {
+ assert((!RequireAndPreserveDomTree ||
+ (DT && DT->verify(DominatorTree::VerificationLevel::Full))) &&
+ "Original domtree is invalid?");
+
+ bool Changed = simplifyFunctionCFGImpl(F, TTI, DT, Options);
+
+ assert((!RequireAndPreserveDomTree ||
+ (DT && DT->verify(DominatorTree::VerificationLevel::Full))) &&
+ "Failed to maintain validity of domtree!");
+
+ return Changed;
+}
+
// Command-line settings override compile-time settings.
-static void applyCommandLineOverridesToOptions(SimplifyCFGOptions &Options) {
- if (UserBonusInstThreshold.getNumOccurrences())
- Options.BonusInstThreshold = UserBonusInstThreshold;
- if (UserForwardSwitchCond.getNumOccurrences())
- Options.ForwardSwitchCondToPhi = UserForwardSwitchCond;
- if (UserSwitchToLookup.getNumOccurrences())
- Options.ConvertSwitchToLookupTable = UserSwitchToLookup;
- if (UserKeepLoops.getNumOccurrences())
- Options.NeedCanonicalLoop = UserKeepLoops;
- if (UserHoistCommonInsts.getNumOccurrences())
- Options.HoistCommonInsts = UserHoistCommonInsts;
- if (UserSinkCommonInsts.getNumOccurrences())
- Options.SinkCommonInsts = UserSinkCommonInsts;
-}
-
-SimplifyCFGPass::SimplifyCFGPass() : Options() {
- applyCommandLineOverridesToOptions(Options);
-}
-
-SimplifyCFGPass::SimplifyCFGPass(const SimplifyCFGOptions &Opts)
- : Options(Opts) {
- applyCommandLineOverridesToOptions(Options);
+static void applyCommandLineOverridesToOptions(SimplifyCFGOptions &Options) {
+ if (UserBonusInstThreshold.getNumOccurrences())
+ Options.BonusInstThreshold = UserBonusInstThreshold;
+ if (UserForwardSwitchCond.getNumOccurrences())
+ Options.ForwardSwitchCondToPhi = UserForwardSwitchCond;
+ if (UserSwitchToLookup.getNumOccurrences())
+ Options.ConvertSwitchToLookupTable = UserSwitchToLookup;
+ if (UserKeepLoops.getNumOccurrences())
+ Options.NeedCanonicalLoop = UserKeepLoops;
+ if (UserHoistCommonInsts.getNumOccurrences())
+ Options.HoistCommonInsts = UserHoistCommonInsts;
+ if (UserSinkCommonInsts.getNumOccurrences())
+ Options.SinkCommonInsts = UserSinkCommonInsts;
}
+SimplifyCFGPass::SimplifyCFGPass() : Options() {
+ applyCommandLineOverridesToOptions(Options);
+}
+
+SimplifyCFGPass::SimplifyCFGPass(const SimplifyCFGOptions &Opts)
+ : Options(Opts) {
+ applyCommandLineOverridesToOptions(Options);
+}
+
PreservedAnalyses SimplifyCFGPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
Options.AC = &AM.getResult<AssumptionAnalysis>(F);
- DominatorTree *DT = nullptr;
- if (RequireAndPreserveDomTree)
- DT = &AM.getResult<DominatorTreeAnalysis>(F);
- if (F.hasFnAttribute(Attribute::OptForFuzzing)) {
- Options.setSimplifyCondBranch(false).setFoldTwoEntryPHINode(false);
- } else {
- Options.setSimplifyCondBranch(true).setFoldTwoEntryPHINode(true);
- }
- if (!simplifyFunctionCFG(F, TTI, DT, Options))
+ DominatorTree *DT = nullptr;
+ if (RequireAndPreserveDomTree)
+ DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ if (F.hasFnAttribute(Attribute::OptForFuzzing)) {
+ Options.setSimplifyCondBranch(false).setFoldTwoEntryPHINode(false);
+ } else {
+ Options.setSimplifyCondBranch(true).setFoldTwoEntryPHINode(true);
+ }
+ if (!simplifyFunctionCFG(F, TTI, DT, Options))
return PreservedAnalyses::all();
PreservedAnalyses PA;
- if (RequireAndPreserveDomTree)
- PA.preserve<DominatorTreeAnalysis>();
+ if (RequireAndPreserveDomTree)
+ PA.preserve<DominatorTreeAnalysis>();
PA.preserve<GlobalsAA>();
return PA;
}
@@ -329,14 +329,14 @@ struct CFGSimplifyPass : public FunctionPass {
SimplifyCFGOptions Options;
std::function<bool(const Function &)> PredicateFtor;
- CFGSimplifyPass(SimplifyCFGOptions Options_ = SimplifyCFGOptions(),
+ CFGSimplifyPass(SimplifyCFGOptions Options_ = SimplifyCFGOptions(),
std::function<bool(const Function &)> Ftor = nullptr)
- : FunctionPass(ID), Options(Options_), PredicateFtor(std::move(Ftor)) {
+ : FunctionPass(ID), Options(Options_), PredicateFtor(std::move(Ftor)) {
initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
// Check for command-line overrides of options for debug/customization.
- applyCommandLineOverridesToOptions(Options);
+ applyCommandLineOverridesToOptions(Options);
}
bool runOnFunction(Function &F) override {
@@ -344,9 +344,9 @@ struct CFGSimplifyPass : public FunctionPass {
return false;
Options.AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- DominatorTree *DT = nullptr;
- if (RequireAndPreserveDomTree)
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ DominatorTree *DT = nullptr;
+ if (RequireAndPreserveDomTree)
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
if (F.hasFnAttribute(Attribute::OptForFuzzing)) {
Options.setSimplifyCondBranch(false)
.setFoldTwoEntryPHINode(false);
@@ -356,15 +356,15 @@ struct CFGSimplifyPass : public FunctionPass {
}
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- return simplifyFunctionCFG(F, TTI, DT, Options);
+ return simplifyFunctionCFG(F, TTI, DT, Options);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
- if (RequireAndPreserveDomTree)
- AU.addRequired<DominatorTreeWrapperPass>();
+ if (RequireAndPreserveDomTree)
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
- if (RequireAndPreserveDomTree)
- AU.addPreserved<DominatorTreeWrapperPass>();
+ if (RequireAndPreserveDomTree)
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
}
};
@@ -375,13 +375,13 @@ INITIALIZE_PASS_BEGIN(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
false)
// Public interface to the CFGSimplification pass
FunctionPass *
-llvm::createCFGSimplificationPass(SimplifyCFGOptions Options,
+llvm::createCFGSimplificationPass(SimplifyCFGOptions Options,
std::function<bool(const Function &)> Ftor) {
- return new CFGSimplifyPass(Options, std::move(Ftor));
+ return new CFGSimplifyPass(Options, std::move(Ftor));
}
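
The hunks above show a recurring pattern in SimplifyCFGPass.cpp: command-line flags override the compile-time SimplifyCFGOptions only when they were actually passed (getNumOccurrences() != 0). Below is a minimal standalone sketch of that pattern, not LLVM code; the option struct, flag names and values are illustrative assumptions only, with std::optional standing in for the occurrence count.

// Standalone sketch: overrides are applied only when the corresponding flag
// was explicitly given, mirroring applyCommandLineOverridesToOptions() above.
#include <iostream>
#include <optional>

struct SimplifyOptions {
  int BonusInstThreshold = 1;
  bool HoistCommonInsts = false;
  bool SinkCommonInsts = false;
};

// An empty optional means "flag not passed", so the compile-time default wins.
struct ParsedFlags {
  std::optional<int> BonusInstThreshold;
  std::optional<bool> HoistCommonInsts;
  std::optional<bool> SinkCommonInsts;
};

static void applyOverrides(const ParsedFlags &Flags, SimplifyOptions &Opts) {
  if (Flags.BonusInstThreshold) Opts.BonusInstThreshold = *Flags.BonusInstThreshold;
  if (Flags.HoistCommonInsts)   Opts.HoistCommonInsts   = *Flags.HoistCommonInsts;
  if (Flags.SinkCommonInsts)    Opts.SinkCommonInsts    = *Flags.SinkCommonInsts;
}

int main() {
  SimplifyOptions Opts;            // configuration supplied by the pass builder
  ParsedFlags Flags;
  Flags.HoistCommonInsts = true;   // user passed the hoist flag on the command line
  applyOverrides(Flags, Opts);
  std::cout << Opts.BonusInstThreshold << ' ' << Opts.HoistCommonInsts << '\n';
}

The point of the "was it passed" check is that an explicitly supplied false still wins over a compile-time true, while an unpassed flag never clobbers the caller's configuration.
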
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/Sink.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/Sink.cpp
index 89cfbe384b..ffff0e605a 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/Sink.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/Sink.cpp
@@ -99,7 +99,7 @@ static bool IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo,
return false;
}
- return true;
+ return true;
}
/// SinkInstruction - Determine whether it is safe to sink the specified machine
@@ -130,37 +130,37 @@ static bool SinkInstruction(Instruction *Inst,
// decide.
BasicBlock *SuccToSinkTo = nullptr;
- // Find the nearest common dominator of all users as the candidate.
- BasicBlock *BB = Inst->getParent();
- for (Use &U : Inst->uses()) {
- Instruction *UseInst = cast<Instruction>(U.getUser());
- BasicBlock *UseBlock = UseInst->getParent();
- // Don't worry about dead users.
- if (!DT.isReachableFromEntry(UseBlock))
- continue;
- if (PHINode *PN = dyn_cast<PHINode>(UseInst)) {
- // PHI nodes use the operand in the predecessor block, not the block with
- // the PHI.
- unsigned Num = PHINode::getIncomingValueNumForOperand(U.getOperandNo());
- UseBlock = PN->getIncomingBlock(Num);
- }
- if (SuccToSinkTo)
- SuccToSinkTo = DT.findNearestCommonDominator(SuccToSinkTo, UseBlock);
- else
- SuccToSinkTo = UseBlock;
- // The current basic block needs to dominate the candidate.
- if (!DT.dominates(BB, SuccToSinkTo))
- return false;
+ // Find the nearest common dominator of all users as the candidate.
+ BasicBlock *BB = Inst->getParent();
+ for (Use &U : Inst->uses()) {
+ Instruction *UseInst = cast<Instruction>(U.getUser());
+ BasicBlock *UseBlock = UseInst->getParent();
+ // Don't worry about dead users.
+ if (!DT.isReachableFromEntry(UseBlock))
+ continue;
+ if (PHINode *PN = dyn_cast<PHINode>(UseInst)) {
+ // PHI nodes use the operand in the predecessor block, not the block with
+ // the PHI.
+ unsigned Num = PHINode::getIncomingValueNumForOperand(U.getOperandNo());
+ UseBlock = PN->getIncomingBlock(Num);
+ }
+ if (SuccToSinkTo)
+ SuccToSinkTo = DT.findNearestCommonDominator(SuccToSinkTo, UseBlock);
+ else
+ SuccToSinkTo = UseBlock;
+ // The current basic block needs to dominate the candidate.
+ if (!DT.dominates(BB, SuccToSinkTo))
+ return false;
}
- if (SuccToSinkTo) {
- // The nearest common dominator may be in a parent loop of BB, which may not
- // be beneficial. Find an ancestor.
- while (SuccToSinkTo != BB &&
- !IsAcceptableTarget(Inst, SuccToSinkTo, DT, LI))
- SuccToSinkTo = DT.getNode(SuccToSinkTo)->getIDom()->getBlock();
- if (SuccToSinkTo == BB)
- SuccToSinkTo = nullptr;
+ if (SuccToSinkTo) {
+ // The nearest common dominator may be in a parent loop of BB, which may not
+ // be beneficial. Find an ancestor.
+ while (SuccToSinkTo != BB &&
+ !IsAcceptableTarget(Inst, SuccToSinkTo, DT, LI))
+ SuccToSinkTo = DT.getNode(SuccToSinkTo)->getIDom()->getBlock();
+ if (SuccToSinkTo == BB)
+ SuccToSinkTo = nullptr;
}
// If we couldn't find a block to sink to, ignore this instruction.
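
For reference, the sink-target search above folds every use block into a single candidate via DT.findNearestCommonDominator and then checks that the defining block still dominates that candidate. A minimal standalone sketch of the nearest-common-dominator step over a parent-pointer dominator tree follows; it is not the LLVM DominatorTree API, and the tree and block numbers are made up.

// Standalone sketch: nearest common dominator over an immediate-dominator
// (parent-pointer) tree, the same shape of computation SinkInstruction uses
// to pick one block that dominates all users.
#include <iostream>
#include <vector>

static int depthOf(const std::vector<int> &Parent, int N) {
  int D = 0;
  while (Parent[N] != N) { N = Parent[N]; ++D; }  // the root is its own parent
  return D;
}

// Lift both nodes to equal depth, then walk up in lockstep until they meet.
static int nearestCommonDominator(const std::vector<int> &Parent, int A, int B) {
  int DA = depthOf(Parent, A), DB = depthOf(Parent, B);
  while (DA > DB) { A = Parent[A]; --DA; }
  while (DB > DA) { B = Parent[B]; --DB; }
  while (A != B) { A = Parent[A]; B = Parent[B]; }
  return A;
}

int main() {
  // Immediate-dominator tree: block 0 dominates 1 and 2; block 1 dominates 3 and 4.
  std::vector<int> IDom = {0, 0, 0, 1, 1};
  int Candidate = 3;                        // first use block
  for (int UseBlock : {4, 2})               // fold in the remaining use blocks
    Candidate = nearestCommonDominator(IDom, Candidate, UseBlock);
  std::cout << "sink candidate: " << Candidate << '\n';  // prints 0
}
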
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
index 9b18c945d9..b201837ea6 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
@@ -756,10 +756,10 @@ static bool tryToSpeculatePHIs(SmallVectorImpl<PHINode *> &PNs,
// For each PHI node in this block, check whether there are immediate folding
// opportunities from speculation, and whether that speculation will be
// valid. This determines the set of safe PHIs to speculate.
- llvm::erase_if(PNs, [&](PHINode *PN) {
- return !isSafeAndProfitableToSpeculateAroundPHI(
- *PN, CostSavingsMap, PotentialSpecSet, UnsafeSet, DT, TTI);
- });
+ llvm::erase_if(PNs, [&](PHINode *PN) {
+ return !isSafeAndProfitableToSpeculateAroundPHI(
+ *PN, CostSavingsMap, PotentialSpecSet, UnsafeSet, DT, TTI);
+ });
// If no PHIs were profitable, skip.
if (PNs.empty()) {
LLVM_DEBUG(dbgs() << " No safe and profitable PHIs found!\n");
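
The erase_if call above is the usual filter-in-place idiom: drop every PHI for which the safety/profitability predicate fails and keep the rest in order. A standalone sketch using the standard erase-remove idiom instead of llvm::erase_if, with an illustrative Candidate type and predicate:

#include <algorithm>
#include <iostream>
#include <vector>

struct Candidate { int Savings; };

static bool isProfitable(const Candidate &C) { return C.Savings > 0; }

int main() {
  std::vector<Candidate> Candidates = {{3}, {-1}, {0}, {7}};
  // Keep only the candidates the predicate accepts, in place and in order.
  Candidates.erase(std::remove_if(Candidates.begin(), Candidates.end(),
                                  [](const Candidate &C) { return !isProfitable(C); }),
                   Candidates.end());
  std::cout << Candidates.size() << '\n';  // prints 2
}
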
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/SpeculativeExecution.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/SpeculativeExecution.cpp
index c78185f2a6..4dbeb21638 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -245,13 +245,13 @@ static unsigned ComputeSpeculationCost(const Instruction *I,
case Instruction::FNeg:
case Instruction::ICmp:
case Instruction::FCmp:
- case Instruction::Trunc:
- case Instruction::Freeze:
- case Instruction::ExtractElement:
- case Instruction::InsertElement:
- case Instruction::ShuffleVector:
- case Instruction::ExtractValue:
- case Instruction::InsertValue:
+ case Instruction::Trunc:
+ case Instruction::Freeze:
+ case Instruction::ExtractElement:
+ case Instruction::InsertElement:
+ case Instruction::ShuffleVector:
+ case Instruction::ExtractValue:
+ case Instruction::InsertValue:
return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency);
default:
@@ -281,7 +281,7 @@ bool SpeculativeExecutionPass::considerHoistingFromTo(
for (const Value *V : U->operand_values()) {
if (const Instruction *I = dyn_cast<Instruction>(V)) {
- if (NotHoisted.contains(I))
+ if (NotHoisted.contains(I))
return false;
}
}
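
The opcode switch above whitelists the instruction kinds that may be speculated and assigns them a TTI cost; everything else falls through to "do not hoist". A standalone sketch of that shape of cost model, with an invented opcode enum, unit costs and budget standing in for the TTI query:

#include <iostream>
#include <optional>
#include <vector>

enum class Op { Add, Trunc, Freeze, ShuffleVector, Load, Call };

// Only whitelisted opcodes get a cost; an empty optional means "not hoistable".
static std::optional<unsigned> speculationCost(Op O) {
  switch (O) {
  case Op::Add:
  case Op::Trunc:
  case Op::Freeze:
  case Op::ShuffleVector:
    return 1;
  default:
    return std::nullopt;
  }
}

int main() {
  std::vector<Op> Block = {Op::Add, Op::Freeze, Op::Trunc};
  unsigned Total = 0, Budget = 4;
  for (Op O : Block) {
    auto C = speculationCost(O);
    if (!C) { std::cout << "not hoistable\n"; return 0; }
    Total += *C;
  }
  std::cout << (Total <= Budget ? "hoist" : "too expensive") << '\n';
}
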
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 577992ccb5..f8177f1f99 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -55,7 +55,7 @@
// - When (i' - i) is constant but i and i' are not, we could still perform
// SLSR.
-#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
+#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
@@ -96,39 +96,39 @@ static const unsigned UnknownAddressSpace =
namespace {
-class StraightLineStrengthReduceLegacyPass : public FunctionPass {
- const DataLayout *DL = nullptr;
-
-public:
- static char ID;
-
- StraightLineStrengthReduceLegacyPass() : FunctionPass(ID) {
- initializeStraightLineStrengthReduceLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- // We do not modify the shape of the CFG.
- AU.setPreservesCFG();
- }
-
- bool doInitialization(Module &M) override {
- DL = &M.getDataLayout();
- return false;
- }
-
- bool runOnFunction(Function &F) override;
-};
-
-class StraightLineStrengthReduce {
+class StraightLineStrengthReduceLegacyPass : public FunctionPass {
+ const DataLayout *DL = nullptr;
+
public:
- StraightLineStrengthReduce(const DataLayout *DL, DominatorTree *DT,
- ScalarEvolution *SE, TargetTransformInfo *TTI)
- : DL(DL), DT(DT), SE(SE), TTI(TTI) {}
-
+ static char ID;
+
+ StraightLineStrengthReduceLegacyPass() : FunctionPass(ID) {
+ initializeStraightLineStrengthReduceLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ // We do not modify the shape of the CFG.
+ AU.setPreservesCFG();
+ }
+
+ bool doInitialization(Module &M) override {
+ DL = &M.getDataLayout();
+ return false;
+ }
+
+ bool runOnFunction(Function &F) override;
+};
+
+class StraightLineStrengthReduce {
+public:
+ StraightLineStrengthReduce(const DataLayout *DL, DominatorTree *DT,
+ ScalarEvolution *SE, TargetTransformInfo *TTI)
+ : DL(DL), DT(DT), SE(SE), TTI(TTI) {}
+
// SLSR candidate. Such a candidate must be in one of the forms described in
// the header comments.
struct Candidate {
@@ -176,7 +176,7 @@ public:
Candidate *Basis = nullptr;
};
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F);
private:
// Returns true if Basis is a basis for C, i.e., Basis dominates C and they
@@ -256,18 +256,18 @@ private:
} // end anonymous namespace
-char StraightLineStrengthReduceLegacyPass::ID = 0;
+char StraightLineStrengthReduceLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(StraightLineStrengthReduceLegacyPass, "slsr",
+INITIALIZE_PASS_BEGIN(StraightLineStrengthReduceLegacyPass, "slsr",
"Straight line strength reduction", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(StraightLineStrengthReduceLegacyPass, "slsr",
+INITIALIZE_PASS_END(StraightLineStrengthReduceLegacyPass, "slsr",
"Straight line strength reduction", false, false)
FunctionPass *llvm::createStraightLineStrengthReducePass() {
- return new StraightLineStrengthReduceLegacyPass();
+ return new StraightLineStrengthReduceLegacyPass();
}
bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis,
@@ -285,7 +285,7 @@ bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis,
static bool isGEPFoldable(GetElementPtrInst *GEP,
const TargetTransformInfo *TTI) {
- SmallVector<const Value *, 4> Indices(GEP->indices());
+ SmallVector<const Value *, 4> Indices(GEP->indices());
return TTI->getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
Indices) == TargetTransformInfo::TCC_Free;
}
@@ -715,17 +715,17 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(
UnlinkedInstructions.push_back(C.Ins);
}
-bool StraightLineStrengthReduceLegacyPass::runOnFunction(Function &F) {
+bool StraightLineStrengthReduceLegacyPass::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
- auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- return StraightLineStrengthReduce(DL, DT, SE, TTI).runOnFunction(F);
-}
-
-bool StraightLineStrengthReduce::runOnFunction(Function &F) {
+ auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ return StraightLineStrengthReduce(DL, DT, SE, TTI).runOnFunction(F);
+}
+
+bool StraightLineStrengthReduce::runOnFunction(Function &F) {
// Traverse the dominator tree in the depth-first order. This order makes sure
// all bases of a candidate are in Candidates when we process it.
for (const auto Node : depth_first(DT))
@@ -755,25 +755,25 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) {
UnlinkedInstructions.clear();
return Ret;
}
-
-namespace llvm {
-
-PreservedAnalyses
-StraightLineStrengthReducePass::run(Function &F, FunctionAnalysisManager &AM) {
- const DataLayout *DL = &F.getParent()->getDataLayout();
- auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
- auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
- auto *TTI = &AM.getResult<TargetIRAnalysis>(F);
-
- if (!StraightLineStrengthReduce(DL, DT, SE, TTI).runOnFunction(F))
- return PreservedAnalyses::all();
-
- PreservedAnalyses PA;
- PA.preserveSet<CFGAnalyses>();
- PA.preserve<DominatorTreeAnalysis>();
- PA.preserve<ScalarEvolutionAnalysis>();
- PA.preserve<TargetIRAnalysis>();
- return PA;
-}
-
-} // namespace llvm
+
+namespace llvm {
+
+PreservedAnalyses
+StraightLineStrengthReducePass::run(Function &F, FunctionAnalysisManager &AM) {
+ const DataLayout *DL = &F.getParent()->getDataLayout();
+ auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
+ auto *TTI = &AM.getResult<TargetIRAnalysis>(F);
+
+ if (!StraightLineStrengthReduce(DL, DT, SE, TTI).runOnFunction(F))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<ScalarEvolutionAnalysis>();
+ PA.preserve<TargetIRAnalysis>();
+ return PA;
+}
+
+} // namespace llvm
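
Structurally, the file above keeps all of the SLSR logic in one StraightLineStrengthReduce class and wraps it twice: once for the legacy pass manager and once for the new pass manager. A minimal standalone sketch of that "one implementation, two thin wrappers" layout; every type here is a stand-in, not an LLVM class.

#include <iostream>
#include <string>

struct Function { std::string Name; };

// The shared implementation: the transformation logic lives in one place.
class StrengthReduceImpl {
public:
  bool run(Function &F) {
    std::cout << "rewriting candidates in " << F.Name << '\n';
    return true;  // report "changed"
  }
};

// Legacy-pass-style front end: gathers nothing of its own, forwards to the impl.
struct LegacyWrapper {
  bool runOnFunction(Function &F) { return StrengthReduceImpl().run(F); }
};

// New-pass-manager-style front end: same impl, different driver convention.
struct NewPMWrapper {
  bool run(Function &F) { return StrengthReduceImpl().run(F); }
};

int main() {
  Function F{"foo"};
  LegacyWrapper{}.runOnFunction(F);
  NewPMWrapper{}.run(F);
}
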
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/StructurizeCFG.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/StructurizeCFG.cpp
index 3e15cad5f3..ae83f06ead 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar/StructurizeCFG.h"
+#include "llvm/Transforms/Scalar/StructurizeCFG.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SCCIterator.h"
@@ -29,7 +29,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
-#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
@@ -57,7 +57,7 @@ using namespace llvm::PatternMatch;
#define DEBUG_TYPE "structurizecfg"
// The name for newly created blocks.
-const char FlowBlockName[] = "Flow";
+const char FlowBlockName[] = "Flow";
namespace {
@@ -236,7 +236,7 @@ public:
/// consist of a network of PHI nodes where the true incoming values express
/// breaks and the false values express continue states.
-class StructurizeCFG {
+class StructurizeCFG {
Type *Boolean;
ConstantInt *BoolTrue;
ConstantInt *BoolFalse;
@@ -245,7 +245,7 @@ class StructurizeCFG {
Function *Func;
Region *ParentRegion;
- LegacyDivergenceAnalysis *DA = nullptr;
+ LegacyDivergenceAnalysis *DA = nullptr;
DominatorTree *DT;
SmallVector<RegionNode *, 8> Order;
@@ -310,35 +310,35 @@ class StructurizeCFG {
void rebuildSSA();
public:
- void init(Region *R);
- bool run(Region *R, DominatorTree *DT);
- bool makeUniformRegion(Region *R, LegacyDivergenceAnalysis *DA);
-};
-
-class StructurizeCFGLegacyPass : public RegionPass {
- bool SkipUniformRegions;
-
-public:
+ void init(Region *R);
+ bool run(Region *R, DominatorTree *DT);
+ bool makeUniformRegion(Region *R, LegacyDivergenceAnalysis *DA);
+};
+
+class StructurizeCFGLegacyPass : public RegionPass {
+ bool SkipUniformRegions;
+
+public:
static char ID;
- explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false)
- : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) {
+ explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false)
+ : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) {
if (ForceSkipUniformRegions.getNumOccurrences())
SkipUniformRegions = ForceSkipUniformRegions.getValue();
- initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry());
+ initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry());
}
- bool runOnRegion(Region *R, RGPassManager &RGM) override {
- StructurizeCFG SCFG;
- SCFG.init(R);
- if (SkipUniformRegions) {
- LegacyDivergenceAnalysis *DA = &getAnalysis<LegacyDivergenceAnalysis>();
- if (SCFG.makeUniformRegion(R, DA))
- return false;
- }
- DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- return SCFG.run(R, DT);
- }
+ bool runOnRegion(Region *R, RGPassManager &RGM) override {
+ StructurizeCFG SCFG;
+ SCFG.init(R);
+ if (SkipUniformRegions) {
+ LegacyDivergenceAnalysis *DA = &getAnalysis<LegacyDivergenceAnalysis>();
+ if (SCFG.makeUniformRegion(R, DA))
+ return false;
+ }
+ DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ return SCFG.run(R, DT);
+ }
StringRef getPassName() const override { return "Structurize control flow"; }
@@ -355,16 +355,16 @@ public:
} // end anonymous namespace
-char StructurizeCFGLegacyPass::ID = 0;
+char StructurizeCFGLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
- "Structurize the CFG", false, false)
+INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
+ "Structurize the CFG", false, false)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
-INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
+INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
-INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
- "Structurize the CFG", false, false)
+INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
+ "Structurize the CFG", false, false)
/// Build up the general order of nodes, by performing a topological sort of the
/// parent region's nodes, while ensuring that there is no outer cycle node
@@ -1008,59 +1008,59 @@ static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
}
-void StructurizeCFG::init(Region *R) {
- LLVMContext &Context = R->getEntry()->getContext();
-
- Boolean = Type::getInt1Ty(Context);
- BoolTrue = ConstantInt::getTrue(Context);
- BoolFalse = ConstantInt::getFalse(Context);
- BoolUndef = UndefValue::get(Boolean);
-
- this->DA = nullptr;
-}
-
-bool StructurizeCFG::makeUniformRegion(Region *R,
- LegacyDivergenceAnalysis *DA) {
+void StructurizeCFG::init(Region *R) {
+ LLVMContext &Context = R->getEntry()->getContext();
+
+ Boolean = Type::getInt1Ty(Context);
+ BoolTrue = ConstantInt::getTrue(Context);
+ BoolFalse = ConstantInt::getFalse(Context);
+ BoolUndef = UndefValue::get(Boolean);
+
+ this->DA = nullptr;
+}
+
+bool StructurizeCFG::makeUniformRegion(Region *R,
+ LegacyDivergenceAnalysis *DA) {
if (R->isTopLevelRegion())
return false;
- this->DA = DA;
- // TODO: We could probably be smarter here with how we handle sub-regions.
- // We currently rely on the fact that metadata is set by earlier invocations
- // of the pass on sub-regions, and that this metadata doesn't get lost --
- // but we shouldn't rely on metadata for correctness!
- unsigned UniformMDKindID =
- R->getEntry()->getContext().getMDKindID("structurizecfg.uniform");
-
- if (hasOnlyUniformBranches(R, UniformMDKindID, *DA)) {
- LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
- << '\n');
-
- // Mark all direct child block terminators as having been treated as
- // uniform. To account for a possible future in which non-uniform
- // sub-regions are treated more cleverly, indirect children are not
- // marked as uniform.
- MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {});
- for (RegionNode *E : R->elements()) {
- if (E->isSubRegion())
- continue;
-
- if (Instruction *Term = E->getEntry()->getTerminator())
- Term->setMetadata(UniformMDKindID, MD);
- }
-
- return true;
+ this->DA = DA;
+ // TODO: We could probably be smarter here with how we handle sub-regions.
+ // We currently rely on the fact that metadata is set by earlier invocations
+ // of the pass on sub-regions, and that this metadata doesn't get lost --
+ // but we shouldn't rely on metadata for correctness!
+ unsigned UniformMDKindID =
+ R->getEntry()->getContext().getMDKindID("structurizecfg.uniform");
+
+ if (hasOnlyUniformBranches(R, UniformMDKindID, *DA)) {
+ LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
+ << '\n');
+
+ // Mark all direct child block terminators as having been treated as
+ // uniform. To account for a possible future in which non-uniform
+ // sub-regions are treated more cleverly, indirect children are not
+ // marked as uniform.
+ MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {});
+ for (RegionNode *E : R->elements()) {
+ if (E->isSubRegion())
+ continue;
+
+ if (Instruction *Term = E->getEntry()->getTerminator())
+ Term->setMetadata(UniformMDKindID, MD);
+ }
+
+ return true;
}
- return false;
-}
-
-/// Run the transformation for each region found
-bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
- if (R->isTopLevelRegion())
- return false;
-
- this->DT = DT;
-
+ return false;
+}
+
+/// Run the transformation for each region found
+bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
+ if (R->isTopLevelRegion())
+ return false;
+
+ this->DT = DT;
+
Func = R->getEntry()->getParent();
ParentRegion = R;
@@ -1088,33 +1088,33 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
}
Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) {
- return new StructurizeCFGLegacyPass(SkipUniformRegions);
-}
-
-static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) {
- Regions.push_back(&R);
- for (const auto &E : R)
- addRegionIntoQueue(*E, Regions);
-}
-
-PreservedAnalyses StructurizeCFGPass::run(Function &F,
- FunctionAnalysisManager &AM) {
-
- bool Changed = false;
- DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
- auto &RI = AM.getResult<RegionInfoAnalysis>(F);
- std::vector<Region *> Regions;
- addRegionIntoQueue(*RI.getTopLevelRegion(), Regions);
- while (!Regions.empty()) {
- Region *R = Regions.back();
- StructurizeCFG SCFG;
- SCFG.init(R);
- Changed |= SCFG.run(R, DT);
- Regions.pop_back();
- }
- if (!Changed)
- return PreservedAnalyses::all();
- PreservedAnalyses PA;
- PA.preserve<DominatorTreeAnalysis>();
- return PA;
+ return new StructurizeCFGLegacyPass(SkipUniformRegions);
}
+
+static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) {
+ Regions.push_back(&R);
+ for (const auto &E : R)
+ addRegionIntoQueue(*E, Regions);
+}
+
+PreservedAnalyses StructurizeCFGPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+
+ bool Changed = false;
+ DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ auto &RI = AM.getResult<RegionInfoAnalysis>(F);
+ std::vector<Region *> Regions;
+ addRegionIntoQueue(*RI.getTopLevelRegion(), Regions);
+ while (!Regions.empty()) {
+ Region *R = Regions.back();
+ StructurizeCFG SCFG;
+ SCFG.init(R);
+ Changed |= SCFG.run(R, DT);
+ Regions.pop_back();
+ }
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
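
addRegionIntoQueue above builds the region worklist in pre-order (a region before its children) and StructurizeCFGPass::run then pops from the back, so nested regions are structurized before the regions that contain them. A standalone sketch of that ordering with a toy Region tree; the names and tree shape are made up.

#include <iostream>
#include <string>
#include <vector>

struct Region {
  std::string Name;
  std::vector<Region> Children;
};

// Pre-order collection: push the region, then recurse into its children.
static void addRegionIntoQueue(const Region &R, std::vector<const Region *> &Queue) {
  Queue.push_back(&R);
  for (const Region &Child : R.Children)
    addRegionIntoQueue(Child, Queue);
}

int main() {
  Region Top{"top", {Region{"outer", {Region{"inner", {}}}}}};
  std::vector<const Region *> Regions;
  addRegionIntoQueue(Top, Regions);
  while (!Regions.empty()) {                      // processes inner, outer, top
    std::cout << "structurize " << Regions.back()->Name << '\n';
    Regions.pop_back();
  }
}

Popping from the back of a pre-order list is what guarantees innermost-first processing without an explicit post-order traversal.
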
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 9e7cccc884..50f7ac0a31 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -92,10 +92,10 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced");
/// Scan the specified function for alloca instructions.
/// If it contains any dynamic allocas, returns false.
static bool canTRE(Function &F) {
- // FIXME: The code generator produces really bad code when an 'escaping
- // alloca' is changed from being a static alloca to being a dynamic alloca.
- // Until this is resolved, disable this transformation if that would ever
- // happen. This bug is PR962.
+ // FIXME: The code generator produces really bad code when an 'escaping
+ // alloca' is changed from being a static alloca to being a dynamic alloca.
+ // Until this is resolved, disable this transformation if that would ever
+ // happen. This bug is PR962.
return llvm::all_of(instructions(F), [](Instruction &I) {
auto *AI = dyn_cast<AllocaInst>(&I);
return !AI || AI->isStaticAlloca();
@@ -240,11 +240,11 @@ static bool markTails(Function &F, bool &AllCallsAreTailCalls,
Escaped = ESCAPED;
CallInst *CI = dyn_cast<CallInst>(&I);
- // A PseudoProbeInst has the IntrInaccessibleMemOnly tag hence it is
- // considered accessing memory and will be marked as a tail call if we
- // don't bail out here.
- if (!CI || CI->isTailCall() || isa<DbgInfoIntrinsic>(&I) ||
- isa<PseudoProbeInst>(&I))
+ // A PseudoProbeInst has the IntrInaccessibleMemOnly tag hence it is
+ // considered accessing memory and will be marked as a tail call if we
+ // don't bail out here.
+ if (!CI || CI->isTailCall() || isa<DbgInfoIntrinsic>(&I) ||
+ isa<PseudoProbeInst>(&I))
continue;
bool IsNoTail = CI->isNoTailCall() || CI->hasOperandBundles();
@@ -286,7 +286,7 @@ static bool markTails(Function &F, bool &AllCallsAreTailCalls,
}
}
- for (auto *SuccBB : successors(BB)) {
+ for (auto *SuccBB : successors(BB)) {
auto &State = Visited[SuccBB];
if (State < Escaped) {
State = Escaped;
@@ -426,7 +426,7 @@ class TailRecursionEliminator {
DomTreeUpdater &DTU)
: F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU) {}
- CallInst *findTRECandidate(BasicBlock *BB,
+ CallInst *findTRECandidate(BasicBlock *BB,
bool CannotTailCallElimCallsMarkedTail);
void createTailRecurseLoopHeader(CallInst *CI);
@@ -435,9 +435,9 @@ class TailRecursionEliminator {
bool eliminateCall(CallInst *CI);
- void cleanupAndFinalize();
+ void cleanupAndFinalize();
- bool processBlock(BasicBlock &BB, bool CannotTailCallElimCallsMarkedTail);
+ bool processBlock(BasicBlock &BB, bool CannotTailCallElimCallsMarkedTail);
public:
static bool eliminate(Function &F, const TargetTransformInfo *TTI,
@@ -447,8 +447,8 @@ public:
} // namespace
CallInst *TailRecursionEliminator::findTRECandidate(
- BasicBlock *BB, bool CannotTailCallElimCallsMarkedTail) {
- Instruction *TI = BB->getTerminator();
+ BasicBlock *BB, bool CannotTailCallElimCallsMarkedTail) {
+ Instruction *TI = BB->getTerminator();
if (&BB->front() == TI) // Make sure there is something before the terminator.
return nullptr;
@@ -747,50 +747,50 @@ void TailRecursionEliminator::cleanupAndFinalize() {
}
}
-bool TailRecursionEliminator::processBlock(
- BasicBlock &BB, bool CannotTailCallElimCallsMarkedTail) {
- Instruction *TI = BB.getTerminator();
-
- if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- if (BI->isConditional())
- return false;
-
- BasicBlock *Succ = BI->getSuccessor(0);
- ReturnInst *Ret = dyn_cast<ReturnInst>(Succ->getFirstNonPHIOrDbg(true));
-
- if (!Ret)
- return false;
-
- CallInst *CI = findTRECandidate(&BB, CannotTailCallElimCallsMarkedTail);
-
- if (!CI)
- return false;
-
- LLVM_DEBUG(dbgs() << "FOLDING: " << *Succ
- << "INTO UNCOND BRANCH PRED: " << BB);
- FoldReturnIntoUncondBranch(Ret, Succ, &BB, &DTU);
- ++NumRetDuped;
-
- // If all predecessors of Succ have been eliminated by
- // FoldReturnIntoUncondBranch, delete it. It is important to empty it,
- // because the ret instruction in there is still using a value which
- // eliminateCall will attempt to remove. This block can only contain
- // instructions that can't have uses, therefore it is safe to remove.
- if (pred_empty(Succ))
- DTU.deleteBB(Succ);
-
- eliminateCall(CI);
- return true;
- } else if (isa<ReturnInst>(TI)) {
- CallInst *CI = findTRECandidate(&BB, CannotTailCallElimCallsMarkedTail);
-
- if (CI)
- return eliminateCall(CI);
- }
-
- return false;
-}
-
+bool TailRecursionEliminator::processBlock(
+ BasicBlock &BB, bool CannotTailCallElimCallsMarkedTail) {
+ Instruction *TI = BB.getTerminator();
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional())
+ return false;
+
+ BasicBlock *Succ = BI->getSuccessor(0);
+ ReturnInst *Ret = dyn_cast<ReturnInst>(Succ->getFirstNonPHIOrDbg(true));
+
+ if (!Ret)
+ return false;
+
+ CallInst *CI = findTRECandidate(&BB, CannotTailCallElimCallsMarkedTail);
+
+ if (!CI)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "FOLDING: " << *Succ
+ << "INTO UNCOND BRANCH PRED: " << BB);
+ FoldReturnIntoUncondBranch(Ret, Succ, &BB, &DTU);
+ ++NumRetDuped;
+
+ // If all predecessors of Succ have been eliminated by
+ // FoldReturnIntoUncondBranch, delete it. It is important to empty it,
+ // because the ret instruction in there is still using a value which
+ // eliminateCall will attempt to remove. This block can only contain
+ // instructions that can't have uses, therefore it is safe to remove.
+ if (pred_empty(Succ))
+ DTU.deleteBB(Succ);
+
+ eliminateCall(CI);
+ return true;
+ } else if (isa<ReturnInst>(TI)) {
+ CallInst *CI = findTRECandidate(&BB, CannotTailCallElimCallsMarkedTail);
+
+ if (CI)
+ return eliminateCall(CI);
+ }
+
+ return false;
+}
+
bool TailRecursionEliminator::eliminate(Function &F,
const TargetTransformInfo *TTI,
AliasAnalysis *AA,
@@ -815,11 +815,11 @@ bool TailRecursionEliminator::eliminate(Function &F,
// TRE would deallocate variable sized allocas, TRE doesn't).
bool CanTRETailMarkedCall = canTRE(F);
- // Change any tail recursive calls to loops.
+ // Change any tail recursive calls to loops.
TailRecursionEliminator TRE(F, TTI, AA, ORE, DTU);
- for (BasicBlock &BB : F)
- MadeChange |= TRE.processBlock(BB, !CanTRETailMarkedCall);
+ for (BasicBlock &BB : F)
+ MadeChange |= TRE.processBlock(BB, !CanTRETailMarkedCall);
TRE.cleanupAndFinalize();
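
canTRE above is a single all_of over the function's instructions: tail-recursion elimination is only attempted when every alloca is static. A standalone sketch of the same predicate over a toy instruction list; the Inst struct is illustrative, not an LLVM type.

#include <algorithm>
#include <iostream>
#include <vector>

struct Inst {
  bool IsAlloca;
  bool IsStaticAlloca;
};

// TRE is allowed only if no dynamic (non-static) alloca exists in the function.
static bool canTRE(const std::vector<Inst> &Insts) {
  return std::all_of(Insts.begin(), Insts.end(), [](const Inst &I) {
    return !I.IsAlloca || I.IsStaticAlloca;  // non-allocas are always fine
  });
}

int main() {
  std::vector<Inst> F = {{false, false}, {true, true}};
  std::cout << (canTRE(F) ? "TRE allowed" : "TRE blocked") << '\n';
  F.push_back({true, false});  // a dynamic alloca appears
  std::cout << (canTRE(F) ? "TRE allowed" : "TRE blocked") << '\n';
}
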
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/WarnMissedTransforms.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/WarnMissedTransforms.cpp
index 80a7d3a43a..ec00528465 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/WarnMissedTransforms.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/WarnMissedTransforms.cpp
@@ -48,12 +48,12 @@ static void warnAboutLeftoverTransformations(Loop *L,
if (hasVectorizeTransformation(L) == TM_ForcedByUser) {
LLVM_DEBUG(dbgs() << "Leftover vectorization transformation\n");
- Optional<ElementCount> VectorizeWidth =
- getOptionalElementCountLoopAttribute(L);
+ Optional<ElementCount> VectorizeWidth =
+ getOptionalElementCountLoopAttribute(L);
Optional<int> InterleaveCount =
getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
- if (!VectorizeWidth || VectorizeWidth->isVector())
+ if (!VectorizeWidth || VectorizeWidth->isVector())
ORE->emit(
DiagnosticInfoOptimizationFailure(DEBUG_TYPE,
"FailedRequestedVectorization",
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/ya.make b/contrib/libs/llvm12/lib/Transforms/Scalar/ya.make
index 75501ae81a..00b9ef5ca1 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/ya.make
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/ya.make
@@ -12,14 +12,14 @@ LICENSE(Apache-2.0 WITH LLVM-exception)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
- contrib/libs/llvm12
- contrib/libs/llvm12/include
- contrib/libs/llvm12/lib/Analysis
- contrib/libs/llvm12/lib/IR
- contrib/libs/llvm12/lib/Support
- contrib/libs/llvm12/lib/Transforms/AggressiveInstCombine
- contrib/libs/llvm12/lib/Transforms/InstCombine
- contrib/libs/llvm12/lib/Transforms/Utils
+ contrib/libs/llvm12
+ contrib/libs/llvm12/include
+ contrib/libs/llvm12/lib/Analysis
+ contrib/libs/llvm12/lib/IR
+ contrib/libs/llvm12/lib/Support
+ contrib/libs/llvm12/lib/Transforms/AggressiveInstCombine
+ contrib/libs/llvm12/lib/Transforms/InstCombine
+ contrib/libs/llvm12/lib/Transforms/Utils
)
ADDINCL(
@@ -33,11 +33,11 @@ NO_UTIL()
SRCS(
ADCE.cpp
AlignmentFromAssumptions.cpp
- AnnotationRemarks.cpp
+ AnnotationRemarks.cpp
BDCE.cpp
CallSiteSplitting.cpp
ConstantHoisting.cpp
- ConstraintElimination.cpp
+ ConstraintElimination.cpp
CorrelatedValuePropagation.cpp
DCE.cpp
DeadStoreElimination.cpp
@@ -60,7 +60,7 @@ SRCS(
LoopDataPrefetch.cpp
LoopDeletion.cpp
LoopDistribute.cpp
- LoopFlatten.cpp
+ LoopFlatten.cpp
LoopFuse.cpp
LoopIdiomRecognize.cpp
LoopInstSimplify.cpp
@@ -97,7 +97,7 @@ SRCS(
SCCP.cpp
SROA.cpp
Scalar.cpp
- ScalarizeMaskedMemIntrin.cpp
+ ScalarizeMaskedMemIntrin.cpp
Scalarizer.cpp
SeparateConstOffsetFromGEP.cpp
SimpleLoopUnswitch.cpp