path: root/contrib/libs/llvm12/lib/Transforms/Vectorize/VectorCombine.cpp
author shadchin <shadchin@yandex-team.ru> 2022-02-10 16:44:39 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:44:39 +0300
commit e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (patch)
tree 64175d5cadab313b3e7039ebaa06c5bc3295e274 /contrib/libs/llvm12/lib/Transforms/Vectorize/VectorCombine.cpp
parent 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (diff)
download ydb-e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0.tar.gz
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Transforms/Vectorize/VectorCombine.cpp')
-rw-r--r-- contrib/libs/llvm12/lib/Transforms/Vectorize/VectorCombine.cpp | 360
1 file changed, 180 insertions, 180 deletions
diff --git a/contrib/libs/llvm12/lib/Transforms/Vectorize/VectorCombine.cpp b/contrib/libs/llvm12/lib/Transforms/Vectorize/VectorCombine.cpp
index 7b0a72de4e..787f146bdd 100644
--- a/contrib/libs/llvm12/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -16,7 +16,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
@@ -34,7 +34,7 @@ using namespace llvm;
using namespace llvm::PatternMatch;
#define DEBUG_TYPE "vector-combine"
-STATISTIC(NumVecLoad, "Number of vector loads formed");
+STATISTIC(NumVecLoad, "Number of vector loads formed");
STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
@@ -67,7 +67,7 @@ private:
const TargetTransformInfo &TTI;
const DominatorTree &DT;
- bool vectorizeLoadInsert(Instruction &I);
+ bool vectorizeLoadInsert(Instruction &I);
ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
ExtractElementInst *Ext1,
unsigned PreferredExtractIndex) const;
@@ -91,138 +91,138 @@ static void replaceValue(Value &Old, Value &New) {
New.takeName(&Old);
}
-bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
- // Match insert into fixed vector of scalar value.
- // TODO: Handle non-zero insert index.
- auto *Ty = dyn_cast<FixedVectorType>(I.getType());
- Value *Scalar;
- if (!Ty || !match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) ||
- !Scalar->hasOneUse())
- return false;
-
- // Optionally match an extract from another vector.
- Value *X;
- bool HasExtract = match(Scalar, m_ExtractElt(m_Value(X), m_ZeroInt()));
- if (!HasExtract)
- X = Scalar;
-
- // Match source value as load of scalar or vector.
- // Do not vectorize scalar load (widening) if atomic/volatile or under
- // asan/hwasan/memtag/tsan. The widened load may load data from dirty regions
- // or create data races non-existent in the source.
- auto *Load = dyn_cast<LoadInst>(X);
- if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
- Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
- mustSuppressSpeculation(*Load))
- return false;
-
- const DataLayout &DL = I.getModule()->getDataLayout();
- Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
- assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
-
- // If original AS != Load's AS, we can't bitcast the original pointer and have
- // to use Load's operand instead. Ideally we would want to strip pointer casts
- // without changing AS, but there's no API to do that ATM.
- unsigned AS = Load->getPointerAddressSpace();
- if (AS != SrcPtr->getType()->getPointerAddressSpace())
- SrcPtr = Load->getPointerOperand();
-
- // We are potentially transforming byte-sized (8-bit) memory accesses, so make
- // sure we have all of our type-based constraints in place for this target.
- Type *ScalarTy = Scalar->getType();
- uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();
- unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
- if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
- ScalarSize % 8 != 0)
- return false;
-
- // Check safety of replacing the scalar load with a larger vector load.
- // We use minimal alignment (maximum flexibility) because we only care about
- // the dereferenceable region. When calculating cost and creating a new op,
- // we may use a larger value based on alignment attributes.
- unsigned MinVecNumElts = MinVectorSize / ScalarSize;
- auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
- unsigned OffsetEltIndex = 0;
- Align Alignment = Load->getAlign();
- if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT)) {
- // It is not safe to load directly from the pointer, but we can still peek
- // through gep offsets and check if it safe to load from a base address with
- // updated alignment. If it is, we can shuffle the element(s) into place
- // after loading.
- unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(SrcPtr->getType());
- APInt Offset(OffsetBitWidth, 0);
- SrcPtr = SrcPtr->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
-
- // We want to shuffle the result down from a high element of a vector, so
- // the offset must be positive.
- if (Offset.isNegative())
- return false;
-
- // The offset must be a multiple of the scalar element to shuffle cleanly
- // in the element's size.
- uint64_t ScalarSizeInBytes = ScalarSize / 8;
- if (Offset.urem(ScalarSizeInBytes) != 0)
- return false;
-
- // If we load MinVecNumElts, will our target element still be loaded?
- OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
- if (OffsetEltIndex >= MinVecNumElts)
- return false;
-
- if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT))
- return false;
-
- // Update alignment with offset value. Note that the offset could be negated
- // to more accurately represent "(new) SrcPtr - Offset = (old) SrcPtr", but
- // negation does not change the result of the alignment calculation.
- Alignment = commonAlignment(Alignment, Offset.getZExtValue());
- }
-
- // Original pattern: insertelt undef, load [free casts of] PtrOp, 0
- // Use the greater of the alignment on the load or its source pointer.
- Alignment = std::max(SrcPtr->getPointerAlignment(DL), Alignment);
- Type *LoadTy = Load->getType();
- InstructionCost OldCost =
- TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
- APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0);
- OldCost += TTI.getScalarizationOverhead(MinVecTy, DemandedElts,
- /* Insert */ true, HasExtract);
-
- // New pattern: load VecPtr
- InstructionCost NewCost =
- TTI.getMemoryOpCost(Instruction::Load, MinVecTy, Alignment, AS);
- // Optionally, we are shuffling the loaded vector element(s) into place.
- if (OffsetEltIndex)
- NewCost += TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, MinVecTy);
-
- // We can aggressively convert to the vector form because the backend can
- // invert this transform if it does not result in a performance win.
- if (OldCost < NewCost || !NewCost.isValid())
- return false;
-
- // It is safe and potentially profitable to load a vector directly:
- // inselt undef, load Scalar, 0 --> load VecPtr
- IRBuilder<> Builder(Load);
- Value *CastedPtr = Builder.CreateBitCast(SrcPtr, MinVecTy->getPointerTo(AS));
- Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
-
- // Set everything but element 0 to undef to prevent poison from propagating
- // from the extra loaded memory. This will also optionally shrink/grow the
- // vector from the loaded size to the output size.
- // We assume this operation has no cost in codegen if there was no offset.
- // Note that we could use freeze to avoid poison problems, but then we might
- // still need a shuffle to change the vector size.
- unsigned OutputNumElts = Ty->getNumElements();
- SmallVector<int, 16> Mask(OutputNumElts, UndefMaskElem);
- assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
- Mask[0] = OffsetEltIndex;
- VecLd = Builder.CreateShuffleVector(VecLd, Mask);
-
- replaceValue(I, *VecLd);
- ++NumVecLoad;
- return true;
-}
-
+bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
+ // Match insert into fixed vector of scalar value.
+ // TODO: Handle non-zero insert index.
+ auto *Ty = dyn_cast<FixedVectorType>(I.getType());
+ Value *Scalar;
+ if (!Ty || !match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) ||
+ !Scalar->hasOneUse())
+ return false;
+
+ // Optionally match an extract from another vector.
+ Value *X;
+ bool HasExtract = match(Scalar, m_ExtractElt(m_Value(X), m_ZeroInt()));
+ if (!HasExtract)
+ X = Scalar;
+
+ // Match source value as load of scalar or vector.
+ // Do not vectorize scalar load (widening) if atomic/volatile or under
+ // asan/hwasan/memtag/tsan. The widened load may load data from dirty regions
+ // or create data races non-existent in the source.
+ auto *Load = dyn_cast<LoadInst>(X);
+ if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
+ Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
+ mustSuppressSpeculation(*Load))
+ return false;
+
+ const DataLayout &DL = I.getModule()->getDataLayout();
+ Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
+ assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
+
+ // If original AS != Load's AS, we can't bitcast the original pointer and have
+ // to use Load's operand instead. Ideally we would want to strip pointer casts
+ // without changing AS, but there's no API to do that ATM.
+ unsigned AS = Load->getPointerAddressSpace();
+ if (AS != SrcPtr->getType()->getPointerAddressSpace())
+ SrcPtr = Load->getPointerOperand();
+
+ // We are potentially transforming byte-sized (8-bit) memory accesses, so make
+ // sure we have all of our type-based constraints in place for this target.
+ Type *ScalarTy = Scalar->getType();
+ uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();
+ unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
+ if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
+ ScalarSize % 8 != 0)
+ return false;
+
+ // Check safety of replacing the scalar load with a larger vector load.
+ // We use minimal alignment (maximum flexibility) because we only care about
+ // the dereferenceable region. When calculating cost and creating a new op,
+ // we may use a larger value based on alignment attributes.
+ unsigned MinVecNumElts = MinVectorSize / ScalarSize;
+ auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
+ unsigned OffsetEltIndex = 0;
+ Align Alignment = Load->getAlign();
+ if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT)) {
+ // It is not safe to load directly from the pointer, but we can still peek
+ // through gep offsets and check if it safe to load from a base address with
+ // updated alignment. If it is, we can shuffle the element(s) into place
+ // after loading.
+ unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(SrcPtr->getType());
+ APInt Offset(OffsetBitWidth, 0);
+ SrcPtr = SrcPtr->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
+
+ // We want to shuffle the result down from a high element of a vector, so
+ // the offset must be positive.
+ if (Offset.isNegative())
+ return false;
+
+ // The offset must be a multiple of the scalar element to shuffle cleanly
+ // in the element's size.
+ uint64_t ScalarSizeInBytes = ScalarSize / 8;
+ if (Offset.urem(ScalarSizeInBytes) != 0)
+ return false;
+
+ // If we load MinVecNumElts, will our target element still be loaded?
+ OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
+ if (OffsetEltIndex >= MinVecNumElts)
+ return false;
+
+ if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT))
+ return false;
+
+ // Update alignment with offset value. Note that the offset could be negated
+ // to more accurately represent "(new) SrcPtr - Offset = (old) SrcPtr", but
+ // negation does not change the result of the alignment calculation.
+ Alignment = commonAlignment(Alignment, Offset.getZExtValue());
+ }
+
+ // Original pattern: insertelt undef, load [free casts of] PtrOp, 0
+ // Use the greater of the alignment on the load or its source pointer.
+ Alignment = std::max(SrcPtr->getPointerAlignment(DL), Alignment);
+ Type *LoadTy = Load->getType();
+ InstructionCost OldCost =
+ TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
+ APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0);
+ OldCost += TTI.getScalarizationOverhead(MinVecTy, DemandedElts,
+ /* Insert */ true, HasExtract);
+
+ // New pattern: load VecPtr
+ InstructionCost NewCost =
+ TTI.getMemoryOpCost(Instruction::Load, MinVecTy, Alignment, AS);
+ // Optionally, we are shuffling the loaded vector element(s) into place.
+ if (OffsetEltIndex)
+ NewCost += TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, MinVecTy);
+
+ // We can aggressively convert to the vector form because the backend can
+ // invert this transform if it does not result in a performance win.
+ if (OldCost < NewCost || !NewCost.isValid())
+ return false;
+
+ // It is safe and potentially profitable to load a vector directly:
+ // inselt undef, load Scalar, 0 --> load VecPtr
+ IRBuilder<> Builder(Load);
+ Value *CastedPtr = Builder.CreateBitCast(SrcPtr, MinVecTy->getPointerTo(AS));
+ Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
+
+ // Set everything but element 0 to undef to prevent poison from propagating
+ // from the extra loaded memory. This will also optionally shrink/grow the
+ // vector from the loaded size to the output size.
+ // We assume this operation has no cost in codegen if there was no offset.
+ // Note that we could use freeze to avoid poison problems, but then we might
+ // still need a shuffle to change the vector size.
+ unsigned OutputNumElts = Ty->getNumElements();
+ SmallVector<int, 16> Mask(OutputNumElts, UndefMaskElem);
+ assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
+ Mask[0] = OffsetEltIndex;
+ VecLd = Builder.CreateShuffleVector(VecLd, Mask);
+
+ replaceValue(I, *VecLd);
+ ++NumVecLoad;
+ return true;
+}
+
/// Determine which, if any, of the inputs should be replaced by a shuffle
/// followed by extract from a different index.
ExtractElementInst *VectorCombine::getShuffleExtract(
@@ -241,15 +241,15 @@ ExtractElementInst *VectorCombine::getShuffleExtract(
Type *VecTy = Ext0->getVectorOperand()->getType();
assert(VecTy == Ext1->getVectorOperand()->getType() && "Need matching types");
- InstructionCost Cost0 =
- TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
- InstructionCost Cost1 =
- TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
-
- // If both costs are invalid no shuffle is needed
- if (!Cost0.isValid() && !Cost1.isValid())
- return nullptr;
-
+ InstructionCost Cost0 =
+ TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
+ InstructionCost Cost1 =
+ TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
+
+ // If both costs are invalid no shuffle is needed
+ if (!Cost0.isValid() && !Cost1.isValid())
+ return nullptr;
+
// We are extracting from 2 different indexes, so one operand must be shuffled
// before performing a vector operation and/or extract. The more expensive
// extract will be replaced by a shuffle.
@@ -284,7 +284,7 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
"Expected constant extract indexes");
Type *ScalarTy = Ext0->getType();
auto *VecTy = cast<VectorType>(Ext0->getOperand(0)->getType());
- InstructionCost ScalarOpCost, VectorOpCost;
+ InstructionCost ScalarOpCost, VectorOpCost;
// Get cost estimates for scalar and vector versions of the operation.
bool IsBinOp = Instruction::isBinaryOp(Opcode);
@@ -305,9 +305,9 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
unsigned Ext0Index = cast<ConstantInt>(Ext0->getOperand(1))->getZExtValue();
unsigned Ext1Index = cast<ConstantInt>(Ext1->getOperand(1))->getZExtValue();
- InstructionCost Extract0Cost =
+ InstructionCost Extract0Cost =
TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext0Index);
- InstructionCost Extract1Cost =
+ InstructionCost Extract1Cost =
TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext1Index);
// A more expensive extract will always be replaced by a splat shuffle.
@@ -317,11 +317,11 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
// TODO: Evaluate whether that always results in lowest cost. Alternatively,
// check the cost of creating a broadcast shuffle and shuffling both
// operands to element 0.
- InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
+ InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
// Extra uses of the extracts mean that we include those costs in the
// vector total because those instructions will not be eliminated.
- InstructionCost OldCost, NewCost;
+ InstructionCost OldCost, NewCost;
if (Ext0->getOperand(0) == Ext1->getOperand(0) && Ext0Index == Ext1Index) {
// Handle a special case. If the 2 extracts are identical, adjust the
// formulas to account for that. The extra use charge allows for either the
@@ -372,7 +372,7 @@ static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
auto *VecTy = cast<FixedVectorType>(Vec->getType());
SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
ShufMask[NewIndex] = OldIndex;
- return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
+ return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
}
/// Given an extract element instruction with constant index operand, shuffle
@@ -506,23 +506,23 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) {
m_OneUse(m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))))))
return false;
- // 1) Do not fold bitcast shuffle for scalable type. First, shuffle cost for
- // scalable type is unknown; Second, we cannot reason if the narrowed shuffle
- // mask for scalable type is a splat or not.
- // 2) Disallow non-vector casts and length-changing shuffles.
+ // 1) Do not fold bitcast shuffle for scalable type. First, shuffle cost for
+ // scalable type is unknown; Second, we cannot reason if the narrowed shuffle
+ // mask for scalable type is a splat or not.
+ // 2) Disallow non-vector casts and length-changing shuffles.
// TODO: We could allow any shuffle.
- auto *DestTy = dyn_cast<FixedVectorType>(I.getType());
- auto *SrcTy = dyn_cast<FixedVectorType>(V->getType());
- if (!SrcTy || !DestTy || I.getOperand(0)->getType() != SrcTy)
+ auto *DestTy = dyn_cast<FixedVectorType>(I.getType());
+ auto *SrcTy = dyn_cast<FixedVectorType>(V->getType());
+ if (!SrcTy || !DestTy || I.getOperand(0)->getType() != SrcTy)
return false;
// The new shuffle must not cost more than the old shuffle. The bitcast is
// moved ahead of the shuffle, so assume that it has the same cost as before.
- InstructionCost DestCost =
- TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, DestTy);
- InstructionCost SrcCost =
- TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy);
- if (DestCost > SrcCost || !DestCost.isValid())
+ InstructionCost DestCost =
+ TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, DestTy);
+ InstructionCost SrcCost =
+ TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy);
+ if (DestCost > SrcCost || !DestCost.isValid())
return false;
unsigned DestNumElts = DestTy->getNumElements();
@@ -545,7 +545,7 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) {
// bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'
++NumShufOfBitcast;
Value *CastV = Builder.CreateBitCast(V, DestTy);
- Value *Shuf = Builder.CreateShuffleVector(CastV, NewMask);
+ Value *Shuf = Builder.CreateShuffleVector(CastV, NewMask);
replaceValue(I, *Shuf);
return true;
}
@@ -612,7 +612,7 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
"Unexpected types for insert element into binop or cmp");
unsigned Opcode = I.getOpcode();
- InstructionCost ScalarOpCost, VectorOpCost;
+ InstructionCost ScalarOpCost, VectorOpCost;
if (IsCmp) {
ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy);
VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy);
@@ -623,16 +623,16 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
// Get cost estimate for the insert element. This cost will factor into
// both sequences.
- InstructionCost InsertCost =
+ InstructionCost InsertCost =
TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, Index);
- InstructionCost OldCost =
- (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
- InstructionCost NewCost = ScalarOpCost + InsertCost +
- (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
- (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);
+ InstructionCost OldCost =
+ (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
+ InstructionCost NewCost = ScalarOpCost + InsertCost +
+ (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
+ (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);
// We want to scalarize unless the vector variant actually has lower cost.
- if (OldCost < NewCost || !NewCost.isValid())
+ if (OldCost < NewCost || !NewCost.isValid())
return false;
// vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index) -->
@@ -712,8 +712,8 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
if (!VecTy)
return false;
- InstructionCost OldCost =
- TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
+ InstructionCost OldCost =
+ TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
OldCost += TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
OldCost += TTI.getCmpSelInstrCost(CmpOpcode, I0->getType()) * 2;
OldCost += TTI.getArithmeticInstrCost(I.getOpcode(), I.getType());
@@ -724,7 +724,7 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
auto *CmpTy = cast<FixedVectorType>(CmpInst::makeCmpResultType(X->getType()));
- InstructionCost NewCost = TTI.getCmpSelInstrCost(CmpOpcode, X->getType());
+ InstructionCost NewCost = TTI.getCmpSelInstrCost(CmpOpcode, X->getType());
NewCost +=
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, CmpTy);
NewCost += TTI.getArithmeticInstrCost(I.getOpcode(), CmpTy);
@@ -733,7 +733,7 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
// Aggressively form vector ops if the cost is equal because the transform
// may enable further optimization.
// Codegen can reverse this transform (scalarize) if it was not profitable.
- if (OldCost < NewCost || !NewCost.isValid())
+ if (OldCost < NewCost || !NewCost.isValid())
return false;
// Create a vector constant from the 2 scalar constants.
@@ -758,10 +758,10 @@ bool VectorCombine::run() {
if (DisableVectorCombine)
return false;
- // Don't attempt vectorization if the target does not support vectors.
- if (!TTI.getNumberOfRegisters(TTI.getRegisterClassForType(/*Vector*/ true)))
- return false;
-
+ // Don't attempt vectorization if the target does not support vectors.
+ if (!TTI.getNumberOfRegisters(TTI.getRegisterClassForType(/*Vector*/ true)))
+ return false;
+
bool MadeChange = false;
for (BasicBlock &BB : F) {
// Ignore unreachable basic blocks.
@@ -775,7 +775,7 @@ bool VectorCombine::run() {
if (isa<DbgInfoIntrinsic>(I))
continue;
Builder.SetInsertPoint(&I);
- MadeChange |= vectorizeLoadInsert(I);
+ MadeChange |= vectorizeLoadInsert(I);
MadeChange |= foldExtractExtract(I);
MadeChange |= foldBitcastShuf(I);
MadeChange |= scalarizeBinopOrCmp(I);