author | shadchin <shadchin@yandex-team.ru> | 2022-02-10 16:44:39 +0300
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:39 +0300
commit | e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (patch)
tree | 64175d5cadab313b3e7039ebaa06c5bc3295e274 /contrib/libs/llvm12/lib/Transforms/Vectorize/VectorCombine.cpp
parent | 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (diff)
download | ydb-e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0.tar.gz
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Transforms/Vectorize/VectorCombine.cpp')
-rw-r--r-- | contrib/libs/llvm12/lib/Transforms/Vectorize/VectorCombine.cpp | 360
1 file changed, 180 insertions, 180 deletions
diff --git a/contrib/libs/llvm12/lib/Transforms/Vectorize/VectorCombine.cpp b/contrib/libs/llvm12/lib/Transforms/Vectorize/VectorCombine.cpp
index 7b0a72de4e..787f146bdd 100644
--- a/contrib/libs/llvm12/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -16,7 +16,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/VectorUtils.h"
@@ -34,7 +34,7 @@ using namespace llvm;
 using namespace llvm::PatternMatch;
 
 #define DEBUG_TYPE "vector-combine"
-STATISTIC(NumVecLoad, "Number of vector loads formed");
+STATISTIC(NumVecLoad, "Number of vector loads formed");
 STATISTIC(NumVecCmp, "Number of vector compares formed");
 STATISTIC(NumVecBO, "Number of vector binops formed");
 STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
@@ -67,7 +67,7 @@ private:
   const TargetTransformInfo &TTI;
   const DominatorTree &DT;
 
-  bool vectorizeLoadInsert(Instruction &I);
+  bool vectorizeLoadInsert(Instruction &I);
   ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
                                         ExtractElementInst *Ext1,
                                         unsigned PreferredExtractIndex) const;
@@ -91,138 +91,138 @@ static void replaceValue(Value &Old, Value &New) {
   New.takeName(&Old);
 }
 
-bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
-  // Match insert into fixed vector of scalar value.
-  // TODO: Handle non-zero insert index.
-  auto *Ty = dyn_cast<FixedVectorType>(I.getType());
-  Value *Scalar;
-  if (!Ty || !match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) ||
-      !Scalar->hasOneUse())
-    return false;
-
-  // Optionally match an extract from another vector.
-  Value *X;
-  bool HasExtract = match(Scalar, m_ExtractElt(m_Value(X), m_ZeroInt()));
-  if (!HasExtract)
-    X = Scalar;
-
-  // Match source value as load of scalar or vector.
-  // Do not vectorize scalar load (widening) if atomic/volatile or under
-  // asan/hwasan/memtag/tsan. The widened load may load data from dirty regions
-  // or create data races non-existent in the source.
-  auto *Load = dyn_cast<LoadInst>(X);
-  if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
-      Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
-      mustSuppressSpeculation(*Load))
-    return false;
-
-  const DataLayout &DL = I.getModule()->getDataLayout();
-  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
-  assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
-
-  // If original AS != Load's AS, we can't bitcast the original pointer and have
-  // to use Load's operand instead. Ideally we would want to strip pointer casts
-  // without changing AS, but there's no API to do that ATM.
-  unsigned AS = Load->getPointerAddressSpace();
-  if (AS != SrcPtr->getType()->getPointerAddressSpace())
-    SrcPtr = Load->getPointerOperand();
-
-  // We are potentially transforming byte-sized (8-bit) memory accesses, so make
-  // sure we have all of our type-based constraints in place for this target.
-  Type *ScalarTy = Scalar->getType();
-  uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();
-  unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
-  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
-      ScalarSize % 8 != 0)
-    return false;
-
-  // Check safety of replacing the scalar load with a larger vector load.
-  // We use minimal alignment (maximum flexibility) because we only care about
-  // the dereferenceable region. When calculating cost and creating a new op,
-  // we may use a larger value based on alignment attributes.
-  unsigned MinVecNumElts = MinVectorSize / ScalarSize;
-  auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
-  unsigned OffsetEltIndex = 0;
-  Align Alignment = Load->getAlign();
-  if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT)) {
-    // It is not safe to load directly from the pointer, but we can still peek
-    // through gep offsets and check if it safe to load from a base address with
-    // updated alignment. If it is, we can shuffle the element(s) into place
-    // after loading.
-    unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(SrcPtr->getType());
-    APInt Offset(OffsetBitWidth, 0);
-    SrcPtr = SrcPtr->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
-
-    // We want to shuffle the result down from a high element of a vector, so
-    // the offset must be positive.
-    if (Offset.isNegative())
-      return false;
-
-    // The offset must be a multiple of the scalar element to shuffle cleanly
-    // in the element's size.
-    uint64_t ScalarSizeInBytes = ScalarSize / 8;
-    if (Offset.urem(ScalarSizeInBytes) != 0)
-      return false;
-
-    // If we load MinVecNumElts, will our target element still be loaded?
-    OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
-    if (OffsetEltIndex >= MinVecNumElts)
-      return false;
-
-    if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT))
-      return false;
-
-    // Update alignment with offset value. Note that the offset could be negated
-    // to more accurately represent "(new) SrcPtr - Offset = (old) SrcPtr", but
-    // negation does not change the result of the alignment calculation.
-    Alignment = commonAlignment(Alignment, Offset.getZExtValue());
-  }
-
-  // Original pattern: insertelt undef, load [free casts of] PtrOp, 0
-  // Use the greater of the alignment on the load or its source pointer.
-  Alignment = std::max(SrcPtr->getPointerAlignment(DL), Alignment);
-  Type *LoadTy = Load->getType();
-  InstructionCost OldCost =
-      TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
-  APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0);
-  OldCost += TTI.getScalarizationOverhead(MinVecTy, DemandedElts,
-                                          /* Insert */ true, HasExtract);
-
-  // New pattern: load VecPtr
-  InstructionCost NewCost =
-      TTI.getMemoryOpCost(Instruction::Load, MinVecTy, Alignment, AS);
-  // Optionally, we are shuffling the loaded vector element(s) into place.
-  if (OffsetEltIndex)
-    NewCost += TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, MinVecTy);
-
-  // We can aggressively convert to the vector form because the backend can
-  // invert this transform if it does not result in a performance win.
-  if (OldCost < NewCost || !NewCost.isValid())
-    return false;
-
-  // It is safe and potentially profitable to load a vector directly:
-  // inselt undef, load Scalar, 0 --> load VecPtr
-  IRBuilder<> Builder(Load);
-  Value *CastedPtr = Builder.CreateBitCast(SrcPtr, MinVecTy->getPointerTo(AS));
-  Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
-
-  // Set everything but element 0 to undef to prevent poison from propagating
-  // from the extra loaded memory. This will also optionally shrink/grow the
-  // vector from the loaded size to the output size.
-  // We assume this operation has no cost in codegen if there was no offset.
-  // Note that we could use freeze to avoid poison problems, but then we might
-  // still need a shuffle to change the vector size.
-  unsigned OutputNumElts = Ty->getNumElements();
-  SmallVector<int, 16> Mask(OutputNumElts, UndefMaskElem);
-  assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
-  Mask[0] = OffsetEltIndex;
-  VecLd = Builder.CreateShuffleVector(VecLd, Mask);
-
-  replaceValue(I, *VecLd);
-  ++NumVecLoad;
-  return true;
-}
-
+bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
+  // Match insert into fixed vector of scalar value.
+  // TODO: Handle non-zero insert index.
+  auto *Ty = dyn_cast<FixedVectorType>(I.getType());
+  Value *Scalar;
+  if (!Ty || !match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) ||
+      !Scalar->hasOneUse())
+    return false;
+
+  // Optionally match an extract from another vector.
+  Value *X;
+  bool HasExtract = match(Scalar, m_ExtractElt(m_Value(X), m_ZeroInt()));
+  if (!HasExtract)
+    X = Scalar;
+
+  // Match source value as load of scalar or vector.
+  // Do not vectorize scalar load (widening) if atomic/volatile or under
+  // asan/hwasan/memtag/tsan. The widened load may load data from dirty regions
+  // or create data races non-existent in the source.
+  auto *Load = dyn_cast<LoadInst>(X);
+  if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
+      Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
+      mustSuppressSpeculation(*Load))
+    return false;
+
+  const DataLayout &DL = I.getModule()->getDataLayout();
+  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
+  assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
+
+  // If original AS != Load's AS, we can't bitcast the original pointer and have
+  // to use Load's operand instead. Ideally we would want to strip pointer casts
+  // without changing AS, but there's no API to do that ATM.
+  unsigned AS = Load->getPointerAddressSpace();
+  if (AS != SrcPtr->getType()->getPointerAddressSpace())
+    SrcPtr = Load->getPointerOperand();
+
+  // We are potentially transforming byte-sized (8-bit) memory accesses, so make
+  // sure we have all of our type-based constraints in place for this target.
+  Type *ScalarTy = Scalar->getType();
+  uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();
+  unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
+  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
+      ScalarSize % 8 != 0)
+    return false;
+
+  // Check safety of replacing the scalar load with a larger vector load.
+  // We use minimal alignment (maximum flexibility) because we only care about
+  // the dereferenceable region. When calculating cost and creating a new op,
+  // we may use a larger value based on alignment attributes.
+  unsigned MinVecNumElts = MinVectorSize / ScalarSize;
+  auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
+  unsigned OffsetEltIndex = 0;
+  Align Alignment = Load->getAlign();
+  if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT)) {
+    // It is not safe to load directly from the pointer, but we can still peek
+    // through gep offsets and check if it safe to load from a base address with
+    // updated alignment. If it is, we can shuffle the element(s) into place
+    // after loading.
+    unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(SrcPtr->getType());
+    APInt Offset(OffsetBitWidth, 0);
+    SrcPtr = SrcPtr->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
+
+    // We want to shuffle the result down from a high element of a vector, so
+    // the offset must be positive.
+    if (Offset.isNegative())
+      return false;
+
+    // The offset must be a multiple of the scalar element to shuffle cleanly
+    // in the element's size.
+    uint64_t ScalarSizeInBytes = ScalarSize / 8;
+    if (Offset.urem(ScalarSizeInBytes) != 0)
+      return false;
+
+    // If we load MinVecNumElts, will our target element still be loaded?
+    OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
+    if (OffsetEltIndex >= MinVecNumElts)
+      return false;
+
+    if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT))
+      return false;
+
+    // Update alignment with offset value. Note that the offset could be negated
+    // to more accurately represent "(new) SrcPtr - Offset = (old) SrcPtr", but
+    // negation does not change the result of the alignment calculation.
+    Alignment = commonAlignment(Alignment, Offset.getZExtValue());
+  }
+
+  // Original pattern: insertelt undef, load [free casts of] PtrOp, 0
+  // Use the greater of the alignment on the load or its source pointer.
+  Alignment = std::max(SrcPtr->getPointerAlignment(DL), Alignment);
+  Type *LoadTy = Load->getType();
+  InstructionCost OldCost =
+      TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
+  APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0);
+  OldCost += TTI.getScalarizationOverhead(MinVecTy, DemandedElts,
+                                          /* Insert */ true, HasExtract);
+
+  // New pattern: load VecPtr
+  InstructionCost NewCost =
+      TTI.getMemoryOpCost(Instruction::Load, MinVecTy, Alignment, AS);
+  // Optionally, we are shuffling the loaded vector element(s) into place.
+  if (OffsetEltIndex)
+    NewCost += TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, MinVecTy);
+
+  // We can aggressively convert to the vector form because the backend can
+  // invert this transform if it does not result in a performance win.
+  if (OldCost < NewCost || !NewCost.isValid())
+    return false;
+
+  // It is safe and potentially profitable to load a vector directly:
+  // inselt undef, load Scalar, 0 --> load VecPtr
+  IRBuilder<> Builder(Load);
+  Value *CastedPtr = Builder.CreateBitCast(SrcPtr, MinVecTy->getPointerTo(AS));
+  Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
+
+  // Set everything but element 0 to undef to prevent poison from propagating
+  // from the extra loaded memory. This will also optionally shrink/grow the
+  // vector from the loaded size to the output size.
+  // We assume this operation has no cost in codegen if there was no offset.
+  // Note that we could use freeze to avoid poison problems, but then we might
+  // still need a shuffle to change the vector size.
+  unsigned OutputNumElts = Ty->getNumElements();
+  SmallVector<int, 16> Mask(OutputNumElts, UndefMaskElem);
+  assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
+  Mask[0] = OffsetEltIndex;
+  VecLd = Builder.CreateShuffleVector(VecLd, Mask);
+
+  replaceValue(I, *VecLd);
+  ++NumVecLoad;
+  return true;
+}
+
 /// Determine which, if any, of the inputs should be replaced by a shuffle
 /// followed by extract from a different index.
 ExtractElementInst *VectorCombine::getShuffleExtract(
@@ -241,15 +241,15 @@ ExtractElementInst *VectorCombine::getShuffleExtract(
 
   Type *VecTy = Ext0->getVectorOperand()->getType();
   assert(VecTy == Ext1->getVectorOperand()->getType() && "Need matching types");
-  InstructionCost Cost0 =
-      TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
-  InstructionCost Cost1 =
-      TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
-
-  // If both costs are invalid no shuffle is needed
-  if (!Cost0.isValid() && !Cost1.isValid())
-    return nullptr;
-
+  InstructionCost Cost0 =
+      TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
+  InstructionCost Cost1 =
+      TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
+
+  // If both costs are invalid no shuffle is needed
+  if (!Cost0.isValid() && !Cost1.isValid())
+    return nullptr;
+
   // We are extracting from 2 different indexes, so one operand must be shuffled
   // before performing a vector operation and/or extract. The more expensive
   // extract will be replaced by a shuffle.
@@ -284,7 +284,7 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
          "Expected constant extract indexes");
   Type *ScalarTy = Ext0->getType();
   auto *VecTy = cast<VectorType>(Ext0->getOperand(0)->getType());
-  InstructionCost ScalarOpCost, VectorOpCost;
+  InstructionCost ScalarOpCost, VectorOpCost;
 
   // Get cost estimates for scalar and vector versions of the operation.
   bool IsBinOp = Instruction::isBinaryOp(Opcode);
@@ -305,9 +305,9 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
   unsigned Ext0Index = cast<ConstantInt>(Ext0->getOperand(1))->getZExtValue();
   unsigned Ext1Index = cast<ConstantInt>(Ext1->getOperand(1))->getZExtValue();
 
-  InstructionCost Extract0Cost =
+  InstructionCost Extract0Cost =
       TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext0Index);
-  InstructionCost Extract1Cost =
+  InstructionCost Extract1Cost =
       TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext1Index);
 
   // A more expensive extract will always be replaced by a splat shuffle.
@@ -317,11 +317,11 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
   // TODO: Evaluate whether that always results in lowest cost. Alternatively,
   // check the cost of creating a broadcast shuffle and shuffling both
   // operands to element 0.
-  InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
+  InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
 
   // Extra uses of the extracts mean that we include those costs in the
   // vector total because those instructions will not be eliminated.
-  InstructionCost OldCost, NewCost;
+  InstructionCost OldCost, NewCost;
   if (Ext0->getOperand(0) == Ext1->getOperand(0) && Ext0Index == Ext1Index) {
     // Handle a special case. If the 2 extracts are identical, adjust the
     // formulas to account for that. The extra use charge allows for either the
@@ -372,7 +372,7 @@ static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
   auto *VecTy = cast<FixedVectorType>(Vec->getType());
   SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
   ShufMask[NewIndex] = OldIndex;
-  return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
+  return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
 }
 
 /// Given an extract element instruction with constant index operand, shuffle
@@ -506,23 +506,23 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) {
                      m_OneUse(m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))))))
     return false;
 
-  // 1) Do not fold bitcast shuffle for scalable type. First, shuffle cost for
-  // scalable type is unknown; Second, we cannot reason if the narrowed shuffle
-  // mask for scalable type is a splat or not.
-  // 2) Disallow non-vector casts and length-changing shuffles.
+  // 1) Do not fold bitcast shuffle for scalable type. First, shuffle cost for
+  // scalable type is unknown; Second, we cannot reason if the narrowed shuffle
+  // mask for scalable type is a splat or not.
+  // 2) Disallow non-vector casts and length-changing shuffles.
   // TODO: We could allow any shuffle.
-  auto *DestTy = dyn_cast<FixedVectorType>(I.getType());
-  auto *SrcTy = dyn_cast<FixedVectorType>(V->getType());
-  if (!SrcTy || !DestTy || I.getOperand(0)->getType() != SrcTy)
+  auto *DestTy = dyn_cast<FixedVectorType>(I.getType());
+  auto *SrcTy = dyn_cast<FixedVectorType>(V->getType());
+  if (!SrcTy || !DestTy || I.getOperand(0)->getType() != SrcTy)
     return false;
 
   // The new shuffle must not cost more than the old shuffle. The bitcast is
   // moved ahead of the shuffle, so assume that it has the same cost as before.
-  InstructionCost DestCost =
-      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, DestTy);
-  InstructionCost SrcCost =
-      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy);
-  if (DestCost > SrcCost || !DestCost.isValid())
+  InstructionCost DestCost =
+      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, DestTy);
+  InstructionCost SrcCost =
+      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy);
+  if (DestCost > SrcCost || !DestCost.isValid())
     return false;
 
   unsigned DestNumElts = DestTy->getNumElements();
@@ -545,7 +545,7 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) {
   // bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'
   ++NumShufOfBitcast;
   Value *CastV = Builder.CreateBitCast(V, DestTy);
-  Value *Shuf = Builder.CreateShuffleVector(CastV, NewMask);
+  Value *Shuf = Builder.CreateShuffleVector(CastV, NewMask);
   replaceValue(I, *Shuf);
   return true;
 }
@@ -612,7 +612,7 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
          "Unexpected types for insert element into binop or cmp");
 
   unsigned Opcode = I.getOpcode();
-  InstructionCost ScalarOpCost, VectorOpCost;
+  InstructionCost ScalarOpCost, VectorOpCost;
   if (IsCmp) {
     ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy);
     VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy);
@@ -623,16 +623,16 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
 
   // Get cost estimate for the insert element. This cost will factor into
   // both sequences.
-  InstructionCost InsertCost =
+  InstructionCost InsertCost =
       TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, Index);
-  InstructionCost OldCost =
-      (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
-  InstructionCost NewCost = ScalarOpCost + InsertCost +
-                            (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
-                            (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);
+  InstructionCost OldCost =
+      (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
+  InstructionCost NewCost = ScalarOpCost + InsertCost +
+                            (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
+                            (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);
 
   // We want to scalarize unless the vector variant actually has lower cost.
-  if (OldCost < NewCost || !NewCost.isValid())
+  if (OldCost < NewCost || !NewCost.isValid())
     return false;
 
   // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index) -->
@@ -712,8 +712,8 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
   if (!VecTy)
     return false;
 
-  InstructionCost OldCost =
-      TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
+  InstructionCost OldCost =
+      TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
   OldCost += TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
   OldCost += TTI.getCmpSelInstrCost(CmpOpcode, I0->getType()) * 2;
   OldCost += TTI.getArithmeticInstrCost(I.getOpcode(), I.getType());
@@ -724,7 +724,7 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
   int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
   int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
   auto *CmpTy = cast<FixedVectorType>(CmpInst::makeCmpResultType(X->getType()));
-  InstructionCost NewCost = TTI.getCmpSelInstrCost(CmpOpcode, X->getType());
+  InstructionCost NewCost = TTI.getCmpSelInstrCost(CmpOpcode, X->getType());
   NewCost +=
       TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, CmpTy);
   NewCost += TTI.getArithmeticInstrCost(I.getOpcode(), CmpTy);
@@ -733,7 +733,7 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
   // Aggressively form vector ops if the cost is equal because the transform
   // may enable further optimization.
  // Codegen can reverse this transform (scalarize) if it was not profitable.
-  if (OldCost < NewCost || !NewCost.isValid())
+  if (OldCost < NewCost || !NewCost.isValid())
    return false;
 
   // Create a vector constant from the 2 scalar constants.
@@ -758,10 +758,10 @@ bool VectorCombine::run() {
   if (DisableVectorCombine)
     return false;
 
-  // Don't attempt vectorization if the target does not support vectors.
-  if (!TTI.getNumberOfRegisters(TTI.getRegisterClassForType(/*Vector*/ true)))
-    return false;
-
+  // Don't attempt vectorization if the target does not support vectors.
+  if (!TTI.getNumberOfRegisters(TTI.getRegisterClassForType(/*Vector*/ true)))
+    return false;
+
   bool MadeChange = false;
   for (BasicBlock &BB : F) {
     // Ignore unreachable basic blocks.
@@ -775,7 +775,7 @@ bool VectorCombine::run() {
       if (isa<DbgInfoIntrinsic>(I))
        continue;
       Builder.SetInsertPoint(&I);
-      MadeChange |= vectorizeLoadInsert(I);
+      MadeChange |= vectorizeLoadInsert(I);
       MadeChange |= foldExtractExtract(I);
      MadeChange |= foldBitcastShuf(I);
      MadeChange |= scalarizeBinopOrCmp(I);
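
The largest changed block above is `VectorCombine::vectorizeLoadInsert`, whose own comments describe the rewrite `inselt undef, load Scalar, 0 --> load VecPtr`. As a minimal illustrative IR sketch only (hypothetical function and value names, not taken from this commit), the pattern it targets and the rewrite it performs look roughly like this:

    define <4 x float> @load_insert_sketch(float* align 16 dereferenceable(16) %p) {
      %s = load float, float* %p, align 4
      %r = insertelement <4 x float> undef, float %s, i32 0
      ret <4 x float> %r
    }
    ; If isSafeToLoadUnconditionally and the TTI cost comparison succeed, the pass
    ; may replace the scalar load + insert with a direct vector load plus a shuffle
    ; that keeps only lane 0 defined:
    ;   %bc = bitcast float* %p to <4 x float>*
    ;   %vl = load <4 x float>, <4 x float>* %bc, align 16
    ;   %r  = shufflevector <4 x float> %vl, <4 x float> undef,
    ;                       <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>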