path: root/contrib/libs/llvm12/lib/Transforms/Vectorize/VectorCombine.cpp
author shadchin <shadchin@yandex-team.ru> 2022-02-10 16:44:39 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:44:39 +0300
commit e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (patch)
tree 64175d5cadab313b3e7039ebaa06c5bc3295e274 /contrib/libs/llvm12/lib/Transforms/Vectorize/VectorCombine.cpp
parent 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (diff)
download ydb-e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0.tar.gz
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Transforms/Vectorize/VectorCombine.cpp')
-rw-r--r-- contrib/libs/llvm12/lib/Transforms/Vectorize/VectorCombine.cpp | 360
1 file changed, 180 insertions, 180 deletions
diff --git a/contrib/libs/llvm12/lib/Transforms/Vectorize/VectorCombine.cpp b/contrib/libs/llvm12/lib/Transforms/Vectorize/VectorCombine.cpp
index 7b0a72de4e..787f146bdd 100644
--- a/contrib/libs/llvm12/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -16,7 +16,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
@@ -34,7 +34,7 @@ using namespace llvm;
using namespace llvm::PatternMatch;
#define DEBUG_TYPE "vector-combine"
-STATISTIC(NumVecLoad, "Number of vector loads formed");
+STATISTIC(NumVecLoad, "Number of vector loads formed");
STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
@@ -67,7 +67,7 @@ private:
const TargetTransformInfo &TTI;
const DominatorTree &DT;
- bool vectorizeLoadInsert(Instruction &I);
+ bool vectorizeLoadInsert(Instruction &I);
ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
ExtractElementInst *Ext1,
unsigned PreferredExtractIndex) const;
@@ -91,138 +91,138 @@ static void replaceValue(Value &Old, Value &New) {
New.takeName(&Old);
}
-bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
- // Match insert into fixed vector of scalar value.
- // TODO: Handle non-zero insert index.
- auto *Ty = dyn_cast<FixedVectorType>(I.getType());
- Value *Scalar;
- if (!Ty || !match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) ||
- !Scalar->hasOneUse())
- return false;
-
- // Optionally match an extract from another vector.
- Value *X;
- bool HasExtract = match(Scalar, m_ExtractElt(m_Value(X), m_ZeroInt()));
- if (!HasExtract)
- X = Scalar;
-
- // Match source value as load of scalar or vector.
- // Do not vectorize scalar load (widening) if atomic/volatile or under
- // asan/hwasan/memtag/tsan. The widened load may load data from dirty regions
- // or create data races non-existent in the source.
- auto *Load = dyn_cast<LoadInst>(X);
- if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
- Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
- mustSuppressSpeculation(*Load))
- return false;
-
- const DataLayout &DL = I.getModule()->getDataLayout();
- Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
- assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
-
- // If original AS != Load's AS, we can't bitcast the original pointer and have
- // to use Load's operand instead. Ideally we would want to strip pointer casts
- // without changing AS, but there's no API to do that ATM.
- unsigned AS = Load->getPointerAddressSpace();
- if (AS != SrcPtr->getType()->getPointerAddressSpace())
- SrcPtr = Load->getPointerOperand();
-
- // We are potentially transforming byte-sized (8-bit) memory accesses, so make
- // sure we have all of our type-based constraints in place for this target.
- Type *ScalarTy = Scalar->getType();
- uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();
- unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
- if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
- ScalarSize % 8 != 0)
- return false;
-
- // Check safety of replacing the scalar load with a larger vector load.
- // We use minimal alignment (maximum flexibility) because we only care about
- // the dereferenceable region. When calculating cost and creating a new op,
- // we may use a larger value based on alignment attributes.
- unsigned MinVecNumElts = MinVectorSize / ScalarSize;
- auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
- unsigned OffsetEltIndex = 0;
- Align Alignment = Load->getAlign();
- if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT)) {
- // It is not safe to load directly from the pointer, but we can still peek
- // through gep offsets and check if it safe to load from a base address with
- // updated alignment. If it is, we can shuffle the element(s) into place
- // after loading.
- unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(SrcPtr->getType());
- APInt Offset(OffsetBitWidth, 0);
- SrcPtr = SrcPtr->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
-
- // We want to shuffle the result down from a high element of a vector, so
- // the offset must be positive.
- if (Offset.isNegative())
- return false;
-
- // The offset must be a multiple of the scalar element to shuffle cleanly
- // in the element's size.
- uint64_t ScalarSizeInBytes = ScalarSize / 8;
- if (Offset.urem(ScalarSizeInBytes) != 0)
- return false;
-
- // If we load MinVecNumElts, will our target element still be loaded?
- OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
- if (OffsetEltIndex >= MinVecNumElts)
- return false;
-
- if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT))
- return false;
-
- // Update alignment with offset value. Note that the offset could be negated
- // to more accurately represent "(new) SrcPtr - Offset = (old) SrcPtr", but
- // negation does not change the result of the alignment calculation.
- Alignment = commonAlignment(Alignment, Offset.getZExtValue());
- }
-
- // Original pattern: insertelt undef, load [free casts of] PtrOp, 0
- // Use the greater of the alignment on the load or its source pointer.
- Alignment = std::max(SrcPtr->getPointerAlignment(DL), Alignment);
- Type *LoadTy = Load->getType();
- InstructionCost OldCost =
- TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
- APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0);
- OldCost += TTI.getScalarizationOverhead(MinVecTy, DemandedElts,
- /* Insert */ true, HasExtract);
-
- // New pattern: load VecPtr
- InstructionCost NewCost =
- TTI.getMemoryOpCost(Instruction::Load, MinVecTy, Alignment, AS);
- // Optionally, we are shuffling the loaded vector element(s) into place.
- if (OffsetEltIndex)
- NewCost += TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, MinVecTy);
-
- // We can aggressively convert to the vector form because the backend can
- // invert this transform if it does not result in a performance win.
- if (OldCost < NewCost || !NewCost.isValid())
- return false;
-
- // It is safe and potentially profitable to load a vector directly:
- // inselt undef, load Scalar, 0 --> load VecPtr
- IRBuilder<> Builder(Load);
- Value *CastedPtr = Builder.CreateBitCast(SrcPtr, MinVecTy->getPointerTo(AS));
- Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
-
- // Set everything but element 0 to undef to prevent poison from propagating
- // from the extra loaded memory. This will also optionally shrink/grow the
- // vector from the loaded size to the output size.
- // We assume this operation has no cost in codegen if there was no offset.
- // Note that we could use freeze to avoid poison problems, but then we might
- // still need a shuffle to change the vector size.
- unsigned OutputNumElts = Ty->getNumElements();
- SmallVector<int, 16> Mask(OutputNumElts, UndefMaskElem);
- assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
- Mask[0] = OffsetEltIndex;
- VecLd = Builder.CreateShuffleVector(VecLd, Mask);
-
- replaceValue(I, *VecLd);
- ++NumVecLoad;
- return true;
-}
-
+bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
+ // Match insert into fixed vector of scalar value.
+ // TODO: Handle non-zero insert index.
+ auto *Ty = dyn_cast<FixedVectorType>(I.getType());
+ Value *Scalar;
+ if (!Ty || !match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) ||
+ !Scalar->hasOneUse())
+ return false;
+
+ // Optionally match an extract from another vector.
+ Value *X;
+ bool HasExtract = match(Scalar, m_ExtractElt(m_Value(X), m_ZeroInt()));
+ if (!HasExtract)
+ X = Scalar;
+
+ // Match source value as load of scalar or vector.
+ // Do not vectorize scalar load (widening) if atomic/volatile or under
+ // asan/hwasan/memtag/tsan. The widened load may load data from dirty regions
+ // or create data races non-existent in the source.
+ auto *Load = dyn_cast<LoadInst>(X);
+ if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
+ Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
+ mustSuppressSpeculation(*Load))
+ return false;
+
+ const DataLayout &DL = I.getModule()->getDataLayout();
+ Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
+ assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
+
+ // If original AS != Load's AS, we can't bitcast the original pointer and have
+ // to use Load's operand instead. Ideally we would want to strip pointer casts
+ // without changing AS, but there's no API to do that ATM.
+ unsigned AS = Load->getPointerAddressSpace();
+ if (AS != SrcPtr->getType()->getPointerAddressSpace())
+ SrcPtr = Load->getPointerOperand();
+
+ // We are potentially transforming byte-sized (8-bit) memory accesses, so make
+ // sure we have all of our type-based constraints in place for this target.
+ Type *ScalarTy = Scalar->getType();
+ uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();
+ unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
+ if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
+ ScalarSize % 8 != 0)
+ return false;
+
+ // Check safety of replacing the scalar load with a larger vector load.
+ // We use minimal alignment (maximum flexibility) because we only care about
+ // the dereferenceable region. When calculating cost and creating a new op,
+ // we may use a larger value based on alignment attributes.
+ unsigned MinVecNumElts = MinVectorSize / ScalarSize;
+ auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
+ unsigned OffsetEltIndex = 0;
+ Align Alignment = Load->getAlign();
+ if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT)) {
+ // It is not safe to load directly from the pointer, but we can still peek
+ // through gep offsets and check if it safe to load from a base address with
+ // updated alignment. If it is, we can shuffle the element(s) into place
+ // after loading.
+ unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(SrcPtr->getType());
+ APInt Offset(OffsetBitWidth, 0);
+ SrcPtr = SrcPtr->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
+
+ // We want to shuffle the result down from a high element of a vector, so
+ // the offset must be positive.
+ if (Offset.isNegative())
+ return false;
+
+ // The offset must be a multiple of the scalar element to shuffle cleanly
+ // in the element's size.
+ uint64_t ScalarSizeInBytes = ScalarSize / 8;
+ if (Offset.urem(ScalarSizeInBytes) != 0)
+ return false;
+
+ // If we load MinVecNumElts, will our target element still be loaded?
+ OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
+ if (OffsetEltIndex >= MinVecNumElts)
+ return false;
+
+ if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT))
+ return false;
+
+ // Update alignment with offset value. Note that the offset could be negated
+ // to more accurately represent "(new) SrcPtr - Offset = (old) SrcPtr", but
+ // negation does not change the result of the alignment calculation.
+ Alignment = commonAlignment(Alignment, Offset.getZExtValue());
+ }
+
+ // Original pattern: insertelt undef, load [free casts of] PtrOp, 0
+ // Use the greater of the alignment on the load or its source pointer.
+ Alignment = std::max(SrcPtr->getPointerAlignment(DL), Alignment);
+ Type *LoadTy = Load->getType();
+ InstructionCost OldCost =
+ TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
+ APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0);
+ OldCost += TTI.getScalarizationOverhead(MinVecTy, DemandedElts,
+ /* Insert */ true, HasExtract);
+
+ // New pattern: load VecPtr
+ InstructionCost NewCost =
+ TTI.getMemoryOpCost(Instruction::Load, MinVecTy, Alignment, AS);
+ // Optionally, we are shuffling the loaded vector element(s) into place.
+ if (OffsetEltIndex)
+ NewCost += TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, MinVecTy);
+
+ // We can aggressively convert to the vector form because the backend can
+ // invert this transform if it does not result in a performance win.
+ if (OldCost < NewCost || !NewCost.isValid())
+ return false;
+
+ // It is safe and potentially profitable to load a vector directly:
+ // inselt undef, load Scalar, 0 --> load VecPtr
+ IRBuilder<> Builder(Load);
+ Value *CastedPtr = Builder.CreateBitCast(SrcPtr, MinVecTy->getPointerTo(AS));
+ Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
+
+ // Set everything but element 0 to undef to prevent poison from propagating
+ // from the extra loaded memory. This will also optionally shrink/grow the
+ // vector from the loaded size to the output size.
+ // We assume this operation has no cost in codegen if there was no offset.
+ // Note that we could use freeze to avoid poison problems, but then we might
+ // still need a shuffle to change the vector size.
+ unsigned OutputNumElts = Ty->getNumElements();
+ SmallVector<int, 16> Mask(OutputNumElts, UndefMaskElem);
+ assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
+ Mask[0] = OffsetEltIndex;
+ VecLd = Builder.CreateShuffleVector(VecLd, Mask);
+
+ replaceValue(I, *VecLd);
+ ++NumVecLoad;
+ return true;
+}
+
/// Determine which, if any, of the inputs should be replaced by a shuffle
/// followed by extract from a different index.
ExtractElementInst *VectorCombine::getShuffleExtract(
@@ -241,15 +241,15 @@ ExtractElementInst *VectorCombine::getShuffleExtract(
Type *VecTy = Ext0->getVectorOperand()->getType();
assert(VecTy == Ext1->getVectorOperand()->getType() && "Need matching types");
- InstructionCost Cost0 =
- TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
- InstructionCost Cost1 =
- TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
-
- // If both costs are invalid no shuffle is needed
- if (!Cost0.isValid() && !Cost1.isValid())
- return nullptr;
-
+ InstructionCost Cost0 =
+ TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
+ InstructionCost Cost1 =
+ TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
+
+ // If both costs are invalid no shuffle is needed
+ if (!Cost0.isValid() && !Cost1.isValid())
+ return nullptr;
+
// We are extracting from 2 different indexes, so one operand must be shuffled
// before performing a vector operation and/or extract. The more expensive
// extract will be replaced by a shuffle.
@@ -284,7 +284,7 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
"Expected constant extract indexes");
Type *ScalarTy = Ext0->getType();
auto *VecTy = cast<VectorType>(Ext0->getOperand(0)->getType());
- InstructionCost ScalarOpCost, VectorOpCost;
+ InstructionCost ScalarOpCost, VectorOpCost;
// Get cost estimates for scalar and vector versions of the operation.
bool IsBinOp = Instruction::isBinaryOp(Opcode);
@@ -305,9 +305,9 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
unsigned Ext0Index = cast<ConstantInt>(Ext0->getOperand(1))->getZExtValue();
unsigned Ext1Index = cast<ConstantInt>(Ext1->getOperand(1))->getZExtValue();
- InstructionCost Extract0Cost =
+ InstructionCost Extract0Cost =
TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext0Index);
- InstructionCost Extract1Cost =
+ InstructionCost Extract1Cost =
TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext1Index);
// A more expensive extract will always be replaced by a splat shuffle.
@@ -317,11 +317,11 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
// TODO: Evaluate whether that always results in lowest cost. Alternatively,
// check the cost of creating a broadcast shuffle and shuffling both
// operands to element 0.
- InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
+ InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
// Extra uses of the extracts mean that we include those costs in the
// vector total because those instructions will not be eliminated.
- InstructionCost OldCost, NewCost;
+ InstructionCost OldCost, NewCost;
if (Ext0->getOperand(0) == Ext1->getOperand(0) && Ext0Index == Ext1Index) {
// Handle a special case. If the 2 extracts are identical, adjust the
// formulas to account for that. The extra use charge allows for either the
@@ -372,7 +372,7 @@ static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
auto *VecTy = cast<FixedVectorType>(Vec->getType());
SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
ShufMask[NewIndex] = OldIndex;
- return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
+ return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
}
/// Given an extract element instruction with constant index operand, shuffle
@@ -506,23 +506,23 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) {
m_OneUse(m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))))))
return false;
- // 1) Do not fold bitcast shuffle for scalable type. First, shuffle cost for
- // scalable type is unknown; Second, we cannot reason if the narrowed shuffle
- // mask for scalable type is a splat or not.
- // 2) Disallow non-vector casts and length-changing shuffles.
+ // 1) Do not fold bitcast shuffle for scalable type. First, shuffle cost for
+ // scalable type is unknown; Second, we cannot reason if the narrowed shuffle
+ // mask for scalable type is a splat or not.
+ // 2) Disallow non-vector casts and length-changing shuffles.
// TODO: We could allow any shuffle.
- auto *DestTy = dyn_cast<FixedVectorType>(I.getType());
- auto *SrcTy = dyn_cast<FixedVectorType>(V->getType());
- if (!SrcTy || !DestTy || I.getOperand(0)->getType() != SrcTy)
+ auto *DestTy = dyn_cast<FixedVectorType>(I.getType());
+ auto *SrcTy = dyn_cast<FixedVectorType>(V->getType());
+ if (!SrcTy || !DestTy || I.getOperand(0)->getType() != SrcTy)
return false;
// The new shuffle must not cost more than the old shuffle. The bitcast is
// moved ahead of the shuffle, so assume that it has the same cost as before.
- InstructionCost DestCost =
- TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, DestTy);
- InstructionCost SrcCost =
- TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy);
- if (DestCost > SrcCost || !DestCost.isValid())
+ InstructionCost DestCost =
+ TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, DestTy);
+ InstructionCost SrcCost =
+ TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy);
+ if (DestCost > SrcCost || !DestCost.isValid())
return false;
unsigned DestNumElts = DestTy->getNumElements();
@@ -545,7 +545,7 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) {
// bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'
++NumShufOfBitcast;
Value *CastV = Builder.CreateBitCast(V, DestTy);
- Value *Shuf = Builder.CreateShuffleVector(CastV, NewMask);
+ Value *Shuf = Builder.CreateShuffleVector(CastV, NewMask);
replaceValue(I, *Shuf);
return true;
}
@@ -612,7 +612,7 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
"Unexpected types for insert element into binop or cmp");
unsigned Opcode = I.getOpcode();
- InstructionCost ScalarOpCost, VectorOpCost;
+ InstructionCost ScalarOpCost, VectorOpCost;
if (IsCmp) {
ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy);
VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy);
@@ -623,16 +623,16 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
// Get cost estimate for the insert element. This cost will factor into
// both sequences.
- InstructionCost InsertCost =
+ InstructionCost InsertCost =
TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, Index);
- InstructionCost OldCost =
- (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
- InstructionCost NewCost = ScalarOpCost + InsertCost +
- (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
- (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);
+ InstructionCost OldCost =
+ (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
+ InstructionCost NewCost = ScalarOpCost + InsertCost +
+ (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
+ (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);
// We want to scalarize unless the vector variant actually has lower cost.
- if (OldCost < NewCost || !NewCost.isValid())
+ if (OldCost < NewCost || !NewCost.isValid())
return false;
// vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index) -->
@@ -712,8 +712,8 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
if (!VecTy)
return false;
- InstructionCost OldCost =
- TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
+ InstructionCost OldCost =
+ TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
OldCost += TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
OldCost += TTI.getCmpSelInstrCost(CmpOpcode, I0->getType()) * 2;
OldCost += TTI.getArithmeticInstrCost(I.getOpcode(), I.getType());
@@ -724,7 +724,7 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
auto *CmpTy = cast<FixedVectorType>(CmpInst::makeCmpResultType(X->getType()));
- InstructionCost NewCost = TTI.getCmpSelInstrCost(CmpOpcode, X->getType());
+ InstructionCost NewCost = TTI.getCmpSelInstrCost(CmpOpcode, X->getType());
NewCost +=
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, CmpTy);
NewCost += TTI.getArithmeticInstrCost(I.getOpcode(), CmpTy);
@@ -733,7 +733,7 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
// Aggressively form vector ops if the cost is equal because the transform
// may enable further optimization.
// Codegen can reverse this transform (scalarize) if it was not profitable.
- if (OldCost < NewCost || !NewCost.isValid())
+ if (OldCost < NewCost || !NewCost.isValid())
return false;
// Create a vector constant from the 2 scalar constants.
@@ -758,10 +758,10 @@ bool VectorCombine::run() {
if (DisableVectorCombine)
return false;
- // Don't attempt vectorization if the target does not support vectors.
- if (!TTI.getNumberOfRegisters(TTI.getRegisterClassForType(/*Vector*/ true)))
- return false;
-
+ // Don't attempt vectorization if the target does not support vectors.
+ if (!TTI.getNumberOfRegisters(TTI.getRegisterClassForType(/*Vector*/ true)))
+ return false;
+
bool MadeChange = false;
for (BasicBlock &BB : F) {
// Ignore unreachable basic blocks.
@@ -775,7 +775,7 @@ bool VectorCombine::run() {
if (isa<DbgInfoIntrinsic>(I))
continue;
Builder.SetInsertPoint(&I);
- MadeChange |= vectorizeLoadInsert(I);
+ MadeChange |= vectorizeLoadInsert(I);
MadeChange |= foldExtractExtract(I);
MadeChange |= foldBitcastShuf(I);
MadeChange |= scalarizeBinopOrCmp(I);