Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 2 of 2.

author: shadchin <shadchin@yandex-team.ru> 2022-02-10 16:44:39 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:44:39 +0300
commit: e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (patch)
tree: 64175d5cadab313b3e7039ebaa06c5bc3295e274 /contrib/libs/llvm12/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
parent: 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (diff)
download: ydb-e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0.tar.gz
1 files changed, 227 insertions, 227 deletions
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index d9f8c9f83d..8e251ca940 100644
--- a/contrib/libs/llvm12/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -42,8 +42,8 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/LoopUtils.h" 
-#include "llvm/Transforms/Utils/MatrixUtils.h" 
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/MatrixUtils.h"
 
 using namespace llvm;
 using namespace PatternMatch;
@@ -63,9 +63,9 @@ static cl::opt<unsigned> TileSize(
     "fuse-matrix-tile-size", cl::init(4), cl::Hidden,
     cl::desc(
         "Tile size for matrix instruction fusion using square-shaped tiles."));
-static cl::opt<bool> TileUseLoops("fuse-matrix-use-loops", cl::init(false), 
-                                  cl::Hidden, 
-                                  cl::desc("Generate loop nest for tiling.")); 
+static cl::opt<bool> TileUseLoops("fuse-matrix-use-loops", cl::init(false),
+                                  cl::Hidden,
+                                  cl::desc("Generate loop nest for tiling."));
 static cl::opt<bool> ForceFusion(
     "force-fuse-matrix", cl::init(false), cl::Hidden,
     cl::desc("Force matrix instruction fusion even if not profitable."));
@@ -187,10 +187,10 @@ class LowerMatrixIntrinsics {
   Function &Func;
   const DataLayout &DL;
   const TargetTransformInfo &TTI;
-  AliasAnalysis *AA; 
-  DominatorTree *DT; 
-  LoopInfo *LI; 
-  OptimizationRemarkEmitter *ORE; 
+  AliasAnalysis *AA;
+  DominatorTree *DT;
+  LoopInfo *LI;
+  OptimizationRemarkEmitter *ORE;
 
   /// Contains estimates of the number of operations (loads, stores, compute) required to lower a matrix operation.
   struct OpInfoTy {
@@ -246,7 +246,7 @@ class LowerMatrixIntrinsics {
 
     void setVector(unsigned i, Value *V) { Vectors[i] = V; }
 
-    Type *getElementType() const { return getVectorTy()->getElementType(); } 
+    Type *getElementType() const { return getVectorTy()->getElementType(); }
 
     unsigned getNumVectors() const {
       if (isColumnMajor())
@@ -276,7 +276,7 @@ class LowerMatrixIntrinsics {
       return getVectorTy();
     }
 
-    VectorType *getVectorTy() const { 
+    VectorType *getVectorTy() const {
       return cast<VectorType>(Vectors[0]->getType());
     }
 
@@ -335,7 +335,7 @@ class LowerMatrixIntrinsics {
                          IRBuilder<> &Builder) const {
       Value *Vec = isColumnMajor() ? getColumn(J) : getRow(I);
       return Builder.CreateShuffleVector(
-          Vec, createSequentialMask(isColumnMajor() ? I : J, NumElts, 0), 
+          Vec, createSequentialMask(isColumnMajor() ? I : J, NumElts, 0),
           "block");
     }
   };
@@ -397,8 +397,8 @@ class LowerMatrixIntrinsics {
 
 public:
   LowerMatrixIntrinsics(Function &F, TargetTransformInfo &TTI,
-                        AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, 
-                        OptimizationRemarkEmitter *ORE) 
+                        AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI,
+                        OptimizationRemarkEmitter *ORE)
       : Func(F), DL(F.getParent()->getDataLayout()), TTI(TTI), AA(AA), DT(DT),
         LI(LI), ORE(ORE) {}
 
@@ -450,7 +450,7 @@ public:
          MaskStart < cast<FixedVectorType>(VType)->getNumElements();
          MaskStart += SI.getStride()) {
       Value *V = Builder.CreateShuffleVector(
-          MatrixVal, createSequentialMask(MaskStart, SI.getStride(), 0), 
+          MatrixVal, createSequentialMask(MaskStart, SI.getStride(), 0),
           "split");
       SplitVecs.push_back(V);
     }
@@ -488,7 +488,7 @@ public:
     case Instruction::FAdd:
     case Instruction::FSub:
     case Instruction::FMul: // Scalar multiply.
-    case Instruction::FNeg: 
+    case Instruction::FNeg:
     case Instruction::Add:
     case Instruction::Mul:
     case Instruction::Sub:
@@ -531,7 +531,7 @@ public:
     // list.
     LLVM_DEBUG(dbgs() << "Forward-propagate shapes:\n");
     while (!WorkList.empty()) {
-      Instruction *Inst = WorkList.pop_back_val(); 
+      Instruction *Inst = WorkList.pop_back_val();
 
       // New entry, set the value and insert operands
       bool Propagate = false;
@@ -601,7 +601,7 @@ public:
     // worklist.
     LLVM_DEBUG(dbgs() << "Backward-propagate shapes:\n");
     while (!WorkList.empty()) {
-      Value *V = WorkList.pop_back_val(); 
+      Value *V = WorkList.pop_back_val();
 
       size_t BeforeProcessingV = WorkList.size();
       if (!isa<Instruction>(V))
@@ -723,18 +723,18 @@ public:
       Value *Op2;
       if (auto *BinOp = dyn_cast<BinaryOperator>(Inst))
         Changed |= VisitBinaryOperator(BinOp);
-      if (auto *UnOp = dyn_cast<UnaryOperator>(Inst)) 
-        Changed |= VisitUnaryOperator(UnOp); 
+      if (auto *UnOp = dyn_cast<UnaryOperator>(Inst))
+        Changed |= VisitUnaryOperator(UnOp);
       if (match(Inst, m_Load(m_Value(Op1))))
         Changed |= VisitLoad(cast<LoadInst>(Inst), Op1, Builder);
       else if (match(Inst, m_Store(m_Value(Op1), m_Value(Op2))))
         Changed |= VisitStore(cast<StoreInst>(Inst), Op1, Op2, Builder);
     }
 
-    if (ORE) { 
-      RemarkGenerator RemarkGen(Inst2ColumnMatrix, *ORE, Func); 
-      RemarkGen.emitRemarks(); 
-    } 
+    if (ORE) {
+      RemarkGenerator RemarkGen(Inst2ColumnMatrix, *ORE, Func);
+      RemarkGen.emitRemarks();
+    }
 
     for (Instruction *Inst : reverse(ToRemove))
       Inst->eraseFromParent();
@@ -941,7 +941,7 @@ public:
     assert(NumElts >= BlockNumElts && "Too few elements for current block");
 
     Block = Builder.CreateShuffleVector(
-        Block, createSequentialMask(0, BlockNumElts, NumElts - BlockNumElts)); 
+        Block, createSequentialMask(0, BlockNumElts, NumElts - BlockNumElts));
 
     // If Col is 7 long and I is 2 and BlockNumElts is 2 the mask is: 0, 1, 7,
     // 8, 4, 5, 6
@@ -1089,7 +1089,7 @@ public:
     MemoryLocation StoreLoc = MemoryLocation::get(Store);
     MemoryLocation LoadLoc = MemoryLocation::get(Load);
 
-    AliasResult LdAliased = AA->alias(LoadLoc, StoreLoc); 
+    AliasResult LdAliased = AA->alias(LoadLoc, StoreLoc);
 
     // If we can statically determine noalias we're good.
     if (!LdAliased)
@@ -1105,17 +1105,17 @@ public:
     // as we adjust Check0 and Check1's branches.
     SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
     for (BasicBlock *Succ : successors(Check0))
-      DTUpdates.push_back({DT->Delete, Check0, Succ}); 
+      DTUpdates.push_back({DT->Delete, Check0, Succ});
 
-    BasicBlock *Check1 = 
-        SplitBlock(MatMul->getParent(), MatMul, (DomTreeUpdater *)nullptr, LI, 
-                   nullptr, "alias_cont"); 
+    BasicBlock *Check1 =
+        SplitBlock(MatMul->getParent(), MatMul, (DomTreeUpdater *)nullptr, LI,
+                   nullptr, "alias_cont");
     BasicBlock *Copy =
-        SplitBlock(MatMul->getParent(), MatMul, (DomTreeUpdater *)nullptr, LI, 
-                   nullptr, "copy"); 
-    BasicBlock *Fusion = 
-        SplitBlock(MatMul->getParent(), MatMul, (DomTreeUpdater *)nullptr, LI, 
-                   nullptr, "no_alias"); 
+        SplitBlock(MatMul->getParent(), MatMul, (DomTreeUpdater *)nullptr, LI,
+                   nullptr, "copy");
+    BasicBlock *Fusion =
+        SplitBlock(MatMul->getParent(), MatMul, (DomTreeUpdater *)nullptr, LI,
+                   nullptr, "no_alias");
 
     // Check if the loaded memory location begins before the end of the store
     // location. If the condition holds, they might overlap, otherwise they are
@@ -1159,11 +1159,11 @@ public:
     PHI->addIncoming(NewLd, Copy);
 
     // Adjust DT.
-    DTUpdates.push_back({DT->Insert, Check0, Check1}); 
-    DTUpdates.push_back({DT->Insert, Check0, Fusion}); 
-    DTUpdates.push_back({DT->Insert, Check1, Copy}); 
-    DTUpdates.push_back({DT->Insert, Check1, Fusion}); 
-    DT->applyUpdates(DTUpdates); 
+    DTUpdates.push_back({DT->Insert, Check0, Check1});
+    DTUpdates.push_back({DT->Insert, Check0, Fusion});
+    DTUpdates.push_back({DT->Insert, Check1, Copy});
+    DTUpdates.push_back({DT->Insert, Check1, Fusion});
+    DT->applyUpdates(DTUpdates);
     return PHI;
   }
 
@@ -1209,63 +1209,63 @@ public:
     return Res;
   }
 
-  void createTiledLoops(CallInst *MatMul, Value *LPtr, ShapeInfo LShape, 
-                        Value *RPtr, ShapeInfo RShape, StoreInst *Store, 
-                        bool AllowContract) { 
-    auto *EltType = cast<VectorType>(MatMul->getType())->getElementType(); 
- 
-    // Create the main tiling loop nest. 
-    TileInfo TI(LShape.NumRows, RShape.NumColumns, LShape.NumColumns, TileSize); 
-    DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); 
-    Instruction *InsertI = cast<Instruction>(MatMul); 
-    BasicBlock *Start = InsertI->getParent(); 
-    BasicBlock *End = 
-        SplitBlock(InsertI->getParent(), InsertI, DT, LI, nullptr, "continue"); 
-    IRBuilder<> Builder(MatMul); 
-    BasicBlock *InnerBody = TI.CreateTiledLoops(Start, End, Builder, DTU, *LI); 
- 
-    Type *TileVecTy = 
-        FixedVectorType::get(MatMul->getType()->getScalarType(), TileSize); 
-    MatrixTy TileResult; 
-    // Insert in the inner loop header. 
-    Builder.SetInsertPoint(TI.InnerLoopHeader->getTerminator()); 
-    // Create PHI nodes for the result columns to accumulate across iterations. 
-    SmallVector<PHINode *, 4> ColumnPhis; 
-    for (unsigned I = 0; I < TileSize; I++) { 
-      auto *Phi = Builder.CreatePHI(TileVecTy, 2, "result.vec." + Twine(I)); 
-      Phi->addIncoming(ConstantAggregateZero::get(TileVecTy), 
-                       TI.RowLoopHeader->getSingleSuccessor()); 
-      TileResult.addVector(Phi); 
-      ColumnPhis.push_back(Phi); 
-    } 
- 
-    // Insert in the inner loop body, which computes 
-    //   Res += Load(CurrentRow, K) * Load(K, CurrentColumn) 
-    Builder.SetInsertPoint(InnerBody->getTerminator()); 
-    // Load tiles of the operands. 
-    MatrixTy A = loadMatrix(LPtr, {}, false, LShape, TI.CurrentRow, TI.CurrentK, 
-                            {TileSize, TileSize}, EltType, Builder); 
-    MatrixTy B = loadMatrix(RPtr, {}, false, RShape, TI.CurrentK, TI.CurrentCol, 
-                            {TileSize, TileSize}, EltType, Builder); 
-    emitMatrixMultiply(TileResult, A, B, AllowContract, Builder, true); 
-    // Store result after the inner loop is done. 
-    Builder.SetInsertPoint(TI.RowLoopLatch->getTerminator()); 
-    storeMatrix(TileResult, Store->getPointerOperand(), Store->getAlign(), 
-                Store->isVolatile(), {LShape.NumRows, RShape.NumColumns}, 
-                TI.CurrentRow, TI.CurrentCol, EltType, Builder); 
- 
-    for (unsigned I = 0; I < TileResult.getNumVectors(); I++) 
-      ColumnPhis[I]->addIncoming(TileResult.getVector(I), TI.InnerLoopLatch); 
- 
-    // Force unrolling of a few iterations of the inner loop, to make sure there 
-    // is enough work per iteration. 
-    // FIXME: The unroller should make this decision directly instead, but 
-    // currently the cost-model is not up to the task. 
-    unsigned InnerLoopUnrollCount = std::min(10u, LShape.NumColumns / TileSize); 
-    addStringMetadataToLoop(LI->getLoopFor(TI.InnerLoopHeader), 
-                            "llvm.loop.unroll.count", InnerLoopUnrollCount); 
-  } 
- 
+  void createTiledLoops(CallInst *MatMul, Value *LPtr, ShapeInfo LShape,
+                        Value *RPtr, ShapeInfo RShape, StoreInst *Store,
+                        bool AllowContract) {
+    auto *EltType = cast<VectorType>(MatMul->getType())->getElementType();
+
+    // Create the main tiling loop nest.
+    TileInfo TI(LShape.NumRows, RShape.NumColumns, LShape.NumColumns, TileSize);
+    DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+    Instruction *InsertI = cast<Instruction>(MatMul);
+    BasicBlock *Start = InsertI->getParent();
+    BasicBlock *End =
+        SplitBlock(InsertI->getParent(), InsertI, DT, LI, nullptr, "continue");
+    IRBuilder<> Builder(MatMul);
+    BasicBlock *InnerBody = TI.CreateTiledLoops(Start, End, Builder, DTU, *LI);
+
+    Type *TileVecTy =
+        FixedVectorType::get(MatMul->getType()->getScalarType(), TileSize);
+    MatrixTy TileResult;
+    // Insert in the inner loop header.
+    Builder.SetInsertPoint(TI.InnerLoopHeader->getTerminator());
+    // Create PHI nodes for the result columns to accumulate across iterations.
+    SmallVector<PHINode *, 4> ColumnPhis;
+    for (unsigned I = 0; I < TileSize; I++) {
+      auto *Phi = Builder.CreatePHI(TileVecTy, 2, "result.vec." + Twine(I));
+      Phi->addIncoming(ConstantAggregateZero::get(TileVecTy),
+                       TI.RowLoopHeader->getSingleSuccessor());
+      TileResult.addVector(Phi);
+      ColumnPhis.push_back(Phi);
+    }
+
+    // Insert in the inner loop body, which computes
+    //   Res += Load(CurrentRow, K) * Load(K, CurrentColumn)
+    Builder.SetInsertPoint(InnerBody->getTerminator());
+    // Load tiles of the operands.
+    MatrixTy A = loadMatrix(LPtr, {}, false, LShape, TI.CurrentRow, TI.CurrentK,
+                            {TileSize, TileSize}, EltType, Builder);
+    MatrixTy B = loadMatrix(RPtr, {}, false, RShape, TI.CurrentK, TI.CurrentCol,
+                            {TileSize, TileSize}, EltType, Builder);
+    emitMatrixMultiply(TileResult, A, B, AllowContract, Builder, true);
+    // Store result after the inner loop is done.
+    Builder.SetInsertPoint(TI.RowLoopLatch->getTerminator());
+    storeMatrix(TileResult, Store->getPointerOperand(), Store->getAlign(),
+                Store->isVolatile(), {LShape.NumRows, RShape.NumColumns},
+                TI.CurrentRow, TI.CurrentCol, EltType, Builder);
+
+    for (unsigned I = 0; I < TileResult.getNumVectors(); I++)
+      ColumnPhis[I]->addIncoming(TileResult.getVector(I), TI.InnerLoopLatch);
+
+    // Force unrolling of a few iterations of the inner loop, to make sure there
+    // is enough work per iteration.
+    // FIXME: The unroller should make this decision directly instead, but
+    // currently the cost-model is not up to the task.
+    unsigned InnerLoopUnrollCount = std::min(10u, LShape.NumColumns / TileSize);
+    addStringMetadataToLoop(LI->getLoopFor(TI.InnerLoopHeader),
+                            "llvm.loop.unroll.count", InnerLoopUnrollCount);
+  }
+
   void emitSIMDTiling(CallInst *MatMul, LoadInst *LoadOp0, LoadInst *LoadOp1,
                       StoreInst *Store,
                       SmallPtrSetImpl<Instruction *> &FusedInsts) {
@@ -1288,34 +1288,34 @@ public:
 
     bool AllowContract = AllowContractEnabled || (isa<FPMathOperator>(MatMul) &&
                                                   MatMul->hasAllowContract());
-    if (TileUseLoops && (R % TileSize == 0 && C % TileSize == 0)) 
-      createTiledLoops(MatMul, APtr, LShape, BPtr, RShape, Store, 
-                       AllowContract); 
-    else { 
-      IRBuilder<> Builder(Store); 
-      for (unsigned J = 0; J < C; J += TileSize) 
-        for (unsigned I = 0; I < R; I += TileSize) { 
-          const unsigned TileR = std::min(R - I, unsigned(TileSize)); 
-          const unsigned TileC = std::min(C - J, unsigned(TileSize)); 
-          MatrixTy Res = getZeroMatrix(EltType, TileR, TileC); 
-
-          for (unsigned K = 0; K < M; K += TileSize) { 
-            const unsigned TileM = std::min(M - K, unsigned(TileSize)); 
-            MatrixTy A = 
-                loadMatrix(APtr, LoadOp0->getAlign(), LoadOp0->isVolatile(), 
-                           LShape, Builder.getInt64(I), Builder.getInt64(K), 
-                           {TileR, TileM}, EltType, Builder); 
-            MatrixTy B = 
-                loadMatrix(BPtr, LoadOp1->getAlign(), LoadOp1->isVolatile(), 
-                           RShape, Builder.getInt64(K), Builder.getInt64(J), 
-                           {TileM, TileC}, EltType, Builder); 
-            emitMatrixMultiply(Res, A, B, AllowContract, Builder, true); 
-          } 
-          storeMatrix(Res, CPtr, Store->getAlign(), Store->isVolatile(), {R, M}, 
-                      Builder.getInt64(I), Builder.getInt64(J), EltType, 
-                      Builder); 
+    if (TileUseLoops && (R % TileSize == 0 && C % TileSize == 0))
+      createTiledLoops(MatMul, APtr, LShape, BPtr, RShape, Store,
+                       AllowContract);
+    else {
+      IRBuilder<> Builder(Store);
+      for (unsigned J = 0; J < C; J += TileSize)
+        for (unsigned I = 0; I < R; I += TileSize) {
+          const unsigned TileR = std::min(R - I, unsigned(TileSize));
+          const unsigned TileC = std::min(C - J, unsigned(TileSize));
+          MatrixTy Res = getZeroMatrix(EltType, TileR, TileC);
+
+          for (unsigned K = 0; K < M; K += TileSize) {
+            const unsigned TileM = std::min(M - K, unsigned(TileSize));
+            MatrixTy A =
+                loadMatrix(APtr, LoadOp0->getAlign(), LoadOp0->isVolatile(),
+                           LShape, Builder.getInt64(I), Builder.getInt64(K),
+                           {TileR, TileM}, EltType, Builder);
+            MatrixTy B =
+                loadMatrix(BPtr, LoadOp1->getAlign(), LoadOp1->isVolatile(),
+                           RShape, Builder.getInt64(K), Builder.getInt64(J),
+                           {TileM, TileC}, EltType, Builder);
+            emitMatrixMultiply(Res, A, B, AllowContract, Builder, true);
+          }
+          storeMatrix(Res, CPtr, Store->getAlign(), Store->isVolatile(), {R, M},
+                      Builder.getInt64(I), Builder.getInt64(J), EltType,
+                      Builder);
         }
-    } 
+    }
 
     // Mark eliminated instructions as fused and remove them.
     FusedInsts.insert(Store);
@@ -1342,11 +1342,11 @@ public:
   void LowerMatrixMultiplyFused(CallInst *MatMul,
                                 SmallPtrSetImpl<Instruction *> &FusedInsts) {
     if (!FuseMatrix || !MatMul->hasOneUse() ||
-        MatrixLayout != MatrixLayoutTy::ColumnMajor || !DT) 
+        MatrixLayout != MatrixLayoutTy::ColumnMajor || !DT)
       return;
 
-    assert(AA && LI && "Analyses should be available"); 
- 
+    assert(AA && LI && "Analyses should be available");
+
     auto *LoadOp0 = dyn_cast<LoadInst>(MatMul->getOperand(0));
     auto *LoadOp1 = dyn_cast<LoadInst>(MatMul->getOperand(1));
     auto *Store = dyn_cast<StoreInst>(*MatMul->user_begin());
@@ -1355,7 +1355,7 @@ public:
       // we create invalid IR.
       // FIXME: See if we can hoist the store address computation.
       auto *AddrI = dyn_cast<Instruction>(Store->getOperand(1));
-      if (AddrI && (!DT->dominates(AddrI, MatMul))) 
+      if (AddrI && (!DT->dominates(AddrI, MatMul)))
         return;
 
       emitSIMDTiling(MatMul, LoadOp0, LoadOp1, Store, FusedInsts);
@@ -1372,8 +1372,8 @@ public:
 
     const MatrixTy &Lhs = getMatrix(MatMul->getArgOperand(0), LShape, Builder);
     const MatrixTy &Rhs = getMatrix(MatMul->getArgOperand(1), RShape, Builder);
-    assert(Lhs.getElementType() == Rhs.getElementType() && 
-           "Matrix multiply argument element types do not match."); 
+    assert(Lhs.getElementType() == Rhs.getElementType() &&
+           "Matrix multiply argument element types do not match.");
 
     const unsigned R = LShape.NumRows;
     const unsigned C = RShape.NumColumns;
@@ -1381,8 +1381,8 @@ public:
 
     // Initialize the output
     MatrixTy Result(R, C, EltType);
-    assert(Lhs.getElementType() == Result.getElementType() && 
-           "Matrix multiply result element type does not match arguments."); 
+    assert(Lhs.getElementType() == Result.getElementType() &&
+           "Matrix multiply result element type does not match arguments.");
 
     bool AllowContract = AllowContractEnabled || (isa<FPMathOperator>(MatMul) &&
                                                   MatMul->hasAllowContract());
@@ -1500,40 +1500,40 @@ public:
     return true;
   }
 
-  /// Lower unary operators, if shape information is available. 
-  bool VisitUnaryOperator(UnaryOperator *Inst) { 
-    auto I = ShapeMap.find(Inst); 
-    if (I == ShapeMap.end()) 
-      return false; 
- 
-    Value *Op = Inst->getOperand(0); 
- 
-    IRBuilder<> Builder(Inst); 
-    ShapeInfo &Shape = I->second; 
- 
-    MatrixTy Result; 
-    MatrixTy M = getMatrix(Op, Shape, Builder); 
- 
-    // Helper to perform unary op on vectors. 
-    auto BuildVectorOp = [&Builder, Inst](Value *Op) { 
-      switch (Inst->getOpcode()) { 
-      case Instruction::FNeg: 
-        return Builder.CreateFNeg(Op); 
-      default: 
-        llvm_unreachable("Unsupported unary operator for matrix"); 
-      } 
-    }; 
- 
-    for (unsigned I = 0; I < Shape.getNumVectors(); ++I) 
-      Result.addVector(BuildVectorOp(M.getVector(I))); 
- 
-    finalizeLowering(Inst, 
-                     Result.addNumComputeOps(getNumOps(Result.getVectorTy()) * 
-                                             Result.getNumVectors()), 
-                     Builder); 
-    return true; 
-  } 
- 
+  /// Lower unary operators, if shape information is available.
+  bool VisitUnaryOperator(UnaryOperator *Inst) {
+    auto I = ShapeMap.find(Inst);
+    if (I == ShapeMap.end())
+      return false;
+
+    Value *Op = Inst->getOperand(0);
+
+    IRBuilder<> Builder(Inst);
+    ShapeInfo &Shape = I->second;
+
+    MatrixTy Result;
+    MatrixTy M = getMatrix(Op, Shape, Builder);
+
+    // Helper to perform unary op on vectors.
+    auto BuildVectorOp = [&Builder, Inst](Value *Op) {
+      switch (Inst->getOpcode()) {
+      case Instruction::FNeg:
+        return Builder.CreateFNeg(Op);
+      default:
+        llvm_unreachable("Unsupported unary operator for matrix");
+      }
+    };
+
+    for (unsigned I = 0; I < Shape.getNumVectors(); ++I)
+      Result.addVector(BuildVectorOp(M.getVector(I)));
+
+    finalizeLowering(Inst,
+                     Result.addNumComputeOps(getNumOps(Result.getVectorTy()) *
+                                             Result.getNumVectors()),
+                     Builder);
+    return true;
+  }
+
   /// Helper to linearize a matrix expression tree into a string. Currently
   /// matrix expressions are linarized by starting at an expression leaf and
   /// linearizing bottom up.
@@ -1598,7 +1598,7 @@ public:
       if (Value *Ptr = getPointerOperand(V))
         return getUnderlyingObjectThroughLoads(Ptr);
       else if (V->getType()->isPointerTy())
-        return getUnderlyingObject(V); 
+        return getUnderlyingObject(V);
       return V;
     }
 
@@ -1634,7 +1634,7 @@ public:
         write(StringRef(Intrinsic::getName(II->getIntrinsicID(), {}))
                   .drop_front(StringRef("llvm.matrix.").size()));
         write(".");
-        std::string Tmp; 
+        std::string Tmp;
         raw_string_ostream SS(Tmp);
 
         switch (II->getIntrinsicID()) {
@@ -1972,25 +1972,25 @@ public:
 PreservedAnalyses LowerMatrixIntrinsicsPass::run(Function &F,
                                                  FunctionAnalysisManager &AM) {
   auto &TTI = AM.getResult<TargetIRAnalysis>(F);
-  OptimizationRemarkEmitter *ORE = nullptr; 
-  AAResults *AA = nullptr; 
-  DominatorTree *DT = nullptr; 
-  LoopInfo *LI = nullptr; 
-
-  if (!Minimal) { 
-    ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(F); 
-    AA = &AM.getResult<AAManager>(F); 
-    DT = &AM.getResult<DominatorTreeAnalysis>(F); 
-    LI = &AM.getResult<LoopAnalysis>(F); 
-  } 
- 
+  OptimizationRemarkEmitter *ORE = nullptr;
+  AAResults *AA = nullptr;
+  DominatorTree *DT = nullptr;
+  LoopInfo *LI = nullptr;
+
+  if (!Minimal) {
+    ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+    AA = &AM.getResult<AAManager>(F);
+    DT = &AM.getResult<DominatorTreeAnalysis>(F);
+    LI = &AM.getResult<LoopAnalysis>(F);
+  }
+
   LowerMatrixIntrinsics LMT(F, TTI, AA, DT, LI, ORE);
   if (LMT.Visit()) {
     PreservedAnalyses PA;
-    if (!Minimal) { 
-      PA.preserve<LoopAnalysis>(); 
-      PA.preserve<DominatorTreeAnalysis>(); 
-    } 
+    if (!Minimal) {
+      PA.preserve<LoopAnalysis>();
+      PA.preserve<DominatorTreeAnalysis>();
+    }
     return PA;
   }
   return PreservedAnalyses::all();
@@ -2013,7 +2013,7 @@ public:
     auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
     auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-    LowerMatrixIntrinsics LMT(F, TTI, &AA, &DT, &LI, &ORE); 
+    LowerMatrixIntrinsics LMT(F, TTI, &AA, &DT, &LI, &ORE);
     bool C = LMT.Visit();
     return C;
   }
@@ -2044,45 +2044,45 @@ INITIALIZE_PASS_END(LowerMatrixIntrinsicsLegacyPass, DEBUG_TYPE, pass_name,
 Pass *llvm::createLowerMatrixIntrinsicsPass() {
   return new LowerMatrixIntrinsicsLegacyPass();
 }
- 
-namespace { 
- 
-/// A lightweight version of the matrix lowering pass that only requires TTI. 
-/// Advanced features that require DT, AA or ORE like tiling are disabled. This 
-/// is used to lower matrix intrinsics if the main lowering pass is not run, for 
-/// example with -O0. 
-class LowerMatrixIntrinsicsMinimalLegacyPass : public FunctionPass { 
-public: 
-  static char ID; 
- 
-  LowerMatrixIntrinsicsMinimalLegacyPass() : FunctionPass(ID) { 
-    initializeLowerMatrixIntrinsicsMinimalLegacyPassPass( 
-        *PassRegistry::getPassRegistry()); 
-  } 
- 
-  bool runOnFunction(Function &F) override { 
-    auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 
-    LowerMatrixIntrinsics LMT(F, TTI, nullptr, nullptr, nullptr, nullptr); 
-    bool C = LMT.Visit(); 
-    return C; 
-  } 
- 
-  void getAnalysisUsage(AnalysisUsage &AU) const override { 
-    AU.addRequired<TargetTransformInfoWrapperPass>(); 
-    AU.setPreservesCFG(); 
-  } 
-}; 
-} // namespace 
- 
-static const char pass_name_minimal[] = "Lower the matrix intrinsics (minimal)"; 
-char LowerMatrixIntrinsicsMinimalLegacyPass::ID = 0; 
-INITIALIZE_PASS_BEGIN(LowerMatrixIntrinsicsMinimalLegacyPass, 
-                      "lower-matrix-intrinsics-minimal", pass_name_minimal, 
-                      false, false) 
-INITIALIZE_PASS_END(LowerMatrixIntrinsicsMinimalLegacyPass, 
-                    "lower-matrix-intrinsics-minimal", pass_name_minimal, false, 
-                    false) 
- 
-Pass *llvm::createLowerMatrixIntrinsicsMinimalPass() { 
-  return new LowerMatrixIntrinsicsMinimalLegacyPass(); 
-} 
+
+namespace {
+
+/// A lightweight version of the matrix lowering pass that only requires TTI.
+/// Advanced features that require DT, AA or ORE like tiling are disabled. This
+/// is used to lower matrix intrinsics if the main lowering pass is not run, for
+/// example with -O0.
+class LowerMatrixIntrinsicsMinimalLegacyPass : public FunctionPass {
+public:
+  static char ID;
+
+  LowerMatrixIntrinsicsMinimalLegacyPass() : FunctionPass(ID) {
+    initializeLowerMatrixIntrinsicsMinimalLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override {
+    auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+    LowerMatrixIntrinsics LMT(F, TTI, nullptr, nullptr, nullptr, nullptr);
+    bool C = LMT.Visit();
+    return C;
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<TargetTransformInfoWrapperPass>();
+    AU.setPreservesCFG();
+  }
+};
+} // namespace
+
+static const char pass_name_minimal[] = "Lower the matrix intrinsics (minimal)";
+char LowerMatrixIntrinsicsMinimalLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LowerMatrixIntrinsicsMinimalLegacyPass,
+                      "lower-matrix-intrinsics-minimal", pass_name_minimal,
+                      false, false)
+INITIALIZE_PASS_END(LowerMatrixIntrinsicsMinimalLegacyPass,
+                    "lower-matrix-intrinsics-minimal", pass_name_minimal, false,
+                    false)
+
+Pass *llvm::createLowerMatrixIntrinsicsMinimalPass() {
+  return new LowerMatrixIntrinsicsMinimalLegacyPass();
+}
author	shadchin <shadchin@yandex-team.ru>	2022-02-10 16:44:39 +0300
committer	Daniil Cherednik <dcherednik@yandex-team.ru>	2022-02-10 16:44:39 +0300
commit	e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (patch)
tree	64175d5cadab313b3e7039ebaa06c5bc3295e274 /contrib/libs/llvm12/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
parent	2598ef1d0aee359b4b6d5fdd1758916d5907d04f (diff)
download	ydb-e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0.tar.gz