aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/llvm12/lib/Target/ARM/MVETailPredication.cpp
diff options
context:
space:
mode:
authorshadchin <shadchin@yandex-team.ru>2022-02-10 16:44:39 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:44:39 +0300
commite9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (patch)
tree64175d5cadab313b3e7039ebaa06c5bc3295e274 /contrib/libs/llvm12/lib/Target/ARM/MVETailPredication.cpp
parent2598ef1d0aee359b4b6d5fdd1758916d5907d04f (diff)
downloadydb-e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0.tar.gz
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Target/ARM/MVETailPredication.cpp')
-rw-r--r--contrib/libs/llvm12/lib/Target/ARM/MVETailPredication.cpp322
1 files changed, 161 insertions, 161 deletions
diff --git a/contrib/libs/llvm12/lib/Target/ARM/MVETailPredication.cpp b/contrib/libs/llvm12/lib/Target/ARM/MVETailPredication.cpp
index 94e71f1d60..cccac55952 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/MVETailPredication.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/MVETailPredication.cpp
@@ -22,13 +22,13 @@
/// The HardwareLoops pass inserts intrinsics identifying loops that the
/// backend will attempt to convert into a low-overhead loop. The vectorizer is
/// responsible for generating a vectorized loop in which the lanes are
-/// predicated upon an get.active.lane.mask intrinsic. This pass looks at these
-/// get.active.lane.mask intrinsic and attempts to convert them to VCTP
-/// instructions. This will be picked up by the ARM Low-overhead loop pass later
-/// in the backend, which performs the final transformation to a DLSTP or WLSTP
-/// tail-predicated loop.
-//
-//===----------------------------------------------------------------------===//
+/// predicated upon an get.active.lane.mask intrinsic. This pass looks at these
+/// get.active.lane.mask intrinsic and attempts to convert them to VCTP
+/// instructions. This will be picked up by the ARM Low-overhead loop pass later
+/// in the backend, which performs the final transformation to a DLSTP or WLSTP
+/// tail-predicated loop.
+//
+//===----------------------------------------------------------------------===//
#include "ARM.h"
#include "ARMSubtarget.h"
@@ -47,7 +47,7 @@
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
@@ -57,8 +57,8 @@ using namespace llvm;
#define DESC "Transform predicated vector loops to use MVE tail predication"
cl::opt<TailPredication::Mode> EnableTailPredication(
- "tail-predication", cl::desc("MVE tail-predication pass options"),
- cl::init(TailPredication::Enabled),
+ "tail-predication", cl::desc("MVE tail-predication pass options"),
+ cl::init(TailPredication::Enabled),
cl::values(clEnumValN(TailPredication::Disabled, "disabled",
"Don't tail-predicate loops"),
clEnumValN(TailPredication::EnabledNoReductions,
@@ -103,18 +103,18 @@ public:
bool runOnLoop(Loop *L, LPPassManager&) override;
private:
- /// Perform the relevant checks on the loop and convert active lane masks if
- /// possible.
- bool TryConvertActiveLaneMask(Value *TripCount);
+ /// Perform the relevant checks on the loop and convert active lane masks if
+ /// possible.
+ bool TryConvertActiveLaneMask(Value *TripCount);
- /// Perform several checks on the arguments of @llvm.get.active.lane.mask
- /// intrinsic. E.g., check that the loop induction variable and the element
- /// count are of the form we expect, and also perform overflow checks for
- /// the new expressions that are created.
- bool IsSafeActiveMask(IntrinsicInst *ActiveLaneMask, Value *TripCount);
+ /// Perform several checks on the arguments of @llvm.get.active.lane.mask
+ /// intrinsic. E.g., check that the loop induction variable and the element
+ /// count are of the form we expect, and also perform overflow checks for
+ /// the new expressions that are created.
+ bool IsSafeActiveMask(IntrinsicInst *ActiveLaneMask, Value *TripCount);
/// Insert the intrinsic to represent the effect of tail predication.
- void InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask, Value *TripCount);
+ void InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask, Value *TripCount);
/// Rematerialize the iteration count in exit blocks, which enables
/// ARMLowOverheadLoops to better optimise away loop update statements inside
@@ -155,7 +155,7 @@ bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) {
continue;
Intrinsic::ID ID = Call->getIntrinsicID();
- if (ID == Intrinsic::start_loop_iterations ||
+ if (ID == Intrinsic::start_loop_iterations ||
ID == Intrinsic::test_set_loop_iterations)
return cast<IntrinsicInst>(&I);
}
@@ -174,23 +174,23 @@ bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) {
return false;
}
- LLVM_DEBUG(dbgs() << "ARM TP: Running on Loop: " << *L << *Setup << "\n");
+ LLVM_DEBUG(dbgs() << "ARM TP: Running on Loop: " << *L << *Setup << "\n");
- bool Changed = TryConvertActiveLaneMask(Setup->getArgOperand(0));
+ bool Changed = TryConvertActiveLaneMask(Setup->getArgOperand(0));
- return Changed;
+ return Changed;
}
// The active lane intrinsic has this form:
//
-// @llvm.get.active.lane.mask(IV, TC)
+// @llvm.get.active.lane.mask(IV, TC)
//
// Here we perform checks that this intrinsic behaves as expected,
// which means:
//
-// 1) Check that the TripCount (TC) belongs to this loop (originally).
-// 2) The element count (TC) needs to be sufficiently large that the decrement
-// of element counter doesn't overflow, which means that we need to prove:
+// 1) Check that the TripCount (TC) belongs to this loop (originally).
+// 2) The element count (TC) needs to be sufficiently large that the decrement
+// of element counter doesn't overflow, which means that we need to prove:
// ceil(ElementCount / VectorWidth) >= TripCount
// by rounding up ElementCount up:
// ((ElementCount + (VectorWidth - 1)) / VectorWidth
@@ -199,122 +199,122 @@ bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) {
// 3) The IV must be an induction phi with an increment equal to the
// vector width.
bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
- Value *TripCount) {
+ Value *TripCount) {
bool ForceTailPredication =
EnableTailPredication == TailPredication::ForceEnabledNoReductions ||
EnableTailPredication == TailPredication::ForceEnabled;
- Value *ElemCount = ActiveLaneMask->getOperand(1);
- bool Changed = false;
- if (!L->makeLoopInvariant(ElemCount, Changed))
- return false;
-
- auto *EC= SE->getSCEV(ElemCount);
- auto *TC = SE->getSCEV(TripCount);
- int VectorWidth =
- cast<FixedVectorType>(ActiveLaneMask->getType())->getNumElements();
- if (VectorWidth != 4 && VectorWidth != 8 && VectorWidth != 16)
- return false;
- ConstantInt *ConstElemCount = nullptr;
-
- // 1) Smoke tests that the original scalar loop TripCount (TC) belongs to
- // this loop. The scalar tripcount corresponds the number of elements
- // processed by the loop, so we will refer to that from this point on.
- if (!SE->isLoopInvariant(EC, L)) {
- LLVM_DEBUG(dbgs() << "ARM TP: element count must be loop invariant.\n");
+ Value *ElemCount = ActiveLaneMask->getOperand(1);
+ bool Changed = false;
+ if (!L->makeLoopInvariant(ElemCount, Changed))
+ return false;
+
+ auto *EC= SE->getSCEV(ElemCount);
+ auto *TC = SE->getSCEV(TripCount);
+ int VectorWidth =
+ cast<FixedVectorType>(ActiveLaneMask->getType())->getNumElements();
+ if (VectorWidth != 4 && VectorWidth != 8 && VectorWidth != 16)
+ return false;
+ ConstantInt *ConstElemCount = nullptr;
+
+ // 1) Smoke tests that the original scalar loop TripCount (TC) belongs to
+ // this loop. The scalar tripcount corresponds the number of elements
+ // processed by the loop, so we will refer to that from this point on.
+ if (!SE->isLoopInvariant(EC, L)) {
+ LLVM_DEBUG(dbgs() << "ARM TP: element count must be loop invariant.\n");
return false;
}
- if ((ConstElemCount = dyn_cast<ConstantInt>(ElemCount))) {
- ConstantInt *TC = dyn_cast<ConstantInt>(TripCount);
- if (!TC) {
- LLVM_DEBUG(dbgs() << "ARM TP: Constant tripcount expected in "
- "set.loop.iterations\n");
- return false;
- }
-
- // Calculate 2 tripcount values and check that they are consistent with
- // each other. The TripCount for a predicated vector loop body is
- // ceil(ElementCount/Width), or floor((ElementCount+Width-1)/Width) as we
- // work it out here.
- uint64_t TC1 = TC->getZExtValue();
- uint64_t TC2 =
- (ConstElemCount->getZExtValue() + VectorWidth - 1) / VectorWidth;
-
- // If the tripcount values are inconsistent, we can't insert the VCTP and
- // trigger tail-predication; keep the intrinsic as a get.active.lane.mask
- // and legalize this.
- if (TC1 != TC2) {
- LLVM_DEBUG(dbgs() << "ARM TP: inconsistent constant tripcount values: "
- << TC1 << " from set.loop.iterations, and "
- << TC2 << " from get.active.lane.mask\n");
- return false;
- }
- } else if (!ForceTailPredication) {
- // 2) We need to prove that the sub expression that we create in the
- // tail-predicated loop body, which calculates the remaining elements to be
- // processed, is non-negative, i.e. it doesn't overflow:
- //
- // ((ElementCount + VectorWidth - 1) / VectorWidth) - TripCount >= 0
- //
- // This is true if:
- //
- // TripCount == (ElementCount + VectorWidth - 1) / VectorWidth
- //
- // which what we will be using here.
- //
- auto *VW = SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth));
- // ElementCount + (VW-1):
- auto *ECPlusVWMinus1 = SE->getAddExpr(EC,
- SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth - 1)));
-
- // Ceil = ElementCount + (VW-1) / VW
- auto *Ceil = SE->getUDivExpr(ECPlusVWMinus1, VW);
-
- // Prevent unused variable warnings with TC
- (void)TC;
- LLVM_DEBUG(
- dbgs() << "ARM TP: Analysing overflow behaviour for:\n";
- dbgs() << "ARM TP: - TripCount = "; TC->dump();
- dbgs() << "ARM TP: - ElemCount = "; EC->dump();
- dbgs() << "ARM TP: - VecWidth = " << VectorWidth << "\n";
- dbgs() << "ARM TP: - (ElemCount+VW-1) / VW = "; Ceil->dump();
- );
-
- // As an example, almost all the tripcount expressions (produced by the
- // vectoriser) look like this:
- //
- // TC = ((-4 + (4 * ((3 + %N) /u 4))<nuw>) /u 4)
- //
- // and "ElementCount + (VW-1) / VW":
- //
- // Ceil = ((3 + %N) /u 4)
- //
- // Check for equality of TC and Ceil by calculating SCEV expression
- // TC - Ceil and test it for zero.
- //
- bool Zero = SE->getMinusSCEV(
- SE->getBackedgeTakenCount(L),
- SE->getUDivExpr(SE->getAddExpr(SE->getMulExpr(Ceil, VW),
- SE->getNegativeSCEV(VW)),
- VW))
- ->isZero();
-
- if (!Zero) {
- LLVM_DEBUG(dbgs() << "ARM TP: possible overflow in sub expression.\n");
- return false;
- }
+ if ((ConstElemCount = dyn_cast<ConstantInt>(ElemCount))) {
+ ConstantInt *TC = dyn_cast<ConstantInt>(TripCount);
+ if (!TC) {
+ LLVM_DEBUG(dbgs() << "ARM TP: Constant tripcount expected in "
+ "set.loop.iterations\n");
+ return false;
+ }
+
+ // Calculate 2 tripcount values and check that they are consistent with
+ // each other. The TripCount for a predicated vector loop body is
+ // ceil(ElementCount/Width), or floor((ElementCount+Width-1)/Width) as we
+ // work it out here.
+ uint64_t TC1 = TC->getZExtValue();
+ uint64_t TC2 =
+ (ConstElemCount->getZExtValue() + VectorWidth - 1) / VectorWidth;
+
+ // If the tripcount values are inconsistent, we can't insert the VCTP and
+ // trigger tail-predication; keep the intrinsic as a get.active.lane.mask
+ // and legalize this.
+ if (TC1 != TC2) {
+ LLVM_DEBUG(dbgs() << "ARM TP: inconsistent constant tripcount values: "
+ << TC1 << " from set.loop.iterations, and "
+ << TC2 << " from get.active.lane.mask\n");
+ return false;
+ }
+ } else if (!ForceTailPredication) {
+ // 2) We need to prove that the sub expression that we create in the
+ // tail-predicated loop body, which calculates the remaining elements to be
+ // processed, is non-negative, i.e. it doesn't overflow:
+ //
+ // ((ElementCount + VectorWidth - 1) / VectorWidth) - TripCount >= 0
+ //
+ // This is true if:
+ //
+ // TripCount == (ElementCount + VectorWidth - 1) / VectorWidth
+ //
+ // which what we will be using here.
+ //
+ auto *VW = SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth));
+ // ElementCount + (VW-1):
+ auto *ECPlusVWMinus1 = SE->getAddExpr(EC,
+ SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth - 1)));
+
+ // Ceil = ElementCount + (VW-1) / VW
+ auto *Ceil = SE->getUDivExpr(ECPlusVWMinus1, VW);
+
+ // Prevent unused variable warnings with TC
+ (void)TC;
+ LLVM_DEBUG(
+ dbgs() << "ARM TP: Analysing overflow behaviour for:\n";
+ dbgs() << "ARM TP: - TripCount = "; TC->dump();
+ dbgs() << "ARM TP: - ElemCount = "; EC->dump();
+ dbgs() << "ARM TP: - VecWidth = " << VectorWidth << "\n";
+ dbgs() << "ARM TP: - (ElemCount+VW-1) / VW = "; Ceil->dump();
+ );
+
+ // As an example, almost all the tripcount expressions (produced by the
+ // vectoriser) look like this:
+ //
+ // TC = ((-4 + (4 * ((3 + %N) /u 4))<nuw>) /u 4)
+ //
+ // and "ElementCount + (VW-1) / VW":
+ //
+ // Ceil = ((3 + %N) /u 4)
+ //
+ // Check for equality of TC and Ceil by calculating SCEV expression
+ // TC - Ceil and test it for zero.
+ //
+ bool Zero = SE->getMinusSCEV(
+ SE->getBackedgeTakenCount(L),
+ SE->getUDivExpr(SE->getAddExpr(SE->getMulExpr(Ceil, VW),
+ SE->getNegativeSCEV(VW)),
+ VW))
+ ->isZero();
+
+ if (!Zero) {
+ LLVM_DEBUG(dbgs() << "ARM TP: possible overflow in sub expression.\n");
+ return false;
+ }
}
- // 3) Find out if IV is an induction phi. Note that we can't use Loop
+ // 3) Find out if IV is an induction phi. Note that we can't use Loop
// helpers here to get the induction variable, because the hardware loop is
- // no longer in loopsimplify form, and also the hwloop intrinsic uses a
- // different counter. Using SCEV, we check that the induction is of the
+ // no longer in loopsimplify form, and also the hwloop intrinsic uses a
+ // different counter. Using SCEV, we check that the induction is of the
// form i = i + 4, where the increment must be equal to the VectorWidth.
auto *IV = ActiveLaneMask->getOperand(0);
auto *IVExpr = SE->getSCEV(IV);
auto *AddExpr = dyn_cast<SCEVAddRecExpr>(IVExpr);
-
+
if (!AddExpr) {
LLVM_DEBUG(dbgs() << "ARM TP: induction not an add expr: "; IVExpr->dump());
return false;
@@ -324,11 +324,11 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
LLVM_DEBUG(dbgs() << "ARM TP: phi not part of this loop\n");
return false;
}
- auto *Base = dyn_cast<SCEVConstant>(AddExpr->getOperand(0));
- if (!Base || !Base->isZero()) {
- LLVM_DEBUG(dbgs() << "ARM TP: induction base is not 0\n");
- return false;
- }
+ auto *Base = dyn_cast<SCEVConstant>(AddExpr->getOperand(0));
+ if (!Base || !Base->isZero()) {
+ LLVM_DEBUG(dbgs() << "ARM TP: induction base is not 0\n");
+ return false;
+ }
auto *Step = dyn_cast<SCEVConstant>(AddExpr->getOperand(1));
if (!Step) {
LLVM_DEBUG(dbgs() << "ARM TP: induction step is not a constant: ";
@@ -339,29 +339,29 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
if (VectorWidth == StepValue)
return true;
- LLVM_DEBUG(dbgs() << "ARM TP: Step value " << StepValue
- << " doesn't match vector width " << VectorWidth << "\n");
+ LLVM_DEBUG(dbgs() << "ARM TP: Step value " << StepValue
+ << " doesn't match vector width " << VectorWidth << "\n");
return false;
}
void MVETailPredication::InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask,
- Value *TripCount) {
+ Value *TripCount) {
IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
Module *M = L->getHeader()->getModule();
Type *Ty = IntegerType::get(M->getContext(), 32);
- unsigned VectorWidth =
- cast<FixedVectorType>(ActiveLaneMask->getType())->getNumElements();
+ unsigned VectorWidth =
+ cast<FixedVectorType>(ActiveLaneMask->getType())->getNumElements();
// Insert a phi to count the number of elements processed by the loop.
- Builder.SetInsertPoint(L->getHeader()->getFirstNonPHI());
+ Builder.SetInsertPoint(L->getHeader()->getFirstNonPHI());
PHINode *Processed = Builder.CreatePHI(Ty, 2);
- Processed->addIncoming(ActiveLaneMask->getOperand(1), L->getLoopPreheader());
+ Processed->addIncoming(ActiveLaneMask->getOperand(1), L->getLoopPreheader());
- // Replace @llvm.get.active.mask() with the ARM specific VCTP intrinic, and
- // thus represent the effect of tail predication.
+ // Replace @llvm.get.active.mask() with the ARM specific VCTP intrinic, and
+ // thus represent the effect of tail predication.
Builder.SetInsertPoint(ActiveLaneMask);
- ConstantInt *Factor = ConstantInt::get(cast<IntegerType>(Ty), VectorWidth);
+ ConstantInt *Factor = ConstantInt::get(cast<IntegerType>(Ty), VectorWidth);
Intrinsic::ID VCTPID;
switch (VectorWidth) {
@@ -390,36 +390,36 @@ void MVETailPredication::InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask,
<< "ARM TP: Inserted VCTP: " << *VCTPCall << "\n");
}
-bool MVETailPredication::TryConvertActiveLaneMask(Value *TripCount) {
- SmallVector<IntrinsicInst *, 4> ActiveLaneMasks;
- for (auto *BB : L->getBlocks())
- for (auto &I : *BB)
- if (auto *Int = dyn_cast<IntrinsicInst>(&I))
- if (Int->getIntrinsicID() == Intrinsic::get_active_lane_mask)
- ActiveLaneMasks.push_back(Int);
-
- if (ActiveLaneMasks.empty())
+bool MVETailPredication::TryConvertActiveLaneMask(Value *TripCount) {
+ SmallVector<IntrinsicInst *, 4> ActiveLaneMasks;
+ for (auto *BB : L->getBlocks())
+ for (auto &I : *BB)
+ if (auto *Int = dyn_cast<IntrinsicInst>(&I))
+ if (Int->getIntrinsicID() == Intrinsic::get_active_lane_mask)
+ ActiveLaneMasks.push_back(Int);
+
+ if (ActiveLaneMasks.empty())
return false;
LLVM_DEBUG(dbgs() << "ARM TP: Found predicated vector loop.\n");
- for (auto *ActiveLaneMask : ActiveLaneMasks) {
+ for (auto *ActiveLaneMask : ActiveLaneMasks) {
LLVM_DEBUG(dbgs() << "ARM TP: Found active lane mask: "
<< *ActiveLaneMask << "\n");
- if (!IsSafeActiveMask(ActiveLaneMask, TripCount)) {
+ if (!IsSafeActiveMask(ActiveLaneMask, TripCount)) {
LLVM_DEBUG(dbgs() << "ARM TP: Not safe to insert VCTP.\n");
return false;
}
LLVM_DEBUG(dbgs() << "ARM TP: Safe to insert VCTP.\n");
- InsertVCTPIntrinsic(ActiveLaneMask, TripCount);
+ InsertVCTPIntrinsic(ActiveLaneMask, TripCount);
}
- // Remove dead instructions and now dead phis.
- for (auto *II : ActiveLaneMasks)
- RecursivelyDeleteTriviallyDeadInstructions(II);
- for (auto I : L->blocks())
- DeleteDeadPHIs(I);
+ // Remove dead instructions and now dead phis.
+ for (auto *II : ActiveLaneMasks)
+ RecursivelyDeleteTriviallyDeadInstructions(II);
+ for (auto I : L->blocks())
+ DeleteDeadPHIs(I);
return true;
}