author     shadchin <shadchin@yandex-team.ru>	2022-02-10 16:44:30 +0300
committer  Daniil Cherednik <dcherednik@yandex-team.ru>	2022-02-10 16:44:30 +0300
commit     2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch)
tree       012bb94d777798f1f56ac1cec429509766d05181 /contrib/libs/llvm12/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
parent     6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff)
download   ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Target/PowerPC/PPCTargetTransformInfo.cpp')
-rw-r--r--  contrib/libs/llvm12/lib/Target/PowerPC/PPCTargetTransformInfo.cpp  682
1 file changed, 341 insertions, 341 deletions
diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index c90ff8b7d5..9f100e63b0 100644
--- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -8,19 +8,19 @@
#include "PPCTargetTransformInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSchedule.h"
-#include "llvm/IR/IntrinsicsPowerPC.h"
+#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/KnownBits.h"
-#include "llvm/Transforms/InstCombine/InstCombiner.h"
-#include "llvm/Transforms/Utils/Local.h"
-
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
+#include "llvm/Transforms/Utils/Local.h"
+
using namespace llvm;
#define DEBUG_TYPE "ppctti"
@@ -28,7 +28,7 @@ using namespace llvm;
static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
-// This is currently only used for the data prefetch pass
+// This is currently only used for the data prefetch pass
static cl::opt<unsigned>
CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64),
cl::desc("The loop prefetch cache line size"));
@@ -64,109 +64,109 @@ PPCTTIImpl::getPopcntSupport(unsigned TyWidth) {
return TTI::PSK_Software;
}
-Optional<Instruction *>
-PPCTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
- Intrinsic::ID IID = II.getIntrinsicID();
- switch (IID) {
- default:
- break;
- case Intrinsic::ppc_altivec_lvx:
- case Intrinsic::ppc_altivec_lvxl:
- // Turn PPC lvx -> load if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(
- II.getArgOperand(0), Align(16), IC.getDataLayout(), &II,
- &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
- Value *Ptr = IC.Builder.CreateBitCast(
- II.getArgOperand(0), PointerType::getUnqual(II.getType()));
- return new LoadInst(II.getType(), Ptr, "", false, Align(16));
- }
- break;
- case Intrinsic::ppc_vsx_lxvw4x:
- case Intrinsic::ppc_vsx_lxvd2x: {
- // Turn PPC VSX loads into normal loads.
- Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(0),
- PointerType::getUnqual(II.getType()));
- return new LoadInst(II.getType(), Ptr, Twine(""), false, Align(1));
- }
- case Intrinsic::ppc_altivec_stvx:
- case Intrinsic::ppc_altivec_stvxl:
- // Turn stvx -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(
- II.getArgOperand(1), Align(16), IC.getDataLayout(), &II,
- &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
- Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType());
- Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
- return new StoreInst(II.getArgOperand(0), Ptr, false, Align(16));
- }
- break;
- case Intrinsic::ppc_vsx_stxvw4x:
- case Intrinsic::ppc_vsx_stxvd2x: {
- // Turn PPC VSX stores into normal stores.
- Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType());
- Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
- return new StoreInst(II.getArgOperand(0), Ptr, false, Align(1));
- }
- case Intrinsic::ppc_altivec_vperm:
- // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
- // Note that ppc_altivec_vperm has a big-endian bias, so when creating
- // a vectorshuffle for little endian, we must undo the transformation
- // performed on vec_perm in altivec.h. That is, we must complement
- // the permutation mask with respect to 31 and reverse the order of
- // V1 and V2.
- if (Constant *Mask = dyn_cast<Constant>(II.getArgOperand(2))) {
- assert(cast<FixedVectorType>(Mask->getType())->getNumElements() == 16 &&
- "Bad type for intrinsic!");
-
- // Check that all of the elements are integer constants or undefs.
- bool AllEltsOk = true;
- for (unsigned i = 0; i != 16; ++i) {
- Constant *Elt = Mask->getAggregateElement(i);
- if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
- AllEltsOk = false;
- break;
- }
- }
-
- if (AllEltsOk) {
- // Cast the input vectors to byte vectors.
- Value *Op0 =
- IC.Builder.CreateBitCast(II.getArgOperand(0), Mask->getType());
- Value *Op1 =
- IC.Builder.CreateBitCast(II.getArgOperand(1), Mask->getType());
- Value *Result = UndefValue::get(Op0->getType());
-
- // Only extract each element once.
- Value *ExtractedElts[32];
- memset(ExtractedElts, 0, sizeof(ExtractedElts));
-
- for (unsigned i = 0; i != 16; ++i) {
- if (isa<UndefValue>(Mask->getAggregateElement(i)))
- continue;
- unsigned Idx =
- cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
- Idx &= 31; // Match the hardware behavior.
- if (DL.isLittleEndian())
- Idx = 31 - Idx;
-
- if (!ExtractedElts[Idx]) {
- Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
- Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
- ExtractedElts[Idx] = IC.Builder.CreateExtractElement(
- Idx < 16 ? Op0ToUse : Op1ToUse, IC.Builder.getInt32(Idx & 15));
- }
-
- // Insert this value into the result vector.
- Result = IC.Builder.CreateInsertElement(Result, ExtractedElts[Idx],
- IC.Builder.getInt32(i));
- }
- return CastInst::Create(Instruction::BitCast, Result, II.getType());
- }
- }
- break;
- }
- return None;
-}
-
+Optional<Instruction *>
+PPCTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
+ Intrinsic::ID IID = II.getIntrinsicID();
+ switch (IID) {
+ default:
+ break;
+ case Intrinsic::ppc_altivec_lvx:
+ case Intrinsic::ppc_altivec_lvxl:
+ // Turn PPC lvx -> load if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(
+ II.getArgOperand(0), Align(16), IC.getDataLayout(), &II,
+ &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
+ Value *Ptr = IC.Builder.CreateBitCast(
+ II.getArgOperand(0), PointerType::getUnqual(II.getType()));
+ return new LoadInst(II.getType(), Ptr, "", false, Align(16));
+ }
+ break;
+ case Intrinsic::ppc_vsx_lxvw4x:
+ case Intrinsic::ppc_vsx_lxvd2x: {
+ // Turn PPC VSX loads into normal loads.
+ Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(0),
+ PointerType::getUnqual(II.getType()));
+ return new LoadInst(II.getType(), Ptr, Twine(""), false, Align(1));
+ }
+ case Intrinsic::ppc_altivec_stvx:
+ case Intrinsic::ppc_altivec_stvxl:
+ // Turn stvx -> store if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(
+ II.getArgOperand(1), Align(16), IC.getDataLayout(), &II,
+ &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
+ Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType());
+ Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
+ return new StoreInst(II.getArgOperand(0), Ptr, false, Align(16));
+ }
+ break;
+ case Intrinsic::ppc_vsx_stxvw4x:
+ case Intrinsic::ppc_vsx_stxvd2x: {
+ // Turn PPC VSX stores into normal stores.
+ Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType());
+ Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
+ return new StoreInst(II.getArgOperand(0), Ptr, false, Align(1));
+ }
+ case Intrinsic::ppc_altivec_vperm:
+ // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
+ // Note that ppc_altivec_vperm has a big-endian bias, so when creating
+ // a vectorshuffle for little endian, we must undo the transformation
+ // performed on vec_perm in altivec.h. That is, we must complement
+ // the permutation mask with respect to 31 and reverse the order of
+ // V1 and V2.
+ if (Constant *Mask = dyn_cast<Constant>(II.getArgOperand(2))) {
+ assert(cast<FixedVectorType>(Mask->getType())->getNumElements() == 16 &&
+ "Bad type for intrinsic!");
+
+ // Check that all of the elements are integer constants or undefs.
+ bool AllEltsOk = true;
+ for (unsigned i = 0; i != 16; ++i) {
+ Constant *Elt = Mask->getAggregateElement(i);
+ if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
+ AllEltsOk = false;
+ break;
+ }
+ }
+
+ if (AllEltsOk) {
+ // Cast the input vectors to byte vectors.
+ Value *Op0 =
+ IC.Builder.CreateBitCast(II.getArgOperand(0), Mask->getType());
+ Value *Op1 =
+ IC.Builder.CreateBitCast(II.getArgOperand(1), Mask->getType());
+ Value *Result = UndefValue::get(Op0->getType());
+
+ // Only extract each element once.
+ Value *ExtractedElts[32];
+ memset(ExtractedElts, 0, sizeof(ExtractedElts));
+
+ for (unsigned i = 0; i != 16; ++i) {
+ if (isa<UndefValue>(Mask->getAggregateElement(i)))
+ continue;
+ unsigned Idx =
+ cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
+ Idx &= 31; // Match the hardware behavior.
+ if (DL.isLittleEndian())
+ Idx = 31 - Idx;
+
+ if (!ExtractedElts[Idx]) {
+ Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
+ Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
+ ExtractedElts[Idx] = IC.Builder.CreateExtractElement(
+ Idx < 16 ? Op0ToUse : Op1ToUse, IC.Builder.getInt32(Idx & 15));
+ }
+
+ // Insert this value into the result vector.
+ Result = IC.Builder.CreateInsertElement(Result, ExtractedElts[Idx],
+ IC.Builder.getInt32(i));
+ }
+ return CastInst::Create(Instruction::BitCast, Result, II.getType());
+ }
+ }
+ break;
+ }
+ return None;
+}
+
int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind) {
if (DisablePPCConstHoist)
@@ -234,10 +234,10 @@ int PPCTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
int PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty,
- TTI::TargetCostKind CostKind,
- Instruction *Inst) {
+ TTI::TargetCostKind CostKind,
+ Instruction *Inst) {
if (DisablePPCConstHoist)
- return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind, Inst);
+ return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind, Inst);
assert(Ty->isIntegerTy());
@@ -335,29 +335,29 @@ PPCTTIImpl::getUserCost(const User *U, ArrayRef<const Value *> Operands,
return BaseT::getUserCost(U, Operands, CostKind);
}
-// Determining the address of a TLS variable results in a function call in
-// certain TLS models.
-static bool memAddrUsesCTR(const Value *MemAddr, const PPCTargetMachine &TM,
- SmallPtrSetImpl<const Value *> &Visited) {
- // No need to traverse again if we already checked this operand.
- if (!Visited.insert(MemAddr).second)
- return false;
- const auto *GV = dyn_cast<GlobalValue>(MemAddr);
- if (!GV) {
- // Recurse to check for constants that refer to TLS global variables.
- if (const auto *CV = dyn_cast<Constant>(MemAddr))
- for (const auto &CO : CV->operands())
- if (memAddrUsesCTR(CO, TM, Visited))
- return true;
- return false;
- }
-
- if (!GV->isThreadLocal())
- return false;
- TLSModel::Model Model = TM.getTLSModel(GV);
- return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
-}
-
+// Determining the address of a TLS variable results in a function call in
+// certain TLS models.
+static bool memAddrUsesCTR(const Value *MemAddr, const PPCTargetMachine &TM,
+ SmallPtrSetImpl<const Value *> &Visited) {
+ // No need to traverse again if we already checked this operand.
+ if (!Visited.insert(MemAddr).second)
+ return false;
+ const auto *GV = dyn_cast<GlobalValue>(MemAddr);
+ if (!GV) {
+ // Recurse to check for constants that refer to TLS global variables.
+ if (const auto *CV = dyn_cast<Constant>(MemAddr))
+ for (const auto &CO : CV->operands())
+ if (memAddrUsesCTR(CO, TM, Visited))
+ return true;
+ return false;
+ }
+
+ if (!GV->isThreadLocal())
+ return false;
+ TLSModel::Model Model = TM.getTLSModel(GV);
+ return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
+}
+
bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
SmallPtrSetImpl<const Value *> &Visited) {
const PPCTargetMachine &TM = ST->getTargetMachine();
@@ -383,34 +383,34 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
return false;
};
- auto supportedHalfPrecisionOp = [](Instruction *Inst) {
- switch (Inst->getOpcode()) {
- default:
- return false;
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::Load:
- case Instruction::Store:
- case Instruction::FPToUI:
- case Instruction::UIToFP:
- case Instruction::FPToSI:
- case Instruction::SIToFP:
- return true;
- }
- };
-
+ auto supportedHalfPrecisionOp = [](Instruction *Inst) {
+ switch (Inst->getOpcode()) {
+ default:
+ return false;
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::Load:
+ case Instruction::Store:
+ case Instruction::FPToUI:
+ case Instruction::UIToFP:
+ case Instruction::FPToSI:
+ case Instruction::SIToFP:
+ return true;
+ }
+ };
+
for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
J != JE; ++J) {
- // There are no direct operations on half precision so assume that
- // anything with that type requires a call except for a few select
- // operations with Power9.
- if (Instruction *CurrInst = dyn_cast<Instruction>(J)) {
- for (const auto &Op : CurrInst->operands()) {
- if (Op->getType()->getScalarType()->isHalfTy() ||
- CurrInst->getType()->getScalarType()->isHalfTy())
- return !(ST->isISA3_0() && supportedHalfPrecisionOp(CurrInst));
- }
- }
+ // There are no direct operations on half precision so assume that
+ // anything with that type requires a call except for a few select
+ // operations with Power9.
+ if (Instruction *CurrInst = dyn_cast<Instruction>(J)) {
+ for (const auto &Op : CurrInst->operands()) {
+ if (Op->getType()->getScalarType()->isHalfTy() ||
+ CurrInst->getType()->getScalarType()->isHalfTy())
+ return !(ST->isISA3_0() && supportedHalfPrecisionOp(CurrInst));
+ }
+ }
if (CallInst *CI = dyn_cast<CallInst>(J)) {
// Inline ASM is okay, unless it clobbers the ctr register.
if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand())) {
@@ -432,30 +432,30 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
case Intrinsic::loop_decrement:
return true;
- // Binary operations on 128-bit value will use CTR.
- case Intrinsic::experimental_constrained_fadd:
- case Intrinsic::experimental_constrained_fsub:
- case Intrinsic::experimental_constrained_fmul:
- case Intrinsic::experimental_constrained_fdiv:
- case Intrinsic::experimental_constrained_frem:
- if (F->getType()->getScalarType()->isFP128Ty() ||
- F->getType()->getScalarType()->isPPC_FP128Ty())
- return true;
- break;
-
- case Intrinsic::experimental_constrained_fptosi:
- case Intrinsic::experimental_constrained_fptoui:
- case Intrinsic::experimental_constrained_sitofp:
- case Intrinsic::experimental_constrained_uitofp: {
- Type *SrcType = CI->getArgOperand(0)->getType()->getScalarType();
- Type *DstType = CI->getType()->getScalarType();
- if (SrcType->isPPC_FP128Ty() || DstType->isPPC_FP128Ty() ||
- isLargeIntegerTy(!TM.isPPC64(), SrcType) ||
- isLargeIntegerTy(!TM.isPPC64(), DstType))
- return true;
- break;
- }
-
+ // Binary operations on 128-bit value will use CTR.
+ case Intrinsic::experimental_constrained_fadd:
+ case Intrinsic::experimental_constrained_fsub:
+ case Intrinsic::experimental_constrained_fmul:
+ case Intrinsic::experimental_constrained_fdiv:
+ case Intrinsic::experimental_constrained_frem:
+ if (F->getType()->getScalarType()->isFP128Ty() ||
+ F->getType()->getScalarType()->isPPC_FP128Ty())
+ return true;
+ break;
+
+ case Intrinsic::experimental_constrained_fptosi:
+ case Intrinsic::experimental_constrained_fptoui:
+ case Intrinsic::experimental_constrained_sitofp:
+ case Intrinsic::experimental_constrained_uitofp: {
+ Type *SrcType = CI->getArgOperand(0)->getType()->getScalarType();
+ Type *DstType = CI->getType()->getScalarType();
+ if (SrcType->isPPC_FP128Ty() || DstType->isPPC_FP128Ty() ||
+ isLargeIntegerTy(!TM.isPPC64(), SrcType) ||
+ isLargeIntegerTy(!TM.isPPC64(), DstType))
+ return true;
+ break;
+ }
+
// Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
// because, although it does clobber the counter register, the
// control can't then return to inside the loop unless there is also
@@ -474,15 +474,15 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
case Intrinsic::pow:
case Intrinsic::sin:
case Intrinsic::cos:
- case Intrinsic::experimental_constrained_powi:
- case Intrinsic::experimental_constrained_log:
- case Intrinsic::experimental_constrained_log2:
- case Intrinsic::experimental_constrained_log10:
- case Intrinsic::experimental_constrained_exp:
- case Intrinsic::experimental_constrained_exp2:
- case Intrinsic::experimental_constrained_pow:
- case Intrinsic::experimental_constrained_sin:
- case Intrinsic::experimental_constrained_cos:
+ case Intrinsic::experimental_constrained_powi:
+ case Intrinsic::experimental_constrained_log:
+ case Intrinsic::experimental_constrained_log2:
+ case Intrinsic::experimental_constrained_log10:
+ case Intrinsic::experimental_constrained_exp:
+ case Intrinsic::experimental_constrained_exp2:
+ case Intrinsic::experimental_constrained_pow:
+ case Intrinsic::experimental_constrained_sin:
+ case Intrinsic::experimental_constrained_cos:
return true;
case Intrinsic::copysign:
if (CI->getArgOperand(0)->getType()->getScalarType()->
@@ -504,54 +504,54 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
case Intrinsic::llround: Opcode = ISD::LLROUND; break;
case Intrinsic::minnum: Opcode = ISD::FMINNUM; break;
case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break;
- case Intrinsic::experimental_constrained_fcmp:
- Opcode = ISD::STRICT_FSETCC;
- break;
- case Intrinsic::experimental_constrained_fcmps:
- Opcode = ISD::STRICT_FSETCCS;
- break;
- case Intrinsic::experimental_constrained_fma:
- Opcode = ISD::STRICT_FMA;
- break;
- case Intrinsic::experimental_constrained_sqrt:
- Opcode = ISD::STRICT_FSQRT;
- break;
- case Intrinsic::experimental_constrained_floor:
- Opcode = ISD::STRICT_FFLOOR;
- break;
- case Intrinsic::experimental_constrained_ceil:
- Opcode = ISD::STRICT_FCEIL;
- break;
- case Intrinsic::experimental_constrained_trunc:
- Opcode = ISD::STRICT_FTRUNC;
- break;
- case Intrinsic::experimental_constrained_rint:
- Opcode = ISD::STRICT_FRINT;
- break;
- case Intrinsic::experimental_constrained_lrint:
- Opcode = ISD::STRICT_LRINT;
- break;
- case Intrinsic::experimental_constrained_llrint:
- Opcode = ISD::STRICT_LLRINT;
- break;
- case Intrinsic::experimental_constrained_nearbyint:
- Opcode = ISD::STRICT_FNEARBYINT;
- break;
- case Intrinsic::experimental_constrained_round:
- Opcode = ISD::STRICT_FROUND;
- break;
- case Intrinsic::experimental_constrained_lround:
- Opcode = ISD::STRICT_LROUND;
- break;
- case Intrinsic::experimental_constrained_llround:
- Opcode = ISD::STRICT_LLROUND;
- break;
- case Intrinsic::experimental_constrained_minnum:
- Opcode = ISD::STRICT_FMINNUM;
- break;
- case Intrinsic::experimental_constrained_maxnum:
- Opcode = ISD::STRICT_FMAXNUM;
- break;
+ case Intrinsic::experimental_constrained_fcmp:
+ Opcode = ISD::STRICT_FSETCC;
+ break;
+ case Intrinsic::experimental_constrained_fcmps:
+ Opcode = ISD::STRICT_FSETCCS;
+ break;
+ case Intrinsic::experimental_constrained_fma:
+ Opcode = ISD::STRICT_FMA;
+ break;
+ case Intrinsic::experimental_constrained_sqrt:
+ Opcode = ISD::STRICT_FSQRT;
+ break;
+ case Intrinsic::experimental_constrained_floor:
+ Opcode = ISD::STRICT_FFLOOR;
+ break;
+ case Intrinsic::experimental_constrained_ceil:
+ Opcode = ISD::STRICT_FCEIL;
+ break;
+ case Intrinsic::experimental_constrained_trunc:
+ Opcode = ISD::STRICT_FTRUNC;
+ break;
+ case Intrinsic::experimental_constrained_rint:
+ Opcode = ISD::STRICT_FRINT;
+ break;
+ case Intrinsic::experimental_constrained_lrint:
+ Opcode = ISD::STRICT_LRINT;
+ break;
+ case Intrinsic::experimental_constrained_llrint:
+ Opcode = ISD::STRICT_LLRINT;
+ break;
+ case Intrinsic::experimental_constrained_nearbyint:
+ Opcode = ISD::STRICT_FNEARBYINT;
+ break;
+ case Intrinsic::experimental_constrained_round:
+ Opcode = ISD::STRICT_FROUND;
+ break;
+ case Intrinsic::experimental_constrained_lround:
+ Opcode = ISD::STRICT_LROUND;
+ break;
+ case Intrinsic::experimental_constrained_llround:
+ Opcode = ISD::STRICT_LLROUND;
+ break;
+ case Intrinsic::experimental_constrained_minnum:
+ Opcode = ISD::STRICT_FMINNUM;
+ break;
+ case Intrinsic::experimental_constrained_maxnum:
+ Opcode = ISD::STRICT_FMAXNUM;
+ break;
case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break;
case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break;
}
@@ -700,7 +700,7 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
}
for (Value *Operand : J->operands())
- if (memAddrUsesCTR(Operand, TM, Visited))
+ if (memAddrUsesCTR(Operand, TM, Visited))
return true;
}
@@ -760,24 +760,24 @@ bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
}
}
- // If an exit block has a PHI that accesses a TLS variable as one of the
- // incoming values from the loop, we cannot produce a CTR loop because the
- // address for that value will be computed in the loop.
- SmallVector<BasicBlock *, 4> ExitBlocks;
- L->getExitBlocks(ExitBlocks);
- for (auto &BB : ExitBlocks) {
- for (auto &PHI : BB->phis()) {
- for (int Idx = 0, EndIdx = PHI.getNumIncomingValues(); Idx < EndIdx;
- Idx++) {
- const BasicBlock *IncomingBB = PHI.getIncomingBlock(Idx);
- const Value *IncomingValue = PHI.getIncomingValue(Idx);
- if (L->contains(IncomingBB) &&
- memAddrUsesCTR(IncomingValue, TM, Visited))
- return false;
- }
- }
- }
-
+ // If an exit block has a PHI that accesses a TLS variable as one of the
+ // incoming values from the loop, we cannot produce a CTR loop because the
+ // address for that value will be computed in the loop.
+ SmallVector<BasicBlock *, 4> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ for (auto &BB : ExitBlocks) {
+ for (auto &PHI : BB->phis()) {
+ for (int Idx = 0, EndIdx = PHI.getNumIncomingValues(); Idx < EndIdx;
+ Idx++) {
+ const BasicBlock *IncomingBB = PHI.getIncomingBlock(Idx);
+ const Value *IncomingValue = PHI.getIncomingValue(Idx);
+ if (L->contains(IncomingBB) &&
+ memAddrUsesCTR(IncomingValue, TM, Visited))
+ return false;
+ }
+ }
+ }
+
LLVMContext &C = L->getHeader()->getContext();
HWLoopInfo.CountType = TM.isPPC64() ?
Type::getInt64Ty(C) : Type::getInt32Ty(C);
@@ -813,7 +813,7 @@ bool PPCTTIImpl::useColdCCForColdCall(Function &F) {
}
bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
- // On the A2, always unroll aggressively.
+ // On the A2, always unroll aggressively.
if (ST->getCPUDirective() == PPC::DIR_A2)
return true;
@@ -989,7 +989,7 @@ int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
- // PPC, for both Altivec/VSX, support cheap arbitrary permutations
+ // PPC, for both Altivec/VSX, support cheap arbitrary permutations
// (at least in the sense that there need only be one non-loop-invariant
// instruction). We need one such shuffle instruction for each actual
// register (this is not true for arbitrary shuffles, but is true for the
@@ -1006,12 +1006,12 @@ int PPCTTIImpl::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
}
int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
- TTI::CastContextHint CCH,
+ TTI::CastContextHint CCH,
TTI::TargetCostKind CostKind,
const Instruction *I) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
- int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
+ int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
Cost = vectorCostAdjustment(Cost, Opcode, Dst, Src);
// TODO: Allow non-throughput costs that aren't binary.
if (CostKind != TTI::TCK_RecipThroughput)
@@ -1020,11 +1020,11 @@ int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
}
int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
- CmpInst::Predicate VecPred,
+ CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I) {
- int Cost =
- BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
+ int Cost =
+ BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
// TODO: Handle other cost kinds.
if (CostKind != TTI::TCK_RecipThroughput)
return Cost;
@@ -1071,7 +1071,7 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
// The cost of the load constant for a vector extract is disregarded
// (invariant, easily schedulable).
return vectorCostAdjustment(1, Opcode, Val, nullptr);
-
+
} else if (ST->hasDirectMove())
// Assume permute has standard cost.
// Assume move-to/move-from VSR have 2x standard cost.
@@ -1144,7 +1144,7 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// for Altivec types using the VSX instructions, but that's more expensive
// than using the permutation-based load sequence. On the P8, that's no
// longer true.
- if (Opcode == Instruction::Load && (!ST->hasP8Vector() && IsAltivecType) &&
+ if (Opcode == Instruction::Load && (!ST->hasP8Vector() && IsAltivecType) &&
*Alignment >= LT.second.getScalarType().getStoreSize())
return Cost + LT.first; // Add the cost of the permutations.
@@ -1197,7 +1197,7 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(
getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace,
CostKind);
- // PPC, for both Altivec/VSX, support cheap arbitrary permutations
+ // PPC, for both Altivec/VSX, support cheap arbitrary permutations
// (at least in the sense that there need only be one non-loop-invariant
// instruction). For each result vector, we need one shuffle per incoming
// vector (except that the first shuffle can take two incoming vectors
@@ -1212,27 +1212,27 @@ unsigned PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}
-bool PPCTTIImpl::areFunctionArgsABICompatible(
- const Function *Caller, const Function *Callee,
- SmallPtrSetImpl<Argument *> &Args) const {
-
- // We need to ensure that argument promotion does not
- // attempt to promote pointers to MMA types (__vector_pair
- // and __vector_quad) since these types explicitly cannot be
- // passed as arguments. Both of these types are larger than
- // the 128-bit Altivec vectors and have a scalar size of 1 bit.
- if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args))
- return false;
-
- return llvm::none_of(Args, [](Argument *A) {
- auto *EltTy = cast<PointerType>(A->getType())->getElementType();
- if (EltTy->isSized())
- return (EltTy->isIntOrIntVectorTy(1) &&
- EltTy->getPrimitiveSizeInBits() > 128);
- return false;
- });
-}
-
+bool PPCTTIImpl::areFunctionArgsABICompatible(
+ const Function *Caller, const Function *Callee,
+ SmallPtrSetImpl<Argument *> &Args) const {
+
+ // We need to ensure that argument promotion does not
+ // attempt to promote pointers to MMA types (__vector_pair
+ // and __vector_quad) since these types explicitly cannot be
+ // passed as arguments. Both of these types are larger than
+ // the 128-bit Altivec vectors and have a scalar size of 1 bit.
+ if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args))
+ return false;
+
+ return llvm::none_of(Args, [](Argument *A) {
+ auto *EltTy = cast<PointerType>(A->getType())->getElementType();
+ if (EltTy->isSized())
+ return (EltTy->isIntOrIntVectorTy(1) &&
+ EltTy->getPrimitiveSizeInBits() > 128);
+ return false;
+ });
+}
+
bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
LoopInfo *LI, DominatorTree *DT,
AssumptionCache *AC, TargetLibraryInfo *LibInfo) {
@@ -1268,51 +1268,51 @@ bool PPCTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
else
return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
}
-
-bool PPCTTIImpl::isNumRegsMajorCostOfLSR() {
- return false;
-}
-
-bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
- MemIntrinsicInfo &Info) {
- switch (Inst->getIntrinsicID()) {
- case Intrinsic::ppc_altivec_lvx:
- case Intrinsic::ppc_altivec_lvxl:
- case Intrinsic::ppc_altivec_lvebx:
- case Intrinsic::ppc_altivec_lvehx:
- case Intrinsic::ppc_altivec_lvewx:
- case Intrinsic::ppc_vsx_lxvd2x:
- case Intrinsic::ppc_vsx_lxvw4x:
- case Intrinsic::ppc_vsx_lxvd2x_be:
- case Intrinsic::ppc_vsx_lxvw4x_be:
- case Intrinsic::ppc_vsx_lxvl:
- case Intrinsic::ppc_vsx_lxvll:
- case Intrinsic::ppc_vsx_lxvp: {
- Info.PtrVal = Inst->getArgOperand(0);
- Info.ReadMem = true;
- Info.WriteMem = false;
- return true;
- }
- case Intrinsic::ppc_altivec_stvx:
- case Intrinsic::ppc_altivec_stvxl:
- case Intrinsic::ppc_altivec_stvebx:
- case Intrinsic::ppc_altivec_stvehx:
- case Intrinsic::ppc_altivec_stvewx:
- case Intrinsic::ppc_vsx_stxvd2x:
- case Intrinsic::ppc_vsx_stxvw4x:
- case Intrinsic::ppc_vsx_stxvd2x_be:
- case Intrinsic::ppc_vsx_stxvw4x_be:
- case Intrinsic::ppc_vsx_stxvl:
- case Intrinsic::ppc_vsx_stxvll:
- case Intrinsic::ppc_vsx_stxvp: {
- Info.PtrVal = Inst->getArgOperand(1);
- Info.ReadMem = false;
- Info.WriteMem = true;
- return true;
- }
- default:
- break;
- }
-
- return false;
-}
+
+bool PPCTTIImpl::isNumRegsMajorCostOfLSR() {
+ return false;
+}
+
+bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
+ MemIntrinsicInfo &Info) {
+ switch (Inst->getIntrinsicID()) {
+ case Intrinsic::ppc_altivec_lvx:
+ case Intrinsic::ppc_altivec_lvxl:
+ case Intrinsic::ppc_altivec_lvebx:
+ case Intrinsic::ppc_altivec_lvehx:
+ case Intrinsic::ppc_altivec_lvewx:
+ case Intrinsic::ppc_vsx_lxvd2x:
+ case Intrinsic::ppc_vsx_lxvw4x:
+ case Intrinsic::ppc_vsx_lxvd2x_be:
+ case Intrinsic::ppc_vsx_lxvw4x_be:
+ case Intrinsic::ppc_vsx_lxvl:
+ case Intrinsic::ppc_vsx_lxvll:
+ case Intrinsic::ppc_vsx_lxvp: {
+ Info.PtrVal = Inst->getArgOperand(0);
+ Info.ReadMem = true;
+ Info.WriteMem = false;
+ return true;
+ }
+ case Intrinsic::ppc_altivec_stvx:
+ case Intrinsic::ppc_altivec_stvxl:
+ case Intrinsic::ppc_altivec_stvebx:
+ case Intrinsic::ppc_altivec_stvehx:
+ case Intrinsic::ppc_altivec_stvewx:
+ case Intrinsic::ppc_vsx_stxvd2x:
+ case Intrinsic::ppc_vsx_stxvw4x:
+ case Intrinsic::ppc_vsx_stxvd2x_be:
+ case Intrinsic::ppc_vsx_stxvw4x_be:
+ case Intrinsic::ppc_vsx_stxvl:
+ case Intrinsic::ppc_vsx_stxvll:
+ case Intrinsic::ppc_vsx_stxvp: {
+ Info.PtrVal = Inst->getArgOperand(1);
+ Info.ReadMem = false;
+ Info.WriteMem = true;
+ return true;
+ }
+ default:
+ break;
+ }
+
+ return false;
+}