diff options
author | shadchin <shadchin@yandex-team.ru> | 2022-02-10 16:44:39 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:39 +0300 |
commit | e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (patch) | |
tree | 64175d5cadab313b3e7039ebaa06c5bc3295e274 /contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | |
parent | 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (diff) | |
download | ydb-e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0.tar.gz |
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp')
-rw-r--r-- | contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | 514 |
1 files changed, 257 insertions, 257 deletions
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index 28662d9314..d4b2ae3840 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -111,263 +111,263 @@ bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) { return false; } -// Convert NVVM intrinsics to target-generic LLVM code where possible. -static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) { - // Each NVVM intrinsic we can simplify can be replaced with one of: - // - // * an LLVM intrinsic, - // * an LLVM cast operation, - // * an LLVM binary operation, or - // * ad-hoc LLVM IR for the particular operation. - - // Some transformations are only valid when the module's - // flush-denormals-to-zero (ftz) setting is true/false, whereas other - // transformations are valid regardless of the module's ftz setting. - enum FtzRequirementTy { - FTZ_Any, // Any ftz setting is ok. - FTZ_MustBeOn, // Transformation is valid only if ftz is on. - FTZ_MustBeOff, // Transformation is valid only if ftz is off. - }; - // Classes of NVVM intrinsics that can't be replaced one-to-one with a - // target-generic intrinsic, cast op, or binary op but that we can nonetheless - // simplify. - enum SpecialCase { - SPC_Reciprocal, - }; - - // SimplifyAction is a poor-man's variant (plus an additional flag) that - // represents how to replace an NVVM intrinsic with target-generic LLVM IR. - struct SimplifyAction { - // Invariant: At most one of these Optionals has a value. - Optional<Intrinsic::ID> IID; - Optional<Instruction::CastOps> CastOp; - Optional<Instruction::BinaryOps> BinaryOp; - Optional<SpecialCase> Special; - - FtzRequirementTy FtzRequirement = FTZ_Any; - - SimplifyAction() = default; - - SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq) - : IID(IID), FtzRequirement(FtzReq) {} - - // Cast operations don't have anything to do with FTZ, so we skip that - // argument. - SimplifyAction(Instruction::CastOps CastOp) : CastOp(CastOp) {} - - SimplifyAction(Instruction::BinaryOps BinaryOp, FtzRequirementTy FtzReq) - : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {} - - SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq) - : Special(Special), FtzRequirement(FtzReq) {} - }; - - // Try to generate a SimplifyAction describing how to replace our - // IntrinsicInstr with target-generic LLVM IR. - const SimplifyAction Action = [II]() -> SimplifyAction { - switch (II->getIntrinsicID()) { - // NVVM intrinsics that map directly to LLVM intrinsics. - case Intrinsic::nvvm_ceil_d: - return {Intrinsic::ceil, FTZ_Any}; - case Intrinsic::nvvm_ceil_f: - return {Intrinsic::ceil, FTZ_MustBeOff}; - case Intrinsic::nvvm_ceil_ftz_f: - return {Intrinsic::ceil, FTZ_MustBeOn}; - case Intrinsic::nvvm_fabs_d: - return {Intrinsic::fabs, FTZ_Any}; - case Intrinsic::nvvm_fabs_f: - return {Intrinsic::fabs, FTZ_MustBeOff}; - case Intrinsic::nvvm_fabs_ftz_f: - return {Intrinsic::fabs, FTZ_MustBeOn}; - case Intrinsic::nvvm_floor_d: - return {Intrinsic::floor, FTZ_Any}; - case Intrinsic::nvvm_floor_f: - return {Intrinsic::floor, FTZ_MustBeOff}; - case Intrinsic::nvvm_floor_ftz_f: - return {Intrinsic::floor, FTZ_MustBeOn}; - case Intrinsic::nvvm_fma_rn_d: - return {Intrinsic::fma, FTZ_Any}; - case Intrinsic::nvvm_fma_rn_f: - return {Intrinsic::fma, FTZ_MustBeOff}; - case Intrinsic::nvvm_fma_rn_ftz_f: - return {Intrinsic::fma, FTZ_MustBeOn}; - case Intrinsic::nvvm_fmax_d: - return {Intrinsic::maxnum, FTZ_Any}; - case Intrinsic::nvvm_fmax_f: - return {Intrinsic::maxnum, FTZ_MustBeOff}; - case Intrinsic::nvvm_fmax_ftz_f: - return {Intrinsic::maxnum, FTZ_MustBeOn}; - case Intrinsic::nvvm_fmin_d: - return {Intrinsic::minnum, FTZ_Any}; - case Intrinsic::nvvm_fmin_f: - return {Intrinsic::minnum, FTZ_MustBeOff}; - case Intrinsic::nvvm_fmin_ftz_f: - return {Intrinsic::minnum, FTZ_MustBeOn}; - case Intrinsic::nvvm_round_d: - return {Intrinsic::round, FTZ_Any}; - case Intrinsic::nvvm_round_f: - return {Intrinsic::round, FTZ_MustBeOff}; - case Intrinsic::nvvm_round_ftz_f: - return {Intrinsic::round, FTZ_MustBeOn}; - case Intrinsic::nvvm_sqrt_rn_d: - return {Intrinsic::sqrt, FTZ_Any}; - case Intrinsic::nvvm_sqrt_f: - // nvvm_sqrt_f is a special case. For most intrinsics, foo_ftz_f is the - // ftz version, and foo_f is the non-ftz version. But nvvm_sqrt_f adopts - // the ftz-ness of the surrounding code. sqrt_rn_f and sqrt_rn_ftz_f are - // the versions with explicit ftz-ness. - return {Intrinsic::sqrt, FTZ_Any}; - case Intrinsic::nvvm_sqrt_rn_f: - return {Intrinsic::sqrt, FTZ_MustBeOff}; - case Intrinsic::nvvm_sqrt_rn_ftz_f: - return {Intrinsic::sqrt, FTZ_MustBeOn}; - case Intrinsic::nvvm_trunc_d: - return {Intrinsic::trunc, FTZ_Any}; - case Intrinsic::nvvm_trunc_f: - return {Intrinsic::trunc, FTZ_MustBeOff}; - case Intrinsic::nvvm_trunc_ftz_f: - return {Intrinsic::trunc, FTZ_MustBeOn}; - - // NVVM intrinsics that map to LLVM cast operations. - // - // Note that llvm's target-generic conversion operators correspond to the rz - // (round to zero) versions of the nvvm conversion intrinsics, even though - // most everything else here uses the rn (round to nearest even) nvvm ops. - case Intrinsic::nvvm_d2i_rz: - case Intrinsic::nvvm_f2i_rz: - case Intrinsic::nvvm_d2ll_rz: - case Intrinsic::nvvm_f2ll_rz: - return {Instruction::FPToSI}; - case Intrinsic::nvvm_d2ui_rz: - case Intrinsic::nvvm_f2ui_rz: - case Intrinsic::nvvm_d2ull_rz: - case Intrinsic::nvvm_f2ull_rz: - return {Instruction::FPToUI}; - case Intrinsic::nvvm_i2d_rz: - case Intrinsic::nvvm_i2f_rz: - case Intrinsic::nvvm_ll2d_rz: - case Intrinsic::nvvm_ll2f_rz: - return {Instruction::SIToFP}; - case Intrinsic::nvvm_ui2d_rz: - case Intrinsic::nvvm_ui2f_rz: - case Intrinsic::nvvm_ull2d_rz: - case Intrinsic::nvvm_ull2f_rz: - return {Instruction::UIToFP}; - - // NVVM intrinsics that map to LLVM binary ops. - case Intrinsic::nvvm_add_rn_d: - return {Instruction::FAdd, FTZ_Any}; - case Intrinsic::nvvm_add_rn_f: - return {Instruction::FAdd, FTZ_MustBeOff}; - case Intrinsic::nvvm_add_rn_ftz_f: - return {Instruction::FAdd, FTZ_MustBeOn}; - case Intrinsic::nvvm_mul_rn_d: - return {Instruction::FMul, FTZ_Any}; - case Intrinsic::nvvm_mul_rn_f: - return {Instruction::FMul, FTZ_MustBeOff}; - case Intrinsic::nvvm_mul_rn_ftz_f: - return {Instruction::FMul, FTZ_MustBeOn}; - case Intrinsic::nvvm_div_rn_d: - return {Instruction::FDiv, FTZ_Any}; - case Intrinsic::nvvm_div_rn_f: - return {Instruction::FDiv, FTZ_MustBeOff}; - case Intrinsic::nvvm_div_rn_ftz_f: - return {Instruction::FDiv, FTZ_MustBeOn}; - - // The remainder of cases are NVVM intrinsics that map to LLVM idioms, but - // need special handling. - // - // We seem to be missing intrinsics for rcp.approx.{ftz.}f32, which is just - // as well. - case Intrinsic::nvvm_rcp_rn_d: - return {SPC_Reciprocal, FTZ_Any}; - case Intrinsic::nvvm_rcp_rn_f: - return {SPC_Reciprocal, FTZ_MustBeOff}; - case Intrinsic::nvvm_rcp_rn_ftz_f: - return {SPC_Reciprocal, FTZ_MustBeOn}; - - // We do not currently simplify intrinsics that give an approximate - // answer. These include: - // - // - nvvm_cos_approx_{f,ftz_f} - // - nvvm_ex2_approx_{d,f,ftz_f} - // - nvvm_lg2_approx_{d,f,ftz_f} - // - nvvm_sin_approx_{f,ftz_f} - // - nvvm_sqrt_approx_{f,ftz_f} - // - nvvm_rsqrt_approx_{d,f,ftz_f} - // - nvvm_div_approx_{ftz_d,ftz_f,f} - // - nvvm_rcp_approx_ftz_d - // - // Ideally we'd encode them as e.g. "fast call @llvm.cos", where "fast" - // means that fastmath is enabled in the intrinsic. Unfortunately only - // binary operators (currently) have a fastmath bit in SelectionDAG, so - // this information gets lost and we can't select on it. - // - // TODO: div and rcp are lowered to a binary op, so these we could in - // theory lower them to "fast fdiv". - - default: - return {}; - } - }(); - - // If Action.FtzRequirementTy is not satisfied by the module's ftz state, we - // can bail out now. (Notice that in the case that IID is not an NVVM - // intrinsic, we don't have to look up any module metadata, as - // FtzRequirementTy will be FTZ_Any.) - if (Action.FtzRequirement != FTZ_Any) { - StringRef Attr = II->getFunction() - ->getFnAttribute("denormal-fp-math-f32") - .getValueAsString(); - DenormalMode Mode = parseDenormalFPAttribute(Attr); - bool FtzEnabled = Mode.Output != DenormalMode::IEEE; - - if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn)) - return nullptr; - } - - // Simplify to target-generic intrinsic. - if (Action.IID) { - SmallVector<Value *, 4> Args(II->arg_operands()); - // All the target-generic intrinsics currently of interest to us have one - // type argument, equal to that of the nvvm intrinsic's argument. - Type *Tys[] = {II->getArgOperand(0)->getType()}; - return CallInst::Create( - Intrinsic::getDeclaration(II->getModule(), *Action.IID, Tys), Args); - } - - // Simplify to target-generic binary op. - if (Action.BinaryOp) - return BinaryOperator::Create(*Action.BinaryOp, II->getArgOperand(0), - II->getArgOperand(1), II->getName()); - - // Simplify to target-generic cast op. - if (Action.CastOp) - return CastInst::Create(*Action.CastOp, II->getArgOperand(0), II->getType(), - II->getName()); - - // All that's left are the special cases. - if (!Action.Special) - return nullptr; - - switch (*Action.Special) { - case SPC_Reciprocal: - // Simplify reciprocal. - return BinaryOperator::Create( - Instruction::FDiv, ConstantFP::get(II->getArgOperand(0)->getType(), 1), - II->getArgOperand(0), II->getName()); - } - llvm_unreachable("All SpecialCase enumerators should be handled in switch."); -} - -Optional<Instruction *> -NVPTXTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { - if (Instruction *I = simplifyNvvmIntrinsic(&II, IC)) { - return I; - } - return None; -} - +// Convert NVVM intrinsics to target-generic LLVM code where possible. +static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) { + // Each NVVM intrinsic we can simplify can be replaced with one of: + // + // * an LLVM intrinsic, + // * an LLVM cast operation, + // * an LLVM binary operation, or + // * ad-hoc LLVM IR for the particular operation. + + // Some transformations are only valid when the module's + // flush-denormals-to-zero (ftz) setting is true/false, whereas other + // transformations are valid regardless of the module's ftz setting. + enum FtzRequirementTy { + FTZ_Any, // Any ftz setting is ok. + FTZ_MustBeOn, // Transformation is valid only if ftz is on. + FTZ_MustBeOff, // Transformation is valid only if ftz is off. + }; + // Classes of NVVM intrinsics that can't be replaced one-to-one with a + // target-generic intrinsic, cast op, or binary op but that we can nonetheless + // simplify. + enum SpecialCase { + SPC_Reciprocal, + }; + + // SimplifyAction is a poor-man's variant (plus an additional flag) that + // represents how to replace an NVVM intrinsic with target-generic LLVM IR. + struct SimplifyAction { + // Invariant: At most one of these Optionals has a value. + Optional<Intrinsic::ID> IID; + Optional<Instruction::CastOps> CastOp; + Optional<Instruction::BinaryOps> BinaryOp; + Optional<SpecialCase> Special; + + FtzRequirementTy FtzRequirement = FTZ_Any; + + SimplifyAction() = default; + + SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq) + : IID(IID), FtzRequirement(FtzReq) {} + + // Cast operations don't have anything to do with FTZ, so we skip that + // argument. + SimplifyAction(Instruction::CastOps CastOp) : CastOp(CastOp) {} + + SimplifyAction(Instruction::BinaryOps BinaryOp, FtzRequirementTy FtzReq) + : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {} + + SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq) + : Special(Special), FtzRequirement(FtzReq) {} + }; + + // Try to generate a SimplifyAction describing how to replace our + // IntrinsicInstr with target-generic LLVM IR. + const SimplifyAction Action = [II]() -> SimplifyAction { + switch (II->getIntrinsicID()) { + // NVVM intrinsics that map directly to LLVM intrinsics. + case Intrinsic::nvvm_ceil_d: + return {Intrinsic::ceil, FTZ_Any}; + case Intrinsic::nvvm_ceil_f: + return {Intrinsic::ceil, FTZ_MustBeOff}; + case Intrinsic::nvvm_ceil_ftz_f: + return {Intrinsic::ceil, FTZ_MustBeOn}; + case Intrinsic::nvvm_fabs_d: + return {Intrinsic::fabs, FTZ_Any}; + case Intrinsic::nvvm_fabs_f: + return {Intrinsic::fabs, FTZ_MustBeOff}; + case Intrinsic::nvvm_fabs_ftz_f: + return {Intrinsic::fabs, FTZ_MustBeOn}; + case Intrinsic::nvvm_floor_d: + return {Intrinsic::floor, FTZ_Any}; + case Intrinsic::nvvm_floor_f: + return {Intrinsic::floor, FTZ_MustBeOff}; + case Intrinsic::nvvm_floor_ftz_f: + return {Intrinsic::floor, FTZ_MustBeOn}; + case Intrinsic::nvvm_fma_rn_d: + return {Intrinsic::fma, FTZ_Any}; + case Intrinsic::nvvm_fma_rn_f: + return {Intrinsic::fma, FTZ_MustBeOff}; + case Intrinsic::nvvm_fma_rn_ftz_f: + return {Intrinsic::fma, FTZ_MustBeOn}; + case Intrinsic::nvvm_fmax_d: + return {Intrinsic::maxnum, FTZ_Any}; + case Intrinsic::nvvm_fmax_f: + return {Intrinsic::maxnum, FTZ_MustBeOff}; + case Intrinsic::nvvm_fmax_ftz_f: + return {Intrinsic::maxnum, FTZ_MustBeOn}; + case Intrinsic::nvvm_fmin_d: + return {Intrinsic::minnum, FTZ_Any}; + case Intrinsic::nvvm_fmin_f: + return {Intrinsic::minnum, FTZ_MustBeOff}; + case Intrinsic::nvvm_fmin_ftz_f: + return {Intrinsic::minnum, FTZ_MustBeOn}; + case Intrinsic::nvvm_round_d: + return {Intrinsic::round, FTZ_Any}; + case Intrinsic::nvvm_round_f: + return {Intrinsic::round, FTZ_MustBeOff}; + case Intrinsic::nvvm_round_ftz_f: + return {Intrinsic::round, FTZ_MustBeOn}; + case Intrinsic::nvvm_sqrt_rn_d: + return {Intrinsic::sqrt, FTZ_Any}; + case Intrinsic::nvvm_sqrt_f: + // nvvm_sqrt_f is a special case. For most intrinsics, foo_ftz_f is the + // ftz version, and foo_f is the non-ftz version. But nvvm_sqrt_f adopts + // the ftz-ness of the surrounding code. sqrt_rn_f and sqrt_rn_ftz_f are + // the versions with explicit ftz-ness. + return {Intrinsic::sqrt, FTZ_Any}; + case Intrinsic::nvvm_sqrt_rn_f: + return {Intrinsic::sqrt, FTZ_MustBeOff}; + case Intrinsic::nvvm_sqrt_rn_ftz_f: + return {Intrinsic::sqrt, FTZ_MustBeOn}; + case Intrinsic::nvvm_trunc_d: + return {Intrinsic::trunc, FTZ_Any}; + case Intrinsic::nvvm_trunc_f: + return {Intrinsic::trunc, FTZ_MustBeOff}; + case Intrinsic::nvvm_trunc_ftz_f: + return {Intrinsic::trunc, FTZ_MustBeOn}; + + // NVVM intrinsics that map to LLVM cast operations. + // + // Note that llvm's target-generic conversion operators correspond to the rz + // (round to zero) versions of the nvvm conversion intrinsics, even though + // most everything else here uses the rn (round to nearest even) nvvm ops. + case Intrinsic::nvvm_d2i_rz: + case Intrinsic::nvvm_f2i_rz: + case Intrinsic::nvvm_d2ll_rz: + case Intrinsic::nvvm_f2ll_rz: + return {Instruction::FPToSI}; + case Intrinsic::nvvm_d2ui_rz: + case Intrinsic::nvvm_f2ui_rz: + case Intrinsic::nvvm_d2ull_rz: + case Intrinsic::nvvm_f2ull_rz: + return {Instruction::FPToUI}; + case Intrinsic::nvvm_i2d_rz: + case Intrinsic::nvvm_i2f_rz: + case Intrinsic::nvvm_ll2d_rz: + case Intrinsic::nvvm_ll2f_rz: + return {Instruction::SIToFP}; + case Intrinsic::nvvm_ui2d_rz: + case Intrinsic::nvvm_ui2f_rz: + case Intrinsic::nvvm_ull2d_rz: + case Intrinsic::nvvm_ull2f_rz: + return {Instruction::UIToFP}; + + // NVVM intrinsics that map to LLVM binary ops. + case Intrinsic::nvvm_add_rn_d: + return {Instruction::FAdd, FTZ_Any}; + case Intrinsic::nvvm_add_rn_f: + return {Instruction::FAdd, FTZ_MustBeOff}; + case Intrinsic::nvvm_add_rn_ftz_f: + return {Instruction::FAdd, FTZ_MustBeOn}; + case Intrinsic::nvvm_mul_rn_d: + return {Instruction::FMul, FTZ_Any}; + case Intrinsic::nvvm_mul_rn_f: + return {Instruction::FMul, FTZ_MustBeOff}; + case Intrinsic::nvvm_mul_rn_ftz_f: + return {Instruction::FMul, FTZ_MustBeOn}; + case Intrinsic::nvvm_div_rn_d: + return {Instruction::FDiv, FTZ_Any}; + case Intrinsic::nvvm_div_rn_f: + return {Instruction::FDiv, FTZ_MustBeOff}; + case Intrinsic::nvvm_div_rn_ftz_f: + return {Instruction::FDiv, FTZ_MustBeOn}; + + // The remainder of cases are NVVM intrinsics that map to LLVM idioms, but + // need special handling. + // + // We seem to be missing intrinsics for rcp.approx.{ftz.}f32, which is just + // as well. + case Intrinsic::nvvm_rcp_rn_d: + return {SPC_Reciprocal, FTZ_Any}; + case Intrinsic::nvvm_rcp_rn_f: + return {SPC_Reciprocal, FTZ_MustBeOff}; + case Intrinsic::nvvm_rcp_rn_ftz_f: + return {SPC_Reciprocal, FTZ_MustBeOn}; + + // We do not currently simplify intrinsics that give an approximate + // answer. These include: + // + // - nvvm_cos_approx_{f,ftz_f} + // - nvvm_ex2_approx_{d,f,ftz_f} + // - nvvm_lg2_approx_{d,f,ftz_f} + // - nvvm_sin_approx_{f,ftz_f} + // - nvvm_sqrt_approx_{f,ftz_f} + // - nvvm_rsqrt_approx_{d,f,ftz_f} + // - nvvm_div_approx_{ftz_d,ftz_f,f} + // - nvvm_rcp_approx_ftz_d + // + // Ideally we'd encode them as e.g. "fast call @llvm.cos", where "fast" + // means that fastmath is enabled in the intrinsic. Unfortunately only + // binary operators (currently) have a fastmath bit in SelectionDAG, so + // this information gets lost and we can't select on it. + // + // TODO: div and rcp are lowered to a binary op, so these we could in + // theory lower them to "fast fdiv". + + default: + return {}; + } + }(); + + // If Action.FtzRequirementTy is not satisfied by the module's ftz state, we + // can bail out now. (Notice that in the case that IID is not an NVVM + // intrinsic, we don't have to look up any module metadata, as + // FtzRequirementTy will be FTZ_Any.) + if (Action.FtzRequirement != FTZ_Any) { + StringRef Attr = II->getFunction() + ->getFnAttribute("denormal-fp-math-f32") + .getValueAsString(); + DenormalMode Mode = parseDenormalFPAttribute(Attr); + bool FtzEnabled = Mode.Output != DenormalMode::IEEE; + + if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn)) + return nullptr; + } + + // Simplify to target-generic intrinsic. + if (Action.IID) { + SmallVector<Value *, 4> Args(II->arg_operands()); + // All the target-generic intrinsics currently of interest to us have one + // type argument, equal to that of the nvvm intrinsic's argument. + Type *Tys[] = {II->getArgOperand(0)->getType()}; + return CallInst::Create( + Intrinsic::getDeclaration(II->getModule(), *Action.IID, Tys), Args); + } + + // Simplify to target-generic binary op. + if (Action.BinaryOp) + return BinaryOperator::Create(*Action.BinaryOp, II->getArgOperand(0), + II->getArgOperand(1), II->getName()); + + // Simplify to target-generic cast op. + if (Action.CastOp) + return CastInst::Create(*Action.CastOp, II->getArgOperand(0), II->getType(), + II->getName()); + + // All that's left are the special cases. + if (!Action.Special) + return nullptr; + + switch (*Action.Special) { + case SPC_Reciprocal: + // Simplify reciprocal. + return BinaryOperator::Create( + Instruction::FDiv, ConstantFP::get(II->getArgOperand(0)->getType(), 1), + II->getArgOperand(0), II->getName()); + } + llvm_unreachable("All SpecialCase enumerators should be handled in switch."); +} + +Optional<Instruction *> +NVPTXTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { + if (Instruction *I = simplifyNvvmIntrinsic(&II, IC)) { + return I; + } + return None; +} + int NVPTXTTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueKind Opd1Info, |