author | shadchin <shadchin@yandex-team.ru> | 2022-02-10 16:44:30 +0300
---|---|---
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:30 +0300
commit | 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch) |
tree | 012bb94d777798f1f56ac1cec429509766d05181 /contrib/libs/llvm12/lib/Target/NVPTX |
parent | 6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff) |
download | ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz |
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Target/NVPTX')
27 files changed, 501 insertions(+), 501 deletions(-)
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h b/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h index 503f0497b6..8a15343ea9 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h +++ b/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h @@ -29,7 +29,7 @@ public: const MCSubtargetInfo &STI, raw_ostream &OS) override; // Autogenerated by tblgen. - std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override; + std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override; void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); // End diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp index f275011018..d43bbf6a84 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp @@ -47,7 +47,7 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Triple &TheTriple, AscizDirective = nullptr; // not supported SupportsQuotedNames = false; SupportsExtendedDwarfLocDirective = false; - SupportsSignedData = false; + SupportsSignedData = false; // @TODO: Can we just disable this? WeakDirective = "\t// .weak\t"; diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp index d69166feb0..648b3ff0bf 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp +++ b/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp @@ -46,7 +46,7 @@ static MCRegisterInfo *createNVPTXMCRegisterInfo(const Triple &TT) { static MCSubtargetInfo * createNVPTXMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - return createNVPTXMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); + return createNVPTXMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); } static MCInstPrinter *createNVPTXMCInstPrinter(const Triple &T, diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/ya.make b/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/ya.make index 81ad30663e..fadad8ac79 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/ya.make +++ b/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/ya.make @@ -12,17 +12,17 @@ LICENSE(Apache-2.0 WITH LLVM-exception) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( - contrib/libs/llvm12 - contrib/libs/llvm12/include - contrib/libs/llvm12/lib/MC - contrib/libs/llvm12/lib/Support - contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo + contrib/libs/llvm12 + contrib/libs/llvm12/include + contrib/libs/llvm12/lib/MC + contrib/libs/llvm12/lib/Support + contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo ) ADDINCL( - ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/NVPTX - contrib/libs/llvm12/lib/Target/NVPTX - contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc + ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/NVPTX + contrib/libs/llvm12/lib/Target/NVPTX + contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc ) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTX.h b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTX.h index c2fd090da0..3a356942d5 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTX.h +++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTX.h @@ -14,7 +14,7 @@ #ifndef LLVM_LIB_TARGET_NVPTX_NVPTX_H #define LLVM_LIB_TARGET_NVPTX_NVPTX_H 
-#include "llvm/IR/PassManager.h" +#include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" @@ -48,24 +48,24 @@ FunctionPass *createNVPTXLowerAllocaPass(); MachineFunctionPass *createNVPTXPeephole(); MachineFunctionPass *createNVPTXProxyRegErasurePass(); -struct NVVMIntrRangePass : PassInfoMixin<NVVMIntrRangePass> { - NVVMIntrRangePass(); - NVVMIntrRangePass(unsigned SmVersion) : SmVersion(SmVersion) {} - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); - -private: - unsigned SmVersion; -}; - -struct NVVMReflectPass : PassInfoMixin<NVVMReflectPass> { - NVVMReflectPass(); - NVVMReflectPass(unsigned SmVersion) : SmVersion(SmVersion) {} - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); - -private: - unsigned SmVersion; -}; - +struct NVVMIntrRangePass : PassInfoMixin<NVVMIntrRangePass> { + NVVMIntrRangePass(); + NVVMIntrRangePass(unsigned SmVersion) : SmVersion(SmVersion) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + +private: + unsigned SmVersion; +}; + +struct NVVMReflectPass : PassInfoMixin<NVVMReflectPass> { + NVVMReflectPass(); + NVVMReflectPass(unsigned SmVersion) : SmVersion(SmVersion) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + +private: + unsigned SmVersion; +}; + namespace NVPTX { enum DrvInterface { NVCL, diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 38844ff4dd..e4d1256798 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -1302,8 +1302,8 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const { return "b32"; else return "u32"; - default: - break; + default: + break; } llvm_unreachable("unexpected type"); } diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXFrameLowering.cpp index 024e51e5f4..a14afad671 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXFrameLowering.cpp +++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXFrameLowering.cpp @@ -63,13 +63,13 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF, } } -StackOffset -NVPTXFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, - Register &FrameReg) const { +StackOffset +NVPTXFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, + Register &FrameReg) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); FrameReg = NVPTX::VRDepot; - return StackOffset::getFixed(MFI.getObjectOffset(FI) - - getOffsetOfLocalArea()); + return StackOffset::getFixed(MFI.getObjectOffset(FI) - + getOffsetOfLocalArea()); } void NVPTXFrameLowering::emitEpilogue(MachineFunction &MF, diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXFrameLowering.h b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXFrameLowering.h index a5d49ac3ab..93f24fca40 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXFrameLowering.h +++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXFrameLowering.h @@ -14,7 +14,7 @@ #define LLVM_LIB_TARGET_NVPTX_NVPTXFRAMELOWERING_H #include "llvm/CodeGen/TargetFrameLowering.h" -#include "llvm/Support/TypeSize.h" +#include "llvm/Support/TypeSize.h" namespace llvm { @@ -25,8 +25,8 @@ public: bool hasFP(const MachineFunction &MF) const override; void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; - StackOffset 
getFrameIndexReference(const MachineFunction &MF, int FI, - Register &FrameReg) const override; + StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, + Register &FrameReg) const override; MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 08f4ab87c6..c4df5dc36f 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -700,11 +700,11 @@ static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget, bool IsKernelFn = isKernelFunction(F->getFunction()); - // We use getUnderlyingObjects() here instead of getUnderlyingObject() mainly + // We use getUnderlyingObjects() here instead of getUnderlyingObject() mainly // because the former looks through phi nodes while the latter does not. We // need to look through phi nodes to handle pointer induction variables. SmallVector<const Value *, 8> Objs; - getUnderlyingObjects(N->getMemOperand()->getValue(), Objs); + getUnderlyingObjects(N->getMemOperand()->getValue(), Objs); return all_of(Objs, [&](const Value *V) { if (auto *A = dyn_cast<const Argument>(V)) @@ -2854,7 +2854,7 @@ bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) { } // Copy over operands - SmallVector<SDValue, 8> Ops(drop_begin(N->ops())); + SmallVector<SDValue, 8> Ops(drop_begin(N->ops())); Ops.push_back(N->getOperand(0)); // Move chain to the back. ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops)); @@ -3363,7 +3363,7 @@ bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) { } // Copy over operands - SmallVector<SDValue, 8> Ops(drop_begin(N->ops())); + SmallVector<SDValue, 8> Ops(drop_begin(N->ops())); Ops.push_back(N->getOperand(0)); // Move chain to the back. 
ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops)); diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXISelLowering.cpp index 8860e90f28..753f3bf777 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -19,7 +19,7 @@ #include "NVPTXTargetObjectFile.h" #include "NVPTXUtilities.h" #include "llvm/ADT/APInt.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/Analysis.h" @@ -65,7 +65,7 @@ using namespace llvm; -static std::atomic<unsigned> GlobalUniqueCallSite; +static std::atomic<unsigned> GlobalUniqueCallSite; static cl::opt<bool> sched4reg( "nvptx-sched4reg", @@ -1243,7 +1243,7 @@ NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { std::string NVPTXTargetLowering::getPrototype( const DataLayout &DL, Type *retTy, const ArgListTy &Args, const SmallVectorImpl<ISD::OutputArg> &Outs, MaybeAlign retAlignment, - const CallBase &CB, unsigned UniqueCallSite) const { + const CallBase &CB, unsigned UniqueCallSite) const { auto PtrVT = getPointerTy(DL); bool isABI = (STI.getSmVersion() >= 20); @@ -1252,7 +1252,7 @@ std::string NVPTXTargetLowering::getPrototype( return ""; std::stringstream O; - O << "prototype_" << UniqueCallSite << " : .callprototype "; + O << "prototype_" << UniqueCallSite << " : .callprototype "; if (retTy->getTypeID() == Type::VoidTyID) { O << "()"; @@ -1422,9 +1422,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (!isABI) return Chain; - unsigned UniqueCallSite = GlobalUniqueCallSite.fetch_add(1); + unsigned UniqueCallSite = GlobalUniqueCallSite.fetch_add(1); SDValue tempChain = Chain; - Chain = DAG.getCALLSEQ_START(Chain, UniqueCallSite, 0, dl); + Chain = DAG.getCALLSEQ_START(Chain, UniqueCallSite, 0, dl); SDValue InFlag = Chain.getValue(1); unsigned paramCount = 0; @@ -1679,8 +1679,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // The prototype is embedded in a string and put as the operand for a // CallPrototype SDNode which will print out to the value of the string. 
SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue); - std::string Proto = - getPrototype(DL, RetTy, Args, Outs, retAlignment, *CB, UniqueCallSite); + std::string Proto = + getPrototype(DL, RetTy, Args, Outs, retAlignment, *CB, UniqueCallSite); const char *ProtoStr = nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str(); SDValue ProtoOps[] = { @@ -1736,8 +1736,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (isIndirectCall) { SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue PrototypeOps[] = { - Chain, DAG.getConstant(UniqueCallSite, dl, MVT::i32), InFlag}; + SDValue PrototypeOps[] = { + Chain, DAG.getConstant(UniqueCallSite, dl, MVT::i32), InFlag}; Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps); InFlag = Chain.getValue(1); } @@ -1833,9 +1833,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } } - Chain = DAG.getCALLSEQ_END( - Chain, DAG.getIntPtrConstant(UniqueCallSite, dl, true), - DAG.getIntPtrConstant(UniqueCallSite + 1, dl, true), InFlag, dl); + Chain = DAG.getCALLSEQ_END( + Chain, DAG.getIntPtrConstant(UniqueCallSite, dl, true), + DAG.getIntPtrConstant(UniqueCallSite + 1, dl, true), InFlag, dl); InFlag = Chain.getValue(1); // Append ProxyReg instructions to the chain to make sure that `callseq_end` @@ -2437,7 +2437,7 @@ static bool isImageOrSamplerVal(const Value *arg, const Module *context) { if (!STy || STy->isLiteral()) return false; - return llvm::is_contained(specialTypes, STy->getName()); + return llvm::is_contained(specialTypes, STy->getName()); } SDValue NVPTXTargetLowering::LowerFormalArguments( @@ -2588,8 +2588,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( // Extend the element if necessary (e.g. an i8 is loaded // into an i16 register) if (Ins[InsIdx].VT.isInteger() && - Ins[InsIdx].VT.getFixedSizeInBits() > - LoadVT.getFixedSizeInBits()) { + Ins[InsIdx].VT.getFixedSizeInBits() > + LoadVT.getFixedSizeInBits()) { unsigned Extend = Ins[InsIdx].Flags.isSExt() ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; Elt = DAG.getNode(Extend, dl, Ins[InsIdx].VT, Elt); @@ -4563,13 +4563,13 @@ static bool IsMulWideOperandDemotable(SDValue Op, if (Op.getOpcode() == ISD::SIGN_EXTEND || Op.getOpcode() == ISD::SIGN_EXTEND_INREG) { EVT OrigVT = Op.getOperand(0).getValueType(); - if (OrigVT.getFixedSizeInBits() <= OptSize) { + if (OrigVT.getFixedSizeInBits() <= OptSize) { S = Signed; return true; } } else if (Op.getOpcode() == ISD::ZERO_EXTEND) { EVT OrigVT = Op.getOperand(0).getValueType(); - if (OrigVT.getFixedSizeInBits() <= OptSize) { + if (OrigVT.getFixedSizeInBits() <= OptSize) { S = Unsigned; return true; } diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXISelLowering.h b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXISelLowering.h index 13829b924d..660ca65e39 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXISelLowering.h @@ -491,8 +491,8 @@ public: std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &, const SmallVectorImpl<ISD::OutputArg> &, - MaybeAlign retAlignment, const CallBase &CB, - unsigned UniqueCallSite) const; + MaybeAlign retAlignment, const CallBase &CB, + unsigned UniqueCallSite) const; SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXInstrFormats.td b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXInstrFormats.td index 9220f4766d..9410b463bb 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXInstrFormats.td +++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXInstrFormats.td @@ -31,14 +31,14 @@ class NVPTXInst<dag outs, dag ins, string asmstr, list<dag> pattern> // TSFlagFields bits<4> VecInstType = VecNOP.Value; - bit IsSimpleMove = false; - bit IsLoad = false; - bit IsStore = false; + bit IsSimpleMove = false; + bit IsLoad = false; + bit IsStore = false; - bit IsTex = false; - bit IsSust = false; - bit IsSurfTexQuery = false; - bit IsTexModeUnified = false; + bit IsTex = false; + bit IsSust = false; + bit IsSurfTexQuery = false; + bit IsTexModeUnified = false; // The following field is encoded as log2 of the vector size minus one, // with 0 meaning the operation is not a surface instruction. For example, @@ -46,13 +46,13 @@ class NVPTXInst<dag outs, dag ins, string asmstr, list<dag> pattern> // 2**(2-1) = 2. 
bits<2> IsSuld = 0; - let TSFlags{3...0} = VecInstType; - let TSFlags{4...4} = IsSimpleMove; - let TSFlags{5...5} = IsLoad; - let TSFlags{6...6} = IsStore; - let TSFlags{7} = IsTex; - let TSFlags{9...8} = IsSuld; - let TSFlags{10} = IsSust; - let TSFlags{11} = IsSurfTexQuery; - let TSFlags{12} = IsTexModeUnified; + let TSFlags{3...0} = VecInstType; + let TSFlags{4...4} = IsSimpleMove; + let TSFlags{5...5} = IsLoad; + let TSFlags{6...6} = IsStore; + let TSFlags{7} = IsTex; + let TSFlags{9...8} = IsSuld; + let TSFlags{10} = IsSust; + let TSFlags{11} = IsSurfTexQuery; + let TSFlags{12} = IsTexModeUnified; } diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXInstrInfo.td b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXInstrInfo.td index 381ed4dd68..345c4c5142 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -13,7 +13,7 @@ include "NVPTXInstrFormats.td" // A NOP instruction -let hasSideEffects = false in { +let hasSideEffects = false in { def NOP : NVPTXInst<(outs), (ins), "", []>; } @@ -137,7 +137,7 @@ def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">; def hasHWROT32 : Predicate<"Subtarget->hasHWROT32()">; def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">; -def True : Predicate<"true">; +def True : Predicate<"true">; def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">; def hasPTX60 : Predicate<"Subtarget->getPTXVersion() >= 60">; @@ -407,7 +407,7 @@ multiclass F2<string OpcStr, SDNode OpNode> { // Type Conversion //----------------------------------- -let hasSideEffects = false in { +let hasSideEffects = false in { // Generate a cvt to the given type from all possible types. Each instance // takes a CvtMode immediate that defines the conversion mode to use. It can // be CvtNONE to omit a conversion mode. @@ -1022,12 +1022,12 @@ multiclass FMA_F16<string OpcStr, RegisterClass RC, Predicate Pred> { } defm FMA16_ftz : FMA_F16<"fma.rn.ftz.f16", Float16Regs, doF32FTZ>; -defm FMA16 : FMA_F16<"fma.rn.f16", Float16Regs, True>; +defm FMA16 : FMA_F16<"fma.rn.f16", Float16Regs, True>; defm FMA16x2_ftz : FMA_F16<"fma.rn.ftz.f16x2", Float16x2Regs, doF32FTZ>; -defm FMA16x2 : FMA_F16<"fma.rn.f16x2", Float16x2Regs, True>; +defm FMA16x2 : FMA_F16<"fma.rn.f16x2", Float16x2Regs, True>; defm FMA32_ftz : FMA<"fma.rn.ftz.f32", Float32Regs, f32imm, doF32FTZ>; -defm FMA32 : FMA<"fma.rn.f32", Float32Regs, f32imm, True>; -defm FMA64 : FMA<"fma.rn.f64", Float64Regs, f64imm, True>; +defm FMA32 : FMA<"fma.rn.f32", Float32Regs, f32imm, True>; +defm FMA64 : FMA<"fma.rn.f64", Float64Regs, f64imm, True>; // sin/cos def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), @@ -1367,7 +1367,7 @@ multiclass BFE<string TyStr, RegisterClass RC> { !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>; } -let hasSideEffects = false in { +let hasSideEffects = false in { defm BFE_S32 : BFE<"s32", Int32Regs>; defm BFE_U32 : BFE<"u32", Int32Regs>; defm BFE_S64 : BFE<"s64", Int64Regs>; @@ -1381,7 +1381,7 @@ let hasSideEffects = false in { // FIXME: This doesn't cover versions of set and setp that combine with a // boolean predicate, e.g. setp.eq.and.b16. -let hasSideEffects = false in { +let hasSideEffects = false in { multiclass SETP<string TypeStr, RegisterClass RC, Operand ImmCls> { def rr : NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, RC:$b, CmpMode:$cmp), @@ -1427,7 +1427,7 @@ def SETP_f16x2rr : // "set.CmpOp{.ftz}.dtype.stype", where dtype is the type of the destination // reg, either u32, s32, or f32. 
Anyway these aren't used at the moment. -let hasSideEffects = false in { +let hasSideEffects = false in { multiclass SET<string TypeStr, RegisterClass RC, Operand ImmCls> { def rr : NVPTXInst<(outs Int32Regs:$dst), (ins RC:$a, RC:$b, CmpMode:$cmp), @@ -1462,7 +1462,7 @@ defm SET_f64 : SET<"f64", Float64Regs, f64imm>; // selp instructions that don't have any pattern matches; we explicitly use // them within this file. -let hasSideEffects = false in { +let hasSideEffects = false in { multiclass SELP<string TypeStr, RegisterClass RC, Operand ImmCls> { def rr : NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, Int1Regs:$p), @@ -1572,7 +1572,7 @@ def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a), [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>; // Get pointer to local stack. -let hasSideEffects = false in { +let hasSideEffects = false in { def MOV_DEPOT_ADDR : NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num), "mov.u32 \t$d, __local_depot$num;", []>; def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num), @@ -1988,7 +1988,7 @@ def ProxyReg : SDNode<"NVPTXISD::ProxyReg", SDTProxyRegProfile, [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -let mayLoad = true in { +let mayLoad = true in { class LoadParamMemInst<NVPTXRegClass regclass, string opstr> : NVPTXInst<(outs regclass:$dst), (ins i32imm:$b), !strconcat("ld.param", opstr, " \t$dst, [retval0+$b];"), @@ -2013,7 +2013,7 @@ class LoadParamRegInst<NVPTXRegClass regclass, string opstr> : !strconcat("mov", opstr, " \t$dst, retval$b;"), [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>; -let mayStore = true in { +let mayStore = true in { class StoreParamInst<NVPTXRegClass regclass, string opstr> : NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b), !strconcat("st.param", opstr, " \t[param$a+$b], $val;"), @@ -2823,7 +2823,7 @@ def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b), (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; -let hasSideEffects = false in { +let hasSideEffects = false in { // pack a set of smaller int registers to a larger int register def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d), (ins Int16Regs:$s1, Int16Regs:$s2, @@ -2856,7 +2856,7 @@ let hasSideEffects = false in { } -let hasSideEffects = false in { +let hasSideEffects = false in { // Extract element of f16x2 register. PTX does not provide any way // to access elements of f16x2 vector directly, so we need to // extract it using a temporary register. @@ -2899,7 +2899,7 @@ let hasSideEffects = false in { } // Count leading zeros -let hasSideEffects = false in { +let hasSideEffects = false in { def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), "clz.b32 \t$d, $a;", []>; def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), @@ -2937,7 +2937,7 @@ def : Pat<(i32 (zext (i16 (ctlz Int16Regs:$a)))), (SUBi32ri (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), 16)>; // Population count -let hasSideEffects = false in { +let hasSideEffects = false in { def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), "popc.b32 \t$d, $a;", []>; def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXIntrinsics.td b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXIntrinsics.td index 8ccd47c0fc..d9d586a0a6 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -51,19 +51,19 @@ def ptx : PTX; // Generates list of n sequential register names. // E.g. 
RegNames<3,"r">.ret -> ["r0", "r1", "r2" ] class RegSeq<int n, string prefix> { - list<string> ret = !if(n, !listconcat(RegSeq<!sub(n, 1), prefix>.ret, - [prefix # !sub(n, 1)]), + list<string> ret = !if(n, !listconcat(RegSeq<!sub(n, 1), prefix>.ret, + [prefix # !sub(n, 1)]), []); } class THREADMASK_INFO<bit sync> { - list<bit> ret = !if(sync, [0, 1], [0]); + list<bit> ret = !if(sync, [0, 1], [0]); } //----------------------------------- // Synchronization and shuffle functions //----------------------------------- -let isConvergent = true in { +let isConvergent = true in { def INT_BARRIER0 : NVPTXInst<(outs), (ins), "bar.sync \t0;", [(int_nvvm_barrier0)]>; @@ -173,12 +173,12 @@ class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred, )]; } -foreach sync = [false, true] in { +foreach sync = [false, true] in { foreach mode = ["up", "down", "bfly", "idx"] in { foreach regclass = ["i32", "f32"] in { - foreach return_pred = [false, true] in { - foreach offset_imm = [false, true] in { - foreach mask_imm = [false, true] in { + foreach return_pred = [false, true] in { + foreach offset_imm = [false, true] in { + foreach mask_imm = [false, true] in { foreach threadmask_imm = THREADMASK_INFO<sync>.ret in { def : SHFL_INSTR<sync, mode, regclass, return_pred, offset_imm, mask_imm, threadmask_imm>, @@ -274,7 +274,7 @@ defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_s defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p, i64imm>; -} // isConvergent = true +} // isConvergent = true //----------------------------------- // Explicit Memory Fence Functions @@ -1548,7 +1548,7 @@ multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr, !cast<Intrinsic>( "int_nvvm_atomic_" # OpStr # "_" # SpaceStr # "_" # IntTypeStr - # !if(!empty(ScopeStr), "", "_" # ScopeStr)), + # !if(!empty(ScopeStr), "", "_" # ScopeStr)), regclass, ImmType, Imm, ImmTy, Preds>; } multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr, @@ -1562,7 +1562,7 @@ multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr, !cast<Intrinsic>( "int_nvvm_atomic_" # OpStr # "_" # SpaceStr # "_" # IntTypeStr - # !if(!empty(ScopeStr), "", "_" # ScopeStr)), + # !if(!empty(ScopeStr), "", "_" # ScopeStr)), regclass, ImmType, Imm, ImmTy, Preds>; } @@ -2131,7 +2131,7 @@ def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt), (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>, Requires<[noHWROT32]> ; -let hasSideEffects = false in { +let hasSideEffects = false in { def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), !strconcat("{{\n\t", ".reg .b32 %dummy;\n\t", @@ -2147,7 +2147,7 @@ let hasSideEffects = false in { []> ; } -let hasSideEffects = false in { +let hasSideEffects = false in { def PACK_TWO_INT32 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi), "mov.b64 \t$dst, {{$lo, $hi}};", []> ; @@ -2159,7 +2159,7 @@ def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src), // Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so // no side effects. 
-let hasSideEffects = false in { +let hasSideEffects = false in { def SHF_L_WRAP_B32_IMM : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), @@ -2242,7 +2242,7 @@ def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt), // also defined in NVPTXReplaceImageHandles.cpp // texmode_independent -let IsTex = true, IsTexModeUnified = false in { +let IsTex = true, IsTexModeUnified = false in { // Texture fetch instructions using handles def TEX_1D_F32_S32 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, @@ -2925,7 +2925,7 @@ def TLD4_A_2D_U32_F32 // texmode_unified -let IsTex = true, IsTexModeUnified = true in { +let IsTex = true, IsTexModeUnified = true in { // Texture fetch instructions using handles def TEX_UNIFIED_1D_F32_S32 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, @@ -3610,7 +3610,7 @@ def TLD4_UNIFIED_A_2D_U32_F32 //=== Surface load instructions // .clamp variant -let IsSuld = true in { +let IsSuld = true in { def SULD_1D_I8_CLAMP : NVPTXInst<(outs Int16Regs:$r), (ins Int64Regs:$s, Int32Regs:$x), @@ -3922,7 +3922,7 @@ def SULD_3D_V4I32_CLAMP // .trap variant -let IsSuld = true in { +let IsSuld = true in { def SULD_1D_I8_TRAP : NVPTXInst<(outs Int16Regs:$r), (ins Int64Regs:$s, Int32Regs:$x), @@ -4233,7 +4233,7 @@ def SULD_3D_V4I32_TRAP } // .zero variant -let IsSuld = true in { +let IsSuld = true in { def SULD_1D_I8_ZERO : NVPTXInst<(outs Int16Regs:$r), (ins Int64Regs:$s, Int32Regs:$x), @@ -4547,7 +4547,7 @@ def SULD_3D_V4I32_ZERO // Texture Query Intrinsics //----------------------------------- -let IsSurfTexQuery = true in { +let IsSurfTexQuery = true in { def TXQ_CHANNEL_ORDER : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "txq.channel_order.b32 \t$d, [$a];", @@ -4604,7 +4604,7 @@ def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a), // Surface Query Intrinsics //----------------------------------- -let IsSurfTexQuery = true in { +let IsSurfTexQuery = true in { def SUQ_CHANNEL_ORDER : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "suq.channel_order.b32 \t$d, [$a];", @@ -4663,7 +4663,7 @@ def ISTYPEP_TEXTURE //===- Surface Stores -----------------------------------------------------===// -let IsSust = true in { +let IsSust = true in { // Unformatted // .clamp variant def SUST_B_1D_B8_CLAMP @@ -7361,13 +7361,13 @@ class WMMA_REGINFO<WMMA_REGS r> !eq(ptx_elt_type, "b1") : Int32Regs); // Instruction input/output arguments for the fragment. - list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs)); + list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs)); // List of register names for the fragment -- ["ra0", "ra1",...] list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret; // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction. - string regstring = "{{$" # !interleave(reg_names, ", $") # "}}"; + string regstring = "{{$" # !interleave(reg_names, ", $") # "}}"; // Predicates for particular fragment variant. Technically those are // per-instruction predicates, but currently all fragments that can be used in @@ -7450,13 +7450,13 @@ class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride, // To match the right intrinsic, we need to build AS-constrained PatFrag. // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....). 
dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src)); - dag PFOperandsIntr = !if(WithStride, (Intr node:$src, node:$ldm), (Intr node:$src)); + dag PFOperandsIntr = !if(WithStride, (Intr node:$src, node:$ldm), (Intr node:$src)); // Build PatFrag that only matches particular address space. PatFrag IntrFrag = PatFrag<PFOperands, - PFOperandsIntr, + PFOperandsIntr, !cond(!eq(Space, ".shared"): AS_match.shared, !eq(Space, ".global"): AS_match.global, - true: AS_match.generic)>; + true: AS_match.generic)>; // Build AS-constrained pattern. let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret; @@ -7491,14 +7491,14 @@ class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space, // To match the right intrinsic, we need to build AS-constrained PatFrag. // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....). dag PFOperands = !con((ops node:$dst), - !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names), + !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names), !if(WithStride, (ops node:$ldm), (ops))); // Build PatFrag that only matches particular address space. PatFrag IntrFrag = PatFrag<PFOperands, !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)), !cond(!eq(Space, ".shared"): AS_match.shared, !eq(Space, ".global"): AS_match.global, - true: AS_match.generic)>; + true: AS_match.generic)>; // Build AS-constrained pattern. let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret; @@ -7519,14 +7519,14 @@ class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space, // Create all load/store variants defset list<WMMA_INSTR> MMA_LDSTs = { foreach layout = ["row", "col"] in { - foreach stride = [false, true] in { + foreach stride = [false, true] in { foreach space = [".global", ".shared", ""] in { foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in { foreach frag = NVVM_MMA_OPS.all_ld_ops in - if NVVM_MMA_SUPPORTED<[frag], layout>.ret then + if NVVM_MMA_SUPPORTED<[frag], layout>.ret then def : WMMA_LOAD<WMMA_REGINFO<frag>, layout, space, stride, addr>; foreach frag = NVVM_MMA_OPS.all_st_ops in - if NVVM_MMA_SUPPORTED<[frag], layout>.ret then + if NVVM_MMA_SUPPORTED<[frag], layout>.ret then def : WMMA_STORE_D<WMMA_REGINFO<frag>, layout, space, stride, addr>; } // addr } // space @@ -7584,7 +7584,7 @@ defset list<WMMA_INSTR> MMAs = { foreach layout_b = ["row", "col"] in { foreach satf = [0, 1] in { foreach op = NVVM_MMA_OPS.all_mma_ops in { - if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then { + if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then { def : WMMA_MMA<WMMA_REGINFO<op[0]>, WMMA_REGINFO<op[1]>, WMMA_REGINFO<op[2]>, diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXLowerArgs.cpp index fd58ff1378..0048984968 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXLowerArgs.cpp +++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXLowerArgs.cpp @@ -172,12 +172,12 @@ void NVPTXLowerArgs::handleByValParam(Argument *Arg) { Value *ArgInParam = new AddrSpaceCastInst( Arg, PointerType::get(StructType, ADDRESS_SPACE_PARAM), Arg->getName(), FirstInst); - // Be sure to propagate alignment to this load; LLVM doesn't know that NVPTX - // addrspacecast preserves alignment. Since params are constant, this load is - // definitely not volatile. + // Be sure to propagate alignment to this load; LLVM doesn't know that NVPTX + // addrspacecast preserves alignment. Since params are constant, this load is + // definitely not volatile. 
LoadInst *LI = - new LoadInst(StructType, ArgInParam, Arg->getName(), - /*isVolatile=*/false, AllocA->getAlign(), FirstInst); + new LoadInst(StructType, ArgInParam, Arg->getName(), + /*isVolatile=*/false, AllocA->getAlign(), FirstInst); new StoreInst(LI, AllocA, FirstInst); } @@ -218,7 +218,7 @@ bool NVPTXLowerArgs::runOnKernelFunction(Function &F) { for (auto &I : B) { if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { if (LI->getType()->isPointerTy()) { - Value *UO = getUnderlyingObject(LI->getPointerOperand()); + Value *UO = getUnderlyingObject(LI->getPointerOperand()); if (Argument *Arg = dyn_cast<Argument>(UO)) { if (Arg->hasByValAttr()) { // LI is a load from a pointer within a byval kernel parameter. diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp index 756355f75e..9829fa416b 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp +++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp @@ -69,8 +69,8 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) { "operand of a DBG_VALUE machine instruction"); Register Reg; int64_t Offset = - TFI.getFrameIndexReference(MF, MI.getOperand(0).getIndex(), Reg) - .getFixed(); + TFI.getFrameIndexReference(MF, MI.getOperand(0).getIndex(), Reg) + .getFixed(); MI.getOperand(0).ChangeToRegister(Reg, /*isDef=*/false); MI.getOperand(0).setIsDebug(); auto *DIExpr = DIExpression::prepend( diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXRegisterInfo.td b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXRegisterInfo.td index 19895a20ba..7d6dff48d1 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXRegisterInfo.td +++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXRegisterInfo.td @@ -30,7 +30,7 @@ def VRDepot : NVPTXReg<"%Depot">; // We use virtual registers, but define a few physical registers here to keep // SDAG and the MachineInstr layers happy. -foreach i = 0...4 in { +foreach i = 0...4 in { def P#i : NVPTXReg<"%p"#i>; // Predicate def RS#i : NVPTXReg<"%rs"#i>; // 16-bit def R#i : NVPTXReg<"%r"#i>; // 32-bit @@ -47,7 +47,7 @@ foreach i = 0...4 in { def da#i : NVPTXReg<"%da"#i>; } -foreach i = 0...31 in { +foreach i = 0...31 in { def ENVREG#i : NVPTXReg<"%envreg"#i>; } diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXSubtarget.cpp b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXSubtarget.cpp index 05c20369ab..9d998619f8 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -35,7 +35,7 @@ NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU, // Provide the default CPU if we don't have one. TargetName = std::string(CPU.empty() ? 
"sm_20" : CPU); - ParseSubtargetFeatures(TargetName, /*TuneCPU*/ TargetName, FS); + ParseSubtargetFeatures(TargetName, /*TuneCPU*/ TargetName, FS); // Set default to PTX 3.2 (CUDA 5.5) if (PTXVersion == 0) { @@ -48,9 +48,9 @@ NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU, NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const NVPTXTargetMachine &TM) - : NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0), - SmVersion(20), TM(TM), InstrInfo(), - TLInfo(TM, initializeSubtargetDependencies(CPU, FS)), FrameLowering() {} + : NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0), + SmVersion(20), TM(TM), InstrInfo(), + TLInfo(TM, initializeSubtargetDependencies(CPU, FS)), FrameLowering() {} bool NVPTXSubtarget::hasImageHandles() const { // Enable handles for Kepler+, where CUDA supports indirect surfaces and diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXSubtarget.h b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXSubtarget.h index 9a249d3da3..886f200160 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXSubtarget.h @@ -83,7 +83,7 @@ public: unsigned getPTXVersion() const { return PTXVersion; } NVPTXSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); - void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); + void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); }; } // End llvm namespace diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetMachine.cpp index f1a82f1cf6..57fe1b8fa8 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -24,7 +24,7 @@ #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/Pass.h" -#include "llvm/Passes/PassBuilder.h" +#include "llvm/Passes/PassBuilder.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetMachine.h" @@ -171,11 +171,11 @@ public: void addFastRegAlloc() override; void addOptimizedRegAlloc() override; - bool addRegAssignAndRewriteFast() override { + bool addRegAssignAndRewriteFast() override { llvm_unreachable("should not be used"); } - bool addRegAssignAndRewriteOptimized() override { + bool addRegAssignAndRewriteOptimized() override { llvm_unreachable("should not be used"); } @@ -206,32 +206,32 @@ void NVPTXTargetMachine::adjustPassManager(PassManagerBuilder &Builder) { }); } -void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB, - bool DebugPassManager) { - PB.registerPipelineParsingCallback( - [](StringRef PassName, FunctionPassManager &PM, - ArrayRef<PassBuilder::PipelineElement>) { - if (PassName == "nvvm-reflect") { - PM.addPass(NVVMReflectPass()); - return true; - } - if (PassName == "nvvm-intr-range") { - PM.addPass(NVVMIntrRangePass()); - return true; - } - return false; - }); - - PB.registerPipelineStartEPCallback( - [this, DebugPassManager](ModulePassManager &PM, - PassBuilder::OptimizationLevel Level) { - FunctionPassManager FPM(DebugPassManager); - FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion())); - FPM.addPass(NVVMIntrRangePass(Subtarget.getSmVersion())); - PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); - }); -} - +void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB, + bool DebugPassManager) { + 
PB.registerPipelineParsingCallback( + [](StringRef PassName, FunctionPassManager &PM, + ArrayRef<PassBuilder::PipelineElement>) { + if (PassName == "nvvm-reflect") { + PM.addPass(NVVMReflectPass()); + return true; + } + if (PassName == "nvvm-intr-range") { + PM.addPass(NVVMIntrRangePass()); + return true; + } + return false; + }); + + PB.registerPipelineStartEPCallback( + [this, DebugPassManager](ModulePassManager &PM, + PassBuilder::OptimizationLevel Level) { + FunctionPassManager FPM(DebugPassManager); + FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion())); + FPM.addPass(NVVMIntrRangePass(Subtarget.getSmVersion())); + PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + }); +} + TargetTransformInfo NVPTXTargetMachine::getTargetTransformInfo(const Function &F) { return TargetTransformInfo(NVPTXTTIImpl(this, F)); diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetMachine.h b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetMachine.h index bef541c2b2..2a2defe0bf 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -62,8 +62,8 @@ public: } void adjustPassManager(PassManagerBuilder &) override; - void registerPassBuilderCallbacks(PassBuilder &PB, - bool DebugPassManager) override; + void registerPassBuilderCallbacks(PassBuilder &PB, + bool DebugPassManager) override; TargetTransformInfo getTargetTransformInfo(const Function &F) override; diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index d4b2ae3840..28662d9314 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -111,263 +111,263 @@ bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) { return false; } -// Convert NVVM intrinsics to target-generic LLVM code where possible. -static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) { - // Each NVVM intrinsic we can simplify can be replaced with one of: - // - // * an LLVM intrinsic, - // * an LLVM cast operation, - // * an LLVM binary operation, or - // * ad-hoc LLVM IR for the particular operation. - - // Some transformations are only valid when the module's - // flush-denormals-to-zero (ftz) setting is true/false, whereas other - // transformations are valid regardless of the module's ftz setting. - enum FtzRequirementTy { - FTZ_Any, // Any ftz setting is ok. - FTZ_MustBeOn, // Transformation is valid only if ftz is on. - FTZ_MustBeOff, // Transformation is valid only if ftz is off. - }; - // Classes of NVVM intrinsics that can't be replaced one-to-one with a - // target-generic intrinsic, cast op, or binary op but that we can nonetheless - // simplify. - enum SpecialCase { - SPC_Reciprocal, - }; - - // SimplifyAction is a poor-man's variant (plus an additional flag) that - // represents how to replace an NVVM intrinsic with target-generic LLVM IR. - struct SimplifyAction { - // Invariant: At most one of these Optionals has a value. - Optional<Intrinsic::ID> IID; - Optional<Instruction::CastOps> CastOp; - Optional<Instruction::BinaryOps> BinaryOp; - Optional<SpecialCase> Special; - - FtzRequirementTy FtzRequirement = FTZ_Any; - - SimplifyAction() = default; - - SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq) - : IID(IID), FtzRequirement(FtzReq) {} - - // Cast operations don't have anything to do with FTZ, so we skip that - // argument. 
- SimplifyAction(Instruction::CastOps CastOp) : CastOp(CastOp) {} - - SimplifyAction(Instruction::BinaryOps BinaryOp, FtzRequirementTy FtzReq) - : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {} - - SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq) - : Special(Special), FtzRequirement(FtzReq) {} - }; - - // Try to generate a SimplifyAction describing how to replace our - // IntrinsicInstr with target-generic LLVM IR. - const SimplifyAction Action = [II]() -> SimplifyAction { - switch (II->getIntrinsicID()) { - // NVVM intrinsics that map directly to LLVM intrinsics. - case Intrinsic::nvvm_ceil_d: - return {Intrinsic::ceil, FTZ_Any}; - case Intrinsic::nvvm_ceil_f: - return {Intrinsic::ceil, FTZ_MustBeOff}; - case Intrinsic::nvvm_ceil_ftz_f: - return {Intrinsic::ceil, FTZ_MustBeOn}; - case Intrinsic::nvvm_fabs_d: - return {Intrinsic::fabs, FTZ_Any}; - case Intrinsic::nvvm_fabs_f: - return {Intrinsic::fabs, FTZ_MustBeOff}; - case Intrinsic::nvvm_fabs_ftz_f: - return {Intrinsic::fabs, FTZ_MustBeOn}; - case Intrinsic::nvvm_floor_d: - return {Intrinsic::floor, FTZ_Any}; - case Intrinsic::nvvm_floor_f: - return {Intrinsic::floor, FTZ_MustBeOff}; - case Intrinsic::nvvm_floor_ftz_f: - return {Intrinsic::floor, FTZ_MustBeOn}; - case Intrinsic::nvvm_fma_rn_d: - return {Intrinsic::fma, FTZ_Any}; - case Intrinsic::nvvm_fma_rn_f: - return {Intrinsic::fma, FTZ_MustBeOff}; - case Intrinsic::nvvm_fma_rn_ftz_f: - return {Intrinsic::fma, FTZ_MustBeOn}; - case Intrinsic::nvvm_fmax_d: - return {Intrinsic::maxnum, FTZ_Any}; - case Intrinsic::nvvm_fmax_f: - return {Intrinsic::maxnum, FTZ_MustBeOff}; - case Intrinsic::nvvm_fmax_ftz_f: - return {Intrinsic::maxnum, FTZ_MustBeOn}; - case Intrinsic::nvvm_fmin_d: - return {Intrinsic::minnum, FTZ_Any}; - case Intrinsic::nvvm_fmin_f: - return {Intrinsic::minnum, FTZ_MustBeOff}; - case Intrinsic::nvvm_fmin_ftz_f: - return {Intrinsic::minnum, FTZ_MustBeOn}; - case Intrinsic::nvvm_round_d: - return {Intrinsic::round, FTZ_Any}; - case Intrinsic::nvvm_round_f: - return {Intrinsic::round, FTZ_MustBeOff}; - case Intrinsic::nvvm_round_ftz_f: - return {Intrinsic::round, FTZ_MustBeOn}; - case Intrinsic::nvvm_sqrt_rn_d: - return {Intrinsic::sqrt, FTZ_Any}; - case Intrinsic::nvvm_sqrt_f: - // nvvm_sqrt_f is a special case. For most intrinsics, foo_ftz_f is the - // ftz version, and foo_f is the non-ftz version. But nvvm_sqrt_f adopts - // the ftz-ness of the surrounding code. sqrt_rn_f and sqrt_rn_ftz_f are - // the versions with explicit ftz-ness. - return {Intrinsic::sqrt, FTZ_Any}; - case Intrinsic::nvvm_sqrt_rn_f: - return {Intrinsic::sqrt, FTZ_MustBeOff}; - case Intrinsic::nvvm_sqrt_rn_ftz_f: - return {Intrinsic::sqrt, FTZ_MustBeOn}; - case Intrinsic::nvvm_trunc_d: - return {Intrinsic::trunc, FTZ_Any}; - case Intrinsic::nvvm_trunc_f: - return {Intrinsic::trunc, FTZ_MustBeOff}; - case Intrinsic::nvvm_trunc_ftz_f: - return {Intrinsic::trunc, FTZ_MustBeOn}; - - // NVVM intrinsics that map to LLVM cast operations. - // - // Note that llvm's target-generic conversion operators correspond to the rz - // (round to zero) versions of the nvvm conversion intrinsics, even though - // most everything else here uses the rn (round to nearest even) nvvm ops. 
- case Intrinsic::nvvm_d2i_rz: - case Intrinsic::nvvm_f2i_rz: - case Intrinsic::nvvm_d2ll_rz: - case Intrinsic::nvvm_f2ll_rz: - return {Instruction::FPToSI}; - case Intrinsic::nvvm_d2ui_rz: - case Intrinsic::nvvm_f2ui_rz: - case Intrinsic::nvvm_d2ull_rz: - case Intrinsic::nvvm_f2ull_rz: - return {Instruction::FPToUI}; - case Intrinsic::nvvm_i2d_rz: - case Intrinsic::nvvm_i2f_rz: - case Intrinsic::nvvm_ll2d_rz: - case Intrinsic::nvvm_ll2f_rz: - return {Instruction::SIToFP}; - case Intrinsic::nvvm_ui2d_rz: - case Intrinsic::nvvm_ui2f_rz: - case Intrinsic::nvvm_ull2d_rz: - case Intrinsic::nvvm_ull2f_rz: - return {Instruction::UIToFP}; - - // NVVM intrinsics that map to LLVM binary ops. - case Intrinsic::nvvm_add_rn_d: - return {Instruction::FAdd, FTZ_Any}; - case Intrinsic::nvvm_add_rn_f: - return {Instruction::FAdd, FTZ_MustBeOff}; - case Intrinsic::nvvm_add_rn_ftz_f: - return {Instruction::FAdd, FTZ_MustBeOn}; - case Intrinsic::nvvm_mul_rn_d: - return {Instruction::FMul, FTZ_Any}; - case Intrinsic::nvvm_mul_rn_f: - return {Instruction::FMul, FTZ_MustBeOff}; - case Intrinsic::nvvm_mul_rn_ftz_f: - return {Instruction::FMul, FTZ_MustBeOn}; - case Intrinsic::nvvm_div_rn_d: - return {Instruction::FDiv, FTZ_Any}; - case Intrinsic::nvvm_div_rn_f: - return {Instruction::FDiv, FTZ_MustBeOff}; - case Intrinsic::nvvm_div_rn_ftz_f: - return {Instruction::FDiv, FTZ_MustBeOn}; - - // The remainder of cases are NVVM intrinsics that map to LLVM idioms, but - // need special handling. - // - // We seem to be missing intrinsics for rcp.approx.{ftz.}f32, which is just - // as well. - case Intrinsic::nvvm_rcp_rn_d: - return {SPC_Reciprocal, FTZ_Any}; - case Intrinsic::nvvm_rcp_rn_f: - return {SPC_Reciprocal, FTZ_MustBeOff}; - case Intrinsic::nvvm_rcp_rn_ftz_f: - return {SPC_Reciprocal, FTZ_MustBeOn}; - - // We do not currently simplify intrinsics that give an approximate - // answer. These include: - // - // - nvvm_cos_approx_{f,ftz_f} - // - nvvm_ex2_approx_{d,f,ftz_f} - // - nvvm_lg2_approx_{d,f,ftz_f} - // - nvvm_sin_approx_{f,ftz_f} - // - nvvm_sqrt_approx_{f,ftz_f} - // - nvvm_rsqrt_approx_{d,f,ftz_f} - // - nvvm_div_approx_{ftz_d,ftz_f,f} - // - nvvm_rcp_approx_ftz_d - // - // Ideally we'd encode them as e.g. "fast call @llvm.cos", where "fast" - // means that fastmath is enabled in the intrinsic. Unfortunately only - // binary operators (currently) have a fastmath bit in SelectionDAG, so - // this information gets lost and we can't select on it. - // - // TODO: div and rcp are lowered to a binary op, so these we could in - // theory lower them to "fast fdiv". - - default: - return {}; - } - }(); - - // If Action.FtzRequirementTy is not satisfied by the module's ftz state, we - // can bail out now. (Notice that in the case that IID is not an NVVM - // intrinsic, we don't have to look up any module metadata, as - // FtzRequirementTy will be FTZ_Any.) - if (Action.FtzRequirement != FTZ_Any) { - StringRef Attr = II->getFunction() - ->getFnAttribute("denormal-fp-math-f32") - .getValueAsString(); - DenormalMode Mode = parseDenormalFPAttribute(Attr); - bool FtzEnabled = Mode.Output != DenormalMode::IEEE; - - if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn)) - return nullptr; - } - - // Simplify to target-generic intrinsic. - if (Action.IID) { - SmallVector<Value *, 4> Args(II->arg_operands()); - // All the target-generic intrinsics currently of interest to us have one - // type argument, equal to that of the nvvm intrinsic's argument. 
- Type *Tys[] = {II->getArgOperand(0)->getType()}; - return CallInst::Create( - Intrinsic::getDeclaration(II->getModule(), *Action.IID, Tys), Args); - } - - // Simplify to target-generic binary op. - if (Action.BinaryOp) - return BinaryOperator::Create(*Action.BinaryOp, II->getArgOperand(0), - II->getArgOperand(1), II->getName()); - - // Simplify to target-generic cast op. - if (Action.CastOp) - return CastInst::Create(*Action.CastOp, II->getArgOperand(0), II->getType(), - II->getName()); - - // All that's left are the special cases. - if (!Action.Special) - return nullptr; - - switch (*Action.Special) { - case SPC_Reciprocal: - // Simplify reciprocal. - return BinaryOperator::Create( - Instruction::FDiv, ConstantFP::get(II->getArgOperand(0)->getType(), 1), - II->getArgOperand(0), II->getName()); - } - llvm_unreachable("All SpecialCase enumerators should be handled in switch."); -} - -Optional<Instruction *> -NVPTXTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { - if (Instruction *I = simplifyNvvmIntrinsic(&II, IC)) { - return I; - } - return None; -} - +// Convert NVVM intrinsics to target-generic LLVM code where possible. +static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) { + // Each NVVM intrinsic we can simplify can be replaced with one of: + // + // * an LLVM intrinsic, + // * an LLVM cast operation, + // * an LLVM binary operation, or + // * ad-hoc LLVM IR for the particular operation. + + // Some transformations are only valid when the module's + // flush-denormals-to-zero (ftz) setting is true/false, whereas other + // transformations are valid regardless of the module's ftz setting. + enum FtzRequirementTy { + FTZ_Any, // Any ftz setting is ok. + FTZ_MustBeOn, // Transformation is valid only if ftz is on. + FTZ_MustBeOff, // Transformation is valid only if ftz is off. + }; + // Classes of NVVM intrinsics that can't be replaced one-to-one with a + // target-generic intrinsic, cast op, or binary op but that we can nonetheless + // simplify. + enum SpecialCase { + SPC_Reciprocal, + }; + + // SimplifyAction is a poor-man's variant (plus an additional flag) that + // represents how to replace an NVVM intrinsic with target-generic LLVM IR. + struct SimplifyAction { + // Invariant: At most one of these Optionals has a value. + Optional<Intrinsic::ID> IID; + Optional<Instruction::CastOps> CastOp; + Optional<Instruction::BinaryOps> BinaryOp; + Optional<SpecialCase> Special; + + FtzRequirementTy FtzRequirement = FTZ_Any; + + SimplifyAction() = default; + + SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq) + : IID(IID), FtzRequirement(FtzReq) {} + + // Cast operations don't have anything to do with FTZ, so we skip that + // argument. + SimplifyAction(Instruction::CastOps CastOp) : CastOp(CastOp) {} + + SimplifyAction(Instruction::BinaryOps BinaryOp, FtzRequirementTy FtzReq) + : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {} + + SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq) + : Special(Special), FtzRequirement(FtzReq) {} + }; + + // Try to generate a SimplifyAction describing how to replace our + // IntrinsicInstr with target-generic LLVM IR. + const SimplifyAction Action = [II]() -> SimplifyAction { + switch (II->getIntrinsicID()) { + // NVVM intrinsics that map directly to LLVM intrinsics. 
+ case Intrinsic::nvvm_ceil_d: + return {Intrinsic::ceil, FTZ_Any}; + case Intrinsic::nvvm_ceil_f: + return {Intrinsic::ceil, FTZ_MustBeOff}; + case Intrinsic::nvvm_ceil_ftz_f: + return {Intrinsic::ceil, FTZ_MustBeOn}; + case Intrinsic::nvvm_fabs_d: + return {Intrinsic::fabs, FTZ_Any}; + case Intrinsic::nvvm_fabs_f: + return {Intrinsic::fabs, FTZ_MustBeOff}; + case Intrinsic::nvvm_fabs_ftz_f: + return {Intrinsic::fabs, FTZ_MustBeOn}; + case Intrinsic::nvvm_floor_d: + return {Intrinsic::floor, FTZ_Any}; + case Intrinsic::nvvm_floor_f: + return {Intrinsic::floor, FTZ_MustBeOff}; + case Intrinsic::nvvm_floor_ftz_f: + return {Intrinsic::floor, FTZ_MustBeOn}; + case Intrinsic::nvvm_fma_rn_d: + return {Intrinsic::fma, FTZ_Any}; + case Intrinsic::nvvm_fma_rn_f: + return {Intrinsic::fma, FTZ_MustBeOff}; + case Intrinsic::nvvm_fma_rn_ftz_f: + return {Intrinsic::fma, FTZ_MustBeOn}; + case Intrinsic::nvvm_fmax_d: + return {Intrinsic::maxnum, FTZ_Any}; + case Intrinsic::nvvm_fmax_f: + return {Intrinsic::maxnum, FTZ_MustBeOff}; + case Intrinsic::nvvm_fmax_ftz_f: + return {Intrinsic::maxnum, FTZ_MustBeOn}; + case Intrinsic::nvvm_fmin_d: + return {Intrinsic::minnum, FTZ_Any}; + case Intrinsic::nvvm_fmin_f: + return {Intrinsic::minnum, FTZ_MustBeOff}; + case Intrinsic::nvvm_fmin_ftz_f: + return {Intrinsic::minnum, FTZ_MustBeOn}; + case Intrinsic::nvvm_round_d: + return {Intrinsic::round, FTZ_Any}; + case Intrinsic::nvvm_round_f: + return {Intrinsic::round, FTZ_MustBeOff}; + case Intrinsic::nvvm_round_ftz_f: + return {Intrinsic::round, FTZ_MustBeOn}; + case Intrinsic::nvvm_sqrt_rn_d: + return {Intrinsic::sqrt, FTZ_Any}; + case Intrinsic::nvvm_sqrt_f: + // nvvm_sqrt_f is a special case. For most intrinsics, foo_ftz_f is the + // ftz version, and foo_f is the non-ftz version. But nvvm_sqrt_f adopts + // the ftz-ness of the surrounding code. sqrt_rn_f and sqrt_rn_ftz_f are + // the versions with explicit ftz-ness. + return {Intrinsic::sqrt, FTZ_Any}; + case Intrinsic::nvvm_sqrt_rn_f: + return {Intrinsic::sqrt, FTZ_MustBeOff}; + case Intrinsic::nvvm_sqrt_rn_ftz_f: + return {Intrinsic::sqrt, FTZ_MustBeOn}; + case Intrinsic::nvvm_trunc_d: + return {Intrinsic::trunc, FTZ_Any}; + case Intrinsic::nvvm_trunc_f: + return {Intrinsic::trunc, FTZ_MustBeOff}; + case Intrinsic::nvvm_trunc_ftz_f: + return {Intrinsic::trunc, FTZ_MustBeOn}; + + // NVVM intrinsics that map to LLVM cast operations. + // + // Note that llvm's target-generic conversion operators correspond to the rz + // (round to zero) versions of the nvvm conversion intrinsics, even though + // most everything else here uses the rn (round to nearest even) nvvm ops. + case Intrinsic::nvvm_d2i_rz: + case Intrinsic::nvvm_f2i_rz: + case Intrinsic::nvvm_d2ll_rz: + case Intrinsic::nvvm_f2ll_rz: + return {Instruction::FPToSI}; + case Intrinsic::nvvm_d2ui_rz: + case Intrinsic::nvvm_f2ui_rz: + case Intrinsic::nvvm_d2ull_rz: + case Intrinsic::nvvm_f2ull_rz: + return {Instruction::FPToUI}; + case Intrinsic::nvvm_i2d_rz: + case Intrinsic::nvvm_i2f_rz: + case Intrinsic::nvvm_ll2d_rz: + case Intrinsic::nvvm_ll2f_rz: + return {Instruction::SIToFP}; + case Intrinsic::nvvm_ui2d_rz: + case Intrinsic::nvvm_ui2f_rz: + case Intrinsic::nvvm_ull2d_rz: + case Intrinsic::nvvm_ull2f_rz: + return {Instruction::UIToFP}; + + // NVVM intrinsics that map to LLVM binary ops. 
+ case Intrinsic::nvvm_add_rn_d: + return {Instruction::FAdd, FTZ_Any}; + case Intrinsic::nvvm_add_rn_f: + return {Instruction::FAdd, FTZ_MustBeOff}; + case Intrinsic::nvvm_add_rn_ftz_f: + return {Instruction::FAdd, FTZ_MustBeOn}; + case Intrinsic::nvvm_mul_rn_d: + return {Instruction::FMul, FTZ_Any}; + case Intrinsic::nvvm_mul_rn_f: + return {Instruction::FMul, FTZ_MustBeOff}; + case Intrinsic::nvvm_mul_rn_ftz_f: + return {Instruction::FMul, FTZ_MustBeOn}; + case Intrinsic::nvvm_div_rn_d: + return {Instruction::FDiv, FTZ_Any}; + case Intrinsic::nvvm_div_rn_f: + return {Instruction::FDiv, FTZ_MustBeOff}; + case Intrinsic::nvvm_div_rn_ftz_f: + return {Instruction::FDiv, FTZ_MustBeOn}; + + // The remainder of cases are NVVM intrinsics that map to LLVM idioms, but + // need special handling. + // + // We seem to be missing intrinsics for rcp.approx.{ftz.}f32, which is just + // as well. + case Intrinsic::nvvm_rcp_rn_d: + return {SPC_Reciprocal, FTZ_Any}; + case Intrinsic::nvvm_rcp_rn_f: + return {SPC_Reciprocal, FTZ_MustBeOff}; + case Intrinsic::nvvm_rcp_rn_ftz_f: + return {SPC_Reciprocal, FTZ_MustBeOn}; + + // We do not currently simplify intrinsics that give an approximate + // answer. These include: + // + // - nvvm_cos_approx_{f,ftz_f} + // - nvvm_ex2_approx_{d,f,ftz_f} + // - nvvm_lg2_approx_{d,f,ftz_f} + // - nvvm_sin_approx_{f,ftz_f} + // - nvvm_sqrt_approx_{f,ftz_f} + // - nvvm_rsqrt_approx_{d,f,ftz_f} + // - nvvm_div_approx_{ftz_d,ftz_f,f} + // - nvvm_rcp_approx_ftz_d + // + // Ideally we'd encode them as e.g. "fast call @llvm.cos", where "fast" + // means that fastmath is enabled in the intrinsic. Unfortunately only + // binary operators (currently) have a fastmath bit in SelectionDAG, so + // this information gets lost and we can't select on it. + // + // TODO: div and rcp are lowered to a binary op, so these we could in + // theory lower them to "fast fdiv". + + default: + return {}; + } + }(); + + // If Action.FtzRequirementTy is not satisfied by the module's ftz state, we + // can bail out now. (Notice that in the case that IID is not an NVVM + // intrinsic, we don't have to look up any module metadata, as + // FtzRequirementTy will be FTZ_Any.) + if (Action.FtzRequirement != FTZ_Any) { + StringRef Attr = II->getFunction() + ->getFnAttribute("denormal-fp-math-f32") + .getValueAsString(); + DenormalMode Mode = parseDenormalFPAttribute(Attr); + bool FtzEnabled = Mode.Output != DenormalMode::IEEE; + + if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn)) + return nullptr; + } + + // Simplify to target-generic intrinsic. + if (Action.IID) { + SmallVector<Value *, 4> Args(II->arg_operands()); + // All the target-generic intrinsics currently of interest to us have one + // type argument, equal to that of the nvvm intrinsic's argument. + Type *Tys[] = {II->getArgOperand(0)->getType()}; + return CallInst::Create( + Intrinsic::getDeclaration(II->getModule(), *Action.IID, Tys), Args); + } + + // Simplify to target-generic binary op. + if (Action.BinaryOp) + return BinaryOperator::Create(*Action.BinaryOp, II->getArgOperand(0), + II->getArgOperand(1), II->getName()); + + // Simplify to target-generic cast op. + if (Action.CastOp) + return CastInst::Create(*Action.CastOp, II->getArgOperand(0), II->getType(), + II->getName()); + + // All that's left are the special cases. + if (!Action.Special) + return nullptr; + + switch (*Action.Special) { + case SPC_Reciprocal: + // Simplify reciprocal. 
+    return BinaryOperator::Create(
+        Instruction::FDiv, ConstantFP::get(II->getArgOperand(0)->getType(), 1),
+        II->getArgOperand(0), II->getName());
+  }
+  llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
+}
+
+Optional<Instruction *>
+NVPTXTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
+  if (Instruction *I = simplifyNvvmIntrinsic(&II, IC)) {
+    return I;
+  }
+  return None;
+}
+
 int NVPTXTTIImpl::getArithmeticInstrCost(
     unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
     TTI::OperandValueKind Opd1Info,
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 6f071040dd..9176c5db7b 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -48,9 +48,9 @@ public:
     return AddressSpace::ADDRESS_SPACE_GENERIC;
   }
 
-  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
-                                               IntrinsicInst &II) const;
-
+  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+                                               IntrinsicInst &II) const;
+
   // Loads and stores can be vectorized if the alignment is at least as big as
   // the load/store we want to vectorize.
   bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVVMIntrRange.cpp b/contrib/libs/llvm12/lib/Target/NVPTX/NVVMIntrRange.cpp
index 5381646434..7f5e3edb3b 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/NVVMIntrRange.cpp
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVVMIntrRange.cpp
@@ -17,7 +17,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsNVPTX.h"
-#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/Support/CommandLine.h"
 
 using namespace llvm;
@@ -33,13 +33,13 @@ static cl::opt<unsigned> NVVMIntrRangeSM("nvvm-intr-range-sm", cl::init(20),
 namespace {
 class NVVMIntrRange : public FunctionPass {
 private:
-  unsigned SmVersion;
+  unsigned SmVersion;
 
 public:
   static char ID;
   NVVMIntrRange() : NVVMIntrRange(NVVMIntrRangeSM) {}
-  NVVMIntrRange(unsigned int SmVersion)
-      : FunctionPass(ID), SmVersion(SmVersion) {
+  NVVMIntrRange(unsigned int SmVersion)
+      : FunctionPass(ID), SmVersion(SmVersion) {
     initializeNVVMIntrRangePass(*PassRegistry::getPassRegistry());
   }
 
@@ -72,18 +72,18 @@ static bool addRangeMetadata(uint64_t Low, uint64_t High, CallInst *C) {
   return true;
 }
 
-static bool runNVVMIntrRange(Function &F, unsigned SmVersion) {
-  struct {
-    unsigned x, y, z;
-  } MaxBlockSize, MaxGridSize;
-  MaxBlockSize.x = 1024;
-  MaxBlockSize.y = 1024;
-  MaxBlockSize.z = 64;
-
-  MaxGridSize.x = SmVersion >= 30 ? 0x7fffffff : 0xffff;
-  MaxGridSize.y = 0xffff;
-  MaxGridSize.z = 0xffff;
-
+static bool runNVVMIntrRange(Function &F, unsigned SmVersion) {
+  struct {
+    unsigned x, y, z;
+  } MaxBlockSize, MaxGridSize;
+  MaxBlockSize.x = 1024;
+  MaxBlockSize.y = 1024;
+  MaxBlockSize.z = 64;
+
+  MaxGridSize.x = SmVersion >= 30 ? 0x7fffffff : 0xffff;
+  MaxGridSize.y = 0xffff;
+  MaxGridSize.z = 0xffff;
+
   // Go through the calls in this function.
   bool Changed = false;
   for (Instruction &I : instructions(F)) {
@@ -155,15 +155,15 @@ static bool runNVVMIntrRange(Function &F, unsigned SmVersion) {
   return Changed;
 }
-
-bool NVVMIntrRange::runOnFunction(Function &F) {
-  return runNVVMIntrRange(F, SmVersion);
-}
-
-NVVMIntrRangePass::NVVMIntrRangePass() : NVVMIntrRangePass(NVVMIntrRangeSM) {}
-
-PreservedAnalyses NVVMIntrRangePass::run(Function &F,
-                                         FunctionAnalysisManager &AM) {
-  return runNVVMIntrRange(F, SmVersion) ? PreservedAnalyses::none()
-                                        : PreservedAnalyses::all();
-}
+
+bool NVVMIntrRange::runOnFunction(Function &F) {
+  return runNVVMIntrRange(F, SmVersion);
+}
+
+NVVMIntrRangePass::NVVMIntrRangePass() : NVVMIntrRangePass(NVVMIntrRangeSM) {}
+
+PreservedAnalyses NVVMIntrRangePass::run(Function &F,
+                                         FunctionAnalysisManager &AM) {
+  return runNVVMIntrRange(F, SmVersion) ? PreservedAnalyses::none()
+                                        : PreservedAnalyses::all();
+}
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVVMReflect.cpp b/contrib/libs/llvm12/lib/Target/NVPTX/NVVMReflect.cpp
index 339f51d210..c740ded1fd 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVVMReflect.cpp
@@ -29,7 +29,7 @@
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsNVPTX.h"
 #include "llvm/IR/Module.h"
-#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
@@ -74,7 +74,7 @@ INITIALIZE_PASS(NVVMReflect, "nvvm-reflect",
                 "Replace occurrences of __nvvm_reflect() calls with 0/1", false,
                 false)
 
-static bool runNVVMReflect(Function &F, unsigned SmVersion) {
+static bool runNVVMReflect(Function &F, unsigned SmVersion) {
   if (!NVVMReflectEnabled)
     return false;
 
@@ -180,15 +180,15 @@ static bool runNVVMReflect(Function &F, unsigned SmVersion) {
   return ToRemove.size() > 0;
 }
-
-bool NVVMReflect::runOnFunction(Function &F) {
-  return runNVVMReflect(F, SmVersion);
-}
-
-NVVMReflectPass::NVVMReflectPass() : NVVMReflectPass(0) {}
-
-PreservedAnalyses NVVMReflectPass::run(Function &F,
-                                       FunctionAnalysisManager &AM) {
-  return runNVVMReflect(F, SmVersion) ? PreservedAnalyses::none()
-                                      : PreservedAnalyses::all();
-}
+
+bool NVVMReflect::runOnFunction(Function &F) {
+  return runNVVMReflect(F, SmVersion);
+}
+
+NVVMReflectPass::NVVMReflectPass() : NVVMReflectPass(0) {}
+
+PreservedAnalyses NVVMReflectPass::run(Function &F,
+                                       FunctionAnalysisManager &AM) {
+  return runNVVMReflect(F, SmVersion) ? PreservedAnalyses::none()
+                                      : PreservedAnalyses::all();
+}
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo/ya.make b/contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo/ya.make
index 52ef1e5f5b..8e9644a1eb 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo/ya.make
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo/ya.make
@@ -12,13 +12,13 @@ LICENSE(Apache-2.0 WITH LLVM-exception)
 LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
 
 PEERDIR(
-    contrib/libs/llvm12
-    contrib/libs/llvm12/lib/Support
+    contrib/libs/llvm12
+    contrib/libs/llvm12/lib/Support
 )
 
 ADDINCL(
-    contrib/libs/llvm12/lib/Target/NVPTX
-    contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo
+    contrib/libs/llvm12/lib/Target/NVPTX
+    contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo
 )
 
 NO_COMPILER_WARNINGS()
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/ya.make b/contrib/libs/llvm12/lib/Target/NVPTX/ya.make
index 4f7542eb65..b6325732ad 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/ya.make
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/ya.make
@@ -12,27 +12,27 @@ LICENSE(Apache-2.0 WITH LLVM-exception)
 LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
 
 PEERDIR(
-    contrib/libs/llvm12
-    contrib/libs/llvm12/include
-    contrib/libs/llvm12/lib/Analysis
-    contrib/libs/llvm12/lib/CodeGen
-    contrib/libs/llvm12/lib/CodeGen/AsmPrinter
-    contrib/libs/llvm12/lib/CodeGen/SelectionDAG
-    contrib/libs/llvm12/lib/IR
-    contrib/libs/llvm12/lib/MC
-    contrib/libs/llvm12/lib/Support
-    contrib/libs/llvm12/lib/Target
-    contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc
-    contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo
-    contrib/libs/llvm12/lib/Transforms/IPO
-    contrib/libs/llvm12/lib/Transforms/Scalar
-    contrib/libs/llvm12/lib/Transforms/Utils
-    contrib/libs/llvm12/lib/Transforms/Vectorize
+    contrib/libs/llvm12
+    contrib/libs/llvm12/include
+    contrib/libs/llvm12/lib/Analysis
+    contrib/libs/llvm12/lib/CodeGen
+    contrib/libs/llvm12/lib/CodeGen/AsmPrinter
+    contrib/libs/llvm12/lib/CodeGen/SelectionDAG
+    contrib/libs/llvm12/lib/IR
+    contrib/libs/llvm12/lib/MC
+    contrib/libs/llvm12/lib/Support
+    contrib/libs/llvm12/lib/Target
+    contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc
+    contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo
+    contrib/libs/llvm12/lib/Transforms/IPO
+    contrib/libs/llvm12/lib/Transforms/Scalar
+    contrib/libs/llvm12/lib/Transforms/Utils
+    contrib/libs/llvm12/lib/Transforms/Vectorize
 )
 
 ADDINCL(
-    ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/NVPTX
-    contrib/libs/llvm12/lib/Target/NVPTX
+    ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/NVPTX
+    contrib/libs/llvm12/lib/Target/NVPTX
 )
 
 NO_COMPILER_WARNINGS()
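The NVPTXTargetTransformInfo.cpp hunks above are essentially a lookup table: each nvvm_* intrinsic is classified into an action (a target-generic intrinsic, a cast, a binary op, or a special case such as rewriting rcp.rn to an fdiv by 1.0) together with an FTZ requirement, and the rewrite only fires when the function's "denormal-fp-math-f32" attribute agrees with that requirement. The following standalone C++ sketch models only that dispatch-and-gating idea; the enums and helper names (classify, shouldSimplify) are illustrative stand-ins chosen for this note, not LLVM's own types.

#include <cstdio>
#include <optional>

// Simplified stand-ins for the types used in simplifyNvvmIntrinsic above.
enum class FtzRequirement { FTZ_Any, FTZ_MustBeOff, FTZ_MustBeOn };
enum class NvvmIntrinsic { fma_rn_d, fma_rn_f, fma_rn_ftz_f, rcp_rn_f };
enum class GenericOp { fma, reciprocal_fdiv };

struct SimplifyAction {
  std::optional<GenericOp> Op;                      // replacement, if any
  FtzRequirement Ftz = FtzRequirement::FTZ_Any;     // when the rewrite is legal
};

// Mirror of the big switch: intrinsic -> (replacement, FTZ requirement).
static SimplifyAction classify(NvvmIntrinsic IID) {
  switch (IID) {
  case NvvmIntrinsic::fma_rn_d:
    return {GenericOp::fma, FtzRequirement::FTZ_Any};
  case NvvmIntrinsic::fma_rn_f:
    return {GenericOp::fma, FtzRequirement::FTZ_MustBeOff};
  case NvvmIntrinsic::fma_rn_ftz_f:
    return {GenericOp::fma, FtzRequirement::FTZ_MustBeOn};
  case NvvmIntrinsic::rcp_rn_f:
    return {GenericOp::reciprocal_fdiv, FtzRequirement::FTZ_MustBeOff};
  }
  return {};
}

// The gating step: rewrite only when the function's f32 denormal mode matches
// the requirement (the real pass reads the "denormal-fp-math-f32" attribute;
// here FtzEnabled simply means "f32 output denormals are flushed to zero").
static bool shouldSimplify(const SimplifyAction &A, bool FtzEnabled) {
  if (!A.Op)
    return false;
  if (A.Ftz == FtzRequirement::FTZ_Any)
    return true;
  return FtzEnabled == (A.Ftz == FtzRequirement::FTZ_MustBeOn);
}

int main() {
  // nvvm_fma_rn_ftz_f may become the generic fma only when FTZ is on.
  SimplifyAction A = classify(NvvmIntrinsic::fma_rn_ftz_f);
  std::printf("ftz on:  %d\n", shouldSimplify(A, /*FtzEnabled=*/true));
  std::printf("ftz off: %d\n", shouldSimplify(A, /*FtzEnabled=*/false));
}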
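The NVVMIntrRange.cpp hunk restores runNVVMIntrRange, which attaches !range metadata to PTX special-register reads (tid, ntid, ctaid, nctaid and similar) using the hard limits set up at the top of the function: blocks are at most 1024 x 1024 x 64 threads, and the grid x dimension is 0x7fffffff on sm_30+ but 0xffff on older targets. A minimal sketch of those half-open [Low, High) bounds is below; the helper names are hypothetical, and the exact set of intrinsics annotated is the one visible in the hunk, not reproduced here.

#include <cstdio>

// Half-open interval, matching how !range metadata is expressed.
struct Range { unsigned Low, High; };
struct Dim3 { unsigned x, y, z; };

// Limits as set up in runNVVMIntrRange above.
static Dim3 maxBlockSize() { return {1024, 1024, 64}; }
static Dim3 maxGridSize(unsigned SmVersion) {
  return {SmVersion >= 30 ? 0x7fffffffu : 0xffffu, 0xffffu, 0xffffu};
}

// Hypothetical helpers: tid.x is in [0, MaxBlockSize.x), ntid.x is at least 1,
// ctaid.x is bounded by the SM-dependent grid limit.
static Range tidX() { return {0, maxBlockSize().x}; }
static Range ntidX() { return {1, maxBlockSize().x + 1}; }
static Range ctaidX(unsigned Sm) { return {0, maxGridSize(Sm).x}; }

int main() {
  std::printf("tid.x   in [%u, %u)\n", tidX().Low, tidX().High);
  std::printf("ntid.x  in [%u, %u)\n", ntidX().Low, ntidX().High);
  std::printf("ctaid.x in [0, %u) on sm_70\n", ctaidX(70).High);
  std::printf("ctaid.x in [0, %u) on sm_20\n", ctaidX(20).High);
}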
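The NVVMReflect.cpp hunk restores runNVVMReflect, which folds __nvvm_reflect(...) queries into integer constants so that branches guarded by them can be deleted by later passes. The sketch below is a rough standalone model of that string-to-constant substitution under the assumption that the pass answers an FTZ query and an architecture query derived from SmVersion; the real pass walks IR call sites and erases them, and the exact query strings it recognizes are those in the source, not guaranteed by this note.

#include <cstdio>
#include <string>

// Rough model of __nvvm_reflect(...) folding: each query string becomes an
// integer constant. Query names and the SmVersion * 10 encoding are assumed
// for illustration; unknown queries fold to 0, mirroring the "0/1" behavior
// described in the pass's INITIALIZE_PASS string.
static unsigned reflectValue(const std::string &Query, unsigned SmVersion,
                             bool FtzEnabled) {
  if (Query == "__CUDA_FTZ")
    return FtzEnabled ? 1u : 0u;
  if (Query == "__CUDA_ARCH")
    return SmVersion * 10;   // e.g. sm_70 -> 700 (assumed encoding)
  return 0;
}

int main() {
  std::printf("__CUDA_FTZ  -> %u\n", reflectValue("__CUDA_FTZ", 70, true));
  std::printf("__CUDA_ARCH -> %u\n", reflectValue("__CUDA_ARCH", 70, true));
}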