author     shadchin <shadchin@yandex-team.ru>            2022-02-10 16:44:30 +0300
committer  Daniil Cherednik <dcherednik@yandex-team.ru>  2022-02-10 16:44:30 +0300
commit     2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch)
tree       012bb94d777798f1f56ac1cec429509766d05181 /contrib/libs/llvm12/lib/Target/NVPTX
parent     6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff)
download   ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Target/NVPTX')
-rw-r--r--  contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h      2
-rw-r--r--  contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp      2
-rw-r--r--  contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp   2
-rw-r--r--  contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/ya.make                16
-rw-r--r--  contrib/libs/llvm12/lib/Target/NVPTX/NVPTX.h                             38
-rw-r--r--  contrib/libs/llvm12/lib/Target/NVPTX/NVPTXAsmPrinter.cpp                  4
-rw-r--r--  contrib/libs/llvm12/lib/Target/NVPTX/NVPTXFrameLowering.cpp              10
-rw-r--r--  contrib/libs/llvm12/lib/Target/NVPTX/NVPTXFrameLowering.h                 6
-rw-r--r--  contrib/libs/llvm12/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp                8
-rw-r--r--  contrib/libs/llvm12/lib/Target/NVPTX/NVPTXISelLowering.cpp               36
-rw-r--r--  contrib/libs/llvm12/lib/Target/NVPTX/NVPTXISelLowering.h                  4
-rw-r--r--  contrib/libs/llvm12/lib/Target/NVPTX/NVPTXInstrFormats.td                32
-rw-r--r--  contrib/libs/llvm12/lib/Target/NVPTX/NVPTXInstrInfo.td                   36
-rw-r--r--  contrib/libs/llvm12/lib/Target/NVPTX/NVPTXIntrinsics.td                  66
-rw-r--r--  contrib/libs/llvm12/lib/Target/NVPTX/NVPTXLowerArgs.cpp                  12
-rw-r--r--  contrib/libs/llvm12/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp            4
-rw-r--r--  contrib/libs/llvm12/lib/Target/NVPTX/NVPTXRegisterInfo.td                 4
-rw-r--r--  contrib/libs/llvm12/lib/Target/NVPTX/NVPTXSubtarget.cpp                   8
-rw-r--r--  contrib/libs/llvm12/lib/Target/NVPTX/NVPTXSubtarget.h                     2
-rw-r--r--  contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetMachine.cpp              58
-rw-r--r--  contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetMachine.h                 4
-rw-r--r--  contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp       514
-rw-r--r--  contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetTransformInfo.h           6
-rw-r--r--  contrib/libs/llvm12/lib/Target/NVPTX/NVVMIntrRange.cpp                   56
-rw-r--r--  contrib/libs/llvm12/lib/Target/NVPTX/NVVMReflect.cpp                     28
-rw-r--r--  contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo/ya.make                   8
-rw-r--r--  contrib/libs/llvm12/lib/Target/NVPTX/ya.make                             36
27 files changed, 501 insertions(+), 501 deletions(-)
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h b/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
index 503f0497b6..8a15343ea9 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
@@ -29,7 +29,7 @@ public:
const MCSubtargetInfo &STI, raw_ostream &OS) override;
// Autogenerated by tblgen.
- std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
// End
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index f275011018..d43bbf6a84 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -47,7 +47,7 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Triple &TheTriple,
AscizDirective = nullptr; // not supported
SupportsQuotedNames = false;
SupportsExtendedDwarfLocDirective = false;
- SupportsSignedData = false;
+ SupportsSignedData = false;
// @TODO: Can we just disable this?
WeakDirective = "\t// .weak\t";
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index d69166feb0..648b3ff0bf 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -46,7 +46,7 @@ static MCRegisterInfo *createNVPTXMCRegisterInfo(const Triple &TT) {
static MCSubtargetInfo *
createNVPTXMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
- return createNVPTXMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
+ return createNVPTXMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
}
static MCInstPrinter *createNVPTXMCInstPrinter(const Triple &T,
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/ya.make b/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/ya.make
index 81ad30663e..fadad8ac79 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/ya.make
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/ya.make
@@ -12,17 +12,17 @@ LICENSE(Apache-2.0 WITH LLVM-exception)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
- contrib/libs/llvm12
- contrib/libs/llvm12/include
- contrib/libs/llvm12/lib/MC
- contrib/libs/llvm12/lib/Support
- contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo
+ contrib/libs/llvm12
+ contrib/libs/llvm12/include
+ contrib/libs/llvm12/lib/MC
+ contrib/libs/llvm12/lib/Support
+ contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo
)
ADDINCL(
- ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/NVPTX
- contrib/libs/llvm12/lib/Target/NVPTX
- contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc
+ ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/NVPTX
+ contrib/libs/llvm12/lib/Target/NVPTX
+ contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc
)
NO_COMPILER_WARNINGS()
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTX.h b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTX.h
index c2fd090da0..3a356942d5 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTX.h
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTX.h
@@ -14,7 +14,7 @@
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTX_H
#define LLVM_LIB_TARGET_NVPTX_NVPTX_H
-#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
@@ -48,24 +48,24 @@ FunctionPass *createNVPTXLowerAllocaPass();
MachineFunctionPass *createNVPTXPeephole();
MachineFunctionPass *createNVPTXProxyRegErasurePass();
-struct NVVMIntrRangePass : PassInfoMixin<NVVMIntrRangePass> {
- NVVMIntrRangePass();
- NVVMIntrRangePass(unsigned SmVersion) : SmVersion(SmVersion) {}
- PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
-
-private:
- unsigned SmVersion;
-};
-
-struct NVVMReflectPass : PassInfoMixin<NVVMReflectPass> {
- NVVMReflectPass();
- NVVMReflectPass(unsigned SmVersion) : SmVersion(SmVersion) {}
- PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
-
-private:
- unsigned SmVersion;
-};
-
+struct NVVMIntrRangePass : PassInfoMixin<NVVMIntrRangePass> {
+ NVVMIntrRangePass();
+ NVVMIntrRangePass(unsigned SmVersion) : SmVersion(SmVersion) {}
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+private:
+ unsigned SmVersion;
+};
+
+struct NVVMReflectPass : PassInfoMixin<NVVMReflectPass> {
+ NVVMReflectPass();
+ NVVMReflectPass(unsigned SmVersion) : SmVersion(SmVersion) {}
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+private:
+ unsigned SmVersion;
+};
+
namespace NVPTX {
enum DrvInterface {
NVCL,
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 38844ff4dd..e4d1256798 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1302,8 +1302,8 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
return "b32";
else
return "u32";
- default:
- break;
+ default:
+ break;
}
llvm_unreachable("unexpected type");
}
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index 024e51e5f4..a14afad671 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -63,13 +63,13 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF,
}
}
-StackOffset
-NVPTXFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
- Register &FrameReg) const {
+StackOffset
+NVPTXFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
FrameReg = NVPTX::VRDepot;
- return StackOffset::getFixed(MFI.getObjectOffset(FI) -
- getOffsetOfLocalArea());
+ return StackOffset::getFixed(MFI.getObjectOffset(FI) -
+ getOffsetOfLocalArea());
}
void NVPTXFrameLowering::emitEpilogue(MachineFunction &MF,
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXFrameLowering.h b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXFrameLowering.h
index a5d49ac3ab..93f24fca40 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -14,7 +14,7 @@
#define LLVM_LIB_TARGET_NVPTX_NVPTXFRAMELOWERING_H
#include "llvm/CodeGen/TargetFrameLowering.h"
-#include "llvm/Support/TypeSize.h"
+#include "llvm/Support/TypeSize.h"
namespace llvm {
@@ -25,8 +25,8 @@ public:
bool hasFP(const MachineFunction &MF) const override;
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
- Register &FrameReg) const override;
+ StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const override;
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 08f4ab87c6..c4df5dc36f 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -700,11 +700,11 @@ static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
bool IsKernelFn = isKernelFunction(F->getFunction());
- // We use getUnderlyingObjects() here instead of getUnderlyingObject() mainly
+ // We use getUnderlyingObjects() here instead of getUnderlyingObject() mainly
// because the former looks through phi nodes while the latter does not. We
// need to look through phi nodes to handle pointer induction variables.
SmallVector<const Value *, 8> Objs;
- getUnderlyingObjects(N->getMemOperand()->getValue(), Objs);
+ getUnderlyingObjects(N->getMemOperand()->getValue(), Objs);
return all_of(Objs, [&](const Value *V) {
if (auto *A = dyn_cast<const Argument>(V))
@@ -2854,7 +2854,7 @@ bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
}
// Copy over operands
- SmallVector<SDValue, 8> Ops(drop_begin(N->ops()));
+ SmallVector<SDValue, 8> Ops(drop_begin(N->ops()));
Ops.push_back(N->getOperand(0)); // Move chain to the back.
ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
@@ -3363,7 +3363,7 @@ bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
}
// Copy over operands
- SmallVector<SDValue, 8> Ops(drop_begin(N->ops()));
+ SmallVector<SDValue, 8> Ops(drop_begin(N->ops()));
Ops.push_back(N->getOperand(0)); // Move chain to the back.
ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 8860e90f28..753f3bf777 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -19,7 +19,7 @@
#include "NVPTXTargetObjectFile.h"
#include "NVPTXUtilities.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/Analysis.h"
@@ -65,7 +65,7 @@
using namespace llvm;
-static std::atomic<unsigned> GlobalUniqueCallSite;
+static std::atomic<unsigned> GlobalUniqueCallSite;
static cl::opt<bool> sched4reg(
"nvptx-sched4reg",
@@ -1243,7 +1243,7 @@ NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
std::string NVPTXTargetLowering::getPrototype(
const DataLayout &DL, Type *retTy, const ArgListTy &Args,
const SmallVectorImpl<ISD::OutputArg> &Outs, MaybeAlign retAlignment,
- const CallBase &CB, unsigned UniqueCallSite) const {
+ const CallBase &CB, unsigned UniqueCallSite) const {
auto PtrVT = getPointerTy(DL);
bool isABI = (STI.getSmVersion() >= 20);
@@ -1252,7 +1252,7 @@ std::string NVPTXTargetLowering::getPrototype(
return "";
std::stringstream O;
- O << "prototype_" << UniqueCallSite << " : .callprototype ";
+ O << "prototype_" << UniqueCallSite << " : .callprototype ";
if (retTy->getTypeID() == Type::VoidTyID) {
O << "()";
@@ -1422,9 +1422,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (!isABI)
return Chain;
- unsigned UniqueCallSite = GlobalUniqueCallSite.fetch_add(1);
+ unsigned UniqueCallSite = GlobalUniqueCallSite.fetch_add(1);
SDValue tempChain = Chain;
- Chain = DAG.getCALLSEQ_START(Chain, UniqueCallSite, 0, dl);
+ Chain = DAG.getCALLSEQ_START(Chain, UniqueCallSite, 0, dl);
SDValue InFlag = Chain.getValue(1);
unsigned paramCount = 0;
@@ -1679,8 +1679,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// The prototype is embedded in a string and put as the operand for a
// CallPrototype SDNode which will print out to the value of the string.
SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- std::string Proto =
- getPrototype(DL, RetTy, Args, Outs, retAlignment, *CB, UniqueCallSite);
+ std::string Proto =
+ getPrototype(DL, RetTy, Args, Outs, retAlignment, *CB, UniqueCallSite);
const char *ProtoStr =
nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str();
SDValue ProtoOps[] = {
@@ -1736,8 +1736,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (isIndirectCall) {
SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue PrototypeOps[] = {
- Chain, DAG.getConstant(UniqueCallSite, dl, MVT::i32), InFlag};
+ SDValue PrototypeOps[] = {
+ Chain, DAG.getConstant(UniqueCallSite, dl, MVT::i32), InFlag};
Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps);
InFlag = Chain.getValue(1);
}
@@ -1833,9 +1833,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
}
- Chain = DAG.getCALLSEQ_END(
- Chain, DAG.getIntPtrConstant(UniqueCallSite, dl, true),
- DAG.getIntPtrConstant(UniqueCallSite + 1, dl, true), InFlag, dl);
+ Chain = DAG.getCALLSEQ_END(
+ Chain, DAG.getIntPtrConstant(UniqueCallSite, dl, true),
+ DAG.getIntPtrConstant(UniqueCallSite + 1, dl, true), InFlag, dl);
InFlag = Chain.getValue(1);
// Append ProxyReg instructions to the chain to make sure that `callseq_end`
@@ -2437,7 +2437,7 @@ static bool isImageOrSamplerVal(const Value *arg, const Module *context) {
if (!STy || STy->isLiteral())
return false;
- return llvm::is_contained(specialTypes, STy->getName());
+ return llvm::is_contained(specialTypes, STy->getName());
}
SDValue NVPTXTargetLowering::LowerFormalArguments(
@@ -2588,8 +2588,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
// Extend the element if necessary (e.g. an i8 is loaded
// into an i16 register)
if (Ins[InsIdx].VT.isInteger() &&
- Ins[InsIdx].VT.getFixedSizeInBits() >
- LoadVT.getFixedSizeInBits()) {
+ Ins[InsIdx].VT.getFixedSizeInBits() >
+ LoadVT.getFixedSizeInBits()) {
unsigned Extend = Ins[InsIdx].Flags.isSExt() ? ISD::SIGN_EXTEND
: ISD::ZERO_EXTEND;
Elt = DAG.getNode(Extend, dl, Ins[InsIdx].VT, Elt);
@@ -4563,13 +4563,13 @@ static bool IsMulWideOperandDemotable(SDValue Op,
if (Op.getOpcode() == ISD::SIGN_EXTEND ||
Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
EVT OrigVT = Op.getOperand(0).getValueType();
- if (OrigVT.getFixedSizeInBits() <= OptSize) {
+ if (OrigVT.getFixedSizeInBits() <= OptSize) {
S = Signed;
return true;
}
} else if (Op.getOpcode() == ISD::ZERO_EXTEND) {
EVT OrigVT = Op.getOperand(0).getValueType();
- if (OrigVT.getFixedSizeInBits() <= OptSize) {
+ if (OrigVT.getFixedSizeInBits() <= OptSize) {
S = Unsigned;
return true;
}
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXISelLowering.h b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXISelLowering.h
index 13829b924d..660ca65e39 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -491,8 +491,8 @@ public:
std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &,
const SmallVectorImpl<ISD::OutputArg> &,
- MaybeAlign retAlignment, const CallBase &CB,
- unsigned UniqueCallSite) const;
+ MaybeAlign retAlignment, const CallBase &CB,
+ unsigned UniqueCallSite) const;
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXInstrFormats.td b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXInstrFormats.td
index 9220f4766d..9410b463bb 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXInstrFormats.td
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXInstrFormats.td
@@ -31,14 +31,14 @@ class NVPTXInst<dag outs, dag ins, string asmstr, list<dag> pattern>
// TSFlagFields
bits<4> VecInstType = VecNOP.Value;
- bit IsSimpleMove = false;
- bit IsLoad = false;
- bit IsStore = false;
+ bit IsSimpleMove = false;
+ bit IsLoad = false;
+ bit IsStore = false;
- bit IsTex = false;
- bit IsSust = false;
- bit IsSurfTexQuery = false;
- bit IsTexModeUnified = false;
+ bit IsTex = false;
+ bit IsSust = false;
+ bit IsSurfTexQuery = false;
+ bit IsTexModeUnified = false;
// The following field is encoded as log2 of the vector size minus one,
// with 0 meaning the operation is not a surface instruction. For example,
@@ -46,13 +46,13 @@ class NVPTXInst<dag outs, dag ins, string asmstr, list<dag> pattern>
// 2**(2-1) = 2.
bits<2> IsSuld = 0;
- let TSFlags{3...0} = VecInstType;
- let TSFlags{4...4} = IsSimpleMove;
- let TSFlags{5...5} = IsLoad;
- let TSFlags{6...6} = IsStore;
- let TSFlags{7} = IsTex;
- let TSFlags{9...8} = IsSuld;
- let TSFlags{10} = IsSust;
- let TSFlags{11} = IsSurfTexQuery;
- let TSFlags{12} = IsTexModeUnified;
+ let TSFlags{3...0} = VecInstType;
+ let TSFlags{4...4} = IsSimpleMove;
+ let TSFlags{5...5} = IsLoad;
+ let TSFlags{6...6} = IsStore;
+ let TSFlags{7} = IsTex;
+ let TSFlags{9...8} = IsSuld;
+ let TSFlags{10} = IsSust;
+ let TSFlags{11} = IsSurfTexQuery;
+ let TSFlags{12} = IsTexModeUnified;
}
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXInstrInfo.td b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXInstrInfo.td
index 381ed4dd68..345c4c5142 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -13,7 +13,7 @@
include "NVPTXInstrFormats.td"
// A NOP instruction
-let hasSideEffects = false in {
+let hasSideEffects = false in {
def NOP : NVPTXInst<(outs), (ins), "", []>;
}
@@ -137,7 +137,7 @@ def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">;
def hasHWROT32 : Predicate<"Subtarget->hasHWROT32()">;
def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">;
-def True : Predicate<"true">;
+def True : Predicate<"true">;
def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">;
def hasPTX60 : Predicate<"Subtarget->getPTXVersion() >= 60">;
@@ -407,7 +407,7 @@ multiclass F2<string OpcStr, SDNode OpNode> {
// Type Conversion
//-----------------------------------
-let hasSideEffects = false in {
+let hasSideEffects = false in {
// Generate a cvt to the given type from all possible types. Each instance
// takes a CvtMode immediate that defines the conversion mode to use. It can
// be CvtNONE to omit a conversion mode.
@@ -1022,12 +1022,12 @@ multiclass FMA_F16<string OpcStr, RegisterClass RC, Predicate Pred> {
}
defm FMA16_ftz : FMA_F16<"fma.rn.ftz.f16", Float16Regs, doF32FTZ>;
-defm FMA16 : FMA_F16<"fma.rn.f16", Float16Regs, True>;
+defm FMA16 : FMA_F16<"fma.rn.f16", Float16Regs, True>;
defm FMA16x2_ftz : FMA_F16<"fma.rn.ftz.f16x2", Float16x2Regs, doF32FTZ>;
-defm FMA16x2 : FMA_F16<"fma.rn.f16x2", Float16x2Regs, True>;
+defm FMA16x2 : FMA_F16<"fma.rn.f16x2", Float16x2Regs, True>;
defm FMA32_ftz : FMA<"fma.rn.ftz.f32", Float32Regs, f32imm, doF32FTZ>;
-defm FMA32 : FMA<"fma.rn.f32", Float32Regs, f32imm, True>;
-defm FMA64 : FMA<"fma.rn.f64", Float64Regs, f64imm, True>;
+defm FMA32 : FMA<"fma.rn.f32", Float32Regs, f32imm, True>;
+defm FMA64 : FMA<"fma.rn.f64", Float64Regs, f64imm, True>;
// sin/cos
def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
@@ -1367,7 +1367,7 @@ multiclass BFE<string TyStr, RegisterClass RC> {
!strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
}
-let hasSideEffects = false in {
+let hasSideEffects = false in {
defm BFE_S32 : BFE<"s32", Int32Regs>;
defm BFE_U32 : BFE<"u32", Int32Regs>;
defm BFE_S64 : BFE<"s64", Int64Regs>;
@@ -1381,7 +1381,7 @@ let hasSideEffects = false in {
// FIXME: This doesn't cover versions of set and setp that combine with a
// boolean predicate, e.g. setp.eq.and.b16.
-let hasSideEffects = false in {
+let hasSideEffects = false in {
multiclass SETP<string TypeStr, RegisterClass RC, Operand ImmCls> {
def rr :
NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, RC:$b, CmpMode:$cmp),
@@ -1427,7 +1427,7 @@ def SETP_f16x2rr :
// "set.CmpOp{.ftz}.dtype.stype", where dtype is the type of the destination
// reg, either u32, s32, or f32. Anyway these aren't used at the moment.
-let hasSideEffects = false in {
+let hasSideEffects = false in {
multiclass SET<string TypeStr, RegisterClass RC, Operand ImmCls> {
def rr : NVPTXInst<(outs Int32Regs:$dst),
(ins RC:$a, RC:$b, CmpMode:$cmp),
@@ -1462,7 +1462,7 @@ defm SET_f64 : SET<"f64", Float64Regs, f64imm>;
// selp instructions that don't have any pattern matches; we explicitly use
// them within this file.
-let hasSideEffects = false in {
+let hasSideEffects = false in {
multiclass SELP<string TypeStr, RegisterClass RC, Operand ImmCls> {
def rr : NVPTXInst<(outs RC:$dst),
(ins RC:$a, RC:$b, Int1Regs:$p),
@@ -1572,7 +1572,7 @@ def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
[(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;
// Get pointer to local stack.
-let hasSideEffects = false in {
+let hasSideEffects = false in {
def MOV_DEPOT_ADDR : NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num),
"mov.u32 \t$d, __local_depot$num;", []>;
def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num),
@@ -1988,7 +1988,7 @@ def ProxyReg :
SDNode<"NVPTXISD::ProxyReg", SDTProxyRegProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-let mayLoad = true in {
+let mayLoad = true in {
class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
!strconcat("ld.param", opstr, " \t$dst, [retval0+$b];"),
@@ -2013,7 +2013,7 @@ class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
!strconcat("mov", opstr, " \t$dst, retval$b;"),
[(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
-let mayStore = true in {
+let mayStore = true in {
class StoreParamInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
!strconcat("st.param", opstr, " \t[param$a+$b], $val;"),
@@ -2823,7 +2823,7 @@ def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b),
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
-let hasSideEffects = false in {
+let hasSideEffects = false in {
// pack a set of smaller int registers to a larger int register
def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
(ins Int16Regs:$s1, Int16Regs:$s2,
@@ -2856,7 +2856,7 @@ let hasSideEffects = false in {
}
-let hasSideEffects = false in {
+let hasSideEffects = false in {
// Extract element of f16x2 register. PTX does not provide any way
// to access elements of f16x2 vector directly, so we need to
// extract it using a temporary register.
@@ -2899,7 +2899,7 @@ let hasSideEffects = false in {
}
// Count leading zeros
-let hasSideEffects = false in {
+let hasSideEffects = false in {
def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
"clz.b32 \t$d, $a;", []>;
def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
@@ -2937,7 +2937,7 @@ def : Pat<(i32 (zext (i16 (ctlz Int16Regs:$a)))),
(SUBi32ri (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), 16)>;
// Population count
-let hasSideEffects = false in {
+let hasSideEffects = false in {
def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
"popc.b32 \t$d, $a;", []>;
def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXIntrinsics.td b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXIntrinsics.td
index 8ccd47c0fc..d9d586a0a6 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -51,19 +51,19 @@ def ptx : PTX;
// Generates list of n sequential register names.
// E.g. RegNames<3,"r">.ret -> ["r0", "r1", "r2" ]
class RegSeq<int n, string prefix> {
- list<string> ret = !if(n, !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
- [prefix # !sub(n, 1)]),
+ list<string> ret = !if(n, !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
+ [prefix # !sub(n, 1)]),
[]);
}
class THREADMASK_INFO<bit sync> {
- list<bit> ret = !if(sync, [0, 1], [0]);
+ list<bit> ret = !if(sync, [0, 1], [0]);
}
//-----------------------------------
// Synchronization and shuffle functions
//-----------------------------------
-let isConvergent = true in {
+let isConvergent = true in {
def INT_BARRIER0 : NVPTXInst<(outs), (ins),
"bar.sync \t0;",
[(int_nvvm_barrier0)]>;
@@ -173,12 +173,12 @@ class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
)];
}
-foreach sync = [false, true] in {
+foreach sync = [false, true] in {
foreach mode = ["up", "down", "bfly", "idx"] in {
foreach regclass = ["i32", "f32"] in {
- foreach return_pred = [false, true] in {
- foreach offset_imm = [false, true] in {
- foreach mask_imm = [false, true] in {
+ foreach return_pred = [false, true] in {
+ foreach offset_imm = [false, true] in {
+ foreach mask_imm = [false, true] in {
foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
def : SHFL_INSTR<sync, mode, regclass, return_pred,
offset_imm, mask_imm, threadmask_imm>,
@@ -274,7 +274,7 @@ defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_s
defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p,
i64imm>;
-} // isConvergent = true
+} // isConvergent = true
//-----------------------------------
// Explicit Memory Fence Functions
@@ -1548,7 +1548,7 @@ multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
!cast<Intrinsic>(
"int_nvvm_atomic_" # OpStr
# "_" # SpaceStr # "_" # IntTypeStr
- # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
+ # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
regclass, ImmType, Imm, ImmTy, Preds>;
}
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
@@ -1562,7 +1562,7 @@ multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
!cast<Intrinsic>(
"int_nvvm_atomic_" # OpStr
# "_" # SpaceStr # "_" # IntTypeStr
- # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
+ # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
regclass, ImmType, Imm, ImmTy, Preds>;
}
@@ -2131,7 +2131,7 @@ def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
(ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
Requires<[noHWROT32]> ;
-let hasSideEffects = false in {
+let hasSideEffects = false in {
def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
!strconcat("{{\n\t",
".reg .b32 %dummy;\n\t",
@@ -2147,7 +2147,7 @@ let hasSideEffects = false in {
[]> ;
}
-let hasSideEffects = false in {
+let hasSideEffects = false in {
def PACK_TWO_INT32
: NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
"mov.b64 \t$dst, {{$lo, $hi}};", []> ;
@@ -2159,7 +2159,7 @@ def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
// Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so
// no side effects.
-let hasSideEffects = false in {
+let hasSideEffects = false in {
def SHF_L_WRAP_B32_IMM
: NVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
@@ -2242,7 +2242,7 @@ def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
// also defined in NVPTXReplaceImageHandles.cpp
// texmode_independent
-let IsTex = true, IsTexModeUnified = false in {
+let IsTex = true, IsTexModeUnified = false in {
// Texture fetch instructions using handles
def TEX_1D_F32_S32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
@@ -2925,7 +2925,7 @@ def TLD4_A_2D_U32_F32
// texmode_unified
-let IsTex = true, IsTexModeUnified = true in {
+let IsTex = true, IsTexModeUnified = true in {
// Texture fetch instructions using handles
def TEX_UNIFIED_1D_F32_S32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
@@ -3610,7 +3610,7 @@ def TLD4_UNIFIED_A_2D_U32_F32
//=== Surface load instructions
// .clamp variant
-let IsSuld = true in {
+let IsSuld = true in {
def SULD_1D_I8_CLAMP
: NVPTXInst<(outs Int16Regs:$r),
(ins Int64Regs:$s, Int32Regs:$x),
@@ -3922,7 +3922,7 @@ def SULD_3D_V4I32_CLAMP
// .trap variant
-let IsSuld = true in {
+let IsSuld = true in {
def SULD_1D_I8_TRAP
: NVPTXInst<(outs Int16Regs:$r),
(ins Int64Regs:$s, Int32Regs:$x),
@@ -4233,7 +4233,7 @@ def SULD_3D_V4I32_TRAP
}
// .zero variant
-let IsSuld = true in {
+let IsSuld = true in {
def SULD_1D_I8_ZERO
: NVPTXInst<(outs Int16Regs:$r),
(ins Int64Regs:$s, Int32Regs:$x),
@@ -4547,7 +4547,7 @@ def SULD_3D_V4I32_ZERO
// Texture Query Intrinsics
//-----------------------------------
-let IsSurfTexQuery = true in {
+let IsSurfTexQuery = true in {
def TXQ_CHANNEL_ORDER
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"txq.channel_order.b32 \t$d, [$a];",
@@ -4604,7 +4604,7 @@ def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
// Surface Query Intrinsics
//-----------------------------------
-let IsSurfTexQuery = true in {
+let IsSurfTexQuery = true in {
def SUQ_CHANNEL_ORDER
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"suq.channel_order.b32 \t$d, [$a];",
@@ -4663,7 +4663,7 @@ def ISTYPEP_TEXTURE
//===- Surface Stores -----------------------------------------------------===//
-let IsSust = true in {
+let IsSust = true in {
// Unformatted
// .clamp variant
def SUST_B_1D_B8_CLAMP
@@ -7361,13 +7361,13 @@ class WMMA_REGINFO<WMMA_REGS r>
!eq(ptx_elt_type, "b1") : Int32Regs);
// Instruction input/output arguments for the fragment.
- list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));
+ list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));
// List of register names for the fragment -- ["ra0", "ra1",...]
list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;
// Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
- string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";
+ string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";
// Predicates for particular fragment variant. Technically those are
// per-instruction predicates, but currently all fragments that can be used in
@@ -7450,13 +7450,13 @@ class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
// To match the right intrinsic, we need to build AS-constrained PatFrag.
// Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
- dag PFOperandsIntr = !if(WithStride, (Intr node:$src, node:$ldm), (Intr node:$src));
+ dag PFOperandsIntr = !if(WithStride, (Intr node:$src, node:$ldm), (Intr node:$src));
// Build PatFrag that only matches particular address space.
PatFrag IntrFrag = PatFrag<PFOperands,
- PFOperandsIntr,
+ PFOperandsIntr,
!cond(!eq(Space, ".shared"): AS_match.shared,
!eq(Space, ".global"): AS_match.global,
- true: AS_match.generic)>;
+ true: AS_match.generic)>;
// Build AS-constrained pattern.
let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
@@ -7491,14 +7491,14 @@ class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
// To match the right intrinsic, we need to build AS-constrained PatFrag.
// Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
dag PFOperands = !con((ops node:$dst),
- !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names),
+ !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names),
!if(WithStride, (ops node:$ldm), (ops)));
// Build PatFrag that only matches particular address space.
PatFrag IntrFrag = PatFrag<PFOperands,
!foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
!cond(!eq(Space, ".shared"): AS_match.shared,
!eq(Space, ".global"): AS_match.global,
- true: AS_match.generic)>;
+ true: AS_match.generic)>;
// Build AS-constrained pattern.
let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
@@ -7519,14 +7519,14 @@ class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
// Create all load/store variants
defset list<WMMA_INSTR> MMA_LDSTs = {
foreach layout = ["row", "col"] in {
- foreach stride = [false, true] in {
+ foreach stride = [false, true] in {
foreach space = [".global", ".shared", ""] in {
foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
foreach frag = NVVM_MMA_OPS.all_ld_ops in
- if NVVM_MMA_SUPPORTED<[frag], layout>.ret then
+ if NVVM_MMA_SUPPORTED<[frag], layout>.ret then
def : WMMA_LOAD<WMMA_REGINFO<frag>, layout, space, stride, addr>;
foreach frag = NVVM_MMA_OPS.all_st_ops in
- if NVVM_MMA_SUPPORTED<[frag], layout>.ret then
+ if NVVM_MMA_SUPPORTED<[frag], layout>.ret then
def : WMMA_STORE_D<WMMA_REGINFO<frag>, layout, space, stride, addr>;
} // addr
} // space
@@ -7584,7 +7584,7 @@ defset list<WMMA_INSTR> MMAs = {
foreach layout_b = ["row", "col"] in {
foreach satf = [0, 1] in {
foreach op = NVVM_MMA_OPS.all_mma_ops in {
- if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
+ if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
def : WMMA_MMA<WMMA_REGINFO<op[0]>,
WMMA_REGINFO<op[1]>,
WMMA_REGINFO<op[2]>,
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index fd58ff1378..0048984968 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -172,12 +172,12 @@ void NVPTXLowerArgs::handleByValParam(Argument *Arg) {
Value *ArgInParam = new AddrSpaceCastInst(
Arg, PointerType::get(StructType, ADDRESS_SPACE_PARAM), Arg->getName(),
FirstInst);
- // Be sure to propagate alignment to this load; LLVM doesn't know that NVPTX
- // addrspacecast preserves alignment. Since params are constant, this load is
- // definitely not volatile.
+ // Be sure to propagate alignment to this load; LLVM doesn't know that NVPTX
+ // addrspacecast preserves alignment. Since params are constant, this load is
+ // definitely not volatile.
LoadInst *LI =
- new LoadInst(StructType, ArgInParam, Arg->getName(),
- /*isVolatile=*/false, AllocA->getAlign(), FirstInst);
+ new LoadInst(StructType, ArgInParam, Arg->getName(),
+ /*isVolatile=*/false, AllocA->getAlign(), FirstInst);
new StoreInst(LI, AllocA, FirstInst);
}
@@ -218,7 +218,7 @@ bool NVPTXLowerArgs::runOnKernelFunction(Function &F) {
for (auto &I : B) {
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
if (LI->getType()->isPointerTy()) {
- Value *UO = getUnderlyingObject(LI->getPointerOperand());
+ Value *UO = getUnderlyingObject(LI->getPointerOperand());
if (Argument *Arg = dyn_cast<Argument>(UO)) {
if (Arg->hasByValAttr()) {
// LI is a load from a pointer within a byval kernel parameter.
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index 756355f75e..9829fa416b 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -69,8 +69,8 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
"operand of a DBG_VALUE machine instruction");
Register Reg;
int64_t Offset =
- TFI.getFrameIndexReference(MF, MI.getOperand(0).getIndex(), Reg)
- .getFixed();
+ TFI.getFrameIndexReference(MF, MI.getOperand(0).getIndex(), Reg)
+ .getFixed();
MI.getOperand(0).ChangeToRegister(Reg, /*isDef=*/false);
MI.getOperand(0).setIsDebug();
auto *DIExpr = DIExpression::prepend(
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXRegisterInfo.td b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXRegisterInfo.td
index 19895a20ba..7d6dff48d1 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXRegisterInfo.td
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXRegisterInfo.td
@@ -30,7 +30,7 @@ def VRDepot : NVPTXReg<"%Depot">;
// We use virtual registers, but define a few physical registers here to keep
// SDAG and the MachineInstr layers happy.
-foreach i = 0...4 in {
+foreach i = 0...4 in {
def P#i : NVPTXReg<"%p"#i>; // Predicate
def RS#i : NVPTXReg<"%rs"#i>; // 16-bit
def R#i : NVPTXReg<"%r"#i>; // 32-bit
@@ -47,7 +47,7 @@ foreach i = 0...4 in {
def da#i : NVPTXReg<"%da"#i>;
}
-foreach i = 0...31 in {
+foreach i = 0...31 in {
def ENVREG#i : NVPTXReg<"%envreg"#i>;
}
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXSubtarget.cpp b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 05c20369ab..9d998619f8 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -35,7 +35,7 @@ NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
// Provide the default CPU if we don't have one.
TargetName = std::string(CPU.empty() ? "sm_20" : CPU);
- ParseSubtargetFeatures(TargetName, /*TuneCPU*/ TargetName, FS);
+ ParseSubtargetFeatures(TargetName, /*TuneCPU*/ TargetName, FS);
// Set default to PTX 3.2 (CUDA 5.5)
if (PTXVersion == 0) {
@@ -48,9 +48,9 @@ NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const NVPTXTargetMachine &TM)
- : NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0),
- SmVersion(20), TM(TM), InstrInfo(),
- TLInfo(TM, initializeSubtargetDependencies(CPU, FS)), FrameLowering() {}
+ : NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0),
+ SmVersion(20), TM(TM), InstrInfo(),
+ TLInfo(TM, initializeSubtargetDependencies(CPU, FS)), FrameLowering() {}
bool NVPTXSubtarget::hasImageHandles() const {
// Enable handles for Kepler+, where CUDA supports indirect surfaces and
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXSubtarget.h b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXSubtarget.h
index 9a249d3da3..886f200160 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -83,7 +83,7 @@ public:
unsigned getPTXVersion() const { return PTXVersion; }
NVPTXSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
- void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
};
} // End llvm namespace
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index f1a82f1cf6..57fe1b8fa8 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -24,7 +24,7 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Pass.h"
-#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
@@ -171,11 +171,11 @@ public:
void addFastRegAlloc() override;
void addOptimizedRegAlloc() override;
- bool addRegAssignAndRewriteFast() override {
+ bool addRegAssignAndRewriteFast() override {
llvm_unreachable("should not be used");
}
- bool addRegAssignAndRewriteOptimized() override {
+ bool addRegAssignAndRewriteOptimized() override {
llvm_unreachable("should not be used");
}
@@ -206,32 +206,32 @@ void NVPTXTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
});
}
-void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB,
- bool DebugPassManager) {
- PB.registerPipelineParsingCallback(
- [](StringRef PassName, FunctionPassManager &PM,
- ArrayRef<PassBuilder::PipelineElement>) {
- if (PassName == "nvvm-reflect") {
- PM.addPass(NVVMReflectPass());
- return true;
- }
- if (PassName == "nvvm-intr-range") {
- PM.addPass(NVVMIntrRangePass());
- return true;
- }
- return false;
- });
-
- PB.registerPipelineStartEPCallback(
- [this, DebugPassManager](ModulePassManager &PM,
- PassBuilder::OptimizationLevel Level) {
- FunctionPassManager FPM(DebugPassManager);
- FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion()));
- FPM.addPass(NVVMIntrRangePass(Subtarget.getSmVersion()));
- PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
- });
-}
-
+void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB,
+ bool DebugPassManager) {
+ PB.registerPipelineParsingCallback(
+ [](StringRef PassName, FunctionPassManager &PM,
+ ArrayRef<PassBuilder::PipelineElement>) {
+ if (PassName == "nvvm-reflect") {
+ PM.addPass(NVVMReflectPass());
+ return true;
+ }
+ if (PassName == "nvvm-intr-range") {
+ PM.addPass(NVVMIntrRangePass());
+ return true;
+ }
+ return false;
+ });
+
+ PB.registerPipelineStartEPCallback(
+ [this, DebugPassManager](ModulePassManager &PM,
+ PassBuilder::OptimizationLevel Level) {
+ FunctionPassManager FPM(DebugPassManager);
+ FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion()));
+ FPM.addPass(NVVMIntrRangePass(Subtarget.getSmVersion()));
+ PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+ });
+}
+
TargetTransformInfo
NVPTXTargetMachine::getTargetTransformInfo(const Function &F) {
return TargetTransformInfo(NVPTXTTIImpl(this, F));
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetMachine.h b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetMachine.h
index bef541c2b2..2a2defe0bf 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -62,8 +62,8 @@ public:
}
void adjustPassManager(PassManagerBuilder &) override;
- void registerPassBuilderCallbacks(PassBuilder &PB,
- bool DebugPassManager) override;
+ void registerPassBuilderCallbacks(PassBuilder &PB,
+ bool DebugPassManager) override;
TargetTransformInfo getTargetTransformInfo(const Function &F) override;
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index d4b2ae3840..28662d9314 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -111,263 +111,263 @@ bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) {
return false;
}
-// Convert NVVM intrinsics to target-generic LLVM code where possible.
-static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
- // Each NVVM intrinsic we can simplify can be replaced with one of:
- //
- // * an LLVM intrinsic,
- // * an LLVM cast operation,
- // * an LLVM binary operation, or
- // * ad-hoc LLVM IR for the particular operation.
-
- // Some transformations are only valid when the module's
- // flush-denormals-to-zero (ftz) setting is true/false, whereas other
- // transformations are valid regardless of the module's ftz setting.
- enum FtzRequirementTy {
- FTZ_Any, // Any ftz setting is ok.
- FTZ_MustBeOn, // Transformation is valid only if ftz is on.
- FTZ_MustBeOff, // Transformation is valid only if ftz is off.
- };
- // Classes of NVVM intrinsics that can't be replaced one-to-one with a
- // target-generic intrinsic, cast op, or binary op but that we can nonetheless
- // simplify.
- enum SpecialCase {
- SPC_Reciprocal,
- };
-
- // SimplifyAction is a poor-man's variant (plus an additional flag) that
- // represents how to replace an NVVM intrinsic with target-generic LLVM IR.
- struct SimplifyAction {
- // Invariant: At most one of these Optionals has a value.
- Optional<Intrinsic::ID> IID;
- Optional<Instruction::CastOps> CastOp;
- Optional<Instruction::BinaryOps> BinaryOp;
- Optional<SpecialCase> Special;
-
- FtzRequirementTy FtzRequirement = FTZ_Any;
-
- SimplifyAction() = default;
-
- SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq)
- : IID(IID), FtzRequirement(FtzReq) {}
-
- // Cast operations don't have anything to do with FTZ, so we skip that
- // argument.
- SimplifyAction(Instruction::CastOps CastOp) : CastOp(CastOp) {}
-
- SimplifyAction(Instruction::BinaryOps BinaryOp, FtzRequirementTy FtzReq)
- : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {}
-
- SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq)
- : Special(Special), FtzRequirement(FtzReq) {}
- };
-
- // Try to generate a SimplifyAction describing how to replace our
- // IntrinsicInstr with target-generic LLVM IR.
- const SimplifyAction Action = [II]() -> SimplifyAction {
- switch (II->getIntrinsicID()) {
- // NVVM intrinsics that map directly to LLVM intrinsics.
- case Intrinsic::nvvm_ceil_d:
- return {Intrinsic::ceil, FTZ_Any};
- case Intrinsic::nvvm_ceil_f:
- return {Intrinsic::ceil, FTZ_MustBeOff};
- case Intrinsic::nvvm_ceil_ftz_f:
- return {Intrinsic::ceil, FTZ_MustBeOn};
- case Intrinsic::nvvm_fabs_d:
- return {Intrinsic::fabs, FTZ_Any};
- case Intrinsic::nvvm_fabs_f:
- return {Intrinsic::fabs, FTZ_MustBeOff};
- case Intrinsic::nvvm_fabs_ftz_f:
- return {Intrinsic::fabs, FTZ_MustBeOn};
- case Intrinsic::nvvm_floor_d:
- return {Intrinsic::floor, FTZ_Any};
- case Intrinsic::nvvm_floor_f:
- return {Intrinsic::floor, FTZ_MustBeOff};
- case Intrinsic::nvvm_floor_ftz_f:
- return {Intrinsic::floor, FTZ_MustBeOn};
- case Intrinsic::nvvm_fma_rn_d:
- return {Intrinsic::fma, FTZ_Any};
- case Intrinsic::nvvm_fma_rn_f:
- return {Intrinsic::fma, FTZ_MustBeOff};
- case Intrinsic::nvvm_fma_rn_ftz_f:
- return {Intrinsic::fma, FTZ_MustBeOn};
- case Intrinsic::nvvm_fmax_d:
- return {Intrinsic::maxnum, FTZ_Any};
- case Intrinsic::nvvm_fmax_f:
- return {Intrinsic::maxnum, FTZ_MustBeOff};
- case Intrinsic::nvvm_fmax_ftz_f:
- return {Intrinsic::maxnum, FTZ_MustBeOn};
- case Intrinsic::nvvm_fmin_d:
- return {Intrinsic::minnum, FTZ_Any};
- case Intrinsic::nvvm_fmin_f:
- return {Intrinsic::minnum, FTZ_MustBeOff};
- case Intrinsic::nvvm_fmin_ftz_f:
- return {Intrinsic::minnum, FTZ_MustBeOn};
- case Intrinsic::nvvm_round_d:
- return {Intrinsic::round, FTZ_Any};
- case Intrinsic::nvvm_round_f:
- return {Intrinsic::round, FTZ_MustBeOff};
- case Intrinsic::nvvm_round_ftz_f:
- return {Intrinsic::round, FTZ_MustBeOn};
- case Intrinsic::nvvm_sqrt_rn_d:
- return {Intrinsic::sqrt, FTZ_Any};
- case Intrinsic::nvvm_sqrt_f:
- // nvvm_sqrt_f is a special case. For most intrinsics, foo_ftz_f is the
- // ftz version, and foo_f is the non-ftz version. But nvvm_sqrt_f adopts
- // the ftz-ness of the surrounding code. sqrt_rn_f and sqrt_rn_ftz_f are
- // the versions with explicit ftz-ness.
- return {Intrinsic::sqrt, FTZ_Any};
- case Intrinsic::nvvm_sqrt_rn_f:
- return {Intrinsic::sqrt, FTZ_MustBeOff};
- case Intrinsic::nvvm_sqrt_rn_ftz_f:
- return {Intrinsic::sqrt, FTZ_MustBeOn};
- case Intrinsic::nvvm_trunc_d:
- return {Intrinsic::trunc, FTZ_Any};
- case Intrinsic::nvvm_trunc_f:
- return {Intrinsic::trunc, FTZ_MustBeOff};
- case Intrinsic::nvvm_trunc_ftz_f:
- return {Intrinsic::trunc, FTZ_MustBeOn};
-
- // NVVM intrinsics that map to LLVM cast operations.
- //
- // Note that llvm's target-generic conversion operators correspond to the rz
- // (round to zero) versions of the nvvm conversion intrinsics, even though
- // most everything else here uses the rn (round to nearest even) nvvm ops.
- case Intrinsic::nvvm_d2i_rz:
- case Intrinsic::nvvm_f2i_rz:
- case Intrinsic::nvvm_d2ll_rz:
- case Intrinsic::nvvm_f2ll_rz:
- return {Instruction::FPToSI};
- case Intrinsic::nvvm_d2ui_rz:
- case Intrinsic::nvvm_f2ui_rz:
- case Intrinsic::nvvm_d2ull_rz:
- case Intrinsic::nvvm_f2ull_rz:
- return {Instruction::FPToUI};
- case Intrinsic::nvvm_i2d_rz:
- case Intrinsic::nvvm_i2f_rz:
- case Intrinsic::nvvm_ll2d_rz:
- case Intrinsic::nvvm_ll2f_rz:
- return {Instruction::SIToFP};
- case Intrinsic::nvvm_ui2d_rz:
- case Intrinsic::nvvm_ui2f_rz:
- case Intrinsic::nvvm_ull2d_rz:
- case Intrinsic::nvvm_ull2f_rz:
- return {Instruction::UIToFP};
-
- // NVVM intrinsics that map to LLVM binary ops.
- case Intrinsic::nvvm_add_rn_d:
- return {Instruction::FAdd, FTZ_Any};
- case Intrinsic::nvvm_add_rn_f:
- return {Instruction::FAdd, FTZ_MustBeOff};
- case Intrinsic::nvvm_add_rn_ftz_f:
- return {Instruction::FAdd, FTZ_MustBeOn};
- case Intrinsic::nvvm_mul_rn_d:
- return {Instruction::FMul, FTZ_Any};
- case Intrinsic::nvvm_mul_rn_f:
- return {Instruction::FMul, FTZ_MustBeOff};
- case Intrinsic::nvvm_mul_rn_ftz_f:
- return {Instruction::FMul, FTZ_MustBeOn};
- case Intrinsic::nvvm_div_rn_d:
- return {Instruction::FDiv, FTZ_Any};
- case Intrinsic::nvvm_div_rn_f:
- return {Instruction::FDiv, FTZ_MustBeOff};
- case Intrinsic::nvvm_div_rn_ftz_f:
- return {Instruction::FDiv, FTZ_MustBeOn};
-
- // The remainder of cases are NVVM intrinsics that map to LLVM idioms, but
- // need special handling.
- //
- // We seem to be missing intrinsics for rcp.approx.{ftz.}f32, which is just
- // as well.
- case Intrinsic::nvvm_rcp_rn_d:
- return {SPC_Reciprocal, FTZ_Any};
- case Intrinsic::nvvm_rcp_rn_f:
- return {SPC_Reciprocal, FTZ_MustBeOff};
- case Intrinsic::nvvm_rcp_rn_ftz_f:
- return {SPC_Reciprocal, FTZ_MustBeOn};
-
- // We do not currently simplify intrinsics that give an approximate
- // answer. These include:
- //
- // - nvvm_cos_approx_{f,ftz_f}
- // - nvvm_ex2_approx_{d,f,ftz_f}
- // - nvvm_lg2_approx_{d,f,ftz_f}
- // - nvvm_sin_approx_{f,ftz_f}
- // - nvvm_sqrt_approx_{f,ftz_f}
- // - nvvm_rsqrt_approx_{d,f,ftz_f}
- // - nvvm_div_approx_{ftz_d,ftz_f,f}
- // - nvvm_rcp_approx_ftz_d
- //
- // Ideally we'd encode them as e.g. "fast call @llvm.cos", where "fast"
- // means that fastmath is enabled in the intrinsic. Unfortunately only
- // binary operators (currently) have a fastmath bit in SelectionDAG, so
- // this information gets lost and we can't select on it.
- //
- // TODO: div and rcp are lowered to a binary op, so these we could in
- // theory lower them to "fast fdiv".
-
- default:
- return {};
- }
- }();
-
- // If Action.FtzRequirementTy is not satisfied by the module's ftz state, we
- // can bail out now. (Notice that in the case that IID is not an NVVM
- // intrinsic, we don't have to look up any module metadata, as
- // FtzRequirementTy will be FTZ_Any.)
- if (Action.FtzRequirement != FTZ_Any) {
- StringRef Attr = II->getFunction()
- ->getFnAttribute("denormal-fp-math-f32")
- .getValueAsString();
- DenormalMode Mode = parseDenormalFPAttribute(Attr);
- bool FtzEnabled = Mode.Output != DenormalMode::IEEE;
-
- if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn))
- return nullptr;
- }
-
- // Simplify to target-generic intrinsic.
- if (Action.IID) {
- SmallVector<Value *, 4> Args(II->arg_operands());
- // All the target-generic intrinsics currently of interest to us have one
- // type argument, equal to that of the nvvm intrinsic's argument.
- Type *Tys[] = {II->getArgOperand(0)->getType()};
- return CallInst::Create(
- Intrinsic::getDeclaration(II->getModule(), *Action.IID, Tys), Args);
- }
-
- // Simplify to target-generic binary op.
- if (Action.BinaryOp)
- return BinaryOperator::Create(*Action.BinaryOp, II->getArgOperand(0),
- II->getArgOperand(1), II->getName());
-
- // Simplify to target-generic cast op.
- if (Action.CastOp)
- return CastInst::Create(*Action.CastOp, II->getArgOperand(0), II->getType(),
- II->getName());
-
- // All that's left are the special cases.
- if (!Action.Special)
- return nullptr;
-
- switch (*Action.Special) {
- case SPC_Reciprocal:
- // Simplify reciprocal.
- return BinaryOperator::Create(
- Instruction::FDiv, ConstantFP::get(II->getArgOperand(0)->getType(), 1),
- II->getArgOperand(0), II->getName());
- }
- llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
-}
-
-Optional<Instruction *>
-NVPTXTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
- if (Instruction *I = simplifyNvvmIntrinsic(&II, IC)) {
- return I;
- }
- return None;
-}
-
+// Convert NVVM intrinsics to target-generic LLVM code where possible.
+static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
+ // Each NVVM intrinsic we can simplify can be replaced with one of:
+ //
+ // * an LLVM intrinsic,
+ // * an LLVM cast operation,
+ // * an LLVM binary operation, or
+ // * ad-hoc LLVM IR for the particular operation.
+
+ // Some transformations are only valid when the module's
+ // flush-denormals-to-zero (ftz) setting is true/false, whereas other
+ // transformations are valid regardless of the module's ftz setting.
+ enum FtzRequirementTy {
+ FTZ_Any, // Any ftz setting is ok.
+ FTZ_MustBeOn, // Transformation is valid only if ftz is on.
+ FTZ_MustBeOff, // Transformation is valid only if ftz is off.
+ };
+ // Classes of NVVM intrinsics that can't be replaced one-to-one with a
+ // target-generic intrinsic, cast op, or binary op but that we can nonetheless
+ // simplify.
+ enum SpecialCase {
+ SPC_Reciprocal,
+ };
+
+ // SimplifyAction is a poor-man's variant (plus an additional flag) that
+ // represents how to replace an NVVM intrinsic with target-generic LLVM IR.
+ struct SimplifyAction {
+ // Invariant: At most one of these Optionals has a value.
+ Optional<Intrinsic::ID> IID;
+ Optional<Instruction::CastOps> CastOp;
+ Optional<Instruction::BinaryOps> BinaryOp;
+ Optional<SpecialCase> Special;
+
+ FtzRequirementTy FtzRequirement = FTZ_Any;
+
+ SimplifyAction() = default;
+
+ SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq)
+ : IID(IID), FtzRequirement(FtzReq) {}
+
+ // Cast operations don't have anything to do with FTZ, so we skip that
+ // argument.
+ SimplifyAction(Instruction::CastOps CastOp) : CastOp(CastOp) {}
+
+ SimplifyAction(Instruction::BinaryOps BinaryOp, FtzRequirementTy FtzReq)
+ : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {}
+
+ SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq)
+ : Special(Special), FtzRequirement(FtzReq) {}
+ };
+
+ // Try to generate a SimplifyAction describing how to replace our
+ // IntrinsicInstr with target-generic LLVM IR.
+ const SimplifyAction Action = [II]() -> SimplifyAction {
+ switch (II->getIntrinsicID()) {
+ // NVVM intrinsics that map directly to LLVM intrinsics.
+ case Intrinsic::nvvm_ceil_d:
+ return {Intrinsic::ceil, FTZ_Any};
+ case Intrinsic::nvvm_ceil_f:
+ return {Intrinsic::ceil, FTZ_MustBeOff};
+ case Intrinsic::nvvm_ceil_ftz_f:
+ return {Intrinsic::ceil, FTZ_MustBeOn};
+ case Intrinsic::nvvm_fabs_d:
+ return {Intrinsic::fabs, FTZ_Any};
+ case Intrinsic::nvvm_fabs_f:
+ return {Intrinsic::fabs, FTZ_MustBeOff};
+ case Intrinsic::nvvm_fabs_ftz_f:
+ return {Intrinsic::fabs, FTZ_MustBeOn};
+ case Intrinsic::nvvm_floor_d:
+ return {Intrinsic::floor, FTZ_Any};
+ case Intrinsic::nvvm_floor_f:
+ return {Intrinsic::floor, FTZ_MustBeOff};
+ case Intrinsic::nvvm_floor_ftz_f:
+ return {Intrinsic::floor, FTZ_MustBeOn};
+ case Intrinsic::nvvm_fma_rn_d:
+ return {Intrinsic::fma, FTZ_Any};
+ case Intrinsic::nvvm_fma_rn_f:
+ return {Intrinsic::fma, FTZ_MustBeOff};
+ case Intrinsic::nvvm_fma_rn_ftz_f:
+ return {Intrinsic::fma, FTZ_MustBeOn};
+ case Intrinsic::nvvm_fmax_d:
+ return {Intrinsic::maxnum, FTZ_Any};
+ case Intrinsic::nvvm_fmax_f:
+ return {Intrinsic::maxnum, FTZ_MustBeOff};
+ case Intrinsic::nvvm_fmax_ftz_f:
+ return {Intrinsic::maxnum, FTZ_MustBeOn};
+ case Intrinsic::nvvm_fmin_d:
+ return {Intrinsic::minnum, FTZ_Any};
+ case Intrinsic::nvvm_fmin_f:
+ return {Intrinsic::minnum, FTZ_MustBeOff};
+ case Intrinsic::nvvm_fmin_ftz_f:
+ return {Intrinsic::minnum, FTZ_MustBeOn};
+ case Intrinsic::nvvm_round_d:
+ return {Intrinsic::round, FTZ_Any};
+ case Intrinsic::nvvm_round_f:
+ return {Intrinsic::round, FTZ_MustBeOff};
+ case Intrinsic::nvvm_round_ftz_f:
+ return {Intrinsic::round, FTZ_MustBeOn};
+ case Intrinsic::nvvm_sqrt_rn_d:
+ return {Intrinsic::sqrt, FTZ_Any};
+ case Intrinsic::nvvm_sqrt_f:
+ // nvvm_sqrt_f is a special case. For most intrinsics, foo_ftz_f is the
+ // ftz version, and foo_f is the non-ftz version. But nvvm_sqrt_f adopts
+ // the ftz-ness of the surrounding code. sqrt_rn_f and sqrt_rn_ftz_f are
+ // the versions with explicit ftz-ness.
+ return {Intrinsic::sqrt, FTZ_Any};
+ case Intrinsic::nvvm_sqrt_rn_f:
+ return {Intrinsic::sqrt, FTZ_MustBeOff};
+ case Intrinsic::nvvm_sqrt_rn_ftz_f:
+ return {Intrinsic::sqrt, FTZ_MustBeOn};
+ case Intrinsic::nvvm_trunc_d:
+ return {Intrinsic::trunc, FTZ_Any};
+ case Intrinsic::nvvm_trunc_f:
+ return {Intrinsic::trunc, FTZ_MustBeOff};
+ case Intrinsic::nvvm_trunc_ftz_f:
+ return {Intrinsic::trunc, FTZ_MustBeOn};
+
+ // NVVM intrinsics that map to LLVM cast operations.
+ //
+ // Note that llvm's target-generic conversion operators correspond to the rz
+ // (round to zero) versions of the nvvm conversion intrinsics, even though
+ // most everything else here uses the rn (round to nearest even) nvvm ops.
+ case Intrinsic::nvvm_d2i_rz:
+ case Intrinsic::nvvm_f2i_rz:
+ case Intrinsic::nvvm_d2ll_rz:
+ case Intrinsic::nvvm_f2ll_rz:
+ return {Instruction::FPToSI};
+ case Intrinsic::nvvm_d2ui_rz:
+ case Intrinsic::nvvm_f2ui_rz:
+ case Intrinsic::nvvm_d2ull_rz:
+ case Intrinsic::nvvm_f2ull_rz:
+ return {Instruction::FPToUI};
+ case Intrinsic::nvvm_i2d_rz:
+ case Intrinsic::nvvm_i2f_rz:
+ case Intrinsic::nvvm_ll2d_rz:
+ case Intrinsic::nvvm_ll2f_rz:
+ return {Instruction::SIToFP};
+ case Intrinsic::nvvm_ui2d_rz:
+ case Intrinsic::nvvm_ui2f_rz:
+ case Intrinsic::nvvm_ull2d_rz:
+ case Intrinsic::nvvm_ull2f_rz:
+ return {Instruction::UIToFP};
+
+ // NVVM intrinsics that map to LLVM binary ops.
+ case Intrinsic::nvvm_add_rn_d:
+ return {Instruction::FAdd, FTZ_Any};
+ case Intrinsic::nvvm_add_rn_f:
+ return {Instruction::FAdd, FTZ_MustBeOff};
+ case Intrinsic::nvvm_add_rn_ftz_f:
+ return {Instruction::FAdd, FTZ_MustBeOn};
+ case Intrinsic::nvvm_mul_rn_d:
+ return {Instruction::FMul, FTZ_Any};
+ case Intrinsic::nvvm_mul_rn_f:
+ return {Instruction::FMul, FTZ_MustBeOff};
+ case Intrinsic::nvvm_mul_rn_ftz_f:
+ return {Instruction::FMul, FTZ_MustBeOn};
+ case Intrinsic::nvvm_div_rn_d:
+ return {Instruction::FDiv, FTZ_Any};
+ case Intrinsic::nvvm_div_rn_f:
+ return {Instruction::FDiv, FTZ_MustBeOff};
+ case Intrinsic::nvvm_div_rn_ftz_f:
+ return {Instruction::FDiv, FTZ_MustBeOn};
+
+    // The remaining cases are NVVM intrinsics that map to LLVM idioms, but
+    // need special handling.
+ //
+ // We seem to be missing intrinsics for rcp.approx.{ftz.}f32, which is just
+ // as well.
+ case Intrinsic::nvvm_rcp_rn_d:
+ return {SPC_Reciprocal, FTZ_Any};
+ case Intrinsic::nvvm_rcp_rn_f:
+ return {SPC_Reciprocal, FTZ_MustBeOff};
+ case Intrinsic::nvvm_rcp_rn_ftz_f:
+ return {SPC_Reciprocal, FTZ_MustBeOn};
+
+ // We do not currently simplify intrinsics that give an approximate
+ // answer. These include:
+ //
+ // - nvvm_cos_approx_{f,ftz_f}
+ // - nvvm_ex2_approx_{d,f,ftz_f}
+ // - nvvm_lg2_approx_{d,f,ftz_f}
+ // - nvvm_sin_approx_{f,ftz_f}
+ // - nvvm_sqrt_approx_{f,ftz_f}
+ // - nvvm_rsqrt_approx_{d,f,ftz_f}
+ // - nvvm_div_approx_{ftz_d,ftz_f,f}
+ // - nvvm_rcp_approx_ftz_d
+ //
+ // Ideally we'd encode them as e.g. "fast call @llvm.cos", where "fast"
+ // means that fastmath is enabled in the intrinsic. Unfortunately only
+ // binary operators (currently) have a fastmath bit in SelectionDAG, so
+ // this information gets lost and we can't select on it.
+ //
+    // TODO: div and rcp are lowered to a binary op, so in theory we could
+    // lower these to "fast fdiv".
+
+ default:
+ return {};
+ }
+ }();
+
+  // If Action.FtzRequirement is not satisfied by the module's ftz state, we
+  // can bail out now. (Note that when IID is not an NVVM intrinsic, we do not
+  // need to look up any module metadata, as FtzRequirement will then be
+  // FTZ_Any.)
+ if (Action.FtzRequirement != FTZ_Any) {
+ StringRef Attr = II->getFunction()
+ ->getFnAttribute("denormal-fp-math-f32")
+ .getValueAsString();
+ DenormalMode Mode = parseDenormalFPAttribute(Attr);
+ bool FtzEnabled = Mode.Output != DenormalMode::IEEE;
+
+ if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn))
+ return nullptr;
+ }
+
+ // Simplify to target-generic intrinsic.
+ if (Action.IID) {
+ SmallVector<Value *, 4> Args(II->arg_operands());
+ // All the target-generic intrinsics currently of interest to us have one
+ // type argument, equal to that of the nvvm intrinsic's argument.
+ Type *Tys[] = {II->getArgOperand(0)->getType()};
+ return CallInst::Create(
+ Intrinsic::getDeclaration(II->getModule(), *Action.IID, Tys), Args);
+ }
+
+ // Simplify to target-generic binary op.
+ if (Action.BinaryOp)
+ return BinaryOperator::Create(*Action.BinaryOp, II->getArgOperand(0),
+ II->getArgOperand(1), II->getName());
+
+ // Simplify to target-generic cast op.
+ if (Action.CastOp)
+ return CastInst::Create(*Action.CastOp, II->getArgOperand(0), II->getType(),
+ II->getName());
+
+ // All that's left are the special cases.
+ if (!Action.Special)
+ return nullptr;
+
+ switch (*Action.Special) {
+ case SPC_Reciprocal:
+ // Simplify reciprocal.
+ return BinaryOperator::Create(
+ Instruction::FDiv, ConstantFP::get(II->getArgOperand(0)->getType(), 1),
+ II->getArgOperand(0), II->getName());
+ }
+ llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
+}
+
+Optional<Instruction *>
+NVPTXTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
+ if (Instruction *I = simplifyNvvmIntrinsic(&II, IC)) {
+ return I;
+ }
+ return None;
+}
+
int NVPTXTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Opd1Info,
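The ftz gate in simplifyNvvmIntrinsic above reduces to one question: does the surrounding function's "denormal-fp-math-f32" attribute request anything other than IEEE handling of outputs? A minimal standalone sketch of that query follows; the helper name isFtzEnabled is assumed purely for illustration and is not part of the patch.

#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Function.h"

// Sketch only: mirrors the attribute lookup used by the FtzRequirement check
// in simplifyNvvmIntrinsic; the helper name is illustrative.
static bool isFtzEnabled(const llvm::Function &F) {
  llvm::StringRef Attr =
      F.getFnAttribute("denormal-fp-math-f32").getValueAsString();
  llvm::DenormalMode Mode = llvm::parseDenormalFPAttribute(Attr);
  // Anything other than IEEE output handling is treated as flush-to-zero.
  return Mode.Output != llvm::DenormalMode::IEEE;
}

When this comes back false, the FTZ_MustBeOn entries in the table above are skipped and the FTZ_MustBeOff ones fire, so llvm.nvvm.sqrt.rn.f, for example, becomes plain llvm.sqrt; the SPC_Reciprocal case turns llvm.nvvm.rcp.rn.d into an fdiv of 1.0 by its operand regardless of the ftz mode.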
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 6f071040dd..9176c5db7b 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -48,9 +48,9 @@ public:
return AddressSpace::ADDRESS_SPACE_GENERIC;
}
- Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
- IntrinsicInst &II) const;
-
+ Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+ IntrinsicInst &II) const;
+
// Loads and stores can be vectorized if the alignment is at least as big as
// the load/store we want to vectorize.
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
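The comment carried over above states the whole vectorization-legality rule for NVPTX: a chain of loads or stores may be widened only if its alignment covers the entire access. Under that stated assumption the check collapses to a one-line comparison; the free-function form below is a sketch, not the exact member defined in this header.

#include "llvm/Support/Alignment.h"

// Sketch only: legality reduces to "aligned at least as much as it is wide".
static bool isChainVectorizable(unsigned ChainSizeInBytes,
                                llvm::Align Alignment) {
  return Alignment >= ChainSizeInBytes; // Align vs. byte-count comparison.
}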
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVVMIntrRange.cpp b/contrib/libs/llvm12/lib/Target/NVPTX/NVVMIntrRange.cpp
index 5381646434..7f5e3edb3b 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/NVVMIntrRange.cpp
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVVMIntrRange.cpp
@@ -17,7 +17,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
-#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -33,13 +33,13 @@ static cl::opt<unsigned> NVVMIntrRangeSM("nvvm-intr-range-sm", cl::init(20),
namespace {
class NVVMIntrRange : public FunctionPass {
private:
- unsigned SmVersion;
+ unsigned SmVersion;
public:
static char ID;
NVVMIntrRange() : NVVMIntrRange(NVVMIntrRangeSM) {}
- NVVMIntrRange(unsigned int SmVersion)
- : FunctionPass(ID), SmVersion(SmVersion) {
+ NVVMIntrRange(unsigned int SmVersion)
+ : FunctionPass(ID), SmVersion(SmVersion) {
initializeNVVMIntrRangePass(*PassRegistry::getPassRegistry());
}
@@ -72,18 +72,18 @@ static bool addRangeMetadata(uint64_t Low, uint64_t High, CallInst *C) {
return true;
}
-static bool runNVVMIntrRange(Function &F, unsigned SmVersion) {
- struct {
- unsigned x, y, z;
- } MaxBlockSize, MaxGridSize;
- MaxBlockSize.x = 1024;
- MaxBlockSize.y = 1024;
- MaxBlockSize.z = 64;
-
- MaxGridSize.x = SmVersion >= 30 ? 0x7fffffff : 0xffff;
- MaxGridSize.y = 0xffff;
- MaxGridSize.z = 0xffff;
-
+static bool runNVVMIntrRange(Function &F, unsigned SmVersion) {
+ struct {
+ unsigned x, y, z;
+ } MaxBlockSize, MaxGridSize;
+ MaxBlockSize.x = 1024;
+ MaxBlockSize.y = 1024;
+ MaxBlockSize.z = 64;
+
+ MaxGridSize.x = SmVersion >= 30 ? 0x7fffffff : 0xffff;
+ MaxGridSize.y = 0xffff;
+ MaxGridSize.z = 0xffff;
+
// Go through the calls in this function.
bool Changed = false;
for (Instruction &I : instructions(F)) {
@@ -155,15 +155,15 @@ static bool runNVVMIntrRange(Function &F, unsigned SmVersion) {
return Changed;
}
-
-bool NVVMIntrRange::runOnFunction(Function &F) {
- return runNVVMIntrRange(F, SmVersion);
-}
-
-NVVMIntrRangePass::NVVMIntrRangePass() : NVVMIntrRangePass(NVVMIntrRangeSM) {}
-
-PreservedAnalyses NVVMIntrRangePass::run(Function &F,
- FunctionAnalysisManager &AM) {
- return runNVVMIntrRange(F, SmVersion) ? PreservedAnalyses::none()
- : PreservedAnalyses::all();
-}
+
+bool NVVMIntrRange::runOnFunction(Function &F) {
+ return runNVVMIntrRange(F, SmVersion);
+}
+
+NVVMIntrRangePass::NVVMIntrRangePass() : NVVMIntrRangePass(NVVMIntrRangeSM) {}
+
+PreservedAnalyses NVVMIntrRangePass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ return runNVVMIntrRange(F, SmVersion) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
+}
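runNVVMIntrRange, restored above, clamps the NVPTX thread/block/grid index intrinsics to their hardware limits (block dimensions at most 1024x1024x64; grid x up to 0x7fffffff on SM 3.0 and later, otherwise 0xffff) by calling addRangeMetadata on each matching call. The sketch below shows how such !range metadata can be attached to a call; the helper name attachRange is illustrative, not the pass's own function.

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"

// Sketch only: attach a half-open [Low, High) range to the value returned by
// a call, the same shape of metadata addRangeMetadata produces.
static void attachRange(llvm::CallInst *C, uint64_t Low, uint64_t High) {
  llvm::LLVMContext &Ctx = C->getContext();
  llvm::IntegerType *Int32Ty = llvm::Type::getInt32Ty(Ctx);
  llvm::Metadata *LowHigh[] = {
      llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, Low)),
      llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, High))};
  C->setMetadata(llvm::LLVMContext::MD_range, llvm::MDNode::get(Ctx, LowHigh));
}

With Low = 0 and High = MaxBlockSize.x, for instance, later passes may assume that a tid.x read is always below 1024.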
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/NVVMReflect.cpp b/contrib/libs/llvm12/lib/Target/NVPTX/NVVMReflect.cpp
index 339f51d210..c740ded1fd 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/NVVMReflect.cpp
@@ -29,7 +29,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -74,7 +74,7 @@ INITIALIZE_PASS(NVVMReflect, "nvvm-reflect",
"Replace occurrences of __nvvm_reflect() calls with 0/1", false,
false)
-static bool runNVVMReflect(Function &F, unsigned SmVersion) {
+static bool runNVVMReflect(Function &F, unsigned SmVersion) {
if (!NVVMReflectEnabled)
return false;
@@ -180,15 +180,15 @@ static bool runNVVMReflect(Function &F, unsigned SmVersion) {
return ToRemove.size() > 0;
}
-
-bool NVVMReflect::runOnFunction(Function &F) {
- return runNVVMReflect(F, SmVersion);
-}
-
-NVVMReflectPass::NVVMReflectPass() : NVVMReflectPass(0) {}
-
-PreservedAnalyses NVVMReflectPass::run(Function &F,
- FunctionAnalysisManager &AM) {
- return runNVVMReflect(F, SmVersion) ? PreservedAnalyses::none()
- : PreservedAnalyses::all();
-}
+
+bool NVVMReflect::runOnFunction(Function &F) {
+ return runNVVMReflect(F, SmVersion);
+}
+
+NVVMReflectPass::NVVMReflectPass() : NVVMReflectPass(0) {}
+
+PreservedAnalyses NVVMReflectPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ return runNVVMReflect(F, SmVersion) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
+}
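NVVMReflect, restored above, rewrites every call to __nvvm_reflect into a compile-time 0 or 1 so the guarded branch can be folded away. The sketch below shows the usual device-side pattern; the extern prototype and the surrounding function are assumptions made for illustration, while the "__CUDA_FTZ" key and the 0/1 folding come from the pass description itself.

// Sketch only: typical source-level use of the reflect hook. The prototype is
// assumed; NVVMReflect later replaces the call with a literal 0 or 1, after
// which one of the two branches is dead and folds away.
extern "C" int __nvvm_reflect(const char *);

static float scale_twice(float X) { // hypothetical example function
  if (__nvvm_reflect("__CUDA_FTZ"))
    return X * 2.0f; // branch kept when denormals are flushed to zero
  return X + X;      // branch kept otherwise
}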
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo/ya.make b/contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo/ya.make
index 52ef1e5f5b..8e9644a1eb 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo/ya.make
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo/ya.make
@@ -12,13 +12,13 @@ LICENSE(Apache-2.0 WITH LLVM-exception)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
- contrib/libs/llvm12
- contrib/libs/llvm12/lib/Support
+ contrib/libs/llvm12
+ contrib/libs/llvm12/lib/Support
)
ADDINCL(
- contrib/libs/llvm12/lib/Target/NVPTX
- contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo
+ contrib/libs/llvm12/lib/Target/NVPTX
+ contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo
)
NO_COMPILER_WARNINGS()
diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/ya.make b/contrib/libs/llvm12/lib/Target/NVPTX/ya.make
index 4f7542eb65..b6325732ad 100644
--- a/contrib/libs/llvm12/lib/Target/NVPTX/ya.make
+++ b/contrib/libs/llvm12/lib/Target/NVPTX/ya.make
@@ -12,27 +12,27 @@ LICENSE(Apache-2.0 WITH LLVM-exception)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
- contrib/libs/llvm12
- contrib/libs/llvm12/include
- contrib/libs/llvm12/lib/Analysis
- contrib/libs/llvm12/lib/CodeGen
- contrib/libs/llvm12/lib/CodeGen/AsmPrinter
- contrib/libs/llvm12/lib/CodeGen/SelectionDAG
- contrib/libs/llvm12/lib/IR
- contrib/libs/llvm12/lib/MC
- contrib/libs/llvm12/lib/Support
- contrib/libs/llvm12/lib/Target
- contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc
- contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo
- contrib/libs/llvm12/lib/Transforms/IPO
- contrib/libs/llvm12/lib/Transforms/Scalar
- contrib/libs/llvm12/lib/Transforms/Utils
- contrib/libs/llvm12/lib/Transforms/Vectorize
+ contrib/libs/llvm12
+ contrib/libs/llvm12/include
+ contrib/libs/llvm12/lib/Analysis
+ contrib/libs/llvm12/lib/CodeGen
+ contrib/libs/llvm12/lib/CodeGen/AsmPrinter
+ contrib/libs/llvm12/lib/CodeGen/SelectionDAG
+ contrib/libs/llvm12/lib/IR
+ contrib/libs/llvm12/lib/MC
+ contrib/libs/llvm12/lib/Support
+ contrib/libs/llvm12/lib/Target
+ contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc
+ contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo
+ contrib/libs/llvm12/lib/Transforms/IPO
+ contrib/libs/llvm12/lib/Transforms/Scalar
+ contrib/libs/llvm12/lib/Transforms/Utils
+ contrib/libs/llvm12/lib/Transforms/Vectorize
)
ADDINCL(
- ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/NVPTX
- contrib/libs/llvm12/lib/Target/NVPTX
+ ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/NVPTX
+ contrib/libs/llvm12/lib/Target/NVPTX
)
NO_COMPILER_WARNINGS()