diff options
author | shadchin <shadchin@yandex-team.ru> | 2022-02-10 16:44:39 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:39 +0300 |
commit | e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (patch) | |
tree | 64175d5cadab313b3e7039ebaa06c5bc3295e274 /contrib/libs/llvm12/lib/Target/PowerPC | |
parent | 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (diff) | |
download | ydb-e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0.tar.gz |
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Target/PowerPC')
76 files changed, 8791 insertions, 8791 deletions
diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 2bcbb4f781..197fd3c7aa 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -284,16 +284,16 @@ public: return (unsigned) Imm.Val; } - unsigned getACCReg() const { - assert(isACCRegNumber() && "Invalid access!"); - return (unsigned) Imm.Val; - } - - unsigned getVSRpEvenReg() const { - assert(isVSRpEvenRegNumber() && "Invalid access!"); - return (unsigned) Imm.Val >> 1; - } - + unsigned getACCReg() const { + assert(isACCRegNumber() && "Invalid access!"); + return (unsigned) Imm.Val; + } + + unsigned getVSRpEvenReg() const { + assert(isVSRpEvenRegNumber() && "Invalid access!"); + return (unsigned) Imm.Val >> 1; + } + unsigned getCCReg() const { assert(isCCRegNumber() && "Invalid access!"); return (unsigned) (Kind == Immediate ? Imm.Val : Expr.CRVal); @@ -406,12 +406,12 @@ public: (getImm() & 3) == 0); } bool isImmZero() const { return Kind == Immediate && getImm() == 0; } bool isRegNumber() const { return Kind == Immediate && isUInt<5>(getImm()); } - bool isACCRegNumber() const { - return Kind == Immediate && isUInt<3>(getImm()); - } - bool isVSRpEvenRegNumber() const { - return Kind == Immediate && isUInt<6>(getImm()) && ((getImm() & 1) == 0); - } + bool isACCRegNumber() const { + return Kind == Immediate && isUInt<3>(getImm()); + } + bool isVSRpEvenRegNumber() const { + return Kind == Immediate && isUInt<6>(getImm()) && ((getImm() & 1) == 0); + } bool isVSRegNumber() const { return Kind == Immediate && isUInt<6>(getImm()); } @@ -502,29 +502,29 @@ public: Inst.addOperand(MCOperand::createReg(VSSRegs[getVSReg()])); } - void addRegSPE4RCOperands(MCInst &Inst, unsigned N) const { + void addRegSPE4RCOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::createReg(RRegs[getReg()])); + Inst.addOperand(MCOperand::createReg(RRegs[getReg()])); } - void addRegSPERCOperands(MCInst &Inst, unsigned N) const { + void addRegSPERCOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::createReg(SPERegs[getReg()])); + Inst.addOperand(MCOperand::createReg(SPERegs[getReg()])); } - void addRegACCRCOperands(MCInst &Inst, unsigned N) const { + void addRegACCRCOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::createReg(ACCRegs[getACCReg()])); + Inst.addOperand(MCOperand::createReg(ACCRegs[getACCReg()])); } - void addRegVSRpRCOperands(MCInst &Inst, unsigned N) const { + void addRegVSRpRCOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::createReg(VSRpRegs[getVSRpEvenReg()])); + Inst.addOperand(MCOperand::createReg(VSRpRegs[getVSRpEvenReg()])); } - void addRegVSRpEvenRCOperands(MCInst &Inst, unsigned N) const { + void addRegVSRpEvenRCOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::createReg(VSRpRegs[getVSRpEvenReg()])); + Inst.addOperand(MCOperand::createReg(VSRpRegs[getVSRpEvenReg()])); } void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const { @@ -676,8 +676,8 @@ public: return CreateImm(CE->getValue(), S, E, IsPPC64); if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Val)) - if (SRE->getKind() == MCSymbolRefExpr::VK_PPC_TLS || - SRE->getKind() == MCSymbolRefExpr::VK_PPC_TLS_PCREL) + if (SRE->getKind() == MCSymbolRefExpr::VK_PPC_TLS || + SRE->getKind() == MCSymbolRefExpr::VK_PPC_TLS_PCREL) return CreateTLSReg(SRE, S, E, IsPPC64); if (const PPCMCExpr *TE = dyn_cast<PPCMCExpr>(Val)) { @@ -773,18 +773,18 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, } case PPC::DCBFx: case PPC::DCBFL: - case PPC::DCBFLP: - case PPC::DCBFPS: - case PPC::DCBSTPS: { + case PPC::DCBFLP: + case PPC::DCBFPS: + case PPC::DCBSTPS: { int L = 0; if (Opcode == PPC::DCBFL) L = 1; else if (Opcode == PPC::DCBFLP) L = 3; - else if (Opcode == PPC::DCBFPS) - L = 4; - else if (Opcode == PPC::DCBSTPS) - L = 6; + else if (Opcode == PPC::DCBFPS) + L = 4; + else if (Opcode == PPC::DCBSTPS) + L = 6; MCInst TmpInst; TmpInst.setOpcode(PPC::DCBF); @@ -1201,41 +1201,41 @@ bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, } bool PPCAsmParser::MatchRegisterName(unsigned &RegNo, int64_t &IntVal) { - if (getParser().getTok().is(AsmToken::Percent)) - getParser().Lex(); // Eat the '%'. - - if (!getParser().getTok().is(AsmToken::Identifier)) - return true; - - StringRef Name = getParser().getTok().getString(); - if (Name.equals_lower("lr")) { - RegNo = isPPC64() ? PPC::LR8 : PPC::LR; - IntVal = 8; - } else if (Name.equals_lower("ctr")) { - RegNo = isPPC64() ? PPC::CTR8 : PPC::CTR; - IntVal = 9; - } else if (Name.equals_lower("vrsave")) { - RegNo = PPC::VRSAVE; - IntVal = 256; - } else if (Name.startswith_lower("r") && - !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) { - RegNo = isPPC64() ? XRegs[IntVal] : RRegs[IntVal]; - } else if (Name.startswith_lower("f") && - !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) { - RegNo = FRegs[IntVal]; - } else if (Name.startswith_lower("vs") && - !Name.substr(2).getAsInteger(10, IntVal) && IntVal < 64) { - RegNo = VSRegs[IntVal]; - } else if (Name.startswith_lower("v") && - !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) { - RegNo = VRegs[IntVal]; - } else if (Name.startswith_lower("cr") && - !Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) { - RegNo = CRRegs[IntVal]; - } else - return true; - getParser().Lex(); - return false; + if (getParser().getTok().is(AsmToken::Percent)) + getParser().Lex(); // Eat the '%'. + + if (!getParser().getTok().is(AsmToken::Identifier)) + return true; + + StringRef Name = getParser().getTok().getString(); + if (Name.equals_lower("lr")) { + RegNo = isPPC64() ? PPC::LR8 : PPC::LR; + IntVal = 8; + } else if (Name.equals_lower("ctr")) { + RegNo = isPPC64() ? PPC::CTR8 : PPC::CTR; + IntVal = 9; + } else if (Name.equals_lower("vrsave")) { + RegNo = PPC::VRSAVE; + IntVal = 256; + } else if (Name.startswith_lower("r") && + !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) { + RegNo = isPPC64() ? XRegs[IntVal] : RRegs[IntVal]; + } else if (Name.startswith_lower("f") && + !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) { + RegNo = FRegs[IntVal]; + } else if (Name.startswith_lower("vs") && + !Name.substr(2).getAsInteger(10, IntVal) && IntVal < 64) { + RegNo = VSRegs[IntVal]; + } else if (Name.startswith_lower("v") && + !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) { + RegNo = VRegs[IntVal]; + } else if (Name.startswith_lower("cr") && + !Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) { + RegNo = CRRegs[IntVal]; + } else + return true; + getParser().Lex(); + return false; } bool PPCAsmParser:: @@ -1432,7 +1432,7 @@ bool PPCAsmParser::ParseOperand(OperandVector &Operands) { switch (getLexer().getKind()) { // Special handling for register names. These are interpreted // as immediates corresponding to the register number. - case AsmToken::Percent: { + case AsmToken::Percent: { unsigned RegNo; int64_t IntVal; if (MatchRegisterName(RegNo, IntVal)) @@ -1440,7 +1440,7 @@ bool PPCAsmParser::ParseOperand(OperandVector &Operands) { Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64())); return false; - } + } case AsmToken::Identifier: case AsmToken::LParen: case AsmToken::Plus: @@ -1488,18 +1488,18 @@ bool PPCAsmParser::ParseOperand(OperandVector &Operands) { int64_t IntVal; switch (getLexer().getKind()) { - case AsmToken::Percent: { + case AsmToken::Percent: { unsigned RegNo; if (MatchRegisterName(RegNo, IntVal)) return Error(S, "invalid register name"); break; - } + } case AsmToken::Integer: - if (getParser().parseAbsoluteExpression(IntVal) || IntVal < 0 || - IntVal > 31) + if (getParser().parseAbsoluteExpression(IntVal) || IntVal < 0 || + IntVal > 31) return Error(S, "invalid register number"); break; - case AsmToken::Identifier: + case AsmToken::Identifier: default: return Error(S, "invalid memory operand"); } @@ -1583,7 +1583,7 @@ bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, /// ParseDirective parses the PPC specific directives bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getIdentifier(); - if (IDVal == ".word") + if (IDVal == ".word") ParseDirectiveWord(2, DirectiveID); else if (IDVal == ".llong") ParseDirectiveWord(8, DirectiveID); @@ -1655,7 +1655,7 @@ bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) { // FIXME: Right now, the parser always allows any available // instruction, so the .machine directive is not useful. - // In the wild, any/push/pop/ppc64/altivec/power[4-9] are seen. + // In the wild, any/push/pop/ppc64/altivec/power[4-9] are seen. Parser.Lex(); @@ -1715,9 +1715,9 @@ bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) { /// Force static initialization. extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCAsmParser() { RegisterMCAsmParser<PPCAsmParser> A(getThePPC32Target()); - RegisterMCAsmParser<PPCAsmParser> B(getThePPC32LETarget()); - RegisterMCAsmParser<PPCAsmParser> C(getThePPC64Target()); - RegisterMCAsmParser<PPCAsmParser> D(getThePPC64LETarget()); + RegisterMCAsmParser<PPCAsmParser> B(getThePPC32LETarget()); + RegisterMCAsmParser<PPCAsmParser> C(getThePPC64Target()); + RegisterMCAsmParser<PPCAsmParser> D(getThePPC64LETarget()); } #define GET_REGISTER_MATCHER diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/ya.make b/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/ya.make index e8ca90aaed..24183440dc 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/ya.make +++ b/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/ya.make @@ -12,19 +12,19 @@ LICENSE(Apache-2.0 WITH LLVM-exception) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( - contrib/libs/llvm12 - contrib/libs/llvm12/include - contrib/libs/llvm12/lib/MC - contrib/libs/llvm12/lib/MC/MCParser - contrib/libs/llvm12/lib/Support - contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc - contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo + contrib/libs/llvm12 + contrib/libs/llvm12/include + contrib/libs/llvm12/lib/MC + contrib/libs/llvm12/lib/MC/MCParser + contrib/libs/llvm12/lib/Support + contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc + contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo ) ADDINCL( - ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/PowerPC - contrib/libs/llvm12/lib/Target/PowerPC - contrib/libs/llvm12/lib/Target/PowerPC/AsmParser + ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/PowerPC + contrib/libs/llvm12/lib/Target/PowerPC + contrib/libs/llvm12/lib/Target/PowerPC/AsmParser ) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index 2441b6d91b..3e9286fb0b 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -54,8 +54,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCDisassembler() { // Register the disassembler for each target. TargetRegistry::RegisterMCDisassembler(getThePPC32Target(), createPPCDisassembler); - TargetRegistry::RegisterMCDisassembler(getThePPC32LETarget(), - createPPCLEDisassembler); + TargetRegistry::RegisterMCDisassembler(getThePPC32LETarget(), + createPPCLEDisassembler); TargetRegistry::RegisterMCDisassembler(getThePPC64Target(), createPPCDisassembler); TargetRegistry::RegisterMCDisassembler(getThePPC64LETarget(), @@ -175,18 +175,18 @@ static DecodeStatus DecodeSPERCRegisterClass(MCInst &Inst, uint64_t RegNo, return decodeRegisterClass(Inst, RegNo, SPERegs); } -static DecodeStatus DecodeACCRCRegisterClass(MCInst &Inst, uint64_t RegNo, - uint64_t Address, - const void *Decoder) { - return decodeRegisterClass(Inst, RegNo, ACCRegs); -} - -static DecodeStatus DecodeVSRpRCRegisterClass(MCInst &Inst, uint64_t RegNo, - uint64_t Address, - const void *Decoder) { - return decodeRegisterClass(Inst, RegNo, VSRpRegs); -} - +static DecodeStatus DecodeACCRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, ACCRegs); +} + +static DecodeStatus DecodeVSRpRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, VSRpRegs); +} + #define DecodeQSRCRegisterClass DecodeQFRCRegisterClass #define DecodeQBRCRegisterClass DecodeQFRCRegisterClass @@ -214,15 +214,15 @@ static DecodeStatus decodeImmZeroOperand(MCInst &Inst, uint64_t Imm, return MCDisassembler::Success; } -static DecodeStatus decodeVSRpEvenOperands(MCInst &Inst, uint64_t RegNo, - uint64_t Address, - const void *Decoder) { - if (RegNo & 1) - return MCDisassembler::Fail; - Inst.addOperand(MCOperand::createReg(VSRpRegs[RegNo >> 1])); - return MCDisassembler::Success; -} - +static DecodeStatus decodeVSRpEvenOperands(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo & 1) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::createReg(VSRpRegs[RegNo >> 1])); + return MCDisassembler::Success; +} + static DecodeStatus decodeMemRIOperands(MCInst &Inst, uint64_t Imm, int64_t Address, const void *Decoder) { // Decode the memri field (imm, reg), which has the low 16-bits as the @@ -418,9 +418,9 @@ DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size, // Read the instruction in the proper endianness. uint64_t Inst = ReadFunc(Bytes.data()); - if (STI.getFeatureBits()[PPC::FeatureSPE]) { + if (STI.getFeatureBits()[PPC::FeatureSPE]) { DecodeStatus result = - decodeInstruction(DecoderTableSPE32, MI, Inst, Address, this, STI); + decodeInstruction(DecoderTableSPE32, MI, Inst, Address, this, STI); if (result != MCDisassembler::Fail) return result; } diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/ya.make b/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/ya.make index 30a45916df..a412740df2 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/ya.make +++ b/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/ya.make @@ -12,17 +12,17 @@ LICENSE(Apache-2.0 WITH LLVM-exception) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( - contrib/libs/llvm12 - contrib/libs/llvm12/include - contrib/libs/llvm12/lib/MC/MCDisassembler - contrib/libs/llvm12/lib/Support - contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo + contrib/libs/llvm12 + contrib/libs/llvm12/include + contrib/libs/llvm12/lib/MC/MCDisassembler + contrib/libs/llvm12/lib/Support + contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo ) ADDINCL( - ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/PowerPC - contrib/libs/llvm12/lib/Target/PowerPC - contrib/libs/llvm12/lib/Target/PowerPC/Disassembler + ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/PowerPC + contrib/libs/llvm12/lib/Target/PowerPC + contrib/libs/llvm12/lib/Target/PowerPC/Disassembler ) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCCallLowering.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCCallLowering.cpp index 24cca73180..e8f8cbfee6 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCCallLowering.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCCallLowering.cpp @@ -1,53 +1,53 @@ -//===-- PPCCallLowering.h - Call lowering for GlobalISel -------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file implements the lowering of LLVM calls to machine code calls for -/// GlobalISel. -/// -//===----------------------------------------------------------------------===// - -#include "PPCCallLowering.h" -#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "ppc-call-lowering" - -using namespace llvm; - -PPCCallLowering::PPCCallLowering(const PPCTargetLowering &TLI) - : CallLowering(&TLI) {} - -bool PPCCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, - const Value *Val, ArrayRef<Register> VRegs, - FunctionLoweringInfo &FLI, - Register SwiftErrorVReg) const { - assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) && - "Return value without a vreg"); - if (VRegs.size() > 0) - return false; - - MIRBuilder.buildInstr(PPC::BLR8); - return true; -} - -bool PPCCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, - const Function &F, - ArrayRef<ArrayRef<Register>> VRegs, - FunctionLoweringInfo &FLI) const { - - // If VRegs is empty, then there are no formal arguments to lower and thus can - // always return true. If there are formal arguments, we currently do not - // handle them and thus return false. - return VRegs.empty(); -} - -bool PPCCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, - CallLoweringInfo &Info) const { - return false; -} +//===-- PPCCallLowering.h - Call lowering for GlobalISel -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements the lowering of LLVM calls to machine code calls for +/// GlobalISel. +/// +//===----------------------------------------------------------------------===// + +#include "PPCCallLowering.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "ppc-call-lowering" + +using namespace llvm; + +PPCCallLowering::PPCCallLowering(const PPCTargetLowering &TLI) + : CallLowering(&TLI) {} + +bool PPCCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, + const Value *Val, ArrayRef<Register> VRegs, + FunctionLoweringInfo &FLI, + Register SwiftErrorVReg) const { + assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) && + "Return value without a vreg"); + if (VRegs.size() > 0) + return false; + + MIRBuilder.buildInstr(PPC::BLR8); + return true; +} + +bool PPCCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, + const Function &F, + ArrayRef<ArrayRef<Register>> VRegs, + FunctionLoweringInfo &FLI) const { + + // If VRegs is empty, then there are no formal arguments to lower and thus can + // always return true. If there are formal arguments, we currently do not + // handle them and thus return false. + return VRegs.empty(); +} + +bool PPCCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, + CallLoweringInfo &Info) const { + return false; +} diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCCallLowering.h b/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCCallLowering.h index d503f5df9c..5a449f4cab 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCCallLowering.h +++ b/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCCallLowering.h @@ -1,40 +1,40 @@ -//===-- PPCCallLowering.h - Call lowering for GlobalISel -------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file describes how to lower LLVM calls to machine code calls. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_POWERPC_GISEL_PPCCALLLOWERING_H -#define LLVM_LIB_TARGET_POWERPC_GISEL_PPCCALLLOWERING_H - -#include "PPCISelLowering.h" -#include "llvm/CodeGen/GlobalISel/CallLowering.h" -#include "llvm/IR/CallingConv.h" - -namespace llvm { - -class PPCTargetLowering; - -class PPCCallLowering : public CallLowering { -public: - PPCCallLowering(const PPCTargetLowering &TLI); - - bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, - ArrayRef<Register> VRegs, FunctionLoweringInfo &FLI, - Register SwiftErrorVReg) const override; - bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, - ArrayRef<ArrayRef<Register>> VRegs, - FunctionLoweringInfo &FLI) const override; - bool lowerCall(MachineIRBuilder &MIRBuilder, - CallLoweringInfo &Info) const override; -}; -} // end namespace llvm - -#endif +//===-- PPCCallLowering.h - Call lowering for GlobalISel -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes how to lower LLVM calls to machine code calls. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_POWERPC_GISEL_PPCCALLLOWERING_H +#define LLVM_LIB_TARGET_POWERPC_GISEL_PPCCALLLOWERING_H + +#include "PPCISelLowering.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/IR/CallingConv.h" + +namespace llvm { + +class PPCTargetLowering; + +class PPCCallLowering : public CallLowering { +public: + PPCCallLowering(const PPCTargetLowering &TLI); + + bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, + ArrayRef<Register> VRegs, FunctionLoweringInfo &FLI, + Register SwiftErrorVReg) const override; + bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, + ArrayRef<ArrayRef<Register>> VRegs, + FunctionLoweringInfo &FLI) const override; + bool lowerCall(MachineIRBuilder &MIRBuilder, + CallLoweringInfo &Info) const override; +}; +} // end namespace llvm + +#endif diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp index f7f43c8bfe..7d64816ed6 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp @@ -1,92 +1,92 @@ -//===- PPCInstructionSelector.cpp --------------------------------*- C++ -*-==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// \file -/// This file implements the targeting of the InstructionSelector class for -/// PowerPC. -//===----------------------------------------------------------------------===// - -#include "PPCInstrInfo.h" -#include "PPCRegisterBankInfo.h" -#include "PPCSubtarget.h" -#include "PPCTargetMachine.h" -#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" -#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/IR/IntrinsicsPowerPC.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "ppc-gisel" - -using namespace llvm; - -namespace { - -#define GET_GLOBALISEL_PREDICATE_BITSET -#include "PPCGenGlobalISel.inc" -#undef GET_GLOBALISEL_PREDICATE_BITSET - -class PPCInstructionSelector : public InstructionSelector { -public: - PPCInstructionSelector(const PPCTargetMachine &TM, const PPCSubtarget &STI, - const PPCRegisterBankInfo &RBI); - - bool select(MachineInstr &I) override; - static const char *getName() { return DEBUG_TYPE; } - -private: - /// tblgen generated 'select' implementation that is used as the initial - /// selector for the patterns that do not require complex C++. - bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; - - const PPCInstrInfo &TII; - const PPCRegisterInfo &TRI; - const PPCRegisterBankInfo &RBI; - -#define GET_GLOBALISEL_PREDICATES_DECL -#include "PPCGenGlobalISel.inc" -#undef GET_GLOBALISEL_PREDICATES_DECL - -#define GET_GLOBALISEL_TEMPORARIES_DECL -#include "PPCGenGlobalISel.inc" -#undef GET_GLOBALISEL_TEMPORARIES_DECL -}; - -} // end anonymous namespace - -#define GET_GLOBALISEL_IMPL -#include "PPCGenGlobalISel.inc" -#undef GET_GLOBALISEL_IMPL - -PPCInstructionSelector::PPCInstructionSelector(const PPCTargetMachine &TM, - const PPCSubtarget &STI, - const PPCRegisterBankInfo &RBI) - : InstructionSelector(), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), RBI(RBI), -#define GET_GLOBALISEL_PREDICATES_INIT -#include "PPCGenGlobalISel.inc" -#undef GET_GLOBALISEL_PREDICATES_INIT -#define GET_GLOBALISEL_TEMPORARIES_INIT -#include "PPCGenGlobalISel.inc" -#undef GET_GLOBALISEL_TEMPORARIES_INIT -{ -} - -bool PPCInstructionSelector::select(MachineInstr &I) { - if (selectImpl(I, *CoverageInfo)) - return true; - return false; -} - -namespace llvm { -InstructionSelector * -createPPCInstructionSelector(const PPCTargetMachine &TM, - const PPCSubtarget &Subtarget, - const PPCRegisterBankInfo &RBI) { - return new PPCInstructionSelector(TM, Subtarget, RBI); -} -} // end namespace llvm +//===- PPCInstructionSelector.cpp --------------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the targeting of the InstructionSelector class for +/// PowerPC. +//===----------------------------------------------------------------------===// + +#include "PPCInstrInfo.h" +#include "PPCRegisterBankInfo.h" +#include "PPCSubtarget.h" +#include "PPCTargetMachine.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/IntrinsicsPowerPC.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "ppc-gisel" + +using namespace llvm; + +namespace { + +#define GET_GLOBALISEL_PREDICATE_BITSET +#include "PPCGenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATE_BITSET + +class PPCInstructionSelector : public InstructionSelector { +public: + PPCInstructionSelector(const PPCTargetMachine &TM, const PPCSubtarget &STI, + const PPCRegisterBankInfo &RBI); + + bool select(MachineInstr &I) override; + static const char *getName() { return DEBUG_TYPE; } + +private: + /// tblgen generated 'select' implementation that is used as the initial + /// selector for the patterns that do not require complex C++. + bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; + + const PPCInstrInfo &TII; + const PPCRegisterInfo &TRI; + const PPCRegisterBankInfo &RBI; + +#define GET_GLOBALISEL_PREDICATES_DECL +#include "PPCGenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATES_DECL + +#define GET_GLOBALISEL_TEMPORARIES_DECL +#include "PPCGenGlobalISel.inc" +#undef GET_GLOBALISEL_TEMPORARIES_DECL +}; + +} // end anonymous namespace + +#define GET_GLOBALISEL_IMPL +#include "PPCGenGlobalISel.inc" +#undef GET_GLOBALISEL_IMPL + +PPCInstructionSelector::PPCInstructionSelector(const PPCTargetMachine &TM, + const PPCSubtarget &STI, + const PPCRegisterBankInfo &RBI) + : InstructionSelector(), TII(*STI.getInstrInfo()), + TRI(*STI.getRegisterInfo()), RBI(RBI), +#define GET_GLOBALISEL_PREDICATES_INIT +#include "PPCGenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATES_INIT +#define GET_GLOBALISEL_TEMPORARIES_INIT +#include "PPCGenGlobalISel.inc" +#undef GET_GLOBALISEL_TEMPORARIES_INIT +{ +} + +bool PPCInstructionSelector::select(MachineInstr &I) { + if (selectImpl(I, *CoverageInfo)) + return true; + return false; +} + +namespace llvm { +InstructionSelector * +createPPCInstructionSelector(const PPCTargetMachine &TM, + const PPCSubtarget &Subtarget, + const PPCRegisterBankInfo &RBI) { + return new PPCInstructionSelector(TM, Subtarget, RBI); +} +} // end namespace llvm diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp index 61ea84b9bd..c16bcaea59 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp @@ -1,20 +1,20 @@ -//===- PPCLegalizerInfo.h ----------------------------------------*- C++ -*-==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// \file -/// This file implements the targeting of the Machinelegalizer class for PowerPC -//===----------------------------------------------------------------------===// - -#include "PPCLegalizerInfo.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "ppc-legalinfo" - -using namespace llvm; -using namespace LegalizeActions; - -PPCLegalizerInfo::PPCLegalizerInfo(const PPCSubtarget &ST) { computeTables(); } +//===- PPCLegalizerInfo.h ----------------------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the targeting of the Machinelegalizer class for PowerPC +//===----------------------------------------------------------------------===// + +#include "PPCLegalizerInfo.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "ppc-legalinfo" + +using namespace llvm; +using namespace LegalizeActions; + +PPCLegalizerInfo::PPCLegalizerInfo(const PPCSubtarget &ST) { computeTables(); } diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCLegalizerInfo.h b/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCLegalizerInfo.h index d0e513c1b9..c73186d3d0 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCLegalizerInfo.h +++ b/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCLegalizerInfo.h @@ -1,28 +1,28 @@ -//===- PPCLegalizerInfo.h ----------------------------------------*- C++ -*-==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// \file -/// This file declares the targeting of the Machinelegalizer class for PowerPC -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_POWERPC_GISEL_PPCMACHINELEGALIZER_H -#define LLVM_LIB_TARGET_POWERPC_GISEL_PPCMACHINELEGALIZER_H - -#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" - -namespace llvm { - -class PPCSubtarget; - -/// This class provides the information for the PowerPC target legalizer for -/// GlobalISel. -class PPCLegalizerInfo : public LegalizerInfo { -public: - PPCLegalizerInfo(const PPCSubtarget &ST); -}; -} // namespace llvm -#endif +//===- PPCLegalizerInfo.h ----------------------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file declares the targeting of the Machinelegalizer class for PowerPC +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_POWERPC_GISEL_PPCMACHINELEGALIZER_H +#define LLVM_LIB_TARGET_POWERPC_GISEL_PPCMACHINELEGALIZER_H + +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" + +namespace llvm { + +class PPCSubtarget; + +/// This class provides the information for the PowerPC target legalizer for +/// GlobalISel. +class PPCLegalizerInfo : public LegalizerInfo { +public: + PPCLegalizerInfo(const PPCSubtarget &ST); +}; +} // namespace llvm +#endif diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp index 4c4c12da51..6af7932491 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp @@ -1,27 +1,27 @@ -//===- PPCRegisterBankInfo.cpp --------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// \file -/// This file implements the targeting of the RegisterBankInfo class for -/// PowerPC. -//===----------------------------------------------------------------------===// - -#include "PPCRegisterBankInfo.h" -#include "PPCRegisterInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "ppc-reg-bank-info" - -#define GET_TARGET_REGBANK_IMPL -#include "PPCGenRegisterBank.inc" - -using namespace llvm; - -PPCRegisterBankInfo::PPCRegisterBankInfo(const TargetRegisterInfo &TRI) - : PPCGenRegisterBankInfo() {} +//===- PPCRegisterBankInfo.cpp --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the targeting of the RegisterBankInfo class for +/// PowerPC. +//===----------------------------------------------------------------------===// + +#include "PPCRegisterBankInfo.h" +#include "PPCRegisterInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "ppc-reg-bank-info" + +#define GET_TARGET_REGBANK_IMPL +#include "PPCGenRegisterBank.inc" + +using namespace llvm; + +PPCRegisterBankInfo::PPCRegisterBankInfo(const TargetRegisterInfo &TRI) + : PPCGenRegisterBankInfo() {} diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h b/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h index cdaba6ff01..358d5ed3cf 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h +++ b/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h @@ -1,39 +1,39 @@ -//===-- PPCRegisterBankInfo.h -----------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file declares the targeting of the RegisterBankInfo class for PowerPC. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_PPC_GISEL_PPCREGISTERBANKINFO_H -#define LLVM_LIB_TARGET_PPC_GISEL_PPCREGISTERBANKINFO_H - -#include "llvm/CodeGen/GlobalISel/RegisterBank.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" - -#define GET_REGBANK_DECLARATIONS -#include "PPCGenRegisterBank.inc" - -namespace llvm { -class TargetRegisterInfo; - -class PPCGenRegisterBankInfo : public RegisterBankInfo { -protected: -#define GET_TARGET_REGBANK_CLASS -#include "PPCGenRegisterBank.inc" -}; - -class PPCRegisterBankInfo final : public PPCGenRegisterBankInfo { -public: - PPCRegisterBankInfo(const TargetRegisterInfo &TRI); -}; -} // namespace llvm - -#endif +//===-- PPCRegisterBankInfo.h -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file declares the targeting of the RegisterBankInfo class for PowerPC. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_PPC_GISEL_PPCREGISTERBANKINFO_H +#define LLVM_LIB_TARGET_PPC_GISEL_PPCREGISTERBANKINFO_H + +#include "llvm/CodeGen/GlobalISel/RegisterBank.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" + +#define GET_REGBANK_DECLARATIONS +#include "PPCGenRegisterBank.inc" + +namespace llvm { +class TargetRegisterInfo; + +class PPCGenRegisterBankInfo : public RegisterBankInfo { +protected: +#define GET_TARGET_REGBANK_CLASS +#include "PPCGenRegisterBank.inc" +}; + +class PPCRegisterBankInfo final : public PPCGenRegisterBankInfo { +public: + PPCRegisterBankInfo(const TargetRegisterInfo &TRI); +}; +} // namespace llvm + +#endif diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCRegisterBanks.td b/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCRegisterBanks.td index c07d1531a6..0e8a4b7061 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCRegisterBanks.td +++ b/contrib/libs/llvm12/lib/Target/PowerPC/GISel/PPCRegisterBanks.td @@ -1,15 +1,15 @@ -//===-- PPCRegisterBanks.td - Describe the PPC Banks -------*- tablegen -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Define the PPC register banks used for GlobalISel. -/// -//===----------------------------------------------------------------------===// - -/// General Purpose Registers -def GPRRegBank : RegisterBank<"GPR", [G8RC]>; +//===-- PPCRegisterBanks.td - Describe the PPC Banks -------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Define the PPC register banks used for GlobalISel. +/// +//===----------------------------------------------------------------------===// + +/// General Purpose Registers +def GPRRegBank : RegisterBank<"GPR", [G8RC]>; diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index c8f12555e9..72401668c8 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -46,7 +46,7 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { case PPC::fixup_ppc_half16ds: return Value & 0xfffc; case PPC::fixup_ppc_pcrel34: - case PPC::fixup_ppc_imm34: + case PPC::fixup_ppc_imm34: return Value & 0x3ffffffff; } } @@ -69,7 +69,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case PPC::fixup_ppc_br24_notoc: return 4; case PPC::fixup_ppc_pcrel34: - case PPC::fixup_ppc_imm34: + case PPC::fixup_ppc_imm34: case FK_Data_8: return 8; case PPC::fixup_ppc_nofixup: @@ -102,7 +102,7 @@ public: { "fixup_ppc_half16", 0, 16, 0 }, { "fixup_ppc_half16ds", 0, 14, 0 }, { "fixup_ppc_pcrel34", 0, 34, MCFixupKindInfo::FKF_IsPCRel }, - { "fixup_ppc_imm34", 0, 34, 0 }, + { "fixup_ppc_imm34", 0, 34, 0 }, { "fixup_ppc_nofixup", 0, 0, 0 } }; const static MCFixupKindInfo InfosLE[PPC::NumTargetFixupKinds] = { @@ -115,7 +115,7 @@ public: { "fixup_ppc_half16", 0, 16, 0 }, { "fixup_ppc_half16ds", 2, 14, 0 }, { "fixup_ppc_pcrel34", 0, 34, MCFixupKindInfo::FKF_IsPCRel }, - { "fixup_ppc_imm34", 0, 34, 0 }, + { "fixup_ppc_imm34", 0, 34, 0 }, { "fixup_ppc_nofixup", 0, 0, 0 } }; diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 594b3b09fc..94ef7b4543 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -138,15 +138,15 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, case MCSymbolRefExpr::VK_PPC_GOT_PCREL: Type = ELF::R_PPC64_GOT_PCREL34; break; - case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_PCREL: - Type = ELF::R_PPC64_GOT_TLSGD_PCREL34; - break; - case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_PCREL: - Type = ELF::R_PPC64_GOT_TLSLD_PCREL34; - break; - case MCSymbolRefExpr::VK_PPC_GOT_TPREL_PCREL: - Type = ELF::R_PPC64_GOT_TPREL_PCREL34; - break; + case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_PCREL: + Type = ELF::R_PPC64_GOT_TLSGD_PCREL34; + break; + case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_PCREL: + Type = ELF::R_PPC64_GOT_TLSLD_PCREL34; + break; + case MCSymbolRefExpr::VK_PPC_GOT_TPREL_PCREL: + Type = ELF::R_PPC64_GOT_TPREL_PCREL34; + break; } break; case FK_Data_4: @@ -416,23 +416,23 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, else Type = ELF::R_PPC_TLS; break; - case MCSymbolRefExpr::VK_PPC_TLS_PCREL: - Type = ELF::R_PPC64_TLS; - break; + case MCSymbolRefExpr::VK_PPC_TLS_PCREL: + Type = ELF::R_PPC64_TLS; + break; + } + break; + case PPC::fixup_ppc_imm34: + switch (Modifier) { + default: + report_fatal_error("Unsupported Modifier for fixup_ppc_imm34."); + case MCSymbolRefExpr::VK_DTPREL: + Type = ELF::R_PPC64_DTPREL34; + break; + case MCSymbolRefExpr::VK_TPREL: + Type = ELF::R_PPC64_TPREL34; + break; } break; - case PPC::fixup_ppc_imm34: - switch (Modifier) { - default: - report_fatal_error("Unsupported Modifier for fixup_ppc_imm34."); - case MCSymbolRefExpr::VK_DTPREL: - Type = ELF::R_PPC64_DTPREL34; - break; - case MCSymbolRefExpr::VK_TPREL: - Type = ELF::R_PPC64_TPREL34; - break; - } - break; case FK_Data_8: switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp index 27784d7e58..386d592660 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp @@ -20,7 +20,7 @@ #include "PPCELFStreamer.h" -#include "PPCFixupKinds.h" +#include "PPCFixupKinds.h" #include "PPCInstrInfo.h" #include "PPCMCCodeEmitter.h" #include "llvm/BinaryFormat/ELF.h" @@ -90,33 +90,33 @@ void PPCELFStreamer::emitInstruction(const MCInst &Inst, PPCMCCodeEmitter *Emitter = static_cast<PPCMCCodeEmitter*>(getAssembler().getEmitterPtr()); - // If the instruction is a part of the GOT to PC-Rel link time optimization - // instruction pair, return a value, otherwise return None. A true returned - // value means the instruction is the PLDpc and a false value means it is - // the user instruction. - Optional<bool> IsPartOfGOTToPCRelPair = isPartOfGOTToPCRelPair(Inst, STI); - - // User of the GOT-indirect address. - // For example, the load that will get the relocation as follows: - // .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) - // lwa 3, 4(3) - if (IsPartOfGOTToPCRelPair.hasValue() && !IsPartOfGOTToPCRelPair.getValue()) - emitGOTToPCRelReloc(Inst); - + // If the instruction is a part of the GOT to PC-Rel link time optimization + // instruction pair, return a value, otherwise return None. A true returned + // value means the instruction is the PLDpc and a false value means it is + // the user instruction. + Optional<bool> IsPartOfGOTToPCRelPair = isPartOfGOTToPCRelPair(Inst, STI); + + // User of the GOT-indirect address. + // For example, the load that will get the relocation as follows: + // .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) + // lwa 3, 4(3) + if (IsPartOfGOTToPCRelPair.hasValue() && !IsPartOfGOTToPCRelPair.getValue()) + emitGOTToPCRelReloc(Inst); + // Special handling is only for prefixed instructions. if (!Emitter->isPrefixedInstruction(Inst)) { MCELFStreamer::emitInstruction(Inst, STI); return; } emitPrefixedInstruction(Inst, STI); - - // Producer of the GOT-indirect address. - // For example, the prefixed load from the got that will get the label as - // follows: - // pld 3, vec@got@pcrel(0), 1 - // .Lpcrel1: - if (IsPartOfGOTToPCRelPair.hasValue() && IsPartOfGOTToPCRelPair.getValue()) - emitGOTToPCRelLabel(Inst); + + // Producer of the GOT-indirect address. + // For example, the prefixed load from the got that will get the label as + // follows: + // pld 3, vec@got@pcrel(0), 1 + // .Lpcrel1: + if (IsPartOfGOTToPCRelPair.hasValue() && IsPartOfGOTToPCRelPair.getValue()) + emitGOTToPCRelLabel(Inst); } void PPCELFStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) { @@ -125,102 +125,102 @@ void PPCELFStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) { MCELFStreamer::emitLabel(Symbol); } -// This linker time GOT PC Relative optimization relocation will look like this: -// pld <reg> symbol@got@pcrel -// <Label###>: -// .reloc Label###-8,R_PPC64_PCREL_OPT,.-(Label###-8) -// load <loadedreg>, 0(<reg>) -// The reason we place the label after the PLDpc instruction is that there -// may be an alignment nop before it since prefixed instructions must not -// cross a 64-byte boundary (please see -// PPCELFStreamer::emitPrefixedInstruction()). When referring to the -// label, we subtract the width of a prefixed instruction (8 bytes) to ensure -// we refer to the PLDpc. -void PPCELFStreamer::emitGOTToPCRelReloc(const MCInst &Inst) { - // Get the last operand which contains the symbol. - const MCOperand &Operand = Inst.getOperand(Inst.getNumOperands() - 1); - assert(Operand.isExpr() && "Expecting an MCExpr."); - // Cast the last operand to MCSymbolRefExpr to get the symbol. - const MCExpr *Expr = Operand.getExpr(); - const MCSymbolRefExpr *SymExpr = static_cast<const MCSymbolRefExpr *>(Expr); - assert(SymExpr->getKind() == MCSymbolRefExpr::VK_PPC_PCREL_OPT && - "Expecting a symbol of type VK_PPC_PCREL_OPT"); - MCSymbol *LabelSym = - getContext().getOrCreateSymbol(SymExpr->getSymbol().getName()); - const MCExpr *LabelExpr = MCSymbolRefExpr::create(LabelSym, getContext()); - const MCExpr *Eight = MCConstantExpr::create(8, getContext()); - // SubExpr is just Label###-8 - const MCExpr *SubExpr = - MCBinaryExpr::createSub(LabelExpr, Eight, getContext()); - MCSymbol *CurrentLocation = getContext().createTempSymbol(); - const MCExpr *CurrentLocationExpr = - MCSymbolRefExpr::create(CurrentLocation, getContext()); - // SubExpr2 is .-(Label###-8) - const MCExpr *SubExpr2 = - MCBinaryExpr::createSub(CurrentLocationExpr, SubExpr, getContext()); - - MCDataFragment *DF = static_cast<MCDataFragment *>(LabelSym->getFragment()); - assert(DF && "Expecting a valid data fragment."); - MCFixupKind FixupKind = static_cast<MCFixupKind>(FirstLiteralRelocationKind + - ELF::R_PPC64_PCREL_OPT); - DF->getFixups().push_back( - MCFixup::create(LabelSym->getOffset() - 8, SubExpr2, - FixupKind, Inst.getLoc())); - emitLabel(CurrentLocation, Inst.getLoc()); -} - -// Emit the label that immediately follows the PLDpc for a link time GOT PC Rel -// optimization. -void PPCELFStreamer::emitGOTToPCRelLabel(const MCInst &Inst) { - // Get the last operand which contains the symbol. - const MCOperand &Operand = Inst.getOperand(Inst.getNumOperands() - 1); - assert(Operand.isExpr() && "Expecting an MCExpr."); - // Cast the last operand to MCSymbolRefExpr to get the symbol. - const MCExpr *Expr = Operand.getExpr(); - const MCSymbolRefExpr *SymExpr = static_cast<const MCSymbolRefExpr *>(Expr); - assert(SymExpr->getKind() == MCSymbolRefExpr::VK_PPC_PCREL_OPT && - "Expecting a symbol of type VK_PPC_PCREL_OPT"); - MCSymbol *LabelSym = - getContext().getOrCreateSymbol(SymExpr->getSymbol().getName()); - emitLabel(LabelSym, Inst.getLoc()); -} - -// This funciton checks if the parameter Inst is part of the setup for a link -// time GOT PC Relative optimization. For example in this situation: -// <MCInst PLDpc <MCOperand Reg:282> <MCOperand Expr:(glob_double@got@pcrel)> -// <MCOperand Imm:0> <MCOperand Expr:(.Lpcrel@<<invalid>>)>> -// <MCInst SOME_LOAD <MCOperand Reg:22> <MCOperand Imm:0> <MCOperand Reg:282> -// <MCOperand Expr:(.Lpcrel@<<invalid>>)>> -// The above is a pair of such instructions and this function will not return -// None for either one of them. In both cases we are looking for the last -// operand <MCOperand Expr:(.Lpcrel@<<invalid>>)> which needs to be an MCExpr -// and has the flag MCSymbolRefExpr::VK_PPC_PCREL_OPT. After that we just look -// at the opcode and in the case of PLDpc we will return true. For the load -// (or store) this function will return false indicating it has found the second -// instruciton in the pair. -Optional<bool> llvm::isPartOfGOTToPCRelPair(const MCInst &Inst, - const MCSubtargetInfo &STI) { - // Need at least two operands. - if (Inst.getNumOperands() < 2) - return None; - - unsigned LastOp = Inst.getNumOperands() - 1; - // The last operand needs to be an MCExpr and it needs to have a variant kind - // of VK_PPC_PCREL_OPT. If it does not satisfy these conditions it is not a - // link time GOT PC Rel opt instruction and we can ignore it and return None. - const MCOperand &Operand = Inst.getOperand(LastOp); - if (!Operand.isExpr()) - return None; - - // Check for the variant kind VK_PPC_PCREL_OPT in this expression. - const MCExpr *Expr = Operand.getExpr(); - const MCSymbolRefExpr *SymExpr = static_cast<const MCSymbolRefExpr *>(Expr); - if (!SymExpr || SymExpr->getKind() != MCSymbolRefExpr::VK_PPC_PCREL_OPT) - return None; - - return (Inst.getOpcode() == PPC::PLDpc); -} - +// This linker time GOT PC Relative optimization relocation will look like this: +// pld <reg> symbol@got@pcrel +// <Label###>: +// .reloc Label###-8,R_PPC64_PCREL_OPT,.-(Label###-8) +// load <loadedreg>, 0(<reg>) +// The reason we place the label after the PLDpc instruction is that there +// may be an alignment nop before it since prefixed instructions must not +// cross a 64-byte boundary (please see +// PPCELFStreamer::emitPrefixedInstruction()). When referring to the +// label, we subtract the width of a prefixed instruction (8 bytes) to ensure +// we refer to the PLDpc. +void PPCELFStreamer::emitGOTToPCRelReloc(const MCInst &Inst) { + // Get the last operand which contains the symbol. + const MCOperand &Operand = Inst.getOperand(Inst.getNumOperands() - 1); + assert(Operand.isExpr() && "Expecting an MCExpr."); + // Cast the last operand to MCSymbolRefExpr to get the symbol. + const MCExpr *Expr = Operand.getExpr(); + const MCSymbolRefExpr *SymExpr = static_cast<const MCSymbolRefExpr *>(Expr); + assert(SymExpr->getKind() == MCSymbolRefExpr::VK_PPC_PCREL_OPT && + "Expecting a symbol of type VK_PPC_PCREL_OPT"); + MCSymbol *LabelSym = + getContext().getOrCreateSymbol(SymExpr->getSymbol().getName()); + const MCExpr *LabelExpr = MCSymbolRefExpr::create(LabelSym, getContext()); + const MCExpr *Eight = MCConstantExpr::create(8, getContext()); + // SubExpr is just Label###-8 + const MCExpr *SubExpr = + MCBinaryExpr::createSub(LabelExpr, Eight, getContext()); + MCSymbol *CurrentLocation = getContext().createTempSymbol(); + const MCExpr *CurrentLocationExpr = + MCSymbolRefExpr::create(CurrentLocation, getContext()); + // SubExpr2 is .-(Label###-8) + const MCExpr *SubExpr2 = + MCBinaryExpr::createSub(CurrentLocationExpr, SubExpr, getContext()); + + MCDataFragment *DF = static_cast<MCDataFragment *>(LabelSym->getFragment()); + assert(DF && "Expecting a valid data fragment."); + MCFixupKind FixupKind = static_cast<MCFixupKind>(FirstLiteralRelocationKind + + ELF::R_PPC64_PCREL_OPT); + DF->getFixups().push_back( + MCFixup::create(LabelSym->getOffset() - 8, SubExpr2, + FixupKind, Inst.getLoc())); + emitLabel(CurrentLocation, Inst.getLoc()); +} + +// Emit the label that immediately follows the PLDpc for a link time GOT PC Rel +// optimization. +void PPCELFStreamer::emitGOTToPCRelLabel(const MCInst &Inst) { + // Get the last operand which contains the symbol. + const MCOperand &Operand = Inst.getOperand(Inst.getNumOperands() - 1); + assert(Operand.isExpr() && "Expecting an MCExpr."); + // Cast the last operand to MCSymbolRefExpr to get the symbol. + const MCExpr *Expr = Operand.getExpr(); + const MCSymbolRefExpr *SymExpr = static_cast<const MCSymbolRefExpr *>(Expr); + assert(SymExpr->getKind() == MCSymbolRefExpr::VK_PPC_PCREL_OPT && + "Expecting a symbol of type VK_PPC_PCREL_OPT"); + MCSymbol *LabelSym = + getContext().getOrCreateSymbol(SymExpr->getSymbol().getName()); + emitLabel(LabelSym, Inst.getLoc()); +} + +// This funciton checks if the parameter Inst is part of the setup for a link +// time GOT PC Relative optimization. For example in this situation: +// <MCInst PLDpc <MCOperand Reg:282> <MCOperand Expr:(glob_double@got@pcrel)> +// <MCOperand Imm:0> <MCOperand Expr:(.Lpcrel@<<invalid>>)>> +// <MCInst SOME_LOAD <MCOperand Reg:22> <MCOperand Imm:0> <MCOperand Reg:282> +// <MCOperand Expr:(.Lpcrel@<<invalid>>)>> +// The above is a pair of such instructions and this function will not return +// None for either one of them. In both cases we are looking for the last +// operand <MCOperand Expr:(.Lpcrel@<<invalid>>)> which needs to be an MCExpr +// and has the flag MCSymbolRefExpr::VK_PPC_PCREL_OPT. After that we just look +// at the opcode and in the case of PLDpc we will return true. For the load +// (or store) this function will return false indicating it has found the second +// instruciton in the pair. +Optional<bool> llvm::isPartOfGOTToPCRelPair(const MCInst &Inst, + const MCSubtargetInfo &STI) { + // Need at least two operands. + if (Inst.getNumOperands() < 2) + return None; + + unsigned LastOp = Inst.getNumOperands() - 1; + // The last operand needs to be an MCExpr and it needs to have a variant kind + // of VK_PPC_PCREL_OPT. If it does not satisfy these conditions it is not a + // link time GOT PC Rel opt instruction and we can ignore it and return None. + const MCOperand &Operand = Inst.getOperand(LastOp); + if (!Operand.isExpr()) + return None; + + // Check for the variant kind VK_PPC_PCREL_OPT in this expression. + const MCExpr *Expr = Operand.getExpr(); + const MCSymbolRefExpr *SymExpr = static_cast<const MCSymbolRefExpr *>(Expr); + if (!SymExpr || SymExpr->getKind() != MCSymbolRefExpr::VK_PPC_PCREL_OPT) + return None; + + return (Inst.getOpcode() == PPC::PLDpc); +} + MCELFStreamer *llvm::createPPCELFStreamer( MCContext &Context, std::unique_ptr<MCAsmBackend> MAB, std::unique_ptr<MCObjectWriter> OW, diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h index c30ca4b187..f44200104f 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h +++ b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h @@ -43,15 +43,15 @@ public: void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override; private: void emitPrefixedInstruction(const MCInst &Inst, const MCSubtargetInfo &STI); - void emitGOTToPCRelReloc(const MCInst &Inst); - void emitGOTToPCRelLabel(const MCInst &Inst); + void emitGOTToPCRelReloc(const MCInst &Inst); + void emitGOTToPCRelLabel(const MCInst &Inst); }; -// Check if the instruction Inst is part of a pair of instructions that make up -// a link time GOT PC Rel optimization. -Optional<bool> isPartOfGOTToPCRelPair(const MCInst &Inst, - const MCSubtargetInfo &STI); - +// Check if the instruction Inst is part of a pair of instructions that make up +// a link time GOT PC Rel optimization. +Optional<bool> isPartOfGOTToPCRelPair(const MCInst &Inst, + const MCSubtargetInfo &STI); + MCELFStreamer *createPPCELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> MAB, std::unique_ptr<MCObjectWriter> OW, diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index 018aa1b558..73292f7b79 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -43,9 +43,9 @@ enum Fixups { // A 34-bit fixup corresponding to PC-relative paddi. fixup_ppc_pcrel34, - // A 34-bit fixup corresponding to Non-PC-relative paddi. - fixup_ppc_imm34, - + // A 34-bit fixup corresponding to Non-PC-relative paddi. + fixup_ppc_imm34, + /// Not a true fixup, but ties a symbol to a call to __tls_get_addr for the /// TLS general and local dynamic models, or inserts the thread-pointer /// register number. diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp index bd64f007df..a291a34d4c 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp @@ -71,45 +71,45 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address, "reference expression if it is an expression at all."); O << "\taddis "; - printOperand(MI, 0, STI, O); + printOperand(MI, 0, STI, O); O << ", "; - printOperand(MI, 2, STI, O); + printOperand(MI, 2, STI, O); O << "("; - printOperand(MI, 1, STI, O); + printOperand(MI, 1, STI, O); O << ")"; return; } - // Check if the last operand is an expression with the variant kind - // VK_PPC_PCREL_OPT. If this is the case then this is a linker optimization - // relocation and the .reloc directive needs to be added. - unsigned LastOp = MI->getNumOperands() - 1; - if (MI->getNumOperands() > 1) { - const MCOperand &Operand = MI->getOperand(LastOp); - if (Operand.isExpr()) { - const MCExpr *Expr = Operand.getExpr(); - const MCSymbolRefExpr *SymExpr = - static_cast<const MCSymbolRefExpr *>(Expr); - - if (SymExpr && SymExpr->getKind() == MCSymbolRefExpr::VK_PPC_PCREL_OPT) { - const MCSymbol &Symbol = SymExpr->getSymbol(); - if (MI->getOpcode() == PPC::PLDpc) { - printInstruction(MI, Address, STI, O); - O << "\n"; - Symbol.print(O, &MAI); - O << ":"; - return; - } else { - O << "\t.reloc "; - Symbol.print(O, &MAI); - O << "-8,R_PPC64_PCREL_OPT,.-("; - Symbol.print(O, &MAI); - O << "-8)\n"; - } - } - } - } - + // Check if the last operand is an expression with the variant kind + // VK_PPC_PCREL_OPT. If this is the case then this is a linker optimization + // relocation and the .reloc directive needs to be added. + unsigned LastOp = MI->getNumOperands() - 1; + if (MI->getNumOperands() > 1) { + const MCOperand &Operand = MI->getOperand(LastOp); + if (Operand.isExpr()) { + const MCExpr *Expr = Operand.getExpr(); + const MCSymbolRefExpr *SymExpr = + static_cast<const MCSymbolRefExpr *>(Expr); + + if (SymExpr && SymExpr->getKind() == MCSymbolRefExpr::VK_PPC_PCREL_OPT) { + const MCSymbol &Symbol = SymExpr->getSymbol(); + if (MI->getOpcode() == PPC::PLDpc) { + printInstruction(MI, Address, STI, O); + O << "\n"; + Symbol.print(O, &MAI); + O << ":"; + return; + } else { + O << "\t.reloc "; + Symbol.print(O, &MAI); + O << "-8,R_PPC64_PCREL_OPT,.-("; + Symbol.print(O, &MAI); + O << "-8)\n"; + } + } + } + } + // Check for slwi/srwi mnemonics. if (MI->getOpcode() == PPC::RLWINM) { unsigned char SH = MI->getOperand(2).getImm(); @@ -124,9 +124,9 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address, SH = 32-SH; } if (useSubstituteMnemonic) { - printOperand(MI, 0, STI, O); + printOperand(MI, 0, STI, O); O << ", "; - printOperand(MI, 1, STI, O); + printOperand(MI, 1, STI, O); O << ", " << (unsigned int)SH; printAnnotation(O, Annot); @@ -141,9 +141,9 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address, // rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH if (63-SH == ME) { O << "\tsldi "; - printOperand(MI, 0, STI, O); + printOperand(MI, 0, STI, O); O << ", "; - printOperand(MI, 1, STI, O); + printOperand(MI, 1, STI, O); O << ", " << (unsigned int)SH; printAnnotation(O, Annot); return; @@ -171,9 +171,9 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address, if (IsBookE && TH != 0 && TH != 16) O << (unsigned int) TH << ", "; - printOperand(MI, 1, STI, O); + printOperand(MI, 1, STI, O); O << ", "; - printOperand(MI, 2, STI, O); + printOperand(MI, 2, STI, O); if (!IsBookE && TH != 0 && TH != 16) O << ", " << (unsigned int) TH; @@ -184,36 +184,36 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address, if (MI->getOpcode() == PPC::DCBF) { unsigned char L = MI->getOperand(0).getImm(); - if (!L || L == 1 || L == 3 || L == 4 || L == 6) { - O << "\tdcb"; - if (L != 6) - O << "f"; - if (L == 1) + if (!L || L == 1 || L == 3 || L == 4 || L == 6) { + O << "\tdcb"; + if (L != 6) + O << "f"; + if (L == 1) O << "l"; if (L == 3) - O << "lp"; - if (L == 4) - O << "ps"; - if (L == 6) - O << "stps"; + O << "lp"; + if (L == 4) + O << "ps"; + if (L == 6) + O << "stps"; O << " "; - printOperand(MI, 1, STI, O); + printOperand(MI, 1, STI, O); O << ", "; - printOperand(MI, 2, STI, O); + printOperand(MI, 2, STI, O); printAnnotation(O, Annot); return; } } - if (!printAliasInstr(MI, Address, STI, O)) - printInstruction(MI, Address, STI, O); + if (!printAliasInstr(MI, Address, STI, O)) + printInstruction(MI, Address, STI, O); printAnnotation(O, Annot); } void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, + const MCSubtargetInfo &STI, raw_ostream &O, const char *Modifier) { unsigned Code = MI->getOperand(OpNo).getImm(); @@ -307,11 +307,11 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, assert(StringRef(Modifier) == "reg" && "Need to specify 'cc', 'pm' or 'reg' as predicate op modifier!"); - printOperand(MI, OpNo + 1, STI, O); + printOperand(MI, OpNo + 1, STI, O); } void PPCInstPrinter::printATBitsAsHint(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Code = MI->getOperand(OpNo).getImm(); if (Code == 2) @@ -321,7 +321,7 @@ void PPCInstPrinter::printATBitsAsHint(const MCInst *MI, unsigned OpNo, } void PPCInstPrinter::printU1ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); assert(Value <= 1 && "Invalid u1imm argument!"); @@ -329,7 +329,7 @@ void PPCInstPrinter::printU1ImmOperand(const MCInst *MI, unsigned OpNo, } void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); assert(Value <= 3 && "Invalid u2imm argument!"); @@ -337,7 +337,7 @@ void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo, } void PPCInstPrinter::printU3ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); assert(Value <= 8 && "Invalid u3imm argument!"); @@ -345,7 +345,7 @@ void PPCInstPrinter::printU3ImmOperand(const MCInst *MI, unsigned OpNo, } void PPCInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); assert(Value <= 15 && "Invalid u4imm argument!"); @@ -353,7 +353,7 @@ void PPCInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo, } void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, + const MCSubtargetInfo &STI, raw_ostream &O) { int Value = MI->getOperand(OpNo).getImm(); Value = SignExtend32<5>(Value); @@ -361,7 +361,7 @@ void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo, } void PPCInstPrinter::printImmZeroOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); assert(Value == 0 && "Operand must be zero"); @@ -369,7 +369,7 @@ void PPCInstPrinter::printImmZeroOperand(const MCInst *MI, unsigned OpNo, } void PPCInstPrinter::printU5ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); assert(Value <= 31 && "Invalid u5imm argument!"); @@ -377,7 +377,7 @@ void PPCInstPrinter::printU5ImmOperand(const MCInst *MI, unsigned OpNo, } void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); assert(Value <= 63 && "Invalid u6imm argument!"); @@ -385,7 +385,7 @@ void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo, } void PPCInstPrinter::printU7ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); assert(Value <= 127 && "Invalid u7imm argument!"); @@ -396,14 +396,14 @@ void PPCInstPrinter::printU7ImmOperand(const MCInst *MI, unsigned OpNo, // of XXSPLTIB which are unsigned. So we simply truncate to 8 bits and // print as unsigned. void PPCInstPrinter::printU8ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned char Value = MI->getOperand(OpNo).getImm(); O << (unsigned int)Value; } void PPCInstPrinter::printU10ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned short Value = MI->getOperand(OpNo).getImm(); assert(Value <= 1023 && "Invalid u10imm argument!"); @@ -411,7 +411,7 @@ void PPCInstPrinter::printU10ImmOperand(const MCInst *MI, unsigned OpNo, } void PPCInstPrinter::printU12ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned short Value = MI->getOperand(OpNo).getImm(); assert(Value <= 4095 && "Invalid u12imm argument!"); @@ -419,16 +419,16 @@ void PPCInstPrinter::printU12ImmOperand(const MCInst *MI, unsigned OpNo, } void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, + const MCSubtargetInfo &STI, raw_ostream &O) { if (MI->getOperand(OpNo).isImm()) O << (short)MI->getOperand(OpNo).getImm(); else - printOperand(MI, OpNo, STI, O); + printOperand(MI, OpNo, STI, O); } void PPCInstPrinter::printS34ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, + const MCSubtargetInfo &STI, raw_ostream &O) { if (MI->getOperand(OpNo).isImm()) { long long Value = MI->getOperand(OpNo).getImm(); @@ -436,24 +436,24 @@ void PPCInstPrinter::printS34ImmOperand(const MCInst *MI, unsigned OpNo, O << (long long)Value; } else - printOperand(MI, OpNo, STI, O); + printOperand(MI, OpNo, STI, O); } void PPCInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, + const MCSubtargetInfo &STI, raw_ostream &O) { if (MI->getOperand(OpNo).isImm()) O << (unsigned short)MI->getOperand(OpNo).getImm(); else - printOperand(MI, OpNo, STI, O); + printOperand(MI, OpNo, STI, O); } void PPCInstPrinter::printBranchOperand(const MCInst *MI, uint64_t Address, - unsigned OpNo, - const MCSubtargetInfo &STI, - raw_ostream &O) { + unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { if (!MI->getOperand(OpNo).isImm()) - return printOperand(MI, OpNo, STI, O); + return printOperand(MI, OpNo, STI, O); int32_t Imm = SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2); if (PrintBranchImmAsAddress) { uint64_t Target = Address + Imm; @@ -476,16 +476,16 @@ void PPCInstPrinter::printBranchOperand(const MCInst *MI, uint64_t Address, } void PPCInstPrinter::printAbsBranchOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, + const MCSubtargetInfo &STI, raw_ostream &O) { if (!MI->getOperand(OpNo).isImm()) - return printOperand(MI, OpNo, STI, O); + return printOperand(MI, OpNo, STI, O); O << SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2); } void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O) { + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned CCReg = MI->getOperand(OpNo).getReg(); unsigned RegNo; switch (CCReg) { @@ -503,37 +503,37 @@ void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo, } void PPCInstPrinter::printMemRegImm(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, + const MCSubtargetInfo &STI, raw_ostream &O) { - printS16ImmOperand(MI, OpNo, STI, O); + printS16ImmOperand(MI, OpNo, STI, O); O << '('; if (MI->getOperand(OpNo+1).getReg() == PPC::R0) O << "0"; else - printOperand(MI, OpNo + 1, STI, O); + printOperand(MI, OpNo + 1, STI, O); O << ')'; } void PPCInstPrinter::printMemRegImm34PCRel(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, + const MCSubtargetInfo &STI, raw_ostream &O) { - printS34ImmOperand(MI, OpNo, STI, O); + printS34ImmOperand(MI, OpNo, STI, O); O << '('; - printImmZeroOperand(MI, OpNo + 1, STI, O); + printImmZeroOperand(MI, OpNo + 1, STI, O); O << ')'; } void PPCInstPrinter::printMemRegImm34(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, - raw_ostream &O) { - printS34ImmOperand(MI, OpNo, STI, O); + const MCSubtargetInfo &STI, + raw_ostream &O) { + printS34ImmOperand(MI, OpNo, STI, O); O << '('; - printOperand(MI, OpNo + 1, STI, O); + printOperand(MI, OpNo + 1, STI, O); O << ')'; } void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, + const MCSubtargetInfo &STI, raw_ostream &O) { // When used as the base register, r0 reads constant zero rather than // the value contained in the register. For this reason, the darwin @@ -541,13 +541,13 @@ void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo, if (MI->getOperand(OpNo).getReg() == PPC::R0) O << "0"; else - printOperand(MI, OpNo, STI, O); + printOperand(MI, OpNo, STI, O); O << ", "; - printOperand(MI, OpNo + 1, STI, O); + printOperand(MI, OpNo + 1, STI, O); } void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O) { + const MCSubtargetInfo &STI, raw_ostream &O) { // On PPC64, VariantKind is VK_None, but on PPC32, it's VK_PLT, and it must // come at the _end_ of the expression. const MCOperand &Op = MI->getOperand(OpNo); @@ -560,17 +560,17 @@ void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo, RefExp = cast<MCSymbolRefExpr>(Op.getExpr()); O << RefExp->getSymbol().getName(); - // The variant kind VK_PPC_NOTOC needs to be handled as a special case - // because we do not want the assembly to print out the @notoc at the - // end like __tls_get_addr(x@tlsgd)@notoc. Instead we want it to look - // like __tls_get_addr@notoc(x@tlsgd). - if (RefExp->getKind() == MCSymbolRefExpr::VK_PPC_NOTOC) - O << '@' << MCSymbolRefExpr::getVariantKindName(RefExp->getKind()); + // The variant kind VK_PPC_NOTOC needs to be handled as a special case + // because we do not want the assembly to print out the @notoc at the + // end like __tls_get_addr(x@tlsgd)@notoc. Instead we want it to look + // like __tls_get_addr@notoc(x@tlsgd). + if (RefExp->getKind() == MCSymbolRefExpr::VK_PPC_NOTOC) + O << '@' << MCSymbolRefExpr::getVariantKindName(RefExp->getKind()); O << '('; - printOperand(MI, OpNo + 1, STI, O); + printOperand(MI, OpNo + 1, STI, O); O << ')'; - if (RefExp->getKind() != MCSymbolRefExpr::VK_None && - RefExp->getKind() != MCSymbolRefExpr::VK_PPC_NOTOC) + if (RefExp->getKind() != MCSymbolRefExpr::VK_None && + RefExp->getKind() != MCSymbolRefExpr::VK_PPC_NOTOC) O << '@' << MCSymbolRefExpr::getVariantKindName(RefExp->getKind()); if (ConstExp != nullptr) O << '+' << ConstExp->getValue(); @@ -579,7 +579,7 @@ void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo, /// showRegistersWithPercentPrefix - Check if this register name should be /// printed with a percentage symbol as prefix. bool PPCInstPrinter::showRegistersWithPercentPrefix(const char *RegName) const { - if (!FullRegNamesWithPercent || TT.getOS() == Triple::AIX) + if (!FullRegNamesWithPercent || TT.getOS() == Triple::AIX) return false; switch (RegName[0]) { @@ -599,7 +599,7 @@ bool PPCInstPrinter::showRegistersWithPercentPrefix(const char *RegName) const { const char *PPCInstPrinter::getVerboseConditionRegName(unsigned RegNum, unsigned RegEncoding) const { - if (!FullRegNames) + if (!FullRegNames) return nullptr; if (RegNum < PPC::CR0EQ || RegNum > PPC::CR7UN) return nullptr; @@ -621,11 +621,11 @@ const char *PPCInstPrinter::getVerboseConditionRegName(unsigned RegNum, bool PPCInstPrinter::showRegistersWithPrefix() const { if (TT.getOS() == Triple::AIX) return false; - return FullRegNamesWithPercent || FullRegNames; + return FullRegNamesWithPercent || FullRegNames; } void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O) { + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { unsigned Reg = Op.getReg(); diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h index 7b6c4c9fa3..5e9b014944 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h +++ b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h @@ -36,73 +36,73 @@ public: const MCSubtargetInfo &STI, raw_ostream &O) override; // Autogenerated by tblgen. - std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override; - void printInstruction(const MCInst *MI, uint64_t Address, - const MCSubtargetInfo &STI, raw_ostream &O); + std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override; + void printInstruction(const MCInst *MI, uint64_t Address, + const MCSubtargetInfo &STI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); - bool printAliasInstr(const MCInst *MI, uint64_t Address, - const MCSubtargetInfo &STI, raw_ostream &OS); + bool printAliasInstr(const MCInst *MI, uint64_t Address, + const MCSubtargetInfo &STI, raw_ostream &OS); void printCustomAliasOperand(const MCInst *MI, uint64_t Address, unsigned OpIdx, unsigned PrintMethodIdx, - const MCSubtargetInfo &STI, raw_ostream &OS); + const MCSubtargetInfo &STI, raw_ostream &OS); - void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, - raw_ostream &O); + void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); void printPredicateOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O, - const char *Modifier = nullptr); - void printATBitsAsHint(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O, + const char *Modifier = nullptr); + void printATBitsAsHint(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); - void printU1ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O); - void printU2ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O); - void printU3ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O); - void printU4ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O); - void printS5ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O); - void printU5ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O); - void printU6ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O); - void printU7ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O); - void printU8ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O); - void printU10ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O); - void printU12ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O); - void printS16ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O); - void printS34ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O); - void printU16ImmOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O); - void printImmZeroOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O); + void printU1ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printU2ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printU3ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printU4ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printS5ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printU5ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printU6ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printU7ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printU8ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printU10ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printU12ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printS16ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printS34ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printU16ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printImmZeroOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); void printBranchOperand(const MCInst *MI, uint64_t Address, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O); - void printAbsBranchOperand(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O); - void printTLSCall(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); + void printAbsBranchOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printTLSCall(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); - void printcrbitm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, - raw_ostream &O); + void printcrbitm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); - void printMemRegImm(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O); - void printMemRegImm34PCRel(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O); - void printMemRegImm34(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O); - void printMemRegReg(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O); + void printMemRegImm(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printMemRegImm34PCRel(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printMemRegImm34(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printMemRegReg(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); }; } // end namespace llvm diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index dfadde0cd4..2b76af279c 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -26,8 +26,8 @@ PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) { if (is64Bit) { CodePointerSize = CalleeSaveStackSlotSize = 8; } - IsLittleEndian = - T.getArch() == Triple::ppc64le || T.getArch() == Triple::ppcle; + IsLittleEndian = + T.getArch() == Triple::ppc64le || T.getArch() == Triple::ppcle; // ".comm align is in bytes but .align is pow-2." AlignmentIsInBytes = false; @@ -57,7 +57,7 @@ PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) { void PPCXCOFFMCAsmInfo::anchor() {} PPCXCOFFMCAsmInfo::PPCXCOFFMCAsmInfo(bool Is64Bit, const Triple &T) { - if (T.getArch() == Triple::ppc64le || T.getArch() == Triple::ppcle) + if (T.getArch() == Triple::ppc64le || T.getArch() == Triple::ppcle) report_fatal_error("XCOFF is not supported for little-endian targets"); CodePointerSize = CalleeSaveStackSlotSize = Is64Bit ? 8 : 4; diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index f481106939..5f0769fd21 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -45,12 +45,12 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo, const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); - if (MO.isReg() || MO.isImm()) - return getMachineOpValue(MI, MO, Fixups, STI); + if (MO.isReg() || MO.isImm()) + return getMachineOpValue(MI, MO, Fixups, STI); // Add a fixup for the branch target. Fixups.push_back(MCFixup::create(0, MO.getExpr(), - ((MI.getOpcode() == PPC::BL8_NOTOC || - MI.getOpcode() == PPC::BL8_NOTOC_TLS) + ((MI.getOpcode() == PPC::BL8_NOTOC || + MI.getOpcode() == PPC::BL8_NOTOC_TLS) ? (MCFixupKind)PPC::fixup_ppc_br24_notoc : (MCFixupKind)PPC::fixup_ppc_br24))); return 0; @@ -94,16 +94,16 @@ getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo, return 0; } -unsigned -PPCMCCodeEmitter::getVSRpEvenEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - assert(MI.getOperand(OpNo).isReg() && "Operand should be a register"); - unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI) - << 1; - return RegBits; -} - +unsigned +PPCMCCodeEmitter::getVSRpEvenEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + assert(MI.getOperand(OpNo).isReg() && "Operand should be a register"); + unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI) + << 1; + return RegBits; +} + unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { @@ -116,36 +116,36 @@ unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo, return 0; } -uint64_t PPCMCCodeEmitter::getImm34Encoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI, - MCFixupKind Fixup) const { +uint64_t PPCMCCodeEmitter::getImm34Encoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI, + MCFixupKind Fixup) const { const MCOperand &MO = MI.getOperand(OpNo); - assert(!MO.isReg() && "Not expecting a register for this operand."); - if (MO.isImm()) + assert(!MO.isReg() && "Not expecting a register for this operand."); + if (MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI); // Add a fixup for the immediate field. - Fixups.push_back(MCFixup::create(0, MO.getExpr(), Fixup)); + Fixups.push_back(MCFixup::create(0, MO.getExpr(), Fixup)); return 0; } -uint64_t -PPCMCCodeEmitter::getImm34EncodingNoPCRel(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - return getImm34Encoding(MI, OpNo, Fixups, STI, - (MCFixupKind)PPC::fixup_ppc_imm34); -} - -uint64_t -PPCMCCodeEmitter::getImm34EncodingPCRel(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - return getImm34Encoding(MI, OpNo, Fixups, STI, - (MCFixupKind)PPC::fixup_ppc_pcrel34); -} - +uint64_t +PPCMCCodeEmitter::getImm34EncodingNoPCRel(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + return getImm34Encoding(MI, OpNo, Fixups, STI, + (MCFixupKind)PPC::fixup_ppc_imm34); +} + +uint64_t +PPCMCCodeEmitter::getImm34EncodingPCRel(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + return getImm34Encoding(MI, OpNo, Fixups, STI, + (MCFixupKind)PPC::fixup_ppc_pcrel34); +} + unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { @@ -241,13 +241,13 @@ PPCMCCodeEmitter::getMemRI34PCRelEncoding(const MCInst &MI, unsigned OpNo, (void)SRE; // Currently these are the only valid PCRelative Relocations. assert((SRE->getKind() == MCSymbolRefExpr::VK_PCREL || - SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_PCREL || - SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_TLSGD_PCREL || - SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_TLSLD_PCREL || - SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_TPREL_PCREL) && - "VariantKind must be VK_PCREL or VK_PPC_GOT_PCREL or " - "VK_PPC_GOT_TLSGD_PCREL or VK_PPC_GOT_TLSLD_PCREL or " - "VK_PPC_GOT_TPREL_PCREL."); + SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_PCREL || + SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_TLSGD_PCREL || + SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_TLSLD_PCREL || + SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_TPREL_PCREL) && + "VariantKind must be VK_PCREL or VK_PPC_GOT_PCREL or " + "VK_PPC_GOT_TLSGD_PCREL or VK_PPC_GOT_TLSLD_PCREL or " + "VK_PPC_GOT_TPREL_PCREL."); // Generate the fixup for the relocation. Fixups.push_back( MCFixup::create(0, Expr, @@ -359,12 +359,12 @@ unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo, // Add a fixup for the TLS register, which simply provides a relocation // hint to the linker that this statement is part of a relocation sequence. - // Return the thread-pointer register's encoding. Add a one byte displacement - // if using PC relative memops. - const MCExpr *Expr = MO.getExpr(); - const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(Expr); - bool IsPCRel = SRE->getKind() == MCSymbolRefExpr::VK_PPC_TLS_PCREL; - Fixups.push_back(MCFixup::create(IsPCRel ? 1 : 0, Expr, + // Return the thread-pointer register's encoding. Add a one byte displacement + // if using PC relative memops. + const MCExpr *Expr = MO.getExpr(); + const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(Expr); + bool IsPCRel = SRE->getKind() == MCSymbolRefExpr::VK_PPC_TLS_PCREL; + Fixups.push_back(MCFixup::create(IsPCRel ? 1 : 0, Expr, (MCFixupKind)PPC::fixup_ppc_nofixup)); const Triple &TT = STI.getTargetTriple(); bool isPPC64 = TT.isPPC64(); diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h index a3618f858b..347e163c95 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h +++ b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h @@ -52,14 +52,14 @@ public: const MCSubtargetInfo &STI) const; uint64_t getImm34Encoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI, - MCFixupKind Fixup) const; - uint64_t getImm34EncodingNoPCRel(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - uint64_t getImm34EncodingPCRel(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; + const MCSubtargetInfo &STI, + MCFixupKind Fixup) const; + uint64_t getImm34EncodingNoPCRel(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + uint64_t getImm34EncodingPCRel(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; @@ -93,9 +93,9 @@ public: unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; - unsigned getVSRpEvenEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; + unsigned getVSRpEvenEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getMachineOpValue - Return binary encoding of operand. If the machine /// operand requires relocation, record the relocation and return zero. diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 19ceda2c62..bf9c6feb54 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -20,7 +20,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/BinaryFormat/ELF.h" -#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" @@ -30,7 +30,7 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSectionXCOFF.h" +#include "llvm/MC/MCSectionXCOFF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" @@ -78,17 +78,17 @@ static MCRegisterInfo *createPPCMCRegisterInfo(const Triple &TT) { static MCSubtargetInfo *createPPCMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - // Set some default feature to MC layer. - std::string FullFS = std::string(FS); - - if (TT.isOSAIX()) { - if (!FullFS.empty()) - FullFS = "+aix," + FullFS; - else - FullFS = "+aix"; - } - - return createPPCMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FullFS); + // Set some default feature to MC layer. + std::string FullFS = std::string(FS); + + if (TT.isOSAIX()) { + if (!FullFS.empty()) + FullFS = "+aix," + FullFS; + else + FullFS = "+aix"; + } + + return createPPCMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FullFS); } static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, @@ -133,10 +133,10 @@ public: void emitTCEntry(const MCSymbol &S) override { if (const MCSymbolXCOFF *XSym = dyn_cast<MCSymbolXCOFF>(&S)) { MCSymbolXCOFF *TCSym = - cast<MCSectionXCOFF>(Streamer.getCurrentSectionOnly()) - ->getQualNameSymbol(); - OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << '\n'; - + cast<MCSectionXCOFF>(Streamer.getCurrentSectionOnly()) + ->getQualNameSymbol(); + OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << '\n'; + if (TCSym->hasRename()) Streamer.emitXCOFFRenameDirective(TCSym, TCSym->getSymbolTableName()); return; @@ -346,8 +346,8 @@ static MCInstPrinter *createPPCMCInstPrinter(const Triple &T, } extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTargetMC() { - for (Target *T : {&getThePPC32Target(), &getThePPC32LETarget(), - &getThePPC64Target(), &getThePPC64LETarget()}) { + for (Target *T : {&getThePPC32Target(), &getThePPC32LETarget(), + &getThePPC64Target(), &getThePPC64LETarget()}) { // Register the MC asm info. RegisterMCAsmInfoFn C(*T, createPPCMCAsmInfo); diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 75a119301a..03b3163417 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -124,11 +124,11 @@ static inline bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME) { #define GET_SUBTARGETINFO_ENUM #include "PPCGenSubtargetInfo.inc" -#define PPC_REGS0_7(X) \ - { \ - X##0, X##1, X##2, X##3, X##4, X##5, X##6, X##7 \ - } - +#define PPC_REGS0_7(X) \ + { \ + X##0, X##1, X##2, X##3, X##4, X##5, X##6, X##7 \ + } + #define PPC_REGS0_31(X) \ { \ X##0, X##1, X##2, X##3, X##4, X##5, X##6, X##7, X##8, X##9, X##10, X##11, \ @@ -161,7 +161,7 @@ using llvm::MCPhysReg; static const MCPhysReg RRegs[32] = PPC_REGS0_31(PPC::R); \ static const MCPhysReg XRegs[32] = PPC_REGS0_31(PPC::X); \ static const MCPhysReg FRegs[32] = PPC_REGS0_31(PPC::F); \ - static const MCPhysReg VSRpRegs[32] = PPC_REGS0_31(PPC::VSRp); \ + static const MCPhysReg VSRpRegs[32] = PPC_REGS0_31(PPC::VSRp); \ static const MCPhysReg SPERegs[32] = PPC_REGS0_31(PPC::S); \ static const MCPhysReg VFRegs[32] = PPC_REGS0_31(PPC::VF); \ static const MCPhysReg VRegs[32] = PPC_REGS0_31(PPC::V); \ @@ -184,6 +184,6 @@ using llvm::MCPhysReg; PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN, \ PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN, \ PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN}; \ - static const MCPhysReg CRRegs[8] = PPC_REGS0_7(PPC::CR); \ - static const MCPhysReg ACCRegs[8] = PPC_REGS0_7(PPC::ACC) + static const MCPhysReg CRRegs[8] = PPC_REGS0_7(PPC::CR); \ + static const MCPhysReg ACCRegs[8] = PPC_REGS0_7(PPC::ACC) #endif // LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp index d8451d4064..77b0331bb1 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp @@ -58,19 +58,19 @@ std::pair<uint8_t, uint8_t> PPCXCOFFObjectWriter::getRelocTypeAndSignSize( switch ((unsigned)Fixup.getKind()) { default: report_fatal_error("Unimplemented fixup kind."); - case PPC::fixup_ppc_half16: { - const uint8_t SignAndSizeForHalf16 = EncodedSignednessIndicator | 15; + case PPC::fixup_ppc_half16: { + const uint8_t SignAndSizeForHalf16 = EncodedSignednessIndicator | 15; switch (Modifier) { default: report_fatal_error("Unsupported modifier for half16 fixup."); case MCSymbolRefExpr::VK_None: - return {XCOFF::RelocationType::R_TOC, SignAndSizeForHalf16}; - case MCSymbolRefExpr::VK_PPC_U: - return {XCOFF::RelocationType::R_TOCU, SignAndSizeForHalf16}; - case MCSymbolRefExpr::VK_PPC_L: - return {XCOFF::RelocationType::R_TOCL, SignAndSizeForHalf16}; + return {XCOFF::RelocationType::R_TOC, SignAndSizeForHalf16}; + case MCSymbolRefExpr::VK_PPC_U: + return {XCOFF::RelocationType::R_TOCU, SignAndSizeForHalf16}; + case MCSymbolRefExpr::VK_PPC_L: + return {XCOFF::RelocationType::R_TOCL, SignAndSizeForHalf16}; } - } break; + } break; case PPC::fixup_ppc_br24: // Branches are 4 byte aligned, so the 24 bits we encode in // the instruction actually represents a 26 bit offset. diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/ya.make b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/ya.make index 1eee490274..903dc6ec7f 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/ya.make +++ b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/ya.make @@ -15,18 +15,18 @@ LICENSE( LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( - contrib/libs/llvm12 - contrib/libs/llvm12/include - contrib/libs/llvm12/lib/BinaryFormat - contrib/libs/llvm12/lib/MC - contrib/libs/llvm12/lib/Support - contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo + contrib/libs/llvm12 + contrib/libs/llvm12/include + contrib/libs/llvm12/lib/BinaryFormat + contrib/libs/llvm12/lib/MC + contrib/libs/llvm12/lib/Support + contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo ) ADDINCL( - ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/PowerPC - contrib/libs/llvm12/lib/Target/PowerPC - contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc + ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/PowerPC + contrib/libs/llvm12/lib/Target/PowerPC + contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc ) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/P9InstrResources.td b/contrib/libs/llvm12/lib/Target/PowerPC/P9InstrResources.td index 69119b12e7..63531f72ad 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/P9InstrResources.td +++ b/contrib/libs/llvm12/lib/Target/PowerPC/P9InstrResources.td @@ -94,7 +94,7 @@ def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C], (instregex "CMPRB(8)?$"), (instregex "TD(I)?$"), (instregex "TW(I)?$"), - (instregex "FCMP(O|U)(S|D)$"), + (instregex "FCMP(O|U)(S|D)$"), (instregex "XSTSTDC(S|D)P$"), FTDIV, FTSQRT, diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPC.h b/contrib/libs/llvm12/lib/Target/PowerPC/PPC.h index e4d72dd6c2..264582b244 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPC.h +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPC.h @@ -20,20 +20,20 @@ #undef PPC namespace llvm { -class PPCRegisterBankInfo; -class PPCSubtarget; -class PPCTargetMachine; -class PassRegistry; -class FunctionPass; -class InstructionSelector; -class MachineInstr; -class MachineOperand; -class AsmPrinter; -class MCInst; -class MCOperand; -class ModulePass; - -FunctionPass *createPPCCTRLoops(); +class PPCRegisterBankInfo; +class PPCSubtarget; +class PPCTargetMachine; +class PassRegistry; +class FunctionPass; +class InstructionSelector; +class MachineInstr; +class MachineOperand; +class AsmPrinter; +class MCInst; +class MCOperand; +class ModulePass; + +FunctionPass *createPPCCTRLoops(); #ifndef NDEBUG FunctionPass *createPPCCTRLoopsVerify(); #endif @@ -81,10 +81,10 @@ FunctionPass *createPPCCTRLoops(); ModulePass *createPPCLowerMASSVEntriesPass(); void initializePPCLowerMASSVEntriesPass(PassRegistry &); extern char &PPCLowerMASSVEntriesID; - - InstructionSelector * - createPPCInstructionSelector(const PPCTargetMachine &, const PPCSubtarget &, - const PPCRegisterBankInfo &); + + InstructionSelector * + createPPCInstructionSelector(const PPCTargetMachine &, const PPCSubtarget &, + const PPCRegisterBankInfo &); namespace PPCII { /// Target Operand Flag enum. @@ -111,37 +111,37 @@ FunctionPass *createPPCCTRLoops(); /// produce the relocation @got@pcrel. Fixup is VK_PPC_GOT_PCREL. MO_GOT_FLAG = 8, - // MO_PCREL_OPT_FLAG - If this bit is set the operand is part of a - // PC Relative linker optimization. - MO_PCREL_OPT_FLAG = 16, - - /// MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to - /// TLS General Dynamic model. - MO_TLSGD_FLAG = 32, - - /// MO_TPREL_FLAG - If this bit is set the symbol reference is relative to - /// TLS Initial Exec model. - MO_TPREL_FLAG = 64, - - /// MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to - /// TLS Local Dynamic model. - MO_TLSLD_FLAG = 128, - - /// MO_GOT_TLSGD_PCREL_FLAG - A combintaion of flags, if these bits are set - /// they should produce the relocation @got@tlsgd@pcrel. - /// Fix up is VK_PPC_GOT_TLSGD_PCREL - MO_GOT_TLSGD_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG | MO_TLSGD_FLAG, - - /// MO_GOT_TLSLD_PCREL_FLAG - A combintaion of flags, if these bits are set - /// they should produce the relocation @got@tlsld@pcrel. - /// Fix up is VK_PPC_GOT_TLSLD_PCREL - MO_GOT_TLSLD_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG | MO_TLSLD_FLAG, - - /// MO_GOT_TPREL_PCREL_FLAG - A combintaion of flags, if these bits are set - /// they should produce the relocation @got@tprel@pcrel. - /// Fix up is VK_PPC_GOT_TPREL_PCREL - MO_GOT_TPREL_PCREL_FLAG = MO_GOT_FLAG | MO_TPREL_FLAG | MO_PCREL_FLAG, - + // MO_PCREL_OPT_FLAG - If this bit is set the operand is part of a + // PC Relative linker optimization. + MO_PCREL_OPT_FLAG = 16, + + /// MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to + /// TLS General Dynamic model. + MO_TLSGD_FLAG = 32, + + /// MO_TPREL_FLAG - If this bit is set the symbol reference is relative to + /// TLS Initial Exec model. + MO_TPREL_FLAG = 64, + + /// MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to + /// TLS Local Dynamic model. + MO_TLSLD_FLAG = 128, + + /// MO_GOT_TLSGD_PCREL_FLAG - A combintaion of flags, if these bits are set + /// they should produce the relocation @got@tlsgd@pcrel. + /// Fix up is VK_PPC_GOT_TLSGD_PCREL + MO_GOT_TLSGD_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG | MO_TLSGD_FLAG, + + /// MO_GOT_TLSLD_PCREL_FLAG - A combintaion of flags, if these bits are set + /// they should produce the relocation @got@tlsld@pcrel. + /// Fix up is VK_PPC_GOT_TLSLD_PCREL + MO_GOT_TLSLD_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG | MO_TLSLD_FLAG, + + /// MO_GOT_TPREL_PCREL_FLAG - A combintaion of flags, if these bits are set + /// they should produce the relocation @got@tprel@pcrel. + /// Fix up is VK_PPC_GOT_TPREL_PCREL + MO_GOT_TPREL_PCREL_FLAG = MO_GOT_FLAG | MO_TPREL_FLAG | MO_PCREL_FLAG, + /// The next are not flags but distinct values. MO_ACCESS_MASK = 0xf00, diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPC.td b/contrib/libs/llvm12/lib/Target/PowerPC/PPC.td index 2e90fe46fb..1e6ded2315 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPC.td +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPC.td @@ -57,10 +57,10 @@ def DirectivePwrFuture def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true", "Enable 64-bit instructions">; -def AIXOS: SubtargetFeature<"aix", "IsAIX", "true", "AIX OS">; -def FeatureModernAIXAs - : SubtargetFeature<"modern-aix-as", "HasModernAIXAs", "true", - "AIX system assembler is modern enough to support new mnes">; +def AIXOS: SubtargetFeature<"aix", "IsAIX", "true", "AIX OS">; +def FeatureModernAIXAs + : SubtargetFeature<"modern-aix-as", "HasModernAIXAs", "true", + "AIX system assembler is modern enough to support new mnes">; def FeatureHardFloat : SubtargetFeature<"hard-float", "HasHardFloat", "true", "Enable floating-point instructions">; def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true", @@ -76,9 +76,9 @@ def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true", def FeatureSPE : SubtargetFeature<"spe","HasSPE", "true", "Enable SPE instructions", [FeatureHardFloat]>; -def FeatureEFPU2 : SubtargetFeature<"efpu2", "HasEFPU2", "true", - "Enable Embedded Floating-Point APU 2 instructions", - [FeatureSPE]>; +def FeatureEFPU2 : SubtargetFeature<"efpu2", "HasEFPU2", "true", + "Enable Embedded Floating-Point APU 2 instructions", + [FeatureSPE]>; def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true", "Enable the MFOCRF instruction">; def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true", @@ -181,9 +181,9 @@ def FeatureAddisLoadFusion : SubtargetFeature<"fuse-addis-load", "HasAddisLoadFusion", "true", "Power8 Addis-Load fusion", [FeatureFusion]>; -def FeatureStoreFusion : SubtargetFeature<"fuse-store", "HasStoreFusion", "true", - "Target supports store clustering", - [FeatureFusion]>; +def FeatureStoreFusion : SubtargetFeature<"fuse-store", "HasStoreFusion", "true", + "Target supports store clustering", + [FeatureFusion]>; def FeatureUnalignedFloats : SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess", "true", "CPU does not trap on unaligned FP access">; @@ -200,7 +200,7 @@ def FeatureFloat128 : def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "POPCNTD_Fast", "Enable the popcnt[dw] instructions">; -// Note that for the a2 processor models we should not use popcnt[dw] by +// Note that for the a2 processor models we should not use popcnt[dw] by // default. These processors do support the instructions, but they're // microcoded, and the software emulation is about twice as fast. def FeatureSlowPOPCNTD : SubtargetFeature<"slow-popcntd","HasPOPCNTD", @@ -243,15 +243,15 @@ def FeaturePrefixInstrs : SubtargetFeature<"prefix-instrs", "HasPrefixInstrs", def FeaturePCRelativeMemops : SubtargetFeature<"pcrelative-memops", "HasPCRelativeMemops", "true", "Enable PC relative Memory Ops", - [FeatureISA3_0, FeaturePrefixInstrs]>; -def FeaturePairedVectorMemops: - SubtargetFeature<"paired-vector-memops", "PairedVectorMemops", "true", - "32Byte load and store instructions", + [FeatureISA3_0, FeaturePrefixInstrs]>; +def FeaturePairedVectorMemops: + SubtargetFeature<"paired-vector-memops", "PairedVectorMemops", "true", + "32Byte load and store instructions", [FeatureISA3_0]>; -def FeatureMMA : SubtargetFeature<"mma", "HasMMA", "true", - "Enable MMA instructions", - [FeatureP8Vector, FeatureP9Altivec, - FeaturePairedVectorMemops]>; +def FeatureMMA : SubtargetFeature<"mma", "HasMMA", "true", + "Enable MMA instructions", + [FeatureP8Vector, FeatureP9Altivec, + FeaturePairedVectorMemops]>; def FeaturePredictableSelectIsExpensive : SubtargetFeature<"predictable-select-expensive", @@ -335,8 +335,8 @@ def ProcessorFeatures { [DirectivePwr9, FeatureP9Altivec, FeatureP9Vector, - FeaturePPCPreRASched, - FeaturePPCPostRASched, + FeaturePPCPreRASched, + FeaturePPCPostRASched, FeatureISA3_0, FeaturePredictableSelectIsExpensive ]; @@ -346,7 +346,7 @@ def ProcessorFeatures { // dispatch for vector operations than scalar ones. For the time being, // this list also includes scheduling-related features since we do not have // enough info to create custom scheduling strategies for future CPUs. - list<SubtargetFeature> P9SpecificFeatures = [FeatureVectorsUseTwoUnits]; + list<SubtargetFeature> P9SpecificFeatures = [FeatureVectorsUseTwoUnits]; list<SubtargetFeature> P9InheritableFeatures = !listconcat(P8InheritableFeatures, P9AdditionalFeatures); list<SubtargetFeature> P9Features = @@ -355,12 +355,12 @@ def ProcessorFeatures { // Power10 // For P10 CPU we assume that all of the existing features from Power9 // still exist with the exception of those we know are Power9 specific. - list<SubtargetFeature> FusionFeatures = [FeatureStoreFusion]; + list<SubtargetFeature> FusionFeatures = [FeatureStoreFusion]; list<SubtargetFeature> P10AdditionalFeatures = - !listconcat(FusionFeatures, [ - DirectivePwr10, FeatureISA3_1, FeaturePrefixInstrs, - FeaturePCRelativeMemops, FeatureP10Vector, FeatureMMA, - FeaturePairedVectorMemops]); + !listconcat(FusionFeatures, [ + DirectivePwr10, FeatureISA3_1, FeaturePrefixInstrs, + FeaturePCRelativeMemops, FeatureP10Vector, FeatureMMA, + FeaturePairedVectorMemops]); list<SubtargetFeature> P10SpecificFeatures = []; list<SubtargetFeature> P10InheritableFeatures = !listconcat(P9InheritableFeatures, P10AdditionalFeatures); @@ -445,7 +445,7 @@ def getAltVSXFMAOpcode : InstrMapping { include "PPCRegisterInfo.td" include "PPCSchedule.td" -include "GISel/PPCRegisterBanks.td" +include "GISel/PPCRegisterBanks.td" //===----------------------------------------------------------------------===// // PowerPC processors supported. @@ -571,7 +571,7 @@ def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.P7Features>; def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.P8Features>; def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.P9Features>; // No scheduler model yet. -def : ProcessorModel<"pwr10", P9Model, ProcessorFeatures.P10Features>; +def : ProcessorModel<"pwr10", P9Model, ProcessorFeatures.P10Features>; // No scheduler model for future CPU. def : ProcessorModel<"future", NoSchedModel, ProcessorFeatures.FutureFeatures>; @@ -602,13 +602,13 @@ def PPCInstrInfo : InstrInfo { let noNamedPositionallyEncodedOperands = 1; } -def PPCAsmWriter : AsmWriter { - string AsmWriterClassName = "InstPrinter"; - int PassSubtarget = 1; - int Variant = 0; - bit isMCAsmWriter = 1; -} - +def PPCAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + int PassSubtarget = 1; + int Variant = 0; + bit isMCAsmWriter = 1; +} + def PPCAsmParser : AsmParser { let ShouldEmitMatchRegisterName = 0; } @@ -627,7 +627,7 @@ def PPC : Target { // Information about the instructions. let InstructionSet = PPCInstrInfo; - let AssemblyWriters = [PPCAsmWriter]; + let AssemblyWriters = [PPCAsmWriter]; let AssemblyParsers = [PPCAsmParser]; let AssemblyParserVariants = [PPCAsmParserVariant]; let AllowRegisterRenaming = 1; diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/PPCAsmPrinter.cpp index f6e59a2718..6257709731 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -27,7 +27,7 @@ #include "PPCTargetStreamer.h" #include "TargetInfo/PowerPCTargetInfo.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" @@ -47,7 +47,7 @@ #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDirectives.h" +#include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" @@ -62,11 +62,11 @@ #include "llvm/Support/CodeGen.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Process.h" +#include "llvm/Support/Process.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -74,7 +74,7 @@ #include <new> using namespace llvm; -using namespace llvm::XCOFF; +using namespace llvm::XCOFF; #define DEBUG_TYPE "asmprinter" @@ -150,22 +150,22 @@ public: class PPCAIXAsmPrinter : public PPCAsmPrinter { private: - /// Symbols lowered from ExternalSymbolSDNodes, we will need to emit extern - /// linkage for them in AIX. - SmallPtrSet<MCSymbol *, 8> ExtSymSDNodeSymbols; - - /// A format indicator and unique trailing identifier to form part of the - /// sinit/sterm function names. - std::string FormatIndicatorAndUniqueModId; - + /// Symbols lowered from ExternalSymbolSDNodes, we will need to emit extern + /// linkage for them in AIX. + SmallPtrSet<MCSymbol *, 8> ExtSymSDNodeSymbols; + + /// A format indicator and unique trailing identifier to form part of the + /// sinit/sterm function names. + std::string FormatIndicatorAndUniqueModId; + static void ValidateGV(const GlobalVariable *GV); - // Record a list of GlobalAlias associated with a GlobalObject. - // This is used for AIX's extra-label-at-definition aliasing strategy. - DenseMap<const GlobalObject *, SmallVector<const GlobalAlias *, 1>> - GOAliasMap; + // Record a list of GlobalAlias associated with a GlobalObject. + // This is used for AIX's extra-label-at-definition aliasing strategy. + DenseMap<const GlobalObject *, SmallVector<const GlobalAlias *, 1>> + GOAliasMap; + + void emitTracebackTable(); - void emitTracebackTable(); - public: PPCAIXAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) : PPCAsmPrinter(TM, std::move(Streamer)) { @@ -178,28 +178,28 @@ public: bool doInitialization(Module &M) override; - void emitXXStructorList(const DataLayout &DL, const Constant *List, - bool IsCtor) override; - + void emitXXStructorList(const DataLayout &DL, const Constant *List, + bool IsCtor) override; + void SetupMachineFunction(MachineFunction &MF) override; void emitGlobalVariable(const GlobalVariable *GV) override; void emitFunctionDescriptor() override; - void emitFunctionEntryLabel() override; - - void emitFunctionBodyEnd() override; - + void emitFunctionEntryLabel() override; + + void emitFunctionBodyEnd() override; + void emitEndOfAsmFile(Module &) override; void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const override; - - void emitInstruction(const MachineInstr *MI) override; - - bool doFinalization(Module &M) override; - - void emitTTypeReference(const GlobalValue *GV, unsigned Encoding) override; + + void emitInstruction(const MachineInstr *MI) override; + + bool doFinalization(Module &M) override; + + void emitTTypeReference(const GlobalValue *GV, unsigned Encoding) override; }; } // end anonymous namespace @@ -321,12 +321,12 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, O << "0, "; printOperand(MI, OpNo, O); return false; - case 'I': - // Write 'i' if an integer constant, otherwise nothing. Used to print - // addi vs add, etc. - if (MI->getOperand(OpNo).isImm()) - O << "i"; - return false; + case 'I': + // Write 'i' if an integer constant, otherwise nothing. Used to print + // addi vs add, etc. + if (MI->getOperand(OpNo).isImm()) + O << "i"; + return false; case 'U': // Print 'u' for update form. case 'X': // Print 'x' for indexed form. // FIXME: Currently for PowerPC memory operands are always loaded @@ -499,14 +499,14 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI, StringRef Name = "__tls_get_addr"; MCSymbol *TlsGetAddr = OutContext.getOrCreateSymbol(Name); MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; - unsigned Opcode = PPC::BL8_NOP_TLS; - - assert(MI->getNumOperands() >= 3 && "Expecting at least 3 operands from MI"); - if (MI->getOperand(2).getTargetFlags() == PPCII::MO_GOT_TLSGD_PCREL_FLAG || - MI->getOperand(2).getTargetFlags() == PPCII::MO_GOT_TLSLD_PCREL_FLAG) { - Kind = MCSymbolRefExpr::VK_PPC_NOTOC; - Opcode = PPC::BL8_NOTOC_TLS; - } + unsigned Opcode = PPC::BL8_NOP_TLS; + + assert(MI->getNumOperands() >= 3 && "Expecting at least 3 operands from MI"); + if (MI->getOperand(2).getTargetFlags() == PPCII::MO_GOT_TLSGD_PCREL_FLAG || + MI->getOperand(2).getTargetFlags() == PPCII::MO_GOT_TLSLD_PCREL_FLAG) { + Kind = MCSymbolRefExpr::VK_PPC_NOTOC; + Opcode = PPC::BL8_NOTOC_TLS; + } const Module *M = MF->getFunction().getParent(); assert(MI->getOperand(0).isReg() && @@ -534,10 +534,10 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI, MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymVar = MCSymbolRefExpr::create(MOSymbol, VK, OutContext); EmitToStreamer(*OutStreamer, - MCInstBuilder(Subtarget->isPPC64() ? Opcode - : (unsigned)PPC::BL_TLS) - .addExpr(TlsRef) - .addExpr(SymVar)); + MCInstBuilder(Subtarget->isPPC64() ? Opcode + : (unsigned)PPC::BL_TLS) + .addExpr(TlsRef) + .addExpr(SymVar)); } /// Map a machine operand for a TOC pseudo-machine instruction to its @@ -591,38 +591,38 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { } } #endif - - auto getTOCRelocAdjustedExprForXCOFF = [this](const MCExpr *Expr, - ptrdiff_t OriginalOffset) { - // Apply an offset to the TOC-based expression such that the adjusted - // notional offset from the TOC base (to be encoded into the instruction's D - // or DS field) is the signed 16-bit truncation of the original notional - // offset from the TOC base. - // This is consistent with the treatment used both by XL C/C++ and - // by AIX ld -r. - ptrdiff_t Adjustment = - OriginalOffset - llvm::SignExtend32<16>(OriginalOffset); - return MCBinaryExpr::createAdd( - Expr, MCConstantExpr::create(-Adjustment, OutContext), OutContext); - }; - - auto getTOCEntryLoadingExprForXCOFF = - [IsPPC64, getTOCRelocAdjustedExprForXCOFF, - this](const MCSymbol *MOSymbol, const MCExpr *Expr) -> const MCExpr * { - const unsigned EntryByteSize = IsPPC64 ? 8 : 4; - const auto TOCEntryIter = TOC.find(MOSymbol); - assert(TOCEntryIter != TOC.end() && - "Could not find the TOC entry for this symbol."); - const ptrdiff_t EntryDistanceFromTOCBase = - (TOCEntryIter - TOC.begin()) * EntryByteSize; - constexpr int16_t PositiveTOCRange = INT16_MAX; - - if (EntryDistanceFromTOCBase > PositiveTOCRange) - return getTOCRelocAdjustedExprForXCOFF(Expr, EntryDistanceFromTOCBase); - - return Expr; - }; - + + auto getTOCRelocAdjustedExprForXCOFF = [this](const MCExpr *Expr, + ptrdiff_t OriginalOffset) { + // Apply an offset to the TOC-based expression such that the adjusted + // notional offset from the TOC base (to be encoded into the instruction's D + // or DS field) is the signed 16-bit truncation of the original notional + // offset from the TOC base. + // This is consistent with the treatment used both by XL C/C++ and + // by AIX ld -r. + ptrdiff_t Adjustment = + OriginalOffset - llvm::SignExtend32<16>(OriginalOffset); + return MCBinaryExpr::createAdd( + Expr, MCConstantExpr::create(-Adjustment, OutContext), OutContext); + }; + + auto getTOCEntryLoadingExprForXCOFF = + [IsPPC64, getTOCRelocAdjustedExprForXCOFF, + this](const MCSymbol *MOSymbol, const MCExpr *Expr) -> const MCExpr * { + const unsigned EntryByteSize = IsPPC64 ? 8 : 4; + const auto TOCEntryIter = TOC.find(MOSymbol); + assert(TOCEntryIter != TOC.end() && + "Could not find the TOC entry for this symbol."); + const ptrdiff_t EntryDistanceFromTOCBase = + (TOCEntryIter - TOC.begin()) * EntryByteSize; + constexpr int16_t PositiveTOCRange = INT16_MAX; + + if (EntryDistanceFromTOCBase > PositiveTOCRange) + return getTOCRelocAdjustedExprForXCOFF(Expr, EntryDistanceFromTOCBase); + + return Expr; + }; + // Lower multi-instruction pseudo operations. switch (MI->getOpcode()) { default: break; @@ -769,7 +769,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { assert( TM.getCodeModel() == CodeModel::Small && "This pseudo should only be selected for 32-bit small code model."); - Exp = getTOCEntryLoadingExprForXCOFF(MOSymbol, Exp); + Exp = getTOCEntryLoadingExprForXCOFF(MOSymbol, Exp); TmpInst.getOperand(1) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); return; @@ -798,20 +798,20 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) && "Invalid operand!"); - // Map the operand to its corresponding MCSymbol. - const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); - + // Map the operand to its corresponding MCSymbol. + const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + // Map the machine operand to its corresponding MCSymbol, then map the // global address operand to be a reference to the TOC entry we will // synthesize later. - MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); + MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); const MCSymbolRefExpr::VariantKind VK = IsAIX ? MCSymbolRefExpr::VK_None : MCSymbolRefExpr::VK_PPC_TOC; const MCExpr *Exp = MCSymbolRefExpr::create(TOCEntry, VK, OutContext); - TmpInst.getOperand(1) = MCOperand::createExpr( - IsAIX ? getTOCEntryLoadingExprForXCOFF(MOSymbol, Exp) : Exp); + TmpInst.getOperand(1) = MCOperand::createExpr( + IsAIX ? getTOCEntryLoadingExprForXCOFF(MOSymbol, Exp) : Exp); EmitToStreamer(*OutStreamer, TmpInst); return; } @@ -1087,7 +1087,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { case PPC::GETtlsADDR: // Transform: %x3 = GETtlsADDR %x3, @sym // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsgd) - case PPC::GETtlsADDRPCREL: + case PPC::GETtlsADDRPCREL: case PPC::GETtlsADDR32: { // Transform: %r3 = GETtlsADDR32 %r3, @sym // Into: BL_TLS __tls_get_addr(sym at tlsgd)@PLT @@ -1133,7 +1133,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { case PPC::GETtlsldADDR: // Transform: %x3 = GETtlsldADDR %x3, @sym // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsld) - case PPC::GETtlsldADDRPCREL: + case PPC::GETtlsldADDRPCREL: case PPC::GETtlsldADDR32: { // Transform: %r3 = GETtlsldADDR32 %r3, @sym // Into: BL_TLS __tls_get_addr(sym at tlsld)@PLT @@ -1160,21 +1160,21 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { .addExpr(SymDtprel)); return; } - case PPC::PADDIdtprel: { - // Transform: %rd = PADDIdtprel %rs, @sym - // Into: %rd = PADDI8 %rs, sym@dtprel - const MachineOperand &MO = MI->getOperand(2); - const GlobalValue *GValue = MO.getGlobal(); - MCSymbol *MOSymbol = getSymbol(GValue); - const MCExpr *SymDtprel = MCSymbolRefExpr::create( - MOSymbol, MCSymbolRefExpr::VK_DTPREL, OutContext); - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::PADDI8) - .addReg(MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()) - .addExpr(SymDtprel)); - return; - } - + case PPC::PADDIdtprel: { + // Transform: %rd = PADDIdtprel %rs, @sym + // Into: %rd = PADDI8 %rs, sym@dtprel + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = getSymbol(GValue); + const MCExpr *SymDtprel = MCSymbolRefExpr::create( + MOSymbol, MCSymbolRefExpr::VK_DTPREL, OutContext); + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::PADDI8) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addExpr(SymDtprel)); + return; + } + case PPC::ADDIdtprelL: // Transform: %xd = ADDIdtprelL %xs, @sym // Into: %xd = ADDI8 %xs, sym@dtprel@l @@ -1711,19 +1711,19 @@ void PPCAIXAsmPrinter::emitLinkage(const GlobalValue *GV, assert(LinkageAttr != MCSA_Invalid && "LinkageAttr should not MCSA_Invalid."); MCSymbolAttr VisibilityAttr = MCSA_Invalid; - if (!TM.getIgnoreXCOFFVisibility()) { - switch (GV->getVisibility()) { + if (!TM.getIgnoreXCOFFVisibility()) { + switch (GV->getVisibility()) { - // TODO: "exported" and "internal" Visibility needs to go here. - case GlobalValue::DefaultVisibility: - break; - case GlobalValue::HiddenVisibility: - VisibilityAttr = MAI->getHiddenVisibilityAttr(); - break; - case GlobalValue::ProtectedVisibility: - VisibilityAttr = MAI->getProtectedVisibilityAttr(); - break; - } + // TODO: "exported" and "internal" Visibility needs to go here. + case GlobalValue::DefaultVisibility: + break; + case GlobalValue::HiddenVisibility: + VisibilityAttr = MAI->getHiddenVisibilityAttr(); + break; + case GlobalValue::ProtectedVisibility: + VisibilityAttr = MAI->getProtectedVisibilityAttr(); + break; + } } OutStreamer->emitXCOFFSymbolLinkageWithVisibility(GVSym, LinkageAttr, @@ -1742,284 +1742,284 @@ void PPCAIXAsmPrinter::SetupMachineFunction(MachineFunction &MF) { return AsmPrinter::SetupMachineFunction(MF); } -void PPCAIXAsmPrinter::emitFunctionBodyEnd() { - - if (!TM.getXCOFFTracebackTable()) - return; - - emitTracebackTable(); -} - -void PPCAIXAsmPrinter::emitTracebackTable() { - - // Create a symbol for the end of function. - MCSymbol *FuncEnd = createTempSymbol(MF->getName()); - OutStreamer->emitLabel(FuncEnd); - - OutStreamer->AddComment("Traceback table begin"); - // Begin with a fullword of zero. - OutStreamer->emitIntValueInHexWithPadding(0, 4 /*size*/); - - SmallString<128> CommentString; - raw_svector_ostream CommentOS(CommentString); - - auto EmitComment = [&]() { - OutStreamer->AddComment(CommentOS.str()); - CommentString.clear(); - }; - - auto EmitCommentAndValue = [&](uint64_t Value, int Size) { - EmitComment(); - OutStreamer->emitIntValueInHexWithPadding(Value, Size); - }; - - unsigned int Version = 0; - CommentOS << "Version = " << Version; - EmitCommentAndValue(Version, 1); - - // There is a lack of information in the IR to assist with determining the - // source language. AIX exception handling mechanism would only search for - // personality routine and LSDA area when such language supports exception - // handling. So to be conservatively correct and allow runtime to do its job, - // we need to set it to C++ for now. - TracebackTable::LanguageID LanguageIdentifier = - TracebackTable::CPlusPlus; // C++ - - CommentOS << "Language = " - << getNameForTracebackTableLanguageId(LanguageIdentifier); - EmitCommentAndValue(LanguageIdentifier, 1); - - // This is only populated for the third and fourth bytes. - uint32_t FirstHalfOfMandatoryField = 0; - - // Emit the 3rd byte of the mandatory field. - - // We always set traceback offset bit to true. - FirstHalfOfMandatoryField |= TracebackTable::HasTraceBackTableOffsetMask; - - const PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); - const MachineRegisterInfo &MRI = MF->getRegInfo(); - - // Check the function uses floating-point processor instructions or not - for (unsigned Reg = PPC::F0; Reg <= PPC::F31; ++Reg) { - if (MRI.isPhysRegUsed(Reg)) { - FirstHalfOfMandatoryField |= TracebackTable::IsFloatingPointPresentMask; - break; - } - } - -#define GENBOOLCOMMENT(Prefix, V, Field) \ - CommentOS << (Prefix) << ((V) & (TracebackTable::Field##Mask) ? "+" : "-") \ - << #Field - -#define GENVALUECOMMENT(PrefixAndName, V, Field) \ - CommentOS << (PrefixAndName) << " = " \ - << static_cast<unsigned>(((V) & (TracebackTable::Field##Mask)) >> \ - (TracebackTable::Field##Shift)) - - GENBOOLCOMMENT("", FirstHalfOfMandatoryField, IsGlobaLinkage); - GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsOutOfLineEpilogOrPrologue); - EmitComment(); - - GENBOOLCOMMENT("", FirstHalfOfMandatoryField, HasTraceBackTableOffset); - GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsInternalProcedure); - EmitComment(); - - GENBOOLCOMMENT("", FirstHalfOfMandatoryField, HasControlledStorage); - GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsTOCless); - EmitComment(); - - GENBOOLCOMMENT("", FirstHalfOfMandatoryField, IsFloatingPointPresent); - EmitComment(); - GENBOOLCOMMENT("", FirstHalfOfMandatoryField, - IsFloatingPointOperationLogOrAbortEnabled); - EmitComment(); - - OutStreamer->emitIntValueInHexWithPadding( - (FirstHalfOfMandatoryField & 0x0000ff00) >> 8, 1); - - // Set the 4th byte of the mandatory field. - FirstHalfOfMandatoryField |= TracebackTable::IsFunctionNamePresentMask; - - static_assert(XCOFF::AllocRegNo == 31, "Unexpected register usage!"); - if (MRI.isPhysRegUsed(Subtarget->isPPC64() ? PPC::X31 : PPC::R31)) - FirstHalfOfMandatoryField |= TracebackTable::IsAllocaUsedMask; - - const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs(); - if (!MustSaveCRs.empty()) - FirstHalfOfMandatoryField |= TracebackTable::IsCRSavedMask; - - if (FI->mustSaveLR()) - FirstHalfOfMandatoryField |= TracebackTable::IsLRSavedMask; - - GENBOOLCOMMENT("", FirstHalfOfMandatoryField, IsInterruptHandler); - GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsFunctionNamePresent); - GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsAllocaUsed); - EmitComment(); - GENVALUECOMMENT("OnConditionDirective", FirstHalfOfMandatoryField, - OnConditionDirective); - GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsCRSaved); - GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsLRSaved); - EmitComment(); - OutStreamer->emitIntValueInHexWithPadding((FirstHalfOfMandatoryField & 0xff), - 1); - - // Set the 5th byte of mandatory field. - uint32_t SecondHalfOfMandatoryField = 0; - - // Always store back chain. - SecondHalfOfMandatoryField |= TracebackTable::IsBackChainStoredMask; - - uint32_t FPRSaved = 0; - for (unsigned Reg = PPC::F14; Reg <= PPC::F31; ++Reg) { - if (MRI.isPhysRegModified(Reg)) { - FPRSaved = PPC::F31 - Reg + 1; - break; - } - } - SecondHalfOfMandatoryField |= (FPRSaved << TracebackTable::FPRSavedShift) & - TracebackTable::FPRSavedMask; - GENBOOLCOMMENT("", SecondHalfOfMandatoryField, IsBackChainStored); - GENBOOLCOMMENT(", ", SecondHalfOfMandatoryField, IsFixup); - GENVALUECOMMENT(", NumOfFPRsSaved", SecondHalfOfMandatoryField, FPRSaved); - EmitComment(); - OutStreamer->emitIntValueInHexWithPadding( - (SecondHalfOfMandatoryField & 0xff000000) >> 24, 1); - - // Set the 6th byte of mandatory field. - bool ShouldEmitEHBlock = TargetLoweringObjectFileXCOFF::ShouldEmitEHBlock(MF); - if (ShouldEmitEHBlock) - SecondHalfOfMandatoryField |= TracebackTable::HasExtensionTableMask; - - uint32_t GPRSaved = 0; - - // X13 is reserved under 64-bit environment. - unsigned GPRBegin = Subtarget->isPPC64() ? PPC::X14 : PPC::R13; - unsigned GPREnd = Subtarget->isPPC64() ? PPC::X31 : PPC::R31; - - for (unsigned Reg = GPRBegin; Reg <= GPREnd; ++Reg) { - if (MRI.isPhysRegModified(Reg)) { - GPRSaved = GPREnd - Reg + 1; - break; - } - } - - SecondHalfOfMandatoryField |= (GPRSaved << TracebackTable::GPRSavedShift) & - TracebackTable::GPRSavedMask; - - GENBOOLCOMMENT("", SecondHalfOfMandatoryField, HasVectorInfo); - GENBOOLCOMMENT(", ", SecondHalfOfMandatoryField, HasExtensionTable); - GENVALUECOMMENT(", NumOfGPRsSaved", SecondHalfOfMandatoryField, GPRSaved); - EmitComment(); - OutStreamer->emitIntValueInHexWithPadding( - (SecondHalfOfMandatoryField & 0x00ff0000) >> 16, 1); - - // Set the 7th byte of mandatory field. - uint32_t NumberOfFixedPara = FI->getFixedParamNum(); - SecondHalfOfMandatoryField |= - (NumberOfFixedPara << TracebackTable::NumberOfFixedParmsShift) & - TracebackTable::NumberOfFixedParmsMask; - GENVALUECOMMENT("NumberOfFixedParms", SecondHalfOfMandatoryField, - NumberOfFixedParms); - EmitComment(); - OutStreamer->emitIntValueInHexWithPadding( - (SecondHalfOfMandatoryField & 0x0000ff00) >> 8, 1); - - // Set the 8th byte of mandatory field. - - // Always set parameter on stack. - SecondHalfOfMandatoryField |= TracebackTable::HasParmsOnStackMask; - - uint32_t NumberOfFPPara = FI->getFloatingPointParamNum(); - SecondHalfOfMandatoryField |= - (NumberOfFPPara << TracebackTable::NumberOfFloatingPointParmsShift) & - TracebackTable::NumberOfFloatingPointParmsMask; - - GENVALUECOMMENT("NumberOfFPParms", SecondHalfOfMandatoryField, - NumberOfFloatingPointParms); - GENBOOLCOMMENT(", ", SecondHalfOfMandatoryField, HasParmsOnStack); - EmitComment(); - OutStreamer->emitIntValueInHexWithPadding(SecondHalfOfMandatoryField & 0xff, - 1); - - // Generate the optional fields of traceback table. - - // Parameter type. - if (NumberOfFixedPara || NumberOfFPPara) { - assert((SecondHalfOfMandatoryField & TracebackTable::HasVectorInfoMask) == - 0 && - "VectorInfo has not been implemented."); - uint32_t ParaType = FI->getParameterType(); - CommentOS << "Parameter type = " - << XCOFF::parseParmsType(ParaType, - NumberOfFixedPara + NumberOfFPPara); - EmitComment(); - OutStreamer->emitIntValueInHexWithPadding(ParaType, sizeof(ParaType)); - } - - // Traceback table offset. - OutStreamer->AddComment("Function size"); - if (FirstHalfOfMandatoryField & TracebackTable::HasTraceBackTableOffsetMask) { - MCSymbol *FuncSectSym = getObjFileLowering().getFunctionEntryPointSymbol( - &(MF->getFunction()), TM); - OutStreamer->emitAbsoluteSymbolDiff(FuncEnd, FuncSectSym, 4); - } - - // Since we unset the Int_Handler. - if (FirstHalfOfMandatoryField & TracebackTable::IsInterruptHandlerMask) - report_fatal_error("Hand_Mask not implement yet"); - - if (FirstHalfOfMandatoryField & TracebackTable::HasControlledStorageMask) - report_fatal_error("Ctl_Info not implement yet"); - - if (FirstHalfOfMandatoryField & TracebackTable::IsFunctionNamePresentMask) { - StringRef Name = MF->getName().substr(0, INT16_MAX); - int16_t NameLength = Name.size(); - CommentOS << "Function name len = " - << static_cast<unsigned int>(NameLength); - EmitCommentAndValue(NameLength, 2); - OutStreamer->AddComment("Function Name"); - OutStreamer->emitBytes(Name); - } - - if (FirstHalfOfMandatoryField & TracebackTable::IsAllocaUsedMask) { - uint8_t AllocReg = XCOFF::AllocRegNo; - OutStreamer->AddComment("AllocaUsed"); - OutStreamer->emitIntValueInHex(AllocReg, sizeof(AllocReg)); - } - - uint8_t ExtensionTableFlag = 0; - if (SecondHalfOfMandatoryField & TracebackTable::HasExtensionTableMask) { - if (ShouldEmitEHBlock) - ExtensionTableFlag |= ExtendedTBTableFlag::TB_EH_INFO; - - CommentOS << "ExtensionTableFlag = " - << getExtendedTBTableFlagString(ExtensionTableFlag); - EmitCommentAndValue(ExtensionTableFlag, sizeof(ExtensionTableFlag)); - } - - if (ExtensionTableFlag & ExtendedTBTableFlag::TB_EH_INFO) { - auto &Ctx = OutStreamer->getContext(); - MCSymbol *EHInfoSym = - TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(MF); - MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(EHInfoSym); - const MCSymbol *TOCBaseSym = - cast<MCSectionXCOFF>(getObjFileLowering().getTOCBaseSection()) - ->getQualNameSymbol(); - const MCExpr *Exp = - MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCEntry, Ctx), - MCSymbolRefExpr::create(TOCBaseSym, Ctx), Ctx); - - const DataLayout &DL = getDataLayout(); - OutStreamer->emitValueToAlignment(4); - OutStreamer->AddComment("EHInfo Table"); - OutStreamer->emitValue(Exp, DL.getPointerSize()); - } - -#undef GENBOOLCOMMENT -#undef GENVALUECOMMENT -} - +void PPCAIXAsmPrinter::emitFunctionBodyEnd() { + + if (!TM.getXCOFFTracebackTable()) + return; + + emitTracebackTable(); +} + +void PPCAIXAsmPrinter::emitTracebackTable() { + + // Create a symbol for the end of function. + MCSymbol *FuncEnd = createTempSymbol(MF->getName()); + OutStreamer->emitLabel(FuncEnd); + + OutStreamer->AddComment("Traceback table begin"); + // Begin with a fullword of zero. + OutStreamer->emitIntValueInHexWithPadding(0, 4 /*size*/); + + SmallString<128> CommentString; + raw_svector_ostream CommentOS(CommentString); + + auto EmitComment = [&]() { + OutStreamer->AddComment(CommentOS.str()); + CommentString.clear(); + }; + + auto EmitCommentAndValue = [&](uint64_t Value, int Size) { + EmitComment(); + OutStreamer->emitIntValueInHexWithPadding(Value, Size); + }; + + unsigned int Version = 0; + CommentOS << "Version = " << Version; + EmitCommentAndValue(Version, 1); + + // There is a lack of information in the IR to assist with determining the + // source language. AIX exception handling mechanism would only search for + // personality routine and LSDA area when such language supports exception + // handling. So to be conservatively correct and allow runtime to do its job, + // we need to set it to C++ for now. + TracebackTable::LanguageID LanguageIdentifier = + TracebackTable::CPlusPlus; // C++ + + CommentOS << "Language = " + << getNameForTracebackTableLanguageId(LanguageIdentifier); + EmitCommentAndValue(LanguageIdentifier, 1); + + // This is only populated for the third and fourth bytes. + uint32_t FirstHalfOfMandatoryField = 0; + + // Emit the 3rd byte of the mandatory field. + + // We always set traceback offset bit to true. + FirstHalfOfMandatoryField |= TracebackTable::HasTraceBackTableOffsetMask; + + const PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); + const MachineRegisterInfo &MRI = MF->getRegInfo(); + + // Check the function uses floating-point processor instructions or not + for (unsigned Reg = PPC::F0; Reg <= PPC::F31; ++Reg) { + if (MRI.isPhysRegUsed(Reg)) { + FirstHalfOfMandatoryField |= TracebackTable::IsFloatingPointPresentMask; + break; + } + } + +#define GENBOOLCOMMENT(Prefix, V, Field) \ + CommentOS << (Prefix) << ((V) & (TracebackTable::Field##Mask) ? "+" : "-") \ + << #Field + +#define GENVALUECOMMENT(PrefixAndName, V, Field) \ + CommentOS << (PrefixAndName) << " = " \ + << static_cast<unsigned>(((V) & (TracebackTable::Field##Mask)) >> \ + (TracebackTable::Field##Shift)) + + GENBOOLCOMMENT("", FirstHalfOfMandatoryField, IsGlobaLinkage); + GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsOutOfLineEpilogOrPrologue); + EmitComment(); + + GENBOOLCOMMENT("", FirstHalfOfMandatoryField, HasTraceBackTableOffset); + GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsInternalProcedure); + EmitComment(); + + GENBOOLCOMMENT("", FirstHalfOfMandatoryField, HasControlledStorage); + GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsTOCless); + EmitComment(); + + GENBOOLCOMMENT("", FirstHalfOfMandatoryField, IsFloatingPointPresent); + EmitComment(); + GENBOOLCOMMENT("", FirstHalfOfMandatoryField, + IsFloatingPointOperationLogOrAbortEnabled); + EmitComment(); + + OutStreamer->emitIntValueInHexWithPadding( + (FirstHalfOfMandatoryField & 0x0000ff00) >> 8, 1); + + // Set the 4th byte of the mandatory field. + FirstHalfOfMandatoryField |= TracebackTable::IsFunctionNamePresentMask; + + static_assert(XCOFF::AllocRegNo == 31, "Unexpected register usage!"); + if (MRI.isPhysRegUsed(Subtarget->isPPC64() ? PPC::X31 : PPC::R31)) + FirstHalfOfMandatoryField |= TracebackTable::IsAllocaUsedMask; + + const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs(); + if (!MustSaveCRs.empty()) + FirstHalfOfMandatoryField |= TracebackTable::IsCRSavedMask; + + if (FI->mustSaveLR()) + FirstHalfOfMandatoryField |= TracebackTable::IsLRSavedMask; + + GENBOOLCOMMENT("", FirstHalfOfMandatoryField, IsInterruptHandler); + GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsFunctionNamePresent); + GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsAllocaUsed); + EmitComment(); + GENVALUECOMMENT("OnConditionDirective", FirstHalfOfMandatoryField, + OnConditionDirective); + GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsCRSaved); + GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsLRSaved); + EmitComment(); + OutStreamer->emitIntValueInHexWithPadding((FirstHalfOfMandatoryField & 0xff), + 1); + + // Set the 5th byte of mandatory field. + uint32_t SecondHalfOfMandatoryField = 0; + + // Always store back chain. + SecondHalfOfMandatoryField |= TracebackTable::IsBackChainStoredMask; + + uint32_t FPRSaved = 0; + for (unsigned Reg = PPC::F14; Reg <= PPC::F31; ++Reg) { + if (MRI.isPhysRegModified(Reg)) { + FPRSaved = PPC::F31 - Reg + 1; + break; + } + } + SecondHalfOfMandatoryField |= (FPRSaved << TracebackTable::FPRSavedShift) & + TracebackTable::FPRSavedMask; + GENBOOLCOMMENT("", SecondHalfOfMandatoryField, IsBackChainStored); + GENBOOLCOMMENT(", ", SecondHalfOfMandatoryField, IsFixup); + GENVALUECOMMENT(", NumOfFPRsSaved", SecondHalfOfMandatoryField, FPRSaved); + EmitComment(); + OutStreamer->emitIntValueInHexWithPadding( + (SecondHalfOfMandatoryField & 0xff000000) >> 24, 1); + + // Set the 6th byte of mandatory field. + bool ShouldEmitEHBlock = TargetLoweringObjectFileXCOFF::ShouldEmitEHBlock(MF); + if (ShouldEmitEHBlock) + SecondHalfOfMandatoryField |= TracebackTable::HasExtensionTableMask; + + uint32_t GPRSaved = 0; + + // X13 is reserved under 64-bit environment. + unsigned GPRBegin = Subtarget->isPPC64() ? PPC::X14 : PPC::R13; + unsigned GPREnd = Subtarget->isPPC64() ? PPC::X31 : PPC::R31; + + for (unsigned Reg = GPRBegin; Reg <= GPREnd; ++Reg) { + if (MRI.isPhysRegModified(Reg)) { + GPRSaved = GPREnd - Reg + 1; + break; + } + } + + SecondHalfOfMandatoryField |= (GPRSaved << TracebackTable::GPRSavedShift) & + TracebackTable::GPRSavedMask; + + GENBOOLCOMMENT("", SecondHalfOfMandatoryField, HasVectorInfo); + GENBOOLCOMMENT(", ", SecondHalfOfMandatoryField, HasExtensionTable); + GENVALUECOMMENT(", NumOfGPRsSaved", SecondHalfOfMandatoryField, GPRSaved); + EmitComment(); + OutStreamer->emitIntValueInHexWithPadding( + (SecondHalfOfMandatoryField & 0x00ff0000) >> 16, 1); + + // Set the 7th byte of mandatory field. + uint32_t NumberOfFixedPara = FI->getFixedParamNum(); + SecondHalfOfMandatoryField |= + (NumberOfFixedPara << TracebackTable::NumberOfFixedParmsShift) & + TracebackTable::NumberOfFixedParmsMask; + GENVALUECOMMENT("NumberOfFixedParms", SecondHalfOfMandatoryField, + NumberOfFixedParms); + EmitComment(); + OutStreamer->emitIntValueInHexWithPadding( + (SecondHalfOfMandatoryField & 0x0000ff00) >> 8, 1); + + // Set the 8th byte of mandatory field. + + // Always set parameter on stack. + SecondHalfOfMandatoryField |= TracebackTable::HasParmsOnStackMask; + + uint32_t NumberOfFPPara = FI->getFloatingPointParamNum(); + SecondHalfOfMandatoryField |= + (NumberOfFPPara << TracebackTable::NumberOfFloatingPointParmsShift) & + TracebackTable::NumberOfFloatingPointParmsMask; + + GENVALUECOMMENT("NumberOfFPParms", SecondHalfOfMandatoryField, + NumberOfFloatingPointParms); + GENBOOLCOMMENT(", ", SecondHalfOfMandatoryField, HasParmsOnStack); + EmitComment(); + OutStreamer->emitIntValueInHexWithPadding(SecondHalfOfMandatoryField & 0xff, + 1); + + // Generate the optional fields of traceback table. + + // Parameter type. + if (NumberOfFixedPara || NumberOfFPPara) { + assert((SecondHalfOfMandatoryField & TracebackTable::HasVectorInfoMask) == + 0 && + "VectorInfo has not been implemented."); + uint32_t ParaType = FI->getParameterType(); + CommentOS << "Parameter type = " + << XCOFF::parseParmsType(ParaType, + NumberOfFixedPara + NumberOfFPPara); + EmitComment(); + OutStreamer->emitIntValueInHexWithPadding(ParaType, sizeof(ParaType)); + } + + // Traceback table offset. + OutStreamer->AddComment("Function size"); + if (FirstHalfOfMandatoryField & TracebackTable::HasTraceBackTableOffsetMask) { + MCSymbol *FuncSectSym = getObjFileLowering().getFunctionEntryPointSymbol( + &(MF->getFunction()), TM); + OutStreamer->emitAbsoluteSymbolDiff(FuncEnd, FuncSectSym, 4); + } + + // Since we unset the Int_Handler. + if (FirstHalfOfMandatoryField & TracebackTable::IsInterruptHandlerMask) + report_fatal_error("Hand_Mask not implement yet"); + + if (FirstHalfOfMandatoryField & TracebackTable::HasControlledStorageMask) + report_fatal_error("Ctl_Info not implement yet"); + + if (FirstHalfOfMandatoryField & TracebackTable::IsFunctionNamePresentMask) { + StringRef Name = MF->getName().substr(0, INT16_MAX); + int16_t NameLength = Name.size(); + CommentOS << "Function name len = " + << static_cast<unsigned int>(NameLength); + EmitCommentAndValue(NameLength, 2); + OutStreamer->AddComment("Function Name"); + OutStreamer->emitBytes(Name); + } + + if (FirstHalfOfMandatoryField & TracebackTable::IsAllocaUsedMask) { + uint8_t AllocReg = XCOFF::AllocRegNo; + OutStreamer->AddComment("AllocaUsed"); + OutStreamer->emitIntValueInHex(AllocReg, sizeof(AllocReg)); + } + + uint8_t ExtensionTableFlag = 0; + if (SecondHalfOfMandatoryField & TracebackTable::HasExtensionTableMask) { + if (ShouldEmitEHBlock) + ExtensionTableFlag |= ExtendedTBTableFlag::TB_EH_INFO; + + CommentOS << "ExtensionTableFlag = " + << getExtendedTBTableFlagString(ExtensionTableFlag); + EmitCommentAndValue(ExtensionTableFlag, sizeof(ExtensionTableFlag)); + } + + if (ExtensionTableFlag & ExtendedTBTableFlag::TB_EH_INFO) { + auto &Ctx = OutStreamer->getContext(); + MCSymbol *EHInfoSym = + TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(MF); + MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(EHInfoSym); + const MCSymbol *TOCBaseSym = + cast<MCSectionXCOFF>(getObjFileLowering().getTOCBaseSection()) + ->getQualNameSymbol(); + const MCExpr *Exp = + MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCEntry, Ctx), + MCSymbolRefExpr::create(TOCBaseSym, Ctx), Ctx); + + const DataLayout &DL = getDataLayout(); + OutStreamer->emitValueToAlignment(4); + OutStreamer->AddComment("EHInfo Table"); + OutStreamer->emitValue(Exp, DL.getPointerSize()); + } + +#undef GENBOOLCOMMENT +#undef GENVALUECOMMENT +} + void PPCAIXAsmPrinter::ValidateGV(const GlobalVariable *GV) { // Early error checking limiting what is supported. if (GV->isThreadLocal()) @@ -2029,18 +2029,18 @@ void PPCAIXAsmPrinter::ValidateGV(const GlobalVariable *GV) { report_fatal_error("COMDAT not yet supported by AIX."); } -static bool isSpecialLLVMGlobalArrayToSkip(const GlobalVariable *GV) { - return GV->hasAppendingLinkage() && - StringSwitch<bool>(GV->getName()) - // TODO: Linker could still eliminate the GV if we just skip - // handling llvm.used array. Skipping them for now until we or the - // AIX OS team come up with a good solution. - .Case("llvm.used", true) - // It's correct to just skip llvm.compiler.used array here. - .Case("llvm.compiler.used", true) - .Default(false); -} - +static bool isSpecialLLVMGlobalArrayToSkip(const GlobalVariable *GV) { + return GV->hasAppendingLinkage() && + StringSwitch<bool>(GV->getName()) + // TODO: Linker could still eliminate the GV if we just skip + // handling llvm.used array. Skipping them for now until we or the + // AIX OS team come up with a good solution. + .Case("llvm.used", true) + // It's correct to just skip llvm.compiler.used array here. + .Case("llvm.compiler.used", true) + .Default(false); +} + static bool isSpecialLLVMGlobalArrayForStaticInit(const GlobalVariable *GV) { return StringSwitch<bool>(GV->getName()) .Cases("llvm.global_ctors", "llvm.global_dtors", true) @@ -2048,12 +2048,12 @@ static bool isSpecialLLVMGlobalArrayForStaticInit(const GlobalVariable *GV) { } void PPCAIXAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { - // Special LLVM global arrays have been handled at the initialization. - if (isSpecialLLVMGlobalArrayToSkip(GV) || isSpecialLLVMGlobalArrayForStaticInit(GV)) - return; - - assert(!GV->getName().startswith("llvm.") && - "Unhandled intrinsic global variable."); + // Special LLVM global arrays have been handled at the initialization. + if (isSpecialLLVMGlobalArrayToSkip(GV) || isSpecialLLVMGlobalArrayForStaticInit(GV)) + return; + + assert(!GV->getName().startswith("llvm.") && + "Unhandled intrinsic global variable."); ValidateGV(GV); MCSymbolXCOFF *GVSym = cast<MCSymbolXCOFF>(getSymbol(GV)); @@ -2080,12 +2080,12 @@ void PPCAIXAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { if (GVKind.isCommon() || GVKind.isBSSLocal()) { Align Alignment = GV->getAlign().getValueOr(DL.getPreferredAlign(GV)); uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType()); - GVSym->setStorageClass( - TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GV)); + GVSym->setStorageClass( + TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GV)); if (GVKind.isBSSLocal()) OutStreamer->emitXCOFFLocalCommonSymbol( - OutContext.getOrCreateSymbol(GVSym->getSymbolTableName()), Size, + OutContext.getOrCreateSymbol(GVSym->getSymbolTableName()), Size, GVSym, Alignment.value()); else OutStreamer->emitCommonSymbol(GVSym, Size, Alignment.value()); @@ -2095,18 +2095,18 @@ void PPCAIXAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { MCSymbol *EmittedInitSym = GVSym; emitLinkage(GV, EmittedInitSym); emitAlignment(getGVAlignment(GV, DL), GV); - - // When -fdata-sections is enabled, every GlobalVariable will - // be put into its own csect; therefore, label is not necessary here. - if (!TM.getDataSections() || GV->hasSection()) { - OutStreamer->emitLabel(EmittedInitSym); - } - - // Emit aliasing label for global variable. - llvm::for_each(GOAliasMap[GV], [this](const GlobalAlias *Alias) { - OutStreamer->emitLabel(getSymbol(Alias)); - }); - + + // When -fdata-sections is enabled, every GlobalVariable will + // be put into its own csect; therefore, label is not necessary here. + if (!TM.getDataSections() || GV->hasSection()) { + OutStreamer->emitLabel(EmittedInitSym); + } + + // Emit aliasing label for global variable. + llvm::for_each(GOAliasMap[GV], [this](const GlobalAlias *Alias) { + OutStreamer->emitLabel(getSymbol(Alias)); + }); + emitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer()); } @@ -2118,13 +2118,13 @@ void PPCAIXAsmPrinter::emitFunctionDescriptor() { // Emit function descriptor. OutStreamer->SwitchSection( cast<MCSymbolXCOFF>(CurrentFnDescSym)->getRepresentedCsect()); - - // Emit aliasing label for function descriptor csect. - llvm::for_each(GOAliasMap[&MF->getFunction()], - [this](const GlobalAlias *Alias) { - OutStreamer->emitLabel(getSymbol(Alias)); - }); - + + // Emit aliasing label for function descriptor csect. + llvm::for_each(GOAliasMap[&MF->getFunction()], + [this](const GlobalAlias *Alias) { + OutStreamer->emitLabel(getSymbol(Alias)); + }); + // Emit function entry point address. OutStreamer->emitValue(MCSymbolRefExpr::create(CurrentFnSym, OutContext), PointerSize); @@ -2140,20 +2140,20 @@ void PPCAIXAsmPrinter::emitFunctionDescriptor() { OutStreamer->SwitchSection(Current.first, Current.second); } -void PPCAIXAsmPrinter::emitFunctionEntryLabel() { - // It's not necessary to emit the label when we have individual - // function in its own csect. - if (!TM.getFunctionSections()) - PPCAsmPrinter::emitFunctionEntryLabel(); - - // Emit aliasing label for function entry point label. - llvm::for_each( - GOAliasMap[&MF->getFunction()], [this](const GlobalAlias *Alias) { - OutStreamer->emitLabel( - getObjFileLowering().getFunctionEntryPointSymbol(Alias, TM)); - }); -} - +void PPCAIXAsmPrinter::emitFunctionEntryLabel() { + // It's not necessary to emit the label when we have individual + // function in its own csect. + if (!TM.getFunctionSections()) + PPCAsmPrinter::emitFunctionEntryLabel(); + + // Emit aliasing label for function entry point label. + llvm::for_each( + GOAliasMap[&MF->getFunction()], [this](const GlobalAlias *Alias) { + OutStreamer->emitLabel( + getObjFileLowering().getFunctionEntryPointSymbol(Alias, TM)); + }); +} + void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) { // If there are no functions in this module, we will never need to reference // the TOC base. @@ -2169,7 +2169,7 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) { for (auto &I : TOC) { // Setup the csect for the current TC entry. MCSectionXCOFF *TCEntry = cast<MCSectionXCOFF>( - getObjFileLowering().getSectionForTOCEntry(I.first, TM)); + getObjFileLowering().getSectionForTOCEntry(I.first, TM)); OutStreamer->SwitchSection(TCEntry); OutStreamer->emitLabel(I.second); @@ -2198,174 +2198,174 @@ bool PPCAIXAsmPrinter::doInitialization(Module &M) { // We need to know, up front, the alignment of csects for the assembly path, // because once a .csect directive gets emitted, we could not change the // alignment value on it. - for (const auto &G : M.globals()) { - if (isSpecialLLVMGlobalArrayToSkip(&G)) - continue; - - if (isSpecialLLVMGlobalArrayForStaticInit(&G)) { - // Generate a format indicator and a unique module id to be a part of - // the sinit and sterm function names. - if (FormatIndicatorAndUniqueModId.empty()) { - std::string UniqueModuleId = getUniqueModuleId(&M); - if (UniqueModuleId != "") - // TODO: Use source file full path to generate the unique module id - // and add a format indicator as a part of function name in case we - // will support more than one format. - FormatIndicatorAndUniqueModId = "clang_" + UniqueModuleId.substr(1); - else - // Use the Pid and current time as the unique module id when we cannot - // generate one based on a module's strong external symbols. - // FIXME: Adjust the comment accordingly after we use source file full - // path instead. - FormatIndicatorAndUniqueModId = - "clangPidTime_" + llvm::itostr(sys::Process::getProcessId()) + - "_" + llvm::itostr(time(nullptr)); - } - - emitSpecialLLVMGlobal(&G); - continue; - } - + for (const auto &G : M.globals()) { + if (isSpecialLLVMGlobalArrayToSkip(&G)) + continue; + + if (isSpecialLLVMGlobalArrayForStaticInit(&G)) { + // Generate a format indicator and a unique module id to be a part of + // the sinit and sterm function names. + if (FormatIndicatorAndUniqueModId.empty()) { + std::string UniqueModuleId = getUniqueModuleId(&M); + if (UniqueModuleId != "") + // TODO: Use source file full path to generate the unique module id + // and add a format indicator as a part of function name in case we + // will support more than one format. + FormatIndicatorAndUniqueModId = "clang_" + UniqueModuleId.substr(1); + else + // Use the Pid and current time as the unique module id when we cannot + // generate one based on a module's strong external symbols. + // FIXME: Adjust the comment accordingly after we use source file full + // path instead. + FormatIndicatorAndUniqueModId = + "clangPidTime_" + llvm::itostr(sys::Process::getProcessId()) + + "_" + llvm::itostr(time(nullptr)); + } + + emitSpecialLLVMGlobal(&G); + continue; + } + setCsectAlignment(&G); - } + } for (const auto &F : M) setCsectAlignment(&F); - // Construct an aliasing list for each GlobalObject. - for (const auto &Alias : M.aliases()) { - const GlobalObject *Base = Alias.getBaseObject(); - if (!Base) - report_fatal_error( - "alias without a base object is not yet supported on AIX"); - GOAliasMap[Base].push_back(&Alias); - } - + // Construct an aliasing list for each GlobalObject. + for (const auto &Alias : M.aliases()) { + const GlobalObject *Base = Alias.getBaseObject(); + if (!Base) + report_fatal_error( + "alias without a base object is not yet supported on AIX"); + GOAliasMap[Base].push_back(&Alias); + } + return Result; } -void PPCAIXAsmPrinter::emitInstruction(const MachineInstr *MI) { - switch (MI->getOpcode()) { - default: - break; - case PPC::BL8: - case PPC::BL: - case PPC::BL8_NOP: - case PPC::BL_NOP: { - const MachineOperand &MO = MI->getOperand(0); - if (MO.isSymbol()) { - MCSymbolXCOFF *S = - cast<MCSymbolXCOFF>(OutContext.getOrCreateSymbol(MO.getSymbolName())); - ExtSymSDNodeSymbols.insert(S); - } - } break; - case PPC::BL_TLS: - case PPC::BL8_TLS: - case PPC::BL8_TLS_: - case PPC::BL8_NOP_TLS: - report_fatal_error("TLS call not yet implemented"); - case PPC::TAILB: - case PPC::TAILB8: - case PPC::TAILBA: - case PPC::TAILBA8: - case PPC::TAILBCTR: - case PPC::TAILBCTR8: - if (MI->getOperand(0).isSymbol()) - report_fatal_error("Tail call for extern symbol not yet supported."); - break; - } - return PPCAsmPrinter::emitInstruction(MI); -} - -bool PPCAIXAsmPrinter::doFinalization(Module &M) { - for (MCSymbol *Sym : ExtSymSDNodeSymbols) - OutStreamer->emitSymbolAttribute(Sym, MCSA_Extern); - return PPCAsmPrinter::doFinalization(M); -} - -static unsigned mapToSinitPriority(int P) { - if (P < 0 || P > 65535) - report_fatal_error("invalid init priority"); - - if (P <= 20) - return P; - - if (P < 81) - return 20 + (P - 20) * 16; - - if (P <= 1124) - return 1004 + (P - 81); - - if (P < 64512) - return 2047 + (P - 1124) * 33878; - - return 2147482625u + (P - 64512); -} - -static std::string convertToSinitPriority(int Priority) { - // This helper function converts clang init priority to values used in sinit - // and sterm functions. - // - // The conversion strategies are: - // We map the reserved clang/gnu priority range [0, 100] into the sinit/sterm - // reserved priority range [0, 1023] by - // - directly mapping the first 21 and the last 20 elements of the ranges - // - linear interpolating the intermediate values with a step size of 16. - // - // We map the non reserved clang/gnu priority range of [101, 65535] into the - // sinit/sterm priority range [1024, 2147483648] by: - // - directly mapping the first and the last 1024 elements of the ranges - // - linear interpolating the intermediate values with a step size of 33878. - unsigned int P = mapToSinitPriority(Priority); - - std::string PrioritySuffix; - llvm::raw_string_ostream os(PrioritySuffix); - os << llvm::format_hex_no_prefix(P, 8); - os.flush(); - return PrioritySuffix; -} - -void PPCAIXAsmPrinter::emitXXStructorList(const DataLayout &DL, - const Constant *List, bool IsCtor) { - SmallVector<Structor, 8> Structors; - preprocessXXStructorList(DL, List, Structors); - if (Structors.empty()) - return; - - unsigned Index = 0; - for (Structor &S : Structors) { - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(S.Func)) - S.Func = CE->getOperand(0); - - llvm::GlobalAlias::create( - GlobalValue::ExternalLinkage, - (IsCtor ? llvm::Twine("__sinit") : llvm::Twine("__sterm")) + - llvm::Twine(convertToSinitPriority(S.Priority)) + - llvm::Twine("_", FormatIndicatorAndUniqueModId) + - llvm::Twine("_", llvm::utostr(Index++)), - cast<Function>(S.Func)); - } -} - -void PPCAIXAsmPrinter::emitTTypeReference(const GlobalValue *GV, - unsigned Encoding) { - if (GV) { - MCSymbol *TypeInfoSym = TM.getSymbol(GV); - MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(TypeInfoSym); - const MCSymbol *TOCBaseSym = - cast<MCSectionXCOFF>(getObjFileLowering().getTOCBaseSection()) - ->getQualNameSymbol(); - auto &Ctx = OutStreamer->getContext(); - const MCExpr *Exp = - MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCEntry, Ctx), - MCSymbolRefExpr::create(TOCBaseSym, Ctx), Ctx); - OutStreamer->emitValue(Exp, GetSizeOfEncodedValue(Encoding)); - } else - OutStreamer->emitIntValue(0, GetSizeOfEncodedValue(Encoding)); -} - -// Return a pass that prints the PPC assembly code for a MachineFunction to the -// given output stream. +void PPCAIXAsmPrinter::emitInstruction(const MachineInstr *MI) { + switch (MI->getOpcode()) { + default: + break; + case PPC::BL8: + case PPC::BL: + case PPC::BL8_NOP: + case PPC::BL_NOP: { + const MachineOperand &MO = MI->getOperand(0); + if (MO.isSymbol()) { + MCSymbolXCOFF *S = + cast<MCSymbolXCOFF>(OutContext.getOrCreateSymbol(MO.getSymbolName())); + ExtSymSDNodeSymbols.insert(S); + } + } break; + case PPC::BL_TLS: + case PPC::BL8_TLS: + case PPC::BL8_TLS_: + case PPC::BL8_NOP_TLS: + report_fatal_error("TLS call not yet implemented"); + case PPC::TAILB: + case PPC::TAILB8: + case PPC::TAILBA: + case PPC::TAILBA8: + case PPC::TAILBCTR: + case PPC::TAILBCTR8: + if (MI->getOperand(0).isSymbol()) + report_fatal_error("Tail call for extern symbol not yet supported."); + break; + } + return PPCAsmPrinter::emitInstruction(MI); +} + +bool PPCAIXAsmPrinter::doFinalization(Module &M) { + for (MCSymbol *Sym : ExtSymSDNodeSymbols) + OutStreamer->emitSymbolAttribute(Sym, MCSA_Extern); + return PPCAsmPrinter::doFinalization(M); +} + +static unsigned mapToSinitPriority(int P) { + if (P < 0 || P > 65535) + report_fatal_error("invalid init priority"); + + if (P <= 20) + return P; + + if (P < 81) + return 20 + (P - 20) * 16; + + if (P <= 1124) + return 1004 + (P - 81); + + if (P < 64512) + return 2047 + (P - 1124) * 33878; + + return 2147482625u + (P - 64512); +} + +static std::string convertToSinitPriority(int Priority) { + // This helper function converts clang init priority to values used in sinit + // and sterm functions. + // + // The conversion strategies are: + // We map the reserved clang/gnu priority range [0, 100] into the sinit/sterm + // reserved priority range [0, 1023] by + // - directly mapping the first 21 and the last 20 elements of the ranges + // - linear interpolating the intermediate values with a step size of 16. + // + // We map the non reserved clang/gnu priority range of [101, 65535] into the + // sinit/sterm priority range [1024, 2147483648] by: + // - directly mapping the first and the last 1024 elements of the ranges + // - linear interpolating the intermediate values with a step size of 33878. + unsigned int P = mapToSinitPriority(Priority); + + std::string PrioritySuffix; + llvm::raw_string_ostream os(PrioritySuffix); + os << llvm::format_hex_no_prefix(P, 8); + os.flush(); + return PrioritySuffix; +} + +void PPCAIXAsmPrinter::emitXXStructorList(const DataLayout &DL, + const Constant *List, bool IsCtor) { + SmallVector<Structor, 8> Structors; + preprocessXXStructorList(DL, List, Structors); + if (Structors.empty()) + return; + + unsigned Index = 0; + for (Structor &S : Structors) { + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(S.Func)) + S.Func = CE->getOperand(0); + + llvm::GlobalAlias::create( + GlobalValue::ExternalLinkage, + (IsCtor ? llvm::Twine("__sinit") : llvm::Twine("__sterm")) + + llvm::Twine(convertToSinitPriority(S.Priority)) + + llvm::Twine("_", FormatIndicatorAndUniqueModId) + + llvm::Twine("_", llvm::utostr(Index++)), + cast<Function>(S.Func)); + } +} + +void PPCAIXAsmPrinter::emitTTypeReference(const GlobalValue *GV, + unsigned Encoding) { + if (GV) { + MCSymbol *TypeInfoSym = TM.getSymbol(GV); + MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(TypeInfoSym); + const MCSymbol *TOCBaseSym = + cast<MCSectionXCOFF>(getObjFileLowering().getTOCBaseSection()) + ->getQualNameSymbol(); + auto &Ctx = OutStreamer->getContext(); + const MCExpr *Exp = + MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCEntry, Ctx), + MCSymbolRefExpr::create(TOCBaseSym, Ctx), Ctx); + OutStreamer->emitValue(Exp, GetSizeOfEncodedValue(Encoding)); + } else + OutStreamer->emitIntValue(0, GetSizeOfEncodedValue(Encoding)); +} + +// Return a pass that prints the PPC assembly code for a MachineFunction to the +// given output stream. static AsmPrinter * createPPCAsmPrinterPass(TargetMachine &tm, std::unique_ptr<MCStreamer> &&Streamer) { @@ -2379,8 +2379,8 @@ createPPCAsmPrinterPass(TargetMachine &tm, extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCAsmPrinter() { TargetRegistry::RegisterAsmPrinter(getThePPC32Target(), createPPCAsmPrinterPass); - TargetRegistry::RegisterAsmPrinter(getThePPC32LETarget(), - createPPCAsmPrinterPass); + TargetRegistry::RegisterAsmPrinter(getThePPC32LETarget(), + createPPCAsmPrinterPass); TargetRegistry::RegisterAsmPrinter(getThePPC64Target(), createPPCAsmPrinterPass); TargetRegistry::RegisterAsmPrinter(getThePPC64LETarget(), diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/PPCBoolRetToInt.cpp index 1f83428533..3c6b1f84b8 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCBoolRetToInt.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCBoolRetToInt.cpp @@ -59,7 +59,7 @@ using namespace llvm; namespace { -#define DEBUG_TYPE "ppc-bool-ret-to-int" +#define DEBUG_TYPE "ppc-bool-ret-to-int" STATISTIC(NumBoolRetPromotion, "Number of times a bool feeding a RetInst was promoted to an int"); @@ -75,7 +75,7 @@ class PPCBoolRetToInt : public FunctionPass { WorkList.push_back(V); Defs.insert(V); while (!WorkList.empty()) { - Value *Curr = WorkList.pop_back_val(); + Value *Curr = WorkList.pop_back_val(); auto *CurrUser = dyn_cast<User>(Curr); // Operands of CallInst/Constant are skipped because they may not be Bool // type. For CallInst, their positions are defined by ABI. @@ -282,8 +282,8 @@ private: } // end anonymous namespace char PPCBoolRetToInt::ID = 0; -INITIALIZE_PASS(PPCBoolRetToInt, "ppc-bool-ret-to-int", - "Convert i1 constants to i32/i64 if they are returned", false, - false) +INITIALIZE_PASS(PPCBoolRetToInt, "ppc-bool-ret-to-int", + "Convert i1 constants to i32/i64 if they are returned", false, + false) FunctionPass *llvm::createPPCBoolRetToIntPass() { return new PPCBoolRetToInt(); } diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCCCState.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/PPCCCState.cpp index 144ef15b7c..79ffc6627a 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCCCState.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCCCState.cpp @@ -32,4 +32,4 @@ void PPCCCState::PreAnalyzeFormalArguments( OriginalArgWasPPCF128.push_back(false); } } -} +} diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCCTRLoops.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/PPCCTRLoops.cpp index b8b13c6dde..b9518d6d70 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -1,4 +1,4 @@ -//===-- PPCCTRLoops.cpp - Verify CTR loops -----------------===// +//===-- PPCCTRLoops.cpp - Verify CTR loops -----------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,45 +6,45 @@ // //===----------------------------------------------------------------------===// // -// This pass verifies that all bdnz/bdz instructions are dominated by a loop -// mtctr before any other instructions that might clobber the ctr register. +// This pass verifies that all bdnz/bdz instructions are dominated by a loop +// mtctr before any other instructions that might clobber the ctr register. // //===----------------------------------------------------------------------===// -// CTR loops are produced by the HardwareLoops pass and this pass is simply a -// verification that no invalid CTR loops are produced. As such, it isn't -// something that needs to be run (or even defined) for Release builds so the -// entire file is guarded by NDEBUG. -#ifndef NDEBUG -#include <vector> - -#include "MCTargetDesc/PPCMCTargetDesc.h" +// CTR loops are produced by the HardwareLoops pass and this pass is simply a +// verification that no invalid CTR loops are produced. As such, it isn't +// something that needs to be run (or even defined) for Release builds so the +// entire file is guarded by NDEBUG. +#ifndef NDEBUG +#include <vector> + +#include "MCTargetDesc/PPCMCTargetDesc.h" #include "PPC.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/ilist_iterator.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBundleIterator.h" -#include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/Register.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/ilist_iterator.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBundleIterator.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/Register.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" -#include "llvm/PassRegistry.h" -#include "llvm/Support/CodeGen.h" +#include "llvm/PassRegistry.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/GenericDomTreeConstruction.h" -#include "llvm/Support/Printable.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GenericDomTreeConstruction.h" +#include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -#define DEBUG_TYPE "ppc-ctrloops-verify" +#define DEBUG_TYPE "ppc-ctrloops-verify" namespace { @@ -148,7 +148,7 @@ queue_preds: return false; } - append_range(Preds, MBB->predecessors()); + append_range(Preds, MBB->predecessors()); } do { diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCCallingConv.td b/contrib/libs/llvm12/lib/Target/PowerPC/PPCCallingConv.td index 095e6e6e6a..cc34867181 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCCallingConv.td +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCCallingConv.td @@ -59,7 +59,7 @@ def RetCC_PPC_Cold : CallingConv<[ CCIfType<[f32], CCAssignToReg<[F1]>>, CCIfType<[f64], CCAssignToReg<[F1]>>, - CCIfType<[f128], CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2]>>>, + CCIfType<[f128], CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2]>>>, CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64], CCIfSubtarget<"hasAltivec()", @@ -92,7 +92,7 @@ def RetCC_PPC : CallingConv<[ // For P9, f128 are passed in vector registers. CCIfType<[f128], - CCIfSubtarget<"hasAltivec()", + CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>, // Vector types returned as "direct" go into V2 .. V9; note that only the @@ -149,7 +149,7 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[ CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, CCIfType<[f128], - CCIfSubtarget<"hasAltivec()", + CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>, CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64], CCIfSubtarget<"hasAltivec()", @@ -216,7 +216,7 @@ def CC_PPC32_SVR4_Common : CallingConv<[ // Vectors and float128 get 16-byte stack slots that are 16-byte aligned. CCIfType<[v16i8, v8i16, v4i32, v4f32, v2f64, v2i64], CCAssignToStack<16, 16>>, - CCIfType<[f128], CCIfSubtarget<"hasAltivec()", CCAssignToStack<16, 16>>> + CCIfType<[f128], CCIfSubtarget<"hasAltivec()", CCAssignToStack<16, 16>>> ]>; // This calling convention puts vector arguments always on the stack. It is used @@ -238,7 +238,7 @@ def CC_PPC32_SVR4 : CallingConv<[ // Float128 types treated as vector arguments. CCIfType<[f128], - CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7, + CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>>, CCDelegateTo<CC_PPC32_SVR4_Common> @@ -291,8 +291,8 @@ def CSR_AIX32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20, F27, F28, F29, F30, F31, CR2, CR3, CR4 )>; -def CSR_AIX32_Altivec : CalleeSavedRegs<(add CSR_AIX32, CSR_Altivec)>; - +def CSR_AIX32_Altivec : CalleeSavedRegs<(add CSR_AIX32, CSR_Altivec)>; + // Common CalleeSavedRegs for SVR4 and AIX. def CSR_PPC64 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, X21, X22, X23, X24, X25, X26, X27, X28, diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCEarlyReturn.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/PPCEarlyReturn.cpp index f3f0b013a2..08b7bdb3ac 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCEarlyReturn.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCEarlyReturn.cpp @@ -77,9 +77,9 @@ protected: if (J->getOperand(0).getMBB() == &ReturnMBB) { // This is an unconditional branch to the return. Replace the // branch with a blr. - MachineInstr *MI = ReturnMBB.getParent()->CloneMachineInstr(&*I); - (*PI)->insert(J, MI); - + MachineInstr *MI = ReturnMBB.getParent()->CloneMachineInstr(&*I); + (*PI)->insert(J, MI); + MachineBasicBlock::iterator K = J--; K->eraseFromParent(); BlockChanged = true; @@ -90,13 +90,13 @@ protected: if (J->getOperand(2).getMBB() == &ReturnMBB) { // This is a conditional branch to the return. Replace the branch // with a bclr. - MachineInstr *MI = ReturnMBB.getParent()->CloneMachineInstr(&*I); - MI->setDesc(TII->get(PPC::BCCLR)); - MachineInstrBuilder(*ReturnMBB.getParent(), MI) + MachineInstr *MI = ReturnMBB.getParent()->CloneMachineInstr(&*I); + MI->setDesc(TII->get(PPC::BCCLR)); + MachineInstrBuilder(*ReturnMBB.getParent(), MI) .add(J->getOperand(0)) - .add(J->getOperand(1)); - (*PI)->insert(J, MI); - + .add(J->getOperand(1)); + (*PI)->insert(J, MI); + MachineBasicBlock::iterator K = J--; K->eraseFromParent(); BlockChanged = true; @@ -107,13 +107,13 @@ protected: if (J->getOperand(1).getMBB() == &ReturnMBB) { // This is a conditional branch to the return. Replace the branch // with a bclr. - MachineInstr *MI = ReturnMBB.getParent()->CloneMachineInstr(&*I); - MI->setDesc( - TII->get(J->getOpcode() == PPC::BC ? PPC::BCLR : PPC::BCLRn)); - MachineInstrBuilder(*ReturnMBB.getParent(), MI) - .add(J->getOperand(0)); - (*PI)->insert(J, MI); - + MachineInstr *MI = ReturnMBB.getParent()->CloneMachineInstr(&*I); + MI->setDesc( + TII->get(J->getOpcode() == PPC::BC ? PPC::BCLR : PPC::BCLRn)); + MachineInstrBuilder(*ReturnMBB.getParent(), MI) + .add(J->getOperand(0)); + (*PI)->insert(J, MI); + MachineBasicBlock::iterator K = J--; K->eraseFromParent(); BlockChanged = true; diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCFastISel.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/PPCFastISel.cpp index f944ce1dbf..c181816e31 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCFastISel.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCFastISel.cpp @@ -1565,10 +1565,10 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) { if (IsVarArg) return false; - // If this is a PC-Rel function, let SDISel handle the call. - if (Subtarget->isUsingPCRelativeCalls()) - return false; - + // If this is a PC-Rel function, let SDISel handle the call. + if (Subtarget->isUsingPCRelativeCalls()) + return false; + // Handle simple calls for now, with legal return types and // those that can be extended. Type *RetTy = CLI.RetTy; @@ -1624,10 +1624,10 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) { if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8) return false; - // FIXME: FastISel cannot handle non-simple types yet, including 128-bit FP - // types, which is passed through vector register. Skip these types and - // fallback to default SelectionDAG based selection. - if (ArgVT.isVector() || ArgVT == MVT::f128) + // FIXME: FastISel cannot handle non-simple types yet, including 128-bit FP + // types, which is passed through vector register. Skip these types and + // fallback to default SelectionDAG based selection. + if (ArgVT.isVector() || ArgVT == MVT::f128) return false; unsigned Arg = getRegForValue(ArgValue); @@ -1996,10 +1996,10 @@ bool PPCFastISel::fastSelectInstruction(const Instruction *I) { // Materialize a floating-point constant into a register, and return // the register number (or zero if we failed to handle it). unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) { - // If this is a PC-Rel function, let SDISel handle constant pool. - if (Subtarget->isUsingPCRelativeCalls()) - return false; - + // If this is a PC-Rel function, let SDISel handle constant pool. + if (Subtarget->isUsingPCRelativeCalls()) + return false; + // No plans to handle long double here. if (VT != MVT::f32 && VT != MVT::f64) return 0; @@ -2064,10 +2064,10 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) { // Materialize the address of a global value into a register, and return // the register number (or zero if we failed to handle it). unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { - // If this is a PC-Rel function, let SDISel handle GV materialization. - if (Subtarget->isUsingPCRelativeCalls()) - return false; - + // If this is a PC-Rel function, let SDISel handle GV materialization. + if (Subtarget->isUsingPCRelativeCalls()) + return false; + assert(VT == MVT::i64 && "Non-address!"); const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass; unsigned DestReg = createResultReg(RC); diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/PPCFrameLowering.cpp index 86ae5f5b9e..16536bf23d 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -218,14 +218,14 @@ const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( CALLEE_SAVED_VRS }; - static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS, - CALLEE_SAVED_GPRS32, - // Add AIX's extra CSR. - {PPC::R13, -76}, - CALLEE_SAVED_VRS}; + static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS, + CALLEE_SAVED_GPRS32, + // Add AIX's extra CSR. + {PPC::R13, -76}, + CALLEE_SAVED_VRS}; static const SpillSlot AIXOffsets64[] = { - CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS}; + CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS}; if (Subtarget.is64BitELFABI()) { NumEntries = array_lengthof(ELFOffsets64); @@ -318,7 +318,7 @@ PPCFrameLowering::determineFrameLayout(const MachineFunction &MF, !FI->mustSaveTOC() && // No need to save TOC. !RegInfo->hasBasePointer(MF); // No special alignment. - // Note: for PPC32 SVR4ABI, we can still generate stackless + // Note: for PPC32 SVR4ABI, we can still generate stackless // code if all local vars are reg-allocated. bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize(); @@ -375,10 +375,10 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { return false; return MF.getTarget().Options.DisableFramePointerElim(MF) || - MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() || - MF.exposesReturnsTwice() || - (MF.getTarget().Options.GuaranteedTailCallOpt && - MF.getInfo<PPCFunctionInfo>()->hasFastCall()); + MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() || + MF.exposesReturnsTwice() || + (MF.getTarget().Options.GuaranteedTailCallOpt && + MF.getInfo<PPCFunctionInfo>()->hasFastCall()); } void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { @@ -526,8 +526,8 @@ PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, // register is available, we can adjust for that by not overlapping the spill // code. However, if we need to realign the stack (i.e. have a base pointer) // and the stack frame is large, we need two scratch registers. -// Also, stack probe requires two scratch registers, one for old sp, one for -// large frame and large probe size. +// Also, stack probe requires two scratch registers, one for old sp, one for +// large frame and large probe size. bool PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); @@ -539,10 +539,10 @@ PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { MachineFrameInfo &MFI = MF.getFrameInfo(); Align MaxAlign = MFI.getMaxAlign(); bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); - const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); + const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); - return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) || - TLI.hasInlineStackProbe(MF); + return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) || + TLI.hasInlineStackProbe(MF); } bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { @@ -585,8 +585,8 @@ bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const { // Frame pointers and base pointers complicate matters so don't do anything // if we have them. For example having a frame pointer will sometimes require // a copy of r1 into r31 and that makes keeping track of updates to r1 more - // difficult. Similar situation exists with setjmp. - if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice()) + // difficult. Similar situation exists with setjmp. + if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice()) return false; // Calls to fast_cc functions use different rules for passing parameters on @@ -621,7 +621,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, // Get the ABI. bool isSVR4ABI = Subtarget.isSVR4ABI(); bool isELFv2ABI = Subtarget.isELFv2ABI(); - assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI."); + assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI."); // Work out frame sizes. unsigned FrameSize = determineFrameLayoutAndUpdate(MF); @@ -682,7 +682,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, // Using the same bool variable as below to suppress compiler warnings. bool SingleScratchReg = findScratchRegister( - &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg); + &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg); assert(SingleScratchReg && "Required number of registers not available in this block"); @@ -692,18 +692,18 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, int FPOffset = 0; if (HasFP) { - MachineFrameInfo &MFI = MF.getFrameInfo(); - int FPIndex = FI->getFramePointerSaveIndex(); - assert(FPIndex && "No Frame Pointer Save Slot!"); - FPOffset = MFI.getObjectOffset(FPIndex); + MachineFrameInfo &MFI = MF.getFrameInfo(); + int FPIndex = FI->getFramePointerSaveIndex(); + assert(FPIndex && "No Frame Pointer Save Slot!"); + FPOffset = MFI.getObjectOffset(FPIndex); } int BPOffset = 0; if (HasBP) { - MachineFrameInfo &MFI = MF.getFrameInfo(); - int BPIndex = FI->getBasePointerSaveIndex(); - assert(BPIndex && "No Base Pointer Save Slot!"); - BPOffset = MFI.getObjectOffset(BPIndex); + MachineFrameInfo &MFI = MF.getFrameInfo(); + int BPIndex = FI->getBasePointerSaveIndex(); + assert(BPIndex && "No Base Pointer Save Slot!"); + BPOffset = MFI.getObjectOffset(BPIndex); } int PBPOffset = 0; @@ -859,15 +859,15 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64 : PPC::PROBED_STACKALLOC_32)) - .addDef(TempReg) - .addDef(ScratchReg) // ScratchReg stores the old sp. + .addDef(TempReg) + .addDef(ScratchReg) // ScratchReg stores the old sp. .addImm(NegFrameSize); // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we // update the ScratchReg to meet the assumption that ScratchReg contains // the NegFrameSize. This solution is rather tricky. if (!HasRedZone) { BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) - .addReg(ScratchReg) + .addReg(ScratchReg) .addReg(SPReg); HasSTUX = true; } @@ -1202,12 +1202,12 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, if (StackAllocMIPos == PrologMBB.end()) return; const BasicBlock *ProbedBB = PrologMBB.getBasicBlock(); - MachineBasicBlock *CurrentMBB = &PrologMBB; + MachineBasicBlock *CurrentMBB = &PrologMBB; DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos); MachineInstr &MI = *StackAllocMIPos; int64_t NegFrameSize = MI.getOperand(2).getImm(); - unsigned ProbeSize = TLI.getStackProbeSize(MF); - int64_t NegProbeSize = -(int64_t)ProbeSize; + unsigned ProbeSize = TLI.getStackProbeSize(MF); + int64_t NegProbeSize = -(int64_t)ProbeSize; assert(isInt<32>(NegProbeSize) && "Unhandled probe size"); int64_t NumBlocks = NegFrameSize / NegProbeSize; int64_t NegResidualSize = NegFrameSize % NegProbeSize; @@ -1216,9 +1216,9 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, Register FPReg = MI.getOperand(1).getReg(); const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); bool HasBP = RegInfo->hasBasePointer(MF); - Register BPReg = RegInfo->getBaseRegister(MF); + Register BPReg = RegInfo->getBaseRegister(MF); Align MaxAlign = MFI.getMaxAlign(); - bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); + bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR); // Subroutines to generate .cfi_* directives. auto buildDefCFAReg = [&](MachineBasicBlock &MBB, @@ -1259,233 +1259,233 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, // Subroutine to store frame pointer and decrease stack pointer by probe size. auto allocateAndProbe = [&](MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, int64_t NegSize, - Register NegSizeReg, bool UseDForm, - Register StoreReg) { + Register NegSizeReg, bool UseDForm, + Register StoreReg) { if (UseDForm) BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg) - .addReg(StoreReg) + .addReg(StoreReg) .addImm(NegSize) .addReg(SPReg); else BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg) - .addReg(StoreReg) + .addReg(StoreReg) .addReg(SPReg) .addReg(NegSizeReg); }; - // Used to probe stack when realignment is required. - // Note that, according to ABI's requirement, *sp must always equals the - // value of back-chain pointer, only st(w|d)u(x) can be used to update sp. - // Following is pseudo code: - // final_sp = (sp & align) + negframesize; - // neg_gap = final_sp - sp; - // while (neg_gap < negprobesize) { - // stdu fp, negprobesize(sp); - // neg_gap -= negprobesize; - // } - // stdux fp, sp, neg_gap - // - // When HasBP & HasRedzone, back-chain pointer is already saved in BPReg - // before probe code, we don't need to save it, so we get one additional reg - // that can be used to materialize the probeside if needed to use xform. - // Otherwise, we can NOT materialize probeside, so we can only use Dform for - // now. - // - // The allocations are: - // if (HasBP && HasRedzone) { - // r0: materialize the probesize if needed so that we can use xform. - // r12: `neg_gap` - // } else { - // r0: back-chain pointer - // r12: `neg_gap`. - // } - auto probeRealignedStack = [&](MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - Register ScratchReg, Register TempReg) { - assert(HasBP && "The function is supposed to have base pointer when its " - "stack is realigned."); - assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2"); - - // FIXME: We can eliminate this limitation if we get more infomation about - // which part of redzone are already used. Used redzone can be treated - // probed. But there might be `holes' in redzone probed, this could - // complicate the implementation. - assert(ProbeSize >= Subtarget.getRedZoneSize() && - "Probe size should be larger or equal to the size of red-zone so " - "that red-zone is not clobbered by probing."); - - Register &FinalStackPtr = TempReg; - // FIXME: We only support NegProbeSize materializable by DForm currently. - // When HasBP && HasRedzone, we can use xform if we have an additional idle - // register. - NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15)); - assert(isInt<16>(NegProbeSize) && - "NegProbeSize should be materializable by DForm"); - Register CRReg = PPC::CR0; - // Layout of output assembly kinda like: - // bb.0: - // ... - // sub $scratchreg, $finalsp, r1 - // cmpdi $scratchreg, <negprobesize> - // bge bb.2 - // bb.1: - // stdu <backchain>, <negprobesize>(r1) - // sub $scratchreg, $scratchreg, negprobesize - // cmpdi $scratchreg, <negprobesize> - // blt bb.1 - // bb.2: - // stdux <backchain>, r1, $scratchreg - MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator()); - MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB); - MF.insert(MBBInsertPoint, ProbeLoopBodyMBB); - MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB); - MF.insert(MBBInsertPoint, ProbeExitMBB); - // bb.2 - { - Register BackChainPointer = HasRedZone ? BPReg : TempReg; - allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false, - BackChainPointer); - if (HasRedZone) - // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg - // to TempReg to satisfy it. - BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg) - .addReg(BPReg) - .addReg(BPReg); - ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end()); - ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); - } - // bb.0 - { - BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg) - .addReg(SPReg) - .addReg(FinalStackPtr); - if (!HasRedZone) - BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg); - BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg) - .addReg(ScratchReg) - .addImm(NegProbeSize); - BuildMI(&MBB, DL, TII.get(PPC::BCC)) - .addImm(PPC::PRED_GE) - .addReg(CRReg) - .addMBB(ProbeExitMBB); - MBB.addSuccessor(ProbeLoopBodyMBB); - MBB.addSuccessor(ProbeExitMBB); - } - // bb.1 - { - Register BackChainPointer = HasRedZone ? BPReg : TempReg; - allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize, - 0, true /*UseDForm*/, BackChainPointer); - BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI), - ScratchReg) - .addReg(ScratchReg) - .addImm(-NegProbeSize); - BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), - CRReg) - .addReg(ScratchReg) - .addImm(NegProbeSize); - BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC)) - .addImm(PPC::PRED_LT) - .addReg(CRReg) - .addMBB(ProbeLoopBodyMBB); - ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB); - ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB); - } - // Update liveins. - recomputeLiveIns(*ProbeLoopBodyMBB); - recomputeLiveIns(*ProbeExitMBB); - return ProbeExitMBB; - }; - // For case HasBP && MaxAlign > 1, we have to realign the SP by performing - // SP = SP - SP % MaxAlign, thus make the probe more like dynamic probe since - // the offset subtracted from SP is determined by SP's runtime value. + // Used to probe stack when realignment is required. + // Note that, according to ABI's requirement, *sp must always equals the + // value of back-chain pointer, only st(w|d)u(x) can be used to update sp. + // Following is pseudo code: + // final_sp = (sp & align) + negframesize; + // neg_gap = final_sp - sp; + // while (neg_gap < negprobesize) { + // stdu fp, negprobesize(sp); + // neg_gap -= negprobesize; + // } + // stdux fp, sp, neg_gap + // + // When HasBP & HasRedzone, back-chain pointer is already saved in BPReg + // before probe code, we don't need to save it, so we get one additional reg + // that can be used to materialize the probeside if needed to use xform. + // Otherwise, we can NOT materialize probeside, so we can only use Dform for + // now. + // + // The allocations are: + // if (HasBP && HasRedzone) { + // r0: materialize the probesize if needed so that we can use xform. + // r12: `neg_gap` + // } else { + // r0: back-chain pointer + // r12: `neg_gap`. + // } + auto probeRealignedStack = [&](MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + Register ScratchReg, Register TempReg) { + assert(HasBP && "The function is supposed to have base pointer when its " + "stack is realigned."); + assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2"); + + // FIXME: We can eliminate this limitation if we get more infomation about + // which part of redzone are already used. Used redzone can be treated + // probed. But there might be `holes' in redzone probed, this could + // complicate the implementation. + assert(ProbeSize >= Subtarget.getRedZoneSize() && + "Probe size should be larger or equal to the size of red-zone so " + "that red-zone is not clobbered by probing."); + + Register &FinalStackPtr = TempReg; + // FIXME: We only support NegProbeSize materializable by DForm currently. + // When HasBP && HasRedzone, we can use xform if we have an additional idle + // register. + NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15)); + assert(isInt<16>(NegProbeSize) && + "NegProbeSize should be materializable by DForm"); + Register CRReg = PPC::CR0; + // Layout of output assembly kinda like: + // bb.0: + // ... + // sub $scratchreg, $finalsp, r1 + // cmpdi $scratchreg, <negprobesize> + // bge bb.2 + // bb.1: + // stdu <backchain>, <negprobesize>(r1) + // sub $scratchreg, $scratchreg, negprobesize + // cmpdi $scratchreg, <negprobesize> + // blt bb.1 + // bb.2: + // stdux <backchain>, r1, $scratchreg + MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator()); + MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB); + MF.insert(MBBInsertPoint, ProbeLoopBodyMBB); + MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB); + MF.insert(MBBInsertPoint, ProbeExitMBB); + // bb.2 + { + Register BackChainPointer = HasRedZone ? BPReg : TempReg; + allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false, + BackChainPointer); + if (HasRedZone) + // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg + // to TempReg to satisfy it. + BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg) + .addReg(BPReg) + .addReg(BPReg); + ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end()); + ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); + } + // bb.0 + { + BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg) + .addReg(SPReg) + .addReg(FinalStackPtr); + if (!HasRedZone) + BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg); + BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg) + .addReg(ScratchReg) + .addImm(NegProbeSize); + BuildMI(&MBB, DL, TII.get(PPC::BCC)) + .addImm(PPC::PRED_GE) + .addReg(CRReg) + .addMBB(ProbeExitMBB); + MBB.addSuccessor(ProbeLoopBodyMBB); + MBB.addSuccessor(ProbeExitMBB); + } + // bb.1 + { + Register BackChainPointer = HasRedZone ? BPReg : TempReg; + allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize, + 0, true /*UseDForm*/, BackChainPointer); + BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI), + ScratchReg) + .addReg(ScratchReg) + .addImm(-NegProbeSize); + BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), + CRReg) + .addReg(ScratchReg) + .addImm(NegProbeSize); + BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC)) + .addImm(PPC::PRED_LT) + .addReg(CRReg) + .addMBB(ProbeLoopBodyMBB); + ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB); + ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB); + } + // Update liveins. + recomputeLiveIns(*ProbeLoopBodyMBB); + recomputeLiveIns(*ProbeExitMBB); + return ProbeExitMBB; + }; + // For case HasBP && MaxAlign > 1, we have to realign the SP by performing + // SP = SP - SP % MaxAlign, thus make the probe more like dynamic probe since + // the offset subtracted from SP is determined by SP's runtime value. if (HasBP && MaxAlign > 1) { - // Calculate final stack pointer. - if (isPPC64) - BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg) - .addReg(SPReg) - .addImm(0) - .addImm(64 - Log2(MaxAlign)); - else - BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg) - .addReg(SPReg) + // Calculate final stack pointer. + if (isPPC64) + BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg) + .addReg(SPReg) + .addImm(0) + .addImm(64 - Log2(MaxAlign)); + else + BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg) + .addReg(SPReg) .addImm(0) .addImm(32 - Log2(MaxAlign)) .addImm(31); - BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), - FPReg) - .addReg(ScratchReg) - .addReg(SPReg); - MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg); - BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4), - FPReg) - .addReg(ScratchReg) - .addReg(FPReg); - CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg); - if (needsCFI) - buildDefCFAReg(*CurrentMBB, {MI}, FPReg); - } else { - // Initialize current frame pointer. - BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg); - // Use FPReg to calculate CFA. - if (needsCFI) - buildDefCFA(*CurrentMBB, {MI}, FPReg, 0); - // Probe residual part. - if (NegResidualSize) { - bool ResidualUseDForm = CanUseDForm(NegResidualSize); - if (!ResidualUseDForm) - MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg); - allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg, - ResidualUseDForm, FPReg); + BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), + FPReg) + .addReg(ScratchReg) + .addReg(SPReg); + MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg); + BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4), + FPReg) + .addReg(ScratchReg) + .addReg(FPReg); + CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg); + if (needsCFI) + buildDefCFAReg(*CurrentMBB, {MI}, FPReg); + } else { + // Initialize current frame pointer. + BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg); + // Use FPReg to calculate CFA. + if (needsCFI) + buildDefCFA(*CurrentMBB, {MI}, FPReg, 0); + // Probe residual part. + if (NegResidualSize) { + bool ResidualUseDForm = CanUseDForm(NegResidualSize); + if (!ResidualUseDForm) + MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg); + allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg, + ResidualUseDForm, FPReg); } - bool UseDForm = CanUseDForm(NegProbeSize); - // If number of blocks is small, just probe them directly. - if (NumBlocks < 3) { - if (!UseDForm) - MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); - for (int i = 0; i < NumBlocks; ++i) - allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm, - FPReg); - if (needsCFI) { - // Restore using SPReg to calculate CFA. - buildDefCFAReg(*CurrentMBB, {MI}, SPReg); - } - } else { - // Since CTR is a volatile register and current shrinkwrap implementation - // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a - // CTR loop to probe. - // Calculate trip count and stores it in CTRReg. - MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg); - BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)) - .addReg(ScratchReg, RegState::Kill); - if (!UseDForm) - MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); - // Create MBBs of the loop. - MachineFunction::iterator MBBInsertPoint = - std::next(CurrentMBB->getIterator()); - MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB); - MF.insert(MBBInsertPoint, LoopMBB); - MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB); - MF.insert(MBBInsertPoint, ExitMBB); - // Synthesize the loop body. - allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg, - UseDForm, FPReg); - BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)) - .addMBB(LoopMBB); - LoopMBB->addSuccessor(ExitMBB); - LoopMBB->addSuccessor(LoopMBB); - // Synthesize the exit MBB. - ExitMBB->splice(ExitMBB->end(), CurrentMBB, - std::next(MachineBasicBlock::iterator(MI)), - CurrentMBB->end()); - ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB); - CurrentMBB->addSuccessor(LoopMBB); - if (needsCFI) { - // Restore using SPReg to calculate CFA. - buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg); - } - // Update liveins. - recomputeLiveIns(*LoopMBB); - recomputeLiveIns(*ExitMBB); + bool UseDForm = CanUseDForm(NegProbeSize); + // If number of blocks is small, just probe them directly. + if (NumBlocks < 3) { + if (!UseDForm) + MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); + for (int i = 0; i < NumBlocks; ++i) + allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm, + FPReg); + if (needsCFI) { + // Restore using SPReg to calculate CFA. + buildDefCFAReg(*CurrentMBB, {MI}, SPReg); + } + } else { + // Since CTR is a volatile register and current shrinkwrap implementation + // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a + // CTR loop to probe. + // Calculate trip count and stores it in CTRReg. + MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg); + BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)) + .addReg(ScratchReg, RegState::Kill); + if (!UseDForm) + MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); + // Create MBBs of the loop. + MachineFunction::iterator MBBInsertPoint = + std::next(CurrentMBB->getIterator()); + MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB); + MF.insert(MBBInsertPoint, LoopMBB); + MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB); + MF.insert(MBBInsertPoint, ExitMBB); + // Synthesize the loop body. + allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg, + UseDForm, FPReg); + BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)) + .addMBB(LoopMBB); + LoopMBB->addSuccessor(ExitMBB); + LoopMBB->addSuccessor(LoopMBB); + // Synthesize the exit MBB. + ExitMBB->splice(ExitMBB->end(), CurrentMBB, + std::next(MachineBasicBlock::iterator(MI)), + CurrentMBB->end()); + ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB); + CurrentMBB->addSuccessor(LoopMBB); + if (needsCFI) { + // Restore using SPReg to calculate CFA. + buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg); + } + // Update liveins. + recomputeLiveIns(*LoopMBB); + recomputeLiveIns(*ExitMBB); } } ++NumPrologProbed; @@ -1558,9 +1558,9 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, SingleScratchReg = ScratchReg == TempReg; if (HasFP) { - int FPIndex = FI->getFramePointerSaveIndex(); - assert(FPIndex && "No Frame Pointer Save Slot!"); - FPOffset = MFI.getObjectOffset(FPIndex); + int FPIndex = FI->getFramePointerSaveIndex(); + assert(FPIndex && "No Frame Pointer Save Slot!"); + FPOffset = MFI.getObjectOffset(FPIndex); } int BPOffset = 0; @@ -1653,18 +1653,18 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red // zone add this offset back now. - // If the function has a base pointer, the stack pointer has been copied - // to it so we can restore it by copying in the other direction. - if (HasRedZone && HasBP) { - BuildMI(MBB, MBBI, dl, OrInst, RBReg). - addReg(BPReg). - addReg(BPReg); - } + // If the function has a base pointer, the stack pointer has been copied + // to it so we can restore it by copying in the other direction. + if (HasRedZone && HasBP) { + BuildMI(MBB, MBBI, dl, OrInst, RBReg). + addReg(BPReg). + addReg(BPReg); + } // If this function contained a fastcc call and GuaranteedTailCallOpt is // enabled (=> hasFastCall()==true) the fastcc call might contain a tail // call which invalidates the stack pointer value in SP(0). So we use the - // value of R31 in this case. Similar situation exists with setjmp. - else if (FI->hasFastCall() || MF.exposesReturnsTwice()) { + // value of R31 in this case. Similar situation exists with setjmp. + else if (FI->hasFastCall() || MF.exposesReturnsTwice()) { assert(HasFP && "Expecting a valid frame pointer."); if (!HasRedZone) RBReg = FPReg; @@ -2210,8 +2210,8 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, // needed alignment padding. unsigned StackSize = determineFrameLayout(MF, true); MachineFrameInfo &MFI = MF.getFrameInfo(); - if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) || - (hasSpills(MF) && !isInt<16>(StackSize))) { + if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) || + (hasSpills(MF) && !isInt<16>(StackSize))) { const TargetRegisterClass &GPRC = PPC::GPRCRegClass; const TargetRegisterClass &G8RC = PPC::G8RCRegClass; const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC; @@ -2225,7 +2225,7 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign(); // These kinds of spills might need two registers. - if (spillsCR(MF) || HasAlVars) + if (spillsCR(MF) || HasAlVars) RS->addScavengingFrameIndex( MFI.CreateStackObject(Size, Alignment, false)); } diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 746193861d..2604218da1 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -43,7 +43,7 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstrTypes.h" -#include "llvm/IR/IntrinsicsPowerPC.h" +#include "llvm/IR/IntrinsicsPowerPC.h" #include "llvm/IR/Module.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" @@ -214,7 +214,7 @@ namespace { /// SelectCC - Select a comparison of the specified values with the /// specified condition code, returning the CR# of the expression. SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, - const SDLoc &dl, SDValue Chain = SDValue()); + const SDLoc &dl, SDValue Chain = SDValue()); /// SelectAddrImmOffs - Return true if the operand is valid for a preinc /// immediate field. Note that the operand at this point is already the @@ -291,13 +291,13 @@ namespace { Align(16)); } - /// SelectAddrImmX34 - Returns true if the address N can be represented by - /// a base register plus a signed 34-bit displacement. Suitable for use by - /// PSTXVP and friends. - bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) { - return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG); - } - + /// SelectAddrImmX34 - Returns true if the address N can be represented by + /// a base register plus a signed 34-bit displacement. Suitable for use by + /// PSTXVP and friends. + bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) { + return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG); + } + // Select an address into a single register. bool SelectAddr(SDValue N, SDValue &Base) { Base = N; @@ -352,7 +352,7 @@ namespace { private: bool trySETCC(SDNode *N); - bool tryFoldSWTestBRCC(SDNode *N); + bool tryFoldSWTestBRCC(SDNode *N); bool tryAsSingleRLDICL(SDNode *N); bool tryAsSingleRLDICR(SDNode *N); bool tryAsSingleRLWINM(SDNode *N); @@ -586,8 +586,8 @@ bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) { SDValue Offset = ST->getOffset(); if (!Offset.isUndef()) return false; - if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR) - return false; + if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR) + return false; SDLoc dl(ST); EVT MemVT = ST->getMemoryVT(); @@ -631,8 +631,8 @@ bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) { SDValue Offset = LD->getOffset(); if (!Offset.isUndef()) return false; - if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR) - return false; + if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR) + return false; SDLoc dl(LD); EVT MemVT = LD->getMemoryVT(); @@ -798,274 +798,274 @@ static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) { return MaxTruncation; } -// For any 32 < Num < 64, check if the Imm contains at least Num consecutive -// zeros and return the number of bits by the left of these consecutive zeros. -static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) { - unsigned HiTZ = countTrailingZeros<uint32_t>(Hi_32(Imm)); - unsigned LoLZ = countLeadingZeros<uint32_t>(Lo_32(Imm)); - if ((HiTZ + LoLZ) >= Num) - return (32 + HiTZ); - return 0; -} - -// Direct materialization of 64-bit constants by enumerated patterns. -static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl, - uint64_t Imm, unsigned &InstCnt) { - unsigned TZ = countTrailingZeros<uint64_t>(Imm); - unsigned LZ = countLeadingZeros<uint64_t>(Imm); - unsigned TO = countTrailingOnes<uint64_t>(Imm); - unsigned LO = countLeadingOnes<uint64_t>(Imm); - unsigned Hi32 = Hi_32(Imm); - unsigned Lo32 = Lo_32(Imm); - SDNode *Result = nullptr; - unsigned Shift = 0; - - auto getI32Imm = [CurDAG, dl](unsigned Imm) { - return CurDAG->getTargetConstant(Imm, dl, MVT::i32); - }; - - // Following patterns use 1 instructions to materialize the Imm. - InstCnt = 1; - // 1-1) Patterns : {zeros}{15-bit valve} - // {ones}{15-bit valve} - if (isInt<16>(Imm)) { - SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64); - return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm); - } - // 1-2) Patterns : {zeros}{15-bit valve}{16 zeros} - // {ones}{15-bit valve}{16 zeros} - if (TZ > 15 && (LZ > 32 || LO > 32)) - return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, - getI32Imm((Imm >> 16) & 0xffff)); - - // Following patterns use 2 instructions to materialize the Imm. - InstCnt = 2; - assert(LZ < 64 && "Unexpected leading zeros here."); - // Count of ones follwing the leading zeros. - unsigned FO = countLeadingOnes<uint64_t>(Imm << LZ); - // 2-1) Patterns : {zeros}{31-bit value} - // {ones}{31-bit value} - if (isInt<32>(Imm)) { - uint64_t ImmHi16 = (Imm >> 16) & 0xffff; - unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8; - Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16)); - return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), - getI32Imm(Imm & 0xffff)); - } - // 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros} - // {zeros}{15-bit value}{zeros} - // {zeros}{ones}{15-bit value} - // {ones}{15-bit value}{zeros} - // We can take advantage of LI's sign-extension semantics to generate leading - // ones, and then use RLDIC to mask off the ones in both sides after rotation. - if ((LZ + FO + TZ) > 48) { - Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, - getI32Imm((Imm >> TZ) & 0xffff)); - return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0), - getI32Imm(TZ), getI32Imm(LZ)); - } - // 2-3) Pattern : {zeros}{15-bit value}{ones} - // Shift right the Imm by (48 - LZ) bits to construct a negtive 16 bits value, - // therefore we can take advantage of LI's sign-extension semantics, and then - // mask them off after rotation. - // - // +--LZ--||-15-bit-||--TO--+ +-------------|--16-bit--+ - // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1| - // +------------------------+ +------------------------+ - // 63 0 63 0 - // Imm (Imm >> (48 - LZ) & 0xffff) - // +----sext-----|--16-bit--+ +clear-|-----------------+ - // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111| - // +------------------------+ +------------------------+ - // 63 0 63 0 - // LI8: sext many leading zeros RLDICL: rotate left (48 - LZ), clear left LZ - if ((LZ + TO) > 48) { - // Since the immediates with (LZ > 32) have been handled by previous - // patterns, here we have (LZ <= 32) to make sure we will not shift right - // the Imm by a negative value. - assert(LZ <= 32 && "Unexpected shift value."); - Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, - getI32Imm((Imm >> (48 - LZ) & 0xffff))); - return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), - getI32Imm(48 - LZ), getI32Imm(LZ)); - } - // 2-4) Patterns : {zeros}{ones}{15-bit value}{ones} - // {ones}{15-bit value}{ones} - // We can take advantage of LI's sign-extension semantics to generate leading - // ones, and then use RLDICL to mask off the ones in left sides (if required) - // after rotation. - // - // +-LZ-FO||-15-bit-||--TO--+ +-------------|--16-bit--+ - // |00011110bbbbbbbbb1111111| -> |000000000011110bbbbbbbbb| - // +------------------------+ +------------------------+ - // 63 0 63 0 - // Imm (Imm >> TO) & 0xffff - // +----sext-----|--16-bit--+ +LZ|---------------------+ - // |111111111111110bbbbbbbbb| -> |00011110bbbbbbbbb1111111| - // +------------------------+ +------------------------+ - // 63 0 63 0 - // LI8: sext many leading zeros RLDICL: rotate left TO, clear left LZ - if ((LZ + FO + TO) > 48) { - Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, - getI32Imm((Imm >> TO) & 0xffff)); - return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), - getI32Imm(TO), getI32Imm(LZ)); - } - // 2-5) Pattern : {32 zeros}{****}{0}{15-bit value} - // If Hi32 is zero and the Lo16(in Lo32) can be presented as a positive 16 bit - // value, we can use LI for Lo16 without generating leading ones then add the - // Hi16(in Lo32). - if (LZ == 32 && ((Lo32 & 0x8000) == 0)) { - Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, - getI32Imm(Lo32 & 0xffff)); - return CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0), - getI32Imm(Lo32 >> 16)); - } - // 2-6) Patterns : {******}{49 zeros}{******} - // {******}{49 ones}{******} - // If the Imm contains 49 consecutive zeros/ones, it means that a total of 15 - // bits remain on both sides. Rotate right the Imm to construct an int<16> - // value, use LI for int<16> value and then use RLDICL without mask to rotate - // it back. - // - // 1) findContiguousZerosAtLeast(Imm, 49) - // +------|--zeros-|------+ +---ones--||---15 bit--+ - // |bbbbbb0000000000aaaaaa| -> |0000000000aaaaaabbbbbb| - // +----------------------+ +----------------------+ - // 63 0 63 0 - // - // 2) findContiguousZerosAtLeast(~Imm, 49) - // +------|--ones--|------+ +---ones--||---15 bit--+ - // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb| - // +----------------------+ +----------------------+ - // 63 0 63 0 - if ((Shift = findContiguousZerosAtLeast(Imm, 49)) || - (Shift = findContiguousZerosAtLeast(~Imm, 49))) { - uint64_t RotImm = (Imm >> Shift) | (Imm << (64 - Shift)); - Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, - getI32Imm(RotImm & 0xffff)); - return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), - getI32Imm(Shift), getI32Imm(0)); - } - - // Following patterns use 3 instructions to materialize the Imm. - InstCnt = 3; - // 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros} - // {zeros}{31-bit value}{zeros} - // {zeros}{ones}{31-bit value} - // {ones}{31-bit value}{zeros} - // We can take advantage of LIS's sign-extension semantics to generate leading - // ones, add the remaining bits with ORI, and then use RLDIC to mask off the - // ones in both sides after rotation. - if ((LZ + FO + TZ) > 32) { - uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff; - unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8; - Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16)); - Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), - getI32Imm((Imm >> TZ) & 0xffff)); - return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0), - getI32Imm(TZ), getI32Imm(LZ)); - } - // 3-2) Pattern : {zeros}{31-bit value}{ones} - // Shift right the Imm by (32 - LZ) bits to construct a negtive 32 bits value, - // therefore we can take advantage of LIS's sign-extension semantics, add - // the remaining bits with ORI, and then mask them off after rotation. - // This is similar to Pattern 2-3, please refer to the diagram there. - if ((LZ + TO) > 32) { - // Since the immediates with (LZ > 32) have been handled by previous - // patterns, here we have (LZ <= 32) to make sure we will not shift right - // the Imm by a negative value. - assert(LZ <= 32 && "Unexpected shift value."); - Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, - getI32Imm((Imm >> (48 - LZ)) & 0xffff)); - Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), - getI32Imm((Imm >> (32 - LZ)) & 0xffff)); - return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), - getI32Imm(32 - LZ), getI32Imm(LZ)); - } - // 3-3) Patterns : {zeros}{ones}{31-bit value}{ones} - // {ones}{31-bit value}{ones} - // We can take advantage of LIS's sign-extension semantics to generate leading - // ones, add the remaining bits with ORI, and then use RLDICL to mask off the - // ones in left sides (if required) after rotation. - // This is similar to Pattern 2-4, please refer to the diagram there. - if ((LZ + FO + TO) > 32) { - Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, - getI32Imm((Imm >> (TO + 16)) & 0xffff)); - Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), - getI32Imm((Imm >> TO) & 0xffff)); - return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), - getI32Imm(TO), getI32Imm(LZ)); - } - // 3-4) Patterns : High word == Low word - if (Hi32 == Lo32) { - // Handle the first 32 bits. - uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff; - unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8; - Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16)); - Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), - getI32Imm(Lo32 & 0xffff)); - // Use rldimi to insert the Low word into High word. - SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32), - getI32Imm(0)}; - return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); - } - // 3-5) Patterns : {******}{33 zeros}{******} - // {******}{33 ones}{******} - // If the Imm contains 33 consecutive zeros/ones, it means that a total of 31 - // bits remain on both sides. Rotate right the Imm to construct an int<32> - // value, use LIS + ORI for int<32> value and then use RLDICL without mask to - // rotate it back. - // This is similar to Pattern 2-6, please refer to the diagram there. - if ((Shift = findContiguousZerosAtLeast(Imm, 33)) || - (Shift = findContiguousZerosAtLeast(~Imm, 33))) { - uint64_t RotImm = (Imm >> Shift) | (Imm << (64 - Shift)); - uint64_t ImmHi16 = (RotImm >> 16) & 0xffff; - unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8; - Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16)); - Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), - getI32Imm(RotImm & 0xffff)); - return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), - getI32Imm(Shift), getI32Imm(0)); - } - - InstCnt = 0; - return nullptr; -} - -static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, - unsigned *InstCnt = nullptr) { - unsigned InstCntDirect = 0; - // No more than 3 instructions is used if we can select the i64 immediate - // directly. - SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCntDirect); - if (Result) { - if (InstCnt) - *InstCnt = InstCntDirect; - return Result; - } - auto getI32Imm = [CurDAG, dl](unsigned Imm) { - return CurDAG->getTargetConstant(Imm, dl, MVT::i32); - }; - // Handle the upper 32 bit value. - Result = - selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, InstCntDirect); - // Add in the last bits as required. - if (uint32_t Hi16 = (Lo_32(Imm) >> 16) & 0xffff) { - Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, - SDValue(Result, 0), getI32Imm(Hi16)); - ++InstCntDirect; - } - if (uint32_t Lo16 = Lo_32(Imm) & 0xffff) { - Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), - getI32Imm(Lo16)); - ++InstCntDirect; - } - if (InstCnt) - *InstCnt = InstCntDirect; - return Result; -} - +// For any 32 < Num < 64, check if the Imm contains at least Num consecutive +// zeros and return the number of bits by the left of these consecutive zeros. +static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) { + unsigned HiTZ = countTrailingZeros<uint32_t>(Hi_32(Imm)); + unsigned LoLZ = countLeadingZeros<uint32_t>(Lo_32(Imm)); + if ((HiTZ + LoLZ) >= Num) + return (32 + HiTZ); + return 0; +} + +// Direct materialization of 64-bit constants by enumerated patterns. +static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl, + uint64_t Imm, unsigned &InstCnt) { + unsigned TZ = countTrailingZeros<uint64_t>(Imm); + unsigned LZ = countLeadingZeros<uint64_t>(Imm); + unsigned TO = countTrailingOnes<uint64_t>(Imm); + unsigned LO = countLeadingOnes<uint64_t>(Imm); + unsigned Hi32 = Hi_32(Imm); + unsigned Lo32 = Lo_32(Imm); + SDNode *Result = nullptr; + unsigned Shift = 0; + + auto getI32Imm = [CurDAG, dl](unsigned Imm) { + return CurDAG->getTargetConstant(Imm, dl, MVT::i32); + }; + + // Following patterns use 1 instructions to materialize the Imm. + InstCnt = 1; + // 1-1) Patterns : {zeros}{15-bit valve} + // {ones}{15-bit valve} + if (isInt<16>(Imm)) { + SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64); + return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm); + } + // 1-2) Patterns : {zeros}{15-bit valve}{16 zeros} + // {ones}{15-bit valve}{16 zeros} + if (TZ > 15 && (LZ > 32 || LO > 32)) + return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, + getI32Imm((Imm >> 16) & 0xffff)); + + // Following patterns use 2 instructions to materialize the Imm. + InstCnt = 2; + assert(LZ < 64 && "Unexpected leading zeros here."); + // Count of ones follwing the leading zeros. + unsigned FO = countLeadingOnes<uint64_t>(Imm << LZ); + // 2-1) Patterns : {zeros}{31-bit value} + // {ones}{31-bit value} + if (isInt<32>(Imm)) { + uint64_t ImmHi16 = (Imm >> 16) & 0xffff; + unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8; + Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16)); + return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(Imm & 0xffff)); + } + // 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros} + // {zeros}{15-bit value}{zeros} + // {zeros}{ones}{15-bit value} + // {ones}{15-bit value}{zeros} + // We can take advantage of LI's sign-extension semantics to generate leading + // ones, and then use RLDIC to mask off the ones in both sides after rotation. + if ((LZ + FO + TZ) > 48) { + Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, + getI32Imm((Imm >> TZ) & 0xffff)); + return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(TZ), getI32Imm(LZ)); + } + // 2-3) Pattern : {zeros}{15-bit value}{ones} + // Shift right the Imm by (48 - LZ) bits to construct a negtive 16 bits value, + // therefore we can take advantage of LI's sign-extension semantics, and then + // mask them off after rotation. + // + // +--LZ--||-15-bit-||--TO--+ +-------------|--16-bit--+ + // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1| + // +------------------------+ +------------------------+ + // 63 0 63 0 + // Imm (Imm >> (48 - LZ) & 0xffff) + // +----sext-----|--16-bit--+ +clear-|-----------------+ + // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111| + // +------------------------+ +------------------------+ + // 63 0 63 0 + // LI8: sext many leading zeros RLDICL: rotate left (48 - LZ), clear left LZ + if ((LZ + TO) > 48) { + // Since the immediates with (LZ > 32) have been handled by previous + // patterns, here we have (LZ <= 32) to make sure we will not shift right + // the Imm by a negative value. + assert(LZ <= 32 && "Unexpected shift value."); + Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, + getI32Imm((Imm >> (48 - LZ) & 0xffff))); + return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(48 - LZ), getI32Imm(LZ)); + } + // 2-4) Patterns : {zeros}{ones}{15-bit value}{ones} + // {ones}{15-bit value}{ones} + // We can take advantage of LI's sign-extension semantics to generate leading + // ones, and then use RLDICL to mask off the ones in left sides (if required) + // after rotation. + // + // +-LZ-FO||-15-bit-||--TO--+ +-------------|--16-bit--+ + // |00011110bbbbbbbbb1111111| -> |000000000011110bbbbbbbbb| + // +------------------------+ +------------------------+ + // 63 0 63 0 + // Imm (Imm >> TO) & 0xffff + // +----sext-----|--16-bit--+ +LZ|---------------------+ + // |111111111111110bbbbbbbbb| -> |00011110bbbbbbbbb1111111| + // +------------------------+ +------------------------+ + // 63 0 63 0 + // LI8: sext many leading zeros RLDICL: rotate left TO, clear left LZ + if ((LZ + FO + TO) > 48) { + Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, + getI32Imm((Imm >> TO) & 0xffff)); + return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(TO), getI32Imm(LZ)); + } + // 2-5) Pattern : {32 zeros}{****}{0}{15-bit value} + // If Hi32 is zero and the Lo16(in Lo32) can be presented as a positive 16 bit + // value, we can use LI for Lo16 without generating leading ones then add the + // Hi16(in Lo32). + if (LZ == 32 && ((Lo32 & 0x8000) == 0)) { + Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, + getI32Imm(Lo32 & 0xffff)); + return CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(Lo32 >> 16)); + } + // 2-6) Patterns : {******}{49 zeros}{******} + // {******}{49 ones}{******} + // If the Imm contains 49 consecutive zeros/ones, it means that a total of 15 + // bits remain on both sides. Rotate right the Imm to construct an int<16> + // value, use LI for int<16> value and then use RLDICL without mask to rotate + // it back. + // + // 1) findContiguousZerosAtLeast(Imm, 49) + // +------|--zeros-|------+ +---ones--||---15 bit--+ + // |bbbbbb0000000000aaaaaa| -> |0000000000aaaaaabbbbbb| + // +----------------------+ +----------------------+ + // 63 0 63 0 + // + // 2) findContiguousZerosAtLeast(~Imm, 49) + // +------|--ones--|------+ +---ones--||---15 bit--+ + // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb| + // +----------------------+ +----------------------+ + // 63 0 63 0 + if ((Shift = findContiguousZerosAtLeast(Imm, 49)) || + (Shift = findContiguousZerosAtLeast(~Imm, 49))) { + uint64_t RotImm = (Imm >> Shift) | (Imm << (64 - Shift)); + Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, + getI32Imm(RotImm & 0xffff)); + return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(Shift), getI32Imm(0)); + } + + // Following patterns use 3 instructions to materialize the Imm. + InstCnt = 3; + // 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros} + // {zeros}{31-bit value}{zeros} + // {zeros}{ones}{31-bit value} + // {ones}{31-bit value}{zeros} + // We can take advantage of LIS's sign-extension semantics to generate leading + // ones, add the remaining bits with ORI, and then use RLDIC to mask off the + // ones in both sides after rotation. + if ((LZ + FO + TZ) > 32) { + uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff; + unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8; + Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16)); + Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), + getI32Imm((Imm >> TZ) & 0xffff)); + return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(TZ), getI32Imm(LZ)); + } + // 3-2) Pattern : {zeros}{31-bit value}{ones} + // Shift right the Imm by (32 - LZ) bits to construct a negtive 32 bits value, + // therefore we can take advantage of LIS's sign-extension semantics, add + // the remaining bits with ORI, and then mask them off after rotation. + // This is similar to Pattern 2-3, please refer to the diagram there. + if ((LZ + TO) > 32) { + // Since the immediates with (LZ > 32) have been handled by previous + // patterns, here we have (LZ <= 32) to make sure we will not shift right + // the Imm by a negative value. + assert(LZ <= 32 && "Unexpected shift value."); + Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, + getI32Imm((Imm >> (48 - LZ)) & 0xffff)); + Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), + getI32Imm((Imm >> (32 - LZ)) & 0xffff)); + return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(32 - LZ), getI32Imm(LZ)); + } + // 3-3) Patterns : {zeros}{ones}{31-bit value}{ones} + // {ones}{31-bit value}{ones} + // We can take advantage of LIS's sign-extension semantics to generate leading + // ones, add the remaining bits with ORI, and then use RLDICL to mask off the + // ones in left sides (if required) after rotation. + // This is similar to Pattern 2-4, please refer to the diagram there. + if ((LZ + FO + TO) > 32) { + Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, + getI32Imm((Imm >> (TO + 16)) & 0xffff)); + Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), + getI32Imm((Imm >> TO) & 0xffff)); + return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(TO), getI32Imm(LZ)); + } + // 3-4) Patterns : High word == Low word + if (Hi32 == Lo32) { + // Handle the first 32 bits. + uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff; + unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8; + Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16)); + Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(Lo32 & 0xffff)); + // Use rldimi to insert the Low word into High word. + SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32), + getI32Imm(0)}; + return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); + } + // 3-5) Patterns : {******}{33 zeros}{******} + // {******}{33 ones}{******} + // If the Imm contains 33 consecutive zeros/ones, it means that a total of 31 + // bits remain on both sides. Rotate right the Imm to construct an int<32> + // value, use LIS + ORI for int<32> value and then use RLDICL without mask to + // rotate it back. + // This is similar to Pattern 2-6, please refer to the diagram there. + if ((Shift = findContiguousZerosAtLeast(Imm, 33)) || + (Shift = findContiguousZerosAtLeast(~Imm, 33))) { + uint64_t RotImm = (Imm >> Shift) | (Imm << (64 - Shift)); + uint64_t ImmHi16 = (RotImm >> 16) & 0xffff; + unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8; + Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16)); + Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(RotImm & 0xffff)); + return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(Shift), getI32Imm(0)); + } + + InstCnt = 0; + return nullptr; +} + +static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, + unsigned *InstCnt = nullptr) { + unsigned InstCntDirect = 0; + // No more than 3 instructions is used if we can select the i64 immediate + // directly. + SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCntDirect); + if (Result) { + if (InstCnt) + *InstCnt = InstCntDirect; + return Result; + } + auto getI32Imm = [CurDAG, dl](unsigned Imm) { + return CurDAG->getTargetConstant(Imm, dl, MVT::i32); + }; + // Handle the upper 32 bit value. + Result = + selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, InstCntDirect); + // Add in the last bits as required. + if (uint32_t Hi16 = (Lo_32(Imm) >> 16) & 0xffff) { + Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, + SDValue(Result, 0), getI32Imm(Hi16)); + ++InstCntDirect; + } + if (uint32_t Lo16 = Lo_32(Imm) & 0xffff) { + Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(Lo16)); + ++InstCntDirect; + } + if (InstCnt) + *InstCnt = InstCntDirect; + return Result; +} + // Select a 64-bit constant. static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) { SDLoc dl(N); @@ -1218,7 +1218,7 @@ class BitPermutationSelector { } break; case ISD::SHL: - case PPCISD::SHL: + case PPCISD::SHL: if (isa<ConstantSDNode>(V.getOperand(1))) { unsigned ShiftAmt = V.getConstantOperandVal(1); @@ -1234,7 +1234,7 @@ class BitPermutationSelector { } break; case ISD::SRL: - case PPCISD::SRL: + case PPCISD::SRL: if (isa<ConstantSDNode>(V.getOperand(1))) { unsigned ShiftAmt = V.getConstantOperandVal(1); @@ -2114,14 +2114,14 @@ class BitPermutationSelector { unsigned NumAndInsts = (unsigned) NeedsRotate + (unsigned) (bool) Res; - unsigned NumOfSelectInsts = 0; - selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts); - assert(NumOfSelectInsts > 0 && "Failed to select an i64 constant."); + unsigned NumOfSelectInsts = 0; + selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts); + assert(NumOfSelectInsts > 0 && "Failed to select an i64 constant."); if (Use32BitInsts) NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) + (unsigned) (ANDIMask != 0 && ANDISMask != 0); else - NumAndInsts += NumOfSelectInsts + /* and */ 1; + NumAndInsts += NumOfSelectInsts + /* and */ 1; unsigned NumRLInsts = 0; bool FirstBG = true; @@ -2345,14 +2345,14 @@ class BitPermutationSelector { Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, ExtendToInt64(ANDIVal, dl), ANDISVal), 0); } else { - unsigned NumOfSelectInsts = 0; - SDValue MaskVal = - SDValue(selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts), 0); - Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, - ExtendToInt64(Res, dl), MaskVal), - 0); - if (InstCnt) - *InstCnt += NumOfSelectInsts + /* and */ 1; + unsigned NumOfSelectInsts = 0; + SDValue MaskVal = + SDValue(selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts), 0); + Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, + ExtendToInt64(Res, dl), MaskVal), + 0); + if (InstCnt) + *InstCnt += NumOfSelectInsts + /* and */ 1; } } @@ -2383,7 +2383,7 @@ class BitPermutationSelector { } void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) { - erase_if(BitGroups, F); + erase_if(BitGroups, F); } SmallVector<ValueBit, 64> Bits; @@ -3633,12 +3633,12 @@ bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) { if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) return false; - // For POWER10, it is more profitable to use the set boolean extension - // instructions rather than the integer compare elimination codegen. - // Users can override this via the command line option, `--ppc-gpr-icmps`. - if (!(CmpInGPR.getNumOccurrences() > 0) && Subtarget->isISA3_1()) - return false; - + // For POWER10, it is more profitable to use the set boolean extension + // instructions rather than the integer compare elimination codegen. + // Users can override this via the command line option, `--ppc-gpr-icmps`. + if (!(CmpInGPR.getNumOccurrences() > 0) && Subtarget->isISA3_1()) + return false; + switch (N->getOpcode()) { default: break; case ISD::ZERO_EXTEND: @@ -3686,7 +3686,7 @@ bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) { /// SelectCC - Select a comparison of the specified values with the specified /// condition code, returning the CR# of the expression. SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, - const SDLoc &dl, SDValue Chain) { + const SDLoc &dl, SDValue Chain) { // Always select the LHS. unsigned Opc; @@ -3839,12 +3839,12 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, assert(Subtarget->hasVSX() && "__float128 requires VSX"); Opc = PPC::XSCMPUQP; } - if (Chain) - return SDValue( - CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain), - 0); - else - return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0); + if (Chain) + return SDValue( + CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain), + 0); + else + return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0); } static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT, @@ -3919,8 +3919,8 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) { // getVCmpInst: return the vector compare instruction for the specified // vector type and condition code. Since this is for altivec specific code, -// only support the altivec types (v16i8, v8i16, v4i32, v2i64, v1i128, -// and v4f32). +// only support the altivec types (v16i8, v8i16, v4i32, v2i64, v1i128, +// and v4f32). static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, bool HasVSX, bool &Swap, bool &Negate) { Swap = false; @@ -4001,8 +4001,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, return PPC::VCMPEQUW; else if (VecVT == MVT::v2i64) return PPC::VCMPEQUD; - else if (VecVT == MVT::v1i128) - return PPC::VCMPEQUQ; + else if (VecVT == MVT::v1i128) + return PPC::VCMPEQUQ; break; case ISD::SETGT: if (VecVT == MVT::v16i8) @@ -4013,8 +4013,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, return PPC::VCMPGTSW; else if (VecVT == MVT::v2i64) return PPC::VCMPGTSD; - else if (VecVT == MVT::v1i128) - return PPC::VCMPGTSQ; + else if (VecVT == MVT::v1i128) + return PPC::VCMPGTSQ; break; case ISD::SETUGT: if (VecVT == MVT::v16i8) @@ -4025,8 +4025,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, return PPC::VCMPGTUW; else if (VecVT == MVT::v2i64) return PPC::VCMPGTUD; - else if (VecVT == MVT::v1i128) - return PPC::VCMPGTUQ; + else if (VecVT == MVT::v1i128) + return PPC::VCMPGTUQ; break; default: break; @@ -4038,23 +4038,23 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, bool PPCDAGToDAGISel::trySETCC(SDNode *N) { SDLoc dl(N); unsigned Imm; - bool IsStrict = N->isStrictFPOpcode(); - ISD::CondCode CC = - cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get(); + bool IsStrict = N->isStrictFPOpcode(); + ISD::CondCode CC = + cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get(); EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); bool isPPC64 = (PtrVT == MVT::i64); - SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + + SDValue LHS = N->getOperand(IsStrict ? 1 : 0); + SDValue RHS = N->getOperand(IsStrict ? 2 : 1); - SDValue LHS = N->getOperand(IsStrict ? 1 : 0); - SDValue RHS = N->getOperand(IsStrict ? 2 : 1); - - if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) { + if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) { // We can codegen setcc op, imm very efficiently compared to a brcond. // Check for those cases here. // setcc op, 0 if (Imm == 0) { - SDValue Op = LHS; + SDValue Op = LHS; switch (CC) { default: break; case ISD::SETEQ: { @@ -4089,7 +4089,7 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { } } } else if (Imm == ~0U) { // setcc op, -1 - SDValue Op = LHS; + SDValue Op = LHS; switch (CC) { default: break; case ISD::SETEQ: @@ -4134,8 +4134,8 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { // Altivec Vector compare instructions do not set any CR register by default and // vector compare operations return the same type as the operands. - if (!IsStrict && LHS.getValueType().isVector()) { - if (Subtarget->hasSPE()) + if (!IsStrict && LHS.getValueType().isVector()) { + if (Subtarget->hasSPE()) return false; EVT VecVT = LHS.getValueType(); @@ -4162,9 +4162,9 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { bool Inv; unsigned Idx = getCRIdxForSetCC(CC, Inv); - SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain); - if (IsStrict) - CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), CCReg.getValue(1)); + SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain); + if (IsStrict) + CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), CCReg.getValue(1)); SDValue IntCR; // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that @@ -4267,10 +4267,10 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ))) return false; - SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC - ? FalseRes - : FalseRes.getOperand(0); - bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC; + SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC + ? FalseRes + : FalseRes.getOperand(0); + bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC; if (SetOrSelCC.getOpcode() != ISD::SETCC && SetOrSelCC.getOpcode() != ISD::SELECT_CC) return false; @@ -4379,81 +4379,81 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, return true; } -// Return true if it's a software square-root/divide operand. -static bool isSWTestOp(SDValue N) { - if (N.getOpcode() == PPCISD::FTSQRT) - return true; - if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0))) - return false; - switch (N.getConstantOperandVal(0)) { - case Intrinsic::ppc_vsx_xvtdivdp: - case Intrinsic::ppc_vsx_xvtdivsp: - case Intrinsic::ppc_vsx_xvtsqrtdp: - case Intrinsic::ppc_vsx_xvtsqrtsp: - return true; - } - return false; -} - -bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) { - assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected."); - // We are looking for following patterns, where `truncate to i1` actually has - // the same semantic with `and 1`. - // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp) - // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp) - // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp) - // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp) - // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp) - // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp) - // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp) - // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp) - ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); - if (CC != ISD::SETEQ && CC != ISD::SETNE) - return false; - - SDValue CmpRHS = N->getOperand(3); - if (!isa<ConstantSDNode>(CmpRHS) || - cast<ConstantSDNode>(CmpRHS)->getSExtValue() != 0) - return false; - - SDValue CmpLHS = N->getOperand(2); - if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0))) - return false; - - unsigned PCC = 0; - bool IsCCNE = CC == ISD::SETNE; - if (CmpLHS.getOpcode() == ISD::AND && - isa<ConstantSDNode>(CmpLHS.getOperand(1))) - switch (CmpLHS.getConstantOperandVal(1)) { - case 1: - PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU; - break; - case 2: - PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE; - break; - case 4: - PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE; - break; - case 8: - PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE; - break; - default: - return false; - } - else if (CmpLHS.getOpcode() == ISD::TRUNCATE && - CmpLHS.getValueType() == MVT::i1) - PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU; - - if (PCC) { - SDLoc dl(N); - SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4), - N->getOperand(0)}; - CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); - return true; - } - return false; -} - +// Return true if it's a software square-root/divide operand. +static bool isSWTestOp(SDValue N) { + if (N.getOpcode() == PPCISD::FTSQRT) + return true; + if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0))) + return false; + switch (N.getConstantOperandVal(0)) { + case Intrinsic::ppc_vsx_xvtdivdp: + case Intrinsic::ppc_vsx_xvtdivsp: + case Intrinsic::ppc_vsx_xvtsqrtdp: + case Intrinsic::ppc_vsx_xvtsqrtsp: + return true; + } + return false; +} + +bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) { + assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected."); + // We are looking for following patterns, where `truncate to i1` actually has + // the same semantic with `and 1`. + // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp) + // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp) + // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp) + // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp) + // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp) + // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp) + // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp) + // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp) + ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); + if (CC != ISD::SETEQ && CC != ISD::SETNE) + return false; + + SDValue CmpRHS = N->getOperand(3); + if (!isa<ConstantSDNode>(CmpRHS) || + cast<ConstantSDNode>(CmpRHS)->getSExtValue() != 0) + return false; + + SDValue CmpLHS = N->getOperand(2); + if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0))) + return false; + + unsigned PCC = 0; + bool IsCCNE = CC == ISD::SETNE; + if (CmpLHS.getOpcode() == ISD::AND && + isa<ConstantSDNode>(CmpLHS.getOperand(1))) + switch (CmpLHS.getConstantOperandVal(1)) { + case 1: + PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU; + break; + case 2: + PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE; + break; + case 4: + PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE; + break; + case 8: + PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE; + break; + default: + return false; + } + else if (CmpLHS.getOpcode() == ISD::TRUNCATE && + CmpLHS.getValueType() == MVT::i1) + PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU; + + if (PCC) { + SDLoc dl(N); + SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4), + N->getOperand(0)}; + CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); + return true; + } + return false; +} + bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) { assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); unsigned Imm; @@ -4733,48 +4733,48 @@ void PPCDAGToDAGISel::Select(SDNode *N) { } break; - case ISD::INTRINSIC_WO_CHAIN: { - if (!Subtarget->isISA3_1()) - break; - unsigned Opcode = 0; - switch (N->getConstantOperandVal(0)) { - default: - break; - case Intrinsic::ppc_altivec_vstribr_p: - Opcode = PPC::VSTRIBR_rec; - break; - case Intrinsic::ppc_altivec_vstribl_p: - Opcode = PPC::VSTRIBL_rec; - break; - case Intrinsic::ppc_altivec_vstrihr_p: - Opcode = PPC::VSTRIHR_rec; - break; - case Intrinsic::ppc_altivec_vstrihl_p: - Opcode = PPC::VSTRIHL_rec; - break; - } - if (!Opcode) - break; - - // Generate the appropriate vector string isolate intrinsic to match. - EVT VTs[] = {MVT::v16i8, MVT::Glue}; - SDValue VecStrOp = - SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, N->getOperand(2)), 0); - // Vector string isolate instructions update the EQ bit of CR6. - // Generate a SETBC instruction to extract the bit and place it in a GPR. - SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32); - SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32); - SDValue CRBit = SDValue( - CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1, - CR6Reg, SubRegIdx, VecStrOp.getValue(1)), - 0); - CurDAG->SelectNodeTo(N, PPC::SETBC, MVT::i32, CRBit); - return; - } - + case ISD::INTRINSIC_WO_CHAIN: { + if (!Subtarget->isISA3_1()) + break; + unsigned Opcode = 0; + switch (N->getConstantOperandVal(0)) { + default: + break; + case Intrinsic::ppc_altivec_vstribr_p: + Opcode = PPC::VSTRIBR_rec; + break; + case Intrinsic::ppc_altivec_vstribl_p: + Opcode = PPC::VSTRIBL_rec; + break; + case Intrinsic::ppc_altivec_vstrihr_p: + Opcode = PPC::VSTRIHR_rec; + break; + case Intrinsic::ppc_altivec_vstrihl_p: + Opcode = PPC::VSTRIHL_rec; + break; + } + if (!Opcode) + break; + + // Generate the appropriate vector string isolate intrinsic to match. + EVT VTs[] = {MVT::v16i8, MVT::Glue}; + SDValue VecStrOp = + SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, N->getOperand(2)), 0); + // Vector string isolate instructions update the EQ bit of CR6. + // Generate a SETBC instruction to extract the bit and place it in a GPR. + SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32); + SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32); + SDValue CRBit = SDValue( + CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1, + CR6Reg, SubRegIdx, VecStrOp.getValue(1)), + 0); + CurDAG->SelectNodeTo(N, PPC::SETBC, MVT::i32, CRBit); + return; + } + case ISD::SETCC: - case ISD::STRICT_FSETCC: - case ISD::STRICT_FSETCCS: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: if (trySETCC(N)) return; break; @@ -5072,32 +5072,32 @@ void PPCDAGToDAGISel::Select(SDNode *N) { // Other cases are autogenerated. break; } - case ISD::MUL: { - SDValue Op1 = N->getOperand(1); - if (Op1.getOpcode() != ISD::Constant || Op1.getValueType() != MVT::i64) - break; - - // If the multiplier fits int16, we can handle it with mulli. - int64_t Imm = cast<ConstantSDNode>(Op1)->getZExtValue(); - unsigned Shift = countTrailingZeros<uint64_t>(Imm); - if (isInt<16>(Imm) || !Shift) - break; - - // If the shifted value fits int16, we can do this transformation: - // (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISEL due to - // DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2). - uint64_t ImmSh = Imm >> Shift; - if (isInt<16>(ImmSh)) { - uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16); - SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64); - SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64, - N->getOperand(0), SDImm); - CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, SDValue(MulNode, 0), - getI32Imm(Shift, dl), getI32Imm(63 - Shift, dl)); - return; - } - break; - } + case ISD::MUL: { + SDValue Op1 = N->getOperand(1); + if (Op1.getOpcode() != ISD::Constant || Op1.getValueType() != MVT::i64) + break; + + // If the multiplier fits int16, we can handle it with mulli. + int64_t Imm = cast<ConstantSDNode>(Op1)->getZExtValue(); + unsigned Shift = countTrailingZeros<uint64_t>(Imm); + if (isInt<16>(Imm) || !Shift) + break; + + // If the shifted value fits int16, we can do this transformation: + // (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISEL due to + // DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2). + uint64_t ImmSh = Imm >> Shift; + if (isInt<16>(ImmSh)) { + uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16); + SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64); + SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64, + N->getOperand(0), SDImm); + CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, SDValue(MulNode, 0), + getI32Imm(Shift, dl), getI32Imm(63 - Shift, dl)); + return; + } + break; + } // FIXME: Remove this once the ANDI glue bug is fixed: case PPCISD::ANDI_rec_1_EQ_BIT: case PPCISD::ANDI_rec_1_GT_BIT: { @@ -5323,8 +5323,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) { return; } case ISD::BR_CC: { - if (tryFoldSWTestBRCC(N)) - return; + if (tryFoldSWTestBRCC(N)) + return; ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); unsigned PCC = getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget); @@ -5896,13 +5896,13 @@ bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) { User->getMachineOpcode() != PPC::SELECT_I8) return false; - SDNode *Op1 = User->getOperand(1).getNode(); + SDNode *Op1 = User->getOperand(1).getNode(); SDNode *Op2 = User->getOperand(2).getNode(); - // If we have a degenerate select with two equal operands, swapping will - // not do anything, and we may run into an infinite loop. - if (Op1 == Op2) - return false; - + // If we have a degenerate select with two equal operands, swapping will + // not do anything, and we may run into an infinite loop. + if (Op1 == Op2) + return false; + if (!Op2->isMachineOpcode()) return false; diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/PPCISelLowering.cpp index 69a2ed730d..26dc3afc89 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCISelLowering.cpp @@ -74,7 +74,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSectionXCOFF.h" +#include "llvm/MC/MCSectionXCOFF.h" #include "llvm/MC/MCSymbolXCOFF.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/BranchProbability.h" @@ -121,11 +121,11 @@ cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden); static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden); -// TODO - Remove this option if soft fp128 has been fully supported . -static cl::opt<bool> - EnableSoftFP128("enable-soft-fp128", - cl::desc("temp option to enable soft fp128"), cl::Hidden); - +// TODO - Remove this option if soft fp128 has been fully supported . +static cl::opt<bool> + EnableSoftFP128("enable-soft-fp128", + cl::desc("temp option to enable soft fp128"), cl::Hidden); + STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumSiblingCalls, "Number of sibling calls"); STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM"); @@ -151,9 +151,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, if (!useSoftFloat()) { if (hasSPE()) { addRegisterClass(MVT::f32, &PPC::GPRCRegClass); - // EFPU2 APU only supports f32 - if (!Subtarget.hasEFPU2()) - addRegisterClass(MVT::f64, &PPC::SPERCRegClass); + // EFPU2 APU only supports f32 + if (!Subtarget.hasEFPU2()) + addRegisterClass(MVT::f64, &PPC::SPERCRegClass); } else { addRegisterClass(MVT::f32, &PPC::F4RCRegClass); addRegisterClass(MVT::f64, &PPC::F8RCRegClass); @@ -167,10 +167,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended. setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom); - // Custom lower inline assembly to check for special registers. - setOperationAction(ISD::INLINEASM, MVT::Other, Custom); - setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom); - + // Custom lower inline assembly to check for special registers. + setOperationAction(ISD::INLINEASM, MVT::Other, Custom); + setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom); + // PowerPC has an i16 but no i8 (or i1) SEXTLOAD. for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); @@ -227,36 +227,36 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); if (isPPC64 || Subtarget.hasFPCVT()) { - setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote); - AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1, - isPPC64 ? MVT::i64 : MVT::i32); - setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote); - AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1, - isPPC64 ? MVT::i64 : MVT::i32); - + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote); + AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1, + isPPC64 ? MVT::i64 : MVT::i32); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote); + AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1, + isPPC64 ? MVT::i64 : MVT::i32); + setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); AddPromotedToType (ISD::SINT_TO_FP, MVT::i1, isPPC64 ? MVT::i64 : MVT::i32); setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, isPPC64 ? MVT::i64 : MVT::i32); - - setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i1, Promote); - AddPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::i1, - isPPC64 ? MVT::i64 : MVT::i32); - setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i1, Promote); - AddPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::i1, - isPPC64 ? MVT::i64 : MVT::i32); - - setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote); - AddPromotedToType(ISD::FP_TO_SINT, MVT::i1, - isPPC64 ? MVT::i64 : MVT::i32); - setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote); - AddPromotedToType(ISD::FP_TO_UINT, MVT::i1, - isPPC64 ? MVT::i64 : MVT::i32); + + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i1, Promote); + AddPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::i1, + isPPC64 ? MVT::i64 : MVT::i32); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i1, Promote); + AddPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::i1, + isPPC64 ? MVT::i64 : MVT::i32); + + setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote); + AddPromotedToType(ISD::FP_TO_SINT, MVT::i1, + isPPC64 ? MVT::i64 : MVT::i32); + setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote); + AddPromotedToType(ISD::FP_TO_UINT, MVT::i1, + isPPC64 ? MVT::i64 : MVT::i32); } else { - setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom); - setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom); } @@ -282,8 +282,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // PPC (the libcall is not available). setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom); - setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom); - setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom); // We do not currently implement these libm ops for PowerPC. setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand); @@ -336,10 +336,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal); setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal); setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal); - if (Subtarget.hasVSX()) { - setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal); - setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal); - } + if (Subtarget.hasVSX()) { + setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal); + } if (Subtarget.hasFSQRT()) { setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal); @@ -377,9 +377,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FMA , MVT::f32, Legal); } - if (Subtarget.hasSPE()) - setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); - + if (Subtarget.hasSPE()) + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); + setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); // If we're enabling GP optimizations, use hardware square root @@ -457,16 +457,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, if (!Subtarget.useCRBits()) setOperationAction(ISD::SETCC, MVT::i32, Custom); - if (Subtarget.hasFPU()) { - setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal); - setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal); - setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal); - - setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal); - setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal); - setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal); - } - + if (Subtarget.hasFPU()) { + setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal); + + setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal); + } + // PowerPC does not have BRCOND which requires SetCC if (!Subtarget.useCRBits()) setOperationAction(ISD::BRCOND, MVT::Other, Expand); @@ -483,12 +483,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); } else { // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores. - setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); // PowerPC does not have [U|S]INT_TO_FP - setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand); - setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand); setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); } @@ -616,56 +616,56 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setCondCodeAction(ISD::SETONE, MVT::f32, Expand); setCondCodeAction(ISD::SETONE, MVT::f64, Expand); - setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); - setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal); - + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal); + if (Subtarget.has64BitSupport()) { // They also have instructions for converting between i64 and fp. - setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom); - setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand); - setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom); - setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand); setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); // This is just the low 32 bits of a (signed) fp->i64 conversion. // We cannot do this with Promote because i64 is not a legal type. - setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) { + if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) { setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom); - } + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom); + } } else { // PowerPC does not have FP_TO_UINT on 32-bit implementations. if (Subtarget.hasSPE()) { setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); - } else { - setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand); + } else { + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); - } + } } // With the instructions enabled under FPCVT, we can do everything. if (Subtarget.hasFPCVT()) { if (Subtarget.has64BitSupport()) { - setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom); - setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom); - setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom); - setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); } - setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); - setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); - setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); @@ -688,15 +688,15 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); } - // PowerPC has better expansions for funnel shifts than the generic - // TargetLowering::expandFunnelShift. - if (Subtarget.has64BitSupport()) { - setOperationAction(ISD::FSHL, MVT::i64, Custom); - setOperationAction(ISD::FSHR, MVT::i64, Custom); - } - setOperationAction(ISD::FSHL, MVT::i32, Custom); - setOperationAction(ISD::FSHR, MVT::i32, Custom); - + // PowerPC has better expansions for funnel shifts than the generic + // TargetLowering::expandFunnelShift. + if (Subtarget.has64BitSupport()) { + setOperationAction(ISD::FSHL, MVT::i64, Custom); + setOperationAction(ISD::FSHR, MVT::i64, Custom); + } + setOperationAction(ISD::FSHL, MVT::i32, Custom); + setOperationAction(ISD::FSHR, MVT::i32, Custom); + if (Subtarget.hasVSX()) { setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal); setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal); @@ -848,10 +848,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::SELECT, MVT::v4i32, Subtarget.useCRBits() ? Legal : Expand); setOperationAction(ISD::STORE , MVT::v4i32, Legal); - setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal); - setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal); - setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal); - setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); @@ -889,27 +889,27 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, else setOperationAction(ISD::MUL, MVT::v4i32, Custom); - if (Subtarget.isISA3_1()) { - setOperationAction(ISD::MUL, MVT::v2i64, Legal); - setOperationAction(ISD::MULHS, MVT::v2i64, Legal); - setOperationAction(ISD::MULHU, MVT::v2i64, Legal); - setOperationAction(ISD::MULHS, MVT::v4i32, Legal); - setOperationAction(ISD::MULHU, MVT::v4i32, Legal); - setOperationAction(ISD::UDIV, MVT::v2i64, Legal); - setOperationAction(ISD::SDIV, MVT::v2i64, Legal); - setOperationAction(ISD::UDIV, MVT::v4i32, Legal); - setOperationAction(ISD::SDIV, MVT::v4i32, Legal); - setOperationAction(ISD::UREM, MVT::v2i64, Legal); - setOperationAction(ISD::SREM, MVT::v2i64, Legal); - setOperationAction(ISD::UREM, MVT::v4i32, Legal); - setOperationAction(ISD::SREM, MVT::v4i32, Legal); - setOperationAction(ISD::UREM, MVT::v1i128, Legal); - setOperationAction(ISD::SREM, MVT::v1i128, Legal); - setOperationAction(ISD::UDIV, MVT::v1i128, Legal); - setOperationAction(ISD::SDIV, MVT::v1i128, Legal); - setOperationAction(ISD::ROTL, MVT::v1i128, Legal); - } - + if (Subtarget.isISA3_1()) { + setOperationAction(ISD::MUL, MVT::v2i64, Legal); + setOperationAction(ISD::MULHS, MVT::v2i64, Legal); + setOperationAction(ISD::MULHU, MVT::v2i64, Legal); + setOperationAction(ISD::MULHS, MVT::v4i32, Legal); + setOperationAction(ISD::MULHU, MVT::v4i32, Legal); + setOperationAction(ISD::UDIV, MVT::v2i64, Legal); + setOperationAction(ISD::SDIV, MVT::v2i64, Legal); + setOperationAction(ISD::UDIV, MVT::v4i32, Legal); + setOperationAction(ISD::SDIV, MVT::v4i32, Legal); + setOperationAction(ISD::UREM, MVT::v2i64, Legal); + setOperationAction(ISD::SREM, MVT::v2i64, Legal); + setOperationAction(ISD::UREM, MVT::v4i32, Legal); + setOperationAction(ISD::SREM, MVT::v4i32, Legal); + setOperationAction(ISD::UREM, MVT::v1i128, Legal); + setOperationAction(ISD::SREM, MVT::v1i128, Legal); + setOperationAction(ISD::UDIV, MVT::v1i128, Legal); + setOperationAction(ISD::SDIV, MVT::v1i128, Legal); + setOperationAction(ISD::ROTL, MVT::v1i128, Legal); + } + setOperationAction(ISD::MUL, MVT::v8i16, Legal); setOperationAction(ISD::MUL, MVT::v16i8, Custom); @@ -1021,10 +1021,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::SUB, MVT::v2i64, Expand); } - if (Subtarget.isISA3_1()) - setOperationAction(ISD::SETCC, MVT::v1i128, Legal); - else - setOperationAction(ISD::SETCC, MVT::v1i128, Expand); + if (Subtarget.isISA3_1()) + setOperationAction(ISD::SETCC, MVT::v1i128, Legal); + else + setOperationAction(ISD::SETCC, MVT::v1i128, Expand); setOperationAction(ISD::LOAD, MVT::v2i64, Promote); AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64); @@ -1033,10 +1033,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal); - setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal); - setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal); - setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal); - setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); @@ -1045,14 +1045,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // Custom handling for partial vectors of integers converted to // floating point. We already have optimal handling for v2i32 through // the DAG combine, so those aren't necessary. - setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom); - setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom); - setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom); - setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom); - setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom); - setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom); - setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom); - setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom); @@ -1084,7 +1084,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal); - setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal); @@ -1098,7 +1098,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal); - setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal); @@ -1181,48 +1181,48 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::BSWAP, MVT::v4i32, Legal); setOperationAction(ISD::BSWAP, MVT::v2i64, Legal); setOperationAction(ISD::BSWAP, MVT::v1i128, Legal); - } else if (Subtarget.hasAltivec() && EnableSoftFP128) { - addRegisterClass(MVT::f128, &PPC::VRRCRegClass); - - for (MVT FPT : MVT::fp_valuetypes()) - setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand); - - setOperationAction(ISD::LOAD, MVT::f128, Promote); - setOperationAction(ISD::STORE, MVT::f128, Promote); - - AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32); - AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32); - - // Set FADD/FSUB as libcall to avoid the legalizer to expand the - // fp_to_uint and int_to_fp. - setOperationAction(ISD::FADD, MVT::f128, LibCall); - setOperationAction(ISD::FSUB, MVT::f128, LibCall); - - setOperationAction(ISD::FMUL, MVT::f128, Expand); - setOperationAction(ISD::FDIV, MVT::f128, Expand); - setOperationAction(ISD::FNEG, MVT::f128, Expand); - setOperationAction(ISD::FABS, MVT::f128, Expand); - setOperationAction(ISD::FSIN, MVT::f128, Expand); - setOperationAction(ISD::FCOS, MVT::f128, Expand); - setOperationAction(ISD::FPOW, MVT::f128, Expand); - setOperationAction(ISD::FPOWI, MVT::f128, Expand); - setOperationAction(ISD::FREM, MVT::f128, Expand); - setOperationAction(ISD::FSQRT, MVT::f128, Expand); - setOperationAction(ISD::FMA, MVT::f128, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); - - setTruncStoreAction(MVT::f128, MVT::f64, Expand); - setTruncStoreAction(MVT::f128, MVT::f32, Expand); - - // Expand the fp_extend if the target type is fp128. - setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand); - setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Expand); - - // Expand the fp_round if the source type is fp128. - for (MVT VT : {MVT::f32, MVT::f64}) { - setOperationAction(ISD::FP_ROUND, VT, Custom); - setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom); - } + } else if (Subtarget.hasAltivec() && EnableSoftFP128) { + addRegisterClass(MVT::f128, &PPC::VRRCRegClass); + + for (MVT FPT : MVT::fp_valuetypes()) + setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand); + + setOperationAction(ISD::LOAD, MVT::f128, Promote); + setOperationAction(ISD::STORE, MVT::f128, Promote); + + AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32); + AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32); + + // Set FADD/FSUB as libcall to avoid the legalizer to expand the + // fp_to_uint and int_to_fp. + setOperationAction(ISD::FADD, MVT::f128, LibCall); + setOperationAction(ISD::FSUB, MVT::f128, LibCall); + + setOperationAction(ISD::FMUL, MVT::f128, Expand); + setOperationAction(ISD::FDIV, MVT::f128, Expand); + setOperationAction(ISD::FNEG, MVT::f128, Expand); + setOperationAction(ISD::FABS, MVT::f128, Expand); + setOperationAction(ISD::FSIN, MVT::f128, Expand); + setOperationAction(ISD::FCOS, MVT::f128, Expand); + setOperationAction(ISD::FPOW, MVT::f128, Expand); + setOperationAction(ISD::FPOWI, MVT::f128, Expand); + setOperationAction(ISD::FREM, MVT::f128, Expand); + setOperationAction(ISD::FSQRT, MVT::f128, Expand); + setOperationAction(ISD::FMA, MVT::f128, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); + + setTruncStoreAction(MVT::f128, MVT::f64, Expand); + setTruncStoreAction(MVT::f128, MVT::f32, Expand); + + // Expand the fp_extend if the target type is fp128. + setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Expand); + + // Expand the fp_round if the source type is fp128. + for (MVT VT : {MVT::f32, MVT::f64}) { + setOperationAction(ISD::FP_ROUND, VT, Custom); + setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom); + } } if (Subtarget.hasP9Altivec()) { @@ -1239,24 +1239,24 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, } } - if (Subtarget.pairedVectorMemops()) { - addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass); - setOperationAction(ISD::LOAD, MVT::v256i1, Custom); - setOperationAction(ISD::STORE, MVT::v256i1, Custom); + if (Subtarget.pairedVectorMemops()) { + addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass); + setOperationAction(ISD::LOAD, MVT::v256i1, Custom); + setOperationAction(ISD::STORE, MVT::v256i1, Custom); + } + if (Subtarget.hasMMA()) { + addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass); + setOperationAction(ISD::LOAD, MVT::v512i1, Custom); + setOperationAction(ISD::STORE, MVT::v512i1, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom); } - if (Subtarget.hasMMA()) { - addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass); - setOperationAction(ISD::LOAD, MVT::v512i1, Custom); - setOperationAction(ISD::STORE, MVT::v512i1, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom); - } if (Subtarget.has64BitSupport()) setOperationAction(ISD::PREFETCH, MVT::Other, Legal); - if (Subtarget.isISA3_1()) - setOperationAction(ISD::SRA, MVT::v1i128, Legal); - + if (Subtarget.isISA3_1()) + setOperationAction(ISD::SRA, MVT::v1i128, Legal); + setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom); if (!isPPC64) { @@ -1334,18 +1334,18 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setLibcallName(RTLIB::FMIN_F128, "fminf128"); setLibcallName(RTLIB::FMAX_F128, "fmaxf128"); setLibcallName(RTLIB::REM_F128, "fmodf128"); - setLibcallName(RTLIB::SQRT_F128, "sqrtf128"); - setLibcallName(RTLIB::CEIL_F128, "ceilf128"); - setLibcallName(RTLIB::FLOOR_F128, "floorf128"); - setLibcallName(RTLIB::TRUNC_F128, "truncf128"); - setLibcallName(RTLIB::ROUND_F128, "roundf128"); - setLibcallName(RTLIB::LROUND_F128, "lroundf128"); - setLibcallName(RTLIB::LLROUND_F128, "llroundf128"); - setLibcallName(RTLIB::RINT_F128, "rintf128"); - setLibcallName(RTLIB::LRINT_F128, "lrintf128"); - setLibcallName(RTLIB::LLRINT_F128, "llrintf128"); - setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128"); - setLibcallName(RTLIB::FMA_F128, "fmaf128"); + setLibcallName(RTLIB::SQRT_F128, "sqrtf128"); + setLibcallName(RTLIB::CEIL_F128, "ceilf128"); + setLibcallName(RTLIB::FLOOR_F128, "floorf128"); + setLibcallName(RTLIB::TRUNC_F128, "truncf128"); + setLibcallName(RTLIB::ROUND_F128, "roundf128"); + setLibcallName(RTLIB::LROUND_F128, "lroundf128"); + setLibcallName(RTLIB::LLROUND_F128, "llroundf128"); + setLibcallName(RTLIB::RINT_F128, "rintf128"); + setLibcallName(RTLIB::LRINT_F128, "lrintf128"); + setLibcallName(RTLIB::LLRINT_F128, "llrintf128"); + setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128"); + setLibcallName(RTLIB::FMA_F128, "fmaf128"); // With 32 condition bits, we don't need to sink (and duplicate) compares // aggressively in CodeGenPrep. @@ -1408,8 +1408,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, MaxLoadsPerMemcmpOptSize = 4; } - IsStrictFPEnabled = true; - + IsStrictFPEnabled = true; + // Let the subtarget (CPU) decide if a predictable select is more expensive // than the corresponding branch. This information is used in CGP to decide // when to convert selects into branches. @@ -1452,8 +1452,8 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty, // 16byte and wider vectors are passed on 16byte boundary. // The rest is 8 on PPC64 and 4 on PPC32 boundary. Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4); - if (Subtarget.hasAltivec()) - getMaxByValAlign(Ty, Alignment, Align(16)); + if (Subtarget.hasAltivec()) + getMaxByValAlign(Ty, Alignment, Align(16)); return Alignment.value(); } @@ -1489,10 +1489,10 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { return "PPCISD::FP_TO_SINT_IN_VSR"; case PPCISD::FRE: return "PPCISD::FRE"; case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE"; - case PPCISD::FTSQRT: - return "PPCISD::FTSQRT"; - case PPCISD::FSQRT: - return "PPCISD::FSQRT"; + case PPCISD::FTSQRT: + return "PPCISD::FTSQRT"; + case PPCISD::FSQRT: + return "PPCISD::FSQRT"; case PPCISD::STFIWX: return "PPCISD::STFIWX"; case PPCISD::VPERM: return "PPCISD::VPERM"; case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; @@ -1540,7 +1540,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::ANDI_rec_1_GT_BIT: return "PPCISD::ANDI_rec_1_GT_BIT"; case PPCISD::VCMP: return "PPCISD::VCMP"; - case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec"; + case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec"; case PPCISD::LBRX: return "PPCISD::LBRX"; case PPCISD::STBRX: return "PPCISD::STBRX"; case PPCISD::LFIWAX: return "PPCISD::LFIWAX"; @@ -1577,8 +1577,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR"; case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; - case PPCISD::PADDI_DTPREL: - return "PPCISD::PADDI_DTPREL"; + case PPCISD::PADDI_DTPREL: + return "PPCISD::PADDI_DTPREL"; case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; case PPCISD::SC: return "PPCISD::SC"; case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB"; @@ -1594,35 +1594,35 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH"; case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF"; case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR"; - case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR: - return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR"; - case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR: - return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR"; - case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD"; - case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD"; - case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG"; - case PPCISD::XXMFACC: return "PPCISD::XXMFACC"; + case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR: + return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR"; + case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR: + return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR"; + case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD"; + case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD"; + case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG"; + case PPCISD::XXMFACC: return "PPCISD::XXMFACC"; case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT"; case PPCISD::FNMSUB: return "PPCISD::FNMSUB"; - case PPCISD::STRICT_FADDRTZ: - return "PPCISD::STRICT_FADDRTZ"; - case PPCISD::STRICT_FCTIDZ: - return "PPCISD::STRICT_FCTIDZ"; - case PPCISD::STRICT_FCTIWZ: - return "PPCISD::STRICT_FCTIWZ"; - case PPCISD::STRICT_FCTIDUZ: - return "PPCISD::STRICT_FCTIDUZ"; - case PPCISD::STRICT_FCTIWUZ: - return "PPCISD::STRICT_FCTIWUZ"; - case PPCISD::STRICT_FCFID: - return "PPCISD::STRICT_FCFID"; - case PPCISD::STRICT_FCFIDU: - return "PPCISD::STRICT_FCFIDU"; - case PPCISD::STRICT_FCFIDS: - return "PPCISD::STRICT_FCFIDS"; - case PPCISD::STRICT_FCFIDUS: - return "PPCISD::STRICT_FCFIDUS"; - case PPCISD::LXVRZX: return "PPCISD::LXVRZX"; + case PPCISD::STRICT_FADDRTZ: + return "PPCISD::STRICT_FADDRTZ"; + case PPCISD::STRICT_FCTIDZ: + return "PPCISD::STRICT_FCTIDZ"; + case PPCISD::STRICT_FCTIWZ: + return "PPCISD::STRICT_FCTIWZ"; + case PPCISD::STRICT_FCTIDUZ: + return "PPCISD::STRICT_FCTIDUZ"; + case PPCISD::STRICT_FCTIWUZ: + return "PPCISD::STRICT_FCTIWUZ"; + case PPCISD::STRICT_FCFID: + return "PPCISD::STRICT_FCFID"; + case PPCISD::STRICT_FCFIDU: + return "PPCISD::STRICT_FCFIDU"; + case PPCISD::STRICT_FCFIDS: + return "PPCISD::STRICT_FCFIDS"; + case PPCISD::STRICT_FCFIDUS: + return "PPCISD::STRICT_FCFIDUS"; + case PPCISD::LXVRZX: return "PPCISD::LXVRZX"; } return nullptr; } @@ -2446,20 +2446,20 @@ bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base, return false; } -/// isIntS34Immediate - This method tests if value of node given can be -/// accurately represented as a sign extension from a 34-bit value. If so, -/// this returns true and the immediate. -bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) { - if (!isa<ConstantSDNode>(N)) - return false; - - Imm = (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); - return isInt<34>(Imm); -} -bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) { - return isIntS34Immediate(Op.getNode(), Imm); -} - +/// isIntS34Immediate - This method tests if value of node given can be +/// accurately represented as a sign extension from a 34-bit value. If so, +/// this returns true and the immediate. +bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) { + if (!isa<ConstantSDNode>(N)) + return false; + + Imm = (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); + return isInt<34>(Imm); +} +bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) { + return isIntS34Immediate(Op.getNode(), Imm); +} + /// SelectAddressRegReg - Given the specified addressed, check to see if it /// can be represented as an indexed [r+r] operation. Returns false if it /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is @@ -2660,55 +2660,55 @@ bool PPCTargetLowering::SelectAddressRegImm( return true; // [r+0] } -/// Similar to the 16-bit case but for instructions that take a 34-bit -/// displacement field (prefixed loads/stores). -bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp, - SDValue &Base, - SelectionDAG &DAG) const { - // Only on 64-bit targets. - if (N.getValueType() != MVT::i64) - return false; - - SDLoc dl(N); - int64_t Imm = 0; - - if (N.getOpcode() == ISD::ADD) { - if (!isIntS34Immediate(N.getOperand(1), Imm)) - return false; - Disp = DAG.getTargetConstant(Imm, dl, N.getValueType()); - if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) - Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); - else - Base = N.getOperand(0); - return true; - } - - if (N.getOpcode() == ISD::OR) { - if (!isIntS34Immediate(N.getOperand(1), Imm)) - return false; - // If this is an or of disjoint bitfields, we can codegen this as an add - // (for better address arithmetic) if the LHS and RHS of the OR are - // provably disjoint. - KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0)); - if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL) - return false; - if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) - Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); - else - Base = N.getOperand(0); - Disp = DAG.getTargetConstant(Imm, dl, N.getValueType()); - return true; - } - - if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const. - Disp = DAG.getTargetConstant(Imm, dl, N.getValueType()); - Base = DAG.getRegister(PPC::ZERO8, N.getValueType()); - return true; - } - - return false; -} - +/// Similar to the 16-bit case but for instructions that take a 34-bit +/// displacement field (prefixed loads/stores). +bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp, + SDValue &Base, + SelectionDAG &DAG) const { + // Only on 64-bit targets. + if (N.getValueType() != MVT::i64) + return false; + + SDLoc dl(N); + int64_t Imm = 0; + + if (N.getOpcode() == ISD::ADD) { + if (!isIntS34Immediate(N.getOperand(1), Imm)) + return false; + Disp = DAG.getTargetConstant(Imm, dl, N.getValueType()); + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) + Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); + else + Base = N.getOperand(0); + return true; + } + + if (N.getOpcode() == ISD::OR) { + if (!isIntS34Immediate(N.getOperand(1), Imm)) + return false; + // If this is an or of disjoint bitfields, we can codegen this as an add + // (for better address arithmetic) if the LHS and RHS of the OR are + // provably disjoint. + KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0)); + if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL) + return false; + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) + Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); + else + Base = N.getOperand(0); + Disp = DAG.getTargetConstant(Imm, dl, N.getValueType()); + return true; + } + + if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const. + Disp = DAG.getTargetConstant(Imm, dl, N.getValueType()); + Base = DAG.getRegister(PPC::ZERO8, N.getValueType()); + return true; + } + + return false; +} + /// SelectAddressRegRegOnly - Given the specified addressed, force it to be /// represented as an indexed [r+r] operation. bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, @@ -2838,9 +2838,9 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, return false; } - // PowerPC doesn't have preinc load/store instructions for vectors - if (VT.isVector()) - return false; + // PowerPC doesn't have preinc load/store instructions for vectors + if (VT.isVector()) + return false; if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) { // Common code will reject creating a pre-inc form if the base pointer @@ -3135,15 +3135,15 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, TLSModel::Model Model = TM.getTLSModel(GV); if (Model == TLSModel::LocalExec) { - if (Subtarget.isUsingPCRelativeCalls()) { - SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64); - SDValue TGA = DAG.getTargetGlobalAddress( - GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG)); - SDValue MatAddr = - DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA); - return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr); - } - + if (Subtarget.isUsingPCRelativeCalls()) { + SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64); + SDValue TGA = DAG.getTargetGlobalAddress( + GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG)); + SDValue MatAddr = + DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA); + return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr); + } + SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_HA); SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, @@ -3156,44 +3156,44 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, } if (Model == TLSModel::InitialExec) { - bool IsPCRel = Subtarget.isUsingPCRelativeCalls(); - SDValue TGA = DAG.getTargetGlobalAddress( - GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0); - SDValue TGATLS = DAG.getTargetGlobalAddress( - GV, dl, PtrVT, 0, - IsPCRel ? (PPCII::MO_TLS | PPCII::MO_PCREL_FLAG) : PPCII::MO_TLS); - SDValue TPOffset; - if (IsPCRel) { - SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA); - TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel, - MachinePointerInfo()); + bool IsPCRel = Subtarget.isUsingPCRelativeCalls(); + SDValue TGA = DAG.getTargetGlobalAddress( + GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0); + SDValue TGATLS = DAG.getTargetGlobalAddress( + GV, dl, PtrVT, 0, + IsPCRel ? (PPCII::MO_TLS | PPCII::MO_PCREL_FLAG) : PPCII::MO_TLS); + SDValue TPOffset; + if (IsPCRel) { + SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA); + TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel, + MachinePointerInfo()); } else { - SDValue GOTPtr; - if (is64bit) { - setUsesTOCBasePtr(DAG); - SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); - GOTPtr = - DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA); - } else { - if (!TM.isPositionIndependent()) - GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT); - else if (picLevel == PICLevel::SmallPIC) - GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); - else - GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); - } - TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr); + SDValue GOTPtr; + if (is64bit) { + setUsesTOCBasePtr(DAG); + SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); + GOTPtr = + DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA); + } else { + if (!TM.isPositionIndependent()) + GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT); + else if (picLevel == PICLevel::SmallPIC) + GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); + else + GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); + } + TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr); } return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS); } if (Model == TLSModel::GeneralDynamic) { - if (Subtarget.isUsingPCRelativeCalls()) { - SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, - PPCII::MO_GOT_TLSGD_PCREL_FLAG); - return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA); - } - + if (Subtarget.isUsingPCRelativeCalls()) { + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + PPCII::MO_GOT_TLSGD_PCREL_FLAG); + return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA); + } + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); SDValue GOTPtr; if (is64bit) { @@ -3212,14 +3212,14 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, } if (Model == TLSModel::LocalDynamic) { - if (Subtarget.isUsingPCRelativeCalls()) { - SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, - PPCII::MO_GOT_TLSLD_PCREL_FLAG); - SDValue MatPCRel = - DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA); - return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA); - } - + if (Subtarget.isUsingPCRelativeCalls()) { + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + PPCII::MO_GOT_TLSLD_PCREL_FLAG); + SDValue MatPCRel = + DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA); + return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA); + } + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); SDValue GOTPtr; if (is64bit) { @@ -3465,57 +3465,57 @@ SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, return Op.getOperand(0); } -SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const { - MachineFunction &MF = DAG.getMachineFunction(); - PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>(); - - assert((Op.getOpcode() == ISD::INLINEASM || - Op.getOpcode() == ISD::INLINEASM_BR) && - "Expecting Inline ASM node."); - - // If an LR store is already known to be required then there is not point in - // checking this ASM as well. - if (MFI.isLRStoreRequired()) - return Op; - - // Inline ASM nodes have an optional last operand that is an incoming Flag of - // type MVT::Glue. We want to ignore this last operand if that is the case. - unsigned NumOps = Op.getNumOperands(); - if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue) - --NumOps; - - // Check all operands that may contain the LR. - for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { - unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue(); - unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); - ++i; // Skip the ID value. - - switch (InlineAsm::getKind(Flags)) { - default: - llvm_unreachable("Bad flags!"); - case InlineAsm::Kind_RegUse: - case InlineAsm::Kind_Imm: - case InlineAsm::Kind_Mem: - i += NumVals; - break; - case InlineAsm::Kind_Clobber: - case InlineAsm::Kind_RegDef: - case InlineAsm::Kind_RegDefEarlyClobber: { - for (; NumVals; --NumVals, ++i) { - Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg(); - if (Reg != PPC::LR && Reg != PPC::LR8) - continue; - MFI.setLRStoreRequired(); - return Op; - } - break; - } - } - } - - return Op; -} - +SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>(); + + assert((Op.getOpcode() == ISD::INLINEASM || + Op.getOpcode() == ISD::INLINEASM_BR) && + "Expecting Inline ASM node."); + + // If an LR store is already known to be required then there is not point in + // checking this ASM as well. + if (MFI.isLRStoreRequired()) + return Op; + + // Inline ASM nodes have an optional last operand that is an incoming Flag of + // type MVT::Glue. We want to ignore this last operand if that is the case. + unsigned NumOps = Op.getNumOperands(); + if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue) + --NumOps; + + // Check all operands that may contain the LR. + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { + unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + ++i; // Skip the ID value. + + switch (InlineAsm::getKind(Flags)) { + default: + llvm_unreachable("Bad flags!"); + case InlineAsm::Kind_RegUse: + case InlineAsm::Kind_Imm: + case InlineAsm::Kind_Mem: + i += NumVals; + break; + case InlineAsm::Kind_Clobber: + case InlineAsm::Kind_RegDef: + case InlineAsm::Kind_RegDefEarlyClobber: { + for (; NumVals; --NumVals, ++i) { + Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg(); + if (Reg != PPC::LR && Reg != PPC::LR8) + continue; + MFI.setLRStoreRequired(); + return Op; + } + break; + } + } + } + + return Op; +} + SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { if (Subtarget.isAIXABI()) @@ -3705,11 +3705,11 @@ static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, /// stack slot (instead of being passed in registers). ArgOffset, /// AvailableFPRs, and AvailableVRs must hold the current argument /// position, and will be updated to account for this argument. -static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, - unsigned PtrByteSize, unsigned LinkageSize, - unsigned ParamAreaSize, unsigned &ArgOffset, +static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, + unsigned PtrByteSize, unsigned LinkageSize, + unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, - unsigned &AvailableVRs) { + unsigned &AvailableVRs) { bool UseMemory = false; // Respect alignment of argument on the stack. @@ -3733,7 +3733,7 @@ static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, // However, if the argument is actually passed in an FPR or a VR, // we don't use memory after all. if (!Flags.isByVal()) { - if (ArgVT == MVT::f32 || ArgVT == MVT::f64) + if (ArgVT == MVT::f32 || ArgVT == MVT::f64) if (AvailableFPRs > 0) { --AvailableFPRs; return false; @@ -3768,8 +3768,8 @@ SDValue PPCTargetLowering::LowerFormalArguments( if (Subtarget.is64BitELFABI()) return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); - assert(Subtarget.is32BitELFABI()); - return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, + assert(Subtarget.is32BitELFABI()); + return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); } @@ -3869,7 +3869,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4( RC = &PPC::VRRCRegClass; break; case MVT::v4f32: - RC = &PPC::VRRCRegClass; + RC = &PPC::VRRCRegClass; break; case MVT::v2f64: case MVT::v2i64: @@ -4091,7 +4091,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags, PtrByteSize, LinkageSize, ParamAreaSize, - NumBytes, AvailableFPRs, AvailableVRs)) + NumBytes, AvailableFPRs, AvailableVRs)) HasParameterArea = true; } @@ -4343,20 +4343,20 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( case MVT::v2i64: case MVT::v1i128: case MVT::f128: - // These can be scalar arguments or elements of a vector array type - // passed directly. The latter are used to implement ELFv2 homogenous - // vector aggregates. - if (VR_idx != Num_VR_Regs) { - unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); + // These can be scalar arguments or elements of a vector array type + // passed directly. The latter are used to implement ELFv2 homogenous + // vector aggregates. + if (VR_idx != Num_VR_Regs) { + unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); - ++VR_idx; + ++VR_idx; } else { if (CallConv == CallingConv::Fast) ComputeArgOffset(); needsLoad = true; } if (CallConv != CallingConv::Fast || needsLoad) - ArgOffset += 16; + ArgOffset += 16; break; } @@ -4493,13 +4493,13 @@ static bool callsShareTOCBase(const Function *Caller, SDValue Callee, if (STICallee->isUsingPCRelativeCalls()) return false; - // If the GV is not a strong definition then we need to assume it can be - // replaced by another function at link time. The function that replaces - // it may not share the same TOC as the caller since the callee may be - // replaced by a PC Relative version of the same function. - if (!GV->isStrongDefinitionForLinker()) - return false; - + // If the GV is not a strong definition then we need to assume it can be + // replaced by another function at link time. The function that replaces + // it may not share the same TOC as the caller since the callee may be + // replaced by a PC Relative version of the same function. + if (!GV->isStrongDefinitionForLinker()) + return false; + // The medium and large code models are expected to provide a sufficiently // large TOC to provide all data addressing needs of a module with a // single TOC. @@ -4550,9 +4550,9 @@ needStackSlotPassParameters(const PPCSubtarget &Subtarget, for (const ISD::OutputArg& Param : Outs) { if (Param.Flags.isNest()) continue; - if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize, - LinkageSize, ParamAreaSize, NumBytes, - AvailableFPRs, AvailableVRs)) + if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize, + LinkageSize, ParamAreaSize, NumBytes, + AvailableFPRs, AvailableVRs)) return true; } return false; @@ -5066,53 +5066,53 @@ static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, Subtarget.is32BitELFABI() && !isLocalCallee() && Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_; - const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) { - const TargetMachine &TM = Subtarget.getTargetMachine(); - const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering(); - MCSymbolXCOFF *S = - cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM)); + const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) { + const TargetMachine &TM = Subtarget.getTargetMachine(); + const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering(); + MCSymbolXCOFF *S = + cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM)); - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); - return DAG.getMCSymbol(S, PtrVT); - }; + MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); + return DAG.getMCSymbol(S, PtrVT); + }; if (isFunctionGlobalAddress(Callee)) { - const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal(); - if (Subtarget.isAIXABI()) { - assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX."); - return getAIXFuncEntryPointSymbolSDNode(GV); - } - return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0, - UsePlt ? PPCII::MO_PLT : 0); + if (Subtarget.isAIXABI()) { + assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX."); + return getAIXFuncEntryPointSymbolSDNode(GV); + } + return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0, + UsePlt ? PPCII::MO_PLT : 0); } if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { const char *SymName = S->getSymbol(); - if (Subtarget.isAIXABI()) { - // If there exists a user-declared function whose name is the same as the - // ExternalSymbol's, then we pick up the user-declared version. - const Module *Mod = DAG.getMachineFunction().getFunction().getParent(); - if (const Function *F = - dyn_cast_or_null<Function>(Mod->getNamedValue(SymName))) - return getAIXFuncEntryPointSymbolSDNode(F); - - // On AIX, direct function calls reference the symbol for the function's - // entry point, which is named by prepending a "." before the function's - // C-linkage name. A Qualname is returned here because an external - // function entry point is a csect with XTY_ER property. - const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) { - auto &Context = DAG.getMachineFunction().getMMI().getContext(); - MCSectionXCOFF *Sec = Context.getXCOFFSection( - (Twine(".") + Twine(SymName)).str(), XCOFF::XMC_PR, XCOFF::XTY_ER, - SectionKind::getMetadata()); - return Sec->getQualNameSymbol(); - }; - - SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data(); + if (Subtarget.isAIXABI()) { + // If there exists a user-declared function whose name is the same as the + // ExternalSymbol's, then we pick up the user-declared version. + const Module *Mod = DAG.getMachineFunction().getFunction().getParent(); + if (const Function *F = + dyn_cast_or_null<Function>(Mod->getNamedValue(SymName))) + return getAIXFuncEntryPointSymbolSDNode(F); + + // On AIX, direct function calls reference the symbol for the function's + // entry point, which is named by prepending a "." before the function's + // C-linkage name. A Qualname is returned here because an external + // function entry point is a csect with XTY_ER property. + const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) { + auto &Context = DAG.getMachineFunction().getMMI().getContext(); + MCSectionXCOFF *Sec = Context.getXCOFFSection( + (Twine(".") + Twine(SymName)).str(), XCOFF::XMC_PR, XCOFF::XTY_ER, + SectionKind::getMetadata()); + return Sec->getQualNameSymbol(); + }; + + SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data(); } - return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(), - UsePlt ? PPCII::MO_PLT : 0); + return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(), + UsePlt ? PPCII::MO_PLT : 0); } // No transformation needed. @@ -5461,11 +5461,11 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG, InVals, CB); - assert(Subtarget.isSVR4ABI()); - if (Subtarget.isPPC64()) - return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG, - InVals, CB); - return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG, + assert(Subtarget.isSVR4ABI()); + if (Subtarget.isPPC64()) + return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG, + InVals, CB); + return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG, InVals, CB); } @@ -5789,8 +5789,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( for (unsigned i = 0; i != NumOps; ++i) { if (Outs[i].Flags.isNest()) continue; if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags, - PtrByteSize, LinkageSize, ParamAreaSize, - NumBytesTmp, AvailableFPRs, AvailableVRs)) + PtrByteSize, LinkageSize, ParamAreaSize, + NumBytesTmp, AvailableFPRs, AvailableVRs)) HasParameterArea = true; } } @@ -5838,8 +5838,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( continue; break; case MVT::v4f32: - if (++NumVRsUsed <= NumVRs) - continue; + if (++NumVRsUsed <= NumVRs) + continue; break; case MVT::f32: case MVT::f64: @@ -6322,10 +6322,10 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, const Align PtrAlign = IsPPC64 ? Align(8) : Align(4); const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32; - if (ValVT.isVector() && !State.getMachineFunction() - .getTarget() - .Options.EnableAIXExtendedAltivecABI) - report_fatal_error("the default Altivec AIX ABI is not yet supported"); + if (ValVT.isVector() && !State.getMachineFunction() + .getTarget() + .Options.EnableAIXExtendedAltivecABI) + report_fatal_error("the default Altivec AIX ABI is not yet supported"); if (ValVT == MVT::f128) report_fatal_error("f128 is unimplemented on AIX."); @@ -6340,11 +6340,11 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10}; - static const MCPhysReg VR[] = {// Vector registers. - PPC::V2, PPC::V3, PPC::V4, PPC::V5, - PPC::V6, PPC::V7, PPC::V8, PPC::V9, - PPC::V10, PPC::V11, PPC::V12, PPC::V13}; - + static const MCPhysReg VR[] = {// Vector registers. + PPC::V2, PPC::V3, PPC::V4, PPC::V5, + PPC::V6, PPC::V7, PPC::V8, PPC::V9, + PPC::V10, PPC::V11, PPC::V12, PPC::V13}; + if (ArgFlags.isByVal()) { if (ArgFlags.getNonZeroByValAlign() > PtrAlign) report_fatal_error("Pass-by-value arguments with alignment greater than " @@ -6389,7 +6389,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, case MVT::i32: { const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign); // AIX integer arguments are always passed in register width. - if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits()) + if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits()) LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt : CCValAssign::LocInfo::ZExt; if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) @@ -6440,26 +6440,26 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, return false; } - case MVT::v4f32: - case MVT::v4i32: - case MVT::v8i16: - case MVT::v16i8: - case MVT::v2i64: - case MVT::v2f64: - case MVT::v1i128: { - if (State.isVarArg()) - report_fatal_error( - "variadic arguments for vector types are unimplemented for AIX"); - - if (unsigned VReg = State.AllocateReg(VR)) - State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo)); - else { - report_fatal_error( - "passing vector parameters to the stack is unimplemented for AIX"); - } - return false; - } - } + case MVT::v4f32: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v16i8: + case MVT::v2i64: + case MVT::v2f64: + case MVT::v1i128: { + if (State.isVarArg()) + report_fatal_error( + "variadic arguments for vector types are unimplemented for AIX"); + + if (unsigned VReg = State.AllocateReg(VR)) + State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo)); + else { + report_fatal_error( + "passing vector parameters to the stack is unimplemented for AIX"); + } + return false; + } + } return true; } @@ -6479,14 +6479,14 @@ static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT, return &PPC::F4RCRegClass; case MVT::f64: return &PPC::F8RCRegClass; - case MVT::v4f32: - case MVT::v4i32: - case MVT::v8i16: - case MVT::v16i8: - case MVT::v2i64: - case MVT::v2f64: - case MVT::v1i128: - return &PPC::VRRCRegClass; + case MVT::v4f32: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v16i8: + case MVT::v2i64: + case MVT::v2f64: + case MVT::v1i128: + return &PPC::VRRCRegClass; } } @@ -6494,7 +6494,7 @@ static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl) { assert(ValVT.isScalarInteger() && LocVT.isScalarInteger()); - assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits()); + assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits()); if (Flags.isSExt()) ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue, @@ -6589,7 +6589,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX( SmallVector<CCValAssign, 16> ArgLocs; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); - PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); const EVT PtrVT = getPointerTy(MF.getDataLayout()); @@ -6604,9 +6604,9 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX( CCValAssign &VA = ArgLocs[I++]; MVT LocVT = VA.getLocVT(); ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags; - if (VA.isMemLoc() && VA.getValVT().isVector()) - report_fatal_error( - "passing vector parameters to the stack is unimplemented for AIX"); + if (VA.isMemLoc() && VA.getValVT().isVector()) + report_fatal_error( + "passing vector parameters to the stack is unimplemented for AIX"); // For compatibility with the AIX XL compiler, the float args in the // parameter save area are initialized even if the argument is available @@ -6617,15 +6617,15 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX( if (VA.isMemLoc() && VA.needsCustom()) continue; - if (VA.isRegLoc()) { - if (VA.getValVT().isScalarInteger()) - FuncInfo->appendParameterType(PPCFunctionInfo::FixedType); - else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) - FuncInfo->appendParameterType(VA.getValVT().SimpleTy == MVT::f32 - ? PPCFunctionInfo::ShortFloatPoint - : PPCFunctionInfo::LongFloatPoint); - } - + if (VA.isRegLoc()) { + if (VA.getValVT().isScalarInteger()) + FuncInfo->appendParameterType(PPCFunctionInfo::FixedType); + else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) + FuncInfo->appendParameterType(VA.getValVT().SimpleTy == MVT::f32 + ? PPCFunctionInfo::ShortFloatPoint + : PPCFunctionInfo::LongFloatPoint); + } + if (Flags.isByVal() && VA.isMemLoc()) { const unsigned Size = alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize, @@ -6671,10 +6671,10 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX( // to extracting the value from the register directly, and elide the // stores when the arguments address is not taken, but that will need to // be future work. - SDValue Store = DAG.getStore( - CopyFrom.getValue(1), dl, CopyFrom, - DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)), - MachinePointerInfo::getFixedStack(MF, FI, Offset)); + SDValue Store = DAG.getStore( + CopyFrom.getValue(1), dl, CopyFrom, + DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)), + MachinePointerInfo::getFixedStack(MF, FI, Offset)); MemOps.push_back(Store); }; @@ -6689,7 +6689,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX( const CCValAssign RL = ArgLocs[I++]; HandleRegLoc(RL.getLocReg(), Offset); - FuncInfo->appendParameterType(PPCFunctionInfo::FixedType); + FuncInfo->appendParameterType(PPCFunctionInfo::FixedType); } if (Offset != StackSize) { @@ -6711,7 +6711,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX( MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64)); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT); if (ValVT.isScalarInteger() && - (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) { + (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) { ArgValue = truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl); } @@ -6869,12 +6869,12 @@ SDValue PPCTargetLowering::LowerCall_AIX( } auto GetLoad = [&](EVT VT, unsigned LoadOffset) { - return DAG.getExtLoad( - ISD::ZEXTLOAD, dl, PtrVT, Chain, - (LoadOffset != 0) - ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset)) - : Arg, - MachinePointerInfo(), VT); + return DAG.getExtLoad( + ISD::ZEXTLOAD, dl, PtrVT, Chain, + (LoadOffset != 0) + ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset)) + : Arg, + MachinePointerInfo(), VT); }; unsigned LoadOffset = 0; @@ -6904,11 +6904,11 @@ SDValue PPCTargetLowering::LowerCall_AIX( // Only memcpy the bytes that don't pass in register. MemcpyFlags.setByValSize(ByValSize - LoadOffset); Chain = CallSeqStart = createMemcpyOutsideCallSeq( - (LoadOffset != 0) - ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset)) - : Arg, - DAG.getObjectPtrOffset(dl, StackPtr, - TypeSize::Fixed(ByValVA.getLocMemOffset())), + (LoadOffset != 0) + ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset)) + : Arg, + DAG.getObjectPtrOffset(dl, StackPtr, + TypeSize::Fixed(ByValVA.getLocMemOffset())), CallSeqStart, MemcpyFlags, DAG, dl); continue; } @@ -6958,10 +6958,10 @@ SDValue PPCTargetLowering::LowerCall_AIX( const MVT LocVT = VA.getLocVT(); const MVT ValVT = VA.getValVT(); - if (VA.isMemLoc() && VA.getValVT().isVector()) - report_fatal_error( - "passing vector parameters to the stack is unimplemented for AIX"); - + if (VA.isMemLoc() && VA.getValVT().isVector()) + report_fatal_error( + "passing vector parameters to the stack is unimplemented for AIX"); + switch (VA.getLocInfo()) { default: report_fatal_error("Unexpected argument extension type."); @@ -7003,8 +7003,8 @@ SDValue PPCTargetLowering::LowerCall_AIX( // f32 in 32-bit GPR // f64 in 64-bit GPR RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt)); - else if (Arg.getValueType().getFixedSizeInBits() < - LocVT.getFixedSizeInBits()) + else if (Arg.getValueType().getFixedSizeInBits() < + LocVT.getFixedSizeInBits()) // f32 in 64-bit GPR. RegsToPass.push_back(std::make_pair( VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT))); @@ -7363,45 +7363,45 @@ SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op, // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1> - EVT TrgVT = Op.getValueType(); - assert(TrgVT.isVector() && "Vector type expected."); - unsigned TrgNumElts = TrgVT.getVectorNumElements(); - EVT EltVT = TrgVT.getVectorElementType(); - if (!isOperationCustom(Op.getOpcode(), TrgVT) || - TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) || - !isPowerOf2_32(EltVT.getSizeInBits())) - return SDValue(); + EVT TrgVT = Op.getValueType(); + assert(TrgVT.isVector() && "Vector type expected."); + unsigned TrgNumElts = TrgVT.getVectorNumElements(); + EVT EltVT = TrgVT.getVectorElementType(); + if (!isOperationCustom(Op.getOpcode(), TrgVT) || + TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) || + !isPowerOf2_32(EltVT.getSizeInBits())) + return SDValue(); SDValue N1 = Op.getOperand(0); - EVT SrcVT = N1.getValueType(); - unsigned SrcSize = SrcVT.getSizeInBits(); - if (SrcSize > 256 || - !isPowerOf2_32(SrcVT.getVectorNumElements()) || - !isPowerOf2_32(SrcVT.getVectorElementType().getSizeInBits())) - return SDValue(); - if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2) - return SDValue(); + EVT SrcVT = N1.getValueType(); + unsigned SrcSize = SrcVT.getSizeInBits(); + if (SrcSize > 256 || + !isPowerOf2_32(SrcVT.getVectorNumElements()) || + !isPowerOf2_32(SrcVT.getVectorElementType().getSizeInBits())) + return SDValue(); + if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2) + return SDValue(); unsigned WideNumElts = 128 / EltVT.getSizeInBits(); EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts); - SDLoc DL(Op); - SDValue Op1, Op2; - if (SrcSize == 256) { - EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout()); - EVT SplitVT = - N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext()); - unsigned SplitNumElts = SplitVT.getVectorNumElements(); - Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1, - DAG.getConstant(0, DL, VecIdxTy)); - Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1, - DAG.getConstant(SplitNumElts, DL, VecIdxTy)); - } - else { - Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL); - Op2 = DAG.getUNDEF(WideVT); - } - + SDLoc DL(Op); + SDValue Op1, Op2; + if (SrcSize == 256) { + EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout()); + EVT SplitVT = + N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext()); + unsigned SplitNumElts = SplitVT.getVectorNumElements(); + Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1, + DAG.getConstant(0, DL, VecIdxTy)); + Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1, + DAG.getConstant(SplitNumElts, DL, VecIdxTy)); + } + else { + Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL); + Op2 = DAG.getUNDEF(WideVT); + } + // First list the elements we want to keep. unsigned SizeMult = SrcSize / TrgVT.getSizeInBits(); SmallVector<int, 16> ShuffV; @@ -7417,17 +7417,17 @@ SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op, // ShuffV.push_back(i + WideNumElts); ShuffV.push_back(WideNumElts + 1); - Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1); - Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2); - return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV); + Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1); + Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2); + return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV); } /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when /// possible. SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { - // Not FP, or using SPE? Not a fsel. + // Not FP, or using SPE? Not a fsel. if (!Op.getOperand(0).getValueType().isFloatingPoint() || - !Op.getOperand(2).getValueType().isFloatingPoint() || Subtarget.hasSPE()) + !Op.getOperand(2).getValueType().isFloatingPoint() || Subtarget.hasSPE()) return Op; ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); @@ -7543,105 +7543,105 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { return Op; } -static unsigned getPPCStrictOpcode(unsigned Opc) { - switch (Opc) { - default: - llvm_unreachable("No strict version of this opcode!"); - case PPCISD::FCTIDZ: - return PPCISD::STRICT_FCTIDZ; - case PPCISD::FCTIWZ: - return PPCISD::STRICT_FCTIWZ; - case PPCISD::FCTIDUZ: - return PPCISD::STRICT_FCTIDUZ; - case PPCISD::FCTIWUZ: - return PPCISD::STRICT_FCTIWUZ; - case PPCISD::FCFID: - return PPCISD::STRICT_FCFID; - case PPCISD::FCFIDU: - return PPCISD::STRICT_FCFIDU; - case PPCISD::FCFIDS: - return PPCISD::STRICT_FCFIDS; - case PPCISD::FCFIDUS: - return PPCISD::STRICT_FCFIDUS; - } -} - -static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget) { - SDLoc dl(Op); - bool IsStrict = Op->isStrictFPOpcode(); - bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT || - Op.getOpcode() == ISD::STRICT_FP_TO_SINT; - - // TODO: Any other flags to propagate? - SDNodeFlags Flags; - Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept()); - - // For strict nodes, source is the second operand. - SDValue Src = Op.getOperand(IsStrict ? 1 : 0); - SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue(); - assert(Src.getValueType().isFloatingPoint()); - if (Src.getValueType() == MVT::f32) { - if (IsStrict) { - Src = - DAG.getNode(ISD::STRICT_FP_EXTEND, dl, - DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags); - Chain = Src.getValue(1); - } else - Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); - } - SDValue Conv; - unsigned Opc = ISD::DELETED_NODE; +static unsigned getPPCStrictOpcode(unsigned Opc) { + switch (Opc) { + default: + llvm_unreachable("No strict version of this opcode!"); + case PPCISD::FCTIDZ: + return PPCISD::STRICT_FCTIDZ; + case PPCISD::FCTIWZ: + return PPCISD::STRICT_FCTIWZ; + case PPCISD::FCTIDUZ: + return PPCISD::STRICT_FCTIDUZ; + case PPCISD::FCTIWUZ: + return PPCISD::STRICT_FCTIWUZ; + case PPCISD::FCFID: + return PPCISD::STRICT_FCFID; + case PPCISD::FCFIDU: + return PPCISD::STRICT_FCFIDU; + case PPCISD::FCFIDS: + return PPCISD::STRICT_FCFIDS; + case PPCISD::FCFIDUS: + return PPCISD::STRICT_FCFIDUS; + } +} + +static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget) { + SDLoc dl(Op); + bool IsStrict = Op->isStrictFPOpcode(); + bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT || + Op.getOpcode() == ISD::STRICT_FP_TO_SINT; + + // TODO: Any other flags to propagate? + SDNodeFlags Flags; + Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept()); + + // For strict nodes, source is the second operand. + SDValue Src = Op.getOperand(IsStrict ? 1 : 0); + SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue(); + assert(Src.getValueType().isFloatingPoint()); + if (Src.getValueType() == MVT::f32) { + if (IsStrict) { + Src = + DAG.getNode(ISD::STRICT_FP_EXTEND, dl, + DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags); + Chain = Src.getValue(1); + } else + Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); + } + SDValue Conv; + unsigned Opc = ISD::DELETED_NODE; switch (Op.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); case MVT::i32: - Opc = IsSigned ? PPCISD::FCTIWZ - : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ); + Opc = IsSigned ? PPCISD::FCTIWZ + : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ); break; case MVT::i64: - assert((IsSigned || Subtarget.hasFPCVT()) && + assert((IsSigned || Subtarget.hasFPCVT()) && "i64 FP_TO_UINT is supported only with FPCVT"); - Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ; - } - if (IsStrict) { - Opc = getPPCStrictOpcode(Opc); - Conv = DAG.getNode(Opc, dl, DAG.getVTList(MVT::f64, MVT::Other), - {Chain, Src}, Flags); - } else { - Conv = DAG.getNode(Opc, dl, MVT::f64, Src); - } - return Conv; -} - -void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, - SelectionDAG &DAG, - const SDLoc &dl) const { - SDValue Tmp = convertFPToInt(Op, DAG, Subtarget); - bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT || - Op.getOpcode() == ISD::STRICT_FP_TO_SINT; - bool IsStrict = Op->isStrictFPOpcode(); - + Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ; + } + if (IsStrict) { + Opc = getPPCStrictOpcode(Opc); + Conv = DAG.getNode(Opc, dl, DAG.getVTList(MVT::f64, MVT::Other), + {Chain, Src}, Flags); + } else { + Conv = DAG.getNode(Opc, dl, MVT::f64, Src); + } + return Conv; +} + +void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, + SelectionDAG &DAG, + const SDLoc &dl) const { + SDValue Tmp = convertFPToInt(Op, DAG, Subtarget); + bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT || + Op.getOpcode() == ISD::STRICT_FP_TO_SINT; + bool IsStrict = Op->isStrictFPOpcode(); + // Convert the FP value to an int value through memory. bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() && - (IsSigned || Subtarget.hasFPCVT()); + (IsSigned || Subtarget.hasFPCVT()); SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex(); MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); // Emit a store to the stack slot. - SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode(); + SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode(); Align Alignment(DAG.getEVTAlign(Tmp.getValueType())); if (i32Stack) { MachineFunction &MF = DAG.getMachineFunction(); Alignment = Align(4); MachineMemOperand *MMO = MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment); - SDValue Ops[] = { Chain, Tmp, FIPtr }; + SDValue Ops[] = { Chain, Tmp, FIPtr }; Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO); } else - Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment); + Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment); // Result is a load from the stack slot. If loading 4 bytes, make sure to // add in a bias on big endian. @@ -7663,100 +7663,100 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const { - SDValue Conv = convertFPToInt(Op, DAG, Subtarget); - SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv); - if (Op->isStrictFPOpcode()) - return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl); - else - return Mov; + SDValue Conv = convertFPToInt(Op, DAG, Subtarget); + SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv); + if (Op->isStrictFPOpcode()) + return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl); + else + return Mov; } SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const { - bool IsStrict = Op->isStrictFPOpcode(); - bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT || - Op.getOpcode() == ISD::STRICT_FP_TO_SINT; - SDValue Src = Op.getOperand(IsStrict ? 1 : 0); - EVT SrcVT = Src.getValueType(); - EVT DstVT = Op.getValueType(); + bool IsStrict = Op->isStrictFPOpcode(); + bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT || + Op.getOpcode() == ISD::STRICT_FP_TO_SINT; + SDValue Src = Op.getOperand(IsStrict ? 1 : 0); + EVT SrcVT = Src.getValueType(); + EVT DstVT = Op.getValueType(); // FP to INT conversions are legal for f128. - if (SrcVT == MVT::f128) - return Subtarget.hasP9Vector() ? Op : SDValue(); + if (SrcVT == MVT::f128) + return Subtarget.hasP9Vector() ? Op : SDValue(); // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on // PPC (the libcall is not available). - if (SrcVT == MVT::ppcf128) { - if (DstVT == MVT::i32) { - // TODO: Conservatively pass only nofpexcept flag here. Need to check and - // set other fast-math flags to FP operations in both strict and - // non-strict cases. (FP_TO_SINT, FSUB) - SDNodeFlags Flags; - Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept()); - - if (IsSigned) { - SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src, + if (SrcVT == MVT::ppcf128) { + if (DstVT == MVT::i32) { + // TODO: Conservatively pass only nofpexcept flag here. Need to check and + // set other fast-math flags to FP operations in both strict and + // non-strict cases. (FP_TO_SINT, FSUB) + SDNodeFlags Flags; + Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept()); + + if (IsSigned) { + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src, DAG.getIntPtrConstant(0, dl)); - SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src, + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src, DAG.getIntPtrConstant(1, dl)); - // Add the two halves of the long double in round-to-zero mode, and use - // a smaller FP_TO_SINT. - if (IsStrict) { - SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl, - DAG.getVTList(MVT::f64, MVT::Other), - {Op.getOperand(0), Lo, Hi}, Flags); - return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, - DAG.getVTList(MVT::i32, MVT::Other), - {Res.getValue(1), Res}, Flags); - } else { - SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); - return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res); - } - } else { + // Add the two halves of the long double in round-to-zero mode, and use + // a smaller FP_TO_SINT. + if (IsStrict) { + SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl, + DAG.getVTList(MVT::f64, MVT::Other), + {Op.getOperand(0), Lo, Hi}, Flags); + return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, + DAG.getVTList(MVT::i32, MVT::Other), + {Res.getValue(1), Res}, Flags); + } else { + SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); + return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res); + } + } else { const uint64_t TwoE31[] = {0x41e0000000000000LL, 0}; APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31)); - SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT); - SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT); - if (IsStrict) { - // Sel = Src < 0x80000000 - // FltOfs = select Sel, 0.0, 0x80000000 - // IntOfs = select Sel, 0, 0x80000000 - // Result = fp_to_sint(Src - FltOfs) ^ IntOfs - SDValue Chain = Op.getOperand(0); - EVT SetCCVT = - getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT); - EVT DstSetCCVT = - getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT); - SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, - Chain, true); - Chain = Sel.getValue(1); - - SDValue FltOfs = DAG.getSelect( - dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst); - Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT); - - SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, - DAG.getVTList(SrcVT, MVT::Other), - {Chain, Src, FltOfs}, Flags); - Chain = Val.getValue(1); - SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, - DAG.getVTList(DstVT, MVT::Other), - {Chain, Val}, Flags); - Chain = SInt.getValue(1); - SDValue IntOfs = DAG.getSelect( - dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask); - SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs); - return DAG.getMergeValues({Result, Chain}, dl); - } else { - // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X - // FIXME: generated code sucks. - SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst); - True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True); - True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask); - SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src); - return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE); - } + SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT); + SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT); + if (IsStrict) { + // Sel = Src < 0x80000000 + // FltOfs = select Sel, 0.0, 0x80000000 + // IntOfs = select Sel, 0, 0x80000000 + // Result = fp_to_sint(Src - FltOfs) ^ IntOfs + SDValue Chain = Op.getOperand(0); + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT); + EVT DstSetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT); + SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, + Chain, true); + Chain = Sel.getValue(1); + + SDValue FltOfs = DAG.getSelect( + dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst); + Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT); + + SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, + DAG.getVTList(SrcVT, MVT::Other), + {Chain, Src, FltOfs}, Flags); + Chain = Val.getValue(1); + SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, + DAG.getVTList(DstVT, MVT::Other), + {Chain, Val}, Flags); + Chain = SInt.getValue(1); + SDValue IntOfs = DAG.getSelect( + dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask); + SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs); + return DAG.getMergeValues({Result, Chain}, dl); + } else { + // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X + // FIXME: generated code sucks. + SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst); + True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True); + True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask); + SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src); + return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE); + } } } @@ -7785,10 +7785,10 @@ bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI, SelectionDAG &DAG, ISD::LoadExtType ET) const { - // Conservatively skip reusing for constrained FP nodes. - if (Op->isStrictFPOpcode()) - return false; - + // Conservatively skip reusing for constrained FP nodes. + if (Op->isStrictFPOpcode()) + return false; + SDLoc dl(Op); bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT && (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32); @@ -7808,13 +7808,13 @@ bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT, if (LD->getMemoryVT() != MemVT) return false; - // If the result of the load is an illegal type, then we can't build a - // valid chain for reuse since the legalised loads and token factor node that - // ties the legalised loads together uses a different output chain then the - // illegal load. - if (!isTypeLegal(LD->getValueType(0))) - return false; - + // If the result of the load is an illegal type, then we can't build a + // valid chain for reuse since the legalised loads and token factor node that + // ties the legalised loads together uses a different output chain then the + // illegal load. + if (!isTypeLegal(LD->getValueType(0))) + return false; + RLI.Ptr = LD->getBasePtr(); if (LD->isIndexed() && !LD->getOffset().isUndef()) { assert(LD->getAddressingMode() == ISD::PRE_INC && @@ -7879,41 +7879,41 @@ bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const { continue; if (UI->getOpcode() != ISD::SINT_TO_FP && - UI->getOpcode() != ISD::UINT_TO_FP && - UI->getOpcode() != ISD::STRICT_SINT_TO_FP && - UI->getOpcode() != ISD::STRICT_UINT_TO_FP) + UI->getOpcode() != ISD::UINT_TO_FP && + UI->getOpcode() != ISD::STRICT_SINT_TO_FP && + UI->getOpcode() != ISD::STRICT_UINT_TO_FP) return true; } return false; } -static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, - const PPCSubtarget &Subtarget, - SDValue Chain = SDValue()) { - bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP || - Op.getOpcode() == ISD::STRICT_SINT_TO_FP; - SDLoc dl(Op); - - // TODO: Any other flags to propagate? - SDNodeFlags Flags; - Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept()); - - // If we have FCFIDS, then use it when converting to single-precision. - // Otherwise, convert to double-precision and then round. - bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT(); - unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS) - : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU); - EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64; - if (Op->isStrictFPOpcode()) { - if (!Chain) - Chain = Op.getOperand(0); - return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl, - DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags); - } else - return DAG.getNode(ConvOpc, dl, ConvTy, Src); -} - +static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, + const PPCSubtarget &Subtarget, + SDValue Chain = SDValue()) { + bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP || + Op.getOpcode() == ISD::STRICT_SINT_TO_FP; + SDLoc dl(Op); + + // TODO: Any other flags to propagate? + SDNodeFlags Flags; + Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept()); + + // If we have FCFIDS, then use it when converting to single-precision. + // Otherwise, convert to double-precision and then round. + bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT(); + unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS) + : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU); + EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64; + if (Op->isStrictFPOpcode()) { + if (!Chain) + Chain = Op.getOperand(0); + return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl, + DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags); + } else + return DAG.getNode(ConvOpc, dl, ConvTy, Src); +} + /// Custom lowers integer to floating point conversions to use /// the direct move instructions available in ISA 2.07 to avoid the /// need for load/store combinations. @@ -7925,13 +7925,13 @@ SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op, "Invalid floating point type as target of conversion"); assert(Subtarget.hasFPCVT() && "Int to FP conversions with direct moves require FPCVT"); - SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0); + SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0); bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32; - bool Signed = Op.getOpcode() == ISD::SINT_TO_FP || - Op.getOpcode() == ISD::STRICT_SINT_TO_FP; - unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA; - SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src); - return convertIntToFP(Op, Mov, DAG, Subtarget); + bool Signed = Op.getOpcode() == ISD::SINT_TO_FP || + Op.getOpcode() == ISD::STRICT_SINT_TO_FP; + unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA; + SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src); + return convertIntToFP(Op, Mov, DAG, Subtarget); } static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) { @@ -7956,23 +7956,23 @@ static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) { SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const { - bool IsStrict = Op->isStrictFPOpcode(); + bool IsStrict = Op->isStrictFPOpcode(); unsigned Opc = Op.getOpcode(); - SDValue Src = Op.getOperand(IsStrict ? 1 : 0); - assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP || - Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) && + SDValue Src = Op.getOperand(IsStrict ? 1 : 0); + assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP || + Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) && "Unexpected conversion type"); assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) && "Supports conversions to v2f64/v4f32 only."); - // TODO: Any other flags to propagate? - SDNodeFlags Flags; - Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept()); - - bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP; + // TODO: Any other flags to propagate? + SDNodeFlags Flags; + Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept()); + + bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP; bool FourEltRes = Op.getValueType() == MVT::v4f32; - SDValue Wide = widenVec(DAG, Src, dl); + SDValue Wide = widenVec(DAG, Src, dl); EVT WideVT = Wide.getValueType(); unsigned WideNumElts = WideVT.getVectorNumElements(); MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64; @@ -7997,7 +7997,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG, SDValue Extend; if (SignedConv) { Arrange = DAG.getBitcast(IntermediateVT, Arrange); - EVT ExtVT = Src.getValueType(); + EVT ExtVT = Src.getValueType(); if (Subtarget.hasP9Altivec()) ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(), IntermediateVT.getVectorNumElements()); @@ -8007,27 +8007,27 @@ SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG, } else Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange); - if (IsStrict) - return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other), - {Op.getOperand(0), Extend}, Flags); - + if (IsStrict) + return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other), + {Op.getOperand(0), Extend}, Flags); + return DAG.getNode(Opc, dl, Op.getValueType(), Extend); } SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); - bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP || - Op.getOpcode() == ISD::STRICT_SINT_TO_FP; - bool IsStrict = Op->isStrictFPOpcode(); - SDValue Src = Op.getOperand(IsStrict ? 1 : 0); - SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode(); - - // TODO: Any other flags to propagate? - SDNodeFlags Flags; - Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept()); - - EVT InVT = Src.getValueType(); + bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP || + Op.getOpcode() == ISD::STRICT_SINT_TO_FP; + bool IsStrict = Op->isStrictFPOpcode(); + SDValue Src = Op.getOperand(IsStrict ? 1 : 0); + SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode(); + + // TODO: Any other flags to propagate? + SDNodeFlags Flags; + Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept()); + + EVT InVT = Src.getValueType(); EVT OutVT = Op.getValueType(); if (OutVT.isVector() && OutVT.isFloatingPoint() && isOperationCustom(Op.getOpcode(), InVT)) @@ -8035,21 +8035,21 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, // Conversions to f128 are legal. if (Op.getValueType() == MVT::f128) - return Subtarget.hasP9Vector() ? Op : SDValue(); + return Subtarget.hasP9Vector() ? Op : SDValue(); // Don't handle ppc_fp128 here; let it be lowered to a libcall. if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) return SDValue(); - if (Src.getValueType() == MVT::i1) { - SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src, - DAG.getConstantFP(1.0, dl, Op.getValueType()), - DAG.getConstantFP(0.0, dl, Op.getValueType())); - if (IsStrict) - return DAG.getMergeValues({Sel, Chain}, dl); - else - return Sel; - } + if (Src.getValueType() == MVT::i1) { + SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src, + DAG.getConstantFP(1.0, dl, Op.getValueType()), + DAG.getConstantFP(0.0, dl, Op.getValueType())); + if (IsStrict) + return DAG.getMergeValues({Sel, Chain}, dl); + else + return Sel; + } // If we have direct moves, we can do all the conversion, skip the store/load // however, without FPCVT we can't do most conversions. @@ -8057,11 +8057,11 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, Subtarget.isPPC64() && Subtarget.hasFPCVT()) return LowerINT_TO_FPDirectMove(Op, DAG, dl); - assert((IsSigned || Subtarget.hasFPCVT()) && + assert((IsSigned || Subtarget.hasFPCVT()) && "UINT_TO_FP is supported only with FPCVT"); - if (Src.getValueType() == MVT::i64) { - SDValue SINT = Src; + if (Src.getValueType() == MVT::i64) { + SDValue SINT = Src; // When converting to single-precision, we actually need to convert // to double-precision first and then round to single-precision. // To avoid double-rounding effects during that operation, we have @@ -8149,16 +8149,16 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, int FrameIdx = MFI.CreateStackObject(4, Align(4), false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); - SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx, - MachinePointerInfo::getFixedStack( - DAG.getMachineFunction(), FrameIdx)); - Chain = Store; + SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx, + MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), FrameIdx)); + Chain = Store; assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && "Expected an i32 store"); RLI.Ptr = FIdx; - RLI.Chain = Chain; + RLI.Chain = Chain; RLI.MPI = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); RLI.Alignment = Align(4); @@ -8171,27 +8171,27 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, PPCISD::LFIWZX : PPCISD::LFIWAX, dl, DAG.getVTList(MVT::f64, MVT::Other), Ops, MVT::i32, MMO); - Chain = Bits.getValue(1); + Chain = Bits.getValue(1); } else Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); - SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain); - if (IsStrict) - Chain = FP.getValue(1); - - if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) { - if (IsStrict) - FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl, - DAG.getVTList(MVT::f32, MVT::Other), - {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags); - else - FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, - DAG.getIntPtrConstant(0, dl)); - } + SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain); + if (IsStrict) + Chain = FP.getValue(1); + + if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) { + if (IsStrict) + FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl, + DAG.getVTList(MVT::f32, MVT::Other), + {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags); + else + FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, + DAG.getIntPtrConstant(0, dl)); + } return FP; } - assert(Src.getValueType() == MVT::i32 && + assert(Src.getValueType() == MVT::i32 && "Unhandled INT_TO_FP type in custom expander!"); // Since we only generate this in 64-bit mode, we can take advantage of // 64-bit registers. In particular, sign extend the input value into the @@ -8205,20 +8205,20 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) { ReuseLoadInfo RLI; bool ReusingLoad; - if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) { + if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) { int FrameIdx = MFI.CreateStackObject(4, Align(4), false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); - SDValue Store = DAG.getStore(Chain, dl, Src, FIdx, - MachinePointerInfo::getFixedStack( - DAG.getMachineFunction(), FrameIdx)); - Chain = Store; + SDValue Store = DAG.getStore(Chain, dl, Src, FIdx, + MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), FrameIdx)); + Chain = Store; assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && "Expected an i32 store"); RLI.Ptr = FIdx; - RLI.Chain = Chain; + RLI.Chain = Chain; RLI.MPI = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); RLI.Alignment = Align(4); @@ -8228,10 +8228,10 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, RLI.Alignment, RLI.AAInfo, RLI.Ranges); SDValue Ops[] = { RLI.Chain, RLI.Ptr }; - Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl, - DAG.getVTList(MVT::f64, MVT::Other), Ops, - MVT::i32, MMO); - Chain = Ld.getValue(1); + Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl, + DAG.getVTList(MVT::f64, MVT::Other), Ops, + MVT::i32, MMO); + Chain = Ld.getValue(1); if (ReusingLoad) spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG); } else { @@ -8241,34 +8241,34 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, int FrameIdx = MFI.CreateStackObject(8, Align(8), false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); - SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src); + SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src); // STD the extended value into the stack slot. SDValue Store = DAG.getStore( - Chain, dl, Ext64, FIdx, + Chain, dl, Ext64, FIdx, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx)); - Chain = Store; + Chain = Store; // Load the value as a double. Ld = DAG.getLoad( - MVT::f64, dl, Chain, FIdx, + MVT::f64, dl, Chain, FIdx, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx)); - Chain = Ld.getValue(1); + Chain = Ld.getValue(1); } // FCFID it and return it. - SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain); - if (IsStrict) - Chain = FP.getValue(1); - if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) { - if (IsStrict) - FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl, - DAG.getVTList(MVT::f32, MVT::Other), - {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags); - else - FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, - DAG.getIntPtrConstant(0, dl)); - } + SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain); + if (IsStrict) + Chain = FP.getValue(1); + if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) { + if (IsStrict) + FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl, + DAG.getVTList(MVT::f32, MVT::Other), + {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags); + else + FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, + DAG.getIntPtrConstant(0, dl)); + } return FP; } @@ -8303,24 +8303,24 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain); Chain = MFFS.getValue(1); - SDValue CWD; - if (isTypeLegal(MVT::i64)) { - CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, - DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS)); - } else { - // Save FP register to stack slot - int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false); - SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); - Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo()); - - // Load FP Control Word from low 32 bits of stack slot. - assert(hasBigEndianPartOrdering(MVT::i64, MF.getDataLayout()) && - "Stack slot adjustment is valid only on big endian subtargets!"); - SDValue Four = DAG.getConstant(4, dl, PtrVT); - SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); - CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo()); - Chain = CWD.getValue(1); - } + SDValue CWD; + if (isTypeLegal(MVT::i64)) { + CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, + DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS)); + } else { + // Save FP register to stack slot + int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false); + SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); + Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo()); + + // Load FP Control Word from low 32 bits of stack slot. + assert(hasBigEndianPartOrdering(MVT::i64, MF.getDataLayout()) && + "Stack slot adjustment is valid only on big endian subtargets!"); + SDValue Four = DAG.getConstant(4, dl, PtrVT); + SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); + CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo()); + Chain = CWD.getValue(1); + } // Transform as necessary SDValue CWD1 = @@ -8431,31 +8431,31 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { return DAG.getMergeValues(OutOps, dl); } -SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op, - SelectionDAG &DAG) const { - SDLoc dl(Op); - EVT VT = Op.getValueType(); - unsigned BitWidth = VT.getSizeInBits(); - - bool IsFSHL = Op.getOpcode() == ISD::FSHL; - SDValue X = Op.getOperand(0); - SDValue Y = Op.getOperand(1); - SDValue Z = Op.getOperand(2); - EVT AmtVT = Z.getValueType(); - - // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) - // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) - // This is simpler than TargetLowering::expandFunnelShift because we can rely - // on PowerPC shift by BW being well defined. - Z = DAG.getNode(ISD::AND, dl, AmtVT, Z, - DAG.getConstant(BitWidth - 1, dl, AmtVT)); - SDValue SubZ = - DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z); - X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ); - Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z); - return DAG.getNode(ISD::OR, dl, VT, X, Y); -} - +SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + EVT VT = Op.getValueType(); + unsigned BitWidth = VT.getSizeInBits(); + + bool IsFSHL = Op.getOpcode() == ISD::FSHL; + SDValue X = Op.getOperand(0); + SDValue Y = Op.getOperand(1); + SDValue Z = Op.getOperand(2); + EVT AmtVT = Z.getValueType(); + + // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) + // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) + // This is simpler than TargetLowering::expandFunnelShift because we can rely + // on PowerPC shift by BW being well defined. + Z = DAG.getNode(ISD::AND, dl, AmtVT, Z, + DAG.getConstant(BitWidth - 1, dl, AmtVT)); + SDValue SubZ = + DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z); + X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ); + Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z); + return DAG.getNode(ISD::OR, dl, VT, X, Y); +} + //===----------------------------------------------------------------------===// // Vector related lowering. // @@ -8471,7 +8471,7 @@ static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize. - if (Val == ((1LLU << (SplatSize * 8)) - 1)) { + if (Val == ((1LLU << (SplatSize * 8)) - 1)) { SplatSize = 1; Val = 0xFF; } @@ -8660,44 +8660,44 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // If it is a splat of a double, check if we can shrink it to a 32 bit // non-denormal float which when converted back to double gives us the same // double. This is to exploit the XXSPLTIDP instruction. - // If we lose precision, we use XXSPLTI32DX. - if (BVNIsConstantSplat && (SplatBitSize == 64) && - Subtarget.hasPrefixInstrs()) { - // Check the type first to short-circuit so we don't modify APSplatBits if - // this block isn't executed. - if ((Op->getValueType(0) == MVT::v2f64) && - convertToNonDenormSingle(APSplatBits)) { - SDValue SplatNode = DAG.getNode( - PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64, - DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32)); - return DAG.getBitcast(Op.getValueType(), SplatNode); - } else { - // We may lose precision, so we have to use XXSPLTI32DX. - - uint32_t Hi = - (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32); - uint32_t Lo = - (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF); - SDValue SplatNode = DAG.getUNDEF(MVT::v2i64); - - if (!Hi || !Lo) - // If either load is 0, then we should generate XXLXOR to set to 0. - SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64); - - if (Hi) - SplatNode = DAG.getNode( - PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode, - DAG.getTargetConstant(0, dl, MVT::i32), - DAG.getTargetConstant(Hi, dl, MVT::i32)); - - if (Lo) - SplatNode = - DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode, - DAG.getTargetConstant(1, dl, MVT::i32), - DAG.getTargetConstant(Lo, dl, MVT::i32)); - - return DAG.getBitcast(Op.getValueType(), SplatNode); - } + // If we lose precision, we use XXSPLTI32DX. + if (BVNIsConstantSplat && (SplatBitSize == 64) && + Subtarget.hasPrefixInstrs()) { + // Check the type first to short-circuit so we don't modify APSplatBits if + // this block isn't executed. + if ((Op->getValueType(0) == MVT::v2f64) && + convertToNonDenormSingle(APSplatBits)) { + SDValue SplatNode = DAG.getNode( + PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64, + DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32)); + return DAG.getBitcast(Op.getValueType(), SplatNode); + } else { + // We may lose precision, so we have to use XXSPLTI32DX. + + uint32_t Hi = + (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32); + uint32_t Lo = + (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF); + SDValue SplatNode = DAG.getUNDEF(MVT::v2i64); + + if (!Hi || !Lo) + // If either load is 0, then we should generate XXLXOR to set to 0. + SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64); + + if (Hi) + SplatNode = DAG.getNode( + PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode, + DAG.getTargetConstant(0, dl, MVT::i32), + DAG.getTargetConstant(Hi, dl, MVT::i32)); + + if (Lo) + SplatNode = + DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode, + DAG.getTargetConstant(1, dl, MVT::i32), + DAG.getTargetConstant(Lo, dl, MVT::i32)); + + return DAG.getBitcast(Op.getValueType(), SplatNode); + } } if (!BVNIsConstantSplat || SplatBitSize > 32) { @@ -8716,12 +8716,12 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // Checking for a single use of this load, we have to check for vector // width (128 bits) / ElementSize uses (since each operand of the // BUILD_VECTOR is a separate use of the value. - unsigned NumUsesOfInputLD = 128 / ElementSize; - for (SDValue BVInOp : Op->ops()) - if (BVInOp.isUndef()) - NumUsesOfInputLD--; - assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?"); - if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) && + unsigned NumUsesOfInputLD = 128 / ElementSize; + for (SDValue BVInOp : Op->ops()) + if (BVInOp.isUndef()) + NumUsesOfInputLD--; + assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?"); + if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) && ((Subtarget.hasVSX() && ElementSize == 64) || (Subtarget.hasP9Vector() && ElementSize == 32))) { SDValue Ops[] = { @@ -8729,21 +8729,21 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, LD->getBasePtr(), // Ptr DAG.getValueType(Op.getValueType()) // VT }; - SDValue LdSplt = DAG.getMemIntrinsicNode( - PPCISD::LD_SPLAT, dl, DAG.getVTList(Op.getValueType(), MVT::Other), - Ops, LD->getMemoryVT(), LD->getMemOperand()); - // Replace all uses of the output chain of the original load with the - // output chain of the new load. - DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), - LdSplt.getValue(1)); - return LdSplt; + SDValue LdSplt = DAG.getMemIntrinsicNode( + PPCISD::LD_SPLAT, dl, DAG.getVTList(Op.getValueType(), MVT::Other), + Ops, LD->getMemoryVT(), LD->getMemOperand()); + // Replace all uses of the output chain of the original load with the + // output chain of the new load. + DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), + LdSplt.getValue(1)); + return LdSplt; } } - // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to - // 32-bits can be lowered to VSX instructions under certain conditions. + // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to + // 32-bits can be lowered to VSX instructions under certain conditions. // Without VSX, there is no pattern more efficient than expanding the node. - if (Subtarget.hasVSX() && Subtarget.isPPC64() && + if (Subtarget.hasVSX() && Subtarget.isPPC64() && haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(), Subtarget.hasP8Vector())) return Op; @@ -8772,7 +8772,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be // turned into a 4-byte splat of 0xABABABAB. if (Subtarget.hasPrefixInstrs() && SplatSize == 2) - return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2, + return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2, Op.getValueType(), DAG, dl); if (Subtarget.hasPrefixInstrs() && SplatSize == 4) @@ -9367,7 +9367,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SDValue LdSplt = DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL, Ops, LD->getMemoryVT(), LD->getMemOperand()); - DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1)); + DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1)); if (LdSplt.getValueType() != SVOp->getValueType(0)) LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt); return LdSplt; @@ -9732,26 +9732,26 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, return false; break; - case Intrinsic::ppc_altivec_vcmpequq: - case Intrinsic::ppc_altivec_vcmpgtsq: - case Intrinsic::ppc_altivec_vcmpgtuq: - if (!Subtarget.isISA3_1()) - return false; - switch (IntrinsicID) { - default: - llvm_unreachable("Unknown comparison intrinsic."); - case Intrinsic::ppc_altivec_vcmpequq: - CompareOpc = 455; - break; - case Intrinsic::ppc_altivec_vcmpgtsq: - CompareOpc = 903; - break; - case Intrinsic::ppc_altivec_vcmpgtuq: - CompareOpc = 647; - break; - } - break; - + case Intrinsic::ppc_altivec_vcmpequq: + case Intrinsic::ppc_altivec_vcmpgtsq: + case Intrinsic::ppc_altivec_vcmpgtuq: + if (!Subtarget.isISA3_1()) + return false; + switch (IntrinsicID) { + default: + llvm_unreachable("Unknown comparison intrinsic."); + case Intrinsic::ppc_altivec_vcmpequq: + CompareOpc = 455; + break; + case Intrinsic::ppc_altivec_vcmpgtsq: + CompareOpc = 903; + break; + case Intrinsic::ppc_altivec_vcmpgtuq: + CompareOpc = 647; + break; + } + break; + // VSX predicate comparisons use the same infrastructure case Intrinsic::ppc_vsx_xvcmpeqdp_p: case Intrinsic::ppc_vsx_xvcmpgedp_p: @@ -9875,26 +9875,26 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, else return false; break; - case Intrinsic::ppc_altivec_vcmpequq_p: - case Intrinsic::ppc_altivec_vcmpgtsq_p: - case Intrinsic::ppc_altivec_vcmpgtuq_p: - if (!Subtarget.isISA3_1()) - return false; - switch (IntrinsicID) { - default: - llvm_unreachable("Unknown comparison intrinsic."); - case Intrinsic::ppc_altivec_vcmpequq_p: - CompareOpc = 455; - break; - case Intrinsic::ppc_altivec_vcmpgtsq_p: - CompareOpc = 903; - break; - case Intrinsic::ppc_altivec_vcmpgtuq_p: - CompareOpc = 647; - break; - } - isDot = true; - break; + case Intrinsic::ppc_altivec_vcmpequq_p: + case Intrinsic::ppc_altivec_vcmpgtsq_p: + case Intrinsic::ppc_altivec_vcmpgtuq_p: + if (!Subtarget.isISA3_1()) + return false; + switch (IntrinsicID) { + default: + llvm_unreachable("Unknown comparison intrinsic."); + case Intrinsic::ppc_altivec_vcmpequq_p: + CompareOpc = 455; + break; + case Intrinsic::ppc_altivec_vcmpgtsq_p: + CompareOpc = 903; + break; + case Intrinsic::ppc_altivec_vcmpgtuq_p: + CompareOpc = 647; + break; + } + isDot = true; + break; } return true; } @@ -9908,33 +9908,33 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SDLoc dl(Op); - switch (IntrinsicID) { - case Intrinsic::thread_pointer: + switch (IntrinsicID) { + case Intrinsic::thread_pointer: // Reads the thread pointer register, used for __builtin_thread_pointer. if (Subtarget.isPPC64()) return DAG.getRegister(PPC::X13, MVT::i64); return DAG.getRegister(PPC::R2, MVT::i32); - - case Intrinsic::ppc_mma_disassemble_acc: - case Intrinsic::ppc_vsx_disassemble_pair: { - int NumVecs = 2; - SDValue WideVec = Op.getOperand(1); - if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) { - NumVecs = 4; - WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec); - } - SmallVector<SDValue, 4> RetOps; - for (int VecNo = 0; VecNo < NumVecs; VecNo++) { - SDValue Extract = DAG.getNode( - PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec, - DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo - : VecNo, - dl, MVT::i64)); - RetOps.push_back(Extract); - } - return DAG.getMergeValues(RetOps, dl); - } - } + + case Intrinsic::ppc_mma_disassemble_acc: + case Intrinsic::ppc_vsx_disassemble_pair: { + int NumVecs = 2; + SDValue WideVec = Op.getOperand(1); + if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) { + NumVecs = 4; + WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec); + } + SmallVector<SDValue, 4> RetOps; + for (int VecNo = 0; VecNo < NumVecs; VecNo++) { + SDValue Extract = DAG.getNode( + PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec, + DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo + : VecNo, + dl, MVT::i64)); + RetOps.push_back(Extract); + } + return DAG.getMergeValues(RetOps, dl); + } + } // If this is a lowered altivec predicate compare, CompareOpc is set to the // opcode number of the comparison. @@ -9958,7 +9958,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, DAG.getConstant(CompareOpc, dl, MVT::i32) }; EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue }; - SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops); + SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops); // Now that we have the comparison, emit a copy from the CR to a GPR. // This is flagged to the above dot comparison. @@ -10125,43 +10125,43 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, LoadSDNode *LN = cast<LoadSDNode>(Op.getNode()); SDValue LoadChain = LN->getChain(); SDValue BasePtr = LN->getBasePtr(); - EVT VT = Op.getValueType(); - - if (VT != MVT::v256i1 && VT != MVT::v512i1) - return Op; - - // Type v256i1 is used for pairs and v512i1 is used for accumulators. - // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in - // 2 or 4 vsx registers. - assert((VT != MVT::v512i1 || Subtarget.hasMMA()) && - "Type unsupported without MMA"); - assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) && - "Type unsupported without paired vector support"); - Align Alignment = LN->getAlign(); - SmallVector<SDValue, 4> Loads; - SmallVector<SDValue, 4> LoadChains; - unsigned NumVecs = VT.getSizeInBits() / 128; - for (unsigned Idx = 0; Idx < NumVecs; ++Idx) { - SDValue Load = - DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr, - LN->getPointerInfo().getWithOffset(Idx * 16), - commonAlignment(Alignment, Idx * 16), - LN->getMemOperand()->getFlags(), LN->getAAInfo()); - BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(16, dl, BasePtr.getValueType())); - Loads.push_back(Load); - LoadChains.push_back(Load.getValue(1)); - } - if (Subtarget.isLittleEndian()) { - std::reverse(Loads.begin(), Loads.end()); - std::reverse(LoadChains.begin(), LoadChains.end()); - } - SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); - SDValue Value = - DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD, - dl, VT, Loads); - SDValue RetOps[] = {Value, TF}; - return DAG.getMergeValues(RetOps, dl); + EVT VT = Op.getValueType(); + + if (VT != MVT::v256i1 && VT != MVT::v512i1) + return Op; + + // Type v256i1 is used for pairs and v512i1 is used for accumulators. + // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in + // 2 or 4 vsx registers. + assert((VT != MVT::v512i1 || Subtarget.hasMMA()) && + "Type unsupported without MMA"); + assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) && + "Type unsupported without paired vector support"); + Align Alignment = LN->getAlign(); + SmallVector<SDValue, 4> Loads; + SmallVector<SDValue, 4> LoadChains; + unsigned NumVecs = VT.getSizeInBits() / 128; + for (unsigned Idx = 0; Idx < NumVecs; ++Idx) { + SDValue Load = + DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr, + LN->getPointerInfo().getWithOffset(Idx * 16), + commonAlignment(Alignment, Idx * 16), + LN->getMemOperand()->getFlags(), LN->getAAInfo()); + BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, + DAG.getConstant(16, dl, BasePtr.getValueType())); + Loads.push_back(Load); + LoadChains.push_back(Load.getValue(1)); + } + if (Subtarget.isLittleEndian()) { + std::reverse(Loads.begin(), Loads.end()); + std::reverse(LoadChains.begin(), LoadChains.end()); + } + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); + SDValue Value = + DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD, + dl, VT, Loads); + SDValue RetOps[] = {Value, TF}; + return DAG.getMergeValues(RetOps, dl); } SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, @@ -10171,40 +10171,40 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, SDValue StoreChain = SN->getChain(); SDValue BasePtr = SN->getBasePtr(); SDValue Value = SN->getValue(); - EVT StoreVT = Value.getValueType(); - - if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1) - return Op; - - // Type v256i1 is used for pairs and v512i1 is used for accumulators. - // Here we create 2 or 4 v16i8 stores to store the pair or accumulator - // underlying registers individually. - assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) && - "Type unsupported without MMA"); - assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) && - "Type unsupported without paired vector support"); - Align Alignment = SN->getAlign(); - SmallVector<SDValue, 4> Stores; - unsigned NumVecs = 2; - if (StoreVT == MVT::v512i1) { - Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value); - NumVecs = 4; - } - for (unsigned Idx = 0; Idx < NumVecs; ++Idx) { - unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx; - SDValue Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value, - DAG.getConstant(VecNum, dl, MVT::i64)); - SDValue Store = - DAG.getStore(StoreChain, dl, Elt, BasePtr, - SN->getPointerInfo().getWithOffset(Idx * 16), - commonAlignment(Alignment, Idx * 16), - SN->getMemOperand()->getFlags(), SN->getAAInfo()); - BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(16, dl, BasePtr.getValueType())); - Stores.push_back(Store); - } - SDValue TF = DAG.getTokenFactor(dl, Stores); - return TF; + EVT StoreVT = Value.getValueType(); + + if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1) + return Op; + + // Type v256i1 is used for pairs and v512i1 is used for accumulators. + // Here we create 2 or 4 v16i8 stores to store the pair or accumulator + // underlying registers individually. + assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) && + "Type unsupported without MMA"); + assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) && + "Type unsupported without paired vector support"); + Align Alignment = SN->getAlign(); + SmallVector<SDValue, 4> Stores; + unsigned NumVecs = 2; + if (StoreVT == MVT::v512i1) { + Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value); + NumVecs = 4; + } + for (unsigned Idx = 0; Idx < NumVecs; ++Idx) { + unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx; + SDValue Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value, + DAG.getConstant(VecNum, dl, MVT::i64)); + SDValue Store = + DAG.getStore(StoreChain, dl, Elt, BasePtr, + SN->getPointerInfo().getWithOffset(Idx * 16), + commonAlignment(Alignment, Idx * 16), + SN->getMemOperand()->getFlags(), SN->getAAInfo()); + BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, + DAG.getConstant(16, dl, BasePtr.getValueType())); + Stores.push_back(Store); + } + SDValue TF = DAG.getTokenFactor(dl, Stores); + return TF; } SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { @@ -10271,13 +10271,13 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { } } -SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { - bool IsStrict = Op->isStrictFPOpcode(); - if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 && - !Subtarget.hasP9Vector()) - return SDValue(); +SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { + bool IsStrict = Op->isStrictFPOpcode(); + if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 && + !Subtarget.hasP9Vector()) + return SDValue(); - return Op; + return Op; } // Custom lowering for fpext vf32 to v2f64 @@ -10371,8 +10371,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); - case ISD::INLINEASM: - case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG); + case ISD::INLINEASM: + case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG); // Variable argument lowering. case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::VAARG: return LowerVAARG(Op, DAG); @@ -10392,12 +10392,12 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); - case ISD::STRICT_FP_TO_UINT: - case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op)); - case ISD::STRICT_UINT_TO_FP: - case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: + case ISD::STRICT_SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); @@ -10407,9 +10407,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG); case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG); - case ISD::FSHL: return LowerFunnelShift(Op, DAG); - case ISD::FSHR: return LowerFunnelShift(Op, DAG); - + case ISD::FSHL: return LowerFunnelShift(Op, DAG); + case ISD::FSHR: return LowerFunnelShift(Op, DAG); + // Vector-related lowering. case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); @@ -10418,9 +10418,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); - case ISD::STRICT_FP_ROUND: - case ISD::FP_ROUND: - return LowerFP_ROUND(Op, DAG); + case ISD::STRICT_FP_ROUND: + case ISD::FP_ROUND: + return LowerFP_ROUND(Op, DAG); case ISD::ROTL: return LowerROTL(Op, DAG); // For counter-based loop handling. @@ -10488,28 +10488,28 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, } return; } - case ISD::STRICT_FP_TO_SINT: - case ISD::STRICT_FP_TO_UINT: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: // LowerFP_TO_INT() can only handle f32 and f64. - if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() == - MVT::ppcf128) + if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() == + MVT::ppcf128) return; Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); return; case ISD::TRUNCATE: { - if (!N->getValueType(0).isVector()) - return; - SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG); - if (Lowered) - Results.push_back(Lowered); + if (!N->getValueType(0).isVector()) + return; + SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG); + if (Lowered) + Results.push_back(Lowered); return; } - case ISD::FSHL: - case ISD::FSHR: - // Don't handle funnel shifts here. - return; + case ISD::FSHL: + case ISD::FSHR: + // Don't handle funnel shifts here. + return; case ISD::BITCAST: // Don't handle bitcast here. return; @@ -10681,83 +10681,83 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, return BB; } -static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) { - switch(MI.getOpcode()) { - default: - return false; - case PPC::COPY: - return TII->isSignExtended(MI); - case PPC::LHA: - case PPC::LHA8: - case PPC::LHAU: - case PPC::LHAU8: - case PPC::LHAUX: - case PPC::LHAUX8: - case PPC::LHAX: - case PPC::LHAX8: - case PPC::LWA: - case PPC::LWAUX: - case PPC::LWAX: - case PPC::LWAX_32: - case PPC::LWA_32: - case PPC::PLHA: - case PPC::PLHA8: - case PPC::PLHA8pc: - case PPC::PLHApc: - case PPC::PLWA: - case PPC::PLWA8: - case PPC::PLWA8pc: - case PPC::PLWApc: - case PPC::EXTSB: - case PPC::EXTSB8: - case PPC::EXTSB8_32_64: - case PPC::EXTSB8_rec: - case PPC::EXTSB_rec: - case PPC::EXTSH: - case PPC::EXTSH8: - case PPC::EXTSH8_32_64: - case PPC::EXTSH8_rec: - case PPC::EXTSH_rec: - case PPC::EXTSW: - case PPC::EXTSWSLI: - case PPC::EXTSWSLI_32_64: - case PPC::EXTSWSLI_32_64_rec: - case PPC::EXTSWSLI_rec: - case PPC::EXTSW_32: - case PPC::EXTSW_32_64: - case PPC::EXTSW_32_64_rec: - case PPC::EXTSW_rec: - case PPC::SRAW: - case PPC::SRAWI: - case PPC::SRAWI_rec: - case PPC::SRAW_rec: - return true; - } - return false; -} - +static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) { + switch(MI.getOpcode()) { + default: + return false; + case PPC::COPY: + return TII->isSignExtended(MI); + case PPC::LHA: + case PPC::LHA8: + case PPC::LHAU: + case PPC::LHAU8: + case PPC::LHAUX: + case PPC::LHAUX8: + case PPC::LHAX: + case PPC::LHAX8: + case PPC::LWA: + case PPC::LWAUX: + case PPC::LWAX: + case PPC::LWAX_32: + case PPC::LWA_32: + case PPC::PLHA: + case PPC::PLHA8: + case PPC::PLHA8pc: + case PPC::PLHApc: + case PPC::PLWA: + case PPC::PLWA8: + case PPC::PLWA8pc: + case PPC::PLWApc: + case PPC::EXTSB: + case PPC::EXTSB8: + case PPC::EXTSB8_32_64: + case PPC::EXTSB8_rec: + case PPC::EXTSB_rec: + case PPC::EXTSH: + case PPC::EXTSH8: + case PPC::EXTSH8_32_64: + case PPC::EXTSH8_rec: + case PPC::EXTSH_rec: + case PPC::EXTSW: + case PPC::EXTSWSLI: + case PPC::EXTSWSLI_32_64: + case PPC::EXTSWSLI_32_64_rec: + case PPC::EXTSWSLI_rec: + case PPC::EXTSW_32: + case PPC::EXTSW_32_64: + case PPC::EXTSW_32_64_rec: + case PPC::EXTSW_rec: + case PPC::SRAW: + case PPC::SRAWI: + case PPC::SRAWI_rec: + case PPC::SRAW_rec: + return true; + } + return false; +} + MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary( MachineInstr &MI, MachineBasicBlock *BB, bool is8bit, // operation unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const { - // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. - const PPCInstrInfo *TII = Subtarget.getInstrInfo(); - - // If this is a signed comparison and the value being compared is not known - // to be sign extended, sign extend it here. - DebugLoc dl = MI.getDebugLoc(); - MachineFunction *F = BB->getParent(); - MachineRegisterInfo &RegInfo = F->getRegInfo(); - Register incr = MI.getOperand(3).getReg(); - bool IsSignExtended = Register::isVirtualRegister(incr) && - isSignExtended(*RegInfo.getVRegDef(incr), TII); - - if (CmpOpcode == PPC::CMPW && !IsSignExtended) { - Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); - BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg) - .addReg(MI.getOperand(3).getReg()); - MI.getOperand(3).setReg(ValueReg); - } + // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. + const PPCInstrInfo *TII = Subtarget.getInstrInfo(); + + // If this is a signed comparison and the value being compared is not known + // to be sign extended, sign extend it here. + DebugLoc dl = MI.getDebugLoc(); + MachineFunction *F = BB->getParent(); + MachineRegisterInfo &RegInfo = F->getRegInfo(); + Register incr = MI.getOperand(3).getReg(); + bool IsSignExtended = Register::isVirtualRegister(incr) && + isSignExtended(*RegInfo.getVRegDef(incr), TII); + + if (CmpOpcode == PPC::CMPW && !IsSignExtended) { + Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); + BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg) + .addReg(MI.getOperand(3).getReg()); + MI.getOperand(3).setReg(ValueReg); + } // If we support part-word atomic mnemonics, just use them if (Subtarget.hasPartwordAtomics()) return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode, @@ -11960,20 +11960,20 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg); // Set rounding mode to round-to-zero. - BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)) - .addImm(31) - .addReg(PPC::RM, RegState::ImplicitDefine); - - BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)) - .addImm(30) - .addReg(PPC::RM, RegState::ImplicitDefine); - + BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)) + .addImm(31) + .addReg(PPC::RM, RegState::ImplicitDefine); + + BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)) + .addImm(30) + .addReg(PPC::RM, RegState::ImplicitDefine); + // Perform addition. - auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest) - .addReg(Src1) - .addReg(Src2); - if (MI.getFlag(MachineInstr::NoFPExcept)) - MIB.setMIFlag(MachineInstr::NoFPExcept); + auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest) + .addReg(Src1) + .addReg(Src2); + if (MI.getFlag(MachineInstr::NoFPExcept)) + MIB.setMIFlag(MachineInstr::NoFPExcept); // Restore FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg); @@ -12032,12 +12032,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // the immediate to set the bits 62:63 of FPSCR. unsigned Mode = MI.getOperand(1).getImm(); BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0)) - .addImm(31) - .addReg(PPC::RM, RegState::ImplicitDefine); + .addImm(31) + .addReg(PPC::RM, RegState::ImplicitDefine); BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0)) - .addImm(30) - .addReg(PPC::RM, RegState::ImplicitDefine); + .addImm(30) + .addReg(PPC::RM, RegState::ImplicitDefine); } else if (MI.getOpcode() == PPC::SETRND) { DebugLoc dl = MI.getDebugLoc(); @@ -12147,20 +12147,20 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, .addReg(NewFPSCRReg) .addImm(0) .addImm(0); - } else if (MI.getOpcode() == PPC::SETFLM) { - DebugLoc Dl = MI.getDebugLoc(); - - // Result of setflm is previous FPSCR content, so we need to save it first. - Register OldFPSCRReg = MI.getOperand(0).getReg(); - BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg); - - // Put bits in 32:63 to FPSCR. - Register NewFPSCRReg = MI.getOperand(1).getReg(); - BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF)) - .addImm(255) - .addReg(NewFPSCRReg) - .addImm(0) - .addImm(0); + } else if (MI.getOpcode() == PPC::SETFLM) { + DebugLoc Dl = MI.getDebugLoc(); + + // Result of setflm is previous FPSCR content, so we need to save it first. + Register OldFPSCRReg = MI.getOperand(0).getReg(); + BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg); + + // Put bits in 32:63 to FPSCR. + Register NewFPSCRReg = MI.getOperand(1).getReg(); + BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF)) + .addImm(255) + .addReg(NewFPSCRReg) + .addImm(0) + .addImm(0); } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 || MI.getOpcode() == PPC::PROBED_ALLOCA_64) { return emitProbedAlloca(MI, BB); @@ -12187,47 +12187,47 @@ static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) { return RefinementSteps; } -SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG, - const DenormalMode &Mode) const { - // We only have VSX Vector Test for software Square Root. - EVT VT = Op.getValueType(); - if (!isTypeLegal(MVT::i1) || - (VT != MVT::f64 && - ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))) - return TargetLowering::getSqrtInputTest(Op, DAG, Mode); - - SDLoc DL(Op); - // The output register of FTSQRT is CR field. - SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op); - // ftsqrt BF,FRB - // Let e_b be the unbiased exponent of the double-precision - // floating-point operand in register FRB. - // fe_flag is set to 1 if either of the following conditions occurs. - // - The double-precision floating-point operand in register FRB is a zero, - // a NaN, or an infinity, or a negative value. - // - e_b is less than or equal to -970. - // Otherwise fe_flag is set to 0. - // Both VSX and non-VSX versions would set EQ bit in the CR if the number is - // not eligible for iteration. (zero/negative/infinity/nan or unbiased - // exponent is less than -970) - SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32); - return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1, - FTSQRT, SRIdxVal), - 0); -} - -SDValue -PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op, - SelectionDAG &DAG) const { - // We only have VSX Vector Square Root. - EVT VT = Op.getValueType(); - if (VT != MVT::f64 && - ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())) - return TargetLowering::getSqrtResultForDenormInput(Op, DAG); - - return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op); -} - +SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG, + const DenormalMode &Mode) const { + // We only have VSX Vector Test for software Square Root. + EVT VT = Op.getValueType(); + if (!isTypeLegal(MVT::i1) || + (VT != MVT::f64 && + ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))) + return TargetLowering::getSqrtInputTest(Op, DAG, Mode); + + SDLoc DL(Op); + // The output register of FTSQRT is CR field. + SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op); + // ftsqrt BF,FRB + // Let e_b be the unbiased exponent of the double-precision + // floating-point operand in register FRB. + // fe_flag is set to 1 if either of the following conditions occurs. + // - The double-precision floating-point operand in register FRB is a zero, + // a NaN, or an infinity, or a negative value. + // - e_b is less than or equal to -970. + // Otherwise fe_flag is set to 0. + // Both VSX and non-VSX versions would set EQ bit in the CR if the number is + // not eligible for iteration. (zero/negative/infinity/nan or unbiased + // exponent is less than -970) + SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32); + return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1, + FTSQRT, SRIdxVal), + 0); +} + +SDValue +PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op, + SelectionDAG &DAG) const { + // We only have VSX Vector Square Root. + EVT VT = Op.getValueType(); + if (VT != MVT::f64 && + ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())) + return TargetLowering::getSqrtResultForDenormInput(Op, DAG); + + return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op); +} + SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, @@ -12236,7 +12236,7 @@ SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || (VT == MVT::v4f32 && Subtarget.hasAltivec()) || - (VT == MVT::v2f64 && Subtarget.hasVSX())) { + (VT == MVT::v2f64 && Subtarget.hasVSX())) { if (RefinementSteps == ReciprocalEstimate::Unspecified) RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); @@ -12255,7 +12255,7 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG, if ((VT == MVT::f32 && Subtarget.hasFRES()) || (VT == MVT::f64 && Subtarget.hasFRE()) || (VT == MVT::v4f32 && Subtarget.hasAltivec()) || - (VT == MVT::v2f64 && Subtarget.hasVSX())) { + (VT == MVT::v2f64 && Subtarget.hasVSX())) { if (RefinementSteps == ReciprocalEstimate::Unspecified) RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); @@ -12608,13 +12608,13 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1)); // We don't really care about what is known about the first bit (if - // anything), so pretend that it is known zero for both to ensure they can - // be compared as constants. - Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0); - Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0); + // anything), so pretend that it is known zero for both to ensure they can + // be compared as constants. + Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0); + Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0); - if (!Op1Known.isConstant() || !Op2Known.isConstant() || - Op1Known.getConstant() != Op2Known.getConstant()) + if (!Op1Known.isConstant() || !Op2Known.isConstant() || + Op1Known.getConstant() != Op2Known.getConstant()) return SDValue(); } } @@ -12666,7 +12666,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, // Visit all inputs, collect all binary operations (and, or, xor and // select) that are all fed by extensions. while (!BinOps.empty()) { - SDValue BinOp = BinOps.pop_back_val(); + SDValue BinOp = BinOps.pop_back_val(); if (!Visited.insert(BinOp.getNode()).second) continue; @@ -12881,7 +12881,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, // Visit all inputs, collect all binary operations (and, or, xor and // select) that are all fed by truncations. while (!BinOps.empty()) { - SDValue BinOp = BinOps.pop_back_val(); + SDValue BinOp = BinOps.pop_back_val(); if (!Visited.insert(BinOp.getNode()).second) continue; @@ -13478,46 +13478,46 @@ static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -// Look for the pattern of a load from a narrow width to i128, feeding -// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node -// (LXVRZX). This node represents a zero extending load that will be matched -// to the Load VSX Vector Rightmost instructions. -static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) { - SDLoc DL(N); - - // This combine is only eligible for a BUILD_VECTOR of v1i128. - if (N->getValueType(0) != MVT::v1i128) - return SDValue(); - - SDValue Operand = N->getOperand(0); - // Proceed with the transformation if the operand to the BUILD_VECTOR - // is a load instruction. - if (Operand.getOpcode() != ISD::LOAD) - return SDValue(); - - LoadSDNode *LD = dyn_cast<LoadSDNode>(Operand); - EVT MemoryType = LD->getMemoryVT(); - - // This transformation is only valid if the we are loading either a byte, - // halfword, word, or doubleword. - bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 || - MemoryType == MVT::i32 || MemoryType == MVT::i64; - - // Ensure that the load from the narrow width is being zero extended to i128. - if (!ValidLDType || - (LD->getExtensionType() != ISD::ZEXTLOAD && - LD->getExtensionType() != ISD::EXTLOAD)) - return SDValue(); - - SDValue LoadOps[] = { - LD->getChain(), LD->getBasePtr(), - DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)}; - - return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL, - DAG.getVTList(MVT::v1i128, MVT::Other), - LoadOps, MemoryType, LD->getMemOperand()); -} - +// Look for the pattern of a load from a narrow width to i128, feeding +// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node +// (LXVRZX). This node represents a zero extending load that will be matched +// to the Load VSX Vector Rightmost instructions. +static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) { + SDLoc DL(N); + + // This combine is only eligible for a BUILD_VECTOR of v1i128. + if (N->getValueType(0) != MVT::v1i128) + return SDValue(); + + SDValue Operand = N->getOperand(0); + // Proceed with the transformation if the operand to the BUILD_VECTOR + // is a load instruction. + if (Operand.getOpcode() != ISD::LOAD) + return SDValue(); + + LoadSDNode *LD = dyn_cast<LoadSDNode>(Operand); + EVT MemoryType = LD->getMemoryVT(); + + // This transformation is only valid if the we are loading either a byte, + // halfword, word, or doubleword. + bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 || + MemoryType == MVT::i32 || MemoryType == MVT::i64; + + // Ensure that the load from the narrow width is being zero extended to i128. + if (!ValidLDType || + (LD->getExtensionType() != ISD::ZEXTLOAD && + LD->getExtensionType() != ISD::EXTLOAD)) + return SDValue(); + + SDValue LoadOps[] = { + LD->getChain(), LD->getBasePtr(), + DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)}; + + return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL, + DAG.getVTList(MVT::v1i128, MVT::Other), + LoadOps, MemoryType, LD->getMemOperand()); +} + SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const { assert(N->getOpcode() == ISD::BUILD_VECTOR && @@ -13555,14 +13555,14 @@ SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N, return Reduced; } - // On Power10, the Load VSX Vector Rightmost instructions can be utilized - // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR - // is a load from <valid narrow width> to i128. - if (Subtarget.isISA3_1()) { - SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG); - if (BVOfZLoad) - return BVOfZLoad; - } + // On Power10, the Load VSX Vector Rightmost instructions can be utilized + // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR + // is a load from <valid narrow width> to i128. + if (Subtarget.isISA3_1()) { + SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG); + if (BVOfZLoad) + return BVOfZLoad; + } if (N->getValueType(0) != MVT::v2f64) return SDValue(); @@ -13626,8 +13626,8 @@ SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N, // from the hardware. if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) return SDValue(); - if (!Op.getOperand(0).getValueType().isSimple()) - return SDValue(); + if (!Op.getOperand(0).getValueType().isSimple()) + return SDValue(); if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) || Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64)) return SDValue(); @@ -13864,7 +13864,7 @@ SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N, EVT Op1VT = N->getOperand(1).getValueType(); EVT ResVT = Val.getValueType(); - if (!isTypeLegal(ResVT)) + if (!isTypeLegal(ResVT)) return SDValue(); // Only perform combine for conversion to i64/i32 or power9 i16/i8. @@ -14431,9 +14431,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // P8 and later hardware should just use LOAD. !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 || - VT == MVT::v4f32))) && + VT == MVT::v4f32))) && LD->getAlign() < ABIAlignment) { - // This is a type-legal unaligned Altivec load. + // This is a type-legal unaligned Altivec load. SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); bool isLittleEndian = Subtarget.isLittleEndian(); @@ -14464,13 +14464,13 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // optimization later. Intrinsic::ID Intr, IntrLD, IntrPerm; MVT PermCntlTy, PermTy, LDTy; - Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr - : Intrinsic::ppc_altivec_lvsl; - IntrLD = Intrinsic::ppc_altivec_lvx; - IntrPerm = Intrinsic::ppc_altivec_vperm; - PermCntlTy = MVT::v16i8; - PermTy = MVT::v4i32; - LDTy = MVT::v4i32; + Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr + : Intrinsic::ppc_altivec_lvsl; + IntrLD = Intrinsic::ppc_altivec_lvx; + IntrPerm = Intrinsic::ppc_altivec_vperm; + PermCntlTy = MVT::v16i8; + PermTy = MVT::v4i32; + LDTy = MVT::v4i32; SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy); @@ -14541,10 +14541,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, BaseLoad, ExtraLoad, PermCntl, DAG, dl); if (VT != PermTy) - Perm = Subtarget.hasAltivec() - ? DAG.getNode(ISD::BITCAST, dl, VT, Perm) - : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, - DAG.getTargetConstant(1, dl, MVT::i64)); + Perm = Subtarget.hasAltivec() + ? DAG.getNode(ISD::BITCAST, dl, VT, Perm) + : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, + DAG.getTargetConstant(1, dl, MVT::i64)); // second argument is 1 because this rounding // is always exact. @@ -14560,10 +14560,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr : Intrinsic::ppc_altivec_lvsl); - if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) { + if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) { SDValue Add = N->getOperand(1); - int Bits = 4 /* 16 byte alignment */; + int Bits = 4 /* 16 byte alignment */; if (DAG.MaskedValueIsZero(Add->getOperand(1), APInt::getAllOnesValue(Bits /* alignment */) @@ -14573,8 +14573,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, UE = BasePtr->use_end(); UI != UE; ++UI) { if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && - cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == - IID) { + cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == + IID) { // We've found another LVSL/LVSR, and this address is an aligned // multiple of that one. The results will be the same, so use the // one we've just found instead. @@ -14706,43 +14706,43 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } break; case PPCISD::VCMP: - // If a VCMP_rec node already exists with exactly the same operands as this - // node, use its result instead of this node (VCMP_rec computes both a CR6 - // and a normal output). + // If a VCMP_rec node already exists with exactly the same operands as this + // node, use its result instead of this node (VCMP_rec computes both a CR6 + // and a normal output). // if (!N->getOperand(0).hasOneUse() && !N->getOperand(1).hasOneUse() && !N->getOperand(2).hasOneUse()) { - // Scan all of the users of the LHS, looking for VCMP_rec's that match. - SDNode *VCMPrecNode = nullptr; + // Scan all of the users of the LHS, looking for VCMP_rec's that match. + SDNode *VCMPrecNode = nullptr; SDNode *LHSN = N->getOperand(0).getNode(); for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); UI != E; ++UI) - if (UI->getOpcode() == PPCISD::VCMP_rec && + if (UI->getOpcode() == PPCISD::VCMP_rec && UI->getOperand(1) == N->getOperand(1) && UI->getOperand(2) == N->getOperand(2) && UI->getOperand(0) == N->getOperand(0)) { - VCMPrecNode = *UI; + VCMPrecNode = *UI; break; } - // If there is no VCMP_rec node, or if the flag value has a single use, - // don't transform this. - if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1)) + // If there is no VCMP_rec node, or if the flag value has a single use, + // don't transform this. + if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1)) break; // Look at the (necessarily single) use of the flag value. If it has a // chain, this transformation is more complex. Note that multiple things // could use the value result, which we should ignore. SDNode *FlagUser = nullptr; - for (SDNode::use_iterator UI = VCMPrecNode->use_begin(); + for (SDNode::use_iterator UI = VCMPrecNode->use_begin(); FlagUser == nullptr; ++UI) { - assert(UI != VCMPrecNode->use_end() && "Didn't find user!"); + assert(UI != VCMPrecNode->use_end() && "Didn't find user!"); SDNode *User = *UI; for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { - if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) { + if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) { FlagUser = User; break; } @@ -14752,7 +14752,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // If the user is a MFOCRF instruction, we know this is safe. // Otherwise we give up for right now. if (FlagUser->getOpcode() == PPCISD::MFOCRF) - return SDValue(VCMPrecNode, 0); + return SDValue(VCMPrecNode, 0); } break; case ISD::BRCOND: { @@ -14841,7 +14841,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DAG.getConstant(CompareOpc, dl, MVT::i32) }; EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue }; - SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops); + SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops); // Unpack the result based on how the target uses it. PPC::Predicate CompOpc; @@ -14936,19 +14936,19 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, case Intrinsic::ppc_altivec_vcmpequh_p: case Intrinsic::ppc_altivec_vcmpequw_p: case Intrinsic::ppc_altivec_vcmpequd_p: - case Intrinsic::ppc_altivec_vcmpequq_p: + case Intrinsic::ppc_altivec_vcmpequq_p: case Intrinsic::ppc_altivec_vcmpgefp_p: case Intrinsic::ppc_altivec_vcmpgtfp_p: case Intrinsic::ppc_altivec_vcmpgtsb_p: case Intrinsic::ppc_altivec_vcmpgtsh_p: case Intrinsic::ppc_altivec_vcmpgtsw_p: case Intrinsic::ppc_altivec_vcmpgtsd_p: - case Intrinsic::ppc_altivec_vcmpgtsq_p: + case Intrinsic::ppc_altivec_vcmpgtsq_p: case Intrinsic::ppc_altivec_vcmpgtub_p: case Intrinsic::ppc_altivec_vcmpgtuh_p: case Intrinsic::ppc_altivec_vcmpgtuw_p: case Intrinsic::ppc_altivec_vcmpgtud_p: - case Intrinsic::ppc_altivec_vcmpgtuq_p: + case Intrinsic::ppc_altivec_vcmpgtuq_p: Known.Zero = ~1U; // All bits but the low one are known to be zero. break; } @@ -15147,45 +15147,45 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return std::make_pair(0U, &PPC::VSSRCRegClass); else return std::make_pair(0U, &PPC::VSFRCRegClass); - } else if (Constraint == "lr") { - if (VT == MVT::i64) - return std::make_pair(0U, &PPC::LR8RCRegClass); - else - return std::make_pair(0U, &PPC::LRRCRegClass); - } - - // Handle special cases of physical registers that are not properly handled - // by the base class. - if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') { - // If we name a VSX register, we can't defer to the base class because it - // will not recognize the correct register (their names will be VSL{0-31} - // and V{0-31} so they won't match). So we match them here. - if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') { - int VSNum = atoi(Constraint.data() + 3); - assert(VSNum >= 0 && VSNum <= 63 && - "Attempted to access a vsr out of range"); - if (VSNum < 32) - return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass); - return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass); - } - - // For float registers, we can't defer to the base class as it will match - // the SPILLTOVSRRC class. - if (Constraint.size() > 3 && Constraint[1] == 'f') { - int RegNum = atoi(Constraint.data() + 2); - if (RegNum > 31 || RegNum < 0) - report_fatal_error("Invalid floating point register number"); - if (VT == MVT::f32 || VT == MVT::i32) - return Subtarget.hasSPE() - ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass) - : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass); - if (VT == MVT::f64 || VT == MVT::i64) - return Subtarget.hasSPE() - ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass) - : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass); - } - } - + } else if (Constraint == "lr") { + if (VT == MVT::i64) + return std::make_pair(0U, &PPC::LR8RCRegClass); + else + return std::make_pair(0U, &PPC::LRRCRegClass); + } + + // Handle special cases of physical registers that are not properly handled + // by the base class. + if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') { + // If we name a VSX register, we can't defer to the base class because it + // will not recognize the correct register (their names will be VSL{0-31} + // and V{0-31} so they won't match). So we match them here. + if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') { + int VSNum = atoi(Constraint.data() + 3); + assert(VSNum >= 0 && VSNum <= 63 && + "Attempted to access a vsr out of range"); + if (VSNum < 32) + return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass); + return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass); + } + + // For float registers, we can't defer to the base class as it will match + // the SPILLTOVSRRC class. + if (Constraint.size() > 3 && Constraint[1] == 'f') { + int RegNum = atoi(Constraint.data() + 2); + if (RegNum > 31 || RegNum < 0) + report_fatal_error("Invalid floating point register number"); + if (VT == MVT::f32 || VT == MVT::i32) + return Subtarget.hasSPE() + ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass) + : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass); + if (VT == MVT::f64 || VT == MVT::i64) + return Subtarget.hasSPE() + ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass) + : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass); + } + } + std::pair<unsigned, const TargetRegisterClass *> R = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); @@ -15290,15 +15290,15 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, // by AM is legal for this target, for a load/store of the specified type. bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, - unsigned AS, - Instruction *I) const { - // Vector type r+i form is supported since power9 as DQ form. We don't check - // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC, - // imm form is preferred and the offset can be adjusted to use imm form later - // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and - // max offset to check legal addressing mode, we should be a little aggressive - // to contain other offsets for that LSRUse. - if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector()) + unsigned AS, + Instruction *I) const { + // Vector type r+i form is supported since power9 as DQ form. We don't check + // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC, + // imm form is preferred and the offset can be adjusted to use imm form later + // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and + // max offset to check legal addressing mode, we should be a little aggressive + // to contain other offsets for that LSRUse. + if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector()) return false; // PPC allows a sign-extended 16-bit immediate field. @@ -15458,11 +15458,11 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::ppc_altivec_lvehx: case Intrinsic::ppc_altivec_lvewx: case Intrinsic::ppc_vsx_lxvd2x: - case Intrinsic::ppc_vsx_lxvw4x: - case Intrinsic::ppc_vsx_lxvd2x_be: - case Intrinsic::ppc_vsx_lxvw4x_be: - case Intrinsic::ppc_vsx_lxvl: - case Intrinsic::ppc_vsx_lxvll: { + case Intrinsic::ppc_vsx_lxvw4x: + case Intrinsic::ppc_vsx_lxvd2x_be: + case Intrinsic::ppc_vsx_lxvw4x_be: + case Intrinsic::ppc_vsx_lxvl: + case Intrinsic::ppc_vsx_lxvll: { EVT VT; switch (Intrinsic) { case Intrinsic::ppc_altivec_lvebx: @@ -15475,7 +15475,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, VT = MVT::i32; break; case Intrinsic::ppc_vsx_lxvd2x: - case Intrinsic::ppc_vsx_lxvd2x_be: + case Intrinsic::ppc_vsx_lxvd2x_be: VT = MVT::v2f64; break; default: @@ -15498,11 +15498,11 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::ppc_altivec_stvehx: case Intrinsic::ppc_altivec_stvewx: case Intrinsic::ppc_vsx_stxvd2x: - case Intrinsic::ppc_vsx_stxvw4x: - case Intrinsic::ppc_vsx_stxvd2x_be: - case Intrinsic::ppc_vsx_stxvw4x_be: - case Intrinsic::ppc_vsx_stxvl: - case Intrinsic::ppc_vsx_stxvll: { + case Intrinsic::ppc_vsx_stxvw4x: + case Intrinsic::ppc_vsx_stxvd2x_be: + case Intrinsic::ppc_vsx_stxvw4x_be: + case Intrinsic::ppc_vsx_stxvl: + case Intrinsic::ppc_vsx_stxvll: { EVT VT; switch (Intrinsic) { case Intrinsic::ppc_altivec_stvebx: @@ -15515,7 +15515,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, VT = MVT::i32; break; case Intrinsic::ppc_vsx_stxvd2x: - case Intrinsic::ppc_vsx_stxvd2x_be: + case Intrinsic::ppc_vsx_stxvd2x_be: VT = MVT::v2f64; break; default: @@ -15662,33 +15662,33 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, return true; } -bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, - SDValue C) const { - // Check integral scalar types. - if (!VT.isScalarInteger()) - return false; - if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) { - if (!ConstNode->getAPIntValue().isSignedIntN(64)) - return false; - // This transformation will generate >= 2 operations. But the following - // cases will generate <= 2 instructions during ISEL. So exclude them. - // 1. If the constant multiplier fits 16 bits, it can be handled by one - // HW instruction, ie. MULLI - // 2. If the multiplier after shifted fits 16 bits, an extra shift - // instruction is needed than case 1, ie. MULLI and RLDICR - int64_t Imm = ConstNode->getSExtValue(); - unsigned Shift = countTrailingZeros<uint64_t>(Imm); - Imm >>= Shift; - if (isInt<16>(Imm)) - return false; - uint64_t UImm = static_cast<uint64_t>(Imm); - if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) || - isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm)) - return true; - } - return false; -} - +bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, + SDValue C) const { + // Check integral scalar types. + if (!VT.isScalarInteger()) + return false; + if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) { + if (!ConstNode->getAPIntValue().isSignedIntN(64)) + return false; + // This transformation will generate >= 2 operations. But the following + // cases will generate <= 2 instructions during ISEL. So exclude them. + // 1. If the constant multiplier fits 16 bits, it can be handled by one + // HW instruction, ie. MULLI + // 2. If the multiplier after shifted fits 16 bits, an extra shift + // instruction is needed than case 1, ie. MULLI and RLDICR + int64_t Imm = ConstNode->getSExtValue(); + unsigned Shift = countTrailingZeros<uint64_t>(Imm); + Imm >>= Shift; + if (isInt<16>(Imm)) + return false; + uint64_t UImm = static_cast<uint64_t>(Imm); + if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) || + isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm)) + return true; + } + return false; +} + bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const { return isFMAFasterThanFMulAndFAdd( @@ -15708,7 +15708,7 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F, } } -// FIXME: add more patterns which are not profitable to hoist. +// FIXME: add more patterns which are not profitable to hoist. bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const { if (!I->hasOneUse()) return true; @@ -15716,48 +15716,48 @@ bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const { Instruction *User = I->user_back(); assert(User && "A single use instruction with no uses."); - switch (I->getOpcode()) { - case Instruction::FMul: { - // Don't break FMA, PowerPC prefers FMA. - if (User->getOpcode() != Instruction::FSub && - User->getOpcode() != Instruction::FAdd) - return true; - - const TargetOptions &Options = getTargetMachine().Options; - const Function *F = I->getFunction(); - const DataLayout &DL = F->getParent()->getDataLayout(); - Type *Ty = User->getOperand(0)->getType(); - - return !( - isFMAFasterThanFMulAndFAdd(*F, Ty) && - isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) && - (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath)); - } - case Instruction::Load: { - // Don't break "store (load float*)" pattern, this pattern will be combined - // to "store (load int32)" in later InstCombine pass. See function - // combineLoadToOperationType. On PowerPC, loading a float point takes more - // cycles than loading a 32 bit integer. - LoadInst *LI = cast<LoadInst>(I); - // For the loads that combineLoadToOperationType does nothing, like - // ordered load, it should be profitable to hoist them. - // For swifterror load, it can only be used for pointer to pointer type, so - // later type check should get rid of this case. - if (!LI->isUnordered()) - return true; - - if (User->getOpcode() != Instruction::Store) - return true; - - if (I->getType()->getTypeID() != Type::FloatTyID) - return true; - - return false; - } - default: - return true; - } - return true; + switch (I->getOpcode()) { + case Instruction::FMul: { + // Don't break FMA, PowerPC prefers FMA. + if (User->getOpcode() != Instruction::FSub && + User->getOpcode() != Instruction::FAdd) + return true; + + const TargetOptions &Options = getTargetMachine().Options; + const Function *F = I->getFunction(); + const DataLayout &DL = F->getParent()->getDataLayout(); + Type *Ty = User->getOperand(0)->getType(); + + return !( + isFMAFasterThanFMulAndFAdd(*F, Ty) && + isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) && + (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath)); + } + case Instruction::Load: { + // Don't break "store (load float*)" pattern, this pattern will be combined + // to "store (load int32)" in later InstCombine pass. See function + // combineLoadToOperationType. On PowerPC, loading a float point takes more + // cycles than loading a 32 bit integer. + LoadInst *LI = cast<LoadInst>(I); + // For the loads that combineLoadToOperationType does nothing, like + // ordered load, it should be profitable to hoist them. + // For swifterror load, it can only be used for pointer to pointer type, so + // later type check should get rid of this case. + if (!LI->isUnordered()) + return true; + + if (User->getOpcode() != Instruction::Store) + return true; + + if (I->getType()->getTypeID() != Type::FloatTyID) + return true; + + return false; + } + default: + return true; + } + return true; } const MCPhysReg * @@ -15789,7 +15789,7 @@ PPCTargetLowering::shouldExpandBuildVectorWithShuffles( if (VT == MVT::v2i64) return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves - if (Subtarget.hasVSX()) + if (Subtarget.hasVSX()) return true; return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); @@ -15835,7 +15835,7 @@ SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, switch (Opc) { case PPCISD::FNMSUB: - if (!Op.hasOneUse() || !isTypeLegal(VT)) + if (!Op.hasOneUse() || !isTypeLegal(VT)) break; const TargetOptions &Options = getTargetMachine().Options; @@ -15964,10 +15964,10 @@ SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const { SDValue N0 = N->getOperand(0); ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1)); - if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() || + if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() || N0.getOpcode() != ISD::SIGN_EXTEND || - N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr || - N->getValueType(0) != MVT::i64) + N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr || + N->getValueType(0) != MVT::i64) return SDValue(); // We can't save an operation here if the value is already extended, and @@ -16316,7 +16316,7 @@ SDValue PPCTargetLowering::combineFMALike(SDNode *N, bool LegalOps = !DCI.isBeforeLegalizeOps(); SDLoc Loc(N); - if (!isOperationLegal(ISD::FMA, VT)) + if (!isOperationLegal(ISD::FMA, VT)) return SDValue(); // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0 diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCISelLowering.h b/contrib/libs/llvm12/lib/Target/PowerPC/PPCISelLowering.h index 2a0b6d3eb0..836c52bdff 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCISelLowering.h +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCISelLowering.h @@ -89,12 +89,12 @@ namespace llvm { FRE, FRSQRTE, - /// Test instruction for software square root. - FTSQRT, - - /// Square root instruction. - FSQRT, - + /// Test instruction for software square root. + FTSQRT, + + /// Square root instruction. + FSQRT, + /// VPERM - The PPC VPERM Instruction. /// VPERM, @@ -152,7 +152,7 @@ namespace llvm { /// probed. PROBED_ALLOCA, - /// The result of the mflr at function entry, used for PIC code. + /// The result of the mflr at function entry, used for PIC code. GlobalBaseReg, /// These nodes represent PPC shifts. @@ -270,11 +270,11 @@ namespace llvm { /// is VCMPGTSH. VCMP, - /// RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the - /// altivec VCMP*_rec instructions. For lack of better number, we use the + /// RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the + /// altivec VCMP*_rec instructions. For lack of better number, we use the /// opcode number encoding for the OPC field to identify the compare. For /// example, 838 is VCMPGTSH. - VCMP_rec, + VCMP_rec, /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the @@ -386,10 +386,10 @@ namespace llvm { /// sym\@got\@dtprel\@l. ADDI_DTPREL_L, - /// G8RC = PADDI_DTPREL %x3, Symbol - For the pc-rel based local-dynamic TLS - /// model, produces a PADDI8 instruction that adds X3 to sym\@dtprel. - PADDI_DTPREL, - + /// G8RC = PADDI_DTPREL %x3, Symbol - For the pc-rel based local-dynamic TLS + /// model, produces a PADDI8 instruction that adds X3 to sym\@dtprel. + PADDI_DTPREL, + /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded /// during instruction selection to optimize a BUILD_VECTOR into /// operations on splats. This is necessary to avoid losing these @@ -445,46 +445,46 @@ namespace llvm { /// PLD. MAT_PCREL_ADDR, - /// TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for - /// TLS global address when using dynamic access models. This can be done - /// through an add like PADDI. - TLS_DYNAMIC_MAT_PCREL_ADDR, - - /// TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address - /// when using local exec access models, and when prefixed instructions are - /// available. This is used with ADD_TLS to produce an add like PADDI. - TLS_LOCAL_EXEC_MAT_ADDR, - - /// ACC_BUILD = Build an accumulator register from 4 VSX registers. - ACC_BUILD, - - /// PAIR_BUILD = Build a vector pair register from 2 VSX registers. - PAIR_BUILD, - - /// EXTRACT_VSX_REG = Extract one of the underlying vsx registers of - /// an accumulator or pair register. This node is needed because - /// EXTRACT_SUBVECTOR expects the input and output vectors to have the same - /// element type. - EXTRACT_VSX_REG, - - /// XXMFACC = This corresponds to the xxmfacc instruction. - XXMFACC, - - // Constrained conversion from floating point to int - STRICT_FCTIDZ = ISD::FIRST_TARGET_STRICTFP_OPCODE, - STRICT_FCTIWZ, - STRICT_FCTIDUZ, - STRICT_FCTIWUZ, - - /// Constrained integer-to-floating-point conversion instructions. - STRICT_FCFID, - STRICT_FCFIDU, - STRICT_FCFIDS, - STRICT_FCFIDUS, - - /// Constrained floating point add in round-to-zero mode. - STRICT_FADDRTZ, - + /// TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for + /// TLS global address when using dynamic access models. This can be done + /// through an add like PADDI. + TLS_DYNAMIC_MAT_PCREL_ADDR, + + /// TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address + /// when using local exec access models, and when prefixed instructions are + /// available. This is used with ADD_TLS to produce an add like PADDI. + TLS_LOCAL_EXEC_MAT_ADDR, + + /// ACC_BUILD = Build an accumulator register from 4 VSX registers. + ACC_BUILD, + + /// PAIR_BUILD = Build a vector pair register from 2 VSX registers. + PAIR_BUILD, + + /// EXTRACT_VSX_REG = Extract one of the underlying vsx registers of + /// an accumulator or pair register. This node is needed because + /// EXTRACT_SUBVECTOR expects the input and output vectors to have the same + /// element type. + EXTRACT_VSX_REG, + + /// XXMFACC = This corresponds to the xxmfacc instruction. + XXMFACC, + + // Constrained conversion from floating point to int + STRICT_FCTIDZ = ISD::FIRST_TARGET_STRICTFP_OPCODE, + STRICT_FCTIWZ, + STRICT_FCTIDUZ, + STRICT_FCTIWUZ, + + /// Constrained integer-to-floating-point conversion instructions. + STRICT_FCFID, + STRICT_FCFIDU, + STRICT_FCFIDS, + STRICT_FCFIDUS, + + /// Constrained floating point add in round-to-zero mode. + STRICT_FADDRTZ, + /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a /// byte-swapping store instruction. It byte-swaps the low "Type" bits of /// the GPRC input, then stores it through Ptr. Type can be either i16 or @@ -526,12 +526,12 @@ namespace llvm { /// an xxswapd. LXVD2X, - /// LXVRZX - Load VSX Vector Rightmost and Zero Extend - /// This node represents v1i128 BUILD_VECTOR of a zero extending load - /// instruction from <byte, halfword, word, or doubleword> to i128. - /// Allows utilization of the Load VSX Vector Rightmost Instructions. - LXVRZX, - + /// LXVRZX - Load VSX Vector Rightmost and Zero Extend + /// This node represents v1i128 BUILD_VECTOR of a zero extending load + /// instruction from <byte, halfword, word, or doubleword> to i128. + /// Allows utilization of the Load VSX Vector Rightmost Instructions. + LXVRZX, + /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian. /// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on /// the vector type to load vector in big-endian element order. @@ -771,8 +771,8 @@ namespace llvm { bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const; - bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, - SelectionDAG &DAG) const; + bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, + SelectionDAG &DAG) const; /// SelectAddressRegRegOnly - Given the specified addressed, force it to be /// represented as an indexed [r+r] operation. @@ -928,9 +928,9 @@ namespace llvm { return true; } - bool decomposeMulByConstant(LLVMContext &Context, EVT VT, - SDValue C) const override; - + bool decomposeMulByConstant(LLVMContext &Context, EVT VT, + SDValue C) const override; + bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override { // Only handle float load/store pair because float(fpr) load/store @@ -987,9 +987,9 @@ namespace llvm { shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override; - // Keep the zero-extensions for arguments to libcalls. - bool shouldKeepZExtForFP16Conv() const override { return true; } - + // Keep the zero-extensions for arguments to libcalls. + bool shouldKeepZExtForFP16Conv() const override { return true; } + /// createFastISel - This method returns a target-specific FastISel object, /// or null if the target does not support "fast" instruction selection. FastISel *createFastISel(FunctionLoweringInfo &FuncInfo, @@ -1128,7 +1128,7 @@ namespace llvm { SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; @@ -1147,7 +1147,7 @@ namespace llvm { SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; @@ -1158,7 +1158,7 @@ namespace llvm { SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const; @@ -1275,10 +1275,10 @@ namespace llvm { bool Reciprocal) const override; SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override; - SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, - const DenormalMode &Mode) const override; - SDValue getSqrtResultForDenormInput(SDValue Operand, - SelectionDAG &DAG) const override; + SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, + const DenormalMode &Mode) const override; + SDValue getSqrtResultForDenormInput(SDValue Operand, + SelectionDAG &DAG) const override; unsigned combineRepeatedFPDivisors() const override; SDValue @@ -1317,8 +1317,8 @@ namespace llvm { bool isIntS16Immediate(SDNode *N, int16_t &Imm); bool isIntS16Immediate(SDValue Op, int16_t &Imm); - bool isIntS34Immediate(SDNode *N, int64_t &Imm); - bool isIntS34Immediate(SDValue Op, int64_t &Imm); + bool isIntS34Immediate(SDNode *N, int64_t &Imm); + bool isIntS34Immediate(SDValue Op, int64_t &Imm); bool convertToNonDenormSingle(APInt &ArgAPInt); bool convertToNonDenormSingle(APFloat &ArgAPFloat); diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstr64Bit.td index f5307bf690..03e9d6970a 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstr64Bit.td @@ -19,14 +19,14 @@ def s16imm64 : Operand<i64> { let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCS16ImmAsmOperand; let DecoderMethod = "decodeSImmOperand<16>"; - let OperandType = "OPERAND_IMMEDIATE"; + let OperandType = "OPERAND_IMMEDIATE"; } def u16imm64 : Operand<i64> { let PrintMethod = "printU16ImmOperand"; let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCU16ImmAsmOperand; let DecoderMethod = "decodeUImmOperand<16>"; - let OperandType = "OPERAND_IMMEDIATE"; + let OperandType = "OPERAND_IMMEDIATE"; } def s17imm64 : Operand<i64> { // This operand type is used for addis/lis to allow the assembler parser @@ -36,7 +36,7 @@ def s17imm64 : Operand<i64> { let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCS17ImmAsmOperand; let DecoderMethod = "decodeSImmOperand<16>"; - let OperandType = "OPERAND_IMMEDIATE"; + let OperandType = "OPERAND_IMMEDIATE"; } def tocentry : Operand<iPTR> { let MIOperandInfo = (ops i64imm:$imm); @@ -151,9 +151,9 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { def BL8_NOTOC : IForm<18, 0, 1, (outs), (ins calltarget:$func), "bl $func", IIC_BrB, []>; - def BL8_NOTOC_TLS : IForm<18, 0, 1, (outs), - (ins tlscall:$func), - "bl $func", IIC_BrB, []>; + def BL8_NOTOC_TLS : IForm<18, 0, 1, (outs), + (ins tlscall:$func), + "bl $func", IIC_BrB, []>; } } let Uses = [CTR8, RM] in { @@ -846,7 +846,7 @@ let Interpretation64Bit = 1, isCodeGenOnly = 1 in { def SETB8 : XForm_44<31, 128, (outs g8rc:$RT), (ins crrc:$BFA), "setb $RT, $BFA", IIC_IntGeneral>, isPPC64; } -def DARN : XForm_45<31, 755, (outs g8rc:$RT), (ins u2imm:$L), +def DARN : XForm_45<31, 755, (outs g8rc:$RT), (ins u2imm:$L), "darn $RT, $L", IIC_LdStLD>, isPPC64; def ADDPCIS : DXForm<19, 2, (outs g8rc:$RT), (ins i32imm:$D), "addpcis $RT, $D", IIC_BrB, []>, isPPC64; @@ -987,11 +987,11 @@ def : InstAlias<"cntlzw. $rA, $rS", (CNTLZW8_rec g8rc:$rA, g8rc:$rS)>; def : InstAlias<"mtxer $Rx", (MTSPR8 1, g8rc:$Rx)>; def : InstAlias<"mfxer $Rx", (MFSPR8 g8rc:$Rx, 1)>; -//Disable this alias on AIX for now because as does not support them. -let Predicates = [ModernAs] in { - def : InstAlias<"mtudscr $Rx", (MTSPR8 3, g8rc:$Rx)>; - def : InstAlias<"mfudscr $Rx", (MFSPR8 g8rc:$Rx, 3)>; -} +//Disable this alias on AIX for now because as does not support them. +let Predicates = [ModernAs] in { + def : InstAlias<"mtudscr $Rx", (MTSPR8 3, g8rc:$Rx)>; + def : InstAlias<"mfudscr $Rx", (MFSPR8 g8rc:$Rx, 3)>; +} def : InstAlias<"mfrtcu $Rx", (MFSPR8 g8rc:$Rx, 4)>; def : InstAlias<"mfrtcl $Rx", (MFSPR8 g8rc:$Rx, 5)>; @@ -1065,7 +1065,7 @@ def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src), def LWA : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src), "lwa $rD, $src", IIC_LdStLWA, [(set i64:$rD, - (DSFormSextLoadi32 iaddrX4:$src))]>, isPPC64, + (DSFormSextLoadi32 iaddrX4:$src))]>, isPPC64, PPC970_DGroup_Cracked; let Interpretation64Bit = 1, isCodeGenOnly = 1 in def LHAX8: XForm_1_memOp<31, 343, (outs g8rc:$rD), (ins memrr:$src), @@ -1176,7 +1176,7 @@ def LWZUX8 : XForm_1_memOp<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), let PPC970_Unit = 2 in { def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src), "ld $rD, $src", IIC_LdStLD, - [(set i64:$rD, (DSFormLoad iaddrX4:$src))]>, isPPC64; + [(set i64:$rD, (DSFormLoad iaddrX4:$src))]>, isPPC64; // The following four definitions are selected for small code model only. // Otherwise, we need to create two instructions to form a 32-bit offset, // so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select(). @@ -1271,36 +1271,36 @@ def ADDItlsgdL : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm6 [(set i64:$rD, (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; - -class GETtlsADDRPseudo <string asmstr> : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), - asmstr, - [(set i64:$rD, - (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>, - isPPC64; -class GETtlsldADDRPseudo <string asmstr> : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), - asmstr, - [(set i64:$rD, - (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>, - isPPC64; - -let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1 in { -// LR8 is a true define, while the rest of the Defs are clobbers. X3 is + +class GETtlsADDRPseudo <string asmstr> : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), + asmstr, + [(set i64:$rD, + (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>, + isPPC64; +class GETtlsldADDRPseudo <string asmstr> : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), + asmstr, + [(set i64:$rD, + (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>, + isPPC64; + +let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1 in { +// LR8 is a true define, while the rest of the Defs are clobbers. X3 is // explicitly defined when this op is created, so not mentioned here. // This is lowered to BL8_NOP_TLS by the assembly printer, so the size must be // correct because the branch select pass is relying on it. -let Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7], Size = 8 in -def GETtlsADDR : GETtlsADDRPseudo <"#GETtlsADDR">; -let Defs = [X0,X2,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7], Size = 8 in -def GETtlsADDRPCREL : GETtlsADDRPseudo <"#GETtlsADDRPCREL">; - -// LR8 is a true define, while the rest of the Defs are clobbers. X3 is -// explicitly defined when this op is created, so not mentioned here. -let Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in -def GETtlsldADDR : GETtlsldADDRPseudo <"#GETtlsldADDR">; -let Defs = [X0,X2,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in -def GETtlsldADDRPCREL : GETtlsldADDRPseudo <"#GETtlsldADDRPCREL">; -} - +let Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7], Size = 8 in +def GETtlsADDR : GETtlsADDRPseudo <"#GETtlsADDR">; +let Defs = [X0,X2,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7], Size = 8 in +def GETtlsADDRPCREL : GETtlsADDRPseudo <"#GETtlsADDRPCREL">; + +// LR8 is a true define, while the rest of the Defs are clobbers. X3 is +// explicitly defined when this op is created, so not mentioned here. +let Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in +def GETtlsldADDR : GETtlsldADDRPseudo <"#GETtlsldADDR">; +let Defs = [X0,X2,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in +def GETtlsldADDRPCREL : GETtlsldADDRPseudo <"#GETtlsldADDRPCREL">; +} + // Combined op for ADDItlsgdL and GETtlsADDR, late expanded. X3 and LR8 // are true defines while the rest of the Defs are clobbers. let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, @@ -1348,11 +1348,11 @@ def ADDIdtprelL : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm [(set i64:$rD, (PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def PADDIdtprel : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), - "#PADDIdtprel", - [(set i64:$rD, - (PPCpaddiDtprel i64:$reg, tglobaltlsaddr:$disp))]>, - isPPC64; +def PADDIdtprel : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), + "#PADDIdtprel", + [(set i64:$rD, + (PPCpaddiDtprel i64:$reg, tglobaltlsaddr:$disp))]>, + isPPC64; let PPC970_Unit = 2 in { let Interpretation64Bit = 1, isCodeGenOnly = 1 in { @@ -1383,7 +1383,7 @@ def STWX8 : XForm_8_memOp<31, 151, (outs), (ins g8rc:$rS, memrr:$dst), // Normal 8-byte stores. def STD : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst), "std $rS, $dst", IIC_LdStSTD, - [(DSFormStore i64:$rS, iaddrX4:$dst)]>, isPPC64; + [(DSFormStore i64:$rS, iaddrX4:$dst)]>, isPPC64; def STDX : XForm_8_memOp<31, 149, (outs), (ins g8rc:$rS, memrr:$dst), "stdx $rS, $dst", IIC_LdStSTD, [(store i64:$rS, xaddrX4:$dst)]>, isPPC64, @@ -1450,7 +1450,7 @@ def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), (STHU8 $rS, iaddroff:$ptroff, $ptrreg)>; def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), (STWU8 $rS, iaddroff:$ptroff, $ptrreg)>; -def : Pat<(DSFormPreStore i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), +def : Pat<(DSFormPreStore i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), (STDU $rS, iaddroff:$ptroff, $ptrreg)>; def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff), @@ -1468,11 +1468,11 @@ def : Pat<(pre_store i64:$rS, iPTR:$ptrreg, iPTR:$ptroff), // -let PPC970_Unit = 3, hasSideEffects = 0, mayRaiseFPException = 1, +let PPC970_Unit = 3, hasSideEffects = 0, mayRaiseFPException = 1, Uses = [RM] in { // FPU Operations. defm FCFID : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB), "fcfid", "$frD, $frB", IIC_FPGeneral, - [(set f64:$frD, (PPCany_fcfid f64:$frB))]>, isPPC64; + [(set f64:$frD, (PPCany_fcfid f64:$frB))]>, isPPC64; defm FCTID : XForm_26r<63, 814, (outs f8rc:$frD), (ins f8rc:$frB), "fctid", "$frD, $frB", IIC_FPGeneral, []>, isPPC64; @@ -1481,23 +1481,23 @@ defm FCTIDU : XForm_26r<63, 942, (outs f8rc:$frD), (ins f8rc:$frB), []>, isPPC64; defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$frD), (ins f8rc:$frB), "fctidz", "$frD, $frB", IIC_FPGeneral, - [(set f64:$frD, (PPCany_fctidz f64:$frB))]>, isPPC64; + [(set f64:$frD, (PPCany_fctidz f64:$frB))]>, isPPC64; defm FCFIDU : XForm_26r<63, 974, (outs f8rc:$frD), (ins f8rc:$frB), "fcfidu", "$frD, $frB", IIC_FPGeneral, - [(set f64:$frD, (PPCany_fcfidu f64:$frB))]>, isPPC64; + [(set f64:$frD, (PPCany_fcfidu f64:$frB))]>, isPPC64; defm FCFIDS : XForm_26r<59, 846, (outs f4rc:$frD), (ins f8rc:$frB), "fcfids", "$frD, $frB", IIC_FPGeneral, - [(set f32:$frD, (PPCany_fcfids f64:$frB))]>, isPPC64; + [(set f32:$frD, (PPCany_fcfids f64:$frB))]>, isPPC64; defm FCFIDUS : XForm_26r<59, 974, (outs f4rc:$frD), (ins f8rc:$frB), "fcfidus", "$frD, $frB", IIC_FPGeneral, - [(set f32:$frD, (PPCany_fcfidus f64:$frB))]>, isPPC64; + [(set f32:$frD, (PPCany_fcfidus f64:$frB))]>, isPPC64; defm FCTIDUZ : XForm_26r<63, 943, (outs f8rc:$frD), (ins f8rc:$frB), "fctiduz", "$frD, $frB", IIC_FPGeneral, - [(set f64:$frD, (PPCany_fctiduz f64:$frB))]>, isPPC64; + [(set f64:$frD, (PPCany_fctiduz f64:$frB))]>, isPPC64; defm FCTIWUZ : XForm_26r<63, 143, (outs f8rc:$frD), (ins f8rc:$frB), "fctiwuz", "$frD, $frB", IIC_FPGeneral, - [(set f64:$frD, (PPCany_fctiwuz f64:$frB))]>, isPPC64; + [(set f64:$frD, (PPCany_fctiwuz f64:$frB))]>, isPPC64; } @@ -1594,11 +1594,11 @@ def : Pat<(add i64:$in, (PPChi tblockaddress:$g, 0)), // Patterns to match r+r indexed loads and stores for // addresses without at least 4-byte alignment. -def : Pat<(i64 (NonDSFormSextLoadi32 xoaddr:$src)), +def : Pat<(i64 (NonDSFormSextLoadi32 xoaddr:$src)), (LWAX xoaddr:$src)>; -def : Pat<(i64 (NonDSFormLoad xoaddr:$src)), +def : Pat<(i64 (NonDSFormLoad xoaddr:$src)), (LDX xoaddr:$src)>; -def : Pat<(NonDSFormStore i64:$rS, xoaddr:$dst), +def : Pat<(NonDSFormStore i64:$rS, xoaddr:$dst), (STDX $rS, xoaddr:$dst)>; // 64-bits atomic loads and stores @@ -1609,11 +1609,11 @@ def : Pat<(atomic_store_64 iaddrX4:$ptr, i64:$val), (STD g8rc:$val, memrix:$ptr def : Pat<(atomic_store_64 xaddrX4:$ptr, i64:$val), (STDX g8rc:$val, memrr:$ptr)>; let Predicates = [IsISA3_0] in { -// DARN (deliver random number) -// L=0 for 32-bit, L=1 for conditioned random, L=2 for raw random -def : Pat<(int_ppc_darn32), (EXTRACT_SUBREG (DARN 0), sub_32)>; -def : Pat<(int_ppc_darn), (DARN 1)>; -def : Pat<(int_ppc_darnraw), (DARN 2)>; +// DARN (deliver random number) +// L=0 for 32-bit, L=1 for conditioned random, L=2 for raw random +def : Pat<(int_ppc_darn32), (EXTRACT_SUBREG (DARN 0), sub_32)>; +def : Pat<(int_ppc_darn), (DARN 1)>; +def : Pat<(int_ppc_darnraw), (DARN 2)>; class X_L1_RA5_RB5<bits<6> opcode, bits<10> xo, string opc, RegisterOperand ty, InstrItinClass itin, list<dag> pattern> diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrAltivec.td index b6b1f3d442..1a34aa0931 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrAltivec.td @@ -404,14 +404,14 @@ let isCodeGenOnly = 1 in { Deprecated<DeprecatedDST>; } -let hasSideEffects = 1 in { - def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins), - "mfvscr $vD", IIC_LdStStore, - [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>; - def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB), - "mtvscr $vB", IIC_LdStLoad, - [(int_ppc_altivec_mtvscr v4i32:$vB)]>; -} +let hasSideEffects = 1 in { + def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins), + "mfvscr $vD", IIC_LdStStore, + [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>; + def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB), + "mtvscr $vB", IIC_LdStLoad, + [(int_ppc_altivec_mtvscr v4i32:$vB)]>; +} let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in { // Loads. def LVEBX: XForm_1_memOp<31, 7, (outs vrrc:$vD), (ins memrr:$src), @@ -471,11 +471,11 @@ def VNMSUBFP: VAForm_1<47, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB), "vnmsubfp $vD, $vA, $vC, $vB", IIC_VecFP, [(set v4f32:$vD, (fneg (fma v4f32:$vA, v4f32:$vC, (fneg v4f32:$vB))))]>; -let hasSideEffects = 1 in { - def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>; - def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs, - v8i16>; -} +let hasSideEffects = 1 in { + def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>; + def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs, + v8i16>; +} def VMLADDUHM : VA1a_Int_Ty<34, "vmladduhm", int_ppc_altivec_vmladduhm, v8i16>; } // isCommutable @@ -615,12 +615,12 @@ def VMSUMUBM : VA1a_Int_Ty3<36, "vmsumubm", int_ppc_altivec_vmsumubm, v4i32, v16i8, v4i32>; def VMSUMUHM : VA1a_Int_Ty3<38, "vmsumuhm", int_ppc_altivec_vmsumuhm, v4i32, v8i16, v4i32>; -let hasSideEffects = 1 in { - def VMSUMSHS : VA1a_Int_Ty3<41, "vmsumshs", int_ppc_altivec_vmsumshs, - v4i32, v8i16, v4i32>; - def VMSUMUHS : VA1a_Int_Ty3<39, "vmsumuhs", int_ppc_altivec_vmsumuhs, - v4i32, v8i16, v4i32>; -} +let hasSideEffects = 1 in { + def VMSUMSHS : VA1a_Int_Ty3<41, "vmsumshs", int_ppc_altivec_vmsumshs, + v4i32, v8i16, v4i32>; + def VMSUMUHS : VA1a_Int_Ty3<39, "vmsumuhs", int_ppc_altivec_vmsumuhs, + v4i32, v8i16, v4i32>; +} let isCommutable = 1 in { def VMULESB : VX1_Int_Ty2<776, "vmulesb", int_ppc_altivec_vmulesb, @@ -670,17 +670,17 @@ def VSUBUBS : VX1_Int_Ty<1536, "vsububs" , int_ppc_altivec_vsububs, v16i8>; def VSUBUHS : VX1_Int_Ty<1600, "vsubuhs" , int_ppc_altivec_vsubuhs, v8i16>; def VSUBUWS : VX1_Int_Ty<1664, "vsubuws" , int_ppc_altivec_vsubuws, v4i32>; -let hasSideEffects = 1 in { - def VSUMSWS : VX1_Int_Ty<1928, "vsumsws" , int_ppc_altivec_vsumsws, v4i32>; - def VSUM2SWS: VX1_Int_Ty<1672, "vsum2sws", int_ppc_altivec_vsum2sws, v4i32>; +let hasSideEffects = 1 in { + def VSUMSWS : VX1_Int_Ty<1928, "vsumsws" , int_ppc_altivec_vsumsws, v4i32>; + def VSUM2SWS: VX1_Int_Ty<1672, "vsum2sws", int_ppc_altivec_vsum2sws, v4i32>; - def VSUM4SBS: VX1_Int_Ty3<1800, "vsum4sbs", int_ppc_altivec_vsum4sbs, - v4i32, v16i8, v4i32>; - def VSUM4SHS: VX1_Int_Ty3<1608, "vsum4shs", int_ppc_altivec_vsum4shs, - v4i32, v8i16, v4i32>; - def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs, - v4i32, v16i8, v4i32>; -} + def VSUM4SBS: VX1_Int_Ty3<1800, "vsum4sbs", int_ppc_altivec_vsum4sbs, + v4i32, v16i8, v4i32>; + def VSUM4SHS: VX1_Int_Ty3<1608, "vsum4shs", int_ppc_altivec_vsum4shs, + v4i32, v8i16, v4i32>; + def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs, + v4i32, v16i8, v4i32>; +} def VNOR : VXForm_1<1284, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vnor $vD, $vA, $vB", IIC_VecFP, @@ -749,20 +749,20 @@ def VSPLTISW : VXForm_3<908, (outs vrrc:$vD), (ins s5imm:$SIMM), // Vector Pack. def VPKPX : VX1_Int_Ty2<782, "vpkpx", int_ppc_altivec_vpkpx, v8i16, v4i32>; -let hasSideEffects = 1 in { - def VPKSHSS : VX1_Int_Ty2<398, "vpkshss", int_ppc_altivec_vpkshss, - v16i8, v8i16>; - def VPKSHUS : VX1_Int_Ty2<270, "vpkshus", int_ppc_altivec_vpkshus, - v16i8, v8i16>; - def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss, - v8i16, v4i32>; - def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus, - v8i16, v4i32>; - def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus, - v16i8, v8i16>; - def VPKUWUS : VX1_Int_Ty2<206, "vpkuwus", int_ppc_altivec_vpkuwus, - v8i16, v4i32>; -} +let hasSideEffects = 1 in { + def VPKSHSS : VX1_Int_Ty2<398, "vpkshss", int_ppc_altivec_vpkshss, + v16i8, v8i16>; + def VPKSHUS : VX1_Int_Ty2<270, "vpkshus", int_ppc_altivec_vpkshus, + v16i8, v8i16>; + def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss, + v8i16, v4i32>; + def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus, + v8i16, v4i32>; + def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus, + v16i8, v8i16>; + def VPKUWUS : VX1_Int_Ty2<206, "vpkuwus", int_ppc_altivec_vpkuwus, + v8i16, v4i32>; +} def VPKUHUM : VXForm_1<14, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vpkuhum $vD, $vA, $vB", IIC_VecFP, [(set v16i8:$vD, @@ -793,47 +793,47 @@ class VCMP<bits<10> xo, string asmstr, ValueType Ty> : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), asmstr, IIC_VecFPCompare, [(set Ty:$vD, (Ty (PPCvcmp Ty:$vA, Ty:$vB, xo)))]>; -class VCMP_rec<bits<10> xo, string asmstr, ValueType Ty> +class VCMP_rec<bits<10> xo, string asmstr, ValueType Ty> : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), asmstr, IIC_VecFPCompare, - [(set Ty:$vD, (Ty (PPCvcmp_rec Ty:$vA, Ty:$vB, xo)))]> { + [(set Ty:$vD, (Ty (PPCvcmp_rec Ty:$vA, Ty:$vB, xo)))]> { let Defs = [CR6]; let RC = 1; } // f32 element comparisons.0 def VCMPBFP : VCMP <966, "vcmpbfp $vD, $vA, $vB" , v4f32>; -def VCMPBFP_rec : VCMP_rec<966, "vcmpbfp. $vD, $vA, $vB" , v4f32>; +def VCMPBFP_rec : VCMP_rec<966, "vcmpbfp. $vD, $vA, $vB" , v4f32>; def VCMPEQFP : VCMP <198, "vcmpeqfp $vD, $vA, $vB" , v4f32>; -def VCMPEQFP_rec : VCMP_rec<198, "vcmpeqfp. $vD, $vA, $vB", v4f32>; +def VCMPEQFP_rec : VCMP_rec<198, "vcmpeqfp. $vD, $vA, $vB", v4f32>; def VCMPGEFP : VCMP <454, "vcmpgefp $vD, $vA, $vB" , v4f32>; -def VCMPGEFP_rec : VCMP_rec<454, "vcmpgefp. $vD, $vA, $vB", v4f32>; +def VCMPGEFP_rec : VCMP_rec<454, "vcmpgefp. $vD, $vA, $vB", v4f32>; def VCMPGTFP : VCMP <710, "vcmpgtfp $vD, $vA, $vB" , v4f32>; -def VCMPGTFP_rec : VCMP_rec<710, "vcmpgtfp. $vD, $vA, $vB", v4f32>; +def VCMPGTFP_rec : VCMP_rec<710, "vcmpgtfp. $vD, $vA, $vB", v4f32>; // i8 element comparisons. def VCMPEQUB : VCMP < 6, "vcmpequb $vD, $vA, $vB" , v16i8>; -def VCMPEQUB_rec : VCMP_rec< 6, "vcmpequb. $vD, $vA, $vB", v16i8>; +def VCMPEQUB_rec : VCMP_rec< 6, "vcmpequb. $vD, $vA, $vB", v16i8>; def VCMPGTSB : VCMP <774, "vcmpgtsb $vD, $vA, $vB" , v16i8>; -def VCMPGTSB_rec : VCMP_rec<774, "vcmpgtsb. $vD, $vA, $vB", v16i8>; +def VCMPGTSB_rec : VCMP_rec<774, "vcmpgtsb. $vD, $vA, $vB", v16i8>; def VCMPGTUB : VCMP <518, "vcmpgtub $vD, $vA, $vB" , v16i8>; -def VCMPGTUB_rec : VCMP_rec<518, "vcmpgtub. $vD, $vA, $vB", v16i8>; +def VCMPGTUB_rec : VCMP_rec<518, "vcmpgtub. $vD, $vA, $vB", v16i8>; // i16 element comparisons. def VCMPEQUH : VCMP < 70, "vcmpequh $vD, $vA, $vB" , v8i16>; -def VCMPEQUH_rec : VCMP_rec< 70, "vcmpequh. $vD, $vA, $vB", v8i16>; +def VCMPEQUH_rec : VCMP_rec< 70, "vcmpequh. $vD, $vA, $vB", v8i16>; def VCMPGTSH : VCMP <838, "vcmpgtsh $vD, $vA, $vB" , v8i16>; -def VCMPGTSH_rec : VCMP_rec<838, "vcmpgtsh. $vD, $vA, $vB", v8i16>; +def VCMPGTSH_rec : VCMP_rec<838, "vcmpgtsh. $vD, $vA, $vB", v8i16>; def VCMPGTUH : VCMP <582, "vcmpgtuh $vD, $vA, $vB" , v8i16>; -def VCMPGTUH_rec : VCMP_rec<582, "vcmpgtuh. $vD, $vA, $vB", v8i16>; +def VCMPGTUH_rec : VCMP_rec<582, "vcmpgtuh. $vD, $vA, $vB", v8i16>; // i32 element comparisons. def VCMPEQUW : VCMP <134, "vcmpequw $vD, $vA, $vB" , v4i32>; -def VCMPEQUW_rec : VCMP_rec<134, "vcmpequw. $vD, $vA, $vB", v4i32>; +def VCMPEQUW_rec : VCMP_rec<134, "vcmpequw. $vD, $vA, $vB", v4i32>; def VCMPGTSW : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>; -def VCMPGTSW_rec : VCMP_rec<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>; +def VCMPGTSW_rec : VCMP_rec<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>; def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>; -def VCMPGTUW_rec : VCMP_rec<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>; +def VCMPGTUW_rec : VCMP_rec<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>; let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1 in { @@ -942,18 +942,18 @@ def : Pat<(v1i128 (bitconvert (v4i32 VRRC:$src))), (v1i128 VRRC:$src)>; def : Pat<(v1i128 (bitconvert (v4f32 VRRC:$src))), (v1i128 VRRC:$src)>; def : Pat<(v1i128 (bitconvert (v2i64 VRRC:$src))), (v1i128 VRRC:$src)>; -def : Pat<(f128 (bitconvert (v16i8 VRRC:$src))), (f128 VRRC:$src)>; -def : Pat<(f128 (bitconvert (v8i16 VRRC:$src))), (f128 VRRC:$src)>; -def : Pat<(f128 (bitconvert (v4i32 VRRC:$src))), (f128 VRRC:$src)>; -def : Pat<(f128 (bitconvert (v4f32 VRRC:$src))), (f128 VRRC:$src)>; -def : Pat<(f128 (bitconvert (v2f64 VRRC:$src))), (f128 VRRC:$src)>; - -def : Pat<(v16i8 (bitconvert (f128 VRRC:$src))), (v16i8 VRRC:$src)>; -def : Pat<(v8i16 (bitconvert (f128 VRRC:$src))), (v8i16 VRRC:$src)>; -def : Pat<(v4i32 (bitconvert (f128 VRRC:$src))), (v4i32 VRRC:$src)>; -def : Pat<(v4f32 (bitconvert (f128 VRRC:$src))), (v4f32 VRRC:$src)>; -def : Pat<(v2f64 (bitconvert (f128 VRRC:$src))), (v2f64 VRRC:$src)>; - +def : Pat<(f128 (bitconvert (v16i8 VRRC:$src))), (f128 VRRC:$src)>; +def : Pat<(f128 (bitconvert (v8i16 VRRC:$src))), (f128 VRRC:$src)>; +def : Pat<(f128 (bitconvert (v4i32 VRRC:$src))), (f128 VRRC:$src)>; +def : Pat<(f128 (bitconvert (v4f32 VRRC:$src))), (f128 VRRC:$src)>; +def : Pat<(f128 (bitconvert (v2f64 VRRC:$src))), (f128 VRRC:$src)>; + +def : Pat<(v16i8 (bitconvert (f128 VRRC:$src))), (v16i8 VRRC:$src)>; +def : Pat<(v8i16 (bitconvert (f128 VRRC:$src))), (v8i16 VRRC:$src)>; +def : Pat<(v4i32 (bitconvert (f128 VRRC:$src))), (v4i32 VRRC:$src)>; +def : Pat<(v4f32 (bitconvert (f128 VRRC:$src))), (v4f32 VRRC:$src)>; +def : Pat<(v2f64 (bitconvert (f128 VRRC:$src))), (v2f64 VRRC:$src)>; + // Max/Min def : Pat<(v16i8 (umax v16i8:$src1, v16i8:$src2)), (v16i8 (VMAXUB $src1, $src2))>; @@ -1312,11 +1312,11 @@ def VORC : VXForm_1<1348, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), // i64 element comparisons. def VCMPEQUD : VCMP <199, "vcmpequd $vD, $vA, $vB" , v2i64>; -def VCMPEQUD_rec : VCMP_rec<199, "vcmpequd. $vD, $vA, $vB", v2i64>; +def VCMPEQUD_rec : VCMP_rec<199, "vcmpequd. $vD, $vA, $vB", v2i64>; def VCMPGTSD : VCMP <967, "vcmpgtsd $vD, $vA, $vB" , v2i64>; -def VCMPGTSD_rec : VCMP_rec<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>; +def VCMPGTSD_rec : VCMP_rec<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>; def VCMPGTUD : VCMP <711, "vcmpgtud $vD, $vA, $vB" , v2i64>; -def VCMPGTUD_rec : VCMP_rec<711, "vcmpgtud. $vD, $vA, $vB", v2i64>; +def VCMPGTUD_rec : VCMP_rec<711, "vcmpgtud. $vD, $vA, $vB", v2i64>; // The cryptography instructions that do not require Category:Vector.Crypto def VPMSUMB : VX1_Int_Ty<1032, "vpmsumb", @@ -1327,18 +1327,18 @@ def VPMSUMW : VX1_Int_Ty<1160, "vpmsumw", int_ppc_altivec_crypto_vpmsumw, v4i32>; def VPMSUMD : VX1_Int_Ty<1224, "vpmsumd", int_ppc_altivec_crypto_vpmsumd, v2i64>; -def VPERMXOR : VAForm_1<45, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB, vrrc:$VC), - "vpermxor $VD, $VA, $VB, $VC", IIC_VecFP, []>; +def VPERMXOR : VAForm_1<45, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB, vrrc:$VC), + "vpermxor $VD, $VA, $VB, $VC", IIC_VecFP, []>; // Vector doubleword integer pack and unpack. -let hasSideEffects = 1 in { - def VPKSDSS : VX1_Int_Ty2<1486, "vpksdss", int_ppc_altivec_vpksdss, - v4i32, v2i64>; - def VPKSDUS : VX1_Int_Ty2<1358, "vpksdus", int_ppc_altivec_vpksdus, - v4i32, v2i64>; - def VPKUDUS : VX1_Int_Ty2<1230, "vpkudus", int_ppc_altivec_vpkudus, - v4i32, v2i64>; -} +let hasSideEffects = 1 in { + def VPKSDSS : VX1_Int_Ty2<1486, "vpksdss", int_ppc_altivec_vpksdss, + v4i32, v2i64>; + def VPKSDUS : VX1_Int_Ty2<1358, "vpksdus", int_ppc_altivec_vpksdus, + v4i32, v2i64>; + def VPKUDUS : VX1_Int_Ty2<1230, "vpkudus", int_ppc_altivec_vpkudus, + v4i32, v2i64>; +} def VPKUDUM : VXForm_1<1102, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vpkudum $vD, $vA, $vB", IIC_VecFP, [(set v16i8:$vD, @@ -1386,21 +1386,21 @@ def VMSUMUDM : VA1a_Int_Ty3<35, "vmsumudm", int_ppc_altivec_vmsumudm, // i8 element comparisons. def VCMPNEB : VCMP < 7, "vcmpneb $vD, $vA, $vB" , v16i8>; -def VCMPNEB_rec : VCMP_rec < 7, "vcmpneb. $vD, $vA, $vB" , v16i8>; +def VCMPNEB_rec : VCMP_rec < 7, "vcmpneb. $vD, $vA, $vB" , v16i8>; def VCMPNEZB : VCMP <263, "vcmpnezb $vD, $vA, $vB" , v16i8>; -def VCMPNEZB_rec : VCMP_rec<263, "vcmpnezb. $vD, $vA, $vB", v16i8>; +def VCMPNEZB_rec : VCMP_rec<263, "vcmpnezb. $vD, $vA, $vB", v16i8>; // i16 element comparisons. def VCMPNEH : VCMP < 71, "vcmpneh $vD, $vA, $vB" , v8i16>; -def VCMPNEH_rec : VCMP_rec< 71, "vcmpneh. $vD, $vA, $vB" , v8i16>; +def VCMPNEH_rec : VCMP_rec< 71, "vcmpneh. $vD, $vA, $vB" , v8i16>; def VCMPNEZH : VCMP <327, "vcmpnezh $vD, $vA, $vB" , v8i16>; -def VCMPNEZH_rec : VCMP_rec<327, "vcmpnezh. $vD, $vA, $vB", v8i16>; +def VCMPNEZH_rec : VCMP_rec<327, "vcmpnezh. $vD, $vA, $vB", v8i16>; // i32 element comparisons. def VCMPNEW : VCMP <135, "vcmpnew $vD, $vA, $vB" , v4i32>; -def VCMPNEW_rec : VCMP_rec<135, "vcmpnew. $vD, $vA, $vB" , v4i32>; +def VCMPNEW_rec : VCMP_rec<135, "vcmpnew. $vD, $vA, $vB" , v4i32>; def VCMPNEZW : VCMP <391, "vcmpnezw $vD, $vA, $vB" , v4i32>; -def VCMPNEZW_rec : VCMP_rec<391, "vcmpnezw. $vD, $vA, $vB", v4i32>; +def VCMPNEZW_rec : VCMP_rec<391, "vcmpnezw. $vD, $vA, $vB", v4i32>; // VX-Form: [PO VRT / UIM VRB XO]. // We use VXForm_1 to implement it, that is, we use "VRA" (5 bit) to represent @@ -1472,16 +1472,16 @@ def VCTZD : VX_VT5_EO5_VB5<1538, 31, "vctzd", [(set v2i64:$vD, (cttz v2i64:$vB))]>; // Vector Extend Sign -def VEXTSB2W : VX_VT5_EO5_VB5<1538, 16, "vextsb2w", - [(set v4i32:$vD, (int_ppc_altivec_vextsb2w v16i8:$vB))]>; -def VEXTSH2W : VX_VT5_EO5_VB5<1538, 17, "vextsh2w", - [(set v4i32:$vD, (int_ppc_altivec_vextsh2w v8i16:$vB))]>; -def VEXTSB2D : VX_VT5_EO5_VB5<1538, 24, "vextsb2d", - [(set v2i64:$vD, (int_ppc_altivec_vextsb2d v16i8:$vB))]>; -def VEXTSH2D : VX_VT5_EO5_VB5<1538, 25, "vextsh2d", - [(set v2i64:$vD, (int_ppc_altivec_vextsh2d v8i16:$vB))]>; -def VEXTSW2D : VX_VT5_EO5_VB5<1538, 26, "vextsw2d", - [(set v2i64:$vD, (int_ppc_altivec_vextsw2d v4i32:$vB))]>; +def VEXTSB2W : VX_VT5_EO5_VB5<1538, 16, "vextsb2w", + [(set v4i32:$vD, (int_ppc_altivec_vextsb2w v16i8:$vB))]>; +def VEXTSH2W : VX_VT5_EO5_VB5<1538, 17, "vextsh2w", + [(set v4i32:$vD, (int_ppc_altivec_vextsh2w v8i16:$vB))]>; +def VEXTSB2D : VX_VT5_EO5_VB5<1538, 24, "vextsb2d", + [(set v2i64:$vD, (int_ppc_altivec_vextsb2d v16i8:$vB))]>; +def VEXTSH2D : VX_VT5_EO5_VB5<1538, 25, "vextsh2d", + [(set v2i64:$vD, (int_ppc_altivec_vextsh2d v8i16:$vB))]>; +def VEXTSW2D : VX_VT5_EO5_VB5<1538, 26, "vextsw2d", + [(set v2i64:$vD, (int_ppc_altivec_vextsw2d v4i32:$vB))]>; let isCodeGenOnly = 1 in { def VEXTSB2Ws : VX_VT5_EO5_VB5s<1538, 16, "vextsb2w", []>; def VEXTSH2Ws : VX_VT5_EO5_VB5s<1538, 17, "vextsh2w", []>; diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrFormats.td index 0acd55b12d..646efe64a2 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrFormats.td +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrFormats.td @@ -637,10 +637,10 @@ class XForm_17<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, } class XForm_17a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list<dag> pattern> + InstrItinClass itin, list<dag> pattern> : XForm_17<opcode, xo, OOL, IOL, asmstr, itin > { let FRA = 0; - let Pattern = pattern; + let Pattern = pattern; } class XForm_18<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrHTM.td b/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrHTM.td index 0504695469..e59a08774d 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrHTM.td +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrHTM.td @@ -164,8 +164,8 @@ def : Pat<(int_ppc_tsuspend), (TSR 0)>; def : Pat<(i64 (int_ppc_ttest)), - (i64 (INSERT_SUBREG - (i64 (IMPLICIT_DEF)), (TABORTWCI 0, (LI 0), 0), sub_32))>; + (i64 (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), (TABORTWCI 0, (LI 0), 0), sub_32))>; } // [HasHTM] diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrInfo.cpp index 4b23a63eb0..9e3c6c569b 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -21,15 +21,15 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervals.h" -#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/CodeGen/RegisterClassInfo.h" -#include "llvm/CodeGen/RegisterPressure.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/StackMaps.h" @@ -76,14 +76,14 @@ static cl::opt<bool> UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden, cl::desc("Use the old (incorrect) instruction latency calculation")); -static cl::opt<float> - FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5), - cl::desc("register pressure factor for the transformations.")); - -static cl::opt<bool> EnableFMARegPressureReduction( - "ppc-fma-rp-reduction", cl::Hidden, cl::init(true), - cl::desc("enable register pressure reduce in machine combiner pass.")); - +static cl::opt<float> + FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5), + cl::desc("register pressure factor for the transformations.")); + +static cl::opt<bool> EnableFMARegPressureReduction( + "ppc-fma-rp-reduction", cl::Hidden, cl::init(true), + cl::desc("enable register pressure reduce in machine combiner pass.")); + // Pin the vtable to this file. void PPCInstrInfo::anchor() {} @@ -289,23 +289,23 @@ bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { #define InfoArrayIdxFMULInst 2 #define InfoArrayIdxAddOpIdx 3 #define InfoArrayIdxMULOpIdx 4 -#define InfoArrayIdxFSubInst 5 +#define InfoArrayIdxFSubInst 5 // Array keeps info for FMA instructions: // Index 0(InfoArrayIdxFMAInst): FMA instruction; -// Index 1(InfoArrayIdxFAddInst): ADD instruction associated with FMA; -// Index 2(InfoArrayIdxFMULInst): MUL instruction associated with FMA; +// Index 1(InfoArrayIdxFAddInst): ADD instruction associated with FMA; +// Index 2(InfoArrayIdxFMULInst): MUL instruction associated with FMA; // Index 3(InfoArrayIdxAddOpIdx): ADD operand index in FMA operands; // Index 4(InfoArrayIdxMULOpIdx): first MUL operand index in FMA operands; -// second MUL operand index is plus 1; -// Index 5(InfoArrayIdxFSubInst): SUB instruction associated with FMA. -static const uint16_t FMAOpIdxInfo[][6] = { +// second MUL operand index is plus 1; +// Index 5(InfoArrayIdxFSubInst): SUB instruction associated with FMA. +static const uint16_t FMAOpIdxInfo[][6] = { // FIXME: Add more FMA instructions like XSNMADDADP and so on. - {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2, PPC::XSSUBDP}, - {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2, PPC::XSSUBSP}, - {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2, PPC::XVSUBDP}, - {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2, PPC::XVSUBSP}, - {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1, PPC::FSUB}, - {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1, PPC::FSUBS}}; + {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2, PPC::XSSUBDP}, + {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2, PPC::XSSUBSP}, + {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2, PPC::XVSUBDP}, + {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2, PPC::XVSUBSP}, + {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1, PPC::FSUB}, + {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1, PPC::FSUBS}}; // Check if an opcode is a FMA instruction. If it is, return the index in array // FMAOpIdxInfo. Otherwise, return -1. @@ -316,8 +316,8 @@ int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const { return -1; } -// On PowerPC target, we have two kinds of patterns related to FMA: -// 1: Improve ILP. +// On PowerPC target, we have two kinds of patterns related to FMA: +// 1: Improve ILP. // Try to reassociate FMA chains like below: // // Pattern 1: @@ -341,35 +341,35 @@ int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const { // // breaking the dependency between A and B, allowing FMA to be executed in // parallel (or back-to-back in a pipeline) instead of depending on each other. -// -// 2: Reduce register pressure. -// Try to reassociate FMA with FSUB and a constant like below: -// C is a floatint point const. -// -// Pattern 1: -// A = FSUB X, Y (Leaf) -// D = FMA B, C, A (Root) -// --> -// A = FMA B, Y, -C -// D = FMA A, X, C -// -// Pattern 2: -// A = FSUB X, Y (Leaf) -// D = FMA B, A, C (Root) -// --> -// A = FMA B, Y, -C -// D = FMA A, X, C -// -// Before the transformation, A must be assigned with different hardware -// register with D. After the transformation, A and D must be assigned with -// same hardware register due to TIE attricute of FMA instructions. -// +// +// 2: Reduce register pressure. +// Try to reassociate FMA with FSUB and a constant like below: +// C is a floatint point const. +// +// Pattern 1: +// A = FSUB X, Y (Leaf) +// D = FMA B, C, A (Root) +// --> +// A = FMA B, Y, -C +// D = FMA A, X, C +// +// Pattern 2: +// A = FSUB X, Y (Leaf) +// D = FMA B, A, C (Root) +// --> +// A = FMA B, Y, -C +// D = FMA A, X, C +// +// Before the transformation, A must be assigned with different hardware +// register with D. After the transformation, A and D must be assigned with +// same hardware register due to TIE attricute of FMA instructions. +// bool PPCInstrInfo::getFMAPatterns( - MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns, - bool DoRegPressureReduce) const { + MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns, + bool DoRegPressureReduce) const { MachineBasicBlock *MBB = Root.getParent(); - const MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo(); - const TargetRegisterInfo *TRI = &getRegisterInfo(); + const MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo(); + const TargetRegisterInfo *TRI = &getRegisterInfo(); auto IsAllOpsVirtualReg = [](const MachineInstr &Instr) { for (const auto &MO : Instr.explicit_operands()) @@ -378,10 +378,35 @@ bool PPCInstrInfo::getFMAPatterns( return true; }; - auto IsReassociableAddOrSub = [&](const MachineInstr &Instr, - unsigned OpType) { - if (Instr.getOpcode() != - FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())][OpType]) + auto IsReassociableAddOrSub = [&](const MachineInstr &Instr, + unsigned OpType) { + if (Instr.getOpcode() != + FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())][OpType]) + return false; + + // Instruction can be reassociated. + // fast math flags may prohibit reassociation. + if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) && + Instr.getFlag(MachineInstr::MIFlag::FmNsz))) + return false; + + // Instruction operands are virtual registers for reassociation. + if (!IsAllOpsVirtualReg(Instr)) + return false; + + // For register pressure reassociation, the FSub must have only one use as + // we want to delete the sub to save its def. + if (OpType == InfoArrayIdxFSubInst && + !MRI->hasOneNonDBGUse(Instr.getOperand(0).getReg())) + return false; + + return true; + }; + + auto IsReassociableFMA = [&](const MachineInstr &Instr, int16_t &AddOpIdx, + int16_t &MulOpIdx, bool IsLeaf) { + int16_t Idx = getFMAOpIdxInfo(Instr.getOpcode()); + if (Idx < 0) return false; // Instruction can be reassociated. @@ -394,381 +419,356 @@ bool PPCInstrInfo::getFMAPatterns( if (!IsAllOpsVirtualReg(Instr)) return false; - // For register pressure reassociation, the FSub must have only one use as - // we want to delete the sub to save its def. - if (OpType == InfoArrayIdxFSubInst && - !MRI->hasOneNonDBGUse(Instr.getOperand(0).getReg())) - return false; - - return true; - }; - - auto IsReassociableFMA = [&](const MachineInstr &Instr, int16_t &AddOpIdx, - int16_t &MulOpIdx, bool IsLeaf) { - int16_t Idx = getFMAOpIdxInfo(Instr.getOpcode()); - if (Idx < 0) - return false; - - // Instruction can be reassociated. - // fast math flags may prohibit reassociation. - if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) && - Instr.getFlag(MachineInstr::MIFlag::FmNsz))) - return false; - - // Instruction operands are virtual registers for reassociation. - if (!IsAllOpsVirtualReg(Instr)) - return false; - - MulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx]; - if (IsLeaf) + MulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx]; + if (IsLeaf) return true; AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx]; const MachineOperand &OpAdd = Instr.getOperand(AddOpIdx); - MachineInstr *MIAdd = MRI->getUniqueVRegDef(OpAdd.getReg()); + MachineInstr *MIAdd = MRI->getUniqueVRegDef(OpAdd.getReg()); // If 'add' operand's def is not in current block, don't do ILP related opt. if (!MIAdd || MIAdd->getParent() != MBB) return false; // If this is not Leaf FMA Instr, its 'add' operand should only have one use // as this fma will be changed later. - return IsLeaf ? true : MRI->hasOneNonDBGUse(OpAdd.getReg()); + return IsLeaf ? true : MRI->hasOneNonDBGUse(OpAdd.getReg()); }; int16_t AddOpIdx = -1; - int16_t MulOpIdx = -1; - - bool IsUsedOnceL = false; - bool IsUsedOnceR = false; - MachineInstr *MULInstrL = nullptr; - MachineInstr *MULInstrR = nullptr; - - auto IsRPReductionCandidate = [&]() { - // Currently, we only support float and double. - // FIXME: add support for other types. - unsigned Opcode = Root.getOpcode(); - if (Opcode != PPC::XSMADDASP && Opcode != PPC::XSMADDADP) - return false; - - // Root must be a valid FMA like instruction. - // Treat it as leaf as we don't care its add operand. - if (IsReassociableFMA(Root, AddOpIdx, MulOpIdx, true)) { - assert((MulOpIdx >= 0) && "mul operand index not right!"); - Register MULRegL = TRI->lookThruSingleUseCopyChain( - Root.getOperand(MulOpIdx).getReg(), MRI); - Register MULRegR = TRI->lookThruSingleUseCopyChain( - Root.getOperand(MulOpIdx + 1).getReg(), MRI); - if (!MULRegL && !MULRegR) - return false; - - if (MULRegL && !MULRegR) { - MULRegR = - TRI->lookThruCopyLike(Root.getOperand(MulOpIdx + 1).getReg(), MRI); - IsUsedOnceL = true; - } else if (!MULRegL && MULRegR) { - MULRegL = - TRI->lookThruCopyLike(Root.getOperand(MulOpIdx).getReg(), MRI); - IsUsedOnceR = true; - } else { - IsUsedOnceL = true; - IsUsedOnceR = true; - } - - if (!Register::isVirtualRegister(MULRegL) || - !Register::isVirtualRegister(MULRegR)) - return false; - - MULInstrL = MRI->getVRegDef(MULRegL); - MULInstrR = MRI->getVRegDef(MULRegR); - return true; - } - return false; - }; - - // Register pressure fma reassociation patterns. - if (DoRegPressureReduce && IsRPReductionCandidate()) { - assert((MULInstrL && MULInstrR) && "wrong register preduction candidate!"); - // Register pressure pattern 1 - if (isLoadFromConstantPool(MULInstrL) && IsUsedOnceR && - IsReassociableAddOrSub(*MULInstrR, InfoArrayIdxFSubInst)) { - LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BCA\n"); - Patterns.push_back(MachineCombinerPattern::REASSOC_XY_BCA); - return true; - } - - // Register pressure pattern 2 - if ((isLoadFromConstantPool(MULInstrR) && IsUsedOnceL && - IsReassociableAddOrSub(*MULInstrL, InfoArrayIdxFSubInst))) { - LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BAC\n"); - Patterns.push_back(MachineCombinerPattern::REASSOC_XY_BAC); - return true; - } - } - - // ILP fma reassociation patterns. + int16_t MulOpIdx = -1; + + bool IsUsedOnceL = false; + bool IsUsedOnceR = false; + MachineInstr *MULInstrL = nullptr; + MachineInstr *MULInstrR = nullptr; + + auto IsRPReductionCandidate = [&]() { + // Currently, we only support float and double. + // FIXME: add support for other types. + unsigned Opcode = Root.getOpcode(); + if (Opcode != PPC::XSMADDASP && Opcode != PPC::XSMADDADP) + return false; + + // Root must be a valid FMA like instruction. + // Treat it as leaf as we don't care its add operand. + if (IsReassociableFMA(Root, AddOpIdx, MulOpIdx, true)) { + assert((MulOpIdx >= 0) && "mul operand index not right!"); + Register MULRegL = TRI->lookThruSingleUseCopyChain( + Root.getOperand(MulOpIdx).getReg(), MRI); + Register MULRegR = TRI->lookThruSingleUseCopyChain( + Root.getOperand(MulOpIdx + 1).getReg(), MRI); + if (!MULRegL && !MULRegR) + return false; + + if (MULRegL && !MULRegR) { + MULRegR = + TRI->lookThruCopyLike(Root.getOperand(MulOpIdx + 1).getReg(), MRI); + IsUsedOnceL = true; + } else if (!MULRegL && MULRegR) { + MULRegL = + TRI->lookThruCopyLike(Root.getOperand(MulOpIdx).getReg(), MRI); + IsUsedOnceR = true; + } else { + IsUsedOnceL = true; + IsUsedOnceR = true; + } + + if (!Register::isVirtualRegister(MULRegL) || + !Register::isVirtualRegister(MULRegR)) + return false; + + MULInstrL = MRI->getVRegDef(MULRegL); + MULInstrR = MRI->getVRegDef(MULRegR); + return true; + } + return false; + }; + + // Register pressure fma reassociation patterns. + if (DoRegPressureReduce && IsRPReductionCandidate()) { + assert((MULInstrL && MULInstrR) && "wrong register preduction candidate!"); + // Register pressure pattern 1 + if (isLoadFromConstantPool(MULInstrL) && IsUsedOnceR && + IsReassociableAddOrSub(*MULInstrR, InfoArrayIdxFSubInst)) { + LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BCA\n"); + Patterns.push_back(MachineCombinerPattern::REASSOC_XY_BCA); + return true; + } + + // Register pressure pattern 2 + if ((isLoadFromConstantPool(MULInstrR) && IsUsedOnceL && + IsReassociableAddOrSub(*MULInstrL, InfoArrayIdxFSubInst))) { + LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BAC\n"); + Patterns.push_back(MachineCombinerPattern::REASSOC_XY_BAC); + return true; + } + } + + // ILP fma reassociation patterns. // Root must be a valid FMA like instruction. - AddOpIdx = -1; - if (!IsReassociableFMA(Root, AddOpIdx, MulOpIdx, false)) + AddOpIdx = -1; + if (!IsReassociableFMA(Root, AddOpIdx, MulOpIdx, false)) return false; assert((AddOpIdx >= 0) && "add operand index not right!"); Register RegB = Root.getOperand(AddOpIdx).getReg(); - MachineInstr *Prev = MRI->getUniqueVRegDef(RegB); + MachineInstr *Prev = MRI->getUniqueVRegDef(RegB); // Prev must be a valid FMA like instruction. AddOpIdx = -1; - if (!IsReassociableFMA(*Prev, AddOpIdx, MulOpIdx, false)) + if (!IsReassociableFMA(*Prev, AddOpIdx, MulOpIdx, false)) return false; assert((AddOpIdx >= 0) && "add operand index not right!"); Register RegA = Prev->getOperand(AddOpIdx).getReg(); - MachineInstr *Leaf = MRI->getUniqueVRegDef(RegA); + MachineInstr *Leaf = MRI->getUniqueVRegDef(RegA); AddOpIdx = -1; - if (IsReassociableFMA(*Leaf, AddOpIdx, MulOpIdx, true)) { + if (IsReassociableFMA(*Leaf, AddOpIdx, MulOpIdx, true)) { Patterns.push_back(MachineCombinerPattern::REASSOC_XMM_AMM_BMM); - LLVM_DEBUG(dbgs() << "add pattern REASSOC_XMM_AMM_BMM\n"); + LLVM_DEBUG(dbgs() << "add pattern REASSOC_XMM_AMM_BMM\n"); return true; } - if (IsReassociableAddOrSub(*Leaf, InfoArrayIdxFAddInst)) { + if (IsReassociableAddOrSub(*Leaf, InfoArrayIdxFAddInst)) { Patterns.push_back(MachineCombinerPattern::REASSOC_XY_AMM_BMM); - LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_AMM_BMM\n"); + LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_AMM_BMM\n"); return true; } return false; } -void PPCInstrInfo::finalizeInsInstrs( - MachineInstr &Root, MachineCombinerPattern &P, - SmallVectorImpl<MachineInstr *> &InsInstrs) const { - assert(!InsInstrs.empty() && "Instructions set to be inserted is empty!"); - - MachineFunction *MF = Root.getMF(); - MachineRegisterInfo *MRI = &MF->getRegInfo(); - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MachineConstantPool *MCP = MF->getConstantPool(); - - int16_t Idx = getFMAOpIdxInfo(Root.getOpcode()); - if (Idx < 0) - return; - - uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx]; - - // For now we only need to fix up placeholder for register pressure reduce - // patterns. - Register ConstReg = 0; - switch (P) { - case MachineCombinerPattern::REASSOC_XY_BCA: - ConstReg = - TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), MRI); - break; - case MachineCombinerPattern::REASSOC_XY_BAC: - ConstReg = - TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx + 1).getReg(), MRI); - break; - default: - // Not register pressure reduce patterns. - return; - } - - MachineInstr *ConstDefInstr = MRI->getVRegDef(ConstReg); - // Get const value from const pool. - const Constant *C = getConstantFromConstantPool(ConstDefInstr); - assert(isa<llvm::ConstantFP>(C) && "not a valid constant!"); - - // Get negative fp const. - APFloat F1((dyn_cast<ConstantFP>(C))->getValueAPF()); - F1.changeSign(); - Constant *NegC = ConstantFP::get(dyn_cast<ConstantFP>(C)->getContext(), F1); - Align Alignment = MF->getDataLayout().getPrefTypeAlign(C->getType()); - - // Put negative fp const into constant pool. - unsigned ConstPoolIdx = MCP->getConstantPoolIndex(NegC, Alignment); - - MachineOperand *Placeholder = nullptr; - // Record the placeholder PPC::ZERO8 we add in reassociateFMA. - for (auto *Inst : InsInstrs) { - for (MachineOperand &Operand : Inst->explicit_operands()) { - assert(Operand.isReg() && "Invalid instruction in InsInstrs!"); - if (Operand.getReg() == PPC::ZERO8) { - Placeholder = &Operand; - break; - } - } - } - - assert(Placeholder && "Placeholder does not exist!"); - - // Generate instructions to load the const fp from constant pool. - // We only support PPC64 and medium code model. - Register LoadNewConst = - generateLoadForNewConst(ConstPoolIdx, &Root, C->getType(), InsInstrs); - - // Fill the placeholder with the new load from constant pool. - Placeholder->setReg(LoadNewConst); -} - -bool PPCInstrInfo::shouldReduceRegisterPressure( - MachineBasicBlock *MBB, RegisterClassInfo *RegClassInfo) const { - - if (!EnableFMARegPressureReduction) - return false; - - // Currently, we only enable register pressure reducing in machine combiner - // for: 1: PPC64; 2: Code Model is Medium; 3: Power9 which also has vector - // support. - // - // So we need following instructions to access a TOC entry: - // - // %6:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, %const.0 - // %7:vssrc = DFLOADf32 target-flags(ppc-toc-lo) %const.0, - // killed %6:g8rc_and_g8rc_nox0, implicit $x2 :: (load 4 from constant-pool) - // - // FIXME: add more supported targets, like Small and Large code model, PPC32, - // AIX. - if (!(Subtarget.isPPC64() && Subtarget.hasP9Vector() && - Subtarget.getTargetMachine().getCodeModel() == CodeModel::Medium)) - return false; - - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MachineFunction *MF = MBB->getParent(); - MachineRegisterInfo *MRI = &MF->getRegInfo(); - - auto GetMBBPressure = [&](MachineBasicBlock *MBB) -> std::vector<unsigned> { - RegionPressure Pressure; - RegPressureTracker RPTracker(Pressure); - - // Initialize the register pressure tracker. - RPTracker.init(MBB->getParent(), RegClassInfo, nullptr, MBB, MBB->end(), - /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true); - - for (MachineBasicBlock::iterator MII = MBB->instr_end(), - MIE = MBB->instr_begin(); - MII != MIE; --MII) { - MachineInstr &MI = *std::prev(MII); - if (MI.isDebugValue() || MI.isDebugLabel()) - continue; - RegisterOperands RegOpers; - RegOpers.collect(MI, *TRI, *MRI, false, false); - RPTracker.recedeSkipDebugValues(); - assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!"); - RPTracker.recede(RegOpers); - } - - // Close the RPTracker to finalize live ins. - RPTracker.closeRegion(); - - return RPTracker.getPressure().MaxSetPressure; - }; - - // For now we only care about float and double type fma. - unsigned VSSRCLimit = TRI->getRegPressureSetLimit( - *MBB->getParent(), PPC::RegisterPressureSets::VSSRC); - - // Only reduce register pressure when pressure is high. - return GetMBBPressure(MBB)[PPC::RegisterPressureSets::VSSRC] > - (float)VSSRCLimit * FMARPFactor; -} - -bool PPCInstrInfo::isLoadFromConstantPool(MachineInstr *I) const { - // I has only one memory operand which is load from constant pool. - if (!I->hasOneMemOperand()) - return false; - - MachineMemOperand *Op = I->memoperands()[0]; - return Op->isLoad() && Op->getPseudoValue() && - Op->getPseudoValue()->kind() == PseudoSourceValue::ConstantPool; -} - -Register PPCInstrInfo::generateLoadForNewConst( - unsigned Idx, MachineInstr *MI, Type *Ty, - SmallVectorImpl<MachineInstr *> &InsInstrs) const { - // Now we only support PPC64, Medium code model and P9 with vector. - // We have immutable pattern to access const pool. See function - // shouldReduceRegisterPressure. - assert((Subtarget.isPPC64() && Subtarget.hasP9Vector() && - Subtarget.getTargetMachine().getCodeModel() == CodeModel::Medium) && - "Target not supported!\n"); - - MachineFunction *MF = MI->getMF(); - MachineRegisterInfo *MRI = &MF->getRegInfo(); - - // Generate ADDIStocHA8 - Register VReg1 = MRI->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass); - MachineInstrBuilder TOCOffset = - BuildMI(*MF, MI->getDebugLoc(), get(PPC::ADDIStocHA8), VReg1) - .addReg(PPC::X2) - .addConstantPoolIndex(Idx); - - assert((Ty->isFloatTy() || Ty->isDoubleTy()) && - "Only float and double are supported!"); - - unsigned LoadOpcode; - // Should be float type or double type. - if (Ty->isFloatTy()) - LoadOpcode = PPC::DFLOADf32; - else - LoadOpcode = PPC::DFLOADf64; - - const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg()); - Register VReg2 = MRI->createVirtualRegister(RC); - MachineMemOperand *MMO = MF->getMachineMemOperand( - MachinePointerInfo::getConstantPool(*MF), MachineMemOperand::MOLoad, - Ty->getScalarSizeInBits() / 8, MF->getDataLayout().getPrefTypeAlign(Ty)); - - // Generate Load from constant pool. - MachineInstrBuilder Load = - BuildMI(*MF, MI->getDebugLoc(), get(LoadOpcode), VReg2) - .addConstantPoolIndex(Idx) - .addReg(VReg1, getKillRegState(true)) - .addMemOperand(MMO); - - Load->getOperand(1).setTargetFlags(PPCII::MO_TOC_LO); - - // Insert the toc load instructions into InsInstrs. - InsInstrs.insert(InsInstrs.begin(), Load); - InsInstrs.insert(InsInstrs.begin(), TOCOffset); - return VReg2; -} - -// This function returns the const value in constant pool if the \p I is a load -// from constant pool. -const Constant * -PPCInstrInfo::getConstantFromConstantPool(MachineInstr *I) const { - MachineFunction *MF = I->getMF(); - MachineRegisterInfo *MRI = &MF->getRegInfo(); - MachineConstantPool *MCP = MF->getConstantPool(); - assert(I->mayLoad() && "Should be a load instruction.\n"); - for (auto MO : I->uses()) { - if (!MO.isReg()) - continue; - Register Reg = MO.getReg(); - if (Reg == 0 || !Register::isVirtualRegister(Reg)) - continue; - // Find the toc address. - MachineInstr *DefMI = MRI->getVRegDef(Reg); - for (auto MO2 : DefMI->uses()) - if (MO2.isCPI()) - return (MCP->getConstants())[MO2.getIndex()].Val.ConstVal; - } - return nullptr; -} - +void PPCInstrInfo::finalizeInsInstrs( + MachineInstr &Root, MachineCombinerPattern &P, + SmallVectorImpl<MachineInstr *> &InsInstrs) const { + assert(!InsInstrs.empty() && "Instructions set to be inserted is empty!"); + + MachineFunction *MF = Root.getMF(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + const TargetRegisterInfo *TRI = &getRegisterInfo(); + MachineConstantPool *MCP = MF->getConstantPool(); + + int16_t Idx = getFMAOpIdxInfo(Root.getOpcode()); + if (Idx < 0) + return; + + uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx]; + + // For now we only need to fix up placeholder for register pressure reduce + // patterns. + Register ConstReg = 0; + switch (P) { + case MachineCombinerPattern::REASSOC_XY_BCA: + ConstReg = + TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), MRI); + break; + case MachineCombinerPattern::REASSOC_XY_BAC: + ConstReg = + TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx + 1).getReg(), MRI); + break; + default: + // Not register pressure reduce patterns. + return; + } + + MachineInstr *ConstDefInstr = MRI->getVRegDef(ConstReg); + // Get const value from const pool. + const Constant *C = getConstantFromConstantPool(ConstDefInstr); + assert(isa<llvm::ConstantFP>(C) && "not a valid constant!"); + + // Get negative fp const. + APFloat F1((dyn_cast<ConstantFP>(C))->getValueAPF()); + F1.changeSign(); + Constant *NegC = ConstantFP::get(dyn_cast<ConstantFP>(C)->getContext(), F1); + Align Alignment = MF->getDataLayout().getPrefTypeAlign(C->getType()); + + // Put negative fp const into constant pool. + unsigned ConstPoolIdx = MCP->getConstantPoolIndex(NegC, Alignment); + + MachineOperand *Placeholder = nullptr; + // Record the placeholder PPC::ZERO8 we add in reassociateFMA. + for (auto *Inst : InsInstrs) { + for (MachineOperand &Operand : Inst->explicit_operands()) { + assert(Operand.isReg() && "Invalid instruction in InsInstrs!"); + if (Operand.getReg() == PPC::ZERO8) { + Placeholder = &Operand; + break; + } + } + } + + assert(Placeholder && "Placeholder does not exist!"); + + // Generate instructions to load the const fp from constant pool. + // We only support PPC64 and medium code model. + Register LoadNewConst = + generateLoadForNewConst(ConstPoolIdx, &Root, C->getType(), InsInstrs); + + // Fill the placeholder with the new load from constant pool. + Placeholder->setReg(LoadNewConst); +} + +bool PPCInstrInfo::shouldReduceRegisterPressure( + MachineBasicBlock *MBB, RegisterClassInfo *RegClassInfo) const { + + if (!EnableFMARegPressureReduction) + return false; + + // Currently, we only enable register pressure reducing in machine combiner + // for: 1: PPC64; 2: Code Model is Medium; 3: Power9 which also has vector + // support. + // + // So we need following instructions to access a TOC entry: + // + // %6:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, %const.0 + // %7:vssrc = DFLOADf32 target-flags(ppc-toc-lo) %const.0, + // killed %6:g8rc_and_g8rc_nox0, implicit $x2 :: (load 4 from constant-pool) + // + // FIXME: add more supported targets, like Small and Large code model, PPC32, + // AIX. + if (!(Subtarget.isPPC64() && Subtarget.hasP9Vector() && + Subtarget.getTargetMachine().getCodeModel() == CodeModel::Medium)) + return false; + + const TargetRegisterInfo *TRI = &getRegisterInfo(); + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + + auto GetMBBPressure = [&](MachineBasicBlock *MBB) -> std::vector<unsigned> { + RegionPressure Pressure; + RegPressureTracker RPTracker(Pressure); + + // Initialize the register pressure tracker. + RPTracker.init(MBB->getParent(), RegClassInfo, nullptr, MBB, MBB->end(), + /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true); + + for (MachineBasicBlock::iterator MII = MBB->instr_end(), + MIE = MBB->instr_begin(); + MII != MIE; --MII) { + MachineInstr &MI = *std::prev(MII); + if (MI.isDebugValue() || MI.isDebugLabel()) + continue; + RegisterOperands RegOpers; + RegOpers.collect(MI, *TRI, *MRI, false, false); + RPTracker.recedeSkipDebugValues(); + assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!"); + RPTracker.recede(RegOpers); + } + + // Close the RPTracker to finalize live ins. + RPTracker.closeRegion(); + + return RPTracker.getPressure().MaxSetPressure; + }; + + // For now we only care about float and double type fma. + unsigned VSSRCLimit = TRI->getRegPressureSetLimit( + *MBB->getParent(), PPC::RegisterPressureSets::VSSRC); + + // Only reduce register pressure when pressure is high. + return GetMBBPressure(MBB)[PPC::RegisterPressureSets::VSSRC] > + (float)VSSRCLimit * FMARPFactor; +} + +bool PPCInstrInfo::isLoadFromConstantPool(MachineInstr *I) const { + // I has only one memory operand which is load from constant pool. + if (!I->hasOneMemOperand()) + return false; + + MachineMemOperand *Op = I->memoperands()[0]; + return Op->isLoad() && Op->getPseudoValue() && + Op->getPseudoValue()->kind() == PseudoSourceValue::ConstantPool; +} + +Register PPCInstrInfo::generateLoadForNewConst( + unsigned Idx, MachineInstr *MI, Type *Ty, + SmallVectorImpl<MachineInstr *> &InsInstrs) const { + // Now we only support PPC64, Medium code model and P9 with vector. + // We have immutable pattern to access const pool. See function + // shouldReduceRegisterPressure. + assert((Subtarget.isPPC64() && Subtarget.hasP9Vector() && + Subtarget.getTargetMachine().getCodeModel() == CodeModel::Medium) && + "Target not supported!\n"); + + MachineFunction *MF = MI->getMF(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + + // Generate ADDIStocHA8 + Register VReg1 = MRI->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass); + MachineInstrBuilder TOCOffset = + BuildMI(*MF, MI->getDebugLoc(), get(PPC::ADDIStocHA8), VReg1) + .addReg(PPC::X2) + .addConstantPoolIndex(Idx); + + assert((Ty->isFloatTy() || Ty->isDoubleTy()) && + "Only float and double are supported!"); + + unsigned LoadOpcode; + // Should be float type or double type. + if (Ty->isFloatTy()) + LoadOpcode = PPC::DFLOADf32; + else + LoadOpcode = PPC::DFLOADf64; + + const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg()); + Register VReg2 = MRI->createVirtualRegister(RC); + MachineMemOperand *MMO = MF->getMachineMemOperand( + MachinePointerInfo::getConstantPool(*MF), MachineMemOperand::MOLoad, + Ty->getScalarSizeInBits() / 8, MF->getDataLayout().getPrefTypeAlign(Ty)); + + // Generate Load from constant pool. + MachineInstrBuilder Load = + BuildMI(*MF, MI->getDebugLoc(), get(LoadOpcode), VReg2) + .addConstantPoolIndex(Idx) + .addReg(VReg1, getKillRegState(true)) + .addMemOperand(MMO); + + Load->getOperand(1).setTargetFlags(PPCII::MO_TOC_LO); + + // Insert the toc load instructions into InsInstrs. + InsInstrs.insert(InsInstrs.begin(), Load); + InsInstrs.insert(InsInstrs.begin(), TOCOffset); + return VReg2; +} + +// This function returns the const value in constant pool if the \p I is a load +// from constant pool. +const Constant * +PPCInstrInfo::getConstantFromConstantPool(MachineInstr *I) const { + MachineFunction *MF = I->getMF(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + MachineConstantPool *MCP = MF->getConstantPool(); + assert(I->mayLoad() && "Should be a load instruction.\n"); + for (auto MO : I->uses()) { + if (!MO.isReg()) + continue; + Register Reg = MO.getReg(); + if (Reg == 0 || !Register::isVirtualRegister(Reg)) + continue; + // Find the toc address. + MachineInstr *DefMI = MRI->getVRegDef(Reg); + for (auto MO2 : DefMI->uses()) + if (MO2.isCPI()) + return (MCP->getConstants())[MO2.getIndex()].Val.ConstVal; + } + return nullptr; +} + bool PPCInstrInfo::getMachineCombinerPatterns( - MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns, - bool DoRegPressureReduce) const { + MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns, + bool DoRegPressureReduce) const { // Using the machine combiner in this way is potentially expensive, so // restrict to when aggressive optimizations are desired. if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive) return false; - if (getFMAPatterns(Root, Patterns, DoRegPressureReduce)) + if (getFMAPatterns(Root, Patterns, DoRegPressureReduce)) return true; - return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns, - DoRegPressureReduce); + return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns, + DoRegPressureReduce); } void PPCInstrInfo::genAlternativeCodeSequence( @@ -779,8 +779,8 @@ void PPCInstrInfo::genAlternativeCodeSequence( switch (Pattern) { case MachineCombinerPattern::REASSOC_XY_AMM_BMM: case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: - case MachineCombinerPattern::REASSOC_XY_BCA: - case MachineCombinerPattern::REASSOC_XY_BAC: + case MachineCombinerPattern::REASSOC_XY_BCA: + case MachineCombinerPattern::REASSOC_XY_BAC: reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg); break; default: @@ -798,7 +798,7 @@ void PPCInstrInfo::reassociateFMA( DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { MachineFunction *MF = Root.getMF(); MachineRegisterInfo &MRI = MF->getRegInfo(); - const TargetRegisterInfo *TRI = &getRegisterInfo(); + const TargetRegisterInfo *TRI = &getRegisterInfo(); MachineOperand &OpC = Root.getOperand(0); Register RegC = OpC.getReg(); const TargetRegisterClass *RC = MRI.getRegClass(RegC); @@ -808,43 +808,43 @@ void PPCInstrInfo::reassociateFMA( int16_t Idx = getFMAOpIdxInfo(FmaOp); assert(Idx >= 0 && "Root must be a FMA instruction"); - bool IsILPReassociate = - (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) || - (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM); - + bool IsILPReassociate = + (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) || + (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM); + uint16_t AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx]; uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx]; - MachineInstr *Prev = nullptr; - MachineInstr *Leaf = nullptr; - switch (Pattern) { - default: - llvm_unreachable("not recognized pattern!"); - case MachineCombinerPattern::REASSOC_XY_AMM_BMM: - case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: - Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg()); - Leaf = MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg()); - break; - case MachineCombinerPattern::REASSOC_XY_BAC: { - Register MULReg = - TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), &MRI); - Leaf = MRI.getVRegDef(MULReg); - break; - } - case MachineCombinerPattern::REASSOC_XY_BCA: { - Register MULReg = TRI->lookThruCopyLike( - Root.getOperand(FirstMulOpIdx + 1).getReg(), &MRI); - Leaf = MRI.getVRegDef(MULReg); - break; - } - } - - uint16_t IntersectedFlags = 0; - if (IsILPReassociate) - IntersectedFlags = Root.getFlags() & Prev->getFlags() & Leaf->getFlags(); - else - IntersectedFlags = Root.getFlags() & Leaf->getFlags(); - + MachineInstr *Prev = nullptr; + MachineInstr *Leaf = nullptr; + switch (Pattern) { + default: + llvm_unreachable("not recognized pattern!"); + case MachineCombinerPattern::REASSOC_XY_AMM_BMM: + case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: + Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg()); + Leaf = MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg()); + break; + case MachineCombinerPattern::REASSOC_XY_BAC: { + Register MULReg = + TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), &MRI); + Leaf = MRI.getVRegDef(MULReg); + break; + } + case MachineCombinerPattern::REASSOC_XY_BCA: { + Register MULReg = TRI->lookThruCopyLike( + Root.getOperand(FirstMulOpIdx + 1).getReg(), &MRI); + Leaf = MRI.getVRegDef(MULReg); + break; + } + } + + uint16_t IntersectedFlags = 0; + if (IsILPReassociate) + IntersectedFlags = Root.getFlags() & Prev->getFlags() & Leaf->getFlags(); + else + IntersectedFlags = Root.getFlags() & Leaf->getFlags(); + auto GetOperandInfo = [&](const MachineOperand &Operand, Register &Reg, bool &KillFlag) { Reg = Operand.getReg(); @@ -853,51 +853,51 @@ void PPCInstrInfo::reassociateFMA( }; auto GetFMAInstrInfo = [&](const MachineInstr &Instr, Register &MulOp1, - Register &MulOp2, Register &AddOp, - bool &MulOp1KillFlag, bool &MulOp2KillFlag, - bool &AddOpKillFlag) { + Register &MulOp2, Register &AddOp, + bool &MulOp1KillFlag, bool &MulOp2KillFlag, + bool &AddOpKillFlag) { GetOperandInfo(Instr.getOperand(FirstMulOpIdx), MulOp1, MulOp1KillFlag); GetOperandInfo(Instr.getOperand(FirstMulOpIdx + 1), MulOp2, MulOp2KillFlag); - GetOperandInfo(Instr.getOperand(AddOpIdx), AddOp, AddOpKillFlag); + GetOperandInfo(Instr.getOperand(AddOpIdx), AddOp, AddOpKillFlag); }; - Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32, RegA11, - RegA21, RegB; + Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32, RegA11, + RegA21, RegB; bool KillX = false, KillY = false, KillM11 = false, KillM12 = false, - KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false, - KillA11 = false, KillA21 = false, KillB = false; + KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false, + KillA11 = false, KillA21 = false, KillB = false; - GetFMAInstrInfo(Root, RegM31, RegM32, RegB, KillM31, KillM32, KillB); + GetFMAInstrInfo(Root, RegM31, RegM32, RegB, KillM31, KillM32, KillB); + + if (IsILPReassociate) + GetFMAInstrInfo(*Prev, RegM21, RegM22, RegA21, KillM21, KillM22, KillA21); - if (IsILPReassociate) - GetFMAInstrInfo(*Prev, RegM21, RegM22, RegA21, KillM21, KillM22, KillA21); - if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) { - GetFMAInstrInfo(*Leaf, RegM11, RegM12, RegA11, KillM11, KillM12, KillA11); + GetFMAInstrInfo(*Leaf, RegM11, RegM12, RegA11, KillM11, KillM12, KillA11); GetOperandInfo(Leaf->getOperand(AddOpIdx), RegX, KillX); } else if (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) { GetOperandInfo(Leaf->getOperand(1), RegX, KillX); GetOperandInfo(Leaf->getOperand(2), RegY, KillY); - } else { - // Get FSUB instruction info. - GetOperandInfo(Leaf->getOperand(1), RegX, KillX); - GetOperandInfo(Leaf->getOperand(2), RegY, KillY); + } else { + // Get FSUB instruction info. + GetOperandInfo(Leaf->getOperand(1), RegX, KillX); + GetOperandInfo(Leaf->getOperand(2), RegY, KillY); } // Create new virtual registers for the new results instead of // recycling legacy ones because the MachineCombiner's computation of the // critical path requires a new register definition rather than an existing // one. - // For register pressure reassociation, we only need create one virtual - // register for the new fma. + // For register pressure reassociation, we only need create one virtual + // register for the new fma. Register NewVRA = MRI.createVirtualRegister(RC); InstrIdxForVirtReg.insert(std::make_pair(NewVRA, 0)); - Register NewVRB = 0; - if (IsILPReassociate) { - NewVRB = MRI.createVirtualRegister(RC); - InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1)); - } + Register NewVRB = 0; + if (IsILPReassociate) { + NewVRB = MRI.createVirtualRegister(RC); + InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1)); + } Register NewVRD = 0; if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) { @@ -916,11 +916,11 @@ void PPCInstrInfo::reassociateFMA( MI->getOperand(FirstMulOpIdx + 1).setIsKill(KillRegMul2); }; - MachineInstrBuilder NewARegPressure, NewCRegPressure; - switch (Pattern) { - default: - llvm_unreachable("not recognized pattern!"); - case MachineCombinerPattern::REASSOC_XY_AMM_BMM: { + MachineInstrBuilder NewARegPressure, NewCRegPressure; + switch (Pattern) { + default: + llvm_unreachable("not recognized pattern!"); + case MachineCombinerPattern::REASSOC_XY_AMM_BMM: { // Create new instructions for insertion. MachineInstrBuilder MINewB = BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB) @@ -953,9 +953,9 @@ void PPCInstrInfo::reassociateFMA( InsInstrs.push_back(MINewA); InsInstrs.push_back(MINewB); InsInstrs.push_back(MINewC); - break; - } - case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: { + break; + } + case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: { assert(NewVRD && "new FMA register not created!"); // Create new instructions for insertion. MachineInstrBuilder MINewA = @@ -997,56 +997,56 @@ void PPCInstrInfo::reassociateFMA( InsInstrs.push_back(MINewB); InsInstrs.push_back(MINewD); InsInstrs.push_back(MINewC); - break; + break; } - case MachineCombinerPattern::REASSOC_XY_BAC: - case MachineCombinerPattern::REASSOC_XY_BCA: { - Register VarReg; - bool KillVarReg = false; - if (Pattern == MachineCombinerPattern::REASSOC_XY_BCA) { - VarReg = RegM31; - KillVarReg = KillM31; - } else { - VarReg = RegM32; - KillVarReg = KillM32; - } - // We don't want to get negative const from memory pool too early, as the - // created entry will not be deleted even if it has no users. Since all - // operand of Leaf and Root are virtual register, we use zero register - // here as a placeholder. When the InsInstrs is selected in - // MachineCombiner, we call finalizeInsInstrs to replace the zero register - // with a virtual register which is a load from constant pool. - NewARegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA) - .addReg(RegB, getKillRegState(RegB)) - .addReg(RegY, getKillRegState(KillY)) - .addReg(PPC::ZERO8); - NewCRegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), RegC) - .addReg(NewVRA, getKillRegState(true)) - .addReg(RegX, getKillRegState(KillX)) - .addReg(VarReg, getKillRegState(KillVarReg)); - // For now, we only support xsmaddadp/xsmaddasp, their add operand are - // both at index 1, no need to adjust. - // FIXME: when add more fma instructions support, like fma/fmas, adjust - // the operand index here. - break; - } - } - - if (!IsILPReassociate) { - setSpecialOperandAttr(*NewARegPressure, IntersectedFlags); - setSpecialOperandAttr(*NewCRegPressure, IntersectedFlags); - - InsInstrs.push_back(NewARegPressure); - InsInstrs.push_back(NewCRegPressure); - } - + case MachineCombinerPattern::REASSOC_XY_BAC: + case MachineCombinerPattern::REASSOC_XY_BCA: { + Register VarReg; + bool KillVarReg = false; + if (Pattern == MachineCombinerPattern::REASSOC_XY_BCA) { + VarReg = RegM31; + KillVarReg = KillM31; + } else { + VarReg = RegM32; + KillVarReg = KillM32; + } + // We don't want to get negative const from memory pool too early, as the + // created entry will not be deleted even if it has no users. Since all + // operand of Leaf and Root are virtual register, we use zero register + // here as a placeholder. When the InsInstrs is selected in + // MachineCombiner, we call finalizeInsInstrs to replace the zero register + // with a virtual register which is a load from constant pool. + NewARegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA) + .addReg(RegB, getKillRegState(RegB)) + .addReg(RegY, getKillRegState(KillY)) + .addReg(PPC::ZERO8); + NewCRegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), RegC) + .addReg(NewVRA, getKillRegState(true)) + .addReg(RegX, getKillRegState(KillX)) + .addReg(VarReg, getKillRegState(KillVarReg)); + // For now, we only support xsmaddadp/xsmaddasp, their add operand are + // both at index 1, no need to adjust. + // FIXME: when add more fma instructions support, like fma/fmas, adjust + // the operand index here. + break; + } + } + + if (!IsILPReassociate) { + setSpecialOperandAttr(*NewARegPressure, IntersectedFlags); + setSpecialOperandAttr(*NewCRegPressure, IntersectedFlags); + + InsInstrs.push_back(NewARegPressure); + InsInstrs.push_back(NewCRegPressure); + } + assert(!InsInstrs.empty() && "Insertion instructions set should not be empty!"); // Record old instructions for deletion. DelInstrs.push_back(Leaf); - if (IsILPReassociate) - DelInstrs.push_back(Prev); + if (IsILPReassociate) + DelInstrs.push_back(Prev); DelInstrs.push_back(&Root); } @@ -1114,7 +1114,7 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, case PPC::V_SETALLONES: case PPC::CRSET: case PPC::CRUNSET: - case PPC::XXSETACCZ: + case PPC::XXSETACCZ: return true; } return false; @@ -1715,22 +1715,22 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addImm(31); return; } else if (PPC::CRRCRegClass.contains(SrcReg) && - (PPC::G8RCRegClass.contains(DestReg) || - PPC::GPRCRegClass.contains(DestReg))) { - bool Is64Bit = PPC::G8RCRegClass.contains(DestReg); - unsigned MvCode = Is64Bit ? PPC::MFOCRF8 : PPC::MFOCRF; - unsigned ShCode = Is64Bit ? PPC::RLWINM8 : PPC::RLWINM; - unsigned CRNum = TRI->getEncodingValue(SrcReg); - BuildMI(MBB, I, DL, get(MvCode), DestReg).addReg(SrcReg); + (PPC::G8RCRegClass.contains(DestReg) || + PPC::GPRCRegClass.contains(DestReg))) { + bool Is64Bit = PPC::G8RCRegClass.contains(DestReg); + unsigned MvCode = Is64Bit ? PPC::MFOCRF8 : PPC::MFOCRF; + unsigned ShCode = Is64Bit ? PPC::RLWINM8 : PPC::RLWINM; + unsigned CRNum = TRI->getEncodingValue(SrcReg); + BuildMI(MBB, I, DL, get(MvCode), DestReg).addReg(SrcReg); getKillRegState(KillSrc); - if (CRNum == 7) - return; - // Shift the CR bits to make the CR field in the lowest 4 bits of GRC. - BuildMI(MBB, I, DL, get(ShCode), DestReg) - .addReg(DestReg, RegState::Kill) - .addImm(CRNum * 4 + 4) - .addImm(28) - .addImm(31); + if (CRNum == 7) + return; + // Shift the CR bits to make the CR field in the lowest 4 bits of GRC. + BuildMI(MBB, I, DL, get(ShCode), DestReg) + .addReg(DestReg, RegState::Kill) + .addImm(CRNum * 4 + 4) + .addImm(28) + .addImm(31); return; } else if (PPC::G8RCRegClass.contains(SrcReg) && PPC::VSFRCRegClass.contains(DestReg)) { @@ -1783,53 +1783,53 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) || PPC::VSSRCRegClass.contains(DestReg, SrcReg)) Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf; - else if (Subtarget.pairedVectorMemops() && - PPC::VSRpRCRegClass.contains(DestReg, SrcReg)) { - if (SrcReg > PPC::VSRp15) - SrcReg = PPC::V0 + (SrcReg - PPC::VSRp16) * 2; - else - SrcReg = PPC::VSL0 + (SrcReg - PPC::VSRp0) * 2; - if (DestReg > PPC::VSRp15) - DestReg = PPC::V0 + (DestReg - PPC::VSRp16) * 2; - else - DestReg = PPC::VSL0 + (DestReg - PPC::VSRp0) * 2; - BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg). - addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc)); - BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg + 1). - addReg(SrcReg + 1).addReg(SrcReg + 1, getKillRegState(KillSrc)); - return; - } + else if (Subtarget.pairedVectorMemops() && + PPC::VSRpRCRegClass.contains(DestReg, SrcReg)) { + if (SrcReg > PPC::VSRp15) + SrcReg = PPC::V0 + (SrcReg - PPC::VSRp16) * 2; + else + SrcReg = PPC::VSL0 + (SrcReg - PPC::VSRp0) * 2; + if (DestReg > PPC::VSRp15) + DestReg = PPC::V0 + (DestReg - PPC::VSRp16) * 2; + else + DestReg = PPC::VSL0 + (DestReg - PPC::VSRp0) * 2; + BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg). + addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc)); + BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg + 1). + addReg(SrcReg + 1).addReg(SrcReg + 1, getKillRegState(KillSrc)); + return; + } else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::CROR; else if (PPC::SPERCRegClass.contains(DestReg, SrcReg)) Opc = PPC::EVOR; - else if ((PPC::ACCRCRegClass.contains(DestReg) || - PPC::UACCRCRegClass.contains(DestReg)) && - (PPC::ACCRCRegClass.contains(SrcReg) || - PPC::UACCRCRegClass.contains(SrcReg))) { - // If primed, de-prime the source register, copy the individual registers - // and prime the destination if needed. The vector subregisters are - // vs[(u)acc * 4] - vs[(u)acc * 4 + 3]. If the copy is not a kill and the - // source is primed, we need to re-prime it after the copy as well. - PPCRegisterInfo::emitAccCopyInfo(MBB, DestReg, SrcReg); - bool DestPrimed = PPC::ACCRCRegClass.contains(DestReg); - bool SrcPrimed = PPC::ACCRCRegClass.contains(SrcReg); - MCRegister VSLSrcReg = - PPC::VSL0 + (SrcReg - (SrcPrimed ? PPC::ACC0 : PPC::UACC0)) * 4; - MCRegister VSLDestReg = - PPC::VSL0 + (DestReg - (DestPrimed ? PPC::ACC0 : PPC::UACC0)) * 4; - if (SrcPrimed) - BuildMI(MBB, I, DL, get(PPC::XXMFACC), SrcReg).addReg(SrcReg); - for (unsigned Idx = 0; Idx < 4; Idx++) - BuildMI(MBB, I, DL, get(PPC::XXLOR), VSLDestReg + Idx) - .addReg(VSLSrcReg + Idx) - .addReg(VSLSrcReg + Idx, getKillRegState(KillSrc)); - if (DestPrimed) - BuildMI(MBB, I, DL, get(PPC::XXMTACC), DestReg).addReg(DestReg); - if (SrcPrimed && !KillSrc) - BuildMI(MBB, I, DL, get(PPC::XXMTACC), SrcReg).addReg(SrcReg); - return; - } else + else if ((PPC::ACCRCRegClass.contains(DestReg) || + PPC::UACCRCRegClass.contains(DestReg)) && + (PPC::ACCRCRegClass.contains(SrcReg) || + PPC::UACCRCRegClass.contains(SrcReg))) { + // If primed, de-prime the source register, copy the individual registers + // and prime the destination if needed. The vector subregisters are + // vs[(u)acc * 4] - vs[(u)acc * 4 + 3]. If the copy is not a kill and the + // source is primed, we need to re-prime it after the copy as well. + PPCRegisterInfo::emitAccCopyInfo(MBB, DestReg, SrcReg); + bool DestPrimed = PPC::ACCRCRegClass.contains(DestReg); + bool SrcPrimed = PPC::ACCRCRegClass.contains(SrcReg); + MCRegister VSLSrcReg = + PPC::VSL0 + (SrcReg - (SrcPrimed ? PPC::ACC0 : PPC::UACC0)) * 4; + MCRegister VSLDestReg = + PPC::VSL0 + (DestReg - (DestPrimed ? PPC::ACC0 : PPC::UACC0)) * 4; + if (SrcPrimed) + BuildMI(MBB, I, DL, get(PPC::XXMFACC), SrcReg).addReg(SrcReg); + for (unsigned Idx = 0; Idx < 4; Idx++) + BuildMI(MBB, I, DL, get(PPC::XXLOR), VSLDestReg + Idx) + .addReg(VSLSrcReg + Idx) + .addReg(VSLSrcReg + Idx, getKillRegState(KillSrc)); + if (DestPrimed) + BuildMI(MBB, I, DL, get(PPC::XXMTACC), DestReg).addReg(DestReg); + if (SrcPrimed && !KillSrc) + BuildMI(MBB, I, DL, get(PPC::XXMTACC), SrcReg).addReg(SrcReg); + return; + } else llvm_unreachable("Impossible reg-to-reg copy"); const MCInstrDesc &MCID = get(Opc); @@ -1840,7 +1840,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc)); } -unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass *RC) const { +unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass *RC) const { int OpcodeIndex = 0; if (PPC::GPRCRegClass.hasSubClassEq(RC) || @@ -1869,18 +1869,18 @@ unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass *RC) const { OpcodeIndex = SOK_VectorFloat4Spill; } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_SpillToVSR; - } else if (PPC::ACCRCRegClass.hasSubClassEq(RC)) { - assert(Subtarget.pairedVectorMemops() && - "Register unexpected when paired memops are disabled."); - OpcodeIndex = SOK_AccumulatorSpill; - } else if (PPC::UACCRCRegClass.hasSubClassEq(RC)) { - assert(Subtarget.pairedVectorMemops() && - "Register unexpected when paired memops are disabled."); - OpcodeIndex = SOK_UAccumulatorSpill; - } else if (PPC::VSRpRCRegClass.hasSubClassEq(RC)) { - assert(Subtarget.pairedVectorMemops() && - "Register unexpected when paired memops are disabled."); - OpcodeIndex = SOK_PairedVecSpill; + } else if (PPC::ACCRCRegClass.hasSubClassEq(RC)) { + assert(Subtarget.pairedVectorMemops() && + "Register unexpected when paired memops are disabled."); + OpcodeIndex = SOK_AccumulatorSpill; + } else if (PPC::UACCRCRegClass.hasSubClassEq(RC)) { + assert(Subtarget.pairedVectorMemops() && + "Register unexpected when paired memops are disabled."); + OpcodeIndex = SOK_UAccumulatorSpill; + } else if (PPC::VSRpRCRegClass.hasSubClassEq(RC)) { + assert(Subtarget.pairedVectorMemops() && + "Register unexpected when paired memops are disabled."); + OpcodeIndex = SOK_PairedVecSpill; } else { llvm_unreachable("Unknown regclass!"); } @@ -2141,17 +2141,17 @@ bool PPCInstrInfo::isPredicated(const MachineInstr &MI) const { return false; } -bool PPCInstrInfo::isSchedulingBoundary(const MachineInstr &MI, - const MachineBasicBlock *MBB, - const MachineFunction &MF) const { - // Set MFFS and MTFSF as scheduling boundary to avoid unexpected code motion - // across them, since some FP operations may change content of FPSCR. - // TODO: Model FPSCR in PPC instruction definitions and remove the workaround - if (MI.getOpcode() == PPC::MFFS || MI.getOpcode() == PPC::MTFSF) - return true; - return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF); -} - +bool PPCInstrInfo::isSchedulingBoundary(const MachineInstr &MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const { + // Set MFFS and MTFSF as scheduling boundary to avoid unexpected code motion + // across them, since some FP operations may change content of FPSCR. + // TODO: Model FPSCR in PPC instruction definitions and remove the workaround + if (MI.getOpcode() == PPC::MFFS || MI.getOpcode() == PPC::MTFSF) + return true; + return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF); +} + bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI, ArrayRef<MachineOperand> Pred) const { unsigned OpC = MI.getOpcode(); @@ -2160,10 +2160,10 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI, bool isPPC64 = Subtarget.isPPC64(); MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR) : (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR))); - // Need add Def and Use for CTR implicit operand. - MachineInstrBuilder(*MI.getParent()->getParent(), MI) - .addReg(Pred[1].getReg(), RegState::Implicit) - .addReg(Pred[1].getReg(), RegState::ImplicitDefine); + // Need add Def and Use for CTR implicit operand. + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addReg(Pred[1].getReg(), RegState::Implicit) + .addReg(Pred[1].getReg(), RegState::ImplicitDefine); } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) { MI.setDesc(get(PPC::BCLR)); MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]); @@ -2183,10 +2183,10 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI, bool isPPC64 = Subtarget.isPPC64(); MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : (isPPC64 ? PPC::BDZ8 : PPC::BDZ))); - // Need add Def and Use for CTR implicit operand. - MachineInstrBuilder(*MI.getParent()->getParent(), MI) - .addReg(Pred[1].getReg(), RegState::Implicit) - .addReg(Pred[1].getReg(), RegState::ImplicitDefine); + // Need add Def and Use for CTR implicit operand. + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addReg(Pred[1].getReg(), RegState::Implicit) + .addReg(Pred[1].getReg(), RegState::ImplicitDefine); } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) { MachineBasicBlock *MBB = MI.getOperand(0).getMBB(); MI.RemoveOperand(0); @@ -2231,20 +2231,20 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI, MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n) : (setLR ? PPC::BCCTRLn : PPC::BCCTRn))); MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]); - } else { - MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8) - : (setLR ? PPC::BCCCTRL : PPC::BCCCTR))); - MachineInstrBuilder(*MI.getParent()->getParent(), MI) - .addImm(Pred[0].getImm()) - .add(Pred[1]); + } else { + MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8) + : (setLR ? PPC::BCCCTRL : PPC::BCCCTR))); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addImm(Pred[0].getImm()) + .add(Pred[1]); } - // Need add Def and Use for LR implicit operand. - if (setLR) - MachineInstrBuilder(*MI.getParent()->getParent(), MI) - .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::Implicit) - .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::ImplicitDefine); - + // Need add Def and Use for LR implicit operand. + if (setLR) + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::Implicit) + .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::ImplicitDefine); + return true; } @@ -2282,9 +2282,9 @@ bool PPCInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1, return false; } -bool PPCInstrInfo::ClobbersPredicate(MachineInstr &MI, - std::vector<MachineOperand> &Pred, - bool SkipDead) const { +bool PPCInstrInfo::ClobbersPredicate(MachineInstr &MI, + std::vector<MachineOperand> &Pred, + bool SkipDead) const { // Note: At the present time, the contents of Pred from this function is // unused by IfConversion. This implementation follows ARM by pushing the // CR-defining operand. Because the 'DZ' and 'DNZ' count as types of @@ -2570,14 +2570,14 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, if (NewOpC == -1) return false; - // This transformation should not be performed if `nsw` is missing and is not - // `equalityOnly` comparison. Since if there is overflow, sub_lt, sub_gt in - // CRReg do not reflect correct order. If `equalityOnly` is true, sub_eq in - // CRReg can reflect if compared values are equal, this optz is still valid. - if (!equalityOnly && (NewOpC == PPC::SUBF_rec || NewOpC == PPC::SUBF8_rec) && - Sub && !Sub->getFlag(MachineInstr::NoSWrap)) - return false; - + // This transformation should not be performed if `nsw` is missing and is not + // `equalityOnly` comparison. Since if there is overflow, sub_lt, sub_gt in + // CRReg do not reflect correct order. If `equalityOnly` is true, sub_eq in + // CRReg can reflect if compared values are equal, this optz is still valid. + if (!equalityOnly && (NewOpC == PPC::SUBF_rec || NewOpC == PPC::SUBF8_rec) && + Sub && !Sub->getFlag(MachineInstr::NoSWrap)) + return false; + // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP // needs to be updated to be based on SUB. Push the condition code // operands to OperandsToUpdate. If it is safe to remove CmpInstr, the @@ -2728,112 +2728,112 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, return true; } -bool PPCInstrInfo::getMemOperandsWithOffsetWidth( - const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps, - int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, - const TargetRegisterInfo *TRI) const { - const MachineOperand *BaseOp; - OffsetIsScalable = false; - if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI)) - return false; - BaseOps.push_back(BaseOp); - return true; -} - -static bool isLdStSafeToCluster(const MachineInstr &LdSt, - const TargetRegisterInfo *TRI) { - // If this is a volatile load/store, don't mess with it. - if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3) - return false; - - if (LdSt.getOperand(2).isFI()) - return true; - - assert(LdSt.getOperand(2).isReg() && "Expected a reg operand."); - // Can't cluster if the instruction modifies the base register - // or it is update form. e.g. ld r2,3(r2) - if (LdSt.modifiesRegister(LdSt.getOperand(2).getReg(), TRI)) - return false; - - return true; -} - -// Only cluster instruction pair that have the same opcode, and they are -// clusterable according to PowerPC specification. -static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc, - const PPCSubtarget &Subtarget) { - switch (FirstOpc) { - default: - return false; - case PPC::STD: - case PPC::STFD: - case PPC::STXSD: - case PPC::DFSTOREf64: - return FirstOpc == SecondOpc; - // PowerPC backend has opcode STW/STW8 for instruction "stw" to deal with - // 32bit and 64bit instruction selection. They are clusterable pair though - // they are different opcode. - case PPC::STW: - case PPC::STW8: - return SecondOpc == PPC::STW || SecondOpc == PPC::STW8; - } -} - -bool PPCInstrInfo::shouldClusterMemOps( - ArrayRef<const MachineOperand *> BaseOps1, - ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads, - unsigned NumBytes) const { - - assert(BaseOps1.size() == 1 && BaseOps2.size() == 1); - const MachineOperand &BaseOp1 = *BaseOps1.front(); - const MachineOperand &BaseOp2 = *BaseOps2.front(); - assert((BaseOp1.isReg() || BaseOp1.isFI()) && - "Only base registers and frame indices are supported."); - - // The NumLoads means the number of loads that has been clustered. - // Don't cluster memory op if there are already two ops clustered at least. - if (NumLoads > 2) - return false; - - // Cluster the load/store only when they have the same base - // register or FI. - if ((BaseOp1.isReg() != BaseOp2.isReg()) || - (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) || - (BaseOp1.isFI() && BaseOp1.getIndex() != BaseOp2.getIndex())) - return false; - - // Check if the load/store are clusterable according to the PowerPC - // specification. - const MachineInstr &FirstLdSt = *BaseOp1.getParent(); - const MachineInstr &SecondLdSt = *BaseOp2.getParent(); - unsigned FirstOpc = FirstLdSt.getOpcode(); - unsigned SecondOpc = SecondLdSt.getOpcode(); - const TargetRegisterInfo *TRI = &getRegisterInfo(); - // Cluster the load/store only when they have the same opcode, and they are - // clusterable opcode according to PowerPC specification. - if (!isClusterableLdStOpcPair(FirstOpc, SecondOpc, Subtarget)) - return false; - - // Can't cluster load/store that have ordered or volatile memory reference. - if (!isLdStSafeToCluster(FirstLdSt, TRI) || - !isLdStSafeToCluster(SecondLdSt, TRI)) - return false; - - int64_t Offset1 = 0, Offset2 = 0; - unsigned Width1 = 0, Width2 = 0; - const MachineOperand *Base1 = nullptr, *Base2 = nullptr; - if (!getMemOperandWithOffsetWidth(FirstLdSt, Base1, Offset1, Width1, TRI) || - !getMemOperandWithOffsetWidth(SecondLdSt, Base2, Offset2, Width2, TRI) || - Width1 != Width2) - return false; - - assert(Base1 == &BaseOp1 && Base2 == &BaseOp2 && - "getMemOperandWithOffsetWidth return incorrect base op"); - // The caller should already have ordered FirstMemOp/SecondMemOp by offset. - assert(Offset1 <= Offset2 && "Caller should have ordered offsets."); - return Offset1 + Width1 == Offset2; -} - +bool PPCInstrInfo::getMemOperandsWithOffsetWidth( + const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps, + int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, + const TargetRegisterInfo *TRI) const { + const MachineOperand *BaseOp; + OffsetIsScalable = false; + if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI)) + return false; + BaseOps.push_back(BaseOp); + return true; +} + +static bool isLdStSafeToCluster(const MachineInstr &LdSt, + const TargetRegisterInfo *TRI) { + // If this is a volatile load/store, don't mess with it. + if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3) + return false; + + if (LdSt.getOperand(2).isFI()) + return true; + + assert(LdSt.getOperand(2).isReg() && "Expected a reg operand."); + // Can't cluster if the instruction modifies the base register + // or it is update form. e.g. ld r2,3(r2) + if (LdSt.modifiesRegister(LdSt.getOperand(2).getReg(), TRI)) + return false; + + return true; +} + +// Only cluster instruction pair that have the same opcode, and they are +// clusterable according to PowerPC specification. +static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc, + const PPCSubtarget &Subtarget) { + switch (FirstOpc) { + default: + return false; + case PPC::STD: + case PPC::STFD: + case PPC::STXSD: + case PPC::DFSTOREf64: + return FirstOpc == SecondOpc; + // PowerPC backend has opcode STW/STW8 for instruction "stw" to deal with + // 32bit and 64bit instruction selection. They are clusterable pair though + // they are different opcode. + case PPC::STW: + case PPC::STW8: + return SecondOpc == PPC::STW || SecondOpc == PPC::STW8; + } +} + +bool PPCInstrInfo::shouldClusterMemOps( + ArrayRef<const MachineOperand *> BaseOps1, + ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads, + unsigned NumBytes) const { + + assert(BaseOps1.size() == 1 && BaseOps2.size() == 1); + const MachineOperand &BaseOp1 = *BaseOps1.front(); + const MachineOperand &BaseOp2 = *BaseOps2.front(); + assert((BaseOp1.isReg() || BaseOp1.isFI()) && + "Only base registers and frame indices are supported."); + + // The NumLoads means the number of loads that has been clustered. + // Don't cluster memory op if there are already two ops clustered at least. + if (NumLoads > 2) + return false; + + // Cluster the load/store only when they have the same base + // register or FI. + if ((BaseOp1.isReg() != BaseOp2.isReg()) || + (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) || + (BaseOp1.isFI() && BaseOp1.getIndex() != BaseOp2.getIndex())) + return false; + + // Check if the load/store are clusterable according to the PowerPC + // specification. + const MachineInstr &FirstLdSt = *BaseOp1.getParent(); + const MachineInstr &SecondLdSt = *BaseOp2.getParent(); + unsigned FirstOpc = FirstLdSt.getOpcode(); + unsigned SecondOpc = SecondLdSt.getOpcode(); + const TargetRegisterInfo *TRI = &getRegisterInfo(); + // Cluster the load/store only when they have the same opcode, and they are + // clusterable opcode according to PowerPC specification. + if (!isClusterableLdStOpcPair(FirstOpc, SecondOpc, Subtarget)) + return false; + + // Can't cluster load/store that have ordered or volatile memory reference. + if (!isLdStSafeToCluster(FirstLdSt, TRI) || + !isLdStSafeToCluster(SecondLdSt, TRI)) + return false; + + int64_t Offset1 = 0, Offset2 = 0; + unsigned Width1 = 0, Width2 = 0; + const MachineOperand *Base1 = nullptr, *Base2 = nullptr; + if (!getMemOperandWithOffsetWidth(FirstLdSt, Base1, Offset1, Width1, TRI) || + !getMemOperandWithOffsetWidth(SecondLdSt, Base2, Offset2, Width2, TRI) || + Width1 != Width2) + return false; + + assert(Base1 == &BaseOp1 && Base2 == &BaseOp2 && + "getMemOperandWithOffsetWidth return incorrect base op"); + // The caller should already have ordered FirstMemOp/SecondMemOp by offset. + assert(Offset1 <= Offset2 && "Caller should have ordered offsets."); + return Offset1 + Width1 == Offset2; +} + /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. /// @@ -2883,14 +2883,14 @@ PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { {MO_PLT, "ppc-plt"}, {MO_PIC_FLAG, "ppc-pic"}, {MO_PCREL_FLAG, "ppc-pcrel"}, - {MO_GOT_FLAG, "ppc-got"}, - {MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"}, - {MO_TLSGD_FLAG, "ppc-tlsgd"}, - {MO_TLSLD_FLAG, "ppc-tlsld"}, - {MO_TPREL_FLAG, "ppc-tprel"}, - {MO_GOT_TLSGD_PCREL_FLAG, "ppc-got-tlsgd-pcrel"}, - {MO_GOT_TLSLD_PCREL_FLAG, "ppc-got-tlsld-pcrel"}, - {MO_GOT_TPREL_PCREL_FLAG, "ppc-got-tprel-pcrel"}}; + {MO_GOT_FLAG, "ppc-got"}, + {MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"}, + {MO_TLSGD_FLAG, "ppc-tlsgd"}, + {MO_TLSLD_FLAG, "ppc-tlsld"}, + {MO_TPREL_FLAG, "ppc-tprel"}, + {MO_GOT_TLSGD_PCREL_FLAG, "ppc-got-tlsgd-pcrel"}, + {MO_GOT_TLSLD_PCREL_FLAG, "ppc-got-tlsld-pcrel"}, + {MO_GOT_TPREL_PCREL_FLAG, "ppc-got-tprel-pcrel"}}; return makeArrayRef(TargetFlags); } @@ -2971,31 +2971,31 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { auto DL = MI.getDebugLoc(); switch (MI.getOpcode()) { - case PPC::BUILD_UACC: { - MCRegister ACC = MI.getOperand(0).getReg(); - MCRegister UACC = MI.getOperand(1).getReg(); - if (ACC - PPC::ACC0 != UACC - PPC::UACC0) { - MCRegister SrcVSR = PPC::VSL0 + (UACC - PPC::UACC0) * 4; - MCRegister DstVSR = PPC::VSL0 + (ACC - PPC::ACC0) * 4; - // FIXME: This can easily be improved to look up to the top of the MBB - // to see if the inputs are XXLOR's. If they are and SrcReg is killed, - // we can just re-target any such XXLOR's to DstVSR + offset. - for (int VecNo = 0; VecNo < 4; VecNo++) - BuildMI(MBB, MI, DL, get(PPC::XXLOR), DstVSR + VecNo) - .addReg(SrcVSR + VecNo) - .addReg(SrcVSR + VecNo); - } - // BUILD_UACC is expanded to 4 copies of the underlying vsx regisers. - // So after building the 4 copies, we can replace the BUILD_UACC instruction - // with a NOP. - LLVM_FALLTHROUGH; - } - case PPC::KILL_PAIR: { - MI.setDesc(get(PPC::UNENCODED_NOP)); - MI.RemoveOperand(1); - MI.RemoveOperand(0); - return true; - } + case PPC::BUILD_UACC: { + MCRegister ACC = MI.getOperand(0).getReg(); + MCRegister UACC = MI.getOperand(1).getReg(); + if (ACC - PPC::ACC0 != UACC - PPC::UACC0) { + MCRegister SrcVSR = PPC::VSL0 + (UACC - PPC::UACC0) * 4; + MCRegister DstVSR = PPC::VSL0 + (ACC - PPC::ACC0) * 4; + // FIXME: This can easily be improved to look up to the top of the MBB + // to see if the inputs are XXLOR's. If they are and SrcReg is killed, + // we can just re-target any such XXLOR's to DstVSR + offset. + for (int VecNo = 0; VecNo < 4; VecNo++) + BuildMI(MBB, MI, DL, get(PPC::XXLOR), DstVSR + VecNo) + .addReg(SrcVSR + VecNo) + .addReg(SrcVSR + VecNo); + } + // BUILD_UACC is expanded to 4 copies of the underlying vsx regisers. + // So after building the 4 copies, we can replace the BUILD_UACC instruction + // with a NOP. + LLVM_FALLTHROUGH; + } + case PPC::KILL_PAIR: { + MI.setDesc(get(PPC::UNENCODED_NOP)); + MI.RemoveOperand(1); + MI.RemoveOperand(0); + return true; + } case TargetOpcode::LOAD_STACK_GUARD: { assert(Subtarget.isTargetLinux() && "Only Linux target is expected to contain LOAD_STACK_GUARD"); @@ -3287,10 +3287,10 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI( } unsigned PPCInstrInfo::getSpillTarget() const { - // With P10, we may need to spill paired vector registers or accumulator - // registers. MMA implies paired vectors, so we can just check that. - bool IsP10Variant = Subtarget.isISA3_1() || Subtarget.pairedVectorMemops(); - return IsP10Variant ? 2 : Subtarget.hasP9Vector() ? 1 : 0; + // With P10, we may need to spill paired vector registers or accumulator + // registers. MMA implies paired vectors, so we can just check that. + bool IsP10Variant = Subtarget.isISA3_1() || Subtarget.pairedVectorMemops(); + return IsP10Variant ? 2 : Subtarget.hasP9Vector() ? 1 : 0; } const unsigned *PPCInstrInfo::getStoreOpcodesForSpillArray() const { @@ -3681,143 +3681,143 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, return false; } -bool PPCInstrInfo::combineRLWINM(MachineInstr &MI, - MachineInstr **ToErase) const { - MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo(); - unsigned FoldingReg = MI.getOperand(1).getReg(); - if (!Register::isVirtualRegister(FoldingReg)) - return false; - MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg); - if (SrcMI->getOpcode() != PPC::RLWINM && - SrcMI->getOpcode() != PPC::RLWINM_rec && - SrcMI->getOpcode() != PPC::RLWINM8 && - SrcMI->getOpcode() != PPC::RLWINM8_rec) - return false; - assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() && - MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() && - SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) && - "Invalid PPC::RLWINM Instruction!"); - uint64_t SHSrc = SrcMI->getOperand(2).getImm(); - uint64_t SHMI = MI.getOperand(2).getImm(); - uint64_t MBSrc = SrcMI->getOperand(3).getImm(); - uint64_t MBMI = MI.getOperand(3).getImm(); - uint64_t MESrc = SrcMI->getOperand(4).getImm(); - uint64_t MEMI = MI.getOperand(4).getImm(); - - assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) && - "Invalid PPC::RLWINM Instruction!"); - // If MBMI is bigger than MEMI, we always can not get run of ones. - // RotatedSrcMask non-wrap: - // 0........31|32........63 - // RotatedSrcMask: B---E B---E - // MaskMI: -----------|--E B------ - // Result: ----- --- (Bad candidate) - // - // RotatedSrcMask wrap: - // 0........31|32........63 - // RotatedSrcMask: --E B----|--E B---- - // MaskMI: -----------|--E B------ - // Result: --- -----|--- ----- (Bad candidate) - // - // One special case is RotatedSrcMask is a full set mask. - // RotatedSrcMask full: - // 0........31|32........63 - // RotatedSrcMask: ------EB---|-------EB--- - // MaskMI: -----------|--E B------ - // Result: -----------|--- ------- (Good candidate) - - // Mark special case. - bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31); - - // For other MBMI > MEMI cases, just return. - if ((MBMI > MEMI) && !SrcMaskFull) - return false; - - // Handle MBMI <= MEMI cases. - APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI); - // In MI, we only need low 32 bits of SrcMI, just consider about low 32 - // bit of SrcMI mask. Note that in APInt, lowerest bit is at index 0, - // while in PowerPC ISA, lowerest bit is at index 63. - APInt MaskSrc = APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc); - - APInt RotatedSrcMask = MaskSrc.rotl(SHMI); - APInt FinalMask = RotatedSrcMask & MaskMI; - uint32_t NewMB, NewME; - bool Simplified = false; - - // If final mask is 0, MI result should be 0 too. - if (FinalMask.isNullValue()) { - bool Is64Bit = - (MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec); - Simplified = true; - LLVM_DEBUG(dbgs() << "Replace Instr: "); - LLVM_DEBUG(MI.dump()); - - if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) { - // Replace MI with "LI 0" - MI.RemoveOperand(4); - MI.RemoveOperand(3); - MI.RemoveOperand(2); - MI.getOperand(1).ChangeToImmediate(0); - MI.setDesc(get(Is64Bit ? PPC::LI8 : PPC::LI)); - } else { - // Replace MI with "ANDI_rec reg, 0" - MI.RemoveOperand(4); - MI.RemoveOperand(3); - MI.getOperand(2).setImm(0); - MI.setDesc(get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec)); - MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg()); - if (SrcMI->getOperand(1).isKill()) { - MI.getOperand(1).setIsKill(true); - SrcMI->getOperand(1).setIsKill(false); - } else - // About to replace MI.getOperand(1), clear its kill flag. - MI.getOperand(1).setIsKill(false); - } - - LLVM_DEBUG(dbgs() << "With: "); - LLVM_DEBUG(MI.dump()); - - } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB, NewME) && - NewMB <= NewME) || - SrcMaskFull) { - // Here we only handle MBMI <= MEMI case, so NewMB must be no bigger - // than NewME. Otherwise we get a 64 bit value after folding, but MI - // return a 32 bit value. - Simplified = true; - LLVM_DEBUG(dbgs() << "Converting Instr: "); - LLVM_DEBUG(MI.dump()); - - uint16_t NewSH = (SHSrc + SHMI) % 32; - MI.getOperand(2).setImm(NewSH); - // If SrcMI mask is full, no need to update MBMI and MEMI. - if (!SrcMaskFull) { - MI.getOperand(3).setImm(NewMB); - MI.getOperand(4).setImm(NewME); - } - MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg()); - if (SrcMI->getOperand(1).isKill()) { - MI.getOperand(1).setIsKill(true); - SrcMI->getOperand(1).setIsKill(false); - } else - // About to replace MI.getOperand(1), clear its kill flag. - MI.getOperand(1).setIsKill(false); - - LLVM_DEBUG(dbgs() << "To: "); - LLVM_DEBUG(MI.dump()); - } - if (Simplified & MRI->use_nodbg_empty(FoldingReg) && - !SrcMI->hasImplicitDef()) { - // If FoldingReg has no non-debug use and it has no implicit def (it - // is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI. - // Otherwise keep it. - *ToErase = SrcMI; - LLVM_DEBUG(dbgs() << "Delete dead instruction: "); - LLVM_DEBUG(SrcMI->dump()); - } - return Simplified; -} - +bool PPCInstrInfo::combineRLWINM(MachineInstr &MI, + MachineInstr **ToErase) const { + MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo(); + unsigned FoldingReg = MI.getOperand(1).getReg(); + if (!Register::isVirtualRegister(FoldingReg)) + return false; + MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg); + if (SrcMI->getOpcode() != PPC::RLWINM && + SrcMI->getOpcode() != PPC::RLWINM_rec && + SrcMI->getOpcode() != PPC::RLWINM8 && + SrcMI->getOpcode() != PPC::RLWINM8_rec) + return false; + assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() && + MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() && + SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) && + "Invalid PPC::RLWINM Instruction!"); + uint64_t SHSrc = SrcMI->getOperand(2).getImm(); + uint64_t SHMI = MI.getOperand(2).getImm(); + uint64_t MBSrc = SrcMI->getOperand(3).getImm(); + uint64_t MBMI = MI.getOperand(3).getImm(); + uint64_t MESrc = SrcMI->getOperand(4).getImm(); + uint64_t MEMI = MI.getOperand(4).getImm(); + + assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) && + "Invalid PPC::RLWINM Instruction!"); + // If MBMI is bigger than MEMI, we always can not get run of ones. + // RotatedSrcMask non-wrap: + // 0........31|32........63 + // RotatedSrcMask: B---E B---E + // MaskMI: -----------|--E B------ + // Result: ----- --- (Bad candidate) + // + // RotatedSrcMask wrap: + // 0........31|32........63 + // RotatedSrcMask: --E B----|--E B---- + // MaskMI: -----------|--E B------ + // Result: --- -----|--- ----- (Bad candidate) + // + // One special case is RotatedSrcMask is a full set mask. + // RotatedSrcMask full: + // 0........31|32........63 + // RotatedSrcMask: ------EB---|-------EB--- + // MaskMI: -----------|--E B------ + // Result: -----------|--- ------- (Good candidate) + + // Mark special case. + bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31); + + // For other MBMI > MEMI cases, just return. + if ((MBMI > MEMI) && !SrcMaskFull) + return false; + + // Handle MBMI <= MEMI cases. + APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI); + // In MI, we only need low 32 bits of SrcMI, just consider about low 32 + // bit of SrcMI mask. Note that in APInt, lowerest bit is at index 0, + // while in PowerPC ISA, lowerest bit is at index 63. + APInt MaskSrc = APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc); + + APInt RotatedSrcMask = MaskSrc.rotl(SHMI); + APInt FinalMask = RotatedSrcMask & MaskMI; + uint32_t NewMB, NewME; + bool Simplified = false; + + // If final mask is 0, MI result should be 0 too. + if (FinalMask.isNullValue()) { + bool Is64Bit = + (MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec); + Simplified = true; + LLVM_DEBUG(dbgs() << "Replace Instr: "); + LLVM_DEBUG(MI.dump()); + + if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) { + // Replace MI with "LI 0" + MI.RemoveOperand(4); + MI.RemoveOperand(3); + MI.RemoveOperand(2); + MI.getOperand(1).ChangeToImmediate(0); + MI.setDesc(get(Is64Bit ? PPC::LI8 : PPC::LI)); + } else { + // Replace MI with "ANDI_rec reg, 0" + MI.RemoveOperand(4); + MI.RemoveOperand(3); + MI.getOperand(2).setImm(0); + MI.setDesc(get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec)); + MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg()); + if (SrcMI->getOperand(1).isKill()) { + MI.getOperand(1).setIsKill(true); + SrcMI->getOperand(1).setIsKill(false); + } else + // About to replace MI.getOperand(1), clear its kill flag. + MI.getOperand(1).setIsKill(false); + } + + LLVM_DEBUG(dbgs() << "With: "); + LLVM_DEBUG(MI.dump()); + + } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB, NewME) && + NewMB <= NewME) || + SrcMaskFull) { + // Here we only handle MBMI <= MEMI case, so NewMB must be no bigger + // than NewME. Otherwise we get a 64 bit value after folding, but MI + // return a 32 bit value. + Simplified = true; + LLVM_DEBUG(dbgs() << "Converting Instr: "); + LLVM_DEBUG(MI.dump()); + + uint16_t NewSH = (SHSrc + SHMI) % 32; + MI.getOperand(2).setImm(NewSH); + // If SrcMI mask is full, no need to update MBMI and MEMI. + if (!SrcMaskFull) { + MI.getOperand(3).setImm(NewMB); + MI.getOperand(4).setImm(NewME); + } + MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg()); + if (SrcMI->getOperand(1).isKill()) { + MI.getOperand(1).setIsKill(true); + SrcMI->getOperand(1).setIsKill(false); + } else + // About to replace MI.getOperand(1), clear its kill flag. + MI.getOperand(1).setIsKill(false); + + LLVM_DEBUG(dbgs() << "To: "); + LLVM_DEBUG(MI.dump()); + } + if (Simplified & MRI->use_nodbg_empty(FoldingReg) && + !SrcMI->hasImplicitDef()) { + // If FoldingReg has no non-debug use and it has no implicit def (it + // is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI. + // Otherwise keep it. + *ToErase = SrcMI; + LLVM_DEBUG(dbgs() << "Delete dead instruction: "); + LLVM_DEBUG(SrcMI->dump()); + } + return Simplified; +} + bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg, ImmInstrInfo &III, bool PostRA) const { // The vast majority of the instructions would need their operand 2 replaced @@ -4539,20 +4539,20 @@ bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI, } return false; } - case PPC::SUBFIC: - case PPC::SUBFIC8: { - // Only transform this if the CARRY implicit operand is dead. - if (MI.getNumOperands() > 3 && !MI.getOperand(3).isDead()) - return false; - int64_t Minuend = MI.getOperand(2).getImm(); - if (isInt<16>(Minuend - SExtImm)) { - ReplaceWithLI = true; - Is64BitLI = Opc == PPC::SUBFIC8; - NewImm = Minuend - SExtImm; - break; - } - return false; - } + case PPC::SUBFIC: + case PPC::SUBFIC8: { + // Only transform this if the CARRY implicit operand is dead. + if (MI.getNumOperands() > 3 && !MI.getOperand(3).isDead()) + return false; + int64_t Minuend = MI.getOperand(2).getImm(); + if (isInt<16>(Minuend - SExtImm)) { + ReplaceWithLI = true; + Is64BitLI = Opc == PPC::SUBFIC8; + NewImm = Minuend - SExtImm; + break; + } + return false; + } case PPC::RLDICL: case PPC::RLDICL_rec: case PPC::RLDICL_32: @@ -5439,15 +5439,15 @@ MachineInstr *PPCInstrInfo::findLoopInstr( bool PPCInstrInfo::getMemOperandWithOffsetWidth( const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset, unsigned &Width, const TargetRegisterInfo *TRI) const { - if (!LdSt.mayLoadOrStore() || LdSt.getNumExplicitOperands() != 3) + if (!LdSt.mayLoadOrStore() || LdSt.getNumExplicitOperands() != 3) return false; // Handle only loads/stores with base register followed by immediate offset. - if (!LdSt.getOperand(1).isImm() || - (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI())) + if (!LdSt.getOperand(1).isImm() || + (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI())) return false; - if (!LdSt.getOperand(1).isImm() || - (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI())) + if (!LdSt.getOperand(1).isImm() || + (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI())) return false; if (!LdSt.hasOneMemOperand()) diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrInfo.h index 1f4fc87086..c6ef1742b7 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrInfo.h +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrInfo.h @@ -123,72 +123,72 @@ enum SpillOpcodeKey { SOK_VectorFloat8Spill, SOK_VectorFloat4Spill, SOK_SpillToVSR, - SOK_PairedVecSpill, - SOK_AccumulatorSpill, - SOK_UAccumulatorSpill, + SOK_PairedVecSpill, + SOK_AccumulatorSpill, + SOK_UAccumulatorSpill, SOK_SPESpill, SOK_LastOpcodeSpill // This must be last on the enum. }; // Define list of load and store spill opcodes. -#define NoInstr PPC::INSTRUCTION_LIST_END +#define NoInstr PPC::INSTRUCTION_LIST_END #define Pwr8LoadOpcodes \ { \ PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \ PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXVD2X, PPC::LXSDX, PPC::LXSSPX, \ - PPC::SPILLTOVSR_LD, NoInstr, NoInstr, NoInstr, PPC::EVLDD \ + PPC::SPILLTOVSR_LD, NoInstr, NoInstr, NoInstr, PPC::EVLDD \ } #define Pwr9LoadOpcodes \ { \ PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \ PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, \ - PPC::DFLOADf32, PPC::SPILLTOVSR_LD, NoInstr, NoInstr, NoInstr, NoInstr \ + PPC::DFLOADf32, PPC::SPILLTOVSR_LD, NoInstr, NoInstr, NoInstr, NoInstr \ + } + +#define Pwr10LoadOpcodes \ + { \ + PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \ + PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, \ + PPC::DFLOADf32, PPC::SPILLTOVSR_LD, PPC::LXVP, PPC::RESTORE_ACC, \ + PPC::RESTORE_UACC, NoInstr \ } -#define Pwr10LoadOpcodes \ - { \ - PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \ - PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, \ - PPC::DFLOADf32, PPC::SPILLTOVSR_LD, PPC::LXVP, PPC::RESTORE_ACC, \ - PPC::RESTORE_UACC, NoInstr \ - } - #define Pwr8StoreOpcodes \ { \ PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \ - PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX, \ - PPC::SPILLTOVSR_ST, NoInstr, NoInstr, NoInstr, PPC::EVSTDD \ + PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX, \ + PPC::SPILLTOVSR_ST, NoInstr, NoInstr, NoInstr, PPC::EVSTDD \ } #define Pwr9StoreOpcodes \ { \ PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \ PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, \ - PPC::SPILLTOVSR_ST, NoInstr, NoInstr, NoInstr, NoInstr \ + PPC::SPILLTOVSR_ST, NoInstr, NoInstr, NoInstr, NoInstr \ + } + +#define Pwr10StoreOpcodes \ + { \ + PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \ + PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, \ + PPC::SPILLTOVSR_ST, PPC::STXVP, PPC::SPILL_ACC, PPC::SPILL_UACC, \ + NoInstr \ } -#define Pwr10StoreOpcodes \ - { \ - PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \ - PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, \ - PPC::SPILLTOVSR_ST, PPC::STXVP, PPC::SPILL_ACC, PPC::SPILL_UACC, \ - NoInstr \ - } - // Initialize arrays for load and store spill opcodes on supported subtargets. #define StoreOpcodesForSpill \ - { Pwr8StoreOpcodes, Pwr9StoreOpcodes, Pwr10StoreOpcodes } + { Pwr8StoreOpcodes, Pwr9StoreOpcodes, Pwr10StoreOpcodes } #define LoadOpcodesForSpill \ - { Pwr8LoadOpcodes, Pwr9LoadOpcodes, Pwr10LoadOpcodes } + { Pwr8LoadOpcodes, Pwr9LoadOpcodes, Pwr10LoadOpcodes } class PPCSubtarget; class PPCInstrInfo : public PPCGenInstrInfo { PPCSubtarget &Subtarget; const PPCRegisterInfo RI; - const unsigned StoreSpillOpcodesArray[3][SOK_LastOpcodeSpill] = + const unsigned StoreSpillOpcodesArray[3][SOK_LastOpcodeSpill] = StoreOpcodesForSpill; - const unsigned LoadSpillOpcodesArray[3][SOK_LastOpcodeSpill] = + const unsigned LoadSpillOpcodesArray[3][SOK_LastOpcodeSpill] = LoadOpcodesForSpill; void StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill, @@ -246,17 +246,17 @@ class PPCInstrInfo : public PPCGenInstrInfo { unsigned getSpillTarget() const; const unsigned *getStoreOpcodesForSpillArray() const; const unsigned *getLoadOpcodesForSpillArray() const; - unsigned getSpillIndex(const TargetRegisterClass *RC) const; + unsigned getSpillIndex(const TargetRegisterClass *RC) const; int16_t getFMAOpIdxInfo(unsigned Opcode) const; void reassociateFMA(MachineInstr &Root, MachineCombinerPattern Pattern, SmallVectorImpl<MachineInstr *> &InsInstrs, SmallVectorImpl<MachineInstr *> &DelInstrs, DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const; - bool isLoadFromConstantPool(MachineInstr *I) const; - Register - generateLoadForNewConst(unsigned Idx, MachineInstr *MI, Type *Ty, - SmallVectorImpl<MachineInstr *> &InsInstrs) const; - const Constant *getConstantFromConstantPool(MachineInstr *I) const; + bool isLoadFromConstantPool(MachineInstr *I) const; + Register + generateLoadForNewConst(unsigned Idx, MachineInstr *MI, Type *Ty, + SmallVectorImpl<MachineInstr *> &InsInstrs) const; + const Constant *getConstantFromConstantPool(MachineInstr *I) const; virtual void anchor(); protected: @@ -291,10 +291,10 @@ public: } static bool isSameClassPhysRegCopy(unsigned Opcode) { - unsigned CopyOpcodes[] = {PPC::OR, PPC::OR8, PPC::FMR, - PPC::VOR, PPC::XXLOR, PPC::XXLORf, - PPC::XSCPSGNDP, PPC::MCRF, PPC::CROR, - PPC::EVOR, -1U}; + unsigned CopyOpcodes[] = {PPC::OR, PPC::OR8, PPC::FMR, + PPC::VOR, PPC::XXLOR, PPC::XXLORf, + PPC::XSCPSGNDP, PPC::MCRF, PPC::CROR, + PPC::EVOR, -1U}; for (int i = 0; CopyOpcodes[i] != -1U; i++) if (Opcode == CopyOpcodes[i]) return true; @@ -348,30 +348,30 @@ public: /// chain ending in \p Root. All potential patterns are output in the \p /// P array. bool getFMAPatterns(MachineInstr &Root, - SmallVectorImpl<MachineCombinerPattern> &P, - bool DoRegPressureReduce) const; + SmallVectorImpl<MachineCombinerPattern> &P, + bool DoRegPressureReduce) const; /// Return true when there is potentially a faster code sequence /// for an instruction chain ending in <Root>. All potential patterns are /// output in the <Pattern> array. - bool getMachineCombinerPatterns(MachineInstr &Root, - SmallVectorImpl<MachineCombinerPattern> &P, - bool DoRegPressureReduce) const override; - - /// On PowerPC, we leverage machine combiner pass to reduce register pressure - /// when the register pressure is high for one BB. - /// Return true if register pressure for \p MBB is high and ABI is supported - /// to reduce register pressure. Otherwise return false. - bool - shouldReduceRegisterPressure(MachineBasicBlock *MBB, - RegisterClassInfo *RegClassInfo) const override; - - /// Fixup the placeholders we put in genAlternativeCodeSequence() for - /// MachineCombiner. - void - finalizeInsInstrs(MachineInstr &Root, MachineCombinerPattern &P, - SmallVectorImpl<MachineInstr *> &InsInstrs) const override; - + bool getMachineCombinerPatterns(MachineInstr &Root, + SmallVectorImpl<MachineCombinerPattern> &P, + bool DoRegPressureReduce) const override; + + /// On PowerPC, we leverage machine combiner pass to reduce register pressure + /// when the register pressure is high for one BB. + /// Return true if register pressure for \p MBB is high and ABI is supported + /// to reduce register pressure. Otherwise return false. + bool + shouldReduceRegisterPressure(MachineBasicBlock *MBB, + RegisterClassInfo *RegClassInfo) const override; + + /// Fixup the placeholders we put in genAlternativeCodeSequence() for + /// MachineCombiner. + void + finalizeInsInstrs(MachineInstr &Root, MachineCombinerPattern &P, + SmallVectorImpl<MachineInstr *> &InsInstrs) const override; + bool isAssociativeAndCommutative(const MachineInstr &Inst) const override; /// On PowerPC, we try to reassociate FMA chain which will increase @@ -503,18 +503,18 @@ public: // Predication support. bool isPredicated(const MachineInstr &MI) const override; - bool isSchedulingBoundary(const MachineInstr &MI, - const MachineBasicBlock *MBB, - const MachineFunction &MF) const override; - + bool isSchedulingBoundary(const MachineInstr &MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const override; + bool PredicateInstruction(MachineInstr &MI, ArrayRef<MachineOperand> Pred) const override; bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1, ArrayRef<MachineOperand> Pred2) const override; - bool ClobbersPredicate(MachineInstr &MI, std::vector<MachineOperand> &Pred, - bool SkipDead) const override; + bool ClobbersPredicate(MachineInstr &MI, std::vector<MachineOperand> &Pred, + bool SkipDead) const override; // Comparison optimization. @@ -534,20 +534,20 @@ public: int64_t &Offset, unsigned &Width, const TargetRegisterInfo *TRI) const; - /// Get the base operand and byte offset of an instruction that reads/writes - /// memory. - bool getMemOperandsWithOffsetWidth( - const MachineInstr &LdSt, - SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset, - bool &OffsetIsScalable, unsigned &Width, - const TargetRegisterInfo *TRI) const override; - - /// Returns true if the two given memory operations should be scheduled - /// adjacent. - bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1, - ArrayRef<const MachineOperand *> BaseOps2, - unsigned NumLoads, unsigned NumBytes) const override; - + /// Get the base operand and byte offset of an instruction that reads/writes + /// memory. + bool getMemOperandsWithOffsetWidth( + const MachineInstr &LdSt, + SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset, + bool &OffsetIsScalable, unsigned &Width, + const TargetRegisterInfo *TRI) const override; + + /// Returns true if the two given memory operations should be scheduled + /// adjacent. + bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1, + ArrayRef<const MachineOperand *> BaseOps2, + unsigned NumLoads, unsigned NumBytes) const override; + /// Return true if two MIs access different memory addresses and false /// otherwise bool @@ -605,7 +605,7 @@ public: bool convertToImmediateForm(MachineInstr &MI, MachineInstr **KilledDef = nullptr) const; bool foldFrameOffset(MachineInstr &MI) const; - bool combineRLWINM(MachineInstr &MI, MachineInstr **ToErase = nullptr) const; + bool combineRLWINM(MachineInstr &MI, MachineInstr **ToErase = nullptr) const; bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const; bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const; bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &BaseReg, diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrInfo.td index a063963821..724af23542 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrInfo.td +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrInfo.td @@ -74,9 +74,9 @@ def SDT_PPCcondbr : SDTypeProfile<0, 3, [ SDTCisVT<0, i32>, SDTCisVT<2, OtherVT> ]>; -def SDT_PPCFtsqrt : SDTypeProfile<1, 1, [ - SDTCisVT<0, i32>]>; - +def SDT_PPCFtsqrt : SDTypeProfile<1, 1, [ + SDTCisVT<0, i32>]>; + def SDT_PPClbrx : SDTypeProfile<1, 2, [ SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT> ]>; @@ -127,8 +127,8 @@ def SDT_PPCFPMinMax : SDTypeProfile<1, 2, [ def PPCfre : SDNode<"PPCISD::FRE", SDTFPUnaryOp, []>; def PPCfrsqrte: SDNode<"PPCISD::FRSQRTE", SDTFPUnaryOp, []>; -def PPCfsqrt : SDNode<"PPCISD::FSQRT", SDTFPUnaryOp, []>; -def PPCftsqrt : SDNode<"PPCISD::FTSQRT", SDT_PPCFtsqrt,[]>; +def PPCfsqrt : SDNode<"PPCISD::FSQRT", SDTFPUnaryOp, []>; +def PPCftsqrt : SDNode<"PPCISD::FTSQRT", SDT_PPCFtsqrt,[]>; def PPCfcfid : SDNode<"PPCISD::FCFID", SDTFPUnaryOp, []>; def PPCfcfidu : SDNode<"PPCISD::FCFIDU", SDTFPUnaryOp, []>; @@ -139,28 +139,28 @@ def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>; def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>; def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>; -def PPCstrict_fcfid : SDNode<"PPCISD::STRICT_FCFID", - SDTFPUnaryOp, [SDNPHasChain]>; -def PPCstrict_fcfidu : SDNode<"PPCISD::STRICT_FCFIDU", - SDTFPUnaryOp, [SDNPHasChain]>; -def PPCstrict_fcfids : SDNode<"PPCISD::STRICT_FCFIDS", - SDTFPRoundOp, [SDNPHasChain]>; -def PPCstrict_fcfidus : SDNode<"PPCISD::STRICT_FCFIDUS", - SDTFPRoundOp, [SDNPHasChain]>; - -def PPCany_fcfid : PatFrags<(ops node:$op), - [(PPCfcfid node:$op), - (PPCstrict_fcfid node:$op)]>; -def PPCany_fcfidu : PatFrags<(ops node:$op), - [(PPCfcfidu node:$op), - (PPCstrict_fcfidu node:$op)]>; -def PPCany_fcfids : PatFrags<(ops node:$op), - [(PPCfcfids node:$op), - (PPCstrict_fcfids node:$op)]>; -def PPCany_fcfidus : PatFrags<(ops node:$op), - [(PPCfcfidus node:$op), - (PPCstrict_fcfidus node:$op)]>; - +def PPCstrict_fcfid : SDNode<"PPCISD::STRICT_FCFID", + SDTFPUnaryOp, [SDNPHasChain]>; +def PPCstrict_fcfidu : SDNode<"PPCISD::STRICT_FCFIDU", + SDTFPUnaryOp, [SDNPHasChain]>; +def PPCstrict_fcfids : SDNode<"PPCISD::STRICT_FCFIDS", + SDTFPRoundOp, [SDNPHasChain]>; +def PPCstrict_fcfidus : SDNode<"PPCISD::STRICT_FCFIDUS", + SDTFPRoundOp, [SDNPHasChain]>; + +def PPCany_fcfid : PatFrags<(ops node:$op), + [(PPCfcfid node:$op), + (PPCstrict_fcfid node:$op)]>; +def PPCany_fcfidu : PatFrags<(ops node:$op), + [(PPCfcfidu node:$op), + (PPCstrict_fcfidu node:$op)]>; +def PPCany_fcfids : PatFrags<(ops node:$op), + [(PPCfcfids node:$op), + (PPCstrict_fcfids node:$op)]>; +def PPCany_fcfidus : PatFrags<(ops node:$op), + [(PPCfcfidus node:$op), + (PPCstrict_fcfidus node:$op)]>; + def PPCcv_fp_to_uint_in_vsr: SDNode<"PPCISD::FP_TO_UINT_IN_VSR", SDT_PPCcv_fp_to_int, []>; def PPCcv_fp_to_sint_in_vsr: @@ -187,12 +187,12 @@ def PPCmffs : SDNode<"PPCISD::MFFS", // Perform FADD in round-to-zero mode. def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>; -def PPCstrict_faddrtz: SDNode<"PPCISD::STRICT_FADDRTZ", SDTFPBinOp, - [SDNPHasChain]>; +def PPCstrict_faddrtz: SDNode<"PPCISD::STRICT_FADDRTZ", SDTFPBinOp, + [SDNPHasChain]>; -def PPCany_faddrtz: PatFrags<(ops node:$lhs, node:$rhs), - [(PPCfaddrtz node:$lhs, node:$rhs), - (PPCstrict_faddrtz node:$lhs, node:$rhs)]>; +def PPCany_faddrtz: PatFrags<(ops node:$lhs, node:$rhs), + [(PPCfaddrtz node:$lhs, node:$rhs), + (PPCstrict_faddrtz node:$lhs, node:$rhs)]>; def PPCfsel : SDNode<"PPCISD::FSEL", // Type constraint for fsel. @@ -227,7 +227,7 @@ def PPCaddiTlsldLAddr : SDNode<"PPCISD::ADDI_TLSLD_L_ADDR", SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>; def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp>; def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; -def PPCpaddiDtprel : SDNode<"PPCISD::PADDI_DTPREL", SDTIntBinOp>; +def PPCpaddiDtprel : SDNode<"PPCISD::PADDI_DTPREL", SDTIntBinOp>; def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>; @@ -248,28 +248,28 @@ def PPCfnmsub : SDNode<"PPCISD::FNMSUB" , SDTFPTernaryOp>; def PPCextswsli : SDNode<"PPCISD::EXTSWSLI" , SDT_PPCextswsli>; -def PPCstrict_fctidz : SDNode<"PPCISD::STRICT_FCTIDZ", - SDTFPUnaryOp, [SDNPHasChain]>; -def PPCstrict_fctiwz : SDNode<"PPCISD::STRICT_FCTIWZ", - SDTFPUnaryOp, [SDNPHasChain]>; -def PPCstrict_fctiduz : SDNode<"PPCISD::STRICT_FCTIDUZ", - SDTFPUnaryOp, [SDNPHasChain]>; -def PPCstrict_fctiwuz : SDNode<"PPCISD::STRICT_FCTIWUZ", - SDTFPUnaryOp, [SDNPHasChain]>; - -def PPCany_fctidz : PatFrags<(ops node:$op), - [(PPCstrict_fctidz node:$op), - (PPCfctidz node:$op)]>; -def PPCany_fctiwz : PatFrags<(ops node:$op), - [(PPCstrict_fctiwz node:$op), - (PPCfctiwz node:$op)]>; -def PPCany_fctiduz : PatFrags<(ops node:$op), - [(PPCstrict_fctiduz node:$op), - (PPCfctiduz node:$op)]>; -def PPCany_fctiwuz : PatFrags<(ops node:$op), - [(PPCstrict_fctiwuz node:$op), - (PPCfctiwuz node:$op)]>; - +def PPCstrict_fctidz : SDNode<"PPCISD::STRICT_FCTIDZ", + SDTFPUnaryOp, [SDNPHasChain]>; +def PPCstrict_fctiwz : SDNode<"PPCISD::STRICT_FCTIWZ", + SDTFPUnaryOp, [SDNPHasChain]>; +def PPCstrict_fctiduz : SDNode<"PPCISD::STRICT_FCTIDUZ", + SDTFPUnaryOp, [SDNPHasChain]>; +def PPCstrict_fctiwuz : SDNode<"PPCISD::STRICT_FCTIWUZ", + SDTFPUnaryOp, [SDNPHasChain]>; + +def PPCany_fctidz : PatFrags<(ops node:$op), + [(PPCstrict_fctidz node:$op), + (PPCfctidz node:$op)]>; +def PPCany_fctiwz : PatFrags<(ops node:$op), + [(PPCstrict_fctiwz node:$op), + (PPCfctiwz node:$op)]>; +def PPCany_fctiduz : PatFrags<(ops node:$op), + [(PPCstrict_fctiduz node:$op), + (PPCfctiduz node:$op)]>; +def PPCany_fctiwuz : PatFrags<(ops node:$op), + [(PPCstrict_fctiwuz node:$op), + (PPCfctiwuz node:$op)]>; + // Move 2 i64 values into a VSX register def PPCbuild_fp128: SDNode<"PPCISD::BUILD_FP128", SDTypeProfile<1, 2, @@ -340,7 +340,7 @@ def PPCrfebb : SDNode<"PPCISD::RFEBB", SDT_PPCsc, [SDNPHasChain, SDNPSideEffect]>; def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>; -def PPCvcmp_rec : SDNode<"PPCISD::VCMP_rec", SDT_PPCvcmp, [SDNPOutGlue]>; +def PPCvcmp_rec : SDNode<"PPCISD::VCMP_rec", SDT_PPCvcmp, [SDNPOutGlue]>; def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr, [SDNPHasChain, SDNPOptInGlue]>; @@ -372,10 +372,10 @@ def PPCprobedalloca : SDNode<"PPCISD::PROBED_ALLOCA", SDTDynOp, [SDNPHasChain]>; // PC Relative Specific Nodes def PPCmatpcreladdr : SDNode<"PPCISD::MAT_PCREL_ADDR", SDTIntUnaryOp, []>; -def PPCtlsdynamatpcreladdr : SDNode<"PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR", - SDTIntUnaryOp, []>; -def PPCtlslocalexecmataddr : SDNode<"PPCISD::TLS_LOCAL_EXEC_MAT_ADDR", - SDTIntUnaryOp, []>; +def PPCtlsdynamatpcreladdr : SDNode<"PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR", + SDTIntUnaryOp, []>; +def PPCtlslocalexecmataddr : SDNode<"PPCISD::TLS_LOCAL_EXEC_MAT_ADDR", + SDTIntUnaryOp, []>; //===----------------------------------------------------------------------===// // PowerPC specific transformation functions and pattern fragments. @@ -495,41 +495,41 @@ def imm64ZExt32 : Operand<i64>, ImmLeaf<i64, [{ return isUInt<32>(Imm); }]>; -// This is a somewhat weaker condition than actually checking for 4-byte -// alignment. It is simply checking that the displacement can be represented -// as an immediate that is a multiple of 4 (i.e. the requirements for DS-Form -// instructions). -// But some r+i load/store instructions (such as LD, STD, LDU, etc.) that require +// This is a somewhat weaker condition than actually checking for 4-byte +// alignment. It is simply checking that the displacement can be represented +// as an immediate that is a multiple of 4 (i.e. the requirements for DS-Form +// instructions). +// But some r+i load/store instructions (such as LD, STD, LDU, etc.) that require // restricted memrix (4-aligned) constants are alignment sensitive. If these // offsets are hidden behind TOC entries than the values of the lower-order // bits cannot be checked directly. As a result, we need to also incorporate // an alignment check into the relevant patterns. -def DSFormLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return isOffsetMultipleOf(N, 4) || cast<LoadSDNode>(N)->getAlignment() >= 4; +def DSFormLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return isOffsetMultipleOf(N, 4) || cast<LoadSDNode>(N)->getAlignment() >= 4; }]>; -def DSFormStore : PatFrag<(ops node:$val, node:$ptr), +def DSFormStore : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{ - return isOffsetMultipleOf(N, 4) || cast<StoreSDNode>(N)->getAlignment() >= 4; + return isOffsetMultipleOf(N, 4) || cast<StoreSDNode>(N)->getAlignment() >= 4; }]>; -def DSFormSextLoadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ - return isOffsetMultipleOf(N, 4) || cast<LoadSDNode>(N)->getAlignment() >= 4; +def DSFormSextLoadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ + return isOffsetMultipleOf(N, 4) || cast<LoadSDNode>(N)->getAlignment() >= 4; }]>; -def DSFormPreStore : PatFrag< +def DSFormPreStore : PatFrag< (ops node:$val, node:$base, node:$offset), (pre_store node:$val, node:$base, node:$offset), [{ - return isOffsetMultipleOf(N, 4) || cast<StoreSDNode>(N)->getAlignment() >= 4; + return isOffsetMultipleOf(N, 4) || cast<StoreSDNode>(N)->getAlignment() >= 4; }]>; -def NonDSFormLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return cast<LoadSDNode>(N)->getAlignment() < 4 && !isOffsetMultipleOf(N, 4); +def NonDSFormLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() < 4 && !isOffsetMultipleOf(N, 4); }]>; -def NonDSFormStore : PatFrag<(ops node:$val, node:$ptr), +def NonDSFormStore : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{ - return cast<StoreSDNode>(N)->getAlignment() < 4 && !isOffsetMultipleOf(N, 4); + return cast<StoreSDNode>(N)->getAlignment() < 4 && !isOffsetMultipleOf(N, 4); }]>; -def NonDSFormSextLoadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ - return cast<LoadSDNode>(N)->getAlignment() < 4 && !isOffsetMultipleOf(N, 4); +def NonDSFormSextLoadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() < 4 && !isOffsetMultipleOf(N, 4); }]>; // This is a somewhat weaker condition than actually checking for 16-byte @@ -670,7 +670,7 @@ def PPCU1ImmAsmOperand : AsmOperandClass { def u1imm : Operand<i32> { let PrintMethod = "printU1ImmOperand"; let ParserMatchClass = PPCU1ImmAsmOperand; - let OperandType = "OPERAND_IMMEDIATE"; + let OperandType = "OPERAND_IMMEDIATE"; } def PPCU2ImmAsmOperand : AsmOperandClass { @@ -680,7 +680,7 @@ def PPCU2ImmAsmOperand : AsmOperandClass { def u2imm : Operand<i32> { let PrintMethod = "printU2ImmOperand"; let ParserMatchClass = PPCU2ImmAsmOperand; - let OperandType = "OPERAND_IMMEDIATE"; + let OperandType = "OPERAND_IMMEDIATE"; } def PPCATBitsAsHintAsmOperand : AsmOperandClass { @@ -690,7 +690,7 @@ def PPCATBitsAsHintAsmOperand : AsmOperandClass { def atimm : Operand<i32> { let PrintMethod = "printATBitsAsHint"; let ParserMatchClass = PPCATBitsAsHintAsmOperand; - let OperandType = "OPERAND_IMMEDIATE"; + let OperandType = "OPERAND_IMMEDIATE"; } def PPCU3ImmAsmOperand : AsmOperandClass { @@ -700,7 +700,7 @@ def PPCU3ImmAsmOperand : AsmOperandClass { def u3imm : Operand<i32> { let PrintMethod = "printU3ImmOperand"; let ParserMatchClass = PPCU3ImmAsmOperand; - let OperandType = "OPERAND_IMMEDIATE"; + let OperandType = "OPERAND_IMMEDIATE"; } def PPCU4ImmAsmOperand : AsmOperandClass { @@ -710,7 +710,7 @@ def PPCU4ImmAsmOperand : AsmOperandClass { def u4imm : Operand<i32> { let PrintMethod = "printU4ImmOperand"; let ParserMatchClass = PPCU4ImmAsmOperand; - let OperandType = "OPERAND_IMMEDIATE"; + let OperandType = "OPERAND_IMMEDIATE"; } def PPCS5ImmAsmOperand : AsmOperandClass { let Name = "S5Imm"; let PredicateMethod = "isS5Imm"; @@ -720,7 +720,7 @@ def s5imm : Operand<i32> { let PrintMethod = "printS5ImmOperand"; let ParserMatchClass = PPCS5ImmAsmOperand; let DecoderMethod = "decodeSImmOperand<5>"; - let OperandType = "OPERAND_IMMEDIATE"; + let OperandType = "OPERAND_IMMEDIATE"; } def PPCU5ImmAsmOperand : AsmOperandClass { let Name = "U5Imm"; let PredicateMethod = "isU5Imm"; @@ -730,7 +730,7 @@ def u5imm : Operand<i32> { let PrintMethod = "printU5ImmOperand"; let ParserMatchClass = PPCU5ImmAsmOperand; let DecoderMethod = "decodeUImmOperand<5>"; - let OperandType = "OPERAND_IMMEDIATE"; + let OperandType = "OPERAND_IMMEDIATE"; } def PPCU6ImmAsmOperand : AsmOperandClass { let Name = "U6Imm"; let PredicateMethod = "isU6Imm"; @@ -740,7 +740,7 @@ def u6imm : Operand<i32> { let PrintMethod = "printU6ImmOperand"; let ParserMatchClass = PPCU6ImmAsmOperand; let DecoderMethod = "decodeUImmOperand<6>"; - let OperandType = "OPERAND_IMMEDIATE"; + let OperandType = "OPERAND_IMMEDIATE"; } def PPCU7ImmAsmOperand : AsmOperandClass { let Name = "U7Imm"; let PredicateMethod = "isU7Imm"; @@ -750,7 +750,7 @@ def u7imm : Operand<i32> { let PrintMethod = "printU7ImmOperand"; let ParserMatchClass = PPCU7ImmAsmOperand; let DecoderMethod = "decodeUImmOperand<7>"; - let OperandType = "OPERAND_IMMEDIATE"; + let OperandType = "OPERAND_IMMEDIATE"; } def PPCU8ImmAsmOperand : AsmOperandClass { let Name = "U8Imm"; let PredicateMethod = "isU8Imm"; @@ -760,7 +760,7 @@ def u8imm : Operand<i32> { let PrintMethod = "printU8ImmOperand"; let ParserMatchClass = PPCU8ImmAsmOperand; let DecoderMethod = "decodeUImmOperand<8>"; - let OperandType = "OPERAND_IMMEDIATE"; + let OperandType = "OPERAND_IMMEDIATE"; } def PPCU10ImmAsmOperand : AsmOperandClass { let Name = "U10Imm"; let PredicateMethod = "isU10Imm"; @@ -770,7 +770,7 @@ def u10imm : Operand<i32> { let PrintMethod = "printU10ImmOperand"; let ParserMatchClass = PPCU10ImmAsmOperand; let DecoderMethod = "decodeUImmOperand<10>"; - let OperandType = "OPERAND_IMMEDIATE"; + let OperandType = "OPERAND_IMMEDIATE"; } def PPCU12ImmAsmOperand : AsmOperandClass { let Name = "U12Imm"; let PredicateMethod = "isU12Imm"; @@ -780,7 +780,7 @@ def u12imm : Operand<i32> { let PrintMethod = "printU12ImmOperand"; let ParserMatchClass = PPCU12ImmAsmOperand; let DecoderMethod = "decodeUImmOperand<12>"; - let OperandType = "OPERAND_IMMEDIATE"; + let OperandType = "OPERAND_IMMEDIATE"; } def PPCS16ImmAsmOperand : AsmOperandClass { let Name = "S16Imm"; let PredicateMethod = "isS16Imm"; @@ -791,7 +791,7 @@ def s16imm : Operand<i32> { let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCS16ImmAsmOperand; let DecoderMethod = "decodeSImmOperand<16>"; - let OperandType = "OPERAND_IMMEDIATE"; + let OperandType = "OPERAND_IMMEDIATE"; } def PPCU16ImmAsmOperand : AsmOperandClass { let Name = "U16Imm"; let PredicateMethod = "isU16Imm"; @@ -802,7 +802,7 @@ def u16imm : Operand<i32> { let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCU16ImmAsmOperand; let DecoderMethod = "decodeUImmOperand<16>"; - let OperandType = "OPERAND_IMMEDIATE"; + let OperandType = "OPERAND_IMMEDIATE"; } def PPCS17ImmAsmOperand : AsmOperandClass { let Name = "S17Imm"; let PredicateMethod = "isS17Imm"; @@ -816,7 +816,7 @@ def s17imm : Operand<i32> { let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCS17ImmAsmOperand; let DecoderMethod = "decodeSImmOperand<16>"; - let OperandType = "OPERAND_IMMEDIATE"; + let OperandType = "OPERAND_IMMEDIATE"; } def PPCS34ImmAsmOperand : AsmOperandClass { let Name = "S34Imm"; @@ -825,18 +825,18 @@ def PPCS34ImmAsmOperand : AsmOperandClass { } def s34imm : Operand<i64> { let PrintMethod = "printS34ImmOperand"; - let EncoderMethod = "getImm34EncodingNoPCRel"; + let EncoderMethod = "getImm34EncodingNoPCRel"; let ParserMatchClass = PPCS34ImmAsmOperand; let DecoderMethod = "decodeSImmOperand<34>"; - let OperandType = "OPERAND_IMMEDIATE"; -} -def s34imm_pcrel : Operand<i64> { - let PrintMethod = "printS34ImmOperand"; - let EncoderMethod = "getImm34EncodingPCRel"; - let ParserMatchClass = PPCS34ImmAsmOperand; - let DecoderMethod = "decodeSImmOperand<34>"; - let OperandType = "OPERAND_IMMEDIATE"; -} + let OperandType = "OPERAND_IMMEDIATE"; +} +def s34imm_pcrel : Operand<i64> { + let PrintMethod = "printS34ImmOperand"; + let EncoderMethod = "getImm34EncodingPCRel"; + let ParserMatchClass = PPCS34ImmAsmOperand; + let DecoderMethod = "decodeSImmOperand<34>"; + let OperandType = "OPERAND_IMMEDIATE"; +} def PPCImmZeroAsmOperand : AsmOperandClass { let Name = "ImmZero"; let PredicateMethod = "isImmZero"; @@ -846,7 +846,7 @@ def immZero : Operand<i32> { let PrintMethod = "printImmZeroOperand"; let ParserMatchClass = PPCImmZeroAsmOperand; let DecoderMethod = "decodeImmZeroOperand"; - let OperandType = "OPERAND_IMMEDIATE"; + let OperandType = "OPERAND_IMMEDIATE"; } def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>; @@ -992,47 +992,47 @@ def memri : Operand<iPTR> { let MIOperandInfo = (ops dispRI:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getMemRIEncoding"; let DecoderMethod = "decodeMemRIOperands"; - let OperandType = "OPERAND_MEMORY"; + let OperandType = "OPERAND_MEMORY"; } def memrr : Operand<iPTR> { let PrintMethod = "printMemRegReg"; let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc_idx:$offreg); - let OperandType = "OPERAND_MEMORY"; + let OperandType = "OPERAND_MEMORY"; } def memrix : Operand<iPTR> { // memri where the imm is 4-aligned. let PrintMethod = "printMemRegImm"; let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getMemRIXEncoding"; let DecoderMethod = "decodeMemRIXOperands"; - let OperandType = "OPERAND_MEMORY"; + let OperandType = "OPERAND_MEMORY"; } def memrix16 : Operand<iPTR> { // memri, imm is 16-aligned, 12-bit, Inst{16:27} let PrintMethod = "printMemRegImm"; let MIOperandInfo = (ops dispRIX16:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getMemRIX16Encoding"; let DecoderMethod = "decodeMemRIX16Operands"; - let OperandType = "OPERAND_MEMORY"; + let OperandType = "OPERAND_MEMORY"; } def spe8dis : Operand<iPTR> { // SPE displacement where the imm is 8-aligned. let PrintMethod = "printMemRegImm"; let MIOperandInfo = (ops dispSPE8:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getSPE8DisEncoding"; let DecoderMethod = "decodeSPE8Operands"; - let OperandType = "OPERAND_MEMORY"; + let OperandType = "OPERAND_MEMORY"; } def spe4dis : Operand<iPTR> { // SPE displacement where the imm is 4-aligned. let PrintMethod = "printMemRegImm"; let MIOperandInfo = (ops dispSPE4:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getSPE4DisEncoding"; let DecoderMethod = "decodeSPE4Operands"; - let OperandType = "OPERAND_MEMORY"; + let OperandType = "OPERAND_MEMORY"; } def spe2dis : Operand<iPTR> { // SPE displacement where the imm is 2-aligned. let PrintMethod = "printMemRegImm"; let MIOperandInfo = (ops dispSPE2:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getSPE2DisEncoding"; let DecoderMethod = "decodeSPE2Operands"; - let OperandType = "OPERAND_MEMORY"; + let OperandType = "OPERAND_MEMORY"; } // A single-register address. This is used with the SjLj @@ -1040,7 +1040,7 @@ def spe2dis : Operand<iPTR> { // SPE displacement where the imm is 2-aligned. // G8RC_NOX0 registers. def memr : Operand<iPTR> { let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg); - let OperandType = "OPERAND_MEMORY"; + let OperandType = "OPERAND_MEMORY"; } def PPCTLSRegOperand : AsmOperandClass { let Name = "TLSReg"; let PredicateMethod = "isTLSReg"; @@ -1066,13 +1066,13 @@ def pred : Operand<OtherVT> { // Define PowerPC specific addressing mode. // d-form -def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>; // "stb" +def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>; // "stb" // ds-form -def iaddrX4 : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std" +def iaddrX4 : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std" // dq-form -def iaddrX16 : ComplexPattern<iPTR, 2, "SelectAddrImmX16", [], []>; // "stxv" -// 8LS:d-form -def iaddrX34 : ComplexPattern<iPTR, 2, "SelectAddrImmX34", [], []>; // "pstxvp" +def iaddrX16 : ComplexPattern<iPTR, 2, "SelectAddrImmX16", [], []>; // "stxv" +// 8LS:d-form +def iaddrX34 : ComplexPattern<iPTR, 2, "SelectAddrImmX34", [], []>; // "pstxvp" // Below forms are all x-form addressing mode, use three different ones so we // can make a accurate check for x-form instructions in ISEL. @@ -1118,12 +1118,12 @@ def HasExtDiv : Predicate<"Subtarget->hasExtDiv()">; def IsISA3_0 : Predicate<"Subtarget->isISA3_0()">; def HasFPU : Predicate<"Subtarget->hasFPU()">; def PCRelativeMemops : Predicate<"Subtarget->hasPCRelativeMemops()">; -def IsNotISA3_1 : Predicate<"!Subtarget->isISA3_1()">; +def IsNotISA3_1 : Predicate<"!Subtarget->isISA3_1()">; + +// AIX assembler may not be modern enough to support some extended mne. +def ModernAs: Predicate<"!Subtarget->isAIXABI() || Subtarget->HasModernAIXAs">, + AssemblerPredicate<(any_of (not AIXOS), FeatureModernAIXAs)>; -// AIX assembler may not be modern enough to support some extended mne. -def ModernAs: Predicate<"!Subtarget->isAIXABI() || Subtarget->HasModernAIXAs">, - AssemblerPredicate<(any_of (not AIXOS), FeatureModernAIXAs)>; - //===----------------------------------------------------------------------===// // PowerPC Multiclass Definitions. @@ -1481,7 +1481,7 @@ def ADJCALLSTACKUP : PPCEmitTimePseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2 "#ADJCALLSTACKUP $amt1 $amt2", [(callseq_end timm:$amt1, timm:$amt2)]>; } -} // hasCtrlDep +} // hasCtrlDep let Defs = [R1], Uses = [R1] in def DYNALLOC : PPCEmitTimePseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#DYNALLOC", @@ -1607,9 +1607,9 @@ def SETRNDi : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins u2imm:$RND), def SETRND : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins gprc:$in), "#SETRND", [(set f64:$FRT, (int_ppc_setrnd gprc :$in))]>; - -def SETFLM : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins f8rc:$FLM), - "#SETFLM", [(set f64:$FRT, (int_ppc_setflm f8rc:$FLM))]>; + +def SETFLM : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins f8rc:$FLM), + "#SETFLM", [(set f64:$FRT, (int_ppc_setflm f8rc:$FLM))]>; } let Defs = [LR] in @@ -1659,12 +1659,12 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { def BCn : BForm_4<16, 4, 0, 0, (outs), (ins crbitrc:$bi, condbrtarget:$dst), "bc 4, $bi, $dst">; - let isReturn = 1, Uses = [LR, RM] in { + let isReturn = 1, Uses = [LR, RM] in { def BCLR : XLForm_2_br2<19, 16, 12, 0, (outs), (ins crbitrc:$bi), "bclr 12, $bi, 0", IIC_BrB, []>; def BCLRn : XLForm_2_br2<19, 16, 4, 0, (outs), (ins crbitrc:$bi), "bclr 4, $bi, 0", IIC_BrB, []>; - } + } } let isReturn = 1, Defs = [CTR], Uses = [CTR, LR, RM] in { @@ -1936,7 +1936,7 @@ def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), "dcbzl $dst", IIC_LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>, PPC970_DGroup_Single; -def DCBF : DCB_Form_hint<86, (outs), (ins u3imm:$TH, memrr:$dst), +def DCBF : DCB_Form_hint<86, (outs), (ins u3imm:$TH, memrr:$dst), "dcbf $dst, $TH", IIC_LdStDCBF, []>, PPC970_DGroup_Single; @@ -2471,7 +2471,7 @@ let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst), "stmw $rS, $dst", IIC_LdStLMW, []>; -def SYNC : XForm_24_sync<31, 598, (outs), (ins u2imm:$L), +def SYNC : XForm_24_sync<31, 598, (outs), (ins u2imm:$L), "sync $L", IIC_LdStSync, []>; let isCodeGenOnly = 1 in { @@ -2666,26 +2666,26 @@ let isCompare = 1, hasSideEffects = 0 in { } } let PPC970_Unit = 3, Predicates = [HasFPU] in { // FPU Operations. -let isCompare = 1, mayRaiseFPException = 1, hasSideEffects = 0 in { +let isCompare = 1, mayRaiseFPException = 1, hasSideEffects = 0 in { def FCMPUS : XForm_17<63, 0, (outs crrc:$crD), (ins f4rc:$fA, f4rc:$fB), "fcmpu $crD, $fA, $fB", IIC_FPCompare>; - def FCMPOS : XForm_17<63, 32, (outs crrc:$crD), (ins f4rc:$fA, f4rc:$fB), - "fcmpo $crD, $fA, $fB", IIC_FPCompare>; - let Interpretation64Bit = 1, isCodeGenOnly = 1 in { - def FCMPUD : XForm_17<63, 0, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB), - "fcmpu $crD, $fA, $fB", IIC_FPCompare>; - def FCMPOD : XForm_17<63, 32, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB), - "fcmpo $crD, $fA, $fB", IIC_FPCompare>; - } + def FCMPOS : XForm_17<63, 32, (outs crrc:$crD), (ins f4rc:$fA, f4rc:$fB), + "fcmpo $crD, $fA, $fB", IIC_FPCompare>; + let Interpretation64Bit = 1, isCodeGenOnly = 1 in { + def FCMPUD : XForm_17<63, 0, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB), + "fcmpu $crD, $fA, $fB", IIC_FPCompare>; + def FCMPOD : XForm_17<63, 32, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB), + "fcmpo $crD, $fA, $fB", IIC_FPCompare>; + } } def FTDIV: XForm_17<63, 128, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB), "ftdiv $crD, $fA, $fB", IIC_FPCompare>; def FTSQRT: XForm_17a<63, 160, (outs crrc:$crD), (ins f8rc:$fB), - "ftsqrt $crD, $fB", IIC_FPCompare, - [(set i32:$crD, (PPCftsqrt f64:$fB))]>; + "ftsqrt $crD, $fB", IIC_FPCompare, + [(set i32:$crD, (PPCftsqrt f64:$fB))]>; -let mayRaiseFPException = 1, hasSideEffects = 0 in { +let mayRaiseFPException = 1, hasSideEffects = 0 in { let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FRIND : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB), "frin", "$frD, $frB", IIC_FPGeneral, @@ -2715,23 +2715,23 @@ let mayRaiseFPException = 1, hasSideEffects = 0 in { defm FRIMS : XForm_26r<63, 488, (outs f4rc:$frD), (ins f4rc:$frB), "frim", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (any_ffloor f32:$frB))]>; -} - -let Uses = [RM], mayRaiseFPException = 1, hasSideEffects = 0 in { - defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB), - "fctiw", "$frD, $frB", IIC_FPGeneral, - []>; - defm FCTIWU : XForm_26r<63, 142, (outs f8rc:$frD), (ins f8rc:$frB), - "fctiwu", "$frD, $frB", IIC_FPGeneral, - []>; - defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB), - "fctiwz", "$frD, $frB", IIC_FPGeneral, - [(set f64:$frD, (PPCany_fctiwz f64:$frB))]>; - - defm FRSP : XForm_26r<63, 12, (outs f4rc:$frD), (ins f8rc:$frB), - "frsp", "$frD, $frB", IIC_FPGeneral, - [(set f32:$frD, (any_fpround f64:$frB))]>; - +} + +let Uses = [RM], mayRaiseFPException = 1, hasSideEffects = 0 in { + defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB), + "fctiw", "$frD, $frB", IIC_FPGeneral, + []>; + defm FCTIWU : XForm_26r<63, 142, (outs f8rc:$frD), (ins f8rc:$frB), + "fctiwu", "$frD, $frB", IIC_FPGeneral, + []>; + defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB), + "fctiwz", "$frD, $frB", IIC_FPGeneral, + [(set f64:$frD, (PPCany_fctiwz f64:$frB))]>; + + defm FRSP : XForm_26r<63, 12, (outs f4rc:$frD), (ins f8rc:$frB), + "frsp", "$frD, $frB", IIC_FPGeneral, + [(set f32:$frD, (any_fpround f64:$frB))]>; + defm FSQRT : XForm_26r<63, 22, (outs f8rc:$frD), (ins f8rc:$frB), "fsqrt", "$frD, $frB", IIC_FPSqrtD, [(set f64:$frD, (any_fsqrt f64:$frB))]>; @@ -2739,10 +2739,10 @@ let Uses = [RM], mayRaiseFPException = 1, hasSideEffects = 0 in { "fsqrts", "$frD, $frB", IIC_FPSqrtS, [(set f32:$frD, (any_fsqrt f32:$frB))]>; } -} +} + +def : Pat<(PPCfsqrt f64:$frA), (FSQRT $frA)>; -def : Pat<(PPCfsqrt f64:$frA), (FSQRT $frA)>; - /// Note that FMR is defined as pseudo-ops on the PPC970 because they are /// often coalesced away and we don't want the dispatch group builder to think /// that they will fill slots (which could cause the load of a LSU reject to @@ -2786,7 +2786,7 @@ defm FCPSGND : XForm_28r<63, 8, (outs f8rc:$frD), (ins f8rc:$frA, f8rc:$frB), [(set f64:$frD, (fcopysign f64:$frB, f64:$frA))]>; // Reciprocal estimates. -let mayRaiseFPException = 1 in { +let mayRaiseFPException = 1 in { defm FRE : XForm_26r<63, 24, (outs f8rc:$frD), (ins f8rc:$frB), "fre", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfre f64:$frB))]>; @@ -2800,7 +2800,7 @@ defm FRSQRTES : XForm_26r<59, 26, (outs f4rc:$frD), (ins f4rc:$frB), "frsqrtes", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (PPCfrsqrte f32:$frB))]>; } -} +} // XL-Form instructions. condition register logical ops. // @@ -3000,24 +3000,24 @@ def : InstAlias<"mtcr $rA", (MTCRF 255, gprc:$rA)>; let Predicates = [HasFPU] in { // Custom inserter instruction to perform FADD in round-to-zero mode. -let Uses = [RM], mayRaiseFPException = 1 in { +let Uses = [RM], mayRaiseFPException = 1 in { def FADDrtz: PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "", - [(set f64:$FRT, (PPCany_faddrtz f64:$FRA, f64:$FRB))]>; + [(set f64:$FRT, (PPCany_faddrtz f64:$FRA, f64:$FRB))]>; } // The above pseudo gets expanded to make use of the following instructions // to manipulate FPSCR. Note that FPSCR is not modeled at the DAG level. - -// When FM is 30/31, we are setting the 62/63 bit of FPSCR, the implicit-def -// RM should be set. -def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM), - "mtfsb0 $FM", IIC_IntMTFSB0, []>, - PPC970_DGroup_Single, PPC970_Unit_FPU; -def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM), - "mtfsb1 $FM", IIC_IntMTFSB0, []>, - PPC970_DGroup_Single, PPC970_Unit_FPU; - -let Defs = [RM] in { + +// When FM is 30/31, we are setting the 62/63 bit of FPSCR, the implicit-def +// RM should be set. +def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM), + "mtfsb0 $FM", IIC_IntMTFSB0, []>, + PPC970_DGroup_Single, PPC970_Unit_FPU; +def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM), + "mtfsb1 $FM", IIC_IntMTFSB0, []>, + PPC970_DGroup_Single, PPC970_Unit_FPU; + +let Defs = [RM] in { let isCodeGenOnly = 1 in def MTFSFb : XFLForm<63, 711, (outs), (ins i32imm:$FM, f8rc:$rT), "mtfsf $FM, $rT", IIC_IntMTFSB0, []>, @@ -3156,7 +3156,7 @@ def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC_rec gprc:$rA, gprc:$rC, gprc:$rB)> // this type. // let PPC970_Unit = 3, hasSideEffects = 0, Predicates = [HasFPU] in { // FPU Operations. -let mayRaiseFPException = 1, Uses = [RM] in { +let mayRaiseFPException = 1, Uses = [RM] in { let isCommutable = 1 in { defm FMADD : AForm_1r<63, 29, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), @@ -3342,13 +3342,13 @@ def : Pat<(PPCcall (i32 texternalsym:$dst)), // Calls for AIX only def : Pat<(PPCcall (i32 mcsym:$dst)), (BL mcsym:$dst)>; - + def : Pat<(PPCcall_nop (i32 mcsym:$dst)), (BL_NOP mcsym:$dst)>; -def : Pat<(PPCcall_nop (i32 texternalsym:$dst)), - (BL_NOP texternalsym:$dst)>; - +def : Pat<(PPCcall_nop (i32 texternalsym:$dst)), + (BL_NOP texternalsym:$dst)>; + def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm), (TCRETURNdi tglobaladdr:$dst, imm:$imm)>; @@ -3358,7 +3358,7 @@ def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm), def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm), (TCRETURNri CTRRC:$dst, imm:$imm)>; -def : Pat<(int_ppc_readflm), (MFFS)>; +def : Pat<(int_ppc_readflm), (MFFS)>; // Hi and Lo for Darwin Global Addresses. def : Pat<(PPChi tglobaladdr:$in, 0), (LIS tglobaladdr:$in)>; @@ -3512,7 +3512,7 @@ def : Pat<(f64 (extloadf32 iaddr:$src)), def : Pat<(f64 (extloadf32 xaddr:$src)), (COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>; -def : Pat<(f64 (any_fpextend f32:$src)), +def : Pat<(f64 (any_fpextend f32:$src)), (COPY_TO_REGCLASS $src, F8RC)>; } @@ -3935,7 +3935,7 @@ def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETGT)), def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETEQ)), (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>; -let Predicates = [IsNotISA3_1] in { +let Predicates = [IsNotISA3_1] in { // Instantiations of CRNotPat for i32. defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGE)), (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>; @@ -3993,62 +3993,62 @@ defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETLE)), (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>; defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETNE)), (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>; -} - -multiclass FSetCCPat<SDNode SetCC, ValueType Ty, PatLeaf FCmp> { - defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETUGE)), - (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>; - defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETGE)), - (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>; - defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETULE)), - (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>; - defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETLE)), - (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>; - defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETUNE)), - (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>; - defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETNE)), - (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>; - defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETO)), - (EXTRACT_SUBREG (FCmp $s1, $s2), sub_un)>; - - def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETOLT)), - (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>; - def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETLT)), - (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>; - def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETOGT)), - (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>; - def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETGT)), - (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>; - def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETOEQ)), - (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>; - def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETEQ)), - (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>; - def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETUO)), - (EXTRACT_SUBREG (FCmp $s1, $s2), sub_un)>; -} - -let Predicates = [HasFPU] in { -// FCMPU: If either of the operands is a Signaling NaN, then VXSNAN is set. +} + +multiclass FSetCCPat<SDNode SetCC, ValueType Ty, PatLeaf FCmp> { + defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETUGE)), + (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>; + defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETGE)), + (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>; + defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETULE)), + (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>; + defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETLE)), + (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>; + defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETUNE)), + (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>; + defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETNE)), + (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>; + defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETO)), + (EXTRACT_SUBREG (FCmp $s1, $s2), sub_un)>; + + def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETOLT)), + (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>; + def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETLT)), + (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>; + def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETOGT)), + (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>; + def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETGT)), + (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>; + def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETOEQ)), + (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>; + def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETEQ)), + (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>; + def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETUO)), + (EXTRACT_SUBREG (FCmp $s1, $s2), sub_un)>; +} + +let Predicates = [HasFPU] in { +// FCMPU: If either of the operands is a Signaling NaN, then VXSNAN is set. // SETCC for f32. -defm : FSetCCPat<any_fsetcc, f32, FCMPUS>; +defm : FSetCCPat<any_fsetcc, f32, FCMPUS>; // SETCC for f64. -defm : FSetCCPat<any_fsetcc, f64, FCMPUD>; +defm : FSetCCPat<any_fsetcc, f64, FCMPUD>; // SETCC for f128. -defm : FSetCCPat<any_fsetcc, f128, XSCMPUQP>; +defm : FSetCCPat<any_fsetcc, f128, XSCMPUQP>; + +// FCMPO: If either of the operands is a Signaling NaN, then VXSNAN is set and, +// if neither operand is a Signaling NaN but at least one operand is a Quiet NaN, +// then VXVC is set. +// SETCCS for f32. +defm : FSetCCPat<strict_fsetccs, f32, FCMPOS>; -// FCMPO: If either of the operands is a Signaling NaN, then VXSNAN is set and, -// if neither operand is a Signaling NaN but at least one operand is a Quiet NaN, -// then VXVC is set. -// SETCCS for f32. -defm : FSetCCPat<strict_fsetccs, f32, FCMPOS>; - -// SETCCS for f64. -defm : FSetCCPat<strict_fsetccs, f64, FCMPOD>; - -// SETCCS for f128. -defm : FSetCCPat<strict_fsetccs, f128, XSCMPOQP>; +// SETCCS for f64. +defm : FSetCCPat<strict_fsetccs, f64, FCMPOD>; + +// SETCCS for f128. +defm : FSetCCPat<strict_fsetccs, f128, XSCMPOQP>; } // This must be in this file because it relies on patterns defined in this file @@ -4317,7 +4317,7 @@ def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins), def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src), "icbi $src", IIC_LdStICBI, []>; -def WAIT : XForm_24_sync<31, 30, (outs), (ins u2imm:$L), +def WAIT : XForm_24_sync<31, 30, (outs), (ins u2imm:$L), "wait $L", IIC_LdStLoad, []>; def MBAR : XForm_mbar<31, 854, (outs), (ins u5imm:$MO), @@ -4335,7 +4335,7 @@ def MTSRIN: XForm_srin<31, 242, (outs), (ins gprc:$RS, gprc:$RB), def MFSRIN: XForm_srin<31, 659, (outs gprc:$RS), (ins gprc:$RB), "mfsrin $RS, $RB", IIC_SprMFSR>; -def MTMSR: XForm_mtmsr<31, 146, (outs), (ins gprc:$RS, u1imm:$L), +def MTMSR: XForm_mtmsr<31, 146, (outs), (ins gprc:$RS, u1imm:$L), "mtmsr $RS, $L", IIC_SprMTMSR>; def WRTEE: XForm_mtmsr<31, 131, (outs), (ins gprc:$RS), @@ -4364,17 +4364,17 @@ def : InstAlias<"iccci", (ICCCI R0, R0)>, Requires<[IsPPC4xx]>; def MFMSR : XForm_rs<31, 83, (outs gprc:$RT), (ins), "mfmsr $RT", IIC_SprMFMSR, []>; -def MTMSRD : XForm_mtmsr<31, 178, (outs), (ins gprc:$RS, u1imm:$L), +def MTMSRD : XForm_mtmsr<31, 178, (outs), (ins gprc:$RS, u1imm:$L), "mtmsrd $RS, $L", IIC_SprMTMSRD>; def MCRFS : XLForm_3<63, 64, (outs crrc:$BF), (ins crrc:$BFA), "mcrfs $BF, $BFA", IIC_BrMCR>; -// If W is 0 and BF is 7, the 60:63 bits will be set, we should set the -// implicit-def RM. +// If W is 0 and BF is 7, the 60:63 bits will be set, we should set the +// implicit-def RM. def MTFSFI : XLForm_4<63, 134, (outs crrc:$BF), (ins i32imm:$U, i32imm:$W), "mtfsfi $BF, $U, $W", IIC_IntMFFS>; -let Defs = [CR1] in +let Defs = [CR1] in def MTFSFI_rec : XLForm_4<63, 134, (outs crrc:$BF), (ins i32imm:$U, i32imm:$W), "mtfsfi. $BF, $U, $W", IIC_IntMFFS>, isRecordForm; @@ -4382,15 +4382,15 @@ def : InstAlias<"mtfsfi $BF, $U", (MTFSFI crrc:$BF, i32imm:$U, 0)>; def : InstAlias<"mtfsfi. $BF, $U", (MTFSFI_rec crrc:$BF, i32imm:$U, 0)>; let Predicates = [HasFPU] in { -let Defs = [RM] in { +let Defs = [RM] in { def MTFSF : XFLForm_1<63, 711, (outs), - (ins i32imm:$FLM, f8rc:$FRB, u1imm:$L, i32imm:$W), + (ins i32imm:$FLM, f8rc:$FRB, u1imm:$L, i32imm:$W), "mtfsf $FLM, $FRB, $L, $W", IIC_IntMFFS, []>; -let Defs = [CR1] in +let Defs = [CR1] in def MTFSF_rec : XFLForm_1<63, 711, (outs), - (ins i32imm:$FLM, f8rc:$FRB, u1imm:$L, i32imm:$W), + (ins i32imm:$FLM, f8rc:$FRB, u1imm:$L, i32imm:$W), "mtfsf. $FLM, $FRB, $L, $W", IIC_IntMFFS, []>, isRecordForm; -} +} def : InstAlias<"mtfsf $FLM, $FRB", (MTFSF i32imm:$FLM, f8rc:$FRB, 0, 0)>; def : InstAlias<"mtfsf. $FLM, $FRB", (MTFSF_rec i32imm:$FLM, f8rc:$FRB, 0, 0)>; @@ -4617,16 +4617,16 @@ def : Pat<(int_ppc_dcbfl xoaddr:$dst), def : Pat<(int_ppc_dcbflp xoaddr:$dst), (DCBF 3, xoaddr:$dst)>; -let Predicates = [IsISA3_1] in { - def DCBFPS : PPCAsmPseudo<"dcbfps $dst", (ins memrr:$dst)>; - def DCBSTPS : PPCAsmPseudo<"dcbstps $dst", (ins memrr:$dst)>; - - def : Pat<(int_ppc_dcbfps xoaddr:$dst), - (DCBF 4, xoaddr:$dst)>; - def : Pat<(int_ppc_dcbstps xoaddr:$dst), - (DCBF 6, xoaddr:$dst)>; -} - +let Predicates = [IsISA3_1] in { + def DCBFPS : PPCAsmPseudo<"dcbfps $dst", (ins memrr:$dst)>; + def DCBSTPS : PPCAsmPseudo<"dcbstps $dst", (ins memrr:$dst)>; + + def : Pat<(int_ppc_dcbfps xoaddr:$dst), + (DCBF 4, xoaddr:$dst)>; + def : Pat<(int_ppc_dcbstps xoaddr:$dst), + (DCBF 6, xoaddr:$dst)>; +} + def : InstAlias<"crset $bx", (CREQV crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>; def : InstAlias<"crclr $bx", (CRXOR crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>; def : InstAlias<"crmove $bx, $by", (CROR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>; @@ -4653,11 +4653,11 @@ def : InstAlias<"mtmsr $RS", (MTMSR gprc:$RS, 0)>; def : InstAlias<"mtxer $Rx", (MTSPR 1, gprc:$Rx)>; def : InstAlias<"mfxer $Rx", (MFSPR gprc:$Rx, 1)>; -//Disable this alias on AIX for now because as does not support them. -let Predicates = [ModernAs] in { +//Disable this alias on AIX for now because as does not support them. +let Predicates = [ModernAs] in { def : InstAlias<"mtudscr $Rx", (MTSPR 3, gprc:$Rx)>; def : InstAlias<"mfudscr $Rx", (MFSPR gprc:$Rx, 3)>; -} +} def : InstAlias<"mfrtcu $Rx", (MFSPR gprc:$Rx, 4)>; def : InstAlias<"mfrtcl $Rx", (MFSPR gprc:$Rx, 5)>; diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrPrefix.td b/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrPrefix.td index 0460c7bedc..b9eb3b3b7d 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrPrefix.td @@ -1,8 +1,8 @@ -// Mask immediates for MMA instructions (2, 4 and 8 bits). -def Msk2Imm : ImmLeaf<i32, [{ return isUInt<2>(Imm); }]>; -def Msk4Imm : ImmLeaf<i32, [{ return isUInt<4>(Imm); }]>; -def Msk8Imm : ImmLeaf<i32, [{ return isUInt<8>(Imm); }]>; - +// Mask immediates for MMA instructions (2, 4 and 8 bits). +def Msk2Imm : ImmLeaf<i32, [{ return isUInt<2>(Imm); }]>; +def Msk4Imm : ImmLeaf<i32, [{ return isUInt<4>(Imm); }]>; +def Msk8Imm : ImmLeaf<i32, [{ return isUInt<8>(Imm); }]>; + //===----------------------------------------------------------------------===// // PowerPC ISA 3.1 specific type constraints. // @@ -10,35 +10,35 @@ def Msk8Imm : ImmLeaf<i32, [{ return isUInt<8>(Imm); }]>; def SDT_PPCSplat32 : SDTypeProfile<1, 3, [ SDTCisVT<0, v2i64>, SDTCisVec<1>, SDTCisInt<2>, SDTCisInt<3> ]>; -def SDT_PPCAccBuild : SDTypeProfile<1, 4, [ - SDTCisVT<0, v512i1>, SDTCisVT<1, v4i32>, SDTCisVT<2, v4i32>, - SDTCisVT<3, v4i32>, SDTCisVT<4, v4i32> -]>; -def SDT_PPCPairBuild : SDTypeProfile<1, 2, [ - SDTCisVT<0, v256i1>, SDTCisVT<1, v4i32>, SDTCisVT<2, v4i32> -]>; -def SDT_PPCAccExtractVsx : SDTypeProfile<1, 2, [ - SDTCisVT<0, v4i32>, SDTCisVT<1, v512i1>, SDTCisInt<2> -]>; -def SDT_PPCPairExtractVsx : SDTypeProfile<1, 2, [ - SDTCisVT<0, v4i32>, SDTCisVT<1, v256i1>, SDTCisInt<2> -]>; -def SDT_PPCxxmfacc : SDTypeProfile<1, 1, [ - SDTCisVT<0, v512i1>, SDTCisVT<1, v512i1> -]>; +def SDT_PPCAccBuild : SDTypeProfile<1, 4, [ + SDTCisVT<0, v512i1>, SDTCisVT<1, v4i32>, SDTCisVT<2, v4i32>, + SDTCisVT<3, v4i32>, SDTCisVT<4, v4i32> +]>; +def SDT_PPCPairBuild : SDTypeProfile<1, 2, [ + SDTCisVT<0, v256i1>, SDTCisVT<1, v4i32>, SDTCisVT<2, v4i32> +]>; +def SDT_PPCAccExtractVsx : SDTypeProfile<1, 2, [ + SDTCisVT<0, v4i32>, SDTCisVT<1, v512i1>, SDTCisInt<2> +]>; +def SDT_PPCPairExtractVsx : SDTypeProfile<1, 2, [ + SDTCisVT<0, v4i32>, SDTCisVT<1, v256i1>, SDTCisInt<2> +]>; +def SDT_PPCxxmfacc : SDTypeProfile<1, 1, [ + SDTCisVT<0, v512i1>, SDTCisVT<1, v512i1> +]>; //===----------------------------------------------------------------------===// // ISA 3.1 specific PPCISD nodes. // def PPCxxsplti32dx : SDNode<"PPCISD::XXSPLTI32DX", SDT_PPCSplat32, []>; -def PPCAccBuild : SDNode<"PPCISD::ACC_BUILD", SDT_PPCAccBuild, []>; -def PPCPairBuild : SDNode<"PPCISD::PAIR_BUILD", SDT_PPCPairBuild, []>; -def PPCAccExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCAccExtractVsx, - []>; -def PPCPairExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCPairExtractVsx, - []>; -def PPCxxmfacc : SDNode<"PPCISD::XXMFACC", SDT_PPCxxmfacc, []>; +def PPCAccBuild : SDNode<"PPCISD::ACC_BUILD", SDT_PPCAccBuild, []>; +def PPCPairBuild : SDNode<"PPCISD::PAIR_BUILD", SDT_PPCPairBuild, []>; +def PPCAccExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCAccExtractVsx, + []>; +def PPCPairExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCPairExtractVsx, + []>; +def PPCxxmfacc : SDNode<"PPCISD::XXMFACC", SDT_PPCxxmfacc, []>; //===----------------------------------------------------------------------===// @@ -46,15 +46,15 @@ def PPCxxmfacc : SDNode<"PPCISD::XXMFACC", SDT_PPCxxmfacc, []>; // address computations). class isPCRel { bit PCRel = 1; } -// PowerPC specific type constraints. -def SDT_PPCLXVRZX : SDTypeProfile<1, 2, [ - SDTCisVT<0, v1i128>, SDTCisPtrTy<1>, SDTCisPtrTy<2> -]>; - -// PPC Specific DAG Nodes. -def PPClxvrzx : SDNode<"PPCISD::LXVRZX", SDT_PPCLXVRZX, - [SDNPHasChain, SDNPMayLoad]>; - +// PowerPC specific type constraints. +def SDT_PPCLXVRZX : SDTypeProfile<1, 2, [ + SDTCisVT<0, v1i128>, SDTCisPtrTy<1>, SDTCisPtrTy<2> +]>; + +// PPC Specific DAG Nodes. +def PPClxvrzx : SDNode<"PPCISD::LXVRZX", SDT_PPCLXVRZX, + [SDNPHasChain, SDNPMayLoad]>; + // Top-level class for prefixed instructions. class PI<bits<6> pref, bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin> : Instruction { @@ -96,39 +96,39 @@ class PI<bits<6> pref, bits<6> opcode, dag OOL, dag IOL, string asmstr, string BaseName = ""; } -// VX-Form: [ PO VT R VB RC XO ] -class VXForm_VTB5_RC<bits<10> xo, bits<5> R, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list<dag> pattern> - : I<4, OOL, IOL, asmstr, itin> { - bits<5> VT; - bits<5> VB; - bit RC = 0; - - let Pattern = pattern; - - let Inst{6-10} = VT; - let Inst{11-15} = R; - let Inst{16-20} = VB; - let Inst{21} = RC; - let Inst{22-31} = xo; -} - -// Multiclass definition to account for record and non-record form -// instructions of VXRForm. -multiclass VXForm_VTB5_RCr<bits<10> xo, bits<5> R, dag OOL, dag IOL, - string asmbase, string asmstr, - InstrItinClass itin, list<dag> pattern> { - let BaseName = asmbase in { - def NAME : VXForm_VTB5_RC<xo, R, OOL, IOL, - !strconcat(asmbase, !strconcat(" ", asmstr)), - itin, pattern>, RecFormRel; - let Defs = [CR6] in - def _rec : VXForm_VTB5_RC<xo, R, OOL, IOL, - !strconcat(asmbase, !strconcat(". ", asmstr)), - itin, []>, isRecordForm, RecFormRel; - } -} - +// VX-Form: [ PO VT R VB RC XO ] +class VXForm_VTB5_RC<bits<10> xo, bits<5> R, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VT; + bits<5> VB; + bit RC = 0; + + let Pattern = pattern; + + let Inst{6-10} = VT; + let Inst{11-15} = R; + let Inst{16-20} = VB; + let Inst{21} = RC; + let Inst{22-31} = xo; +} + +// Multiclass definition to account for record and non-record form +// instructions of VXRForm. +multiclass VXForm_VTB5_RCr<bits<10> xo, bits<5> R, dag OOL, dag IOL, + string asmbase, string asmstr, + InstrItinClass itin, list<dag> pattern> { + let BaseName = asmbase in { + def NAME : VXForm_VTB5_RC<xo, R, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), + itin, pattern>, RecFormRel; + let Defs = [CR6] in + def _rec : VXForm_VTB5_RC<xo, R, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), + itin, []>, isRecordForm, RecFormRel; + } +} + class MLS_DForm_R_SI34_RTA5_MEM<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> : PI<1, opcode, OOL, IOL, asmstr, itin> { @@ -315,34 +315,34 @@ class VXForm_RD5_N3_VB5<bits<11> xo, dag OOL, dag IOL, string asmstr, // VX-Form: [PO VRT RA VRB XO]. // Destructive (insert) forms are suffixed with _ins. class VXForm_VTB5_RA5_ins<bits<11> xo, string opc, list<dag> pattern> - : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vDi, gprc:$rA, vrrc:$vB), + : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vDi, gprc:$rA, vrrc:$vB), !strconcat(opc, " $vD, $rA, $vB"), IIC_VecGeneral, pattern>, RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; // VX-Form: [PO VRT RA RB XO]. // Destructive (insert) forms are suffixed with _ins. class VXForm_VRT5_RAB5_ins<bits<11> xo, string opc, list<dag> pattern> - : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vDi, gprc:$rA, gprc:$rB), + : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vDi, gprc:$rA, gprc:$rB), !strconcat(opc, " $vD, $rA, $rB"), IIC_VecGeneral, pattern>, RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; -// VX-Form: [ PO BF // VRA VRB XO ] -class VXForm_BF3_VAB5<bits<11> xo, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list<dag> pattern> - : I<4, OOL, IOL, asmstr, itin> { - bits<3> BF; - bits<5> VA; - bits<5> VB; - - let Pattern = pattern; - - let Inst{6-8} = BF; - let Inst{9-10} = 0; - let Inst{11-15} = VA; - let Inst{16-20} = VB; - let Inst{21-31} = xo; -} - +// VX-Form: [ PO BF // VRA VRB XO ] +class VXForm_BF3_VAB5<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<3> BF; + bits<5> VA; + bits<5> VB; + + let Pattern = pattern; + + let Inst{6-8} = BF; + let Inst{9-10} = 0; + let Inst{11-15} = VA; + let Inst{16-20} = VB; + let Inst{21-31} = xo; +} + // VN-Form: [PO VRT VRA VRB PS SD XO] // SD is "Shift Direction" class VNForm_VTAB5_SD3<bits<6> xo, bits<2> ps, dag OOL, dag IOL, string asmstr, @@ -363,22 +363,22 @@ class VNForm_VTAB5_SD3<bits<6> xo, bits<2> ps, dag OOL, dag IOL, string asmstr, let Inst{26-31} = xo; } -class VXForm_RD5_MP_VB5<bits<11> xo, bits<4> eo, dag OOL, dag IOL, - string asmstr, InstrItinClass itin, list<dag> pattern> - : I<4, OOL, IOL, asmstr, itin> { - bits<5> RD; - bits<5> VB; - bit MP; - - let Pattern = pattern; - - let Inst{6-10} = RD; - let Inst{11-14} = eo; - let Inst{15} = MP; - let Inst{16-20} = VB; - let Inst{21-31} = xo; -} - +class VXForm_RD5_MP_VB5<bits<11> xo, bits<4> eo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> RD; + bits<5> VB; + bit MP; + + let Pattern = pattern; + + let Inst{6-10} = RD; + let Inst{11-14} = eo; + let Inst{15} = MP; + let Inst{16-20} = VB; + let Inst{21-31} = xo; +} + // 8RR:D-Form: [ 1 1 0 // // imm0 // PO T XO TX imm1 ]. class 8RR_DForm_IMM32_XT6<bits<6> opcode, bits<4> xo, dag OOL, dag IOL, @@ -509,13 +509,13 @@ class XX2_BF3_XO5_XB6_XO9<bits<6> opcode, bits<5> xo2, bits<9> xo, dag OOL, let Inst{31} = 0; } -// X-Form: [ PO RT BI /// XO / ] -class XForm_XT5_BI5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, - string asmstr, InstrItinClass itin, list<dag> pattern> - : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { - let B = 0; -} - +// X-Form: [ PO RT BI /// XO / ] +class XForm_XT5_BI5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let B = 0; +} + multiclass MLS_DForm_R_SI34_RTA5_MEM_p<bits<6> opcode, dag OOL, dag IOL, dag PCRel_IOL, string asmstr, InstrItinClass itin> { @@ -545,307 +545,307 @@ multiclass 8LS_DForm_R_SI34_XT6_RA5_p<bits<5> opcode, dag OOL, dag IOL, isPCRel; } -def PPCRegVSRpRCAsmOperand : AsmOperandClass { - let Name = "RegVSRpRC"; let PredicateMethod = "isVSRpEvenRegNumber"; -} - -def vsrprc : RegisterOperand<VSRpRC> { - let ParserMatchClass = PPCRegVSRpRCAsmOperand; -} - -def PPCRegVSRpEvenRCAsmOperand : AsmOperandClass { - let Name = "RegVSRpEvenRC"; let PredicateMethod = "isVSRpEvenRegNumber"; -} - -def vsrpevenrc : RegisterOperand<VSRpRC> { - let ParserMatchClass = PPCRegVSRpEvenRCAsmOperand; - let EncoderMethod = "getVSRpEvenEncoding"; - let DecoderMethod = "decodeVSRpEvenOperands"; -} - -class DQForm_XTp5_RA17_MEM<bits<6> opcode, bits<4> xo, dag OOL, dag IOL, - string asmstr, InstrItinClass itin, list<dag> pattern> - : I<opcode, OOL, IOL, asmstr, itin> { - bits<5> XTp; - bits<17> DQ_RA; - let Pattern = pattern; - - let Inst{6-9} = XTp{3-0}; - let Inst{10} = XTp{4}; - let Inst{11-15} = DQ_RA{16-12}; // Register # - let Inst{16-27} = DQ_RA{11-0}; // Displacement. - let Inst{28-31} = xo; -} - -class XForm_XTp5_XAB5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, - string asmstr, InstrItinClass itin, list<dag> pattern> - : I<opcode, OOL, IOL, asmstr, itin>, XFormMemOp { - bits<5> XTp; - bits<5> A; - bits<5> B; - - let Pattern = pattern; - let Inst{6-9} = XTp{3-0}; - let Inst{10} = XTp{4}; - let Inst{11-15} = A; - let Inst{16-20} = B; - let Inst{21-30} = xo; - let Inst{31} = 0; -} - -class 8LS_DForm_R_XTp5_SI34_MEM<bits<6> opcode, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list<dag> pattern> - : PI<1, opcode, OOL, IOL, asmstr, itin> { - bits<5> XTp; - bits<39> D_RA; - - let Pattern = pattern; - - // The prefix. - let Inst{6-10} = 0; - let Inst{11} = PCRel; - let Inst{12-13} = 0; - let Inst{14-31} = D_RA{33-16}; // Imm18 - - // The instruction. - let Inst{38-41} = XTp{3-0}; - let Inst{42} = XTp{4}; - let Inst{43-47} = D_RA{38-34}; // Register # - let Inst{48-63} = D_RA{15-0}; // D -} - -multiclass 8LS_DForm_R_XTp5_SI34_MEM_p<bits<6> pref, bits<6> opcode, dag OOL, - dag IOL, dag PCRel_IOL, - string asmstr, InstrItinClass itin> { - def NAME : 8LS_DForm_R_XTp5_SI34_MEM<opcode, OOL, IOL, - !strconcat(asmstr, ", 0"), itin, []>; - def pc : 8LS_DForm_R_XTp5_SI34_MEM<opcode, OOL, PCRel_IOL, - !strconcat(asmstr, ", 1"), itin, []>, - isPCRel; -} - -def PPCRegACCRCAsmOperand : AsmOperandClass { - let Name = "RegACCRC"; let PredicateMethod = "isACCRegNumber"; -} - -def acc : RegisterOperand<ACCRC> { - let ParserMatchClass = PPCRegACCRCAsmOperand; -} - -def uacc : RegisterOperand<UACCRC> { - let ParserMatchClass = PPCRegACCRCAsmOperand; -} - -// [PO AS XO2 XO] -class XForm_AT3<bits<6> opcode, bits<5> xo2, bits<10> xo, dag OOL, dag IOL, - string asmstr, InstrItinClass itin, list<dag> pattern> - : I<opcode, OOL, IOL, asmstr, itin> { - bits<3> AT; - - let Pattern = pattern; - - let Inst{6-8} = AT; - let Inst{9-10} = 0; - let Inst{11-15} = xo2; - let Inst{16-20} = 0; - let Inst{21-30} = xo; - let Inst{31} = 0; -} - -class XX3Form_AT3_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, - string asmstr, InstrItinClass itin, - list<dag> pattern> - : I<opcode, OOL, IOL, asmstr, itin> { - bits<3> AT; - bits<6> XA; - bits<6> XB; - - let Pattern = pattern; - - let Inst{6-8} = AT; - let Inst{9-10} = 0; - let Inst{11-15} = XA{4-0}; - let Inst{16-20} = XB{4-0}; - let Inst{21-28} = xo; - let Inst{29} = XA{5}; - let Inst{30} = XB{5}; - let Inst{31} = 0; -} - -class MMIRR_XX3Form_XY4P2_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, - string asmstr, InstrItinClass itin, - list<dag> pattern> - : PI<1, opcode, OOL, IOL, asmstr, itin> { - bits<3> AT; - bits<6> XA; - bits<6> XB; - bits<4> XMSK; - bits<4> YMSK; - bits<2> PMSK; - - let Pattern = pattern; - - // The prefix. - let Inst{6-7} = 3; - let Inst{8-11} = 9; - let Inst{12-15} = 0; - let Inst{16-17} = PMSK; - let Inst{18-23} = 0; - let Inst{24-27} = XMSK; - let Inst{28-31} = YMSK; - - // The instruction. - let Inst{38-40} = AT; - let Inst{41-42} = 0; - let Inst{43-47} = XA{4-0}; - let Inst{48-52} = XB{4-0}; - let Inst{53-60} = xo; - let Inst{61} = XA{5}; - let Inst{62} = XB{5}; - let Inst{63} = 0; -} - -class MMIRR_XX3Form_XY4_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, - string asmstr, InstrItinClass itin, - list<dag> pattern> - : PI<1, opcode, OOL, IOL, asmstr, itin> { - bits<3> AT; - bits<6> XA; - bits<6> XB; - bits<4> XMSK; - bits<4> YMSK; - - let Pattern = pattern; - - // The prefix. - let Inst{6-7} = 3; - let Inst{8-11} = 9; - let Inst{12-23} = 0; - let Inst{24-27} = XMSK; - let Inst{28-31} = YMSK; - - // The instruction. - let Inst{38-40} = AT; - let Inst{41-42} = 0; - let Inst{43-47} = XA{4-0}; - let Inst{48-52} = XB{4-0}; - let Inst{53-60} = xo; - let Inst{61} = XA{5}; - let Inst{62} = XB{5}; - let Inst{63} = 0; -} - -class MMIRR_XX3Form_X4Y2_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, - string asmstr, InstrItinClass itin, - list<dag> pattern> - : PI<1, opcode, OOL, IOL, asmstr, itin> { - bits<3> AT; - bits<6> XA; - bits<6> XB; - bits<4> XMSK; - bits<2> YMSK; - - let Pattern = pattern; - - // The prefix. - let Inst{6-7} = 3; - let Inst{8-11} = 9; - let Inst{12-23} = 0; - let Inst{24-27} = XMSK; - let Inst{28-29} = YMSK; - let Inst{30-31} = 0; - - // The instruction. - let Inst{38-40} = AT; - let Inst{41-42} = 0; - let Inst{43-47} = XA{4-0}; - let Inst{48-52} = XB{4-0}; - let Inst{53-60} = xo; - let Inst{61} = XA{5}; - let Inst{62} = XB{5}; - let Inst{63} = 0; -} - -class MMIRR_XX3Form_XY4P8_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, - string asmstr, InstrItinClass itin, - list<dag> pattern> - : PI<1, opcode, OOL, IOL, asmstr, itin> { - bits<3> AT; - bits<6> XA; - bits<6> XB; - bits<4> XMSK; - bits<4> YMSK; - bits<8> PMSK; - - let Pattern = pattern; - - // The prefix. - let Inst{6-7} = 3; - let Inst{8-11} = 9; - let Inst{12-15} = 0; - let Inst{16-23} = PMSK; - let Inst{24-27} = XMSK; - let Inst{28-31} = YMSK; - - // The instruction. - let Inst{38-40} = AT; - let Inst{41-42} = 0; - let Inst{43-47} = XA{4-0}; - let Inst{48-52} = XB{4-0}; - let Inst{53-60} = xo; - let Inst{61} = XA{5}; - let Inst{62} = XB{5}; - let Inst{63} = 0; -} - -class MMIRR_XX3Form_XYP4_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, - string asmstr, InstrItinClass itin, - list<dag> pattern> - : PI<1, opcode, OOL, IOL, asmstr, itin> { - bits<3> AT; - bits<6> XA; - bits<6> XB; - bits<4> XMSK; - bits<4> YMSK; - bits<4> PMSK; - - let Pattern = pattern; - - // The prefix. - let Inst{6-7} = 3; - let Inst{8-11} = 9; - let Inst{12-15} = 0; - let Inst{16-19} = PMSK; - let Inst{20-23} = 0; - let Inst{24-27} = XMSK; - let Inst{28-31} = YMSK; - - // The instruction. - let Inst{38-40} = AT; - let Inst{41-42} = 0; - let Inst{43-47} = XA{4-0}; - let Inst{48-52} = XB{4-0}; - let Inst{53-60} = xo; - let Inst{61} = XA{5}; - let Inst{62} = XB{5}; - let Inst{63} = 0; -} - +def PPCRegVSRpRCAsmOperand : AsmOperandClass { + let Name = "RegVSRpRC"; let PredicateMethod = "isVSRpEvenRegNumber"; +} + +def vsrprc : RegisterOperand<VSRpRC> { + let ParserMatchClass = PPCRegVSRpRCAsmOperand; +} + +def PPCRegVSRpEvenRCAsmOperand : AsmOperandClass { + let Name = "RegVSRpEvenRC"; let PredicateMethod = "isVSRpEvenRegNumber"; +} + +def vsrpevenrc : RegisterOperand<VSRpRC> { + let ParserMatchClass = PPCRegVSRpEvenRCAsmOperand; + let EncoderMethod = "getVSRpEvenEncoding"; + let DecoderMethod = "decodeVSRpEvenOperands"; +} + +class DQForm_XTp5_RA17_MEM<bits<6> opcode, bits<4> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> XTp; + bits<17> DQ_RA; + let Pattern = pattern; + + let Inst{6-9} = XTp{3-0}; + let Inst{10} = XTp{4}; + let Inst{11-15} = DQ_RA{16-12}; // Register # + let Inst{16-27} = DQ_RA{11-0}; // Displacement. + let Inst{28-31} = xo; +} + +class XForm_XTp5_XAB5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin>, XFormMemOp { + bits<5> XTp; + bits<5> A; + bits<5> B; + + let Pattern = pattern; + let Inst{6-9} = XTp{3-0}; + let Inst{10} = XTp{4}; + let Inst{11-15} = A; + let Inst{16-20} = B; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class 8LS_DForm_R_XTp5_SI34_MEM<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : PI<1, opcode, OOL, IOL, asmstr, itin> { + bits<5> XTp; + bits<39> D_RA; + + let Pattern = pattern; + + // The prefix. + let Inst{6-10} = 0; + let Inst{11} = PCRel; + let Inst{12-13} = 0; + let Inst{14-31} = D_RA{33-16}; // Imm18 + + // The instruction. + let Inst{38-41} = XTp{3-0}; + let Inst{42} = XTp{4}; + let Inst{43-47} = D_RA{38-34}; // Register # + let Inst{48-63} = D_RA{15-0}; // D +} + +multiclass 8LS_DForm_R_XTp5_SI34_MEM_p<bits<6> pref, bits<6> opcode, dag OOL, + dag IOL, dag PCRel_IOL, + string asmstr, InstrItinClass itin> { + def NAME : 8LS_DForm_R_XTp5_SI34_MEM<opcode, OOL, IOL, + !strconcat(asmstr, ", 0"), itin, []>; + def pc : 8LS_DForm_R_XTp5_SI34_MEM<opcode, OOL, PCRel_IOL, + !strconcat(asmstr, ", 1"), itin, []>, + isPCRel; +} + +def PPCRegACCRCAsmOperand : AsmOperandClass { + let Name = "RegACCRC"; let PredicateMethod = "isACCRegNumber"; +} + +def acc : RegisterOperand<ACCRC> { + let ParserMatchClass = PPCRegACCRCAsmOperand; +} + +def uacc : RegisterOperand<UACCRC> { + let ParserMatchClass = PPCRegACCRCAsmOperand; +} + +// [PO AS XO2 XO] +class XForm_AT3<bits<6> opcode, bits<5> xo2, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<3> AT; + + let Pattern = pattern; + + let Inst{6-8} = AT; + let Inst{9-10} = 0; + let Inst{11-15} = xo2; + let Inst{16-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XX3Form_AT3_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, + list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<3> AT; + bits<6> XA; + bits<6> XB; + + let Pattern = pattern; + + let Inst{6-8} = AT; + let Inst{9-10} = 0; + let Inst{11-15} = XA{4-0}; + let Inst{16-20} = XB{4-0}; + let Inst{21-28} = xo; + let Inst{29} = XA{5}; + let Inst{30} = XB{5}; + let Inst{31} = 0; +} + +class MMIRR_XX3Form_XY4P2_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, + list<dag> pattern> + : PI<1, opcode, OOL, IOL, asmstr, itin> { + bits<3> AT; + bits<6> XA; + bits<6> XB; + bits<4> XMSK; + bits<4> YMSK; + bits<2> PMSK; + + let Pattern = pattern; + + // The prefix. + let Inst{6-7} = 3; + let Inst{8-11} = 9; + let Inst{12-15} = 0; + let Inst{16-17} = PMSK; + let Inst{18-23} = 0; + let Inst{24-27} = XMSK; + let Inst{28-31} = YMSK; + + // The instruction. + let Inst{38-40} = AT; + let Inst{41-42} = 0; + let Inst{43-47} = XA{4-0}; + let Inst{48-52} = XB{4-0}; + let Inst{53-60} = xo; + let Inst{61} = XA{5}; + let Inst{62} = XB{5}; + let Inst{63} = 0; +} + +class MMIRR_XX3Form_XY4_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, + list<dag> pattern> + : PI<1, opcode, OOL, IOL, asmstr, itin> { + bits<3> AT; + bits<6> XA; + bits<6> XB; + bits<4> XMSK; + bits<4> YMSK; + + let Pattern = pattern; + + // The prefix. + let Inst{6-7} = 3; + let Inst{8-11} = 9; + let Inst{12-23} = 0; + let Inst{24-27} = XMSK; + let Inst{28-31} = YMSK; + + // The instruction. + let Inst{38-40} = AT; + let Inst{41-42} = 0; + let Inst{43-47} = XA{4-0}; + let Inst{48-52} = XB{4-0}; + let Inst{53-60} = xo; + let Inst{61} = XA{5}; + let Inst{62} = XB{5}; + let Inst{63} = 0; +} + +class MMIRR_XX3Form_X4Y2_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, + list<dag> pattern> + : PI<1, opcode, OOL, IOL, asmstr, itin> { + bits<3> AT; + bits<6> XA; + bits<6> XB; + bits<4> XMSK; + bits<2> YMSK; + + let Pattern = pattern; + + // The prefix. + let Inst{6-7} = 3; + let Inst{8-11} = 9; + let Inst{12-23} = 0; + let Inst{24-27} = XMSK; + let Inst{28-29} = YMSK; + let Inst{30-31} = 0; + + // The instruction. + let Inst{38-40} = AT; + let Inst{41-42} = 0; + let Inst{43-47} = XA{4-0}; + let Inst{48-52} = XB{4-0}; + let Inst{53-60} = xo; + let Inst{61} = XA{5}; + let Inst{62} = XB{5}; + let Inst{63} = 0; +} + +class MMIRR_XX3Form_XY4P8_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, + list<dag> pattern> + : PI<1, opcode, OOL, IOL, asmstr, itin> { + bits<3> AT; + bits<6> XA; + bits<6> XB; + bits<4> XMSK; + bits<4> YMSK; + bits<8> PMSK; + + let Pattern = pattern; + + // The prefix. + let Inst{6-7} = 3; + let Inst{8-11} = 9; + let Inst{12-15} = 0; + let Inst{16-23} = PMSK; + let Inst{24-27} = XMSK; + let Inst{28-31} = YMSK; + + // The instruction. + let Inst{38-40} = AT; + let Inst{41-42} = 0; + let Inst{43-47} = XA{4-0}; + let Inst{48-52} = XB{4-0}; + let Inst{53-60} = xo; + let Inst{61} = XA{5}; + let Inst{62} = XB{5}; + let Inst{63} = 0; +} + +class MMIRR_XX3Form_XYP4_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, + list<dag> pattern> + : PI<1, opcode, OOL, IOL, asmstr, itin> { + bits<3> AT; + bits<6> XA; + bits<6> XB; + bits<4> XMSK; + bits<4> YMSK; + bits<4> PMSK; + + let Pattern = pattern; + + // The prefix. + let Inst{6-7} = 3; + let Inst{8-11} = 9; + let Inst{12-15} = 0; + let Inst{16-19} = PMSK; + let Inst{20-23} = 0; + let Inst{24-27} = XMSK; + let Inst{28-31} = YMSK; + + // The instruction. + let Inst{38-40} = AT; + let Inst{41-42} = 0; + let Inst{43-47} = XA{4-0}; + let Inst{48-52} = XB{4-0}; + let Inst{53-60} = xo; + let Inst{61} = XA{5}; + let Inst{62} = XB{5}; + let Inst{63} = 0; +} + def PrefixInstrs : Predicate<"Subtarget->hasPrefixInstrs()">; def IsISA3_1 : Predicate<"Subtarget->isISA3_1()">; -def PairedVectorMemops : Predicate<"Subtarget->pairedVectorMemops()">; -def MMA : Predicate<"Subtarget->hasMMA()">; - -def RCCp { - dag AToVSRC = (COPY_TO_REGCLASS $XA, VSRC); - dag BToVSRC = (COPY_TO_REGCLASS $XB, VSRC); -} - +def PairedVectorMemops : Predicate<"Subtarget->pairedVectorMemops()">; +def MMA : Predicate<"Subtarget->hasMMA()">; + +def RCCp { + dag AToVSRC = (COPY_TO_REGCLASS $XA, VSRC); + dag BToVSRC = (COPY_TO_REGCLASS $XB, VSRC); +} + let Predicates = [PrefixInstrs] in { let Interpretation64Bit = 1, isCodeGenOnly = 1 in { defm PADDI8 : MLS_DForm_R_SI34_RTA5_p<14, (outs g8rc:$RT), (ins g8rc:$RA, s34imm:$SI), - (ins immZero:$RA, s34imm_pcrel:$SI), + (ins immZero:$RA, s34imm_pcrel:$SI), "paddi $RT, $RA, $SI", IIC_LdStLFD>; let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def PLI8 : MLS_DForm_SI34_RT5<14, (outs g8rc:$RT), @@ -855,7 +855,7 @@ let Predicates = [PrefixInstrs] in { } defm PADDI : MLS_DForm_R_SI34_RTA5_p<14, (outs gprc:$RT), (ins gprc:$RA, s34imm:$SI), - (ins immZero:$RA, s34imm_pcrel:$SI), + (ins immZero:$RA, s34imm_pcrel:$SI), "paddi $RT, $RA, $SI", IIC_LdStLFD>; let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def PLI : MLS_DForm_SI34_RT5<14, (outs gprc:$RT), @@ -986,695 +986,695 @@ let Predicates = [PrefixInstrs] in { } } -// Multiclass definitions for MMA accumulator instructions. -// ---------------------------------------------------------------------------- - -// Defines 2 unmasked instructions where the xo field for acc/non-acc version -// is even/odd. -multiclass ACC_UM_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, - string asmstr> { - let Predicates = [MMA] in { - def NAME : - XX3Form_AT3_XAB6<opcode, !or(xo, 0x01), (outs acc:$AT), IOL, - !strconcat(asmbase#" ", asmstr), IIC_VecFP, []>, - RegConstraint<"@earlyclobber $AT">; - def PP : - XX3Form_AT3_XAB6<opcode, xo, (outs acc:$AT), !con((ins acc:$ATi), IOL), - !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } -} - -// Defines 4 instructions, masked/unmasked with masks 8, 4, 4 bits. -// The XO field for acc/non-acc version is even/odd. -multiclass ACC_UM_M844_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, - string asmstr> { - defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>; - let Predicates = [MMA, PrefixInstrs] in { - def PM#NAME : - MMIRR_XX3Form_XY4P8_XAB6< - opcode, !or(xo, 0x01), (outs acc:$AT), - !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK)), - !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"@earlyclobber $AT">; - def PM#NAME#PP : - MMIRR_XX3Form_XY4P8_XAB6< - opcode, xo, (outs acc:$AT), - !con((ins acc:$ATi), - !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK))), - !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } -} - -// Defines 4 instructions, masked/unmasked with masks 4, 4, 4 bits. -// The XO field for acc/non-acc version is even/odd. -multiclass ACC_UM_M444_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, - string asmstr> { - defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>; - let Predicates = [MMA, PrefixInstrs] in { - def PM#NAME : - MMIRR_XX3Form_XYP4_XAB6< - opcode, !or(xo, 0x01), (outs acc:$AT), - !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK)), - !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"@earlyclobber $AT">; - def PM#NAME#PP : - MMIRR_XX3Form_XYP4_XAB6< - opcode, xo, (outs acc:$AT), - !con((ins acc:$ATi), - !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK))), - !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } -} - -// Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits. -// The XO field for acc/non-acc version is even/odd. -multiclass ACC_UM_M244_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, - string asmstr> { - defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>; - let Predicates = [MMA, PrefixInstrs] in { - def PM#NAME : - MMIRR_XX3Form_XY4P2_XAB6< - opcode, !or(xo, 0x01), (outs acc:$AT), - !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)), - !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"@earlyclobber $AT">; - def PM#NAME#PP : - MMIRR_XX3Form_XY4P2_XAB6< - opcode, xo, (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), - !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } -} - -// Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits. -// Upper nibble of XO field for acc/non-acc version is 0x4/0x6. -multiclass ACC_UM_M244_XO46<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, - string asmstr> { - let Predicates = [MMA] in { - def NAME : - XX3Form_AT3_XAB6<opcode, xo, (outs acc:$AT), IOL, - !strconcat(asmbase#" ", asmstr), IIC_VecFP, []>, - RegConstraint<"@earlyclobber $AT">; - def PP : - XX3Form_AT3_XAB6< - opcode, !or(xo, 0x20), (outs acc:$AT), !con((ins acc:$ATi), IOL), - !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } - let Predicates = [MMA, PrefixInstrs] in { - def PM#NAME : - MMIRR_XX3Form_XY4P2_XAB6< - opcode, xo, (outs acc:$AT), - !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)), - !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"@earlyclobber $AT">; - def PM#NAME#PP : - MMIRR_XX3Form_XY4P2_XAB6< - opcode, !or(xo, 0x20), (outs acc:$AT), - !con((ins acc:$ATi), - !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), - !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } -} - -// Defines 10 instructions, operand negating, unmasked, masked with 2, 4, 4 -// bits. Upper nibble are masked with 0x8, 0x4, 0xC for negating operands. -multiclass ACC_NEG_UM_M244_XOM84C<bits<6> opcode, bits<8> xo, dag IOL, - string asmbase, string asmstr> { - defm NAME : ACC_UM_M244_XOEO<opcode, xo, IOL, asmbase, asmstr>; - let Predicates = [MMA] in { - def PN : XX3Form_AT3_XAB6< - opcode, !or(xo, 0x80), (outs acc:$AT), !con((ins acc:$ATi), IOL), - !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def NP : XX3Form_AT3_XAB6< - opcode, !or(xo, 0x40), (outs acc:$AT), !con((ins acc:$ATi), IOL), - !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def NN : XX3Form_AT3_XAB6< - opcode, !or(xo, 0xC0), (outs acc:$AT), !con((ins acc:$ATi), IOL), - !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } - let Predicates = [MMA, PrefixInstrs] in { - def PM#NAME#PN : - MMIRR_XX3Form_XY4P2_XAB6< - opcode, !or(xo, 0x80), (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), - !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#NP : - MMIRR_XX3Form_XY4P2_XAB6< - opcode, !or(xo, 0x40), (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), - !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#NN : - MMIRR_XX3Form_XY4P2_XAB6< - opcode, !or(xo, 0xC0), (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), - !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } -} - -// Defines 5 instructions, unmasked, operand negating. -// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands. -multiclass ACC_NEG_UM_XOM84C<bits<6> opcode, bits<8> xo, dag IOL, - string asmbase, string asmstr> { - defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>; - let Predicates = [MMA] in { - def PN : XX3Form_AT3_XAB6<opcode, !or(xo, 0x80), (outs acc:$AT), - !con((ins acc:$ATi), IOL), - !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def NP : XX3Form_AT3_XAB6<opcode, !or(xo, 0x40), (outs acc:$AT), - !con((ins acc:$ATi), IOL), - !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def NN : XX3Form_AT3_XAB6<opcode, !or(xo, 0xC0), (outs acc:$AT), - !con((ins acc:$ATi), IOL), - !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } -} - -// Defines 10 instructions, operand negating, unmasked, masked with 4, 4 bits. -// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands. -multiclass ACC_NEG_UM_M44_XOM84C<bits<6> opcode, bits<8> xo, dag IOL, - string asmbase, string asmstr> { - defm NAME : ACC_NEG_UM_XOM84C<opcode, xo, IOL, asmbase, asmstr>; - let Predicates = [MMA, PrefixInstrs] in { - def PM#NAME : - MMIRR_XX3Form_XY4_XAB6< - opcode, !or(xo, 0x01), (outs acc:$AT), - !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK)), - !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"@earlyclobber $AT">; - def PM#NAME#PP : - MMIRR_XX3Form_XY4_XAB6< - opcode, xo, (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), - !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#PN : - MMIRR_XX3Form_XY4_XAB6< - opcode, !or(xo, 0x80), (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), - !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#NP : - MMIRR_XX3Form_XY4_XAB6< - opcode, !or(xo, 0x40), (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), - !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#NN : - MMIRR_XX3Form_XY4_XAB6< - opcode, !or(xo, 0xC0), (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), - !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } -} - -// Defines 10 instructions, operand negating, unmasked, masked with 4, 2 bits. -// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands. -multiclass ACC_NEG_UM_M42_XOM84C<bits<6> opcode, bits<8> xo, dag IOL, - string asmbase, string asmstr> { - defm NAME : ACC_NEG_UM_XOM84C<opcode, xo, IOL, asmbase, asmstr>; - let Predicates = [MMA, PrefixInstrs] in { - def PM#NAME : - MMIRR_XX3Form_X4Y2_XAB6< - opcode, !or(xo, 0x01), (outs acc:$AT), - !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK)), - !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"@earlyclobber $AT">; - def PM#NAME#PP : - MMIRR_XX3Form_X4Y2_XAB6< - opcode, xo, (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), - !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#PN : - MMIRR_XX3Form_X4Y2_XAB6< - opcode, !or(xo, 0x80), (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), - !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#NP : - MMIRR_XX3Form_X4Y2_XAB6< - opcode, !or(xo, 0x40), (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), - !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#NN : - MMIRR_XX3Form_X4Y2_XAB6< - opcode, !or(xo, 0xC0), (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), - !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } -} - -// End of class definitions. -//----------------------------------------------------------------------------- - -let Predicates = [MMA] in { - def XXMFACC : - XForm_AT3<31, 0, 177, (outs acc:$ASo), (ins acc:$AS), "xxmfacc $AS", - IIC_VecGeneral, - [(set v512i1:$ASo, (int_ppc_mma_xxmfacc v512i1:$AS))]>, - RegConstraint<"$ASo = $AS">, NoEncode<"$ASo">; - def XXMTACC : - XForm_AT3<31, 1, 177, (outs acc:$AT), (ins acc:$ATi), "xxmtacc $AT", - IIC_VecGeneral, - [(set v512i1:$AT, (int_ppc_mma_xxmtacc v512i1:$ATi))]>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def KILL_PAIR : PPCPostRAExpPseudo<(outs vsrprc:$XTp), (ins vsrprc:$XSp), - "#KILL_PAIR", []>, - RegConstraint<"$XTp = $XSp">; - def BUILD_UACC : PPCPostRAExpPseudo<(outs acc:$AT), (ins uacc:$AS), - "#BUILD_UACC $AT, $AS", []>; - // We define XXSETACCZ as rematerializable to undo CSE of that intrinsic in - // the backend. We avoid CSE here because it generates a copy of the acc - // register and this copy is more expensive than calling the intrinsic again. - let isAsCheapAsAMove = 1, isReMaterializable = 1 in { - def XXSETACCZ : - XForm_AT3<31, 3, 177, (outs acc:$AT), (ins), "xxsetaccz $AT", IIC_VecGeneral, - [(set v512i1:$AT, (int_ppc_mma_xxsetaccz))]>; - } - def XVI8GER4SPP : - XX3Form_AT3_XAB6<59, 99, (outs acc:$AT), (ins acc:$ATi, vsrc:$XA, vsrc:$XB), - "xvi8ger4spp $AT, $XA, $XB", IIC_VecGeneral, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - let mayStore = 1 in { - def SPILL_ACC: PPCEmitTimePseudo<(outs), (ins acc:$AT, memrix16:$dst), - "#SPILL_ACC", []>; - def SPILL_UACC: PPCEmitTimePseudo<(outs), (ins uacc:$AT, memrix16:$dst), - "#SPILL_UACC", []>; - } - let mayLoad = 1, hasSideEffects = 0 in { - def RESTORE_ACC: PPCEmitTimePseudo<(outs acc:$AT), (ins memrix16:$src), - "#RESTORE_ACC", []>; - def RESTORE_UACC: PPCEmitTimePseudo<(outs uacc:$AT), (ins memrix16:$src), - "#RESTORE_UACC", []>; - } -} - -let Predicates = [MMA, PrefixInstrs] in { - def PMXVI8GER4SPP : - MMIRR_XX3Form_XYP4_XAB6<59, 99, (outs acc:$AT), - (ins acc:$ATi, vsrc:$XA,vsrc:$XB, u4imm:$XMSK, - u4imm:$YMSK, u4imm:$PMSK), - "pmxvi8ger4spp $AT, $XA, $XB, $XMSK, $YMSK, $PMSK", - IIC_VecGeneral, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; -} - -// MMA accumulating/non-accumulating instructions. -//------------------------------------------------------------------------------ - -// XVBF16GER2, XVBF16GER2PP, XVBF16GER2PN, XVBF16GER2NP, XVBF16GER2NN -// PMXVBF16GER2, PMXVBF16GER2PP, PMXVBF16GER2PN, PMXVBF16GER2NP, PMXVBF16GER2NN -defm XVBF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 50, (ins vsrc:$XA, vsrc:$XB), - "xvbf16ger2", "$AT, $XA, $XB">; - -// XVI4GER8, XVI4GER8PP, PMXVI4GER8, PMXVI4GER8PP -defm XVI4GER8 : ACC_UM_M844_XOEO<59, 34, (ins vsrc:$XA, vsrc:$XB), - "xvi4ger8", "$AT, $XA, $XB">; - -// XVI8GER4, XVI8GER4PP, PMXVI8GER4, PMXVI8GER4PP -defm XVI8GER4 : ACC_UM_M444_XOEO<59, 2, (ins vsrc:$XA, vsrc:$XB), - "xvi8ger4", "$AT, $XA, $XB">; - -// XVI16GER2, XVI16GER2PP, PMXVI16GER2, PMXVI16GER2PP -defm XVI16GER2 : ACC_UM_M244_XO46<59, 75, (ins vsrc:$XA, vsrc:$XB), - "xvi16ger2", "$AT, $XA, $XB">; - -// XVI16GER2S, XVI16GER2SPP, PMXVI16GER2S, PMXVI16GER2SPP -defm XVI16GER2S : ACC_UM_M244_XOEO<59, 42, (ins vsrc:$XA, vsrc:$XB), - "xvi16ger2s", "$AT, $XA, $XB">; - -// XVF16GER2, XVF16GER2PP, XVF16GER2PN, XVF16GER2NP, XVF16GER2NN -// PMXVF16GER2, PMXVF16GER2PP, PMXVF16GER2PN, PMXVF16GER2NP, PMXVF16GER2NN -defm XVF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 18, (ins vsrc:$XA, vsrc:$XB), - "xvf16ger2", "$AT, $XA, $XB">; - -// XVF32GER, XVF32GERPP, XVF32GERPN, XVF32GERNP, XVF32GERPP -// PMXVF32GER, PMXVF32GERPP, PMXVF32GERPN, PMXVF32GERNP, PMXVF32GERPP -defm XVF32GER : ACC_NEG_UM_M44_XOM84C<59, 26, (ins vsrc:$XA, vsrc:$XB), - "xvf32ger", "$AT, $XA, $XB">; - -// XVF64GER, XVF64GERPP, XVF64GERPN, XVF64GERNP, XVF64GERNN -// PMXVF64GER, PMXVF64GERPP, PMXVF64GERPN, PMXVF64GERNP, PMXVF64GERNN -defm XVF64GER : ACC_NEG_UM_M42_XOM84C<59, 58, (ins vsrpevenrc:$XA, vsrc:$XB), - "xvf64ger", "$AT, $XA, $XB">; -//------------------------------------------------------------------------------ - -// MMA Intrinsics -let Predicates = [MMA] in { - def : Pat<(v512i1 (int_ppc_mma_xvi4ger8 v16i8:$XA, v16i8:$XB)), - (XVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - - def : Pat<(v512i1 (int_ppc_mma_xvi8ger4 v16i8:$XA, v16i8:$XB)), - (XVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - - def : Pat<(v512i1 (int_ppc_mma_xvi16ger2s v16i8:$XA, v16i8:$XB)), - (XVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - - def : Pat<(v512i1 (int_ppc_mma_xvf16ger2 v16i8:$XA, v16i8:$XB)), - (XVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - - def : Pat<(v512i1 (int_ppc_mma_xvf32ger v16i8:$XA, v16i8:$XB)), - (XVF32GER RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf64ger v256i1:$XA, v16i8:$XB)), - (XVF64GER $XA, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB)), - (XVF64GERPP $ATi, $XA, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB)), - (XVF64GERPN $ATi, $XA, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB)), - (XVF64GERNP $ATi, $XA, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB)), - (XVF64GERNN $ATi, $XA, RCCp.BToVSRC)>; - - def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2 v16i8:$XA, v16i8:$XB)), - (XVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvi16ger2 v16i8:$XA, v16i8:$XB)), - (XVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; -} - -// MMA Intrinsics -let Predicates = [MMA, PrefixInstrs] in { - def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk8Imm:$PMSK)), - (PMXVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk8Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk8Imm:$PMSK)), - (PMXVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk8Imm:$PMSK)>; - - def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk4Imm:$PMSK)), - (PMXVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk4Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk4Imm:$PMSK)), - (PMXVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk4Imm:$PMSK)>; - - def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2s v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)), - (PMXVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)), - (PMXVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - - def : Pat<(v512i1 (int_ppc_mma_pmxvf32ger v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, - Msk4Imm:$YMSK)), - (PMXVF32GER RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK)), - (PMXVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK)), - (PMXVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK)), - (PMXVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK)), - (PMXVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK)>; - - def : Pat<(v512i1 (int_ppc_mma_pmxvf64ger v256i1:$XA, v16i8:$XB, Msk4Imm:$XMSK, - Msk2Imm:$YMSK)), - (PMXVF64GER $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, Msk2Imm:$YMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk2Imm:$YMSK)), - (PMXVF64GERPP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk2Imm:$YMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk2Imm:$YMSK)), - (PMXVF64GERPN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk2Imm:$YMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk2Imm:$YMSK)), - (PMXVF64GERNP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk2Imm:$YMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk2Imm:$YMSK)), - (PMXVF64GERNN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk2Imm:$YMSK)>; - - def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)), - (PMXVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)), - (PMXVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; -} - -def Concats { - dag VecsToVecPair0 = - (v256i1 (INSERT_SUBREG - (INSERT_SUBREG (IMPLICIT_DEF), $vs0, sub_vsx1), - $vs1, sub_vsx0)); - dag VecsToVecPair1 = - (v256i1 (INSERT_SUBREG - (INSERT_SUBREG (IMPLICIT_DEF), $vs2, sub_vsx1), - $vs3, sub_vsx0)); - dag VecsToVecQuad = - (BUILD_UACC (INSERT_SUBREG - (INSERT_SUBREG (v512i1 (IMPLICIT_DEF)), - (KILL_PAIR VecsToVecPair0), sub_pair0), - (KILL_PAIR VecsToVecPair1), sub_pair1)); -} - -def Extracts { - dag Pair0 = (v256i1 (EXTRACT_SUBREG $v, sub_pair0)); - dag Pair1 = (v256i1 (EXTRACT_SUBREG $v, sub_pair1)); - dag Vec0 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx0)); - dag Vec1 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx1)); - dag Vec2 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx0)); - dag Vec3 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx1)); -} - -let Predicates = [MMA] in { - def : Pat<(v512i1 (PPCAccBuild v4i32:$vs1, v4i32:$vs0, v4i32:$vs3, v4i32:$vs2)), - (XXMTACC Concats.VecsToVecQuad)>; - def : Pat<(v512i1 (int_ppc_mma_assemble_acc v16i8:$vs1, v16i8:$vs0, - v16i8:$vs3, v16i8:$vs2)), - (XXMTACC Concats.VecsToVecQuad)>; - def : Pat<(v512i1 (PPCxxmfacc v512i1:$AS)), (XXMFACC acc:$AS)>; - def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, (i64 0))), - Extracts.Vec0>; - def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, (i64 1))), - Extracts.Vec1>; - def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, (i64 2))), - Extracts.Vec2>; - def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, (i64 3))), - Extracts.Vec3>; -} - -let Predicates = [PairedVectorMemops] in { - def : Pat<(v256i1 (PPCPairBuild v4i32:$vs1, v4i32:$vs0)), - Concats.VecsToVecPair0>; - def : Pat<(v256i1 (int_ppc_vsx_assemble_pair v16i8:$vs1, v16i8:$vs0)), - Concats.VecsToVecPair0>; - def : Pat<(v4i32 (PPCPairExtractVsx vsrpevenrc:$v, (i64 0))), - (v4i32 (EXTRACT_SUBREG $v, sub_vsx0))>; - def : Pat<(v4i32 (PPCPairExtractVsx vsrpevenrc:$v, (i64 1))), - (v4i32 (EXTRACT_SUBREG $v, sub_vsx1))>; -} - -let mayLoad = 1, mayStore = 0, Predicates = [PairedVectorMemops] in { - def LXVP : DQForm_XTp5_RA17_MEM<6, 0, (outs vsrprc:$XTp), - (ins memrix16:$DQ_RA), "lxvp $XTp, $DQ_RA", - IIC_LdStLFD, []>; - def LXVPX : XForm_XTp5_XAB5<31, 333, (outs vsrprc:$XTp), (ins memrr:$src), - "lxvpx $XTp, $src", IIC_LdStLFD, - []>; -} - -let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops] in { - def STXVP : DQForm_XTp5_RA17_MEM<6, 1, (outs), (ins vsrprc:$XTp, - memrix16:$DQ_RA), "stxvp $XTp, $DQ_RA", - IIC_LdStLFD, []>; - def STXVPX : XForm_XTp5_XAB5<31, 461, (outs), (ins vsrprc:$XTp, memrr:$dst), - "stxvpx $XTp, $dst", IIC_LdStLFD, - []>; -} - -let mayLoad = 1, mayStore = 0, Predicates = [PairedVectorMemops, PrefixInstrs] in { - defm PLXVP : - 8LS_DForm_R_XTp5_SI34_MEM_p<1, 58, (outs vsrprc:$XTp), (ins memri34:$D_RA), - (ins memri34_pcrel:$D_RA), "plxvp $XTp, $D_RA", - IIC_LdStLFD>; -} - -let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops, PrefixInstrs] in { - defm PSTXVP : - 8LS_DForm_R_XTp5_SI34_MEM_p<1, 62, (outs), (ins vsrprc:$XTp, memri34:$D_RA), - (ins vsrprc:$XTp, memri34_pcrel:$D_RA), - "pstxvp $XTp, $D_RA", IIC_LdStLFD>; -} - -let Predicates = [PairedVectorMemops] in { - // Intrinsics for Paired Vector Loads. - def : Pat<(v256i1 (int_ppc_vsx_lxvp iaddrX16:$src)), (LXVP memrix16:$src)>; - def : Pat<(v256i1 (int_ppc_vsx_lxvp xaddrX16:$src)), (LXVPX xaddrX16:$src)>; - let Predicates = [PairedVectorMemops, PrefixInstrs] in { - def : Pat<(v256i1 (int_ppc_vsx_lxvp iaddrX34:$src)), (PLXVP memri34:$src)>; - } - // Intrinsics for Paired Vector Stores. - def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, iaddrX16:$dst), - (STXVP $XSp, memrix16:$dst)>; - def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, xaddrX16:$dst), - (STXVPX $XSp, xaddrX16:$dst)>; - let Predicates = [PairedVectorMemops, PrefixInstrs] in { - def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, iaddrX34:$dst), - (PSTXVP $XSp, memri34:$dst)>; - } -} - +// Multiclass definitions for MMA accumulator instructions. +// ---------------------------------------------------------------------------- + +// Defines 2 unmasked instructions where the xo field for acc/non-acc version +// is even/odd. +multiclass ACC_UM_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, + string asmstr> { + let Predicates = [MMA] in { + def NAME : + XX3Form_AT3_XAB6<opcode, !or(xo, 0x01), (outs acc:$AT), IOL, + !strconcat(asmbase#" ", asmstr), IIC_VecFP, []>, + RegConstraint<"@earlyclobber $AT">; + def PP : + XX3Form_AT3_XAB6<opcode, xo, (outs acc:$AT), !con((ins acc:$ATi), IOL), + !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +// Defines 4 instructions, masked/unmasked with masks 8, 4, 4 bits. +// The XO field for acc/non-acc version is even/odd. +multiclass ACC_UM_M844_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, + string asmstr> { + defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>; + let Predicates = [MMA, PrefixInstrs] in { + def PM#NAME : + MMIRR_XX3Form_XY4P8_XAB6< + opcode, !or(xo, 0x01), (outs acc:$AT), + !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK)), + !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"@earlyclobber $AT">; + def PM#NAME#PP : + MMIRR_XX3Form_XY4P8_XAB6< + opcode, xo, (outs acc:$AT), + !con((ins acc:$ATi), + !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK))), + !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +// Defines 4 instructions, masked/unmasked with masks 4, 4, 4 bits. +// The XO field for acc/non-acc version is even/odd. +multiclass ACC_UM_M444_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, + string asmstr> { + defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>; + let Predicates = [MMA, PrefixInstrs] in { + def PM#NAME : + MMIRR_XX3Form_XYP4_XAB6< + opcode, !or(xo, 0x01), (outs acc:$AT), + !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK)), + !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"@earlyclobber $AT">; + def PM#NAME#PP : + MMIRR_XX3Form_XYP4_XAB6< + opcode, xo, (outs acc:$AT), + !con((ins acc:$ATi), + !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK))), + !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +// Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits. +// The XO field for acc/non-acc version is even/odd. +multiclass ACC_UM_M244_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, + string asmstr> { + defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>; + let Predicates = [MMA, PrefixInstrs] in { + def PM#NAME : + MMIRR_XX3Form_XY4P2_XAB6< + opcode, !or(xo, 0x01), (outs acc:$AT), + !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)), + !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"@earlyclobber $AT">; + def PM#NAME#PP : + MMIRR_XX3Form_XY4P2_XAB6< + opcode, xo, (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), + !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +// Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits. +// Upper nibble of XO field for acc/non-acc version is 0x4/0x6. +multiclass ACC_UM_M244_XO46<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, + string asmstr> { + let Predicates = [MMA] in { + def NAME : + XX3Form_AT3_XAB6<opcode, xo, (outs acc:$AT), IOL, + !strconcat(asmbase#" ", asmstr), IIC_VecFP, []>, + RegConstraint<"@earlyclobber $AT">; + def PP : + XX3Form_AT3_XAB6< + opcode, !or(xo, 0x20), (outs acc:$AT), !con((ins acc:$ATi), IOL), + !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } + let Predicates = [MMA, PrefixInstrs] in { + def PM#NAME : + MMIRR_XX3Form_XY4P2_XAB6< + opcode, xo, (outs acc:$AT), + !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)), + !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"@earlyclobber $AT">; + def PM#NAME#PP : + MMIRR_XX3Form_XY4P2_XAB6< + opcode, !or(xo, 0x20), (outs acc:$AT), + !con((ins acc:$ATi), + !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), + !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +// Defines 10 instructions, operand negating, unmasked, masked with 2, 4, 4 +// bits. Upper nibble are masked with 0x8, 0x4, 0xC for negating operands. +multiclass ACC_NEG_UM_M244_XOM84C<bits<6> opcode, bits<8> xo, dag IOL, + string asmbase, string asmstr> { + defm NAME : ACC_UM_M244_XOEO<opcode, xo, IOL, asmbase, asmstr>; + let Predicates = [MMA] in { + def PN : XX3Form_AT3_XAB6< + opcode, !or(xo, 0x80), (outs acc:$AT), !con((ins acc:$ATi), IOL), + !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def NP : XX3Form_AT3_XAB6< + opcode, !or(xo, 0x40), (outs acc:$AT), !con((ins acc:$ATi), IOL), + !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def NN : XX3Form_AT3_XAB6< + opcode, !or(xo, 0xC0), (outs acc:$AT), !con((ins acc:$ATi), IOL), + !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } + let Predicates = [MMA, PrefixInstrs] in { + def PM#NAME#PN : + MMIRR_XX3Form_XY4P2_XAB6< + opcode, !or(xo, 0x80), (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), + !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#NP : + MMIRR_XX3Form_XY4P2_XAB6< + opcode, !or(xo, 0x40), (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), + !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#NN : + MMIRR_XX3Form_XY4P2_XAB6< + opcode, !or(xo, 0xC0), (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), + !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +// Defines 5 instructions, unmasked, operand negating. +// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands. +multiclass ACC_NEG_UM_XOM84C<bits<6> opcode, bits<8> xo, dag IOL, + string asmbase, string asmstr> { + defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>; + let Predicates = [MMA] in { + def PN : XX3Form_AT3_XAB6<opcode, !or(xo, 0x80), (outs acc:$AT), + !con((ins acc:$ATi), IOL), + !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def NP : XX3Form_AT3_XAB6<opcode, !or(xo, 0x40), (outs acc:$AT), + !con((ins acc:$ATi), IOL), + !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def NN : XX3Form_AT3_XAB6<opcode, !or(xo, 0xC0), (outs acc:$AT), + !con((ins acc:$ATi), IOL), + !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +// Defines 10 instructions, operand negating, unmasked, masked with 4, 4 bits. +// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands. +multiclass ACC_NEG_UM_M44_XOM84C<bits<6> opcode, bits<8> xo, dag IOL, + string asmbase, string asmstr> { + defm NAME : ACC_NEG_UM_XOM84C<opcode, xo, IOL, asmbase, asmstr>; + let Predicates = [MMA, PrefixInstrs] in { + def PM#NAME : + MMIRR_XX3Form_XY4_XAB6< + opcode, !or(xo, 0x01), (outs acc:$AT), + !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK)), + !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"@earlyclobber $AT">; + def PM#NAME#PP : + MMIRR_XX3Form_XY4_XAB6< + opcode, xo, (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), + !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#PN : + MMIRR_XX3Form_XY4_XAB6< + opcode, !or(xo, 0x80), (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), + !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#NP : + MMIRR_XX3Form_XY4_XAB6< + opcode, !or(xo, 0x40), (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), + !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#NN : + MMIRR_XX3Form_XY4_XAB6< + opcode, !or(xo, 0xC0), (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), + !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +// Defines 10 instructions, operand negating, unmasked, masked with 4, 2 bits. +// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands. +multiclass ACC_NEG_UM_M42_XOM84C<bits<6> opcode, bits<8> xo, dag IOL, + string asmbase, string asmstr> { + defm NAME : ACC_NEG_UM_XOM84C<opcode, xo, IOL, asmbase, asmstr>; + let Predicates = [MMA, PrefixInstrs] in { + def PM#NAME : + MMIRR_XX3Form_X4Y2_XAB6< + opcode, !or(xo, 0x01), (outs acc:$AT), + !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK)), + !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"@earlyclobber $AT">; + def PM#NAME#PP : + MMIRR_XX3Form_X4Y2_XAB6< + opcode, xo, (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), + !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#PN : + MMIRR_XX3Form_X4Y2_XAB6< + opcode, !or(xo, 0x80), (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), + !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#NP : + MMIRR_XX3Form_X4Y2_XAB6< + opcode, !or(xo, 0x40), (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), + !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#NN : + MMIRR_XX3Form_X4Y2_XAB6< + opcode, !or(xo, 0xC0), (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), + !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +// End of class definitions. +//----------------------------------------------------------------------------- + +let Predicates = [MMA] in { + def XXMFACC : + XForm_AT3<31, 0, 177, (outs acc:$ASo), (ins acc:$AS), "xxmfacc $AS", + IIC_VecGeneral, + [(set v512i1:$ASo, (int_ppc_mma_xxmfacc v512i1:$AS))]>, + RegConstraint<"$ASo = $AS">, NoEncode<"$ASo">; + def XXMTACC : + XForm_AT3<31, 1, 177, (outs acc:$AT), (ins acc:$ATi), "xxmtacc $AT", + IIC_VecGeneral, + [(set v512i1:$AT, (int_ppc_mma_xxmtacc v512i1:$ATi))]>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def KILL_PAIR : PPCPostRAExpPseudo<(outs vsrprc:$XTp), (ins vsrprc:$XSp), + "#KILL_PAIR", []>, + RegConstraint<"$XTp = $XSp">; + def BUILD_UACC : PPCPostRAExpPseudo<(outs acc:$AT), (ins uacc:$AS), + "#BUILD_UACC $AT, $AS", []>; + // We define XXSETACCZ as rematerializable to undo CSE of that intrinsic in + // the backend. We avoid CSE here because it generates a copy of the acc + // register and this copy is more expensive than calling the intrinsic again. + let isAsCheapAsAMove = 1, isReMaterializable = 1 in { + def XXSETACCZ : + XForm_AT3<31, 3, 177, (outs acc:$AT), (ins), "xxsetaccz $AT", IIC_VecGeneral, + [(set v512i1:$AT, (int_ppc_mma_xxsetaccz))]>; + } + def XVI8GER4SPP : + XX3Form_AT3_XAB6<59, 99, (outs acc:$AT), (ins acc:$ATi, vsrc:$XA, vsrc:$XB), + "xvi8ger4spp $AT, $XA, $XB", IIC_VecGeneral, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + let mayStore = 1 in { + def SPILL_ACC: PPCEmitTimePseudo<(outs), (ins acc:$AT, memrix16:$dst), + "#SPILL_ACC", []>; + def SPILL_UACC: PPCEmitTimePseudo<(outs), (ins uacc:$AT, memrix16:$dst), + "#SPILL_UACC", []>; + } + let mayLoad = 1, hasSideEffects = 0 in { + def RESTORE_ACC: PPCEmitTimePseudo<(outs acc:$AT), (ins memrix16:$src), + "#RESTORE_ACC", []>; + def RESTORE_UACC: PPCEmitTimePseudo<(outs uacc:$AT), (ins memrix16:$src), + "#RESTORE_UACC", []>; + } +} + +let Predicates = [MMA, PrefixInstrs] in { + def PMXVI8GER4SPP : + MMIRR_XX3Form_XYP4_XAB6<59, 99, (outs acc:$AT), + (ins acc:$ATi, vsrc:$XA,vsrc:$XB, u4imm:$XMSK, + u4imm:$YMSK, u4imm:$PMSK), + "pmxvi8ger4spp $AT, $XA, $XB, $XMSK, $YMSK, $PMSK", + IIC_VecGeneral, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; +} + +// MMA accumulating/non-accumulating instructions. +//------------------------------------------------------------------------------ + +// XVBF16GER2, XVBF16GER2PP, XVBF16GER2PN, XVBF16GER2NP, XVBF16GER2NN +// PMXVBF16GER2, PMXVBF16GER2PP, PMXVBF16GER2PN, PMXVBF16GER2NP, PMXVBF16GER2NN +defm XVBF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 50, (ins vsrc:$XA, vsrc:$XB), + "xvbf16ger2", "$AT, $XA, $XB">; + +// XVI4GER8, XVI4GER8PP, PMXVI4GER8, PMXVI4GER8PP +defm XVI4GER8 : ACC_UM_M844_XOEO<59, 34, (ins vsrc:$XA, vsrc:$XB), + "xvi4ger8", "$AT, $XA, $XB">; + +// XVI8GER4, XVI8GER4PP, PMXVI8GER4, PMXVI8GER4PP +defm XVI8GER4 : ACC_UM_M444_XOEO<59, 2, (ins vsrc:$XA, vsrc:$XB), + "xvi8ger4", "$AT, $XA, $XB">; + +// XVI16GER2, XVI16GER2PP, PMXVI16GER2, PMXVI16GER2PP +defm XVI16GER2 : ACC_UM_M244_XO46<59, 75, (ins vsrc:$XA, vsrc:$XB), + "xvi16ger2", "$AT, $XA, $XB">; + +// XVI16GER2S, XVI16GER2SPP, PMXVI16GER2S, PMXVI16GER2SPP +defm XVI16GER2S : ACC_UM_M244_XOEO<59, 42, (ins vsrc:$XA, vsrc:$XB), + "xvi16ger2s", "$AT, $XA, $XB">; + +// XVF16GER2, XVF16GER2PP, XVF16GER2PN, XVF16GER2NP, XVF16GER2NN +// PMXVF16GER2, PMXVF16GER2PP, PMXVF16GER2PN, PMXVF16GER2NP, PMXVF16GER2NN +defm XVF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 18, (ins vsrc:$XA, vsrc:$XB), + "xvf16ger2", "$AT, $XA, $XB">; + +// XVF32GER, XVF32GERPP, XVF32GERPN, XVF32GERNP, XVF32GERPP +// PMXVF32GER, PMXVF32GERPP, PMXVF32GERPN, PMXVF32GERNP, PMXVF32GERPP +defm XVF32GER : ACC_NEG_UM_M44_XOM84C<59, 26, (ins vsrc:$XA, vsrc:$XB), + "xvf32ger", "$AT, $XA, $XB">; + +// XVF64GER, XVF64GERPP, XVF64GERPN, XVF64GERNP, XVF64GERNN +// PMXVF64GER, PMXVF64GERPP, PMXVF64GERPN, PMXVF64GERNP, PMXVF64GERNN +defm XVF64GER : ACC_NEG_UM_M42_XOM84C<59, 58, (ins vsrpevenrc:$XA, vsrc:$XB), + "xvf64ger", "$AT, $XA, $XB">; +//------------------------------------------------------------------------------ + +// MMA Intrinsics +let Predicates = [MMA] in { + def : Pat<(v512i1 (int_ppc_mma_xvi4ger8 v16i8:$XA, v16i8:$XB)), + (XVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + + def : Pat<(v512i1 (int_ppc_mma_xvi8ger4 v16i8:$XA, v16i8:$XB)), + (XVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + + def : Pat<(v512i1 (int_ppc_mma_xvi16ger2s v16i8:$XA, v16i8:$XB)), + (XVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + + def : Pat<(v512i1 (int_ppc_mma_xvf16ger2 v16i8:$XA, v16i8:$XB)), + (XVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + + def : Pat<(v512i1 (int_ppc_mma_xvf32ger v16i8:$XA, v16i8:$XB)), + (XVF32GER RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf64ger v256i1:$XA, v16i8:$XB)), + (XVF64GER $XA, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB)), + (XVF64GERPP $ATi, $XA, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB)), + (XVF64GERPN $ATi, $XA, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB)), + (XVF64GERNP $ATi, $XA, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB)), + (XVF64GERNN $ATi, $XA, RCCp.BToVSRC)>; + + def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2 v16i8:$XA, v16i8:$XB)), + (XVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvi16ger2 v16i8:$XA, v16i8:$XB)), + (XVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; +} + +// MMA Intrinsics +let Predicates = [MMA, PrefixInstrs] in { + def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk8Imm:$PMSK)), + (PMXVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk8Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk8Imm:$PMSK)), + (PMXVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk8Imm:$PMSK)>; + + def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk4Imm:$PMSK)), + (PMXVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk4Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk4Imm:$PMSK)), + (PMXVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk4Imm:$PMSK)>; + + def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2s v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)), + (PMXVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)), + (PMXVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + + def : Pat<(v512i1 (int_ppc_mma_pmxvf32ger v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, + Msk4Imm:$YMSK)), + (PMXVF32GER RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK)), + (PMXVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK)), + (PMXVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK)), + (PMXVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK)), + (PMXVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK)>; + + def : Pat<(v512i1 (int_ppc_mma_pmxvf64ger v256i1:$XA, v16i8:$XB, Msk4Imm:$XMSK, + Msk2Imm:$YMSK)), + (PMXVF64GER $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, Msk2Imm:$YMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk2Imm:$YMSK)), + (PMXVF64GERPP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk2Imm:$YMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk2Imm:$YMSK)), + (PMXVF64GERPN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk2Imm:$YMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk2Imm:$YMSK)), + (PMXVF64GERNP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk2Imm:$YMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk2Imm:$YMSK)), + (PMXVF64GERNN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk2Imm:$YMSK)>; + + def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)), + (PMXVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)), + (PMXVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; +} + +def Concats { + dag VecsToVecPair0 = + (v256i1 (INSERT_SUBREG + (INSERT_SUBREG (IMPLICIT_DEF), $vs0, sub_vsx1), + $vs1, sub_vsx0)); + dag VecsToVecPair1 = + (v256i1 (INSERT_SUBREG + (INSERT_SUBREG (IMPLICIT_DEF), $vs2, sub_vsx1), + $vs3, sub_vsx0)); + dag VecsToVecQuad = + (BUILD_UACC (INSERT_SUBREG + (INSERT_SUBREG (v512i1 (IMPLICIT_DEF)), + (KILL_PAIR VecsToVecPair0), sub_pair0), + (KILL_PAIR VecsToVecPair1), sub_pair1)); +} + +def Extracts { + dag Pair0 = (v256i1 (EXTRACT_SUBREG $v, sub_pair0)); + dag Pair1 = (v256i1 (EXTRACT_SUBREG $v, sub_pair1)); + dag Vec0 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx0)); + dag Vec1 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx1)); + dag Vec2 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx0)); + dag Vec3 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx1)); +} + +let Predicates = [MMA] in { + def : Pat<(v512i1 (PPCAccBuild v4i32:$vs1, v4i32:$vs0, v4i32:$vs3, v4i32:$vs2)), + (XXMTACC Concats.VecsToVecQuad)>; + def : Pat<(v512i1 (int_ppc_mma_assemble_acc v16i8:$vs1, v16i8:$vs0, + v16i8:$vs3, v16i8:$vs2)), + (XXMTACC Concats.VecsToVecQuad)>; + def : Pat<(v512i1 (PPCxxmfacc v512i1:$AS)), (XXMFACC acc:$AS)>; + def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, (i64 0))), + Extracts.Vec0>; + def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, (i64 1))), + Extracts.Vec1>; + def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, (i64 2))), + Extracts.Vec2>; + def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, (i64 3))), + Extracts.Vec3>; +} + +let Predicates = [PairedVectorMemops] in { + def : Pat<(v256i1 (PPCPairBuild v4i32:$vs1, v4i32:$vs0)), + Concats.VecsToVecPair0>; + def : Pat<(v256i1 (int_ppc_vsx_assemble_pair v16i8:$vs1, v16i8:$vs0)), + Concats.VecsToVecPair0>; + def : Pat<(v4i32 (PPCPairExtractVsx vsrpevenrc:$v, (i64 0))), + (v4i32 (EXTRACT_SUBREG $v, sub_vsx0))>; + def : Pat<(v4i32 (PPCPairExtractVsx vsrpevenrc:$v, (i64 1))), + (v4i32 (EXTRACT_SUBREG $v, sub_vsx1))>; +} + +let mayLoad = 1, mayStore = 0, Predicates = [PairedVectorMemops] in { + def LXVP : DQForm_XTp5_RA17_MEM<6, 0, (outs vsrprc:$XTp), + (ins memrix16:$DQ_RA), "lxvp $XTp, $DQ_RA", + IIC_LdStLFD, []>; + def LXVPX : XForm_XTp5_XAB5<31, 333, (outs vsrprc:$XTp), (ins memrr:$src), + "lxvpx $XTp, $src", IIC_LdStLFD, + []>; +} + +let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops] in { + def STXVP : DQForm_XTp5_RA17_MEM<6, 1, (outs), (ins vsrprc:$XTp, + memrix16:$DQ_RA), "stxvp $XTp, $DQ_RA", + IIC_LdStLFD, []>; + def STXVPX : XForm_XTp5_XAB5<31, 461, (outs), (ins vsrprc:$XTp, memrr:$dst), + "stxvpx $XTp, $dst", IIC_LdStLFD, + []>; +} + +let mayLoad = 1, mayStore = 0, Predicates = [PairedVectorMemops, PrefixInstrs] in { + defm PLXVP : + 8LS_DForm_R_XTp5_SI34_MEM_p<1, 58, (outs vsrprc:$XTp), (ins memri34:$D_RA), + (ins memri34_pcrel:$D_RA), "plxvp $XTp, $D_RA", + IIC_LdStLFD>; +} + +let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops, PrefixInstrs] in { + defm PSTXVP : + 8LS_DForm_R_XTp5_SI34_MEM_p<1, 62, (outs), (ins vsrprc:$XTp, memri34:$D_RA), + (ins vsrprc:$XTp, memri34_pcrel:$D_RA), + "pstxvp $XTp, $D_RA", IIC_LdStLFD>; +} + +let Predicates = [PairedVectorMemops] in { + // Intrinsics for Paired Vector Loads. + def : Pat<(v256i1 (int_ppc_vsx_lxvp iaddrX16:$src)), (LXVP memrix16:$src)>; + def : Pat<(v256i1 (int_ppc_vsx_lxvp xaddrX16:$src)), (LXVPX xaddrX16:$src)>; + let Predicates = [PairedVectorMemops, PrefixInstrs] in { + def : Pat<(v256i1 (int_ppc_vsx_lxvp iaddrX34:$src)), (PLXVP memri34:$src)>; + } + // Intrinsics for Paired Vector Stores. + def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, iaddrX16:$dst), + (STXVP $XSp, memrix16:$dst)>; + def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, xaddrX16:$dst), + (STXVPX $XSp, xaddrX16:$dst)>; + let Predicates = [PairedVectorMemops, PrefixInstrs] in { + def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, iaddrX34:$dst), + (PSTXVP $XSp, memri34:$dst)>; + } +} + // TODO: We have an added complexity of 500 here. This is only a temporary // solution to have tablegen consider these patterns first. The way we do // addressing for PowerPC is complex depending on available D form, X form, or @@ -1836,13 +1836,13 @@ let Predicates = [PCRelativeMemops], AddedComplexity = 500 in { // If the PPCmatpcreladdr node is not caught by any other pattern it should be // caught here and turned into a paddi instruction to materialize the address. def : Pat<(PPCmatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>; - // PPCtlsdynamatpcreladdr node is used for TLS dynamic models to materialize - // tls global address with paddi instruction. - def : Pat<(PPCtlsdynamatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>; - // PPCtlslocalexecmataddr node is used for TLS local exec models to - // materialize tls global address with paddi instruction. - def : Pat<(PPCaddTls i64:$in, (PPCtlslocalexecmataddr tglobaltlsaddr:$addr)), - (PADDI8 $in, $addr)>; + // PPCtlsdynamatpcreladdr node is used for TLS dynamic models to materialize + // tls global address with paddi instruction. + def : Pat<(PPCtlsdynamatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>; + // PPCtlslocalexecmataddr node is used for TLS local exec models to + // materialize tls global address with paddi instruction. + def : Pat<(PPCaddTls i64:$in, (PPCtlslocalexecmataddr tglobaltlsaddr:$addr)), + (PADDI8 $in, $addr)>; } let Predicates = [PrefixInstrs] in { @@ -1887,26 +1887,26 @@ let Predicates = [PrefixInstrs] in { } let Predicates = [IsISA3_1] in { - def SETBC : XForm_XT5_BI5<31, 384, (outs gprc:$RT), (ins crbitrc:$BI), - "setbc $RT, $BI", IIC_IntCompare, []>; - def SETBCR : XForm_XT5_BI5<31, 416, (outs gprc:$RT), (ins crbitrc:$BI), - "setbcr $RT, $BI", IIC_IntCompare, []>; - def SETNBC : XForm_XT5_BI5<31, 448, (outs gprc:$RT), (ins crbitrc:$BI), - "setnbc $RT, $BI", IIC_IntCompare, []>; - def SETNBCR : XForm_XT5_BI5<31, 480, (outs gprc:$RT), (ins crbitrc:$BI), - "setnbcr $RT, $BI", IIC_IntCompare, []>; - - let Interpretation64Bit = 1, isCodeGenOnly = 1 in { - def SETBC8 : XForm_XT5_BI5<31, 384, (outs g8rc:$RT), (ins crbitrc:$BI), - "setbc $RT, $BI", IIC_IntCompare, []>; - def SETBCR8 : XForm_XT5_BI5<31, 416, (outs g8rc:$RT), (ins crbitrc:$BI), - "setbcr $RT, $BI", IIC_IntCompare, []>; - def SETNBC8 : XForm_XT5_BI5<31, 448, (outs g8rc:$RT), (ins crbitrc:$BI), - "setnbc $RT, $BI", IIC_IntCompare, []>; - def SETNBCR8 : XForm_XT5_BI5<31, 480, (outs g8rc:$RT), (ins crbitrc:$BI), - "setnbcr $RT, $BI", IIC_IntCompare, []>; - } - + def SETBC : XForm_XT5_BI5<31, 384, (outs gprc:$RT), (ins crbitrc:$BI), + "setbc $RT, $BI", IIC_IntCompare, []>; + def SETBCR : XForm_XT5_BI5<31, 416, (outs gprc:$RT), (ins crbitrc:$BI), + "setbcr $RT, $BI", IIC_IntCompare, []>; + def SETNBC : XForm_XT5_BI5<31, 448, (outs gprc:$RT), (ins crbitrc:$BI), + "setnbc $RT, $BI", IIC_IntCompare, []>; + def SETNBCR : XForm_XT5_BI5<31, 480, (outs gprc:$RT), (ins crbitrc:$BI), + "setnbcr $RT, $BI", IIC_IntCompare, []>; + + let Interpretation64Bit = 1, isCodeGenOnly = 1 in { + def SETBC8 : XForm_XT5_BI5<31, 384, (outs g8rc:$RT), (ins crbitrc:$BI), + "setbc $RT, $BI", IIC_IntCompare, []>; + def SETBCR8 : XForm_XT5_BI5<31, 416, (outs g8rc:$RT), (ins crbitrc:$BI), + "setbcr $RT, $BI", IIC_IntCompare, []>; + def SETNBC8 : XForm_XT5_BI5<31, 448, (outs g8rc:$RT), (ins crbitrc:$BI), + "setnbc $RT, $BI", IIC_IntCompare, []>; + def SETNBCR8 : XForm_XT5_BI5<31, 480, (outs g8rc:$RT), (ins crbitrc:$BI), + "setnbcr $RT, $BI", IIC_IntCompare, []>; + } + def VSLDBI : VNForm_VTAB5_SD3<22, 0, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB, u3imm:$SH), "vsldbi $VRT, $VRA, $VRB, $SH", @@ -1923,254 +1923,254 @@ let Predicates = [IsISA3_1] in { (int_ppc_altivec_vsrdbi v16i8:$VRA, v16i8:$VRB, i32:$SH))]>; - defm VSTRIBR : VXForm_VTB5_RCr<13, 1, (outs vrrc:$vT), (ins vrrc:$vB), - "vstribr", "$vT, $vB", IIC_VecGeneral, - [(set v16i8:$vT, - (int_ppc_altivec_vstribr v16i8:$vB))]>; - defm VSTRIBL : VXForm_VTB5_RCr<13, 0, (outs vrrc:$vT), (ins vrrc:$vB), - "vstribl", "$vT, $vB", IIC_VecGeneral, - [(set v16i8:$vT, - (int_ppc_altivec_vstribl v16i8:$vB))]>; - defm VSTRIHR : VXForm_VTB5_RCr<13, 3, (outs vrrc:$vT), (ins vrrc:$vB), - "vstrihr", "$vT, $vB", IIC_VecGeneral, - [(set v8i16:$vT, - (int_ppc_altivec_vstrihr v8i16:$vB))]>; - defm VSTRIHL : VXForm_VTB5_RCr<13, 2, (outs vrrc:$vT), (ins vrrc:$vB), - "vstrihl", "$vT, $vB", IIC_VecGeneral, - [(set v8i16:$vT, - (int_ppc_altivec_vstrihl v8i16:$vB))]>; - def VINSW : - VXForm_1<207, (outs vrrc:$vD), (ins vrrc:$vDi, u4imm:$UIM, gprc:$rB), - "vinsw $vD, $rB, $UIM", IIC_VecGeneral, - [(set v4i32:$vD, - (int_ppc_altivec_vinsw v4i32:$vDi, i32:$rB, timm:$UIM))]>, - RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; + defm VSTRIBR : VXForm_VTB5_RCr<13, 1, (outs vrrc:$vT), (ins vrrc:$vB), + "vstribr", "$vT, $vB", IIC_VecGeneral, + [(set v16i8:$vT, + (int_ppc_altivec_vstribr v16i8:$vB))]>; + defm VSTRIBL : VXForm_VTB5_RCr<13, 0, (outs vrrc:$vT), (ins vrrc:$vB), + "vstribl", "$vT, $vB", IIC_VecGeneral, + [(set v16i8:$vT, + (int_ppc_altivec_vstribl v16i8:$vB))]>; + defm VSTRIHR : VXForm_VTB5_RCr<13, 3, (outs vrrc:$vT), (ins vrrc:$vB), + "vstrihr", "$vT, $vB", IIC_VecGeneral, + [(set v8i16:$vT, + (int_ppc_altivec_vstrihr v8i16:$vB))]>; + defm VSTRIHL : VXForm_VTB5_RCr<13, 2, (outs vrrc:$vT), (ins vrrc:$vB), + "vstrihl", "$vT, $vB", IIC_VecGeneral, + [(set v8i16:$vT, + (int_ppc_altivec_vstrihl v8i16:$vB))]>; + def VINSW : + VXForm_1<207, (outs vrrc:$vD), (ins vrrc:$vDi, u4imm:$UIM, gprc:$rB), + "vinsw $vD, $rB, $UIM", IIC_VecGeneral, + [(set v4i32:$vD, + (int_ppc_altivec_vinsw v4i32:$vDi, i32:$rB, timm:$UIM))]>, + RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; def VINSD : - VXForm_1<463, (outs vrrc:$vD), (ins vrrc:$vDi, u4imm:$UIM, g8rc:$rB), - "vinsd $vD, $rB, $UIM", IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_vinsd v2i64:$vDi, i64:$rB, timm:$UIM))]>, - RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; + VXForm_1<463, (outs vrrc:$vD), (ins vrrc:$vDi, u4imm:$UIM, g8rc:$rB), + "vinsd $vD, $rB, $UIM", IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vinsd v2i64:$vDi, i64:$rB, timm:$UIM))]>, + RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; def VINSBVLX : VXForm_VTB5_RA5_ins<15, "vinsbvlx", [(set v16i8:$vD, - (int_ppc_altivec_vinsbvlx v16i8:$vDi, i32:$rA, + (int_ppc_altivec_vinsbvlx v16i8:$vDi, i32:$rA, v16i8:$vB))]>; def VINSBVRX : VXForm_VTB5_RA5_ins<271, "vinsbvrx", [(set v16i8:$vD, - (int_ppc_altivec_vinsbvrx v16i8:$vDi, i32:$rA, + (int_ppc_altivec_vinsbvrx v16i8:$vDi, i32:$rA, v16i8:$vB))]>; def VINSHVLX : VXForm_VTB5_RA5_ins<79, "vinshvlx", [(set v8i16:$vD, - (int_ppc_altivec_vinshvlx v8i16:$vDi, i32:$rA, + (int_ppc_altivec_vinshvlx v8i16:$vDi, i32:$rA, v8i16:$vB))]>; def VINSHVRX : VXForm_VTB5_RA5_ins<335, "vinshvrx", [(set v8i16:$vD, - (int_ppc_altivec_vinshvrx v8i16:$vDi, i32:$rA, + (int_ppc_altivec_vinshvrx v8i16:$vDi, i32:$rA, v8i16:$vB))]>; def VINSWVLX : VXForm_VTB5_RA5_ins<143, "vinswvlx", [(set v4i32:$vD, - (int_ppc_altivec_vinswvlx v4i32:$vDi, i32:$rA, + (int_ppc_altivec_vinswvlx v4i32:$vDi, i32:$rA, v4i32:$vB))]>; def VINSWVRX : VXForm_VTB5_RA5_ins<399, "vinswvrx", [(set v4i32:$vD, - (int_ppc_altivec_vinswvrx v4i32:$vDi, i32:$rA, + (int_ppc_altivec_vinswvrx v4i32:$vDi, i32:$rA, v4i32:$vB))]>; def VINSBLX : VXForm_VRT5_RAB5_ins<527, "vinsblx", [(set v16i8:$vD, - (int_ppc_altivec_vinsblx v16i8:$vDi, i32:$rA, - i32:$rB))]>; + (int_ppc_altivec_vinsblx v16i8:$vDi, i32:$rA, + i32:$rB))]>; def VINSBRX : VXForm_VRT5_RAB5_ins<783, "vinsbrx", [(set v16i8:$vD, - (int_ppc_altivec_vinsbrx v16i8:$vDi, i32:$rA, - i32:$rB))]>; + (int_ppc_altivec_vinsbrx v16i8:$vDi, i32:$rA, + i32:$rB))]>; def VINSHLX : VXForm_VRT5_RAB5_ins<591, "vinshlx", [(set v8i16:$vD, - (int_ppc_altivec_vinshlx v8i16:$vDi, i32:$rA, - i32:$rB))]>; + (int_ppc_altivec_vinshlx v8i16:$vDi, i32:$rA, + i32:$rB))]>; def VINSHRX : VXForm_VRT5_RAB5_ins<847, "vinshrx", [(set v8i16:$vD, - (int_ppc_altivec_vinshrx v8i16:$vDi, i32:$rA, - i32:$rB))]>; + (int_ppc_altivec_vinshrx v8i16:$vDi, i32:$rA, + i32:$rB))]>; def VINSWLX : VXForm_VRT5_RAB5_ins<655, "vinswlx", [(set v4i32:$vD, - (int_ppc_altivec_vinswlx v4i32:$vDi, i32:$rA, - i32:$rB))]>; + (int_ppc_altivec_vinswlx v4i32:$vDi, i32:$rA, + i32:$rB))]>; def VINSWRX : VXForm_VRT5_RAB5_ins<911, "vinswrx", [(set v4i32:$vD, - (int_ppc_altivec_vinswrx v4i32:$vDi, i32:$rA, - i32:$rB))]>; + (int_ppc_altivec_vinswrx v4i32:$vDi, i32:$rA, + i32:$rB))]>; def VINSDLX : - VXForm_1<719, (outs vrrc:$vD), (ins vrrc:$vDi, g8rc:$rA, g8rc:$rB), - "vinsdlx $vD, $rA, $rB", IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_vinsdlx v2i64:$vDi, i64:$rA, i64:$rB))]>, - RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; + VXForm_1<719, (outs vrrc:$vD), (ins vrrc:$vDi, g8rc:$rA, g8rc:$rB), + "vinsdlx $vD, $rA, $rB", IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vinsdlx v2i64:$vDi, i64:$rA, i64:$rB))]>, + RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; def VINSDRX : - VXForm_1<975, (outs vrrc:$vD), (ins vrrc:$vDi, g8rc:$rA, g8rc:$rB), - "vinsdrx $vD, $rA, $rB", IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_vinsdrx v2i64:$vDi, i64:$rA, i64:$rB))]>, - RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; - def VEXTRACTBM : VXForm_RD5_XO5_RS5<1602, 8, (outs gprc:$rD), (ins vrrc:$vB), - "vextractbm $rD, $vB", IIC_VecGeneral, - [(set i32:$rD, - (int_ppc_altivec_vextractbm v16i8:$vB))]>; - def VEXTRACTHM : VXForm_RD5_XO5_RS5<1602, 9, (outs gprc:$rD), (ins vrrc:$vB), - "vextracthm $rD, $vB", IIC_VecGeneral, - [(set i32:$rD, - (int_ppc_altivec_vextracthm v8i16:$vB))]>; - def VEXTRACTWM : VXForm_RD5_XO5_RS5<1602, 10, (outs gprc:$rD), (ins vrrc:$vB), - "vextractwm $rD, $vB", IIC_VecGeneral, - [(set i32:$rD, - (int_ppc_altivec_vextractwm v4i32:$vB))]>; - def VEXTRACTDM : VXForm_RD5_XO5_RS5<1602, 11, (outs gprc:$rD), (ins vrrc:$vB), - "vextractdm $rD, $vB", IIC_VecGeneral, - [(set i32:$rD, - (int_ppc_altivec_vextractdm v2i64:$vB))]>; - def VEXTRACTQM : VXForm_RD5_XO5_RS5<1602, 12, (outs gprc:$rD), (ins vrrc:$vB), - "vextractqm $rD, $vB", IIC_VecGeneral, - [(set i32:$rD, - (int_ppc_altivec_vextractqm v1i128:$vB))]>; - def VEXPANDBM : VXForm_RD5_XO5_RS5<1602, 0, (outs vrrc:$vD), (ins vrrc:$vB), - "vexpandbm $vD, $vB", IIC_VecGeneral, - [(set v16i8:$vD, (int_ppc_altivec_vexpandbm - v16i8:$vB))]>; - def VEXPANDHM : VXForm_RD5_XO5_RS5<1602, 1, (outs vrrc:$vD), (ins vrrc:$vB), - "vexpandhm $vD, $vB", IIC_VecGeneral, - [(set v8i16:$vD, (int_ppc_altivec_vexpandhm - v8i16:$vB))]>; - def VEXPANDWM : VXForm_RD5_XO5_RS5<1602, 2, (outs vrrc:$vD), (ins vrrc:$vB), - "vexpandwm $vD, $vB", IIC_VecGeneral, - [(set v4i32:$vD, (int_ppc_altivec_vexpandwm - v4i32:$vB))]>; - def VEXPANDDM : VXForm_RD5_XO5_RS5<1602, 3, (outs vrrc:$vD), (ins vrrc:$vB), - "vexpanddm $vD, $vB", IIC_VecGeneral, - [(set v2i64:$vD, (int_ppc_altivec_vexpanddm - v2i64:$vB))]>; - def VEXPANDQM : VXForm_RD5_XO5_RS5<1602, 4, (outs vrrc:$vD), (ins vrrc:$vB), - "vexpandqm $vD, $vB", IIC_VecGeneral, - [(set v1i128:$vD, (int_ppc_altivec_vexpandqm - v1i128:$vB))]>; - def MTVSRBM : VXForm_RD5_XO5_RS5<1602, 16, (outs vrrc:$vD), (ins g8rc:$rB), - "mtvsrbm $vD, $rB", IIC_VecGeneral, - [(set v16i8:$vD, - (int_ppc_altivec_mtvsrbm i64:$rB))]>; - def MTVSRHM : VXForm_RD5_XO5_RS5<1602, 17, (outs vrrc:$vD), (ins g8rc:$rB), - "mtvsrhm $vD, $rB", IIC_VecGeneral, - [(set v8i16:$vD, - (int_ppc_altivec_mtvsrhm i64:$rB))]>; - def MTVSRWM : VXForm_RD5_XO5_RS5<1602, 18, (outs vrrc:$vD), (ins g8rc:$rB), - "mtvsrwm $vD, $rB", IIC_VecGeneral, - [(set v4i32:$vD, - (int_ppc_altivec_mtvsrwm i64:$rB))]>; - def MTVSRDM : VXForm_RD5_XO5_RS5<1602, 19, (outs vrrc:$vD), (ins g8rc:$rB), - "mtvsrdm $vD, $rB", IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_mtvsrdm i64:$rB))]>; - def MTVSRQM : VXForm_RD5_XO5_RS5<1602, 20, (outs vrrc:$vD), (ins g8rc:$rB), - "mtvsrqm $vD, $rB", IIC_VecGeneral, - [(set v1i128:$vD, - (int_ppc_altivec_mtvsrqm i64:$rB))]>; - def MTVSRBMI : DXForm<4, 10, (outs vrrc:$vD), (ins u16imm64:$D), - "mtvsrbmi $vD, $D", IIC_VecGeneral, - [(set v16i8:$vD, - (int_ppc_altivec_mtvsrbm imm:$D))]>; - def VCNTMBB : VXForm_RD5_MP_VB5<1602, 12, (outs g8rc:$rD), - (ins vrrc:$vB, u1imm:$MP), - "vcntmbb $rD, $vB, $MP", IIC_VecGeneral, - [(set i64:$rD, (int_ppc_altivec_vcntmbb - v16i8:$vB, timm:$MP))]>; - def VCNTMBH : VXForm_RD5_MP_VB5<1602, 13, (outs g8rc:$rD), - (ins vrrc:$vB, u1imm:$MP), - "vcntmbh $rD, $vB, $MP", IIC_VecGeneral, - [(set i64:$rD, (int_ppc_altivec_vcntmbh - v8i16:$vB, timm:$MP))]>; - def VCNTMBW : VXForm_RD5_MP_VB5<1602, 14, (outs g8rc:$rD), - (ins vrrc:$vB, u1imm:$MP), - "vcntmbw $rD, $vB, $MP", IIC_VecGeneral, - [(set i64:$rD, (int_ppc_altivec_vcntmbw - v4i32:$vB, timm:$MP))]>; - def VCNTMBD : VXForm_RD5_MP_VB5<1602, 15, (outs g8rc:$rD), - (ins vrrc:$vB, u1imm:$MP), - "vcntmbd $rD, $vB, $MP", IIC_VecGeneral, - [(set i64:$rD, (int_ppc_altivec_vcntmbd - v2i64:$vB, timm:$MP))]>; - def VEXTDUBVLX : VAForm_1a<24, (outs vrrc:$vD), - (ins vrrc:$vA, vrrc:$vB, gprc:$rC), - "vextdubvlx $vD, $vA, $vB, $rC", - IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_vextdubvlx v16i8:$vA, - v16i8:$vB, - i32:$rC))]>; - def VEXTDUBVRX : VAForm_1a<25, (outs vrrc:$vD), - (ins vrrc:$vA, vrrc:$vB, gprc:$rC), - "vextdubvrx $vD, $vA, $vB, $rC", - IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_vextdubvrx v16i8:$vA, - v16i8:$vB, - i32:$rC))]>; - def VEXTDUHVLX : VAForm_1a<26, (outs vrrc:$vD), - (ins vrrc:$vA, vrrc:$vB, gprc:$rC), - "vextduhvlx $vD, $vA, $vB, $rC", - IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_vextduhvlx v8i16:$vA, - v8i16:$vB, - i32:$rC))]>; - def VEXTDUHVRX : VAForm_1a<27, (outs vrrc:$vD), - (ins vrrc:$vA, vrrc:$vB, gprc:$rC), - "vextduhvrx $vD, $vA, $vB, $rC", - IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_vextduhvrx v8i16:$vA, - v8i16:$vB, - i32:$rC))]>; - def VEXTDUWVLX : VAForm_1a<28, (outs vrrc:$vD), - (ins vrrc:$vA, vrrc:$vB, gprc:$rC), - "vextduwvlx $vD, $vA, $vB, $rC", - IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_vextduwvlx v4i32:$vA, - v4i32:$vB, - i32:$rC))]>; - def VEXTDUWVRX : VAForm_1a<29, (outs vrrc:$vD), - (ins vrrc:$vA, vrrc:$vB, gprc:$rC), - "vextduwvrx $vD, $vA, $vB, $rC", - IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_vextduwvrx v4i32:$vA, - v4i32:$vB, - i32:$rC))]>; - def VEXTDDVLX : VAForm_1a<30, (outs vrrc:$vD), - (ins vrrc:$vA, vrrc:$vB, gprc:$rC), - "vextddvlx $vD, $vA, $vB, $rC", - IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_vextddvlx v2i64:$vA, - v2i64:$vB, - i32:$rC))]>; - def VEXTDDVRX : VAForm_1a<31, (outs vrrc:$vD), - (ins vrrc:$vA, vrrc:$vB, gprc:$rC), - "vextddvrx $vD, $vA, $vB, $rC", - IIC_VecGeneral, - [(set v2i64:$vD, - (int_ppc_altivec_vextddvrx v2i64:$vA, - v2i64:$vB, - i32:$rC))]>; + VXForm_1<975, (outs vrrc:$vD), (ins vrrc:$vDi, g8rc:$rA, g8rc:$rB), + "vinsdrx $vD, $rA, $rB", IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vinsdrx v2i64:$vDi, i64:$rA, i64:$rB))]>, + RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; + def VEXTRACTBM : VXForm_RD5_XO5_RS5<1602, 8, (outs gprc:$rD), (ins vrrc:$vB), + "vextractbm $rD, $vB", IIC_VecGeneral, + [(set i32:$rD, + (int_ppc_altivec_vextractbm v16i8:$vB))]>; + def VEXTRACTHM : VXForm_RD5_XO5_RS5<1602, 9, (outs gprc:$rD), (ins vrrc:$vB), + "vextracthm $rD, $vB", IIC_VecGeneral, + [(set i32:$rD, + (int_ppc_altivec_vextracthm v8i16:$vB))]>; + def VEXTRACTWM : VXForm_RD5_XO5_RS5<1602, 10, (outs gprc:$rD), (ins vrrc:$vB), + "vextractwm $rD, $vB", IIC_VecGeneral, + [(set i32:$rD, + (int_ppc_altivec_vextractwm v4i32:$vB))]>; + def VEXTRACTDM : VXForm_RD5_XO5_RS5<1602, 11, (outs gprc:$rD), (ins vrrc:$vB), + "vextractdm $rD, $vB", IIC_VecGeneral, + [(set i32:$rD, + (int_ppc_altivec_vextractdm v2i64:$vB))]>; + def VEXTRACTQM : VXForm_RD5_XO5_RS5<1602, 12, (outs gprc:$rD), (ins vrrc:$vB), + "vextractqm $rD, $vB", IIC_VecGeneral, + [(set i32:$rD, + (int_ppc_altivec_vextractqm v1i128:$vB))]>; + def VEXPANDBM : VXForm_RD5_XO5_RS5<1602, 0, (outs vrrc:$vD), (ins vrrc:$vB), + "vexpandbm $vD, $vB", IIC_VecGeneral, + [(set v16i8:$vD, (int_ppc_altivec_vexpandbm + v16i8:$vB))]>; + def VEXPANDHM : VXForm_RD5_XO5_RS5<1602, 1, (outs vrrc:$vD), (ins vrrc:$vB), + "vexpandhm $vD, $vB", IIC_VecGeneral, + [(set v8i16:$vD, (int_ppc_altivec_vexpandhm + v8i16:$vB))]>; + def VEXPANDWM : VXForm_RD5_XO5_RS5<1602, 2, (outs vrrc:$vD), (ins vrrc:$vB), + "vexpandwm $vD, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (int_ppc_altivec_vexpandwm + v4i32:$vB))]>; + def VEXPANDDM : VXForm_RD5_XO5_RS5<1602, 3, (outs vrrc:$vD), (ins vrrc:$vB), + "vexpanddm $vD, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (int_ppc_altivec_vexpanddm + v2i64:$vB))]>; + def VEXPANDQM : VXForm_RD5_XO5_RS5<1602, 4, (outs vrrc:$vD), (ins vrrc:$vB), + "vexpandqm $vD, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vexpandqm + v1i128:$vB))]>; + def MTVSRBM : VXForm_RD5_XO5_RS5<1602, 16, (outs vrrc:$vD), (ins g8rc:$rB), + "mtvsrbm $vD, $rB", IIC_VecGeneral, + [(set v16i8:$vD, + (int_ppc_altivec_mtvsrbm i64:$rB))]>; + def MTVSRHM : VXForm_RD5_XO5_RS5<1602, 17, (outs vrrc:$vD), (ins g8rc:$rB), + "mtvsrhm $vD, $rB", IIC_VecGeneral, + [(set v8i16:$vD, + (int_ppc_altivec_mtvsrhm i64:$rB))]>; + def MTVSRWM : VXForm_RD5_XO5_RS5<1602, 18, (outs vrrc:$vD), (ins g8rc:$rB), + "mtvsrwm $vD, $rB", IIC_VecGeneral, + [(set v4i32:$vD, + (int_ppc_altivec_mtvsrwm i64:$rB))]>; + def MTVSRDM : VXForm_RD5_XO5_RS5<1602, 19, (outs vrrc:$vD), (ins g8rc:$rB), + "mtvsrdm $vD, $rB", IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_mtvsrdm i64:$rB))]>; + def MTVSRQM : VXForm_RD5_XO5_RS5<1602, 20, (outs vrrc:$vD), (ins g8rc:$rB), + "mtvsrqm $vD, $rB", IIC_VecGeneral, + [(set v1i128:$vD, + (int_ppc_altivec_mtvsrqm i64:$rB))]>; + def MTVSRBMI : DXForm<4, 10, (outs vrrc:$vD), (ins u16imm64:$D), + "mtvsrbmi $vD, $D", IIC_VecGeneral, + [(set v16i8:$vD, + (int_ppc_altivec_mtvsrbm imm:$D))]>; + def VCNTMBB : VXForm_RD5_MP_VB5<1602, 12, (outs g8rc:$rD), + (ins vrrc:$vB, u1imm:$MP), + "vcntmbb $rD, $vB, $MP", IIC_VecGeneral, + [(set i64:$rD, (int_ppc_altivec_vcntmbb + v16i8:$vB, timm:$MP))]>; + def VCNTMBH : VXForm_RD5_MP_VB5<1602, 13, (outs g8rc:$rD), + (ins vrrc:$vB, u1imm:$MP), + "vcntmbh $rD, $vB, $MP", IIC_VecGeneral, + [(set i64:$rD, (int_ppc_altivec_vcntmbh + v8i16:$vB, timm:$MP))]>; + def VCNTMBW : VXForm_RD5_MP_VB5<1602, 14, (outs g8rc:$rD), + (ins vrrc:$vB, u1imm:$MP), + "vcntmbw $rD, $vB, $MP", IIC_VecGeneral, + [(set i64:$rD, (int_ppc_altivec_vcntmbw + v4i32:$vB, timm:$MP))]>; + def VCNTMBD : VXForm_RD5_MP_VB5<1602, 15, (outs g8rc:$rD), + (ins vrrc:$vB, u1imm:$MP), + "vcntmbd $rD, $vB, $MP", IIC_VecGeneral, + [(set i64:$rD, (int_ppc_altivec_vcntmbd + v2i64:$vB, timm:$MP))]>; + def VEXTDUBVLX : VAForm_1a<24, (outs vrrc:$vD), + (ins vrrc:$vA, vrrc:$vB, gprc:$rC), + "vextdubvlx $vD, $vA, $vB, $rC", + IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vextdubvlx v16i8:$vA, + v16i8:$vB, + i32:$rC))]>; + def VEXTDUBVRX : VAForm_1a<25, (outs vrrc:$vD), + (ins vrrc:$vA, vrrc:$vB, gprc:$rC), + "vextdubvrx $vD, $vA, $vB, $rC", + IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vextdubvrx v16i8:$vA, + v16i8:$vB, + i32:$rC))]>; + def VEXTDUHVLX : VAForm_1a<26, (outs vrrc:$vD), + (ins vrrc:$vA, vrrc:$vB, gprc:$rC), + "vextduhvlx $vD, $vA, $vB, $rC", + IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vextduhvlx v8i16:$vA, + v8i16:$vB, + i32:$rC))]>; + def VEXTDUHVRX : VAForm_1a<27, (outs vrrc:$vD), + (ins vrrc:$vA, vrrc:$vB, gprc:$rC), + "vextduhvrx $vD, $vA, $vB, $rC", + IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vextduhvrx v8i16:$vA, + v8i16:$vB, + i32:$rC))]>; + def VEXTDUWVLX : VAForm_1a<28, (outs vrrc:$vD), + (ins vrrc:$vA, vrrc:$vB, gprc:$rC), + "vextduwvlx $vD, $vA, $vB, $rC", + IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vextduwvlx v4i32:$vA, + v4i32:$vB, + i32:$rC))]>; + def VEXTDUWVRX : VAForm_1a<29, (outs vrrc:$vD), + (ins vrrc:$vA, vrrc:$vB, gprc:$rC), + "vextduwvrx $vD, $vA, $vB, $rC", + IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vextduwvrx v4i32:$vA, + v4i32:$vB, + i32:$rC))]>; + def VEXTDDVLX : VAForm_1a<30, (outs vrrc:$vD), + (ins vrrc:$vA, vrrc:$vB, gprc:$rC), + "vextddvlx $vD, $vA, $vB, $rC", + IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vextddvlx v2i64:$vA, + v2i64:$vB, + i32:$rC))]>; + def VEXTDDVRX : VAForm_1a<31, (outs vrrc:$vD), + (ins vrrc:$vA, vrrc:$vB, gprc:$rC), + "vextddvrx $vD, $vA, $vB, $rC", + IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vextddvrx v2i64:$vA, + v2i64:$vB, + i32:$rC))]>; def VPDEPD : VXForm_1<1485, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vpdepd $vD, $vA, $vB", IIC_VecGeneral, [(set v2i64:$vD, @@ -2238,61 +2238,61 @@ let Predicates = [IsISA3_1] in { "vclrrb $vD, $vA, $rB", IIC_VecGeneral, [(set v16i8:$vD, (int_ppc_altivec_vclrrb v16i8:$vA, i32:$rB))]>; - def VMULLD : VXForm_1<457, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmulld $vD, $vA, $vB", IIC_VecGeneral, - [(set v2i64:$vD, (mul v2i64:$vA, v2i64:$vB))]>; - def VMULHSW : VXForm_1<905, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmulhsw $vD, $vA, $vB", IIC_VecGeneral, - [(set v4i32:$vD, (mulhs v4i32:$vA, v4i32:$vB))]>; - def VMULHUW : VXForm_1<649, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmulhuw $vD, $vA, $vB", IIC_VecGeneral, - [(set v4i32:$vD, (mulhu v4i32:$vA, v4i32:$vB))]>; - def VMULHSD : VXForm_1<969, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmulhsd $vD, $vA, $vB", IIC_VecGeneral, - [(set v2i64:$vD, (mulhs v2i64:$vA, v2i64:$vB))]>; - def VMULHUD : VXForm_1<713, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmulhud $vD, $vA, $vB", IIC_VecGeneral, - [(set v2i64:$vD, (mulhu v2i64:$vA, v2i64:$vB))]>; - def VMODSW : VXForm_1<1931, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmodsw $vD, $vA, $vB", IIC_VecGeneral, - [(set v4i32:$vD, (srem v4i32:$vA, v4i32:$vB))]>; - def VMODUW : VXForm_1<1675, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmoduw $vD, $vA, $vB", IIC_VecGeneral, - [(set v4i32:$vD, (urem v4i32:$vA, v4i32:$vB))]>; - def VMODSD : VXForm_1<1995, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmodsd $vD, $vA, $vB", IIC_VecGeneral, - [(set v2i64:$vD, (srem v2i64:$vA, v2i64:$vB))]>; - def VMODUD : VXForm_1<1739, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmodud $vD, $vA, $vB", IIC_VecGeneral, - [(set v2i64:$vD, (urem v2i64:$vA, v2i64:$vB))]>; - def VDIVSW : VXForm_1<395, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vdivsw $vD, $vA, $vB", IIC_VecGeneral, - [(set v4i32:$vD, (sdiv v4i32:$vA, v4i32:$vB))]>; - def VDIVUW : VXForm_1<139, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vdivuw $vD, $vA, $vB", IIC_VecGeneral, - [(set v4i32:$vD, (udiv v4i32:$vA, v4i32:$vB))]>; - def VDIVSD : VXForm_1<459, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vdivsd $vD, $vA, $vB", IIC_VecGeneral, - [(set v2i64:$vD, (sdiv v2i64:$vA, v2i64:$vB))]>; - def VDIVUD : VXForm_1<203, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vdivud $vD, $vA, $vB", IIC_VecGeneral, - [(set v2i64:$vD, (udiv v2i64:$vA, v2i64:$vB))]>; - def VDIVESW : VXForm_1<907, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vdivesw $vD, $vA, $vB", IIC_VecGeneral, - [(set v4i32:$vD, (int_ppc_altivec_vdivesw v4i32:$vA, - v4i32:$vB))]>; - def VDIVEUW : VXForm_1<651, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vdiveuw $vD, $vA, $vB", IIC_VecGeneral, - [(set v4i32:$vD, (int_ppc_altivec_vdiveuw v4i32:$vA, - v4i32:$vB))]>; - def VDIVESD : VXForm_1<971, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vdivesd $vD, $vA, $vB", IIC_VecGeneral, - [(set v2i64:$vD, (int_ppc_altivec_vdivesd v2i64:$vA, - v2i64:$vB))]>; - def VDIVEUD : VXForm_1<715, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vdiveud $vD, $vA, $vB", IIC_VecGeneral, - [(set v2i64:$vD, (int_ppc_altivec_vdiveud v2i64:$vA, - v2i64:$vB))]>; + def VMULLD : VXForm_1<457, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmulld $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (mul v2i64:$vA, v2i64:$vB))]>; + def VMULHSW : VXForm_1<905, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmulhsw $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (mulhs v4i32:$vA, v4i32:$vB))]>; + def VMULHUW : VXForm_1<649, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmulhuw $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (mulhu v4i32:$vA, v4i32:$vB))]>; + def VMULHSD : VXForm_1<969, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmulhsd $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (mulhs v2i64:$vA, v2i64:$vB))]>; + def VMULHUD : VXForm_1<713, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmulhud $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (mulhu v2i64:$vA, v2i64:$vB))]>; + def VMODSW : VXForm_1<1931, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmodsw $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (srem v4i32:$vA, v4i32:$vB))]>; + def VMODUW : VXForm_1<1675, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmoduw $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (urem v4i32:$vA, v4i32:$vB))]>; + def VMODSD : VXForm_1<1995, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmodsd $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (srem v2i64:$vA, v2i64:$vB))]>; + def VMODUD : VXForm_1<1739, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmodud $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (urem v2i64:$vA, v2i64:$vB))]>; + def VDIVSW : VXForm_1<395, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdivsw $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (sdiv v4i32:$vA, v4i32:$vB))]>; + def VDIVUW : VXForm_1<139, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdivuw $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (udiv v4i32:$vA, v4i32:$vB))]>; + def VDIVSD : VXForm_1<459, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdivsd $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (sdiv v2i64:$vA, v2i64:$vB))]>; + def VDIVUD : VXForm_1<203, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdivud $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (udiv v2i64:$vA, v2i64:$vB))]>; + def VDIVESW : VXForm_1<907, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdivesw $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (int_ppc_altivec_vdivesw v4i32:$vA, + v4i32:$vB))]>; + def VDIVEUW : VXForm_1<651, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdiveuw $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (int_ppc_altivec_vdiveuw v4i32:$vA, + v4i32:$vB))]>; + def VDIVESD : VXForm_1<971, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdivesd $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (int_ppc_altivec_vdivesd v2i64:$vA, + v2i64:$vB))]>; + def VDIVEUD : VXForm_1<715, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdiveud $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (int_ppc_altivec_vdiveud v2i64:$vA, + v2i64:$vB))]>; def XVTLSBB : XX2_BF3_XO5_XB6_XO9<60, 2, 475, (outs crrc:$BF), (ins vsrc:$XB), "xvtlsbb $BF, $XB", IIC_VecGeneral, []>; @@ -2311,204 +2311,204 @@ let Predicates = [IsISA3_1] in { def STXVRWX : X_XS6_RA5_RB5<31, 205, "stxvrwx", vsrc, []>; def STXVRDX : X_XS6_RA5_RB5<31, 237, "stxvrdx", vsrc, []>; } - - def VMULESD : VXForm_1<968, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmulesd $vD, $vA, $vB", IIC_VecGeneral, - [(set v1i128:$vD, (int_ppc_altivec_vmulesd v2i64:$vA, - v2i64:$vB))]>; - def VMULEUD : VXForm_1<712, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmuleud $vD, $vA, $vB", IIC_VecGeneral, - [(set v1i128:$vD, (int_ppc_altivec_vmuleud v2i64:$vA, - v2i64:$vB))]>; - def VMULOSD : VXForm_1<456, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmulosd $vD, $vA, $vB", IIC_VecGeneral, - [(set v1i128:$vD, (int_ppc_altivec_vmulosd v2i64:$vA, - v2i64:$vB))]>; - def VMULOUD : VXForm_1<200, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmuloud $vD, $vA, $vB", IIC_VecGeneral, - [(set v1i128:$vD, (int_ppc_altivec_vmuloud v2i64:$vA, - v2i64:$vB))]>; - def VMSUMCUD : VAForm_1a<23, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC), - "vmsumcud $vD, $vA, $vB, $vC", IIC_VecGeneral, - [(set v1i128:$vD, (int_ppc_altivec_vmsumcud - v2i64:$vA, v2i64:$vB, v1i128:$vC))]>; - def VDIVSQ : VXForm_1<267, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vdivsq $vD, $vA, $vB", IIC_VecGeneral, - [(set v1i128:$vD, (sdiv v1i128:$vA, v1i128:$vB))]>; - def VDIVUQ : VXForm_1<11, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vdivuq $vD, $vA, $vB", IIC_VecGeneral, - [(set v1i128:$vD, (udiv v1i128:$vA, v1i128:$vB))]>; - def VDIVESQ : VXForm_1<779, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vdivesq $vD, $vA, $vB", IIC_VecGeneral, - [(set v1i128:$vD, (int_ppc_altivec_vdivesq v1i128:$vA, - v1i128:$vB))]>; - def VDIVEUQ : VXForm_1<523, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vdiveuq $vD, $vA, $vB", IIC_VecGeneral, - [(set v1i128:$vD, (int_ppc_altivec_vdiveuq v1i128:$vA, - v1i128:$vB))]>; - def VCMPEQUQ : VCMP <455, "vcmpequq $vD, $vA, $vB" , v1i128>; - def VCMPGTSQ : VCMP <903, "vcmpgtsq $vD, $vA, $vB" , v1i128>; - def VCMPGTUQ : VCMP <647, "vcmpgtuq $vD, $vA, $vB" , v1i128>; - def VCMPEQUQ_rec : VCMP_rec <455, "vcmpequq. $vD, $vA, $vB" , v1i128>; - def VCMPGTSQ_rec : VCMP_rec <903, "vcmpgtsq. $vD, $vA, $vB" , v1i128>; - def VCMPGTUQ_rec : VCMP_rec <647, "vcmpgtuq. $vD, $vA, $vB" , v1i128>; - def VMODSQ : VXForm_1<1803, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmodsq $vD, $vA, $vB", IIC_VecGeneral, - [(set v1i128:$vD, (srem v1i128:$vA, v1i128:$vB))]>; - def VMODUQ : VXForm_1<1547, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmoduq $vD, $vA, $vB", IIC_VecGeneral, - [(set v1i128:$vD, (urem v1i128:$vA, v1i128:$vB))]>; - def VEXTSD2Q : VXForm_RD5_XO5_RS5<1538, 27, (outs vrrc:$vD), (ins vrrc:$vB), - "vextsd2q $vD, $vB", IIC_VecGeneral, - [(set v1i128:$vD, (int_ppc_altivec_vextsd2q v2i64:$vB))]>; - def VCMPUQ : VXForm_BF3_VAB5<257, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB), - "vcmpuq $BF, $vA, $vB", IIC_VecGeneral, []>; - def VCMPSQ : VXForm_BF3_VAB5<321, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB), - "vcmpsq $BF, $vA, $vB", IIC_VecGeneral, []>; - def VRLQNM : VX1_VT5_VA5_VB5<325, "vrlqnm", - [(set v1i128:$vD, - (int_ppc_altivec_vrlqnm v1i128:$vA, - v1i128:$vB))]>; - def VRLQMI : VXForm_1<69, (outs vrrc:$vD), - (ins vrrc:$vA, vrrc:$vB, vrrc:$vDi), - "vrlqmi $vD, $vA, $vB", IIC_VecFP, - [(set v1i128:$vD, - (int_ppc_altivec_vrlqmi v1i128:$vA, v1i128:$vB, - v1i128:$vDi))]>, - RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; - def VSLQ : VX1_VT5_VA5_VB5<261, "vslq", []>; - def VSRAQ : VX1_VT5_VA5_VB5<773, "vsraq", []>; - def VSRQ : VX1_VT5_VA5_VB5<517, "vsrq", []>; - def VRLQ : VX1_VT5_VA5_VB5<5, "vrlq", []>; - def XSCVQPUQZ : X_VT5_XO5_VB5<63, 0, 836, "xscvqpuqz", []>; - def XSCVQPSQZ : X_VT5_XO5_VB5<63, 8, 836, "xscvqpsqz", []>; - def XSCVUQQP : X_VT5_XO5_VB5<63, 3, 836, "xscvuqqp", []>; - def XSCVSQQP : X_VT5_XO5_VB5<63, 11, 836, "xscvsqqp", []>; -} - -let Predicates = [IsISA3_1, HasVSX] in { - def XVCVSPBF16 : XX2_XT6_XO5_XB6<60, 17, 475, "xvcvspbf16", vsrc, []>; - def XVCVBF16SPN : XX2_XT6_XO5_XB6<60, 16, 475, "xvcvbf16spn", vsrc, []>; -} - -// Multiclass defining patterns for Set Boolean Extension Reverse Instructions. -// This is analogous to the CRNotPat multiclass but specifically for Power10 -// and newer subtargets since the extended forms use Set Boolean instructions. -// The first two anonymous patterns defined are actually a duplicate of those -// in CRNotPat, but it is preferable to define both multiclasses as complete -// ones rather than pulling that small common section out. -multiclass P10ReverseSetBool<dag pattern, dag result> { - def : Pat<pattern, (crnot result)>; - def : Pat<(not pattern), result>; - - def : Pat<(i32 (zext pattern)), - (SETBCR result)>; - def : Pat<(i64 (zext pattern)), - (SETBCR8 result)>; - - def : Pat<(i32 (sext pattern)), - (SETNBCR result)>; - def : Pat<(i64 (sext pattern)), - (SETNBCR8 result)>; - - def : Pat<(i32 (anyext pattern)), - (SETBCR result)>; - def : Pat<(i64 (anyext pattern)), - (SETBCR8 result)>; -} - -multiclass IntSetP10RevSetBool<SDNode SetCC, ValueType Ty, ImmLeaf ZExtTy, - ImmLeaf SExtTy, PatLeaf Cmpi, PatLeaf Cmpli, - PatLeaf Cmp, PatLeaf Cmpl> { - defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETUGE)), - (EXTRACT_SUBREG (Cmpl $s1, $s2), sub_lt)>; - defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETGE)), - (EXTRACT_SUBREG (Cmp $s1, $s2), sub_lt)>; - defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETULE)), - (EXTRACT_SUBREG (Cmpl $s1, $s2), sub_gt)>; - defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETLE)), - (EXTRACT_SUBREG (Cmp $s1, $s2), sub_gt)>; - defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETNE)), - (EXTRACT_SUBREG (Cmp $s1, $s2), sub_eq)>; - - defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, ZExtTy:$imm, SETUGE)), - (EXTRACT_SUBREG (Cmpli $s1, imm:$imm), sub_lt)>; - defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, SExtTy:$imm, SETGE)), - (EXTRACT_SUBREG (Cmpi $s1, imm:$imm), sub_lt)>; - defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, ZExtTy:$imm, SETULE)), - (EXTRACT_SUBREG (Cmpli $s1, imm:$imm), sub_gt)>; - defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, SExtTy:$imm, SETLE)), - (EXTRACT_SUBREG (Cmpi $s1, imm:$imm), sub_gt)>; - defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, SExtTy:$imm, SETNE)), - (EXTRACT_SUBREG (Cmpi $s1, imm:$imm), sub_eq)>; - defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, ZExtTy:$imm, SETNE)), - (EXTRACT_SUBREG (Cmpli $s1, imm:$imm), sub_eq)>; -} - -multiclass FSetP10RevSetBool<SDNode SetCC, ValueType Ty, PatLeaf FCmp> { - defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETUGE)), - (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>; - defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETGE)), - (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>; - defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETULE)), - (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>; - defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETLE)), - (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>; - defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETUNE)), - (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>; - defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETNE)), - (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>; - defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETO)), - (EXTRACT_SUBREG (FCmp $s1, $s2), sub_un)>; -} - -let Predicates = [IsISA3_1] in { - def : Pat<(i32 (zext i1:$in)), - (SETBC $in)>; - def : Pat<(i64 (zext i1:$in)), - (SETBC8 $in)>; - def : Pat<(i32 (sext i1:$in)), - (SETNBC $in)>; - def : Pat<(i64 (sext i1:$in)), - (SETNBC8 $in)>; - def : Pat<(i32 (anyext i1:$in)), - (SETBC $in)>; - def : Pat<(i64 (anyext i1:$in)), - (SETBC8 $in)>; - - // Instantiation of the set boolean reverse patterns for 32-bit integers. - defm : IntSetP10RevSetBool<setcc, i32, immZExt16, imm32SExt16, - CMPWI, CMPLWI, CMPW, CMPLW>; - defm : P10ReverseSetBool<(i1 (setcc i32:$s1, imm:$imm, SETNE)), - (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)), - (LO16 imm:$imm)), sub_eq)>; - - // Instantiation of the set boolean reverse patterns for 64-bit integers. - defm : IntSetP10RevSetBool<setcc, i64, immZExt16, imm64SExt16, - CMPDI, CMPLDI, CMPD, CMPLD>; - defm : P10ReverseSetBool<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETNE)), - (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)), - (LO16 imm:$imm)), sub_eq)>; -} - -// Instantiation of the set boolean reverse patterns for f32, f64, f128. -let Predicates = [IsISA3_1, HasFPU] in { - defm : FSetP10RevSetBool<setcc, f32, FCMPUS>; - defm : FSetP10RevSetBool<setcc, f64, FCMPUD>; - defm : FSetP10RevSetBool<setcc, f128, XSCMPUQP>; -} - + + def VMULESD : VXForm_1<968, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmulesd $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vmulesd v2i64:$vA, + v2i64:$vB))]>; + def VMULEUD : VXForm_1<712, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmuleud $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vmuleud v2i64:$vA, + v2i64:$vB))]>; + def VMULOSD : VXForm_1<456, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmulosd $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vmulosd v2i64:$vA, + v2i64:$vB))]>; + def VMULOUD : VXForm_1<200, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmuloud $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vmuloud v2i64:$vA, + v2i64:$vB))]>; + def VMSUMCUD : VAForm_1a<23, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC), + "vmsumcud $vD, $vA, $vB, $vC", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vmsumcud + v2i64:$vA, v2i64:$vB, v1i128:$vC))]>; + def VDIVSQ : VXForm_1<267, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdivsq $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (sdiv v1i128:$vA, v1i128:$vB))]>; + def VDIVUQ : VXForm_1<11, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdivuq $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (udiv v1i128:$vA, v1i128:$vB))]>; + def VDIVESQ : VXForm_1<779, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdivesq $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vdivesq v1i128:$vA, + v1i128:$vB))]>; + def VDIVEUQ : VXForm_1<523, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdiveuq $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vdiveuq v1i128:$vA, + v1i128:$vB))]>; + def VCMPEQUQ : VCMP <455, "vcmpequq $vD, $vA, $vB" , v1i128>; + def VCMPGTSQ : VCMP <903, "vcmpgtsq $vD, $vA, $vB" , v1i128>; + def VCMPGTUQ : VCMP <647, "vcmpgtuq $vD, $vA, $vB" , v1i128>; + def VCMPEQUQ_rec : VCMP_rec <455, "vcmpequq. $vD, $vA, $vB" , v1i128>; + def VCMPGTSQ_rec : VCMP_rec <903, "vcmpgtsq. $vD, $vA, $vB" , v1i128>; + def VCMPGTUQ_rec : VCMP_rec <647, "vcmpgtuq. $vD, $vA, $vB" , v1i128>; + def VMODSQ : VXForm_1<1803, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmodsq $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (srem v1i128:$vA, v1i128:$vB))]>; + def VMODUQ : VXForm_1<1547, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmoduq $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (urem v1i128:$vA, v1i128:$vB))]>; + def VEXTSD2Q : VXForm_RD5_XO5_RS5<1538, 27, (outs vrrc:$vD), (ins vrrc:$vB), + "vextsd2q $vD, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vextsd2q v2i64:$vB))]>; + def VCMPUQ : VXForm_BF3_VAB5<257, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB), + "vcmpuq $BF, $vA, $vB", IIC_VecGeneral, []>; + def VCMPSQ : VXForm_BF3_VAB5<321, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB), + "vcmpsq $BF, $vA, $vB", IIC_VecGeneral, []>; + def VRLQNM : VX1_VT5_VA5_VB5<325, "vrlqnm", + [(set v1i128:$vD, + (int_ppc_altivec_vrlqnm v1i128:$vA, + v1i128:$vB))]>; + def VRLQMI : VXForm_1<69, (outs vrrc:$vD), + (ins vrrc:$vA, vrrc:$vB, vrrc:$vDi), + "vrlqmi $vD, $vA, $vB", IIC_VecFP, + [(set v1i128:$vD, + (int_ppc_altivec_vrlqmi v1i128:$vA, v1i128:$vB, + v1i128:$vDi))]>, + RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; + def VSLQ : VX1_VT5_VA5_VB5<261, "vslq", []>; + def VSRAQ : VX1_VT5_VA5_VB5<773, "vsraq", []>; + def VSRQ : VX1_VT5_VA5_VB5<517, "vsrq", []>; + def VRLQ : VX1_VT5_VA5_VB5<5, "vrlq", []>; + def XSCVQPUQZ : X_VT5_XO5_VB5<63, 0, 836, "xscvqpuqz", []>; + def XSCVQPSQZ : X_VT5_XO5_VB5<63, 8, 836, "xscvqpsqz", []>; + def XSCVUQQP : X_VT5_XO5_VB5<63, 3, 836, "xscvuqqp", []>; + def XSCVSQQP : X_VT5_XO5_VB5<63, 11, 836, "xscvsqqp", []>; +} + +let Predicates = [IsISA3_1, HasVSX] in { + def XVCVSPBF16 : XX2_XT6_XO5_XB6<60, 17, 475, "xvcvspbf16", vsrc, []>; + def XVCVBF16SPN : XX2_XT6_XO5_XB6<60, 16, 475, "xvcvbf16spn", vsrc, []>; +} + +// Multiclass defining patterns for Set Boolean Extension Reverse Instructions. +// This is analogous to the CRNotPat multiclass but specifically for Power10 +// and newer subtargets since the extended forms use Set Boolean instructions. +// The first two anonymous patterns defined are actually a duplicate of those +// in CRNotPat, but it is preferable to define both multiclasses as complete +// ones rather than pulling that small common section out. +multiclass P10ReverseSetBool<dag pattern, dag result> { + def : Pat<pattern, (crnot result)>; + def : Pat<(not pattern), result>; + + def : Pat<(i32 (zext pattern)), + (SETBCR result)>; + def : Pat<(i64 (zext pattern)), + (SETBCR8 result)>; + + def : Pat<(i32 (sext pattern)), + (SETNBCR result)>; + def : Pat<(i64 (sext pattern)), + (SETNBCR8 result)>; + + def : Pat<(i32 (anyext pattern)), + (SETBCR result)>; + def : Pat<(i64 (anyext pattern)), + (SETBCR8 result)>; +} + +multiclass IntSetP10RevSetBool<SDNode SetCC, ValueType Ty, ImmLeaf ZExtTy, + ImmLeaf SExtTy, PatLeaf Cmpi, PatLeaf Cmpli, + PatLeaf Cmp, PatLeaf Cmpl> { + defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETUGE)), + (EXTRACT_SUBREG (Cmpl $s1, $s2), sub_lt)>; + defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETGE)), + (EXTRACT_SUBREG (Cmp $s1, $s2), sub_lt)>; + defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETULE)), + (EXTRACT_SUBREG (Cmpl $s1, $s2), sub_gt)>; + defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETLE)), + (EXTRACT_SUBREG (Cmp $s1, $s2), sub_gt)>; + defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETNE)), + (EXTRACT_SUBREG (Cmp $s1, $s2), sub_eq)>; + + defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, ZExtTy:$imm, SETUGE)), + (EXTRACT_SUBREG (Cmpli $s1, imm:$imm), sub_lt)>; + defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, SExtTy:$imm, SETGE)), + (EXTRACT_SUBREG (Cmpi $s1, imm:$imm), sub_lt)>; + defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, ZExtTy:$imm, SETULE)), + (EXTRACT_SUBREG (Cmpli $s1, imm:$imm), sub_gt)>; + defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, SExtTy:$imm, SETLE)), + (EXTRACT_SUBREG (Cmpi $s1, imm:$imm), sub_gt)>; + defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, SExtTy:$imm, SETNE)), + (EXTRACT_SUBREG (Cmpi $s1, imm:$imm), sub_eq)>; + defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, ZExtTy:$imm, SETNE)), + (EXTRACT_SUBREG (Cmpli $s1, imm:$imm), sub_eq)>; +} + +multiclass FSetP10RevSetBool<SDNode SetCC, ValueType Ty, PatLeaf FCmp> { + defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETUGE)), + (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>; + defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETGE)), + (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>; + defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETULE)), + (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>; + defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETLE)), + (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>; + defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETUNE)), + (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>; + defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETNE)), + (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>; + defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETO)), + (EXTRACT_SUBREG (FCmp $s1, $s2), sub_un)>; +} + +let Predicates = [IsISA3_1] in { + def : Pat<(i32 (zext i1:$in)), + (SETBC $in)>; + def : Pat<(i64 (zext i1:$in)), + (SETBC8 $in)>; + def : Pat<(i32 (sext i1:$in)), + (SETNBC $in)>; + def : Pat<(i64 (sext i1:$in)), + (SETNBC8 $in)>; + def : Pat<(i32 (anyext i1:$in)), + (SETBC $in)>; + def : Pat<(i64 (anyext i1:$in)), + (SETBC8 $in)>; + + // Instantiation of the set boolean reverse patterns for 32-bit integers. + defm : IntSetP10RevSetBool<setcc, i32, immZExt16, imm32SExt16, + CMPWI, CMPLWI, CMPW, CMPLW>; + defm : P10ReverseSetBool<(i1 (setcc i32:$s1, imm:$imm, SETNE)), + (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)), + (LO16 imm:$imm)), sub_eq)>; + + // Instantiation of the set boolean reverse patterns for 64-bit integers. + defm : IntSetP10RevSetBool<setcc, i64, immZExt16, imm64SExt16, + CMPDI, CMPLDI, CMPD, CMPLD>; + defm : P10ReverseSetBool<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETNE)), + (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)), + (LO16 imm:$imm)), sub_eq)>; +} + +// Instantiation of the set boolean reverse patterns for f32, f64, f128. +let Predicates = [IsISA3_1, HasFPU] in { + defm : FSetP10RevSetBool<setcc, f32, FCMPUS>; + defm : FSetP10RevSetBool<setcc, f64, FCMPUD>; + defm : FSetP10RevSetBool<setcc, f128, XSCMPUQP>; +} + //---------------------------- Anonymous Patterns ----------------------------// let Predicates = [IsISA3_1] in { - // Exploit the vector multiply high instructions using intrinsics. - def : Pat<(v4i32 (int_ppc_altivec_vmulhsw v4i32:$vA, v4i32:$vB)), - (v4i32 (VMULHSW $vA, $vB))>; - def : Pat<(v4i32 (int_ppc_altivec_vmulhuw v4i32:$vA, v4i32:$vB)), - (v4i32 (VMULHUW $vA, $vB))>; - def : Pat<(v2i64 (int_ppc_altivec_vmulhsd v2i64:$vA, v2i64:$vB)), - (v2i64 (VMULHSD $vA, $vB))>; - def : Pat<(v2i64 (int_ppc_altivec_vmulhud v2i64:$vA, v2i64:$vB)), - (v2i64 (VMULHUD $vA, $vB))>; + // Exploit the vector multiply high instructions using intrinsics. + def : Pat<(v4i32 (int_ppc_altivec_vmulhsw v4i32:$vA, v4i32:$vB)), + (v4i32 (VMULHSW $vA, $vB))>; + def : Pat<(v4i32 (int_ppc_altivec_vmulhuw v4i32:$vA, v4i32:$vB)), + (v4i32 (VMULHUW $vA, $vB))>; + def : Pat<(v2i64 (int_ppc_altivec_vmulhsd v2i64:$vA, v2i64:$vB)), + (v2i64 (VMULHSD $vA, $vB))>; + def : Pat<(v2i64 (int_ppc_altivec_vmulhud v2i64:$vA, v2i64:$vB)), + (v2i64 (VMULHUD $vA, $vB))>; def : Pat<(v16i8 (int_ppc_vsx_xxgenpcvbm v16i8:$VRB, imm:$IMM)), (v16i8 (COPY_TO_REGCLASS (XXGENPCVBM $VRB, imm:$IMM), VRRC))>; def : Pat<(v8i16 (int_ppc_vsx_xxgenpcvhm v8i16:$VRB, imm:$IMM)), @@ -2517,82 +2517,82 @@ let Predicates = [IsISA3_1] in { (v4i32 (COPY_TO_REGCLASS (XXGENPCVWM $VRB, imm:$IMM), VRRC))>; def : Pat<(v2i64 (int_ppc_vsx_xxgenpcvdm v2i64:$VRB, imm:$IMM)), (v2i64 (COPY_TO_REGCLASS (XXGENPCVDM $VRB, imm:$IMM), VRRC))>; - def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 1)), + def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 1)), (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_lt)>; def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 0)), (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_eq)>; - - def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 8)), - (v1i128 (COPY_TO_REGCLASS (LXVRBX xoaddr:$src), VRRC))>; - def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 16)), - (v1i128 (COPY_TO_REGCLASS (LXVRHX xoaddr:$src), VRRC))>; - def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 32)), - (v1i128 (COPY_TO_REGCLASS (LXVRWX xoaddr:$src), VRRC))>; - def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 64)), - (v1i128 (COPY_TO_REGCLASS (LXVRDX xoaddr:$src), VRRC))>; - - def : Pat<(v1i128 (rotl v1i128:$vA, v1i128:$vB)), - (v1i128 (VRLQ v1i128:$vA, v1i128:$vB))>; - - def : Pat <(v2i64 (PPCxxsplti32dx v2i64:$XT, i32:$XI, i32:$IMM32)), - (v2i64 (XXSPLTI32DX v2i64:$XT, i32:$XI, i32:$IMM32))>; -} - -let Predicates = [IsISA3_1, HasVSX] in { - def : Pat<(v16i8 (int_ppc_vsx_xvcvspbf16 v16i8:$XA)), - (COPY_TO_REGCLASS (XVCVSPBF16 RCCp.AToVSRC), VRRC)>; - def : Pat<(v16i8 (int_ppc_vsx_xvcvbf16spn v16i8:$XA)), - (COPY_TO_REGCLASS (XVCVBF16SPN RCCp.AToVSRC), VRRC)>; -} - -let AddedComplexity = 400, Predicates = [IsISA3_1, IsLittleEndian] in { - // Store element 0 of a VSX register to memory - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$src, 0)), xoaddr:$dst), - (STXVRBX (COPY_TO_REGCLASS v16i8:$src, VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$src, 0)), xoaddr:$dst), - (STXVRHX (COPY_TO_REGCLASS v8i16:$src, VSRC), xoaddr:$dst)>; - def : Pat<(store (i32 (extractelt v4i32:$src, 0)), xoaddr:$dst), - (STXVRWX $src, xoaddr:$dst)>; - def : Pat<(store (f32 (extractelt v4f32:$src, 0)), xoaddr:$dst), - (STXVRWX $src, xoaddr:$dst)>; - def : Pat<(store (i64 (extractelt v2i64:$src, 0)), xoaddr:$dst), - (STXVRDX $src, xoaddr:$dst)>; - def : Pat<(store (f64 (extractelt v2f64:$src, 0)), xoaddr:$dst), - (STXVRDX $src, xoaddr:$dst)>; - } - -// FIXME: The swap is overkill when the shift amount is a constant. -// We should just fix the constant in the DAG. -let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX] in { - def : Pat<(v1i128 (shl v1i128:$VRA, v1i128:$VRB)), - (v1i128 (VSLQ v1i128:$VRA, - (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC), - (COPY_TO_REGCLASS $VRB, VSRC), 2)))>; - def : Pat<(v1i128 (PPCshl v1i128:$VRA, v1i128:$VRB)), - (v1i128 (VSLQ v1i128:$VRA, - (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC), - (COPY_TO_REGCLASS $VRB, VSRC), 2)))>; - def : Pat<(v1i128 (srl v1i128:$VRA, v1i128:$VRB)), - (v1i128 (VSRQ v1i128:$VRA, - (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC), - (COPY_TO_REGCLASS $VRB, VSRC), 2)))>; - def : Pat<(v1i128 (PPCsrl v1i128:$VRA, v1i128:$VRB)), - (v1i128 (VSRQ v1i128:$VRA, - (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC), - (COPY_TO_REGCLASS $VRB, VSRC), 2)))>; - def : Pat<(v1i128 (sra v1i128:$VRA, v1i128:$VRB)), - (v1i128 (VSRAQ v1i128:$VRA, - (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC), - (COPY_TO_REGCLASS $VRB, VSRC), 2)))>; - def : Pat<(v1i128 (PPCsra v1i128:$VRA, v1i128:$VRB)), - (v1i128 (VSRAQ v1i128:$VRA, - (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC), - (COPY_TO_REGCLASS $VRB, VSRC), 2)))>; -} - -class xxevalPattern <dag pattern, bits<8> imm> : - Pat<(v4i32 pattern), (XXEVAL $vA, $vB, $vC, imm)> {} - + + def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 8)), + (v1i128 (COPY_TO_REGCLASS (LXVRBX xoaddr:$src), VRRC))>; + def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 16)), + (v1i128 (COPY_TO_REGCLASS (LXVRHX xoaddr:$src), VRRC))>; + def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 32)), + (v1i128 (COPY_TO_REGCLASS (LXVRWX xoaddr:$src), VRRC))>; + def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 64)), + (v1i128 (COPY_TO_REGCLASS (LXVRDX xoaddr:$src), VRRC))>; + + def : Pat<(v1i128 (rotl v1i128:$vA, v1i128:$vB)), + (v1i128 (VRLQ v1i128:$vA, v1i128:$vB))>; + + def : Pat <(v2i64 (PPCxxsplti32dx v2i64:$XT, i32:$XI, i32:$IMM32)), + (v2i64 (XXSPLTI32DX v2i64:$XT, i32:$XI, i32:$IMM32))>; +} + +let Predicates = [IsISA3_1, HasVSX] in { + def : Pat<(v16i8 (int_ppc_vsx_xvcvspbf16 v16i8:$XA)), + (COPY_TO_REGCLASS (XVCVSPBF16 RCCp.AToVSRC), VRRC)>; + def : Pat<(v16i8 (int_ppc_vsx_xvcvbf16spn v16i8:$XA)), + (COPY_TO_REGCLASS (XVCVBF16SPN RCCp.AToVSRC), VRRC)>; +} + +let AddedComplexity = 400, Predicates = [IsISA3_1, IsLittleEndian] in { + // Store element 0 of a VSX register to memory + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$src, 0)), xoaddr:$dst), + (STXVRBX (COPY_TO_REGCLASS v16i8:$src, VSRC), xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$src, 0)), xoaddr:$dst), + (STXVRHX (COPY_TO_REGCLASS v8i16:$src, VSRC), xoaddr:$dst)>; + def : Pat<(store (i32 (extractelt v4i32:$src, 0)), xoaddr:$dst), + (STXVRWX $src, xoaddr:$dst)>; + def : Pat<(store (f32 (extractelt v4f32:$src, 0)), xoaddr:$dst), + (STXVRWX $src, xoaddr:$dst)>; + def : Pat<(store (i64 (extractelt v2i64:$src, 0)), xoaddr:$dst), + (STXVRDX $src, xoaddr:$dst)>; + def : Pat<(store (f64 (extractelt v2f64:$src, 0)), xoaddr:$dst), + (STXVRDX $src, xoaddr:$dst)>; + } + +// FIXME: The swap is overkill when the shift amount is a constant. +// We should just fix the constant in the DAG. +let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX] in { + def : Pat<(v1i128 (shl v1i128:$VRA, v1i128:$VRB)), + (v1i128 (VSLQ v1i128:$VRA, + (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC), + (COPY_TO_REGCLASS $VRB, VSRC), 2)))>; + def : Pat<(v1i128 (PPCshl v1i128:$VRA, v1i128:$VRB)), + (v1i128 (VSLQ v1i128:$VRA, + (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC), + (COPY_TO_REGCLASS $VRB, VSRC), 2)))>; + def : Pat<(v1i128 (srl v1i128:$VRA, v1i128:$VRB)), + (v1i128 (VSRQ v1i128:$VRA, + (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC), + (COPY_TO_REGCLASS $VRB, VSRC), 2)))>; + def : Pat<(v1i128 (PPCsrl v1i128:$VRA, v1i128:$VRB)), + (v1i128 (VSRQ v1i128:$VRA, + (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC), + (COPY_TO_REGCLASS $VRB, VSRC), 2)))>; + def : Pat<(v1i128 (sra v1i128:$VRA, v1i128:$VRB)), + (v1i128 (VSRAQ v1i128:$VRA, + (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC), + (COPY_TO_REGCLASS $VRB, VSRC), 2)))>; + def : Pat<(v1i128 (PPCsra v1i128:$VRA, v1i128:$VRB)), + (v1i128 (VSRAQ v1i128:$VRA, + (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC), + (COPY_TO_REGCLASS $VRB, VSRC), 2)))>; +} + +class xxevalPattern <dag pattern, bits<8> imm> : + Pat<(v4i32 pattern), (XXEVAL $vA, $vB, $vC, imm)> {} + let AddedComplexity = 400, Predicates = [PrefixInstrs] in { def : Pat<(v4i32 (build_vector i32immNonAllOneNonZero:$A, i32immNonAllOneNonZero:$A, @@ -2605,44 +2605,44 @@ let AddedComplexity = 400, Predicates = [PrefixInstrs] in { def : Pat<(f64 nzFPImmAsi32:$A), (COPY_TO_REGCLASS (XXSPLTIDP (getFPAs32BitInt fpimm:$A)), VSFRC)>; - - // Anonymous patterns for XXEVAL - // AND - // and(A, B, C) - def : xxevalPattern<(and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 1>; - // and(A, xor(B, C)) - def : xxevalPattern<(and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 6>; - // and(A, or(B, C)) - def : xxevalPattern<(and v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 7>; - // and(A, nor(B, C)) - def : xxevalPattern<(and v4i32:$vA, (vnot_ppc (or v4i32:$vB, v4i32:$vC))), - 8>; - // and(A, eqv(B, C)) - def : xxevalPattern<(and v4i32:$vA, (vnot_ppc (xor v4i32:$vB, v4i32:$vC))), - 9>; - // and(A, nand(B, C)) - def : xxevalPattern<(and v4i32:$vA, (vnot_ppc (and v4i32:$vB, v4i32:$vC))), - 14>; - - // NAND - // nand(A, B, C) - def : xxevalPattern<(vnot_ppc (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), - !sub(255, 1)>; - // nand(A, xor(B, C)) - def : xxevalPattern<(vnot_ppc (and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))), - !sub(255, 6)>; - // nand(A, or(B, C)) - def : xxevalPattern<(vnot_ppc (and v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), - !sub(255, 7)>; - // nand(A, nor(B, C)) - def : xxevalPattern<(or (vnot_ppc v4i32:$vA), (or v4i32:$vB, v4i32:$vC)), - !sub(255, 8)>; - // nand(A, eqv(B, C)) - def : xxevalPattern<(or (vnot_ppc v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)), - !sub(255, 9)>; - // nand(A, nand(B, C)) - def : xxevalPattern<(or (vnot_ppc v4i32:$vA), (and v4i32:$vB, v4i32:$vC)), - !sub(255, 14)>; + + // Anonymous patterns for XXEVAL + // AND + // and(A, B, C) + def : xxevalPattern<(and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 1>; + // and(A, xor(B, C)) + def : xxevalPattern<(and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 6>; + // and(A, or(B, C)) + def : xxevalPattern<(and v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 7>; + // and(A, nor(B, C)) + def : xxevalPattern<(and v4i32:$vA, (vnot_ppc (or v4i32:$vB, v4i32:$vC))), + 8>; + // and(A, eqv(B, C)) + def : xxevalPattern<(and v4i32:$vA, (vnot_ppc (xor v4i32:$vB, v4i32:$vC))), + 9>; + // and(A, nand(B, C)) + def : xxevalPattern<(and v4i32:$vA, (vnot_ppc (and v4i32:$vB, v4i32:$vC))), + 14>; + + // NAND + // nand(A, B, C) + def : xxevalPattern<(vnot_ppc (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), + !sub(255, 1)>; + // nand(A, xor(B, C)) + def : xxevalPattern<(vnot_ppc (and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))), + !sub(255, 6)>; + // nand(A, or(B, C)) + def : xxevalPattern<(vnot_ppc (and v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), + !sub(255, 7)>; + // nand(A, nor(B, C)) + def : xxevalPattern<(or (vnot_ppc v4i32:$vA), (or v4i32:$vB, v4i32:$vC)), + !sub(255, 8)>; + // nand(A, eqv(B, C)) + def : xxevalPattern<(or (vnot_ppc v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)), + !sub(255, 9)>; + // nand(A, nand(B, C)) + def : xxevalPattern<(or (vnot_ppc v4i32:$vA), (and v4i32:$vB, v4i32:$vC)), + !sub(255, 14)>; } let Predicates = [PrefixInstrs] in { diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrVSX.td index 563da075ba..db6e00c71b 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrVSX.td +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCInstrVSX.td @@ -145,7 +145,7 @@ def PPCSToV : SDNode<"PPCISD::SCALAR_TO_VECTOR_PERMUTED", def HasVSX : Predicate<"Subtarget->hasVSX()">; def IsLittleEndian : Predicate<"Subtarget->isLittleEndian()">; def IsBigEndian : Predicate<"!Subtarget->isLittleEndian()">; -def IsPPC64 : Predicate<"Subtarget->isPPC64()">; +def IsPPC64 : Predicate<"Subtarget->isPPC64()">; def HasOnlySwappingMemOps : Predicate<"!Subtarget->hasP9Vector()">; def HasP8Vector : Predicate<"Subtarget->hasP8Vector()">; def HasDirectMove : Predicate<"Subtarget->hasDirectMove()">; @@ -168,7 +168,7 @@ multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase, def _rec : XX3Form_Rc<opcode, xo, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), !strconcat(asmbase, !strconcat(". ", asmstr)), itin, [(set InTy:$XT, - (InTy (PPCvcmp_rec InTy:$XA, InTy:$XB, xo)))]>, + (InTy (PPCvcmp_rec InTy:$XA, InTy:$XB, xo)))]>, isRecordForm; } } @@ -363,8 +363,8 @@ let hasSideEffects = 0 in { } } // mayStore - let mayRaiseFPException = 1 in { - let Uses = [RM] in { + let mayRaiseFPException = 1 in { + let Uses = [RM] in { // Add/Mul Instructions let isCommutable = 1 in { def XSADDDP : XX3Form<60, 32, @@ -624,31 +624,31 @@ let hasSideEffects = 0 in { "xsrsqrtedp $XT, $XB", IIC_VecFP, [(set f64:$XT, (PPCfrsqrte f64:$XB))]>; - let mayRaiseFPException = 0 in { + let mayRaiseFPException = 0 in { def XSTDIVDP : XX3Form_1<60, 61, (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), "xstdivdp $crD, $XA, $XB", IIC_FPCompare, []>; def XSTSQRTDP : XX2Form_1<60, 106, (outs crrc:$crD), (ins vsfrc:$XB), - "xstsqrtdp $crD, $XB", IIC_FPCompare, - [(set i32:$crD, (PPCftsqrt f64:$XB))]>; - def XVTDIVDP : XX3Form_1<60, 125, - (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB), - "xvtdivdp $crD, $XA, $XB", IIC_FPCompare, []>; - def XVTDIVSP : XX3Form_1<60, 93, - (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB), - "xvtdivsp $crD, $XA, $XB", IIC_FPCompare, []>; - - def XVTSQRTDP : XX2Form_1<60, 234, - (outs crrc:$crD), (ins vsrc:$XB), - "xvtsqrtdp $crD, $XB", IIC_FPCompare, - [(set i32:$crD, (PPCftsqrt v2f64:$XB))]>; - def XVTSQRTSP : XX2Form_1<60, 170, - (outs crrc:$crD), (ins vsrc:$XB), - "xvtsqrtsp $crD, $XB", IIC_FPCompare, - [(set i32:$crD, (PPCftsqrt v4f32:$XB))]>; - } - + "xstsqrtdp $crD, $XB", IIC_FPCompare, + [(set i32:$crD, (PPCftsqrt f64:$XB))]>; + def XVTDIVDP : XX3Form_1<60, 125, + (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB), + "xvtdivdp $crD, $XA, $XB", IIC_FPCompare, []>; + def XVTDIVSP : XX3Form_1<60, 93, + (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB), + "xvtdivsp $crD, $XA, $XB", IIC_FPCompare, []>; + + def XVTSQRTDP : XX2Form_1<60, 234, + (outs crrc:$crD), (ins vsrc:$XB), + "xvtsqrtdp $crD, $XB", IIC_FPCompare, + [(set i32:$crD, (PPCftsqrt v2f64:$XB))]>; + def XVTSQRTSP : XX2Form_1<60, 170, + (outs crrc:$crD), (ins vsrc:$XB), + "xvtsqrtsp $crD, $XB", IIC_FPCompare, + [(set i32:$crD, (PPCftsqrt v4f32:$XB))]>; + } + def XVDIVDP : XX3Form<60, 120, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvdivdp $XT, $XA, $XB", IIC_FPDivD, @@ -713,7 +713,7 @@ let hasSideEffects = 0 in { int_ppc_vsx_xvcmpgtsp, v4i32, v4f32>; // Move Instructions - let mayRaiseFPException = 0 in { + let mayRaiseFPException = 0 in { def XSABSDP : XX2Form<60, 345, (outs vsfrc:$XT), (ins vsfrc:$XB), "xsabsdp $XT, $XB", IIC_VecFP, @@ -767,7 +767,7 @@ let hasSideEffects = 0 in { (outs vsrc:$XT), (ins vsrc:$XB), "xvnegsp $XT, $XB", IIC_VecFP, [(set v4f32:$XT, (fneg v4f32:$XB))]>; - } + } // Conversion Instructions def XSCVDPSP : XX2Form<60, 265, @@ -776,50 +776,50 @@ let hasSideEffects = 0 in { def XSCVDPSXDS : XX2Form<60, 344, (outs vsfrc:$XT), (ins vsfrc:$XB), "xscvdpsxds $XT, $XB", IIC_VecFP, - [(set f64:$XT, (PPCany_fctidz f64:$XB))]>; + [(set f64:$XT, (PPCany_fctidz f64:$XB))]>; let isCodeGenOnly = 1 in def XSCVDPSXDSs : XX2Form<60, 344, (outs vssrc:$XT), (ins vssrc:$XB), "xscvdpsxds $XT, $XB", IIC_VecFP, - [(set f32:$XT, (PPCany_fctidz f32:$XB))]>; + [(set f32:$XT, (PPCany_fctidz f32:$XB))]>; def XSCVDPSXWS : XX2Form<60, 88, (outs vsfrc:$XT), (ins vsfrc:$XB), "xscvdpsxws $XT, $XB", IIC_VecFP, - [(set f64:$XT, (PPCany_fctiwz f64:$XB))]>; + [(set f64:$XT, (PPCany_fctiwz f64:$XB))]>; let isCodeGenOnly = 1 in def XSCVDPSXWSs : XX2Form<60, 88, (outs vssrc:$XT), (ins vssrc:$XB), "xscvdpsxws $XT, $XB", IIC_VecFP, - [(set f32:$XT, (PPCany_fctiwz f32:$XB))]>; + [(set f32:$XT, (PPCany_fctiwz f32:$XB))]>; def XSCVDPUXDS : XX2Form<60, 328, (outs vsfrc:$XT), (ins vsfrc:$XB), "xscvdpuxds $XT, $XB", IIC_VecFP, - [(set f64:$XT, (PPCany_fctiduz f64:$XB))]>; + [(set f64:$XT, (PPCany_fctiduz f64:$XB))]>; let isCodeGenOnly = 1 in def XSCVDPUXDSs : XX2Form<60, 328, (outs vssrc:$XT), (ins vssrc:$XB), "xscvdpuxds $XT, $XB", IIC_VecFP, - [(set f32:$XT, (PPCany_fctiduz f32:$XB))]>; + [(set f32:$XT, (PPCany_fctiduz f32:$XB))]>; def XSCVDPUXWS : XX2Form<60, 72, (outs vsfrc:$XT), (ins vsfrc:$XB), "xscvdpuxws $XT, $XB", IIC_VecFP, - [(set f64:$XT, (PPCany_fctiwuz f64:$XB))]>; + [(set f64:$XT, (PPCany_fctiwuz f64:$XB))]>; let isCodeGenOnly = 1 in def XSCVDPUXWSs : XX2Form<60, 72, (outs vssrc:$XT), (ins vssrc:$XB), "xscvdpuxws $XT, $XB", IIC_VecFP, - [(set f32:$XT, (PPCany_fctiwuz f32:$XB))]>; + [(set f32:$XT, (PPCany_fctiwuz f32:$XB))]>; def XSCVSPDP : XX2Form<60, 329, (outs vsfrc:$XT), (ins vsfrc:$XB), "xscvspdp $XT, $XB", IIC_VecFP, []>; def XSCVSXDDP : XX2Form<60, 376, (outs vsfrc:$XT), (ins vsfrc:$XB), "xscvsxddp $XT, $XB", IIC_VecFP, - [(set f64:$XT, (PPCany_fcfid f64:$XB))]>; + [(set f64:$XT, (PPCany_fcfid f64:$XB))]>; def XSCVUXDDP : XX2Form<60, 360, (outs vsfrc:$XT), (ins vsfrc:$XB), "xscvuxddp $XT, $XB", IIC_VecFP, - [(set f64:$XT, (PPCany_fcfidu f64:$XB))]>; + [(set f64:$XT, (PPCany_fcfidu f64:$XB))]>; def XVCVDPSP : XX2Form<60, 393, (outs vsrc:$XT), (ins vsrc:$XB), @@ -828,7 +828,7 @@ let hasSideEffects = 0 in { def XVCVDPSXDS : XX2Form<60, 472, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvdpsxds $XT, $XB", IIC_VecFP, - [(set v2i64:$XT, (any_fp_to_sint v2f64:$XB))]>; + [(set v2i64:$XT, (any_fp_to_sint v2f64:$XB))]>; def XVCVDPSXWS : XX2Form<60, 216, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvdpsxws $XT, $XB", IIC_VecFP, @@ -836,7 +836,7 @@ let hasSideEffects = 0 in { def XVCVDPUXDS : XX2Form<60, 456, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvdpuxds $XT, $XB", IIC_VecFP, - [(set v2i64:$XT, (any_fp_to_uint v2f64:$XB))]>; + [(set v2i64:$XT, (any_fp_to_uint v2f64:$XB))]>; def XVCVDPUXWS : XX2Form<60, 200, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvdpuxws $XT, $XB", IIC_VecFP, @@ -852,18 +852,18 @@ let hasSideEffects = 0 in { def XVCVSPSXWS : XX2Form<60, 152, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvspsxws $XT, $XB", IIC_VecFP, - [(set v4i32:$XT, (any_fp_to_sint v4f32:$XB))]>; + [(set v4i32:$XT, (any_fp_to_sint v4f32:$XB))]>; def XVCVSPUXDS : XX2Form<60, 392, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvspuxds $XT, $XB", IIC_VecFP, []>; def XVCVSPUXWS : XX2Form<60, 136, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvspuxws $XT, $XB", IIC_VecFP, - [(set v4i32:$XT, (any_fp_to_uint v4f32:$XB))]>; + [(set v4i32:$XT, (any_fp_to_uint v4f32:$XB))]>; def XVCVSXDDP : XX2Form<60, 504, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvsxddp $XT, $XB", IIC_VecFP, - [(set v2f64:$XT, (any_sint_to_fp v2i64:$XB))]>; + [(set v2f64:$XT, (any_sint_to_fp v2i64:$XB))]>; def XVCVSXDSP : XX2Form<60, 440, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvsxdsp $XT, $XB", IIC_VecFP, @@ -871,82 +871,82 @@ let hasSideEffects = 0 in { def XVCVSXWSP : XX2Form<60, 184, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvsxwsp $XT, $XB", IIC_VecFP, - [(set v4f32:$XT, (any_sint_to_fp v4i32:$XB))]>; + [(set v4f32:$XT, (any_sint_to_fp v4i32:$XB))]>; def XVCVUXDDP : XX2Form<60, 488, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvuxddp $XT, $XB", IIC_VecFP, - [(set v2f64:$XT, (any_uint_to_fp v2i64:$XB))]>; + [(set v2f64:$XT, (any_uint_to_fp v2i64:$XB))]>; def XVCVUXDSP : XX2Form<60, 424, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvuxdsp $XT, $XB", IIC_VecFP, [(set v4f32:$XT, (int_ppc_vsx_xvcvuxdsp v2i64:$XB))]>; - def XVCVUXWSP : XX2Form<60, 168, - (outs vsrc:$XT), (ins vsrc:$XB), - "xvcvuxwsp $XT, $XB", IIC_VecFP, - [(set v4f32:$XT, (any_uint_to_fp v4i32:$XB))]>; - - let mayRaiseFPException = 0 in { - def XVCVSXWDP : XX2Form<60, 248, - (outs vsrc:$XT), (ins vsrc:$XB), - "xvcvsxwdp $XT, $XB", IIC_VecFP, - [(set v2f64:$XT, (int_ppc_vsx_xvcvsxwdp v4i32:$XB))]>; + def XVCVUXWSP : XX2Form<60, 168, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvuxwsp $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (any_uint_to_fp v4i32:$XB))]>; + + let mayRaiseFPException = 0 in { + def XVCVSXWDP : XX2Form<60, 248, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvsxwdp $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (int_ppc_vsx_xvcvsxwdp v4i32:$XB))]>; def XVCVUXWDP : XX2Form<60, 232, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvuxwdp $XT, $XB", IIC_VecFP, [(set v2f64:$XT, (int_ppc_vsx_xvcvuxwdp v4i32:$XB))]>; - } - - // Rounding Instructions respecting current rounding mode - def XSRDPIC : XX2Form<60, 107, - (outs vsfrc:$XT), (ins vsfrc:$XB), - "xsrdpic $XT, $XB", IIC_VecFP, - [(set f64:$XT, (fnearbyint f64:$XB))]>; - def XVRDPIC : XX2Form<60, 235, + } + + // Rounding Instructions respecting current rounding mode + def XSRDPIC : XX2Form<60, 107, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsrdpic $XT, $XB", IIC_VecFP, + [(set f64:$XT, (fnearbyint f64:$XB))]>; + def XVRDPIC : XX2Form<60, 235, (outs vsrc:$XT), (ins vsrc:$XB), - "xvrdpic $XT, $XB", IIC_VecFP, - [(set v2f64:$XT, (fnearbyint v2f64:$XB))]>; - def XVRSPIC : XX2Form<60, 171, - (outs vsrc:$XT), (ins vsrc:$XB), - "xvrspic $XT, $XB", IIC_VecFP, - [(set v4f32:$XT, (fnearbyint v4f32:$XB))]>; - // Max/Min Instructions - let isCommutable = 1 in { - def XSMAXDP : XX3Form<60, 160, - (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), - "xsmaxdp $XT, $XA, $XB", IIC_VecFP, - [(set vsfrc:$XT, - (int_ppc_vsx_xsmaxdp vsfrc:$XA, vsfrc:$XB))]>; - def XSMINDP : XX3Form<60, 168, - (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), - "xsmindp $XT, $XA, $XB", IIC_VecFP, - [(set vsfrc:$XT, - (int_ppc_vsx_xsmindp vsfrc:$XA, vsfrc:$XB))]>; - - def XVMAXDP : XX3Form<60, 224, - (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), - "xvmaxdp $XT, $XA, $XB", IIC_VecFP, - [(set vsrc:$XT, - (int_ppc_vsx_xvmaxdp vsrc:$XA, vsrc:$XB))]>; - def XVMINDP : XX3Form<60, 232, - (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), - "xvmindp $XT, $XA, $XB", IIC_VecFP, - [(set vsrc:$XT, - (int_ppc_vsx_xvmindp vsrc:$XA, vsrc:$XB))]>; - - def XVMAXSP : XX3Form<60, 192, - (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), - "xvmaxsp $XT, $XA, $XB", IIC_VecFP, - [(set vsrc:$XT, - (int_ppc_vsx_xvmaxsp vsrc:$XA, vsrc:$XB))]>; - def XVMINSP : XX3Form<60, 200, - (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), - "xvminsp $XT, $XA, $XB", IIC_VecFP, - [(set vsrc:$XT, - (int_ppc_vsx_xvminsp vsrc:$XA, vsrc:$XB))]>; - } // isCommutable - } // Uses = [RM] - - // Rounding Instructions with static direction. + "xvrdpic $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (fnearbyint v2f64:$XB))]>; + def XVRSPIC : XX2Form<60, 171, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrspic $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (fnearbyint v4f32:$XB))]>; + // Max/Min Instructions + let isCommutable = 1 in { + def XSMAXDP : XX3Form<60, 160, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xsmaxdp $XT, $XA, $XB", IIC_VecFP, + [(set vsfrc:$XT, + (int_ppc_vsx_xsmaxdp vsfrc:$XA, vsfrc:$XB))]>; + def XSMINDP : XX3Form<60, 168, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xsmindp $XT, $XA, $XB", IIC_VecFP, + [(set vsfrc:$XT, + (int_ppc_vsx_xsmindp vsfrc:$XA, vsfrc:$XB))]>; + + def XVMAXDP : XX3Form<60, 224, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvmaxdp $XT, $XA, $XB", IIC_VecFP, + [(set vsrc:$XT, + (int_ppc_vsx_xvmaxdp vsrc:$XA, vsrc:$XB))]>; + def XVMINDP : XX3Form<60, 232, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvmindp $XT, $XA, $XB", IIC_VecFP, + [(set vsrc:$XT, + (int_ppc_vsx_xvmindp vsrc:$XA, vsrc:$XB))]>; + + def XVMAXSP : XX3Form<60, 192, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvmaxsp $XT, $XA, $XB", IIC_VecFP, + [(set vsrc:$XT, + (int_ppc_vsx_xvmaxsp vsrc:$XA, vsrc:$XB))]>; + def XVMINSP : XX3Form<60, 200, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvminsp $XT, $XA, $XB", IIC_VecFP, + [(set vsrc:$XT, + (int_ppc_vsx_xvminsp vsrc:$XA, vsrc:$XB))]>; + } // isCommutable + } // Uses = [RM] + + // Rounding Instructions with static direction. def XSRDPI : XX2Form<60, 73, (outs vsfrc:$XT), (ins vsfrc:$XB), "xsrdpi $XT, $XB", IIC_VecFP, @@ -997,7 +997,7 @@ let hasSideEffects = 0 in { (outs vsrc:$XT), (ins vsrc:$XB), "xvrspiz $XT, $XB", IIC_VecFP, [(set v4f32:$XT, (any_ftrunc v4f32:$XB))]>; - } // mayRaiseFPException + } // mayRaiseFPException // Logical Instructions let isCommutable = 1 in @@ -1183,7 +1183,7 @@ let Predicates = [HasVSX, HasP8Vector] in { "xsresp $XT, $XB", IIC_VecFP, [(set f32:$XT, (PPCfre f32:$XB))]>; // FIXME: Setting the hasSideEffects flag here to match current behaviour. - let hasSideEffects = 1 in + let hasSideEffects = 1 in def XSRSP : XX2Form<60, 281, (outs vssrc:$XT), (ins vsfrc:$XB), "xsrsp $XT, $XB", IIC_VecFP, @@ -1281,12 +1281,12 @@ let Predicates = [HasVSX, HasP8Vector] in { def XSCVSXDSP : XX2Form<60, 312, (outs vssrc:$XT), (ins vsfrc:$XB), "xscvsxdsp $XT, $XB", IIC_VecFP, - [(set f32:$XT, (PPCany_fcfids f64:$XB))]>; + [(set f32:$XT, (PPCany_fcfids f64:$XB))]>; def XSCVUXDSP : XX2Form<60, 296, (outs vssrc:$XT), (ins vsfrc:$XB), "xscvuxdsp $XT, $XB", IIC_VecFP, - [(set f32:$XT, (PPCany_fcfidus f64:$XB))]>; - } // mayRaiseFPException + [(set f32:$XT, (PPCany_fcfidus f64:$XB))]>; + } // mayRaiseFPException // Conversions between vector and scalar single precision def XSCVDPSPN : XX2Form<60, 267, (outs vsrc:$XT), (ins vssrc:$XB), @@ -1459,10 +1459,10 @@ let Predicates = [HasVSX, HasP9Vector] in { "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>; def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>; - let mayRaiseFPException = 1 in { - def XSCMPOQP : X_BF3_VA5_VB5<63, 132, "xscmpoqp", []>; - def XSCMPUQP : X_BF3_VA5_VB5<63, 644, "xscmpuqp", []>; - + let mayRaiseFPException = 1 in { + def XSCMPOQP : X_BF3_VA5_VB5<63, 132, "xscmpoqp", []>; + def XSCMPUQP : X_BF3_VA5_VB5<63, 644, "xscmpuqp", []>; + // DP Compare ==, >=, >, != // Use vsrc for XT, because the entire register of XT is set. // XT.dword[1] = 0x0000_0000_0000_0000 @@ -1472,7 +1472,7 @@ let Predicates = [HasVSX, HasP9Vector] in { IIC_FPCompare, []>; def XSCMPGTDP : XX3_XT5_XA5_XB5<60, 11, "xscmpgtdp", vsrc, vsfrc, vsfrc, IIC_FPCompare, []>; - } + } } //===--------------------------------------------------------------------===// @@ -1492,7 +1492,7 @@ let Predicates = [HasVSX, HasP9Vector] in { } // Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero) - let mayRaiseFPException = 1 in { + let mayRaiseFPException = 1 in { def XSCVQPSDZ : X_VT5_XO5_VB5<63, 25, 836, "xscvqpsdz", []>; def XSCVQPSWZ : X_VT5_XO5_VB5<63, 9, 836, "xscvqpswz", []>; def XSCVQPUDZ : X_VT5_XO5_VB5<63, 17, 836, "xscvqpudz", []>; @@ -1508,12 +1508,12 @@ let Predicates = [HasVSX, HasP9Vector] in { // vsfrc for src and dest register. xscvhpdp's src only use the left 16 bits, // but we still use vsfrc for it. // FIXME: Setting the hasSideEffects flag here to match current behaviour. - let hasSideEffects = 1, mayRaiseFPException = 1 in { + let hasSideEffects = 1, mayRaiseFPException = 1 in { def XSCVDPHP : XX2_XT6_XO5_XB6<60, 17, 347, "xscvdphp", vsfrc, []>; def XSCVHPDP : XX2_XT6_XO5_XB6<60, 16, 347, "xscvhpdp", vsfrc, []>; } - let mayRaiseFPException = 1 in { + let mayRaiseFPException = 1 in { // Vector HP -> SP // FIXME: Setting the hasSideEffects flag here to match current behaviour. let hasSideEffects = 1 in @@ -1522,15 +1522,15 @@ let Predicates = [HasVSX, HasP9Vector] in { [(set v4f32:$XT, (int_ppc_vsx_xvcvsphp v4f32:$XB))]>; - // Round to Quad-Precision Integer [with Inexact] - def XSRQPI : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 0, "xsrqpi" , []>; - def XSRQPIX : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 1, "xsrqpix", []>; + // Round to Quad-Precision Integer [with Inexact] + def XSRQPI : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 0, "xsrqpi" , []>; + def XSRQPIX : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 1, "xsrqpix", []>; // Round Quad-Precision to Double-Extended Precision (fp80) // FIXME: Setting the hasSideEffects flag here to match current behaviour. let hasSideEffects = 1 in def XSRQPXP : Z23_VT5_R1_VB5_RMC2_EX1<63, 37, 0, "xsrqpxp", []>; - } + } //===--------------------------------------------------------------------===// // Insert/Extract Instructions @@ -1621,7 +1621,7 @@ let Predicates = [HasVSX, HasP9Vector] in { (int_ppc_vsx_xvtstdcdp v2f64:$XB, timm:$DCMX))]>; // Maximum/Minimum Type-C/Type-J DP - let mayRaiseFPException = 1 in { + let mayRaiseFPException = 1 in { def XSMAXCDP : XX3_XT5_XA5_XB5<60, 128, "xsmaxcdp", vsfrc, vsfrc, vsfrc, IIC_VecFP, [(set f64:$XT, (PPCxsmaxc f64:$XA, f64:$XB))]>; @@ -1636,7 +1636,7 @@ let Predicates = [HasVSX, HasP9Vector] in { def XSMINJDP : XX3_XT5_XA5_XB5<60, 152, "xsminjdp", vsrc, vsfrc, vsfrc, IIC_VecFP, []>; } - } + } // Vector Byte-Reverse H/W/D/Q Word // FIXME: Setting the hasSideEffects flag here to match current behaviour. @@ -2408,48 +2408,48 @@ def MrgWords { // arbitrarily chosen to be Big, Little. // // Predicate combinations available: -// [HasVSX, IsLittleEndian, HasP8Altivec] Altivec patterns using VSX instr. -// [HasVSX, IsBigEndian, HasP8Altivec] Altivec patterns using VSX instr. +// [HasVSX, IsLittleEndian, HasP8Altivec] Altivec patterns using VSX instr. +// [HasVSX, IsBigEndian, HasP8Altivec] Altivec patterns using VSX instr. // [HasVSX] // [HasVSX, IsBigEndian] // [HasVSX, IsLittleEndian] // [HasVSX, NoP9Vector] -// [HasVSX, NoP9Vector, IsLittleEndian] +// [HasVSX, NoP9Vector, IsLittleEndian] // [HasVSX, HasOnlySwappingMemOps] // [HasVSX, HasOnlySwappingMemOps, IsBigEndian] // [HasVSX, HasP8Vector] -// [HasVSX, HasP8Vector, IsBigEndian, IsPPC64] +// [HasVSX, HasP8Vector, IsBigEndian, IsPPC64] // [HasVSX, HasP8Vector, IsLittleEndian] -// [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64] +// [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64] // [HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian] // [HasVSX, HasDirectMove] // [HasVSX, HasDirectMove, IsBigEndian] // [HasVSX, HasDirectMove, IsLittleEndian] -// [HasVSX, HasDirectMove, NoP9Altivec, IsBigEndian, IsPPC64] -// [HasVSX, HasDirectMove, NoP9Vector, IsBigEndian, IsPPC64] +// [HasVSX, HasDirectMove, NoP9Altivec, IsBigEndian, IsPPC64] +// [HasVSX, HasDirectMove, NoP9Vector, IsBigEndian, IsPPC64] // [HasVSX, HasDirectMove, NoP9Altivec, IsLittleEndian] // [HasVSX, HasDirectMove, NoP9Vector, IsLittleEndian] // [HasVSX, HasP9Vector] -// [HasVSX, HasP9Vector, IsBigEndian, IsPPC64] +// [HasVSX, HasP9Vector, IsBigEndian, IsPPC64] // [HasVSX, HasP9Vector, IsLittleEndian] // [HasVSX, HasP9Altivec] -// [HasVSX, HasP9Altivec, IsBigEndian, IsPPC64] +// [HasVSX, HasP9Altivec, IsBigEndian, IsPPC64] // [HasVSX, HasP9Altivec, IsLittleEndian] -// [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian, IsPPC64] +// [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian, IsPPC64] // [HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian] -// These Altivec patterns are here because we need a VSX instruction to match -// the intrinsic (but only for little endian system). -let Predicates = [HasVSX, IsLittleEndian, HasP8Altivec] in - def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a, - v16i8:$b, v16i8:$c)), - (v16i8 (VPERMXOR $a, $b, (XXLNOR (COPY_TO_REGCLASS $c, VSRC), - (COPY_TO_REGCLASS $c, VSRC))))>; -let Predicates = [HasVSX, IsBigEndian, HasP8Altivec] in - def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a, - v16i8:$b, v16i8:$c)), - (v16i8 (VPERMXOR $a, $b, $c))>; - +// These Altivec patterns are here because we need a VSX instruction to match +// the intrinsic (but only for little endian system). +let Predicates = [HasVSX, IsLittleEndian, HasP8Altivec] in + def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a, + v16i8:$b, v16i8:$c)), + (v16i8 (VPERMXOR $a, $b, (XXLNOR (COPY_TO_REGCLASS $c, VSRC), + (COPY_TO_REGCLASS $c, VSRC))))>; +let Predicates = [HasVSX, IsBigEndian, HasP8Altivec] in + def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a, + v16i8:$b, v16i8:$c)), + (v16i8 (VPERMXOR $a, $b, $c))>; + let AddedComplexity = 400 in { // Valid for any VSX subtarget, regardless of endianness. let Predicates = [HasVSX] in { @@ -2481,10 +2481,10 @@ def : Pat<(fneg (PPCfnmsub v4f32:$A, v4f32:$B, v4f32:$C)), def : Pat<(PPCfnmsub v4f32:$A, v4f32:$B, (fneg v4f32:$C)), (XVNMADDASP $C, $A, $B)>; -def : Pat<(PPCfsqrt f64:$frA), (XSSQRTDP $frA)>; -def : Pat<(PPCfsqrt v2f64:$frA), (XVSQRTDP $frA)>; -def : Pat<(PPCfsqrt v4f32:$frA), (XVSQRTSP $frA)>; - +def : Pat<(PPCfsqrt f64:$frA), (XSSQRTDP $frA)>; +def : Pat<(PPCfsqrt v2f64:$frA), (XVSQRTDP $frA)>; +def : Pat<(PPCfsqrt v4f32:$frA), (XVSQRTSP $frA)>; + def : Pat<(v2f64 (bitconvert v4f32:$A)), (COPY_TO_REGCLASS $A, VSRC)>; def : Pat<(v2f64 (bitconvert v4i32:$A)), @@ -2614,16 +2614,16 @@ def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B), def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B), (XVDIVDP $A, $B)>; -// Vector test for software divide and sqrt. -def : Pat<(i32 (int_ppc_vsx_xvtdivdp v2f64:$A, v2f64:$B)), - (COPY_TO_REGCLASS (XVTDIVDP $A, $B), GPRC)>; -def : Pat<(i32 (int_ppc_vsx_xvtdivsp v4f32:$A, v4f32:$B)), - (COPY_TO_REGCLASS (XVTDIVSP $A, $B), GPRC)>; -def : Pat<(i32 (int_ppc_vsx_xvtsqrtdp v2f64:$A)), - (COPY_TO_REGCLASS (XVTSQRTDP $A), GPRC)>; -def : Pat<(i32 (int_ppc_vsx_xvtsqrtsp v4f32:$A)), - (COPY_TO_REGCLASS (XVTSQRTSP $A), GPRC)>; - +// Vector test for software divide and sqrt. +def : Pat<(i32 (int_ppc_vsx_xvtdivdp v2f64:$A, v2f64:$B)), + (COPY_TO_REGCLASS (XVTDIVDP $A, $B), GPRC)>; +def : Pat<(i32 (int_ppc_vsx_xvtdivsp v4f32:$A, v4f32:$B)), + (COPY_TO_REGCLASS (XVTDIVSP $A, $B), GPRC)>; +def : Pat<(i32 (int_ppc_vsx_xvtsqrtdp v2f64:$A)), + (COPY_TO_REGCLASS (XVTSQRTDP $A), GPRC)>; +def : Pat<(i32 (int_ppc_vsx_xvtsqrtsp v4f32:$A)), + (COPY_TO_REGCLASS (XVTSQRTSP $A), GPRC)>; + // Reciprocal estimate def : Pat<(int_ppc_vsx_xvresp v4f32:$A), (XVRESP $A)>; @@ -2724,7 +2724,7 @@ def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; def : Pat<(f32 (any_fround f32:$S)), (f32 (COPY_TO_REGCLASS (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; -def : Pat<(f32 (fnearbyint f32:$S)), +def : Pat<(f32 (fnearbyint f32:$S)), (f32 (COPY_TO_REGCLASS (XSRDPIC (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; def : Pat<(f32 (any_ffloor f32:$S)), @@ -2739,11 +2739,11 @@ def : Pat<(f32 (any_ftrunc f32:$S)), def : Pat<(f32 (any_frint f32:$S)), (f32 (COPY_TO_REGCLASS (XSRDPIC (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; -def : Pat<(v4f32 (any_frint v4f32:$S)), (v4f32 (XVRSPIC $S))>; +def : Pat<(v4f32 (any_frint v4f32:$S)), (v4f32 (XVRSPIC $S))>; // Rounding for double precision. -def : Pat<(f64 (any_frint f64:$S)), (f64 (XSRDPIC $S))>; -def : Pat<(v2f64 (any_frint v2f64:$S)), (v2f64 (XVRDPIC $S))>; +def : Pat<(f64 (any_frint f64:$S)), (f64 (XSRDPIC $S))>; +def : Pat<(v2f64 (any_frint v2f64:$S)), (v2f64 (XVRDPIC $S))>; // Materialize a zero-vector of long long def : Pat<(v2i64 immAllZerosV), @@ -3020,19 +3020,19 @@ defm : ScalToVecWPermute< VSFRC)), sub_64)>; } // HasVSX, NoP9Vector -// Any little endian pre-Power9 VSX subtarget. -let Predicates = [HasVSX, NoP9Vector, IsLittleEndian] in { -// Load-and-splat using only X-Form VSX loads. -defm : ScalToVecWPermute< - v2i64, (i64 (load xoaddr:$src)), - (XXPERMDIs (XFLOADf64 xoaddr:$src), 2), - (SUBREG_TO_REG (i64 1), (XFLOADf64 xoaddr:$src), sub_64)>; -defm : ScalToVecWPermute< - v2f64, (f64 (load xoaddr:$src)), - (XXPERMDIs (XFLOADf64 xoaddr:$src), 2), - (SUBREG_TO_REG (i64 1), (XFLOADf64 xoaddr:$src), sub_64)>; -} // HasVSX, NoP9Vector, IsLittleEndian - +// Any little endian pre-Power9 VSX subtarget. +let Predicates = [HasVSX, NoP9Vector, IsLittleEndian] in { +// Load-and-splat using only X-Form VSX loads. +defm : ScalToVecWPermute< + v2i64, (i64 (load xoaddr:$src)), + (XXPERMDIs (XFLOADf64 xoaddr:$src), 2), + (SUBREG_TO_REG (i64 1), (XFLOADf64 xoaddr:$src), sub_64)>; +defm : ScalToVecWPermute< + v2f64, (f64 (load xoaddr:$src)), + (XXPERMDIs (XFLOADf64 xoaddr:$src), 2), + (SUBREG_TO_REG (i64 1), (XFLOADf64 xoaddr:$src), sub_64)>; +} // HasVSX, NoP9Vector, IsLittleEndian + // Any VSX subtarget that only has loads and stores that load in big endian // order regardless of endianness. This is really pre-Power9 subtargets. let Predicates = [HasVSX, HasOnlySwappingMemOps] in { @@ -3044,8 +3044,8 @@ let Predicates = [HasVSX, HasOnlySwappingMemOps] in { def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; } // HasVSX, HasOnlySwappingMemOps -// Big endian VSX subtarget that only has loads and stores that always -// load in big endian order. Really big endian pre-Power9 subtargets. +// Big endian VSX subtarget that only has loads and stores that always +// load in big endian order. Really big endian pre-Power9 subtargets. let Predicates = [HasVSX, HasOnlySwappingMemOps, IsBigEndian] in { def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; @@ -3138,7 +3138,7 @@ def : Pat<(v16i8 (bitconvert (v16i8 immAllOnesV))), } // HasVSX, HasP8Vector // Big endian Power8 VSX subtarget. -let Predicates = [HasVSX, HasP8Vector, IsBigEndian, IsPPC64] in { +let Predicates = [HasVSX, HasP8Vector, IsBigEndian, IsPPC64] in { def : Pat<DWToSPExtractConv.El0SS1, (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>; def : Pat<DWToSPExtractConv.El1SS1, @@ -3216,7 +3216,7 @@ foreach Idx = [ [0,3], [2,1], [3,2] ] in { (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))), sub_64), xoaddr:$src)>; } -} // HasVSX, HasP8Vector, IsBigEndian, IsPPC64 +} // HasVSX, HasP8Vector, IsBigEndian, IsPPC64 // Little endian Power8 VSX subtarget. let Predicates = [HasVSX, HasP8Vector, IsLittleEndian] in { @@ -3315,7 +3315,7 @@ foreach Idx = [ [0,2], [1,1], [3,3] ] in { } // HasVSX, HasP8Vector, IsLittleEndian // Big endian pre-Power9 VSX subtarget. -let Predicates = [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64] in { +let Predicates = [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64] in { def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src), (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src), @@ -3326,7 +3326,7 @@ def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src), def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src), (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), xoaddr:$src)>; -} // HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64 +} // HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64 // Little endian pre-Power9 VSX subtarget. let Predicates = [HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian] in { @@ -3583,8 +3583,8 @@ def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), (i32 VectorExtractions.LE_VARIABLE_WORD)>; } // HasVSX, HasDirectMove, NoP9Altivec, IsLittleEndian -// Big endian pre-Power9 64Bit VSX subtarget that has direct moves. -let Predicates = [HasVSX, HasDirectMove, NoP9Vector, IsBigEndian, IsPPC64] in { +// Big endian pre-Power9 64Bit VSX subtarget that has direct moves. +let Predicates = [HasVSX, HasDirectMove, NoP9Vector, IsBigEndian, IsPPC64] in { // Big endian integer vectors using direct moves. def : Pat<(v2i64 (build_vector i64:$A, i64:$B)), (v2i64 (XXPERMDI @@ -3598,7 +3598,7 @@ def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), (MTVSRD (RLDIMI AnyExts.D, AnyExts.C, 32, 0)), VSRC), 0)>; def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)), (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>; -} // HasVSX, HasDirectMove, NoP9Vector, IsBigEndian, IsPPC64 +} // HasVSX, HasDirectMove, NoP9Vector, IsBigEndian, IsPPC64 // Little endian pre-Power9 VSX subtarget that has direct moves. let Predicates = [HasVSX, HasDirectMove, NoP9Vector, IsLittleEndian] in { @@ -3627,25 +3627,25 @@ def : Pat<(fneg (PPCfnmsub f128:$A, f128:$B, f128:$C)), def : Pat<(PPCfnmsub f128:$A, f128:$B, (fneg f128:$C)), (XSNMADDQP $C, $A, $B)>; -def : Pat<(f128 (any_sint_to_fp i64:$src)), +def : Pat<(f128 (any_sint_to_fp i64:$src)), (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>; -def : Pat<(f128 (any_sint_to_fp (i64 (PPCmfvsr f64:$src)))), +def : Pat<(f128 (any_sint_to_fp (i64 (PPCmfvsr f64:$src)))), (f128 (XSCVSDQP $src))>; -def : Pat<(f128 (any_sint_to_fp (i32 (PPCmfvsr f64:$src)))), +def : Pat<(f128 (any_sint_to_fp (i32 (PPCmfvsr f64:$src)))), (f128 (XSCVSDQP (VEXTSW2Ds $src)))>; -def : Pat<(f128 (any_uint_to_fp i64:$src)), +def : Pat<(f128 (any_uint_to_fp i64:$src)), (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>; -def : Pat<(f128 (any_uint_to_fp (i64 (PPCmfvsr f64:$src)))), +def : Pat<(f128 (any_uint_to_fp (i64 (PPCmfvsr f64:$src)))), (f128 (XSCVUDQP $src))>; // Convert (Un)Signed Word -> QP. -def : Pat<(f128 (any_sint_to_fp i32:$src)), +def : Pat<(f128 (any_sint_to_fp i32:$src)), (f128 (XSCVSDQP (MTVSRWA $src)))>; -def : Pat<(f128 (any_sint_to_fp (i32 (load xoaddr:$src)))), +def : Pat<(f128 (any_sint_to_fp (i32 (load xoaddr:$src)))), (f128 (XSCVSDQP (LIWAX xoaddr:$src)))>; -def : Pat<(f128 (any_uint_to_fp i32:$src)), +def : Pat<(f128 (any_uint_to_fp i32:$src)), (f128 (XSCVUDQP (MTVSRWZ $src)))>; -def : Pat<(f128 (any_uint_to_fp (i32 (load xoaddr:$src)))), +def : Pat<(f128 (any_uint_to_fp (i32 (load xoaddr:$src)))), (f128 (XSCVUDQP (LIWZX xoaddr:$src)))>; // Pattern for matching Vector HP -> Vector SP intrinsic. Defined as a @@ -3819,11 +3819,11 @@ def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi8)), (f128 (XSCVUDQP (LXSIBZX xoaddr:$src)))>; // Truncate & Convert QP -> (Un)Signed (D)Word. -def : Pat<(i64 (any_fp_to_sint f128:$src)), (i64 (MFVRD (XSCVQPSDZ $src)))>; -def : Pat<(i64 (any_fp_to_uint f128:$src)), (i64 (MFVRD (XSCVQPUDZ $src)))>; -def : Pat<(i32 (any_fp_to_sint f128:$src)), +def : Pat<(i64 (any_fp_to_sint f128:$src)), (i64 (MFVRD (XSCVQPSDZ $src)))>; +def : Pat<(i64 (any_fp_to_uint f128:$src)), (i64 (MFVRD (XSCVQPUDZ $src)))>; +def : Pat<(i32 (any_fp_to_sint f128:$src)), (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC)))>; -def : Pat<(i32 (any_fp_to_uint f128:$src)), +def : Pat<(i32 (any_fp_to_uint f128:$src)), (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC)))>; // Instructions for store(fptosi). @@ -3951,8 +3951,8 @@ def : Pat<(v4i32 (PPCldsplat xoaddr:$A)), (v4i32 (LXVWSX xoaddr:$A))>; } // HasVSX, HasP9Vector -// Big endian 64Bit Power9 subtarget. -let Predicates = [HasVSX, HasP9Vector, IsBigEndian, IsPPC64] in { +// Big endian 64Bit Power9 subtarget. +let Predicates = [HasVSX, HasP9Vector, IsBigEndian, IsPPC64] in { def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))), (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>; def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))), @@ -4125,7 +4125,7 @@ foreach Idx = 0-15 in { def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))), (f128 (XSCVUDQP (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 4)))>; -} // HasVSX, HasP9Vector, IsBigEndian, IsPPC64 +} // HasVSX, HasP9Vector, IsBigEndian, IsPPC64 // Little endian Power9 subtarget. let Predicates = [HasVSX, HasP9Vector, IsLittleEndian] in { @@ -4350,8 +4350,8 @@ def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 1))), (v4i32 (VABSDUW (XVNEGSP $A), (XVNEGSP $B)))>; } // HasVSX, HasP9Altivec -// Big endian Power9 64Bit VSX subtargets with P9 Altivec support. -let Predicates = [HasVSX, HasP9Altivec, IsBigEndian, IsPPC64] in { +// Big endian Power9 64Bit VSX subtargets with P9 Altivec support. +let Predicates = [HasVSX, HasP9Altivec, IsBigEndian, IsPPC64] in { def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))), (VEXTUBLX $Idx, $S)>; @@ -4484,7 +4484,7 @@ def : Pat<(v4i32 (build_vector ByteToWord.BE_A0, ByteToWord.BE_A1, (v4i32 (VEXTSB2W $A))>; def : Pat<(v2i64 (build_vector ByteToDWord.BE_A0, ByteToDWord.BE_A1)), (v2i64 (VEXTSB2D $A))>; -} // HasVSX, HasP9Altivec, IsBigEndian, IsPPC64 +} // HasVSX, HasP9Altivec, IsBigEndian, IsPPC64 // Little endian Power9 VSX subtargets with P9 Altivec support. let Predicates = [HasVSX, HasP9Altivec, IsLittleEndian] in { @@ -4621,9 +4621,9 @@ def : Pat<(v2i64 (build_vector ByteToDWord.LE_A0, ByteToDWord.LE_A1)), (v2i64 (VEXTSB2D $A))>; } // HasVSX, HasP9Altivec, IsLittleEndian -// Big endian 64Bit VSX subtarget that supports additional direct moves from -// ISA3.0. -let Predicates = [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian, IsPPC64] in { +// Big endian 64Bit VSX subtarget that supports additional direct moves from +// ISA3.0. +let Predicates = [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian, IsPPC64] in { def : Pat<(i64 (extractelt v2i64:$A, 1)), (i64 (MFVSRLD $A))>; // Better way to build integer vectors if we have MTVSRDD. Big endian. @@ -4636,7 +4636,7 @@ def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), def : Pat<(f128 (PPCbuild_fp128 i64:$rB, i64:$rA)), (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>; -} // HasVSX, IsISA3_0, HasDirectMove, IsBigEndian, IsPPC64 +} // HasVSX, IsISA3_0, HasDirectMove, IsBigEndian, IsPPC64 // Little endian VSX subtarget that supports direct moves from ISA3.0. let Predicates = [HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian] in { @@ -4661,18 +4661,18 @@ def : InstAlias<"xvmovdp $XT, $XB", def : InstAlias<"xvmovsp $XT, $XB", (XVCPSGNSP vsrc:$XT, vsrc:$XB, vsrc:$XB)>; -// Certain versions of the AIX assembler may missassemble these mnemonics. -let Predicates = [ModernAs] in { - def : InstAlias<"xxspltd $XT, $XB, 0", - (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 0)>; - def : InstAlias<"xxspltd $XT, $XB, 1", - (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 3)>; - def : InstAlias<"xxspltd $XT, $XB, 0", - (XXPERMDIs vsrc:$XT, vsfrc:$XB, 0)>; - def : InstAlias<"xxspltd $XT, $XB, 1", - (XXPERMDIs vsrc:$XT, vsfrc:$XB, 3)>; -} - +// Certain versions of the AIX assembler may missassemble these mnemonics. +let Predicates = [ModernAs] in { + def : InstAlias<"xxspltd $XT, $XB, 0", + (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 0)>; + def : InstAlias<"xxspltd $XT, $XB, 1", + (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 3)>; + def : InstAlias<"xxspltd $XT, $XB, 0", + (XXPERMDIs vsrc:$XT, vsfrc:$XB, 0)>; + def : InstAlias<"xxspltd $XT, $XB, 1", + (XXPERMDIs vsrc:$XT, vsfrc:$XB, 3)>; +} + def : InstAlias<"xxmrghd $XT, $XA, $XB", (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 0)>; def : InstAlias<"xxmrgld $XT, $XA, $XB", diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp index d84d9a65ce..c242409097 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp @@ -60,7 +60,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/IntrinsicsPowerPC.h" +#include "llvm/IR/IntrinsicsPowerPC.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" @@ -82,7 +82,7 @@ using namespace llvm; static cl::opt<unsigned> MaxVarsPrep("ppc-formprep-max-vars", - cl::Hidden, cl::init(24), + cl::Hidden, cl::init(24), cl::desc("Potential common base number threshold per function for PPC loop " "prep")); @@ -92,7 +92,7 @@ static cl::opt<bool> PreferUpdateForm("ppc-formprep-prefer-update", // Sum of following 3 per loop thresholds for all loops can not be larger // than MaxVarsPrep. -// now the thresholds for each kind prep are exterimental values on Power9. +// now the thresholds for each kind prep are exterimental values on Power9. static cl::opt<unsigned> MaxVarsUpdateForm("ppc-preinc-prep-max-vars", cl::Hidden, cl::init(3), cl::desc("Potential PHI threshold per loop for PPC loop prep of update " @@ -103,7 +103,7 @@ static cl::opt<unsigned> MaxVarsDSForm("ppc-dsprep-max-vars", cl::desc("Potential PHI threshold per loop for PPC loop prep of DS form")); static cl::opt<unsigned> MaxVarsDQForm("ppc-dqprep-max-vars", - cl::Hidden, cl::init(8), + cl::Hidden, cl::init(8), cl::desc("Potential PHI threshold per loop for PPC loop prep of DQ form")); @@ -275,11 +275,11 @@ static Value *GetPointerOperand(Value *MemI) { } else if (StoreInst *SMemI = dyn_cast<StoreInst>(MemI)) { return SMemI->getPointerOperand(); } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(MemI)) { - if (IMemI->getIntrinsicID() == Intrinsic::prefetch || - IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) + if (IMemI->getIntrinsicID() == Intrinsic::prefetch || + IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) return IMemI->getArgOperand(0); - if (IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp) - return IMemI->getArgOperand(1); + if (IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp) + return IMemI->getArgOperand(1); } return nullptr; @@ -346,13 +346,13 @@ SmallVector<Bucket, 16> PPCLoopInstrFormPrep::collectCandidates( MemI = SMemI; PtrValue = SMemI->getPointerOperand(); } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(&J)) { - if (IMemI->getIntrinsicID() == Intrinsic::prefetch || - IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) { + if (IMemI->getIntrinsicID() == Intrinsic::prefetch || + IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) { MemI = IMemI; PtrValue = IMemI->getArgOperand(0); - } else if (IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp) { - MemI = IMemI; - PtrValue = IMemI->getArgOperand(1); + } else if (IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp) { + MemI = IMemI; + PtrValue = IMemI->getArgOperand(1); } else continue; } else continue; @@ -611,10 +611,10 @@ bool PPCLoopInstrFormPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain, NewBasePtr = NewPHI; } - // Clear the rewriter cache, because values that are in the rewriter's cache - // can be deleted below, causing the AssertingVH in the cache to trigger. - SCEVE.clear(); - + // Clear the rewriter cache, because values that are in the rewriter's cache + // can be deleted below, causing the AssertingVH in the cache to trigger. + SCEVE.clear(); + if (Instruction *IDel = dyn_cast<Instruction>(BasePtr)) BBChanged.insert(IDel->getParent()); BasePtr->replaceAllUsesWith(NewBasePtr); @@ -800,7 +800,7 @@ bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) { bool MadeChange = false; // Only prep. the inner-most loop - if (!L->isInnermost()) + if (!L->isInnermost()) return MadeChange; // Return if already done enough preparation. @@ -832,11 +832,11 @@ bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) { if (ST && ST->hasAltivec() && PtrValue->getType()->getPointerElementType()->isVectorTy()) return false; - // There are no update forms for P10 lxvp/stxvp intrinsic. - auto *II = dyn_cast<IntrinsicInst>(I); - if (II && ((II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) || - II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp)) - return false; + // There are no update forms for P10 lxvp/stxvp intrinsic. + auto *II = dyn_cast<IntrinsicInst>(I); + if (II && ((II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) || + II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp)) + return false; // See getPreIndexedAddressParts, the displacement for LDU/STDU has to // be 4's multiple (DS-form). For i64 loads/stores when the displacement // fits in a 16-bit signed field but isn't a multiple of 4, it will be @@ -874,13 +874,13 @@ bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) { // Check if a load/store has DQ form. auto isDQFormCandidate = [&] (const Instruction *I, const Value *PtrValue) { assert((PtrValue && I) && "Invalid parameter!"); - // Check if it is a P10 lxvp/stxvp intrinsic. - auto *II = dyn_cast<IntrinsicInst>(I); - if (II) - return II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp || - II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp; - // Check if it is a P9 vector load/store. - return ST && ST->hasP9Vector() && + // Check if it is a P10 lxvp/stxvp intrinsic. + auto *II = dyn_cast<IntrinsicInst>(I); + if (II) + return II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp || + II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp; + // Check if it is a P9 vector load/store. + return ST && ST->hasP9Vector() && (PtrValue->getType()->getPointerElementType()->isVectorTy()); }; diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp index c2d81f9dfa..27b2c9a628 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp @@ -16,7 +16,7 @@ #include "PPC.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Instructions.h" @@ -65,7 +65,7 @@ private: /// Checks if the specified function name represents an entry in the MASSV /// library. bool PPCLowerMASSVEntries::isMASSVFunc(StringRef Name) { - return llvm::is_contained(MASSVFuncs, Name); + return llvm::is_contained(MASSVFuncs, Name); } // FIXME: @@ -105,7 +105,7 @@ bool PPCLowerMASSVEntries::handlePowSpecialCases(CallInst *CI, Function &Func, return false; if (Constant *Exp = dyn_cast<Constant>(CI->getArgOperand(1))) - if (ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(Exp->getSplatValue())) { + if (ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(Exp->getSplatValue())) { // If the argument is 0.75 or 0.25 it is cheaper to turn it into pow // intrinsic so that it could be optimzed as sequence of sqrt's. if (!CI->hasNoInfs() || !CI->hasApproxFunc()) diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/PPCMCInstLower.cpp index 150814474e..5cc180d770 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -74,9 +74,9 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, RefKind = MCSymbolRefExpr::VK_PPC_TOC_LO; break; case PPCII::MO_TLS: - bool IsPCRel = (MO.getTargetFlags() & ~access) == PPCII::MO_PCREL_FLAG; - RefKind = IsPCRel ? MCSymbolRefExpr::VK_PPC_TLS_PCREL - : MCSymbolRefExpr::VK_PPC_TLS; + bool IsPCRel = (MO.getTargetFlags() & ~access) == PPCII::MO_PCREL_FLAG; + RefKind = IsPCRel ? MCSymbolRefExpr::VK_PPC_TLS_PCREL + : MCSymbolRefExpr::VK_PPC_TLS; break; } @@ -86,14 +86,14 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, RefKind = MCSymbolRefExpr::VK_PCREL; else if (MO.getTargetFlags() == (PPCII::MO_PCREL_FLAG | PPCII::MO_GOT_FLAG)) RefKind = MCSymbolRefExpr::VK_PPC_GOT_PCREL; - else if (MO.getTargetFlags() == (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG)) - RefKind = MCSymbolRefExpr::VK_TPREL; - else if (MO.getTargetFlags() == PPCII::MO_GOT_TLSGD_PCREL_FLAG) - RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSGD_PCREL; - else if (MO.getTargetFlags() == PPCII::MO_GOT_TLSLD_PCREL_FLAG) - RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD_PCREL; - else if (MO.getTargetFlags() == PPCII::MO_GOT_TPREL_PCREL_FLAG) - RefKind = MCSymbolRefExpr::VK_PPC_GOT_TPREL_PCREL; + else if (MO.getTargetFlags() == (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG)) + RefKind = MCSymbolRefExpr::VK_TPREL; + else if (MO.getTargetFlags() == PPCII::MO_GOT_TLSGD_PCREL_FLAG) + RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSGD_PCREL; + else if (MO.getTargetFlags() == PPCII::MO_GOT_TLSLD_PCREL_FLAG) + RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD_PCREL; + else if (MO.getTargetFlags() == PPCII::MO_GOT_TPREL_PCREL_FLAG) + RefKind = MCSymbolRefExpr::VK_PPC_GOT_TPREL_PCREL; const MachineInstr *MI = MO.getParent(); const MachineFunction *MF = MI->getMF(); @@ -110,8 +110,8 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, MIOpcode == PPC::BL8_NOTOC) { RefKind = MCSymbolRefExpr::VK_PPC_NOTOC; } - if (MO.getTargetFlags() == PPCII::MO_PCREL_OPT_FLAG) - RefKind = MCSymbolRefExpr::VK_PPC_PCREL_OPT; + if (MO.getTargetFlags() == PPCII::MO_PCREL_OPT_FLAG) + RefKind = MCSymbolRefExpr::VK_PPC_PCREL_OPT; } const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, RefKind, Ctx); diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCMIPeephole.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/PPCMIPeephole.cpp index f2d1f35067..c8b01aaef8 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -267,113 +267,113 @@ void PPCMIPeephole::UpdateTOCSaves( TOCSaves[MI] = Keep; } -// This function returns a list of all PHI nodes in the tree starting from -// the RootPHI node. We perform a BFS traversal to get an ordered list of nodes. -// The list initially only contains the root PHI. When we visit a PHI node, we -// add it to the list. We continue to look for other PHI node operands while -// there are nodes to visit in the list. The function returns false if the -// optimization cannot be applied on this tree. -static bool collectUnprimedAccPHIs(MachineRegisterInfo *MRI, - MachineInstr *RootPHI, - SmallVectorImpl<MachineInstr *> &PHIs) { - PHIs.push_back(RootPHI); - unsigned VisitedIndex = 0; - while (VisitedIndex < PHIs.size()) { - MachineInstr *VisitedPHI = PHIs[VisitedIndex]; - for (unsigned PHIOp = 1, NumOps = VisitedPHI->getNumOperands(); - PHIOp != NumOps; PHIOp += 2) { - Register RegOp = VisitedPHI->getOperand(PHIOp).getReg(); - if (!Register::isVirtualRegister(RegOp)) - return false; - MachineInstr *Instr = MRI->getVRegDef(RegOp); - // While collecting the PHI nodes, we check if they can be converted (i.e. - // all the operands are either copies, implicit defs or PHI nodes). - unsigned Opcode = Instr->getOpcode(); - if (Opcode == PPC::COPY) { - Register Reg = Instr->getOperand(1).getReg(); - if (!Register::isVirtualRegister(Reg) || - MRI->getRegClass(Reg) != &PPC::ACCRCRegClass) - return false; - } else if (Opcode != PPC::IMPLICIT_DEF && Opcode != PPC::PHI) - return false; - // If we detect a cycle in the PHI nodes, we exit. It would be - // possible to change cycles as well, but that would add a lot - // of complexity for a case that is unlikely to occur with MMA - // code. - if (Opcode != PPC::PHI) - continue; - if (llvm::is_contained(PHIs, Instr)) - return false; - PHIs.push_back(Instr); - } - VisitedIndex++; - } - return true; -} - -// This function changes the unprimed accumulator PHI nodes in the PHIs list to -// primed accumulator PHI nodes. The list is traversed in reverse order to -// change all the PHI operands of a PHI node before changing the node itself. -// We keep a map to associate each changed PHI node to its non-changed form. -static void convertUnprimedAccPHIs(const PPCInstrInfo *TII, - MachineRegisterInfo *MRI, - SmallVectorImpl<MachineInstr *> &PHIs, - Register Dst) { - DenseMap<MachineInstr *, MachineInstr *> ChangedPHIMap; - for (auto It = PHIs.rbegin(), End = PHIs.rend(); It != End; ++It) { - MachineInstr *PHI = *It; - SmallVector<std::pair<MachineOperand, MachineOperand>, 4> PHIOps; - // We check if the current PHI node can be changed by looking at its - // operands. If all the operands are either copies from primed - // accumulators, implicit definitions or other unprimed accumulator - // PHI nodes, we change it. - for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps; - PHIOp += 2) { - Register RegOp = PHI->getOperand(PHIOp).getReg(); - MachineInstr *PHIInput = MRI->getVRegDef(RegOp); - unsigned Opcode = PHIInput->getOpcode(); - assert((Opcode == PPC::COPY || Opcode == PPC::IMPLICIT_DEF || - Opcode == PPC::PHI) && - "Unexpected instruction"); - if (Opcode == PPC::COPY) { - assert(MRI->getRegClass(PHIInput->getOperand(1).getReg()) == - &PPC::ACCRCRegClass && - "Unexpected register class"); - PHIOps.push_back({PHIInput->getOperand(1), PHI->getOperand(PHIOp + 1)}); - } else if (Opcode == PPC::IMPLICIT_DEF) { - Register AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass); - BuildMI(*PHIInput->getParent(), PHIInput, PHIInput->getDebugLoc(), - TII->get(PPC::IMPLICIT_DEF), AccReg); - PHIOps.push_back({MachineOperand::CreateReg(AccReg, false), - PHI->getOperand(PHIOp + 1)}); - } else if (Opcode == PPC::PHI) { - // We found a PHI operand. At this point we know this operand - // has already been changed so we get its associated changed form - // from the map. - assert(ChangedPHIMap.count(PHIInput) == 1 && - "This PHI node should have already been changed."); - MachineInstr *PrimedAccPHI = ChangedPHIMap.lookup(PHIInput); - PHIOps.push_back({MachineOperand::CreateReg( - PrimedAccPHI->getOperand(0).getReg(), false), - PHI->getOperand(PHIOp + 1)}); - } - } - Register AccReg = Dst; - // If the PHI node we are changing is the root node, the register it defines - // will be the destination register of the original copy (of the PHI def). - // For all other PHI's in the list, we need to create another primed - // accumulator virtual register as the PHI will no longer define the - // unprimed accumulator. - if (PHI != PHIs[0]) - AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass); - MachineInstrBuilder NewPHI = BuildMI( - *PHI->getParent(), PHI, PHI->getDebugLoc(), TII->get(PPC::PHI), AccReg); - for (auto RegMBB : PHIOps) - NewPHI.add(RegMBB.first).add(RegMBB.second); - ChangedPHIMap[PHI] = NewPHI.getInstr(); - } -} - +// This function returns a list of all PHI nodes in the tree starting from +// the RootPHI node. We perform a BFS traversal to get an ordered list of nodes. +// The list initially only contains the root PHI. When we visit a PHI node, we +// add it to the list. We continue to look for other PHI node operands while +// there are nodes to visit in the list. The function returns false if the +// optimization cannot be applied on this tree. +static bool collectUnprimedAccPHIs(MachineRegisterInfo *MRI, + MachineInstr *RootPHI, + SmallVectorImpl<MachineInstr *> &PHIs) { + PHIs.push_back(RootPHI); + unsigned VisitedIndex = 0; + while (VisitedIndex < PHIs.size()) { + MachineInstr *VisitedPHI = PHIs[VisitedIndex]; + for (unsigned PHIOp = 1, NumOps = VisitedPHI->getNumOperands(); + PHIOp != NumOps; PHIOp += 2) { + Register RegOp = VisitedPHI->getOperand(PHIOp).getReg(); + if (!Register::isVirtualRegister(RegOp)) + return false; + MachineInstr *Instr = MRI->getVRegDef(RegOp); + // While collecting the PHI nodes, we check if they can be converted (i.e. + // all the operands are either copies, implicit defs or PHI nodes). + unsigned Opcode = Instr->getOpcode(); + if (Opcode == PPC::COPY) { + Register Reg = Instr->getOperand(1).getReg(); + if (!Register::isVirtualRegister(Reg) || + MRI->getRegClass(Reg) != &PPC::ACCRCRegClass) + return false; + } else if (Opcode != PPC::IMPLICIT_DEF && Opcode != PPC::PHI) + return false; + // If we detect a cycle in the PHI nodes, we exit. It would be + // possible to change cycles as well, but that would add a lot + // of complexity for a case that is unlikely to occur with MMA + // code. + if (Opcode != PPC::PHI) + continue; + if (llvm::is_contained(PHIs, Instr)) + return false; + PHIs.push_back(Instr); + } + VisitedIndex++; + } + return true; +} + +// This function changes the unprimed accumulator PHI nodes in the PHIs list to +// primed accumulator PHI nodes. The list is traversed in reverse order to +// change all the PHI operands of a PHI node before changing the node itself. +// We keep a map to associate each changed PHI node to its non-changed form. +static void convertUnprimedAccPHIs(const PPCInstrInfo *TII, + MachineRegisterInfo *MRI, + SmallVectorImpl<MachineInstr *> &PHIs, + Register Dst) { + DenseMap<MachineInstr *, MachineInstr *> ChangedPHIMap; + for (auto It = PHIs.rbegin(), End = PHIs.rend(); It != End; ++It) { + MachineInstr *PHI = *It; + SmallVector<std::pair<MachineOperand, MachineOperand>, 4> PHIOps; + // We check if the current PHI node can be changed by looking at its + // operands. If all the operands are either copies from primed + // accumulators, implicit definitions or other unprimed accumulator + // PHI nodes, we change it. + for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps; + PHIOp += 2) { + Register RegOp = PHI->getOperand(PHIOp).getReg(); + MachineInstr *PHIInput = MRI->getVRegDef(RegOp); + unsigned Opcode = PHIInput->getOpcode(); + assert((Opcode == PPC::COPY || Opcode == PPC::IMPLICIT_DEF || + Opcode == PPC::PHI) && + "Unexpected instruction"); + if (Opcode == PPC::COPY) { + assert(MRI->getRegClass(PHIInput->getOperand(1).getReg()) == + &PPC::ACCRCRegClass && + "Unexpected register class"); + PHIOps.push_back({PHIInput->getOperand(1), PHI->getOperand(PHIOp + 1)}); + } else if (Opcode == PPC::IMPLICIT_DEF) { + Register AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass); + BuildMI(*PHIInput->getParent(), PHIInput, PHIInput->getDebugLoc(), + TII->get(PPC::IMPLICIT_DEF), AccReg); + PHIOps.push_back({MachineOperand::CreateReg(AccReg, false), + PHI->getOperand(PHIOp + 1)}); + } else if (Opcode == PPC::PHI) { + // We found a PHI operand. At this point we know this operand + // has already been changed so we get its associated changed form + // from the map. + assert(ChangedPHIMap.count(PHIInput) == 1 && + "This PHI node should have already been changed."); + MachineInstr *PrimedAccPHI = ChangedPHIMap.lookup(PHIInput); + PHIOps.push_back({MachineOperand::CreateReg( + PrimedAccPHI->getOperand(0).getReg(), false), + PHI->getOperand(PHIOp + 1)}); + } + } + Register AccReg = Dst; + // If the PHI node we are changing is the root node, the register it defines + // will be the destination register of the original copy (of the PHI def). + // For all other PHI's in the list, we need to create another primed + // accumulator virtual register as the PHI will no longer define the + // unprimed accumulator. + if (PHI != PHIs[0]) + AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass); + MachineInstrBuilder NewPHI = BuildMI( + *PHI->getParent(), PHI, PHI->getDebugLoc(), TII->get(PPC::PHI), AccReg); + for (auto RegMBB : PHIOps) + NewPHI.add(RegMBB.first).add(RegMBB.second); + ChangedPHIMap[PHI] = NewPHI.getInstr(); + } +} + // Perform peephole optimizations. bool PPCMIPeephole::simplifyCode(void) { bool Simplified = false; @@ -428,38 +428,38 @@ bool PPCMIPeephole::simplifyCode(void) { default: break; - case PPC::COPY: { - Register Src = MI.getOperand(1).getReg(); - Register Dst = MI.getOperand(0).getReg(); - if (!Register::isVirtualRegister(Src) || - !Register::isVirtualRegister(Dst)) - break; - if (MRI->getRegClass(Src) != &PPC::UACCRCRegClass || - MRI->getRegClass(Dst) != &PPC::ACCRCRegClass) - break; - - // We are copying an unprimed accumulator to a primed accumulator. - // If the input to the copy is a PHI that is fed only by (i) copies in - // the other direction (ii) implicitly defined unprimed accumulators or - // (iii) other PHI nodes satisfying (i) and (ii), we can change - // the PHI to a PHI on primed accumulators (as long as we also change - // its operands). To detect and change such copies, we first get a list - // of all the PHI nodes starting from the root PHI node in BFS order. - // We then visit all these PHI nodes to check if they can be changed to - // primed accumulator PHI nodes and if so, we change them. - MachineInstr *RootPHI = MRI->getVRegDef(Src); - if (RootPHI->getOpcode() != PPC::PHI) - break; - - SmallVector<MachineInstr *, 4> PHIs; - if (!collectUnprimedAccPHIs(MRI, RootPHI, PHIs)) - break; - - convertUnprimedAccPHIs(TII, MRI, PHIs, Dst); - - ToErase = &MI; - break; - } + case PPC::COPY: { + Register Src = MI.getOperand(1).getReg(); + Register Dst = MI.getOperand(0).getReg(); + if (!Register::isVirtualRegister(Src) || + !Register::isVirtualRegister(Dst)) + break; + if (MRI->getRegClass(Src) != &PPC::UACCRCRegClass || + MRI->getRegClass(Dst) != &PPC::ACCRCRegClass) + break; + + // We are copying an unprimed accumulator to a primed accumulator. + // If the input to the copy is a PHI that is fed only by (i) copies in + // the other direction (ii) implicitly defined unprimed accumulators or + // (iii) other PHI nodes satisfying (i) and (ii), we can change + // the PHI to a PHI on primed accumulators (as long as we also change + // its operands). To detect and change such copies, we first get a list + // of all the PHI nodes starting from the root PHI node in BFS order. + // We then visit all these PHI nodes to check if they can be changed to + // primed accumulator PHI nodes and if so, we change them. + MachineInstr *RootPHI = MRI->getVRegDef(Src); + if (RootPHI->getOpcode() != PPC::PHI) + break; + + SmallVector<MachineInstr *, 4> PHIs; + if (!collectUnprimedAccPHIs(MRI, RootPHI, PHIs)) + break; + + convertUnprimedAccPHIs(TII, MRI, PHIs, Dst); + + ToErase = &MI; + break; + } case PPC::LI: case PPC::LI8: { // If we are materializing a zero, look for any use operands for which @@ -712,7 +712,7 @@ bool PPCMIPeephole::simplifyCode(void) { Simplified = true; Register ConvReg1 = RoundInstr->getOperand(1).getReg(); Register FRSPDefines = RoundInstr->getOperand(0).getReg(); - MachineInstr &Use = *(MRI->use_instr_nodbg_begin(FRSPDefines)); + MachineInstr &Use = *(MRI->use_instr_nodbg_begin(FRSPDefines)); for (int i = 0, e = Use.getNumOperands(); i < e; ++i) if (Use.getOperand(i).isReg() && Use.getOperand(i).getReg() == FRSPDefines) @@ -987,8 +987,8 @@ bool PPCMIPeephole::simplifyCode(void) { case PPC::RLWINM_rec: case PPC::RLWINM8: case PPC::RLWINM8_rec: { - Simplified = TII->combineRLWINM(MI, &ToErase); - if (Simplified) + Simplified = TII->combineRLWINM(MI, &ToErase); + if (Simplified) ++NumRotatesCollapsed; break; } diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp index 34c531c7cb..c976a9c62d 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp @@ -8,7 +8,7 @@ #include "PPCMachineFunctionInfo.h" #include "llvm/ADT/Twine.h" -#include "llvm/BinaryFormat/XCOFF.h" +#include "llvm/BinaryFormat/XCOFF.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCContext.h" #include "llvm/Support/CommandLine.h" @@ -64,36 +64,36 @@ bool PPCFunctionInfo::isLiveInZExt(Register VReg) const { return LiveIn.second.isZExt(); return false; } - -void PPCFunctionInfo::appendParameterType(ParamType Type) { - uint32_t CopyParamType = ParameterType; - int Bits = 0; - - // If it is fixed type, we only need to increase the FixedParamNum, for - // the bit encode of fixed type is bit of zero, we do not need to change the - // ParamType. - if (Type == FixedType) { - ++FixedParamNum; - return; - } - - ++FloatingPointParamNum; - - for (int I = 0; - I < static_cast<int>(FloatingPointParamNum + FixedParamNum - 1); ++I) { - if (CopyParamType & XCOFF::TracebackTable::ParmTypeIsFloatingBit) { - // '10'b => floating point short parameter. - // '11'b => floating point long parameter. - CopyParamType <<= 2; - Bits += 2; - } else { - // '0'b => fixed parameter. - CopyParamType <<= 1; - ++Bits; - } - } - - assert(Type != FixedType && "FixedType should already be handled."); - if (Bits < 31) - ParameterType |= Type << (30 - Bits); -} + +void PPCFunctionInfo::appendParameterType(ParamType Type) { + uint32_t CopyParamType = ParameterType; + int Bits = 0; + + // If it is fixed type, we only need to increase the FixedParamNum, for + // the bit encode of fixed type is bit of zero, we do not need to change the + // ParamType. + if (Type == FixedType) { + ++FixedParamNum; + return; + } + + ++FloatingPointParamNum; + + for (int I = 0; + I < static_cast<int>(FloatingPointParamNum + FixedParamNum - 1); ++I) { + if (CopyParamType & XCOFF::TracebackTable::ParmTypeIsFloatingBit) { + // '10'b => floating point short parameter. + // '11'b => floating point long parameter. + CopyParamType <<= 2; + Bits += 2; + } else { + // '0'b => fixed parameter. + CopyParamType <<= 1; + ++Bits; + } + } + + assert(Type != FixedType && "FixedType should already be handled."); + if (Bits < 31) + ParameterType |= Type << (30 - Bits); +} diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/libs/llvm12/lib/Target/PowerPC/PPCMachineFunctionInfo.h index f34d23ee29..4b73b36318 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -22,16 +22,16 @@ namespace llvm { /// PPCFunctionInfo - This class is derived from MachineFunction private /// PowerPC target-specific information for each MachineFunction. class PPCFunctionInfo : public MachineFunctionInfo { -public: - // The value in the ParamType are used to indicate the bitstrings used in the - // encoding format. - enum ParamType { - FixedType = 0x0, - ShortFloatPoint = 0x2, - LongFloatPoint = 0x3 - }; - -private: +public: + // The value in the ParamType are used to indicate the bitstrings used in the + // encoding format. + enum ParamType { + FixedType = 0x0, + ShortFloatPoint = 0x2, + LongFloatPoint = 0x3 + }; + +private: virtual void anchor(); /// FramePointerSaveIndex - Frame index of where the old frame pointer is @@ -117,20 +117,20 @@ private: /// register for parameter passing. unsigned VarArgsNumFPR = 0; - /// FixedParamNum - Number of fixed parameter. - unsigned FixedParamNum = 0; - - /// FloatingParamNum - Number of floating point parameter. - unsigned FloatingPointParamNum = 0; - - /// ParamType - Encode type for every parameter - /// in the order of parameters passing in. - /// Bitstring starts from the most significant (leftmost) bit. - /// '0'b => fixed parameter. - /// '10'b => floating point short parameter. - /// '11'b => floating point long parameter. - uint32_t ParameterType = 0; - + /// FixedParamNum - Number of fixed parameter. + unsigned FixedParamNum = 0; + + /// FloatingParamNum - Number of floating point parameter. + unsigned FloatingPointParamNum = 0; + + /// ParamType - Encode type for every parameter + /// in the order of parameters passing in. + /// Bitstring starts from the most significant (leftmost) bit. + /// '0'b => fixed parameter. + /// '10'b => floating point short parameter. + /// '11'b => floating point long parameter. + uint32_t ParameterType = 0; + /// CRSpillFrameIndex - FrameIndex for CR spill slot for 32-bit SVR4. int CRSpillFrameIndex = 0; @@ -214,13 +214,13 @@ public: unsigned getVarArgsNumGPR() const { return VarArgsNumGPR; } void setVarArgsNumGPR(unsigned Num) { VarArgsNumGPR = Num; } - unsigned getFixedParamNum() const { return FixedParamNum; } - - unsigned getFloatingPointParamNum() const { return FloatingPointParamNum; } - - uint32_t getParameterType() const { return ParameterType; } - void appendParameterType(ParamType Type); - + unsigned getFixedParamNum() const { return FixedParamNum; } + + unsigned getFloatingPointParamNum() const { return FloatingPointParamNum; } + + uint32_t getParameterType() const { return ParameterType; } + void appendParameterType(ParamType Type); + unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; } void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; } diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCMachineScheduler.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/PPCMachineScheduler.cpp index 445767b120..ce615e554d 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCMachineScheduler.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCMachineScheduler.cpp @@ -49,104 +49,104 @@ bool PPCPreRASchedStrategy::biasAddiLoadCandidate(SchedCandidate &Cand, void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const { - // From GenericScheduler::tryCandidate + // From GenericScheduler::tryCandidate - // Initialize the candidate if needed. - if (!Cand.isValid()) { - TryCand.Reason = NodeOrder; + // Initialize the candidate if needed. + if (!Cand.isValid()) { + TryCand.Reason = NodeOrder; return; - } - - // Bias PhysReg Defs and copies to their uses and defined respectively. - if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop), - biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg)) - return; - - // Avoid exceeding the target's limit. - if (DAG->isTrackingPressure() && - tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand, - RegExcess, TRI, DAG->MF)) - return; - - // Avoid increasing the max critical pressure in the scheduled region. - if (DAG->isTrackingPressure() && - tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax, - TryCand, Cand, RegCritical, TRI, DAG->MF)) - return; - - // We only compare a subset of features when comparing nodes between - // Top and Bottom boundary. Some properties are simply incomparable, in many - // other instances we should only override the other boundary if something - // is a clear good pick on one boundary. Skip heuristics that are more - // "tie-breaking" in nature. - bool SameBoundary = Zone != nullptr; - if (SameBoundary) { - // For loops that are acyclic path limited, aggressively schedule for - // latency. Within an single cycle, whenever CurrMOps > 0, allow normal - // heuristics to take precedence. - if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() && - tryLatency(TryCand, Cand, *Zone)) - return; - - // Prioritize instructions that read unbuffered resources by stall cycles. - if (tryLess(Zone->getLatencyStallCycles(TryCand.SU), - Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) - return; - } - - // Keep clustered nodes together to encourage downstream peephole - // optimizations which may reduce resource requirements. - // - // This is a best effort to set things up for a post-RA pass. Optimizations - // like generating loads of multiple registers should ideally be done within - // the scheduler pass by combining the loads during DAG postprocessing. - const SUnit *CandNextClusterSU = - Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); - const SUnit *TryCandNextClusterSU = - TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); - if (tryGreater(TryCand.SU == TryCandNextClusterSU, - Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster)) - return; - - if (SameBoundary) { - // Weak edges are for clustering and other constraints. - if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop), - getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak)) - return; - } - - // Avoid increasing the max pressure of the entire region. - if (DAG->isTrackingPressure() && - tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand, - Cand, RegMax, TRI, DAG->MF)) - return; - - if (SameBoundary) { - // Avoid critical resource consumption and balance the schedule. - TryCand.initResourceDelta(DAG, SchedModel); - if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, - TryCand, Cand, ResourceReduce)) - return; - if (tryGreater(TryCand.ResDelta.DemandedResources, - Cand.ResDelta.DemandedResources, TryCand, Cand, - ResourceDemand)) - return; - - // Avoid serializing long latency dependence chains. - // For acyclic path limited loops, latency was already checked above. - if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency && - !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone)) - return; - - // Fall through to original instruction order. - if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) || - (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { - TryCand.Reason = NodeOrder; - } - } - - // GenericScheduler::tryCandidate end - + } + + // Bias PhysReg Defs and copies to their uses and defined respectively. + if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop), + biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg)) + return; + + // Avoid exceeding the target's limit. + if (DAG->isTrackingPressure() && + tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand, + RegExcess, TRI, DAG->MF)) + return; + + // Avoid increasing the max critical pressure in the scheduled region. + if (DAG->isTrackingPressure() && + tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax, + TryCand, Cand, RegCritical, TRI, DAG->MF)) + return; + + // We only compare a subset of features when comparing nodes between + // Top and Bottom boundary. Some properties are simply incomparable, in many + // other instances we should only override the other boundary if something + // is a clear good pick on one boundary. Skip heuristics that are more + // "tie-breaking" in nature. + bool SameBoundary = Zone != nullptr; + if (SameBoundary) { + // For loops that are acyclic path limited, aggressively schedule for + // latency. Within an single cycle, whenever CurrMOps > 0, allow normal + // heuristics to take precedence. + if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() && + tryLatency(TryCand, Cand, *Zone)) + return; + + // Prioritize instructions that read unbuffered resources by stall cycles. + if (tryLess(Zone->getLatencyStallCycles(TryCand.SU), + Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) + return; + } + + // Keep clustered nodes together to encourage downstream peephole + // optimizations which may reduce resource requirements. + // + // This is a best effort to set things up for a post-RA pass. Optimizations + // like generating loads of multiple registers should ideally be done within + // the scheduler pass by combining the loads during DAG postprocessing. + const SUnit *CandNextClusterSU = + Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); + const SUnit *TryCandNextClusterSU = + TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); + if (tryGreater(TryCand.SU == TryCandNextClusterSU, + Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster)) + return; + + if (SameBoundary) { + // Weak edges are for clustering and other constraints. + if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop), + getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak)) + return; + } + + // Avoid increasing the max pressure of the entire region. + if (DAG->isTrackingPressure() && + tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand, + Cand, RegMax, TRI, DAG->MF)) + return; + + if (SameBoundary) { + // Avoid critical resource consumption and balance the schedule. + TryCand.initResourceDelta(DAG, SchedModel); + if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, + TryCand, Cand, ResourceReduce)) + return; + if (tryGreater(TryCand.ResDelta.DemandedResources, + Cand.ResDelta.DemandedResources, TryCand, Cand, + ResourceDemand)) + return; + + // Avoid serializing long latency dependence chains. + // For acyclic path limited loops, latency was already checked above. + if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency && + !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone)) + return; + + // Fall through to original instruction order. + if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) || + (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { + TryCand.Reason = NodeOrder; + } + } + + // GenericScheduler::tryCandidate end + // Add powerpc specific heuristic only when TryCand isn't selected or // selected as node order. if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand) @@ -154,10 +154,10 @@ void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand, // There are some benefits to schedule the ADDI before the load to hide the // latency, as RA may create a true dependency between the load and addi. - if (SameBoundary) { - if (biasAddiLoadCandidate(Cand, TryCand, *Zone)) - return; - } + if (SameBoundary) { + if (biasAddiLoadCandidate(Cand, TryCand, *Zone)) + return; + } } bool PPCPostRASchedStrategy::biasAddiCandidate(SchedCandidate &Cand, @@ -174,44 +174,44 @@ bool PPCPostRASchedStrategy::biasAddiCandidate(SchedCandidate &Cand, void PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) { - // From PostGenericScheduler::tryCandidate + // From PostGenericScheduler::tryCandidate - // Initialize the candidate if needed. - if (!Cand.isValid()) { - TryCand.Reason = NodeOrder; + // Initialize the candidate if needed. + if (!Cand.isValid()) { + TryCand.Reason = NodeOrder; return; - } - - // Prioritize instructions that read unbuffered resources by stall cycles. - if (tryLess(Top.getLatencyStallCycles(TryCand.SU), - Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) - return; - - // Keep clustered nodes together. - if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(), - Cand.SU == DAG->getNextClusterSucc(), TryCand, Cand, Cluster)) - return; - - // Avoid critical resource consumption and balance the schedule. - if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, - TryCand, Cand, ResourceReduce)) - return; - if (tryGreater(TryCand.ResDelta.DemandedResources, - Cand.ResDelta.DemandedResources, TryCand, Cand, - ResourceDemand)) - return; - - // Avoid serializing long latency dependence chains. - if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) { - return; - } - - // Fall through to original instruction order. - if (TryCand.SU->NodeNum < Cand.SU->NodeNum) - TryCand.Reason = NodeOrder; - - // PostGenericScheduler::tryCandidate end - + } + + // Prioritize instructions that read unbuffered resources by stall cycles. + if (tryLess(Top.getLatencyStallCycles(TryCand.SU), + Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) + return; + + // Keep clustered nodes together. + if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(), + Cand.SU == DAG->getNextClusterSucc(), TryCand, Cand, Cluster)) + return; + + // Avoid critical resource consumption and balance the schedule. + if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, + TryCand, Cand, ResourceReduce)) + return; + if (tryGreater(TryCand.ResDelta.DemandedResources, + Cand.ResDelta.DemandedResources, TryCand, Cand, + ResourceDemand)) + return; + + // Avoid serializing long latency dependence chains. + if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) { + return; + } + + // Fall through to original instruction order. + if (TryCand.SU->NodeNum < Cand.SU->NodeNum) + TryCand.Reason = NodeOrder; + + // PostGenericScheduler::tryCandidate end + // Add powerpc post ra specific heuristic only when TryCand isn't selected or // selected as node order. if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand) diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCMacroFusion.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/PPCMacroFusion.cpp index 5175b0a279..d12c6d9cd4 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCMacroFusion.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCMacroFusion.cpp @@ -51,8 +51,8 @@ public: Kd(Kind), Supported(HasFeature), DepOpIdx(Index), OpSet1(First), OpSet2(Second) {} - bool hasOp1(unsigned Opc) const { return OpSet1.contains(Opc); } - bool hasOp2(unsigned Opc) const { return OpSet2.contains(Opc); } + bool hasOp1(unsigned Opc) const { return OpSet1.contains(Opc); } + bool hasOp2(unsigned Opc) const { return OpSet2.contains(Opc); } bool isSupported() const { return Supported; } Optional<unsigned> depOpIdx() const { if (DepOpIdx < 0) diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/PPCPreEmitPeephole.cpp index 013c928bdb..a8853609a7 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCPreEmitPeephole.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCPreEmitPeephole.cpp @@ -21,7 +21,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -39,54 +39,54 @@ STATISTIC(NumFrameOffFoldInPreEmit, "Number of folding frame offset by using r+r in pre-emit peephole"); static cl::opt<bool> -EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true), - cl::desc("enable PC Relative linker optimization")); - -static cl::opt<bool> +EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true), + cl::desc("enable PC Relative linker optimization")); + +static cl::opt<bool> RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true), cl::desc("Run pre-emit peephole optimizations.")); namespace { - -static bool hasPCRelativeForm(MachineInstr &Use) { - switch (Use.getOpcode()) { - default: - return false; - case PPC::LBZ: - case PPC::LBZ8: - case PPC::LHA: - case PPC::LHA8: - case PPC::LHZ: - case PPC::LHZ8: - case PPC::LWZ: - case PPC::LWZ8: - case PPC::STB: - case PPC::STB8: - case PPC::STH: - case PPC::STH8: - case PPC::STW: - case PPC::STW8: - case PPC::LD: - case PPC::STD: - case PPC::LWA: - case PPC::LXSD: - case PPC::LXSSP: - case PPC::LXV: - case PPC::STXSD: - case PPC::STXSSP: - case PPC::STXV: - case PPC::LFD: - case PPC::LFS: - case PPC::STFD: - case PPC::STFS: - case PPC::DFLOADf32: - case PPC::DFLOADf64: - case PPC::DFSTOREf32: - case PPC::DFSTOREf64: - return true; - } -} - + +static bool hasPCRelativeForm(MachineInstr &Use) { + switch (Use.getOpcode()) { + default: + return false; + case PPC::LBZ: + case PPC::LBZ8: + case PPC::LHA: + case PPC::LHA8: + case PPC::LHZ: + case PPC::LHZ8: + case PPC::LWZ: + case PPC::LWZ8: + case PPC::STB: + case PPC::STB8: + case PPC::STH: + case PPC::STH8: + case PPC::STW: + case PPC::STW8: + case PPC::LD: + case PPC::STD: + case PPC::LWA: + case PPC::LXSD: + case PPC::LXSSP: + case PPC::LXV: + case PPC::STXSD: + case PPC::STXSSP: + case PPC::STXV: + case PPC::LFD: + case PPC::LFS: + case PPC::STFD: + case PPC::STFS: + case PPC::DFLOADf32: + case PPC::DFLOADf64: + case PPC::DFSTOREf32: + case PPC::DFSTOREf64: + return true; + } +} + class PPCPreEmitPeephole : public MachineFunctionPass { public: static char ID; @@ -121,7 +121,7 @@ static bool hasPCRelativeForm(MachineInstr &Use) { for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) { // Skip load immediate that is marked to be erased later because it // cannot be used to replace any other instructions. - if (InstrsToErase.contains(&*BBI)) + if (InstrsToErase.contains(&*BBI)) continue; // Skip non-load immediate. unsigned Opc = BBI->getOpcode(); @@ -216,196 +216,196 @@ static bool hasPCRelativeForm(MachineInstr &Use) { return !InstrsToErase.empty(); } - // Check if this instruction is a PLDpc that is part of a GOT indirect - // access. - bool isGOTPLDpc(MachineInstr &Instr) { - if (Instr.getOpcode() != PPC::PLDpc) - return false; - - // The result must be a register. - const MachineOperand &LoadedAddressReg = Instr.getOperand(0); - if (!LoadedAddressReg.isReg()) - return false; - - // Make sure that this is a global symbol. - const MachineOperand &SymbolOp = Instr.getOperand(1); - if (!SymbolOp.isGlobal()) - return false; - - // Finally return true only if the GOT flag is present. - return (SymbolOp.getTargetFlags() & PPCII::MO_GOT_FLAG); - } - - bool addLinkerOpt(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) { - MachineFunction *MF = MBB.getParent(); - // If the linker opt is disabled then just return. - if (!EnablePCRelLinkerOpt) - return false; - - // Add this linker opt only if we are using PC Relative memops. - if (!MF->getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls()) - return false; - - // Struct to keep track of one def/use pair for a GOT indirect access. - struct GOTDefUsePair { - MachineBasicBlock::iterator DefInst; - MachineBasicBlock::iterator UseInst; - Register DefReg; - Register UseReg; - bool StillValid; - }; - // Vector of def/ues pairs in this basic block. - SmallVector<GOTDefUsePair, 4> CandPairs; - SmallVector<GOTDefUsePair, 4> ValidPairs; - bool MadeChange = false; - - // Run through all of the instructions in the basic block and try to - // collect potential pairs of GOT indirect access instructions. - for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) { - // Look for the initial GOT indirect load. - if (isGOTPLDpc(*BBI)) { - GOTDefUsePair CurrentPair{BBI, MachineBasicBlock::iterator(), - BBI->getOperand(0).getReg(), - PPC::NoRegister, true}; - CandPairs.push_back(CurrentPair); - continue; - } - - // We haven't encountered any new PLD instructions, nothing to check. - if (CandPairs.empty()) - continue; - - // Run through the candidate pairs and see if any of the registers - // defined in the PLD instructions are used by this instruction. - // Note: the size of CandPairs can change in the loop. - for (unsigned Idx = 0; Idx < CandPairs.size(); Idx++) { - GOTDefUsePair &Pair = CandPairs[Idx]; - // The instruction does not use or modify this PLD's def reg, - // ignore it. - if (!BBI->readsRegister(Pair.DefReg, TRI) && - !BBI->modifiesRegister(Pair.DefReg, TRI)) - continue; - - // The use needs to be used in the address compuation and not - // as the register being stored for a store. - const MachineOperand *UseOp = - hasPCRelativeForm(*BBI) ? &BBI->getOperand(2) : nullptr; - - // Check for a valid use. - if (UseOp && UseOp->isReg() && UseOp->getReg() == Pair.DefReg && - UseOp->isUse() && UseOp->isKill()) { - Pair.UseInst = BBI; - Pair.UseReg = BBI->getOperand(0).getReg(); - ValidPairs.push_back(Pair); - } - CandPairs.erase(CandPairs.begin() + Idx); - } - } - - // Go through all of the pairs and check for any more valid uses. - for (auto Pair = ValidPairs.begin(); Pair != ValidPairs.end(); Pair++) { - // We shouldn't be here if we don't have a valid pair. - assert(Pair->UseInst.isValid() && Pair->StillValid && - "Kept an invalid def/use pair for GOT PCRel opt"); - // We have found a potential pair. Search through the instructions - // between the def and the use to see if it is valid to mark this as a - // linker opt. - MachineBasicBlock::iterator BBI = Pair->DefInst; - ++BBI; - for (; BBI != Pair->UseInst; ++BBI) { - if (BBI->readsRegister(Pair->UseReg, TRI) || - BBI->modifiesRegister(Pair->UseReg, TRI)) { - Pair->StillValid = false; - break; - } - } - - if (!Pair->StillValid) - continue; - - // The load/store instruction that uses the address from the PLD will - // either use a register (for a store) or define a register (for the - // load). That register will be added as an implicit def to the PLD - // and as an implicit use on the second memory op. This is a precaution - // to prevent future passes from using that register between the two - // instructions. - MachineOperand ImplDef = - MachineOperand::CreateReg(Pair->UseReg, true, true); - MachineOperand ImplUse = - MachineOperand::CreateReg(Pair->UseReg, false, true); - Pair->DefInst->addOperand(ImplDef); - Pair->UseInst->addOperand(ImplUse); - - // Create the symbol. - MCContext &Context = MF->getContext(); - MCSymbol *Symbol = Context.createNamedTempSymbol("pcrel"); - MachineOperand PCRelLabel = - MachineOperand::CreateMCSymbol(Symbol, PPCII::MO_PCREL_OPT_FLAG); - Pair->DefInst->addOperand(*MF, PCRelLabel); - Pair->UseInst->addOperand(*MF, PCRelLabel); - MadeChange |= true; - } - return MadeChange; - } - - // This function removes redundant pairs of accumulator prime/unprime - // instructions. In some situations, it's possible the compiler inserts an - // accumulator prime instruction followed by an unprime instruction (e.g. - // when we store an accumulator after restoring it from a spill). If the - // accumulator is not used between the two, they can be removed. This - // function removes these redundant pairs from basic blocks. - // The algorithm is quite straightforward - every time we encounter a prime - // instruction, the primed register is added to a candidate set. Any use - // other than a prime removes the candidate from the set and any de-prime - // of a current candidate marks both the prime and de-prime for removal. - // This way we ensure we only remove prime/de-prime *pairs* with no - // intervening uses. - bool removeAccPrimeUnprime(MachineBasicBlock &MBB) { - DenseSet<MachineInstr *> InstrsToErase; - // Initially, none of the acc registers are candidates. - SmallVector<MachineInstr *, 8> Candidates( - PPC::UACCRCRegClass.getNumRegs(), nullptr); - - for (MachineInstr &BBI : MBB.instrs()) { - unsigned Opc = BBI.getOpcode(); - // If we are visiting a xxmtacc instruction, we add it and its operand - // register to the candidate set. - if (Opc == PPC::XXMTACC) { - Register Acc = BBI.getOperand(0).getReg(); - assert(PPC::ACCRCRegClass.contains(Acc) && - "Unexpected register for XXMTACC"); - Candidates[Acc - PPC::ACC0] = &BBI; - } - // If we are visiting a xxmfacc instruction and its operand register is - // in the candidate set, we mark the two instructions for removal. - else if (Opc == PPC::XXMFACC) { - Register Acc = BBI.getOperand(0).getReg(); - assert(PPC::ACCRCRegClass.contains(Acc) && - "Unexpected register for XXMFACC"); - if (!Candidates[Acc - PPC::ACC0]) - continue; - InstrsToErase.insert(&BBI); - InstrsToErase.insert(Candidates[Acc - PPC::ACC0]); - } - // If we are visiting an instruction using an accumulator register - // as operand, we remove it from the candidate set. - else { - for (MachineOperand &Operand : BBI.operands()) { - if (!Operand.isReg()) - continue; - Register Reg = Operand.getReg(); - if (PPC::ACCRCRegClass.contains(Reg)) - Candidates[Reg - PPC::ACC0] = nullptr; - } - } - } - - for (MachineInstr *MI : InstrsToErase) - MI->eraseFromParent(); - NumRemovedInPreEmit += InstrsToErase.size(); - return !InstrsToErase.empty(); - } - + // Check if this instruction is a PLDpc that is part of a GOT indirect + // access. + bool isGOTPLDpc(MachineInstr &Instr) { + if (Instr.getOpcode() != PPC::PLDpc) + return false; + + // The result must be a register. + const MachineOperand &LoadedAddressReg = Instr.getOperand(0); + if (!LoadedAddressReg.isReg()) + return false; + + // Make sure that this is a global symbol. + const MachineOperand &SymbolOp = Instr.getOperand(1); + if (!SymbolOp.isGlobal()) + return false; + + // Finally return true only if the GOT flag is present. + return (SymbolOp.getTargetFlags() & PPCII::MO_GOT_FLAG); + } + + bool addLinkerOpt(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) { + MachineFunction *MF = MBB.getParent(); + // If the linker opt is disabled then just return. + if (!EnablePCRelLinkerOpt) + return false; + + // Add this linker opt only if we are using PC Relative memops. + if (!MF->getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls()) + return false; + + // Struct to keep track of one def/use pair for a GOT indirect access. + struct GOTDefUsePair { + MachineBasicBlock::iterator DefInst; + MachineBasicBlock::iterator UseInst; + Register DefReg; + Register UseReg; + bool StillValid; + }; + // Vector of def/ues pairs in this basic block. + SmallVector<GOTDefUsePair, 4> CandPairs; + SmallVector<GOTDefUsePair, 4> ValidPairs; + bool MadeChange = false; + + // Run through all of the instructions in the basic block and try to + // collect potential pairs of GOT indirect access instructions. + for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) { + // Look for the initial GOT indirect load. + if (isGOTPLDpc(*BBI)) { + GOTDefUsePair CurrentPair{BBI, MachineBasicBlock::iterator(), + BBI->getOperand(0).getReg(), + PPC::NoRegister, true}; + CandPairs.push_back(CurrentPair); + continue; + } + + // We haven't encountered any new PLD instructions, nothing to check. + if (CandPairs.empty()) + continue; + + // Run through the candidate pairs and see if any of the registers + // defined in the PLD instructions are used by this instruction. + // Note: the size of CandPairs can change in the loop. + for (unsigned Idx = 0; Idx < CandPairs.size(); Idx++) { + GOTDefUsePair &Pair = CandPairs[Idx]; + // The instruction does not use or modify this PLD's def reg, + // ignore it. + if (!BBI->readsRegister(Pair.DefReg, TRI) && + !BBI->modifiesRegister(Pair.DefReg, TRI)) + continue; + + // The use needs to be used in the address compuation and not + // as the register being stored for a store. + const MachineOperand *UseOp = + hasPCRelativeForm(*BBI) ? &BBI->getOperand(2) : nullptr; + + // Check for a valid use. + if (UseOp && UseOp->isReg() && UseOp->getReg() == Pair.DefReg && + UseOp->isUse() && UseOp->isKill()) { + Pair.UseInst = BBI; + Pair.UseReg = BBI->getOperand(0).getReg(); + ValidPairs.push_back(Pair); + } + CandPairs.erase(CandPairs.begin() + Idx); + } + } + + // Go through all of the pairs and check for any more valid uses. + for (auto Pair = ValidPairs.begin(); Pair != ValidPairs.end(); Pair++) { + // We shouldn't be here if we don't have a valid pair. + assert(Pair->UseInst.isValid() && Pair->StillValid && + "Kept an invalid def/use pair for GOT PCRel opt"); + // We have found a potential pair. Search through the instructions + // between the def and the use to see if it is valid to mark this as a + // linker opt. + MachineBasicBlock::iterator BBI = Pair->DefInst; + ++BBI; + for (; BBI != Pair->UseInst; ++BBI) { + if (BBI->readsRegister(Pair->UseReg, TRI) || + BBI->modifiesRegister(Pair->UseReg, TRI)) { + Pair->StillValid = false; + break; + } + } + + if (!Pair->StillValid) + continue; + + // The load/store instruction that uses the address from the PLD will + // either use a register (for a store) or define a register (for the + // load). That register will be added as an implicit def to the PLD + // and as an implicit use on the second memory op. This is a precaution + // to prevent future passes from using that register between the two + // instructions. + MachineOperand ImplDef = + MachineOperand::CreateReg(Pair->UseReg, true, true); + MachineOperand ImplUse = + MachineOperand::CreateReg(Pair->UseReg, false, true); + Pair->DefInst->addOperand(ImplDef); + Pair->UseInst->addOperand(ImplUse); + + // Create the symbol. + MCContext &Context = MF->getContext(); + MCSymbol *Symbol = Context.createNamedTempSymbol("pcrel"); + MachineOperand PCRelLabel = + MachineOperand::CreateMCSymbol(Symbol, PPCII::MO_PCREL_OPT_FLAG); + Pair->DefInst->addOperand(*MF, PCRelLabel); + Pair->UseInst->addOperand(*MF, PCRelLabel); + MadeChange |= true; + } + return MadeChange; + } + + // This function removes redundant pairs of accumulator prime/unprime + // instructions. In some situations, it's possible the compiler inserts an + // accumulator prime instruction followed by an unprime instruction (e.g. + // when we store an accumulator after restoring it from a spill). If the + // accumulator is not used between the two, they can be removed. This + // function removes these redundant pairs from basic blocks. + // The algorithm is quite straightforward - every time we encounter a prime + // instruction, the primed register is added to a candidate set. Any use + // other than a prime removes the candidate from the set and any de-prime + // of a current candidate marks both the prime and de-prime for removal. + // This way we ensure we only remove prime/de-prime *pairs* with no + // intervening uses. + bool removeAccPrimeUnprime(MachineBasicBlock &MBB) { + DenseSet<MachineInstr *> InstrsToErase; + // Initially, none of the acc registers are candidates. + SmallVector<MachineInstr *, 8> Candidates( + PPC::UACCRCRegClass.getNumRegs(), nullptr); + + for (MachineInstr &BBI : MBB.instrs()) { + unsigned Opc = BBI.getOpcode(); + // If we are visiting a xxmtacc instruction, we add it and its operand + // register to the candidate set. + if (Opc == PPC::XXMTACC) { + Register Acc = BBI.getOperand(0).getReg(); + assert(PPC::ACCRCRegClass.contains(Acc) && + "Unexpected register for XXMTACC"); + Candidates[Acc - PPC::ACC0] = &BBI; + } + // If we are visiting a xxmfacc instruction and its operand register is + // in the candidate set, we mark the two instructions for removal. + else if (Opc == PPC::XXMFACC) { + Register Acc = BBI.getOperand(0).getReg(); + assert(PPC::ACCRCRegClass.contains(Acc) && + "Unexpected register for XXMFACC"); + if (!Candidates[Acc - PPC::ACC0]) + continue; + InstrsToErase.insert(&BBI); + InstrsToErase.insert(Candidates[Acc - PPC::ACC0]); + } + // If we are visiting an instruction using an accumulator register + // as operand, we remove it from the candidate set. + else { + for (MachineOperand &Operand : BBI.operands()) { + if (!Operand.isReg()) + continue; + Register Reg = Operand.getReg(); + if (PPC::ACCRCRegClass.contains(Reg)) + Candidates[Reg - PPC::ACC0] = nullptr; + } + } + } + + for (MachineInstr *MI : InstrsToErase) + MI->eraseFromParent(); + NumRemovedInPreEmit += InstrsToErase.size(); + return !InstrsToErase.empty(); + } + bool runOnMachineFunction(MachineFunction &MF) override { if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) { // Remove UNENCODED_NOP even when this pass is disabled. @@ -426,8 +426,8 @@ static bool hasPCRelativeForm(MachineInstr &Use) { SmallVector<MachineInstr *, 4> InstrsToErase; for (MachineBasicBlock &MBB : MF) { Changed |= removeRedundantLIs(MBB, TRI); - Changed |= addLinkerOpt(MBB, TRI); - Changed |= removeAccPrimeUnprime(MBB); + Changed |= addLinkerOpt(MBB, TRI); + Changed |= removeAccPrimeUnprime(MBB); for (MachineInstr &MI : MBB) { unsigned Opc = MI.getOpcode(); if (Opc == PPC::UNENCODED_NOP) { diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCReduceCRLogicals.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/PPCReduceCRLogicals.cpp index 2f717b3adf..5cee00c61f 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCReduceCRLogicals.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCReduceCRLogicals.cpp @@ -206,9 +206,9 @@ static bool splitMBB(BlockSplitInfo &BSI) { NewMBB->splice(NewMBB->end(), ThisMBB, InsertPoint, ThisMBB->end()); NewMBB->transferSuccessors(ThisMBB); if (!ProbOrigTarget.isUnknown()) { - auto MBBI = find(NewMBB->successors(), OrigTarget); + auto MBBI = find(NewMBB->successors(), OrigTarget); NewMBB->setSuccProbability(MBBI, ProbOrigTarget); - MBBI = find(NewMBB->successors(), OrigFallThrough); + MBBI = find(NewMBB->successors(), OrigFallThrough); NewMBB->setSuccProbability(MBBI, ProbOrigFallThrough); } diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/PPCRegisterInfo.cpp index fc44a2b490..178a13443e 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -75,21 +75,21 @@ MaxCRBitSpillDist("ppc-max-crbit-spill-dist", "spill on ppc"), cl::Hidden, cl::init(100)); -// Copies/moves of physical accumulators are expensive operations -// that should be avoided whenever possible. MMA instructions are -// meant to be used in performance-sensitive computational kernels. -// This option is provided, at least for the time being, to give the -// user a tool to detect this expensive operation and either rework -// their code or report a compiler bug if that turns out to be the -// cause. -#ifndef NDEBUG -static cl::opt<bool> -ReportAccMoves("ppc-report-acc-moves", - cl::desc("Emit information about accumulator register spills " - "and copies"), - cl::Hidden, cl::init(false)); -#endif - +// Copies/moves of physical accumulators are expensive operations +// that should be avoided whenever possible. MMA instructions are +// meant to be used in performance-sensitive computational kernels. +// This option is provided, at least for the time being, to give the +// user a tool to detect this expensive operation and either rework +// their code or report a compiler bug if that turns out to be the +// cause. +#ifndef NDEBUG +static cl::opt<bool> +ReportAccMoves("ppc-report-acc-moves", + cl::desc("Emit information about accumulator register spills " + "and copies"), + cl::Hidden, cl::init(false)); +#endif + static unsigned offsetMinAlignForOpcode(unsigned OpC); PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM) @@ -156,10 +156,10 @@ PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) const MCPhysReg* PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const PPCSubtarget &Subtarget = MF->getSubtarget<PPCSubtarget>(); - if (Subtarget.isAIXABI() && - (Subtarget.hasAltivec() && !TM.getAIXExtendedAltivecABI())) - report_fatal_error("the default AIX Altivec ABI is not yet " - "supported."); + if (Subtarget.isAIXABI() && + (Subtarget.hasAltivec() && !TM.getAIXExtendedAltivecABI())) + report_fatal_error("the default AIX Altivec ABI is not yet " + "supported."); if (MF->getFunction().getCallingConv() == CallingConv::AnyReg) { if (!TM.isPPC64() && Subtarget.isAIXABI()) report_fatal_error("AnyReg unimplemented on 32-bit AIX."); @@ -206,11 +206,11 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return SaveR2 ? CSR_PPC64_R2_SaveList : CSR_PPC64_SaveList; } // 32-bit targets. - if (Subtarget.isAIXABI()) { - if (Subtarget.hasAltivec()) - return CSR_AIX32_Altivec_SaveList; + if (Subtarget.isAIXABI()) { + if (Subtarget.hasAltivec()) + return CSR_AIX32_Altivec_SaveList; return CSR_AIX32_SaveList; - } + } if (Subtarget.hasAltivec()) return CSR_SVR432_Altivec_SaveList; else if (Subtarget.hasSPE()) @@ -231,10 +231,10 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF, } if (Subtarget.isAIXABI()) { - return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_PPC64_Altivec_RegMask - : CSR_PPC64_RegMask) - : (Subtarget.hasAltivec() ? CSR_AIX32_Altivec_RegMask - : CSR_AIX32_RegMask); + return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_PPC64_Altivec_RegMask + : CSR_PPC64_RegMask) + : (Subtarget.hasAltivec() ? CSR_AIX32_Altivec_RegMask + : CSR_AIX32_RegMask); } if (CC == CallingConv::Cold) { @@ -851,16 +851,16 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, SpillsKnownBit = true; break; default: - // On Power10, we can use SETNBC to spill all CR bits. SETNBC will set all - // bits (specifically, it produces a -1 if the CR bit is set). Ultimately, - // the bit that is of importance to us is bit 32 (bit 0 of a 32-bit - // register), and SETNBC will set this. - if (Subtarget.isISA3_1()) { - BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::SETNBC8 : PPC::SETNBC), Reg) - .addReg(SrcReg, RegState::Undef); - break; - } - + // On Power10, we can use SETNBC to spill all CR bits. SETNBC will set all + // bits (specifically, it produces a -1 if the CR bit is set). Ultimately, + // the bit that is of importance to us is bit 32 (bit 0 of a 32-bit + // register), and SETNBC will set this. + if (Subtarget.isISA3_1()) { + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::SETNBC8 : PPC::SETNBC), Reg) + .addReg(SrcReg, RegState::Undef); + break; + } + // On Power9, we can use SETB to extract the LT bit. This only works for // the LT bit since SETB produces -1/1/0 for LT/GT/<neither>. So the value // of the bit we care about (32-bit sign bit) will be set to the value of @@ -960,105 +960,105 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II, MBB.erase(II); } -void PPCRegisterInfo::emitAccCopyInfo(MachineBasicBlock &MBB, - MCRegister DestReg, MCRegister SrcReg) { -#ifdef NDEBUG - return; -#else - if (ReportAccMoves) { - std::string Dest = PPC::ACCRCRegClass.contains(DestReg) ? "acc" : "uacc"; - std::string Src = PPC::ACCRCRegClass.contains(SrcReg) ? "acc" : "uacc"; - dbgs() << "Emitting copy from " << Src << " to " << Dest << ":\n"; - MBB.dump(); - } -#endif -} - -static void emitAccSpillRestoreInfo(MachineBasicBlock &MBB, bool IsPrimed, - bool IsRestore) { -#ifdef NDEBUG - return; -#else - if (ReportAccMoves) { - dbgs() << "Emitting " << (IsPrimed ? "acc" : "uacc") << " register " - << (IsRestore ? "restore" : "spill") << ":\n"; - MBB.dump(); - } -#endif -} - -/// lowerACCSpilling - Generate the code for spilling the accumulator register. -/// Similarly to other spills/reloads that use pseudo-ops, we do not actually -/// eliminate the FrameIndex here nor compute the stack offset. We simply -/// create a real instruction with an FI and rely on eliminateFrameIndex to -/// handle the FI elimination. -void PPCRegisterInfo::lowerACCSpilling(MachineBasicBlock::iterator II, - unsigned FrameIndex) const { - MachineInstr &MI = *II; // SPILL_ACC <SrcReg>, <offset> +void PPCRegisterInfo::emitAccCopyInfo(MachineBasicBlock &MBB, + MCRegister DestReg, MCRegister SrcReg) { +#ifdef NDEBUG + return; +#else + if (ReportAccMoves) { + std::string Dest = PPC::ACCRCRegClass.contains(DestReg) ? "acc" : "uacc"; + std::string Src = PPC::ACCRCRegClass.contains(SrcReg) ? "acc" : "uacc"; + dbgs() << "Emitting copy from " << Src << " to " << Dest << ":\n"; + MBB.dump(); + } +#endif +} + +static void emitAccSpillRestoreInfo(MachineBasicBlock &MBB, bool IsPrimed, + bool IsRestore) { +#ifdef NDEBUG + return; +#else + if (ReportAccMoves) { + dbgs() << "Emitting " << (IsPrimed ? "acc" : "uacc") << " register " + << (IsRestore ? "restore" : "spill") << ":\n"; + MBB.dump(); + } +#endif +} + +/// lowerACCSpilling - Generate the code for spilling the accumulator register. +/// Similarly to other spills/reloads that use pseudo-ops, we do not actually +/// eliminate the FrameIndex here nor compute the stack offset. We simply +/// create a real instruction with an FI and rely on eliminateFrameIndex to +/// handle the FI elimination. +void PPCRegisterInfo::lowerACCSpilling(MachineBasicBlock::iterator II, + unsigned FrameIndex) const { + MachineInstr &MI = *II; // SPILL_ACC <SrcReg>, <offset> MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); - DebugLoc DL = MI.getDebugLoc(); + DebugLoc DL = MI.getDebugLoc(); Register SrcReg = MI.getOperand(0).getReg(); - bool IsKilled = MI.getOperand(0).isKill(); - - bool IsPrimed = PPC::ACCRCRegClass.contains(SrcReg); - Register Reg = - PPC::VSRp0 + (SrcReg - (IsPrimed ? PPC::ACC0 : PPC::UACC0)) * 2; - bool IsLittleEndian = Subtarget.isLittleEndian(); - - emitAccSpillRestoreInfo(MBB, IsPrimed, false); - - // De-prime the register being spilled, create two stores for the pair - // subregisters accounting for endianness and then re-prime the register if - // it isn't killed. This uses the Offset parameter to addFrameReference() to - // adjust the offset of the store that is within the 64-byte stack slot. - if (IsPrimed) - BuildMI(MBB, II, DL, TII.get(PPC::XXMFACC), SrcReg).addReg(SrcReg); - addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP)) - .addReg(Reg, getKillRegState(IsKilled)), - FrameIndex, IsLittleEndian ? 32 : 0); - addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP)) - .addReg(Reg + 1, getKillRegState(IsKilled)), - FrameIndex, IsLittleEndian ? 0 : 32); - if (IsPrimed && !IsKilled) - BuildMI(MBB, II, DL, TII.get(PPC::XXMTACC), SrcReg).addReg(SrcReg); - + bool IsKilled = MI.getOperand(0).isKill(); + + bool IsPrimed = PPC::ACCRCRegClass.contains(SrcReg); + Register Reg = + PPC::VSRp0 + (SrcReg - (IsPrimed ? PPC::ACC0 : PPC::UACC0)) * 2; + bool IsLittleEndian = Subtarget.isLittleEndian(); + + emitAccSpillRestoreInfo(MBB, IsPrimed, false); + + // De-prime the register being spilled, create two stores for the pair + // subregisters accounting for endianness and then re-prime the register if + // it isn't killed. This uses the Offset parameter to addFrameReference() to + // adjust the offset of the store that is within the 64-byte stack slot. + if (IsPrimed) + BuildMI(MBB, II, DL, TII.get(PPC::XXMFACC), SrcReg).addReg(SrcReg); + addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP)) + .addReg(Reg, getKillRegState(IsKilled)), + FrameIndex, IsLittleEndian ? 32 : 0); + addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP)) + .addReg(Reg + 1, getKillRegState(IsKilled)), + FrameIndex, IsLittleEndian ? 0 : 32); + if (IsPrimed && !IsKilled) + BuildMI(MBB, II, DL, TII.get(PPC::XXMTACC), SrcReg).addReg(SrcReg); + // Discard the pseudo instruction. MBB.erase(II); } -/// lowerACCRestore - Generate the code to restore the accumulator register. -void PPCRegisterInfo::lowerACCRestore(MachineBasicBlock::iterator II, - unsigned FrameIndex) const { - MachineInstr &MI = *II; // <DestReg> = RESTORE_ACC <offset> +/// lowerACCRestore - Generate the code to restore the accumulator register. +void PPCRegisterInfo::lowerACCRestore(MachineBasicBlock::iterator II, + unsigned FrameIndex) const { + MachineInstr &MI = *II; // <DestReg> = RESTORE_ACC <offset> MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); - DebugLoc DL = MI.getDebugLoc(); + DebugLoc DL = MI.getDebugLoc(); Register DestReg = MI.getOperand(0).getReg(); assert(MI.definesRegister(DestReg) && - "RESTORE_ACC does not define its destination"); - - bool IsPrimed = PPC::ACCRCRegClass.contains(DestReg); - Register Reg = - PPC::VSRp0 + (DestReg - (IsPrimed ? PPC::ACC0 : PPC::UACC0)) * 2; - bool IsLittleEndian = Subtarget.isLittleEndian(); - - emitAccSpillRestoreInfo(MBB, IsPrimed, true); - - // Create two loads for the pair subregisters accounting for endianness and - // then prime the accumulator register being restored. - addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), Reg), - FrameIndex, IsLittleEndian ? 32 : 0); - addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), Reg + 1), - FrameIndex, IsLittleEndian ? 0 : 32); - if (IsPrimed) - BuildMI(MBB, II, DL, TII.get(PPC::XXMTACC), DestReg).addReg(DestReg); - + "RESTORE_ACC does not define its destination"); + + bool IsPrimed = PPC::ACCRCRegClass.contains(DestReg); + Register Reg = + PPC::VSRp0 + (DestReg - (IsPrimed ? PPC::ACC0 : PPC::UACC0)) * 2; + bool IsLittleEndian = Subtarget.isLittleEndian(); + + emitAccSpillRestoreInfo(MBB, IsPrimed, true); + + // Create two loads for the pair subregisters accounting for endianness and + // then prime the accumulator register being restored. + addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), Reg), + FrameIndex, IsLittleEndian ? 32 : 0); + addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), Reg + 1), + FrameIndex, IsLittleEndian ? 0 : 32); + if (IsPrimed) + BuildMI(MBB, II, DL, TII.get(PPC::XXMTACC), DestReg).addReg(DestReg); + // Discard the pseudo instruction. MBB.erase(II); } @@ -1194,11 +1194,11 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } else if (OpC == PPC::RESTORE_CRBIT) { lowerCRBitRestore(II, FrameIndex); return; - } else if (OpC == PPC::SPILL_ACC || OpC == PPC::SPILL_UACC) { - lowerACCSpilling(II, FrameIndex); + } else if (OpC == PPC::SPILL_ACC || OpC == PPC::SPILL_UACC) { + lowerACCSpilling(II, FrameIndex); return; - } else if (OpC == PPC::RESTORE_ACC || OpC == PPC::RESTORE_UACC) { - lowerACCRestore(II, FrameIndex); + } else if (OpC == PPC::RESTORE_ACC || OpC == PPC::RESTORE_UACC) { + lowerACCRestore(II, FrameIndex); return; } @@ -1375,9 +1375,9 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { /// Insert defining instruction(s) for BaseReg to /// be a pointer to FrameIdx at the beginning of the basic block. -Register PPCRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, - int FrameIdx, - int64_t Offset) const { +Register PPCRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, + int FrameIdx, + int64_t Offset) const { unsigned ADDriOpc = TM.isPPC64() ? PPC::ADDI8 : PPC::ADDI; MachineBasicBlock::iterator Ins = MBB->begin(); @@ -1390,14 +1390,14 @@ Register PPCRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); const MCInstrDesc &MCID = TII.get(ADDriOpc); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - const TargetRegisterClass *RC = getPointerRegClass(MF); - Register BaseReg = MRI.createVirtualRegister(RC); + const TargetRegisterClass *RC = getPointerRegClass(MF); + Register BaseReg = MRI.createVirtualRegister(RC); MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF)); BuildMI(*MBB, Ins, DL, MCID, BaseReg) .addFrameIndex(FrameIdx).addImm(Offset); - - return BaseReg; + + return BaseReg; } void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg, diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/libs/llvm12/lib/Target/PowerPC/PPCRegisterInfo.h index e1785c512a..93f330ab56 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCRegisterInfo.h @@ -120,14 +120,14 @@ public: void lowerCRBitRestore(MachineBasicBlock::iterator II, unsigned FrameIndex) const; - void lowerACCSpilling(MachineBasicBlock::iterator II, - unsigned FrameIndex) const; - void lowerACCRestore(MachineBasicBlock::iterator II, - unsigned FrameIndex) const; - - static void emitAccCopyInfo(MachineBasicBlock &MBB, MCRegister DestReg, - MCRegister SrcReg); - + void lowerACCSpilling(MachineBasicBlock::iterator II, + unsigned FrameIndex) const; + void lowerACCRestore(MachineBasicBlock::iterator II, + unsigned FrameIndex) const; + + static void emitAccCopyInfo(MachineBasicBlock &MBB, MCRegister DestReg, + MCRegister SrcReg); + bool hasReservedSpillSlot(const MachineFunction &MF, Register Reg, int &FrameIdx) const override; void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, @@ -136,8 +136,8 @@ public: // Support for virtual base registers. bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override; - Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx, - int64_t Offset) const override; + Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx, + int64_t Offset) const override; void resolveFrameIndex(MachineInstr &MI, Register BaseReg, int64_t Offset) const override; bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg, @@ -154,18 +154,18 @@ public: /// register name so that only the number is left. Used by for linux asm. static const char *stripRegisterPrefix(const char *RegName) { switch (RegName[0]) { - case 'a': - if (RegName[1] == 'c' && RegName[2] == 'c') - return RegName + 3; - break; + case 'a': + if (RegName[1] == 'c' && RegName[2] == 'c') + return RegName + 3; + break; case 'r': case 'f': case 'v': - if (RegName[1] == 's') { - if (RegName[2] == 'p') - return RegName + 3; + if (RegName[1] == 's') { + if (RegName[2] == 'p') + return RegName + 3; return RegName + 2; - } + } return RegName + 1; case 'c': if (RegName[1] == 'r') return RegName + 2; } diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCRegisterInfo.td b/contrib/libs/llvm12/lib/Target/PowerPC/PPCRegisterInfo.td index 4a01821b19..551735c85b 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCRegisterInfo.td @@ -16,10 +16,10 @@ def sub_eq : SubRegIndex<1, 2>; def sub_un : SubRegIndex<1, 3>; def sub_32 : SubRegIndex<32>; def sub_64 : SubRegIndex<64>; -def sub_vsx0 : SubRegIndex<128>; -def sub_vsx1 : SubRegIndex<128, 128>; -def sub_pair0 : SubRegIndex<256>; -def sub_pair1 : SubRegIndex<256, 256>; +def sub_vsx0 : SubRegIndex<128>; +def sub_vsx1 : SubRegIndex<128, 128>; +def sub_pair0 : SubRegIndex<256>; +def sub_pair1 : SubRegIndex<256, 256>; } @@ -98,27 +98,27 @@ class CRBIT<bits<5> num, string n> : PPCReg<n> { let HWEncoding{4-0} = num; } -// ACC - One of the 8 512-bit VSX accumulators. -class ACC<bits<3> num, string n, list<Register> subregs> : PPCReg<n> { - let HWEncoding{2-0} = num; - let SubRegs = subregs; -} - -// UACC - One of the 8 512-bit VSX accumulators prior to being primed. -// Without using this register class, the register allocator has no way to -// differentiate a primed accumulator from an unprimed accumulator. -// This may result in invalid copies between primed and unprimed accumulators. -class UACC<bits<3> num, string n, list<Register> subregs> : PPCReg<n> { - let HWEncoding{2-0} = num; - let SubRegs = subregs; -} - -// VSR Pairs - One of the 32 paired even-odd consecutive VSRs. -class VSRPair<bits<5> num, string n, list<Register> subregs> : PPCReg<n> { - let HWEncoding{4-0} = num; - let SubRegs = subregs; -} - +// ACC - One of the 8 512-bit VSX accumulators. +class ACC<bits<3> num, string n, list<Register> subregs> : PPCReg<n> { + let HWEncoding{2-0} = num; + let SubRegs = subregs; +} + +// UACC - One of the 8 512-bit VSX accumulators prior to being primed. +// Without using this register class, the register allocator has no way to +// differentiate a primed accumulator from an unprimed accumulator. +// This may result in invalid copies between primed and unprimed accumulators. +class UACC<bits<3> num, string n, list<Register> subregs> : PPCReg<n> { + let HWEncoding{2-0} = num; + let SubRegs = subregs; +} + +// VSR Pairs - One of the 32 paired even-odd consecutive VSRs. +class VSRPair<bits<5> num, string n, list<Register> subregs> : PPCReg<n> { + let HWEncoding{4-0} = num; + let SubRegs = subregs; +} + // General-purpose registers foreach Index = 0-31 in { def R#Index : GPR<Index, "r"#Index>, DwarfRegNum<[-2, Index]>; @@ -168,23 +168,23 @@ foreach Index = 32-63 in { def VSX#Index : VSXReg<Index, "vs"#Index>; } -let SubRegIndices = [sub_vsx0, sub_vsx1] in { - // VSR pairs 0 - 15 (corresponding to VSRs 0 - 30 paired with 1 - 31). - foreach Index = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 } in { - def VSRp#!srl(Index, 1) : VSRPair<!srl(Index, 1), "vsp"#Index, - [!cast<VSRL>("VSL"#Index), !cast<VSRL>("VSL"#!add(Index, 1))]>, - DwarfRegNum<[-1, -1]>; - } - - // VSR pairs 16 - 31 (corresponding to VSRs 32 - 62 paired with 33 - 63). - foreach Index = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 } in { - def VSRp#!add(!srl(Index, 1), 16) : - VSRPair<!add(!srl(Index, 1), 16), "vsp"#!add(Index, 32), - [!cast<VR>("V"#Index), !cast<VR>("V"#!add(Index, 1))]>, - DwarfRegNum<[-1, -1]>; - } -} - +let SubRegIndices = [sub_vsx0, sub_vsx1] in { + // VSR pairs 0 - 15 (corresponding to VSRs 0 - 30 paired with 1 - 31). + foreach Index = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 } in { + def VSRp#!srl(Index, 1) : VSRPair<!srl(Index, 1), "vsp"#Index, + [!cast<VSRL>("VSL"#Index), !cast<VSRL>("VSL"#!add(Index, 1))]>, + DwarfRegNum<[-1, -1]>; + } + + // VSR pairs 16 - 31 (corresponding to VSRs 32 - 62 paired with 33 - 63). + foreach Index = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 } in { + def VSRp#!add(!srl(Index, 1), 16) : + VSRPair<!add(!srl(Index, 1), 16), "vsp"#!add(Index, 32), + [!cast<VR>("V"#Index), !cast<VR>("V"#!add(Index, 1))]>, + DwarfRegNum<[-1, -1]>; + } +} + // The representation of r0 when treated as the constant 0. def ZERO : GPR<0, "0">, DwarfRegAlias<R0>; def ZERO8 : GP8<ZERO, "0">, DwarfRegAlias<X0>; @@ -409,56 +409,56 @@ def CTRRC8 : RegisterClass<"PPC", [i64], 64, (add CTR8)> { let isAllocatable = 0; } -def LRRC : RegisterClass<"PPC", [i32], 32, (add LR)> { - let isAllocatable = 0; -} -def LR8RC : RegisterClass<"PPC", [i64], 64, (add LR8)> { - let isAllocatable = 0; -} - +def LRRC : RegisterClass<"PPC", [i32], 32, (add LR)> { + let isAllocatable = 0; +} +def LR8RC : RegisterClass<"PPC", [i64], 64, (add LR8)> { + let isAllocatable = 0; +} + def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>; def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY, XER)> { let CopyCost = -1; } -let SubRegIndices = [sub_pair0, sub_pair1] in { - def ACC0 : ACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>; - def ACC1 : ACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>; - def ACC2 : ACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>; - def ACC3 : ACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>; - def ACC4 : ACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>; - def ACC5 : ACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>; - def ACC6 : ACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>; - def ACC7 : ACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>; -} -def ACCRC : RegisterClass<"PPC", [v512i1], 128, (add ACC0, ACC1, ACC2, ACC3, - ACC4, ACC5, ACC6, ACC7)> { - let Size = 512; -} - -let SubRegIndices = [sub_pair0, sub_pair1] in { - def UACC0 : UACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>; - def UACC1 : UACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>; - def UACC2 : UACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>; - def UACC3 : UACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>; - def UACC4 : UACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>; - def UACC5 : UACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>; - def UACC6 : UACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>; - def UACC7 : UACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>; -} -def UACCRC : RegisterClass<"PPC", [v512i1], 128, - (add UACC0, UACC1, UACC2, UACC3, - UACC4, UACC5, UACC6, UACC7)> { - let Size = 512; -} - -// Allocate in the same order as the underlying VSX registers. -def VSRpRC : - RegisterClass<"PPC", [v256i1], 128, - (add (sequence "VSRp%u", 0, 6), - (sequence "VSRp%u", 15, 7), VSRp17, VSRp18, - VSRp16, VSRp19, VSRp20, VSRp21, VSRp22, VSRp23, - VSRp24, VSRp25, VSRp31, VSRp30, VSRp29, VSRp28, - VSRp27, VSRp26)> { - let Size = 256; -} +let SubRegIndices = [sub_pair0, sub_pair1] in { + def ACC0 : ACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>; + def ACC1 : ACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>; + def ACC2 : ACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>; + def ACC3 : ACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>; + def ACC4 : ACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>; + def ACC5 : ACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>; + def ACC6 : ACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>; + def ACC7 : ACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>; +} +def ACCRC : RegisterClass<"PPC", [v512i1], 128, (add ACC0, ACC1, ACC2, ACC3, + ACC4, ACC5, ACC6, ACC7)> { + let Size = 512; +} + +let SubRegIndices = [sub_pair0, sub_pair1] in { + def UACC0 : UACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>; + def UACC1 : UACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>; + def UACC2 : UACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>; + def UACC3 : UACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>; + def UACC4 : UACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>; + def UACC5 : UACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>; + def UACC6 : UACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>; + def UACC7 : UACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>; +} +def UACCRC : RegisterClass<"PPC", [v512i1], 128, + (add UACC0, UACC1, UACC2, UACC3, + UACC4, UACC5, UACC6, UACC7)> { + let Size = 512; +} + +// Allocate in the same order as the underlying VSX registers. +def VSRpRC : + RegisterClass<"PPC", [v256i1], 128, + (add (sequence "VSRp%u", 0, 6), + (sequence "VSRp%u", 15, 7), VSRp17, VSRp18, + VSRp16, VSRp19, VSRp20, VSRp21, VSRp22, VSRp23, + VSRp24, VSRp25, VSRp31, VSRp30, VSRp29, VSRp28, + VSRp27, VSRp26)> { + let Size = 256; +} diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCScheduleP9.td b/contrib/libs/llvm12/lib/Target/PowerPC/PPCScheduleP9.td index 598d6e4c3b..571cc219ff 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCScheduleP9.td +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCScheduleP9.td @@ -40,11 +40,11 @@ def P9Model : SchedMachineModel { let CompleteModel = 1; - // Do not support SPE (Signal Processing Engine), prefixed instructions on - // Power 9, paired vector mem ops, MMA, PC relative mem ops, or instructions - // introduced in ISA 3.1. - let UnsupportedFeatures = [HasSPE, PrefixInstrs, PairedVectorMemops, MMA, - PCRelativeMemops, IsISA3_1]; + // Do not support SPE (Signal Processing Engine), prefixed instructions on + // Power 9, paired vector mem ops, MMA, PC relative mem ops, or instructions + // introduced in ISA 3.1. + let UnsupportedFeatures = [HasSPE, PrefixInstrs, PairedVectorMemops, MMA, + PCRelativeMemops, IsISA3_1]; } let SchedModel = P9Model in { diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/PPCSubtarget.cpp index 1ec28066dc..d31195f67e 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCSubtarget.cpp @@ -11,13 +11,13 @@ //===----------------------------------------------------------------------===// #include "PPCSubtarget.h" -#include "GISel/PPCCallLowering.h" -#include "GISel/PPCLegalizerInfo.h" -#include "GISel/PPCRegisterBankInfo.h" +#include "GISel/PPCCallLowering.h" +#include "GISel/PPCLegalizerInfo.h" +#include "GISel/PPCRegisterBankInfo.h" #include "PPC.h" #include "PPCRegisterInfo.h" #include "PPCTargetMachine.h" -#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/IR/Attributes.h" @@ -53,20 +53,20 @@ PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU, PPCSubtarget::PPCSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const PPCTargetMachine &TM) - : PPCGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), TargetTriple(TT), + : PPCGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), TargetTriple(TT), IsPPC64(TargetTriple.getArch() == Triple::ppc64 || TargetTriple.getArch() == Triple::ppc64le), TM(TM), FrameLowering(initializeSubtargetDependencies(CPU, FS)), - InstrInfo(*this), TLInfo(TM, *this) { - CallLoweringInfo.reset(new PPCCallLowering(*getTargetLowering())); - Legalizer.reset(new PPCLegalizerInfo(*this)); - auto *RBI = new PPCRegisterBankInfo(*getRegisterInfo()); - RegBankInfo.reset(RBI); - - InstSelector.reset(createPPCInstructionSelector( - *static_cast<const PPCTargetMachine *>(&TM), *this, *RBI)); -} - + InstrInfo(*this), TLInfo(TM, *this) { + CallLoweringInfo.reset(new PPCCallLowering(*getTargetLowering())); + Legalizer.reset(new PPCLegalizerInfo(*this)); + auto *RBI = new PPCRegisterBankInfo(*getRegisterInfo()); + RegBankInfo.reset(RBI); + + InstSelector.reset(createPPCInstructionSelector( + *static_cast<const PPCTargetMachine *>(&TM), *this, *RBI)); +} + void PPCSubtarget::initializeEnvironment() { StackAlignment = Align(16); CPUDirective = PPC::DIR_NONE; @@ -77,7 +77,7 @@ void PPCSubtarget::initializeEnvironment() { HasHardFloat = false; HasAltivec = false; HasSPE = false; - HasEFPU2 = false; + HasEFPU2 = false; HasFPU = false; HasVSX = false; NeedsTwoConstNR = false; @@ -86,7 +86,7 @@ void PPCSubtarget::initializeEnvironment() { HasP8Crypto = false; HasP9Vector = false; HasP9Altivec = false; - HasMMA = false; + HasMMA = false; HasP10Vector = false; HasPrefixInstrs = false; HasPCRelativeMemops = false; @@ -121,7 +121,7 @@ void PPCSubtarget::initializeEnvironment() { HasHTM = false; HasFloat128 = false; HasFusion = false; - HasStoreFusion = false; + HasStoreFusion = false; HasAddiLoadFusion = false; HasAddisLoadFusion = false; IsISA3_0 = false; @@ -131,10 +131,10 @@ void PPCSubtarget::initializeEnvironment() { VectorsUseTwoUnits = false; UsePPCPreRASchedStrategy = false; UsePPCPostRASchedStrategy = false; - PairedVectorMemops = false; + PairedVectorMemops = false; PredictableSelectIsExpensive = false; - HasModernAIXAs = false; - IsAIX = false; + HasModernAIXAs = false; + IsAIX = false; HasPOPCNTD = POPCNTD_Unavailable; } @@ -156,7 +156,7 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { InstrItins = getInstrItineraryForCPU(CPUName); // Parse features string. - ParseSubtargetFeatures(CPUName, /*TuneCPU*/ CPUName, FS); + ParseSubtargetFeatures(CPUName, /*TuneCPU*/ CPUName, FS); // If the user requested use of 64-bit regs, but the cpu selected doesn't // support it, ignore. @@ -170,7 +170,7 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { if (HasSPE && IsPPC64) report_fatal_error( "SPE is only supported for 32-bit targets.\n", false); - if (HasSPE && (HasAltivec || HasVSX || HasFPU)) + if (HasSPE && (HasAltivec || HasVSX || HasFPU)) report_fatal_error( "SPE and traditional floating point cannot both be enabled.\n", false); @@ -182,8 +182,8 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { // Determine endianness. // FIXME: Part of the TargetMachine. - IsLittleEndian = (TargetTriple.getArch() == Triple::ppc64le || - TargetTriple.getArch() == Triple::ppcle); + IsLittleEndian = (TargetTriple.getArch() == Triple::ppc64le || + TargetTriple.getArch() == Triple::ppcle); } bool PPCSubtarget::enableMachineScheduler() const { return true; } @@ -244,20 +244,20 @@ bool PPCSubtarget::isUsingPCRelativeCalls() const { return isPPC64() && hasPCRelativeMemops() && isELFv2ABI() && CodeModel::Medium == getTargetMachine().getCodeModel(); } - -// GlobalISEL -const CallLowering *PPCSubtarget::getCallLowering() const { - return CallLoweringInfo.get(); -} - -const RegisterBankInfo *PPCSubtarget::getRegBankInfo() const { - return RegBankInfo.get(); -} - -const LegalizerInfo *PPCSubtarget::getLegalizerInfo() const { - return Legalizer.get(); -} - -InstructionSelector *PPCSubtarget::getInstructionSelector() const { - return InstSelector.get(); -} + +// GlobalISEL +const CallLowering *PPCSubtarget::getCallLowering() const { + return CallLoweringInfo.get(); +} + +const RegisterBankInfo *PPCSubtarget::getRegBankInfo() const { + return RegBankInfo.get(); +} + +const LegalizerInfo *PPCSubtarget::getLegalizerInfo() const { + return Legalizer.get(); +} + +InstructionSelector *PPCSubtarget::getInstructionSelector() const { + return InstSelector.get(); +} diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCSubtarget.h b/contrib/libs/llvm12/lib/Target/PowerPC/PPCSubtarget.h index 17de081c39..50d89390d5 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCSubtarget.h +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCSubtarget.h @@ -17,9 +17,9 @@ #include "PPCISelLowering.h" #include "PPCInstrInfo.h" #include "llvm/ADT/Triple.h" -#include "llvm/CodeGen/GlobalISel/CallLowering.h" -#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" @@ -100,7 +100,7 @@ protected: bool HasAltivec; bool HasFPU; bool HasSPE; - bool HasEFPU2; + bool HasEFPU2; bool HasVSX; bool NeedsTwoConstNR; bool HasP8Vector; @@ -111,7 +111,7 @@ protected: bool HasP10Vector; bool HasPrefixInstrs; bool HasPCRelativeMemops; - bool HasMMA; + bool HasMMA; bool HasFCPSGN; bool HasFSQRT; bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES; @@ -141,7 +141,7 @@ protected: bool HasHTM; bool HasFloat128; bool HasFusion; - bool HasStoreFusion; + bool HasStoreFusion; bool HasAddiLoadFusion; bool HasAddisLoadFusion; bool IsISA3_0; @@ -151,10 +151,10 @@ protected: bool VectorsUseTwoUnits; bool UsePPCPreRASchedStrategy; bool UsePPCPostRASchedStrategy; - bool PairedVectorMemops; + bool PairedVectorMemops; bool PredictableSelectIsExpensive; - bool HasModernAIXAs; - bool IsAIX; + bool HasModernAIXAs; + bool IsAIX; POPCNTDKind HasPOPCNTD; @@ -164,12 +164,12 @@ protected: PPCTargetLowering TLInfo; SelectionDAGTargetInfo TSInfo; - /// GlobalISel related APIs. - std::unique_ptr<CallLowering> CallLoweringInfo; - std::unique_ptr<LegalizerInfo> Legalizer; - std::unique_ptr<RegisterBankInfo> RegBankInfo; - std::unique_ptr<InstructionSelector> InstSelector; - + /// GlobalISel related APIs. + std::unique_ptr<CallLowering> CallLoweringInfo; + std::unique_ptr<LegalizerInfo> Legalizer; + std::unique_ptr<RegisterBankInfo> RegBankInfo; + std::unique_ptr<InstructionSelector> InstSelector; + public: /// This constructor initializes the data members to match that /// of the specified triple. @@ -179,7 +179,7 @@ public: /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. - void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); + void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); /// getStackAlignment - Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every @@ -260,7 +260,7 @@ public: bool hasFPCVT() const { return HasFPCVT; } bool hasAltivec() const { return HasAltivec; } bool hasSPE() const { return HasSPE; } - bool hasEFPU2() const { return HasEFPU2; } + bool hasEFPU2() const { return HasEFPU2; } bool hasFPU() const { return HasFPU; } bool hasVSX() const { return HasVSX; } bool needsTwoConstNR() const { return NeedsTwoConstNR; } @@ -272,8 +272,8 @@ public: bool hasP10Vector() const { return HasP10Vector; } bool hasPrefixInstrs() const { return HasPrefixInstrs; } bool hasPCRelativeMemops() const { return HasPCRelativeMemops; } - bool hasMMA() const { return HasMMA; } - bool pairedVectorMemops() const { return PairedVectorMemops; } + bool hasMMA() const { return HasMMA; } + bool pairedVectorMemops() const { return PairedVectorMemops; } bool hasMFOCRF() const { return HasMFOCRF; } bool hasISEL() const { return HasISEL; } bool hasBPERMD() const { return HasBPERMD; } @@ -319,7 +319,7 @@ public: bool isISA3_1() const { return IsISA3_1; } bool useLongCalls() const { return UseLongCalls; } bool hasFusion() const { return HasFusion; } - bool hasStoreFusion() const { return HasStoreFusion; } + bool hasStoreFusion() const { return HasStoreFusion; } bool hasAddiLoadFusion() const { return HasAddiLoadFusion; } bool hasAddisLoadFusion() const { return HasAddisLoadFusion; } bool needsSwapsForVSXMemOps() const { @@ -406,12 +406,12 @@ public: bool isPredictableSelectIsExpensive() const { return PredictableSelectIsExpensive; } - - // GlobalISEL - const CallLowering *getCallLowering() const override; - const RegisterBankInfo *getRegBankInfo() const override; - const LegalizerInfo *getLegalizerInfo() const override; - InstructionSelector *getInstructionSelector() const override; + + // GlobalISEL + const CallLowering *getCallLowering() const override; + const RegisterBankInfo *getRegBankInfo() const override; + const LegalizerInfo *getLegalizerInfo() const override; + InstructionSelector *getInstructionSelector() const override; }; } // End llvm namespace diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/PPCTLSDynamicCall.cpp index ad88a87529..43dcc5844c 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCTLSDynamicCall.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCTLSDynamicCall.cpp @@ -50,17 +50,17 @@ protected: bool Changed = false; bool NeedFence = true; bool Is64Bit = MBB.getParent()->getSubtarget<PPCSubtarget>().isPPC64(); - bool IsPCREL = false; + bool IsPCREL = false; for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end(); I != IE;) { MachineInstr &MI = *I; - IsPCREL = isPCREL(MI); + IsPCREL = isPCREL(MI); if (MI.getOpcode() != PPC::ADDItlsgdLADDR && MI.getOpcode() != PPC::ADDItlsldLADDR && MI.getOpcode() != PPC::ADDItlsgdLADDR32 && - MI.getOpcode() != PPC::ADDItlsldLADDR32 && !IsPCREL) { + MI.getOpcode() != PPC::ADDItlsldLADDR32 && !IsPCREL) { // Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP // as scheduling fences, we skip creating fences if we already // have existing ADJCALLSTACKDOWN/UP to avoid nesting, @@ -77,15 +77,15 @@ protected: LLVM_DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n " << MI); Register OutReg = MI.getOperand(0).getReg(); - Register InReg = PPC::NoRegister; - Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3; - SmallVector<Register, 3> OrigRegs = {OutReg, GPR3}; - if (!IsPCREL) { - InReg = MI.getOperand(1).getReg(); - OrigRegs.push_back(InReg); - } + Register InReg = PPC::NoRegister; + Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3; + SmallVector<Register, 3> OrigRegs = {OutReg, GPR3}; + if (!IsPCREL) { + InReg = MI.getOperand(1).getReg(); + OrigRegs.push_back(InReg); + } DebugLoc DL = MI.getDebugLoc(); - + unsigned Opc1, Opc2; switch (MI.getOpcode()) { default: @@ -106,13 +106,13 @@ protected: Opc1 = PPC::ADDItlsldL32; Opc2 = PPC::GETtlsldADDR32; break; - case PPC::PADDI8pc: - assert(IsPCREL && "Expecting General/Local Dynamic PCRel"); - Opc1 = PPC::PADDI8pc; - Opc2 = MI.getOperand(2).getTargetFlags() == - PPCII::MO_GOT_TLSGD_PCREL_FLAG - ? PPC::GETtlsADDRPCREL - : PPC::GETtlsldADDRPCREL; + case PPC::PADDI8pc: + assert(IsPCREL && "Expecting General/Local Dynamic PCRel"); + Opc1 = PPC::PADDI8pc; + Opc2 = MI.getOperand(2).getTargetFlags() == + PPCII::MO_GOT_TLSGD_PCREL_FLAG + ? PPC::GETtlsADDRPCREL + : PPC::GETtlsldADDRPCREL; } // We create ADJCALLSTACKUP and ADJCALLSTACKDOWN around _tls_get_addr @@ -125,15 +125,15 @@ protected: BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0) .addImm(0); - MachineInstr *Addi; - if (IsPCREL) { - Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3).addImm(0); - } else { - // Expand into two ops built prior to the existing instruction. - assert(InReg != PPC::NoRegister && "Operand must be a register"); - Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3).addReg(InReg); - } - + MachineInstr *Addi; + if (IsPCREL) { + Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3).addImm(0); + } else { + // Expand into two ops built prior to the existing instruction. + assert(InReg != PPC::NoRegister && "Operand must be a register"); + Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3).addReg(InReg); + } + Addi->addOperand(MI.getOperand(2)); // The ADDItls* instruction is the first instruction in the @@ -143,10 +143,10 @@ protected: MachineInstr *Call = (BuildMI(MBB, I, DL, TII->get(Opc2), GPR3) .addReg(GPR3)); - if (IsPCREL) - Call->addOperand(MI.getOperand(2)); - else - Call->addOperand(MI.getOperand(3)); + if (IsPCREL) + Call->addOperand(MI.getOperand(2)); + else + Call->addOperand(MI.getOperand(3)); if (NeedFence) BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKUP)).addImm(0).addImm(0); @@ -171,14 +171,14 @@ protected: } public: - bool isPCREL(const MachineInstr &MI) { - return (MI.getOpcode() == PPC::PADDI8pc) && - (MI.getOperand(2).getTargetFlags() == - PPCII::MO_GOT_TLSGD_PCREL_FLAG || - MI.getOperand(2).getTargetFlags() == - PPCII::MO_GOT_TLSLD_PCREL_FLAG); - } - + bool isPCREL(const MachineInstr &MI) { + return (MI.getOpcode() == PPC::PADDI8pc) && + (MI.getOperand(2).getTargetFlags() == + PPCII::MO_GOT_TLSGD_PCREL_FLAG || + MI.getOperand(2).getTargetFlags() == + PPCII::MO_GOT_TLSLD_PCREL_FLAG); + } + bool runOnMachineFunction(MachineFunction &MF) override { TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo(); LIS = &getAnalysis<LiveIntervals>(); diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/PPCTargetMachine.cpp index d9ffab3ceb..0634833e64 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -24,18 +24,18 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/CodeGen/GlobalISel/IRTranslator.h" -#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" -#include "llvm/CodeGen/GlobalISel/Legalizer.h" -#include "llvm/CodeGen/GlobalISel/Localizer.h" -#include "llvm/CodeGen/GlobalISel/RegBankSelect.h" -#include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/CodeGen/GlobalISel/IRTranslator.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/CodeGen/GlobalISel/Legalizer.h" +#include "llvm/CodeGen/GlobalISel/Localizer.h" +#include "llvm/CodeGen/GlobalISel/RegBankSelect.h" +#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" -#include "llvm/InitializePasses.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" @@ -100,9 +100,9 @@ static cl::opt<bool> extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() { // Register the targets RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target()); - RegisterTargetMachine<PPCTargetMachine> B(getThePPC32LETarget()); - RegisterTargetMachine<PPCTargetMachine> C(getThePPC64Target()); - RegisterTargetMachine<PPCTargetMachine> D(getThePPC64LETarget()); + RegisterTargetMachine<PPCTargetMachine> B(getThePPC32LETarget()); + RegisterTargetMachine<PPCTargetMachine> C(getThePPC64Target()); + RegisterTargetMachine<PPCTargetMachine> D(getThePPC64LETarget()); PassRegistry &PR = *PassRegistry::getPassRegistry(); #ifndef NDEBUG @@ -123,7 +123,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() { initializePPCTLSDynamicCallPass(PR); initializePPCMIPeepholePass(PR); initializePPCLowerMASSVEntriesPass(PR); - initializeGlobalISel(PR); + initializeGlobalISel(PR); } /// Return the datalayout string of a subtarget. @@ -131,8 +131,8 @@ static std::string getDataLayoutString(const Triple &T) { bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le; std::string Ret; - // Most PPC* platforms are big endian, PPC(64)LE is little endian. - if (T.getArch() == Triple::ppc64le || T.getArch() == Triple::ppcle) + // Most PPC* platforms are big endian, PPC(64)LE is little endian. + if (T.getArch() == Triple::ppc64le || T.getArch() == Triple::ppcle) Ret = "e"; else Ret = "E"; @@ -146,7 +146,7 @@ static std::string getDataLayoutString(const Triple &T) { // Note, the alignment values for f64 and i64 on ppc64 in Darwin // documentation are wrong; these are correct (i.e. "what gcc does"). - Ret += "-i64:64"; + Ret += "-i64:64"; // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones. if (is64Bit) @@ -154,13 +154,13 @@ static std::string getDataLayoutString(const Triple &T) { else Ret += "-n32"; - // Specify the vector alignment explicitly. For v256i1 and v512i1, the - // calculated alignment would be 256*alignment(i1) and 512*alignment(i1), - // which is 256 and 512 bytes - way over aligned. - if ((T.getArch() == Triple::ppc64le || T.getArch() == Triple::ppc64) && - (T.isOSAIX() || T.isOSLinux())) - Ret += "-v256:256:256-v512:512:512"; - + // Specify the vector alignment explicitly. For v256i1 and v512i1, the + // calculated alignment would be 256*alignment(i1) and 512*alignment(i1), + // which is 256 and 512 bytes - way over aligned. + if ((T.getArch() == Triple::ppc64le || T.getArch() == Triple::ppc64) && + (T.isOSAIX() || T.isOSLinux())) + Ret += "-v256:256:256-v512:512:512"; + return Ret; } @@ -190,13 +190,13 @@ static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL, FullFS = "+invariant-function-descriptors"; } - if (TT.isOSAIX()) { - if (!FullFS.empty()) - FullFS = "+aix," + FullFS; - else - FullFS = "+aix"; - } - + if (TT.isOSAIX()) { + if (!FullFS.empty()) + FullFS = "+aix," + FullFS; + else + FullFS = "+aix"; + } + return FullFS; } @@ -280,8 +280,8 @@ static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) { std::make_unique<GenericScheduler>(C)); // add DAG Mutations here. DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); - if (ST.hasStoreFusion()) - DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); + if (ST.hasStoreFusion()) + DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); if (ST.hasFusion()) DAG->addMutation(createPowerPCMacroFusionDAGMutation()); @@ -296,8 +296,8 @@ static ScheduleDAGInstrs *createPPCPostMachineScheduler( std::make_unique<PPCPostRASchedStrategy>(C) : std::make_unique<PostGenericScheduler>(C), true); // add DAG Mutations here. - if (ST.hasStoreFusion()) - DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); + if (ST.hasStoreFusion()) + DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); if (ST.hasFusion()) DAG->addMutation(createPowerPCMacroFusionDAGMutation()); return DAG; @@ -329,10 +329,10 @@ PPCTargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); Attribute FSAttr = F.getFnAttribute("target-features"); - std::string CPU = - CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU; - std::string FS = - FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS; + std::string CPU = + CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU; + std::string FS = + FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS; // FIXME: This is related to the code below to reset the target options, // we need to know whether or not the soft float flag is set on the @@ -394,12 +394,12 @@ public: void addPreRegAlloc() override; void addPreSched2() override; void addPreEmitPass() override; - // GlobalISEL - bool addIRTranslator() override; - bool addLegalizeMachineIR() override; - bool addRegBankSelect() override; - bool addGlobalInstructionSelect() override; - + // GlobalISEL + bool addIRTranslator() override; + bool addLegalizeMachineIR() override; + bool addRegBankSelect() override; + bool addGlobalInstructionSelect() override; + ScheduleDAGInstrs * createMachineScheduler(MachineSchedContext *C) const override { return createPPCMachineScheduler(C); @@ -423,8 +423,8 @@ void PPCPassConfig::addIRPasses() { // Lower generic MASSV routines to PowerPC subtarget-specific entries. addPass(createPPCLowerMASSVEntriesPass()); - - // If explicitly requested, add explicit data prefetch intrinsics. + + // If explicitly requested, add explicit data prefetch intrinsics. if (EnablePrefetch.getNumOccurrences() > 0) addPass(createLoopDataPrefetchPass()); @@ -522,7 +522,7 @@ void PPCPassConfig::addPreRegAlloc() { } void PPCPassConfig::addPreSched2() { - if (getOptLevel() != CodeGenOpt::None) + if (getOptLevel() != CodeGenOpt::None) addPass(&IfConverterID); } @@ -550,24 +550,24 @@ static MachineSchedRegistry PPCPostRASchedRegistry("ppc-postra", "Run PowerPC PostRA specific scheduler", createPPCPostMachineScheduler); - -// Global ISEL -bool PPCPassConfig::addIRTranslator() { - addPass(new IRTranslator()); - return false; -} - -bool PPCPassConfig::addLegalizeMachineIR() { - addPass(new Legalizer()); - return false; -} - -bool PPCPassConfig::addRegBankSelect() { - addPass(new RegBankSelect()); - return false; -} - -bool PPCPassConfig::addGlobalInstructionSelect() { - addPass(new InstructionSelect()); - return false; -} + +// Global ISEL +bool PPCPassConfig::addIRTranslator() { + addPass(new IRTranslator()); + return false; +} + +bool PPCPassConfig::addLegalizeMachineIR() { + addPass(new Legalizer()); + return false; +} + +bool PPCPassConfig::addRegBankSelect() { + addPass(new RegBankSelect()); + return false; +} + +bool PPCPassConfig::addGlobalInstructionSelect() { + addPass(new InstructionSelect()); + return false; +} diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCTargetMachine.h b/contrib/libs/llvm12/lib/Target/PowerPC/PPCTargetMachine.h index 347ce1fc83..21faa4e710 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCTargetMachine.h +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCTargetMachine.h @@ -58,11 +58,11 @@ public: const Triple &TT = getTargetTriple(); return (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le); }; - - bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { - // Addrspacecasts are always noops. - return true; - } + + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { + // Addrspacecasts are always noops. + return true; + } }; } // end namespace llvm diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 9f100e63b0..c90ff8b7d5 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -8,19 +8,19 @@ #include "PPCTargetTransformInfo.h" #include "llvm/Analysis/CodeMetrics.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/CodeGen/CostTable.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetSchedule.h" -#include "llvm/IR/IntrinsicsPowerPC.h" +#include "llvm/IR/IntrinsicsPowerPC.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/KnownBits.h" -#include "llvm/Transforms/InstCombine/InstCombiner.h" -#include "llvm/Transforms/Utils/Local.h" - +#include "llvm/Support/KnownBits.h" +#include "llvm/Transforms/InstCombine/InstCombiner.h" +#include "llvm/Transforms/Utils/Local.h" + using namespace llvm; #define DEBUG_TYPE "ppctti" @@ -28,7 +28,7 @@ using namespace llvm; static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting", cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden); -// This is currently only used for the data prefetch pass +// This is currently only used for the data prefetch pass static cl::opt<unsigned> CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64), cl::desc("The loop prefetch cache line size")); @@ -64,109 +64,109 @@ PPCTTIImpl::getPopcntSupport(unsigned TyWidth) { return TTI::PSK_Software; } -Optional<Instruction *> -PPCTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { - Intrinsic::ID IID = II.getIntrinsicID(); - switch (IID) { - default: - break; - case Intrinsic::ppc_altivec_lvx: - case Intrinsic::ppc_altivec_lvxl: - // Turn PPC lvx -> load if the pointer is known aligned. - if (getOrEnforceKnownAlignment( - II.getArgOperand(0), Align(16), IC.getDataLayout(), &II, - &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) { - Value *Ptr = IC.Builder.CreateBitCast( - II.getArgOperand(0), PointerType::getUnqual(II.getType())); - return new LoadInst(II.getType(), Ptr, "", false, Align(16)); - } - break; - case Intrinsic::ppc_vsx_lxvw4x: - case Intrinsic::ppc_vsx_lxvd2x: { - // Turn PPC VSX loads into normal loads. - Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(0), - PointerType::getUnqual(II.getType())); - return new LoadInst(II.getType(), Ptr, Twine(""), false, Align(1)); - } - case Intrinsic::ppc_altivec_stvx: - case Intrinsic::ppc_altivec_stvxl: - // Turn stvx -> store if the pointer is known aligned. - if (getOrEnforceKnownAlignment( - II.getArgOperand(1), Align(16), IC.getDataLayout(), &II, - &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) { - Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType()); - Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy); - return new StoreInst(II.getArgOperand(0), Ptr, false, Align(16)); - } - break; - case Intrinsic::ppc_vsx_stxvw4x: - case Intrinsic::ppc_vsx_stxvd2x: { - // Turn PPC VSX stores into normal stores. - Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType()); - Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy); - return new StoreInst(II.getArgOperand(0), Ptr, false, Align(1)); - } - case Intrinsic::ppc_altivec_vperm: - // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. - // Note that ppc_altivec_vperm has a big-endian bias, so when creating - // a vectorshuffle for little endian, we must undo the transformation - // performed on vec_perm in altivec.h. That is, we must complement - // the permutation mask with respect to 31 and reverse the order of - // V1 and V2. - if (Constant *Mask = dyn_cast<Constant>(II.getArgOperand(2))) { - assert(cast<FixedVectorType>(Mask->getType())->getNumElements() == 16 && - "Bad type for intrinsic!"); - - // Check that all of the elements are integer constants or undefs. - bool AllEltsOk = true; - for (unsigned i = 0; i != 16; ++i) { - Constant *Elt = Mask->getAggregateElement(i); - if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) { - AllEltsOk = false; - break; - } - } - - if (AllEltsOk) { - // Cast the input vectors to byte vectors. - Value *Op0 = - IC.Builder.CreateBitCast(II.getArgOperand(0), Mask->getType()); - Value *Op1 = - IC.Builder.CreateBitCast(II.getArgOperand(1), Mask->getType()); - Value *Result = UndefValue::get(Op0->getType()); - - // Only extract each element once. - Value *ExtractedElts[32]; - memset(ExtractedElts, 0, sizeof(ExtractedElts)); - - for (unsigned i = 0; i != 16; ++i) { - if (isa<UndefValue>(Mask->getAggregateElement(i))) - continue; - unsigned Idx = - cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue(); - Idx &= 31; // Match the hardware behavior. - if (DL.isLittleEndian()) - Idx = 31 - Idx; - - if (!ExtractedElts[Idx]) { - Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0; - Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1; - ExtractedElts[Idx] = IC.Builder.CreateExtractElement( - Idx < 16 ? Op0ToUse : Op1ToUse, IC.Builder.getInt32(Idx & 15)); - } - - // Insert this value into the result vector. - Result = IC.Builder.CreateInsertElement(Result, ExtractedElts[Idx], - IC.Builder.getInt32(i)); - } - return CastInst::Create(Instruction::BitCast, Result, II.getType()); - } - } - break; - } - return None; -} - +Optional<Instruction *> +PPCTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { + Intrinsic::ID IID = II.getIntrinsicID(); + switch (IID) { + default: + break; + case Intrinsic::ppc_altivec_lvx: + case Intrinsic::ppc_altivec_lvxl: + // Turn PPC lvx -> load if the pointer is known aligned. + if (getOrEnforceKnownAlignment( + II.getArgOperand(0), Align(16), IC.getDataLayout(), &II, + &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) { + Value *Ptr = IC.Builder.CreateBitCast( + II.getArgOperand(0), PointerType::getUnqual(II.getType())); + return new LoadInst(II.getType(), Ptr, "", false, Align(16)); + } + break; + case Intrinsic::ppc_vsx_lxvw4x: + case Intrinsic::ppc_vsx_lxvd2x: { + // Turn PPC VSX loads into normal loads. + Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(0), + PointerType::getUnqual(II.getType())); + return new LoadInst(II.getType(), Ptr, Twine(""), false, Align(1)); + } + case Intrinsic::ppc_altivec_stvx: + case Intrinsic::ppc_altivec_stvxl: + // Turn stvx -> store if the pointer is known aligned. + if (getOrEnforceKnownAlignment( + II.getArgOperand(1), Align(16), IC.getDataLayout(), &II, + &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) { + Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType()); + Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy); + return new StoreInst(II.getArgOperand(0), Ptr, false, Align(16)); + } + break; + case Intrinsic::ppc_vsx_stxvw4x: + case Intrinsic::ppc_vsx_stxvd2x: { + // Turn PPC VSX stores into normal stores. + Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType()); + Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy); + return new StoreInst(II.getArgOperand(0), Ptr, false, Align(1)); + } + case Intrinsic::ppc_altivec_vperm: + // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. + // Note that ppc_altivec_vperm has a big-endian bias, so when creating + // a vectorshuffle for little endian, we must undo the transformation + // performed on vec_perm in altivec.h. That is, we must complement + // the permutation mask with respect to 31 and reverse the order of + // V1 and V2. + if (Constant *Mask = dyn_cast<Constant>(II.getArgOperand(2))) { + assert(cast<FixedVectorType>(Mask->getType())->getNumElements() == 16 && + "Bad type for intrinsic!"); + + // Check that all of the elements are integer constants or undefs. + bool AllEltsOk = true; + for (unsigned i = 0; i != 16; ++i) { + Constant *Elt = Mask->getAggregateElement(i); + if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) { + AllEltsOk = false; + break; + } + } + + if (AllEltsOk) { + // Cast the input vectors to byte vectors. + Value *Op0 = + IC.Builder.CreateBitCast(II.getArgOperand(0), Mask->getType()); + Value *Op1 = + IC.Builder.CreateBitCast(II.getArgOperand(1), Mask->getType()); + Value *Result = UndefValue::get(Op0->getType()); + + // Only extract each element once. + Value *ExtractedElts[32]; + memset(ExtractedElts, 0, sizeof(ExtractedElts)); + + for (unsigned i = 0; i != 16; ++i) { + if (isa<UndefValue>(Mask->getAggregateElement(i))) + continue; + unsigned Idx = + cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue(); + Idx &= 31; // Match the hardware behavior. + if (DL.isLittleEndian()) + Idx = 31 - Idx; + + if (!ExtractedElts[Idx]) { + Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0; + Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1; + ExtractedElts[Idx] = IC.Builder.CreateExtractElement( + Idx < 16 ? Op0ToUse : Op1ToUse, IC.Builder.getInt32(Idx & 15)); + } + + // Insert this value into the result vector. + Result = IC.Builder.CreateInsertElement(Result, ExtractedElts[Idx], + IC.Builder.getInt32(i)); + } + return CastInst::Create(Instruction::BitCast, Result, II.getType()); + } + } + break; + } + return None; +} + int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) { if (DisablePPCConstHoist) @@ -234,10 +234,10 @@ int PPCTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, int PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, - TTI::TargetCostKind CostKind, - Instruction *Inst) { + TTI::TargetCostKind CostKind, + Instruction *Inst) { if (DisablePPCConstHoist) - return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind, Inst); + return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind, Inst); assert(Ty->isIntegerTy()); @@ -335,29 +335,29 @@ PPCTTIImpl::getUserCost(const User *U, ArrayRef<const Value *> Operands, return BaseT::getUserCost(U, Operands, CostKind); } -// Determining the address of a TLS variable results in a function call in -// certain TLS models. -static bool memAddrUsesCTR(const Value *MemAddr, const PPCTargetMachine &TM, - SmallPtrSetImpl<const Value *> &Visited) { - // No need to traverse again if we already checked this operand. - if (!Visited.insert(MemAddr).second) - return false; - const auto *GV = dyn_cast<GlobalValue>(MemAddr); - if (!GV) { - // Recurse to check for constants that refer to TLS global variables. - if (const auto *CV = dyn_cast<Constant>(MemAddr)) - for (const auto &CO : CV->operands()) - if (memAddrUsesCTR(CO, TM, Visited)) - return true; - return false; - } - - if (!GV->isThreadLocal()) - return false; - TLSModel::Model Model = TM.getTLSModel(GV); - return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic; -} - +// Determining the address of a TLS variable results in a function call in +// certain TLS models. +static bool memAddrUsesCTR(const Value *MemAddr, const PPCTargetMachine &TM, + SmallPtrSetImpl<const Value *> &Visited) { + // No need to traverse again if we already checked this operand. + if (!Visited.insert(MemAddr).second) + return false; + const auto *GV = dyn_cast<GlobalValue>(MemAddr); + if (!GV) { + // Recurse to check for constants that refer to TLS global variables. + if (const auto *CV = dyn_cast<Constant>(MemAddr)) + for (const auto &CO : CV->operands()) + if (memAddrUsesCTR(CO, TM, Visited)) + return true; + return false; + } + + if (!GV->isThreadLocal()) + return false; + TLSModel::Model Model = TM.getTLSModel(GV); + return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic; +} + bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo, SmallPtrSetImpl<const Value *> &Visited) { const PPCTargetMachine &TM = ST->getTargetMachine(); @@ -383,34 +383,34 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo, return false; }; - auto supportedHalfPrecisionOp = [](Instruction *Inst) { - switch (Inst->getOpcode()) { - default: - return false; - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::Load: - case Instruction::Store: - case Instruction::FPToUI: - case Instruction::UIToFP: - case Instruction::FPToSI: - case Instruction::SIToFP: - return true; - } - }; - + auto supportedHalfPrecisionOp = [](Instruction *Inst) { + switch (Inst->getOpcode()) { + default: + return false; + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::Load: + case Instruction::Store: + case Instruction::FPToUI: + case Instruction::UIToFP: + case Instruction::FPToSI: + case Instruction::SIToFP: + return true; + } + }; + for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J) { - // There are no direct operations on half precision so assume that - // anything with that type requires a call except for a few select - // operations with Power9. - if (Instruction *CurrInst = dyn_cast<Instruction>(J)) { - for (const auto &Op : CurrInst->operands()) { - if (Op->getType()->getScalarType()->isHalfTy() || - CurrInst->getType()->getScalarType()->isHalfTy()) - return !(ST->isISA3_0() && supportedHalfPrecisionOp(CurrInst)); - } - } + // There are no direct operations on half precision so assume that + // anything with that type requires a call except for a few select + // operations with Power9. + if (Instruction *CurrInst = dyn_cast<Instruction>(J)) { + for (const auto &Op : CurrInst->operands()) { + if (Op->getType()->getScalarType()->isHalfTy() || + CurrInst->getType()->getScalarType()->isHalfTy()) + return !(ST->isISA3_0() && supportedHalfPrecisionOp(CurrInst)); + } + } if (CallInst *CI = dyn_cast<CallInst>(J)) { // Inline ASM is okay, unless it clobbers the ctr register. if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand())) { @@ -432,30 +432,30 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo, case Intrinsic::loop_decrement: return true; - // Binary operations on 128-bit value will use CTR. - case Intrinsic::experimental_constrained_fadd: - case Intrinsic::experimental_constrained_fsub: - case Intrinsic::experimental_constrained_fmul: - case Intrinsic::experimental_constrained_fdiv: - case Intrinsic::experimental_constrained_frem: - if (F->getType()->getScalarType()->isFP128Ty() || - F->getType()->getScalarType()->isPPC_FP128Ty()) - return true; - break; - - case Intrinsic::experimental_constrained_fptosi: - case Intrinsic::experimental_constrained_fptoui: - case Intrinsic::experimental_constrained_sitofp: - case Intrinsic::experimental_constrained_uitofp: { - Type *SrcType = CI->getArgOperand(0)->getType()->getScalarType(); - Type *DstType = CI->getType()->getScalarType(); - if (SrcType->isPPC_FP128Ty() || DstType->isPPC_FP128Ty() || - isLargeIntegerTy(!TM.isPPC64(), SrcType) || - isLargeIntegerTy(!TM.isPPC64(), DstType)) - return true; - break; - } - + // Binary operations on 128-bit value will use CTR. + case Intrinsic::experimental_constrained_fadd: + case Intrinsic::experimental_constrained_fsub: + case Intrinsic::experimental_constrained_fmul: + case Intrinsic::experimental_constrained_fdiv: + case Intrinsic::experimental_constrained_frem: + if (F->getType()->getScalarType()->isFP128Ty() || + F->getType()->getScalarType()->isPPC_FP128Ty()) + return true; + break; + + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: + case Intrinsic::experimental_constrained_sitofp: + case Intrinsic::experimental_constrained_uitofp: { + Type *SrcType = CI->getArgOperand(0)->getType()->getScalarType(); + Type *DstType = CI->getType()->getScalarType(); + if (SrcType->isPPC_FP128Ty() || DstType->isPPC_FP128Ty() || + isLargeIntegerTy(!TM.isPPC64(), SrcType) || + isLargeIntegerTy(!TM.isPPC64(), DstType)) + return true; + break; + } + // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp // because, although it does clobber the counter register, the // control can't then return to inside the loop unless there is also @@ -474,15 +474,15 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo, case Intrinsic::pow: case Intrinsic::sin: case Intrinsic::cos: - case Intrinsic::experimental_constrained_powi: - case Intrinsic::experimental_constrained_log: - case Intrinsic::experimental_constrained_log2: - case Intrinsic::experimental_constrained_log10: - case Intrinsic::experimental_constrained_exp: - case Intrinsic::experimental_constrained_exp2: - case Intrinsic::experimental_constrained_pow: - case Intrinsic::experimental_constrained_sin: - case Intrinsic::experimental_constrained_cos: + case Intrinsic::experimental_constrained_powi: + case Intrinsic::experimental_constrained_log: + case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_log10: + case Intrinsic::experimental_constrained_exp: + case Intrinsic::experimental_constrained_exp2: + case Intrinsic::experimental_constrained_pow: + case Intrinsic::experimental_constrained_sin: + case Intrinsic::experimental_constrained_cos: return true; case Intrinsic::copysign: if (CI->getArgOperand(0)->getType()->getScalarType()-> @@ -504,54 +504,54 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo, case Intrinsic::llround: Opcode = ISD::LLROUND; break; case Intrinsic::minnum: Opcode = ISD::FMINNUM; break; case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break; - case Intrinsic::experimental_constrained_fcmp: - Opcode = ISD::STRICT_FSETCC; - break; - case Intrinsic::experimental_constrained_fcmps: - Opcode = ISD::STRICT_FSETCCS; - break; - case Intrinsic::experimental_constrained_fma: - Opcode = ISD::STRICT_FMA; - break; - case Intrinsic::experimental_constrained_sqrt: - Opcode = ISD::STRICT_FSQRT; - break; - case Intrinsic::experimental_constrained_floor: - Opcode = ISD::STRICT_FFLOOR; - break; - case Intrinsic::experimental_constrained_ceil: - Opcode = ISD::STRICT_FCEIL; - break; - case Intrinsic::experimental_constrained_trunc: - Opcode = ISD::STRICT_FTRUNC; - break; - case Intrinsic::experimental_constrained_rint: - Opcode = ISD::STRICT_FRINT; - break; - case Intrinsic::experimental_constrained_lrint: - Opcode = ISD::STRICT_LRINT; - break; - case Intrinsic::experimental_constrained_llrint: - Opcode = ISD::STRICT_LLRINT; - break; - case Intrinsic::experimental_constrained_nearbyint: - Opcode = ISD::STRICT_FNEARBYINT; - break; - case Intrinsic::experimental_constrained_round: - Opcode = ISD::STRICT_FROUND; - break; - case Intrinsic::experimental_constrained_lround: - Opcode = ISD::STRICT_LROUND; - break; - case Intrinsic::experimental_constrained_llround: - Opcode = ISD::STRICT_LLROUND; - break; - case Intrinsic::experimental_constrained_minnum: - Opcode = ISD::STRICT_FMINNUM; - break; - case Intrinsic::experimental_constrained_maxnum: - Opcode = ISD::STRICT_FMAXNUM; - break; + case Intrinsic::experimental_constrained_fcmp: + Opcode = ISD::STRICT_FSETCC; + break; + case Intrinsic::experimental_constrained_fcmps: + Opcode = ISD::STRICT_FSETCCS; + break; + case Intrinsic::experimental_constrained_fma: + Opcode = ISD::STRICT_FMA; + break; + case Intrinsic::experimental_constrained_sqrt: + Opcode = ISD::STRICT_FSQRT; + break; + case Intrinsic::experimental_constrained_floor: + Opcode = ISD::STRICT_FFLOOR; + break; + case Intrinsic::experimental_constrained_ceil: + Opcode = ISD::STRICT_FCEIL; + break; + case Intrinsic::experimental_constrained_trunc: + Opcode = ISD::STRICT_FTRUNC; + break; + case Intrinsic::experimental_constrained_rint: + Opcode = ISD::STRICT_FRINT; + break; + case Intrinsic::experimental_constrained_lrint: + Opcode = ISD::STRICT_LRINT; + break; + case Intrinsic::experimental_constrained_llrint: + Opcode = ISD::STRICT_LLRINT; + break; + case Intrinsic::experimental_constrained_nearbyint: + Opcode = ISD::STRICT_FNEARBYINT; + break; + case Intrinsic::experimental_constrained_round: + Opcode = ISD::STRICT_FROUND; + break; + case Intrinsic::experimental_constrained_lround: + Opcode = ISD::STRICT_LROUND; + break; + case Intrinsic::experimental_constrained_llround: + Opcode = ISD::STRICT_LLROUND; + break; + case Intrinsic::experimental_constrained_minnum: + Opcode = ISD::STRICT_FMINNUM; + break; + case Intrinsic::experimental_constrained_maxnum: + Opcode = ISD::STRICT_FMAXNUM; + break; case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break; case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break; } @@ -700,7 +700,7 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo, } for (Value *Operand : J->operands()) - if (memAddrUsesCTR(Operand, TM, Visited)) + if (memAddrUsesCTR(Operand, TM, Visited)) return true; } @@ -760,24 +760,24 @@ bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, } } - // If an exit block has a PHI that accesses a TLS variable as one of the - // incoming values from the loop, we cannot produce a CTR loop because the - // address for that value will be computed in the loop. - SmallVector<BasicBlock *, 4> ExitBlocks; - L->getExitBlocks(ExitBlocks); - for (auto &BB : ExitBlocks) { - for (auto &PHI : BB->phis()) { - for (int Idx = 0, EndIdx = PHI.getNumIncomingValues(); Idx < EndIdx; - Idx++) { - const BasicBlock *IncomingBB = PHI.getIncomingBlock(Idx); - const Value *IncomingValue = PHI.getIncomingValue(Idx); - if (L->contains(IncomingBB) && - memAddrUsesCTR(IncomingValue, TM, Visited)) - return false; - } - } - } - + // If an exit block has a PHI that accesses a TLS variable as one of the + // incoming values from the loop, we cannot produce a CTR loop because the + // address for that value will be computed in the loop. + SmallVector<BasicBlock *, 4> ExitBlocks; + L->getExitBlocks(ExitBlocks); + for (auto &BB : ExitBlocks) { + for (auto &PHI : BB->phis()) { + for (int Idx = 0, EndIdx = PHI.getNumIncomingValues(); Idx < EndIdx; + Idx++) { + const BasicBlock *IncomingBB = PHI.getIncomingBlock(Idx); + const Value *IncomingValue = PHI.getIncomingValue(Idx); + if (L->contains(IncomingBB) && + memAddrUsesCTR(IncomingValue, TM, Visited)) + return false; + } + } + } + LLVMContext &C = L->getHeader()->getContext(); HWLoopInfo.CountType = TM.isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C); @@ -813,7 +813,7 @@ bool PPCTTIImpl::useColdCCForColdCall(Function &F) { } bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) { - // On the A2, always unroll aggressively. + // On the A2, always unroll aggressively. if (ST->getCPUDirective() == PPC::DIR_A2) return true; @@ -989,7 +989,7 @@ int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, // Legalize the type. std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); - // PPC, for both Altivec/VSX, support cheap arbitrary permutations + // PPC, for both Altivec/VSX, support cheap arbitrary permutations // (at least in the sense that there need only be one non-loop-invariant // instruction). We need one such shuffle instruction for each actual // register (this is not true for arbitrary shuffles, but is true for the @@ -1006,12 +1006,12 @@ int PPCTTIImpl::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) { } int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, - TTI::CastContextHint CCH, + TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I) { assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); - int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); + int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); Cost = vectorCostAdjustment(Cost, Opcode, Dst, Src); // TODO: Allow non-throughput costs that aren't binary. if (CostKind != TTI::TCK_RecipThroughput) @@ -1020,11 +1020,11 @@ int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, } int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I) { - int Cost = - BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + int Cost = + BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); // TODO: Handle other cost kinds. if (CostKind != TTI::TCK_RecipThroughput) return Cost; @@ -1071,7 +1071,7 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { // The cost of the load constant for a vector extract is disregarded // (invariant, easily schedulable). return vectorCostAdjustment(1, Opcode, Val, nullptr); - + } else if (ST->hasDirectMove()) // Assume permute has standard cost. // Assume move-to/move-from VSR have 2x standard cost. @@ -1144,7 +1144,7 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, // for Altivec types using the VSX instructions, but that's more expensive // than using the permutation-based load sequence. On the P8, that's no // longer true. - if (Opcode == Instruction::Load && (!ST->hasP8Vector() && IsAltivecType) && + if (Opcode == Instruction::Load && (!ST->hasP8Vector() && IsAltivecType) && *Alignment >= LT.second.getScalarType().getStoreSize()) return Cost + LT.first; // Add the cost of the permutations. @@ -1197,7 +1197,7 @@ int PPCTTIImpl::getInterleavedMemoryOpCost( getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace, CostKind); - // PPC, for both Altivec/VSX, support cheap arbitrary permutations + // PPC, for both Altivec/VSX, support cheap arbitrary permutations // (at least in the sense that there need only be one non-loop-invariant // instruction). For each result vector, we need one shuffle per incoming // vector (except that the first shuffle can take two incoming vectors @@ -1212,27 +1212,27 @@ unsigned PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, return BaseT::getIntrinsicInstrCost(ICA, CostKind); } -bool PPCTTIImpl::areFunctionArgsABICompatible( - const Function *Caller, const Function *Callee, - SmallPtrSetImpl<Argument *> &Args) const { - - // We need to ensure that argument promotion does not - // attempt to promote pointers to MMA types (__vector_pair - // and __vector_quad) since these types explicitly cannot be - // passed as arguments. Both of these types are larger than - // the 128-bit Altivec vectors and have a scalar size of 1 bit. - if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args)) - return false; - - return llvm::none_of(Args, [](Argument *A) { - auto *EltTy = cast<PointerType>(A->getType())->getElementType(); - if (EltTy->isSized()) - return (EltTy->isIntOrIntVectorTy(1) && - EltTy->getPrimitiveSizeInBits() > 128); - return false; - }); -} - +bool PPCTTIImpl::areFunctionArgsABICompatible( + const Function *Caller, const Function *Callee, + SmallPtrSetImpl<Argument *> &Args) const { + + // We need to ensure that argument promotion does not + // attempt to promote pointers to MMA types (__vector_pair + // and __vector_quad) since these types explicitly cannot be + // passed as arguments. Both of these types are larger than + // the 128-bit Altivec vectors and have a scalar size of 1 bit. + if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args)) + return false; + + return llvm::none_of(Args, [](Argument *A) { + auto *EltTy = cast<PointerType>(A->getType())->getElementType(); + if (EltTy->isSized()) + return (EltTy->isIntOrIntVectorTy(1) && + EltTy->getPrimitiveSizeInBits() > 128); + return false; + }); +} + bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) { @@ -1268,51 +1268,51 @@ bool PPCTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1, else return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); } - -bool PPCTTIImpl::isNumRegsMajorCostOfLSR() { - return false; -} - -bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, - MemIntrinsicInfo &Info) { - switch (Inst->getIntrinsicID()) { - case Intrinsic::ppc_altivec_lvx: - case Intrinsic::ppc_altivec_lvxl: - case Intrinsic::ppc_altivec_lvebx: - case Intrinsic::ppc_altivec_lvehx: - case Intrinsic::ppc_altivec_lvewx: - case Intrinsic::ppc_vsx_lxvd2x: - case Intrinsic::ppc_vsx_lxvw4x: - case Intrinsic::ppc_vsx_lxvd2x_be: - case Intrinsic::ppc_vsx_lxvw4x_be: - case Intrinsic::ppc_vsx_lxvl: - case Intrinsic::ppc_vsx_lxvll: - case Intrinsic::ppc_vsx_lxvp: { - Info.PtrVal = Inst->getArgOperand(0); - Info.ReadMem = true; - Info.WriteMem = false; - return true; - } - case Intrinsic::ppc_altivec_stvx: - case Intrinsic::ppc_altivec_stvxl: - case Intrinsic::ppc_altivec_stvebx: - case Intrinsic::ppc_altivec_stvehx: - case Intrinsic::ppc_altivec_stvewx: - case Intrinsic::ppc_vsx_stxvd2x: - case Intrinsic::ppc_vsx_stxvw4x: - case Intrinsic::ppc_vsx_stxvd2x_be: - case Intrinsic::ppc_vsx_stxvw4x_be: - case Intrinsic::ppc_vsx_stxvl: - case Intrinsic::ppc_vsx_stxvll: - case Intrinsic::ppc_vsx_stxvp: { - Info.PtrVal = Inst->getArgOperand(1); - Info.ReadMem = false; - Info.WriteMem = true; - return true; - } - default: - break; - } - - return false; -} + +bool PPCTTIImpl::isNumRegsMajorCostOfLSR() { + return false; +} + +bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, + MemIntrinsicInfo &Info) { + switch (Inst->getIntrinsicID()) { + case Intrinsic::ppc_altivec_lvx: + case Intrinsic::ppc_altivec_lvxl: + case Intrinsic::ppc_altivec_lvebx: + case Intrinsic::ppc_altivec_lvehx: + case Intrinsic::ppc_altivec_lvewx: + case Intrinsic::ppc_vsx_lxvd2x: + case Intrinsic::ppc_vsx_lxvw4x: + case Intrinsic::ppc_vsx_lxvd2x_be: + case Intrinsic::ppc_vsx_lxvw4x_be: + case Intrinsic::ppc_vsx_lxvl: + case Intrinsic::ppc_vsx_lxvll: + case Intrinsic::ppc_vsx_lxvp: { + Info.PtrVal = Inst->getArgOperand(0); + Info.ReadMem = true; + Info.WriteMem = false; + return true; + } + case Intrinsic::ppc_altivec_stvx: + case Intrinsic::ppc_altivec_stvxl: + case Intrinsic::ppc_altivec_stvebx: + case Intrinsic::ppc_altivec_stvehx: + case Intrinsic::ppc_altivec_stvewx: + case Intrinsic::ppc_vsx_stxvd2x: + case Intrinsic::ppc_vsx_stxvw4x: + case Intrinsic::ppc_vsx_stxvd2x_be: + case Intrinsic::ppc_vsx_stxvw4x_be: + case Intrinsic::ppc_vsx_stxvl: + case Intrinsic::ppc_vsx_stxvll: + case Intrinsic::ppc_vsx_stxvp: { + Info.PtrVal = Inst->getArgOperand(1); + Info.ReadMem = false; + Info.WriteMem = true; + return true; + } + default: + break; + } + + return false; +} diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCTargetTransformInfo.h b/contrib/libs/llvm12/lib/Target/PowerPC/PPCTargetTransformInfo.h index 3ef091b13d..c38ae90bc7 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -41,9 +41,9 @@ public: : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} - Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, - IntrinsicInst &II) const; - + Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, + IntrinsicInst &II) const; + /// \name Scalar TTI Implementations /// @{ @@ -52,8 +52,8 @@ public: TTI::TargetCostKind CostKind); int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty, TTI::TargetCostKind CostKind, - Instruction *Inst = nullptr); + Type *Ty, TTI::TargetCostKind CostKind, + Instruction *Inst = nullptr); int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); @@ -68,14 +68,14 @@ public: bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo); - bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info); + bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info); void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP); bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2); - bool isNumRegsMajorCostOfLSR(); + bool isNumRegsMajorCostOfLSR(); /// @} @@ -109,11 +109,11 @@ public: const Instruction *CxtI = nullptr); int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, - TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, + TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind); int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); @@ -129,9 +129,9 @@ public: unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind); - bool areFunctionArgsABICompatible(const Function *Caller, - const Function *Callee, - SmallPtrSetImpl<Argument *> &Args) const; + bool areFunctionArgsABICompatible(const Function *Caller, + const Function *Callee, + SmallPtrSetImpl<Argument *> &Args) const; /// @} }; diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/PPCVSXFMAMutate.cpp index 3c0eae1d7a..e72e29112d 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCVSXFMAMutate.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCVSXFMAMutate.cpp @@ -315,9 +315,9 @@ protected: // Extend the live interval of the addend source (it might end at the // copy to be removed, or somewhere in between there and here). This // is necessary only if it is a physical register. - if (!AddendSrcReg.isVirtual()) - for (MCRegUnitIterator Units(AddendSrcReg.asMCReg(), TRI); - Units.isValid(); ++Units) { + if (!AddendSrcReg.isVirtual()) + for (MCRegUnitIterator Units(AddendSrcReg.asMCReg(), TRI); + Units.isValid(); ++Units) { unsigned Unit = *Units; LiveRange &AddendSrcRange = LIS->getRegUnit(Unit); diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index 62ec6c3003..ff251f55af 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -254,10 +254,10 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { if (!MO.isReg()) continue; Register Reg = MO.getReg(); - // All operands need to be checked because there are instructions that - // operate on a partial register and produce a full register (such as - // XXPERMDIs). - if (isAnyVecReg(Reg, Partial)) + // All operands need to be checked because there are instructions that + // operate on a partial register and produce a full register (such as + // XXPERMDIs). + if (isAnyVecReg(Reg, Partial)) RelevantInstr = true; } @@ -690,29 +690,29 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() { LLVM_DEBUG(UseMI.dump()); LLVM_DEBUG(dbgs() << "\n"); } - - // It is possible that the load feeds a swap and that swap feeds a - // store. In such a case, the code is actually trying to store a swapped - // vector. We must reject such webs. - if (SwapVector[UseIdx].IsSwap && !SwapVector[UseIdx].IsLoad && - !SwapVector[UseIdx].IsStore) { - Register SwapDefReg = UseMI.getOperand(0).getReg(); - for (MachineInstr &UseOfUseMI : - MRI->use_nodbg_instructions(SwapDefReg)) { - int UseOfUseIdx = SwapMap[&UseOfUseMI]; - if (SwapVector[UseOfUseIdx].IsStore) { - SwapVector[Repr].WebRejected = 1; - LLVM_DEBUG( - dbgs() << format( - "Web %d rejected for load/swap feeding a store\n", Repr)); - LLVM_DEBUG(dbgs() << " def " << EntryIdx << ": "); - LLVM_DEBUG(MI->dump()); - LLVM_DEBUG(dbgs() << " use " << UseIdx << ": "); - LLVM_DEBUG(UseMI.dump()); - LLVM_DEBUG(dbgs() << "\n"); - } - } - } + + // It is possible that the load feeds a swap and that swap feeds a + // store. In such a case, the code is actually trying to store a swapped + // vector. We must reject such webs. + if (SwapVector[UseIdx].IsSwap && !SwapVector[UseIdx].IsLoad && + !SwapVector[UseIdx].IsStore) { + Register SwapDefReg = UseMI.getOperand(0).getReg(); + for (MachineInstr &UseOfUseMI : + MRI->use_nodbg_instructions(SwapDefReg)) { + int UseOfUseIdx = SwapMap[&UseOfUseMI]; + if (SwapVector[UseOfUseIdx].IsStore) { + SwapVector[Repr].WebRejected = 1; + LLVM_DEBUG( + dbgs() << format( + "Web %d rejected for load/swap feeding a store\n", Repr)); + LLVM_DEBUG(dbgs() << " def " << EntryIdx << ": "); + LLVM_DEBUG(MI->dump()); + LLVM_DEBUG(dbgs() << " use " << UseIdx << ": "); + LLVM_DEBUG(UseMI.dump()); + LLVM_DEBUG(dbgs() << "\n"); + } + } + } } // Reject webs that contain swapping stores that are fed by something diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp index c051e56e8d..6bb952f27f 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp @@ -14,10 +14,10 @@ Target &llvm::getThePPC32Target() { static Target ThePPC32Target; return ThePPC32Target; } -Target &llvm::getThePPC32LETarget() { - static Target ThePPC32LETarget; - return ThePPC32LETarget; -} +Target &llvm::getThePPC32LETarget() { + static Target ThePPC32LETarget; + return ThePPC32LETarget; +} Target &llvm::getThePPC64Target() { static Target ThePPC64Target; return ThePPC64Target; @@ -28,12 +28,12 @@ Target &llvm::getThePPC64LETarget() { } extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTargetInfo() { - RegisterTarget<Triple::ppc, /*HasJIT=*/true> W(getThePPC32Target(), "ppc32", + RegisterTarget<Triple::ppc, /*HasJIT=*/true> W(getThePPC32Target(), "ppc32", "PowerPC 32", "PPC"); - RegisterTarget<Triple::ppcle, /*HasJIT=*/true> X( - getThePPC32LETarget(), "ppc32le", "PowerPC 32 LE", "PPC"); - + RegisterTarget<Triple::ppcle, /*HasJIT=*/true> X( + getThePPC32LETarget(), "ppc32le", "PowerPC 32 LE", "PPC"); + RegisterTarget<Triple::ppc64, /*HasJIT=*/true> Y(getThePPC64Target(), "ppc64", "PowerPC 64", "PPC"); diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h b/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h index d741e55287..f9d20ef00d 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h +++ b/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h @@ -14,7 +14,7 @@ namespace llvm { class Target; Target &getThePPC32Target(); -Target &getThePPC32LETarget(); +Target &getThePPC32LETarget(); Target &getThePPC64Target(); Target &getThePPC64LETarget(); diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/ya.make b/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/ya.make index 551efb30b2..68badb4490 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/ya.make +++ b/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/ya.make @@ -12,13 +12,13 @@ LICENSE(Apache-2.0 WITH LLVM-exception) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( - contrib/libs/llvm12 - contrib/libs/llvm12/lib/Support + contrib/libs/llvm12 + contrib/libs/llvm12/lib/Support ) ADDINCL( - contrib/libs/llvm12/lib/Target/PowerPC - contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo + contrib/libs/llvm12/lib/Target/PowerPC + contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo ) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/ya.make b/contrib/libs/llvm12/lib/Target/PowerPC/ya.make index 04512f12fe..8c7039a575 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/ya.make +++ b/contrib/libs/llvm12/lib/Target/PowerPC/ya.make @@ -12,27 +12,27 @@ LICENSE(Apache-2.0 WITH LLVM-exception) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( - contrib/libs/llvm12 - contrib/libs/llvm12/include - contrib/libs/llvm12/lib/Analysis - contrib/libs/llvm12/lib/BinaryFormat - contrib/libs/llvm12/lib/CodeGen - contrib/libs/llvm12/lib/CodeGen/AsmPrinter - contrib/libs/llvm12/lib/CodeGen/GlobalISel - contrib/libs/llvm12/lib/CodeGen/SelectionDAG - contrib/libs/llvm12/lib/IR - contrib/libs/llvm12/lib/MC - contrib/libs/llvm12/lib/Support - contrib/libs/llvm12/lib/Target - contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc - contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo - contrib/libs/llvm12/lib/Transforms/Scalar - contrib/libs/llvm12/lib/Transforms/Utils + contrib/libs/llvm12 + contrib/libs/llvm12/include + contrib/libs/llvm12/lib/Analysis + contrib/libs/llvm12/lib/BinaryFormat + contrib/libs/llvm12/lib/CodeGen + contrib/libs/llvm12/lib/CodeGen/AsmPrinter + contrib/libs/llvm12/lib/CodeGen/GlobalISel + contrib/libs/llvm12/lib/CodeGen/SelectionDAG + contrib/libs/llvm12/lib/IR + contrib/libs/llvm12/lib/MC + contrib/libs/llvm12/lib/Support + contrib/libs/llvm12/lib/Target + contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc + contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo + contrib/libs/llvm12/lib/Transforms/Scalar + contrib/libs/llvm12/lib/Transforms/Utils ) ADDINCL( - ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/PowerPC - contrib/libs/llvm12/lib/Target/PowerPC + ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/PowerPC + contrib/libs/llvm12/lib/Target/PowerPC ) NO_COMPILER_WARNINGS() @@ -40,10 +40,10 @@ NO_COMPILER_WARNINGS() NO_UTIL() SRCS( - GISel/PPCCallLowering.cpp - GISel/PPCInstructionSelector.cpp - GISel/PPCLegalizerInfo.cpp - GISel/PPCRegisterBankInfo.cpp + GISel/PPCCallLowering.cpp + GISel/PPCInstructionSelector.cpp + GISel/PPCLegalizerInfo.cpp + GISel/PPCRegisterBankInfo.cpp PPCAsmPrinter.cpp PPCBoolRetToInt.cpp PPCBranchCoalescing.cpp |