author     shadchin <shadchin@yandex-team.ru>              2022-02-10 16:44:30 +0300
committer  Daniil Cherednik <dcherednik@yandex-team.ru>    2022-02-10 16:44:30 +0300
commit     2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch)
tree       012bb94d777798f1f56ac1cec429509766d05181 /contrib/libs/llvm12/lib/Target/ARM
parent     6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff)
download   ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Target/ARM')
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/A15SDOptimizer.cpp | 2
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARM.h | 10
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARM.td | 180
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMAsmPrinter.cpp | 92
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMBaseInstrInfo.cpp | 1210
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMBaseInstrInfo.h | 300
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMBaseRegisterInfo.cpp | 30
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMBaseRegisterInfo.h | 12
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMBlockPlacement.cpp | 456
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMCallLowering.cpp | 64
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMCallLowering.h | 8
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMConstantIslandPass.cpp | 132
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMExpandPseudoInsts.cpp | 164
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMFastISel.cpp | 24
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMFeatures.h | 2
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMFrameLowering.cpp | 12
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMFrameLowering.h | 6
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMHazardRecognizer.cpp | 358
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMHazardRecognizer.h | 68
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMISelLowering.cpp | 1156
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMISelLowering.h | 54
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMInstrFormats.td | 6
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMInstrInfo.td | 148
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMInstrMVE.td | 970
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMInstrNEON.td | 144
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMInstrThumb.td | 22
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMInstrThumb2.td | 92
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMInstrVFP.td | 46
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMLegalizerInfo.cpp | 2
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 728
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMLowOverheadLoops.cpp | 1660
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMParallelDSP.cpp | 6
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMPredicates.td | 10
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMRegisterBankInfo.cpp | 14
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMRegisterInfo.td | 34
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMSLSHardening.cpp | 832
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMSchedule.td | 108
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMScheduleA57.td | 142
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMScheduleA57WriteRes.td | 14
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMScheduleA9.td | 4
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMScheduleM7.td | 976
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMScheduleR52.td | 4
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMScheduleSwift.td | 4
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMSubtarget.cpp | 18
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMSubtarget.h | 36
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMTargetMachine.cpp | 34
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMTargetMachine.h | 12
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMTargetTransformInfo.cpp | 1352
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ARMTargetTransformInfo.h | 64
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 36
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/AsmParser/ya.make | 22
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 10
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/Disassembler/ya.make | 20
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h | 52
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 2
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h | 2
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h | 2
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp | 2
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 262
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 32
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ya.make | 22
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/MVEGatherScatterLowering.cpp | 428
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/MVETailPredUtils.h | 314
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/MVETailPredication.cpp | 322
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/MVEVPTBlockPass.cpp | 54
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/MVEVPTOptimisationsPass.cpp | 888
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/TargetInfo/ya.make | 8
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/Thumb2InstrInfo.cpp | 94
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/Thumb2InstrInfo.h | 16
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/Thumb2SizeReduction.cpp | 2
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/Utils/ya.make | 12
-rw-r--r--  contrib/libs/llvm12/lib/Target/ARM/ya.make | 42
72 files changed, 7233 insertions, 7233 deletions
diff --git a/contrib/libs/llvm12/lib/Target/ARM/A15SDOptimizer.cpp b/contrib/libs/llvm12/lib/Target/ARM/A15SDOptimizer.cpp
index bb81233cf8..6c6f49ff6d 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/A15SDOptimizer.cpp
@@ -359,7 +359,7 @@ void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI,
SmallVector<MachineInstr *, 8> Front;
Front.push_back(MI);
while (Front.size() != 0) {
- MI = Front.pop_back_val();
+ MI = Front.pop_back_val();
// If we have already explored this MachineInstr, ignore it.
if (Reached.find(MI) != Reached.end())
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARM.h b/contrib/libs/llvm12/lib/Target/ARM/ARM.h
index f4fdc98037..2fbfabe828 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARM.h
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARM.h
@@ -37,7 +37,7 @@ class PassRegistry;
Pass *createMVETailPredicationPass();
FunctionPass *createARMLowOverheadLoopsPass();
-FunctionPass *createARMBlockPlacementPass();
+FunctionPass *createARMBlockPlacementPass();
Pass *createARMParallelDSPPass();
FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel);
@@ -56,8 +56,8 @@ InstructionSelector *
createARMInstructionSelector(const ARMBaseTargetMachine &TM, const ARMSubtarget &STI,
const ARMRegisterBankInfo &RBI);
Pass *createMVEGatherScatterLoweringPass();
-FunctionPass *createARMSLSHardeningPass();
-FunctionPass *createARMIndirectThunks();
+FunctionPass *createARMSLSHardeningPass();
+FunctionPass *createARMIndirectThunks();
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
@@ -72,10 +72,10 @@ void initializeThumb2ITBlockPass(PassRegistry &);
void initializeMVEVPTBlockPass(PassRegistry &);
void initializeMVEVPTOptimisationsPass(PassRegistry &);
void initializeARMLowOverheadLoopsPass(PassRegistry &);
-void initializeARMBlockPlacementPass(PassRegistry &);
+void initializeARMBlockPlacementPass(PassRegistry &);
void initializeMVETailPredicationPass(PassRegistry &);
void initializeMVEGatherScatterLoweringPass(PassRegistry &);
-void initializeARMSLSHardeningPass(PassRegistry &);
+void initializeARMSLSHardeningPass(PassRegistry &);
} // end namespace llvm
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARM.td b/contrib/libs/llvm12/lib/Target/ARM/ARM.td
index 3d0a0bf7f8..9540784c7f 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARM.td
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARM.td
@@ -535,10 +535,10 @@ def HasV8_6aOps : SubtargetFeature<"v8.6a", "HasV8_6aOps", "true",
[HasV8_5aOps, FeatureBF16,
FeatureMatMulInt8]>;
-def HasV8_7aOps : SubtargetFeature<"v8.7a", "HasV8_7aOps", "true",
- "Support ARM v8.7a instructions",
- [HasV8_6aOps]>;
-
+def HasV8_7aOps : SubtargetFeature<"v8.7a", "HasV8_7aOps", "true",
+ "Support ARM v8.7a instructions",
+ [HasV8_6aOps]>;
+
def HasV8_1MMainlineOps : SubtargetFeature<
"v8.1m.main", "HasV8_1MMainlineOps", "true",
"Support ARM v8-1M Mainline instructions",
@@ -563,20 +563,20 @@ foreach i = {0-7} in
[HasCDEOps]>;
//===----------------------------------------------------------------------===//
-// Control codegen mitigation against Straight Line Speculation vulnerability.
-//===----------------------------------------------------------------------===//
-
-def FeatureHardenSlsRetBr : SubtargetFeature<"harden-sls-retbr",
- "HardenSlsRetBr", "true",
- "Harden against straight line speculation across RETurn and BranchRegister "
- "instructions">;
-def FeatureHardenSlsBlr : SubtargetFeature<"harden-sls-blr",
- "HardenSlsBlr", "true",
- "Harden against straight line speculation across indirect calls">;
-
-
-
-//===----------------------------------------------------------------------===//
+// Control codegen mitigation against Straight Line Speculation vulnerability.
+//===----------------------------------------------------------------------===//
+
+def FeatureHardenSlsRetBr : SubtargetFeature<"harden-sls-retbr",
+ "HardenSlsRetBr", "true",
+ "Harden against straight line speculation across RETurn and BranchRegister "
+ "instructions">;
+def FeatureHardenSlsBlr : SubtargetFeature<"harden-sls-blr",
+ "HardenSlsBlr", "true",
+ "Harden against straight line speculation across indirect calls">;
+
+
+
+//===----------------------------------------------------------------------===//
// ARM Processor subtarget features.
//
@@ -616,14 +616,14 @@ def ProcA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77",
"Cortex-A77 ARM processors", []>;
def ProcA78 : SubtargetFeature<"cortex-a78", "ARMProcFamily", "CortexA78",
"Cortex-A78 ARM processors", []>;
-def ProcA78C : SubtargetFeature<"a78c", "ARMProcFamily", "CortexA78C",
- "Cortex-A78C ARM processors", []>;
+def ProcA78C : SubtargetFeature<"a78c", "ARMProcFamily", "CortexA78C",
+ "Cortex-A78C ARM processors", []>;
def ProcX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1",
"Cortex-X1 ARM processors", []>;
-def ProcV1 : SubtargetFeature<"neoverse-v1", "ARMProcFamily",
- "NeoverseV1", "Neoverse-V1 ARM processors", []>;
-
+def ProcV1 : SubtargetFeature<"neoverse-v1", "ARMProcFamily",
+ "NeoverseV1", "Neoverse-V1 ARM processors", []>;
+
def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait",
"Qualcomm Krait processors", []>;
def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
@@ -662,8 +662,8 @@ def ProcR52 : SubtargetFeature<"r52", "ARMProcFamily", "CortexR52",
def ProcM3 : SubtargetFeature<"m3", "ARMProcFamily", "CortexM3",
"Cortex-M3 ARM processors", []>;
-def ProcM7 : SubtargetFeature<"m7", "ARMProcFamily", "CortexM7",
- "Cortex-M7 ARM processors", []>;
+def ProcM7 : SubtargetFeature<"m7", "ARMProcFamily", "CortexM7",
+ "Cortex-M7 ARM processors", []>;
//===----------------------------------------------------------------------===//
// ARM Helper classes.
@@ -852,19 +852,19 @@ def ARMv86a : Architecture<"armv8.6-a", "ARMv86a", [HasV8_6aOps,
FeatureCRC,
FeatureRAS,
FeatureDotProd]>;
-def ARMv87a : Architecture<"armv8.7-a", "ARMv86a", [HasV8_7aOps,
- FeatureAClass,
- FeatureDB,
- FeatureFPARMv8,
- FeatureNEON,
- FeatureDSP,
- FeatureTrustZone,
- FeatureMP,
- FeatureVirtualization,
- FeatureCrypto,
- FeatureCRC,
- FeatureRAS,
- FeatureDotProd]>;
+def ARMv87a : Architecture<"armv8.7-a", "ARMv86a", [HasV8_7aOps,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCrypto,
+ FeatureCRC,
+ FeatureRAS,
+ FeatureDotProd]>;
def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops,
FeatureRClass,
@@ -919,14 +919,14 @@ def ARMv6j : Architecture<"armv6j", "ARMv7a", [ARMv6]>;
def ARMv7k : Architecture<"armv7k", "ARMv7a", [ARMv7a]>;
def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>;
-//===----------------------------------------------------------------------===//
-// Register File Description
-//===----------------------------------------------------------------------===//
-
-include "ARMRegisterInfo.td"
-include "ARMRegisterBanks.td"
-include "ARMCallingConv.td"
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+include "ARMRegisterInfo.td"
+include "ARMRegisterBanks.td"
+include "ARMCallingConv.td"
+
//===----------------------------------------------------------------------===//
// ARM schedules.
//===----------------------------------------------------------------------===//
@@ -935,25 +935,25 @@ include "ARMPredicates.td"
include "ARMSchedule.td"
//===----------------------------------------------------------------------===//
-// Instruction Descriptions
-//===----------------------------------------------------------------------===//
-
-include "ARMInstrInfo.td"
-def ARMInstrInfo : InstrInfo;
-
-//===----------------------------------------------------------------------===//
-// ARM schedules
-//
-include "ARMScheduleV6.td"
-include "ARMScheduleA8.td"
-include "ARMScheduleA9.td"
-include "ARMScheduleSwift.td"
-include "ARMScheduleR52.td"
-include "ARMScheduleA57.td"
-include "ARMScheduleM4.td"
-include "ARMScheduleM7.td"
-
-//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "ARMInstrInfo.td"
+def ARMInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// ARM schedules
+//
+include "ARMScheduleV6.td"
+include "ARMScheduleA8.td"
+include "ARMScheduleA9.td"
+include "ARMScheduleSwift.td"
+include "ARMScheduleR52.td"
+include "ARMScheduleA57.td"
+include "ARMScheduleM4.td"
+include "ARMScheduleM7.td"
+
+//===----------------------------------------------------------------------===//
// ARM processors
//
// Dummy CPU, used to target architectures
@@ -1193,10 +1193,10 @@ def : ProcessorModel<"cortex-m4", CortexM4Model, [ARMv7em,
FeatureUseMISched,
FeatureHasNoBranchPredictor]>;
-def : ProcessorModel<"cortex-m7", CortexM7Model, [ARMv7em,
- ProcM7,
- FeatureFPARMv8_D16,
- FeatureUseMISched]>;
+def : ProcessorModel<"cortex-m7", CortexM7Model, [ARMv7em,
+ ProcM7,
+ FeatureFPARMv8_D16,
+ FeatureUseMISched]>;
def : ProcNoItin<"cortex-m23", [ARMv8mBaseline,
FeatureNoMovt]>;
@@ -1310,14 +1310,14 @@ def : ProcNoItin<"cortex-a78", [ARMv82a, ProcA78,
FeatureFullFP16,
FeatureDotProd]>;
-def : ProcNoItin<"cortex-a78c", [ARMv82a, ProcA78C,
- FeatureHWDivThumb,
- FeatureHWDivARM,
- FeatureCrypto,
- FeatureCRC,
- FeatureDotProd,
- FeatureFullFP16]>;
-
+def : ProcNoItin<"cortex-a78c", [ARMv82a, ProcA78C,
+ FeatureHWDivThumb,
+ FeatureHWDivARM,
+ FeatureCrypto,
+ FeatureCRC,
+ FeatureDotProd,
+ FeatureFullFP16]>;
+
def : ProcNoItin<"cortex-x1", [ARMv82a, ProcX1,
FeatureHWDivThumb,
FeatureHWDivARM,
@@ -1326,15 +1326,15 @@ def : ProcNoItin<"cortex-x1", [ARMv82a, ProcX1,
FeatureFullFP16,
FeatureDotProd]>;
-def : ProcNoItin<"neoverse-v1", [ARMv84a,
- FeatureHWDivThumb,
- FeatureHWDivARM,
- FeatureCrypto,
- FeatureCRC,
- FeatureFullFP16,
- FeatureBF16,
- FeatureMatMulInt8]>;
-
+def : ProcNoItin<"neoverse-v1", [ARMv84a,
+ FeatureHWDivThumb,
+ FeatureHWDivARM,
+ FeatureCrypto,
+ FeatureCRC,
+ FeatureFullFP16,
+ FeatureBF16,
+ FeatureMatMulInt8]>;
+
def : ProcNoItin<"neoverse-n1", [ARMv82a,
FeatureHWDivThumb,
FeatureHWDivARM,
@@ -1342,11 +1342,11 @@ def : ProcNoItin<"neoverse-n1", [ARMv82a,
FeatureCRC,
FeatureDotProd]>;
-def : ProcNoItin<"neoverse-n2", [ARMv85a,
- FeatureBF16,
- FeatureMatMulInt8,
- FeaturePerfMon]>;
-
+def : ProcNoItin<"neoverse-n2", [ARMv85a,
+ FeatureBF16,
+ FeatureMatMulInt8,
+ FeaturePerfMon]>;
+
def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
FeatureHasRetAddrStack,
FeatureNEONForFP,
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMAsmPrinter.cpp
index 04e21867d5..31059e5910 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -285,7 +285,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
return false;
case 'y': // Print a VFP single precision register as indexed double.
if (MI->getOperand(OpNum).isReg()) {
- MCRegister Reg = MI->getOperand(OpNum).getReg().asMCReg();
+ MCRegister Reg = MI->getOperand(OpNum).getReg().asMCReg();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
// Find the 'd' register that has this 's' register as a sub-register,
// and determine the lane number.
@@ -903,7 +903,7 @@ void ARMAsmPrinter::emitMachineConstantPoolValue(
MCSymbol *MCSym;
if (ACPV->isLSDA()) {
- MCSym = getMBBExceptionSym(MF->front());
+ MCSym = getMBBExceptionSym(MF->front());
} else if (ACPV->isBlockAddress()) {
const BlockAddress *BA =
cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress();
@@ -1897,7 +1897,7 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
// LSJLJEH:
Register SrcReg = MI->getOperand(0).getReg();
Register ValReg = MI->getOperand(1).getReg();
- MCSymbol *Label = OutContext.createTempSymbol("SJLJEH");
+ MCSymbol *Label = OutContext.createTempSymbol("SJLJEH");
OutStreamer->AddComment("eh_setjmp begin");
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr)
.addReg(ValReg)
@@ -2180,49 +2180,49 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
case ARM::PATCHABLE_TAIL_CALL:
LowerPATCHABLE_TAIL_CALL(*MI);
return;
- case ARM::SpeculationBarrierISBDSBEndBB: {
- // Print DSB SYS + ISB
- MCInst TmpInstDSB;
- TmpInstDSB.setOpcode(ARM::DSB);
- TmpInstDSB.addOperand(MCOperand::createImm(0xf));
- EmitToStreamer(*OutStreamer, TmpInstDSB);
- MCInst TmpInstISB;
- TmpInstISB.setOpcode(ARM::ISB);
- TmpInstISB.addOperand(MCOperand::createImm(0xf));
- EmitToStreamer(*OutStreamer, TmpInstISB);
- return;
- }
- case ARM::t2SpeculationBarrierISBDSBEndBB: {
- // Print DSB SYS + ISB
- MCInst TmpInstDSB;
- TmpInstDSB.setOpcode(ARM::t2DSB);
- TmpInstDSB.addOperand(MCOperand::createImm(0xf));
- TmpInstDSB.addOperand(MCOperand::createImm(ARMCC::AL));
- TmpInstDSB.addOperand(MCOperand::createReg(0));
- EmitToStreamer(*OutStreamer, TmpInstDSB);
- MCInst TmpInstISB;
- TmpInstISB.setOpcode(ARM::t2ISB);
- TmpInstISB.addOperand(MCOperand::createImm(0xf));
- TmpInstISB.addOperand(MCOperand::createImm(ARMCC::AL));
- TmpInstISB.addOperand(MCOperand::createReg(0));
- EmitToStreamer(*OutStreamer, TmpInstISB);
- return;
- }
- case ARM::SpeculationBarrierSBEndBB: {
- // Print SB
- MCInst TmpInstSB;
- TmpInstSB.setOpcode(ARM::SB);
- EmitToStreamer(*OutStreamer, TmpInstSB);
- return;
- }
- case ARM::t2SpeculationBarrierSBEndBB: {
- // Print SB
- MCInst TmpInstSB;
- TmpInstSB.setOpcode(ARM::t2SB);
- EmitToStreamer(*OutStreamer, TmpInstSB);
- return;
- }
- }
+ case ARM::SpeculationBarrierISBDSBEndBB: {
+ // Print DSB SYS + ISB
+ MCInst TmpInstDSB;
+ TmpInstDSB.setOpcode(ARM::DSB);
+ TmpInstDSB.addOperand(MCOperand::createImm(0xf));
+ EmitToStreamer(*OutStreamer, TmpInstDSB);
+ MCInst TmpInstISB;
+ TmpInstISB.setOpcode(ARM::ISB);
+ TmpInstISB.addOperand(MCOperand::createImm(0xf));
+ EmitToStreamer(*OutStreamer, TmpInstISB);
+ return;
+ }
+ case ARM::t2SpeculationBarrierISBDSBEndBB: {
+ // Print DSB SYS + ISB
+ MCInst TmpInstDSB;
+ TmpInstDSB.setOpcode(ARM::t2DSB);
+ TmpInstDSB.addOperand(MCOperand::createImm(0xf));
+ TmpInstDSB.addOperand(MCOperand::createImm(ARMCC::AL));
+ TmpInstDSB.addOperand(MCOperand::createReg(0));
+ EmitToStreamer(*OutStreamer, TmpInstDSB);
+ MCInst TmpInstISB;
+ TmpInstISB.setOpcode(ARM::t2ISB);
+ TmpInstISB.addOperand(MCOperand::createImm(0xf));
+ TmpInstISB.addOperand(MCOperand::createImm(ARMCC::AL));
+ TmpInstISB.addOperand(MCOperand::createReg(0));
+ EmitToStreamer(*OutStreamer, TmpInstISB);
+ return;
+ }
+ case ARM::SpeculationBarrierSBEndBB: {
+ // Print SB
+ MCInst TmpInstSB;
+ TmpInstSB.setOpcode(ARM::SB);
+ EmitToStreamer(*OutStreamer, TmpInstSB);
+ return;
+ }
+ case ARM::t2SpeculationBarrierSBEndBB: {
+ // Print SB
+ MCInst TmpInstSB;
+ TmpInstSB.setOpcode(ARM::t2SB);
+ EmitToStreamer(*OutStreamer, TmpInstSB);
+ return;
+ }
+ }
MCInst TmpInst;
LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMBaseInstrInfo.cpp
index e418d53b56..d3047e1ae7 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -19,7 +19,7 @@
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
-#include "MVETailPredUtils.h"
+#include "MVETailPredUtils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
@@ -36,8 +36,8 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineScheduler.h"
-#include "llvm/CodeGen/MultiHazardRecognizer.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/MultiHazardRecognizer.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -134,43 +134,43 @@ ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
}
-// Called during:
-// - pre-RA scheduling
-// - post-RA scheduling when FeatureUseMISched is set
-ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetMIHazardRecognizer(
- const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
- MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
-
- // We would like to restrict this hazard recognizer to only
- // post-RA scheduling; we can tell that we're post-RA because we don't
- // track VRegLiveness.
- // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM
- // banks banked on bit 2. Assume that TCMs are in use.
- if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness())
- MHR->AddHazardRecognizer(
- std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true));
-
- // Not inserting ARMHazardRecognizerFPMLx because that would change
- // legacy behavior
-
- auto BHR = TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG);
- MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
- return MHR;
-}
-
-// Called during post-RA scheduling when FeatureUseMISched is not set
+// Called during:
+// - pre-RA scheduling
+// - post-RA scheduling when FeatureUseMISched is set
+ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetMIHazardRecognizer(
+ const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
+ MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
+
+ // We would like to restrict this hazard recognizer to only
+ // post-RA scheduling; we can tell that we're post-RA because we don't
+ // track VRegLiveness.
+ // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM
+ // banks banked on bit 2. Assume that TCMs are in use.
+ if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness())
+ MHR->AddHazardRecognizer(
+ std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true));
+
+ // Not inserting ARMHazardRecognizerFPMLx because that would change
+ // legacy behavior
+
+ auto BHR = TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG);
+ MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
+ return MHR;
+}
+
+// Called during post-RA scheduling when FeatureUseMISched is not set
ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const {
- MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
-
+ MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
+
if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
- MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>());
-
- auto BHR = TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
- if (BHR)
- MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
- return MHR;
+ MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>());
+
+ auto BHR = TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
+ if (BHR)
+ MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
+ return MHR;
}
MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
@@ -351,8 +351,8 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
TBB = nullptr;
FBB = nullptr;
- MachineBasicBlock::instr_iterator I = MBB.instr_end();
- if (I == MBB.instr_begin())
+ MachineBasicBlock::instr_iterator I = MBB.instr_end();
+ if (I == MBB.instr_begin())
return false; // Empty blocks are easy.
--I;
@@ -364,12 +364,12 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
// out.
bool CantAnalyze = false;
- // Skip over DEBUG values, predicated nonterminators and speculation
- // barrier terminators.
- while (I->isDebugInstr() || !I->isTerminator() ||
- isSpeculationBarrierEndBBOpcode(I->getOpcode()) ||
- I->getOpcode() == ARM::t2DoLoopStartTP){
- if (I == MBB.instr_begin())
+ // Skip over DEBUG values, predicated nonterminators and speculation
+ // barrier terminators.
+ while (I->isDebugInstr() || !I->isTerminator() ||
+ isSpeculationBarrierEndBBOpcode(I->getOpcode()) ||
+ I->getOpcode() == ARM::t2DoLoopStartTP){
+ if (I == MBB.instr_begin())
return false;
--I;
}
@@ -393,7 +393,7 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
Cond.push_back(I->getOperand(2));
} else if (I->isReturn()) {
// Returns can't be analyzed, but we should run cleanup.
- CantAnalyze = true;
+ CantAnalyze = true;
} else {
// We encountered other unrecognized terminator. Bail out immediately.
return true;
@@ -414,30 +414,30 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
// unconditional branch.
if (AllowModify) {
MachineBasicBlock::iterator DI = std::next(I);
- while (DI != MBB.instr_end()) {
+ while (DI != MBB.instr_end()) {
MachineInstr &InstToDelete = *DI;
++DI;
- // Speculation barriers must not be deleted.
- if (isSpeculationBarrierEndBBOpcode(InstToDelete.getOpcode()))
- continue;
+ // Speculation barriers must not be deleted.
+ if (isSpeculationBarrierEndBBOpcode(InstToDelete.getOpcode()))
+ continue;
InstToDelete.eraseFromParent();
}
}
}
- if (CantAnalyze) {
- // We may not be able to analyze the block, but we could still have
- // an unconditional branch as the last instruction in the block, which
- // just branches to layout successor. If this is the case, then just
- // remove it if we're allowed to make modifications.
- if (AllowModify && !isPredicated(MBB.back()) &&
- isUncondBranchOpcode(MBB.back().getOpcode()) &&
- TBB && MBB.isLayoutSuccessor(TBB))
- removeBranch(MBB);
+ if (CantAnalyze) {
+ // We may not be able to analyze the block, but we could still have
+ // an unconditional branch as the last instruction in the block, which
+ // just branches to layout successor. If this is the case, then just
+ // remove it if we're allowed to make modifications.
+ if (AllowModify && !isPredicated(MBB.back()) &&
+ isUncondBranchOpcode(MBB.back().getOpcode()) &&
+ TBB && MBB.isLayoutSuccessor(TBB))
+ removeBranch(MBB);
return true;
- }
+ }
- if (I == MBB.instr_begin())
+ if (I == MBB.instr_begin())
return false;
--I;
@@ -586,18 +586,18 @@ bool ARMBaseInstrInfo::PredicateInstruction(
MachineOperand &PMO = MI.getOperand(PIdx);
PMO.setImm(Pred[0].getImm());
MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
-
- // Thumb 1 arithmetic instructions do not set CPSR when executed inside an
- // IT block. This affects how they are printed.
- const MCInstrDesc &MCID = MI.getDesc();
- if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
- assert(MCID.OpInfo[1].isOptionalDef() && "CPSR def isn't expected operand");
- assert((MI.getOperand(1).isDead() ||
- MI.getOperand(1).getReg() != ARM::CPSR) &&
- "if conversion tried to stop defining used CPSR");
- MI.getOperand(1).setReg(ARM::NoRegister);
- }
-
+
+ // Thumb 1 arithmetic instructions do not set CPSR when executed inside an
+ // IT block. This affects how they are printed.
+ const MCInstrDesc &MCID = MI.getDesc();
+ if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
+ assert(MCID.OpInfo[1].isOptionalDef() && "CPSR def isn't expected operand");
+ assert((MI.getOperand(1).isDead() ||
+ MI.getOperand(1).getReg() != ARM::CPSR) &&
+ "if conversion tried to stop defining used CPSR");
+ MI.getOperand(1).setReg(ARM::NoRegister);
+ }
+
return true;
}
return false;
@@ -629,23 +629,23 @@ bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
}
}
-bool ARMBaseInstrInfo::ClobbersPredicate(MachineInstr &MI,
- std::vector<MachineOperand> &Pred,
- bool SkipDead) const {
+bool ARMBaseInstrInfo::ClobbersPredicate(MachineInstr &MI,
+ std::vector<MachineOperand> &Pred,
+ bool SkipDead) const {
bool Found = false;
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI.getOperand(i);
- bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR);
- bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR;
- if (ClobbersCPSR || IsCPSR) {
-
- // Filter out T1 instructions that have a dead CPSR,
- // allowing IT blocks to be generated containing T1 instructions
- const MCInstrDesc &MCID = MI.getDesc();
- if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() &&
- SkipDead)
- continue;
-
+ bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR);
+ bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR;
+ if (ClobbersCPSR || IsCPSR) {
+
+ // Filter out T1 instructions that have a dead CPSR,
+ // allowing IT blocks to be generated containing T1 instructions
+ const MCInstrDesc &MCID = MI.getDesc();
+ if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() &&
+ SkipDead)
+ continue;
+
Pred.push_back(MO);
Found = true;
}
@@ -703,23 +703,23 @@ bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
if (!isEligibleForITBlock(&MI))
return false;
- const MachineFunction *MF = MI.getParent()->getParent();
+ const MachineFunction *MF = MI.getParent()->getParent();
const ARMFunctionInfo *AFI =
- MF->getInfo<ARMFunctionInfo>();
+ MF->getInfo<ARMFunctionInfo>();
// Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
// In their ARM encoding, they can't be encoded in a conditional form.
if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
return false;
- // Make indirect control flow changes unpredicable when SLS mitigation is
- // enabled.
- const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>();
- if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI))
- return false;
- if (ST.hardenSlsBlr() && isIndirectCall(MI))
- return false;
-
+ // Make indirect control flow changes unpredicable when SLS mitigation is
+ // enabled.
+ const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>();
+ if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI))
+ return false;
+ if (ST.hardenSlsBlr() && isIndirectCall(MI))
+ return false;
+
if (AFI->isThumb2Function()) {
if (getSubtarget().restrictIT())
return isV8EligibleForIT(&MI);
@@ -802,14 +802,14 @@ unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
Size = alignTo(Size, 4);
return Size;
}
- case ARM::SpeculationBarrierISBDSBEndBB:
- case ARM::t2SpeculationBarrierISBDSBEndBB:
- // This gets lowered to 2 4-byte instructions.
- return 8;
- case ARM::SpeculationBarrierSBEndBB:
- case ARM::t2SpeculationBarrierSBEndBB:
- // This gets lowered to 1 4-byte instructions.
- return 4;
+ case ARM::SpeculationBarrierISBDSBEndBB:
+ case ARM::t2SpeculationBarrierISBDSBEndBB:
+ // This gets lowered to 2 4-byte instructions.
+ return 8;
+ case ARM::SpeculationBarrierSBEndBB:
+ case ARM::t2SpeculationBarrierSBEndBB:
+ // This gets lowered to 1 4-byte instructions.
+ return 4;
}
}
@@ -2175,12 +2175,12 @@ ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF,
// Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions.
// ARM has a condition code field in every predicable instruction, using it
// doesn't change code size.
- if (!Subtarget.isThumb2())
- return 0;
-
- // It's possible that the size of the IT is restricted to a single block.
- unsigned MaxInsts = Subtarget.restrictIT() ? 1 : 4;
- return divideCeil(NumInsts, MaxInsts) * 2;
+ if (!Subtarget.isThumb2())
+ return 0;
+
+ // It's possible that the size of the IT is restricted to a single block.
+ unsigned MaxInsts = Subtarget.restrictIT() ? 1 : 4;
+ return divideCeil(NumInsts, MaxInsts) * 2;
}
unsigned
@@ -3417,7 +3417,7 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
case ARM::t2SUBspImm:
case ARM::t2ADDri:
case ARM::t2SUBri:
- MRI->constrainRegClass(UseMI.getOperand(0).getReg(), TRC);
+ MRI->constrainRegClass(UseMI.getOperand(0).getReg(), TRC);
}
return true;
}
@@ -4838,14 +4838,14 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
}
- if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) {
- assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm());
- if ((MI.getOperand(4).getImm() != 2 && MI.getOperand(4).getImm() != 3) ||
- MI.getOperand(4).getImm() != MI.getOperand(5).getImm() + 2) {
- ErrInfo = "Incorrect array index for MVE_VMOV_q_rr";
- return false;
- }
- }
+ if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) {
+ assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm());
+ if ((MI.getOperand(4).getImm() != 2 && MI.getOperand(4).getImm() != 3) ||
+ MI.getOperand(4).getImm() != MI.getOperand(5).getImm() + 2) {
+ ErrInfo = "Incorrect array index for MVE_VMOV_q_rr";
+ return false;
+ }
+ }
return true;
}
@@ -5531,8 +5531,8 @@ unsigned llvm::ConstantMaterializationCost(unsigned Val,
return ForCodesize ? 4 : 1;
if (ARM_AM::isSOImmTwoPartVal(Val)) // two instrs
return ForCodesize ? 8 : 2;
- if (ARM_AM::isSOImmTwoPartValNeg(Val)) // two instrs
- return ForCodesize ? 8 : 2;
+ if (ARM_AM::isSOImmTwoPartValNeg(Val)) // two instrs
+ return ForCodesize ? 8 : 2;
}
if (Subtarget->useMovt()) // MOVW + MOVT
return ForCodesize ? 8 : 2;
@@ -5637,32 +5637,32 @@ bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
/// | Frame overhead in Bytes | 2 | 4 |
/// | Stack fixup required | No | No |
/// +-------------------------+--------+-----+
-///
-/// \p MachineOutlinerDefault implies that the function should be called with
-/// a save and restore of LR to the stack.
-///
-/// That is,
-///
-/// I1 Save LR OUTLINED_FUNCTION:
-/// I2 --> BL OUTLINED_FUNCTION I1
-/// I3 Restore LR I2
-/// I3
-/// BX LR
-///
-/// +-------------------------+--------+-----+
-/// | | Thumb2 | ARM |
-/// +-------------------------+--------+-----+
-/// | Call overhead in Bytes | 8 | 12 |
-/// | Frame overhead in Bytes | 2 | 4 |
-/// | Stack fixup required | Yes | Yes |
-/// +-------------------------+--------+-----+
+///
+/// \p MachineOutlinerDefault implies that the function should be called with
+/// a save and restore of LR to the stack.
+///
+/// That is,
+///
+/// I1 Save LR OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION I1
+/// I3 Restore LR I2
+/// I3
+/// BX LR
+///
+/// +-------------------------+--------+-----+
+/// | | Thumb2 | ARM |
+/// +-------------------------+--------+-----+
+/// | Call overhead in Bytes | 8 | 12 |
+/// | Frame overhead in Bytes | 2 | 4 |
+/// | Stack fixup required | Yes | Yes |
+/// +-------------------------+--------+-----+
enum MachineOutlinerClass {
MachineOutlinerTailCall,
MachineOutlinerThunk,
MachineOutlinerNoLRSave,
- MachineOutlinerRegSave,
- MachineOutlinerDefault
+ MachineOutlinerRegSave,
+ MachineOutlinerDefault
};
enum MachineOutlinerMBBFlags {
@@ -5680,9 +5680,9 @@ struct OutlinerCosts {
const int FrameNoLRSave;
const int CallRegSave;
const int FrameRegSave;
- const int CallDefault;
- const int FrameDefault;
- const int SaveRestoreLROnStack;
+ const int CallDefault;
+ const int FrameDefault;
+ const int SaveRestoreLROnStack;
OutlinerCosts(const ARMSubtarget &target)
: CallTailCall(target.isThumb() ? 4 : 4),
@@ -5692,10 +5692,10 @@ struct OutlinerCosts {
CallNoLRSave(target.isThumb() ? 4 : 4),
FrameNoLRSave(target.isThumb() ? 4 : 4),
CallRegSave(target.isThumb() ? 8 : 12),
- FrameRegSave(target.isThumb() ? 2 : 4),
- CallDefault(target.isThumb() ? 8 : 12),
- FrameDefault(target.isThumb() ? 2 : 4),
- SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {}
+ FrameRegSave(target.isThumb() ? 2 : 4),
+ CallDefault(target.isThumb() ? 8 : 12),
+ FrameDefault(target.isThumb() ? 2 : 4),
+ SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {}
};
unsigned
@@ -5720,37 +5720,37 @@ ARMBaseInstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
return 0u;
}
-// Compute liveness of LR at the point after the interval [I, E), which
-// denotes a *backward* iteration through instructions. Used only for return
-// basic blocks, which do not end with a tail call.
-static bool isLRAvailable(const TargetRegisterInfo &TRI,
- MachineBasicBlock::reverse_iterator I,
- MachineBasicBlock::reverse_iterator E) {
- // At the end of the function LR dead.
- bool Live = false;
- for (; I != E; ++I) {
- const MachineInstr &MI = *I;
-
- // Check defs of LR.
- if (MI.modifiesRegister(ARM::LR, &TRI))
- Live = false;
-
- // Check uses of LR.
- unsigned Opcode = MI.getOpcode();
- if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR ||
- Opcode == ARM::SUBS_PC_LR || Opcode == ARM::tBX_RET ||
- Opcode == ARM::tBXNS_RET) {
- // These instructions use LR, but it's not an (explicit or implicit)
- // operand.
- Live = true;
- continue;
- }
- if (MI.readsRegister(ARM::LR, &TRI))
- Live = true;
- }
- return !Live;
-}
-
+// Compute liveness of LR at the point after the interval [I, E), which
+// denotes a *backward* iteration through instructions. Used only for return
+// basic blocks, which do not end with a tail call.
+static bool isLRAvailable(const TargetRegisterInfo &TRI,
+ MachineBasicBlock::reverse_iterator I,
+ MachineBasicBlock::reverse_iterator E) {
+ // At the end of the function LR dead.
+ bool Live = false;
+ for (; I != E; ++I) {
+ const MachineInstr &MI = *I;
+
+ // Check defs of LR.
+ if (MI.modifiesRegister(ARM::LR, &TRI))
+ Live = false;
+
+ // Check uses of LR.
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR ||
+ Opcode == ARM::SUBS_PC_LR || Opcode == ARM::tBX_RET ||
+ Opcode == ARM::tBXNS_RET) {
+ // These instructions use LR, but it's not an (explicit or implicit)
+ // operand.
+ Live = true;
+ continue;
+ }
+ if (MI.readsRegister(ARM::LR, &TRI))
+ Live = true;
+ }
+ return !Live;
+}
+
outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
@@ -5796,7 +5796,7 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
// Erase every candidate that violates the restrictions above. (It could be
// true that we have viable candidates, so it's not worth bailing out in
// the case that, say, 1 out of 20 candidates violate the restructions.)
- llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);
+ llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);
// If the sequence doesn't have enough candidates left, then we're done.
if (RepeatedSequenceLocs.size() < 2)
@@ -5816,8 +5816,8 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
};
OutlinerCosts Costs(Subtarget);
- unsigned FrameID = MachineOutlinerDefault;
- unsigned NumBytesToCreateFrame = Costs.FrameDefault;
+ unsigned FrameID = MachineOutlinerDefault;
+ unsigned NumBytesToCreateFrame = Costs.FrameDefault;
// If the last instruction in any candidate is a terminator, then we should
// tail call all of the candidates.
@@ -5826,31 +5826,31 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
NumBytesToCreateFrame = Costs.FrameTailCall;
SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall);
} else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX ||
- LastInstrOpcode == ARM::BLX_noip || LastInstrOpcode == ARM::tBL ||
- LastInstrOpcode == ARM::tBLXr ||
- LastInstrOpcode == ARM::tBLXr_noip ||
+ LastInstrOpcode == ARM::BLX_noip || LastInstrOpcode == ARM::tBL ||
+ LastInstrOpcode == ARM::tBLXr ||
+ LastInstrOpcode == ARM::tBLXr_noip ||
LastInstrOpcode == ARM::tBLXi) {
FrameID = MachineOutlinerThunk;
NumBytesToCreateFrame = Costs.FrameThunk;
SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk);
} else {
// We need to decide how to emit calls + frames. We can always emit the same
- // frame if we don't need to save to the stack. If we have to save to the
- // stack, then we need a different frame.
+ // frame if we don't need to save to the stack. If we have to save to the
+ // stack, then we need a different frame.
unsigned NumBytesNoStackCalls = 0;
std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
for (outliner::Candidate &C : RepeatedSequenceLocs) {
C.initLRU(TRI);
- // LR liveness is overestimated in return blocks, unless they end with a
- // tail call.
- const auto Last = C.getMBB()->rbegin();
- const bool LRIsAvailable =
- C.getMBB()->isReturnBlock() && !Last->isCall()
- ? isLRAvailable(TRI, Last,
- (MachineBasicBlock::reverse_iterator)C.front())
- : C.LRU.available(ARM::LR);
- if (LRIsAvailable) {
+ // LR liveness is overestimated in return blocks, unless they end with a
+ // tail call.
+ const auto Last = C.getMBB()->rbegin();
+ const bool LRIsAvailable =
+ C.getMBB()->isReturnBlock() && !Last->isCall()
+ ? isLRAvailable(TRI, Last,
+ (MachineBasicBlock::reverse_iterator)C.front())
+ : C.LRU.available(ARM::LR);
+ if (LRIsAvailable) {
FrameID = MachineOutlinerNoLRSave;
NumBytesNoStackCalls += Costs.CallNoLRSave;
C.setCallInfo(MachineOutlinerNoLRSave, Costs.CallNoLRSave);
@@ -5865,157 +5865,157 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
C.setCallInfo(MachineOutlinerRegSave, Costs.CallRegSave);
CandidatesWithoutStackFixups.push_back(C);
}
-
- // Is SP used in the sequence at all? If not, we don't have to modify
- // the stack, so we are guaranteed to get the same frame.
- else if (C.UsedInSequence.available(ARM::SP)) {
- NumBytesNoStackCalls += Costs.CallDefault;
- C.setCallInfo(MachineOutlinerDefault, Costs.CallDefault);
- CandidatesWithoutStackFixups.push_back(C);
- }
-
- // If we outline this, we need to modify the stack. Pretend we don't
- // outline this by saving all of its bytes.
- else
- NumBytesNoStackCalls += SequenceSize;
+
+ // Is SP used in the sequence at all? If not, we don't have to modify
+ // the stack, so we are guaranteed to get the same frame.
+ else if (C.UsedInSequence.available(ARM::SP)) {
+ NumBytesNoStackCalls += Costs.CallDefault;
+ C.setCallInfo(MachineOutlinerDefault, Costs.CallDefault);
+ CandidatesWithoutStackFixups.push_back(C);
+ }
+
+ // If we outline this, we need to modify the stack. Pretend we don't
+ // outline this by saving all of its bytes.
+ else
+ NumBytesNoStackCalls += SequenceSize;
}
- // If there are no places where we have to save LR, then note that we don't
- // have to update the stack. Otherwise, give every candidate the default
- // call type
- if (NumBytesNoStackCalls <=
- RepeatedSequenceLocs.size() * Costs.CallDefault) {
+ // If there are no places where we have to save LR, then note that we don't
+ // have to update the stack. Otherwise, give every candidate the default
+ // call type
+ if (NumBytesNoStackCalls <=
+ RepeatedSequenceLocs.size() * Costs.CallDefault) {
RepeatedSequenceLocs = CandidatesWithoutStackFixups;
- FrameID = MachineOutlinerNoLRSave;
+ FrameID = MachineOutlinerNoLRSave;
} else
- SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault);
- }
-
- // Does every candidate's MBB contain a call? If so, then we might have a
- // call in the range.
- if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
- // check if the range contains a call. These require a save + restore of
- // the link register.
- if (std::any_of(FirstCand.front(), FirstCand.back(),
- [](const MachineInstr &MI) { return MI.isCall(); }))
- NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
-
- // Handle the last instruction separately. If it is tail call, then the
- // last instruction is a call, we don't want to save + restore in this
- // case. However, it could be possible that the last instruction is a
- // call without it being valid to tail call this sequence. We should
- // consider this as well.
- else if (FrameID != MachineOutlinerThunk &&
- FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall())
- NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
- }
-
+ SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault);
+ }
+
+ // Does every candidate's MBB contain a call? If so, then we might have a
+ // call in the range.
+ if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
+ // check if the range contains a call. These require a save + restore of
+ // the link register.
+ if (std::any_of(FirstCand.front(), FirstCand.back(),
+ [](const MachineInstr &MI) { return MI.isCall(); }))
+ NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
+
+ // Handle the last instruction separately. If it is tail call, then the
+ // last instruction is a call, we don't want to save + restore in this
+ // case. However, it could be possible that the last instruction is a
+ // call without it being valid to tail call this sequence. We should
+ // consider this as well.
+ else if (FrameID != MachineOutlinerThunk &&
+ FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall())
+ NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
+ }
+
return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
NumBytesToCreateFrame, FrameID);
}
-bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
- int64_t Fixup,
- bool Updt) const {
- int SPIdx = MI->findRegisterUseOperandIdx(ARM::SP);
- unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask);
- if (SPIdx < 0)
- // No SP operand
- return true;
- else if (SPIdx != 1 && (AddrMode != ARMII::AddrModeT2_i8s4 || SPIdx != 2))
- // If SP is not the base register we can't do much
- return false;
-
- // Stack might be involved but addressing mode doesn't handle any offset.
- // Rq: AddrModeT1_[1|2|4] don't operate on SP
- if (AddrMode == ARMII::AddrMode1 // Arithmetic instructions
- || AddrMode == ARMII::AddrMode4 // Load/Store Multiple
- || AddrMode == ARMII::AddrMode6 // Neon Load/Store Multiple
- || AddrMode == ARMII::AddrModeT2_so // SP can't be used as based register
- || AddrMode == ARMII::AddrModeT2_pc // PCrel access
- || AddrMode == ARMII::AddrMode2 // Used by PRE and POST indexed LD/ST
- || AddrMode == ARMII::AddrModeT2_i7 // v8.1-M MVE
- || AddrMode == ARMII::AddrModeT2_i7s2 // v8.1-M MVE
- || AddrMode == ARMII::AddrModeT2_i7s4 // v8.1-M sys regs VLDR/VSTR
- || AddrMode == ARMII::AddrModeNone)
- return false;
-
- unsigned NumOps = MI->getDesc().getNumOperands();
- unsigned ImmIdx = NumOps - 3;
-
- const MachineOperand &Offset = MI->getOperand(ImmIdx);
- assert(Offset.isImm() && "Is not an immediate");
- int64_t OffVal = Offset.getImm();
-
- if (OffVal < 0)
- // Don't override data if the are below SP.
- return false;
-
- unsigned NumBits = 0;
- unsigned Scale = 1;
-
- switch (AddrMode) {
- case ARMII::AddrMode3:
- if (ARM_AM::getAM3Op(OffVal) == ARM_AM::sub)
- return false;
- OffVal = ARM_AM::getAM3Offset(OffVal);
- NumBits = 8;
- break;
- case ARMII::AddrMode5:
- if (ARM_AM::getAM5Op(OffVal) == ARM_AM::sub)
- return false;
- OffVal = ARM_AM::getAM5Offset(OffVal);
- NumBits = 8;
- Scale = 4;
- break;
- case ARMII::AddrMode5FP16:
- if (ARM_AM::getAM5FP16Op(OffVal) == ARM_AM::sub)
- return false;
- OffVal = ARM_AM::getAM5FP16Offset(OffVal);
- NumBits = 8;
- Scale = 2;
- break;
- case ARMII::AddrModeT2_i8:
- NumBits = 8;
- break;
- case ARMII::AddrModeT2_i8s4:
- // FIXME: Values are already scaled in this addressing mode.
- assert((Fixup & 3) == 0 && "Can't encode this offset!");
- NumBits = 10;
- break;
- case ARMII::AddrModeT2_ldrex:
- NumBits = 8;
- Scale = 4;
- break;
- case ARMII::AddrModeT2_i12:
- case ARMII::AddrMode_i12:
- NumBits = 12;
- break;
- case ARMII::AddrModeT1_s: // SP-relative LD/ST
- NumBits = 8;
- Scale = 4;
- break;
- default:
- llvm_unreachable("Unsupported addressing mode!");
- }
- // Make sure the offset is encodable for instructions that scale the
- // immediate.
- assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 &&
- "Can't encode this offset!");
- OffVal += Fixup / Scale;
-
- unsigned Mask = (1 << NumBits) - 1;
-
- if (OffVal <= Mask) {
- if (Updt)
- MI->getOperand(ImmIdx).setImm(OffVal);
- return true;
- }
-
- return false;
-
-}
-
+bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
+ int64_t Fixup,
+ bool Updt) const {
+ int SPIdx = MI->findRegisterUseOperandIdx(ARM::SP);
+ unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask);
+ if (SPIdx < 0)
+ // No SP operand
+ return true;
+ else if (SPIdx != 1 && (AddrMode != ARMII::AddrModeT2_i8s4 || SPIdx != 2))
+ // If SP is not the base register we can't do much
+ return false;
+
+ // Stack might be involved but addressing mode doesn't handle any offset.
+ // Rq: AddrModeT1_[1|2|4] don't operate on SP
+ if (AddrMode == ARMII::AddrMode1 // Arithmetic instructions
+ || AddrMode == ARMII::AddrMode4 // Load/Store Multiple
+ || AddrMode == ARMII::AddrMode6 // Neon Load/Store Multiple
+ || AddrMode == ARMII::AddrModeT2_so // SP can't be used as based register
+ || AddrMode == ARMII::AddrModeT2_pc // PCrel access
+ || AddrMode == ARMII::AddrMode2 // Used by PRE and POST indexed LD/ST
+ || AddrMode == ARMII::AddrModeT2_i7 // v8.1-M MVE
+ || AddrMode == ARMII::AddrModeT2_i7s2 // v8.1-M MVE
+ || AddrMode == ARMII::AddrModeT2_i7s4 // v8.1-M sys regs VLDR/VSTR
+ || AddrMode == ARMII::AddrModeNone)
+ return false;
+
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ unsigned ImmIdx = NumOps - 3;
+
+ const MachineOperand &Offset = MI->getOperand(ImmIdx);
+ assert(Offset.isImm() && "Is not an immediate");
+ int64_t OffVal = Offset.getImm();
+
+ if (OffVal < 0)
+ // Don't override data if the are below SP.
+ return false;
+
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+
+ switch (AddrMode) {
+ case ARMII::AddrMode3:
+ if (ARM_AM::getAM3Op(OffVal) == ARM_AM::sub)
+ return false;
+ OffVal = ARM_AM::getAM3Offset(OffVal);
+ NumBits = 8;
+ break;
+ case ARMII::AddrMode5:
+ if (ARM_AM::getAM5Op(OffVal) == ARM_AM::sub)
+ return false;
+ OffVal = ARM_AM::getAM5Offset(OffVal);
+ NumBits = 8;
+ Scale = 4;
+ break;
+ case ARMII::AddrMode5FP16:
+ if (ARM_AM::getAM5FP16Op(OffVal) == ARM_AM::sub)
+ return false;
+ OffVal = ARM_AM::getAM5FP16Offset(OffVal);
+ NumBits = 8;
+ Scale = 2;
+ break;
+ case ARMII::AddrModeT2_i8:
+ NumBits = 8;
+ break;
+ case ARMII::AddrModeT2_i8s4:
+ // FIXME: Values are already scaled in this addressing mode.
+ assert((Fixup & 3) == 0 && "Can't encode this offset!");
+ NumBits = 10;
+ break;
+ case ARMII::AddrModeT2_ldrex:
+ NumBits = 8;
+ Scale = 4;
+ break;
+ case ARMII::AddrModeT2_i12:
+ case ARMII::AddrMode_i12:
+ NumBits = 12;
+ break;
+ case ARMII::AddrModeT1_s: // SP-relative LD/ST
+ NumBits = 8;
+ Scale = 4;
+ break;
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ }
+ // Make sure the offset is encodable for instructions that scale the
+ // immediate.
+ assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 &&
+ "Can't encode this offset!");
+ OffVal += Fixup / Scale;
+
+ unsigned Mask = (1 << NumBits) - 1;
+
+ if (OffVal <= Mask) {
+ if (Updt)
+ MI->getOperand(ImmIdx).setImm(OffVal);
+ return true;
+ }
+
+ return false;
+
+}
+
bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom(
MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
const Function &F = MF.getFunction();
@@ -6075,13 +6075,13 @@ bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
Flags |= MachineOutlinerMBBFlags::HasCalls;
- // LR liveness is overestimated in return blocks.
-
- bool LRIsAvailable =
- MBB.isReturnBlock() && !MBB.back().isCall()
- ? isLRAvailable(getRegisterInfo(), MBB.rbegin(), MBB.rend())
- : LRU.available(ARM::LR);
- if (!LRIsAvailable)
+ // LR liveness is overestimated in return blocks.
+
+ bool LRIsAvailable =
+ MBB.isReturnBlock() && !MBB.back().isCall()
+ ? isLRAvailable(getRegisterInfo(), MBB.rbegin(), MBB.rend())
+ : LRU.available(ARM::LR);
+ if (!LRIsAvailable)
Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
return true;
@@ -6119,9 +6119,9 @@ ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
// Be conservative with ARMv8.1 MVE instructions.
if (Opc == ARM::t2BF_LabelPseudo || Opc == ARM::t2DoLoopStart ||
- Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart ||
- Opc == ARM::t2LoopDec || Opc == ARM::t2LoopEnd ||
- Opc == ARM::t2LoopEndDec)
+ Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart ||
+ Opc == ARM::t2LoopDec || Opc == ARM::t2LoopEnd ||
+ Opc == ARM::t2LoopEndDec)
return outliner::InstrType::Illegal;
const MCInstrDesc &MCID = MI.getDesc();
@@ -6155,56 +6155,56 @@ ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
return outliner::InstrType::Illegal;
if (MI.isCall()) {
- // Get the function associated with the call. Look at each operand and find
- // the one that represents the calle and get its name.
- const Function *Callee = nullptr;
- for (const MachineOperand &MOP : MI.operands()) {
- if (MOP.isGlobal()) {
- Callee = dyn_cast<Function>(MOP.getGlobal());
- break;
- }
- }
-
- // Dont't outline calls to "mcount" like functions, in particular Linux
- // kernel function tracing relies on it.
- if (Callee &&
- (Callee->getName() == "\01__gnu_mcount_nc" ||
- Callee->getName() == "\01mcount" || Callee->getName() == "__mcount"))
- return outliner::InstrType::Illegal;
-
+ // Get the function associated with the call. Look at each operand and find
+ // the one that represents the calle and get its name.
+ const Function *Callee = nullptr;
+ for (const MachineOperand &MOP : MI.operands()) {
+ if (MOP.isGlobal()) {
+ Callee = dyn_cast<Function>(MOP.getGlobal());
+ break;
+ }
+ }
+
+ // Dont't outline calls to "mcount" like functions, in particular Linux
+ // kernel function tracing relies on it.
+ if (Callee &&
+ (Callee->getName() == "\01__gnu_mcount_nc" ||
+ Callee->getName() == "\01mcount" || Callee->getName() == "__mcount"))
+ return outliner::InstrType::Illegal;
+
// If we don't know anything about the callee, assume it depends on the
// stack layout of the caller. In that case, it's only legal to outline
// as a tail-call. Explicitly list the call instructions we know about so
// we don't get unexpected results with call pseudo-instructions.
auto UnknownCallOutlineType = outliner::InstrType::Illegal;
if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX ||
- Opc == ARM::BLX_noip || Opc == ARM::tBLXr || Opc == ARM::tBLXr_noip ||
- Opc == ARM::tBLXi)
+ Opc == ARM::BLX_noip || Opc == ARM::tBLXr || Opc == ARM::tBLXr_noip ||
+ Opc == ARM::tBLXi)
UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
- if (!Callee)
- return UnknownCallOutlineType;
-
- // We have a function we have information about. Check if it's something we
- // can safely outline.
- MachineFunction *MF = MI.getParent()->getParent();
- MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
-
- // We don't know what's going on with the callee at all. Don't touch it.
- if (!CalleeMF)
- return UnknownCallOutlineType;
-
- // Check if we know anything about the callee saves on the function. If we
- // don't, then don't touch it, since that implies that we haven't computed
- // anything about its stack frame yet.
- MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
- if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
- MFI.getNumObjects() > 0)
- return UnknownCallOutlineType;
-
- // At this point, we can say that CalleeMF ought to not pass anything on the
- // stack. Therefore, we can outline it.
- return outliner::InstrType::Legal;
+ if (!Callee)
+ return UnknownCallOutlineType;
+
+ // We have a function we have information about. Check if it's something we
+ // can safely outline.
+ MachineFunction *MF = MI.getParent()->getParent();
+ MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
+
+ // We don't know what's going on with the callee at all. Don't touch it.
+ if (!CalleeMF)
+ return UnknownCallOutlineType;
+
+ // Check if we know anything about the callee saves on the function. If we
+ // don't, then don't touch it, since that implies that we haven't computed
+ // anything about its stack frame yet.
+ MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
+ if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
+ MFI.getNumObjects() > 0)
+ return UnknownCallOutlineType;
+
+ // At this point, we can say that CalleeMF ought to not pass anything on the
+ // stack. Therefore, we can outline it.
+ return outliner::InstrType::Legal;
}
// Since calls are handled, don't touch LR or PC
@@ -6227,19 +6227,19 @@ ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
if (!MightNeedStackFixUp)
return outliner::InstrType::Legal;
- // Any modification of SP will break our code to save/restore LR.
- // FIXME: We could handle some instructions which add a constant offset to
- // SP, with a bit more work.
- if (MI.modifiesRegister(ARM::SP, TRI))
- return outliner::InstrType::Illegal;
-
- // At this point, we have a stack instruction that we might need to fix up.
- // We'll handle it if it's a load or store.
- if (checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(),
- false))
- return outliner::InstrType::Legal;
-
- // We can't fix it up, so don't outline it.
+ // Any modification of SP will break our code to save/restore LR.
+ // FIXME: We could handle some instructions which add a constant offset to
+ // SP, with a bit more work.
+ if (MI.modifiesRegister(ARM::SP, TRI))
+ return outliner::InstrType::Illegal;
+
+ // At this point, we have a stack instruction that we might need to fix up.
+ // We'll handle it if it's a load or store.
+ if (checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(),
+ false))
+ return outliner::InstrType::Legal;
+
+ // We can't fix it up, so don't outline it.
return outliner::InstrType::Illegal;
}
@@ -6255,104 +6255,104 @@ ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
return outliner::InstrType::Legal;
}
-void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
- for (MachineInstr &MI : MBB) {
- checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(), true);
- }
-}
-
-void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator It) const {
- unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM;
- int Align = -Subtarget.getStackAlignment().value();
- BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP)
- .addReg(ARM::LR, RegState::Kill)
- .addReg(ARM::SP)
- .addImm(Align)
- .add(predOps(ARMCC::AL));
-}
-
-void ARMBaseInstrInfo::emitCFIForLRSaveOnStack(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const {
- MachineFunction &MF = *MBB.getParent();
- const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
- unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
- int Align = Subtarget.getStackAlignment().value();
- // Add a CFI saying the stack was moved down.
- int64_t StackPosEntry =
- MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Align));
- BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
- .addCFIIndex(StackPosEntry)
- .setMIFlags(MachineInstr::FrameSetup);
-
- // Add a CFI saying that the LR that we want to find is now higher than
- // before.
- int64_t LRPosEntry =
- MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfLR, -Align));
- BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
- .addCFIIndex(LRPosEntry)
- .setMIFlags(MachineInstr::FrameSetup);
-}
-
-void ARMBaseInstrInfo::emitCFIForLRSaveToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator It,
- Register Reg) const {
- MachineFunction &MF = *MBB.getParent();
- const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
- unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
- unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
-
- int64_t LRPosEntry = MF.addFrameInst(
- MCCFIInstruction::createRegister(nullptr, DwarfLR, DwarfReg));
- BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
- .addCFIIndex(LRPosEntry)
- .setMIFlags(MachineInstr::FrameSetup);
-}
-
-void ARMBaseInstrInfo::restoreLRFromStack(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const {
- unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
- MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR)
- .addReg(ARM::SP, RegState::Define)
- .addReg(ARM::SP);
- if (!Subtarget.isThumb())
- MIB.addReg(0);
- MIB.addImm(Subtarget.getStackAlignment().value()).add(predOps(ARMCC::AL));
-}
-
-void ARMBaseInstrInfo::emitCFIForLRRestoreFromStack(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const {
- // Now the stack has moved back up...
- MachineFunction &MF = *MBB.getParent();
- const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
- unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
- int64_t StackPosEntry =
- MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
- BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
- .addCFIIndex(StackPosEntry)
- .setMIFlags(MachineInstr::FrameDestroy);
-
- // ... and we have restored LR.
- int64_t LRPosEntry =
- MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR));
- BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
- .addCFIIndex(LRPosEntry)
- .setMIFlags(MachineInstr::FrameDestroy);
-}
-
-void ARMBaseInstrInfo::emitCFIForLRRestoreFromReg(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const {
- MachineFunction &MF = *MBB.getParent();
- const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
- unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
-
- int64_t LRPosEntry =
- MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR));
- BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
- .addCFIIndex(LRPosEntry)
- .setMIFlags(MachineInstr::FrameDestroy);
-}
-
+void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
+ for (MachineInstr &MI : MBB) {
+ checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(), true);
+ }
+}
+
+void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator It) const {
+ unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM;
+ int Align = -Subtarget.getStackAlignment().value();
+ BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP)
+ .addReg(ARM::LR, RegState::Kill)
+ .addReg(ARM::SP)
+ .addImm(Align)
+ .add(predOps(ARMCC::AL));
+}
+
+void ARMBaseInstrInfo::emitCFIForLRSaveOnStack(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const {
+ MachineFunction &MF = *MBB.getParent();
+ const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
+ unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
+ int Align = Subtarget.getStackAlignment().value();
+ // Add a CFI saying the stack was moved down.
+ int64_t StackPosEntry =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Align));
+ BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
+ .addCFIIndex(StackPosEntry)
+ .setMIFlags(MachineInstr::FrameSetup);
+
+ // Add a CFI saying that the LR that we want to find is now higher than
+ // before.
+ int64_t LRPosEntry =
+ MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfLR, -Align));
+ BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
+ .addCFIIndex(LRPosEntry)
+ .setMIFlags(MachineInstr::FrameSetup);
+}
+
+void ARMBaseInstrInfo::emitCFIForLRSaveToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator It,
+ Register Reg) const {
+ MachineFunction &MF = *MBB.getParent();
+ const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
+ unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
+ unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
+
+ int64_t LRPosEntry = MF.addFrameInst(
+ MCCFIInstruction::createRegister(nullptr, DwarfLR, DwarfReg));
+ BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
+ .addCFIIndex(LRPosEntry)
+ .setMIFlags(MachineInstr::FrameSetup);
+}
+
+void ARMBaseInstrInfo::restoreLRFromStack(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const {
+ unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
+ MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR)
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP);
+ if (!Subtarget.isThumb())
+ MIB.addReg(0);
+ MIB.addImm(Subtarget.getStackAlignment().value()).add(predOps(ARMCC::AL));
+}
+
+void ARMBaseInstrInfo::emitCFIForLRRestoreFromStack(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const {
+ // Now the stack has moved back up...
+ MachineFunction &MF = *MBB.getParent();
+ const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
+ unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
+ int64_t StackPosEntry =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
+ BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
+ .addCFIIndex(StackPosEntry)
+ .setMIFlags(MachineInstr::FrameDestroy);
+
+ // ... and we have restored LR.
+ int64_t LRPosEntry =
+ MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR));
+ BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
+ .addCFIIndex(LRPosEntry)
+ .setMIFlags(MachineInstr::FrameDestroy);
+}
+
+void ARMBaseInstrInfo::emitCFIForLRRestoreFromReg(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const {
+ MachineFunction &MF = *MBB.getParent();
+ const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
+ unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
+
+ int64_t LRPosEntry =
+ MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR));
+ BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
+ .addCFIIndex(LRPosEntry)
+ .setMIFlags(MachineInstr::FrameDestroy);
+}
+
void ARMBaseInstrInfo::buildOutlinedFrame(
MachineBasicBlock &MBB, MachineFunction &MF,
const outliner::OutlinedFunction &OF) const {
@@ -6374,57 +6374,57 @@ void ARMBaseInstrInfo::buildOutlinedFrame(
Call->eraseFromParent();
}
- // Is there a call in the outlined range?
- auto IsNonTailCall = [](MachineInstr &MI) {
- return MI.isCall() && !MI.isReturn();
- };
- if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
- MachineBasicBlock::iterator It = MBB.begin();
- MachineBasicBlock::iterator Et = MBB.end();
-
- if (OF.FrameConstructionID == MachineOutlinerTailCall ||
- OF.FrameConstructionID == MachineOutlinerThunk)
- Et = std::prev(MBB.end());
-
- // We have to save and restore LR, so we need to add it to the liveins if it
- // is not already part of the set. This is sufficient since outlined
- // functions only have one block.
- if (!MBB.isLiveIn(ARM::LR))
- MBB.addLiveIn(ARM::LR);
-
- // Insert a save before the outlined region
- saveLROnStack(MBB, It);
- emitCFIForLRSaveOnStack(MBB, It);
-
- // Fix up the instructions in the range, since we're going to modify the
- // stack.
- assert(OF.FrameConstructionID != MachineOutlinerDefault &&
- "Can only fix up stack references once");
- fixupPostOutline(MBB);
-
- // Insert a restore before the terminator for the function. Restore LR.
- restoreLRFromStack(MBB, Et);
- emitCFIForLRRestoreFromStack(MBB, Et);
- }
-
- // If this is a tail call outlined function, then there's already a return.
- if (OF.FrameConstructionID == MachineOutlinerTailCall ||
- OF.FrameConstructionID == MachineOutlinerThunk)
- return;
-
+ // Is there a call in the outlined range?
+ auto IsNonTailCall = [](MachineInstr &MI) {
+ return MI.isCall() && !MI.isReturn();
+ };
+ if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
+ MachineBasicBlock::iterator It = MBB.begin();
+ MachineBasicBlock::iterator Et = MBB.end();
+
+ if (OF.FrameConstructionID == MachineOutlinerTailCall ||
+ OF.FrameConstructionID == MachineOutlinerThunk)
+ Et = std::prev(MBB.end());
+
+ // We have to save and restore LR, so we need to add it to the liveins if it
+ // is not already part of the set. This is sufficient since outlined
+ // functions only have one block.
+ if (!MBB.isLiveIn(ARM::LR))
+ MBB.addLiveIn(ARM::LR);
+
+ // Insert a save before the outlined region
+ saveLROnStack(MBB, It);
+ emitCFIForLRSaveOnStack(MBB, It);
+
+ // Fix up the instructions in the range, since we're going to modify the
+ // stack.
+ assert(OF.FrameConstructionID != MachineOutlinerDefault &&
+ "Can only fix up stack references once");
+ fixupPostOutline(MBB);
+
+ // Insert a restore before the terminator for the function. Restore LR.
+ restoreLRFromStack(MBB, Et);
+ emitCFIForLRRestoreFromStack(MBB, Et);
+ }
+
+ // If this is a tail call outlined function, then there's already a return.
+ if (OF.FrameConstructionID == MachineOutlinerTailCall ||
+ OF.FrameConstructionID == MachineOutlinerThunk)
+ return;
+
// Here we have to insert the return ourselves. Get the correct opcode from
// current feature set.
BuildMI(MBB, MBB.end(), DebugLoc(), get(Subtarget.getReturnOpcode()))
.add(predOps(ARMCC::AL));
-
- // Did we have to modify the stack by saving the link register?
- if (OF.FrameConstructionID != MachineOutlinerDefault &&
- OF.Candidates[0].CallConstructionID != MachineOutlinerDefault)
- return;
-
- // We modified the stack.
- // Walk over the basic block and fix up all the stack accesses.
- fixupPostOutline(MBB);
+
+ // Did we have to modify the stack by saving the link register?
+ if (OF.FrameConstructionID != MachineOutlinerDefault &&
+ OF.Candidates[0].CallConstructionID != MachineOutlinerDefault)
+ return;
+
+ // We modified the stack.
+ // Walk over the basic block and fix up all the stack accesses.
+ fixupPostOutline(MBB);
}
MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
@@ -6456,14 +6456,14 @@ MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
CallMIB.add(predOps(ARMCC::AL));
CallMIB.addGlobalAddress(M.getNamedValue(MF.getName()));
- if (C.CallConstructionID == MachineOutlinerNoLRSave ||
- C.CallConstructionID == MachineOutlinerThunk) {
- // No, so just insert the call.
- It = MBB.insert(It, CallMIB);
- return It;
- }
-
- const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
+ if (C.CallConstructionID == MachineOutlinerNoLRSave ||
+ C.CallConstructionID == MachineOutlinerThunk) {
+ // No, so just insert the call.
+ It = MBB.insert(It, CallMIB);
+ return It;
+ }
+
+ const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
// Can we save to a register?
if (C.CallConstructionID == MachineOutlinerRegSave) {
unsigned Reg = findRegisterToSaveLRTo(C);
@@ -6471,55 +6471,55 @@ MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
// Save and restore LR from that register.
copyPhysReg(MBB, It, DebugLoc(), Reg, ARM::LR, true);
- if (!AFI.isLRSpilled())
- emitCFIForLRSaveToReg(MBB, It, Reg);
+ if (!AFI.isLRSpilled())
+ emitCFIForLRSaveToReg(MBB, It, Reg);
CallPt = MBB.insert(It, CallMIB);
copyPhysReg(MBB, It, DebugLoc(), ARM::LR, Reg, true);
- if (!AFI.isLRSpilled())
- emitCFIForLRRestoreFromReg(MBB, It);
+ if (!AFI.isLRSpilled())
+ emitCFIForLRRestoreFromReg(MBB, It);
It--;
return CallPt;
}
- // We have the default case. Save and restore from SP.
- if (!MBB.isLiveIn(ARM::LR))
- MBB.addLiveIn(ARM::LR);
- saveLROnStack(MBB, It);
- if (!AFI.isLRSpilled())
- emitCFIForLRSaveOnStack(MBB, It);
- CallPt = MBB.insert(It, CallMIB);
- restoreLRFromStack(MBB, It);
- if (!AFI.isLRSpilled())
- emitCFIForLRRestoreFromStack(MBB, It);
- It--;
- return CallPt;
-}
-
-bool ARMBaseInstrInfo::shouldOutlineFromFunctionByDefault(
- MachineFunction &MF) const {
- return Subtarget.isMClass() && MF.getFunction().hasMinSize();
-}
-
-bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
- AAResults *AA) const {
- // Try hard to rematerialize any VCTPs because if we spill P0, it will block
- // the tail predication conversion. This means that the element count
- // register has to be live for longer, but that has to be better than
- // spill/restore and VPT predication.
- return isVCTP(&MI) && !isPredicated(MI);
-}
-
-unsigned llvm::getBLXOpcode(const MachineFunction &MF) {
- return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_noip
- : ARM::BLX;
-}
-
-unsigned llvm::gettBLXrOpcode(const MachineFunction &MF) {
- return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::tBLXr_noip
- : ARM::tBLXr;
-}
-
-unsigned llvm::getBLXpredOpcode(const MachineFunction &MF) {
- return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_pred_noip
- : ARM::BLX_pred;
-}
-
+ // We have the default case. Save and restore from SP.
+ if (!MBB.isLiveIn(ARM::LR))
+ MBB.addLiveIn(ARM::LR);
+ saveLROnStack(MBB, It);
+ if (!AFI.isLRSpilled())
+ emitCFIForLRSaveOnStack(MBB, It);
+ CallPt = MBB.insert(It, CallMIB);
+ restoreLRFromStack(MBB, It);
+ if (!AFI.isLRSpilled())
+ emitCFIForLRRestoreFromStack(MBB, It);
+ It--;
+ return CallPt;
+}
+
+bool ARMBaseInstrInfo::shouldOutlineFromFunctionByDefault(
+ MachineFunction &MF) const {
+ return Subtarget.isMClass() && MF.getFunction().hasMinSize();
+}
+
+bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
+ AAResults *AA) const {
+ // Try hard to rematerialize any VCTPs because if we spill P0, it will block
+ // the tail predication conversion. This means that the element count
+ // register has to be live for longer, but that has to be better than
+ // spill/restore and VPT predication.
+ return isVCTP(&MI) && !isPredicated(MI);
+}
+
+unsigned llvm::getBLXOpcode(const MachineFunction &MF) {
+ return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_noip
+ : ARM::BLX;
+}
+
+unsigned llvm::gettBLXrOpcode(const MachineFunction &MF) {
+ return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::tBLXr_noip
+ : ARM::tBLXr;
+}
+
+unsigned llvm::getBLXpredOpcode(const MachineFunction &MF) {
+ return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_pred_noip
+ : ARM::BLX_pred;
+}
+
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/libs/llvm12/lib/Target/ARM/ARMBaseInstrInfo.h
index 1b843c4281..e61d557c1d 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -132,10 +132,10 @@ public:
const ScheduleDAG *DAG) const override;
ScheduleHazardRecognizer *
- CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
- const ScheduleDAGMI *DAG) const override;
-
- ScheduleHazardRecognizer *
+ CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAGMI *DAG) const override;
+
+ ScheduleHazardRecognizer *
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const override;
@@ -175,8 +175,8 @@ public:
bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
ArrayRef<MachineOperand> Pred2) const override;
- bool ClobbersPredicate(MachineInstr &MI, std::vector<MachineOperand> &Pred,
- bool SkipDead) const override;
+ bool ClobbersPredicate(MachineInstr &MI, std::vector<MachineOperand> &Pred,
+ bool SkipDead) const override;
bool isPredicable(const MachineInstr &MI) const override;
@@ -361,60 +361,60 @@ public:
MachineBasicBlock::iterator &It, MachineFunction &MF,
const outliner::Candidate &C) const override;
- /// Enable outlining by default at -Oz.
- bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
-
- bool isUnspillableTerminatorImpl(const MachineInstr *MI) const override {
- return MI->getOpcode() == ARM::t2LoopEndDec ||
- MI->getOpcode() == ARM::t2DoLoopStartTP;
- }
-
+ /// Enable outlining by default at -Oz.
+ bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
+
+ bool isUnspillableTerminatorImpl(const MachineInstr *MI) const override {
+ return MI->getOpcode() == ARM::t2LoopEndDec ||
+ MI->getOpcode() == ARM::t2DoLoopStartTP;
+ }
+
private:
/// Returns an unused general-purpose register which can be used for
/// constructing an outlined call if one exists. Returns 0 otherwise.
unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const;
- /// Adds an instruction which saves the link register on top of the stack into
- /// the MachineBasicBlock \p MBB at position \p It.
- void saveLROnStack(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator It) const;
-
- /// Adds an instruction which restores the link register from the top of the
- /// stack into the MachineBasicBlock \p MBB at position \p It.
- void restoreLRFromStack(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator It) const;
-
- /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It,
- /// for the case when the LR is saved on the stack.
- void emitCFIForLRSaveOnStack(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator It) const;
-
- /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It,
- /// for the case when the LR is saved in the register \p Reg.
- void emitCFIForLRSaveToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator It,
- Register Reg) const;
-
- /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It,
- /// after the LR was restored from the stack.
- void emitCFIForLRRestoreFromStack(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator It) const;
-
- /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It,
- /// after the LR was restored from a register.
- void emitCFIForLRRestoreFromReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator It) const;
- /// \brief Sets the offsets on outlined instructions in \p MBB which use SP
- /// so that they will be valid post-outlining.
- ///
- /// \param MBB A \p MachineBasicBlock in an outlined function.
- void fixupPostOutline(MachineBasicBlock &MBB) const;
-
- /// Returns true if the machine instruction offset can handle the stack fixup
- /// and updates it if requested.
- bool checkAndUpdateStackOffset(MachineInstr *MI, int64_t Fixup,
- bool Updt) const;
-
+ /// Adds an instruction which saves the link register on top of the stack into
+ /// the MachineBasicBlock \p MBB at position \p It.
+ void saveLROnStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator It) const;
+
+ /// Adds an instruction which restores the link register from the top of the
+ /// stack into the MachineBasicBlock \p MBB at position \p It.
+ void restoreLRFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator It) const;
+
+ /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It,
+ /// for the case when the LR is saved on the stack.
+ void emitCFIForLRSaveOnStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator It) const;
+
+ /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It,
+ /// for the case when the LR is saved in the register \p Reg.
+ void emitCFIForLRSaveToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator It,
+ Register Reg) const;
+
+ /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It,
+ /// after the LR was restored from the stack.
+ void emitCFIForLRRestoreFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator It) const;
+
+ /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It,
+ /// after the LR was restored from a register.
+ void emitCFIForLRRestoreFromReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator It) const;
+ /// \brief Sets the offsets on outlined instructions in \p MBB which use SP
+ /// so that they will be valid post-outlining.
+ ///
+ /// \param MBB A \p MachineBasicBlock in an outlined function.
+ void fixupPostOutline(MachineBasicBlock &MBB) const;
+
+ /// Returns true if the machine instruction offset can handle the stack fixup
+ /// and updates it if requested.
+ bool checkAndUpdateStackOffset(MachineInstr *MI, int64_t Fixup,
+ bool Updt) const;
+
unsigned getInstBundleLength(const MachineInstr &MI) const;
int getVLDMDefCycle(const InstrItineraryData *ItinData,
@@ -477,9 +477,9 @@ private:
MachineInstr *canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI,
const TargetInstrInfo *TII) const;
- bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
- AAResults *AA) const override;
-
+ bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
+ AAResults *AA) const override;
+
private:
/// Modeling special VFP / NEON fp MLA / MLS hazards.
@@ -644,77 +644,77 @@ static inline bool isJumpTableBranchOpcode(int Opc) {
Opc == ARM::t2BR_JT;
}
-static inline bool isLowOverheadTerminatorOpcode(int Opc) {
- return Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart ||
- Opc == ARM::t2LoopEnd || Opc == ARM::t2LoopEndDec;
-}
-
+static inline bool isLowOverheadTerminatorOpcode(int Opc) {
+ return Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart ||
+ Opc == ARM::t2LoopEnd || Opc == ARM::t2LoopEndDec;
+}
+
static inline
bool isIndirectBranchOpcode(int Opc) {
return Opc == ARM::BX || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND;
}
-static inline bool isIndirectCall(const MachineInstr &MI) {
- int Opc = MI.getOpcode();
- switch (Opc) {
- // indirect calls:
- case ARM::BLX:
- case ARM::BLX_noip:
- case ARM::BLX_pred:
- case ARM::BLX_pred_noip:
- case ARM::BX_CALL:
- case ARM::BMOVPCRX_CALL:
- case ARM::TCRETURNri:
- case ARM::TAILJMPr:
- case ARM::TAILJMPr4:
- case ARM::tBLXr:
- case ARM::tBLXr_noip:
- case ARM::tBLXNSr:
- case ARM::tBLXNS_CALL:
- case ARM::tBX_CALL:
- case ARM::tTAILJMPr:
- assert(MI.isCall(MachineInstr::IgnoreBundle));
- return true;
- // direct calls:
- case ARM::BL:
- case ARM::BL_pred:
- case ARM::BMOVPCB_CALL:
- case ARM::BL_PUSHLR:
- case ARM::BLXi:
- case ARM::TCRETURNdi:
- case ARM::TAILJMPd:
- case ARM::SVC:
- case ARM::HVC:
- case ARM::TPsoft:
- case ARM::tTAILJMPd:
- case ARM::t2SMC:
- case ARM::t2HVC:
- case ARM::tBL:
- case ARM::tBLXi:
- case ARM::tBL_PUSHLR:
- case ARM::tTAILJMPdND:
- case ARM::tSVC:
- case ARM::tTPsoft:
- assert(MI.isCall(MachineInstr::IgnoreBundle));
- return false;
- }
- assert(!MI.isCall(MachineInstr::IgnoreBundle));
- return false;
-}
-
-static inline bool isIndirectControlFlowNotComingBack(const MachineInstr &MI) {
- int opc = MI.getOpcode();
- return MI.isReturn() || isIndirectBranchOpcode(MI.getOpcode()) ||
- isJumpTableBranchOpcode(opc);
-}
-
-static inline bool isSpeculationBarrierEndBBOpcode(int Opc) {
- return Opc == ARM::SpeculationBarrierISBDSBEndBB ||
- Opc == ARM::SpeculationBarrierSBEndBB ||
- Opc == ARM::t2SpeculationBarrierISBDSBEndBB ||
- Opc == ARM::t2SpeculationBarrierSBEndBB;
-}
-
+static inline bool isIndirectCall(const MachineInstr &MI) {
+ int Opc = MI.getOpcode();
+ switch (Opc) {
+ // indirect calls:
+ case ARM::BLX:
+ case ARM::BLX_noip:
+ case ARM::BLX_pred:
+ case ARM::BLX_pred_noip:
+ case ARM::BX_CALL:
+ case ARM::BMOVPCRX_CALL:
+ case ARM::TCRETURNri:
+ case ARM::TAILJMPr:
+ case ARM::TAILJMPr4:
+ case ARM::tBLXr:
+ case ARM::tBLXr_noip:
+ case ARM::tBLXNSr:
+ case ARM::tBLXNS_CALL:
+ case ARM::tBX_CALL:
+ case ARM::tTAILJMPr:
+ assert(MI.isCall(MachineInstr::IgnoreBundle));
+ return true;
+ // direct calls:
+ case ARM::BL:
+ case ARM::BL_pred:
+ case ARM::BMOVPCB_CALL:
+ case ARM::BL_PUSHLR:
+ case ARM::BLXi:
+ case ARM::TCRETURNdi:
+ case ARM::TAILJMPd:
+ case ARM::SVC:
+ case ARM::HVC:
+ case ARM::TPsoft:
+ case ARM::tTAILJMPd:
+ case ARM::t2SMC:
+ case ARM::t2HVC:
+ case ARM::tBL:
+ case ARM::tBLXi:
+ case ARM::tBL_PUSHLR:
+ case ARM::tTAILJMPdND:
+ case ARM::tSVC:
+ case ARM::tTPsoft:
+ assert(MI.isCall(MachineInstr::IgnoreBundle));
+ return false;
+ }
+ assert(!MI.isCall(MachineInstr::IgnoreBundle));
+ return false;
+}
+
+static inline bool isIndirectControlFlowNotComingBack(const MachineInstr &MI) {
+ int opc = MI.getOpcode();
+ return MI.isReturn() || isIndirectBranchOpcode(MI.getOpcode()) ||
+ isJumpTableBranchOpcode(opc);
+}
+
+static inline bool isSpeculationBarrierEndBBOpcode(int Opc) {
+ return Opc == ARM::SpeculationBarrierISBDSBEndBB ||
+ Opc == ARM::SpeculationBarrierSBEndBB ||
+ Opc == ARM::t2SpeculationBarrierISBDSBEndBB ||
+ Opc == ARM::t2SpeculationBarrierSBEndBB;
+}
+
static inline bool isPopOpcode(int Opc) {
return Opc == ARM::tPOP_RET || Opc == ARM::LDMIA_RET ||
Opc == ARM::t2LDMIA_RET || Opc == ARM::tPOP || Opc == ARM::LDMIA_UPD ||
@@ -886,17 +886,17 @@ inline bool isLegalAddressImm(unsigned Opcode, int Imm,
return std::abs(Imm) < (((1 << 7) * 2) - 1) && Imm % 2 == 0;
case ARMII::AddrModeT2_i7s4:
return std::abs(Imm) < (((1 << 7) * 4) - 1) && Imm % 4 == 0;
- case ARMII::AddrModeT2_i8:
- return std::abs(Imm) < (((1 << 8) * 1) - 1);
- case ARMII::AddrModeT2_i12:
- return Imm >= 0 && Imm < (((1 << 12) * 1) - 1);
+ case ARMII::AddrModeT2_i8:
+ return std::abs(Imm) < (((1 << 8) * 1) - 1);
+ case ARMII::AddrModeT2_i12:
+ return Imm >= 0 && Imm < (((1 << 12) * 1) - 1);
default:
llvm_unreachable("Unhandled Addressing mode");
}
}
-// Return true if the given intrinsic is a gather
-inline bool isGather(IntrinsicInst *IntInst) {
+// Return true if the given intrinsic is a gather
+inline bool isGather(IntrinsicInst *IntInst) {
if (IntInst == nullptr)
return false;
unsigned IntrinsicID = IntInst->getIntrinsicID();
@@ -906,15 +906,15 @@ inline bool isGather(IntrinsicInst *IntInst) {
IntrinsicID == Intrinsic::arm_mve_vldr_gather_base_wb ||
IntrinsicID == Intrinsic::arm_mve_vldr_gather_base_wb_predicated ||
IntrinsicID == Intrinsic::arm_mve_vldr_gather_offset ||
- IntrinsicID == Intrinsic::arm_mve_vldr_gather_offset_predicated);
-}
-
-// Return true if the given intrinsic is a scatter
-inline bool isScatter(IntrinsicInst *IntInst) {
- if (IntInst == nullptr)
- return false;
- unsigned IntrinsicID = IntInst->getIntrinsicID();
- return (IntrinsicID == Intrinsic::masked_scatter ||
+ IntrinsicID == Intrinsic::arm_mve_vldr_gather_offset_predicated);
+}
+
+// Return true if the given intrinsic is a scatter
+inline bool isScatter(IntrinsicInst *IntInst) {
+ if (IntInst == nullptr)
+ return false;
+ unsigned IntrinsicID = IntInst->getIntrinsicID();
+ return (IntrinsicID == Intrinsic::masked_scatter ||
IntrinsicID == Intrinsic::arm_mve_vstr_scatter_base ||
IntrinsicID == Intrinsic::arm_mve_vstr_scatter_base_predicated ||
IntrinsicID == Intrinsic::arm_mve_vstr_scatter_base_wb ||
@@ -923,17 +923,17 @@ inline bool isScatter(IntrinsicInst *IntInst) {
IntrinsicID == Intrinsic::arm_mve_vstr_scatter_offset_predicated);
}
-// Return true if the given intrinsic is a gather or scatter
-inline bool isGatherScatter(IntrinsicInst *IntInst) {
- if (IntInst == nullptr)
- return false;
- return isGather(IntInst) || isScatter(IntInst);
-}
-
-unsigned getBLXOpcode(const MachineFunction &MF);
-unsigned gettBLXrOpcode(const MachineFunction &MF);
-unsigned getBLXpredOpcode(const MachineFunction &MF);
-
+// Return true if the given intrinsic is a gather or scatter
+inline bool isGatherScatter(IntrinsicInst *IntInst) {
+ if (IntInst == nullptr)
+ return false;
+ return isGather(IntInst) || isScatter(IntInst);
+}
+
+unsigned getBLXOpcode(const MachineFunction &MF);
+unsigned gettBLXrOpcode(const MachineFunction &MF);
+unsigned getBLXpredOpcode(const MachineFunction &MF);
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 1a264dabee..138431e36d 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -55,9 +55,9 @@
using namespace llvm;
ARMBaseRegisterInfo::ARMBaseRegisterInfo()
- : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC) {
- ARM_MC::initLLVMToCVRegMapping(this);
-}
+ : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC) {
+ ARM_MC::initLLVMToCVRegMapping(this);
+}
static unsigned getFramePointerReg(const ARMSubtarget &STI) {
return STI.useR7AsFramePointer() ? ARM::R7 : ARM::R11;
@@ -330,13 +330,13 @@ bool ARMBaseRegisterInfo::getRegAllocationHints(
case ARMRI::RegPairOdd:
Odd = 1;
break;
- case ARMRI::RegLR:
+ case ARMRI::RegLR:
TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM);
- if (MRI.getRegClass(VirtReg)->contains(ARM::LR))
- Hints.push_back(ARM::LR);
+ if (MRI.getRegClass(VirtReg)->contains(ARM::LR))
+ Hints.push_back(ARM::LR);
return false;
- default:
- return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM);
+ default:
+ return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM);
}
// This register should preferably be even (Odd == 0) or odd (Odd == 1).
@@ -640,10 +640,10 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
/// materializeFrameBaseRegister - Insert defining instruction(s) for BaseReg to
/// be a pointer to FrameIdx at the beginning of the basic block.
-Register
-ARMBaseRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
- int FrameIdx,
- int64_t Offset) const {
+Register
+ARMBaseRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ int FrameIdx,
+ int64_t Offset) const {
ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri :
(AFI->isThumb1OnlyFunction() ? ARM::tADDframe : ARM::t2ADDri);
@@ -657,7 +657,7 @@ ARMBaseRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const MCInstrDesc &MCID = TII.get(ADDriOpc);
- Register BaseReg = MRI.createVirtualRegister(&ARM::GPRRegClass);
+ Register BaseReg = MRI.createVirtualRegister(&ARM::GPRRegClass);
MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg)
@@ -665,8 +665,8 @@ ARMBaseRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
if (!AFI->isThumb1OnlyFunction())
MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
-
- return BaseReg;
+
+ return BaseReg;
}
void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/libs/llvm12/lib/Target/ARM/ARMBaseRegisterInfo.h
index 5afb6c6aa0..53e8aa657c 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -32,11 +32,11 @@ class LiveIntervals;
namespace ARMRI {
enum {
- // Used for LDRD register pairs
+ // Used for LDRD register pairs
RegPairOdd = 1,
- RegPairEven = 2,
- // Used to hint for lr in t2DoLoopStart
- RegLR = 3
+ RegPairEven = 2,
+ // Used to hint for lr in t2DoLoopStart
+ RegLR = 3
};
} // end namespace ARMRI
@@ -168,8 +168,8 @@ public:
int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
int Idx) const override;
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
- Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx,
- int64_t Offset) const override;
+ Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx,
+ int64_t Offset) const override;
void resolveFrameIndex(MachineInstr &MI, Register BaseReg,
int64_t Offset) const override;
bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg,
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMBlockPlacement.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMBlockPlacement.cpp
index 9ba16003a9..2cc6a5b4c1 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMBlockPlacement.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMBlockPlacement.cpp
@@ -1,228 +1,228 @@
-//===-- ARMBlockPlacement.cpp - ARM block placement pass ------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass re-arranges machine basic blocks to suit target requirements.
-// Currently it only moves blocks to fix backwards WLS branches.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ARM.h"
-#include "ARMBaseInstrInfo.h"
-#include "ARMBasicBlockInfo.h"
-#include "ARMSubtarget.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "arm-block-placement"
-#define DEBUG_PREFIX "ARM Block Placement: "
-
-namespace llvm {
-class ARMBlockPlacement : public MachineFunctionPass {
-private:
- const ARMBaseInstrInfo *TII;
- std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
- MachineLoopInfo *MLI = nullptr;
-
-public:
- static char ID;
- ARMBlockPlacement() : MachineFunctionPass(ID) {}
-
- bool runOnMachineFunction(MachineFunction &MF) override;
- void moveBasicBlock(MachineBasicBlock *BB, MachineBasicBlock *After);
- bool blockIsBefore(MachineBasicBlock *BB, MachineBasicBlock *Other);
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<MachineLoopInfo>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-};
-
-} // namespace llvm
-
-FunctionPass *llvm::createARMBlockPlacementPass() {
- return new ARMBlockPlacement();
-}
-
-char ARMBlockPlacement::ID = 0;
-
-INITIALIZE_PASS(ARMBlockPlacement, DEBUG_TYPE, "ARM block placement", false,
- false)
-
-bool ARMBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(MF.getFunction()))
- return false;
- const ARMSubtarget &ST = static_cast<const ARMSubtarget &>(MF.getSubtarget());
- if (!ST.hasLOB())
- return false;
- LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Running on " << MF.getName() << "\n");
- MLI = &getAnalysis<MachineLoopInfo>();
- TII = static_cast<const ARMBaseInstrInfo *>(ST.getInstrInfo());
- BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(MF));
- MF.RenumberBlocks();
- BBUtils->computeAllBlockSizes();
- BBUtils->adjustBBOffsetsAfter(&MF.front());
- bool Changed = false;
-
- // Find loops with a backwards branching WLS.
- // This requires looping over the loops in the function, checking each
- // preheader for a WLS and if its target is before the preheader. If moving
- // the target block wouldn't produce another backwards WLS or a new forwards
- // LE branch then move the target block after the preheader.
- for (auto *ML : *MLI) {
- MachineBasicBlock *Preheader = ML->getLoopPredecessor();
- if (!Preheader)
- continue;
-
- for (auto &Terminator : Preheader->terminators()) {
- if (Terminator.getOpcode() != ARM::t2WhileLoopStart)
- continue;
- MachineBasicBlock *LoopExit = Terminator.getOperand(1).getMBB();
- // We don't want to move the function's entry block.
- if (!LoopExit->getPrevNode())
- continue;
- if (blockIsBefore(Preheader, LoopExit))
- continue;
- LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Found a backwards WLS from "
- << Preheader->getFullName() << " to "
- << LoopExit->getFullName() << "\n");
-
- // Make sure that moving the target block doesn't cause any of its WLSs
- // that were previously not backwards to become backwards
- bool CanMove = true;
- for (auto &LoopExitTerminator : LoopExit->terminators()) {
- if (LoopExitTerminator.getOpcode() != ARM::t2WhileLoopStart)
- continue;
- // An example loop structure where the LoopExit can't be moved, since
- // bb1's WLS will become backwards once it's moved after bb3:
- // bb1: - LoopExit
- // WLS bb2 - LoopExit2
- // bb2:
- // ...
- // bb3: - Preheader
- // WLS bb1
- // bb4: - Header
- MachineBasicBlock *LoopExit2 =
- LoopExitTerminator.getOperand(1).getMBB();
- // If the WLS from LoopExit to LoopExit2 is already backwards then
- // moving LoopExit won't affect it, so it can be moved. If LoopExit2 is
- // after the Preheader then moving will keep it as a forward branch, so
- // it can be moved. If LoopExit2 is between the Preheader and LoopExit
- // then moving LoopExit will make it a backwards branch, so it can't be
- // moved since we'd fix one and introduce one backwards branch.
- // TODO: Analyse the blocks to make a decision if it would be worth
- // moving LoopExit even if LoopExit2 is between the Preheader and
- // LoopExit.
- if (!blockIsBefore(LoopExit2, LoopExit) &&
- (LoopExit2 == Preheader || blockIsBefore(LoopExit2, Preheader))) {
- LLVM_DEBUG(dbgs() << DEBUG_PREFIX
- << "Can't move the target block as it would "
- "introduce a new backwards WLS branch\n");
- CanMove = false;
- break;
- }
- }
-
- if (CanMove) {
- // Make sure no LEs become forwards.
- // An example loop structure where the LoopExit can't be moved, since
- // bb2's LE will become forwards once bb1 is moved after bb3.
- // bb1: - LoopExit
- // bb2:
- // LE bb1 - Terminator
- // bb3: - Preheader
- // WLS bb1
- // bb4: - Header
- for (auto It = LoopExit->getIterator(); It != Preheader->getIterator();
- It++) {
- MachineBasicBlock *MBB = &*It;
- for (auto &Terminator : MBB->terminators()) {
- if (Terminator.getOpcode() != ARM::t2LoopEndDec)
- continue;
- MachineBasicBlock *LETarget = Terminator.getOperand(2).getMBB();
- // The LE will become forwards branching if it branches to LoopExit
- // which isn't allowed by the architecture, so we should avoid
- // introducing these.
- // TODO: Analyse the blocks to make a decision if it would be worth
- // moving LoopExit even if we'd introduce a forwards LE
- if (LETarget == LoopExit) {
- LLVM_DEBUG(dbgs() << DEBUG_PREFIX
- << "Can't move the target block as it would "
- "introduce a new forwards LE branch\n");
- CanMove = false;
- break;
- }
- }
- }
-
- if (!CanMove)
- break;
- }
-
- if (CanMove) {
- moveBasicBlock(LoopExit, Preheader);
- Changed = true;
- break;
- }
- }
- }
-
- return Changed;
-}
-
-bool ARMBlockPlacement::blockIsBefore(MachineBasicBlock *BB,
- MachineBasicBlock *Other) {
- return BBUtils->getOffsetOf(Other) > BBUtils->getOffsetOf(BB);
-}
-
-void ARMBlockPlacement::moveBasicBlock(MachineBasicBlock *BB,
- MachineBasicBlock *After) {
- LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Moving " << BB->getName() << " after "
- << After->getName() << "\n");
- MachineBasicBlock *BBPrevious = BB->getPrevNode();
- assert(BBPrevious && "Cannot move the function entry basic block");
- MachineBasicBlock *AfterNext = After->getNextNode();
- MachineBasicBlock *BBNext = BB->getNextNode();
-
- BB->moveAfter(After);
-
- auto FixFallthrough = [&](MachineBasicBlock *From, MachineBasicBlock *To) {
- LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Checking for fallthrough from "
- << From->getName() << " to " << To->getName() << "\n");
- assert(From->isSuccessor(To) &&
- "'To' is expected to be a successor of 'From'");
- MachineInstr &Terminator = *(--From->terminators().end());
- if (!Terminator.isUnconditionalBranch()) {
- // The BB doesn't have an unconditional branch so it relied on
- // fall-through. Fix by adding an unconditional branch to the moved BB.
- MachineInstrBuilder MIB =
- BuildMI(From, Terminator.getDebugLoc(), TII->get(ARM::t2B));
- MIB.addMBB(To);
- MIB.addImm(ARMCC::CondCodes::AL);
- MIB.addReg(ARM::NoRegister);
- LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Adding unconditional branch from "
- << From->getName() << " to " << To->getName() << ": "
- << *MIB.getInstr());
- }
- };
-
- // Fix fall-through to the moved BB from the one that used to be before it.
- if (BBPrevious->isSuccessor(BB))
- FixFallthrough(BBPrevious, BB);
- // Fix fall through from the destination BB to the one that used to follow.
- if (AfterNext && After->isSuccessor(AfterNext))
- FixFallthrough(After, AfterNext);
- // Fix fall through from the moved BB to the one that used to follow.
- if (BBNext && BB->isSuccessor(BBNext))
- FixFallthrough(BB, BBNext);
-
- BBUtils->adjustBBOffsetsAfter(After);
-}
+//===-- ARMBlockPlacement.cpp - ARM block placement pass ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass re-arranges machine basic blocks to suit target requirements.
+// Currently it only moves blocks to fix backwards WLS branches.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBasicBlockInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-block-placement"
+#define DEBUG_PREFIX "ARM Block Placement: "
+
+namespace llvm {
+class ARMBlockPlacement : public MachineFunctionPass {
+private:
+ const ARMBaseInstrInfo *TII;
+ std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
+ MachineLoopInfo *MLI = nullptr;
+
+public:
+ static char ID;
+ ARMBlockPlacement() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void moveBasicBlock(MachineBasicBlock *BB, MachineBasicBlock *After);
+ bool blockIsBefore(MachineBasicBlock *BB, MachineBasicBlock *Other);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // namespace llvm
+
+FunctionPass *llvm::createARMBlockPlacementPass() {
+ return new ARMBlockPlacement();
+}
+
+char ARMBlockPlacement::ID = 0;
+
+INITIALIZE_PASS(ARMBlockPlacement, DEBUG_TYPE, "ARM block placement", false,
+ false)
+
+bool ARMBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+ const ARMSubtarget &ST = static_cast<const ARMSubtarget &>(MF.getSubtarget());
+ if (!ST.hasLOB())
+ return false;
+ LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Running on " << MF.getName() << "\n");
+ MLI = &getAnalysis<MachineLoopInfo>();
+ TII = static_cast<const ARMBaseInstrInfo *>(ST.getInstrInfo());
+ BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(MF));
+ MF.RenumberBlocks();
+ BBUtils->computeAllBlockSizes();
+ BBUtils->adjustBBOffsetsAfter(&MF.front());
+ bool Changed = false;
+
+ // Find loops with a backwards branching WLS.
+ // This requires looping over the loops in the function, checking each
+ // preheader for a WLS whose target is before the preheader. If moving
+ // the target block wouldn't produce another backwards WLS or a new forwards
+ // LE branch then move the target block after the preheader.
+ for (auto *ML : *MLI) {
+ MachineBasicBlock *Preheader = ML->getLoopPredecessor();
+ if (!Preheader)
+ continue;
+
+ for (auto &Terminator : Preheader->terminators()) {
+ if (Terminator.getOpcode() != ARM::t2WhileLoopStart)
+ continue;
+ MachineBasicBlock *LoopExit = Terminator.getOperand(1).getMBB();
+ // We don't want to move the function's entry block.
+ if (!LoopExit->getPrevNode())
+ continue;
+ if (blockIsBefore(Preheader, LoopExit))
+ continue;
+ LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Found a backwards WLS from "
+ << Preheader->getFullName() << " to "
+ << LoopExit->getFullName() << "\n");
+
+ // Make sure that moving the target block doesn't cause any of its WLSs
+ // that were previously not backwards to become backwards
+ bool CanMove = true;
+ for (auto &LoopExitTerminator : LoopExit->terminators()) {
+ if (LoopExitTerminator.getOpcode() != ARM::t2WhileLoopStart)
+ continue;
+ // An example loop structure where the LoopExit can't be moved, since
+ // bb1's WLS will become backwards once it's moved after bb3:
+ // bb1: - LoopExit
+ // WLS bb2 - LoopExit2
+ // bb2:
+ // ...
+ // bb3: - Preheader
+ // WLS bb1
+ // bb4: - Header
+ MachineBasicBlock *LoopExit2 =
+ LoopExitTerminator.getOperand(1).getMBB();
+ // If the WLS from LoopExit to LoopExit2 is already backwards then
+ // moving LoopExit won't affect it, so it can be moved. If LoopExit2 is
+ // after the Preheader then moving will keep it as a forward branch, so
+ // it can be moved. If LoopExit2 is between the Preheader and LoopExit
+ // then moving LoopExit will make it a backwards branch, so it can't be
+ // moved since we'd fix one and introduce one backwards branch.
+ // TODO: Analyse the blocks to make a decision if it would be worth
+ // moving LoopExit even if LoopExit2 is between the Preheader and
+ // LoopExit.
+ if (!blockIsBefore(LoopExit2, LoopExit) &&
+ (LoopExit2 == Preheader || blockIsBefore(LoopExit2, Preheader))) {
+ LLVM_DEBUG(dbgs() << DEBUG_PREFIX
+ << "Can't move the target block as it would "
+ "introduce a new backwards WLS branch\n");
+ CanMove = false;
+ break;
+ }
+ }
+
+ if (CanMove) {
+ // Make sure no LEs become forwards.
+ // An example loop structure where the LoopExit can't be moved, since
+ // bb2's LE will become forwards once bb1 is moved after bb3.
+ // bb1: - LoopExit
+ // bb2:
+ // LE bb1 - Terminator
+ // bb3: - Preheader
+ // WLS bb1
+ // bb4: - Header
+ for (auto It = LoopExit->getIterator(); It != Preheader->getIterator();
+ It++) {
+ MachineBasicBlock *MBB = &*It;
+ for (auto &Terminator : MBB->terminators()) {
+ if (Terminator.getOpcode() != ARM::t2LoopEndDec)
+ continue;
+ MachineBasicBlock *LETarget = Terminator.getOperand(2).getMBB();
+ // The LE will become forwards branching if it branches to LoopExit
+ // which isn't allowed by the architecture, so we should avoid
+ // introducing these.
+ // TODO: Analyse the blocks to make a decision if it would be worth
+ // moving LoopExit even if we'd introduce a forwards LE
+ if (LETarget == LoopExit) {
+ LLVM_DEBUG(dbgs() << DEBUG_PREFIX
+ << "Can't move the target block as it would "
+ "introduce a new forwards LE branch\n");
+ CanMove = false;
+ break;
+ }
+ }
+ }
+
+ if (!CanMove)
+ break;
+ }
+
+ if (CanMove) {
+ moveBasicBlock(LoopExit, Preheader);
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ return Changed;
+}
+
+bool ARMBlockPlacement::blockIsBefore(MachineBasicBlock *BB,
+ MachineBasicBlock *Other) {
+ return BBUtils->getOffsetOf(Other) > BBUtils->getOffsetOf(BB);
+}
+
+void ARMBlockPlacement::moveBasicBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *After) {
+ LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Moving " << BB->getName() << " after "
+ << After->getName() << "\n");
+ MachineBasicBlock *BBPrevious = BB->getPrevNode();
+ assert(BBPrevious && "Cannot move the function entry basic block");
+ MachineBasicBlock *AfterNext = After->getNextNode();
+ MachineBasicBlock *BBNext = BB->getNextNode();
+
+ BB->moveAfter(After);
+
+ auto FixFallthrough = [&](MachineBasicBlock *From, MachineBasicBlock *To) {
+ LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Checking for fallthrough from "
+ << From->getName() << " to " << To->getName() << "\n");
+ assert(From->isSuccessor(To) &&
+ "'To' is expected to be a successor of 'From'");
+ MachineInstr &Terminator = *(--From->terminators().end());
+ if (!Terminator.isUnconditionalBranch()) {
+ // The BB doesn't have an unconditional branch so it relied on
+ // fall-through. Fix by adding an unconditional branch to the moved BB.
+ MachineInstrBuilder MIB =
+ BuildMI(From, Terminator.getDebugLoc(), TII->get(ARM::t2B));
+ MIB.addMBB(To);
+ MIB.addImm(ARMCC::CondCodes::AL);
+ MIB.addReg(ARM::NoRegister);
+ LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Adding unconditional branch from "
+ << From->getName() << " to " << To->getName() << ": "
+ << *MIB.getInstr());
+ }
+ };
+
+ // Fix fall-through to the moved BB from the one that used to be before it.
+ if (BBPrevious->isSuccessor(BB))
+ FixFallthrough(BBPrevious, BB);
+ // Fix fall through from the destination BB to the one that used to follow.
+ if (AfterNext && After->isSuccessor(AfterNext))
+ FixFallthrough(After, AfterNext);
+ // Fix fall through from the moved BB to the one that used to follow.
+ if (BBNext && BB->isSuccessor(BBNext))
+ FixFallthrough(BB, BBNext);
+
+ BBUtils->adjustBBOffsetsAfter(After);
+}
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMCallLowering.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMCallLowering.cpp
index 6feed82596..471474788e 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMCallLowering.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMCallLowering.cpp
@@ -85,11 +85,11 @@ namespace {
/// Helper class for values going out through an ABI boundary (used for handling
/// function return values and call parameters).
-struct ARMOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
- ARMOutgoingValueHandler(MachineIRBuilder &MIRBuilder,
- MachineRegisterInfo &MRI, MachineInstrBuilder &MIB,
- CCAssignFn *AssignFn)
- : OutgoingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
+struct ARMOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
+ ARMOutgoingValueHandler(MachineIRBuilder &MIRBuilder,
+ MachineRegisterInfo &MRI, MachineInstrBuilder &MIB,
+ CCAssignFn *AssignFn)
+ : OutgoingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
@@ -257,14 +257,14 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
CCAssignFn *AssignFn =
TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg());
- ARMOutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret,
- AssignFn);
+ ARMOutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret,
+ AssignFn);
return handleAssignments(MIRBuilder, SplitRetInfos, RetHandler);
}
bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
- const Value *Val, ArrayRef<Register> VRegs,
- FunctionLoweringInfo &FLI) const {
+ const Value *Val, ArrayRef<Register> VRegs,
+ FunctionLoweringInfo &FLI) const {
assert(!Val == VRegs.empty() && "Return value without a vreg");
auto const &ST = MIRBuilder.getMF().getSubtarget<ARMSubtarget>();
@@ -282,10 +282,10 @@ namespace {
/// Helper class for values coming in through an ABI boundary (used for handling
/// formal arguments and call return values).
-struct ARMIncomingValueHandler : public CallLowering::IncomingValueHandler {
- ARMIncomingValueHandler(MachineIRBuilder &MIRBuilder,
- MachineRegisterInfo &MRI, CCAssignFn AssignFn)
- : IncomingValueHandler(MIRBuilder, MRI, AssignFn) {}
+struct ARMIncomingValueHandler : public CallLowering::IncomingValueHandler {
+ ARMIncomingValueHandler(MachineIRBuilder &MIRBuilder,
+ MachineRegisterInfo &MRI, CCAssignFn AssignFn)
+ : IncomingValueHandler(MIRBuilder, MRI, AssignFn) {}
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
@@ -335,8 +335,8 @@ struct ARMIncomingValueHandler : public CallLowering::IncomingValueHandler {
assert(VA.isRegLoc() && "Value shouldn't be assigned to reg");
assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");
- uint64_t ValSize = VA.getValVT().getFixedSizeInBits();
- uint64_t LocSize = VA.getLocVT().getFixedSizeInBits();
+ uint64_t ValSize = VA.getValVT().getFixedSizeInBits();
+ uint64_t LocSize = VA.getLocVT().getFixedSizeInBits();
assert(ValSize <= 64 && "Unsupported value size");
assert(LocSize <= 64 && "Unsupported location size");
@@ -397,10 +397,10 @@ struct ARMIncomingValueHandler : public CallLowering::IncomingValueHandler {
virtual void markPhysRegUsed(unsigned PhysReg) = 0;
};
-struct FormalArgHandler : public ARMIncomingValueHandler {
+struct FormalArgHandler : public ARMIncomingValueHandler {
FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
CCAssignFn AssignFn)
- : ARMIncomingValueHandler(MIRBuilder, MRI, AssignFn) {}
+ : ARMIncomingValueHandler(MIRBuilder, MRI, AssignFn) {}
void markPhysRegUsed(unsigned PhysReg) override {
MIRBuilder.getMRI()->addLiveIn(PhysReg);
@@ -410,10 +410,10 @@ struct FormalArgHandler : public ARMIncomingValueHandler {
} // end anonymous namespace
-bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
- const Function &F,
- ArrayRef<ArrayRef<Register>> VRegs,
- FunctionLoweringInfo &FLI) const {
+bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
+ const Function &F,
+ ArrayRef<ArrayRef<Register>> VRegs,
+ FunctionLoweringInfo &FLI) const {
auto &TLI = *getTLI<ARMTargetLowering>();
auto Subtarget = TLI.getSubtarget();
@@ -434,7 +434,7 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
for (auto &Arg : F.args()) {
if (!isSupportedType(DL, TLI, Arg.getType()))
return false;
- if (Arg.hasPassPointeeByValueCopyAttr())
+ if (Arg.hasPassPointeeByValueCopyAttr())
return false;
}
@@ -468,10 +468,10 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
namespace {
-struct CallReturnHandler : public ARMIncomingValueHandler {
+struct CallReturnHandler : public ARMIncomingValueHandler {
CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
MachineInstrBuilder MIB, CCAssignFn *AssignFn)
- : ARMIncomingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
+ : ARMIncomingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
void markPhysRegUsed(unsigned PhysReg) override {
MIB.addDef(PhysReg, RegState::Implicit);
@@ -481,16 +481,16 @@ struct CallReturnHandler : public ARMIncomingValueHandler {
};
// FIXME: This should move to the ARMSubtarget when it supports all the opcodes.
-unsigned getCallOpcode(const MachineFunction &MF, const ARMSubtarget &STI,
- bool isDirect) {
+unsigned getCallOpcode(const MachineFunction &MF, const ARMSubtarget &STI,
+ bool isDirect) {
if (isDirect)
return STI.isThumb() ? ARM::tBL : ARM::BL;
if (STI.isThumb())
- return gettBLXrOpcode(MF);
+ return gettBLXrOpcode(MF);
if (STI.hasV5TOps())
- return getBLXOpcode(MF);
+ return getBLXOpcode(MF);
if (STI.hasV4TOps())
return ARM::BX_CALL;
@@ -518,7 +518,7 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &
// Create the call instruction so we can add the implicit uses of arg
// registers, but don't insert it yet.
bool IsDirect = !Info.Callee.isReg();
- auto CallOpcode = getCallOpcode(MF, STI, IsDirect);
+ auto CallOpcode = getCallOpcode(MF, STI, IsDirect);
auto MIB = MIRBuilder.buildInstrNoInsert(CallOpcode);
bool IsThumb = STI.isThumb();
@@ -549,8 +549,8 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &
splitToValueTypes(Arg, ArgInfos, MF);
}
- auto ArgAssignFn = TLI.CCAssignFnForCall(Info.CallConv, Info.IsVarArg);
- ARMOutgoingValueHandler ArgHandler(MIRBuilder, MRI, MIB, ArgAssignFn);
+ auto ArgAssignFn = TLI.CCAssignFnForCall(Info.CallConv, Info.IsVarArg);
+ ARMOutgoingValueHandler ArgHandler(MIRBuilder, MRI, MIB, ArgAssignFn);
if (!handleAssignments(MIRBuilder, ArgInfos, ArgHandler))
return false;
@@ -563,7 +563,7 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &
ArgInfos.clear();
splitToValueTypes(Info.OrigRet, ArgInfos, MF);
- auto RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv, Info.IsVarArg);
+ auto RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv, Info.IsVarArg);
CallReturnHandler RetHandler(MIRBuilder, MRI, MIB, RetAssignFn);
if (!handleAssignments(MIRBuilder, ArgInfos, RetHandler))
return false;
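
The opcode choice in getCallOpcode above is a small decision tree: direct calls always get BL (tBL in Thumb mode), and indirect calls fall back through the best branch-and-link form the ISA offers, with the gettBLXrOpcode/getBLXOpcode helpers presumably returning the _noip variants when SLS hardening asks for them. A standalone sketch of the same shape (the enum and flags below are illustrative stand-ins, not LLVM's types):

#include <cstdio>

// Illustrative stand-ins; these are not LLVM's opcode or subtarget types.
enum CallOpcode { BL, tBL, BLX, tBLXr, BX_CALL, BMOVPCRX_CALL };

struct SubtargetFlags {
  bool IsThumb;
  bool HasV5TOps; // BLX <reg> exists
  bool HasV4TOps; // BX <reg> exists
};

// Mirrors the shape of the selection above: direct calls use BL/tBL,
// indirect calls use the best available register-indirect branch-and-link.
static CallOpcode selectCallOpcode(const SubtargetFlags &ST, bool IsDirect) {
  if (IsDirect)
    return ST.IsThumb ? tBL : BL;
  if (ST.IsThumb)
    return tBLXr;
  if (ST.HasV5TOps)
    return BLX;
  if (ST.HasV4TOps)
    return BX_CALL;       // branch via BX, with LR set up separately
  return BMOVPCRX_CALL;   // pre-v4T fallback: MOV PC, <reg> style sequence
}

int main() {
  SubtargetFlags armv4{false, false, false};
  std::printf("%d\n", selectCallOpcode(armv4, /*IsDirect=*/false)); // 5 (BMOVPCRX_CALL)
  return 0;
}
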
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMCallLowering.h b/contrib/libs/llvm12/lib/Target/ARM/ARMCallLowering.h
index 3be73d497d..9bff3564c5 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMCallLowering.h
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMCallLowering.h
@@ -33,12 +33,12 @@ public:
ARMCallLowering(const ARMTargetLowering &TLI);
bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
- ArrayRef<Register> VRegs,
- FunctionLoweringInfo &FLI) const override;
+ ArrayRef<Register> VRegs,
+ FunctionLoweringInfo &FLI) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
- ArrayRef<ArrayRef<Register>> VRegs,
- FunctionLoweringInfo &FLI) const override;
+ ArrayRef<ArrayRef<Register>> VRegs,
+ FunctionLoweringInfo &FLI) const override;
bool lowerCall(MachineIRBuilder &MIRBuilder,
CallLoweringInfo &Info) const override;
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMConstantIslandPass.cpp
index 630490f6f9..86faf511c9 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -338,32 +338,32 @@ LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() {
}
#endif
-// Align blocks where the previous block does not fall through. This may add
-// extra NOPs but they will not be executed. It uses the PrefLoopAlignment as a
-// measure of how much to align, and only runs at CodeGenOpt::Aggressive.
-static bool AlignBlocks(MachineFunction *MF) {
- if (MF->getTarget().getOptLevel() != CodeGenOpt::Aggressive ||
- MF->getFunction().hasOptSize())
- return false;
-
- auto *TLI = MF->getSubtarget().getTargetLowering();
- const Align Alignment = TLI->getPrefLoopAlignment();
- if (Alignment < 4)
- return false;
-
- bool Changed = false;
- bool PrevCanFallthough = true;
- for (auto &MBB : *MF) {
- if (!PrevCanFallthough) {
- Changed = true;
- MBB.setAlignment(Alignment);
- }
- PrevCanFallthough = MBB.canFallThrough();
- }
-
- return Changed;
-}
-
+// Align blocks where the previous block does not fall through. This may add
+// extra NOPs but they will not be executed. It uses the PrefLoopAlignment as a
+// measure of how much to align, and only runs at CodeGenOpt::Aggressive.
+static bool AlignBlocks(MachineFunction *MF) {
+ if (MF->getTarget().getOptLevel() != CodeGenOpt::Aggressive ||
+ MF->getFunction().hasOptSize())
+ return false;
+
+ auto *TLI = MF->getSubtarget().getTargetLowering();
+ const Align Alignment = TLI->getPrefLoopAlignment();
+ if (Alignment < 4)
+ return false;
+
+ bool Changed = false;
+ bool PrevCanFallthough = true;
+ for (auto &MBB : *MF) {
+ if (!PrevCanFallthough) {
+ Changed = true;
+ MBB.setAlignment(Alignment);
+ }
+ PrevCanFallthough = MBB.canFallThrough();
+ }
+
+ return Changed;
+}
+
bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
MCP = mf.getConstantPool();
@@ -385,10 +385,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
isThumb2 = AFI->isThumb2Function();
bool GenerateTBB = isThumb2 || (isThumb1 && SynthesizeThumb1TBB);
- // TBB generation code in this constant island pass has not been adapted to
- // deal with speculation barriers.
- if (STI->hardenSlsRetBr())
- GenerateTBB = false;
+ // TBB generation code in this constant island pass has not been adapted to
+ // deal with speculation barriers.
+ if (STI->hardenSlsRetBr())
+ GenerateTBB = false;
// Renumber all of the machine basic blocks in the function, guaranteeing that
// the numbers agree with the position of the block in the function.
@@ -406,9 +406,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
MF->RenumberBlocks();
}
- // Align any non-fallthrough blocks
- MadeChange |= AlignBlocks(MF);
-
+ // Align any non-fallthrough blocks
+ MadeChange |= AlignBlocks(MF);
+
// Perform the initial placement of the constant pool entries. To start with,
// we put them all at the end of the function.
std::vector<MachineInstr*> CPEMIs;
@@ -524,11 +524,11 @@ ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs)
// The function needs to be as aligned as the basic blocks. The linker may
// move functions around based on their alignment.
- // Special case: halfword literals still need word alignment on the function.
- Align FuncAlign = MaxAlign;
- if (MaxAlign == 2)
- FuncAlign = Align(4);
- MF->ensureAlignment(FuncAlign);
+ // Special case: halfword literals still need word alignment on the function.
+ Align FuncAlign = MaxAlign;
+ if (MaxAlign == 2)
+ FuncAlign = Align(4);
+ MF->ensureAlignment(FuncAlign);
// Order the entries in BB by descending alignment. That ensures correct
// alignment of all entries as long as BB is sufficiently aligned. Keep
@@ -543,7 +543,7 @@ ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs)
const DataLayout &TD = MF->getDataLayout();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
- unsigned Size = CPs[i].getSizeInBytes(TD);
+ unsigned Size = CPs[i].getSizeInBytes(TD);
Align Alignment = CPs[i].getAlign();
// Verify that all constant pool entries are a multiple of their alignment.
// If not, we would have to pad them out so that instructions stay aligned.
@@ -586,12 +586,12 @@ void ARMConstantIslands::doInitialJumpTablePlacement(
MachineBasicBlock *LastCorrectlyNumberedBB = nullptr;
for (MachineBasicBlock &MBB : *MF) {
auto MI = MBB.getLastNonDebugInstr();
- // Look past potential SpeculationBarriers at end of BB.
- while (MI != MBB.end() &&
- (isSpeculationBarrierEndBBOpcode(MI->getOpcode()) ||
- MI->isDebugInstr()))
- --MI;
-
+ // Look past potential SpeculationBarriers at end of BB.
+ while (MI != MBB.end() &&
+ (isSpeculationBarrierEndBBOpcode(MI->getOpcode()) ||
+ MI->isDebugInstr()))
+ --MI;
+
if (MI == MBB.end())
continue;
@@ -814,26 +814,26 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
// Taking the address of a CP entry.
case ARM::LEApcrel:
- case ARM::LEApcrelJT: {
- // This takes a SoImm, which is an 8-bit immediate rotated. We'll
- // pretend the maximum offset is 255 * 4. Since each instruction
- // is 4 bytes wide, this is always correct. We'll check for other
- // displacements that fit in a SoImm as well.
- Bits = 8;
- NegOk = true;
- IsSoImm = true;
- unsigned CPI = I.getOperand(op).getIndex();
- assert(CPI < CPEMIs.size());
- MachineInstr *CPEMI = CPEMIs[CPI];
- const Align CPEAlign = getCPEAlign(CPEMI);
- const unsigned LogCPEAlign = Log2(CPEAlign);
- if (LogCPEAlign >= 2)
- Scale = 4;
- else
- // For constants with less than 4-byte alignment,
- // we'll pretend the maximum offset is 255 * 1.
- Scale = 1;
- }
+ case ARM::LEApcrelJT: {
+ // This takes a SoImm, which is an 8-bit immediate rotated. We'll
+ // pretend the maximum offset is 255 * 4. Since each instruction
+ // is 4 bytes wide, this is always correct. We'll check for other
+ // displacements that fit in a SoImm as well.
+ Bits = 8;
+ NegOk = true;
+ IsSoImm = true;
+ unsigned CPI = I.getOperand(op).getIndex();
+ assert(CPI < CPEMIs.size());
+ MachineInstr *CPEMI = CPEMIs[CPI];
+ const Align CPEAlign = getCPEAlign(CPEMI);
+ const unsigned LogCPEAlign = Log2(CPEAlign);
+ if (LogCPEAlign >= 2)
+ Scale = 4;
+ else
+ // For constants with less than 4-byte alignment,
+ // we'll pretend the maximum offset is 255 * 1.
+ Scale = 1;
+ }
break;
case ARM::t2LEApcrel:
case ARM::t2LEApcrelJT:
@@ -2124,7 +2124,7 @@ static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) {
MachineFunction *MF = MBB->getParent();
++MBB;
- return MBB != MF->end() && !MBB->empty() && &*MBB->begin() == CPEMI;
+ return MBB != MF->end() && !MBB->empty() && &*MBB->begin() == CPEMI;
}
static void RemoveDeadAddBetweenLEAAndJT(MachineInstr *LEAMI,
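
The LEApcrel case above leans on the ARM so_imm encoding: an 8-bit constant rotated right by an even amount, which is why the pass models the reachable displacement as at most 255 * 4. A self-contained checker for that encodability property, as a sketch rather than the ARM_AM helper the backend actually uses:

#include <cstdint>
#include <cstdio>

// True if V can be encoded as an ARM so_imm: an 8-bit constant rotated
// right by an even amount (0, 2, ..., 30).
static bool isARMSOImm(uint32_t V) {
  for (unsigned Rot = 0; Rot < 32; Rot += 2) {
    // Rotating V left by Rot undoes a rotate-right by Rot; if the result
    // fits in 8 bits, V is encodable with that rotation.
    uint32_t Undone = Rot ? ((V << Rot) | (V >> (32 - Rot))) : V;
    if (Undone <= 0xFFu)
      return true;
  }
  return false;
}

int main() {
  std::printf("%d\n", isARMSOImm(0x000003FC)); // 1: 0xFF rotated right by 30
  std::printf("%d\n", isARMSOImm(0x000001FE)); // 0: would need an odd rotation
  return 0;
}
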
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index a7f1765a93..a38327ffe6 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -875,25 +875,25 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!");
unsigned ImmVal = (unsigned)MO.getImm();
- unsigned SOImmValV1 = 0, SOImmValV2 = 0;
-
- if (ARM_AM::isSOImmTwoPartVal(ImmVal)) { // Expand into a movi + orr.
- LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
- HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
- .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg);
- SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
- SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
- } else { // Expand into a mvn + sub.
- LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi), DstReg);
- HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri))
- .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg);
- SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(-ImmVal);
- SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(-ImmVal);
- SOImmValV1 = ~(-SOImmValV1);
- }
-
+ unsigned SOImmValV1 = 0, SOImmValV2 = 0;
+
+ if (ARM_AM::isSOImmTwoPartVal(ImmVal)) { // Expand into a movi + orr.
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg);
+ SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
+ SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
+ } else { // Expand into a mvn + sub.
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi), DstReg);
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg);
+ SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(-ImmVal);
+ SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(-ImmVal);
+ SOImmValV1 = ~(-SOImmValV1);
+ }
+
unsigned MIFlags = MI.getFlags();
LO16 = LO16.addImm(SOImmValV1);
HI16 = HI16.addImm(SOImmValV2);
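
The expansion above materializes a 32-bit constant that is not a single so_imm from two rotated 8-bit pieces, either MOV followed by ORR, or MVN followed by SUB when the negated value splits more cleanly. A concrete worked example of the MOV + ORR shape (plain arithmetic, not the ARM_AM::getSOImmTwoPart* helpers themselves):

#include <cstdint>
#include <cstdio>

int main() {
  // 0x00FF00FF is not a single rotated 8-bit immediate, but it is the OR of
  // two of them, so it can be built as:
  //   MOV r0, #0x000000FF
  //   ORR r0, r0, #0x00FF0000
  uint32_t r0 = 0x000000FFu;   // MOV
  r0 |= 0x00FF0000u;           // ORR
  std::printf("0x%08X\n", r0); // 0x00FF00FF

  // A value such as 0xFFFFFE0F has too many set bits to split this way, but
  // its negation (0x1F1 = 0xF1 | 0x100) does split, which is the case the
  // MVN + SUB branch above handles.
  return 0;
}
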
@@ -1871,66 +1871,66 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
default:
return false;
- case ARM::VBSPd:
- case ARM::VBSPq: {
- Register DstReg = MI.getOperand(0).getReg();
- if (DstReg == MI.getOperand(3).getReg()) {
- // Expand to VBIT
- unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBITd : ARM::VBITq;
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
- .add(MI.getOperand(0))
- .add(MI.getOperand(3))
- .add(MI.getOperand(2))
- .add(MI.getOperand(1))
- .addImm(MI.getOperand(4).getImm())
- .add(MI.getOperand(5));
- } else if (DstReg == MI.getOperand(2).getReg()) {
- // Expand to VBIF
- unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBIFd : ARM::VBIFq;
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
- .add(MI.getOperand(0))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(1))
- .addImm(MI.getOperand(4).getImm())
- .add(MI.getOperand(5));
- } else {
- // Expand to VBSL
- unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBSLd : ARM::VBSLq;
- if (DstReg == MI.getOperand(1).getReg()) {
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
- .add(MI.getOperand(0))
- .add(MI.getOperand(1))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .addImm(MI.getOperand(4).getImm())
- .add(MI.getOperand(5));
- } else {
- // Use move to satisfy constraints
- unsigned MoveOpc = Opcode == ARM::VBSPd ? ARM::VORRd : ARM::VORRq;
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MoveOpc))
- .addReg(DstReg,
- RegState::Define |
- getRenamableRegState(MI.getOperand(0).isRenamable()))
- .add(MI.getOperand(1))
- .add(MI.getOperand(1))
- .addImm(MI.getOperand(4).getImm())
- .add(MI.getOperand(5));
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
- .add(MI.getOperand(0))
- .addReg(DstReg,
- RegState::Kill |
- getRenamableRegState(MI.getOperand(0).isRenamable()))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .addImm(MI.getOperand(4).getImm())
- .add(MI.getOperand(5));
- }
- }
- MI.eraseFromParent();
- return true;
- }
-
+ case ARM::VBSPd:
+ case ARM::VBSPq: {
+ Register DstReg = MI.getOperand(0).getReg();
+ if (DstReg == MI.getOperand(3).getReg()) {
+ // Expand to VBIT
+ unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBITd : ARM::VBITq;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(1))
+ .addImm(MI.getOperand(4).getImm())
+ .add(MI.getOperand(5));
+ } else if (DstReg == MI.getOperand(2).getReg()) {
+ // Expand to VBIF
+ unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBIFd : ARM::VBIFq;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(1))
+ .addImm(MI.getOperand(4).getImm())
+ .add(MI.getOperand(5));
+ } else {
+ // Expand to VBSL
+ unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBSLd : ARM::VBSLq;
+ if (DstReg == MI.getOperand(1).getReg()) {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .addImm(MI.getOperand(4).getImm())
+ .add(MI.getOperand(5));
+ } else {
+ // Use move to satisfy constraints
+ unsigned MoveOpc = Opcode == ARM::VBSPd ? ARM::VORRd : ARM::VORRq;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MoveOpc))
+ .addReg(DstReg,
+ RegState::Define |
+ getRenamableRegState(MI.getOperand(0).isRenamable()))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(1))
+ .addImm(MI.getOperand(4).getImm())
+ .add(MI.getOperand(5));
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
+ .add(MI.getOperand(0))
+ .addReg(DstReg,
+ RegState::Kill |
+ getRenamableRegState(MI.getOperand(0).isRenamable()))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .addImm(MI.getOperand(4).getImm())
+ .add(MI.getOperand(5));
+ }
+ }
+ MI.eraseFromParent();
+ return true;
+ }
+
case ARM::TCRETURNdi:
case ARM::TCRETURNri: {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
@@ -2304,9 +2304,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MIB.addImm(0);
MIB.add(predOps(ARMCC::AL));
- MIB =
- BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(Thumb ? gettBLXrOpcode(*MF) : getBLXOpcode(*MF)));
+ MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Thumb ? gettBLXrOpcode(*MF) : getBLXOpcode(*MF)));
if (Thumb)
MIB.add(predOps(ARMCC::AL));
MIB.addReg(Reg, RegState::Kill);
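
The VBSP pseudo expanded above is a bitwise select: each result bit comes from the second source where the mask bit is set and from the third source where it is clear. NEON only provides destructive encodings of this operation (VBSL, VBIT, VBIF), so the expansion picks whichever form matches the source that already sits in the destination register, and otherwise inserts a VORR copy before a VBSL. A minimal sketch of the underlying operation, independent of LLVM's types:

#include <cstdint>
#include <cstdio>

// Bitwise select: bits of t where the mask is 1, bits of f where it is 0.
static uint32_t bitSelect(uint32_t mask, uint32_t t, uint32_t f) {
  return (t & mask) | (f & ~mask);
}

int main() {
  uint32_t mask = 0x0F0F0F0F, t = 0xAAAAAAAA, f = 0x55555555;
  // The three destructive NEON forms compute the same thing and differ only
  // in which input starts out in the destination register:
  //   VBSL: destination initially holds the mask
  //   VBIT: destination initially holds f (t inserted where the mask is 1)
  //   VBIF: destination initially holds t (f inserted where the mask is 0)
  std::printf("0x%08X\n", bitSelect(mask, t, f)); // 0x5A5A5A5A
  return 0;
}
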
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMFastISel.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMFastISel.cpp
index da1d9af8d5..483aeb4d72 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMFastISel.cpp
@@ -606,9 +606,9 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
}
}
- if ((Subtarget->isTargetELF() && Subtarget->isGVInGOT(GV)) ||
- (Subtarget->isTargetMachO() && IsIndirect) ||
- Subtarget->genLongCalls()) {
+ if ((Subtarget->isTargetELF() && Subtarget->isGVInGOT(GV)) ||
+ (Subtarget->isTargetMachO() && IsIndirect) ||
+ Subtarget->genLongCalls()) {
MachineInstrBuilder MIB;
unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
if (isThumb2)
@@ -2175,7 +2175,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
if (UseReg)
- return isThumb2 ? gettBLXrOpcode(*MF) : getBLXOpcode(*MF);
+ return isThumb2 ? gettBLXrOpcode(*MF) : getBLXOpcode(*MF);
else
return isThumb2 ? ARM::tBL : ARM::BL;
}
@@ -2266,11 +2266,11 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
// BL / BLX don't take a predicate, but tBL / tBLX do.
if (isThumb2)
MIB.add(predOps(ARMCC::AL));
- if (Subtarget->genLongCalls()) {
- CalleeReg =
- constrainOperandRegClass(TII.get(CallOpc), CalleeReg, isThumb2 ? 2 : 0);
+ if (Subtarget->genLongCalls()) {
+ CalleeReg =
+ constrainOperandRegClass(TII.get(CallOpc), CalleeReg, isThumb2 ? 2 : 0);
MIB.addReg(CalleeReg);
- } else
+ } else
MIB.addExternalSymbol(TLI.getLibcallName(Call));
// Add implicit physical register uses to the call.
@@ -2408,11 +2408,11 @@ bool ARMFastISel::SelectCall(const Instruction *I,
// ARM calls don't take a predicate, but tBL / tBLX do.
if(isThumb2)
MIB.add(predOps(ARMCC::AL));
- if (UseReg) {
- CalleeReg =
- constrainOperandRegClass(TII.get(CallOpc), CalleeReg, isThumb2 ? 2 : 0);
+ if (UseReg) {
+ CalleeReg =
+ constrainOperandRegClass(TII.get(CallOpc), CalleeReg, isThumb2 ? 2 : 0);
MIB.addReg(CalleeReg);
- } else if (!IntrMemName)
+ } else if (!IntrMemName)
MIB.addGlobalAddress(GV, 0, 0);
else
MIB.addExternalSymbol(IntrMemName, 0);
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMFeatures.h b/contrib/libs/llvm12/lib/Target/ARM/ARMFeatures.h
index 99e0ef05b5..6d8e75a2ec 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMFeatures.h
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMFeatures.h
@@ -75,7 +75,7 @@ inline bool isV8EligibleForIT(const InstrType *Instr) {
// there are some "conditionally deprecated" opcodes
case ARM::tADDspr:
case ARM::tBLXr:
- case ARM::tBLXr_noip:
+ case ARM::tBLXr_noip:
return Instr->getOperand(2).getReg() != ARM::PC;
// ADD PC, SP and BLX PC were always unpredictable,
// now on top of it they're deprecated
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMFrameLowering.cpp
index 9eeb7f20dc..e0a657b505 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMFrameLowering.cpp
@@ -883,10 +883,10 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
/// debug info. It's the same as what we use for resolving the code-gen
/// references for now. FIXME: This can go wrong when references are
/// SP-relative and simple call frames aren't used.
-StackOffset ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF,
- int FI,
- Register &FrameReg) const {
- return StackOffset::getFixed(ResolveFrameIndexReference(MF, FI, FrameReg, 0));
+StackOffset ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ Register &FrameReg) const {
+ return StackOffset::getFixed(ResolveFrameIndexReference(MF, FI, FrameReg, 0));
}
int ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
@@ -2114,7 +2114,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
unsigned NumExtras = TargetAlign.value() / 4;
SmallVector<unsigned, 2> Extras;
while (NumExtras && !UnspilledCS1GPRs.empty()) {
- unsigned Reg = UnspilledCS1GPRs.pop_back_val();
+ unsigned Reg = UnspilledCS1GPRs.pop_back_val();
if (!MRI.isReserved(Reg) &&
(!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
Extras.push_back(Reg);
@@ -2124,7 +2124,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// For non-Thumb1 functions, also check for hi-reg CS registers
if (!AFI->isThumb1OnlyFunction()) {
while (NumExtras && !UnspilledCS2GPRs.empty()) {
- unsigned Reg = UnspilledCS2GPRs.pop_back_val();
+ unsigned Reg = UnspilledCS2GPRs.pop_back_val();
if (!MRI.isReserved(Reg)) {
Extras.push_back(Reg);
NumExtras--;
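
getFrameIndexReference above now wraps the plain byte offset in a StackOffset via StackOffset::getFixed. The point of the type, as I understand it, is to carry a fixed byte component alongside a separate scalable component (used by targets with scalable vectors; it stays zero for ARM). A rough stand-in for the idea, not LLVM's actual class:

#include <cstdint>
#include <cstdio>

// Minimal sketch of a frame offset with a fixed byte part and a scalable part.
struct StackOffsetSketch {
  int64_t Fixed = 0;
  int64_t Scalable = 0;
  static StackOffsetSketch getFixed(int64_t bytes) { return {bytes, 0}; }
};

int main() {
  StackOffsetSketch off = StackOffsetSketch::getFixed(-16);
  std::printf("fixed=%lld scalable=%lld\n",
              (long long)off.Fixed, (long long)off.Scalable); // fixed=-16 scalable=0
  return 0;
}
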
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMFrameLowering.h b/contrib/libs/llvm12/lib/Target/ARM/ARMFrameLowering.h
index 9822e2321b..c609c07043 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMFrameLowering.h
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMFrameLowering.h
@@ -10,7 +10,7 @@
#define LLVM_LIB_TARGET_ARM_ARMFRAMELOWERING_H
#include "llvm/CodeGen/TargetFrameLowering.h"
-#include "llvm/Support/TypeSize.h"
+#include "llvm/Support/TypeSize.h"
namespace llvm {
@@ -48,8 +48,8 @@ public:
bool hasFP(const MachineFunction &MF) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override;
- StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
- Register &FrameReg) const override;
+ StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const override;
int ResolveFrameIndexReference(const MachineFunction &MF, int FI,
Register &FrameReg, int SPAdj) const;
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMHazardRecognizer.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMHazardRecognizer.cpp
index f083fa6662..48df96b5e6 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -10,19 +10,19 @@
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMSubtarget.h"
-#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/Support/CommandLine.h"
-
+#include "llvm/Support/CommandLine.h"
+
using namespace llvm;
-static cl::opt<int> DataBankMask("arm-data-bank-mask", cl::init(-1),
- cl::Hidden);
-static cl::opt<bool> AssumeITCMConflict("arm-assume-itcm-bankconflict",
- cl::init(false), cl::Hidden);
-
+static cl::opt<int> DataBankMask("arm-data-bank-mask", cl::init(-1),
+ cl::Hidden);
+static cl::opt<bool> AssumeITCMConflict("arm-assume-itcm-bankconflict",
+ cl::init(false), cl::Hidden);
+
static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
const TargetRegisterInfo &TRI) {
// FIXME: Detect integer instructions properly.
@@ -39,7 +39,7 @@ static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
}
ScheduleHazardRecognizer::HazardType
-ARMHazardRecognizerFPMLx::getHazardType(SUnit *SU, int Stalls) {
+ARMHazardRecognizerFPMLx::getHazardType(SUnit *SU, int Stalls) {
assert(Stalls == 0 && "ARM hazards don't support scoreboard lookahead");
MachineInstr *MI = SU->getInstr();
@@ -76,15 +76,15 @@ ARMHazardRecognizerFPMLx::getHazardType(SUnit *SU, int Stalls) {
}
}
}
- return NoHazard;
+ return NoHazard;
}
-void ARMHazardRecognizerFPMLx::Reset() {
+void ARMHazardRecognizerFPMLx::Reset() {
LastMI = nullptr;
FpMLxStalls = 0;
}
-void ARMHazardRecognizerFPMLx::EmitInstruction(SUnit *SU) {
+void ARMHazardRecognizerFPMLx::EmitInstruction(SUnit *SU) {
MachineInstr *MI = SU->getInstr();
if (!MI->isDebugInstr()) {
LastMI = MI;
@@ -92,177 +92,177 @@ void ARMHazardRecognizerFPMLx::EmitInstruction(SUnit *SU) {
}
}
-void ARMHazardRecognizerFPMLx::AdvanceCycle() {
+void ARMHazardRecognizerFPMLx::AdvanceCycle() {
if (FpMLxStalls && --FpMLxStalls == 0)
// Stalled for 4 cycles but still can't schedule any other instructions.
LastMI = nullptr;
}
-void ARMHazardRecognizerFPMLx::RecedeCycle() {
+void ARMHazardRecognizerFPMLx::RecedeCycle() {
llvm_unreachable("reverse ARM hazard checking unsupported");
}
-
-///////// Bank conflicts handled as hazards //////////////
-
-static bool getBaseOffset(const MachineInstr &MI, const MachineOperand *&BaseOp,
- int64_t &Offset) {
-
- uint64_t TSFlags = MI.getDesc().TSFlags;
- unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
- unsigned IndexMode =
- (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
-
- // Address mode tells us what we want to know about operands for T2
- // instructions (but not size). It tells us size (but not about operands)
- // for T1 instructions.
- switch (AddrMode) {
- default:
- return false;
- case ARMII::AddrModeT2_i8:
- // t2LDRBT, t2LDRB_POST, t2LDRB_PRE, t2LDRBi8,
- // t2LDRHT, t2LDRH_POST, t2LDRH_PRE, t2LDRHi8,
- // t2LDRSBT, t2LDRSB_POST, t2LDRSB_PRE, t2LDRSBi8,
- // t2LDRSHT, t2LDRSH_POST, t2LDRSH_PRE, t2LDRSHi8,
- // t2LDRT, t2LDR_POST, t2LDR_PRE, t2LDRi8
- BaseOp = &MI.getOperand(1);
- Offset = (IndexMode == ARMII::IndexModePost)
- ? 0
- : (IndexMode == ARMII::IndexModePre ||
- IndexMode == ARMII::IndexModeUpd)
- ? MI.getOperand(3).getImm()
- : MI.getOperand(2).getImm();
- return true;
- case ARMII::AddrModeT2_i12:
- // t2LDRBi12, t2LDRHi12
- // t2LDRSBi12, t2LDRSHi12
- // t2LDRi12
- BaseOp = &MI.getOperand(1);
- Offset = MI.getOperand(2).getImm();
- return true;
- case ARMII::AddrModeT2_i8s4:
- // t2LDRD_POST, t2LDRD_PRE, t2LDRDi8
- BaseOp = &MI.getOperand(2);
- Offset = (IndexMode == ARMII::IndexModePost)
- ? 0
- : (IndexMode == ARMII::IndexModePre ||
- IndexMode == ARMII::IndexModeUpd)
- ? MI.getOperand(4).getImm()
- : MI.getOperand(3).getImm();
- return true;
- case ARMII::AddrModeT1_1:
- // tLDRBi, tLDRBr (watch out!), TLDRSB
- case ARMII::AddrModeT1_2:
- // tLDRHi, tLDRHr (watch out!), TLDRSH
- case ARMII::AddrModeT1_4:
- // tLDRi, tLDRr (watch out!)
- BaseOp = &MI.getOperand(1);
- Offset = MI.getOperand(2).isImm() ? MI.getOperand(2).getImm() : 0;
- return MI.getOperand(2).isImm();
- }
- return false;
-}
-
-ARMBankConflictHazardRecognizer::ARMBankConflictHazardRecognizer(
- const ScheduleDAG *DAG, int64_t CPUBankMask, bool CPUAssumeITCMConflict)
- : ScheduleHazardRecognizer(), MF(DAG->MF), DL(DAG->MF.getDataLayout()),
- DataMask(DataBankMask.getNumOccurrences() ? int64_t(DataBankMask)
- : CPUBankMask),
- AssumeITCMBankConflict(AssumeITCMConflict.getNumOccurrences()
- ? AssumeITCMConflict
- : CPUAssumeITCMConflict) {
- MaxLookAhead = 1;
-}
-
-ScheduleHazardRecognizer::HazardType
-ARMBankConflictHazardRecognizer::CheckOffsets(unsigned O0, unsigned O1) {
- return (((O0 ^ O1) & DataMask) != 0) ? NoHazard : Hazard;
-}
-
-ScheduleHazardRecognizer::HazardType
-ARMBankConflictHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
- MachineInstr &L0 = *SU->getInstr();
- if (!L0.mayLoad() || L0.mayStore() || L0.getNumMemOperands() != 1)
- return NoHazard;
-
- auto MO0 = *L0.memoperands().begin();
- auto BaseVal0 = MO0->getValue();
- auto BasePseudoVal0 = MO0->getPseudoValue();
- int64_t Offset0 = 0;
-
- if (MO0->getSize() > 4)
- return NoHazard;
-
- bool SPvalid = false;
- const MachineOperand *SP = nullptr;
- int64_t SPOffset0 = 0;
-
- for (auto L1 : Accesses) {
- auto MO1 = *L1->memoperands().begin();
- auto BaseVal1 = MO1->getValue();
- auto BasePseudoVal1 = MO1->getPseudoValue();
- int64_t Offset1 = 0;
-
- // Pointers to the same object
- if (BaseVal0 && BaseVal1) {
- const Value *Ptr0, *Ptr1;
- Ptr0 = GetPointerBaseWithConstantOffset(BaseVal0, Offset0, DL, true);
- Ptr1 = GetPointerBaseWithConstantOffset(BaseVal1, Offset1, DL, true);
- if (Ptr0 == Ptr1 && Ptr0)
- return CheckOffsets(Offset0, Offset1);
- }
-
- if (BasePseudoVal0 && BasePseudoVal1 &&
- BasePseudoVal0->kind() == BasePseudoVal1->kind() &&
- BasePseudoVal0->kind() == PseudoSourceValue::FixedStack) {
- // Spills/fills
- auto FS0 = cast<FixedStackPseudoSourceValue>(BasePseudoVal0);
- auto FS1 = cast<FixedStackPseudoSourceValue>(BasePseudoVal1);
- Offset0 = MF.getFrameInfo().getObjectOffset(FS0->getFrameIndex());
- Offset1 = MF.getFrameInfo().getObjectOffset(FS1->getFrameIndex());
- return CheckOffsets(Offset0, Offset1);
- }
-
- // Constant pools (likely in ITCM)
- if (BasePseudoVal0 && BasePseudoVal1 &&
- BasePseudoVal0->kind() == BasePseudoVal1->kind() &&
- BasePseudoVal0->isConstantPool() && AssumeITCMBankConflict)
- return Hazard;
-
- // Is this a stack pointer-relative access? We could in general try to
- // use "is this the same register and is it unchanged?", but the
- // memory operand tracking is highly likely to have already found that.
- // What we're after here is bank conflicts between different objects in
- // the stack frame.
- if (!SPvalid) { // set up SP
- if (!getBaseOffset(L0, SP, SPOffset0) || SP->getReg().id() != ARM::SP)
- SP = nullptr;
- SPvalid = true;
- }
- if (SP) {
- int64_t SPOffset1;
- const MachineOperand *SP1;
- if (getBaseOffset(*L1, SP1, SPOffset1) && SP1->getReg().id() == ARM::SP)
- return CheckOffsets(SPOffset0, SPOffset1);
- }
- }
-
- return NoHazard;
-}
-
-void ARMBankConflictHazardRecognizer::Reset() { Accesses.clear(); }
-
-void ARMBankConflictHazardRecognizer::EmitInstruction(SUnit *SU) {
- MachineInstr &MI = *SU->getInstr();
- if (!MI.mayLoad() || MI.mayStore() || MI.getNumMemOperands() != 1)
- return;
-
- auto MO = *MI.memoperands().begin();
- uint64_t Size1 = MO->getSize();
- if (Size1 > 4)
- return;
- Accesses.push_back(&MI);
-}
-
-void ARMBankConflictHazardRecognizer::AdvanceCycle() { Accesses.clear(); }
-
-void ARMBankConflictHazardRecognizer::RecedeCycle() { Accesses.clear(); }
+
+///////// Bank conflicts handled as hazards //////////////
+
+static bool getBaseOffset(const MachineInstr &MI, const MachineOperand *&BaseOp,
+ int64_t &Offset) {
+
+ uint64_t TSFlags = MI.getDesc().TSFlags;
+ unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
+ unsigned IndexMode =
+ (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
+
+ // Address mode tells us what we want to know about operands for T2
+ // instructions (but not size). It tells us size (but not about operands)
+ // for T1 instructions.
+ switch (AddrMode) {
+ default:
+ return false;
+ case ARMII::AddrModeT2_i8:
+ // t2LDRBT, t2LDRB_POST, t2LDRB_PRE, t2LDRBi8,
+ // t2LDRHT, t2LDRH_POST, t2LDRH_PRE, t2LDRHi8,
+ // t2LDRSBT, t2LDRSB_POST, t2LDRSB_PRE, t2LDRSBi8,
+ // t2LDRSHT, t2LDRSH_POST, t2LDRSH_PRE, t2LDRSHi8,
+ // t2LDRT, t2LDR_POST, t2LDR_PRE, t2LDRi8
+ BaseOp = &MI.getOperand(1);
+ Offset = (IndexMode == ARMII::IndexModePost)
+ ? 0
+ : (IndexMode == ARMII::IndexModePre ||
+ IndexMode == ARMII::IndexModeUpd)
+ ? MI.getOperand(3).getImm()
+ : MI.getOperand(2).getImm();
+ return true;
+ case ARMII::AddrModeT2_i12:
+ // t2LDRBi12, t2LDRHi12
+ // t2LDRSBi12, t2LDRSHi12
+ // t2LDRi12
+ BaseOp = &MI.getOperand(1);
+ Offset = MI.getOperand(2).getImm();
+ return true;
+ case ARMII::AddrModeT2_i8s4:
+ // t2LDRD_POST, t2LDRD_PRE, t2LDRDi8
+ BaseOp = &MI.getOperand(2);
+ Offset = (IndexMode == ARMII::IndexModePost)
+ ? 0
+ : (IndexMode == ARMII::IndexModePre ||
+ IndexMode == ARMII::IndexModeUpd)
+ ? MI.getOperand(4).getImm()
+ : MI.getOperand(3).getImm();
+ return true;
+ case ARMII::AddrModeT1_1:
+ // tLDRBi, tLDRBr (watch out!), TLDRSB
+ case ARMII::AddrModeT1_2:
+ // tLDRHi, tLDRHr (watch out!), TLDRSH
+ case ARMII::AddrModeT1_4:
+ // tLDRi, tLDRr (watch out!)
+ BaseOp = &MI.getOperand(1);
+ Offset = MI.getOperand(2).isImm() ? MI.getOperand(2).getImm() : 0;
+ return MI.getOperand(2).isImm();
+ }
+ return false;
+}
+
+ARMBankConflictHazardRecognizer::ARMBankConflictHazardRecognizer(
+ const ScheduleDAG *DAG, int64_t CPUBankMask, bool CPUAssumeITCMConflict)
+ : ScheduleHazardRecognizer(), MF(DAG->MF), DL(DAG->MF.getDataLayout()),
+ DataMask(DataBankMask.getNumOccurrences() ? int64_t(DataBankMask)
+ : CPUBankMask),
+ AssumeITCMBankConflict(AssumeITCMConflict.getNumOccurrences()
+ ? AssumeITCMConflict
+ : CPUAssumeITCMConflict) {
+ MaxLookAhead = 1;
+}
+
+ScheduleHazardRecognizer::HazardType
+ARMBankConflictHazardRecognizer::CheckOffsets(unsigned O0, unsigned O1) {
+ return (((O0 ^ O1) & DataMask) != 0) ? NoHazard : Hazard;
+}
+
+ScheduleHazardRecognizer::HazardType
+ARMBankConflictHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ MachineInstr &L0 = *SU->getInstr();
+ if (!L0.mayLoad() || L0.mayStore() || L0.getNumMemOperands() != 1)
+ return NoHazard;
+
+ auto MO0 = *L0.memoperands().begin();
+ auto BaseVal0 = MO0->getValue();
+ auto BasePseudoVal0 = MO0->getPseudoValue();
+ int64_t Offset0 = 0;
+
+ if (MO0->getSize() > 4)
+ return NoHazard;
+
+ bool SPvalid = false;
+ const MachineOperand *SP = nullptr;
+ int64_t SPOffset0 = 0;
+
+ for (auto L1 : Accesses) {
+ auto MO1 = *L1->memoperands().begin();
+ auto BaseVal1 = MO1->getValue();
+ auto BasePseudoVal1 = MO1->getPseudoValue();
+ int64_t Offset1 = 0;
+
+ // Pointers to the same object
+ if (BaseVal0 && BaseVal1) {
+ const Value *Ptr0, *Ptr1;
+ Ptr0 = GetPointerBaseWithConstantOffset(BaseVal0, Offset0, DL, true);
+ Ptr1 = GetPointerBaseWithConstantOffset(BaseVal1, Offset1, DL, true);
+ if (Ptr0 == Ptr1 && Ptr0)
+ return CheckOffsets(Offset0, Offset1);
+ }
+
+ if (BasePseudoVal0 && BasePseudoVal1 &&
+ BasePseudoVal0->kind() == BasePseudoVal1->kind() &&
+ BasePseudoVal0->kind() == PseudoSourceValue::FixedStack) {
+ // Spills/fills
+ auto FS0 = cast<FixedStackPseudoSourceValue>(BasePseudoVal0);
+ auto FS1 = cast<FixedStackPseudoSourceValue>(BasePseudoVal1);
+ Offset0 = MF.getFrameInfo().getObjectOffset(FS0->getFrameIndex());
+ Offset1 = MF.getFrameInfo().getObjectOffset(FS1->getFrameIndex());
+ return CheckOffsets(Offset0, Offset1);
+ }
+
+ // Constant pools (likely in ITCM)
+ if (BasePseudoVal0 && BasePseudoVal1 &&
+ BasePseudoVal0->kind() == BasePseudoVal1->kind() &&
+ BasePseudoVal0->isConstantPool() && AssumeITCMBankConflict)
+ return Hazard;
+
+ // Is this a stack pointer-relative access? We could in general try to
+ // use "is this the same register and is it unchanged?", but the
+ // memory operand tracking is highly likely to have already found that.
+ // What we're after here is bank conflicts between different objects in
+ // the stack frame.
+ if (!SPvalid) { // set up SP
+ if (!getBaseOffset(L0, SP, SPOffset0) || SP->getReg().id() != ARM::SP)
+ SP = nullptr;
+ SPvalid = true;
+ }
+ if (SP) {
+ int64_t SPOffset1;
+ const MachineOperand *SP1;
+ if (getBaseOffset(*L1, SP1, SPOffset1) && SP1->getReg().id() == ARM::SP)
+ return CheckOffsets(SPOffset0, SPOffset1);
+ }
+ }
+
+ return NoHazard;
+}
+
+void ARMBankConflictHazardRecognizer::Reset() { Accesses.clear(); }
+
+void ARMBankConflictHazardRecognizer::EmitInstruction(SUnit *SU) {
+ MachineInstr &MI = *SU->getInstr();
+ if (!MI.mayLoad() || MI.mayStore() || MI.getNumMemOperands() != 1)
+ return;
+
+ auto MO = *MI.memoperands().begin();
+ uint64_t Size1 = MO->getSize();
+ if (Size1 > 4)
+ return;
+ Accesses.push_back(&MI);
+}
+
+void ARMBankConflictHazardRecognizer::AdvanceCycle() { Accesses.clear(); }
+
+void ARMBankConflictHazardRecognizer::RecedeCycle() { Accesses.clear(); }
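
The bank-conflict recognizer above flags two single-word loads in the same cycle when the offset bits selected by the CPU's data-bank mask agree, which is exactly the XOR test in CheckOffsets. A tiny standalone illustration; the mask value below is hypothetical, the real one comes from the subtarget or the -arm-data-bank-mask option:

#include <cstdint>
#include <cstdio>

// Two accesses hit the same data bank when the offset bits selected by the
// bank mask are identical, i.e. the XOR of the offsets shares no bit with
// the mask.
static bool sameBank(uint64_t off0, uint64_t off1, uint64_t bankMask) {
  return ((off0 ^ off1) & bankMask) == 0;
}

int main() {
  const uint64_t mask = 0x4; // hypothetical: bit 2 selects one of two banks
  std::printf("%d\n", sameBank(0x10, 0x14, mask)); // 0: different banks, no hazard
  std::printf("%d\n", sameBank(0x10, 0x18, mask)); // 1: same bank, potential hazard
  return 0;
}
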
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMHazardRecognizer.h b/contrib/libs/llvm12/lib/Target/ARM/ARMHazardRecognizer.h
index c1f1bcd0a6..e6b5304488 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMHazardRecognizer.h
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMHazardRecognizer.h
@@ -13,28 +13,28 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMHAZARDRECOGNIZER_H
#define LLVM_LIB_TARGET_ARM_ARMHAZARDRECOGNIZER_H
-#include "ARMBaseInstrInfo.h"
-#include "llvm/ADT/BitmaskEnum.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/Support/DataTypes.h"
-#include <array>
-#include <initializer_list>
+#include "ARMBaseInstrInfo.h"
+#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Support/DataTypes.h"
+#include <array>
+#include <initializer_list>
namespace llvm {
-class DataLayout;
-class MachineFunction;
+class DataLayout;
+class MachineFunction;
class MachineInstr;
-class ScheduleDAG;
+class ScheduleDAG;
-// Hazards related to FP MLx instructions
-class ARMHazardRecognizerFPMLx : public ScheduleHazardRecognizer {
+// Hazards related to FP MLx instructions
+class ARMHazardRecognizerFPMLx : public ScheduleHazardRecognizer {
MachineInstr *LastMI = nullptr;
unsigned FpMLxStalls = 0;
public:
- ARMHazardRecognizerFPMLx() : ScheduleHazardRecognizer() { MaxLookAhead = 1; }
+ ARMHazardRecognizerFPMLx() : ScheduleHazardRecognizer() { MaxLookAhead = 1; }
HazardType getHazardType(SUnit *SU, int Stalls) override;
void Reset() override;
@@ -43,27 +43,27 @@ public:
void RecedeCycle() override;
};
-// Hazards related to bank conflicts
-class ARMBankConflictHazardRecognizer : public ScheduleHazardRecognizer {
- SmallVector<MachineInstr *, 8> Accesses;
- const MachineFunction &MF;
- const DataLayout &DL;
- int64_t DataMask;
- bool AssumeITCMBankConflict;
-
-public:
- ARMBankConflictHazardRecognizer(const ScheduleDAG *DAG, int64_t DDM,
- bool ABC);
- HazardType getHazardType(SUnit *SU, int Stalls) override;
- void Reset() override;
- void EmitInstruction(SUnit *SU) override;
- void AdvanceCycle() override;
- void RecedeCycle() override;
-
-private:
- inline HazardType CheckOffsets(unsigned O0, unsigned O1);
-};
-
+// Hazards related to bank conflicts
+class ARMBankConflictHazardRecognizer : public ScheduleHazardRecognizer {
+ SmallVector<MachineInstr *, 8> Accesses;
+ const MachineFunction &MF;
+ const DataLayout &DL;
+ int64_t DataMask;
+ bool AssumeITCMBankConflict;
+
+public:
+ ARMBankConflictHazardRecognizer(const ScheduleDAG *DAG, int64_t DDM,
+ bool ABC);
+ HazardType getHazardType(SUnit *SU, int Stalls) override;
+ void Reset() override;
+ void EmitInstruction(SUnit *SU) override;
+ void AdvanceCycle() override;
+ void RecedeCycle() override;
+
+private:
+ inline HazardType CheckOffsets(unsigned O0, unsigned O1);
+};
+
} // end namespace llvm
#endif
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMISelLowering.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMISelLowering.cpp
index 598062672a..2daf77fb5e 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMISelLowering.cpp
@@ -143,7 +143,7 @@ static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
cl::desc("Maximum size of ALL constants to promote into a constant pool"),
cl::init(128));
-cl::opt<unsigned>
+cl::opt<unsigned>
MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
cl::desc("Maximum interleave factor for MVE VLDn to generate."),
cl::init(2));
@@ -289,8 +289,8 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::UDIVREM, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::CTPOP, VT, Expand);
- setOperationAction(ISD::SELECT, VT, Expand);
- setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction(ISD::SELECT, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
// Vector reductions
setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);
@@ -337,8 +337,8 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Legal);
- setOperationAction(ISD::SELECT, VT, Expand);
- setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction(ISD::SELECT, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
// Pre and Post inc are supported on loads and stores
for (unsigned im = (unsigned)ISD::PRE_INC;
@@ -443,9 +443,9 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
- setOperationAction(ISD::TRUNCATE, VT, Custom);
- setOperationAction(ISD::VSELECT, VT, Expand);
- setOperationAction(ISD::SELECT, VT, Expand);
+ setOperationAction(ISD::TRUNCATE, VT, Custom);
+ setOperationAction(ISD::VSELECT, VT, Expand);
+ setOperationAction(ISD::SELECT, VT, Expand);
}
}
@@ -994,8 +994,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SMAX);
setTargetDAGCombine(ISD::UMAX);
setTargetDAGCombine(ISD::FP_EXTEND);
- setTargetDAGCombine(ISD::SELECT);
- setTargetDAGCombine(ISD::SELECT_CC);
+ setTargetDAGCombine(ISD::SELECT);
+ setTargetDAGCombine(ISD::SELECT_CC);
}
if (!Subtarget->hasFP64()) {
@@ -1725,11 +1725,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VCVTL: return "ARMISD::VCVTL";
case ARMISD::VMULLs: return "ARMISD::VMULLs";
case ARMISD::VMULLu: return "ARMISD::VMULLu";
- case ARMISD::VQDMULH: return "ARMISD::VQDMULH";
+ case ARMISD::VQDMULH: return "ARMISD::VQDMULH";
case ARMISD::VADDVs: return "ARMISD::VADDVs";
case ARMISD::VADDVu: return "ARMISD::VADDVu";
- case ARMISD::VADDVps: return "ARMISD::VADDVps";
- case ARMISD::VADDVpu: return "ARMISD::VADDVpu";
+ case ARMISD::VADDVps: return "ARMISD::VADDVps";
+ case ARMISD::VADDVpu: return "ARMISD::VADDVpu";
case ARMISD::VADDLVs: return "ARMISD::VADDLVs";
case ARMISD::VADDLVu: return "ARMISD::VADDLVu";
case ARMISD::VADDLVAs: return "ARMISD::VADDLVAs";
@@ -1740,20 +1740,20 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VADDLVApu: return "ARMISD::VADDLVApu";
case ARMISD::VMLAVs: return "ARMISD::VMLAVs";
case ARMISD::VMLAVu: return "ARMISD::VMLAVu";
- case ARMISD::VMLAVps: return "ARMISD::VMLAVps";
- case ARMISD::VMLAVpu: return "ARMISD::VMLAVpu";
+ case ARMISD::VMLAVps: return "ARMISD::VMLAVps";
+ case ARMISD::VMLAVpu: return "ARMISD::VMLAVpu";
case ARMISD::VMLALVs: return "ARMISD::VMLALVs";
case ARMISD::VMLALVu: return "ARMISD::VMLALVu";
- case ARMISD::VMLALVps: return "ARMISD::VMLALVps";
- case ARMISD::VMLALVpu: return "ARMISD::VMLALVpu";
+ case ARMISD::VMLALVps: return "ARMISD::VMLALVps";
+ case ARMISD::VMLALVpu: return "ARMISD::VMLALVpu";
case ARMISD::VMLALVAs: return "ARMISD::VMLALVAs";
case ARMISD::VMLALVAu: return "ARMISD::VMLALVAu";
- case ARMISD::VMLALVAps: return "ARMISD::VMLALVAps";
- case ARMISD::VMLALVApu: return "ARMISD::VMLALVApu";
- case ARMISD::VMINVu: return "ARMISD::VMINVu";
- case ARMISD::VMINVs: return "ARMISD::VMINVs";
- case ARMISD::VMAXVu: return "ARMISD::VMAXVu";
- case ARMISD::VMAXVs: return "ARMISD::VMAXVs";
+ case ARMISD::VMLALVAps: return "ARMISD::VMLALVAps";
+ case ARMISD::VMLALVApu: return "ARMISD::VMLALVApu";
+ case ARMISD::VMINVu: return "ARMISD::VMINVu";
+ case ARMISD::VMINVs: return "ARMISD::VMINVs";
+ case ARMISD::VMAXVu: return "ARMISD::VMAXVu";
+ case ARMISD::VMAXVs: return "ARMISD::VMAXVs";
case ARMISD::UMAAL: return "ARMISD::UMAAL";
case ARMISD::UMLAL: return "ARMISD::UMLAL";
case ARMISD::SMLAL: return "ARMISD::SMLAL";
@@ -1777,7 +1777,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::BFI: return "ARMISD::BFI";
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
- case ARMISD::VBSP: return "ARMISD::VBSP";
+ case ARMISD::VBSP: return "ARMISD::VBSP";
case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
@@ -2531,9 +2531,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
Callee = DAG.getLoad(
PtrVt, dl, DAG.getEntryNode(), Callee,
- MachinePointerInfo::getGOT(DAG.getMachineFunction()), MaybeAlign(),
- MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOInvariant);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()), MaybeAlign(),
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant);
} else if (Subtarget->isTargetCOFF()) {
assert(Subtarget->isTargetWindows() &&
"Windows is the only supported COFF target");
@@ -3342,7 +3342,7 @@ ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
SDValue Chain = DAG.getEntryNode();
SDValue FuncTLVGet = DAG.getLoad(
MVT::i32, DL, Chain, DescAddr,
- MachinePointerInfo::getGOT(DAG.getMachineFunction()), Align(4),
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()), Align(4),
MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
Chain = FuncTLVGet.getValue(1);
@@ -3556,7 +3556,7 @@ static bool allUsersAreInFunction(const Value *V, const Function *F) {
while (!Worklist.empty()) {
auto *U = Worklist.pop_back_val();
if (isa<ConstantExpr>(U)) {
- append_range(Worklist, U->users());
+ append_range(Worklist, U->users());
continue;
}
@@ -4443,26 +4443,26 @@ SDValue ARMTargetLowering::LowerFormalArguments(
}
// varargs
- if (isVarArg && MFI.hasVAStart()) {
- VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset(),
+ if (isVarArg && MFI.hasVAStart()) {
+ VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset(),
TotalArgRegsSaveSize);
- if (AFI->isCmseNSEntryFunction()) {
- DiagnosticInfoUnsupported Diag(
- DAG.getMachineFunction().getFunction(),
- "secure entry function must not be variadic", dl.getDebugLoc());
- DAG.getContext()->diagnose(Diag);
- }
- }
+ if (AFI->isCmseNSEntryFunction()) {
+ DiagnosticInfoUnsupported Diag(
+ DAG.getMachineFunction().getFunction(),
+ "secure entry function must not be variadic", dl.getDebugLoc());
+ DAG.getContext()->diagnose(Diag);
+ }
+ }
AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
- if (CCInfo.getNextStackOffset() > 0 && AFI->isCmseNSEntryFunction()) {
- DiagnosticInfoUnsupported Diag(
- DAG.getMachineFunction().getFunction(),
- "secure entry function requires arguments on stack", dl.getDebugLoc());
- DAG.getContext()->diagnose(Diag);
- }
-
+ if (CCInfo.getNextStackOffset() > 0 && AFI->isCmseNSEntryFunction()) {
+ DiagnosticInfoUnsupported Diag(
+ DAG.getMachineFunction().getFunction(),
+ "secure entry function requires arguments on stack", dl.getDebugLoc());
+ DAG.getContext()->diagnose(Diag);
+ }
+
return Chain;
}
@@ -5034,68 +5034,68 @@ static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
// x < k ? (x < -k ? -k : x) : k
// etc.
//
-// LLVM canonicalizes these to either a min(max()) or a max(min())
-// pattern. This function tries to match one of these and will return a SSAT
-// node if successful.
+// LLVM canonicalizes these to either a min(max()) or a max(min())
+// pattern. This function tries to match one of these and will return a SSAT
+// node if successful.
//
-// USAT works similarly to SSAT but bounds on the interval [0, k] where k + 1
-// is a power of 2.
-static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
- SDValue V1 = Op.getOperand(0);
- SDValue K1 = Op.getOperand(1);
+// USAT works similarly to SSAT but bounds on the interval [0, k] where k + 1
+// is a power of 2.
+static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ SDValue V1 = Op.getOperand(0);
+ SDValue K1 = Op.getOperand(1);
SDValue TrueVal1 = Op.getOperand(2);
SDValue FalseVal1 = Op.getOperand(3);
ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
if (Op2.getOpcode() != ISD::SELECT_CC)
- return SDValue();
+ return SDValue();
- SDValue V2 = Op2.getOperand(0);
- SDValue K2 = Op2.getOperand(1);
+ SDValue V2 = Op2.getOperand(0);
+ SDValue K2 = Op2.getOperand(1);
SDValue TrueVal2 = Op2.getOperand(2);
SDValue FalseVal2 = Op2.getOperand(3);
ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
- SDValue V1Tmp = V1;
- SDValue V2Tmp = V2;
+ SDValue V1Tmp = V1;
+ SDValue V2Tmp = V2;
- // Check that the registers and the constants match a max(min()) or min(max())
- // pattern
- if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 ||
- K2 != FalseVal2 ||
- !((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2))))
- return SDValue();
+ // Check that the registers and the constants match a max(min()) or min(max())
+ // pattern
+ if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 ||
+ K2 != FalseVal2 ||
+ !((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2))))
+ return SDValue();
// Check that the constant in the lower-bound check is
// the opposite of the constant in the upper-bound check
// in 1's complement.
- if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2))
- return SDValue();
-
- int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue();
- int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue();
+ if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2))
+ return SDValue();
+
+ int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue();
+ int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue();
int64_t PosVal = std::max(Val1, Val2);
int64_t NegVal = std::min(Val1, Val2);
- if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) ||
- !isPowerOf2_64(PosVal + 1))
- return SDValue();
+ if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) ||
+ !isPowerOf2_64(PosVal + 1))
+ return SDValue();
- // Handle the difference between USAT (unsigned) and SSAT (signed)
- // saturation
- // At this point, PosVal is guaranteed to be positive
- uint64_t K = PosVal;
- SDLoc dl(Op);
- if (Val1 == ~Val2)
- return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp,
- DAG.getConstant(countTrailingOnes(K), dl, VT));
- if (NegVal == 0)
- return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp,
- DAG.getConstant(countTrailingOnes(K), dl, VT));
+ // Handle the difference between USAT (unsigned) and SSAT (signed)
+ // saturation
+ // At this point, PosVal is guaranteed to be positive
+ uint64_t K = PosVal;
+ SDLoc dl(Op);
+ if (Val1 == ~Val2)
+ return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp,
+ DAG.getConstant(countTrailingOnes(K), dl, VT));
+ if (NegVal == 0)
+ return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp,
+ DAG.getConstant(countTrailingOnes(K), dl, VT));
- return SDValue();
+ return SDValue();
}
// Check if a condition of the type x < k ? k : x can be converted into a
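
LowerSaturatingConditional above matches a two-level select that clamps a value to [~k, k], with k + 1 a power of two, as a signed saturation, and a clamp to [0, k] as an unsigned one. The arithmetic being matched corresponds to the following scalar sketch (semantics only, not the DAG pattern matching):

#include <algorithm>
#include <cstdio>

// Signed saturation to n bits: clamp to [-2^(n-1), 2^(n-1) - 1].
static int ssat(int x, unsigned n) {
  const int hi = (1 << (n - 1)) - 1;
  const int lo = ~hi; // -2^(n-1); this is the "k1 == ~k2" case matched above
  return std::min(std::max(x, lo), hi);
}

// Unsigned saturation to n bits: clamp to [0, 2^n - 1].
static int usat(int x, unsigned n) {
  const int hi = (1 << n) - 1;
  return std::min(std::max(x, 0), hi);
}

int main() {
  std::printf("%d %d\n", ssat(300, 8), ssat(-300, 8)); // 127 -128
  std::printf("%d %d\n", usat(300, 8), usat(-5, 8));   // 255 0
  return 0;
}
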
@@ -5155,9 +5155,9 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
// Try to convert two saturating conditional selects into a single SSAT
- if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2())
- if (SDValue SatValue = LowerSaturatingConditional(Op, DAG))
- return SatValue;
+ if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2())
+ if (SDValue SatValue = LowerSaturatingConditional(Op, DAG))
+ return SatValue;
// Try to convert expressions of the form x < k ? k : x (and similar forms)
// into more efficient bit operations, which is possible when k is 0 or -1
@@ -5166,7 +5166,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
// instructions.
// Only allow this transformation on full-width (32-bit) operations
SDValue LowerSatConstant;
- SDValue SatValue;
+ SDValue SatValue;
if (VT == MVT::i32 &&
isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,
@@ -7750,19 +7750,19 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
for (auto &Src : Sources) {
EVT SrcVT = Src.ShuffleVec.getValueType();
- uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
- uint64_t VTSize = VT.getFixedSizeInBits();
- if (SrcVTSize == VTSize)
+ uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
+ uint64_t VTSize = VT.getFixedSizeInBits();
+ if (SrcVTSize == VTSize)
continue;
// This stage of the search produces a source with the same element type as
// the original, but with a total width matching the BUILD_VECTOR output.
EVT EltVT = SrcVT.getVectorElementType();
- unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
+ unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
- if (SrcVTSize < VTSize) {
- if (2 * SrcVTSize != VTSize)
+ if (SrcVTSize < VTSize) {
+ if (2 * SrcVTSize != VTSize)
return SDValue();
// We can pad out the smaller vector for free, so if it's part of a
// shuffle...
@@ -7772,7 +7772,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
continue;
}
- if (SrcVTSize != 2 * VTSize)
+ if (SrcVTSize != 2 * VTSize)
return SDValue();
if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
@@ -7840,7 +7840,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
// trunc. So only std::min(SrcBits, DestBits) actually get defined in this
// segment.
EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
- int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
+ int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
VT.getScalarSizeInBits());
int LanesDefined = BitsDefined / BitsPerShuffleLane;
@@ -8642,23 +8642,23 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(ARMCC::NE, dl, MVT::i32));
}
-// Turn a truncate into a predicate (an i1 vector) into icmp(and(x, 1), 0).
-static SDValue LowerTruncatei1(SDValue N, SelectionDAG &DAG,
- const ARMSubtarget *ST) {
- assert(ST->hasMVEIntegerOps() && "Expected MVE!");
- EVT VT = N.getValueType();
- assert((VT == MVT::v16i1 || VT == MVT::v8i1 || VT == MVT::v4i1) &&
- "Expected a vector i1 type!");
- SDValue Op = N.getOperand(0);
- EVT FromVT = Op.getValueType();
- SDLoc DL(N);
-
- SDValue And =
- DAG.getNode(ISD::AND, DL, FromVT, Op, DAG.getConstant(1, DL, FromVT));
- return DAG.getNode(ISD::SETCC, DL, VT, And, DAG.getConstant(0, DL, FromVT),
- DAG.getCondCode(ISD::SETNE));
-}
-
+// Turn a truncate into a predicate (an i1 vector) into icmp(and(x, 1), 0).
+static SDValue LowerTruncatei1(SDValue N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ assert(ST->hasMVEIntegerOps() && "Expected MVE!");
+ EVT VT = N.getValueType();
+ assert((VT == MVT::v16i1 || VT == MVT::v8i1 || VT == MVT::v4i1) &&
+ "Expected a vector i1 type!");
+ SDValue Op = N.getOperand(0);
+ EVT FromVT = Op.getValueType();
+ SDLoc DL(N);
+
+ SDValue And =
+ DAG.getNode(ISD::AND, DL, FromVT, Op, DAG.getConstant(1, DL, FromVT));
+ return DAG.getNode(ISD::SETCC, DL, VT, And, DAG.getConstant(0, DL, FromVT),
+ DAG.getCondCode(ISD::SETNE));
+}
+
/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
/// element has been zero/sign-extended, depending on the isSigned parameter,
/// from an integer type half its size.
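
LowerTruncatei1 above lowers a truncate to an i1 vector as a lane-wise test of bit 0, i.e. setcc(and(x, 1), 0, ne). A scalar equivalent of the per-lane computation, just to show the arithmetic that ends up being generated:

#include <array>
#include <cstdint>
#include <cstdio>

int main() {
  // Truncating an integer lane to i1 keeps only bit 0, which is the same
  // boolean as (x & 1) != 0.
  std::array<uint32_t, 4> lanes{0, 1, 2, 3};
  for (uint32_t x : lanes)
    std::printf("%u -> %u\n", x, (x & 1u) != 0u ? 1u : 0u); // 0 1 0 1
  return 0;
}
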
@@ -8723,11 +8723,11 @@ static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
return false;
}
-/// isZeroExtended - Check if a node is a vector value that is zero-extended (or
-/// any-extended) or a constant BUILD_VECTOR with zero-extended elements.
+/// isZeroExtended - Check if a node is a vector value that is zero-extended (or
+/// any-extended) or a constant BUILD_VECTOR with zero-extended elements.
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
- if (N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND ||
- ISD::isZEXTLoad(N))
+ if (N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND ||
+ ISD::isZEXTLoad(N))
return true;
if (isExtendedBUILD_VECTOR(N, DAG, false))
return true;
@@ -8795,14 +8795,14 @@ static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
}
/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
-/// ANY_EXTEND, extending load, or BUILD_VECTOR with extended elements, return
-/// the unextended value. The unextended vector should be 64 bits so that it can
+/// ANY_EXTEND, extending load, or BUILD_VECTOR with extended elements, return
+/// the unextended value. The unextended vector should be 64 bits so that it can
/// be used as an operand to a VMULL instruction. If the original vector size
/// before extension is less than 64 bits we add an extension to resize
/// the vector to 64 bits.
static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
- if (N->getOpcode() == ISD::SIGN_EXTEND ||
- N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
+ if (N->getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
N->getOperand(0)->getValueType(0),
N->getValueType(0),
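
A per-lane sketch of the VMULL semantics these helpers feed (an illustration, not code from the change): the operands are the unextended narrow elements and the multiply itself produces the doubled-width result, which is why an explicit extend in front of the mul can be skipped.

    #include <cstdint>

    // One lane of a signed VMULL: a v4i16 (64-bit) operand pair yields a
    // v4i32 (128-bit) product, so the sext feeding the mul is redundant.
    static int32_t vmullS16Lane(int16_t A, int16_t B) {
      return int32_t(A) * int32_t(B);
    }
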
@@ -9770,7 +9770,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG, Subtarget);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG, Subtarget);
- case ISD::TRUNCATE: return LowerTruncatei1(Op, DAG, Subtarget);
+ case ISD::TRUNCATE: return LowerTruncatei1(Op, DAG, Subtarget);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
case ISD::SDIV:
@@ -10403,7 +10403,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
// Remove the landing pad successor from the invoke block and replace it
// with the new dispatch block.
- SmallVector<MachineBasicBlock*, 4> Successors(BB->successors());
+ SmallVector<MachineBasicBlock*, 4> Successors(BB->successors());
while (!Successors.empty()) {
MachineBasicBlock *SMBB = Successors.pop_back_val();
if (SMBB->isEHPad()) {
@@ -10887,7 +10887,7 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
.addExternalSymbol("__chkstk");
- BuildMI(*MBB, MI, DL, TII.get(gettBLXrOpcode(*MBB->getParent())))
+ BuildMI(*MBB, MI, DL, TII.get(gettBLXrOpcode(*MBB->getParent())))
.add(predOps(ARMCC::AL))
.addReg(Reg, RegState::Kill)
.addReg(ARM::R4, RegState::Implicit | RegState::Kill)
@@ -11266,14 +11266,14 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return EmitLowered__chkstk(MI, BB);
case ARM::WIN__DBZCHK:
return EmitLowered__dbzchk(MI, BB);
- case ARM::t2DoLoopStart:
-    // We are just here to set a register allocation hint, preferring lr for the
- // input register to make it more likely to be movable and removable, later
- // in the pipeline.
- Register R = MI.getOperand(1).getReg();
- MachineFunction *MF = MI.getParent()->getParent();
- MF->getRegInfo().setRegAllocationHint(R, ARMRI::RegLR, 0);
- return BB;
+ case ARM::t2DoLoopStart:
+    // We are just here to set a register allocation hint, preferring lr for the
+ // input register to make it more likely to be movable and removable, later
+ // in the pipeline.
+ Register R = MI.getOperand(1).getReg();
+ MachineFunction *MF = MI.getParent()->getParent();
+ MF->getRegInfo().setRegAllocationHint(R, ARMRI::RegLR, 0);
+ return BB;
}
}
@@ -12115,198 +12115,198 @@ static SDValue PerformAddeSubeCombine(SDNode *N,
return SDValue();
}
-static SDValue PerformSELECTCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- const ARMSubtarget *Subtarget) {
- if (!Subtarget->hasMVEIntegerOps())
- return SDValue();
-
- SDLoc dl(N);
- SDValue SetCC;
- SDValue LHS;
- SDValue RHS;
- ISD::CondCode CC;
- SDValue TrueVal;
- SDValue FalseVal;
-
- if (N->getOpcode() == ISD::SELECT &&
- N->getOperand(0)->getOpcode() == ISD::SETCC) {
- SetCC = N->getOperand(0);
- LHS = SetCC->getOperand(0);
- RHS = SetCC->getOperand(1);
- CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
- TrueVal = N->getOperand(1);
- FalseVal = N->getOperand(2);
- } else if (N->getOpcode() == ISD::SELECT_CC) {
- LHS = N->getOperand(0);
- RHS = N->getOperand(1);
- CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
- TrueVal = N->getOperand(2);
- FalseVal = N->getOperand(3);
- } else {
- return SDValue();
- }
-
- unsigned int Opcode = 0;
- if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMIN ||
- FalseVal->getOpcode() == ISD::VECREDUCE_UMIN) &&
- (CC == ISD::SETULT || CC == ISD::SETUGT)) {
- Opcode = ARMISD::VMINVu;
- if (CC == ISD::SETUGT)
- std::swap(TrueVal, FalseVal);
- } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMIN ||
- FalseVal->getOpcode() == ISD::VECREDUCE_SMIN) &&
- (CC == ISD::SETLT || CC == ISD::SETGT)) {
- Opcode = ARMISD::VMINVs;
- if (CC == ISD::SETGT)
- std::swap(TrueVal, FalseVal);
- } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMAX ||
- FalseVal->getOpcode() == ISD::VECREDUCE_UMAX) &&
- (CC == ISD::SETUGT || CC == ISD::SETULT)) {
- Opcode = ARMISD::VMAXVu;
- if (CC == ISD::SETULT)
- std::swap(TrueVal, FalseVal);
- } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMAX ||
- FalseVal->getOpcode() == ISD::VECREDUCE_SMAX) &&
- (CC == ISD::SETGT || CC == ISD::SETLT)) {
- Opcode = ARMISD::VMAXVs;
- if (CC == ISD::SETLT)
- std::swap(TrueVal, FalseVal);
- } else
- return SDValue();
-
- // Normalise to the right hand side being the vector reduction
- switch (TrueVal->getOpcode()) {
- case ISD::VECREDUCE_UMIN:
- case ISD::VECREDUCE_SMIN:
- case ISD::VECREDUCE_UMAX:
- case ISD::VECREDUCE_SMAX:
- std::swap(LHS, RHS);
- std::swap(TrueVal, FalseVal);
- break;
- }
-
- EVT VectorType = FalseVal->getOperand(0).getValueType();
-
- if (VectorType != MVT::v16i8 && VectorType != MVT::v8i16 &&
- VectorType != MVT::v4i32)
- return SDValue();
-
- EVT VectorScalarType = VectorType.getVectorElementType();
-
- // The values being selected must also be the ones being compared
- if (TrueVal != LHS || FalseVal != RHS)
- return SDValue();
-
- EVT LeftType = LHS->getValueType(0);
- EVT RightType = RHS->getValueType(0);
-
- // The types must match the reduced type too
- if (LeftType != VectorScalarType || RightType != VectorScalarType)
- return SDValue();
-
- // Legalise the scalar to an i32
- if (VectorScalarType != MVT::i32)
- LHS = DCI.DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
-
- // Generate the reduction as an i32 for legalisation purposes
- auto Reduction =
- DCI.DAG.getNode(Opcode, dl, MVT::i32, LHS, RHS->getOperand(0));
-
- // The result isn't actually an i32 so truncate it back to its original type
- if (VectorScalarType != MVT::i32)
- Reduction = DCI.DAG.getNode(ISD::TRUNCATE, dl, VectorScalarType, Reduction);
-
- return Reduction;
-}
-
-// A special combine for the vqdmulh family of instructions. This is one of the
-// potential set of patterns that could match this instruction. The base pattern
-// you would expect is min(max(ashr(mul(mul(sext(x), 2), sext(y)), 16))).
-// This matches the different min(max(ashr(mul(mul(sext(x), sext(y)), 2), 16))),
-// which llvm will have optimized to min(ashr(mul(sext(x), sext(y)), 15))) as
-// the max is unnecessary.
-static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
- EVT VT = N->getValueType(0);
- SDValue Shft;
- ConstantSDNode *Clamp;
-
- if (N->getOpcode() == ISD::SMIN) {
- Shft = N->getOperand(0);
- Clamp = isConstOrConstSplat(N->getOperand(1));
- } else if (N->getOpcode() == ISD::VSELECT) {
- // Detect a SMIN, which for an i64 node will be a vselect/setcc, not a smin.
- SDValue Cmp = N->getOperand(0);
- if (Cmp.getOpcode() != ISD::SETCC ||
- cast<CondCodeSDNode>(Cmp.getOperand(2))->get() != ISD::SETLT ||
- Cmp.getOperand(0) != N->getOperand(1) ||
- Cmp.getOperand(1) != N->getOperand(2))
- return SDValue();
- Shft = N->getOperand(1);
- Clamp = isConstOrConstSplat(N->getOperand(2));
- } else
- return SDValue();
-
- if (!Clamp)
- return SDValue();
-
- MVT ScalarType;
- int ShftAmt = 0;
- switch (Clamp->getSExtValue()) {
- case (1 << 7) - 1:
- ScalarType = MVT::i8;
- ShftAmt = 7;
- break;
- case (1 << 15) - 1:
- ScalarType = MVT::i16;
- ShftAmt = 15;
- break;
- case (1ULL << 31) - 1:
- ScalarType = MVT::i32;
- ShftAmt = 31;
- break;
- default:
- return SDValue();
- }
-
- if (Shft.getOpcode() != ISD::SRA)
- return SDValue();
- ConstantSDNode *N1 = isConstOrConstSplat(Shft.getOperand(1));
- if (!N1 || N1->getSExtValue() != ShftAmt)
- return SDValue();
-
- SDValue Mul = Shft.getOperand(0);
- if (Mul.getOpcode() != ISD::MUL)
- return SDValue();
-
- SDValue Ext0 = Mul.getOperand(0);
- SDValue Ext1 = Mul.getOperand(1);
- if (Ext0.getOpcode() != ISD::SIGN_EXTEND ||
- Ext1.getOpcode() != ISD::SIGN_EXTEND)
- return SDValue();
- EVT VecVT = Ext0.getOperand(0).getValueType();
- if (VecVT != MVT::v4i32 && VecVT != MVT::v8i16 && VecVT != MVT::v16i8)
- return SDValue();
- if (Ext1.getOperand(0).getValueType() != VecVT ||
- VecVT.getScalarType() != ScalarType ||
- VT.getScalarSizeInBits() < ScalarType.getScalarSizeInBits() * 2)
- return SDValue();
-
- SDLoc DL(Mul);
- SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, VecVT, Ext0.getOperand(0),
- Ext1.getOperand(0));
- return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, VQDMULH);
-}
-
+static SDValue PerformSELECTCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ if (!Subtarget->hasMVEIntegerOps())
+ return SDValue();
+
+ SDLoc dl(N);
+ SDValue SetCC;
+ SDValue LHS;
+ SDValue RHS;
+ ISD::CondCode CC;
+ SDValue TrueVal;
+ SDValue FalseVal;
+
+ if (N->getOpcode() == ISD::SELECT &&
+ N->getOperand(0)->getOpcode() == ISD::SETCC) {
+ SetCC = N->getOperand(0);
+ LHS = SetCC->getOperand(0);
+ RHS = SetCC->getOperand(1);
+ CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
+ TrueVal = N->getOperand(1);
+ FalseVal = N->getOperand(2);
+ } else if (N->getOpcode() == ISD::SELECT_CC) {
+ LHS = N->getOperand(0);
+ RHS = N->getOperand(1);
+ CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ TrueVal = N->getOperand(2);
+ FalseVal = N->getOperand(3);
+ } else {
+ return SDValue();
+ }
+
+ unsigned int Opcode = 0;
+ if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMIN ||
+ FalseVal->getOpcode() == ISD::VECREDUCE_UMIN) &&
+ (CC == ISD::SETULT || CC == ISD::SETUGT)) {
+ Opcode = ARMISD::VMINVu;
+ if (CC == ISD::SETUGT)
+ std::swap(TrueVal, FalseVal);
+ } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMIN ||
+ FalseVal->getOpcode() == ISD::VECREDUCE_SMIN) &&
+ (CC == ISD::SETLT || CC == ISD::SETGT)) {
+ Opcode = ARMISD::VMINVs;
+ if (CC == ISD::SETGT)
+ std::swap(TrueVal, FalseVal);
+ } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMAX ||
+ FalseVal->getOpcode() == ISD::VECREDUCE_UMAX) &&
+ (CC == ISD::SETUGT || CC == ISD::SETULT)) {
+ Opcode = ARMISD::VMAXVu;
+ if (CC == ISD::SETULT)
+ std::swap(TrueVal, FalseVal);
+ } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMAX ||
+ FalseVal->getOpcode() == ISD::VECREDUCE_SMAX) &&
+ (CC == ISD::SETGT || CC == ISD::SETLT)) {
+ Opcode = ARMISD::VMAXVs;
+ if (CC == ISD::SETLT)
+ std::swap(TrueVal, FalseVal);
+ } else
+ return SDValue();
+
+ // Normalise to the right hand side being the vector reduction
+ switch (TrueVal->getOpcode()) {
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_SMAX:
+ std::swap(LHS, RHS);
+ std::swap(TrueVal, FalseVal);
+ break;
+ }
+
+ EVT VectorType = FalseVal->getOperand(0).getValueType();
+
+ if (VectorType != MVT::v16i8 && VectorType != MVT::v8i16 &&
+ VectorType != MVT::v4i32)
+ return SDValue();
+
+ EVT VectorScalarType = VectorType.getVectorElementType();
+
+ // The values being selected must also be the ones being compared
+ if (TrueVal != LHS || FalseVal != RHS)
+ return SDValue();
+
+ EVT LeftType = LHS->getValueType(0);
+ EVT RightType = RHS->getValueType(0);
+
+ // The types must match the reduced type too
+ if (LeftType != VectorScalarType || RightType != VectorScalarType)
+ return SDValue();
+
+ // Legalise the scalar to an i32
+ if (VectorScalarType != MVT::i32)
+ LHS = DCI.DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
+
+ // Generate the reduction as an i32 for legalisation purposes
+ auto Reduction =
+ DCI.DAG.getNode(Opcode, dl, MVT::i32, LHS, RHS->getOperand(0));
+
+ // The result isn't actually an i32 so truncate it back to its original type
+ if (VectorScalarType != MVT::i32)
+ Reduction = DCI.DAG.getNode(ISD::TRUNCATE, dl, VectorScalarType, Reduction);
+
+ return Reduction;
+}
+
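
As a rough scalar model of the unsigned-minimum case this combine targets (my own illustration, assuming the select and compare use the same scalar and reduction): selecting between a scalar and the vector's reduced minimum based on an unsigned compare of those two values is just a minimum over the scalar and every lane, which is what MVE's VMINV computes with the scalar as its start value.

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // select (x ult umin(v)) ? x : umin(v)  ==  unsigned min of x and all lanes.
    static uint32_t vminvUModel(uint32_t X, const std::vector<uint32_t> &V) {
      uint32_t M = X;
      for (uint32_t E : V)
        M = std::min(M, E);
      return M;
    }
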
+// A special combine for the vqdmulh family of instructions. This is one of the
+// potential set of patterns that could match this instruction. The base pattern
+// you would expect is min(max(ashr(mul(mul(sext(x), 2), sext(y)), 16))).
+// This matches the different min(max(ashr(mul(mul(sext(x), sext(y)), 2), 16))),
+// which llvm will have optimized to min(ashr(mul(sext(x), sext(y)), 15))) as
+// the max is unnecessary.
+static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ SDValue Shft;
+ ConstantSDNode *Clamp;
+
+ if (N->getOpcode() == ISD::SMIN) {
+ Shft = N->getOperand(0);
+ Clamp = isConstOrConstSplat(N->getOperand(1));
+ } else if (N->getOpcode() == ISD::VSELECT) {
+ // Detect a SMIN, which for an i64 node will be a vselect/setcc, not a smin.
+ SDValue Cmp = N->getOperand(0);
+ if (Cmp.getOpcode() != ISD::SETCC ||
+ cast<CondCodeSDNode>(Cmp.getOperand(2))->get() != ISD::SETLT ||
+ Cmp.getOperand(0) != N->getOperand(1) ||
+ Cmp.getOperand(1) != N->getOperand(2))
+ return SDValue();
+ Shft = N->getOperand(1);
+ Clamp = isConstOrConstSplat(N->getOperand(2));
+ } else
+ return SDValue();
+
+ if (!Clamp)
+ return SDValue();
+
+ MVT ScalarType;
+ int ShftAmt = 0;
+ switch (Clamp->getSExtValue()) {
+ case (1 << 7) - 1:
+ ScalarType = MVT::i8;
+ ShftAmt = 7;
+ break;
+ case (1 << 15) - 1:
+ ScalarType = MVT::i16;
+ ShftAmt = 15;
+ break;
+ case (1ULL << 31) - 1:
+ ScalarType = MVT::i32;
+ ShftAmt = 31;
+ break;
+ default:
+ return SDValue();
+ }
+
+ if (Shft.getOpcode() != ISD::SRA)
+ return SDValue();
+ ConstantSDNode *N1 = isConstOrConstSplat(Shft.getOperand(1));
+ if (!N1 || N1->getSExtValue() != ShftAmt)
+ return SDValue();
+
+ SDValue Mul = Shft.getOperand(0);
+ if (Mul.getOpcode() != ISD::MUL)
+ return SDValue();
+
+ SDValue Ext0 = Mul.getOperand(0);
+ SDValue Ext1 = Mul.getOperand(1);
+ if (Ext0.getOpcode() != ISD::SIGN_EXTEND ||
+ Ext1.getOpcode() != ISD::SIGN_EXTEND)
+ return SDValue();
+ EVT VecVT = Ext0.getOperand(0).getValueType();
+ if (VecVT != MVT::v4i32 && VecVT != MVT::v8i16 && VecVT != MVT::v16i8)
+ return SDValue();
+ if (Ext1.getOperand(0).getValueType() != VecVT ||
+ VecVT.getScalarType() != ScalarType ||
+ VT.getScalarSizeInBits() < ScalarType.getScalarSizeInBits() * 2)
+ return SDValue();
+
+ SDLoc DL(Mul);
+ SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, VecVT, Ext0.getOperand(0),
+ Ext1.getOperand(0));
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, VQDMULH);
+}
+
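
A scalar sketch of the i16 case of the matched pattern (illustrative only, not part of the change): min((sext(a) * sext(b)) >> 15, 0x7FFF) is the Q15 saturating doubling multiply-high that VQDMULH.S16 computes, which is why the max half of the clamp can be dropped.

    #include <algorithm>
    #include <cstdint>

    static int16_t vqdmulhS16Model(int16_t A, int16_t B) {
      int32_t Prod = int32_t(A) * int32_t(B);      // sext + mul
      int32_t Shifted = Prod >> 15;                // ashr by ScalarSize - 1
      return int16_t(std::min(Shifted, 0x7FFF));   // the remaining smin clamp
    }
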
static SDValue PerformVSELECTCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
- if (!Subtarget->hasMVEIntegerOps())
- return SDValue();
-
- if (SDValue V = PerformVQDMULHCombine(N, DCI.DAG))
- return V;
-
+ if (!Subtarget->hasMVEIntegerOps())
+ return SDValue();
+
+ if (SDValue V = PerformVQDMULHCombine(N, DCI.DAG))
+ return V;
+
// Transforms vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs).
//
// We need to re-implement this optimization here as the implementation in the
@@ -12456,14 +12456,14 @@ static SDValue PerformADDVecReduce(SDNode *N,
return M;
if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N1, N0))
return M;
- if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N0, N1))
- return M;
- if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N0, N1))
- return M;
- if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N1, N0))
- return M;
- if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N1, N0))
- return M;
+ if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N0, N1))
+ return M;
+ if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N0, N1))
+ return M;
+ if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N1, N0))
+ return M;
+ if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N1, N0))
+ return M;
return SDValue();
}
@@ -13358,7 +13358,7 @@ static SDValue PerformORCombine(SDNode *N,
// Canonicalize the vector type to make instruction selection
// simpler.
EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
- SDValue Result = DAG.getNode(ARMISD::VBSP, dl, CanonicalVT,
+ SDValue Result = DAG.getNode(ARMISD::VBSP, dl, CanonicalVT,
N0->getOperand(1),
N0->getOperand(0),
N1->getOperand(0));
@@ -13669,12 +13669,12 @@ static SDValue PerformVMOVrhCombine(SDNode *N,
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- // fold (VMOVrh (fpconst x)) -> const x
- if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0)) {
- APFloat V = C->getValueAPF();
- return DCI.DAG.getConstant(V.bitcastToAPInt().getZExtValue(), SDLoc(N), VT);
- }
-
+ // fold (VMOVrh (fpconst x)) -> const x
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0)) {
+ APFloat V = C->getValueAPF();
+ return DCI.DAG.getConstant(V.bitcastToAPInt().getZExtValue(), SDLoc(N), VT);
+ }
+
// fold (VMOVrh (load x)) -> (zextload (i16*)x)
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -13849,23 +13849,23 @@ PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
}
- // Turn pred_cast(xor x, -1) into xor(pred_cast x, -1), in order to produce
- // more VPNOT which might get folded as else predicates.
- if (Op.getValueType() == MVT::i32 && isBitwiseNot(Op)) {
- SDValue X =
- DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
- SDValue C = DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
- DCI.DAG.getConstant(65535, dl, MVT::i32));
- return DCI.DAG.getNode(ISD::XOR, dl, VT, X, C);
- }
-
- // Only the bottom 16 bits of the source register are used.
- if (Op.getValueType() == MVT::i32) {
- APInt DemandedMask = APInt::getLowBitsSet(32, 16);
- const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
- if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
- return SDValue(N, 0);
- }
+ // Turn pred_cast(xor x, -1) into xor(pred_cast x, -1), in order to produce
+ // more VPNOT which might get folded as else predicates.
+ if (Op.getValueType() == MVT::i32 && isBitwiseNot(Op)) {
+ SDValue X =
+ DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
+ SDValue C = DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
+ DCI.DAG.getConstant(65535, dl, MVT::i32));
+ return DCI.DAG.getNode(ISD::XOR, dl, VT, X, C);
+ }
+
+ // Only the bottom 16 bits of the source register are used.
+ if (Op.getValueType() == MVT::i32) {
+ APInt DemandedMask = APInt::getLowBitsSet(32, 16);
+ const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
+ if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
+ return SDValue(N, 0);
+ }
return SDValue();
}
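
For intuition, a small sketch under the assumption that a 16-lane MVE predicate lives in the low 16 bits of the i32 (illustrative only): a full bitwise not and an xor with 0xFFFF agree on those bits, which is why the xor form above can later fold into VPNOT and why only the bottom 16 bits of the source register are demanded.

    #include <cstdint>

    static uint16_t invertPredModel(uint32_t P) {
      // ~P and P ^ 0xFFFF have the same low 16 bits.
      return uint16_t(~P);
    }
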
@@ -14078,13 +14078,13 @@ static SDValue CombineBaseUpdate(SDNode *N,
NumVecs = 3; break;
case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD;
NumVecs = 4; break;
- case Intrinsic::arm_neon_vld1x2:
- case Intrinsic::arm_neon_vld1x3:
- case Intrinsic::arm_neon_vld1x4:
+ case Intrinsic::arm_neon_vld1x2:
+ case Intrinsic::arm_neon_vld1x3:
+ case Intrinsic::arm_neon_vld1x4:
case Intrinsic::arm_neon_vld2dup:
case Intrinsic::arm_neon_vld3dup:
case Intrinsic::arm_neon_vld4dup:
- // TODO: Support updating VLD1x and VLDxDUP nodes. For now, we just skip
+ // TODO: Support updating VLD1x and VLDxDUP nodes. For now, we just skip
// combining base updates for such intrinsics.
continue;
case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
@@ -14676,39 +14676,39 @@ static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St,
// use the VMOVN over splitting the store. We are looking for patterns of:
// !rev: 0 N 1 N+1 2 N+2 ...
// rev: N 0 N+1 1 N+2 2 ...
- // The shuffle may either be a single source (in which case N = NumElts/2) or
- // two inputs extended with concat to the same size (in which case N =
- // NumElts).
- auto isVMOVNShuffle = [&](ShuffleVectorSDNode *SVN, bool Rev) {
- ArrayRef<int> M = SVN->getMask();
+ // The shuffle may either be a single source (in which case N = NumElts/2) or
+ // two inputs extended with concat to the same size (in which case N =
+ // NumElts).
+ auto isVMOVNShuffle = [&](ShuffleVectorSDNode *SVN, bool Rev) {
+ ArrayRef<int> M = SVN->getMask();
unsigned NumElts = ToVT.getVectorNumElements();
- if (SVN->getOperand(1).isUndef())
- NumElts /= 2;
+ if (SVN->getOperand(1).isUndef())
+ NumElts /= 2;
- unsigned Off0 = Rev ? NumElts : 0;
- unsigned Off1 = Rev ? 0 : NumElts;
+ unsigned Off0 = Rev ? NumElts : 0;
+ unsigned Off1 = Rev ? 0 : NumElts;
- for (unsigned I = 0; I < NumElts; I += 2) {
- if (M[I] >= 0 && M[I] != (int)(Off0 + I / 2))
+ for (unsigned I = 0; I < NumElts; I += 2) {
+ if (M[I] >= 0 && M[I] != (int)(Off0 + I / 2))
return false;
- if (M[I + 1] >= 0 && M[I + 1] != (int)(Off1 + I / 2))
+ if (M[I + 1] >= 0 && M[I + 1] != (int)(Off1 + I / 2))
return false;
}
return true;
};
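
The lane order in the comment can be written out explicitly; the sketch below (my illustration, not from the patch) builds the mask shape the lambda accepts, e.g. N = 4 gives {0,4,1,5,2,6,3,7} for the !rev form.

    #include <vector>

    static std::vector<int> vmovnMaskModel(int N, bool Rev) {
      int Off0 = Rev ? N : 0, Off1 = Rev ? 0 : N;
      std::vector<int> M;
      for (int I = 0; I < N; ++I) {
        M.push_back(Off0 + I); // even positions: 0, 1, 2, ... (N, N+1, ... if rev)
        M.push_back(Off1 + I); // odd positions:  N, N+1, ... (0, 1, ... if rev)
      }
      return M;
    }
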
- // It may be preferable to keep the store unsplit as the trunc may end up
- // being removed. Check that here.
- if (Trunc.getOperand(0).getOpcode() == ISD::SMIN) {
- if (SDValue U = PerformVQDMULHCombine(Trunc.getOperand(0).getNode(), DAG)) {
- DAG.ReplaceAllUsesWith(Trunc.getOperand(0), U);
- return SDValue();
- }
- }
- if (auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Trunc.getOperand(0)))
- if (isVMOVNShuffle(Shuffle, false) || isVMOVNShuffle(Shuffle, true))
+ // It may be preferable to keep the store unsplit as the trunc may end up
+ // being removed. Check that here.
+ if (Trunc.getOperand(0).getOpcode() == ISD::SMIN) {
+ if (SDValue U = PerformVQDMULHCombine(Trunc.getOperand(0).getNode(), DAG)) {
+ DAG.ReplaceAllUsesWith(Trunc.getOperand(0), U);
return SDValue();
+ }
+ }
+ if (auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Trunc.getOperand(0)))
+ if (isVMOVNShuffle(Shuffle, false) || isVMOVNShuffle(Shuffle, true))
+ return SDValue();
LLVMContext &C = *DAG.getContext();
SDLoc DL(St);
@@ -14728,8 +14728,8 @@ static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St,
SmallVector<SDValue, 4> Stores;
for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
unsigned NewOffset = i * NumElements * ToEltVT.getSizeInBits() / 8;
- SDValue NewPtr =
- DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset));
+ SDValue NewPtr =
+ DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset));
SDValue Extract =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewFromVT, Trunc.getOperand(0),
@@ -14782,15 +14782,15 @@ static SDValue PerformSTORECombine(SDNode *N,
SDValue BasePtr = St->getBasePtr();
SDValue NewST1 = DAG.getStore(
St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
- BasePtr, St->getPointerInfo(), St->getOriginalAlign(),
+ BasePtr, St->getPointerInfo(), St->getOriginalAlign(),
St->getMemOperand()->getFlags());
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
DAG.getConstant(4, DL, MVT::i32));
return DAG.getStore(NewST1.getValue(0), DL,
StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
- OffsetPtr, St->getPointerInfo().getWithOffset(4),
- St->getOriginalAlign(),
+ OffsetPtr, St->getPointerInfo().getWithOffset(4),
+ St->getOriginalAlign(),
St->getMemOperand()->getFlags());
}
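
A minimal model of the split performed here, assuming the value's two 32-bit halves are already available as Lo and Hi (the names are mine): the half written at the base address depends on endianness and the other half lands at base + 4.

    #include <cstdint>
    #include <cstring>

    static void storeI64AsTwoI32(uint8_t *Base, uint32_t Lo, uint32_t Hi,
                                 bool IsBigEndian) {
      uint32_t First  = IsBigEndian ? Hi : Lo;
      uint32_t Second = IsBigEndian ? Lo : Hi;
      std::memcpy(Base, &First, 4);
      std::memcpy(Base + 4, &Second, 4);
    }
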
@@ -14964,107 +14964,107 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
// VADDLV u/s 32
// VMLALV u/s 16/32
- // If the input vector is smaller than legal (v4i8/v4i16 for example) we can
- // extend it and use v4i32 instead.
- auto ExtendIfNeeded = [&](SDValue A, unsigned ExtendCode) {
- EVT AVT = A.getValueType();
- if (!AVT.is128BitVector())
- A = DAG.getNode(ExtendCode, dl,
- AVT.changeVectorElementType(MVT::getIntegerVT(
- 128 / AVT.getVectorMinNumElements())),
- A);
- return A;
- };
+ // If the input vector is smaller than legal (v4i8/v4i16 for example) we can
+ // extend it and use v4i32 instead.
+ auto ExtendIfNeeded = [&](SDValue A, unsigned ExtendCode) {
+ EVT AVT = A.getValueType();
+ if (!AVT.is128BitVector())
+ A = DAG.getNode(ExtendCode, dl,
+ AVT.changeVectorElementType(MVT::getIntegerVT(
+ 128 / AVT.getVectorMinNumElements())),
+ A);
+ return A;
+ };
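
The element width ExtendIfNeeded picks follows directly from the lane count: keeping the lanes and growing the vector to 128 bits gives 128 / NumLanes bits per element, so v4i8 and v4i16 both extend to v4i32. A one-line sketch of that arithmetic (illustrative):

    // 128-bit MVE vector, same lane count: v4i8/v4i16 -> 128/4 = 32-bit lanes.
    static unsigned widenedEltBits(unsigned NumLanes) { return 128 / NumLanes; }
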
auto IsVADDV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes) {
if (ResVT != RetTy || N0->getOpcode() != ExtendCode)
return SDValue();
SDValue A = N0->getOperand(0);
if (llvm::any_of(ExtTypes, [&A](MVT Ty) { return A.getValueType() == Ty; }))
- return ExtendIfNeeded(A, ExtendCode);
- return SDValue();
- };
- auto IsPredVADDV = [&](MVT RetTy, unsigned ExtendCode,
- ArrayRef<MVT> ExtTypes, SDValue &Mask) {
- if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
- !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
- return SDValue();
- Mask = N0->getOperand(0);
- SDValue Ext = N0->getOperand(1);
- if (Ext->getOpcode() != ExtendCode)
- return SDValue();
- SDValue A = Ext->getOperand(0);
- if (llvm::any_of(ExtTypes, [&A](MVT Ty) { return A.getValueType() == Ty; }))
- return ExtendIfNeeded(A, ExtendCode);
+ return ExtendIfNeeded(A, ExtendCode);
return SDValue();
};
+ auto IsPredVADDV = [&](MVT RetTy, unsigned ExtendCode,
+ ArrayRef<MVT> ExtTypes, SDValue &Mask) {
+ if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
+ !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
+ return SDValue();
+ Mask = N0->getOperand(0);
+ SDValue Ext = N0->getOperand(1);
+ if (Ext->getOpcode() != ExtendCode)
+ return SDValue();
+ SDValue A = Ext->getOperand(0);
+ if (llvm::any_of(ExtTypes, [&A](MVT Ty) { return A.getValueType() == Ty; }))
+ return ExtendIfNeeded(A, ExtendCode);
+ return SDValue();
+ };
auto IsVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
SDValue &A, SDValue &B) {
- // For a vmla we are trying to match a larger pattern:
- // ExtA = sext/zext A
- // ExtB = sext/zext B
- // Mul = mul ExtA, ExtB
- // vecreduce.add Mul
-    // There might also be an extra extend between the mul and the addreduce, so
- // long as the bitwidth is high enough to make them equivalent (for example
- // original v8i16 might be mul at v8i32 and the reduce happens at v8i64).
- if (ResVT != RetTy)
+ // For a vmla we are trying to match a larger pattern:
+ // ExtA = sext/zext A
+ // ExtB = sext/zext B
+ // Mul = mul ExtA, ExtB
+ // vecreduce.add Mul
+    // There might also be an extra extend between the mul and the addreduce, so
+ // long as the bitwidth is high enough to make them equivalent (for example
+ // original v8i16 might be mul at v8i32 and the reduce happens at v8i64).
+ if (ResVT != RetTy)
return false;
- SDValue Mul = N0;
- if (Mul->getOpcode() == ExtendCode &&
- Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
- ResVT.getScalarSizeInBits())
- Mul = Mul->getOperand(0);
- if (Mul->getOpcode() != ISD::MUL)
- return false;
- SDValue ExtA = Mul->getOperand(0);
- SDValue ExtB = Mul->getOperand(1);
+ SDValue Mul = N0;
+ if (Mul->getOpcode() == ExtendCode &&
+ Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
+ ResVT.getScalarSizeInBits())
+ Mul = Mul->getOperand(0);
+ if (Mul->getOpcode() != ISD::MUL)
+ return false;
+ SDValue ExtA = Mul->getOperand(0);
+ SDValue ExtB = Mul->getOperand(1);
if (ExtA->getOpcode() != ExtendCode && ExtB->getOpcode() != ExtendCode)
return false;
A = ExtA->getOperand(0);
B = ExtB->getOperand(0);
if (A.getValueType() == B.getValueType() &&
- llvm::any_of(ExtTypes,
- [&A](MVT Ty) { return A.getValueType() == Ty; })) {
- A = ExtendIfNeeded(A, ExtendCode);
- B = ExtendIfNeeded(B, ExtendCode);
+ llvm::any_of(ExtTypes,
+ [&A](MVT Ty) { return A.getValueType() == Ty; })) {
+ A = ExtendIfNeeded(A, ExtendCode);
+ B = ExtendIfNeeded(B, ExtendCode);
return true;
- }
- return false;
- };
- auto IsPredVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
- SDValue &A, SDValue &B, SDValue &Mask) {
- // Same as the pattern above with a select for the zero predicated lanes
- // ExtA = sext/zext A
- // ExtB = sext/zext B
- // Mul = mul ExtA, ExtB
- // N0 = select Mask, Mul, 0
- // vecreduce.add N0
- if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
- !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
- return false;
- Mask = N0->getOperand(0);
- SDValue Mul = N0->getOperand(1);
- if (Mul->getOpcode() == ExtendCode &&
- Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
- ResVT.getScalarSizeInBits())
- Mul = Mul->getOperand(0);
- if (Mul->getOpcode() != ISD::MUL)
- return false;
- SDValue ExtA = Mul->getOperand(0);
- SDValue ExtB = Mul->getOperand(1);
- if (ExtA->getOpcode() != ExtendCode && ExtB->getOpcode() != ExtendCode)
- return false;
- A = ExtA->getOperand(0);
- B = ExtB->getOperand(0);
- if (A.getValueType() == B.getValueType() &&
- llvm::any_of(ExtTypes,
- [&A](MVT Ty) { return A.getValueType() == Ty; })) {
- A = ExtendIfNeeded(A, ExtendCode);
- B = ExtendIfNeeded(B, ExtendCode);
- return true;
- }
+ }
return false;
};
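
A scalar sketch of the signed pattern IsVMLAV looks for (illustrative only): vecreduce.add(mul(sext(A), sext(B))) is a widening multiply-accumulate over all lanes, which is the operation a single VMLAV performs.

    #include <cstdint>
    #include <vector>

    static int32_t vmlavSModel(const std::vector<int16_t> &A,
                               const std::vector<int16_t> &B) {
      int32_t Sum = 0;
      for (size_t I = 0; I < A.size() && I < B.size(); ++I)
        Sum += int32_t(A[I]) * int32_t(B[I]); // sext, mul, then reduce-add
      return Sum;
    }
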
+ auto IsPredVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
+ SDValue &A, SDValue &B, SDValue &Mask) {
+ // Same as the pattern above with a select for the zero predicated lanes
+ // ExtA = sext/zext A
+ // ExtB = sext/zext B
+ // Mul = mul ExtA, ExtB
+ // N0 = select Mask, Mul, 0
+ // vecreduce.add N0
+ if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
+ !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
+ return false;
+ Mask = N0->getOperand(0);
+ SDValue Mul = N0->getOperand(1);
+ if (Mul->getOpcode() == ExtendCode &&
+ Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
+ ResVT.getScalarSizeInBits())
+ Mul = Mul->getOperand(0);
+ if (Mul->getOpcode() != ISD::MUL)
+ return false;
+ SDValue ExtA = Mul->getOperand(0);
+ SDValue ExtB = Mul->getOperand(1);
+ if (ExtA->getOpcode() != ExtendCode && ExtB->getOpcode() != ExtendCode)
+ return false;
+ A = ExtA->getOperand(0);
+ B = ExtB->getOperand(0);
+ if (A.getValueType() == B.getValueType() &&
+ llvm::any_of(ExtTypes,
+ [&A](MVT Ty) { return A.getValueType() == Ty; })) {
+ A = ExtendIfNeeded(A, ExtendCode);
+ B = ExtendIfNeeded(B, ExtendCode);
+ return true;
+ }
+ return false;
+ };
auto Create64bitNode = [&](unsigned Opcode, ArrayRef<SDValue> Ops) {
SDValue Node = DAG.getNode(Opcode, dl, {MVT::i32, MVT::i32}, Ops);
return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Node,
@@ -15075,93 +15075,93 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A);
if (SDValue A = IsVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}))
return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A);
- if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND,
- {MVT::v4i8, MVT::v4i16, MVT::v4i32}))
+ if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND,
+ {MVT::v4i8, MVT::v4i16, MVT::v4i32}))
return Create64bitNode(ARMISD::VADDLVs, {A});
- if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND,
- {MVT::v4i8, MVT::v4i16, MVT::v4i32}))
+ if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND,
+ {MVT::v4i8, MVT::v4i16, MVT::v4i32}))
return Create64bitNode(ARMISD::VADDLVu, {A});
- if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}))
- return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
- DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A));
- if (SDValue A = IsVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}))
- return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
- DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A));
-
- SDValue Mask;
- if (SDValue A = IsPredVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
- return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask);
- if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
- return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask);
- if (SDValue A = IsPredVADDV(MVT::i64, ISD::SIGN_EXTEND,
- {MVT::v4i8, MVT::v4i16, MVT::v4i32}, Mask))
- return Create64bitNode(ARMISD::VADDLVps, {A, Mask});
- if (SDValue A = IsPredVADDV(MVT::i64, ISD::ZERO_EXTEND,
- {MVT::v4i8, MVT::v4i16, MVT::v4i32}, Mask))
- return Create64bitNode(ARMISD::VADDLVpu, {A, Mask});
- if (SDValue A = IsPredVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, Mask))
- return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
- DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask));
- if (SDValue A = IsPredVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, Mask))
- return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
- DAG.getNode(ARMISD::VADDVpu, dl, MVT::i32, A, Mask));
-
+ if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}))
+ return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
+ DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A));
+ if (SDValue A = IsVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}))
+ return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
+ DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A));
+
+ SDValue Mask;
+ if (SDValue A = IsPredVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
+ return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask);
+ if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
+ return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask);
+ if (SDValue A = IsPredVADDV(MVT::i64, ISD::SIGN_EXTEND,
+ {MVT::v4i8, MVT::v4i16, MVT::v4i32}, Mask))
+ return Create64bitNode(ARMISD::VADDLVps, {A, Mask});
+ if (SDValue A = IsPredVADDV(MVT::i64, ISD::ZERO_EXTEND,
+ {MVT::v4i8, MVT::v4i16, MVT::v4i32}, Mask))
+ return Create64bitNode(ARMISD::VADDLVpu, {A, Mask});
+ if (SDValue A = IsPredVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, Mask))
+ return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
+ DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask));
+ if (SDValue A = IsPredVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, Mask))
+ return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
+ DAG.getNode(ARMISD::VADDVpu, dl, MVT::i32, A, Mask));
+
SDValue A, B;
if (IsVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
return DAG.getNode(ARMISD::VMLAVs, dl, ResVT, A, B);
if (IsVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
return DAG.getNode(ARMISD::VMLAVu, dl, ResVT, A, B);
- if (IsVMLAV(MVT::i64, ISD::SIGN_EXTEND,
- {MVT::v8i8, MVT::v8i16, MVT::v4i8, MVT::v4i16, MVT::v4i32}, A, B))
+ if (IsVMLAV(MVT::i64, ISD::SIGN_EXTEND,
+ {MVT::v8i8, MVT::v8i16, MVT::v4i8, MVT::v4i16, MVT::v4i32}, A, B))
return Create64bitNode(ARMISD::VMLALVs, {A, B});
- if (IsVMLAV(MVT::i64, ISD::ZERO_EXTEND,
- {MVT::v8i8, MVT::v8i16, MVT::v4i8, MVT::v4i16, MVT::v4i32}, A, B))
+ if (IsVMLAV(MVT::i64, ISD::ZERO_EXTEND,
+ {MVT::v8i8, MVT::v8i16, MVT::v4i8, MVT::v4i16, MVT::v4i32}, A, B))
return Create64bitNode(ARMISD::VMLALVu, {A, B});
- if (IsVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B))
- return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
- DAG.getNode(ARMISD::VMLAVs, dl, MVT::i32, A, B));
- if (IsVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B))
- return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
- DAG.getNode(ARMISD::VMLAVu, dl, MVT::i32, A, B));
-
- if (IsPredVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B, Mask))
- return DAG.getNode(ARMISD::VMLAVps, dl, ResVT, A, B, Mask);
- if (IsPredVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B, Mask))
- return DAG.getNode(ARMISD::VMLAVpu, dl, ResVT, A, B, Mask);
- if (IsPredVMLAV(MVT::i64, ISD::SIGN_EXTEND,
- {MVT::v8i8, MVT::v8i16, MVT::v4i8, MVT::v4i16, MVT::v4i32}, A,
- B, Mask))
- return Create64bitNode(ARMISD::VMLALVps, {A, B, Mask});
- if (IsPredVMLAV(MVT::i64, ISD::ZERO_EXTEND,
- {MVT::v8i8, MVT::v8i16, MVT::v4i8, MVT::v4i16, MVT::v4i32}, A,
- B, Mask))
- return Create64bitNode(ARMISD::VMLALVpu, {A, B, Mask});
- if (IsPredVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B, Mask))
- return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
- DAG.getNode(ARMISD::VMLAVps, dl, MVT::i32, A, B, Mask));
- if (IsPredVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B, Mask))
- return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
- DAG.getNode(ARMISD::VMLAVpu, dl, MVT::i32, A, B, Mask));
-
- // Some complications. We can get a case where the two inputs of the mul are
- // the same, then the output sext will have been helpfully converted to a
- // zext. Turn it back.
- SDValue Op = N0;
- if (Op->getOpcode() == ISD::VSELECT)
- Op = Op->getOperand(1);
- if (Op->getOpcode() == ISD::ZERO_EXTEND &&
- Op->getOperand(0)->getOpcode() == ISD::MUL) {
- SDValue Mul = Op->getOperand(0);
- if (Mul->getOperand(0) == Mul->getOperand(1) &&
- Mul->getOperand(0)->getOpcode() == ISD::SIGN_EXTEND) {
- SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, N0->getValueType(0), Mul);
- if (Op != N0)
- Ext = DAG.getNode(ISD::VSELECT, dl, N0->getValueType(0),
- N0->getOperand(0), Ext, N0->getOperand(2));
- return DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, Ext);
- }
- }
-
+ if (IsVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B))
+ return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
+ DAG.getNode(ARMISD::VMLAVs, dl, MVT::i32, A, B));
+ if (IsVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B))
+ return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
+ DAG.getNode(ARMISD::VMLAVu, dl, MVT::i32, A, B));
+
+ if (IsPredVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B, Mask))
+ return DAG.getNode(ARMISD::VMLAVps, dl, ResVT, A, B, Mask);
+ if (IsPredVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B, Mask))
+ return DAG.getNode(ARMISD::VMLAVpu, dl, ResVT, A, B, Mask);
+ if (IsPredVMLAV(MVT::i64, ISD::SIGN_EXTEND,
+ {MVT::v8i8, MVT::v8i16, MVT::v4i8, MVT::v4i16, MVT::v4i32}, A,
+ B, Mask))
+ return Create64bitNode(ARMISD::VMLALVps, {A, B, Mask});
+ if (IsPredVMLAV(MVT::i64, ISD::ZERO_EXTEND,
+ {MVT::v8i8, MVT::v8i16, MVT::v4i8, MVT::v4i16, MVT::v4i32}, A,
+ B, Mask))
+ return Create64bitNode(ARMISD::VMLALVpu, {A, B, Mask});
+ if (IsPredVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B, Mask))
+ return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
+ DAG.getNode(ARMISD::VMLAVps, dl, MVT::i32, A, B, Mask));
+ if (IsPredVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B, Mask))
+ return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
+ DAG.getNode(ARMISD::VMLAVpu, dl, MVT::i32, A, B, Mask));
+
+ // Some complications. We can get a case where the two inputs of the mul are
+ // the same, then the output sext will have been helpfully converted to a
+ // zext. Turn it back.
+ SDValue Op = N0;
+ if (Op->getOpcode() == ISD::VSELECT)
+ Op = Op->getOperand(1);
+ if (Op->getOpcode() == ISD::ZERO_EXTEND &&
+ Op->getOperand(0)->getOpcode() == ISD::MUL) {
+ SDValue Mul = Op->getOperand(0);
+ if (Mul->getOperand(0) == Mul->getOperand(1) &&
+ Mul->getOperand(0)->getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, N0->getValueType(0), Mul);
+ if (Op != N0)
+ Ext = DAG.getNode(ISD::VSELECT, dl, N0->getValueType(0),
+ N0->getOperand(0), Ext, N0->getOperand(2));
+ return DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, Ext);
+ }
+ }
+
return SDValue();
}
@@ -15613,13 +15613,13 @@ static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG) {
SmallVector<SDValue, 4> Chains;
for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
- SDValue NewPtr =
- DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset));
+ SDValue NewPtr =
+ DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset));
SDValue NewLoad =
DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
- Alignment, MMOFlags, AAInfo);
+ Alignment, MMOFlags, AAInfo);
Loads.push_back(NewLoad);
Chains.push_back(SDValue(NewLoad.getNode(), 1));
}
@@ -15707,9 +15707,9 @@ static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG,
if (!ST->hasMVEIntegerOps())
return SDValue();
- if (SDValue V = PerformVQDMULHCombine(N, DAG))
- return V;
-
+ if (SDValue V = PerformVQDMULHCombine(N, DAG))
+ return V;
+
if (VT != MVT::v4i32 && VT != MVT::v8i16)
return SDValue();
@@ -16317,8 +16317,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default: break;
- case ISD::SELECT_CC:
- case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget);
+ case ISD::SELECT_CC:
+ case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget);
case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget);
case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
@@ -16735,19 +16735,19 @@ bool ARMTargetLowering::shouldSinkOperands(Instruction *I,
switch (II->getIntrinsicID()) {
case Intrinsic::fma:
return !IsFMS(I);
- case Intrinsic::arm_mve_add_predicated:
- case Intrinsic::arm_mve_mul_predicated:
- case Intrinsic::arm_mve_qadd_predicated:
- case Intrinsic::arm_mve_hadd_predicated:
- case Intrinsic::arm_mve_vqdmull_predicated:
- case Intrinsic::arm_mve_qdmulh_predicated:
- case Intrinsic::arm_mve_qrdmulh_predicated:
- case Intrinsic::arm_mve_fma_predicated:
- return true;
- case Intrinsic::arm_mve_sub_predicated:
- case Intrinsic::arm_mve_qsub_predicated:
- case Intrinsic::arm_mve_hsub_predicated:
- return Operand == 1;
+ case Intrinsic::arm_mve_add_predicated:
+ case Intrinsic::arm_mve_mul_predicated:
+ case Intrinsic::arm_mve_qadd_predicated:
+ case Intrinsic::arm_mve_hadd_predicated:
+ case Intrinsic::arm_mve_vqdmull_predicated:
+ case Intrinsic::arm_mve_qdmulh_predicated:
+ case Intrinsic::arm_mve_qrdmulh_predicated:
+ case Intrinsic::arm_mve_fma_predicated:
+ return true;
+ case Intrinsic::arm_mve_sub_predicated:
+ case Intrinsic::arm_mve_qsub_predicated:
+ case Intrinsic::arm_mve_hsub_predicated:
+ return Operand == 1;
default:
return false;
}
@@ -17476,7 +17476,7 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
return;
KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
- Known = KnownBits::commonBits(Known, KnownRHS);
+ Known = KnownBits::commonBits(Known, KnownRHS);
return;
}
case ISD::INTRINSIC_W_CHAIN: {
@@ -18349,9 +18349,9 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
return false;
if (VT == MVT::f16 && Subtarget->hasFullFP16())
return ARM_AM::getFP16Imm(Imm) != -1;
- if (VT == MVT::f32 && Subtarget->hasFullFP16() &&
- ARM_AM::getFP32FP16Imm(Imm) != -1)
- return true;
+ if (VT == MVT::f32 && Subtarget->hasFullFP16() &&
+ ARM_AM::getFP32FP16Imm(Imm) != -1)
+ return true;
if (VT == MVT::f32)
return ARM_AM::getFP32Imm(Imm) != -1;
if (VT == MVT::f64 && Subtarget->hasFP64())
@@ -18661,8 +18661,8 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
: AtomicExpansionKind::None;
}
-// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32
-// bits, and up to 64 bits on the non-M profiles.
+// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32
+// bits, and up to 64 bits on the non-M profiles.
TargetLowering::AtomicExpansionKind
ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
@@ -18670,11 +18670,11 @@ ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
// on the stack and close enough to the spill slot, this can lead to a
// situation where the monitor always gets cleared and the atomic operation
// can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
- unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
+ unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
bool HasAtomicCmpXchg =
!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
- if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg &&
- Size <= (Subtarget->isMClass() ? 32U : 64U))
+ if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg &&
+ Size <= (Subtarget->isMClass() ? 32U : 64U))
return AtomicExpansionKind::LLSC;
return AtomicExpansionKind::None;
}
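
Conceptually, opting into AtomicExpansionKind::LLSC turns the cmpxchg into a load-exclusive/store-exclusive retry loop; the sketch below is only a model, with ldrex/strex passed in as hypothetical stand-ins for the real exclusive-access instructions.

    #include <cstdint>

    // strex is assumed here to return true when the exclusive store succeeds.
    static bool cmpxchgLLSCModel(uint32_t *Addr, uint32_t Expected,
                                 uint32_t Desired,
                                 uint32_t (*ldrex)(uint32_t *),
                                 bool (*strex)(uint32_t *, uint32_t)) {
      for (;;) {
        uint32_t Old = ldrex(Addr);
        if (Old != Expected)
          return false;            // compare failed
        if (strex(Addr, Desired))
          return true;             // exclusive store succeeded
      }
    }
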
@@ -19129,7 +19129,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
SmallVector<Value *, 6> Ops;
Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
- append_range(Ops, Shuffles);
+ append_range(Ops, Shuffles);
Ops.push_back(Builder.getInt32(SI->getAlignment()));
Builder.CreateCall(VstNFunc, Ops);
} else {
@@ -19145,7 +19145,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
SmallVector<Value *, 6> Ops;
Ops.push_back(Builder.CreateBitCast(BaseAddr, EltPtrTy));
- append_range(Ops, Shuffles);
+ append_range(Ops, Shuffles);
for (unsigned F = 0; F < Factor; F++) {
Ops.push_back(Builder.getInt32(F));
Builder.CreateCall(VstNFunc, Ops);
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMISelLowering.h b/contrib/libs/llvm12/lib/Target/ARM/ARMISelLowering.h
index 61a127af07..5b04ba8c6d 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMISelLowering.h
@@ -216,37 +216,37 @@ class VectorType;
VMULLs, // ...signed
VMULLu, // ...unsigned
- VQDMULH, // MVE vqdmulh instruction
-
+ VQDMULH, // MVE vqdmulh instruction
+
// MVE reductions
VADDVs, // sign- or zero-extend the elements of a vector to i32,
VADDVu, // add them all together, and return an i32 of their sum
- VADDVps, // Same as VADDV[su] but with a v4i1 predicate mask
- VADDVpu,
+ VADDVps, // Same as VADDV[su] but with a v4i1 predicate mask
+ VADDVpu,
VADDLVs, // sign- or zero-extend elements to i64 and sum, returning
VADDLVu, // the low and high 32-bit halves of the sum
- VADDLVAs, // Same as VADDLV[su] but also add an input accumulator
+ VADDLVAs, // Same as VADDLV[su] but also add an input accumulator
VADDLVAu, // provided as low and high halves
- VADDLVps, // Same as VADDLV[su] but with a v4i1 predicate mask
- VADDLVpu,
- VADDLVAps, // Same as VADDLVp[su] but with a v4i1 predicate mask
- VADDLVApu,
- VMLAVs, // sign- or zero-extend the elements of two vectors to i32, multiply them
- VMLAVu, // and add the results together, returning an i32 of their sum
- VMLAVps, // Same as VMLAV[su] with a v4i1 predicate mask
- VMLAVpu,
- VMLALVs, // Same as VMLAV but with i64, returning the low and
- VMLALVu, // high 32-bit halves of the sum
- VMLALVps, // Same as VMLALV[su] with a v4i1 predicate mask
- VMLALVpu,
- VMLALVAs, // Same as VMLALV but also add an input accumulator
- VMLALVAu, // provided as low and high halves
- VMLALVAps, // Same as VMLALVA[su] with a v4i1 predicate mask
- VMLALVApu,
- VMINVu, // Find minimum unsigned value of a vector and register
- VMINVs, // Find minimum signed value of a vector and register
- VMAXVu, // Find maximum unsigned value of a vector and register
- VMAXVs, // Find maximum signed value of a vector and register
+ VADDLVps, // Same as VADDLV[su] but with a v4i1 predicate mask
+ VADDLVpu,
+ VADDLVAps, // Same as VADDLVp[su] but with a v4i1 predicate mask
+ VADDLVApu,
+ VMLAVs, // sign- or zero-extend the elements of two vectors to i32, multiply them
+ VMLAVu, // and add the results together, returning an i32 of their sum
+ VMLAVps, // Same as VMLAV[su] with a v4i1 predicate mask
+ VMLAVpu,
+ VMLALVs, // Same as VMLAV but with i64, returning the low and
+ VMLALVu, // high 32-bit halves of the sum
+ VMLALVps, // Same as VMLALV[su] with a v4i1 predicate mask
+ VMLALVpu,
+ VMLALVAs, // Same as VMLALV but also add an input accumulator
+ VMLALVAu, // provided as low and high halves
+ VMLALVAps, // Same as VMLALVA[su] with a v4i1 predicate mask
+ VMLALVApu,
+ VMINVu, // Find minimum unsigned value of a vector and register
+ VMINVs, // Find minimum signed value of a vector and register
+ VMAXVu, // Find maximum unsigned value of a vector and register
+ VMAXVs, // Find maximum signed value of a vector and register
SMULWB, // Signed multiply word by half word, bottom
SMULWT, // Signed multiply word by half word, top
@@ -285,8 +285,8 @@ class VectorType;
// Vector AND with NOT of immediate
VBICIMM,
- // Pseudo vector bitwise select
- VBSP,
+ // Pseudo vector bitwise select
+ VBSP,
// Pseudo-instruction representing a memory copy using ldm/stm
// instructions.
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrFormats.td b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrFormats.td
index 85da7c5a53..7937353678 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrFormats.td
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrFormats.td
@@ -403,9 +403,9 @@ class InstTemplate<AddrMode am, int sz, IndexMode im,
bit isUnaryDataProc = 0;
bit canXformTo16Bit = 0;
// The instruction is a 16-bit flag setting Thumb instruction. Used
- // by the parser and if-converter to determine whether to require the 'S'
- // suffix on the mnemonic (when not in an IT block) or preclude it (when
- // in an IT block).
+ // by the parser and if-converter to determine whether to require the 'S'
+ // suffix on the mnemonic (when not in an IT block) or preclude it (when
+ // in an IT block).
bit thumbArithFlagSetting = 0;
bit validForTailPredication = 0;
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrInfo.td b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrInfo.td
index 8dcb319923..2fe8cbc613 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrInfo.td
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrInfo.td
@@ -162,9 +162,9 @@ def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
[SDNPInGlue]>;
def ARMsubs : SDNode<"ARMISD::SUBS", SDTIntBinOp, [SDNPOutGlue]>;
-def ARMssat : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>;
+def ARMssat : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>;
-def ARMusat : SDNode<"ARMISD::USAT", SDTIntSatNoShOp, []>;
+def ARMusat : SDNode<"ARMISD::USAT", SDTIntSatNoShOp, []>;
def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
@@ -371,11 +371,11 @@ def imm_not_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(~(int)N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
-// asr_imm_XFORM - Returns a shift immediate with bit {5} set to 1
-def asr_imm_XFORM : SDNodeXForm<imm, [{
-  return CurDAG->getTargetConstant(0x20 | N->getZExtValue(), SDLoc(N), MVT::i32);
-}]>;
-
+// asr_imm_XFORM - Returns a shift immediate with bit {5} set to 1
+def asr_imm_XFORM : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(0x20 | N->getZExtValue(), SDLoc(N), MVT::i32);
+}]>;
+
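
Reading the XFORM above, the produced operand keeps the shift amount in the low bits and sets bit 5 to mark an arithmetic shift right; a small sketch of that encoding (my reading, for illustration only):

    #include <cstdint>

    // asr #n (n in 1..32) -> 0x20 | n; note asr #32 comes out as 0x20, i.e.
    // bit 5 set with a zero shift field.
    static uint32_t encodeAsrImm(uint32_t N) { return 0x20u | N; }
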
/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31].
def imm16_31 : ImmLeaf<i32, [{
return (int32_t)Imm >= 16 && (int32_t)Imm < 32;
@@ -442,8 +442,8 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
def imm_even : ImmLeaf<i32, [{ return (Imm & 1) == 0; }]>;
def imm_odd : ImmLeaf<i32, [{ return (Imm & 1) == 1; }]>;
-def asr_imm : ImmLeaf<i32, [{ return Imm > 0 && Imm <= 32; }], asr_imm_XFORM>;
-
+def asr_imm : ImmLeaf<i32, [{ return Imm > 0 && Imm <= 32; }], asr_imm_XFORM>;
+
//===----------------------------------------------------------------------===//
// NEON/MVE pattern fragments
//
@@ -496,19 +496,19 @@ def SubReg_i32_lane : SDNodeXForm<imm, [{
}]>;
-def ARMimmAllZerosV: PatLeaf<(bitconvert (v4i32 (ARMvmovImm (i32 0))))>;
-def ARMimmAllZerosD: PatLeaf<(bitconvert (v2i32 (ARMvmovImm (i32 0))))>;
-def ARMimmAllOnesV: PatLeaf<(bitconvert (v16i8 (ARMvmovImm (i32 0xEFF))))>;
-def ARMimmAllOnesD: PatLeaf<(bitconvert (v8i8 (ARMvmovImm (i32 0xEFF))))>;
-
-def ARMimmOneV: PatLeaf<(ARMvmovImm (i32 timm)), [{
- ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
- unsigned EltBits = 0;
- uint64_t EltVal = ARM_AM::decodeVMOVModImm(ConstVal->getZExtValue(), EltBits);
- return (EltBits == N->getValueType(0).getScalarSizeInBits() && EltVal == 0x01);
-}]>;
-
+def ARMimmAllZerosV: PatLeaf<(bitconvert (v4i32 (ARMvmovImm (i32 0))))>;
+def ARMimmAllZerosD: PatLeaf<(bitconvert (v2i32 (ARMvmovImm (i32 0))))>;
+def ARMimmAllOnesV: PatLeaf<(bitconvert (v16i8 (ARMvmovImm (i32 0xEFF))))>;
+def ARMimmAllOnesD: PatLeaf<(bitconvert (v8i8 (ARMvmovImm (i32 0xEFF))))>;
+def ARMimmOneV: PatLeaf<(ARMvmovImm (i32 timm)), [{
+ ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
+ unsigned EltBits = 0;
+ uint64_t EltVal = ARM_AM::decodeVMOVModImm(ConstVal->getZExtValue(), EltBits);
+ return (EltBits == N->getValueType(0).getScalarSizeInBits() && EltVal == 0x01);
+}]>;
+
+
//===----------------------------------------------------------------------===//
// Operand Definitions.
//
@@ -822,9 +822,9 @@ def mod_imm_neg : Operand<i32>, PatLeaf<(imm), [{
def arm_i32imm : IntImmLeaf<i32, [{
if (Subtarget->useMovt())
return true;
- if (ARM_AM::isSOImmTwoPartVal(Imm.getZExtValue()))
- return true;
- return ARM_AM::isSOImmTwoPartValNeg(Imm.getZExtValue());
+ if (ARM_AM::isSOImmTwoPartVal(Imm.getZExtValue()))
+ return true;
+ return ARM_AM::isSOImmTwoPartValNeg(Imm.getZExtValue());
}]>;
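
As background for the isSOImmTwoPartVal checks (my understanding, stated as an assumption rather than the LLVM helper itself): a single ARM modified immediate is an 8-bit value rotated right by an even amount, and a "two part" value is one that can be assembled from two of them, e.g. 0x00FF00FF = 0x00FF0000 | 0x000000FF. A sketch of the single-immediate test:

    #include <cstdint>

    static bool isRotated8BitImm(uint32_t V) {
      for (unsigned Rot = 0; Rot < 32; Rot += 2) {
        // Rotate left by Rot to undo a rotate-right by Rot.
        uint32_t Undone = Rot ? ((V << Rot) | (V >> (32 - Rot))) : V;
        if ((Undone & ~0xFFu) == 0)
          return true;
      }
      return false;
    }
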
/// imm0_1 predicate - Immediate in the range [0,1].
@@ -2492,30 +2492,30 @@ let isCall = 1,
}
// ARMv5T and above
- def BLX : AXI<(outs), (ins GPR:$func), BrMiscFrm, IIC_Br, "blx\t$func", []>,
+ def BLX : AXI<(outs), (ins GPR:$func), BrMiscFrm, IIC_Br, "blx\t$func", []>,
Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> {
bits<4> func;
let Inst{31-4} = 0b1110000100101111111111110011;
let Inst{3-0} = func;
}
- def BLX_noip : ARMPseudoExpand<(outs), (ins GPRnoip:$func),
- 4, IIC_Br, [], (BLX GPR:$func)>,
- Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]>;
-
+ def BLX_noip : ARMPseudoExpand<(outs), (ins GPRnoip:$func),
+ 4, IIC_Br, [], (BLX GPR:$func)>,
+ Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]>;
+
def BLX_pred : AI<(outs), (ins GPR:$func), BrMiscFrm,
- IIC_Br, "blx", "\t$func", []>,
+ IIC_Br, "blx", "\t$func", []>,
Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> {
bits<4> func;
let Inst{27-4} = 0b000100101111111111110011;
let Inst{3-0} = func;
}
- def BLX_pred_noip : ARMPseudoExpand<(outs), (ins GPRnoip:$func),
- 4, IIC_Br, [],
- (BLX_pred GPR:$func, (ops 14, zero_reg))>,
- Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]>;
-
+ def BLX_pred_noip : ARMPseudoExpand<(outs), (ins GPRnoip:$func),
+ 4, IIC_Br, [],
+ (BLX_pred GPR:$func, (ops 14, zero_reg))>,
+ Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]>;
+
// ARMv4T
// Note: Restrict $func to the tGPR regclass to prevent it being in LR.
def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func),
@@ -2540,16 +2540,16 @@ let isCall = 1,
Requires<[IsARM]>, Sched<[WriteBr]>;
}
-def : ARMPat<(ARMcall GPR:$func), (BLX $func)>,
- Requires<[IsARM, HasV5T, NoSLSBLRMitigation]>;
-def : ARMPat<(ARMcall GPRnoip:$func), (BLX_noip $func)>,
- Requires<[IsARM, HasV5T, SLSBLRMitigation]>;
-def : ARMPat<(ARMcall_pred GPR:$func), (BLX_pred $func)>,
- Requires<[IsARM, HasV5T, NoSLSBLRMitigation]>;
-def : ARMPat<(ARMcall_pred GPRnoip:$func), (BLX_pred_noip $func)>,
- Requires<[IsARM, HasV5T, SLSBLRMitigation]>;
-
-
+def : ARMPat<(ARMcall GPR:$func), (BLX $func)>,
+ Requires<[IsARM, HasV5T, NoSLSBLRMitigation]>;
+def : ARMPat<(ARMcall GPRnoip:$func), (BLX_noip $func)>,
+ Requires<[IsARM, HasV5T, SLSBLRMitigation]>;
+def : ARMPat<(ARMcall_pred GPR:$func), (BLX_pred $func)>,
+ Requires<[IsARM, HasV5T, NoSLSBLRMitigation]>;
+def : ARMPat<(ARMcall_pred GPRnoip:$func), (BLX_pred_noip $func)>,
+ Requires<[IsARM, HasV5T, SLSBLRMitigation]>;
+
+
let isBranch = 1, isTerminator = 1 in {
// FIXME: should be able to write a pattern for ARMBrcond, but can't use
// a two-value operand where a dag node expects two operands. :(
@@ -4089,32 +4089,32 @@ def : ARMV6Pat<(int_arm_ssat GPRnopc:$a, imm1_32:$pos),
(SSAT imm1_32:$pos, GPRnopc:$a, 0)>;
def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm0_31:$pos),
(USAT imm0_31:$pos, GPRnopc:$a, 0)>;
-def : ARMPat<(ARMssat GPRnopc:$Rn, imm0_31:$imm),
+def : ARMPat<(ARMssat GPRnopc:$Rn, imm0_31:$imm),
(SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>;
-def : ARMPat<(ARMusat GPRnopc:$Rn, imm0_31:$imm),
+def : ARMPat<(ARMusat GPRnopc:$Rn, imm0_31:$imm),
(USAT imm0_31:$imm, GPRnopc:$Rn, 0)>;
def : ARMV6Pat<(int_arm_ssat16 GPRnopc:$a, imm1_16:$pos),
(SSAT16 imm1_16:$pos, GPRnopc:$a)>;
def : ARMV6Pat<(int_arm_usat16 GPRnopc:$a, imm0_15:$pos),
(USAT16 imm0_15:$pos, GPRnopc:$a)>;
-def : ARMV6Pat<(int_arm_ssat (shl GPRnopc:$a, imm0_31:$shft), imm1_32:$pos),
- (SSAT imm1_32:$pos, GPRnopc:$a, imm0_31:$shft)>;
-def : ARMV6Pat<(int_arm_ssat (sra GPRnopc:$a, asr_imm:$shft), imm1_32:$pos),
- (SSAT imm1_32:$pos, GPRnopc:$a, asr_imm:$shft)>;
-def : ARMV6Pat<(int_arm_usat (shl GPRnopc:$a, imm0_31:$shft), imm0_31:$pos),
- (USAT imm0_31:$pos, GPRnopc:$a, imm0_31:$shft)>;
-def : ARMV6Pat<(int_arm_usat (sra GPRnopc:$a, asr_imm:$shft), imm0_31:$pos),
- (USAT imm0_31:$pos, GPRnopc:$a, asr_imm:$shft)>;
-def : ARMPat<(ARMssat (shl GPRnopc:$Rn, imm0_31:$shft), imm0_31:$pos),
- (SSAT imm0_31:$pos, GPRnopc:$Rn, imm0_31:$shft)>;
-def : ARMPat<(ARMssat (sra GPRnopc:$Rn, asr_imm:$shft), imm0_31:$pos),
- (SSAT imm0_31:$pos, GPRnopc:$Rn, asr_imm:$shft)>;
-def : ARMPat<(ARMusat (shl GPRnopc:$Rn, imm0_31:$shft), imm0_31:$pos),
- (USAT imm0_31:$pos, GPRnopc:$Rn, imm0_31:$shft)>;
-def : ARMPat<(ARMusat (sra GPRnopc:$Rn, asr_imm:$shft), imm0_31:$pos),
- (USAT imm0_31:$pos, GPRnopc:$Rn, asr_imm:$shft)>;
-
-
+def : ARMV6Pat<(int_arm_ssat (shl GPRnopc:$a, imm0_31:$shft), imm1_32:$pos),
+ (SSAT imm1_32:$pos, GPRnopc:$a, imm0_31:$shft)>;
+def : ARMV6Pat<(int_arm_ssat (sra GPRnopc:$a, asr_imm:$shft), imm1_32:$pos),
+ (SSAT imm1_32:$pos, GPRnopc:$a, asr_imm:$shft)>;
+def : ARMV6Pat<(int_arm_usat (shl GPRnopc:$a, imm0_31:$shft), imm0_31:$pos),
+ (USAT imm0_31:$pos, GPRnopc:$a, imm0_31:$shft)>;
+def : ARMV6Pat<(int_arm_usat (sra GPRnopc:$a, asr_imm:$shft), imm0_31:$pos),
+ (USAT imm0_31:$pos, GPRnopc:$a, asr_imm:$shft)>;
+def : ARMPat<(ARMssat (shl GPRnopc:$Rn, imm0_31:$shft), imm0_31:$pos),
+ (SSAT imm0_31:$pos, GPRnopc:$Rn, imm0_31:$shft)>;
+def : ARMPat<(ARMssat (sra GPRnopc:$Rn, asr_imm:$shft), imm0_31:$pos),
+ (SSAT imm0_31:$pos, GPRnopc:$Rn, asr_imm:$shft)>;
+def : ARMPat<(ARMusat (shl GPRnopc:$Rn, imm0_31:$shft), imm0_31:$pos),
+ (USAT imm0_31:$pos, GPRnopc:$Rn, imm0_31:$shft)>;
+def : ARMPat<(ARMusat (sra GPRnopc:$Rn, asr_imm:$shft), imm0_31:$pos),
+ (USAT imm0_31:$pos, GPRnopc:$Rn, asr_imm:$shft)>;
+
+
//===----------------------------------------------------------------------===//
// Bitwise Instructions.
//
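The saturation patterns map both the ACLE saturation intrinsics and the ARMssat/ARMusat nodes onto SSAT/USAT, including the variants that fold an LSL or ASR into the instruction's shift operand. A short usage sketch, assuming the __ssat/__usat intrinsics from <arm_acle.h> (helper names are illustrative):

    #include <arm_acle.h>
    #include <stdint.h>

    /* Saturate to 8 signed bits / 5 unsigned bits; the shifted form is
       expected to match the (shl ...) patterns above, folding the
       LSL #3 into the SSAT shift operand. */
    int32_t  sat8(int32_t x)         { return __ssat(x, 8); }
    int32_t  sat8_shifted(int32_t x) { return __ssat(x << 3, 8); }
    uint32_t usat5(int32_t x)        { return __usat(x, 5); }
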
@@ -6381,15 +6381,15 @@ def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn),
NoItinerary,
[(set GPR:$Rd, (int_arm_space timm:$size, GPR:$Rn))]>;
-// SpeculationBarrierEndBB must only be used after an unconditional control
-// flow, i.e. after a terminator for which isBarrier is True.
-let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
- def SpeculationBarrierISBDSBEndBB
- : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
- def SpeculationBarrierSBEndBB
- : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
-}
-
+// SpeculationBarrierEndBB must only be used after an unconditional control
+// flow, i.e. after a terminator for which isBarrier is True.
+let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
+ def SpeculationBarrierISBDSBEndBB
+ : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
+ def SpeculationBarrierSBEndBB
+ : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
+}
+
//===----------------------------------
// Atomic cmpxchg for -O0
//===----------------------------------
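The SpeculationBarrier pseudos above take no operands and are marked as barrier terminators so they can sit immediately after unconditional control flow; going by their names they should expand late to a DSB SY + ISB pair, or to a single SB. A rough C-level analogue using the ACLE barrier intrinsics, purely for illustration (the real expansion is done by the backend, not by user code):

    #include <arm_acle.h>

    /* Approximates the ISB/DSB flavour of the barrier: a full-system
       data synchronization barrier followed by an instruction barrier. */
    static inline void speculation_barrier_isb_dsb(void) {
        __dsb(15);  /* dsb sy */
        __isb(15);  /* isb    */
    }
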
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrMVE.td b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrMVE.td
index 0dfea68887..64cef5d967 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrMVE.td
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrMVE.td
@@ -318,78 +318,78 @@ def MVE_v2f64 : MVEVectorVTInfo<v2f64, ?, v4i1, ?, 0b11, "f", ?>;
def MVE_v16p8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, v8i1, 0b11, "p", 0b0>;
def MVE_v8p16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, v4i1, 0b11, "p", 0b1>;
-multiclass MVE_TwoOpPattern<MVEVectorVTInfo VTI, PatFrag Op, Intrinsic PredInt,
- dag PredOperands, Instruction Inst,
- SDPatternOperator IdentityVec = null_frag> {
- // Unpredicated
- def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
-
- // Predicated with select
- if !ne(VTI.Size, 0b11) then {
- def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$mask),
- (VTI.Vec (Op (VTI.Vec MQPR:$Qm),
- (VTI.Vec MQPR:$Qn))),
- (VTI.Vec MQPR:$inactive))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
- (VTI.Vec MQPR:$inactive)))>;
-
- // Optionally with the select folded through the op
- def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm),
- (VTI.Vec (vselect (VTI.Pred VCCR:$mask),
- (VTI.Vec MQPR:$Qn),
- (VTI.Vec IdentityVec))))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
- (VTI.Vec MQPR:$Qm)))>;
- }
-
- // Predicated with intrinsic
- def : Pat<(VTI.Vec !con((PredInt (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)),
- PredOperands,
- (? (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive)))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
- (VTI.Vec MQPR:$inactive)))>;
-}
-
-multiclass MVE_TwoOpPatternDup<MVEVectorVTInfo VTI, PatFrag Op, Intrinsic PredInt,
- dag PredOperands, Instruction Inst,
- SDPatternOperator IdentityVec = null_frag> {
- // Unpredicated
- def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm), (VTI.Vec (ARMvdup rGPR:$Rn)))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn))>;
-
- // Predicated with select
- if !ne(VTI.Size, 0b11) then {
- def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$mask),
- (VTI.Vec (Op (VTI.Vec MQPR:$Qm),
- (VTI.Vec (ARMvdup rGPR:$Rn)))),
- (VTI.Vec MQPR:$inactive))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn,
- ARMVCCThen, (VTI.Pred VCCR:$mask),
- (VTI.Vec MQPR:$inactive)))>;
-
- // Optionally with the select folded through the op
- def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm),
- (VTI.Vec (vselect (VTI.Pred VCCR:$mask),
- (ARMvdup rGPR:$Rn),
- (VTI.Vec IdentityVec))))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn,
- ARMVCCThen, (VTI.Pred VCCR:$mask),
- (VTI.Vec MQPR:$Qm)))>;
- }
-
- // Predicated with intrinsic
- def : Pat<(VTI.Vec !con((PredInt (VTI.Vec MQPR:$Qm), (VTI.Vec (ARMvdup rGPR:$Rn))),
- PredOperands,
- (? (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive)))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn,
- ARMVCCThen, (VTI.Pred VCCR:$mask),
- (VTI.Vec MQPR:$inactive)))>;
-}
-
+multiclass MVE_TwoOpPattern<MVEVectorVTInfo VTI, PatFrag Op, Intrinsic PredInt,
+ dag PredOperands, Instruction Inst,
+ SDPatternOperator IdentityVec = null_frag> {
+ // Unpredicated
+ def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+ // Predicated with select
+ if !ne(VTI.Size, 0b11) then {
+ def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$mask),
+ (VTI.Vec (Op (VTI.Vec MQPR:$Qm),
+ (VTI.Vec MQPR:$Qn))),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+
+ // Optionally with the select folded through the op
+ def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm),
+ (VTI.Vec (vselect (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$Qn),
+ (VTI.Vec IdentityVec))))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$Qm)))>;
+ }
+
+ // Predicated with intrinsic
+ def : Pat<(VTI.Vec !con((PredInt (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)),
+ PredOperands,
+ (? (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive)))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+}
+
+multiclass MVE_TwoOpPatternDup<MVEVectorVTInfo VTI, PatFrag Op, Intrinsic PredInt,
+ dag PredOperands, Instruction Inst,
+ SDPatternOperator IdentityVec = null_frag> {
+ // Unpredicated
+ def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm), (VTI.Vec (ARMvdup rGPR:$Rn)))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn))>;
+
+ // Predicated with select
+ if !ne(VTI.Size, 0b11) then {
+ def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$mask),
+ (VTI.Vec (Op (VTI.Vec MQPR:$Qm),
+ (VTI.Vec (ARMvdup rGPR:$Rn)))),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn,
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+
+ // Optionally with the select folded through the op
+ def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm),
+ (VTI.Vec (vselect (VTI.Pred VCCR:$mask),
+ (ARMvdup rGPR:$Rn),
+ (VTI.Vec IdentityVec))))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn,
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$Qm)))>;
+ }
+
+ // Predicated with intrinsic
+ def : Pat<(VTI.Vec !con((PredInt (VTI.Vec MQPR:$Qm), (VTI.Vec (ARMvdup rGPR:$Rn))),
+ PredOperands,
+ (? (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive)))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn,
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+}
+
// --------- Start of base classes for the instructions themselves
class MVE_MI<dag oops, dag iops, InstrItinClass itin, string asm,
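MVE_TwoOpPattern and MVE_TwoOpPatternDup generate, for one two-operand MVE instruction, the unpredicated pattern, the vselect-based predicated patterns (merging the inactive lanes, or folding a select on an identity vector through the op) and the intrinsic-based predicated pattern; the Dup variant does the same for the vector-plus-duplicated-scalar forms. At the C level these roughly correspond to the plain and _m (merge) intrinsic forms; a minimal sketch assuming clang's <arm_mve.h> and an MVE target such as -march=armv8.1-m.main+mve:

    #include <arm_mve.h>

    /* Unpredicated add: matched by the plain (Op Qm, Qn) pattern. */
    int32x4_t add_all(int32x4_t a, int32x4_t b) {
        return vaddq_s32(a, b);
    }

    /* Merging predicated add: matched via the vselect/intrinsic
       patterns, keeping the 'inactive' value in the false lanes. */
    int32x4_t add_masked(int32x4_t inactive, int32x4_t a, int32x4_t b,
                         mve_pred16_t p) {
        return vaddq_m_s32(inactive, a, b, p);
    }
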
@@ -450,7 +450,7 @@ class MVE_ScalarShift<string iname, dag oops, dag iops, string asm, string cstr,
: MVE_MI_with_pred<oops, iops, NoItinerary, iname, asm, cstr, pattern> {
let Inst{31-20} = 0b111010100101;
let Inst{8} = 0b1;
- let validForTailPredication=1;
+ let validForTailPredication=1;
}
class MVE_ScalarShiftSingleReg<string iname, dag iops, string asm, string cstr,
@@ -684,13 +684,13 @@ class MVE_VADDV<string iname, string suffix, dag iops, string cstr,
let validForTailPredication = 1;
}
-def SDTVecReduceP : SDTypeProfile<1, 2, [ // VADDLVp
- SDTCisInt<0>, SDTCisVec<1>, SDTCisVec<2>
-]>;
+def SDTVecReduceP : SDTypeProfile<1, 2, [ // VADDLVp
+ SDTCisInt<0>, SDTCisVec<1>, SDTCisVec<2>
+]>;
def ARMVADDVs : SDNode<"ARMISD::VADDVs", SDTVecReduce>;
def ARMVADDVu : SDNode<"ARMISD::VADDVu", SDTVecReduce>;
-def ARMVADDVps : SDNode<"ARMISD::VADDVps", SDTVecReduceP>;
-def ARMVADDVpu : SDNode<"ARMISD::VADDVpu", SDTVecReduceP>;
+def ARMVADDVps : SDNode<"ARMISD::VADDVps", SDTVecReduceP>;
+def ARMVADDVpu : SDNode<"ARMISD::VADDVpu", SDTVecReduceP>;
multiclass MVE_VADDV_A<MVEVectorVTInfo VTI> {
def acc : MVE_VADDV<"vaddva", VTI.Suffix,
@@ -707,39 +707,39 @@ multiclass MVE_VADDV_A<MVEVectorVTInfo VTI> {
if VTI.Unsigned then {
def : Pat<(i32 (vecreduce_add (VTI.Vec MQPR:$vec))),
(i32 (InstN $vec))>;
- def : Pat<(i32 (vecreduce_add (VTI.Vec (vselect (VTI.Pred VCCR:$pred),
- (VTI.Vec MQPR:$vec),
- (VTI.Vec ARMimmAllZerosV))))),
- (i32 (InstN $vec, ARMVCCThen, $pred))>;
+ def : Pat<(i32 (vecreduce_add (VTI.Vec (vselect (VTI.Pred VCCR:$pred),
+ (VTI.Vec MQPR:$vec),
+ (VTI.Vec ARMimmAllZerosV))))),
+ (i32 (InstN $vec, ARMVCCThen, $pred))>;
def : Pat<(i32 (ARMVADDVu (VTI.Vec MQPR:$vec))),
(i32 (InstN $vec))>;
- def : Pat<(i32 (ARMVADDVpu (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))),
- (i32 (InstN $vec, ARMVCCThen, $pred))>;
+ def : Pat<(i32 (ARMVADDVpu (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))),
+ (i32 (InstN $vec, ARMVCCThen, $pred))>;
def : Pat<(i32 (add (i32 (vecreduce_add (VTI.Vec MQPR:$vec))),
(i32 tGPREven:$acc))),
(i32 (InstA $acc, $vec))>;
- def : Pat<(i32 (add (i32 (vecreduce_add (VTI.Vec (vselect (VTI.Pred VCCR:$pred),
- (VTI.Vec MQPR:$vec),
- (VTI.Vec ARMimmAllZerosV))))),
- (i32 tGPREven:$acc))),
- (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>;
+ def : Pat<(i32 (add (i32 (vecreduce_add (VTI.Vec (vselect (VTI.Pred VCCR:$pred),
+ (VTI.Vec MQPR:$vec),
+ (VTI.Vec ARMimmAllZerosV))))),
+ (i32 tGPREven:$acc))),
+ (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>;
def : Pat<(i32 (add (i32 (ARMVADDVu (VTI.Vec MQPR:$vec))),
(i32 tGPREven:$acc))),
(i32 (InstA $acc, $vec))>;
- def : Pat<(i32 (add (i32 (ARMVADDVpu (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))),
- (i32 tGPREven:$acc))),
- (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>;
+ def : Pat<(i32 (add (i32 (ARMVADDVpu (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))),
+ (i32 tGPREven:$acc))),
+ (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>;
} else {
def : Pat<(i32 (ARMVADDVs (VTI.Vec MQPR:$vec))),
(i32 (InstN $vec))>;
def : Pat<(i32 (add (i32 (ARMVADDVs (VTI.Vec MQPR:$vec))),
(i32 tGPREven:$acc))),
(i32 (InstA $acc, $vec))>;
- def : Pat<(i32 (ARMVADDVps (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))),
- (i32 (InstN $vec, ARMVCCThen, $pred))>;
- def : Pat<(i32 (add (i32 (ARMVADDVps (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))),
- (i32 tGPREven:$acc))),
- (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>;
+ def : Pat<(i32 (ARMVADDVps (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))),
+ (i32 (InstN $vec, ARMVCCThen, $pred))>;
+ def : Pat<(i32 (add (i32 (ARMVADDVps (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))),
+ (i32 tGPREven:$acc))),
+ (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>;
}
def : Pat<(i32 (int_arm_mve_addv_predicated (VTI.Vec MQPR:$vec),
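The ARMVADDVps/ARMVADDVpu nodes and the vselect-of-zeros patterns both reduce a (predicated) vector sum to VADDV, with the add-of-accumulator forms going to VADDVA. A usage sketch with the corresponding ACLE intrinsics, assuming <arm_mve.h> spells them vaddvq_u32 / vaddvq_p_u32 / vaddvaq_u32:

    #include <arm_mve.h>

    uint32_t sum(uint32x4_t v)                      { return vaddvq_u32(v); }
    uint32_t sum_pred(uint32x4_t v, mve_pred16_t p) { return vaddvq_p_u32(v, p); }
    uint32_t sum_acc(uint32_t acc, uint32x4_t v)    { return vaddvaq_u32(acc, v); }
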
@@ -944,14 +944,14 @@ multiclass MVE_VMINMAXV_ty<string iname, bit isMin, string intrBaseName> {
defm u32: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v4u32, intrBaseName>;
}
-def SDTVecReduceR : SDTypeProfile<1, 2, [ // Reduction of an integer and vector into an integer
- SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2>
-]>;
-def ARMVMINVu : SDNode<"ARMISD::VMINVu", SDTVecReduceR>;
-def ARMVMINVs : SDNode<"ARMISD::VMINVs", SDTVecReduceR>;
-def ARMVMAXVu : SDNode<"ARMISD::VMAXVu", SDTVecReduceR>;
-def ARMVMAXVs : SDNode<"ARMISD::VMAXVs", SDTVecReduceR>;
-
+def SDTVecReduceR : SDTypeProfile<1, 2, [ // Reduction of an integer and vector into an integer
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2>
+]>;
+def ARMVMINVu : SDNode<"ARMISD::VMINVu", SDTVecReduceR>;
+def ARMVMINVs : SDNode<"ARMISD::VMINVs", SDTVecReduceR>;
+def ARMVMAXVu : SDNode<"ARMISD::VMAXVu", SDTVecReduceR>;
+def ARMVMAXVs : SDNode<"ARMISD::VMAXVs", SDTVecReduceR>;
+
defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 1, "int_arm_mve_minv">;
defm MVE_VMAXV : MVE_VMINMAXV_ty<"vmaxv", 0, "int_arm_mve_maxv">;
@@ -982,32 +982,32 @@ let Predicates = [HasMVEInt] in {
def : Pat<(i32 (vecreduce_umin (v4i32 MQPR:$src))),
(i32 (MVE_VMINVu32 (t2MOVi (i32 4294967295)), $src))>;
- def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v16i8 MQPR:$src))),
- (i32 (MVE_VMINVu8 $x, $src))>;
- def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v8i16 MQPR:$src))),
- (i32 (MVE_VMINVu16 $x, $src))>;
- def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v4i32 MQPR:$src))),
- (i32 (MVE_VMINVu32 $x, $src))>;
- def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v16i8 MQPR:$src))),
- (i32 (MVE_VMINVs8 $x, $src))>;
- def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v8i16 MQPR:$src))),
- (i32 (MVE_VMINVs16 $x, $src))>;
- def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v4i32 MQPR:$src))),
- (i32 (MVE_VMINVs32 $x, $src))>;
-
- def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v16i8 MQPR:$src))),
- (i32 (MVE_VMAXVu8 $x, $src))>;
- def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v8i16 MQPR:$src))),
- (i32 (MVE_VMAXVu16 $x, $src))>;
- def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v4i32 MQPR:$src))),
- (i32 (MVE_VMAXVu32 $x, $src))>;
- def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v16i8 MQPR:$src))),
- (i32 (MVE_VMAXVs8 $x, $src))>;
- def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v8i16 MQPR:$src))),
- (i32 (MVE_VMAXVs16 $x, $src))>;
- def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v4i32 MQPR:$src))),
- (i32 (MVE_VMAXVs32 $x, $src))>;
-
+ def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v16i8 MQPR:$src))),
+ (i32 (MVE_VMINVu8 $x, $src))>;
+ def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v8i16 MQPR:$src))),
+ (i32 (MVE_VMINVu16 $x, $src))>;
+ def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v4i32 MQPR:$src))),
+ (i32 (MVE_VMINVu32 $x, $src))>;
+ def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v16i8 MQPR:$src))),
+ (i32 (MVE_VMINVs8 $x, $src))>;
+ def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v8i16 MQPR:$src))),
+ (i32 (MVE_VMINVs16 $x, $src))>;
+ def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v4i32 MQPR:$src))),
+ (i32 (MVE_VMINVs32 $x, $src))>;
+
+ def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v16i8 MQPR:$src))),
+ (i32 (MVE_VMAXVu8 $x, $src))>;
+ def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v8i16 MQPR:$src))),
+ (i32 (MVE_VMAXVu16 $x, $src))>;
+ def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v4i32 MQPR:$src))),
+ (i32 (MVE_VMAXVu32 $x, $src))>;
+ def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v16i8 MQPR:$src))),
+ (i32 (MVE_VMAXVs8 $x, $src))>;
+ def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v8i16 MQPR:$src))),
+ (i32 (MVE_VMAXVs16 $x, $src))>;
+ def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v4i32 MQPR:$src))),
+ (i32 (MVE_VMAXVs32 $x, $src))>;
+
}
multiclass MVE_VMINMAXAV_ty<string iname, bit isMin, string intrBaseName> {
@@ -1139,28 +1139,28 @@ def SDTVecReduce2LA : SDTypeProfile<2, 4, [ // VMLALVA
SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>, SDTCisInt<3>,
SDTCisVec<4>, SDTCisVec<5>
]>;
-def SDTVecReduce2P : SDTypeProfile<1, 3, [ // VMLAV
- SDTCisInt<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>
-]>;
-def SDTVecReduce2LP : SDTypeProfile<2, 3, [ // VMLALV
- SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2>, SDTCisVec<3>, SDTCisVec<4>
-]>;
-def SDTVecReduce2LAP : SDTypeProfile<2, 5, [ // VMLALVA
- SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>, SDTCisInt<3>,
- SDTCisVec<4>, SDTCisVec<5>, SDTCisVec<6>
-]>;
+def SDTVecReduce2P : SDTypeProfile<1, 3, [ // VMLAV
+ SDTCisInt<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>
+]>;
+def SDTVecReduce2LP : SDTypeProfile<2, 3, [ // VMLALV
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2>, SDTCisVec<3>, SDTCisVec<4>
+]>;
+def SDTVecReduce2LAP : SDTypeProfile<2, 5, [ // VMLALVA
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>, SDTCisInt<3>,
+ SDTCisVec<4>, SDTCisVec<5>, SDTCisVec<6>
+]>;
def ARMVMLAVs : SDNode<"ARMISD::VMLAVs", SDTVecReduce2>;
def ARMVMLAVu : SDNode<"ARMISD::VMLAVu", SDTVecReduce2>;
def ARMVMLALVs : SDNode<"ARMISD::VMLALVs", SDTVecReduce2L>;
def ARMVMLALVu : SDNode<"ARMISD::VMLALVu", SDTVecReduce2L>;
-def ARMVMLALVAs : SDNode<"ARMISD::VMLALVAs", SDTVecReduce2LA>;
-def ARMVMLALVAu : SDNode<"ARMISD::VMLALVAu", SDTVecReduce2LA>;
-def ARMVMLAVps : SDNode<"ARMISD::VMLAVps", SDTVecReduce2P>;
-def ARMVMLAVpu : SDNode<"ARMISD::VMLAVpu", SDTVecReduce2P>;
-def ARMVMLALVps : SDNode<"ARMISD::VMLALVps", SDTVecReduce2LP>;
-def ARMVMLALVpu : SDNode<"ARMISD::VMLALVpu", SDTVecReduce2LP>;
-def ARMVMLALVAps : SDNode<"ARMISD::VMLALVAps", SDTVecReduce2LAP>;
-def ARMVMLALVApu : SDNode<"ARMISD::VMLALVApu", SDTVecReduce2LAP>;
+def ARMVMLALVAs : SDNode<"ARMISD::VMLALVAs", SDTVecReduce2LA>;
+def ARMVMLALVAu : SDNode<"ARMISD::VMLALVAu", SDTVecReduce2LA>;
+def ARMVMLAVps : SDNode<"ARMISD::VMLAVps", SDTVecReduce2P>;
+def ARMVMLAVpu : SDNode<"ARMISD::VMLAVpu", SDTVecReduce2P>;
+def ARMVMLALVps : SDNode<"ARMISD::VMLALVps", SDTVecReduce2LP>;
+def ARMVMLALVpu : SDNode<"ARMISD::VMLALVpu", SDTVecReduce2LP>;
+def ARMVMLALVAps : SDNode<"ARMISD::VMLALVAps", SDTVecReduce2LAP>;
+def ARMVMLALVApu : SDNode<"ARMISD::VMLALVApu", SDTVecReduce2LAP>;
let Predicates = [HasMVEInt] in {
def : Pat<(i32 (vecreduce_add (mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)))),
@@ -1179,68 +1179,68 @@ let Predicates = [HasMVEInt] in {
(i32 (MVE_VMLADAVu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
def : Pat<(i32 (add (i32 (vecreduce_add (mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)))),
- (i32 tGPREven:$src3))),
+ (i32 tGPREven:$src3))),
(i32 (MVE_VMLADAVau32 $src3, $src1, $src2))>;
def : Pat<(i32 (add (i32 (vecreduce_add (mul (v8i16 MQPR:$src1), (v8i16 MQPR:$src2)))),
- (i32 tGPREven:$src3))),
+ (i32 tGPREven:$src3))),
(i32 (MVE_VMLADAVau16 $src3, $src1, $src2))>;
def : Pat<(i32 (add (ARMVMLAVs (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)), tGPREven:$Rd)),
(i32 (MVE_VMLADAVas16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(i32 (add (ARMVMLAVu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)), tGPREven:$Rd)),
(i32 (MVE_VMLADAVau16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(i32 (add (i32 (vecreduce_add (mul (v16i8 MQPR:$src1), (v16i8 MQPR:$src2)))),
- (i32 tGPREven:$src3))),
+ (i32 tGPREven:$src3))),
(i32 (MVE_VMLADAVau8 $src3, $src1, $src2))>;
def : Pat<(i32 (add (ARMVMLAVs (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)), tGPREven:$Rd)),
(i32 (MVE_VMLADAVas8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
def : Pat<(i32 (add (ARMVMLAVu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)), tGPREven:$Rd)),
(i32 (MVE_VMLADAVau8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
-
- // Predicated
- def : Pat<(i32 (vecreduce_add (vselect (v4i1 VCCR:$pred),
- (mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)),
- (v4i32 ARMimmAllZerosV)))),
- (i32 (MVE_VMLADAVu32 $src1, $src2, ARMVCCThen, $pred))>;
- def : Pat<(i32 (vecreduce_add (vselect (v8i1 VCCR:$pred),
- (mul (v8i16 MQPR:$src1), (v8i16 MQPR:$src2)),
- (v8i16 ARMimmAllZerosV)))),
- (i32 (MVE_VMLADAVu16 $src1, $src2, ARMVCCThen, $pred))>;
- def : Pat<(i32 (ARMVMLAVps (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred))),
- (i32 (MVE_VMLADAVs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>;
- def : Pat<(i32 (ARMVMLAVpu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred))),
- (i32 (MVE_VMLADAVu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>;
- def : Pat<(i32 (vecreduce_add (vselect (v16i1 VCCR:$pred),
- (mul (v16i8 MQPR:$src1), (v16i8 MQPR:$src2)),
- (v16i8 ARMimmAllZerosV)))),
- (i32 (MVE_VMLADAVu8 $src1, $src2, ARMVCCThen, $pred))>;
- def : Pat<(i32 (ARMVMLAVps (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred))),
- (i32 (MVE_VMLADAVs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>;
- def : Pat<(i32 (ARMVMLAVpu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred))),
- (i32 (MVE_VMLADAVu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>;
-
- def : Pat<(i32 (add (i32 (vecreduce_add (vselect (v4i1 VCCR:$pred),
- (mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)),
- (v4i32 ARMimmAllZerosV)))),
- (i32 tGPREven:$src3))),
- (i32 (MVE_VMLADAVau32 $src3, $src1, $src2, ARMVCCThen, $pred))>;
- def : Pat<(i32 (add (i32 (vecreduce_add (vselect (v8i1 VCCR:$pred),
- (mul (v8i16 MQPR:$src1), (v8i16 MQPR:$src2)),
- (v8i16 ARMimmAllZerosV)))),
- (i32 tGPREven:$src3))),
- (i32 (MVE_VMLADAVau16 $src3, $src1, $src2, ARMVCCThen, $pred))>;
- def : Pat<(i32 (add (ARMVMLAVps (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)), tGPREven:$Rd)),
- (i32 (MVE_VMLADAVas16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>;
- def : Pat<(i32 (add (ARMVMLAVpu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)), tGPREven:$Rd)),
- (i32 (MVE_VMLADAVau16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>;
- def : Pat<(i32 (add (i32 (vecreduce_add (vselect (v16i1 VCCR:$pred),
- (mul (v16i8 MQPR:$src1), (v16i8 MQPR:$src2)),
- (v16i8 ARMimmAllZerosV)))),
- (i32 tGPREven:$src3))),
- (i32 (MVE_VMLADAVau8 $src3, $src1, $src2, ARMVCCThen, $pred))>;
- def : Pat<(i32 (add (ARMVMLAVps (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred)), tGPREven:$Rd)),
- (i32 (MVE_VMLADAVas8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>;
- def : Pat<(i32 (add (ARMVMLAVpu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred)), tGPREven:$Rd)),
- (i32 (MVE_VMLADAVau8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>;
+
+ // Predicated
+ def : Pat<(i32 (vecreduce_add (vselect (v4i1 VCCR:$pred),
+ (mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)),
+ (v4i32 ARMimmAllZerosV)))),
+ (i32 (MVE_VMLADAVu32 $src1, $src2, ARMVCCThen, $pred))>;
+ def : Pat<(i32 (vecreduce_add (vselect (v8i1 VCCR:$pred),
+ (mul (v8i16 MQPR:$src1), (v8i16 MQPR:$src2)),
+ (v8i16 ARMimmAllZerosV)))),
+ (i32 (MVE_VMLADAVu16 $src1, $src2, ARMVCCThen, $pred))>;
+ def : Pat<(i32 (ARMVMLAVps (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred))),
+ (i32 (MVE_VMLADAVs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>;
+ def : Pat<(i32 (ARMVMLAVpu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred))),
+ (i32 (MVE_VMLADAVu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>;
+ def : Pat<(i32 (vecreduce_add (vselect (v16i1 VCCR:$pred),
+ (mul (v16i8 MQPR:$src1), (v16i8 MQPR:$src2)),
+ (v16i8 ARMimmAllZerosV)))),
+ (i32 (MVE_VMLADAVu8 $src1, $src2, ARMVCCThen, $pred))>;
+ def : Pat<(i32 (ARMVMLAVps (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred))),
+ (i32 (MVE_VMLADAVs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>;
+ def : Pat<(i32 (ARMVMLAVpu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred))),
+ (i32 (MVE_VMLADAVu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>;
+
+ def : Pat<(i32 (add (i32 (vecreduce_add (vselect (v4i1 VCCR:$pred),
+ (mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)),
+ (v4i32 ARMimmAllZerosV)))),
+ (i32 tGPREven:$src3))),
+ (i32 (MVE_VMLADAVau32 $src3, $src1, $src2, ARMVCCThen, $pred))>;
+ def : Pat<(i32 (add (i32 (vecreduce_add (vselect (v8i1 VCCR:$pred),
+ (mul (v8i16 MQPR:$src1), (v8i16 MQPR:$src2)),
+ (v8i16 ARMimmAllZerosV)))),
+ (i32 tGPREven:$src3))),
+ (i32 (MVE_VMLADAVau16 $src3, $src1, $src2, ARMVCCThen, $pred))>;
+ def : Pat<(i32 (add (ARMVMLAVps (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)), tGPREven:$Rd)),
+ (i32 (MVE_VMLADAVas16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>;
+ def : Pat<(i32 (add (ARMVMLAVpu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)), tGPREven:$Rd)),
+ (i32 (MVE_VMLADAVau16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>;
+ def : Pat<(i32 (add (i32 (vecreduce_add (vselect (v16i1 VCCR:$pred),
+ (mul (v16i8 MQPR:$src1), (v16i8 MQPR:$src2)),
+ (v16i8 ARMimmAllZerosV)))),
+ (i32 tGPREven:$src3))),
+ (i32 (MVE_VMLADAVau8 $src3, $src1, $src2, ARMVCCThen, $pred))>;
+ def : Pat<(i32 (add (ARMVMLAVps (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred)), tGPREven:$Rd)),
+ (i32 (MVE_VMLADAVas8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>;
+ def : Pat<(i32 (add (ARMVMLAVpu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred)), tGPREven:$Rd)),
+ (i32 (MVE_VMLADAVau8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>;
}
// vmlav aliases vmladav
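These patterns fold a multiply feeding vecreduce_add (plain, zero-masked via vselect, or expressed with the ARMVMLAVps/pu nodes), optionally plus a scalar accumulator, into the VMLADAV family. A sketch using the matching ACLE intrinsic names, assuming <arm_mve.h> provides vmladavq_s16 and vmladavaq_p_s16:

    #include <arm_mve.h>

    /* vecreduce_add(mul(a, b)) -> MVE_VMLADAVs16 */
    int32_t dot(int16x8_t a, int16x8_t b) {
        return vmladavq_s16(a, b);
    }

    /* Accumulating, predicated form -> MVE_VMLADAVas16 under a VPT block */
    int32_t dot_acc_pred(int32_t acc, int16x8_t a, int16x8_t b,
                         mve_pred16_t p) {
        return vmladavaq_p_s16(acc, a, b, p);
    }
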
@@ -1360,25 +1360,25 @@ let Predicates = [HasMVEInt] in {
(MVE_VMLALDAVas16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))>;
def : Pat<(ARMVMLALVAu tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)),
(MVE_VMLALDAVau16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))>;
-
- // Predicated
- def : Pat<(ARMVMLALVps (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)),
- (MVE_VMLALDAVs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>;
- def : Pat<(ARMVMLALVpu (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)),
- (MVE_VMLALDAVu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>;
- def : Pat<(ARMVMLALVps (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)),
- (MVE_VMLALDAVs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>;
- def : Pat<(ARMVMLALVpu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)),
- (MVE_VMLALDAVu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>;
-
- def : Pat<(ARMVMLALVAps tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)),
- (MVE_VMLALDAVas32 tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>;
- def : Pat<(ARMVMLALVApu tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)),
- (MVE_VMLALDAVau32 tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>;
- def : Pat<(ARMVMLALVAps tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)),
- (MVE_VMLALDAVas16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>;
- def : Pat<(ARMVMLALVApu tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)),
- (MVE_VMLALDAVau16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>;
+
+ // Predicated
+ def : Pat<(ARMVMLALVps (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)),
+ (MVE_VMLALDAVs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>;
+ def : Pat<(ARMVMLALVpu (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)),
+ (MVE_VMLALDAVu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>;
+ def : Pat<(ARMVMLALVps (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)),
+ (MVE_VMLALDAVs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>;
+ def : Pat<(ARMVMLALVpu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)),
+ (MVE_VMLALDAVu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>;
+
+ def : Pat<(ARMVMLALVAps tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)),
+ (MVE_VMLALDAVas32 tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>;
+ def : Pat<(ARMVMLALVApu tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)),
+ (MVE_VMLALDAVau32 tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>;
+ def : Pat<(ARMVMLALVAps tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)),
+ (MVE_VMLALDAVas16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>;
+ def : Pat<(ARMVMLALVApu tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)),
+ (MVE_VMLALDAVau16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>;
}
// vmlalv aliases vmlaldav
@@ -1426,7 +1426,7 @@ class MVE_comp<InstrItinClass itin, string iname, string suffix,
}
class MVE_VMINMAXNM<string iname, string suffix, bit sz, bit bit_21,
- list<dag> pattern=[]>
+ list<dag> pattern=[]>
: MVE_comp<NoItinerary, iname, suffix, "", pattern> {
let Inst{28} = 0b1;
@@ -1442,18 +1442,18 @@ class MVE_VMINMAXNM<string iname, string suffix, bit sz, bit bit_21,
let Predicates = [HasMVEFloat];
}
-multiclass MVE_VMINMAXNM_m<string iname, bit bit_4, MVEVectorVTInfo VTI, SDNode Op, Intrinsic PredInt> {
- def "" : MVE_VMINMAXNM<iname, VTI.Suffix, VTI.Size{0}, bit_4>;
+multiclass MVE_VMINMAXNM_m<string iname, bit bit_4, MVEVectorVTInfo VTI, SDNode Op, Intrinsic PredInt> {
+ def "" : MVE_VMINMAXNM<iname, VTI.Suffix, VTI.Size{0}, bit_4>;
- let Predicates = [HasMVEFloat] in {
- defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 0)), !cast<Instruction>(NAME)>;
- }
+ let Predicates = [HasMVEFloat] in {
+ defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 0)), !cast<Instruction>(NAME)>;
+ }
}
-defm MVE_VMAXNMf32 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v4f32, fmaxnum, int_arm_mve_max_predicated>;
-defm MVE_VMAXNMf16 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v8f16, fmaxnum, int_arm_mve_max_predicated>;
-defm MVE_VMINNMf32 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v4f32, fminnum, int_arm_mve_min_predicated>;
-defm MVE_VMINNMf16 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v8f16, fminnum, int_arm_mve_min_predicated>;
+defm MVE_VMAXNMf32 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v4f32, fmaxnum, int_arm_mve_max_predicated>;
+defm MVE_VMAXNMf16 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v8f16, fmaxnum, int_arm_mve_max_predicated>;
+defm MVE_VMINNMf32 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v4f32, fminnum, int_arm_mve_min_predicated>;
+defm MVE_VMINNMf16 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v8f16, fminnum, int_arm_mve_min_predicated>;
class MVE_VMINMAX<string iname, string suffix, bit U, bits<2> size,
@@ -1472,11 +1472,11 @@ class MVE_VMINMAX<string iname, string suffix, bit U, bits<2> size,
}
multiclass MVE_VMINMAX_m<string iname, bit bit_4, MVEVectorVTInfo VTI,
- SDNode Op, Intrinsic PredInt> {
+ SDNode Op, Intrinsic PredInt> {
def "" : MVE_VMINMAX<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, bit_4>;
let Predicates = [HasMVEInt] in {
- defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 VTI.Unsigned)), !cast<Instruction>(NAME)>;
+ defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 VTI.Unsigned)), !cast<Instruction>(NAME)>;
}
}
@@ -1649,39 +1649,39 @@ foreach s=["s8", "s16", "s32", "u8", "u16", "u32", "i8", "i16", "i32", "f16", "f
(MVE_VAND MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
}
-let Predicates = [HasMVEInt] in {
- defm : MVE_TwoOpPattern<MVE_v16i8, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>;
- defm : MVE_TwoOpPattern<MVE_v8i16, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>;
- defm : MVE_TwoOpPattern<MVE_v4i32, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>;
- defm : MVE_TwoOpPattern<MVE_v2i64, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>;
-
- defm : MVE_TwoOpPattern<MVE_v16i8, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>;
- defm : MVE_TwoOpPattern<MVE_v8i16, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>;
- defm : MVE_TwoOpPattern<MVE_v4i32, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>;
- defm : MVE_TwoOpPattern<MVE_v2i64, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>;
-
- defm : MVE_TwoOpPattern<MVE_v16i8, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>;
- defm : MVE_TwoOpPattern<MVE_v8i16, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>;
- defm : MVE_TwoOpPattern<MVE_v4i32, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>;
- defm : MVE_TwoOpPattern<MVE_v2i64, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>;
-
- defm : MVE_TwoOpPattern<MVE_v16i8, BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>,
- int_arm_mve_bic_predicated, (? ), MVE_VBIC>;
- defm : MVE_TwoOpPattern<MVE_v8i16, BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>,
- int_arm_mve_bic_predicated, (? ), MVE_VBIC>;
- defm : MVE_TwoOpPattern<MVE_v4i32, BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>,
- int_arm_mve_bic_predicated, (? ), MVE_VBIC>;
- defm : MVE_TwoOpPattern<MVE_v2i64, BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>,
- int_arm_mve_bic_predicated, (? ), MVE_VBIC>;
-
- defm : MVE_TwoOpPattern<MVE_v16i8, BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>,
- int_arm_mve_orn_predicated, (? ), MVE_VORN>;
- defm : MVE_TwoOpPattern<MVE_v8i16, BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>,
- int_arm_mve_orn_predicated, (? ), MVE_VORN>;
- defm : MVE_TwoOpPattern<MVE_v4i32, BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>,
- int_arm_mve_orn_predicated, (? ), MVE_VORN>;
- defm : MVE_TwoOpPattern<MVE_v2i64, BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>,
- int_arm_mve_orn_predicated, (? ), MVE_VORN>;
+let Predicates = [HasMVEInt] in {
+ defm : MVE_TwoOpPattern<MVE_v16i8, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>;
+ defm : MVE_TwoOpPattern<MVE_v8i16, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>;
+ defm : MVE_TwoOpPattern<MVE_v4i32, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>;
+ defm : MVE_TwoOpPattern<MVE_v2i64, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>;
+
+ defm : MVE_TwoOpPattern<MVE_v16i8, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>;
+ defm : MVE_TwoOpPattern<MVE_v8i16, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>;
+ defm : MVE_TwoOpPattern<MVE_v4i32, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>;
+ defm : MVE_TwoOpPattern<MVE_v2i64, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>;
+
+ defm : MVE_TwoOpPattern<MVE_v16i8, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>;
+ defm : MVE_TwoOpPattern<MVE_v8i16, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>;
+ defm : MVE_TwoOpPattern<MVE_v4i32, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>;
+ defm : MVE_TwoOpPattern<MVE_v2i64, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>;
+
+ defm : MVE_TwoOpPattern<MVE_v16i8, BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>,
+ int_arm_mve_bic_predicated, (? ), MVE_VBIC>;
+ defm : MVE_TwoOpPattern<MVE_v8i16, BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>,
+ int_arm_mve_bic_predicated, (? ), MVE_VBIC>;
+ defm : MVE_TwoOpPattern<MVE_v4i32, BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>,
+ int_arm_mve_bic_predicated, (? ), MVE_VBIC>;
+ defm : MVE_TwoOpPattern<MVE_v2i64, BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>,
+ int_arm_mve_bic_predicated, (? ), MVE_VBIC>;
+
+ defm : MVE_TwoOpPattern<MVE_v16i8, BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>,
+ int_arm_mve_orn_predicated, (? ), MVE_VORN>;
+ defm : MVE_TwoOpPattern<MVE_v8i16, BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>,
+ int_arm_mve_orn_predicated, (? ), MVE_VORN>;
+ defm : MVE_TwoOpPattern<MVE_v4i32, BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>,
+ int_arm_mve_orn_predicated, (? ), MVE_VORN>;
+ defm : MVE_TwoOpPattern<MVE_v2i64, BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>,
+ int_arm_mve_orn_predicated, (? ), MVE_VORN>;
}
class MVE_bit_cmode<string iname, string suffix, bit halfword, dag inOps>
@@ -1718,8 +1718,8 @@ multiclass MVE_bit_cmode_p<string iname, bit opcode,
defvar UnpredPat = (VTI.Vec (op (VTI.Vec MQPR:$src), timm:$simm));
let Predicates = [HasMVEInt] in {
- def : Pat<UnpredPat,
- (VTI.Vec (Inst (VTI.Vec MQPR:$src), imm_type:$simm))>;
+ def : Pat<UnpredPat,
+ (VTI.Vec (Inst (VTI.Vec MQPR:$src), imm_type:$simm))>;
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
UnpredPat, (VTI.Vec MQPR:$src))),
(VTI.Vec (Inst (VTI.Vec MQPR:$src), imm_type:$simm,
@@ -1929,18 +1929,18 @@ class MVE_VMULt1<string iname, string suffix, bits<2> size,
let validForTailPredication = 1;
}
-multiclass MVE_VMUL_m<MVEVectorVTInfo VTI> {
- def "" : MVE_VMULt1<"vmul", VTI.Suffix, VTI.Size>;
+multiclass MVE_VMUL_m<MVEVectorVTInfo VTI> {
+ def "" : MVE_VMULt1<"vmul", VTI.Suffix, VTI.Size>;
let Predicates = [HasMVEInt] in {
- defm : MVE_TwoOpPattern<VTI, mul, int_arm_mve_mul_predicated, (? ),
- !cast<Instruction>(NAME), ARMimmOneV>;
+ defm : MVE_TwoOpPattern<VTI, mul, int_arm_mve_mul_predicated, (? ),
+ !cast<Instruction>(NAME), ARMimmOneV>;
}
}
-defm MVE_VMULi8 : MVE_VMUL_m<MVE_v16i8>;
-defm MVE_VMULi16 : MVE_VMUL_m<MVE_v8i16>;
-defm MVE_VMULi32 : MVE_VMUL_m<MVE_v4i32>;
+defm MVE_VMULi8 : MVE_VMUL_m<MVE_v16i8>;
+defm MVE_VMULi16 : MVE_VMUL_m<MVE_v8i16>;
+defm MVE_VMULi32 : MVE_VMUL_m<MVE_v4i32>;
class MVE_VQxDMULH_Base<string iname, string suffix, bits<2> size, bit rounding,
list<dag> pattern=[]>
@@ -1952,30 +1952,30 @@ class MVE_VQxDMULH_Base<string iname, string suffix, bits<2> size, bit rounding,
let Inst{12-8} = 0b01011;
let Inst{4} = 0b0;
let Inst{0} = 0b0;
- let validForTailPredication = 1;
+ let validForTailPredication = 1;
}
-def MVEvqdmulh : SDNode<"ARMISD::VQDMULH", SDTIntBinOp>;
-
+def MVEvqdmulh : SDNode<"ARMISD::VQDMULH", SDTIntBinOp>;
+
multiclass MVE_VQxDMULH_m<string iname, MVEVectorVTInfo VTI,
- SDNode Op, Intrinsic unpred_int, Intrinsic pred_int,
+ SDNode Op, Intrinsic unpred_int, Intrinsic pred_int,
bit rounding> {
def "" : MVE_VQxDMULH_Base<iname, VTI.Suffix, VTI.Size, rounding>;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEInt] in {
- defm : MVE_TwoOpPattern<VTI, Op, pred_int, (? ), Inst>;
-
- // Extra unpredicated multiply intrinsic patterns
- def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+ defm : MVE_TwoOpPattern<VTI, Op, pred_int, (? ), Inst>;
+
+ // Extra unpredicated multiply intrinsic patterns
+ def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
}
}
multiclass MVE_VQxDMULH<string iname, MVEVectorVTInfo VTI, bit rounding>
- : MVE_VQxDMULH_m<iname, VTI, !if(rounding, null_frag,
- MVEvqdmulh),
- !if(rounding, int_arm_mve_vqrdmulh,
+ : MVE_VQxDMULH_m<iname, VTI, !if(rounding, null_frag,
+ MVEvqdmulh),
+ !if(rounding, int_arm_mve_vqrdmulh,
int_arm_mve_vqdmulh),
!if(rounding, int_arm_mve_qrdmulh_predicated,
int_arm_mve_qdmulh_predicated),
@@ -2003,12 +2003,12 @@ class MVE_VADDSUB<string iname, string suffix, bits<2> size, bit subtract,
}
multiclass MVE_VADDSUB_m<string iname, MVEVectorVTInfo VTI, bit subtract,
- SDNode Op, Intrinsic PredInt> {
+ SDNode Op, Intrinsic PredInt> {
def "" : MVE_VADDSUB<iname, VTI.Suffix, VTI.Size, subtract>;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEInt] in {
- defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME), ARMimmAllZerosV>;
+ defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME), ARMimmAllZerosV>;
}
}
@@ -2046,13 +2046,13 @@ class MVE_VQSUB_<string suffix, bit U, bits<2> size>
: MVE_VQADDSUB<"vqsub", suffix, U, 0b1, size>;
multiclass MVE_VQADD_m<MVEVectorVTInfo VTI,
- SDNode Op, Intrinsic PredInt> {
+ SDNode Op, Intrinsic PredInt> {
def "" : MVE_VQADD_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEInt] in {
- defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 VTI.Unsigned)),
- !cast<Instruction>(NAME)>;
+ defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 VTI.Unsigned)),
+ !cast<Instruction>(NAME)>;
}
}
@@ -2067,13 +2067,13 @@ defm MVE_VQADDu16 : MVE_VQADD<MVE_v8u16, uaddsat>;
defm MVE_VQADDu32 : MVE_VQADD<MVE_v4u32, uaddsat>;
multiclass MVE_VQSUB_m<MVEVectorVTInfo VTI,
- SDNode Op, Intrinsic PredInt> {
+ SDNode Op, Intrinsic PredInt> {
def "" : MVE_VQSUB_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEInt] in {
- defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 VTI.Unsigned)),
- !cast<Instruction>(NAME)>;
+ defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 VTI.Unsigned)),
+ !cast<Instruction>(NAME)>;
}
}
@@ -2199,32 +2199,32 @@ defm MVE_VRHADDu32 : MVE_VRHADD<MVE_v4u32>;
// modelling that here with these patterns, but we're using no wrap forms of
// add to ensure that the extra bit of information is not needed for the
// arithmetic or the rounding.
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (ARMvshrsImm (addnsw (addnsw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
- (v16i8 (ARMvmovImm (i32 3585)))),
- (i32 1))),
- (MVE_VRHADDs8 MQPR:$Qm, MQPR:$Qn)>;
- def : Pat<(v8i16 (ARMvshrsImm (addnsw (addnsw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
- (v8i16 (ARMvmovImm (i32 2049)))),
- (i32 1))),
- (MVE_VRHADDs16 MQPR:$Qm, MQPR:$Qn)>;
- def : Pat<(v4i32 (ARMvshrsImm (addnsw (addnsw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
- (v4i32 (ARMvmovImm (i32 1)))),
- (i32 1))),
- (MVE_VRHADDs32 MQPR:$Qm, MQPR:$Qn)>;
- def : Pat<(v16i8 (ARMvshruImm (addnuw (addnuw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
- (v16i8 (ARMvmovImm (i32 3585)))),
- (i32 1))),
- (MVE_VRHADDu8 MQPR:$Qm, MQPR:$Qn)>;
- def : Pat<(v8i16 (ARMvshruImm (addnuw (addnuw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
- (v8i16 (ARMvmovImm (i32 2049)))),
- (i32 1))),
- (MVE_VRHADDu16 MQPR:$Qm, MQPR:$Qn)>;
- def : Pat<(v4i32 (ARMvshruImm (addnuw (addnuw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
- (v4i32 (ARMvmovImm (i32 1)))),
- (i32 1))),
- (MVE_VRHADDu32 MQPR:$Qm, MQPR:$Qn)>;
-}
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v16i8 (ARMvshrsImm (addnsw (addnsw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
+ (v16i8 (ARMvmovImm (i32 3585)))),
+ (i32 1))),
+ (MVE_VRHADDs8 MQPR:$Qm, MQPR:$Qn)>;
+ def : Pat<(v8i16 (ARMvshrsImm (addnsw (addnsw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
+ (v8i16 (ARMvmovImm (i32 2049)))),
+ (i32 1))),
+ (MVE_VRHADDs16 MQPR:$Qm, MQPR:$Qn)>;
+ def : Pat<(v4i32 (ARMvshrsImm (addnsw (addnsw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
+ (v4i32 (ARMvmovImm (i32 1)))),
+ (i32 1))),
+ (MVE_VRHADDs32 MQPR:$Qm, MQPR:$Qn)>;
+ def : Pat<(v16i8 (ARMvshruImm (addnuw (addnuw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
+ (v16i8 (ARMvmovImm (i32 3585)))),
+ (i32 1))),
+ (MVE_VRHADDu8 MQPR:$Qm, MQPR:$Qn)>;
+ def : Pat<(v8i16 (ARMvshruImm (addnuw (addnuw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
+ (v8i16 (ARMvmovImm (i32 2049)))),
+ (i32 1))),
+ (MVE_VRHADDu16 MQPR:$Qm, MQPR:$Qn)>;
+ def : Pat<(v4i32 (ARMvshruImm (addnuw (addnuw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
+ (v4i32 (ARMvmovImm (i32 1)))),
+ (i32 1))),
+ (MVE_VRHADDu32 MQPR:$Qm, MQPR:$Qn)>;
+}
class MVE_VHADDSUB<string iname, string suffix, bit U, bit subtract,
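As the comment before this block explains, a rounding halving add is (a + b + 1) >> 1 computed with one extra bit of headroom, and the patterns recognise that shape written with no-wrap adds plus a splatted constant (the ARMvmovImm values 3585, 2049 and 1 encode a per-lane 1 for i8, i16 and i32 respectively). A scalar model of one unsigned byte lane of MVE_VRHADDu8, as a sketch:

    #include <stdint.h>

    /* Rounding halving add for one u8 lane: (a + b + 1) >> 1, computed
       in 32 bits so the carry out of 8 bits is not lost. */
    static inline uint8_t rhadd_u8(uint8_t a, uint8_t b) {
        return (uint8_t)(((uint32_t)a + (uint32_t)b + 1u) >> 1);
    }
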
@@ -2473,9 +2473,9 @@ multiclass MVE_VABSNEG_int_m<string iname, bit negate, bit saturate,
let Predicates = [HasMVEInt] in {
// VQABS and VQNEG have more difficult isel patterns defined elsewhere
- if !not(saturate) then {
- def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))),
- (VTI.Vec (Inst $v))>;
+ if !not(saturate) then {
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))),
+ (VTI.Vec (Inst $v))>;
}
def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$v), (VTI.Pred VCCR:$mask),
@@ -3032,7 +3032,7 @@ multiclass MVE_VSHRN_patterns<MVE_shift_imm_partial inst,
defvar outparams = (inst (OutVTI.Vec MQPR:$QdSrc), (InVTI.Vec MQPR:$Qm),
(imm:$imm));
- def : Pat<(OutVTI.Vec !setdagop(inparams, int_arm_mve_vshrn)),
+ def : Pat<(OutVTI.Vec !setdagop(inparams, int_arm_mve_vshrn)),
(OutVTI.Vec outparams)>;
def : Pat<(OutVTI.Vec !con(inparams, (int_arm_mve_vshrn_predicated
(InVTI.Pred VCCR:$pred)))),
@@ -3234,7 +3234,7 @@ multiclass MVE_VSxI_patterns<MVE_VSxI_imm inst, string name,
defvar unpred_int = !cast<Intrinsic>("int_arm_mve_" # name);
defvar pred_int = !cast<Intrinsic>("int_arm_mve_" # name # "_predicated");
- def : Pat<(VTI.Vec !setdagop(inparams, unpred_int)),
+ def : Pat<(VTI.Vec !setdagop(inparams, unpred_int)),
(VTI.Vec outparams)>;
def : Pat<(VTI.Vec !con(inparams, (pred_int (VTI.Pred VCCR:$pred)))),
(VTI.Vec !con(outparams, (? ARMVCCThen, VCCR:$pred)))>;
@@ -3586,12 +3586,12 @@ class MVE_VMUL_fp<string iname, string suffix, bit size, list<dag> pattern=[]>
}
multiclass MVE_VMULT_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
- SDNode Op, Intrinsic PredInt> {
+ SDNode Op, Intrinsic PredInt> {
def "" : MVE_VMUL_fp<iname, VTI.Suffix, VTI.Size{0}>;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEFloat] in {
- defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME)>;
+ defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME)>;
}
}
@@ -3682,23 +3682,23 @@ multiclass MVE_VFMA_fp_multi<string iname, bit fms, MVEVectorVTInfo VTI> {
let Predicates = [HasMVEFloat] in {
if fms then {
- def : Pat<(VTI.Vec (fma (fneg m1), m2, add)),
- (Inst $add, $m1, $m2)>;
- def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
- (VTI.Vec (fma (fneg m1), m2, add)),
- add)),
- (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
+ def : Pat<(VTI.Vec (fma (fneg m1), m2, add)),
+ (Inst $add, $m1, $m2)>;
+ def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
+ (VTI.Vec (fma (fneg m1), m2, add)),
+ add)),
+ (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
def : Pat<(VTI.Vec (pred_int (fneg m1), m2, add, pred)),
(Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
def : Pat<(VTI.Vec (pred_int m1, (fneg m2), add, pred)),
(Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
} else {
- def : Pat<(VTI.Vec (fma m1, m2, add)),
- (Inst $add, $m1, $m2)>;
- def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
- (VTI.Vec (fma m1, m2, add)),
- add)),
- (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
+ def : Pat<(VTI.Vec (fma m1, m2, add)),
+ (Inst $add, $m1, $m2)>;
+ def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
+ (VTI.Vec (fma m1, m2, add)),
+ add)),
+ (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
def : Pat<(VTI.Vec (pred_int m1, m2, add, pred)),
(Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
}
@@ -3711,14 +3711,14 @@ defm MVE_VFMSf32 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v4f32>;
defm MVE_VFMSf16 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v8f16>;
multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
- SDNode Op, Intrinsic PredInt> {
+ SDNode Op, Intrinsic PredInt> {
def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size{0}, 0, 1, bit_21> {
let validForTailPredication = 1;
}
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEFloat] in {
- defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME)>;
+ defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME)>;
}
}
@@ -3820,15 +3820,15 @@ multiclass MVE_VABD_fp_m<MVEVectorVTInfo VTI>
: MVE_VABDT_fp_m<VTI, int_arm_mve_vabd, int_arm_mve_abd_predicated>;
defm MVE_VABDf32 : MVE_VABD_fp_m<MVE_v4f32>;
-defm MVE_VABDf16 : MVE_VABD_fp_m<MVE_v8f16>;
-
-let Predicates = [HasMVEFloat] in {
- def : Pat<(v8f16 (fabs (fsub (v8f16 MQPR:$Qm), (v8f16 MQPR:$Qn)))),
- (MVE_VABDf16 MQPR:$Qm, MQPR:$Qn)>;
- def : Pat<(v4f32 (fabs (fsub (v4f32 MQPR:$Qm), (v4f32 MQPR:$Qn)))),
- (MVE_VABDf32 MQPR:$Qm, MQPR:$Qn)>;
-}
-
+defm MVE_VABDf16 : MVE_VABD_fp_m<MVE_v8f16>;
+
+let Predicates = [HasMVEFloat] in {
+ def : Pat<(v8f16 (fabs (fsub (v8f16 MQPR:$Qm), (v8f16 MQPR:$Qn)))),
+ (MVE_VABDf16 MQPR:$Qm, MQPR:$Qn)>;
+ def : Pat<(v4f32 (fabs (fsub (v4f32 MQPR:$Qm), (v4f32 MQPR:$Qn)))),
+ (MVE_VABDf32 MQPR:$Qm, MQPR:$Qn)>;
+}
+
class MVE_VCVT_fix<string suffix, bit fsi, bit U, bit op,
Operand imm_operand_type>
: MVE_float<"vcvt", suffix,
@@ -4047,8 +4047,8 @@ multiclass MVE_VABSNEG_fp_m<string iname, SDNode unpred_op, Intrinsic pred_int,
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEInt] in {
- def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))),
- (VTI.Vec (Inst $v))>;
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))),
+ (VTI.Vec (Inst $v))>;
def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$v), (VTI.Pred VCCR:$mask),
(VTI.Vec MQPR:$inactive))),
(VTI.Vec (Inst $v, ARMVCCThen, $mask, $inactive))>;
@@ -4083,8 +4083,8 @@ class MVE_VMAXMINNMA<string iname, string suffix, bit size, bit bit_12,
let Inst{4} = 0b0;
let Inst{3-1} = Qm{2-0};
let Inst{0} = 0b1;
-
- let isCommutable = 1;
+
+ let isCommutable = 1;
}
multiclass MVE_VMAXMINNMA_m<string iname, MVEVectorVTInfo VTI,
@@ -4410,10 +4410,10 @@ let Predicates = [HasMVEInt] in {
// vector types (v4i1<>v8i1, etc.) also as part of lowering vector shuffles.
def predicate_cast : SDNode<"ARMISD::PREDICATE_CAST", SDTUnaryOp>;
-def load_align4 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return cast<LoadSDNode>(N)->getAlignment() >= 4;
-}]>;
-
+def load_align4 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+
let Predicates = [HasMVEInt] in {
foreach VT = [ v4i1, v8i1, v16i1 ] in {
def : Pat<(i32 (predicate_cast (VT VCCR:$src))),
@@ -4426,13 +4426,13 @@ let Predicates = [HasMVEInt] in {
(VT (COPY_TO_REGCLASS (VT2 VCCR:$src), VCCR))>;
}
- // If we happen to be casting from a load we can convert that straight
- // into a predicate load, so long as the load is of the correct type.
- foreach VT = [ v4i1, v8i1, v16i1 ] in {
- def : Pat<(VT (predicate_cast (i32 (load_align4 taddrmode_imm7<2>:$addr)))),
- (VT (VLDR_P0_off taddrmode_imm7<2>:$addr))>;
- }
-
+ // If we happen to be casting from a load we can convert that straight
+ // into a predicate load, so long as the load is of the correct type.
+ foreach VT = [ v4i1, v8i1, v16i1 ] in {
+ def : Pat<(VT (predicate_cast (i32 (load_align4 taddrmode_imm7<2>:$addr)))),
+ (VT (VLDR_P0_off taddrmode_imm7<2>:$addr))>;
+ }
+
// Here we match the specific SDNode type 'ARMVectorRegCastImpl'
// rather than the more general 'ARMVectorRegCast' which would also
// match some bitconverts. If we use the latter in cases where the
@@ -4441,8 +4441,8 @@ let Predicates = [HasMVEInt] in {
foreach VT = [ v16i8, v8i16, v8f16, v4i32, v4f32, v2i64, v2f64 ] in
foreach VT2 = [ v16i8, v8i16, v8f16, v4i32, v4f32, v2i64, v2f64 ] in
- def : Pat<(VT (ARMVectorRegCastImpl (VT2 MQPR:$src))),
- (VT MQPR:$src)>;
+ def : Pat<(VT (ARMVectorRegCastImpl (VT2 MQPR:$src))),
+ (VT MQPR:$src)>;
}
// end of MVE compares
@@ -4770,7 +4770,7 @@ class MVE_VxMOVxN<string iname, string suffix, bit bit_28, bit bit_17,
let Inst{16} = 0b1;
let Inst{12} = T;
let Inst{8} = 0b0;
- let Inst{7} = !not(bit_17);
+ let Inst{7} = !not(bit_17);
let Inst{0} = 0b1;
let validForTailPredication = 1;
let retainsPreviousHalfElement = 1;
@@ -4801,7 +4801,7 @@ multiclass MVE_VMOVN_p<Instruction Inst, bit top,
(VTI.Vec MQPR:$Qm), (i32 top))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qm)))>;
- if !not(top) then {
+ if !not(top) then {
// If we see MVEvmovn(a,ARMvrev(b),1), that wants to overwrite the odd
// lanes of a with the odd lanes of b. In other words, the lanes we're
// _keeping_ from a are the even ones. So we can flip it round and say that
@@ -5173,11 +5173,11 @@ class MVE_VADDSUB_qr<string iname, string suffix, bits<2> size,
// Vector-scalar add/sub
multiclass MVE_VADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract,
- SDNode Op, Intrinsic PredInt> {
+ SDNode Op, Intrinsic PredInt> {
def "" : MVE_VADDSUB_qr<iname, VTI.Suffix, VTI.Size, 0b0, subtract, 0b1, 0b0>;
- let Predicates = [HasMVEInt] in {
- defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME), ARMimmAllZerosV>;
- }
+ let Predicates = [HasMVEInt] in {
+ defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME), ARMimmAllZerosV>;
+ }
}
multiclass MVE_VADD_qr_m<MVEVectorVTInfo VTI>
@@ -5196,35 +5196,35 @@ defm MVE_VSUB_qr_i32 : MVE_VSUB_qr_m<MVE_v4i32>;
// Vector-scalar saturating add/sub
multiclass MVE_VQADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract,
- SDNode Op, Intrinsic PredInt> {
+ SDNode Op, Intrinsic PredInt> {
def "" : MVE_VADDSUB_qr<iname, VTI.Suffix, VTI.Size, 0b1, subtract,
0b0, VTI.Unsigned>;
-
- let Predicates = [HasMVEInt] in {
- defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? (i32 VTI.Unsigned)),
- !cast<Instruction>(NAME)>;
- }
+
+ let Predicates = [HasMVEInt] in {
+ defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? (i32 VTI.Unsigned)),
+ !cast<Instruction>(NAME)>;
+ }
}
-multiclass MVE_VQADD_qr_m<MVEVectorVTInfo VTI, SDNode Op>
- : MVE_VQADDSUB_qr_m<"vqadd", VTI, 0b0, Op, int_arm_mve_qadd_predicated>;
+multiclass MVE_VQADD_qr_m<MVEVectorVTInfo VTI, SDNode Op>
+ : MVE_VQADDSUB_qr_m<"vqadd", VTI, 0b0, Op, int_arm_mve_qadd_predicated>;
-multiclass MVE_VQSUB_qr_m<MVEVectorVTInfo VTI, SDNode Op>
- : MVE_VQADDSUB_qr_m<"vqsub", VTI, 0b1, Op, int_arm_mve_qsub_predicated>;
+multiclass MVE_VQSUB_qr_m<MVEVectorVTInfo VTI, SDNode Op>
+ : MVE_VQADDSUB_qr_m<"vqsub", VTI, 0b1, Op, int_arm_mve_qsub_predicated>;
-defm MVE_VQADD_qr_s8 : MVE_VQADD_qr_m<MVE_v16s8, saddsat>;
-defm MVE_VQADD_qr_s16 : MVE_VQADD_qr_m<MVE_v8s16, saddsat>;
-defm MVE_VQADD_qr_s32 : MVE_VQADD_qr_m<MVE_v4s32, saddsat>;
-defm MVE_VQADD_qr_u8 : MVE_VQADD_qr_m<MVE_v16u8, uaddsat>;
-defm MVE_VQADD_qr_u16 : MVE_VQADD_qr_m<MVE_v8u16, uaddsat>;
-defm MVE_VQADD_qr_u32 : MVE_VQADD_qr_m<MVE_v4u32, uaddsat>;
+defm MVE_VQADD_qr_s8 : MVE_VQADD_qr_m<MVE_v16s8, saddsat>;
+defm MVE_VQADD_qr_s16 : MVE_VQADD_qr_m<MVE_v8s16, saddsat>;
+defm MVE_VQADD_qr_s32 : MVE_VQADD_qr_m<MVE_v4s32, saddsat>;
+defm MVE_VQADD_qr_u8 : MVE_VQADD_qr_m<MVE_v16u8, uaddsat>;
+defm MVE_VQADD_qr_u16 : MVE_VQADD_qr_m<MVE_v8u16, uaddsat>;
+defm MVE_VQADD_qr_u32 : MVE_VQADD_qr_m<MVE_v4u32, uaddsat>;
-defm MVE_VQSUB_qr_s8 : MVE_VQSUB_qr_m<MVE_v16s8, ssubsat>;
-defm MVE_VQSUB_qr_s16 : MVE_VQSUB_qr_m<MVE_v8s16, ssubsat>;
-defm MVE_VQSUB_qr_s32 : MVE_VQSUB_qr_m<MVE_v4s32, ssubsat>;
-defm MVE_VQSUB_qr_u8 : MVE_VQSUB_qr_m<MVE_v16u8, usubsat>;
-defm MVE_VQSUB_qr_u16 : MVE_VQSUB_qr_m<MVE_v8u16, usubsat>;
-defm MVE_VQSUB_qr_u32 : MVE_VQSUB_qr_m<MVE_v4u32, usubsat>;
+defm MVE_VQSUB_qr_s8 : MVE_VQSUB_qr_m<MVE_v16s8, ssubsat>;
+defm MVE_VQSUB_qr_s16 : MVE_VQSUB_qr_m<MVE_v8s16, ssubsat>;
+defm MVE_VQSUB_qr_s32 : MVE_VQSUB_qr_m<MVE_v4s32, ssubsat>;
+defm MVE_VQSUB_qr_u8 : MVE_VQSUB_qr_m<MVE_v16u8, usubsat>;
+defm MVE_VQSUB_qr_u16 : MVE_VQSUB_qr_m<MVE_v8u16, usubsat>;
+defm MVE_VQSUB_qr_u32 : MVE_VQSUB_qr_m<MVE_v4u32, usubsat>;
class MVE_VQDMULL_qr<string iname, string suffix, bit size,
bit T, string cstr="", list<dag> pattern=[]>
@@ -5315,23 +5315,23 @@ defm MVE_VHSUB_qr_u8 : MVE_VHSUB_qr_m<MVE_v16u8>;
defm MVE_VHSUB_qr_u16 : MVE_VHSUB_qr_m<MVE_v8u16>;
defm MVE_VHSUB_qr_u32 : MVE_VHSUB_qr_m<MVE_v4u32>;
-multiclass MVE_VADDSUB_qr_f<string iname, MVEVectorVTInfo VTI, bit subtract,
- SDNode Op, Intrinsic PredInt> {
- def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Size{0}, 0b11, subtract>;
- defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? ),
- !cast<Instruction>(NAME)>;
-}
-
+multiclass MVE_VADDSUB_qr_f<string iname, MVEVectorVTInfo VTI, bit subtract,
+ SDNode Op, Intrinsic PredInt> {
+ def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Size{0}, 0b11, subtract>;
+ defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? ),
+ !cast<Instruction>(NAME)>;
+}
+
let Predicates = [HasMVEFloat] in {
- defm MVE_VADD_qr_f32 : MVE_VADDSUB_qr_f<"vadd", MVE_v4f32, 0b0, fadd,
- int_arm_mve_add_predicated>;
- defm MVE_VADD_qr_f16 : MVE_VADDSUB_qr_f<"vadd", MVE_v8f16, 0b0, fadd,
- int_arm_mve_add_predicated>;
+ defm MVE_VADD_qr_f32 : MVE_VADDSUB_qr_f<"vadd", MVE_v4f32, 0b0, fadd,
+ int_arm_mve_add_predicated>;
+ defm MVE_VADD_qr_f16 : MVE_VADDSUB_qr_f<"vadd", MVE_v8f16, 0b0, fadd,
+ int_arm_mve_add_predicated>;
- defm MVE_VSUB_qr_f32 : MVE_VADDSUB_qr_f<"vsub", MVE_v4f32, 0b1, fsub,
- int_arm_mve_sub_predicated>;
- defm MVE_VSUB_qr_f16 : MVE_VADDSUB_qr_f<"vsub", MVE_v8f16, 0b1, fsub,
- int_arm_mve_sub_predicated>;
+ defm MVE_VSUB_qr_f32 : MVE_VADDSUB_qr_f<"vsub", MVE_v4f32, 0b1, fsub,
+ int_arm_mve_sub_predicated>;
+ defm MVE_VSUB_qr_f16 : MVE_VADDSUB_qr_f<"vsub", MVE_v8f16, 0b1, fsub,
+ int_arm_mve_sub_predicated>;
}
class MVE_VxSHL_qr<string iname, string suffix, bit U, bits<2> size,
@@ -5461,10 +5461,10 @@ class MVE_VMUL_qr_int<string iname, string suffix, bits<2> size>
multiclass MVE_VMUL_qr_int_m<MVEVectorVTInfo VTI> {
def "" : MVE_VMUL_qr_int<"vmul", VTI.Suffix, VTI.Size>;
- let Predicates = [HasMVEInt] in {
- defm : MVE_TwoOpPatternDup<VTI, mul, int_arm_mve_mul_predicated, (? ),
- !cast<Instruction>(NAME), ARMimmOneV>;
- }
+ let Predicates = [HasMVEInt] in {
+ defm : MVE_TwoOpPatternDup<VTI, mul, int_arm_mve_mul_predicated, (? ),
+ !cast<Instruction>(NAME), ARMimmOneV>;
+ }
}
defm MVE_VMUL_qr_i8 : MVE_VMUL_qr_int_m<MVE_v16i8>;
@@ -5481,25 +5481,25 @@ class MVE_VxxMUL_qr<string iname, string suffix,
let Inst{12} = 0b0;
let Inst{8} = 0b0;
let Inst{5} = 0b1;
- let validForTailPredication = 1;
+ let validForTailPredication = 1;
}
multiclass MVE_VxxMUL_qr_m<string iname, MVEVectorVTInfo VTI, bit bit_28,
- PatFrag Op, Intrinsic int_unpred, Intrinsic int_pred> {
+ PatFrag Op, Intrinsic int_unpred, Intrinsic int_pred> {
def "" : MVE_VxxMUL_qr<iname, VTI.Suffix, bit_28, VTI.Size>;
-
- let Predicates = [HasMVEInt] in {
- defm : MVE_TwoOpPatternDup<VTI, Op, int_pred, (? ), !cast<Instruction>(NAME)>;
- }
- defm : MVE_vec_scalar_int_pat_m<!cast<Instruction>(NAME), VTI, int_unpred, int_pred>;
+
+ let Predicates = [HasMVEInt] in {
+ defm : MVE_TwoOpPatternDup<VTI, Op, int_pred, (? ), !cast<Instruction>(NAME)>;
+ }
+ defm : MVE_vec_scalar_int_pat_m<!cast<Instruction>(NAME), VTI, int_unpred, int_pred>;
}
multiclass MVE_VQDMULH_qr_m<MVEVectorVTInfo VTI> :
- MVE_VxxMUL_qr_m<"vqdmulh", VTI, 0b0, MVEvqdmulh,
+ MVE_VxxMUL_qr_m<"vqdmulh", VTI, 0b0, MVEvqdmulh,
int_arm_mve_vqdmulh, int_arm_mve_qdmulh_predicated>;
multiclass MVE_VQRDMULH_qr_m<MVEVectorVTInfo VTI> :
- MVE_VxxMUL_qr_m<"vqrdmulh", VTI, 0b1, null_frag,
+ MVE_VxxMUL_qr_m<"vqrdmulh", VTI, 0b1, null_frag,
int_arm_mve_vqrdmulh, int_arm_mve_qrdmulh_predicated>;
defm MVE_VQDMULH_qr_s8 : MVE_VQDMULH_qr_m<MVE_v16s8>;
@@ -5510,17 +5510,17 @@ defm MVE_VQRDMULH_qr_s8 : MVE_VQRDMULH_qr_m<MVE_v16s8>;
defm MVE_VQRDMULH_qr_s16 : MVE_VQRDMULH_qr_m<MVE_v8s16>;
defm MVE_VQRDMULH_qr_s32 : MVE_VQRDMULH_qr_m<MVE_v4s32>;
-multiclass MVE_VxxMUL_qr_f_m<MVEVectorVTInfo VTI> {
- let validForTailPredication = 1 in
- def "" : MVE_VxxMUL_qr<"vmul", VTI.Suffix, VTI.Size{0}, 0b11>;
- defm : MVE_TwoOpPatternDup<VTI, fmul, int_arm_mve_mul_predicated, (? ),
- !cast<Instruction>(NAME)>;
+multiclass MVE_VxxMUL_qr_f_m<MVEVectorVTInfo VTI> {
+ let validForTailPredication = 1 in
+ def "" : MVE_VxxMUL_qr<"vmul", VTI.Suffix, VTI.Size{0}, 0b11>;
+ defm : MVE_TwoOpPatternDup<VTI, fmul, int_arm_mve_mul_predicated, (? ),
+ !cast<Instruction>(NAME)>;
}
-let Predicates = [HasMVEFloat] in {
- defm MVE_VMUL_qr_f16 : MVE_VxxMUL_qr_f_m<MVE_v8f16>;
- defm MVE_VMUL_qr_f32 : MVE_VxxMUL_qr_f_m<MVE_v4f32>;
-}
+let Predicates = [HasMVEFloat] in {
+ defm MVE_VMUL_qr_f16 : MVE_VxxMUL_qr_f_m<MVE_v8f16>;
+ defm MVE_VMUL_qr_f32 : MVE_VxxMUL_qr_f_m<MVE_v4f32>;
+}
class MVE_VFMAMLA_qr<string iname, string suffix,
bit bit_28, bits<2> bits_21_20, bit S,
@@ -5595,10 +5595,10 @@ multiclass MVE_VFMA_qr_multi<string iname, MVEVectorVTInfo VTI,
if scalar_addend then {
def : Pat<(VTI.Vec (fma v1, v2, vs)),
(VTI.Vec (Inst v1, v2, is))>;
- def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
- (VTI.Vec (fma v1, v2, vs)),
- v1)),
- (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>;
+ def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
+ (VTI.Vec (fma v1, v2, vs)),
+ v1)),
+ (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>;
def : Pat<(VTI.Vec (pred_int v1, v2, vs, pred)),
(VTI.Vec (Inst v1, v2, is, ARMVCCThen, pred))>;
} else {
@@ -5606,14 +5606,14 @@ multiclass MVE_VFMA_qr_multi<string iname, MVEVectorVTInfo VTI,
(VTI.Vec (Inst v2, v1, is))>;
def : Pat<(VTI.Vec (fma vs, v1, v2)),
(VTI.Vec (Inst v2, v1, is))>;
- def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
- (VTI.Vec (fma vs, v2, v1)),
- v1)),
- (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>;
- def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
- (VTI.Vec (fma v2, vs, v1)),
- v1)),
- (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>;
+ def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
+ (VTI.Vec (fma vs, v2, v1)),
+ v1)),
+ (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>;
+ def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
+ (VTI.Vec (fma v2, vs, v1)),
+ v1)),
+ (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>;
def : Pat<(VTI.Vec (pred_int v1, vs, v2, pred)),
(VTI.Vec (Inst v2, v1, is, ARMVCCThen, pred))>;
def : Pat<(VTI.Vec (pred_int vs, v1, v2, pred)),
@@ -5742,7 +5742,7 @@ def MVE_VDWDUPu8 : MVE_VxWDUP<"vdwdup", "u8", 0b00, 0b1>;
def MVE_VDWDUPu16 : MVE_VxWDUP<"vdwdup", "u16", 0b01, 0b1>;
def MVE_VDWDUPu32 : MVE_VxWDUP<"vdwdup", "u32", 0b10, 0b1>;
-let isReMaterializable = 1 in
+let isReMaterializable = 1 in
class MVE_VCTPInst<string suffix, bits<2> size, list<dag> pattern=[]>
: MVE_p<(outs VCCR:$P0), (ins rGPR:$Rn), NoItinerary, "vctp", suffix,
"$Rn", vpred_n, "", pattern> {
@@ -5766,8 +5766,8 @@ multiclass MVE_VCTP<MVEVectorVTInfo VTI, Intrinsic intr> {
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEInt] in {
- def : Pat<(intr rGPR:$Rn),
- (VTI.Pred (Inst rGPR:$Rn))>;
+ def : Pat<(intr rGPR:$Rn),
+ (VTI.Pred (Inst rGPR:$Rn))>;
def : Pat<(and (intr rGPR:$Rn), (VTI.Pred VCCR:$mask)),
(VTI.Pred (Inst rGPR:$Rn, ARMVCCThen, VCCR:$mask))>;
}
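For reference, vctp ("create tail predicate") marks the first Rn elements of the vector as active and the rest as inactive, which is what makes it the natural tail predicate for a final loop iteration. A minimal scalar sketch of that behaviour (an assumed model of the architectural semantics and of the 16-bit P0 layout, not LLVM code):

#include <cstdint>

// Model of vctp for NumLanes elements (16, 8 or 4): lane I is active iff I < Rn.
// P0 is modelled as 16 bits, one per byte lane, so wider lanes own several bits.
static uint16_t vctp(uint32_t Rn, unsigned NumLanes) {
  unsigned BitsPerLane = 16 / NumLanes;
  uint16_t P = 0;
  for (unsigned I = 0; I < NumLanes && I < Rn; ++I)
    P |= uint16_t(((1u << BitsPerLane) - 1) << (I * BitsPerLane));
  return P;
}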
@@ -5845,41 +5845,41 @@ def MVE_VMOV_rr_q : MVE_VMOV_64bit<(outs rGPR:$Rt, rGPR:$Rt2), (ins MQPR:$Qd),
let AsmMatchConverter = "cvtMVEVMOVQtoDReg";
}
-let Predicates = [HasMVEInt] in {
- // Double lane moves. There are a number of patterns here. We know that the
- // insertelt's will be in descending order by index, and need to match the 5
- // patterns that might contain 2-0 or 3-1 pairs. These are:
- // 3 2 1 0 -> vmovqrr 31; vmovqrr 20
- // 3 2 1 -> vmovqrr 31; vmov 2
- // 3 1 -> vmovqrr 31
- // 2 1 0 -> vmovqrr 20; vmov 1
- // 2 0 -> vmovqrr 20
- // The other potential patterns will be handled by single lane inserts.
- def : Pat<(insertelt (insertelt (insertelt (insertelt (v4i32 MQPR:$src1),
- rGPR:$srcA, (i32 0)),
- rGPR:$srcB, (i32 1)),
- rGPR:$srcC, (i32 2)),
- rGPR:$srcD, (i32 3)),
- (MVE_VMOV_q_rr (MVE_VMOV_q_rr MQPR:$src1, rGPR:$srcA, rGPR:$srcC, (i32 2), (i32 0)),
- rGPR:$srcB, rGPR:$srcD, (i32 3), (i32 1))>;
- def : Pat<(insertelt (insertelt (insertelt (v4i32 MQPR:$src1),
- rGPR:$srcB, (i32 1)),
- rGPR:$srcC, (i32 2)),
- rGPR:$srcD, (i32 3)),
- (MVE_VMOV_q_rr (MVE_VMOV_to_lane_32 MQPR:$src1, rGPR:$srcC, (i32 2)),
- rGPR:$srcB, rGPR:$srcD, (i32 3), (i32 1))>;
- def : Pat<(insertelt (insertelt (v4i32 MQPR:$src1), rGPR:$srcA, (i32 1)), rGPR:$srcB, (i32 3)),
- (MVE_VMOV_q_rr MQPR:$src1, rGPR:$srcA, rGPR:$srcB, (i32 3), (i32 1))>;
- def : Pat<(insertelt (insertelt (insertelt (v4i32 MQPR:$src1),
- rGPR:$srcB, (i32 0)),
- rGPR:$srcC, (i32 1)),
- rGPR:$srcD, (i32 2)),
- (MVE_VMOV_q_rr (MVE_VMOV_to_lane_32 MQPR:$src1, rGPR:$srcC, (i32 1)),
- rGPR:$srcB, rGPR:$srcD, (i32 2), (i32 0))>;
- def : Pat<(insertelt (insertelt (v4i32 MQPR:$src1), rGPR:$srcA, (i32 0)), rGPR:$srcB, (i32 2)),
- (MVE_VMOV_q_rr MQPR:$src1, rGPR:$srcA, rGPR:$srcB, (i32 2), (i32 0))>;
-}
-
+let Predicates = [HasMVEInt] in {
+ // Double lane moves. There are a number of patterns here. We know that the
+ // insertelt's will be in descending order by index, and need to match the 5
+ // patterns that might contain 2-0 or 3-1 pairs. These are:
+ // 3 2 1 0 -> vmovqrr 31; vmovqrr 20
+ // 3 2 1 -> vmovqrr 31; vmov 2
+ // 3 1 -> vmovqrr 31
+ // 2 1 0 -> vmovqrr 20; vmov 1
+ // 2 0 -> vmovqrr 20
+ // The other potential patterns will be handled by single lane inserts.
+ def : Pat<(insertelt (insertelt (insertelt (insertelt (v4i32 MQPR:$src1),
+ rGPR:$srcA, (i32 0)),
+ rGPR:$srcB, (i32 1)),
+ rGPR:$srcC, (i32 2)),
+ rGPR:$srcD, (i32 3)),
+ (MVE_VMOV_q_rr (MVE_VMOV_q_rr MQPR:$src1, rGPR:$srcA, rGPR:$srcC, (i32 2), (i32 0)),
+ rGPR:$srcB, rGPR:$srcD, (i32 3), (i32 1))>;
+ def : Pat<(insertelt (insertelt (insertelt (v4i32 MQPR:$src1),
+ rGPR:$srcB, (i32 1)),
+ rGPR:$srcC, (i32 2)),
+ rGPR:$srcD, (i32 3)),
+ (MVE_VMOV_q_rr (MVE_VMOV_to_lane_32 MQPR:$src1, rGPR:$srcC, (i32 2)),
+ rGPR:$srcB, rGPR:$srcD, (i32 3), (i32 1))>;
+ def : Pat<(insertelt (insertelt (v4i32 MQPR:$src1), rGPR:$srcA, (i32 1)), rGPR:$srcB, (i32 3)),
+ (MVE_VMOV_q_rr MQPR:$src1, rGPR:$srcA, rGPR:$srcB, (i32 3), (i32 1))>;
+ def : Pat<(insertelt (insertelt (insertelt (v4i32 MQPR:$src1),
+ rGPR:$srcB, (i32 0)),
+ rGPR:$srcC, (i32 1)),
+ rGPR:$srcD, (i32 2)),
+ (MVE_VMOV_q_rr (MVE_VMOV_to_lane_32 MQPR:$src1, rGPR:$srcC, (i32 1)),
+ rGPR:$srcB, rGPR:$srcD, (i32 2), (i32 0))>;
+ def : Pat<(insertelt (insertelt (v4i32 MQPR:$src1), rGPR:$srcA, (i32 0)), rGPR:$srcB, (i32 2)),
+ (MVE_VMOV_q_rr MQPR:$src1, rGPR:$srcA, rGPR:$srcB, (i32 2), (i32 0))>;
+}
+
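As a rough illustration of the pairing logic in the comment above, here is a standalone C++ model (the helper names and the operand-to-lane mapping are illustrative assumptions; the real mapping is whatever MVE_VMOV_q_rr encodes): each double move writes two 32-bit lanes of a Q register at once, so one (2,0) pair plus one (3,1) pair covers a full four-lane build with no single-lane inserts.

#include <cstdint>

// Conceptual model only: a 128-bit Q register as four 32-bit lanes.
struct QReg { uint32_t Lane[4]; };

// One paired move: write two lanes from two scalar values in a single step.
static void movePair(QReg &Q, uint32_t Lo, uint32_t Hi, int LoLane, int HiLane) {
  Q.Lane[LoLane] = Lo;
  Q.Lane[HiLane] = Hi;
}

// The "3 2 1 0" case from the comment: two paired moves, no single-lane insert.
static void buildAllLanes(QReg &Q, uint32_t L0, uint32_t L1, uint32_t L2, uint32_t L3) {
  movePair(Q, L0, L2, /*LoLane=*/0, /*HiLane=*/2); // the "20" pair
  movePair(Q, L1, L3, /*LoLane=*/1, /*HiLane=*/3); // the "31" pair
}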
// end of coproc mov
// start of MVE interleaving load/store
@@ -5908,7 +5908,7 @@ class MVE_vldst24_base<bit writeback, bit fourregs, bits<2> stage, bits<2> size,
let mayLoad = load;
let mayStore = !eq(load,0);
let hasSideEffects = 0;
- let validForTailPredication = load;
+ let validForTailPredication = load;
}
// A parameter class used to encapsulate all the ways the writeback
@@ -6518,7 +6518,7 @@ class MVE_VPT<string suffix, bits<2> size, dag iops, string asm, list<dag> patte
let Inst{4} = 0b0;
let Defs = [VPR];
- let validForTailPredication=1;
+ let validForTailPredication=1;
}
class MVE_VPTt1<string suffix, bits<2> size, dag iops>
@@ -6631,7 +6631,7 @@ class MVE_VPTf<string suffix, bit size, dag iops, string asm, list<dag> pattern=
let Defs = [VPR];
let Predicates = [HasMVEFloat];
- let validForTailPredication=1;
+ let validForTailPredication=1;
}
class MVE_VPTft1<string suffix, bit size>
@@ -7107,7 +7107,7 @@ class MVE_vector_load_typed<ValueType Ty, Instruction RegImmInst,
class MVE_vector_maskedload_typed<ValueType Ty, Instruction RegImmInst,
PatFrag LoadKind, int shift>
- : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr, VCCR:$pred, (Ty (ARMvmovImm (i32 0))))),
+ : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr, VCCR:$pred, (Ty (ARMvmovImm (i32 0))))),
(Ty (RegImmInst t2addrmode_imm7<shift>:$addr, ARMVCCThen, VCCR:$pred))>;
multiclass MVE_vector_load<Instruction RegImmInst, PatFrag LoadKind,
@@ -7274,11 +7274,11 @@ multiclass MVEExtLoadStore<Instruction LoadSInst, Instruction LoadUInst, string
(VT (LoadUInst taddrmode_imm7<Shift>:$addr))>;
// Masked ext loads
- def : Pat<(VT (!cast<PatFrag>("aligned_extmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT (ARMvmovImm (i32 0))))),
+ def : Pat<(VT (!cast<PatFrag>("aligned_extmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT (ARMvmovImm (i32 0))))),
(VT (LoadUInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
- def : Pat<(VT (!cast<PatFrag>("aligned_sextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT (ARMvmovImm (i32 0))))),
+ def : Pat<(VT (!cast<PatFrag>("aligned_sextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT (ARMvmovImm (i32 0))))),
(VT (LoadSInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
- def : Pat<(VT (!cast<PatFrag>("aligned_zextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT (ARMvmovImm (i32 0))))),
+ def : Pat<(VT (!cast<PatFrag>("aligned_zextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT (ARMvmovImm (i32 0))))),
(VT (LoadUInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
}
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrNEON.td b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrNEON.td
index a8c0d05d91..0f5d53b57d 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrNEON.td
@@ -509,7 +509,7 @@ def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>;
def NEONvsliImm : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
def NEONvsriImm : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;
-def NEONvbsp : SDNode<"ARMISD::VBSP",
+def NEONvbsp : SDNode<"ARMISD::VBSP",
SDTypeProfile<1, 3, [SDTCisVec<0>,
SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
@@ -4197,10 +4197,10 @@ def VADDhq : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
"vaddl", "s", add, sext, 1>;
defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
- "vaddl", "u", add, zanyext, 1>;
+ "vaddl", "u", add, zanyext, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
-defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zanyext, 0>;
+defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zanyext, 0>;
// VHADD : Vector Halving Add
defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
@@ -4512,9 +4512,9 @@ let Predicates = [HasNEON, HasV8_1a] in {
(SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (saddsat
(v4i32 QPR:$src1),
- (v4i32 (int_arm_neon_vqrdmulh
+ (v4i32 (int_arm_neon_vqrdmulh
(v4i32 QPR:$src2),
- (v4i32 (ARMvduplane (v4i32 QPR:$src3),
+ (v4i32 (ARMvduplane (v4i32 QPR:$src3),
imm:$lane)))))),
(v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
(v4i32 QPR:$src2),
@@ -4565,17 +4565,17 @@ let Predicates = [HasNEON, HasV8_1a] in {
(v2i32 DPR:$Vn),
(v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
imm:$lane)))))),
- (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
+ (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
imm:$lane))>;
def : Pat<(v8i16 (ssubsat
(v8i16 QPR:$src1),
(v8i16 (int_arm_neon_vqrdmulh
(v8i16 QPR:$src2),
- (v8i16 (ARMvduplane (v8i16 QPR:$src3),
+ (v8i16 (ARMvduplane (v8i16 QPR:$src3),
imm:$lane)))))),
(v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
(v8i16 QPR:$src2),
- (v4i16 (EXTRACT_SUBREG
+ (v4i16 (EXTRACT_SUBREG
QPR:$src3,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
@@ -4587,7 +4587,7 @@ let Predicates = [HasNEON, HasV8_1a] in {
imm:$lane)))))),
(v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
(v4i32 QPR:$src2),
- (v2i32 (EXTRACT_SUBREG
+ (v2i32 (EXTRACT_SUBREG
QPR:$src3,
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
@@ -5045,10 +5045,10 @@ def VSUBhq : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16",
defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
"vsubl", "s", sub, sext, 0>;
defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
- "vsubl", "u", sub, zanyext, 0>;
+ "vsubl", "u", sub, zanyext, 0>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
-defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zanyext, 0>;
+defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zanyext, 0>;
// VHSUB : Vector Halving Subtract
defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
@@ -5259,9 +5259,9 @@ def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
// Vector Bitwise Operations.
def vnotd : PatFrag<(ops node:$in),
- (xor node:$in, ARMimmAllOnesD)>;
+ (xor node:$in, ARMimmAllOnesD)>;
def vnotq : PatFrag<(ops node:$in),
- (xor node:$in, ARMimmAllOnesV)>;
+ (xor node:$in, ARMimmAllOnesV)>;
// VAND : Vector Bitwise AND
@@ -5428,84 +5428,84 @@ def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
}
-// The TwoAddress pass will not go looking for equivalent operations
-// with different register constraints; it just inserts copies.
-// That is why the pseudo VBSP is implemented. It is expanded later into
-// VBIT/VBIF/VBSL taking into account register constraints to avoid copies.
-def VBSPd
- : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
- IIC_VBINiD, "",
- [(set DPR:$Vd,
- (v2i32 (NEONvbsp DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
+// The TwoAddress pass will not go looking for equivalent operations
+// with different register constraints; it just inserts copies.
+// That is why the pseudo VBSP is implemented. It is expanded later into
+// VBIT/VBIF/VBSL taking into account register constraints to avoid copies.
+def VBSPd
+ : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
+ IIC_VBINiD, "",
+ [(set DPR:$Vd,
+ (v2i32 (NEONvbsp DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
(v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
- (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
+ (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
(v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
- (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
+ (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
(v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
- (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
+ (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
(v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
- (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
+ (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
(v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
- (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
+ (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
(and DPR:$Vm, (vnotd DPR:$Vd)))),
- (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
+ (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
(and DPR:$Vm, (vnotd DPR:$Vd)))),
- (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
+ (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
}
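The (or (and Vn, Vd), (and Vm, (vnot Vd))) patterns above are the standard bitwise-select identity. As a quick scalar reference (a sketch of the operation VBSP ultimately performs, not LLVM code):

#include <cstdint>

// Bitwise select: take TrueVal where the mask bit is 1, FalseVal where it is 0.
static uint32_t bitwiseSelect(uint32_t Mask, uint32_t TrueVal, uint32_t FalseVal) {
  return (TrueVal & Mask) | (FalseVal & ~Mask);
}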
-def VBSPq
- : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
- IIC_VBINiQ, "",
- [(set QPR:$Vd,
- (v4i32 (NEONvbsp QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
+def VBSPq
+ : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
+ IIC_VBINiQ, "",
+ [(set QPR:$Vd,
+ (v4i32 (NEONvbsp QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
let Predicates = [HasNEON] in {
def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
(v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
- (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
+ (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
(v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
- (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
+ (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
(v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
- (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
+ (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
(v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
- (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
+ (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
(v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
- (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
+ (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
(and QPR:$Vm, (vnotq QPR:$Vd)))),
- (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
+ (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
(and QPR:$Vm, (vnotq QPR:$Vd)))),
- (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
-}
-
-// VBSL : Vector Bitwise Select
-def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
- (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
- N3RegFrm, IIC_VBINiD,
- "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
- []>;
-
-def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
- (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
- N3RegFrm, IIC_VBINiQ,
- "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
- []>;
-
+ (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
+}
+
+// VBSL : Vector Bitwise Select
+def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
+ N3RegFrm, IIC_VBINiD,
+ "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ []>;
+
+def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
+ N3RegFrm, IIC_VBINiQ,
+ "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ []>;
+
// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
@@ -6040,9 +6040,9 @@ defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
// Vector Negate.
def vnegd : PatFrag<(ops node:$in),
- (sub ARMimmAllZerosD, node:$in)>;
+ (sub ARMimmAllZerosD, node:$in)>;
def vnegq : PatFrag<(ops node:$in),
- (sub ARMimmAllZerosV, node:$in)>;
+ (sub ARMimmAllZerosV, node:$in)>;
class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
@@ -6256,11 +6256,11 @@ defm : NEONImmReplicateInstAlias<i32, VMOVv2i32, VMOVv4i32,
let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
- [(set DPR:$Vd, (v2i32 ARMimmAllZerosD))],
+ [(set DPR:$Vd, (v2i32 ARMimmAllZerosD))],
(VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
Requires<[HasZCZ]>;
def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
- [(set QPR:$Vd, (v4i32 ARMimmAllZerosV))],
+ [(set QPR:$Vd, (v4i32 ARMimmAllZerosV))],
(VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
Requires<[HasZCZ]>;
}
@@ -7946,7 +7946,7 @@ let Predicates = [HasNEON,IsLE] in {
(VLD1LNd16 addrmode6:$addr,
(f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
}
-// The following patterns are basically a copy of the patterns above,
+// The following patterns are basically a copy of the patterns above,
// however with an additional VREV16d instruction to convert data
// loaded by VLD1LN into proper vector format in big endian mode.
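For reference, a scalar sketch of the VREV16 fix-up mentioned above (an assumed model of the byte reversal, not LLVM code): it swaps the two bytes inside every 16-bit element of the loaded D-register data.

#include <cstdint>

// REV16-style fix-up: swap the bytes within each halfword of a 64-bit value.
static uint64_t rev16(uint64_t V) {
  return ((V & 0x00FF00FF00FF00FFULL) << 8) |
         ((V & 0xFF00FF00FF00FF00ULL) >> 8);
}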
let Predicates = [HasNEON,IsBE] in {
@@ -9079,11 +9079,11 @@ multiclass BF16VDOTI<bit Q, RegisterClass RegTy, string opc, ValueType AccumTy,
(!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>;
}
-def BF16VDOTS_VDOTD : BF16VDOTS<0, DPR, "vdot", v2f32, v4bf16>;
-def BF16VDOTS_VDOTQ : BF16VDOTS<1, QPR, "vdot", v4f32, v8bf16>;
+def BF16VDOTS_VDOTD : BF16VDOTS<0, DPR, "vdot", v2f32, v4bf16>;
+def BF16VDOTS_VDOTQ : BF16VDOTS<1, QPR, "vdot", v4f32, v8bf16>;
-defm BF16VDOTI_VDOTD : BF16VDOTI<0, DPR, "vdot", v2f32, v4bf16, (v2f32 DPR_VFP2:$Vm)>;
-defm BF16VDOTI_VDOTQ : BF16VDOTI<1, QPR, "vdot", v4f32, v8bf16, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
+defm BF16VDOTI_VDOTD : BF16VDOTI<0, DPR, "vdot", v2f32, v4bf16, (v2f32 DPR_VFP2:$Vm)>;
+defm BF16VDOTI_VDOTQ : BF16VDOTI<1, QPR, "vdot", v4f32, v8bf16, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
class BF16MM<bit Q, RegisterClass RegTy,
string opc>
@@ -9091,8 +9091,8 @@ class BF16MM<bit Q, RegisterClass RegTy,
(outs RegTy:$dst), (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
N3RegFrm, IIC_VDOTPROD, "", "",
[(set (v4f32 QPR:$dst), (int_arm_neon_bfmmla (v4f32 QPR:$Vd),
- (v8bf16 QPR:$Vn),
- (v8bf16 QPR:$Vm)))]> {
+ (v8bf16 QPR:$Vn),
+ (v8bf16 QPR:$Vm)))]> {
let Constraints = "$dst = $Vd";
let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm");
let DecoderNamespace = "VFPV8";
@@ -9106,8 +9106,8 @@ class VBF16MALQ<bit T, string suffix, SDPatternOperator OpNode>
NoItinerary, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm", "",
[(set (v4f32 QPR:$dst),
(OpNode (v4f32 QPR:$Vd),
- (v8bf16 QPR:$Vn),
- (v8bf16 QPR:$Vm)))]> {
+ (v8bf16 QPR:$Vn),
+ (v8bf16 QPR:$Vm)))]> {
let Constraints = "$dst = $Vd";
let DecoderNamespace = "VFPV8";
}
@@ -9128,9 +9128,9 @@ multiclass VBF16MALQI<bit T, string suffix, SDPatternOperator OpNode> {
def : Pat<
(v4f32 (OpNode (v4f32 QPR:$Vd),
- (v8bf16 QPR:$Vn),
- (v8bf16 (ARMvduplane (v8bf16 QPR:$Vm),
- VectorIndex16:$lane)))),
+ (v8bf16 QPR:$Vn),
+ (v8bf16 (ARMvduplane (v8bf16 QPR:$Vm),
+ VectorIndex16:$lane)))),
(!cast<Instruction>(NAME) QPR:$Vd,
QPR:$Vn,
(EXTRACT_SUBREG QPR:$Vm,
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrThumb.td b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrThumb.td
index 3a33dfeecd..0b0c510102 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrThumb.td
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrThumb.td
@@ -548,19 +548,19 @@ let isCall = 1,
// Also used for Thumb2
def tBLXr : TI<(outs), (ins pred:$p, GPR:$func), IIC_Br,
- "blx${p}\t$func", []>,
+ "blx${p}\t$func", []>,
Requires<[IsThumb, HasV5T]>,
T1Special<{1,1,1,?}>, Sched<[WriteBrL]> { // A6.2.3 & A8.6.24;
bits<4> func;
let Inst{6-3} = func;
let Inst{2-0} = 0b000;
}
- def tBLXr_noip : ARMPseudoExpand<(outs), (ins pred:$p, GPRnoip:$func),
- 2, IIC_Br, [], (tBLXr pred:$p, GPR:$func)>,
- Requires<[IsThumb, HasV5T]>,
- Sched<[WriteBrL]>;
-
+ def tBLXr_noip : ARMPseudoExpand<(outs), (ins pred:$p, GPRnoip:$func),
+ 2, IIC_Br, [], (tBLXr pred:$p, GPR:$func)>,
+ Requires<[IsThumb, HasV5T]>,
+ Sched<[WriteBrL]>;
+
// ARMv8-M Security Extensions
def tBLXNSr : TI<(outs), (ins pred:$p, GPRnopc:$func), IIC_Br,
"blxns${p}\t$func", []>,
@@ -590,11 +590,11 @@ let isCall = 1,
Requires<[IsThumb]>, Sched<[WriteBr]>;
}
-def : ARMPat<(ARMcall GPR:$func), (tBLXr $func)>,
- Requires<[IsThumb, HasV5T, NoSLSBLRMitigation]>;
-def : ARMPat<(ARMcall GPRnoip:$func), (tBLXr_noip $func)>,
- Requires<[IsThumb, HasV5T, SLSBLRMitigation]>;
-
+def : ARMPat<(ARMcall GPR:$func), (tBLXr $func)>,
+ Requires<[IsThumb, HasV5T, NoSLSBLRMitigation]>;
+def : ARMPat<(ARMcall GPRnoip:$func), (tBLXr_noip $func)>,
+ Requires<[IsThumb, HasV5T, SLSBLRMitigation]>;
+
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
let isPredicable = 1 in
def tB : T1pI<(outs), (ins t_brtarget:$target), IIC_Br,
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrThumb2.td b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrThumb2.td
index 5642cab32e..b79212a48b 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrThumb2.td
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrThumb2.td
@@ -1724,7 +1724,7 @@ def t2STRH_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
// only.
// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4
class T2IstT<bits<2> type, string opc, InstrItinClass ii>
- : T2Ii8<(outs), (ins rGPR:$Rt, t2addrmode_imm8:$addr), ii, opc,
+ : T2Ii8<(outs), (ins rGPR:$Rt, t2addrmode_imm8:$addr), ii, opc,
"\t$Rt, $addr", []>, Sched<[WriteST]> {
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
@@ -2623,9 +2623,9 @@ def t2USAT16: T2SatI<(ins imm0_15:$sat_imm, rGPR:$Rn),
let Inst{4} = 0;
}
-def : T2Pat<(ARMssat GPRnopc:$Rn, imm0_31:$imm),
+def : T2Pat<(ARMssat GPRnopc:$Rn, imm0_31:$imm),
(t2SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>;
-def : T2Pat<(ARMusat GPRnopc:$Rn, imm0_31:$imm),
+def : T2Pat<(ARMusat GPRnopc:$Rn, imm0_31:$imm),
(t2USAT imm0_31:$imm, GPRnopc:$Rn, 0)>;
def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos),
(t2SSAT imm1_32:$pos, GPR:$a, 0)>;
@@ -2635,24 +2635,24 @@ def : T2Pat<(int_arm_ssat16 GPR:$a, imm1_16:$pos),
(t2SSAT16 imm1_16:$pos, GPR:$a)>;
def : T2Pat<(int_arm_usat16 GPR:$a, imm0_15:$pos),
(t2USAT16 imm0_15:$pos, GPR:$a)>;
-def : T2Pat<(int_arm_ssat (shl GPRnopc:$a, imm0_31:$shft), imm1_32:$pos),
- (t2SSAT imm1_32:$pos, GPRnopc:$a, imm0_31:$shft)>;
-def : T2Pat<(int_arm_ssat (sra GPRnopc:$a, asr_imm:$shft), imm1_32:$pos),
- (t2SSAT imm1_32:$pos, GPRnopc:$a, asr_imm:$shft)>;
-def : T2Pat<(int_arm_usat (shl GPRnopc:$a, imm0_31:$shft), imm0_31:$pos),
- (t2USAT imm0_31:$pos, GPRnopc:$a, imm0_31:$shft)>;
-def : T2Pat<(int_arm_usat (sra GPRnopc:$a, asr_imm:$shft), imm0_31:$pos),
- (t2USAT imm0_31:$pos, GPRnopc:$a, asr_imm:$shft)>;
-def : T2Pat<(ARMssat (shl GPRnopc:$a, imm0_31:$shft), imm0_31:$pos),
- (t2SSAT imm0_31:$pos, GPRnopc:$a, imm0_31:$shft)>;
-def : T2Pat<(ARMssat (sra GPRnopc:$Rn, asr_imm:$shft), imm0_31:$pos),
- (t2SSAT imm0_31:$pos, GPRnopc:$Rn, asr_imm:$shft)>;
-def : T2Pat<(ARMusat (shl GPRnopc:$a, imm0_31:$shft), imm0_31:$pos),
- (t2USAT imm0_31:$pos, GPRnopc:$a, imm0_31:$shft)>;
-def : T2Pat<(ARMusat (sra GPRnopc:$Rn, asr_imm:$shft), imm0_31:$pos),
- (t2USAT imm0_31:$pos, GPRnopc:$Rn, asr_imm:$shft)>;
-
-
+def : T2Pat<(int_arm_ssat (shl GPRnopc:$a, imm0_31:$shft), imm1_32:$pos),
+ (t2SSAT imm1_32:$pos, GPRnopc:$a, imm0_31:$shft)>;
+def : T2Pat<(int_arm_ssat (sra GPRnopc:$a, asr_imm:$shft), imm1_32:$pos),
+ (t2SSAT imm1_32:$pos, GPRnopc:$a, asr_imm:$shft)>;
+def : T2Pat<(int_arm_usat (shl GPRnopc:$a, imm0_31:$shft), imm0_31:$pos),
+ (t2USAT imm0_31:$pos, GPRnopc:$a, imm0_31:$shft)>;
+def : T2Pat<(int_arm_usat (sra GPRnopc:$a, asr_imm:$shft), imm0_31:$pos),
+ (t2USAT imm0_31:$pos, GPRnopc:$a, asr_imm:$shft)>;
+def : T2Pat<(ARMssat (shl GPRnopc:$a, imm0_31:$shft), imm0_31:$pos),
+ (t2SSAT imm0_31:$pos, GPRnopc:$a, imm0_31:$shft)>;
+def : T2Pat<(ARMssat (sra GPRnopc:$Rn, asr_imm:$shft), imm0_31:$pos),
+ (t2SSAT imm0_31:$pos, GPRnopc:$Rn, asr_imm:$shft)>;
+def : T2Pat<(ARMusat (shl GPRnopc:$a, imm0_31:$shft), imm0_31:$pos),
+ (t2USAT imm0_31:$pos, GPRnopc:$a, imm0_31:$shft)>;
+def : T2Pat<(ARMusat (sra GPRnopc:$Rn, asr_imm:$shft), imm0_31:$pos),
+ (t2USAT imm0_31:$pos, GPRnopc:$Rn, asr_imm:$shft)>;
+
+
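As a semantic reference for what these patterns select, here is a standalone sketch of the architectural SSAT/USAT behaviour (the ranges are assumptions taken from the instruction descriptions, not LLVM code): SSAT clamps to a signed pos-bit range, USAT to an unsigned pos-bit range, optionally after the shift shown in the patterns.

#include <algorithm>
#include <cstdint>

// Signed saturation to Pos bits (Pos in 1..32): clamp to [-2^(Pos-1), 2^(Pos-1)-1].
static int32_t ssat(int64_t X, unsigned Pos) {
  int64_t Max = (int64_t(1) << (Pos - 1)) - 1;
  int64_t Min = -(int64_t(1) << (Pos - 1));
  return int32_t(std::clamp(X, Min, Max));
}

// Unsigned saturation to Pos bits (Pos in 0..31): clamp to [0, 2^Pos - 1].
static uint32_t usat(int64_t X, unsigned Pos) {
  int64_t Max = (int64_t(1) << Pos) - 1;
  return uint32_t(std::clamp(X, int64_t(0), Max));
}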
//===----------------------------------------------------------------------===//
// Shift and rotate Instructions.
//
@@ -4935,15 +4935,15 @@ def : InstAlias<"pssbb", (t2DSB 0x4, 14, 0), 1>, Requires<[HasDB, IsThumb2]>;
// Armv8-R 'Data Full Barrier'
def : InstAlias<"dfb${p}", (t2DSB 0xc, pred:$p), 1>, Requires<[HasDFB]>;
-// SpeculationBarrierEndBB must only be used after an unconditional control
-// flow, i.e. after a terminator for which isBarrier is True.
-let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
- def t2SpeculationBarrierISBDSBEndBB
- : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
- def t2SpeculationBarrierSBEndBB
- : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
-}
-
+// SpeculationBarrierEndBB must only be used after an unconditional control
+// flow, i.e. after a terminator for which isBarrier is True.
+let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
+ def t2SpeculationBarrierISBDSBEndBB
+ : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
+ def t2SpeculationBarrierSBEndBB
+ : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
+}
+
// Alias for LDR, LDRB, LDRH, LDRSB, and LDRSH without the ".w" optional
// width specifier.
def : t2InstAlias<"ldr${p} $Rt, $addr",
@@ -5429,17 +5429,17 @@ def t2LE : t2LOL<(outs ), (ins lelabel_u11:$label), "le", "$label"> {
let isTerminator = 1;
}
-let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB] in {
-
-let usesCustomInserter = 1 in
+let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB] in {
+
+let usesCustomInserter = 1 in
def t2DoLoopStart :
- t2PseudoInst<(outs GPRlr:$X), (ins rGPR:$elts), 4, IIC_Br,
- [(set GPRlr:$X, (int_start_loop_iterations rGPR:$elts))]>;
-
-let isTerminator = 1, hasSideEffects = 1 in
-def t2DoLoopStartTP :
- t2PseudoInst<(outs GPRlr:$X), (ins rGPR:$elts, rGPR:$count), 4, IIC_Br, []>;
+ t2PseudoInst<(outs GPRlr:$X), (ins rGPR:$elts), 4, IIC_Br,
+ [(set GPRlr:$X, (int_start_loop_iterations rGPR:$elts))]>;
+let isTerminator = 1, hasSideEffects = 1 in
+def t2DoLoopStartTP :
+ t2PseudoInst<(outs GPRlr:$X), (ins rGPR:$elts, rGPR:$count), 4, IIC_Br, []>;
+
let hasSideEffects = 0 in
def t2LoopDec :
t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size),
@@ -5458,14 +5458,14 @@ def t2LoopEnd :
t2PseudoInst<(outs), (ins GPRlr:$elts, brtarget:$target),
8, IIC_Br, []>, Sched<[WriteBr]>;
-def t2LoopEndDec :
- t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$elts, brtarget:$target),
- 8, IIC_Br, []>, Sched<[WriteBr]>;
-
+def t2LoopEndDec :
+ t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$elts, brtarget:$target),
+ 8, IIC_Br, []>, Sched<[WriteBr]>;
+
} // end isBranch, isTerminator, hasSideEffects
-}
-
+}
+
} // end isNotDuplicable
class CS<string iname, bits<4> opcode, list<dag> pattern=[]>
@@ -5484,7 +5484,7 @@ class CS<string iname, bits<4> opcode, list<dag> pattern=[]>
let Inst{3-0} = Rm{3-0};
let Uses = [CPSR];
- let hasSideEffects = 0;
+ let hasSideEffects = 0;
}
def t2CSEL : CS<"csel", 0b1000>;
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrVFP.td b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrVFP.td
index 2be58d7a0e..9034b35ded 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrVFP.td
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrVFP.td
@@ -54,16 +54,16 @@ def vfp_f16imm : Operand<f16>,
let ParserMatchClass = FPImmOperand;
}
-def vfp_f32f16imm_xform : SDNodeXForm<fpimm, [{
- APFloat InVal = N->getValueAPF();
- uint32_t enc = ARM_AM::getFP32FP16Imm(InVal);
- return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
- }]>;
-
-def vfp_f32f16imm : PatLeaf<(f32 fpimm), [{
- return ARM_AM::getFP32FP16Imm(N->getValueAPF()) != -1;
- }], vfp_f32f16imm_xform>;
-
+def vfp_f32f16imm_xform : SDNodeXForm<fpimm, [{
+ APFloat InVal = N->getValueAPF();
+ uint32_t enc = ARM_AM::getFP32FP16Imm(InVal);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
+ }]>;
+
+def vfp_f32f16imm : PatLeaf<(f32 fpimm), [{
+ return ARM_AM::getFP32FP16Imm(N->getValueAPF()) != -1;
+ }], vfp_f32f16imm_xform>;
+
def vfp_f32imm_xform : SDNodeXForm<fpimm, [{
APFloat InVal = N->getValueAPF();
uint32_t enc = ARM_AM::getFP32Imm(InVal);
@@ -1561,8 +1561,8 @@ class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
let Inst{5} = Sm{0};
let Inst{15-12} = Sd{4-1};
let Inst{22} = Sd{0};
-
- let hasSideEffects = 0;
+
+ let hasSideEffects = 0;
}
class AVConv1IsH_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
@@ -2626,11 +2626,11 @@ def FCONSTH : VFPAI<(outs HPR:$Sd), (ins vfp_f16imm:$imm),
}
}
-def : Pat<(f32 (vfp_f32f16imm:$imm)),
- (f32 (COPY_TO_REGCLASS (f16 (FCONSTH (vfp_f32f16imm_xform (f32 $imm)))), SPR))> {
- let Predicates = [HasFullFP16];
-}
-
+def : Pat<(f32 (vfp_f32f16imm:$imm)),
+ (f32 (COPY_TO_REGCLASS (f16 (FCONSTH (vfp_f32f16imm_xform (f32 $imm)))), SPR))> {
+ let Predicates = [HasFullFP16];
+}
+
//===----------------------------------------------------------------------===//
// Assembler aliases.
//
@@ -2846,12 +2846,12 @@ let Predicates = [HasV8_1MMainline, HasMVEInt] in {
}
defm VSTR_P0 : vfp_vstrldr_sysreg<0b0,0b1101, "p0",
(outs), (ins VCCR:$P0)>;
-
- let Defs = [VPR] in {
- defm VLDR_VPR : vfp_vstrldr_sysreg<0b1,0b1100, "vpr">;
- }
- defm VLDR_P0 : vfp_vstrldr_sysreg<0b1,0b1101, "p0",
- (outs VCCR:$P0), (ins)>;
+
+ let Defs = [VPR] in {
+ defm VLDR_VPR : vfp_vstrldr_sysreg<0b1,0b1100, "vpr">;
+ }
+ defm VLDR_P0 : vfp_vstrldr_sysreg<0b1,0b1101, "p0",
+ (outs VCCR:$P0), (ins)>;
}
let Uses = [FPSCR] in {
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMLegalizerInfo.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMLegalizerInfo.cpp
index d9b60f4c4e..92b7dd5047 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -88,7 +88,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
getActionDefinitionsBuilder({G_MUL, G_AND, G_OR, G_XOR})
.legalFor({s32})
- .clampScalar(0, s32, s32);
+ .clampScalar(0, s32, s32);
if (ST.hasNEON())
getActionDefinitionsBuilder({G_ADD, G_SUB})
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index aa1fe4e4ff..e264726f91 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1268,7 +1268,7 @@ findIncDecAfter(MachineBasicBlock::iterator MBBI, Register Reg,
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
// Thumb1 is already using updating loads/stores.
if (isThumb1) return false;
- LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);
+ LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);
const MachineOperand &BaseOP = MI->getOperand(0);
Register Base = BaseOP.getReg();
@@ -1320,10 +1320,10 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
return false;
}
}
- if (MergeInstr != MBB.end()) {
- LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
+ if (MergeInstr != MBB.end()) {
+ LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
MBB.erase(MergeInstr);
- }
+ }
unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
@@ -1338,7 +1338,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
// Transfer memoperands.
MIB.setMemRefs(MI->memoperands());
- LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB);
+ LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB);
MBB.erase(MBBI);
return true;
}
@@ -1386,27 +1386,27 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
case ARM::t2LDRi8:
case ARM::t2LDRi12:
return ARM::t2LDR_POST;
- case ARM::t2LDRBi8:
- case ARM::t2LDRBi12:
- return ARM::t2LDRB_POST;
- case ARM::t2LDRSBi8:
- case ARM::t2LDRSBi12:
- return ARM::t2LDRSB_POST;
- case ARM::t2LDRHi8:
- case ARM::t2LDRHi12:
- return ARM::t2LDRH_POST;
- case ARM::t2LDRSHi8:
- case ARM::t2LDRSHi12:
- return ARM::t2LDRSH_POST;
+ case ARM::t2LDRBi8:
+ case ARM::t2LDRBi12:
+ return ARM::t2LDRB_POST;
+ case ARM::t2LDRSBi8:
+ case ARM::t2LDRSBi12:
+ return ARM::t2LDRSB_POST;
+ case ARM::t2LDRHi8:
+ case ARM::t2LDRHi12:
+ return ARM::t2LDRH_POST;
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRSHi12:
+ return ARM::t2LDRSH_POST;
case ARM::t2STRi8:
case ARM::t2STRi12:
return ARM::t2STR_POST;
- case ARM::t2STRBi8:
- case ARM::t2STRBi12:
- return ARM::t2STRB_POST;
- case ARM::t2STRHi8:
- case ARM::t2STRHi12:
- return ARM::t2STRH_POST;
+ case ARM::t2STRBi8:
+ case ARM::t2STRBi12:
+ return ARM::t2STRB_POST;
+ case ARM::t2STRHi8:
+ case ARM::t2STRHi12:
+ return ARM::t2STRH_POST;
case ARM::MVE_VLDRBS16:
return ARM::MVE_VLDRBS16_post;
@@ -1449,7 +1449,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
// Thumb1 doesn't have updating LDR/STR.
// FIXME: Use LDM/STM with single register instead.
if (isThumb1) return false;
- LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);
+ LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);
Register Base = getLoadStoreBaseOp(*MI).getReg();
bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
@@ -1491,7 +1491,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
} else
return false;
}
- LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
+ LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
MBB.erase(MergeInstr);
ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
@@ -1503,54 +1503,54 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
// updating load/store-multiple instructions can be used with only one
// register.)
MachineOperand &MO = MI->getOperand(0);
- auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
- .addReg(Base, getDefRegState(true)) // WB base register
- .addReg(Base, getKillRegState(isLd ? BaseKill : false))
- .addImm(Pred)
- .addReg(PredReg)
- .addReg(MO.getReg(), (isLd ? getDefRegState(true)
- : getKillRegState(MO.isKill())))
- .cloneMemRefs(*MI);
- (void)MIB;
- LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
+ auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
+ .addReg(Base, getDefRegState(true)) // WB base register
+ .addReg(Base, getKillRegState(isLd ? BaseKill : false))
+ .addImm(Pred)
+ .addReg(PredReg)
+ .addReg(MO.getReg(), (isLd ? getDefRegState(true)
+ : getKillRegState(MO.isKill())))
+ .cloneMemRefs(*MI);
+ (void)MIB;
+ LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
} else if (isLd) {
if (isAM2) {
// LDR_PRE, LDR_POST
if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
- auto MIB =
- BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
- .addReg(Base, RegState::Define)
- .addReg(Base)
- .addImm(Offset)
- .addImm(Pred)
- .addReg(PredReg)
- .cloneMemRefs(*MI);
- (void)MIB;
- LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
+ auto MIB =
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base)
+ .addImm(Offset)
+ .addImm(Pred)
+ .addReg(PredReg)
+ .cloneMemRefs(*MI);
+ (void)MIB;
+ LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
} else {
int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
- auto MIB =
- BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
- .addReg(Base, RegState::Define)
- .addReg(Base)
- .addReg(0)
- .addImm(Imm)
- .add(predOps(Pred, PredReg))
- .cloneMemRefs(*MI);
- (void)MIB;
- LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
+ auto MIB =
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base)
+ .addReg(0)
+ .addImm(Imm)
+ .add(predOps(Pred, PredReg))
+ .cloneMemRefs(*MI);
+ (void)MIB;
+ LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
}
} else {
// t2LDR_PRE, t2LDR_POST
- auto MIB =
- BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
- .addReg(Base, RegState::Define)
- .addReg(Base)
- .addImm(Offset)
- .add(predOps(Pred, PredReg))
- .cloneMemRefs(*MI);
- (void)MIB;
- LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
+ auto MIB =
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base)
+ .addImm(Offset)
+ .add(predOps(Pred, PredReg))
+ .cloneMemRefs(*MI);
+ (void)MIB;
+ LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
}
} else {
MachineOperand &MO = MI->getOperand(0);
@@ -1560,25 +1560,25 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
// STR_PRE, STR_POST
- auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
- .addReg(MO.getReg(), getKillRegState(MO.isKill()))
- .addReg(Base)
- .addReg(0)
- .addImm(Imm)
- .add(predOps(Pred, PredReg))
- .cloneMemRefs(*MI);
- (void)MIB;
- LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
+ auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
+ .addReg(MO.getReg(), getKillRegState(MO.isKill()))
+ .addReg(Base)
+ .addReg(0)
+ .addImm(Imm)
+ .add(predOps(Pred, PredReg))
+ .cloneMemRefs(*MI);
+ (void)MIB;
+ LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
} else {
// t2STR_PRE, t2STR_POST
- auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
- .addReg(MO.getReg(), getKillRegState(MO.isKill()))
- .addReg(Base)
- .addImm(Offset)
- .add(predOps(Pred, PredReg))
- .cloneMemRefs(*MI);
- (void)MIB;
- LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
+ auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
+ .addReg(MO.getReg(), getKillRegState(MO.isKill()))
+ .addReg(Base)
+ .addImm(Offset)
+ .add(predOps(Pred, PredReg))
+ .cloneMemRefs(*MI);
+ (void)MIB;
+ LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
}
}
MBB.erase(MBBI);
@@ -1592,7 +1592,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
"Must have t2STRDi8 or t2LDRDi8");
if (MI.getOperand(3).getImm() != 0)
return false;
- LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << MI);
+ LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << MI);
// Behaviour for writeback is undefined if base register is the same as one
// of the others.
@@ -1620,7 +1620,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
} else
return false;
}
- LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
+ LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
MBB.erase(MergeInstr);
DebugLoc DL = MI.getDebugLoc();
@@ -1642,7 +1642,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
MIB.add(MO);
MIB.cloneMemRefs(MI);
- LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB);
+ LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB);
MBB.erase(MBBI);
return true;
}
@@ -2585,169 +2585,169 @@ static int getBaseOperandIndex(MachineInstr &MI) {
case ARM::MVE_VSTRBU8:
case ARM::MVE_VSTRHU16:
case ARM::MVE_VSTRWU32:
- case ARM::t2LDRHi8:
- case ARM::t2LDRHi12:
- case ARM::t2LDRSHi8:
- case ARM::t2LDRSHi12:
- case ARM::t2LDRBi8:
- case ARM::t2LDRBi12:
- case ARM::t2LDRSBi8:
- case ARM::t2LDRSBi12:
- case ARM::t2STRBi8:
- case ARM::t2STRBi12:
- case ARM::t2STRHi8:
- case ARM::t2STRHi12:
+ case ARM::t2LDRHi8:
+ case ARM::t2LDRHi12:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRSHi12:
+ case ARM::t2LDRBi8:
+ case ARM::t2LDRBi12:
+ case ARM::t2LDRSBi8:
+ case ARM::t2LDRSBi12:
+ case ARM::t2STRBi8:
+ case ARM::t2STRBi12:
+ case ARM::t2STRHi8:
+ case ARM::t2STRHi12:
return 1;
- case ARM::MVE_VLDRBS16_post:
- case ARM::MVE_VLDRBS32_post:
- case ARM::MVE_VLDRBU16_post:
- case ARM::MVE_VLDRBU32_post:
- case ARM::MVE_VLDRHS32_post:
- case ARM::MVE_VLDRHU32_post:
- case ARM::MVE_VLDRBU8_post:
- case ARM::MVE_VLDRHU16_post:
- case ARM::MVE_VLDRWU32_post:
- case ARM::MVE_VSTRB16_post:
- case ARM::MVE_VSTRB32_post:
- case ARM::MVE_VSTRH32_post:
- case ARM::MVE_VSTRBU8_post:
- case ARM::MVE_VSTRHU16_post:
- case ARM::MVE_VSTRWU32_post:
- case ARM::MVE_VLDRBS16_pre:
- case ARM::MVE_VLDRBS32_pre:
- case ARM::MVE_VLDRBU16_pre:
- case ARM::MVE_VLDRBU32_pre:
- case ARM::MVE_VLDRHS32_pre:
- case ARM::MVE_VLDRHU32_pre:
- case ARM::MVE_VLDRBU8_pre:
- case ARM::MVE_VLDRHU16_pre:
- case ARM::MVE_VLDRWU32_pre:
- case ARM::MVE_VSTRB16_pre:
- case ARM::MVE_VSTRB32_pre:
- case ARM::MVE_VSTRH32_pre:
- case ARM::MVE_VSTRBU8_pre:
- case ARM::MVE_VSTRHU16_pre:
- case ARM::MVE_VSTRWU32_pre:
- return 2;
+ case ARM::MVE_VLDRBS16_post:
+ case ARM::MVE_VLDRBS32_post:
+ case ARM::MVE_VLDRBU16_post:
+ case ARM::MVE_VLDRBU32_post:
+ case ARM::MVE_VLDRHS32_post:
+ case ARM::MVE_VLDRHU32_post:
+ case ARM::MVE_VLDRBU8_post:
+ case ARM::MVE_VLDRHU16_post:
+ case ARM::MVE_VLDRWU32_post:
+ case ARM::MVE_VSTRB16_post:
+ case ARM::MVE_VSTRB32_post:
+ case ARM::MVE_VSTRH32_post:
+ case ARM::MVE_VSTRBU8_post:
+ case ARM::MVE_VSTRHU16_post:
+ case ARM::MVE_VSTRWU32_post:
+ case ARM::MVE_VLDRBS16_pre:
+ case ARM::MVE_VLDRBS32_pre:
+ case ARM::MVE_VLDRBU16_pre:
+ case ARM::MVE_VLDRBU32_pre:
+ case ARM::MVE_VLDRHS32_pre:
+ case ARM::MVE_VLDRHU32_pre:
+ case ARM::MVE_VLDRBU8_pre:
+ case ARM::MVE_VLDRHU16_pre:
+ case ARM::MVE_VLDRWU32_pre:
+ case ARM::MVE_VSTRB16_pre:
+ case ARM::MVE_VSTRB32_pre:
+ case ARM::MVE_VSTRH32_pre:
+ case ARM::MVE_VSTRBU8_pre:
+ case ARM::MVE_VSTRHU16_pre:
+ case ARM::MVE_VSTRWU32_pre:
+ return 2;
}
return -1;
}
-static bool isPostIndex(MachineInstr &MI) {
- switch (MI.getOpcode()) {
- case ARM::MVE_VLDRBS16_post:
- case ARM::MVE_VLDRBS32_post:
- case ARM::MVE_VLDRBU16_post:
- case ARM::MVE_VLDRBU32_post:
- case ARM::MVE_VLDRHS32_post:
- case ARM::MVE_VLDRHU32_post:
- case ARM::MVE_VLDRBU8_post:
- case ARM::MVE_VLDRHU16_post:
- case ARM::MVE_VLDRWU32_post:
- case ARM::MVE_VSTRB16_post:
- case ARM::MVE_VSTRB32_post:
- case ARM::MVE_VSTRH32_post:
- case ARM::MVE_VSTRBU8_post:
- case ARM::MVE_VSTRHU16_post:
- case ARM::MVE_VSTRWU32_post:
- return true;
- }
- return false;
-}
-
-static bool isPreIndex(MachineInstr &MI) {
- switch (MI.getOpcode()) {
- case ARM::MVE_VLDRBS16_pre:
- case ARM::MVE_VLDRBS32_pre:
- case ARM::MVE_VLDRBU16_pre:
- case ARM::MVE_VLDRBU32_pre:
- case ARM::MVE_VLDRHS32_pre:
- case ARM::MVE_VLDRHU32_pre:
- case ARM::MVE_VLDRBU8_pre:
- case ARM::MVE_VLDRHU16_pre:
- case ARM::MVE_VLDRWU32_pre:
- case ARM::MVE_VSTRB16_pre:
- case ARM::MVE_VSTRB32_pre:
- case ARM::MVE_VSTRH32_pre:
- case ARM::MVE_VSTRBU8_pre:
- case ARM::MVE_VSTRHU16_pre:
- case ARM::MVE_VSTRWU32_pre:
- return true;
- }
- return false;
-}
-
-// Given a memory access Opcode, check that the given Imm would be a valid Offset
-// for this instruction (same as isLegalAddressImm), or if the instruction
-// could be easily converted to one where that was valid. For example converting
-// t2LDRi12 to t2LDRi8 for negative offsets. Works in conjunction with
-// AdjustBaseAndOffset below.
-static bool isLegalOrConvertableAddressImm(unsigned Opcode, int Imm,
- const TargetInstrInfo *TII,
- int &CodesizeEstimate) {
- if (isLegalAddressImm(Opcode, Imm, TII))
- return true;
-
- // We can convert AddrModeT2_i12 to AddrModeT2_i8.
- const MCInstrDesc &Desc = TII->get(Opcode);
- unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
- switch (AddrMode) {
- case ARMII::AddrModeT2_i12:
- CodesizeEstimate += 1;
- return std::abs(Imm) < (((1 << 8) * 1) - 1);
- }
- return false;
-}
-
-// Given an MI, adjust its address BaseReg to use NewBaseReg and its address offset
-// by -Offset. This can either happen in-place or be a replacement as MI is
-// converted to another instruction type.
-static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg,
- int Offset, const TargetInstrInfo *TII) {
- unsigned BaseOp = getBaseOperandIndex(*MI);
- MI->getOperand(BaseOp).setReg(NewBaseReg);
- int OldOffset = MI->getOperand(BaseOp + 1).getImm();
- if (isLegalAddressImm(MI->getOpcode(), OldOffset - Offset, TII))
- MI->getOperand(BaseOp + 1).setImm(OldOffset - Offset);
- else {
- unsigned ConvOpcode;
- switch (MI->getOpcode()) {
- case ARM::t2LDRHi12:
- ConvOpcode = ARM::t2LDRHi8;
- break;
- case ARM::t2LDRSHi12:
- ConvOpcode = ARM::t2LDRSHi8;
- break;
- case ARM::t2LDRBi12:
- ConvOpcode = ARM::t2LDRBi8;
- break;
- case ARM::t2LDRSBi12:
- ConvOpcode = ARM::t2LDRSBi8;
- break;
- case ARM::t2STRHi12:
- ConvOpcode = ARM::t2STRHi8;
- break;
- case ARM::t2STRBi12:
- ConvOpcode = ARM::t2STRBi8;
- break;
- default:
- llvm_unreachable("Unhandled convertable opcode");
- }
- assert(isLegalAddressImm(ConvOpcode, OldOffset - Offset, TII) &&
- "Illegal Address Immediate after convert!");
-
- const MCInstrDesc &MCID = TII->get(ConvOpcode);
- BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
- .add(MI->getOperand(0))
- .add(MI->getOperand(1))
- .addImm(OldOffset - Offset)
- .add(MI->getOperand(3))
- .add(MI->getOperand(4))
- .cloneMemRefs(*MI);
- MI->eraseFromParent();
- }
-}
-
+static bool isPostIndex(MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case ARM::MVE_VLDRBS16_post:
+ case ARM::MVE_VLDRBS32_post:
+ case ARM::MVE_VLDRBU16_post:
+ case ARM::MVE_VLDRBU32_post:
+ case ARM::MVE_VLDRHS32_post:
+ case ARM::MVE_VLDRHU32_post:
+ case ARM::MVE_VLDRBU8_post:
+ case ARM::MVE_VLDRHU16_post:
+ case ARM::MVE_VLDRWU32_post:
+ case ARM::MVE_VSTRB16_post:
+ case ARM::MVE_VSTRB32_post:
+ case ARM::MVE_VSTRH32_post:
+ case ARM::MVE_VSTRBU8_post:
+ case ARM::MVE_VSTRHU16_post:
+ case ARM::MVE_VSTRWU32_post:
+ return true;
+ }
+ return false;
+}
+
+static bool isPreIndex(MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case ARM::MVE_VLDRBS16_pre:
+ case ARM::MVE_VLDRBS32_pre:
+ case ARM::MVE_VLDRBU16_pre:
+ case ARM::MVE_VLDRBU32_pre:
+ case ARM::MVE_VLDRHS32_pre:
+ case ARM::MVE_VLDRHU32_pre:
+ case ARM::MVE_VLDRBU8_pre:
+ case ARM::MVE_VLDRHU16_pre:
+ case ARM::MVE_VLDRWU32_pre:
+ case ARM::MVE_VSTRB16_pre:
+ case ARM::MVE_VSTRB32_pre:
+ case ARM::MVE_VSTRH32_pre:
+ case ARM::MVE_VSTRBU8_pre:
+ case ARM::MVE_VSTRHU16_pre:
+ case ARM::MVE_VSTRWU32_pre:
+ return true;
+ }
+ return false;
+}
+
+// Given a memory access Opcode, check that the given Imm would be a valid Offset
+// for this instruction (same as isLegalAddressImm), or if the instruction
+// could be easily converted to one where that was valid. For example converting
+// t2LDRi12 to t2LDRi8 for negative offsets. Works in conjunction with
+// AdjustBaseAndOffset below.
+static bool isLegalOrConvertableAddressImm(unsigned Opcode, int Imm,
+ const TargetInstrInfo *TII,
+ int &CodesizeEstimate) {
+ if (isLegalAddressImm(Opcode, Imm, TII))
+ return true;
+
+ // We can convert AddrModeT2_i12 to AddrModeT2_i8.
+ const MCInstrDesc &Desc = TII->get(Opcode);
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i12:
+ CodesizeEstimate += 1;
+ return std::abs(Imm) < (((1 << 8) * 1) - 1);
+ }
+ return false;
+}
+
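A standalone model of the range reasoning used above (the i8 bound mirrors the check in the function; the i12 bound is an assumption about the unsigned 12-bit form, and neither helper is the real isLegalAddressImm):

#include <cstdlib>

// Assumed T2 i12 addressing range: unsigned offsets 0..4095.
static bool fitsT2i12(int Imm) { return Imm >= 0 && Imm < 4096; }

// Range used by the conversion check above: |Imm| < 255 for the i8 form.
static bool fitsT2i8(int Imm) { return std::abs(Imm) < 255; }

static bool legalOrConvertible(int Imm, int &CodesizeEstimate) {
  if (fitsT2i12(Imm))
    return true;
  ++CodesizeEstimate; // assume the converted i8 form encodes less compactly
  return fitsT2i8(Imm);
}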
+// Given an MI, adjust its address BaseReg to use NewBaseReg and its address offset
+// by -Offset. This can either happen in-place or be a replacement as MI is
+// converted to another instruction type.
+static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg,
+ int Offset, const TargetInstrInfo *TII) {
+ unsigned BaseOp = getBaseOperandIndex(*MI);
+ MI->getOperand(BaseOp).setReg(NewBaseReg);
+ int OldOffset = MI->getOperand(BaseOp + 1).getImm();
+ if (isLegalAddressImm(MI->getOpcode(), OldOffset - Offset, TII))
+ MI->getOperand(BaseOp + 1).setImm(OldOffset - Offset);
+ else {
+ unsigned ConvOpcode;
+ switch (MI->getOpcode()) {
+ case ARM::t2LDRHi12:
+ ConvOpcode = ARM::t2LDRHi8;
+ break;
+ case ARM::t2LDRSHi12:
+ ConvOpcode = ARM::t2LDRSHi8;
+ break;
+ case ARM::t2LDRBi12:
+ ConvOpcode = ARM::t2LDRBi8;
+ break;
+ case ARM::t2LDRSBi12:
+ ConvOpcode = ARM::t2LDRSBi8;
+ break;
+ case ARM::t2STRHi12:
+ ConvOpcode = ARM::t2STRHi8;
+ break;
+ case ARM::t2STRBi12:
+ ConvOpcode = ARM::t2STRBi8;
+ break;
+ default:
+ llvm_unreachable("Unhandled convertable opcode");
+ }
+ assert(isLegalAddressImm(ConvOpcode, OldOffset - Offset, TII) &&
+ "Illegal Address Immediate after convert!");
+
+ const MCInstrDesc &MCID = TII->get(ConvOpcode);
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(1))
+ .addImm(OldOffset - Offset)
+ .add(MI->getOperand(3))
+ .add(MI->getOperand(4))
+ .cloneMemRefs(*MI);
+ MI->eraseFromParent();
+ }
+}
+
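The arithmetic applied above, as a tiny worked example with assumed numbers: once the base register has been advanced by Offset, an access that used OldOffset from the old base must use OldOffset - Offset from the new one.

// Sketch of the rebasing rule; the values are illustrative only.
static int rebasedOffset(int OldOffset, int Offset) { return OldOffset - Offset; }
// e.g. rebasedOffset(4, 16) == -12, matching the example in DistributeIncrements below.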
static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset,
Register NewReg,
const TargetInstrInfo *TII,
@@ -2766,70 +2766,70 @@ static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset,
TRC = TII->getRegClass(MCID, 2, TRI, *MF);
MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC);
- unsigned AddrMode = (MCID.TSFlags & ARMII::AddrModeMask);
- switch (AddrMode) {
- case ARMII::AddrModeT2_i7:
- case ARMII::AddrModeT2_i7s2:
- case ARMII::AddrModeT2_i7s4:
- // Any MVE load/store
- return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
- .addReg(NewReg, RegState::Define)
- .add(MI->getOperand(0))
- .add(MI->getOperand(1))
- .addImm(Offset)
- .add(MI->getOperand(3))
- .add(MI->getOperand(4))
- .cloneMemRefs(*MI);
- case ARMII::AddrModeT2_i8:
- if (MI->mayLoad()) {
- return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
- .add(MI->getOperand(0))
- .addReg(NewReg, RegState::Define)
- .add(MI->getOperand(1))
- .addImm(Offset)
- .add(MI->getOperand(3))
- .add(MI->getOperand(4))
- .cloneMemRefs(*MI);
- } else {
- return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
- .addReg(NewReg, RegState::Define)
- .add(MI->getOperand(0))
- .add(MI->getOperand(1))
- .addImm(Offset)
- .add(MI->getOperand(3))
- .add(MI->getOperand(4))
- .cloneMemRefs(*MI);
- }
- default:
- llvm_unreachable("Unhandled createPostIncLoadStore");
- }
+ unsigned AddrMode = (MCID.TSFlags & ARMII::AddrModeMask);
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i7:
+ case ARMII::AddrModeT2_i7s2:
+ case ARMII::AddrModeT2_i7s4:
+ // Any MVE load/store
+ return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
+ .addReg(NewReg, RegState::Define)
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(1))
+ .addImm(Offset)
+ .add(MI->getOperand(3))
+ .add(MI->getOperand(4))
+ .cloneMemRefs(*MI);
+ case ARMII::AddrModeT2_i8:
+ if (MI->mayLoad()) {
+ return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
+ .add(MI->getOperand(0))
+ .addReg(NewReg, RegState::Define)
+ .add(MI->getOperand(1))
+ .addImm(Offset)
+ .add(MI->getOperand(3))
+ .add(MI->getOperand(4))
+ .cloneMemRefs(*MI);
+ } else {
+ return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
+ .addReg(NewReg, RegState::Define)
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(1))
+ .addImm(Offset)
+ .add(MI->getOperand(3))
+ .add(MI->getOperand(4))
+ .cloneMemRefs(*MI);
+ }
+ default:
+ llvm_unreachable("Unhandled createPostIncLoadStore");
+ }
}
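For the AddrModeT2_i7* cases handled above, a standalone sketch of the constraint they imply (an assumed model of the scaled 7-bit MVE offsets, not the real isLegalAddressImm): the offset must be a multiple of the access size and fit in seven bits after scaling.

#include <cstdlib>

// Hypothetical model: Scale is 1, 2 or 4 for the _i7, _i7s2 and _i7s4 modes.
static bool fitsMVEImm7(int Imm, int Scale) {
  if (Imm % Scale != 0)
    return false;
  return std::abs(Imm) < 128 * Scale;
}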
// Given a Base Register, optimise the load/store uses to attempt to create more
-// post-inc accesses and fewer register moves. We do this by taking zero offset
-// loads/stores with an add, and converting them to a postinc load/store of the
-// same type. Any subsequent accesses will be adjusted to use and account for
-// the post-inc value.
+// post-inc accesses and fewer register moves. We do this by taking zero offset
+// loads/stores with an add, and converting them to a postinc load/store of the
+// same type. Any subsequent accesses will be adjusted to use and account for
+// the post-inc value.
// For example:
// LDR #0 LDR_POSTINC #16
// LDR #4 LDR #-12
// LDR #8 LDR #-8
// LDR #12 LDR #-4
// ADD #16
-//
-// At the same time if we do not find an increment but do find an existing
-// pre/post inc instruction, we can still adjust the offsets of subsequent
-// instructions to save the register move that would otherwise be needed for the
-// in-place increment.
+//
+// At the same time if we do not find an increment but do find an existing
+// pre/post inc instruction, we can still adjust the offsets of subsequent
+// instructions to save the register move that would otherwise be needed for the
+// in-place increment.
bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
// We are looking for:
// One zero offset load/store that can become postinc
MachineInstr *BaseAccess = nullptr;
- MachineInstr *PrePostInc = nullptr;
+ MachineInstr *PrePostInc = nullptr;
// An increment that can be folded in
MachineInstr *Increment = nullptr;
// Other accesses after BaseAccess that will need to be updated to use the
- // postinc value.
+ // postinc value.
SmallPtrSet<MachineInstr *, 8> OtherAccesses;
for (auto &Use : MRI->use_nodbg_instructions(Base)) {
if (!Increment && getAddSubImmediate(Use) != 0) {
@@ -2844,81 +2844,81 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
if (!Use.getOperand(BaseOp).isReg() ||
Use.getOperand(BaseOp).getReg() != Base)
return false;
- if (isPreIndex(Use) || isPostIndex(Use))
- PrePostInc = &Use;
- else if (Use.getOperand(BaseOp + 1).getImm() == 0)
+ if (isPreIndex(Use) || isPostIndex(Use))
+ PrePostInc = &Use;
+ else if (Use.getOperand(BaseOp + 1).getImm() == 0)
BaseAccess = &Use;
else
OtherAccesses.insert(&Use);
}
- int IncrementOffset;
- Register NewBaseReg;
- if (BaseAccess && Increment) {
- if (PrePostInc || BaseAccess->getParent() != Increment->getParent())
- return false;
- Register PredReg;
- if (Increment->definesRegister(ARM::CPSR) ||
- getInstrPredicate(*Increment, PredReg) != ARMCC::AL)
- return false;
-
- LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg "
- << Base.virtRegIndex() << "\n");
-
- // Make sure that Increment has no uses before BaseAccess.
- for (MachineInstr &Use :
- MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) {
- if (!DT->dominates(BaseAccess, &Use) || &Use == BaseAccess) {
- LLVM_DEBUG(dbgs() << " BaseAccess doesn't dominate use of increment\n");
- return false;
- }
- }
-
- // Make sure that Increment can be folded into Base
- IncrementOffset = getAddSubImmediate(*Increment);
- unsigned NewPostIncOpcode = getPostIndexedLoadStoreOpcode(
- BaseAccess->getOpcode(), IncrementOffset > 0 ? ARM_AM::add : ARM_AM::sub);
- if (!isLegalAddressImm(NewPostIncOpcode, IncrementOffset, TII)) {
- LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on postinc\n");
+ int IncrementOffset;
+ Register NewBaseReg;
+ if (BaseAccess && Increment) {
+ if (PrePostInc || BaseAccess->getParent() != Increment->getParent())
+ return false;
+ Register PredReg;
+ if (Increment->definesRegister(ARM::CPSR) ||
+ getInstrPredicate(*Increment, PredReg) != ARMCC::AL)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg "
+ << Base.virtRegIndex() << "\n");
+
+ // Make sure that Increment has no uses before BaseAccess.
+ for (MachineInstr &Use :
+ MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) {
+ if (!DT->dominates(BaseAccess, &Use) || &Use == BaseAccess) {
+ LLVM_DEBUG(dbgs() << " BaseAccess doesn't dominate use of increment\n");
+ return false;
+ }
+ }
+
+ // Make sure that Increment can be folded into Base
+ IncrementOffset = getAddSubImmediate(*Increment);
+ unsigned NewPostIncOpcode = getPostIndexedLoadStoreOpcode(
+ BaseAccess->getOpcode(), IncrementOffset > 0 ? ARM_AM::add : ARM_AM::sub);
+ if (!isLegalAddressImm(NewPostIncOpcode, IncrementOffset, TII)) {
+ LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on postinc\n");
return false;
}
}
- else if (PrePostInc) {
- // If we already have a pre/post index load/store then set BaseAccess,
- // IncrementOffset and NewBaseReg to the values it already produces,
-  // allowing us to update any subsequent uses of BaseOp reg with the
- // incremented value.
- if (Increment)
- return false;
-
- LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on already "
- << "indexed VirtualReg " << Base.virtRegIndex() << "\n");
- int BaseOp = getBaseOperandIndex(*PrePostInc);
- IncrementOffset = PrePostInc->getOperand(BaseOp+1).getImm();
- BaseAccess = PrePostInc;
- NewBaseReg = PrePostInc->getOperand(0).getReg();
- }
- else
+ else if (PrePostInc) {
+ // If we already have a pre/post index load/store then set BaseAccess,
+ // IncrementOffset and NewBaseReg to the values it already produces,
+  // allowing us to update any subsequent uses of BaseOp reg with the
+ // incremented value.
+ if (Increment)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on already "
+ << "indexed VirtualReg " << Base.virtRegIndex() << "\n");
+ int BaseOp = getBaseOperandIndex(*PrePostInc);
+ IncrementOffset = PrePostInc->getOperand(BaseOp+1).getImm();
+ BaseAccess = PrePostInc;
+ NewBaseReg = PrePostInc->getOperand(0).getReg();
+ }
+ else
return false;
// And make sure that the negative value of increment can be added to all
// other offsets after the BaseAccess. We rely on either
// dominates(BaseAccess, OtherAccess) or dominates(OtherAccess, BaseAccess)
// to keep things simple.
- // This also adds a simple codesize metric, to detect if an instruction (like
- // t2LDRBi12) which can often be shrunk to a thumb1 instruction (tLDRBi)
- // cannot because it is converted to something else (t2LDRBi8). We start this
- // at -1 for the gain from removing the increment.
+ // This also adds a simple codesize metric, to detect if an instruction (like
+ // t2LDRBi12) which can often be shrunk to a thumb1 instruction (tLDRBi)
+ // cannot because it is converted to something else (t2LDRBi8). We start this
+ // at -1 for the gain from removing the increment.
SmallPtrSet<MachineInstr *, 4> SuccessorAccesses;
- int CodesizeEstimate = -1;
+ int CodesizeEstimate = -1;
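// A hedged sketch of the codesize bookkeeping used here: the estimate starts
// at -1 to credit the removed add, and grows whenever rewriting an access
// forces a wider encoding (the t2LDRBi12 -> t2LDRBi8 case mentioned above).
// The cost flag below is an assumption standing in for the real per-opcode
// query performed by isLegalOrConvertableAddressImm.
#include <vector>

struct RewrittenAccess {
  bool LosesNarrowEncoding; // true if the new offset needs a wider instruction
};

static bool profitableUnderMinSize(const std::vector<RewrittenAccess> &Accesses) {
  int CodesizeEstimate = -1; // gain from deleting the increment instruction
  for (const RewrittenAccess &A : Accesses)
    if (A.LosesNarrowEncoding)
      ++CodesizeEstimate;
  // Mirrors the hasMinSize() check below: bail out if we expect net growth.
  return CodesizeEstimate <= 0;
}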
for (auto *Use : OtherAccesses) {
if (DT->dominates(BaseAccess, Use)) {
SuccessorAccesses.insert(Use);
unsigned BaseOp = getBaseOperandIndex(*Use);
- if (!isLegalOrConvertableAddressImm(Use->getOpcode(),
- Use->getOperand(BaseOp + 1).getImm() -
- IncrementOffset,
- TII, CodesizeEstimate)) {
+ if (!isLegalOrConvertableAddressImm(Use->getOpcode(),
+ Use->getOperand(BaseOp + 1).getImm() -
+ IncrementOffset,
+ TII, CodesizeEstimate)) {
LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on use\n");
return false;
}
@@ -2928,27 +2928,27 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
return false;
}
}
- if (STI->hasMinSize() && CodesizeEstimate > 0) {
- LLVM_DEBUG(dbgs() << " Expected to grow instructions under minsize\n");
- return false;
- }
-
- if (!PrePostInc) {
- // Replace BaseAccess with a post inc
- LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump());
- LLVM_DEBUG(dbgs() << " And : "; Increment->dump());
- NewBaseReg = Increment->getOperand(0).getReg();
- MachineInstr *BaseAccessPost =
- createPostIncLoadStore(BaseAccess, IncrementOffset, NewBaseReg, TII, TRI);
- BaseAccess->eraseFromParent();
- Increment->eraseFromParent();
- (void)BaseAccessPost;
- LLVM_DEBUG(dbgs() << " To : "; BaseAccessPost->dump());
- }
+ if (STI->hasMinSize() && CodesizeEstimate > 0) {
+ LLVM_DEBUG(dbgs() << " Expected to grow instructions under minsize\n");
+ return false;
+ }
+
+ if (!PrePostInc) {
+ // Replace BaseAccess with a post inc
+ LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump());
+ LLVM_DEBUG(dbgs() << " And : "; Increment->dump());
+ NewBaseReg = Increment->getOperand(0).getReg();
+ MachineInstr *BaseAccessPost =
+ createPostIncLoadStore(BaseAccess, IncrementOffset, NewBaseReg, TII, TRI);
+ BaseAccess->eraseFromParent();
+ Increment->eraseFromParent();
+ (void)BaseAccessPost;
+ LLVM_DEBUG(dbgs() << " To : "; BaseAccessPost->dump());
+ }
for (auto *Use : SuccessorAccesses) {
LLVM_DEBUG(dbgs() << "Changing: "; Use->dump());
- AdjustBaseAndOffset(Use, NewBaseReg, IncrementOffset, TII);
+ AdjustBaseAndOffset(Use, NewBaseReg, IncrementOffset, TII);
LLVM_DEBUG(dbgs() << " To : "; Use->dump());
}
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMLowOverheadLoops.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMLowOverheadLoops.cpp
index 8dc5320584..144e845550 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -56,7 +56,7 @@
#include "ARMBaseRegisterInfo.h"
#include "ARMBasicBlockInfo.h"
#include "ARMSubtarget.h"
-#include "MVETailPredUtils.h"
+#include "MVETailPredUtils.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallSet.h"
@@ -74,37 +74,37 @@ using namespace llvm;
#define DEBUG_TYPE "arm-low-overhead-loops"
#define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
-static cl::opt<bool>
-DisableTailPredication("arm-loloops-disable-tailpred", cl::Hidden,
- cl::desc("Disable tail-predication in the ARM LowOverheadLoop pass"),
- cl::init(false));
-
-static bool isVectorPredicated(MachineInstr *MI) {
- int PIdx = llvm::findFirstVPTPredOperandIdx(*MI);
- return PIdx != -1 && MI->getOperand(PIdx + 1).getReg() == ARM::VPR;
-}
-
-static bool isVectorPredicate(MachineInstr *MI) {
- return MI->findRegisterDefOperandIdx(ARM::VPR) != -1;
-}
-
-static bool hasVPRUse(MachineInstr &MI) {
- return MI.findRegisterUseOperandIdx(ARM::VPR) != -1;
-}
-
-static bool isDomainMVE(MachineInstr *MI) {
- uint64_t Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
- return Domain == ARMII::DomainMVE;
-}
-
-static bool shouldInspect(MachineInstr &MI) {
- return isDomainMVE(&MI) || isVectorPredicate(&MI) || hasVPRUse(MI);
-}
-
-static bool isDo(MachineInstr *MI) {
- return MI->getOpcode() != ARM::t2WhileLoopStart;
-}
-
+static cl::opt<bool>
+DisableTailPredication("arm-loloops-disable-tailpred", cl::Hidden,
+ cl::desc("Disable tail-predication in the ARM LowOverheadLoop pass"),
+ cl::init(false));
+
+static bool isVectorPredicated(MachineInstr *MI) {
+ int PIdx = llvm::findFirstVPTPredOperandIdx(*MI);
+ return PIdx != -1 && MI->getOperand(PIdx + 1).getReg() == ARM::VPR;
+}
+
+static bool isVectorPredicate(MachineInstr *MI) {
+ return MI->findRegisterDefOperandIdx(ARM::VPR) != -1;
+}
+
+static bool hasVPRUse(MachineInstr &MI) {
+ return MI.findRegisterUseOperandIdx(ARM::VPR) != -1;
+}
+
+static bool isDomainMVE(MachineInstr *MI) {
+ uint64_t Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
+ return Domain == ARMII::DomainMVE;
+}
+
+static bool shouldInspect(MachineInstr &MI) {
+ return isDomainMVE(&MI) || isVectorPredicate(&MI) || hasVPRUse(MI);
+}
+
+static bool isDo(MachineInstr *MI) {
+ return MI->getOpcode() != ARM::t2WhileLoopStart;
+}
+
namespace {
using InstSet = SmallPtrSetImpl<MachineInstr *>;
@@ -143,7 +143,7 @@ namespace {
// Insert exit blocks.
SmallVector<MachineBasicBlock*, 2> ExitBlocks;
ML.getExitBlocks(ExitBlocks);
- append_range(Order, ExitBlocks);
+ append_range(Order, ExitBlocks);
// Then add the loop body.
Search(ML.getHeader());
@@ -174,187 +174,187 @@ namespace {
}
};
- // Represent the current state of the VPR and hold all instances which
- // represent a VPT block, which is a list of instructions that begins with a
-  // VPT/VPST and has a maximum of four following instructions. All
- // instructions within the block are predicated upon the vpr and we allow
-  // instructions to define the vpr within the block too.
- class VPTState {
- friend struct LowOverheadLoop;
-
- SmallVector<MachineInstr *, 4> Insts;
-
- static SmallVector<VPTState, 4> Blocks;
- static SetVector<MachineInstr *> CurrentPredicates;
- static std::map<MachineInstr *,
- std::unique_ptr<PredicatedMI>> PredicatedInsts;
-
- static void CreateVPTBlock(MachineInstr *MI) {
- assert((CurrentPredicates.size() || MI->getParent()->isLiveIn(ARM::VPR))
- && "Can't begin VPT without predicate");
- Blocks.emplace_back(MI);
- // The execution of MI is predicated upon the current set of instructions
- // that are AND'ed together to form the VPR predicate value. In the case
- // that MI is a VPT, CurrentPredicates will also just be MI.
- PredicatedInsts.emplace(
- MI, std::make_unique<PredicatedMI>(MI, CurrentPredicates));
- }
-
- static void reset() {
- Blocks.clear();
- PredicatedInsts.clear();
- CurrentPredicates.clear();
- }
-
- static void addInst(MachineInstr *MI) {
- Blocks.back().insert(MI);
- PredicatedInsts.emplace(
- MI, std::make_unique<PredicatedMI>(MI, CurrentPredicates));
- }
-
- static void addPredicate(MachineInstr *MI) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Adding VPT Predicate: " << *MI);
- CurrentPredicates.insert(MI);
+ // Represent the current state of the VPR and hold all instances which
+ // represent a VPT block, which is a list of instructions that begins with a
+  // VPT/VPST and has a maximum of four following instructions. All
+ // instructions within the block are predicated upon the vpr and we allow
+  // instructions to define the vpr within the block too.
+ class VPTState {
+ friend struct LowOverheadLoop;
+
+ SmallVector<MachineInstr *, 4> Insts;
+
+ static SmallVector<VPTState, 4> Blocks;
+ static SetVector<MachineInstr *> CurrentPredicates;
+ static std::map<MachineInstr *,
+ std::unique_ptr<PredicatedMI>> PredicatedInsts;
+
+ static void CreateVPTBlock(MachineInstr *MI) {
+ assert((CurrentPredicates.size() || MI->getParent()->isLiveIn(ARM::VPR))
+ && "Can't begin VPT without predicate");
+ Blocks.emplace_back(MI);
+ // The execution of MI is predicated upon the current set of instructions
+ // that are AND'ed together to form the VPR predicate value. In the case
+ // that MI is a VPT, CurrentPredicates will also just be MI.
+ PredicatedInsts.emplace(
+ MI, std::make_unique<PredicatedMI>(MI, CurrentPredicates));
}
- static void resetPredicate(MachineInstr *MI) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Resetting VPT Predicate: " << *MI);
- CurrentPredicates.clear();
- CurrentPredicates.insert(MI);
+ static void reset() {
+ Blocks.clear();
+ PredicatedInsts.clear();
+ CurrentPredicates.clear();
}
- public:
+ static void addInst(MachineInstr *MI) {
+ Blocks.back().insert(MI);
+ PredicatedInsts.emplace(
+ MI, std::make_unique<PredicatedMI>(MI, CurrentPredicates));
+ }
+
+ static void addPredicate(MachineInstr *MI) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Adding VPT Predicate: " << *MI);
+ CurrentPredicates.insert(MI);
+ }
+
+ static void resetPredicate(MachineInstr *MI) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Resetting VPT Predicate: " << *MI);
+ CurrentPredicates.clear();
+ CurrentPredicates.insert(MI);
+ }
+
+ public:
// Have we found an instruction within the block which defines the vpr? If
// so, not all the instructions in the block will have the same predicate.
- static bool hasUniformPredicate(VPTState &Block) {
- return getDivergent(Block) == nullptr;
+ static bool hasUniformPredicate(VPTState &Block) {
+ return getDivergent(Block) == nullptr;
}
- // If it exists, return the first internal instruction which modifies the
- // VPR.
- static MachineInstr *getDivergent(VPTState &Block) {
- SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
- for (unsigned i = 1; i < Insts.size(); ++i) {
- MachineInstr *Next = Insts[i];
- if (isVectorPredicate(Next))
- return Next; // Found an instruction altering the vpr.
- }
- return nullptr;
- }
-
- // Return whether the given instruction is predicated upon a VCTP.
- static bool isPredicatedOnVCTP(MachineInstr *MI, bool Exclusive = false) {
- SetVector<MachineInstr *> &Predicates = PredicatedInsts[MI]->Predicates;
- if (Exclusive && Predicates.size() != 1)
- return false;
- for (auto *PredMI : Predicates)
- if (isVCTP(PredMI))
- return true;
- return false;
- }
-
- // Is the VPST, controlling the block entry, predicated upon a VCTP.
- static bool isEntryPredicatedOnVCTP(VPTState &Block,
- bool Exclusive = false) {
- SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
- return isPredicatedOnVCTP(Insts.front(), Exclusive);
- }
-
- // If this block begins with a VPT, we can check whether it's using
-    // at least one predicated input, as well as possibly loop-invariant values,
-    // which would result in it being implicitly predicated.
- static bool hasImplicitlyValidVPT(VPTState &Block,
- ReachingDefAnalysis &RDA) {
- SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
- MachineInstr *VPT = Insts.front();
- assert(isVPTOpcode(VPT->getOpcode()) &&
- "Expected VPT block to begin with VPT/VPST");
-
- if (VPT->getOpcode() == ARM::MVE_VPST)
- return false;
-
- auto IsOperandPredicated = [&](MachineInstr *MI, unsigned Idx) {
- MachineInstr *Op = RDA.getMIOperand(MI, MI->getOperand(Idx));
- return Op && PredicatedInsts.count(Op) && isPredicatedOnVCTP(Op);
- };
-
- auto IsOperandInvariant = [&](MachineInstr *MI, unsigned Idx) {
- MachineOperand &MO = MI->getOperand(Idx);
- if (!MO.isReg() || !MO.getReg())
- return true;
-
- SmallPtrSet<MachineInstr *, 2> Defs;
- RDA.getGlobalReachingDefs(MI, MO.getReg(), Defs);
- if (Defs.empty())
- return true;
-
- for (auto *Def : Defs)
- if (Def->getParent() == VPT->getParent())
- return false;
- return true;
- };
-
- // Check that at least one of the operands is directly predicated on a
- // vctp and allow an invariant value too.
- return (IsOperandPredicated(VPT, 1) || IsOperandPredicated(VPT, 2)) &&
- (IsOperandPredicated(VPT, 1) || IsOperandInvariant(VPT, 1)) &&
- (IsOperandPredicated(VPT, 2) || IsOperandInvariant(VPT, 2));
- }
-
- static bool isValid(ReachingDefAnalysis &RDA) {
- // All predication within the loop should be based on vctp. If the block
- // isn't predicated on entry, check whether the vctp is within the block
- // and that all other instructions are then predicated on it.
- for (auto &Block : Blocks) {
- if (isEntryPredicatedOnVCTP(Block, false) ||
- hasImplicitlyValidVPT(Block, RDA))
- continue;
-
- SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
- // We don't know how to convert a block with just a VPT;VCTP into
- // anything valid once we remove the VCTP. For now just bail out.
- assert(isVPTOpcode(Insts.front()->getOpcode()) &&
- "Expected VPT block to start with a VPST or VPT!");
- if (Insts.size() == 2 && Insts.front()->getOpcode() != ARM::MVE_VPST &&
- isVCTP(Insts.back()))
- return false;
-
- for (auto *MI : Insts) {
- // Check that any internal VCTPs are 'Then' predicated.
- if (isVCTP(MI) && getVPTInstrPredicate(*MI) != ARMVCC::Then)
- return false;
- // Skip other instructions that build up the predicate.
- if (MI->getOpcode() == ARM::MVE_VPST || isVectorPredicate(MI))
- continue;
- // Check that any other instructions are predicated upon a vctp.
- // TODO: We could infer when VPTs are implicitly predicated on the
- // vctp (when the operands are predicated).
- if (!isPredicatedOnVCTP(MI)) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Can't convert: " << *MI);
- return false;
- }
- }
- }
- return true;
+ // If it exists, return the first internal instruction which modifies the
+ // VPR.
+ static MachineInstr *getDivergent(VPTState &Block) {
+ SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
+ for (unsigned i = 1; i < Insts.size(); ++i) {
+ MachineInstr *Next = Insts[i];
+ if (isVectorPredicate(Next))
+ return Next; // Found an instruction altering the vpr.
+ }
+ return nullptr;
}
- VPTState(MachineInstr *MI) { Insts.push_back(MI); }
-
- void insert(MachineInstr *MI) {
- Insts.push_back(MI);
- // VPT/VPST + 4 predicated instructions.
- assert(Insts.size() <= 5 && "Too many instructions in VPT block!");
+ // Return whether the given instruction is predicated upon a VCTP.
+ static bool isPredicatedOnVCTP(MachineInstr *MI, bool Exclusive = false) {
+ SetVector<MachineInstr *> &Predicates = PredicatedInsts[MI]->Predicates;
+ if (Exclusive && Predicates.size() != 1)
+ return false;
+ for (auto *PredMI : Predicates)
+ if (isVCTP(PredMI))
+ return true;
+ return false;
+ }
+
+ // Is the VPST, controlling the block entry, predicated upon a VCTP.
+ static bool isEntryPredicatedOnVCTP(VPTState &Block,
+ bool Exclusive = false) {
+ SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
+ return isPredicatedOnVCTP(Insts.front(), Exclusive);
}
- bool containsVCTP() const {
- for (auto *MI : Insts)
- if (isVCTP(MI))
- return true;
- return false;
+ // If this block begins with a VPT, we can check whether it's using
+    // at least one predicated input, as well as possibly loop-invariant values,
+    // which would result in it being implicitly predicated.
+ static bool hasImplicitlyValidVPT(VPTState &Block,
+ ReachingDefAnalysis &RDA) {
+ SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
+ MachineInstr *VPT = Insts.front();
+ assert(isVPTOpcode(VPT->getOpcode()) &&
+ "Expected VPT block to begin with VPT/VPST");
+
+ if (VPT->getOpcode() == ARM::MVE_VPST)
+ return false;
+
+ auto IsOperandPredicated = [&](MachineInstr *MI, unsigned Idx) {
+ MachineInstr *Op = RDA.getMIOperand(MI, MI->getOperand(Idx));
+ return Op && PredicatedInsts.count(Op) && isPredicatedOnVCTP(Op);
+ };
+
+ auto IsOperandInvariant = [&](MachineInstr *MI, unsigned Idx) {
+ MachineOperand &MO = MI->getOperand(Idx);
+ if (!MO.isReg() || !MO.getReg())
+ return true;
+
+ SmallPtrSet<MachineInstr *, 2> Defs;
+ RDA.getGlobalReachingDefs(MI, MO.getReg(), Defs);
+ if (Defs.empty())
+ return true;
+
+ for (auto *Def : Defs)
+ if (Def->getParent() == VPT->getParent())
+ return false;
+ return true;
+ };
+
+ // Check that at least one of the operands is directly predicated on a
+ // vctp and allow an invariant value too.
+ return (IsOperandPredicated(VPT, 1) || IsOperandPredicated(VPT, 2)) &&
+ (IsOperandPredicated(VPT, 1) || IsOperandInvariant(VPT, 1)) &&
+ (IsOperandPredicated(VPT, 2) || IsOperandInvariant(VPT, 2));
}
- unsigned size() const { return Insts.size(); }
- SmallVectorImpl<MachineInstr *> &getInsts() { return Insts; }
+ static bool isValid(ReachingDefAnalysis &RDA) {
+ // All predication within the loop should be based on vctp. If the block
+ // isn't predicated on entry, check whether the vctp is within the block
+ // and that all other instructions are then predicated on it.
+ for (auto &Block : Blocks) {
+ if (isEntryPredicatedOnVCTP(Block, false) ||
+ hasImplicitlyValidVPT(Block, RDA))
+ continue;
+
+ SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
+ // We don't know how to convert a block with just a VPT;VCTP into
+ // anything valid once we remove the VCTP. For now just bail out.
+ assert(isVPTOpcode(Insts.front()->getOpcode()) &&
+ "Expected VPT block to start with a VPST or VPT!");
+ if (Insts.size() == 2 && Insts.front()->getOpcode() != ARM::MVE_VPST &&
+ isVCTP(Insts.back()))
+ return false;
+
+ for (auto *MI : Insts) {
+ // Check that any internal VCTPs are 'Then' predicated.
+ if (isVCTP(MI) && getVPTInstrPredicate(*MI) != ARMVCC::Then)
+ return false;
+ // Skip other instructions that build up the predicate.
+ if (MI->getOpcode() == ARM::MVE_VPST || isVectorPredicate(MI))
+ continue;
+ // Check that any other instructions are predicated upon a vctp.
+ // TODO: We could infer when VPTs are implicitly predicated on the
+ // vctp (when the operands are predicated).
+ if (!isPredicatedOnVCTP(MI)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Can't convert: " << *MI);
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ VPTState(MachineInstr *MI) { Insts.push_back(MI); }
+
+ void insert(MachineInstr *MI) {
+ Insts.push_back(MI);
+ // VPT/VPST + 4 predicated instructions.
+ assert(Insts.size() <= 5 && "Too many instructions in VPT block!");
+ }
+
+ bool containsVCTP() const {
+ for (auto *MI : Insts)
+ if (isVCTP(MI))
+ return true;
+ return false;
+ }
+
+ unsigned size() const { return Insts.size(); }
+ SmallVectorImpl<MachineInstr *> &getInsts() { return Insts; }
};
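// A reduced model of the VPTState bookkeeping implemented by the class above,
// using plain std:: containers in place of MachineInstr. It only captures the
// structural invariants: a block is a VPT/VPST plus at most four predicated
// instructions, and every recorded instruction is tied to the predicate set
// live when it was seen. Illustrative sketch only, not the pass's own types.
#include <cassert>
#include <set>
#include <vector>

using ToyInst = int; // stands in for MachineInstr *

struct ToyVPTBlock {
  std::vector<ToyInst> Insts; // VPT/VPST first, then its predicated insts
  void insert(ToyInst MI) {
    Insts.push_back(MI);
    assert(Insts.size() <= 5 && "VPT/VPST + at most four predicated insts");
  }
};

struct ToyVPTState {
  std::vector<ToyVPTBlock> Blocks;
  std::set<ToyInst> CurrentPredicates; // AND'ed together to form the VPR value

  void createBlock(ToyInst VPTorVPST) {
    assert(!CurrentPredicates.empty() && "can't begin a VPT block unpredicated");
    Blocks.push_back(ToyVPTBlock{{VPTorVPST}});
  }
  void addInst(ToyInst MI) { Blocks.back().insert(MI); }
  void addPredicate(ToyInst MI) { CurrentPredicates.insert(MI); }
  void resetPredicate(ToyInst MI) { CurrentPredicates = {MI}; }
};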
struct LowOverheadLoop {
@@ -366,13 +366,13 @@ namespace {
const TargetRegisterInfo &TRI;
const ARMBaseInstrInfo &TII;
MachineFunction *MF = nullptr;
- MachineBasicBlock::iterator StartInsertPt;
- MachineBasicBlock *StartInsertBB = nullptr;
+ MachineBasicBlock::iterator StartInsertPt;
+ MachineBasicBlock *StartInsertBB = nullptr;
MachineInstr *Start = nullptr;
MachineInstr *Dec = nullptr;
MachineInstr *End = nullptr;
- MachineOperand TPNumElements;
- SmallVector<MachineInstr*, 4> VCTPs;
+ MachineOperand TPNumElements;
+ SmallVector<MachineInstr*, 4> VCTPs;
SmallPtrSet<MachineInstr*, 4> ToRemove;
SmallPtrSet<MachineInstr*, 4> BlockMasksToRecompute;
bool Revert = false;
@@ -381,14 +381,14 @@ namespace {
LowOverheadLoop(MachineLoop &ML, MachineLoopInfo &MLI,
ReachingDefAnalysis &RDA, const TargetRegisterInfo &TRI,
const ARMBaseInstrInfo &TII)
- : ML(ML), MLI(MLI), RDA(RDA), TRI(TRI), TII(TII),
- TPNumElements(MachineOperand::CreateImm(0)) {
+ : ML(ML), MLI(MLI), RDA(RDA), TRI(TRI), TII(TII),
+ TPNumElements(MachineOperand::CreateImm(0)) {
MF = ML.getHeader()->getParent();
if (auto *MBB = ML.getLoopPreheader())
Preheader = MBB;
else if (auto *MBB = MLI.findLoopPreheader(&ML, true))
Preheader = MBB;
- VPTState::reset();
+ VPTState::reset();
}
// If this is an MVE instruction, check that we know how to use tail
@@ -403,18 +403,18 @@ namespace {
bool IsTailPredicationLegal() const {
// For now, let's keep things really simple and only support a single
// block for tail predication.
- return !Revert && FoundAllComponents() && !VCTPs.empty() &&
+ return !Revert && FoundAllComponents() && !VCTPs.empty() &&
!CannotTailPredicate && ML.getNumBlocks() == 1;
}
-  // Given that MI is a VCTP, check that it is equivalent to any other VCTPs
- // found.
- bool AddVCTP(MachineInstr *MI);
-
+  // Given that MI is a VCTP, check that it is equivalent to any other VCTPs
+ // found.
+ bool AddVCTP(MachineInstr *MI);
+
// Check that the predication in the loop will be equivalent once we
// perform the conversion. Also ensure that we can provide the number
// of elements to the loop start instruction.
- bool ValidateTailPredicate();
+ bool ValidateTailPredicate();
// Check that any values available outside of the loop will be the same
// after tail predication conversion.
@@ -427,41 +427,41 @@ namespace {
// Check the branch targets are within range and we satisfy our
// restrictions.
- void Validate(ARMBasicBlockUtils *BBUtils);
+ void Validate(ARMBasicBlockUtils *BBUtils);
bool FoundAllComponents() const {
return Start && Dec && End;
}
- SmallVectorImpl<VPTState> &getVPTBlocks() {
- return VPTState::Blocks;
- }
+ SmallVectorImpl<VPTState> &getVPTBlocks() {
+ return VPTState::Blocks;
+ }
- // Return the operand for the loop start instruction. This will be the loop
- // iteration count, or the number of elements if we're tail predicating.
- MachineOperand &getLoopStartOperand() {
- if (IsTailPredicationLegal())
- return TPNumElements;
- return isDo(Start) ? Start->getOperand(1) : Start->getOperand(0);
+ // Return the operand for the loop start instruction. This will be the loop
+ // iteration count, or the number of elements if we're tail predicating.
+ MachineOperand &getLoopStartOperand() {
+ if (IsTailPredicationLegal())
+ return TPNumElements;
+ return isDo(Start) ? Start->getOperand(1) : Start->getOperand(0);
}
unsigned getStartOpcode() const {
- bool IsDo = isDo(Start);
+ bool IsDo = isDo(Start);
if (!IsTailPredicationLegal())
return IsDo ? ARM::t2DLS : ARM::t2WLS;
- return VCTPOpcodeToLSTP(VCTPs.back()->getOpcode(), IsDo);
+ return VCTPOpcodeToLSTP(VCTPs.back()->getOpcode(), IsDo);
}
void dump() const {
if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start;
if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec;
if (End) dbgs() << "ARM Loops: Found Loop End: " << *End;
- if (!VCTPs.empty()) {
- dbgs() << "ARM Loops: Found VCTP(s):\n";
- for (auto *MI : VCTPs)
- dbgs() << " - " << *MI;
- }
+ if (!VCTPs.empty()) {
+ dbgs() << "ARM Loops: Found VCTP(s):\n";
+ for (auto *MI : VCTPs)
+ dbgs() << " - " << *MI;
+ }
if (!FoundAllComponents())
dbgs() << "ARM Loops: Not a low-overhead loop.\n";
else if (!(Start && Dec && End))
@@ -508,14 +508,14 @@ namespace {
bool RevertNonLoops();
void RevertWhile(MachineInstr *MI) const;
- void RevertDo(MachineInstr *MI) const;
+ void RevertDo(MachineInstr *MI) const;
bool RevertLoopDec(MachineInstr *MI) const;
void RevertLoopEnd(MachineInstr *MI, bool SkipCmp = false) const;
- void RevertLoopEndDec(MachineInstr *MI) const;
-
+ void RevertLoopEndDec(MachineInstr *MI) const;
+
void ConvertVPTBlocks(LowOverheadLoop &LoLoop);
MachineInstr *ExpandLoopStart(LowOverheadLoop &LoLoop);
@@ -528,230 +528,230 @@ namespace {
char ARMLowOverheadLoops::ID = 0;
-SmallVector<VPTState, 4> VPTState::Blocks;
-SetVector<MachineInstr *> VPTState::CurrentPredicates;
-std::map<MachineInstr *,
- std::unique_ptr<PredicatedMI>> VPTState::PredicatedInsts;
-
+SmallVector<VPTState, 4> VPTState::Blocks;
+SetVector<MachineInstr *> VPTState::CurrentPredicates;
+std::map<MachineInstr *,
+ std::unique_ptr<PredicatedMI>> VPTState::PredicatedInsts;
+
INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,
false, false)
-static bool TryRemove(MachineInstr *MI, ReachingDefAnalysis &RDA,
- InstSet &ToRemove, InstSet &Ignore) {
-
- // Check that we can remove all of Killed without having to modify any IT
- // blocks.
- auto WontCorruptITs = [](InstSet &Killed, ReachingDefAnalysis &RDA) {
- // Collect the dead code and the MBBs in which they reside.
- SmallPtrSet<MachineBasicBlock*, 2> BasicBlocks;
- for (auto *Dead : Killed)
- BasicBlocks.insert(Dead->getParent());
-
- // Collect IT blocks in all affected basic blocks.
- std::map<MachineInstr *, SmallPtrSet<MachineInstr *, 2>> ITBlocks;
- for (auto *MBB : BasicBlocks) {
- for (auto &IT : *MBB) {
- if (IT.getOpcode() != ARM::t2IT)
- continue;
- RDA.getReachingLocalUses(&IT, MCRegister::from(ARM::ITSTATE),
- ITBlocks[&IT]);
- }
- }
-
- // If we're removing all of the instructions within an IT block, then
- // also remove the IT instruction.
- SmallPtrSet<MachineInstr *, 2> ModifiedITs;
- SmallPtrSet<MachineInstr *, 2> RemoveITs;
- for (auto *Dead : Killed) {
- if (MachineOperand *MO = Dead->findRegisterUseOperand(ARM::ITSTATE)) {
- MachineInstr *IT = RDA.getMIOperand(Dead, *MO);
- RemoveITs.insert(IT);
- auto &CurrentBlock = ITBlocks[IT];
- CurrentBlock.erase(Dead);
- if (CurrentBlock.empty())
- ModifiedITs.erase(IT);
- else
- ModifiedITs.insert(IT);
- }
+static bool TryRemove(MachineInstr *MI, ReachingDefAnalysis &RDA,
+ InstSet &ToRemove, InstSet &Ignore) {
+
+ // Check that we can remove all of Killed without having to modify any IT
+ // blocks.
+ auto WontCorruptITs = [](InstSet &Killed, ReachingDefAnalysis &RDA) {
+ // Collect the dead code and the MBBs in which they reside.
+ SmallPtrSet<MachineBasicBlock*, 2> BasicBlocks;
+ for (auto *Dead : Killed)
+ BasicBlocks.insert(Dead->getParent());
+
+ // Collect IT blocks in all affected basic blocks.
+ std::map<MachineInstr *, SmallPtrSet<MachineInstr *, 2>> ITBlocks;
+ for (auto *MBB : BasicBlocks) {
+ for (auto &IT : *MBB) {
+ if (IT.getOpcode() != ARM::t2IT)
+ continue;
+ RDA.getReachingLocalUses(&IT, MCRegister::from(ARM::ITSTATE),
+ ITBlocks[&IT]);
+ }
+ }
+
+ // If we're removing all of the instructions within an IT block, then
+ // also remove the IT instruction.
+ SmallPtrSet<MachineInstr *, 2> ModifiedITs;
+ SmallPtrSet<MachineInstr *, 2> RemoveITs;
+ for (auto *Dead : Killed) {
+ if (MachineOperand *MO = Dead->findRegisterUseOperand(ARM::ITSTATE)) {
+ MachineInstr *IT = RDA.getMIOperand(Dead, *MO);
+ RemoveITs.insert(IT);
+ auto &CurrentBlock = ITBlocks[IT];
+ CurrentBlock.erase(Dead);
+ if (CurrentBlock.empty())
+ ModifiedITs.erase(IT);
+ else
+ ModifiedITs.insert(IT);
+ }
+ }
+ if (!ModifiedITs.empty())
+ return false;
+ Killed.insert(RemoveITs.begin(), RemoveITs.end());
+ return true;
+ };
+
+ SmallPtrSet<MachineInstr *, 2> Uses;
+ if (!RDA.isSafeToRemove(MI, Uses, Ignore))
+ return false;
+
+ if (WontCorruptITs(Uses, RDA)) {
+ ToRemove.insert(Uses.begin(), Uses.end());
+ LLVM_DEBUG(dbgs() << "ARM Loops: Able to remove: " << *MI
+ << " - can also remove:\n";
+ for (auto *Use : Uses)
+ dbgs() << " - " << *Use);
+
+ SmallPtrSet<MachineInstr*, 4> Killed;
+ RDA.collectKilledOperands(MI, Killed);
+ if (WontCorruptITs(Killed, RDA)) {
+ ToRemove.insert(Killed.begin(), Killed.end());
+ LLVM_DEBUG(for (auto *Dead : Killed)
+ dbgs() << " - " << *Dead);
}
- if (!ModifiedITs.empty())
- return false;
- Killed.insert(RemoveITs.begin(), RemoveITs.end());
- return true;
- };
-
- SmallPtrSet<MachineInstr *, 2> Uses;
- if (!RDA.isSafeToRemove(MI, Uses, Ignore))
- return false;
-
- if (WontCorruptITs(Uses, RDA)) {
- ToRemove.insert(Uses.begin(), Uses.end());
- LLVM_DEBUG(dbgs() << "ARM Loops: Able to remove: " << *MI
- << " - can also remove:\n";
- for (auto *Use : Uses)
- dbgs() << " - " << *Use);
-
- SmallPtrSet<MachineInstr*, 4> Killed;
- RDA.collectKilledOperands(MI, Killed);
- if (WontCorruptITs(Killed, RDA)) {
- ToRemove.insert(Killed.begin(), Killed.end());
- LLVM_DEBUG(for (auto *Dead : Killed)
- dbgs() << " - " << *Dead);
- }
- return true;
- }
- return false;
-}
-
-bool LowOverheadLoop::ValidateTailPredicate() {
- if (!IsTailPredicationLegal()) {
- LLVM_DEBUG(if (VCTPs.empty())
- dbgs() << "ARM Loops: Didn't find a VCTP instruction.\n";
- dbgs() << "ARM Loops: Tail-predication is not valid.\n");
- return false;
+ return true;
}
-
- assert(!VCTPs.empty() && "VCTP instruction expected but is not set");
- assert(ML.getBlocks().size() == 1 &&
- "Shouldn't be processing a loop with more than one block");
-
- if (DisableTailPredication) {
- LLVM_DEBUG(dbgs() << "ARM Loops: tail-predication is disabled\n");
+ return false;
+}
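// The IT-block safety rule enforced above reduces to set bookkeeping: an IT
// instruction may only go away if every instruction it predicates is also
// being removed, while a partially emptied IT block rejects the whole removal.
// A hedged sketch with toy ids instead of the ReachingDefAnalysis-driven code:
#include <cstddef>
#include <map>
#include <set>

using ToyInst = int;

// ITBlocks maps each t2IT-style instruction to the instructions it covers.
// Returns true if removing Killed leaves no IT block partially emptied, and
// adds the fully emptied IT instructions to Killed so they are removed too.
static bool wontCorruptITs(std::set<ToyInst> &Killed,
                           const std::map<ToyInst, std::set<ToyInst>> &ITBlocks) {
  std::set<ToyInst> FullyEmptied;
  for (const auto &KV : ITBlocks) {
    std::size_t Removed = 0;
    for (ToyInst I : KV.second)
      Removed += Killed.count(I);
    if (Removed == KV.second.size())
      FullyEmptied.insert(KV.first); // whole block dies, drop the IT as well
    else if (Removed != 0)
      return false;                  // IT block would be partially modified
  }
  Killed.insert(FullyEmptied.begin(), FullyEmptied.end());
  return true;
}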
+
+bool LowOverheadLoop::ValidateTailPredicate() {
+ if (!IsTailPredicationLegal()) {
+ LLVM_DEBUG(if (VCTPs.empty())
+ dbgs() << "ARM Loops: Didn't find a VCTP instruction.\n";
+ dbgs() << "ARM Loops: Tail-predication is not valid.\n");
return false;
- }
+ }
- if (!VPTState::isValid(RDA)) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Invalid VPT state.\n");
- return false;
- }
+ assert(!VCTPs.empty() && "VCTP instruction expected but is not set");
+ assert(ML.getBlocks().size() == 1 &&
+ "Shouldn't be processing a loop with more than one block");
- if (!ValidateLiveOuts()) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Invalid live outs.\n");
+ if (DisableTailPredication) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: tail-predication is disabled\n");
return false;
}
- // Check that creating a [W|D]LSTP, which will define LR with an element
- // count instead of iteration count, won't affect any other instructions
- // than the LoopStart and LoopDec.
- // TODO: We should try to insert the [W|D]LSTP after any of the other uses.
- Register StartReg = isDo(Start) ? Start->getOperand(1).getReg()
- : Start->getOperand(0).getReg();
- if (StartInsertPt == Start && StartReg == ARM::LR) {
- if (auto *IterCount = RDA.getMIOperand(Start, isDo(Start) ? 1 : 0)) {
- SmallPtrSet<MachineInstr *, 2> Uses;
- RDA.getGlobalUses(IterCount, MCRegister::from(ARM::LR), Uses);
- for (auto *Use : Uses) {
- if (Use != Start && Use != Dec) {
- LLVM_DEBUG(dbgs() << " ARM Loops: Found LR use: " << *Use);
- return false;
- }
+ if (!VPTState::isValid(RDA)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Invalid VPT state.\n");
+ return false;
+ }
+
+ if (!ValidateLiveOuts()) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Invalid live outs.\n");
+ return false;
+ }
+
+ // Check that creating a [W|D]LSTP, which will define LR with an element
+ // count instead of iteration count, won't affect any other instructions
+ // than the LoopStart and LoopDec.
+ // TODO: We should try to insert the [W|D]LSTP after any of the other uses.
+ Register StartReg = isDo(Start) ? Start->getOperand(1).getReg()
+ : Start->getOperand(0).getReg();
+ if (StartInsertPt == Start && StartReg == ARM::LR) {
+ if (auto *IterCount = RDA.getMIOperand(Start, isDo(Start) ? 1 : 0)) {
+ SmallPtrSet<MachineInstr *, 2> Uses;
+ RDA.getGlobalUses(IterCount, MCRegister::from(ARM::LR), Uses);
+ for (auto *Use : Uses) {
+ if (Use != Start && Use != Dec) {
+ LLVM_DEBUG(dbgs() << " ARM Loops: Found LR use: " << *Use);
+ return false;
+ }
}
}
}
- // For tail predication, we need to provide the number of elements, instead
- // of the iteration count, to the loop start instruction. The number of
- // elements is provided to the vctp instruction, so we need to check that
- // we can use this register at InsertPt.
- MachineInstr *VCTP = VCTPs.back();
- if (Start->getOpcode() == ARM::t2DoLoopStartTP) {
- TPNumElements = Start->getOperand(2);
- StartInsertPt = Start;
- StartInsertBB = Start->getParent();
- } else {
- TPNumElements = VCTP->getOperand(1);
- MCRegister NumElements = TPNumElements.getReg().asMCReg();
-
- // If the register is defined within loop, then we can't perform TP.
- // TODO: Check whether this is just a mov of a register that would be
- // available.
- if (RDA.hasLocalDefBefore(VCTP, NumElements)) {
- LLVM_DEBUG(dbgs() << "ARM Loops: VCTP operand is defined in the loop.\n");
+ // For tail predication, we need to provide the number of elements, instead
+ // of the iteration count, to the loop start instruction. The number of
+ // elements is provided to the vctp instruction, so we need to check that
+ // we can use this register at InsertPt.
+ MachineInstr *VCTP = VCTPs.back();
+ if (Start->getOpcode() == ARM::t2DoLoopStartTP) {
+ TPNumElements = Start->getOperand(2);
+ StartInsertPt = Start;
+ StartInsertBB = Start->getParent();
+ } else {
+ TPNumElements = VCTP->getOperand(1);
+ MCRegister NumElements = TPNumElements.getReg().asMCReg();
+
+ // If the register is defined within loop, then we can't perform TP.
+ // TODO: Check whether this is just a mov of a register that would be
+ // available.
+ if (RDA.hasLocalDefBefore(VCTP, NumElements)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: VCTP operand is defined in the loop.\n");
+ return false;
+ }
+
+  // The element count register may be defined after InsertPt, in which case we
+ // need to try to move either InsertPt or the def so that the [w|d]lstp can
+ // use the value.
+
+ if (StartInsertPt != StartInsertBB->end() &&
+ !RDA.isReachingDefLiveOut(&*StartInsertPt, NumElements)) {
+ if (auto *ElemDef =
+ RDA.getLocalLiveOutMIDef(StartInsertBB, NumElements)) {
+ if (RDA.isSafeToMoveForwards(ElemDef, &*StartInsertPt)) {
+ ElemDef->removeFromParent();
+ StartInsertBB->insert(StartInsertPt, ElemDef);
+ LLVM_DEBUG(dbgs()
+ << "ARM Loops: Moved element count def: " << *ElemDef);
+ } else if (RDA.isSafeToMoveBackwards(&*StartInsertPt, ElemDef)) {
+ StartInsertPt->removeFromParent();
+ StartInsertBB->insertAfter(MachineBasicBlock::iterator(ElemDef),
+ &*StartInsertPt);
+ LLVM_DEBUG(dbgs() << "ARM Loops: Moved start past: " << *ElemDef);
+ } else {
+ // If we fail to move an instruction and the element count is provided
+ // by a mov, use the mov operand if it will have the same value at the
+ // insertion point
+ MachineOperand Operand = ElemDef->getOperand(1);
+ if (isMovRegOpcode(ElemDef->getOpcode()) &&
+ RDA.getUniqueReachingMIDef(ElemDef, Operand.getReg().asMCReg()) ==
+ RDA.getUniqueReachingMIDef(&*StartInsertPt,
+ Operand.getReg().asMCReg())) {
+ TPNumElements = Operand;
+ NumElements = TPNumElements.getReg();
+ } else {
+ LLVM_DEBUG(dbgs()
+ << "ARM Loops: Unable to move element count to loop "
+ << "start instruction.\n");
+ return false;
+ }
+ }
+ }
+ }
+
+ // Especially in the case of while loops, InsertBB may not be the
+ // preheader, so we need to check that the register isn't redefined
+ // before entering the loop.
+ auto CannotProvideElements = [this](MachineBasicBlock *MBB,
+ MCRegister NumElements) {
+ if (MBB->empty())
+ return false;
+ // NumElements is redefined in this block.
+ if (RDA.hasLocalDefBefore(&MBB->back(), NumElements))
+ return true;
+
+ // Don't continue searching up through multiple predecessors.
+ if (MBB->pred_size() > 1)
+ return true;
+
return false;
+ };
+
+ // Search backwards for a def, until we get to InsertBB.
+ MachineBasicBlock *MBB = Preheader;
+ while (MBB && MBB != StartInsertBB) {
+ if (CannotProvideElements(MBB, NumElements)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Unable to provide element count.\n");
+ return false;
+ }
+ MBB = *MBB->pred_begin();
}
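// The search just above walks backwards from the preheader towards the block
// chosen for the loop start, one predecessor at a time, and gives up as soon
// as a block redefines the element count or has more than one predecessor.
// Modelled with toy blocks (illustrative only, not MachineBasicBlock):
#include <vector>

struct ToyBlock {
  std::vector<ToyBlock *> Preds;
  bool RedefinesNumElements = false;
};

static bool elementCountReachesLoopStart(ToyBlock *Preheader,
                                         ToyBlock *InsertBB) {
  for (ToyBlock *MBB = Preheader; MBB && MBB != InsertBB;) {
    if (MBB->RedefinesNumElements || MBB->Preds.size() > 1)
      return false; // value is clobbered or its provenance becomes ambiguous
    MBB = MBB->Preds.empty() ? nullptr : MBB->Preds.front();
  }
  return true;
}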
-
-  // The element count register may be defined after InsertPt, in which case we
- // need to try to move either InsertPt or the def so that the [w|d]lstp can
- // use the value.
-
- if (StartInsertPt != StartInsertBB->end() &&
- !RDA.isReachingDefLiveOut(&*StartInsertPt, NumElements)) {
- if (auto *ElemDef =
- RDA.getLocalLiveOutMIDef(StartInsertBB, NumElements)) {
- if (RDA.isSafeToMoveForwards(ElemDef, &*StartInsertPt)) {
- ElemDef->removeFromParent();
- StartInsertBB->insert(StartInsertPt, ElemDef);
- LLVM_DEBUG(dbgs()
- << "ARM Loops: Moved element count def: " << *ElemDef);
- } else if (RDA.isSafeToMoveBackwards(&*StartInsertPt, ElemDef)) {
- StartInsertPt->removeFromParent();
- StartInsertBB->insertAfter(MachineBasicBlock::iterator(ElemDef),
- &*StartInsertPt);
- LLVM_DEBUG(dbgs() << "ARM Loops: Moved start past: " << *ElemDef);
- } else {
- // If we fail to move an instruction and the element count is provided
- // by a mov, use the mov operand if it will have the same value at the
- // insertion point
- MachineOperand Operand = ElemDef->getOperand(1);
- if (isMovRegOpcode(ElemDef->getOpcode()) &&
- RDA.getUniqueReachingMIDef(ElemDef, Operand.getReg().asMCReg()) ==
- RDA.getUniqueReachingMIDef(&*StartInsertPt,
- Operand.getReg().asMCReg())) {
- TPNumElements = Operand;
- NumElements = TPNumElements.getReg();
- } else {
- LLVM_DEBUG(dbgs()
- << "ARM Loops: Unable to move element count to loop "
- << "start instruction.\n");
- return false;
- }
- }
- }
- }
-
- // Especially in the case of while loops, InsertBB may not be the
- // preheader, so we need to check that the register isn't redefined
- // before entering the loop.
- auto CannotProvideElements = [this](MachineBasicBlock *MBB,
- MCRegister NumElements) {
- if (MBB->empty())
- return false;
- // NumElements is redefined in this block.
- if (RDA.hasLocalDefBefore(&MBB->back(), NumElements))
- return true;
-
- // Don't continue searching up through multiple predecessors.
- if (MBB->pred_size() > 1)
- return true;
-
- return false;
- };
-
- // Search backwards for a def, until we get to InsertBB.
- MachineBasicBlock *MBB = Preheader;
- while (MBB && MBB != StartInsertBB) {
- if (CannotProvideElements(MBB, NumElements)) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Unable to provide element count.\n");
- return false;
- }
- MBB = *MBB->pred_begin();
- }
- }
-
-  // Could inserting the [W|D]LSTP cause some unintended effects? In a perfect
-  // world the [w|d]lstp instruction would be the last instruction in the preheader
- // and so it would only affect instructions within the loop body. But due to
- // scheduling, and/or the logic in this pass (above), the insertion point can
- // be moved earlier. So if the Loop Start isn't the last instruction in the
- // preheader, and if the initial element count is smaller than the vector
- // width, the Loop Start instruction will immediately generate one or more
-  // false lane masks which can, incorrectly, affect the following MVE
- // instructions in the preheader.
- if (std::any_of(StartInsertPt, StartInsertBB->end(), shouldInspect)) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Instruction blocks [W|D]LSTP\n");
- return false;
}
+  // Could inserting the [W|D]LSTP cause some unintended effects? In a perfect
+  // world the [w|d]lstp instruction would be the last instruction in the preheader
+ // and so it would only affect instructions within the loop body. But due to
+ // scheduling, and/or the logic in this pass (above), the insertion point can
+ // be moved earlier. So if the Loop Start isn't the last instruction in the
+ // preheader, and if the initial element count is smaller than the vector
+ // width, the Loop Start instruction will immediately generate one or more
+  // false lane masks which can, incorrectly, affect the following MVE
+ // instructions in the preheader.
+ if (std::any_of(StartInsertPt, StartInsertBB->end(), shouldInspect)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Instruction blocks [W|D]LSTP\n");
+ return false;
+ }
+
// Check that the value change of the element count is what we expect and
// that the predication will be equivalent. For this we need:
// NumElements = NumElements - VectorWidth. The sub will be a sub immediate
@@ -760,20 +760,20 @@ bool LowOverheadLoop::ValidateTailPredicate() {
return -getAddSubImmediate(*MI) == ExpectedVecWidth;
};
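// The only update to the element count that tail-predication accepts is a
// single subtract of the number of lanes implied by the VCTP element size:
// 16 for 8-bit elements, 8 for 16-bit, 4 for 32-bit (128-bit MVE vectors).
// The helpers below are an illustrative stand-in for getTailPredVectorWidth
// combined with the IsValidSub check above, not the pass's own code.
static unsigned lanesForElementSize(unsigned ElemBits) {
  return 128 / ElemBits; // 8 -> 16 lanes, 16 -> 8, 32 -> 4
}

static bool isValidElementCountStep(int AddSubImmediate, unsigned ElemBits) {
  // The sub encodes a negative immediate, so negate it before comparing.
  return -AddSubImmediate == static_cast<int>(lanesForElementSize(ElemBits));
}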
- MachineBasicBlock *MBB = VCTP->getParent();
- // Remove modifications to the element count since they have no purpose in a
- // tail predicated loop. Explicitly refer to the vctp operand no matter which
- // register NumElements has been assigned to, since that is what the
- // modifications will be using
- if (auto *Def = RDA.getUniqueReachingMIDef(
- &MBB->back(), VCTP->getOperand(1).getReg().asMCReg())) {
+ MachineBasicBlock *MBB = VCTP->getParent();
+ // Remove modifications to the element count since they have no purpose in a
+ // tail predicated loop. Explicitly refer to the vctp operand no matter which
+ // register NumElements has been assigned to, since that is what the
+ // modifications will be using
+ if (auto *Def = RDA.getUniqueReachingMIDef(
+ &MBB->back(), VCTP->getOperand(1).getReg().asMCReg())) {
SmallPtrSet<MachineInstr*, 2> ElementChain;
- SmallPtrSet<MachineInstr*, 2> Ignore;
+ SmallPtrSet<MachineInstr*, 2> Ignore;
unsigned ExpectedVectorWidth = getTailPredVectorWidth(VCTP->getOpcode());
- Ignore.insert(VCTPs.begin(), VCTPs.end());
+ Ignore.insert(VCTPs.begin(), VCTPs.end());
- if (TryRemove(Def, RDA, ElementChain, Ignore)) {
+ if (TryRemove(Def, RDA, ElementChain, Ignore)) {
bool FoundSub = false;
for (auto *MI : ElementChain) {
@@ -781,17 +781,17 @@ bool LowOverheadLoop::ValidateTailPredicate() {
continue;
if (isSubImmOpcode(MI->getOpcode())) {
- if (FoundSub || !IsValidSub(MI, ExpectedVectorWidth)) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Unexpected instruction in element"
- " count: " << *MI);
+ if (FoundSub || !IsValidSub(MI, ExpectedVectorWidth)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Unexpected instruction in element"
+ " count: " << *MI);
return false;
- }
+ }
FoundSub = true;
- } else {
- LLVM_DEBUG(dbgs() << "ARM Loops: Unexpected instruction in element"
- " count: " << *MI);
+ } else {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Unexpected instruction in element"
+ " count: " << *MI);
return false;
- }
+ }
}
ToRemove.insert(ElementChain.begin(), ElementChain.end());
}
@@ -868,18 +868,18 @@ static bool producesFalseLanesZero(MachineInstr &MI,
if (canGenerateNonZeros(MI))
return false;
- bool isPredicated = isVectorPredicated(&MI);
- // Predicated loads will write zeros to the falsely predicated bytes of the
- // destination register.
- if (MI.mayLoad())
- return isPredicated;
-
- auto IsZeroInit = [](MachineInstr *Def) {
- return !isVectorPredicated(Def) &&
- Def->getOpcode() == ARM::MVE_VMOVimmi32 &&
- Def->getOperand(1).getImm() == 0;
- };
-
+ bool isPredicated = isVectorPredicated(&MI);
+ // Predicated loads will write zeros to the falsely predicated bytes of the
+ // destination register.
+ if (MI.mayLoad())
+ return isPredicated;
+
+ auto IsZeroInit = [](MachineInstr *Def) {
+ return !isVectorPredicated(Def) &&
+ Def->getOpcode() == ARM::MVE_VMOVimmi32 &&
+ Def->getOperand(1).getImm() == 0;
+ };
+
bool AllowScalars = isHorizontalReduction(MI);
for (auto &MO : MI.operands()) {
if (!MO.isReg() || !MO.getReg())
@@ -887,21 +887,21 @@ static bool producesFalseLanesZero(MachineInstr &MI,
if (!isRegInClass(MO, QPRs) && AllowScalars)
continue;
- // Check that this instruction will produce zeros in its false lanes:
- // - If it only consumes false lanes zero or constant 0 (vmov #0)
-    // - If it's predicated, it only matters that its def register already has
- // false lane zeros, so we can ignore the uses.
- SmallPtrSet<MachineInstr *, 2> Defs;
- RDA.getGlobalReachingDefs(&MI, MO.getReg(), Defs);
- for (auto *Def : Defs) {
- if (Def == &MI || FalseLanesZero.count(Def) || IsZeroInit(Def))
- continue;
- if (MO.isUse() && isPredicated)
- continue;
+ // Check that this instruction will produce zeros in its false lanes:
+ // - If it only consumes false lanes zero or constant 0 (vmov #0)
+    // - If it's predicated, it only matters that its def register already has
+ // false lane zeros, so we can ignore the uses.
+ SmallPtrSet<MachineInstr *, 2> Defs;
+ RDA.getGlobalReachingDefs(&MI, MO.getReg(), Defs);
+ for (auto *Def : Defs) {
+ if (Def == &MI || FalseLanesZero.count(Def) || IsZeroInit(Def))
+ continue;
+ if (MO.isUse() && isPredicated)
+ continue;
return false;
- }
+ }
}
- LLVM_DEBUG(dbgs() << "ARM Loops: Always False Zeros: " << MI);
+ LLVM_DEBUG(dbgs() << "ARM Loops: Always False Zeros: " << MI);
return true;
}
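// A reduced model of the classification the function above performs: false
// lanes are known to be zero for a predicated load, and otherwise only if
// every vector input is itself false-lanes-zero or a vmov #0 constant (a
// predicated instruction may ignore its uses, since only its def matters).
// Toy types only; a sketch of the dataflow rule, not the RDA-backed code.
#include <set>
#include <vector>

struct ToyMI {
  bool IsPredicated = false;
  bool IsLoad = false;
  bool IsZeroInit = false;               // vmov.i32 q, #0 style constant
  std::vector<const ToyMI *> VectorUses; // QPR inputs only
};

static bool producesFalseLanesZeroToy(const ToyMI &MI,
                                      const std::set<const ToyMI *> &Known) {
  if (MI.IsLoad)
    return MI.IsPredicated; // predicated loads write zeros to false lanes
  for (const ToyMI *Use : MI.VectorUses) {
    if (Known.count(Use) || Use->IsZeroInit)
      continue;             // this input already has zeroed false lanes
    if (MI.IsPredicated)
      continue;             // predicated: the uses don't matter
    return false;
  }
  return true;
}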
@@ -921,7 +921,7 @@ bool LowOverheadLoop::ValidateLiveOuts() {
// the false lanes are zeroed and here we're trying to track that those false
// lanes remain zero, or where they change, the differences are masked away
// by their user(s).
- // All MVE stores have to be predicated, so we know that any predicate load
+ // All MVE stores have to be predicated, so we know that any predicate load
// operands, or stored results are equivalent already. Other explicitly
// predicated instructions will perform the same operation in the original
// loop and the tail-predicated form too. Because of this, we can insert
@@ -934,32 +934,32 @@ bool LowOverheadLoop::ValidateLiveOuts() {
MachineBasicBlock *Header = ML.getHeader();
for (auto &MI : *Header) {
- if (!shouldInspect(MI))
+ if (!shouldInspect(MI))
continue;
if (isVCTP(&MI) || isVPTOpcode(MI.getOpcode()))
continue;
- bool isPredicated = isVectorPredicated(&MI);
- bool retainsOrReduces =
- retainsPreviousHalfElement(MI) || isHorizontalReduction(MI);
-
- if (isPredicated)
+ bool isPredicated = isVectorPredicated(&MI);
+ bool retainsOrReduces =
+ retainsPreviousHalfElement(MI) || isHorizontalReduction(MI);
+
+ if (isPredicated)
Predicated.insert(&MI);
- if (producesFalseLanesZero(MI, QPRs, RDA, FalseLanesZero))
- FalseLanesZero.insert(&MI);
- else if (MI.getNumDefs() == 0)
+ if (producesFalseLanesZero(MI, QPRs, RDA, FalseLanesZero))
+ FalseLanesZero.insert(&MI);
+ else if (MI.getNumDefs() == 0)
continue;
- else if (!isPredicated && retainsOrReduces)
- return false;
- else if (!isPredicated)
+ else if (!isPredicated && retainsOrReduces)
+ return false;
+ else if (!isPredicated)
FalseLanesUnknown.insert(&MI);
}
auto HasPredicatedUsers = [this](MachineInstr *MI, const MachineOperand &MO,
SmallPtrSetImpl<MachineInstr *> &Predicated) {
SmallPtrSet<MachineInstr *, 2> Uses;
- RDA.getGlobalUses(MI, MO.getReg().asMCReg(), Uses);
+ RDA.getGlobalUses(MI, MO.getReg().asMCReg(), Uses);
for (auto *Use : Uses) {
if (Use != MI && !Predicated.count(Use))
return false;
@@ -982,12 +982,12 @@ bool LowOverheadLoop::ValidateLiveOuts() {
LLVM_DEBUG(dbgs() << "ARM Loops: Found an unknown def of : "
<< TRI.getRegAsmName(MO.getReg()) << " at " << *MI);
NonPredicated.insert(MI);
- break;
+ break;
}
}
// Any unknown false lanes have been masked away by the user(s).
- if (!NonPredicated.contains(MI))
- Predicated.insert(MI);
+ if (!NonPredicated.contains(MI))
+ Predicated.insert(MI);
}
SmallPtrSet<MachineInstr *, 2> LiveOutMIs;
@@ -997,13 +997,13 @@ bool LowOverheadLoop::ValidateLiveOuts() {
assert(ExitBlocks.size() == 1 && "Expected a single exit block");
MachineBasicBlock *ExitBB = ExitBlocks.front();
for (const MachineBasicBlock::RegisterMaskPair &RegMask : ExitBB->liveins()) {
- // TODO: Instead of blocking predication, we could move the vctp to the exit
-    // block and calculate its operand there or in the preheader.
- if (RegMask.PhysReg == ARM::VPR)
- return false;
+ // TODO: Instead of blocking predication, we could move the vctp to the exit
+    // block and calculate its operand there or in the preheader.
+ if (RegMask.PhysReg == ARM::VPR)
+ return false;
// Check Q-regs that are live in the exit blocks. We don't collect scalars
// because they won't be affected by lane predication.
- if (QPRs->contains(RegMask.PhysReg))
+ if (QPRs->contains(RegMask.PhysReg))
if (auto *MI = RDA.getLocalLiveOutMIDef(Header, RegMask.PhysReg))
LiveOutMIs.insert(MI);
}
@@ -1014,123 +1014,123 @@ bool LowOverheadLoop::ValidateLiveOuts() {
// instruction needs to be predicated, so check this here. The instructions
// in NonPredicated have been found to be a reduction that we can ensure its
// legality.
- for (auto *MI : LiveOutMIs) {
- if (NonPredicated.count(MI) && FalseLanesUnknown.contains(MI)) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Unable to handle live out: " << *MI);
+ for (auto *MI : LiveOutMIs) {
+ if (NonPredicated.count(MI) && FalseLanesUnknown.contains(MI)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Unable to handle live out: " << *MI);
return false;
- }
- }
+ }
+ }
return true;
}
-void LowOverheadLoop::Validate(ARMBasicBlockUtils *BBUtils) {
+void LowOverheadLoop::Validate(ARMBasicBlockUtils *BBUtils) {
if (Revert)
return;
- // Check branch target ranges: WLS[TP] can only branch forwards and LE[TP]
- // can only jump back.
- auto ValidateRanges = [](MachineInstr *Start, MachineInstr *End,
- ARMBasicBlockUtils *BBUtils, MachineLoop &ML) {
- MachineBasicBlock *TgtBB = End->getOpcode() == ARM::t2LoopEnd
- ? End->getOperand(1).getMBB()
- : End->getOperand(2).getMBB();
-    // TODO: Maybe there are cases where the target doesn't have to be the header,
- // but for now be safe and revert.
- if (TgtBB != ML.getHeader()) {
- LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targeting header.\n");
- return false;
- }
-
- // The WLS and LE instructions have 12-bits for the label offset. WLS
- // requires a positive offset, while LE uses negative.
- if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML.getHeader()) ||
- !BBUtils->isBBInRange(End, ML.getHeader(), 4094)) {
- LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n");
- return false;
- }
-
- if (Start->getOpcode() == ARM::t2WhileLoopStart &&
- (BBUtils->getOffsetOf(Start) >
- BBUtils->getOffsetOf(Start->getOperand(1).getMBB()) ||
- !BBUtils->isBBInRange(Start, Start->getOperand(1).getMBB(), 4094))) {
- LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n");
- return false;
- }
- return true;
- };
-
- // Find a suitable position to insert the loop start instruction. It needs to
- // be able to safely define LR.
- auto FindStartInsertionPoint = [](MachineInstr *Start, MachineInstr *Dec,
- MachineBasicBlock::iterator &InsertPt,
- MachineBasicBlock *&InsertBB,
- ReachingDefAnalysis &RDA,
- InstSet &ToRemove) {
- // For a t2DoLoopStart it is always valid to use the start insertion point.
- // For WLS we can define LR if LR already contains the same value.
- if (isDo(Start) || Start->getOperand(0).getReg() == ARM::LR) {
- InsertPt = MachineBasicBlock::iterator(Start);
- InsertBB = Start->getParent();
- return true;
- }
-
- // We've found no suitable LR def and Start doesn't use LR directly. Can we
- // just define LR anyway?
- if (!RDA.isSafeToDefRegAt(Start, MCRegister::from(ARM::LR)))
- return false;
-
- InsertPt = MachineBasicBlock::iterator(Start);
- InsertBB = Start->getParent();
- return true;
- };
-
- if (!FindStartInsertionPoint(Start, Dec, StartInsertPt, StartInsertBB, RDA,
- ToRemove)) {
+ // Check branch target ranges: WLS[TP] can only branch forwards and LE[TP]
+ // can only jump back.
+ auto ValidateRanges = [](MachineInstr *Start, MachineInstr *End,
+ ARMBasicBlockUtils *BBUtils, MachineLoop &ML) {
+ MachineBasicBlock *TgtBB = End->getOpcode() == ARM::t2LoopEnd
+ ? End->getOperand(1).getMBB()
+ : End->getOperand(2).getMBB();
+    // TODO: Maybe there are cases where the target doesn't have to be the header,
+ // but for now be safe and revert.
+ if (TgtBB != ML.getHeader()) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targeting header.\n");
+ return false;
+ }
+
+ // The WLS and LE instructions have 12-bits for the label offset. WLS
+ // requires a positive offset, while LE uses negative.
+ if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML.getHeader()) ||
+ !BBUtils->isBBInRange(End, ML.getHeader(), 4094)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n");
+ return false;
+ }
+
+ if (Start->getOpcode() == ARM::t2WhileLoopStart &&
+ (BBUtils->getOffsetOf(Start) >
+ BBUtils->getOffsetOf(Start->getOperand(1).getMBB()) ||
+ !BBUtils->isBBInRange(Start, Start->getOperand(1).getMBB(), 4094))) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n");
+ return false;
+ }
+ return true;
+ };
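// The range checks in the lambda above are signed-distance tests against the
// 12-bit branch reach used here (4094 bytes): WLS must branch forwards to its
// target and LE must branch backwards to the loop header. A hedged sketch
// with plain byte offsets standing in for the BBUtils queries:
static bool wlsTargetInRange(int WLSOffset, int TargetOffset) {
  return TargetOffset >= WLSOffset &&          // WLS only branches forwards
         TargetOffset - WLSOffset <= 4094;
}

static bool leTargetInRange(int LEOffset, int HeaderOffset) {
  return HeaderOffset <= LEOffset &&           // LE only branches backwards
         LEOffset - HeaderOffset <= 4094;
}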
+
+ // Find a suitable position to insert the loop start instruction. It needs to
+ // be able to safely define LR.
+ auto FindStartInsertionPoint = [](MachineInstr *Start, MachineInstr *Dec,
+ MachineBasicBlock::iterator &InsertPt,
+ MachineBasicBlock *&InsertBB,
+ ReachingDefAnalysis &RDA,
+ InstSet &ToRemove) {
+ // For a t2DoLoopStart it is always valid to use the start insertion point.
+ // For WLS we can define LR if LR already contains the same value.
+ if (isDo(Start) || Start->getOperand(0).getReg() == ARM::LR) {
+ InsertPt = MachineBasicBlock::iterator(Start);
+ InsertBB = Start->getParent();
+ return true;
+ }
+
+ // We've found no suitable LR def and Start doesn't use LR directly. Can we
+ // just define LR anyway?
+ if (!RDA.isSafeToDefRegAt(Start, MCRegister::from(ARM::LR)))
+ return false;
+
+ InsertPt = MachineBasicBlock::iterator(Start);
+ InsertBB = Start->getParent();
+ return true;
+ };
+
+ if (!FindStartInsertionPoint(Start, Dec, StartInsertPt, StartInsertBB, RDA,
+ ToRemove)) {
LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n");
Revert = true;
return;
- }
- LLVM_DEBUG(if (StartInsertPt == StartInsertBB->end())
- dbgs() << "ARM Loops: Will insert LoopStart at end of block\n";
- else
- dbgs() << "ARM Loops: Will insert LoopStart at "
- << *StartInsertPt
- );
-
- Revert = !ValidateRanges(Start, End, BBUtils, ML);
- CannotTailPredicate = !ValidateTailPredicate();
-}
-
-bool LowOverheadLoop::AddVCTP(MachineInstr *MI) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Adding VCTP: " << *MI);
- if (VCTPs.empty()) {
- VCTPs.push_back(MI);
- return true;
+ }
+ LLVM_DEBUG(if (StartInsertPt == StartInsertBB->end())
+ dbgs() << "ARM Loops: Will insert LoopStart at end of block\n";
+ else
+ dbgs() << "ARM Loops: Will insert LoopStart at "
+ << *StartInsertPt
+ );
+
+ Revert = !ValidateRanges(Start, End, BBUtils, ML);
+ CannotTailPredicate = !ValidateTailPredicate();
+}
+
+bool LowOverheadLoop::AddVCTP(MachineInstr *MI) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Adding VCTP: " << *MI);
+ if (VCTPs.empty()) {
+ VCTPs.push_back(MI);
+ return true;
}
- // If we find another VCTP, check whether it uses the same value as the main VCTP.
- // If it does, store it in the VCTPs set, else refuse it.
- MachineInstr *Prev = VCTPs.back();
- if (!Prev->getOperand(1).isIdenticalTo(MI->getOperand(1)) ||
- !RDA.hasSameReachingDef(Prev, MI, MI->getOperand(1).getReg().asMCReg())) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Found VCTP with a different reaching "
- "definition from the main VCTP");
- return false;
- }
- VCTPs.push_back(MI);
- return true;
+ // If we find another VCTP, check whether it uses the same value as the main VCTP.
+ // If it does, store it in the VCTPs set, else refuse it.
+ MachineInstr *Prev = VCTPs.back();
+ if (!Prev->getOperand(1).isIdenticalTo(MI->getOperand(1)) ||
+ !RDA.hasSameReachingDef(Prev, MI, MI->getOperand(1).getReg().asMCReg())) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Found VCTP with a different reaching "
+ "definition from the main VCTP");
+ return false;
+ }
+ VCTPs.push_back(MI);
+ return true;
}
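// Equivalence of multiple VCTPs, as checked above, needs two things: the new
// VCTP must read the same element-count operand as the one already recorded,
// and both reads must see the same reaching definition. A toy sketch with
// plain values in place of MachineOperand and ReachingDefAnalysis:
#include <vector>

struct ToyVCTP {
  int ElementCountReg; // register operand (operand 1 in the code above)
  int ReachingDefId;   // identity of the definition reaching that operand
};

static bool addVCTPToy(std::vector<ToyVCTP> &VCTPs, const ToyVCTP &MI) {
  if (VCTPs.empty()) {
    VCTPs.push_back(MI);
    return true;
  }
  const ToyVCTP &Prev = VCTPs.back();
  if (Prev.ElementCountReg != MI.ElementCountReg ||
      Prev.ReachingDefId != MI.ReachingDefId)
    return false; // different element count: refuse this VCTP
  VCTPs.push_back(MI);
  return true;
}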
bool LowOverheadLoop::ValidateMVEInst(MachineInstr* MI) {
if (CannotTailPredicate)
return false;
- if (!shouldInspect(*MI))
- return true;
+ if (!shouldInspect(*MI))
+ return true;
- if (MI->getOpcode() == ARM::MVE_VPSEL ||
- MI->getOpcode() == ARM::MVE_VPNOT) {
+ if (MI->getOpcode() == ARM::MVE_VPSEL ||
+ MI->getOpcode() == ARM::MVE_VPNOT) {
// TODO: Allow VPSEL and VPNOT, we currently cannot because:
// 1) It will use the VPR as a predicate operand, but doesn't have to be
// instead a VPT block, which means we can assert while building up
@@ -1142,24 +1142,24 @@ bool LowOverheadLoop::ValidateMVEInst(MachineInstr* MI) {
return false;
}
- // Record all VCTPs and check that they're equivalent to one another.
- if (isVCTP(MI) && !AddVCTP(MI))
- return false;
-
- // Inspect uses first so that any instructions that alter the VPR don't
- // alter the predicate upon themselves.
- const MCInstrDesc &MCID = MI->getDesc();
+ // Record all VCTPs and check that they're equivalent to one another.
+ if (isVCTP(MI) && !AddVCTP(MI))
+ return false;
+
+ // Inspect uses first so that any instructions that alter the VPR don't
+ // alter the predicate upon themselves.
+ const MCInstrDesc &MCID = MI->getDesc();
bool IsUse = false;
- unsigned LastOpIdx = MI->getNumOperands() - 1;
- for (auto &Op : enumerate(reverse(MCID.operands()))) {
- const MachineOperand &MO = MI->getOperand(LastOpIdx - Op.index());
- if (!MO.isReg() || !MO.isUse() || MO.getReg() != ARM::VPR)
+ unsigned LastOpIdx = MI->getNumOperands() - 1;
+ for (auto &Op : enumerate(reverse(MCID.operands()))) {
+ const MachineOperand &MO = MI->getOperand(LastOpIdx - Op.index());
+ if (!MO.isReg() || !MO.isUse() || MO.getReg() != ARM::VPR)
continue;
- if (ARM::isVpred(Op.value().OperandType)) {
- VPTState::addInst(MI);
+ if (ARM::isVpred(Op.value().OperandType)) {
+ VPTState::addInst(MI);
IsUse = true;
- } else if (MI->getOpcode() != ARM::MVE_VPST) {
+ } else if (MI->getOpcode() != ARM::MVE_VPST) {
LLVM_DEBUG(dbgs() << "ARM Loops: Found instruction using vpr: " << *MI);
return false;
}
@@ -1168,36 +1168,36 @@ bool LowOverheadLoop::ValidateMVEInst(MachineInstr* MI) {
// If we find an instruction that has been marked as not valid for tail
// predication, only allow the instruction if it's contained within a valid
// VPT block.
- bool RequiresExplicitPredication =
- (MCID.TSFlags & ARMII::ValidForTailPredication) == 0;
- if (isDomainMVE(MI) && RequiresExplicitPredication) {
- LLVM_DEBUG(if (!IsUse)
- dbgs() << "ARM Loops: Can't tail predicate: " << *MI);
- return IsUse;
+ bool RequiresExplicitPredication =
+ (MCID.TSFlags & ARMII::ValidForTailPredication) == 0;
+ if (isDomainMVE(MI) && RequiresExplicitPredication) {
+ LLVM_DEBUG(if (!IsUse)
+ dbgs() << "ARM Loops: Can't tail predicate: " << *MI);
+ return IsUse;
}
// If the instruction is already explicitly predicated, then the conversion
- // will be fine, but ensure that all store operations are predicated.
- if (MI->mayStore())
- return IsUse;
-
- // If this instruction defines the VPR, update the predicate for the
-  // following instructions.
- if (isVectorPredicate(MI)) {
- // Clear the existing predicate when we're not in VPT Active state,
- // otherwise we add to it.
- if (!isVectorPredicated(MI))
- VPTState::resetPredicate(MI);
- else
- VPTState::addPredicate(MI);
- }
-
- // Finally once the predicate has been modified, we can start a new VPT
- // block if necessary.
- if (isVPTOpcode(MI->getOpcode()))
- VPTState::CreateVPTBlock(MI);
-
- return true;
+ // will be fine, but ensure that all store operations are predicated.
+ if (MI->mayStore())
+ return IsUse;
+
+ // If this instruction defines the VPR, update the predicate for the
+  // following instructions.
+ if (isVectorPredicate(MI)) {
+ // Clear the existing predicate when we're not in VPT Active state,
+ // otherwise we add to it.
+ if (!isVectorPredicated(MI))
+ VPTState::resetPredicate(MI);
+ else
+ VPTState::addPredicate(MI);
+ }
+
+ // Finally once the predicate has been modified, we can start a new VPT
+ // block if necessary.
+ if (isVPTOpcode(MI->getOpcode()))
+ VPTState::CreateVPTBlock(MI);
+
+ return true;
}
bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) {
@@ -1220,7 +1220,7 @@ bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) {
bool Changed = false;
for (auto ML : *MLI) {
- if (ML->isOutermost())
+ if (ML->isOutermost())
Changed |= ProcessLoop(ML);
}
Changed |= RevertNonLoops();
@@ -1279,8 +1279,8 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
LoLoop.Dec = &MI;
else if (MI.getOpcode() == ARM::t2LoopEnd)
LoLoop.End = &MI;
- else if (MI.getOpcode() == ARM::t2LoopEndDec)
- LoLoop.End = LoLoop.Dec = &MI;
+ else if (MI.getOpcode() == ARM::t2LoopEndDec)
+ LoLoop.End = LoLoop.Dec = &MI;
else if (isLoopStart(MI))
LoLoop.Start = &MI;
else if (MI.getDesc().isCall()) {
@@ -1303,18 +1303,18 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
return false;
}
- // Check that the only instruction using LoopDec is LoopEnd. This can only
- // happen when the Dec and End are separate, not a single t2LoopEndDec.
+ // Check that the only instruction using LoopDec is LoopEnd. This can only
+ // happen when the Dec and End are separate, not a single t2LoopEndDec.
// TODO: Check for copy chains that really have no effect.
- if (LoLoop.Dec != LoLoop.End) {
- SmallPtrSet<MachineInstr *, 2> Uses;
- RDA->getReachingLocalUses(LoLoop.Dec, MCRegister::from(ARM::LR), Uses);
- if (Uses.size() > 1 || !Uses.count(LoLoop.End)) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Unable to remove LoopDec.\n");
- LoLoop.Revert = true;
- }
+ if (LoLoop.Dec != LoLoop.End) {
+ SmallPtrSet<MachineInstr *, 2> Uses;
+ RDA->getReachingLocalUses(LoLoop.Dec, MCRegister::from(ARM::LR), Uses);
+ if (Uses.size() > 1 || !Uses.count(LoLoop.End)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Unable to remove LoopDec.\n");
+ LoLoop.Revert = true;
+ }
}
- LoLoop.Validate(BBUtils.get());
+ LoLoop.Validate(BBUtils.get());
Expand(LoLoop);
return true;
}
@@ -1329,14 +1329,14 @@ void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const {
unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?
ARM::tBcc : ARM::t2Bcc;
- RevertWhileLoopStart(MI, TII, BrOpc);
-}
-
-void ARMLowOverheadLoops::RevertDo(MachineInstr *MI) const {
- LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to mov: " << *MI);
- RevertDoLoopStart(MI, TII);
+ RevertWhileLoopStart(MI, TII, BrOpc);
}
+void ARMLowOverheadLoops::RevertDo(MachineInstr *MI) const {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to mov: " << *MI);
+ RevertDoLoopStart(MI, TII);
+}
+
bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const {
LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI);
MachineBasicBlock *MBB = MI->getParent();
@@ -1349,10 +1349,10 @@ bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const {
}
// If nothing defines CPSR between LoopDec and LoopEnd, use a t2SUBS.
- bool SetFlags =
- RDA->isSafeToDefRegAt(MI, MCRegister::from(ARM::CPSR), Ignore);
+ bool SetFlags =
+ RDA->isSafeToDefRegAt(MI, MCRegister::from(ARM::CPSR), Ignore);
- llvm::RevertLoopDec(MI, TII, SetFlags);
+ llvm::RevertLoopDec(MI, TII, SetFlags);
return SetFlags;
}
@@ -1364,35 +1364,35 @@ void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI, bool SkipCmp) const {
unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?
ARM::tBcc : ARM::t2Bcc;
- llvm::RevertLoopEnd(MI, TII, BrOpc, SkipCmp);
-}
-
-// Generate a subs, or sub and cmp, and a branch instead of an LE.
-void ARMLowOverheadLoops::RevertLoopEndDec(MachineInstr *MI) const {
- LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to subs, br: " << *MI);
- assert(MI->getOpcode() == ARM::t2LoopEndDec && "Expected a t2LoopEndDec!");
- MachineBasicBlock *MBB = MI->getParent();
-
+ llvm::RevertLoopEnd(MI, TII, BrOpc, SkipCmp);
+}
+
+// Generate a subs, or sub and cmp, and a branch instead of an LE.
+void ARMLowOverheadLoops::RevertLoopEndDec(MachineInstr *MI) const {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to subs, br: " << *MI);
+ assert(MI->getOpcode() == ARM::t2LoopEndDec && "Expected a t2LoopEndDec!");
+ MachineBasicBlock *MBB = MI->getParent();
+
MachineInstrBuilder MIB =
- BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
- MIB.addDef(ARM::LR);
- MIB.add(MI->getOperand(1));
- MIB.addImm(1);
- MIB.addImm(ARMCC::AL);
- MIB.addReg(ARM::NoRegister);
- MIB.addReg(ARM::CPSR);
- MIB->getOperand(5).setIsDef(true);
-
- MachineBasicBlock *DestBB = MI->getOperand(2).getMBB();
- unsigned BrOpc =
- BBUtils->isBBInRange(MI, DestBB, 254) ? ARM::tBcc : ARM::t2Bcc;
-
- // Create bne
- MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
- MIB.add(MI->getOperand(2)); // branch target
- MIB.addImm(ARMCC::NE); // condition code
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
+ MIB.addDef(ARM::LR);
+ MIB.add(MI->getOperand(1));
+ MIB.addImm(1);
+ MIB.addImm(ARMCC::AL);
+ MIB.addReg(ARM::NoRegister);
MIB.addReg(ARM::CPSR);
-
+ MIB->getOperand(5).setIsDef(true);
+
+ MachineBasicBlock *DestBB = MI->getOperand(2).getMBB();
+ unsigned BrOpc =
+ BBUtils->isBBInRange(MI, DestBB, 254) ? ARM::tBcc : ARM::t2Bcc;
+
+ // Create bne
+ MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
+ MIB.add(MI->getOperand(2)); // branch target
+ MIB.addImm(ARMCC::NE); // condition code
+ MIB.addReg(ARM::CPSR);
+
MI->eraseFromParent();
}
@@ -1403,7 +1403,7 @@ void ARMLowOverheadLoops::RevertLoopEndDec(MachineInstr *MI) const {
//
// $lr = big-itercount-expression
// ..
-// $lr = t2DoLoopStart renamable $lr
+// $lr = t2DoLoopStart renamable $lr
// vector.body:
// ..
// $vpr = MVE_VCTP32 renamable $r3
@@ -1426,8 +1426,8 @@ void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) {
LLVM_DEBUG(dbgs() << "ARM Loops: Trying DCE on loop iteration count.\n");
- MachineInstr *Def =
- RDA->getMIOperand(LoLoop.Start, isDo(LoLoop.Start) ? 1 : 0);
+ MachineInstr *Def =
+ RDA->getMIOperand(LoLoop.Start, isDo(LoLoop.Start) ? 1 : 0);
if (!Def) {
LLVM_DEBUG(dbgs() << "ARM Loops: Couldn't find iteration count.\n");
return;
@@ -1435,8 +1435,8 @@ void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) {
// Collect and remove the users of iteration count.
SmallPtrSet<MachineInstr*, 4> Killed = { LoLoop.Start, LoLoop.Dec,
- LoLoop.End };
- if (!TryRemove(Def, *RDA, LoLoop.ToRemove, Killed))
+ LoLoop.End };
+ if (!TryRemove(Def, *RDA, LoLoop.ToRemove, Killed))
LLVM_DEBUG(dbgs() << "ARM Loops: Unsafe to remove loop iteration count.\n");
}
@@ -1446,18 +1446,18 @@ MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {
// calculate the number of loop iterations.
IterationCountDCE(LoLoop);
- MachineBasicBlock::iterator InsertPt = LoLoop.StartInsertPt;
+ MachineBasicBlock::iterator InsertPt = LoLoop.StartInsertPt;
MachineInstr *Start = LoLoop.Start;
- MachineBasicBlock *MBB = LoLoop.StartInsertBB;
+ MachineBasicBlock *MBB = LoLoop.StartInsertBB;
unsigned Opc = LoLoop.getStartOpcode();
- MachineOperand &Count = LoLoop.getLoopStartOperand();
+ MachineOperand &Count = LoLoop.getLoopStartOperand();
MachineInstrBuilder MIB =
- BuildMI(*MBB, InsertPt, Start->getDebugLoc(), TII->get(Opc));
+ BuildMI(*MBB, InsertPt, Start->getDebugLoc(), TII->get(Opc));
MIB.addDef(ARM::LR);
MIB.add(Count);
- if (!isDo(Start))
+ if (!isDo(Start))
MIB.add(Start->getOperand(1));
LoLoop.ToRemove.insert(Start);
@@ -1467,50 +1467,50 @@ MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {
void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) {
auto RemovePredicate = [](MachineInstr *MI) {
- if (MI->isDebugInstr())
- return;
+ if (MI->isDebugInstr())
+ return;
LLVM_DEBUG(dbgs() << "ARM Loops: Removing predicate from: " << *MI);
- int PIdx = llvm::findFirstVPTPredOperandIdx(*MI);
- assert(PIdx >= 1 && "Trying to unpredicate a non-predicated instruction");
- assert(MI->getOperand(PIdx).getImm() == ARMVCC::Then &&
- "Expected Then predicate!");
- MI->getOperand(PIdx).setImm(ARMVCC::None);
- MI->getOperand(PIdx + 1).setReg(0);
+ int PIdx = llvm::findFirstVPTPredOperandIdx(*MI);
+ assert(PIdx >= 1 && "Trying to unpredicate a non-predicated instruction");
+ assert(MI->getOperand(PIdx).getImm() == ARMVCC::Then &&
+ "Expected Then predicate!");
+ MI->getOperand(PIdx).setImm(ARMVCC::None);
+ MI->getOperand(PIdx + 1).setReg(0);
};
for (auto &Block : LoLoop.getVPTBlocks()) {
- SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
-
- auto ReplaceVCMPWithVPT = [&](MachineInstr *&TheVCMP, MachineInstr *At) {
- assert(TheVCMP && "Replacing a removed or non-existent VCMP");
- // Replace the VCMP with a VPT
- MachineInstrBuilder MIB =
- BuildMI(*At->getParent(), At, At->getDebugLoc(),
- TII->get(VCMPOpcodeToVPT(TheVCMP->getOpcode())));
- MIB.addImm(ARMVCC::Then);
- // Register one
- MIB.add(TheVCMP->getOperand(1));
- // Register two
- MIB.add(TheVCMP->getOperand(2));
- // The comparison code, e.g. ge, eq, lt
- MIB.add(TheVCMP->getOperand(3));
- LLVM_DEBUG(dbgs() << "ARM Loops: Combining with VCMP to VPT: " << *MIB);
- LoLoop.BlockMasksToRecompute.insert(MIB.getInstr());
- LoLoop.ToRemove.insert(TheVCMP);
- TheVCMP = nullptr;
- };
-
- if (VPTState::isEntryPredicatedOnVCTP(Block, /*exclusive*/ true)) {
- MachineInstr *VPST = Insts.front();
- if (VPTState::hasUniformPredicate(Block)) {
-        // A vpt block starting with VPST is only predicated upon vctp and has no
- // internal vpr defs:
- // - Remove vpst.
- // - Unpredicate the remaining instructions.
- LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST);
- for (unsigned i = 1; i < Insts.size(); ++i)
- RemovePredicate(Insts[i]);
- } else {
+ SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
+
+ auto ReplaceVCMPWithVPT = [&](MachineInstr *&TheVCMP, MachineInstr *At) {
+ assert(TheVCMP && "Replacing a removed or non-existent VCMP");
+ // Replace the VCMP with a VPT
+ MachineInstrBuilder MIB =
+ BuildMI(*At->getParent(), At, At->getDebugLoc(),
+ TII->get(VCMPOpcodeToVPT(TheVCMP->getOpcode())));
+ MIB.addImm(ARMVCC::Then);
+ // Register one
+ MIB.add(TheVCMP->getOperand(1));
+ // Register two
+ MIB.add(TheVCMP->getOperand(2));
+ // The comparison code, e.g. ge, eq, lt
+ MIB.add(TheVCMP->getOperand(3));
+ LLVM_DEBUG(dbgs() << "ARM Loops: Combining with VCMP to VPT: " << *MIB);
+ LoLoop.BlockMasksToRecompute.insert(MIB.getInstr());
+ LoLoop.ToRemove.insert(TheVCMP);
+ TheVCMP = nullptr;
+ };
+
+ if (VPTState::isEntryPredicatedOnVCTP(Block, /*exclusive*/ true)) {
+ MachineInstr *VPST = Insts.front();
+ if (VPTState::hasUniformPredicate(Block)) {
+        // A vpt block starting with VPST is only predicated upon vctp and has no
+ // internal vpr defs:
+ // - Remove vpst.
+ // - Unpredicate the remaining instructions.
+ LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST);
+ for (unsigned i = 1; i < Insts.size(); ++i)
+ RemovePredicate(Insts[i]);
+ } else {
// The VPT block has a non-uniform predicate but it uses a vpst and its
// entry is guarded only by a vctp, which means we:
// - Need to remove the original vpst.
@@ -1518,88 +1518,88 @@ void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) {
// we come across the divergent vpr def.
      //   - Insert a new vpst to predicate the instruction(s) that follow
// the divergent vpr def.
- MachineInstr *Divergent = VPTState::getDivergent(Block);
- MachineBasicBlock *MBB = Divergent->getParent();
- auto DivergentNext = ++MachineBasicBlock::iterator(Divergent);
- while (DivergentNext != MBB->end() && DivergentNext->isDebugInstr())
- ++DivergentNext;
-
- bool DivergentNextIsPredicated =
- DivergentNext != MBB->end() &&
- getVPTInstrPredicate(*DivergentNext) != ARMVCC::None;
-
- for (auto I = ++MachineBasicBlock::iterator(VPST), E = DivergentNext;
- I != E; ++I)
+ MachineInstr *Divergent = VPTState::getDivergent(Block);
+ MachineBasicBlock *MBB = Divergent->getParent();
+ auto DivergentNext = ++MachineBasicBlock::iterator(Divergent);
+ while (DivergentNext != MBB->end() && DivergentNext->isDebugInstr())
+ ++DivergentNext;
+
+ bool DivergentNextIsPredicated =
+ DivergentNext != MBB->end() &&
+ getVPTInstrPredicate(*DivergentNext) != ARMVCC::None;
+
+ for (auto I = ++MachineBasicBlock::iterator(VPST), E = DivergentNext;
+ I != E; ++I)
RemovePredicate(&*I);
- // Check if the instruction defining vpr is a vcmp so it can be combined
-      // with the VPST. This should be the divergent instruction.
- MachineInstr *VCMP =
- VCMPOpcodeToVPT(Divergent->getOpcode()) != 0 ? Divergent : nullptr;
-
- if (DivergentNextIsPredicated) {
-        // Insert a VPST at the divergent instruction only if the next
-        // instruction would actually use it. A VCMP following a VPST can be
- // merged into a VPT so do that instead if the VCMP exists.
- if (!VCMP) {
- // Create a VPST (with a null mask for now, we'll recompute it
- // later)
- MachineInstrBuilder MIB =
- BuildMI(*Divergent->getParent(), Divergent,
- Divergent->getDebugLoc(), TII->get(ARM::MVE_VPST));
- MIB.addImm(0);
- LLVM_DEBUG(dbgs() << "ARM Loops: Created VPST: " << *MIB);
- LoLoop.BlockMasksToRecompute.insert(MIB.getInstr());
- } else {
- // No RDA checks are necessary here since the VPST would have been
- // directly after the VCMP
- ReplaceVCMPWithVPT(VCMP, VCMP);
- }
+ // Check if the instruction defining vpr is a vcmp so it can be combined
+      // with the VPST. This should be the divergent instruction.
+ MachineInstr *VCMP =
+ VCMPOpcodeToVPT(Divergent->getOpcode()) != 0 ? Divergent : nullptr;
+
+ if (DivergentNextIsPredicated) {
+        // Insert a VPST at the divergent instruction only if the next
+        // instruction would actually use it. A VCMP following a VPST can be
+ // merged into a VPT so do that instead if the VCMP exists.
+ if (!VCMP) {
+ // Create a VPST (with a null mask for now, we'll recompute it
+ // later)
+ MachineInstrBuilder MIB =
+ BuildMI(*Divergent->getParent(), Divergent,
+ Divergent->getDebugLoc(), TII->get(ARM::MVE_VPST));
+ MIB.addImm(0);
+ LLVM_DEBUG(dbgs() << "ARM Loops: Created VPST: " << *MIB);
+ LoLoop.BlockMasksToRecompute.insert(MIB.getInstr());
+ } else {
+ // No RDA checks are necessary here since the VPST would have been
+ // directly after the VCMP
+ ReplaceVCMPWithVPT(VCMP, VCMP);
+ }
}
}
- LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST);
- LoLoop.ToRemove.insert(VPST);
- } else if (Block.containsVCTP()) {
- // The vctp will be removed, so either the entire block will be dead or
- // the block mask of the vp(s)t will need to be recomputed.
- MachineInstr *VPST = Insts.front();
- if (Block.size() == 2) {
- assert(VPST->getOpcode() == ARM::MVE_VPST &&
- "Found a VPST in an otherwise empty vpt block");
- LoLoop.ToRemove.insert(VPST);
- } else
- LoLoop.BlockMasksToRecompute.insert(VPST);
- } else if (Insts.front()->getOpcode() == ARM::MVE_VPST) {
- // If this block starts with a VPST then attempt to merge it with the
-    // preceding un-merged VCMP into a VPT. This VCMP comes from a VPT
- // block that no longer exists
- MachineInstr *VPST = Insts.front();
- auto Next = ++MachineBasicBlock::iterator(VPST);
- assert(getVPTInstrPredicate(*Next) != ARMVCC::None &&
- "The instruction after a VPST must be predicated");
- (void)Next;
- MachineInstr *VprDef = RDA->getUniqueReachingMIDef(VPST, ARM::VPR);
- if (VprDef && VCMPOpcodeToVPT(VprDef->getOpcode()) &&
- !LoLoop.ToRemove.contains(VprDef)) {
- MachineInstr *VCMP = VprDef;
- // The VCMP and VPST can only be merged if the VCMP's operands will have
- // the same values at the VPST.
- // If any of the instructions between the VCMP and VPST are predicated
- // then a different code path is expected to have merged the VCMP and
- // VPST already.
- if (!std::any_of(++MachineBasicBlock::iterator(VCMP),
- MachineBasicBlock::iterator(VPST), hasVPRUse) &&
- RDA->hasSameReachingDef(VCMP, VPST, VCMP->getOperand(1).getReg()) &&
- RDA->hasSameReachingDef(VCMP, VPST, VCMP->getOperand(2).getReg())) {
- ReplaceVCMPWithVPT(VCMP, VPST);
- LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST);
- LoLoop.ToRemove.insert(VPST);
+ LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST);
+ LoLoop.ToRemove.insert(VPST);
+ } else if (Block.containsVCTP()) {
+ // The vctp will be removed, so either the entire block will be dead or
+ // the block mask of the vp(s)t will need to be recomputed.
+ MachineInstr *VPST = Insts.front();
+ if (Block.size() == 2) {
+ assert(VPST->getOpcode() == ARM::MVE_VPST &&
+ "Found a VPST in an otherwise empty vpt block");
+ LoLoop.ToRemove.insert(VPST);
+ } else
+ LoLoop.BlockMasksToRecompute.insert(VPST);
+ } else if (Insts.front()->getOpcode() == ARM::MVE_VPST) {
+ // If this block starts with a VPST then attempt to merge it with the
+    // preceding un-merged VCMP into a VPT. This VCMP comes from a VPT
+ // block that no longer exists
+ MachineInstr *VPST = Insts.front();
+ auto Next = ++MachineBasicBlock::iterator(VPST);
+ assert(getVPTInstrPredicate(*Next) != ARMVCC::None &&
+ "The instruction after a VPST must be predicated");
+ (void)Next;
+ MachineInstr *VprDef = RDA->getUniqueReachingMIDef(VPST, ARM::VPR);
+ if (VprDef && VCMPOpcodeToVPT(VprDef->getOpcode()) &&
+ !LoLoop.ToRemove.contains(VprDef)) {
+ MachineInstr *VCMP = VprDef;
+ // The VCMP and VPST can only be merged if the VCMP's operands will have
+ // the same values at the VPST.
+ // If any of the instructions between the VCMP and VPST are predicated
+ // then a different code path is expected to have merged the VCMP and
+ // VPST already.
+ if (!std::any_of(++MachineBasicBlock::iterator(VCMP),
+ MachineBasicBlock::iterator(VPST), hasVPRUse) &&
+ RDA->hasSameReachingDef(VCMP, VPST, VCMP->getOperand(1).getReg()) &&
+ RDA->hasSameReachingDef(VCMP, VPST, VCMP->getOperand(2).getReg())) {
+ ReplaceVCMPWithVPT(VCMP, VPST);
+ LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST);
+ LoLoop.ToRemove.insert(VPST);
}
}
}
}
-
- LoLoop.ToRemove.insert(LoLoop.VCTPs.begin(), LoLoop.VCTPs.end());
+
+ LoLoop.ToRemove.insert(LoLoop.VCTPs.begin(), LoLoop.VCTPs.end());
}
void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
@@ -1613,9 +1613,9 @@ void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
MachineInstrBuilder MIB = BuildMI(*MBB, End, End->getDebugLoc(),
TII->get(Opc));
MIB.addDef(ARM::LR);
- unsigned Off = LoLoop.Dec == LoLoop.End ? 1 : 0;
- MIB.add(End->getOperand(Off + 0));
- MIB.add(End->getOperand(Off + 1));
+ unsigned Off = LoLoop.Dec == LoLoop.End ? 1 : 0;
+ MIB.add(End->getOperand(Off + 0));
+ MIB.add(End->getOperand(Off + 1));
LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB);
LoLoop.ToRemove.insert(LoLoop.Dec);
LoLoop.ToRemove.insert(End);
@@ -1643,17 +1643,17 @@ void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
if (LoLoop.Start->getOpcode() == ARM::t2WhileLoopStart)
RevertWhile(LoLoop.Start);
else
- RevertDo(LoLoop.Start);
- if (LoLoop.Dec == LoLoop.End)
- RevertLoopEndDec(LoLoop.End);
- else
- RevertLoopEnd(LoLoop.End, RevertLoopDec(LoLoop.Dec));
+ RevertDo(LoLoop.Start);
+ if (LoLoop.Dec == LoLoop.End)
+ RevertLoopEndDec(LoLoop.End);
+ else
+ RevertLoopEnd(LoLoop.End, RevertLoopDec(LoLoop.Dec));
} else {
LoLoop.Start = ExpandLoopStart(LoLoop);
RemoveDeadBranch(LoLoop.Start);
LoLoop.End = ExpandLoopEnd(LoLoop);
RemoveDeadBranch(LoLoop.End);
- if (LoLoop.IsTailPredicationLegal())
+ if (LoLoop.IsTailPredicationLegal())
ConvertVPTBlocks(LoLoop);
for (auto *I : LoLoop.ToRemove) {
LLVM_DEBUG(dbgs() << "ARM Loops: Erasing " << *I);
@@ -1691,7 +1691,7 @@ bool ARMLowOverheadLoops::RevertNonLoops() {
SmallVector<MachineInstr*, 4> Starts;
SmallVector<MachineInstr*, 4> Decs;
SmallVector<MachineInstr*, 4> Ends;
- SmallVector<MachineInstr *, 4> EndDecs;
+ SmallVector<MachineInstr *, 4> EndDecs;
for (auto &I : MBB) {
if (isLoopStart(I))
@@ -1700,11 +1700,11 @@ bool ARMLowOverheadLoops::RevertNonLoops() {
Decs.push_back(&I);
else if (I.getOpcode() == ARM::t2LoopEnd)
Ends.push_back(&I);
- else if (I.getOpcode() == ARM::t2LoopEndDec)
- EndDecs.push_back(&I);
+ else if (I.getOpcode() == ARM::t2LoopEndDec)
+ EndDecs.push_back(&I);
}
- if (Starts.empty() && Decs.empty() && Ends.empty() && EndDecs.empty())
+ if (Starts.empty() && Decs.empty() && Ends.empty() && EndDecs.empty())
continue;
Changed = true;
@@ -1713,15 +1713,15 @@ bool ARMLowOverheadLoops::RevertNonLoops() {
if (Start->getOpcode() == ARM::t2WhileLoopStart)
RevertWhile(Start);
else
- RevertDo(Start);
+ RevertDo(Start);
}
for (auto *Dec : Decs)
RevertLoopDec(Dec);
for (auto *End : Ends)
RevertLoopEnd(End);
- for (auto *End : EndDecs)
- RevertLoopEndDec(End);
+ for (auto *End : EndDecs)
+ RevertLoopEndDec(End);
}
return Changed;
}
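
The reversion paths above (RevertWhile, RevertLoopEnd, RevertLoopEndDec) all choose between the 16-bit tBcc and the 32-bit t2Bcc encoding depending on whether BBUtils->isBBInRange(MI, DestBB, 254) holds, and RevertLoopEndDec lowers the combined t2LoopEndDec into a flag-setting subtract followed by that conditional branch. The standalone sketch below only models that selection for illustration: it is not LLVM API code, the names BranchKind and selectBranchOpcode are invented here, and it assumes a simple symmetric byte distance, which is a simplification of what isBBInRange actually measures.

// Minimal, self-contained model of the branch-encoding choice made by the
// reversion helpers above. Not LLVM code.
#include <cstdint>
#include <cstdio>

enum class BranchKind { TBcc /* 16-bit Thumb Bcc */, T2Bcc /* 32-bit Thumb-2 Bcc */ };

// Use the short encoding only when the branch target is close enough
// (mirrors the isBBInRange(MI, DestBB, 254) checks, simplified).
static BranchKind selectBranchOpcode(int64_t DistanceInBytes) {
  const int64_t Range = 254;
  return (DistanceInBytes >= -Range && DistanceInBytes <= Range)
             ? BranchKind::TBcc
             : BranchKind::T2Bcc;
}

int main() {
  // A reverted t2LoopEndDec is, in effect:
  //   subs lr, lr, #1   ; t2SUBri with a CPSR def
  //   bne  <loop-head>  ; tBcc or t2Bcc, chosen by distance
  std::printf("near -> %s\n",
              selectBranchOpcode(100) == BranchKind::TBcc ? "tBcc" : "t2Bcc");
  std::printf("far  -> %s\n",
              selectBranchOpcode(4000) == BranchKind::TBcc ? "tBcc" : "t2Bcc");
  return 0;
}
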
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMParallelDSP.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMParallelDSP.cpp
index 9a7c1f541a..cd3c3b4ca6 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMParallelDSP.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMParallelDSP.cpp
@@ -22,7 +22,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsARM.h"
@@ -202,7 +202,7 @@ namespace {
public:
WidenedLoad(SmallVectorImpl<LoadInst*> &Lds, LoadInst *Wide)
: NewLd(Wide) {
- append_range(Loads, Lds);
+ append_range(Loads, Lds);
}
LoadInst *getLoad() {
return NewLd;
@@ -374,7 +374,7 @@ bool ARMParallelDSP::RecordMemoryOps(BasicBlock *BB) {
DepMap RAWDeps;
// Record any writes that may alias a load.
- const auto Size = LocationSize::beforeOrAfterPointer();
+ const auto Size = LocationSize::beforeOrAfterPointer();
for (auto Write : Writes) {
for (auto Read : Loads) {
MemoryLocation ReadLoc =
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMPredicates.td b/contrib/libs/llvm12/lib/Target/ARM/ARMPredicates.td
index 2dc097566d..3c03b95e26 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMPredicates.td
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMPredicates.td
@@ -77,8 +77,8 @@ def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
AssemblerPredicate<(all_of HasV8_5aOps), "armv8.5a">;
def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">,
AssemblerPredicate<(all_of HasV8_6aOps), "armv8.6a">;
-def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">,
- AssemblerPredicate<(all_of HasV8_7aOps), "armv8.7a">;
+def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">,
+ AssemblerPredicate<(all_of HasV8_7aOps), "armv8.7a">;
def NoVFP : Predicate<"!Subtarget->hasVFP2Base()">;
def HasVFP2 : Predicate<"Subtarget->hasVFP2Base()">,
AssemblerPredicate<(all_of FeatureVFP2_SP), "VFP2">;
@@ -189,9 +189,9 @@ let RecomputePerFunction = 1 in {
def UseFPVMLx: Predicate<"((Subtarget->useFPVMLx() &&"
" TM.Options.AllowFPOpFusion != FPOpFusion::Fast) ||"
"Subtarget->hasMinSize())">;
- def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<ARMSubtarget>().hardenSlsBlr() }]>;
- def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<ARMSubtarget>().hardenSlsBlr() }]>;
-
+ def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<ARMSubtarget>().hardenSlsBlr() }]>;
+ def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<ARMSubtarget>().hardenSlsBlr() }]>;
+
}
def UseMulOps : Predicate<"Subtarget->useMulOps()">;
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMRegisterBankInfo.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMRegisterBankInfo.cpp
index 1a7f10a13e..eb905282dc 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -156,10 +156,10 @@ ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI)
"Subclass not added?");
assert(RBGPR.covers(*TRI.getRegClass(ARM::tcGPRRegClassID)) &&
"Subclass not added?");
- assert(RBGPR.covers(*TRI.getRegClass(ARM::GPRnoip_and_tcGPRRegClassID)) &&
+ assert(RBGPR.covers(*TRI.getRegClass(ARM::GPRnoip_and_tcGPRRegClassID)) &&
"Subclass not added?");
- assert(RBGPR.covers(*TRI.getRegClass(
- ARM::tGPREven_and_GPRnoip_and_tcGPRRegClassID)) &&
+ assert(RBGPR.covers(*TRI.getRegClass(
+ ARM::tGPREven_and_GPRnoip_and_tcGPRRegClassID)) &&
"Subclass not added?");
assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPROdd_and_tcGPRRegClassID)) &&
"Subclass not added?");
@@ -182,12 +182,12 @@ ARMRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
switch (RC.getID()) {
case GPRRegClassID:
case GPRwithAPSRRegClassID:
- case GPRnoipRegClassID:
+ case GPRnoipRegClassID:
case GPRnopcRegClassID:
- case GPRnoip_and_GPRnopcRegClassID:
+ case GPRnoip_and_GPRnopcRegClassID:
case rGPRRegClassID:
case GPRspRegClassID:
- case GPRnoip_and_tcGPRRegClassID:
+ case GPRnoip_and_tcGPRRegClassID:
case tcGPRRegClassID:
case tGPRRegClassID:
case tGPREvenRegClassID:
@@ -195,7 +195,7 @@ ARMRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
case tGPR_and_tGPREvenRegClassID:
case tGPR_and_tGPROddRegClassID:
case tGPREven_and_tcGPRRegClassID:
- case tGPREven_and_GPRnoip_and_tcGPRRegClassID:
+ case tGPREven_and_GPRnoip_and_tcGPRRegClassID:
case tGPROdd_and_tcGPRRegClassID:
return getRegBank(ARM::GPRRegBankID);
case HPRRegClassID:
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMRegisterInfo.td b/contrib/libs/llvm12/lib/Target/ARM/ARMRegisterInfo.td
index fe3243315d..8ac3e3c402 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMRegisterInfo.td
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMRegisterInfo.td
@@ -235,23 +235,23 @@ def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12),
let DiagnosticString = "operand must be a register in range [r0, r15]";
}
-// Register set that excludes registers that are reserved for procedure calls.
-// This is used for pseudo-instructions that are actually implemented using a
-// procedure call.
-def GPRnoip : RegisterClass<"ARM", [i32], 32, (sub GPR, R12, LR)> {
- // Allocate LR as the first CSR since it is always saved anyway.
- // For Thumb1 mode, we don't want to allocate hi regs at all, as we don't
- // know how to spill them. If we make our prologue/epilogue code smarter at
- // some point, we can go back to using the above allocation orders for the
- // Thumb1 instructions that know how to use hi regs.
- let AltOrders = [(add GPRnoip, GPRnoip), (trunc GPRnoip, 8),
- (add (trunc GPRnoip, 8), (shl GPRnoip, 8))];
- let AltOrderSelect = [{
- return MF.getSubtarget<ARMSubtarget>().getGPRAllocationOrder(MF);
- }];
- let DiagnosticString = "operand must be a register in range [r0, r14]";
-}
-
+// Register set that excludes registers that are reserved for procedure calls.
+// This is used for pseudo-instructions that are actually implemented using a
+// procedure call.
+def GPRnoip : RegisterClass<"ARM", [i32], 32, (sub GPR, R12, LR)> {
+ // Allocate LR as the first CSR since it is always saved anyway.
+ // For Thumb1 mode, we don't want to allocate hi regs at all, as we don't
+ // know how to spill them. If we make our prologue/epilogue code smarter at
+ // some point, we can go back to using the above allocation orders for the
+ // Thumb1 instructions that know how to use hi regs.
+ let AltOrders = [(add GPRnoip, GPRnoip), (trunc GPRnoip, 8),
+ (add (trunc GPRnoip, 8), (shl GPRnoip, 8))];
+ let AltOrderSelect = [{
+ return MF.getSubtarget<ARMSubtarget>().getGPRAllocationOrder(MF);
+ }];
+ let DiagnosticString = "operand must be a register in range [r0, r14]";
+}
+
// GPRs without the PC. Some ARM instructions do not allow the PC in
// certain operand slots, particularly as the destination. Primarily
// useful for disassembly.
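
The GPRnoip class defined above deliberately subtracts R12 and LR from GPR: when SLS hardening is enabled, indirect calls are implemented through a call to a thunk (the BLX_noip/tBLXr_noip forms handled in ARMSLSHardening.cpp below), linkers are allowed to clobber R12 on such calls, and the call itself writes LR, so neither register can safely hold the call target. The sketch below is a standalone illustration of that register filter, not LLVM code; the Reg enum and isAllowedForNoIPCall are invented names.

// Standalone illustration of the GPRnoip filter above, i.e. (sub GPR, R12, LR).
// Not LLVM code.
#include <cstdio>

enum Reg { R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, SP, LR, PC };

// A register may hold the target of a *_noip indirect call only if a
// procedure call cannot clobber or redefine it (linkers may clobber R12 on
// calls, and LR is written by the call itself).
static bool isAllowedForNoIPCall(Reg R) { return R != R12 && R != LR; }

int main() {
  std::printf("r3:  %d\n", isAllowedForNoIPCall(R3));  // 1
  std::printf("r12: %d\n", isAllowedForNoIPCall(R12)); // 0
  std::printf("lr:  %d\n", isAllowedForNoIPCall(LR));  // 0
  return 0;
}
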
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMSLSHardening.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMSLSHardening.cpp
index cfcc7d5a04..de2cd45c14 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMSLSHardening.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMSLSHardening.cpp
@@ -1,416 +1,416 @@
-//===- ARMSLSHardening.cpp - Harden Straight Line Misspeculation ----------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a pass to insert code to mitigate against side channel
-// vulnerabilities that may happen under straight line miss-speculation.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ARM.h"
-#include "ARMInstrInfo.h"
-#include "ARMSubtarget.h"
-#include "llvm/CodeGen/IndirectThunks.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/IR/DebugLoc.h"
-#include <cassert>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "arm-sls-hardening"
-
-#define ARM_SLS_HARDENING_NAME "ARM sls hardening pass"
-
-namespace {
-
-class ARMSLSHardening : public MachineFunctionPass {
-public:
- const TargetInstrInfo *TII;
- const ARMSubtarget *ST;
-
- static char ID;
-
- ARMSLSHardening() : MachineFunctionPass(ID) {
- initializeARMSLSHardeningPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnMachineFunction(MachineFunction &Fn) override;
-
- StringRef getPassName() const override { return ARM_SLS_HARDENING_NAME; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
-private:
- bool hardenReturnsAndBRs(MachineBasicBlock &MBB) const;
- bool hardenIndirectCalls(MachineBasicBlock &MBB) const;
- MachineBasicBlock &
- ConvertIndirectCallToIndirectJump(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator) const;
-};
-
-} // end anonymous namespace
-
-char ARMSLSHardening::ID = 0;
-
-INITIALIZE_PASS(ARMSLSHardening, "arm-sls-hardening",
- ARM_SLS_HARDENING_NAME, false, false)
-
-static void insertSpeculationBarrier(const ARMSubtarget *ST,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- DebugLoc DL,
- bool AlwaysUseISBDSB = false) {
- assert(MBBI != MBB.begin() &&
- "Must not insert SpeculationBarrierEndBB as only instruction in MBB.");
- assert(std::prev(MBBI)->isBarrier() &&
- "SpeculationBarrierEndBB must only follow unconditional control flow "
- "instructions.");
- assert(std::prev(MBBI)->isTerminator() &&
- "SpeculationBarrierEndBB must only follow terminators.");
- const TargetInstrInfo *TII = ST->getInstrInfo();
- assert(ST->hasDataBarrier() || ST->hasSB());
- bool ProduceSB = ST->hasSB() && !AlwaysUseISBDSB;
- unsigned BarrierOpc =
- ProduceSB ? (ST->isThumb() ? ARM::t2SpeculationBarrierSBEndBB
- : ARM::SpeculationBarrierSBEndBB)
- : (ST->isThumb() ? ARM::t2SpeculationBarrierISBDSBEndBB
- : ARM::SpeculationBarrierISBDSBEndBB);
- if (MBBI == MBB.end() || !isSpeculationBarrierEndBBOpcode(MBBI->getOpcode()))
- BuildMI(MBB, MBBI, DL, TII->get(BarrierOpc));
-}
-
-bool ARMSLSHardening::runOnMachineFunction(MachineFunction &MF) {
- ST = &MF.getSubtarget<ARMSubtarget>();
- TII = MF.getSubtarget().getInstrInfo();
-
- bool Modified = false;
- for (auto &MBB : MF) {
- Modified |= hardenReturnsAndBRs(MBB);
- Modified |= hardenIndirectCalls(MBB);
- }
-
- return Modified;
-}
-
-bool ARMSLSHardening::hardenReturnsAndBRs(MachineBasicBlock &MBB) const {
- if (!ST->hardenSlsRetBr())
- return false;
- assert(!ST->isThumb1Only());
- bool Modified = false;
- MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(), E = MBB.end();
- MachineBasicBlock::iterator NextMBBI;
- for (; MBBI != E; MBBI = NextMBBI) {
- MachineInstr &MI = *MBBI;
- NextMBBI = std::next(MBBI);
- if (isIndirectControlFlowNotComingBack(MI)) {
- assert(MI.isTerminator());
- assert(!TII->isPredicated(MI));
- insertSpeculationBarrier(ST, MBB, std::next(MBBI), MI.getDebugLoc());
- Modified = true;
- }
- }
- return Modified;
-}
-
-static const char SLSBLRNamePrefix[] = "__llvm_slsblr_thunk_";
-
-static const struct ThunkNameRegMode {
- const char* Name;
- Register Reg;
- bool isThumb;
-} SLSBLRThunks[] = {
- {"__llvm_slsblr_thunk_arm_r0", ARM::R0, false},
- {"__llvm_slsblr_thunk_arm_r1", ARM::R1, false},
- {"__llvm_slsblr_thunk_arm_r2", ARM::R2, false},
- {"__llvm_slsblr_thunk_arm_r3", ARM::R3, false},
- {"__llvm_slsblr_thunk_arm_r4", ARM::R4, false},
- {"__llvm_slsblr_thunk_arm_r5", ARM::R5, false},
- {"__llvm_slsblr_thunk_arm_r6", ARM::R6, false},
- {"__llvm_slsblr_thunk_arm_r7", ARM::R7, false},
- {"__llvm_slsblr_thunk_arm_r8", ARM::R8, false},
- {"__llvm_slsblr_thunk_arm_r9", ARM::R9, false},
- {"__llvm_slsblr_thunk_arm_r10", ARM::R10, false},
- {"__llvm_slsblr_thunk_arm_r11", ARM::R11, false},
- {"__llvm_slsblr_thunk_arm_sp", ARM::SP, false},
- {"__llvm_slsblr_thunk_arm_pc", ARM::PC, false},
- {"__llvm_slsblr_thunk_thumb_r0", ARM::R0, true},
- {"__llvm_slsblr_thunk_thumb_r1", ARM::R1, true},
- {"__llvm_slsblr_thunk_thumb_r2", ARM::R2, true},
- {"__llvm_slsblr_thunk_thumb_r3", ARM::R3, true},
- {"__llvm_slsblr_thunk_thumb_r4", ARM::R4, true},
- {"__llvm_slsblr_thunk_thumb_r5", ARM::R5, true},
- {"__llvm_slsblr_thunk_thumb_r6", ARM::R6, true},
- {"__llvm_slsblr_thunk_thumb_r7", ARM::R7, true},
- {"__llvm_slsblr_thunk_thumb_r8", ARM::R8, true},
- {"__llvm_slsblr_thunk_thumb_r9", ARM::R9, true},
- {"__llvm_slsblr_thunk_thumb_r10", ARM::R10, true},
- {"__llvm_slsblr_thunk_thumb_r11", ARM::R11, true},
- {"__llvm_slsblr_thunk_thumb_sp", ARM::SP, true},
- {"__llvm_slsblr_thunk_thumb_pc", ARM::PC, true},
-};
-
-namespace {
-struct SLSBLRThunkInserter : ThunkInserter<SLSBLRThunkInserter> {
- const char *getThunkPrefix() { return SLSBLRNamePrefix; }
- bool mayUseThunk(const MachineFunction &MF) {
- // FIXME: This could also check if there are any indirect calls in the
- // function to more accurately reflect if a thunk will be needed.
- return MF.getSubtarget<ARMSubtarget>().hardenSlsBlr();
- }
- void insertThunks(MachineModuleInfo &MMI);
- void populateThunk(MachineFunction &MF);
-};
-} // namespace
-
-void SLSBLRThunkInserter::insertThunks(MachineModuleInfo &MMI) {
- // FIXME: It probably would be possible to filter which thunks to produce
- // based on which registers are actually used in indirect calls in this
- // function. But would that be a worthwhile optimization?
- for (auto T : SLSBLRThunks)
- createThunkFunction(MMI, T.Name);
-}
-
-void SLSBLRThunkInserter::populateThunk(MachineFunction &MF) {
- // FIXME: How to better communicate Register number, rather than through
- // name and lookup table?
- assert(MF.getName().startswith(getThunkPrefix()));
- auto ThunkIt = llvm::find_if(
- SLSBLRThunks, [&MF](auto T) { return T.Name == MF.getName(); });
- assert(ThunkIt != std::end(SLSBLRThunks));
- Register ThunkReg = ThunkIt->Reg;
- bool isThumb = ThunkIt->isThumb;
-
- const TargetInstrInfo *TII = MF.getSubtarget<ARMSubtarget>().getInstrInfo();
- MachineBasicBlock *Entry = &MF.front();
- Entry->clear();
-
- // These thunks need to consist of the following instructions:
- // __llvm_slsblr_thunk_(arm/thumb)_rN:
- // bx rN
- // barrierInsts
- Entry->addLiveIn(ThunkReg);
- if (isThumb)
- BuildMI(Entry, DebugLoc(), TII->get(ARM::tBX))
- .addReg(ThunkReg)
- .add(predOps(ARMCC::AL));
- else
- BuildMI(Entry, DebugLoc(), TII->get(ARM::BX))
- .addReg(ThunkReg);
-
-  // Make sure the thunks do not make use of the SB extension in case there is
-  // a function somewhere that may call the thunk but, for some reason, has the
-  // SB extension disabled locally, even though it's enabled for the module
-  // otherwise. Therefore set AlwaysUseISBDSB to true.
- insertSpeculationBarrier(&MF.getSubtarget<ARMSubtarget>(), *Entry,
- Entry->end(), DebugLoc(), true /*AlwaysUseISBDSB*/);
-}
-
-MachineBasicBlock &ARMSLSHardening::ConvertIndirectCallToIndirectJump(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
- // Transform an indirect call to an indirect jump as follows:
- // Before:
- // |-----------------------------|
- // | ... |
- // | instI |
- // | BLX rN |
- // | instJ |
- // | ... |
- // |-----------------------------|
- //
- // After:
-  //   |--------------------------------------|
- // | ... |
- // | instI |
- // | *call* __llvm_slsblr_thunk_mode_xN |
- // | instJ |
- // | ... |
- // |--------------------------------------|
- //
- // __llvm_slsblr_thunk_mode_xN:
- // |-----------------------------|
- // | BX rN |
- // | barrierInsts |
- // |-----------------------------|
- //
- // The __llvm_slsblr_thunk_mode_xN thunks are created by the
- // SLSBLRThunkInserter.
- // This function merely needs to transform an indirect call to a direct call
- // to __llvm_slsblr_thunk_xN.
- MachineInstr &IndirectCall = *MBBI;
- assert(isIndirectCall(IndirectCall) && !IndirectCall.isReturn());
- int RegOpIdxOnIndirectCall = -1;
- bool isThumb;
- switch (IndirectCall.getOpcode()) {
- case ARM::BLX: // !isThumb2
- case ARM::BLX_noip: // !isThumb2
- isThumb = false;
- RegOpIdxOnIndirectCall = 0;
- break;
- case ARM::tBLXr: // isThumb2
- case ARM::tBLXr_noip: // isThumb2
- isThumb = true;
- RegOpIdxOnIndirectCall = 2;
- break;
- default:
- llvm_unreachable("unhandled Indirect Call");
- }
-
- Register Reg = IndirectCall.getOperand(RegOpIdxOnIndirectCall).getReg();
- // Since linkers are allowed to clobber R12 on function calls, the above
- // mitigation only works if the original indirect call instruction was not
- // using R12. Code generation before must make sure that no indirect call
- // using R12 was produced if the mitigation is enabled.
- // Also, the transformation is incorrect if the indirect call uses LR, so
-  // we also have to avoid that.
- assert(Reg != ARM::R12 && Reg != ARM::LR);
- bool RegIsKilled = IndirectCall.getOperand(RegOpIdxOnIndirectCall).isKill();
-
- DebugLoc DL = IndirectCall.getDebugLoc();
-
- MachineFunction &MF = *MBBI->getMF();
- auto ThunkIt = llvm::find_if(SLSBLRThunks, [Reg, isThumb](auto T) {
- return T.Reg == Reg && T.isThumb == isThumb;
- });
- assert(ThunkIt != std::end(SLSBLRThunks));
- Module *M = MF.getFunction().getParent();
- const GlobalValue *GV = cast<GlobalValue>(M->getNamedValue(ThunkIt->Name));
-
- MachineInstr *BL =
- isThumb ? BuildMI(MBB, MBBI, DL, TII->get(ARM::tBL))
- .addImm(IndirectCall.getOperand(0).getImm())
- .addReg(IndirectCall.getOperand(1).getReg())
- .addGlobalAddress(GV)
- : BuildMI(MBB, MBBI, DL, TII->get(ARM::BL)).addGlobalAddress(GV);
-
- // Now copy the implicit operands from IndirectCall to BL and copy other
- // necessary info.
-  // However, both IndirectCall and BL instructions implicitly use SP and
-  // implicitly define LR. Blindly copying implicit operands would result in
-  // SP and LR operands being present multiple times. While this may not be too
- // much of an issue, let's avoid that for cleanliness, by removing those
- // implicit operands from the BL created above before we copy over all
- // implicit operands from the IndirectCall.
- int ImpLROpIdx = -1;
- int ImpSPOpIdx = -1;
- for (unsigned OpIdx = BL->getNumExplicitOperands();
- OpIdx < BL->getNumOperands(); OpIdx++) {
- MachineOperand Op = BL->getOperand(OpIdx);
- if (!Op.isReg())
- continue;
- if (Op.getReg() == ARM::LR && Op.isDef())
- ImpLROpIdx = OpIdx;
- if (Op.getReg() == ARM::SP && !Op.isDef())
- ImpSPOpIdx = OpIdx;
- }
- assert(ImpLROpIdx != -1);
- assert(ImpSPOpIdx != -1);
- int FirstOpIdxToRemove = std::max(ImpLROpIdx, ImpSPOpIdx);
- int SecondOpIdxToRemove = std::min(ImpLROpIdx, ImpSPOpIdx);
- BL->RemoveOperand(FirstOpIdxToRemove);
- BL->RemoveOperand(SecondOpIdxToRemove);
- // Now copy over the implicit operands from the original IndirectCall
- BL->copyImplicitOps(MF, IndirectCall);
- MF.moveCallSiteInfo(&IndirectCall, BL);
- // Also add the register called in the IndirectCall as being used in the
- // called thunk.
- BL->addOperand(MachineOperand::CreateReg(Reg, false /*isDef*/, true /*isImp*/,
- RegIsKilled /*isKill*/));
-  // Remove the IndirectCall instruction.
- MBB.erase(MBBI);
- return MBB;
-}
-
-bool ARMSLSHardening::hardenIndirectCalls(MachineBasicBlock &MBB) const {
- if (!ST->hardenSlsBlr())
- return false;
- bool Modified = false;
- MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
- MachineBasicBlock::iterator NextMBBI;
- for (; MBBI != E; MBBI = NextMBBI) {
- MachineInstr &MI = *MBBI;
- NextMBBI = std::next(MBBI);
- // Tail calls are both indirect calls and "returns".
- // They are also indirect jumps, so should be handled by sls-harden-retbr,
- // rather than sls-harden-blr.
- if (isIndirectCall(MI) && !MI.isReturn()) {
- ConvertIndirectCallToIndirectJump(MBB, MBBI);
- Modified = true;
- }
- }
- return Modified;
-}
-
-
-
-FunctionPass *llvm::createARMSLSHardeningPass() {
- return new ARMSLSHardening();
-}
-
-namespace {
-class ARMIndirectThunks : public MachineFunctionPass {
-public:
- static char ID;
-
- ARMIndirectThunks() : MachineFunctionPass(ID) {}
-
- StringRef getPassName() const override { return "ARM Indirect Thunks"; }
-
- bool doInitialization(Module &M) override;
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- MachineFunctionPass::getAnalysisUsage(AU);
- AU.addRequired<MachineModuleInfoWrapperPass>();
- AU.addPreserved<MachineModuleInfoWrapperPass>();
- }
-
-private:
- std::tuple<SLSBLRThunkInserter> TIs;
-
- // FIXME: When LLVM moves to C++17, these can become folds
- template <typename... ThunkInserterT>
- static void initTIs(Module &M,
- std::tuple<ThunkInserterT...> &ThunkInserters) {
- (void)std::initializer_list<int>{
- (std::get<ThunkInserterT>(ThunkInserters).init(M), 0)...};
- }
- template <typename... ThunkInserterT>
- static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF,
- std::tuple<ThunkInserterT...> &ThunkInserters) {
- bool Modified = false;
- (void)std::initializer_list<int>{
- Modified |= std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF)...};
- return Modified;
- }
-};
-
-} // end anonymous namespace
-
-char ARMIndirectThunks::ID = 0;
-
-FunctionPass *llvm::createARMIndirectThunks() {
- return new ARMIndirectThunks();
-}
-
-bool ARMIndirectThunks::doInitialization(Module &M) {
- initTIs(M, TIs);
- return false;
-}
-
-bool ARMIndirectThunks::runOnMachineFunction(MachineFunction &MF) {
- LLVM_DEBUG(dbgs() << getPassName() << '\n');
- auto &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
- return runTIs(MMI, MF, TIs);
-}
+//===- ARMSLSHardening.cpp - Harden Straight Line Misspeculation ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass to insert code to mitigate against side channel
+// vulnerabilities that may happen under straight line miss-speculation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMInstrInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/IndirectThunks.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/DebugLoc.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-sls-hardening"
+
+#define ARM_SLS_HARDENING_NAME "ARM sls hardening pass"
+
+namespace {
+
+class ARMSLSHardening : public MachineFunctionPass {
+public:
+ const TargetInstrInfo *TII;
+ const ARMSubtarget *ST;
+
+ static char ID;
+
+ ARMSLSHardening() : MachineFunctionPass(ID) {
+ initializeARMSLSHardeningPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ StringRef getPassName() const override { return ARM_SLS_HARDENING_NAME; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ bool hardenReturnsAndBRs(MachineBasicBlock &MBB) const;
+ bool hardenIndirectCalls(MachineBasicBlock &MBB) const;
+ MachineBasicBlock &
+ ConvertIndirectCallToIndirectJump(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator) const;
+};
+
+} // end anonymous namespace
+
+char ARMSLSHardening::ID = 0;
+
+INITIALIZE_PASS(ARMSLSHardening, "arm-sls-hardening",
+ ARM_SLS_HARDENING_NAME, false, false)
+
+static void insertSpeculationBarrier(const ARMSubtarget *ST,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL,
+ bool AlwaysUseISBDSB = false) {
+ assert(MBBI != MBB.begin() &&
+ "Must not insert SpeculationBarrierEndBB as only instruction in MBB.");
+ assert(std::prev(MBBI)->isBarrier() &&
+ "SpeculationBarrierEndBB must only follow unconditional control flow "
+ "instructions.");
+ assert(std::prev(MBBI)->isTerminator() &&
+ "SpeculationBarrierEndBB must only follow terminators.");
+ const TargetInstrInfo *TII = ST->getInstrInfo();
+ assert(ST->hasDataBarrier() || ST->hasSB());
+ bool ProduceSB = ST->hasSB() && !AlwaysUseISBDSB;
+ unsigned BarrierOpc =
+ ProduceSB ? (ST->isThumb() ? ARM::t2SpeculationBarrierSBEndBB
+ : ARM::SpeculationBarrierSBEndBB)
+ : (ST->isThumb() ? ARM::t2SpeculationBarrierISBDSBEndBB
+ : ARM::SpeculationBarrierISBDSBEndBB);
+ if (MBBI == MBB.end() || !isSpeculationBarrierEndBBOpcode(MBBI->getOpcode()))
+ BuildMI(MBB, MBBI, DL, TII->get(BarrierOpc));
+}
+
+bool ARMSLSHardening::runOnMachineFunction(MachineFunction &MF) {
+ ST = &MF.getSubtarget<ARMSubtarget>();
+ TII = MF.getSubtarget().getInstrInfo();
+
+ bool Modified = false;
+ for (auto &MBB : MF) {
+ Modified |= hardenReturnsAndBRs(MBB);
+ Modified |= hardenIndirectCalls(MBB);
+ }
+
+ return Modified;
+}
+
+bool ARMSLSHardening::hardenReturnsAndBRs(MachineBasicBlock &MBB) const {
+ if (!ST->hardenSlsRetBr())
+ return false;
+ assert(!ST->isThumb1Only());
+ bool Modified = false;
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(), E = MBB.end();
+ MachineBasicBlock::iterator NextMBBI;
+ for (; MBBI != E; MBBI = NextMBBI) {
+ MachineInstr &MI = *MBBI;
+ NextMBBI = std::next(MBBI);
+ if (isIndirectControlFlowNotComingBack(MI)) {
+ assert(MI.isTerminator());
+ assert(!TII->isPredicated(MI));
+ insertSpeculationBarrier(ST, MBB, std::next(MBBI), MI.getDebugLoc());
+ Modified = true;
+ }
+ }
+ return Modified;
+}
+
+static const char SLSBLRNamePrefix[] = "__llvm_slsblr_thunk_";
+
+static const struct ThunkNameRegMode {
+ const char* Name;
+ Register Reg;
+ bool isThumb;
+} SLSBLRThunks[] = {
+ {"__llvm_slsblr_thunk_arm_r0", ARM::R0, false},
+ {"__llvm_slsblr_thunk_arm_r1", ARM::R1, false},
+ {"__llvm_slsblr_thunk_arm_r2", ARM::R2, false},
+ {"__llvm_slsblr_thunk_arm_r3", ARM::R3, false},
+ {"__llvm_slsblr_thunk_arm_r4", ARM::R4, false},
+ {"__llvm_slsblr_thunk_arm_r5", ARM::R5, false},
+ {"__llvm_slsblr_thunk_arm_r6", ARM::R6, false},
+ {"__llvm_slsblr_thunk_arm_r7", ARM::R7, false},
+ {"__llvm_slsblr_thunk_arm_r8", ARM::R8, false},
+ {"__llvm_slsblr_thunk_arm_r9", ARM::R9, false},
+ {"__llvm_slsblr_thunk_arm_r10", ARM::R10, false},
+ {"__llvm_slsblr_thunk_arm_r11", ARM::R11, false},
+ {"__llvm_slsblr_thunk_arm_sp", ARM::SP, false},
+ {"__llvm_slsblr_thunk_arm_pc", ARM::PC, false},
+ {"__llvm_slsblr_thunk_thumb_r0", ARM::R0, true},
+ {"__llvm_slsblr_thunk_thumb_r1", ARM::R1, true},
+ {"__llvm_slsblr_thunk_thumb_r2", ARM::R2, true},
+ {"__llvm_slsblr_thunk_thumb_r3", ARM::R3, true},
+ {"__llvm_slsblr_thunk_thumb_r4", ARM::R4, true},
+ {"__llvm_slsblr_thunk_thumb_r5", ARM::R5, true},
+ {"__llvm_slsblr_thunk_thumb_r6", ARM::R6, true},
+ {"__llvm_slsblr_thunk_thumb_r7", ARM::R7, true},
+ {"__llvm_slsblr_thunk_thumb_r8", ARM::R8, true},
+ {"__llvm_slsblr_thunk_thumb_r9", ARM::R9, true},
+ {"__llvm_slsblr_thunk_thumb_r10", ARM::R10, true},
+ {"__llvm_slsblr_thunk_thumb_r11", ARM::R11, true},
+ {"__llvm_slsblr_thunk_thumb_sp", ARM::SP, true},
+ {"__llvm_slsblr_thunk_thumb_pc", ARM::PC, true},
+};
+
+namespace {
+struct SLSBLRThunkInserter : ThunkInserter<SLSBLRThunkInserter> {
+ const char *getThunkPrefix() { return SLSBLRNamePrefix; }
+ bool mayUseThunk(const MachineFunction &MF) {
+ // FIXME: This could also check if there are any indirect calls in the
+ // function to more accurately reflect if a thunk will be needed.
+ return MF.getSubtarget<ARMSubtarget>().hardenSlsBlr();
+ }
+ void insertThunks(MachineModuleInfo &MMI);
+ void populateThunk(MachineFunction &MF);
+};
+} // namespace
+
+void SLSBLRThunkInserter::insertThunks(MachineModuleInfo &MMI) {
+ // FIXME: It probably would be possible to filter which thunks to produce
+ // based on which registers are actually used in indirect calls in this
+ // function. But would that be a worthwhile optimization?
+ for (auto T : SLSBLRThunks)
+ createThunkFunction(MMI, T.Name);
+}
+
+void SLSBLRThunkInserter::populateThunk(MachineFunction &MF) {
+ // FIXME: How to better communicate Register number, rather than through
+ // name and lookup table?
+ assert(MF.getName().startswith(getThunkPrefix()));
+ auto ThunkIt = llvm::find_if(
+ SLSBLRThunks, [&MF](auto T) { return T.Name == MF.getName(); });
+ assert(ThunkIt != std::end(SLSBLRThunks));
+ Register ThunkReg = ThunkIt->Reg;
+ bool isThumb = ThunkIt->isThumb;
+
+ const TargetInstrInfo *TII = MF.getSubtarget<ARMSubtarget>().getInstrInfo();
+ MachineBasicBlock *Entry = &MF.front();
+ Entry->clear();
+
+ // These thunks need to consist of the following instructions:
+ // __llvm_slsblr_thunk_(arm/thumb)_rN:
+ // bx rN
+ // barrierInsts
+ Entry->addLiveIn(ThunkReg);
+ if (isThumb)
+ BuildMI(Entry, DebugLoc(), TII->get(ARM::tBX))
+ .addReg(ThunkReg)
+ .add(predOps(ARMCC::AL));
+ else
+ BuildMI(Entry, DebugLoc(), TII->get(ARM::BX))
+ .addReg(ThunkReg);
+
+  // Make sure the thunks do not make use of the SB extension in case there is
+  // a function somewhere that may call the thunk but, for some reason, has the
+  // SB extension disabled locally, even though it's enabled for the module
+  // otherwise. Therefore set AlwaysUseISBDSB to true.
+ insertSpeculationBarrier(&MF.getSubtarget<ARMSubtarget>(), *Entry,
+ Entry->end(), DebugLoc(), true /*AlwaysUseISBDSB*/);
+}
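// --- Illustrative sketch (not part of the patch above, nor of LLVM itself) ---
// populateThunk and the SLSBLRThunks table encode a simple naming scheme: each
// (instruction set, register) pair maps to a symbol
// __llvm_slsblr_thunk_<arm|thumb>_<reg>, whose body is "bx <reg>" followed by a
// speculation barrier. The helper below, slsThunkName, is an invented name used
// only to model that mapping; the pass itself looks the name up in the static
// table rather than building the string.
#include <string>

static std::string slsThunkName(bool IsThumb, const std::string &RegName) {
  return std::string("__llvm_slsblr_thunk_") +
         (IsThumb ? "thumb_" : "arm_") + RegName;
}
// e.g. slsThunkName(true,  "r4")  == "__llvm_slsblr_thunk_thumb_r4"
//      slsThunkName(false, "r10") == "__llvm_slsblr_thunk_arm_r10"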
+
+MachineBasicBlock &ARMSLSHardening::ConvertIndirectCallToIndirectJump(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
+ // Transform an indirect call to an indirect jump as follows:
+ // Before:
+ // |-----------------------------|
+ // | ... |
+ // | instI |
+ // | BLX rN |
+ // | instJ |
+ // | ... |
+ // |-----------------------------|
+ //
+ // After:
+  //   |--------------------------------------|
+ // | ... |
+ // | instI |
+ // | *call* __llvm_slsblr_thunk_mode_xN |
+ // | instJ |
+ // | ... |
+ // |--------------------------------------|
+ //
+ // __llvm_slsblr_thunk_mode_xN:
+ // |-----------------------------|
+ // | BX rN |
+ // | barrierInsts |
+ // |-----------------------------|
+ //
+ // The __llvm_slsblr_thunk_mode_xN thunks are created by the
+ // SLSBLRThunkInserter.
+ // This function merely needs to transform an indirect call to a direct call
+ // to __llvm_slsblr_thunk_xN.
+ MachineInstr &IndirectCall = *MBBI;
+ assert(isIndirectCall(IndirectCall) && !IndirectCall.isReturn());
+ int RegOpIdxOnIndirectCall = -1;
+ bool isThumb;
+ switch (IndirectCall.getOpcode()) {
+ case ARM::BLX: // !isThumb2
+ case ARM::BLX_noip: // !isThumb2
+ isThumb = false;
+ RegOpIdxOnIndirectCall = 0;
+ break;
+ case ARM::tBLXr: // isThumb2
+ case ARM::tBLXr_noip: // isThumb2
+ isThumb = true;
+ RegOpIdxOnIndirectCall = 2;
+ break;
+ default:
+ llvm_unreachable("unhandled Indirect Call");
+ }
+
+ Register Reg = IndirectCall.getOperand(RegOpIdxOnIndirectCall).getReg();
+ // Since linkers are allowed to clobber R12 on function calls, the above
+ // mitigation only works if the original indirect call instruction was not
+ // using R12. Code generation before must make sure that no indirect call
+ // using R12 was produced if the mitigation is enabled.
+ // Also, the transformation is incorrect if the indirect call uses LR, so
+  // we also have to avoid that.
+ assert(Reg != ARM::R12 && Reg != ARM::LR);
+ bool RegIsKilled = IndirectCall.getOperand(RegOpIdxOnIndirectCall).isKill();
+
+ DebugLoc DL = IndirectCall.getDebugLoc();
+
+ MachineFunction &MF = *MBBI->getMF();
+ auto ThunkIt = llvm::find_if(SLSBLRThunks, [Reg, isThumb](auto T) {
+ return T.Reg == Reg && T.isThumb == isThumb;
+ });
+ assert(ThunkIt != std::end(SLSBLRThunks));
+ Module *M = MF.getFunction().getParent();
+ const GlobalValue *GV = cast<GlobalValue>(M->getNamedValue(ThunkIt->Name));
+
+ MachineInstr *BL =
+ isThumb ? BuildMI(MBB, MBBI, DL, TII->get(ARM::tBL))
+ .addImm(IndirectCall.getOperand(0).getImm())
+ .addReg(IndirectCall.getOperand(1).getReg())
+ .addGlobalAddress(GV)
+ : BuildMI(MBB, MBBI, DL, TII->get(ARM::BL)).addGlobalAddress(GV);
+
+ // Now copy the implicit operands from IndirectCall to BL and copy other
+ // necessary info.
+  // However, both IndirectCall and BL instructions implicitly use SP and
+  // implicitly define LR. Blindly copying implicit operands would result in
+  // SP and LR operands being present multiple times. While this may not be too
+ // much of an issue, let's avoid that for cleanliness, by removing those
+ // implicit operands from the BL created above before we copy over all
+ // implicit operands from the IndirectCall.
+ int ImpLROpIdx = -1;
+ int ImpSPOpIdx = -1;
+ for (unsigned OpIdx = BL->getNumExplicitOperands();
+ OpIdx < BL->getNumOperands(); OpIdx++) {
+ MachineOperand Op = BL->getOperand(OpIdx);
+ if (!Op.isReg())
+ continue;
+ if (Op.getReg() == ARM::LR && Op.isDef())
+ ImpLROpIdx = OpIdx;
+ if (Op.getReg() == ARM::SP && !Op.isDef())
+ ImpSPOpIdx = OpIdx;
+ }
+ assert(ImpLROpIdx != -1);
+ assert(ImpSPOpIdx != -1);
+ int FirstOpIdxToRemove = std::max(ImpLROpIdx, ImpSPOpIdx);
+ int SecondOpIdxToRemove = std::min(ImpLROpIdx, ImpSPOpIdx);
+ BL->RemoveOperand(FirstOpIdxToRemove);
+ BL->RemoveOperand(SecondOpIdxToRemove);
+ // Now copy over the implicit operands from the original IndirectCall
+ BL->copyImplicitOps(MF, IndirectCall);
+ MF.moveCallSiteInfo(&IndirectCall, BL);
+  // Also add the register the IndirectCall branched through as an implicit
+  // use of the called thunk.
+ BL->addOperand(MachineOperand::CreateReg(Reg, false /*isDef*/, true /*isImp*/,
+ RegIsKilled /*isKill*/));
+  // Remove the IndirectCall instruction.
+ MBB.erase(MBBI);
+ return MBB;
+}
+
+bool ARMSLSHardening::hardenIndirectCalls(MachineBasicBlock &MBB) const {
+ if (!ST->hardenSlsBlr())
+ return false;
+ bool Modified = false;
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MachineBasicBlock::iterator NextMBBI;
+ for (; MBBI != E; MBBI = NextMBBI) {
+ MachineInstr &MI = *MBBI;
+ NextMBBI = std::next(MBBI);
+ // Tail calls are both indirect calls and "returns".
+ // They are also indirect jumps, so should be handled by sls-harden-retbr,
+ // rather than sls-harden-blr.
+ if (isIndirectCall(MI) && !MI.isReturn()) {
+ ConvertIndirectCallToIndirectJump(MBB, MBBI);
+ Modified = true;
+ }
+ }
+ return Modified;
+}
+
+
+
+FunctionPass *llvm::createARMSLSHardeningPass() {
+ return new ARMSLSHardening();
+}
+
+namespace {
+class ARMIndirectThunks : public MachineFunctionPass {
+public:
+ static char ID;
+
+ ARMIndirectThunks() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override { return "ARM Indirect Thunks"; }
+
+ bool doInitialization(Module &M) override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineModuleInfoWrapperPass>();
+ AU.addPreserved<MachineModuleInfoWrapperPass>();
+ }
+
+private:
+ std::tuple<SLSBLRThunkInserter> TIs;
+
+ // FIXME: When LLVM moves to C++17, these can become folds
+ template <typename... ThunkInserterT>
+ static void initTIs(Module &M,
+ std::tuple<ThunkInserterT...> &ThunkInserters) {
+ (void)std::initializer_list<int>{
+ (std::get<ThunkInserterT>(ThunkInserters).init(M), 0)...};
+ }
+ template <typename... ThunkInserterT>
+ static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF,
+ std::tuple<ThunkInserterT...> &ThunkInserters) {
+ bool Modified = false;
+ (void)std::initializer_list<int>{
+ Modified |= std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF)...};
+ return Modified;
+ }
+};
+
+} // end anonymous namespace
+
+char ARMIndirectThunks::ID = 0;
+
+FunctionPass *llvm::createARMIndirectThunks() {
+ return new ARMIndirectThunks();
+}
+
+bool ARMIndirectThunks::doInitialization(Module &M) {
+ initTIs(M, TIs);
+ return false;
+}
+
+bool ARMIndirectThunks::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << getPassName() << '\n');
+ auto &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+ return runTIs(MMI, MF, TIs);
+}
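A minimal, self-contained C++ sketch of the rewrite implemented by ConvertIndirectCallToIndirectJump above: the register-indirect call is replaced by a direct call to a per-register thunk whose body is just BX rN followed by a speculation barrier. The thunk-name format and the table-entry shape below are assumptions modelled on the SLSBLRThunks lookup in the hunk (the real table is defined earlier in ARMSLSHardening.cpp and is not part of this diff).

#include <cassert>
#include <cstdio>
#include <string>

// Stand-in for one SLSBLRThunks entry; the field names are illustrative.
struct ThunkInfo {
  std::string Name;
  unsigned Reg;    // general-purpose register number rN
  bool isThumb;
};

// Build the thunk a "BLX rN" / "tBLXr rN" call would be redirected to.
// The exact name format is an assumption; the real pass reads it from the
// SLSBLRThunks table rather than recomputing it.
ThunkInfo makeSlsBlrThunk(unsigned Reg, bool isThumb) {
  // R12 and LR are excluded, matching the assert in the hunk above.
  assert(Reg != 12 && Reg != 14 && "indirect calls through R12/LR not allowed");
  return {std::string("__llvm_slsblr_thunk_") +
              (isThumb ? "thumb_r" : "arm_r") + std::to_string(Reg),
          Reg, isThumb};
}

int main() {
  // Before:  BLX r3                           (indirect call)
  // After:   BL __llvm_slsblr_thunk_arm_r3    (direct call)
  //          thunk body: BX r3 followed by a speculation barrier (ISB/DSB)
  ThunkInfo T = makeSlsBlrThunk(3, /*isThumb=*/false);
  std::printf("BLX r%u  ->  BL %s\n", T.Reg, T.Name.c_str());
  return 0;
}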
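The initTIs/runTIs helpers in ARMIndirectThunks above emulate parameter-pack folding with the std::initializer_list trick, as the FIXME notes. Under C++17 the same helpers collapse into fold expressions; here is a self-contained sketch using placeholder types (DummyModule and DummyThunkInserter are stand-ins, not LLVM classes):

#include <cstdio>
#include <tuple>

struct DummyModule {};                 // placeholder for llvm::Module
struct DummyThunkInserter {            // placeholder thunk-inserter type
  void init(DummyModule &) { std::puts("init thunks"); }
  bool run(DummyModule &) { std::puts("insert/check thunks"); return true; }
};

// C++17 fold expressions replacing the initializer_list idiom above.
template <typename... InserterT>
void initAll(DummyModule &M, std::tuple<InserterT...> &TIs) {
  (std::get<InserterT>(TIs).init(M), ...);
}

template <typename... InserterT>
bool runAll(DummyModule &M, std::tuple<InserterT...> &TIs) {
  bool Modified = false;
  ((Modified |= std::get<InserterT>(TIs).run(M)), ...);
  return Modified;
}

int main() {
  DummyModule M;
  std::tuple<DummyThunkInserter> TIs;
  initAll(M, TIs);
  return runAll(M, TIs) ? 0 : 1;
}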
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMSchedule.td b/contrib/libs/llvm12/lib/Target/ARM/ARMSchedule.td
index 53a2a6fec5..503a0fbd96 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMSchedule.td
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMSchedule.td
@@ -151,61 +151,61 @@ def : PredicateProlog<[{
(void)STI;
}]>;
-def IsPredicated : CheckFunctionPredicateWithTII<
- "ARM_MC::isPredicated",
- "isPredicated"
->;
-def IsPredicatedPred : MCSchedPredicate<IsPredicated>;
-
-def IsCPSRDefined : CheckFunctionPredicateWithTII<
- "ARM_MC::isCPSRDefined",
- "ARMBaseInstrInfo::isCPSRDefined"
->;
-
-def IsCPSRDefinedPred : MCSchedPredicate<IsCPSRDefined>;
-
-let FunctionMapper = "ARM_AM::getAM2ShiftOpc" in {
- class CheckAM2NoShift<int n> : CheckImmOperand_s<n, "ARM_AM::no_shift">;
- class CheckAM2ShiftLSL<int n> : CheckImmOperand_s<n, "ARM_AM::lsl">;
-}
-
-let FunctionMapper = "ARM_AM::getAM2Op" in {
- class CheckAM2OpAdd<int n> : CheckImmOperand_s<n, "ARM_AM::add"> {}
- class CheckAM2OpSub<int n> : CheckImmOperand_s<n, "ARM_AM::sub"> {}
-}
-
-let FunctionMapper = "ARM_AM::getAM2Offset" in {
- class CheckAM2Offset<int n, int of> : CheckImmOperand<n, of> {}
-}
-
-def IsLDMBaseRegInList : CheckFunctionPredicate<
- "ARM_MC::isLDMBaseRegInList", "ARM_MC::isLDMBaseRegInList"
->;
-
-let FunctionMapper = "ARM_AM::getAM3Op" in {
- class CheckAM3OpSub<int n> : CheckImmOperand_s<n, "ARM_AM::sub"> {}
-}
-
-// LDM, base reg in list
-def IsLDMBaseRegInListPred : MCSchedPredicate<IsLDMBaseRegInList>;
-
-class IsRegPCPred<int n> : MCSchedPredicate<CheckRegOperand<n, PC>>;
-
-class BranchWriteRes<int lat, int uops, list<ProcResourceKind> resl,
- list<int> rcl, SchedWriteRes wr> :
- SchedWriteRes<!listconcat(wr.ProcResources, resl)> {
- let Latency = !add(wr.Latency, lat);
- let ResourceCycles = !listconcat(wr.ResourceCycles, rcl);
- let NumMicroOps = !add(wr.NumMicroOps, uops);
- SchedWriteRes BaseWr = wr;
-}
-
-class CheckBranchForm<int n, BranchWriteRes br> :
- SchedWriteVariant<[
- SchedVar<IsRegPCPred<n>, [br]>,
- SchedVar<NoSchedPred, [br.BaseWr]>
- ]>;
+def IsPredicated : CheckFunctionPredicateWithTII<
+ "ARM_MC::isPredicated",
+ "isPredicated"
+>;
+def IsPredicatedPred : MCSchedPredicate<IsPredicated>;
+def IsCPSRDefined : CheckFunctionPredicateWithTII<
+ "ARM_MC::isCPSRDefined",
+ "ARMBaseInstrInfo::isCPSRDefined"
+>;
+
+def IsCPSRDefinedPred : MCSchedPredicate<IsCPSRDefined>;
+
+let FunctionMapper = "ARM_AM::getAM2ShiftOpc" in {
+ class CheckAM2NoShift<int n> : CheckImmOperand_s<n, "ARM_AM::no_shift">;
+ class CheckAM2ShiftLSL<int n> : CheckImmOperand_s<n, "ARM_AM::lsl">;
+}
+
+let FunctionMapper = "ARM_AM::getAM2Op" in {
+ class CheckAM2OpAdd<int n> : CheckImmOperand_s<n, "ARM_AM::add"> {}
+ class CheckAM2OpSub<int n> : CheckImmOperand_s<n, "ARM_AM::sub"> {}
+}
+
+let FunctionMapper = "ARM_AM::getAM2Offset" in {
+ class CheckAM2Offset<int n, int of> : CheckImmOperand<n, of> {}
+}
+
+def IsLDMBaseRegInList : CheckFunctionPredicate<
+ "ARM_MC::isLDMBaseRegInList", "ARM_MC::isLDMBaseRegInList"
+>;
+
+let FunctionMapper = "ARM_AM::getAM3Op" in {
+ class CheckAM3OpSub<int n> : CheckImmOperand_s<n, "ARM_AM::sub"> {}
+}
+
+// LDM, base reg in list
+def IsLDMBaseRegInListPred : MCSchedPredicate<IsLDMBaseRegInList>;
+
+class IsRegPCPred<int n> : MCSchedPredicate<CheckRegOperand<n, PC>>;
+
+class BranchWriteRes<int lat, int uops, list<ProcResourceKind> resl,
+ list<int> rcl, SchedWriteRes wr> :
+ SchedWriteRes<!listconcat(wr.ProcResources, resl)> {
+ let Latency = !add(wr.Latency, lat);
+ let ResourceCycles = !listconcat(wr.ResourceCycles, rcl);
+ let NumMicroOps = !add(wr.NumMicroOps, uops);
+ SchedWriteRes BaseWr = wr;
+}
+
+class CheckBranchForm<int n, BranchWriteRes br> :
+ SchedWriteVariant<[
+ SchedVar<IsRegPCPred<n>, [br]>,
+ SchedVar<NoSchedPred, [br.BaseWr]>
+ ]>;
+
//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for ARM
//
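The BranchWriteRes and CheckBranchForm classes above attach extra branch-unit cost to an existing write whenever the checked operand is PC: latency and micro-op counts are !add-ed, resource lists are !listconcat-ed, and the untouched BaseWr is kept for the non-branch case. A rough C++ rendering of that composition, using made-up struct fields in place of the TableGen record fields, is sketched below; the numbers in main() mirror the A57BranchForm parameters defined in the next file.

#include <string>
#include <vector>

// Plain struct standing in for a SchedWriteRes record; illustrative only.
struct SchedWrite {
  std::vector<std::string> ProcResources;
  std::vector<int> ResourceCycles;
  int Latency;
  int NumMicroOps;
};

// Mirrors BranchWriteRes<lat, uops, resl, rcl, wr>: start from the base
// write and append/add the extra branch-form cost.
SchedWrite branchForm(SchedWrite Base, int Lat, int Uops,
                      const std::vector<std::string> &ResL,
                      const std::vector<int> &Rcl) {
  Base.ProcResources.insert(Base.ProcResources.end(), ResL.begin(), ResL.end());
  Base.ResourceCycles.insert(Base.ResourceCycles.end(), Rcl.begin(), Rcl.end());
  Base.Latency += Lat;       // !add(wr.Latency, lat)
  Base.NumMicroOps += Uops;  // !add(wr.NumMicroOps, uops)
  return Base;
}

int main() {
  // A57-style example: the branch form of a 2-cycle I-unit ALU op gains
  // 2 cycles, 1 uop and 1 resource cycle on the branch unit when the
  // destination register is PC.
  SchedWrite ALU{{"A57UnitI"}, {1}, /*Latency=*/2, /*NumMicroOps=*/1};
  SchedWrite Br = branchForm(ALU, 2, 1, {"A57UnitB"}, {1});
  return (Br.Latency == 4 && Br.NumMicroOps == 2) ? 0 : 1;
}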
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleA57.td b/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleA57.td
index 0c610a4839..fe8c220db4 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleA57.td
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleA57.td
@@ -21,47 +21,47 @@
// Therefore, IssueWidth is set to the narrower of the two at three, while still
// modeling the machine as out-of-order.
-def IsCPSRDefinedAndPredicated : CheckAll<[IsCPSRDefined, IsPredicated]>;
+def IsCPSRDefinedAndPredicated : CheckAll<[IsCPSRDefined, IsPredicated]>;
def IsCPSRDefinedAndPredicatedPred :
- MCSchedPredicate<IsCPSRDefinedAndPredicated>;
+ MCSchedPredicate<IsCPSRDefinedAndPredicated>;
// Cortex A57 rev. r1p0 or later (false = r0px)
-def IsR1P0AndLaterPred : MCSchedPredicate<FalsePred>;
+def IsR1P0AndLaterPred : MCSchedPredicate<FalsePred>;
-def IsLdrAm3RegOffPred : MCSchedPredicate<CheckInvalidRegOperand<2>>;
-def IsLdrAm3RegOffPredX2 : MCSchedPredicate<CheckInvalidRegOperand<3>>;
-def IsLdrAm3RegOffPredX3 : MCSchedPredicate<CheckInvalidRegOperand<4>>;
+def IsLdrAm3RegOffPred : MCSchedPredicate<CheckInvalidRegOperand<2>>;
+def IsLdrAm3RegOffPredX2 : MCSchedPredicate<CheckInvalidRegOperand<3>>;
+def IsLdrAm3RegOffPredX3 : MCSchedPredicate<CheckInvalidRegOperand<4>>;
// If Addrmode3 contains "minus register"
-class Am3NegativeRegOffset<int n> : MCSchedPredicate<CheckAll<[
- CheckValidRegOperand<n>,
- CheckAM3OpSub<!add(n, 1)>]>>;
-
-def IsLdrAm3NegRegOffPred : Am3NegativeRegOffset<2>;
-def IsLdrAm3NegRegOffPredX2 : Am3NegativeRegOffset<3>;
-def IsLdrAm3NegRegOffPredX3 : Am3NegativeRegOffset<4>;
-
+class Am3NegativeRegOffset<int n> : MCSchedPredicate<CheckAll<[
+ CheckValidRegOperand<n>,
+ CheckAM3OpSub<!add(n, 1)>]>>;
+
+def IsLdrAm3NegRegOffPred : Am3NegativeRegOffset<2>;
+def IsLdrAm3NegRegOffPredX2 : Am3NegativeRegOffset<3>;
+def IsLdrAm3NegRegOffPredX3 : Am3NegativeRegOffset<4>;
+
// Load, scaled register offset, not plus LSL2
-class ScaledRegNotPlusLsl2<int n> : CheckNot<
- CheckAny<[
- CheckAM2NoShift<n>,
- CheckAll<[
- CheckAM2OpAdd<n>,
- CheckAM2ShiftLSL<n>,
- CheckAM2Offset<n, 2>
- ]>
- ]>
- >;
-
-def IsLdstsoScaledNotOptimalPredX0 : MCSchedPredicate<ScaledRegNotPlusLsl2<2>>;
-def IsLdstsoScaledNotOptimalPred : MCSchedPredicate<ScaledRegNotPlusLsl2<3>>;
-def IsLdstsoScaledNotOptimalPredX2 : MCSchedPredicate<ScaledRegNotPlusLsl2<4>>;
-
-def IsLdstsoScaledPredX2 : MCSchedPredicate<CheckNot<CheckAM2NoShift<4>>>;
-
-def IsLdstsoMinusRegPredX0 : MCSchedPredicate<CheckAM2OpSub<2>>;
-def IsLdstsoMinusRegPred : MCSchedPredicate<CheckAM2OpSub<3>>;
-def IsLdstsoMinusRegPredX2 : MCSchedPredicate<CheckAM2OpSub<4>>;
+class ScaledRegNotPlusLsl2<int n> : CheckNot<
+ CheckAny<[
+ CheckAM2NoShift<n>,
+ CheckAll<[
+ CheckAM2OpAdd<n>,
+ CheckAM2ShiftLSL<n>,
+ CheckAM2Offset<n, 2>
+ ]>
+ ]>
+ >;
+
+def IsLdstsoScaledNotOptimalPredX0 : MCSchedPredicate<ScaledRegNotPlusLsl2<2>>;
+def IsLdstsoScaledNotOptimalPred : MCSchedPredicate<ScaledRegNotPlusLsl2<3>>;
+def IsLdstsoScaledNotOptimalPredX2 : MCSchedPredicate<ScaledRegNotPlusLsl2<4>>;
+
+def IsLdstsoScaledPredX2 : MCSchedPredicate<CheckNot<CheckAM2NoShift<4>>>;
+
+def IsLdstsoMinusRegPredX0 : MCSchedPredicate<CheckAM2OpSub<2>>;
+def IsLdstsoMinusRegPred : MCSchedPredicate<CheckAM2OpSub<3>>;
+def IsLdstsoMinusRegPredX2 : MCSchedPredicate<CheckAM2OpSub<4>>;
class A57WriteLMOpsListType<list<SchedWriteRes> writes> {
list <SchedWriteRes> Writes = writes;
@@ -173,29 +173,29 @@ def : InstRW<[A57Write_6cyc_1B_1L], (instregex "BR_JTm")>;
def : InstRW<[A57Write_1cyc_1I], (instregex "tADDframe")>;
-// Check branch forms of ALU ops:
-// check reg 0 for ARM_AM::PC
-// if so adds 2 cyc to latency, 1 uop, 1 res cycle for A57UnitB
-class A57BranchForm<SchedWriteRes non_br> :
- BranchWriteRes<2, 1, [A57UnitB], [1], non_br>;
-
+// Check branch forms of ALU ops:
+// check reg 0 for ARM_AM::PC
+// if so adds 2 cyc to latency, 1 uop, 1 res cycle for A57UnitB
+class A57BranchForm<SchedWriteRes non_br> :
+ BranchWriteRes<2, 1, [A57UnitB], [1], non_br>;
+
// shift by register, conditional or unconditional
// TODO: according to the doc, conditional uses I0/I1, unconditional uses M
// Why would the more complex instruction use the simpler pipeline?
// This may be an error in the doc.
def A57WriteALUsr : SchedWriteVariant<[
- SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1I>>]>,
- SchedVar<NoSchedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>
+ SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1I>>]>,
+ SchedVar<NoSchedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>
]>;
def A57WriteALUSsr : SchedWriteVariant<[
- SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1I>>]>,
- SchedVar<NoSchedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>
+ SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1I>>]>,
+ SchedVar<NoSchedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>
]>;
def A57ReadALUsr : SchedReadVariant<[
SchedVar<IsPredicatedPred, [ReadDefault]>,
SchedVar<NoSchedPred, [ReadDefault]>
]>;
-def : SchedAlias<WriteALUsi, CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>>;
+def : SchedAlias<WriteALUsi, CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>>;
def : SchedAlias<WriteALUsr, A57WriteALUsr>;
def : SchedAlias<WriteALUSsr, A57WriteALUSsr>;
def : SchedAlias<ReadALUsr, A57ReadALUsr>;
@@ -271,11 +271,11 @@ def : ReadAdvance<ReadMUL, 0>;
// from similar μops, allowing a typical sequence of multiply-accumulate μops
// to issue one every 1 cycle (sched advance = 2).
def A57WriteMLA : SchedWriteRes<[A57UnitM]> { let Latency = 3; }
-def A57WriteMLAL : SchedWriteVariant<[
- SchedVar<IsCPSRDefinedPred, [A57Write_5cyc_1I_1M]>,
- SchedVar<NoSchedPred, [A57Write_4cyc_1M]>
-]>;
-
+def A57WriteMLAL : SchedWriteVariant<[
+ SchedVar<IsCPSRDefinedPred, [A57Write_5cyc_1I_1M]>,
+ SchedVar<NoSchedPred, [A57Write_4cyc_1M]>
+]>;
+
def A57ReadMLA : SchedReadAdvance<2, [A57WriteMLA, A57WriteMLAL]>;
def : InstRW<[A57WriteMLA],
@@ -470,11 +470,11 @@ def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR_POST_REG",
"LDRB_POST_REG", "LDR(B?)T_POST$")>;
def A57WriteLdrTRegPost : SchedWriteVariant<[
- SchedVar<IsLdstsoScaledPredX2, [A57Write_4cyc_1I_1L_1M]>,
+ SchedVar<IsLdstsoScaledPredX2, [A57Write_4cyc_1I_1L_1M]>,
SchedVar<NoSchedPred, [A57Write_4cyc_1L_1I]>
]>;
def A57WriteLdrTRegPostWrBack : SchedWriteVariant<[
- SchedVar<IsLdstsoScaledPredX2, [A57WrBackThree]>,
+ SchedVar<IsLdstsoScaledPredX2, [A57WrBackThree]>,
SchedVar<NoSchedPred, [A57WrBackTwo]>
]>;
// 4(3) "I0/I1,L,M" for scaled register, otherwise 4(2) "I0/I1,L"
@@ -510,12 +510,12 @@ def : InstRW<[A57WritePLD], (instregex "PLDrs", "PLDWrs")>;
// --- Load multiple instructions ---
foreach NumAddr = 1-8 in {
- def A57LMAddrPred#NumAddr : MCSchedPredicate<CheckAny<[
- CheckNumOperands<!add(!shl(NumAddr, 1), 2)>,
- CheckNumOperands<!add(!shl(NumAddr, 1), 3)>]>>;
- def A57LMAddrUpdPred#NumAddr : MCSchedPredicate<CheckAny<[
- CheckNumOperands<!add(!shl(NumAddr, 1), 3)>,
- CheckNumOperands<!add(!shl(NumAddr, 1), 4)>]>>;
+ def A57LMAddrPred#NumAddr : MCSchedPredicate<CheckAny<[
+ CheckNumOperands<!add(!shl(NumAddr, 1), 2)>,
+ CheckNumOperands<!add(!shl(NumAddr, 1), 3)>]>>;
+ def A57LMAddrUpdPred#NumAddr : MCSchedPredicate<CheckAny<[
+ CheckNumOperands<!add(!shl(NumAddr, 1), 3)>,
+ CheckNumOperands<!add(!shl(NumAddr, 1), 4)>]>>;
}
def A57LDMOpsListNoregin : A57WriteLMOpsListType<
@@ -571,20 +571,20 @@ def A57LDMOpsList_Upd : A57WriteLMOpsListType<
A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I,
A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I]>;
def A57WriteLDM_Upd : SchedWriteVariant<[
- SchedVar<A57LMAddrUpdPred1, A57LDMOpsList_Upd.Writes[0-2]>,
- SchedVar<A57LMAddrUpdPred2, A57LDMOpsList_Upd.Writes[0-4]>,
- SchedVar<A57LMAddrUpdPred3, A57LDMOpsList_Upd.Writes[0-6]>,
- SchedVar<A57LMAddrUpdPred4, A57LDMOpsList_Upd.Writes[0-8]>,
- SchedVar<A57LMAddrUpdPred5, A57LDMOpsList_Upd.Writes[0-10]>,
- SchedVar<A57LMAddrUpdPred6, A57LDMOpsList_Upd.Writes[0-12]>,
- SchedVar<A57LMAddrUpdPred7, A57LDMOpsList_Upd.Writes[0-14]>,
- SchedVar<A57LMAddrUpdPred8, A57LDMOpsList_Upd.Writes[0-16]>,
- SchedVar<NoSchedPred, A57LDMOpsList_Upd.Writes[0-16]>
+ SchedVar<A57LMAddrUpdPred1, A57LDMOpsList_Upd.Writes[0-2]>,
+ SchedVar<A57LMAddrUpdPred2, A57LDMOpsList_Upd.Writes[0-4]>,
+ SchedVar<A57LMAddrUpdPred3, A57LDMOpsList_Upd.Writes[0-6]>,
+ SchedVar<A57LMAddrUpdPred4, A57LDMOpsList_Upd.Writes[0-8]>,
+ SchedVar<A57LMAddrUpdPred5, A57LDMOpsList_Upd.Writes[0-10]>,
+ SchedVar<A57LMAddrUpdPred6, A57LDMOpsList_Upd.Writes[0-12]>,
+ SchedVar<A57LMAddrUpdPred7, A57LDMOpsList_Upd.Writes[0-14]>,
+ SchedVar<A57LMAddrUpdPred8, A57LDMOpsList_Upd.Writes[0-16]>,
+ SchedVar<NoSchedPred, A57LDMOpsList_Upd.Writes[0-16]>
]> { let Variadic=1; }
def A57WriteLDM : SchedWriteVariant<[
- SchedVar<IsLDMBaseRegInListPred, [A57WriteLDMreginlist]>,
- SchedVar<NoSchedPred, [A57WriteLDMnoreginlist]>
+ SchedVar<IsLDMBaseRegInListPred, [A57WriteLDMreginlist]>,
+ SchedVar<NoSchedPred, [A57WriteLDMnoreginlist]>
]> { let Variadic=1; }
def : InstRW<[A57WriteLDM], (instregex "(t|t2|sys)?LDM(IA|DA|DB|IB)$")>;
@@ -1194,7 +1194,7 @@ def : InstRW<[A57Write_5cyc_1V], (instregex
// --- 3.16 ASIMD Miscellaneous Instructions ---
// ASIMD bitwise insert
-def : InstRW<[A57Write_3cyc_1V], (instregex "VBIF", "VBIT", "VBSL", "VBSP")>;
+def : InstRW<[A57Write_3cyc_1V], (instregex "VBIF", "VBIT", "VBSL", "VBSP")>;
// ASIMD count
def : InstRW<[A57Write_3cyc_1V], (instregex "VCLS", "VCLZ", "VCNT")>;
@@ -1483,7 +1483,7 @@ def : InstRW<[A57Write_3cyc_1W], (instregex "^(t2)?CRC32")>;
// -----------------------------------------------------------------------------
// Common definitions
def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
-def : SchedAlias<WriteALU, CheckBranchForm<0, A57BranchForm<A57Write_1cyc_1I>>>;
+def : SchedAlias<WriteALU, CheckBranchForm<0, A57BranchForm<A57Write_1cyc_1I>>>;
def : SchedAlias<WriteBr, A57Write_1cyc_1B>;
def : SchedAlias<WriteBrL, A57Write_1cyc_1B_1I>;
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleA57WriteRes.td b/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleA57WriteRes.td
index 531b10bc5c..3ed917682c 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleA57WriteRes.td
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleA57WriteRes.td
@@ -36,16 +36,16 @@ def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19;
def A57Write_20cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 20;
let ResourceCycles = [20]; }
def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; }
-def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1;
- let ResourceCycles = [1]; }
-def A57Write_2cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 2;
- let ResourceCycles = [1]; }
+def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1;
+ let ResourceCycles = [1]; }
+def A57Write_2cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 2;
+ let ResourceCycles = [1]; }
def A57Write_3cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 3; }
def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; }
def A57Write_2cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 2; }
def A57Write_3cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 3; }
-def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2;
- let ResourceCycles = [1]; }
+def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2;
+ let ResourceCycles = [1]; }
def A57Write_32cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 32;
let ResourceCycles = [32]; }
def A57Write_32cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 32;
@@ -71,7 +71,7 @@ foreach Lat = 4-16 in {
}
}
-def A57Write_4cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 4; }
+def A57Write_4cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 4; }
def A57Write_4cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
def A57Write_4cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 4; }
def A57Write_5cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 5; }
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleA9.td b/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleA9.td
index be7017a7b4..dfda6c6b4b 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleA9.td
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleA9.td
@@ -2525,8 +2525,8 @@ def : ReadAdvance<ReadFPMAC, 0>;
def : InstRW< [WriteALU],
(instregex "ANDri", "ORRri", "EORri", "BICri", "ANDrr", "ORRrr", "EORrr",
"BICrr")>;
-def : InstRW< [WriteALUsi], (instrs ANDrsi, ORRrsi, EORrsi, BICrsi)>;
-def : InstRW< [WriteALUsr], (instrs ANDrsr, ORRrsr, EORrsr, BICrsr)>;
+def : InstRW< [WriteALUsi], (instrs ANDrsi, ORRrsi, EORrsi, BICrsi)>;
+def : InstRW< [WriteALUsr], (instrs ANDrsr, ORRrsr, EORrsr, BICrsr)>;
def : SchedAlias<WriteCMP, A9WriteALU>;
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleM7.td b/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleM7.td
index 12296ad092..c5e1d32e8d 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleM7.td
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleM7.td
@@ -1,488 +1,488 @@
-//=- ARMScheduleM7.td - ARM Cortex-M7 Scheduling Definitions -*- tablegen -*-=//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the SchedRead/Write data for the ARM Cortex-M7 processor.
-//
-//===----------------------------------------------------------------------===//
-
-def CortexM7Model : SchedMachineModel {
- let IssueWidth = 2; // Dual issue for most instructions.
- let MicroOpBufferSize = 0; // The Cortex-M7 is in-order.
- let LoadLatency = 2; // Best case for load-use case.
- let MispredictPenalty = 4; // Mispredict cost for forward branches is 6,
- // but 4 works better
- let CompleteModel = 0;
-}
-
-//===--------------------------------------------------------------------===//
-// The Cortex-M7 has two ALU, two LOAD, a STORE, a MAC, a BRANCH and a VFP
-// pipe. The stages relevant to scheduling are as follows:
-//
-// EX1: address generation     shifts
-// EX2: fast load data         ALUs                 FP operation
-// EX3: slow load data         integer writeback    FP operation
-// EX4: store data                                  FP writeback
-//
-// There are shifters in both EX1 and EX2, and some instructions can be
-// flexibly allocated between them. EX2 is used as the "zero" point
-// for scheduling, so simple ALU operations executing in EX2 will have
-// ReadAdvance<0> (the default) for their source operands and Latency = 1.
-
-def M7UnitLoad : ProcResource<2> { let BufferSize = 0; }
-def M7UnitStore : ProcResource<1> { let BufferSize = 0; }
-def M7UnitALU : ProcResource<2>;
-def M7UnitShift1 : ProcResource<1> { let BufferSize = 0; }
-def M7UnitShift2 : ProcResource<1> { let BufferSize = 0; }
-def M7UnitMAC : ProcResource<1> { let BufferSize = 0; }
-def M7UnitBranch : ProcResource<1> { let BufferSize = 0; }
-def M7UnitVFP : ProcResource<1> { let BufferSize = 0; }
-def M7UnitVPort : ProcResource<2> { let BufferSize = 0; }
-def M7UnitSIMD : ProcResource<1> { let BufferSize = 0; }
-
-//===---------------------------------------------------------------------===//
-// Subtarget-specific SchedWrite types which map ProcResources and set latency.
-
-let SchedModel = CortexM7Model in {
-
-def : WriteRes<WriteALU, [M7UnitALU]> { let Latency = 1; }
-
-// Basic ALU with shifts.
-let Latency = 1 in {
- def : WriteRes<WriteALUsi, [M7UnitALU, M7UnitShift1]>;
- def : WriteRes<WriteALUsr, [M7UnitALU, M7UnitShift1]>;
- def : WriteRes<WriteALUSsr, [M7UnitALU, M7UnitShift1]>;
-}
-
-// Compares.
-def : WriteRes<WriteCMP, [M7UnitALU]> { let Latency = 1; }
-def : WriteRes<WriteCMPsi, [M7UnitALU, M7UnitShift1]> { let Latency = 2; }
-def : WriteRes<WriteCMPsr, [M7UnitALU, M7UnitShift1]> { let Latency = 2; }
-
-// Multiplies.
-let Latency = 2 in {
- def : WriteRes<WriteMUL16, [M7UnitMAC]>;
- def : WriteRes<WriteMUL32, [M7UnitMAC]>;
- def : WriteRes<WriteMUL64Lo, [M7UnitMAC]>;
- def : WriteRes<WriteMUL64Hi, []> { let NumMicroOps = 0; }
-}
-
-// Multiply-accumulates.
-let Latency = 2 in {
- def : WriteRes<WriteMAC16, [M7UnitMAC]>;
- def : WriteRes<WriteMAC32, [M7UnitMAC]>;
- def : WriteRes<WriteMAC64Lo, [M7UnitMAC]> { let Latency = 2; }
- def : WriteRes<WriteMAC64Hi, []> { let NumMicroOps = 0; }
-}
-
-// Divisions.
-// These cannot be dual-issued with any instructions.
-def : WriteRes<WriteDIV, [M7UnitALU]> {
- let Latency = 7;
- let SingleIssue = 1;
-}
-
-// Loads/Stores.
-def : WriteRes<WriteLd, [M7UnitLoad]> { let Latency = 1; }
-def : WriteRes<WritePreLd, [M7UnitLoad]> { let Latency = 2; }
-def : WriteRes<WriteST, [M7UnitStore]> { let Latency = 2; }
-
-// Branches.
-def : WriteRes<WriteBr, [M7UnitBranch]> { let Latency = 2; }
-def : WriteRes<WriteBrL, [M7UnitBranch]> { let Latency = 2; }
-def : WriteRes<WriteBrTbl, [M7UnitBranch]> { let Latency = 2; }
-
-// Noop.
-def : WriteRes<WriteNoop, []> { let Latency = 0; }
-
-//===---------------------------------------------------------------------===//
-// Sched definitions for floating-point instructions
-//
-// Floating point conversions.
-def : WriteRes<WriteFPCVT, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; }
-def : WriteRes<WriteFPMOV, [M7UnitVPort]> { let Latency = 3; }
-
-// The FP pipeline has a latency of 3 cycles.
-// ALU operations (32/64-bit). These go down the FP pipeline.
-def : WriteRes<WriteFPALU32, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; }
-def : WriteRes<WriteFPALU64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
- let Latency = 4;
- let BeginGroup = 1;
-}
-
-// Multiplication
-def : WriteRes<WriteFPMUL32, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; }
-def : WriteRes<WriteFPMUL64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
- let Latency = 7;
- let BeginGroup = 1;
-}
-
-// Multiply-accumulate. FPMAC goes down the FP Pipeline.
-def : WriteRes<WriteFPMAC32, [M7UnitVFP, M7UnitVPort]> { let Latency = 6; }
-def : WriteRes<WriteFPMAC64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
- let Latency = 11;
- let BeginGroup = 1;
-}
-
-// Division. Effective scheduling latency is 3, though real latency is larger
-def : WriteRes<WriteFPDIV32, [M7UnitVFP, M7UnitVPort]> { let Latency = 16; }
-def : WriteRes<WriteFPDIV64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
- let Latency = 30;
- let BeginGroup = 1;
-}
-
-// Square-root. Effective scheduling latency is 3; real latency is larger
-def : WriteRes<WriteFPSQRT32, [M7UnitVFP, M7UnitVPort]> { let Latency = 16; }
-def : WriteRes<WriteFPSQRT64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
- let Latency = 30;
- let BeginGroup = 1;
-}
-
-def M7WriteShift2 : SchedWriteRes<[M7UnitALU, M7UnitShift2]> {}
-
-// Not used for M7, but definitions are needed anyway.
-def : WriteRes<WriteVLD1, []>;
-def : WriteRes<WriteVLD2, []>;
-def : WriteRes<WriteVLD3, []>;
-def : WriteRes<WriteVLD4, []>;
-def : WriteRes<WriteVST1, []>;
-def : WriteRes<WriteVST2, []>;
-def : WriteRes<WriteVST3, []>;
-def : WriteRes<WriteVST4, []>;
-
-def M7SingleIssue : SchedWriteRes<[]> {
- let SingleIssue = 1;
- let NumMicroOps = 0;
-}
-def M7Slot0Only : SchedWriteRes<[]> {
- let BeginGroup = 1;
- let NumMicroOps = 0;
-}
-
-// What pipeline stage operands need to be ready for depending on
-// where they come from.
-def : ReadAdvance<ReadALUsr, 0>;
-def : ReadAdvance<ReadMUL, 0>;
-def : ReadAdvance<ReadMAC, 1>;
-def : ReadAdvance<ReadALU, 0>;
-def : ReadAdvance<ReadFPMUL, 0>;
-def : ReadAdvance<ReadFPMAC, 3>;
-def M7Read_ISS : SchedReadAdvance<-1>; // operands needed at EX1
-def M7Read_EX2 : SchedReadAdvance<1>; // operands needed at EX3
-def M7Read_EX3 : SchedReadAdvance<2>; // operands needed at EX4
-
-// Non general purpose instructions may not be dual issued. These
-// use both issue units.
-def M7NonGeneralPurpose : SchedWriteRes<[]> {
- // Assume that these will go down the main ALU pipeline.
- // In reality, many look likely to stall the whole pipeline.
- let Latency = 3;
- let SingleIssue = 1;
-}
-
-// List the non general purpose instructions.
-def : InstRW<[M7NonGeneralPurpose], (instregex "t2MRS", "tSVC", "tBKPT",
- "t2MSR", "t2DMB", "t2DSB", "t2ISB",
- "t2HVC", "t2SMC", "t2UDF", "ERET",
- "tHINT", "t2HINT", "t2CLREX", "BUNDLE")>;
-
-//===---------------------------------------------------------------------===//
-// Sched definitions for load/store
-//
-// Mark whether the loads/stores must be single-issue
-// Address operands are needed earlier
-// Data operands are needed later
-
-def M7BaseUpdate : SchedWriteRes<[]> {
- let Latency = 0; // Update is bypassable out of EX1
- let NumMicroOps = 0;
-}
-def M7LoadLatency1 : SchedWriteRes<[]> {
- let Latency = 1;
- let NumMicroOps = 0;
-}
-def M7SlowLoad : SchedWriteRes<[M7UnitLoad]> { let Latency = 2; }
-
-// Byte and half-word loads should have greater latency than other loads.
-// So should load exclusive.
-
-def : InstRW<[M7SlowLoad],
- (instregex "t2LDR(B|H|SB|SH)pc")>;
-def : InstRW<[M7SlowLoad, M7Read_ISS],
- (instregex "t2LDR(B|H|SB|SH)T", "t2LDR(B|H|SB|SH)i",
- "tLDR(B|H)i")>;
-def : InstRW<[M7SlowLoad, M7Read_ISS, M7Read_ISS],
- (instregex "t2LDR(B|H|SB|SH)s", "tLDR(B|H)r", "tLDR(SB|SH)")>;
-def : InstRW<[M7SlowLoad, M7BaseUpdate, M7Read_ISS],
- (instregex "t2LDR(B|H|SB|SH)_(POST|PRE)")>;
-
-// Exclusive loads/stores cannot be dual-issued
-def : InstRW<[WriteLd, M7Slot0Only, M7Read_ISS],
- (instregex "t2LDREX$")>;
-def : InstRW<[M7SlowLoad, M7Slot0Only, M7Read_ISS],
- (instregex "t2LDREX(B|H)")>;
-def : InstRW<[WriteST, M7SingleIssue, M7Read_EX2, M7Read_ISS],
- (instregex "t2STREX(B|H)?$")>;
-
-// Load/store multiples cannot be dual-issued. Note that default scheduling
-// occurs around read/write times of individual registers in the list; read
-// time for STM cannot be overridden because it is a variadic source operand.
-
-def : InstRW<[WriteLd, M7SingleIssue, M7Read_ISS],
- (instregex "(t|t2)LDM(DB|IA)$")>;
-def : InstRW<[WriteST, M7SingleIssue, M7Read_ISS],
- (instregex "(t|t2)STM(DB|IA)$")>;
-def : InstRW<[M7BaseUpdate, WriteLd, M7SingleIssue, M7Read_ISS],
- (instregex "(t|t2)LDM(DB|IA)_UPD$", "tPOP")>;
-def : InstRW<[M7BaseUpdate, WriteST, M7SingleIssue, M7Read_ISS],
- (instregex "(t|t2)STM(DB|IA)_UPD$", "tPUSH")>;
-
-// Load/store doubles cannot be dual-issued.
-
-def : InstRW<[M7BaseUpdate, WriteST, M7SingleIssue,
- M7Read_EX2, M7Read_EX2, M7Read_ISS],
- (instregex "t2STRD_(PRE|POST)")>;
-def : InstRW<[WriteST, M7SingleIssue, M7Read_EX2, M7Read_EX2, M7Read_ISS],
- (instregex "t2STRDi")>;
-def : InstRW<[WriteLd, M7LoadLatency1, M7SingleIssue, M7BaseUpdate, M7Read_ISS],
- (instregex "t2LDRD_(PRE|POST)")>;
-def : InstRW<[WriteLd, M7LoadLatency1, M7SingleIssue, M7Read_ISS],
- (instregex "t2LDRDi")>;
-
-// Word load / preload
-def : InstRW<[WriteLd],
- (instregex "t2LDRpc", "t2PL[DI]pci", "tLDRpci")>;
-def : InstRW<[WriteLd, M7Read_ISS],
- (instregex "t2LDR(i|T)", "t2PL[DI](W)?i", "tLDRi", "tLDRspi")>;
-def : InstRW<[WriteLd, M7Read_ISS, M7Read_ISS],
- (instregex "t2LDRs", "t2PL[DI](w)?s", "tLDRr")>;
-def : InstRW<[WriteLd, M7BaseUpdate, M7Read_ISS],
- (instregex "t2LDR_(POST|PRE)")>;
-
-// Stores
-def : InstRW<[M7BaseUpdate, WriteST, M7Read_EX2, M7Read_ISS],
- (instregex "t2STR(B|H)?_(POST|PRE)")>;
-def : InstRW<[WriteST, M7Read_EX2, M7Read_ISS, M7Read_ISS],
- (instregex "t2STR(B|H)?s$", "tSTR(B|H)?r$")>;
-def : InstRW<[WriteST, M7Read_EX2, M7Read_ISS],
- (instregex "t2STR(B|H)?(i|T)", "tSTR(B|H)?i$", "tSTRspi")>;
-
-// TBB/TBH - single-issue only; takes two cycles to issue
-
-def M7TableLoad : SchedWriteRes<[M7UnitLoad]> {
- let NumMicroOps = 2;
- let SingleIssue = 1;
-}
-
-def : InstRW<[M7TableLoad, M7Read_ISS, M7Read_ISS], (instregex "t2TB")>;
-
-// VFP loads and stores
-
-def M7LoadSP : SchedWriteRes<[M7UnitLoad, M7UnitVPort]> { let Latency = 1; }
-def M7LoadDP : SchedWriteRes<[M7UnitLoad, M7UnitVPort, M7UnitVPort]> {
- let Latency = 2;
- let SingleIssue = 1;
-}
-def M7StoreSP : SchedWriteRes<[M7UnitStore, M7UnitVPort]>;
-def M7StoreDP : SchedWriteRes<[M7UnitStore, M7UnitVPort, M7UnitVPort]> {
- let SingleIssue = 1;
-}
-
-def : InstRW<[M7LoadSP, M7Read_ISS], (instregex "VLDR(S|H)$")>;
-def : InstRW<[M7LoadDP, M7Read_ISS], (instregex "VLDRD$")>;
-def : InstRW<[M7StoreSP, M7Read_EX3, M7Read_ISS], (instregex "VSTR(S|H)$")>;
-def : InstRW<[M7StoreDP, M7Read_EX3, M7Read_ISS], (instregex "VSTRD$")>;
-
-// Load/store multiples cannot be dual-issued.
-
-def : InstRW<[WriteLd, M7SingleIssue, M7Read_ISS],
- (instregex "VLDM(S|D|Q)(DB|IA)$")>;
-def : InstRW<[WriteST, M7SingleIssue, M7Read_ISS],
- (instregex "VSTM(S|D|Q)(DB|IA)$")>;
-def : InstRW<[M7BaseUpdate, WriteLd, M7SingleIssue, M7Read_ISS],
- (instregex "VLDM(S|D|Q)(DB|IA)_UPD$")>;
-def : InstRW<[M7BaseUpdate, WriteST, M7SingleIssue, M7Read_ISS],
- (instregex "VSTM(S|D|Q)(DB|IA)_UPD$")>;
-
-//===---------------------------------------------------------------------===//
-// Sched definitions for ALU
-//
-
-// Shifted ALU operands are read a cycle early.
-def M7Ex1ReadNoFastBypass : SchedReadAdvance<-1, [WriteLd, M7LoadLatency1]>;
-
-def : InstRW<[WriteALUsi, M7Ex1ReadNoFastBypass, M7Read_ISS],
- (instregex "t2(ADC|ADDS|ADD|BIC|EOR|ORN|ORR|RSBS|RSB|SBC|SUBS)rs$",
- "t2(SUB|CMP|CMNz|TEQ|TST)rs$",
- "t2MOVsr(a|l)")>;
-def : InstRW<[WriteALUsi, M7Read_ISS],
- (instregex "t2MVNs")>;
-
-// Treat pure shift operations (except for RRX) as if they used the EX1
-// shifter but have timing as if they used the EX2 shifter as they usually
-// can choose the EX2 shifter when needed. Will miss a few dual-issue cases,
-// but the results prove to be better than trying to get them exact.
-
-def : InstRW<[M7WriteShift2, M7Read_ISS], (instregex "t2RRX$")>;
-def : InstRW<[WriteALUsi], (instregex "(t|t2)(LSL|LSR|ASR|ROR)")>;
-
-// Instructions that use the shifter, but have normal timing.
-
-def : InstRW<[WriteALUsi,M7Slot0Only], (instregex "t2(BFC|BFI)$")>;
-
-// Instructions which are slot zero only but otherwise normal.
-
-def : InstRW<[WriteALU, M7Slot0Only], (instregex "t2CLZ")>;
-
-// MAC operations that don't have SchedRW set.
-
-def : InstRW<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC], (instregex "t2SML[AS]D")>;
-
-// Divides are special because they stall for their latency, and so look like a
-// single-cycle as far as scheduling opportunities go. By putting WriteALU
-// first, we make the operand latency 1, but keep the instruction latency 7.
-
-def : InstRW<[WriteALU, WriteDIV], (instregex "t2(S|U)DIV")>;
-
-// DSP extension operations
-
-def M7WriteSIMD1 : SchedWriteRes<[M7UnitSIMD, M7UnitALU]> {
- let Latency = 1;
- let BeginGroup = 1;
-}
-def M7WriteSIMD2 : SchedWriteRes<[M7UnitSIMD, M7UnitALU]> {
- let Latency = 2;
- let BeginGroup = 1;
-}
-def M7WriteShSIMD1 : SchedWriteRes<[M7UnitSIMD, M7UnitALU, M7UnitShift1]> {
- let Latency = 1;
- let BeginGroup = 1;
-}
-def M7WriteShSIMD0 : SchedWriteRes<[M7UnitSIMD, M7UnitALU, M7UnitShift1]> {
- let Latency = 0; // Bypassable out of EX1
- let BeginGroup = 1;
-}
-def M7WriteShSIMD2 : SchedWriteRes<[M7UnitSIMD, M7UnitALU, M7UnitShift1]> {
- let Latency = 2;
- let BeginGroup = 1;
-}
-
-def : InstRW<[M7WriteShSIMD2, M7Read_ISS],
- (instregex "t2(S|U)SAT")>;
-def : InstRW<[M7WriteSIMD1, ReadALU],
- (instregex "(t|t2)(S|U)XT(B|H)")>;
-def : InstRW<[M7WriteSIMD1, ReadALU, ReadALU],
- (instregex "t2(S|SH|U|UH)(ADD16|ADD8|ASX|SAX|SUB16|SUB8)",
- "t2SEL")>;
-def : InstRW<[M7WriteSIMD2, ReadALU, ReadALU],
- (instregex "t2(Q|UQ)(ADD|ASX|SAX|SUB)", "t2USAD8")>;
-def : InstRW<[M7WriteShSIMD2, M7Read_ISS, M7Read_ISS],
- (instregex "t2QD(ADD|SUB)")>;
-def : InstRW<[M7WriteShSIMD0, M7Read_ISS],
- (instregex "t2(RBIT|REV)", "tREV")>;
-def : InstRW<[M7WriteShSIMD1, M7Read_ISS],
- (instregex "t2(SBFX|UBFX)")>;
-def : InstRW<[M7WriteShSIMD1, ReadALU, M7Read_ISS],
- (instregex "t2PKH(BT|TB)", "t2(S|U)XTA")>;
-def : InstRW<[M7WriteSIMD2, ReadALU, ReadALU, M7Read_EX2],
- (instregex "t2USADA8")>;
-
-// MSR/MRS
-def : InstRW<[M7NonGeneralPurpose], (instregex "MSR", "MRS")>;
-
-//===---------------------------------------------------------------------===//
-// Sched definitions for FP operations
-//
-
-// Effective scheduling latency is really 3 for nearly all FP operations,
-// even if their true latency is higher.
-def M7WriteVFPLatOverride : SchedWriteRes<[]> {
- let Latency = 3;
- let NumMicroOps = 0;
-}
-def M7WriteVFPExtraVPort : SchedWriteRes<[M7UnitVPort]> {
- let Latency = 3;
- let NumMicroOps = 0;
-}
-
-// Instructions which are missing default schedules.
-def : InstRW<[WriteFPALU32],
- (instregex "V(ABS|CVT.*|NEG|FP_VMAX.*|FP_VMIN.*|RINT.*)S$")>;
-def : InstRW<[M7WriteVFPLatOverride, WriteFPALU64],
- (instregex "V(ABS|CVT.*|NEG|FP_VMAX.*|FP_VMIN.*|RINT.*)D$")>;
-
-// VCMP
-def M7WriteVCMPS : SchedWriteRes<[M7UnitVFP, M7UnitVPort]> { let Latency = 0; }
-def M7WriteVCMPD : SchedWriteRes<[M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
- let Latency = 0;
- let BeginGroup = 1;
-}
-def : InstRW<[M7WriteVCMPS], (instregex "VCMPS$")>;
-def : InstRW<[M7WriteVCMPD], (instregex "VCMPD$")>;
-
- // VMRS/VMSR
-def M7VMRS : SchedWriteRes<[M7UnitVFP, M7UnitVPort]> { let SingleIssue = 1; }
-def M7VMSR : SchedWriteRes<[M7UnitVFP, M7UnitVPort]> { let SingleIssue = 1; }
-def : InstRW<[M7VMRS], (instregex "FMSTAT")>;
-def : InstRW<[M7VMSR], (instregex "VMSR")>;
-
-// VSEL cannot bypass in its implied $cpsr operand; model as earlier read
-def : InstRW<[WriteFPALU32, M7Slot0Only, ReadALU, ReadALU, M7Read_ISS],
- (instregex "VSEL.*S$")>;
-def : InstRW<[M7WriteVFPLatOverride, WriteFPALU64, M7Slot0Only,
- ReadALU, ReadALU, M7Read_ISS],
- (instregex "VSEL.*D$")>;
-
-// VMOV
-def : InstRW<[WriteFPMOV],
- (instregex "VMOV(H|S)$", "FCONST(H|S)")>;
-def : InstRW<[WriteFPMOV, M7WriteVFPExtraVPort, M7Slot0Only],
- (instregex "VMOVD$")>;
-def : InstRW<[WriteFPMOV, M7WriteVFPExtraVPort, M7Slot0Only],
- (instregex "FCONSTD")>;
-def : InstRW<[WriteFPMOV, M7WriteVFPExtraVPort, M7SingleIssue],
- (instregex "VMOV(DRR|RRD|RRS|SRR)")>;
-
-// Larger-latency overrides.
-
-def : InstRW<[M7WriteVFPLatOverride, WriteFPDIV32], (instregex "VDIVS")>;
-def : InstRW<[M7WriteVFPLatOverride, WriteFPDIV64], (instregex "VDIVD")>;
-def : InstRW<[M7WriteVFPLatOverride, WriteFPSQRT32], (instregex "VSQRTS")>;
-def : InstRW<[M7WriteVFPLatOverride, WriteFPSQRT64], (instregex "VSQRTD")>;
-def : InstRW<[M7WriteVFPLatOverride, WriteFPMUL64],
- (instregex "V(MUL|NMUL)D")>;
-def : InstRW<[M7WriteVFPLatOverride, WriteFPALU64],
- (instregex "V(ADD|SUB)D")>;
-
-// Multiply-accumulate. Chained SP timing is correct; rest need overrides
-// Double-precision chained MAC stalls the pipeline behind it for 3 cycles,
-// making it appear to have 3 cycle latency for scheduling.
-
-def : InstRW<[M7WriteVFPLatOverride, WriteFPMAC64,
- ReadFPMAC, ReadFPMUL, ReadFPMUL],
- (instregex "V(N)?ML(A|S)D$")>;
-
-// Single-precision fused MACs look like latency 5 with advance of 2.
-
-def M7WriteVFPLatOverride5 : SchedWriteRes<[]> {
- let Latency = 5;
- let NumMicroOps = 0;
-}
-def M7ReadFPMAC2 : SchedReadAdvance<2>;
-
-def : InstRW<[M7WriteVFPLatOverride5, WriteFPMAC32,
- M7ReadFPMAC2, ReadFPMUL, ReadFPMUL],
- (instregex "VF(N)?M(A|S)S$")>;
-
-// Double-precision fused MAC stalls the pipeline behind it for 2 cycles, making
-// it appear to have 3 cycle latency for scheduling.
-
-def : InstRW<[M7WriteVFPLatOverride, WriteFPMAC64,
- ReadFPMAC, ReadFPMUL, ReadFPMUL],
- (instregex "VF(N)?M(A|S)D$")>;
-
-} // SchedModel = CortexM7Model
+//=- ARMScheduleM7.td - ARM Cortex-M7 Scheduling Definitions -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SchedRead/Write data for the ARM Cortex-M7 processor.
+//
+//===----------------------------------------------------------------------===//
+
+def CortexM7Model : SchedMachineModel {
+ let IssueWidth = 2; // Dual issue for most instructions.
+ let MicroOpBufferSize = 0; // The Cortex-M7 is in-order.
+ let LoadLatency = 2; // Best case for load-use case.
+ let MispredictPenalty = 4; // Mispredict cost for forward branches is 6,
+ // but 4 works better
+ let CompleteModel = 0;
+}
+
+//===--------------------------------------------------------------------===//
+// The Cortex-M7 has two ALU, two LOAD, a STORE, a MAC, a BRANCH and a VFP
+// pipe. The stages relevant to scheduling are as follows:
+//
+// EX1: address generation     shifts
+// EX2: fast load data         ALUs                 FP operation
+// EX3: slow load data         integer writeback    FP operation
+// EX4: store data                                  FP writeback
+//
+// There are shifters in both EX1 and EX2, and some instructions can be
+// flexibly allocated between them. EX2 is used as the "zero" point
+// for scheduling, so simple ALU operations executing in EX2 will have
+// ReadAdvance<0> (the default) for their source operands and Latency = 1.
+
+def M7UnitLoad : ProcResource<2> { let BufferSize = 0; }
+def M7UnitStore : ProcResource<1> { let BufferSize = 0; }
+def M7UnitALU : ProcResource<2>;
+def M7UnitShift1 : ProcResource<1> { let BufferSize = 0; }
+def M7UnitShift2 : ProcResource<1> { let BufferSize = 0; }
+def M7UnitMAC : ProcResource<1> { let BufferSize = 0; }
+def M7UnitBranch : ProcResource<1> { let BufferSize = 0; }
+def M7UnitVFP : ProcResource<1> { let BufferSize = 0; }
+def M7UnitVPort : ProcResource<2> { let BufferSize = 0; }
+def M7UnitSIMD : ProcResource<1> { let BufferSize = 0; }
+
+//===---------------------------------------------------------------------===//
+// Subtarget-specific SchedWrite types which map ProcResources and set latency.
+
+let SchedModel = CortexM7Model in {
+
+def : WriteRes<WriteALU, [M7UnitALU]> { let Latency = 1; }
+
+// Basic ALU with shifts.
+let Latency = 1 in {
+ def : WriteRes<WriteALUsi, [M7UnitALU, M7UnitShift1]>;
+ def : WriteRes<WriteALUsr, [M7UnitALU, M7UnitShift1]>;
+ def : WriteRes<WriteALUSsr, [M7UnitALU, M7UnitShift1]>;
+}
+
+// Compares.
+def : WriteRes<WriteCMP, [M7UnitALU]> { let Latency = 1; }
+def : WriteRes<WriteCMPsi, [M7UnitALU, M7UnitShift1]> { let Latency = 2; }
+def : WriteRes<WriteCMPsr, [M7UnitALU, M7UnitShift1]> { let Latency = 2; }
+
+// Multiplies.
+let Latency = 2 in {
+ def : WriteRes<WriteMUL16, [M7UnitMAC]>;
+ def : WriteRes<WriteMUL32, [M7UnitMAC]>;
+ def : WriteRes<WriteMUL64Lo, [M7UnitMAC]>;
+ def : WriteRes<WriteMUL64Hi, []> { let NumMicroOps = 0; }
+}
+
+// Multiply-accumulates.
+let Latency = 2 in {
+ def : WriteRes<WriteMAC16, [M7UnitMAC]>;
+ def : WriteRes<WriteMAC32, [M7UnitMAC]>;
+ def : WriteRes<WriteMAC64Lo, [M7UnitMAC]> { let Latency = 2; }
+ def : WriteRes<WriteMAC64Hi, []> { let NumMicroOps = 0; }
+}
+
+// Divisions.
+// These cannot be dual-issued with any instructions.
+def : WriteRes<WriteDIV, [M7UnitALU]> {
+ let Latency = 7;
+ let SingleIssue = 1;
+}
+
+// Loads/Stores.
+def : WriteRes<WriteLd, [M7UnitLoad]> { let Latency = 1; }
+def : WriteRes<WritePreLd, [M7UnitLoad]> { let Latency = 2; }
+def : WriteRes<WriteST, [M7UnitStore]> { let Latency = 2; }
+
+// Branches.
+def : WriteRes<WriteBr, [M7UnitBranch]> { let Latency = 2; }
+def : WriteRes<WriteBrL, [M7UnitBranch]> { let Latency = 2; }
+def : WriteRes<WriteBrTbl, [M7UnitBranch]> { let Latency = 2; }
+
+// Noop.
+def : WriteRes<WriteNoop, []> { let Latency = 0; }
+
+//===---------------------------------------------------------------------===//
+// Sched definitions for floating-point instructions
+//
+// Floating point conversions.
+def : WriteRes<WriteFPCVT, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; }
+def : WriteRes<WriteFPMOV, [M7UnitVPort]> { let Latency = 3; }
+
+// The FP pipeline has a latency of 3 cycles.
+// ALU operations (32/64-bit). These go down the FP pipeline.
+def : WriteRes<WriteFPALU32, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; }
+def : WriteRes<WriteFPALU64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
+ let Latency = 4;
+ let BeginGroup = 1;
+}
+
+// Multiplication
+def : WriteRes<WriteFPMUL32, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; }
+def : WriteRes<WriteFPMUL64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
+ let Latency = 7;
+ let BeginGroup = 1;
+}
+
+// Multiply-accumulate. FPMAC goes down the FP Pipeline.
+def : WriteRes<WriteFPMAC32, [M7UnitVFP, M7UnitVPort]> { let Latency = 6; }
+def : WriteRes<WriteFPMAC64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
+ let Latency = 11;
+ let BeginGroup = 1;
+}
+
+// Division. Effective scheduling latency is 3, though real latency is larger
+def : WriteRes<WriteFPDIV32, [M7UnitVFP, M7UnitVPort]> { let Latency = 16; }
+def : WriteRes<WriteFPDIV64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
+ let Latency = 30;
+ let BeginGroup = 1;
+}
+
+// Square-root. Effective scheduling latency is 3; real latency is larger
+def : WriteRes<WriteFPSQRT32, [M7UnitVFP, M7UnitVPort]> { let Latency = 16; }
+def : WriteRes<WriteFPSQRT64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
+ let Latency = 30;
+ let BeginGroup = 1;
+}
+
+def M7WriteShift2 : SchedWriteRes<[M7UnitALU, M7UnitShift2]> {}
+
+// Not used for M7, but definitions are needed anyway.
+def : WriteRes<WriteVLD1, []>;
+def : WriteRes<WriteVLD2, []>;
+def : WriteRes<WriteVLD3, []>;
+def : WriteRes<WriteVLD4, []>;
+def : WriteRes<WriteVST1, []>;
+def : WriteRes<WriteVST2, []>;
+def : WriteRes<WriteVST3, []>;
+def : WriteRes<WriteVST4, []>;
+
+def M7SingleIssue : SchedWriteRes<[]> {
+ let SingleIssue = 1;
+ let NumMicroOps = 0;
+}
+def M7Slot0Only : SchedWriteRes<[]> {
+ let BeginGroup = 1;
+ let NumMicroOps = 0;
+}
+
+// What pipeline stage operands need to be ready for depending on
+// where they come from.
+def : ReadAdvance<ReadALUsr, 0>;
+def : ReadAdvance<ReadMUL, 0>;
+def : ReadAdvance<ReadMAC, 1>;
+def : ReadAdvance<ReadALU, 0>;
+def : ReadAdvance<ReadFPMUL, 0>;
+def : ReadAdvance<ReadFPMAC, 3>;
+def M7Read_ISS : SchedReadAdvance<-1>; // operands needed at EX1
+def M7Read_EX2 : SchedReadAdvance<1>; // operands needed at EX3
+def M7Read_EX3 : SchedReadAdvance<2>; // operands needed at EX4
+
+// Non general purpose instructions may not be dual issued. These
+// use both issue units.
+def M7NonGeneralPurpose : SchedWriteRes<[]> {
+ // Assume that these will go down the main ALU pipeline.
+ // In reality, many look likely to stall the whole pipeline.
+ let Latency = 3;
+ let SingleIssue = 1;
+}
+
+// List the non general purpose instructions.
+def : InstRW<[M7NonGeneralPurpose], (instregex "t2MRS", "tSVC", "tBKPT",
+ "t2MSR", "t2DMB", "t2DSB", "t2ISB",
+ "t2HVC", "t2SMC", "t2UDF", "ERET",
+ "tHINT", "t2HINT", "t2CLREX", "BUNDLE")>;
+
+//===---------------------------------------------------------------------===//
+// Sched definitions for load/store
+//
+// Mark whether the loads/stores must be single-issue
+// Address operands are needed earlier
+// Data operands are needed later
+
+def M7BaseUpdate : SchedWriteRes<[]> {
+ let Latency = 0; // Update is bypassable out of EX1
+ let NumMicroOps = 0;
+}
+def M7LoadLatency1 : SchedWriteRes<[]> {
+ let Latency = 1;
+ let NumMicroOps = 0;
+}
+def M7SlowLoad : SchedWriteRes<[M7UnitLoad]> { let Latency = 2; }
+
+// Byte and half-word loads should have greater latency than other loads.
+// So should load exclusive.
+
+def : InstRW<[M7SlowLoad],
+ (instregex "t2LDR(B|H|SB|SH)pc")>;
+def : InstRW<[M7SlowLoad, M7Read_ISS],
+ (instregex "t2LDR(B|H|SB|SH)T", "t2LDR(B|H|SB|SH)i",
+ "tLDR(B|H)i")>;
+def : InstRW<[M7SlowLoad, M7Read_ISS, M7Read_ISS],
+ (instregex "t2LDR(B|H|SB|SH)s", "tLDR(B|H)r", "tLDR(SB|SH)")>;
+def : InstRW<[M7SlowLoad, M7BaseUpdate, M7Read_ISS],
+ (instregex "t2LDR(B|H|SB|SH)_(POST|PRE)")>;
+
+// Exclusive loads/stores cannot be dual-issued
+def : InstRW<[WriteLd, M7Slot0Only, M7Read_ISS],
+ (instregex "t2LDREX$")>;
+def : InstRW<[M7SlowLoad, M7Slot0Only, M7Read_ISS],
+ (instregex "t2LDREX(B|H)")>;
+def : InstRW<[WriteST, M7SingleIssue, M7Read_EX2, M7Read_ISS],
+ (instregex "t2STREX(B|H)?$")>;
+
+// Load/store multiples cannot be dual-issued. Note that default scheduling
+// occurs around read/write times of individual registers in the list; read
+// time for STM cannot be overridden because it is a variadic source operand.
+
+def : InstRW<[WriteLd, M7SingleIssue, M7Read_ISS],
+ (instregex "(t|t2)LDM(DB|IA)$")>;
+def : InstRW<[WriteST, M7SingleIssue, M7Read_ISS],
+ (instregex "(t|t2)STM(DB|IA)$")>;
+def : InstRW<[M7BaseUpdate, WriteLd, M7SingleIssue, M7Read_ISS],
+ (instregex "(t|t2)LDM(DB|IA)_UPD$", "tPOP")>;
+def : InstRW<[M7BaseUpdate, WriteST, M7SingleIssue, M7Read_ISS],
+ (instregex "(t|t2)STM(DB|IA)_UPD$", "tPUSH")>;
+
+// Load/store doubles cannot be dual-issued.
+
+def : InstRW<[M7BaseUpdate, WriteST, M7SingleIssue,
+ M7Read_EX2, M7Read_EX2, M7Read_ISS],
+ (instregex "t2STRD_(PRE|POST)")>;
+def : InstRW<[WriteST, M7SingleIssue, M7Read_EX2, M7Read_EX2, M7Read_ISS],
+ (instregex "t2STRDi")>;
+def : InstRW<[WriteLd, M7LoadLatency1, M7SingleIssue, M7BaseUpdate, M7Read_ISS],
+ (instregex "t2LDRD_(PRE|POST)")>;
+def : InstRW<[WriteLd, M7LoadLatency1, M7SingleIssue, M7Read_ISS],
+ (instregex "t2LDRDi")>;
+
+// Word load / preload
+def : InstRW<[WriteLd],
+ (instregex "t2LDRpc", "t2PL[DI]pci", "tLDRpci")>;
+def : InstRW<[WriteLd, M7Read_ISS],
+ (instregex "t2LDR(i|T)", "t2PL[DI](W)?i", "tLDRi", "tLDRspi")>;
+def : InstRW<[WriteLd, M7Read_ISS, M7Read_ISS],
+ (instregex "t2LDRs", "t2PL[DI](w)?s", "tLDRr")>;
+def : InstRW<[WriteLd, M7BaseUpdate, M7Read_ISS],
+ (instregex "t2LDR_(POST|PRE)")>;
+
+// Stores
+def : InstRW<[M7BaseUpdate, WriteST, M7Read_EX2, M7Read_ISS],
+ (instregex "t2STR(B|H)?_(POST|PRE)")>;
+def : InstRW<[WriteST, M7Read_EX2, M7Read_ISS, M7Read_ISS],
+ (instregex "t2STR(B|H)?s$", "tSTR(B|H)?r$")>;
+def : InstRW<[WriteST, M7Read_EX2, M7Read_ISS],
+ (instregex "t2STR(B|H)?(i|T)", "tSTR(B|H)?i$", "tSTRspi")>;
+
+// TBB/TBH - single-issue only; takes two cycles to issue
+
+def M7TableLoad : SchedWriteRes<[M7UnitLoad]> {
+ let NumMicroOps = 2;
+ let SingleIssue = 1;
+}
+
+def : InstRW<[M7TableLoad, M7Read_ISS, M7Read_ISS], (instregex "t2TB")>;
+
+// VFP loads and stores
+
+def M7LoadSP : SchedWriteRes<[M7UnitLoad, M7UnitVPort]> { let Latency = 1; }
+def M7LoadDP : SchedWriteRes<[M7UnitLoad, M7UnitVPort, M7UnitVPort]> {
+ let Latency = 2;
+ let SingleIssue = 1;
+}
+def M7StoreSP : SchedWriteRes<[M7UnitStore, M7UnitVPort]>;
+def M7StoreDP : SchedWriteRes<[M7UnitStore, M7UnitVPort, M7UnitVPort]> {
+ let SingleIssue = 1;
+}
+
+def : InstRW<[M7LoadSP, M7Read_ISS], (instregex "VLDR(S|H)$")>;
+def : InstRW<[M7LoadDP, M7Read_ISS], (instregex "VLDRD$")>;
+def : InstRW<[M7StoreSP, M7Read_EX3, M7Read_ISS], (instregex "VSTR(S|H)$")>;
+def : InstRW<[M7StoreDP, M7Read_EX3, M7Read_ISS], (instregex "VSTRD$")>;
+
+// Load/store multiples cannot be dual-issued.
+
+def : InstRW<[WriteLd, M7SingleIssue, M7Read_ISS],
+ (instregex "VLDM(S|D|Q)(DB|IA)$")>;
+def : InstRW<[WriteST, M7SingleIssue, M7Read_ISS],
+ (instregex "VSTM(S|D|Q)(DB|IA)$")>;
+def : InstRW<[M7BaseUpdate, WriteLd, M7SingleIssue, M7Read_ISS],
+ (instregex "VLDM(S|D|Q)(DB|IA)_UPD$")>;
+def : InstRW<[M7BaseUpdate, WriteST, M7SingleIssue, M7Read_ISS],
+ (instregex "VSTM(S|D|Q)(DB|IA)_UPD$")>;
+
+//===---------------------------------------------------------------------===//
+// Sched definitions for ALU
+//
+
+// Shifted ALU operands are read a cycle early.
+def M7Ex1ReadNoFastBypass : SchedReadAdvance<-1, [WriteLd, M7LoadLatency1]>;
+
+def : InstRW<[WriteALUsi, M7Ex1ReadNoFastBypass, M7Read_ISS],
+ (instregex "t2(ADC|ADDS|ADD|BIC|EOR|ORN|ORR|RSBS|RSB|SBC|SUBS)rs$",
+ "t2(SUB|CMP|CMNz|TEQ|TST)rs$",
+ "t2MOVsr(a|l)")>;
+def : InstRW<[WriteALUsi, M7Read_ISS],
+ (instregex "t2MVNs")>;
+
+// Treat pure shift operations (except for RRX) as if they used the EX1
+// shifter but have timing as if they used the EX2 shifter as they usually
+// can choose the EX2 shifter when needed. Will miss a few dual-issue cases,
+// but the results prove to be better than trying to get them exact.
+
+def : InstRW<[M7WriteShift2, M7Read_ISS], (instregex "t2RRX$")>;
+def : InstRW<[WriteALUsi], (instregex "(t|t2)(LSL|LSR|ASR|ROR)")>;
+
+// Instructions that use the shifter, but have normal timing.
+
+def : InstRW<[WriteALUsi,M7Slot0Only], (instregex "t2(BFC|BFI)$")>;
+
+// Instructions which are slot zero only but otherwise normal.
+
+def : InstRW<[WriteALU, M7Slot0Only], (instregex "t2CLZ")>;
+
+// MAC operations that don't have SchedRW set.
+
+def : InstRW<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC], (instregex "t2SML[AS]D")>;
+
+// Divides are special because they stall for their latency, and so look like a
+// single-cycle as far as scheduling opportunities go. By putting WriteALU
+// first, we make the operand latency 1, but keep the instruction latency 7.
+
+def : InstRW<[WriteALU, WriteDIV], (instregex "t2(S|U)DIV")>;
+
+// DSP extension operations
+
+def M7WriteSIMD1 : SchedWriteRes<[M7UnitSIMD, M7UnitALU]> {
+ let Latency = 1;
+ let BeginGroup = 1;
+}
+def M7WriteSIMD2 : SchedWriteRes<[M7UnitSIMD, M7UnitALU]> {
+ let Latency = 2;
+ let BeginGroup = 1;
+}
+def M7WriteShSIMD1 : SchedWriteRes<[M7UnitSIMD, M7UnitALU, M7UnitShift1]> {
+ let Latency = 1;
+ let BeginGroup = 1;
+}
+def M7WriteShSIMD0 : SchedWriteRes<[M7UnitSIMD, M7UnitALU, M7UnitShift1]> {
+ let Latency = 0; // Bypassable out of EX1
+ let BeginGroup = 1;
+}
+def M7WriteShSIMD2 : SchedWriteRes<[M7UnitSIMD, M7UnitALU, M7UnitShift1]> {
+ let Latency = 2;
+ let BeginGroup = 1;
+}
+
+def : InstRW<[M7WriteShSIMD2, M7Read_ISS],
+ (instregex "t2(S|U)SAT")>;
+def : InstRW<[M7WriteSIMD1, ReadALU],
+ (instregex "(t|t2)(S|U)XT(B|H)")>;
+def : InstRW<[M7WriteSIMD1, ReadALU, ReadALU],
+ (instregex "t2(S|SH|U|UH)(ADD16|ADD8|ASX|SAX|SUB16|SUB8)",
+ "t2SEL")>;
+def : InstRW<[M7WriteSIMD2, ReadALU, ReadALU],
+ (instregex "t2(Q|UQ)(ADD|ASX|SAX|SUB)", "t2USAD8")>;
+def : InstRW<[M7WriteShSIMD2, M7Read_ISS, M7Read_ISS],
+ (instregex "t2QD(ADD|SUB)")>;
+def : InstRW<[M7WriteShSIMD0, M7Read_ISS],
+ (instregex "t2(RBIT|REV)", "tREV")>;
+def : InstRW<[M7WriteShSIMD1, M7Read_ISS],
+ (instregex "t2(SBFX|UBFX)")>;
+def : InstRW<[M7WriteShSIMD1, ReadALU, M7Read_ISS],
+ (instregex "t2PKH(BT|TB)", "t2(S|U)XTA")>;
+def : InstRW<[M7WriteSIMD2, ReadALU, ReadALU, M7Read_EX2],
+ (instregex "t2USADA8")>;
+
+// MSR/MRS
+def : InstRW<[M7NonGeneralPurpose], (instregex "MSR", "MRS")>;
+
+//===---------------------------------------------------------------------===//
+// Sched definitions for FP operations
+//
+
+// Effective scheduling latency is really 3 for nearly all FP operations,
+// even if their true latency is higher.
+def M7WriteVFPLatOverride : SchedWriteRes<[]> {
+ let Latency = 3;
+ let NumMicroOps = 0;
+}
+def M7WriteVFPExtraVPort : SchedWriteRes<[M7UnitVPort]> {
+ let Latency = 3;
+ let NumMicroOps = 0;
+}
+
+// Instructions which are missing default schedules.
+def : InstRW<[WriteFPALU32],
+ (instregex "V(ABS|CVT.*|NEG|FP_VMAX.*|FP_VMIN.*|RINT.*)S$")>;
+def : InstRW<[M7WriteVFPLatOverride, WriteFPALU64],
+ (instregex "V(ABS|CVT.*|NEG|FP_VMAX.*|FP_VMIN.*|RINT.*)D$")>;
+
+// VCMP
+def M7WriteVCMPS : SchedWriteRes<[M7UnitVFP, M7UnitVPort]> { let Latency = 0; }
+def M7WriteVCMPD : SchedWriteRes<[M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
+ let Latency = 0;
+ let BeginGroup = 1;
+}
+def : InstRW<[M7WriteVCMPS], (instregex "VCMPS$")>;
+def : InstRW<[M7WriteVCMPD], (instregex "VCMPD$")>;
+
+ // VMRS/VMSR
+def M7VMRS : SchedWriteRes<[M7UnitVFP, M7UnitVPort]> { let SingleIssue = 1; }
+def M7VMSR : SchedWriteRes<[M7UnitVFP, M7UnitVPort]> { let SingleIssue = 1; }
+def : InstRW<[M7VMRS], (instregex "FMSTAT")>;
+def : InstRW<[M7VMSR], (instregex "VMSR")>;
+
+// VSEL cannot bypass in its implied $cpsr operand; model as earlier read
+def : InstRW<[WriteFPALU32, M7Slot0Only, ReadALU, ReadALU, M7Read_ISS],
+ (instregex "VSEL.*S$")>;
+def : InstRW<[M7WriteVFPLatOverride, WriteFPALU64, M7Slot0Only,
+ ReadALU, ReadALU, M7Read_ISS],
+ (instregex "VSEL.*D$")>;
+
+// VMOV
+def : InstRW<[WriteFPMOV],
+ (instregex "VMOV(H|S)$", "FCONST(H|S)")>;
+def : InstRW<[WriteFPMOV, M7WriteVFPExtraVPort, M7Slot0Only],
+ (instregex "VMOVD$")>;
+def : InstRW<[WriteFPMOV, M7WriteVFPExtraVPort, M7Slot0Only],
+ (instregex "FCONSTD")>;
+def : InstRW<[WriteFPMOV, M7WriteVFPExtraVPort, M7SingleIssue],
+ (instregex "VMOV(DRR|RRD|RRS|SRR)")>;
+
+// Larger-latency overrides.
+
+def : InstRW<[M7WriteVFPLatOverride, WriteFPDIV32], (instregex "VDIVS")>;
+def : InstRW<[M7WriteVFPLatOverride, WriteFPDIV64], (instregex "VDIVD")>;
+def : InstRW<[M7WriteVFPLatOverride, WriteFPSQRT32], (instregex "VSQRTS")>;
+def : InstRW<[M7WriteVFPLatOverride, WriteFPSQRT64], (instregex "VSQRTD")>;
+def : InstRW<[M7WriteVFPLatOverride, WriteFPMUL64],
+ (instregex "V(MUL|NMUL)D")>;
+def : InstRW<[M7WriteVFPLatOverride, WriteFPALU64],
+ (instregex "V(ADD|SUB)D")>;
+
+// Multiply-accumulate. Chained SP timing is correct; rest need overrides
+// Double-precision chained MAC stalls the pipeline behind it for 3 cycles,
+// making it appear to have 3 cycle latency for scheduling.
+
+def : InstRW<[M7WriteVFPLatOverride, WriteFPMAC64,
+ ReadFPMAC, ReadFPMUL, ReadFPMUL],
+ (instregex "V(N)?ML(A|S)D$")>;
+
+// Single-precision fused MACs look like latency 5 with advance of 2.
+
+def M7WriteVFPLatOverride5 : SchedWriteRes<[]> {
+ let Latency = 5;
+ let NumMicroOps = 0;
+}
+def M7ReadFPMAC2 : SchedReadAdvance<2>;
+
+def : InstRW<[M7WriteVFPLatOverride5, WriteFPMAC32,
+ M7ReadFPMAC2, ReadFPMUL, ReadFPMUL],
+ (instregex "VF(N)?M(A|S)S$")>;
+
+// Double-precision fused MAC stalls the pipeline behind it for 2 cycles, making
+// it appear to have 3 cycle latency for scheduling.
+
+def : InstRW<[M7WriteVFPLatOverride, WriteFPMAC64,
+ ReadFPMAC, ReadFPMUL, ReadFPMUL],
+ (instregex "VF(N)?M(A|S)D$")>;
+
+} // SchedModel = CortexM7Model
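The M7 model above combines SchedWriteRes latencies with SchedReadAdvance to describe forwarding: a read advance lets a dependent instruction pick up an operand before the producer's nominal latency has elapsed. A minimal sketch of that arithmetic, with simplified names that are not the LLVM scheduler API:

#include <algorithm>
#include <cstdio>

struct WriteRes    { int Latency; };  // e.g. M7WriteVFPLatOverride5 has Latency = 5
struct ReadAdvance { int Cycles;  };  // e.g. M7ReadFPMAC2 advances the read by 2

// Cycle distance at which a dependent read can issue after its producer.
int effectiveLatency(const WriteRes &W, const ReadAdvance &R) {
  return std::max(0, W.Latency - R.Cycles);
}

int main() {
  // A single-precision fused MAC feeding the next MAC's accumulator:
  // latency 5 with an advance of 2 behaves like a 3-cycle dependency chain.
  std::printf("%d\n", effectiveLatency({5}, {2}));  // prints 3
}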
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleR52.td b/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleR52.td
index 466acec6f7..aabce817a9 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleR52.td
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleR52.td
@@ -787,8 +787,8 @@ def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC
def : InstRW<[R52WriteFPALU_F3, R52Read_F2], (instregex "VBICi(v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F2], (instregex "VBICi(v8i16|v4i32)")>;
-def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL|VBSP)d")>;
-def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL|VBSP)q")>;
+def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL|VBSP)d")>;
+def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL|VBSP)q")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
(instregex "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>;
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleSwift.td b/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleSwift.td
index d66b3065c7..ef2bde2a0d 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleSwift.td
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleSwift.td
@@ -558,8 +558,8 @@ let SchedModel = SwiftModel in {
(instregex "VADDv", "VSUBv", "VNEG(s|f|v)", "VADDL", "VSUBL",
"VADDW", "VSUBW", "VHADD", "VHSUB", "VRHADD", "VPADDi",
"VPADDL", "VAND", "VBIC", "VEOR", "VORN", "VORR", "VTST",
- "VSHL", "VSHR(s|u)", "VSHLL", "VQSHL(s|u)", "VBIF", "VBIT",
- "VBSL", "VBSP", "VSLI", "VSRI", "VCLS", "VCLZ", "VCNT")>;
+ "VSHL", "VSHR(s|u)", "VSHLL", "VQSHL(s|u)", "VBIF", "VBIT",
+ "VBSL", "VBSP", "VSLI", "VSRI", "VCLS", "VCLZ", "VCNT")>;
def : InstRW<[SwiftWriteP1TwoCycle],
(instregex "VEXT", "VREV16", "VREV32", "VREV64")>;
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMSubtarget.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMSubtarget.cpp
index 5cb608b74a..c49135d536 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMSubtarget.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMSubtarget.cpp
@@ -97,9 +97,9 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const ARMBaseTargetMachine &TM, bool IsLittle,
bool MinSize)
- : ARMGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
- UseMulOps(UseFusedMulOps), CPUString(CPU), OptMinSize(MinSize),
- IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), TM(TM),
+ : ARMGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
+ UseMulOps(UseFusedMulOps), CPUString(CPU), OptMinSize(MinSize),
+ IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), TM(TM),
FrameLowering(initializeFrameLowering(CPU, FS)),
// At this point initializeSubtargetDependencies has been called so
// we can query directly.
@@ -185,7 +185,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
else
ArchFS = std::string(FS);
}
- ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, ArchFS);
+ ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, ArchFS);
// FIXME: This used enable V6T2 support implicitly for Thumb2 mode.
// Assert this for now to make the change obvious.
@@ -237,7 +237,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
switch (IT) {
case DefaultIT:
- RestrictIT = hasV8Ops() && !hasMinSize();
+ RestrictIT = hasV8Ops() && !hasMinSize();
break;
case RestrictedIT:
RestrictIT = true;
@@ -294,13 +294,13 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
case CortexA76:
case CortexA77:
case CortexA78:
- case CortexA78C:
+ case CortexA78C:
case CortexR4:
case CortexR4F:
case CortexR5:
case CortexR7:
case CortexM3:
- case CortexM7:
+ case CortexM7:
case CortexR52:
case CortexX1:
break;
@@ -316,8 +316,8 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
PreISelOperandLatencyAdjustment = 1;
break;
case NeoverseN1:
- case NeoverseN2:
- case NeoverseV1:
+ case NeoverseN2:
+ case NeoverseV1:
break;
case Swift:
MaxInterleaveFactor = 2;
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMSubtarget.h b/contrib/libs/llvm12/lib/Target/ARM/ARMSubtarget.h
index fd9b94fdaa..a6335c6984 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMSubtarget.h
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMSubtarget.h
@@ -63,11 +63,11 @@ protected:
CortexA76,
CortexA77,
CortexA78,
- CortexA78C,
+ CortexA78C,
CortexA8,
CortexA9,
CortexM3,
- CortexM7,
+ CortexM7,
CortexR4,
CortexR4F,
CortexR5,
@@ -78,8 +78,8 @@ protected:
Krait,
Kryo,
NeoverseN1,
- NeoverseN2,
- NeoverseV1,
+ NeoverseN2,
+ NeoverseV1,
Swift
};
enum ARMProcClassEnum {
@@ -167,7 +167,7 @@ protected:
bool HasV8_4aOps = false;
bool HasV8_5aOps = false;
bool HasV8_6aOps = false;
- bool HasV8_7aOps = false;
+ bool HasV8_7aOps = false;
bool HasV8MBaselineOps = false;
bool HasV8MMainlineOps = false;
bool HasV8_1MMainlineOps = false;
@@ -466,13 +466,13 @@ protected:
/// cannot be encoded. For example, ADD r0, r1, #FFFFFFFF -> SUB r0, r1, #1.
bool NegativeImmediates = true;
- /// Harden against Straight Line Speculation for Returns and Indirect
- /// Branches.
- bool HardenSlsRetBr = false;
-
- /// Harden against Straight Line Speculation for indirect calls.
- bool HardenSlsBlr = false;
-
+ /// Harden against Straight Line Speculation for Returns and Indirect
+ /// Branches.
+ bool HardenSlsRetBr = false;
+
+ /// Harden against Straight Line Speculation for indirect calls.
+ bool HardenSlsBlr = false;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
Align stackAlignment = Align(4);
@@ -538,7 +538,7 @@ public:
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
/// initializeSubtargetDependencies - Initializes using a CPU and feature string
/// so that we can use initializer lists for subtarget initialization.
@@ -606,7 +606,7 @@ public:
bool hasV8_4aOps() const { return HasV8_4aOps; }
bool hasV8_5aOps() const { return HasV8_5aOps; }
bool hasV8_6aOps() const { return HasV8_6aOps; }
- bool hasV8_7aOps() const { return HasV8_7aOps; }
+ bool hasV8_7aOps() const { return HasV8_7aOps; }
bool hasV8MBaselineOps() const { return HasV8MBaselineOps; }
bool hasV8MMainlineOps() const { return HasV8MMainlineOps; }
bool hasV8_1MMainlineOps() const { return HasV8_1MMainlineOps; }
@@ -627,7 +627,7 @@ public:
bool isCortexA15() const { return ARMProcFamily == CortexA15; }
bool isSwift() const { return ARMProcFamily == Swift; }
bool isCortexM3() const { return ARMProcFamily == CortexM3; }
- bool isCortexM7() const { return ARMProcFamily == CortexM7; }
+ bool isCortexM7() const { return ARMProcFamily == CortexM7; }
bool isLikeA9() const { return isCortexA9() || isCortexA15() || isKrait(); }
bool isCortexR5() const { return ARMProcFamily == CortexR5; }
bool isKrait() const { return ARMProcFamily == Krait; }
@@ -915,9 +915,9 @@ public:
bool ignoreCSRForAllocationOrder(const MachineFunction &MF,
unsigned PhysReg) const override;
unsigned getGPRAllocationOrder(const MachineFunction &MF) const;
-
- bool hardenSlsRetBr() const { return HardenSlsRetBr; }
- bool hardenSlsBlr() const { return HardenSlsBlr; }
+
+ bool hardenSlsRetBr() const { return HardenSlsRetBr; }
+ bool hardenSlsBlr() const { return HardenSlsBlr; }
};
} // end namespace llvm
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMTargetMachine.cpp
index 237ef54c83..c4841aabdf 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMTargetMachine.cpp
@@ -99,9 +99,9 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() {
initializeMVEVPTOptimisationsPass(Registry);
initializeMVETailPredicationPass(Registry);
initializeARMLowOverheadLoopsPass(Registry);
- initializeARMBlockPlacementPass(Registry);
+ initializeARMBlockPlacementPass(Registry);
initializeMVEGatherScatterLoweringPass(Registry);
- initializeARMSLSHardeningPass(Registry);
+ initializeARMSLSHardeningPass(Registry);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -253,7 +253,7 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
// ARM supports the MachineOutliner.
setMachineOutliner(true);
- setSupportsDefaultOutlining(true);
+ setSupportsDefaultOutlining(true);
}
ARMBaseTargetMachine::~ARMBaseTargetMachine() = default;
@@ -263,10 +263,10 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
Attribute FSAttr = F.getFnAttribute("target-features");
- std::string CPU =
- CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
- std::string FS =
- FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
+ std::string CPU =
+ CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
+ std::string FS =
+ FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
// FIXME: This is related to the code below to reset the target options,
// we need to know whether or not the soft float flag is set on the
@@ -409,8 +409,8 @@ void ARMPassConfig::addIRPasses() {
// ldrex/strex loops to simplify this, but it needs tidying up.
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
addPass(createCFGSimplificationPass(
- SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true),
- [this](const Function &F) {
+ SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true),
+ [this](const Function &F) {
const auto &ST = this->TM->getSubtarget<ARMSubtarget>(F);
return ST.hasAnyDataBarrier() && !ST.isThumb1Only();
}));
@@ -472,7 +472,7 @@ bool ARMPassConfig::addInstSelector() {
}
bool ARMPassConfig::addIRTranslator() {
- addPass(new IRTranslator(getOptLevel()));
+ addPass(new IRTranslator(getOptLevel()));
return false;
}
@@ -540,9 +540,9 @@ void ARMPassConfig::addPreSched2() {
addPass(&PostMachineSchedulerID);
addPass(&PostRASchedulerID);
}
-
- addPass(createARMIndirectThunks());
- addPass(createARMSLSHardeningPass());
+
+ addPass(createARMIndirectThunks());
+ addPass(createARMSLSHardeningPass());
}
void ARMPassConfig::addPreEmitPass() {
@@ -553,11 +553,11 @@ void ARMPassConfig::addPreEmitPass() {
return MF.getSubtarget<ARMSubtarget>().isThumb2();
}));
- // Don't optimize barriers or block placement at -O0.
- if (getOptLevel() != CodeGenOpt::None) {
- addPass(createARMBlockPlacementPass());
+ // Don't optimize barriers or block placement at -O0.
+ if (getOptLevel() != CodeGenOpt::None) {
+ addPass(createARMBlockPlacementPass());
addPass(createARMOptimizeBarriersPass());
- }
+ }
}
void ARMPassConfig::addPreEmitPass2() {
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMTargetMachine.h b/contrib/libs/llvm12/lib/Target/ARM/ARMTargetMachine.h
index 8428092bf1..d9f5d40eb1 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMTargetMachine.h
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMTargetMachine.h
@@ -72,12 +72,12 @@ public:
}
bool targetSchedulesPostRAScheduling() const override { return true; };
-
- /// Returns true if a cast between SrcAS and DestAS is a noop.
- bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
- // Addrspacecasts are always noops.
- return true;
- }
+
+ /// Returns true if a cast between SrcAS and DestAS is a noop.
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
+ // Addrspacecasts are always noops.
+ return true;
+ }
};
/// ARM/Thumb little endian target machine.
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 8901934013..e4e4252041 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -20,18 +20,18 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Transforms/InstCombine/InstCombiner.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <algorithm>
#include <cassert>
@@ -50,38 +50,38 @@ static cl::opt<bool> DisableLowOverheadLoops(
"disable-arm-loloops", cl::Hidden, cl::init(false),
cl::desc("Disable the generation of low-overhead loops"));
-static cl::opt<bool>
- AllowWLSLoops("allow-arm-wlsloops", cl::Hidden, cl::init(true),
- cl::desc("Enable the generation of WLS loops"));
-
+static cl::opt<bool>
+ AllowWLSLoops("allow-arm-wlsloops", cl::Hidden, cl::init(true),
+ cl::desc("Enable the generation of WLS loops"));
+
extern cl::opt<TailPredication::Mode> EnableTailPredication;
extern cl::opt<bool> EnableMaskedGatherScatters;
-extern cl::opt<unsigned> MVEMaxSupportedInterleaveFactor;
-
-/// Convert a vector load intrinsic into a simple llvm load instruction.
-/// This is beneficial when the underlying object being addressed comes
-/// from a constant, since we get constant-folding for free.
-static Value *simplifyNeonVld1(const IntrinsicInst &II, unsigned MemAlign,
- InstCombiner::BuilderTy &Builder) {
- auto *IntrAlign = dyn_cast<ConstantInt>(II.getArgOperand(1));
-
- if (!IntrAlign)
- return nullptr;
-
- unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign
- ? MemAlign
- : IntrAlign->getLimitedValue();
-
- if (!isPowerOf2_32(Alignment))
- return nullptr;
-
- auto *BCastInst = Builder.CreateBitCast(II.getArgOperand(0),
- PointerType::get(II.getType(), 0));
- return Builder.CreateAlignedLoad(II.getType(), BCastInst, Align(Alignment));
-}
-
+extern cl::opt<unsigned> MVEMaxSupportedInterleaveFactor;
+
+/// Convert a vector load intrinsic into a simple llvm load instruction.
+/// This is beneficial when the underlying object being addressed comes
+/// from a constant, since we get constant-folding for free.
+static Value *simplifyNeonVld1(const IntrinsicInst &II, unsigned MemAlign,
+ InstCombiner::BuilderTy &Builder) {
+ auto *IntrAlign = dyn_cast<ConstantInt>(II.getArgOperand(1));
+
+ if (!IntrAlign)
+ return nullptr;
+
+ unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign
+ ? MemAlign
+ : IntrAlign->getLimitedValue();
+
+ if (!isPowerOf2_32(Alignment))
+ return nullptr;
+
+ auto *BCastInst = Builder.CreateBitCast(II.getArgOperand(0),
+ PointerType::get(II.getType(), 0));
+ return Builder.CreateAlignedLoad(II.getType(), BCastInst, Align(Alignment));
+}
+
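The effect of simplifyNeonVld1, sketched at the source level with ACLE NEON intrinsics (arm_neon.h; whether a given frontend emits the llvm.arm.neon.vld1 intrinsic or already a plain load varies by compiler version, so this is only an illustration): once the intrinsic is rewritten into an ordinary aligned load from a constant, normal constant folding removes it entirely.

#include <arm_neon.h>
#include <cstdint>

static const int32_t kTable[4] = {1, 2, 3, 4};

int32_t first_lane() {
  int32x4_t v = vld1q_s32(kTable);  // vector load of a constant table ...
  return vgetq_lane_s32(v, 0);      // ... folds down to the constant 1
}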
bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
const Function *Callee) const {
const TargetMachine &TM = getTLI()->getTargetMachine();
@@ -114,138 +114,138 @@ bool ARMTTIImpl::shouldFavorPostInc() const {
return false;
}
-Optional<Instruction *>
-ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
- using namespace PatternMatch;
- Intrinsic::ID IID = II.getIntrinsicID();
- switch (IID) {
- default:
- break;
- case Intrinsic::arm_neon_vld1: {
- Align MemAlign =
- getKnownAlignment(II.getArgOperand(0), IC.getDataLayout(), &II,
- &IC.getAssumptionCache(), &IC.getDominatorTree());
- if (Value *V = simplifyNeonVld1(II, MemAlign.value(), IC.Builder)) {
- return IC.replaceInstUsesWith(II, V);
- }
- break;
- }
-
- case Intrinsic::arm_neon_vld2:
- case Intrinsic::arm_neon_vld3:
- case Intrinsic::arm_neon_vld4:
- case Intrinsic::arm_neon_vld2lane:
- case Intrinsic::arm_neon_vld3lane:
- case Intrinsic::arm_neon_vld4lane:
- case Intrinsic::arm_neon_vst1:
- case Intrinsic::arm_neon_vst2:
- case Intrinsic::arm_neon_vst3:
- case Intrinsic::arm_neon_vst4:
- case Intrinsic::arm_neon_vst2lane:
- case Intrinsic::arm_neon_vst3lane:
- case Intrinsic::arm_neon_vst4lane: {
- Align MemAlign =
- getKnownAlignment(II.getArgOperand(0), IC.getDataLayout(), &II,
- &IC.getAssumptionCache(), &IC.getDominatorTree());
- unsigned AlignArg = II.getNumArgOperands() - 1;
- Value *AlignArgOp = II.getArgOperand(AlignArg);
- MaybeAlign Align = cast<ConstantInt>(AlignArgOp)->getMaybeAlignValue();
- if (Align && *Align < MemAlign) {
- return IC.replaceOperand(
- II, AlignArg,
- ConstantInt::get(Type::getInt32Ty(II.getContext()), MemAlign.value(),
- false));
- }
- break;
- }
-
- case Intrinsic::arm_mve_pred_i2v: {
- Value *Arg = II.getArgOperand(0);
- Value *ArgArg;
- if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(
- PatternMatch::m_Value(ArgArg))) &&
- II.getType() == ArgArg->getType()) {
- return IC.replaceInstUsesWith(II, ArgArg);
- }
- Constant *XorMask;
- if (match(Arg, m_Xor(PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(
- PatternMatch::m_Value(ArgArg)),
- PatternMatch::m_Constant(XorMask))) &&
- II.getType() == ArgArg->getType()) {
- if (auto *CI = dyn_cast<ConstantInt>(XorMask)) {
- if (CI->getValue().trunc(16).isAllOnesValue()) {
- auto TrueVector = IC.Builder.CreateVectorSplat(
- cast<FixedVectorType>(II.getType())->getNumElements(),
- IC.Builder.getTrue());
- return BinaryOperator::Create(Instruction::Xor, ArgArg, TrueVector);
- }
- }
- }
- KnownBits ScalarKnown(32);
- if (IC.SimplifyDemandedBits(&II, 0, APInt::getLowBitsSet(32, 16),
- ScalarKnown, 0)) {
- return &II;
- }
- break;
- }
- case Intrinsic::arm_mve_pred_v2i: {
- Value *Arg = II.getArgOperand(0);
- Value *ArgArg;
- if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_i2v>(
- PatternMatch::m_Value(ArgArg)))) {
- return IC.replaceInstUsesWith(II, ArgArg);
- }
- if (!II.getMetadata(LLVMContext::MD_range)) {
- Type *IntTy32 = Type::getInt32Ty(II.getContext());
- Metadata *M[] = {
- ConstantAsMetadata::get(ConstantInt::get(IntTy32, 0)),
- ConstantAsMetadata::get(ConstantInt::get(IntTy32, 0xFFFF))};
- II.setMetadata(LLVMContext::MD_range, MDNode::get(II.getContext(), M));
- return &II;
- }
- break;
- }
- case Intrinsic::arm_mve_vadc:
- case Intrinsic::arm_mve_vadc_predicated: {
- unsigned CarryOp =
- (II.getIntrinsicID() == Intrinsic::arm_mve_vadc_predicated) ? 3 : 2;
- assert(II.getArgOperand(CarryOp)->getType()->getScalarSizeInBits() == 32 &&
- "Bad type for intrinsic!");
-
- KnownBits CarryKnown(32);
- if (IC.SimplifyDemandedBits(&II, CarryOp, APInt::getOneBitSet(32, 29),
- CarryKnown)) {
- return &II;
- }
- break;
- }
- case Intrinsic::arm_mve_vmldava: {
- Instruction *I = cast<Instruction>(&II);
- if (I->hasOneUse()) {
- auto *User = cast<Instruction>(*I->user_begin());
- Value *OpZ;
- if (match(User, m_c_Add(m_Specific(I), m_Value(OpZ))) &&
- match(I->getOperand(3), m_Zero())) {
- Value *OpX = I->getOperand(4);
- Value *OpY = I->getOperand(5);
- Type *OpTy = OpX->getType();
-
- IC.Builder.SetInsertPoint(User);
- Value *V =
- IC.Builder.CreateIntrinsic(Intrinsic::arm_mve_vmldava, {OpTy},
- {I->getOperand(0), I->getOperand(1),
- I->getOperand(2), OpZ, OpX, OpY});
-
- IC.replaceInstUsesWith(*User, V);
- return IC.eraseInstFromFunction(*User);
- }
- }
- return None;
- }
- }
- return None;
-}
-
+Optional<Instruction *>
+ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
+ using namespace PatternMatch;
+ Intrinsic::ID IID = II.getIntrinsicID();
+ switch (IID) {
+ default:
+ break;
+ case Intrinsic::arm_neon_vld1: {
+ Align MemAlign =
+ getKnownAlignment(II.getArgOperand(0), IC.getDataLayout(), &II,
+ &IC.getAssumptionCache(), &IC.getDominatorTree());
+ if (Value *V = simplifyNeonVld1(II, MemAlign.value(), IC.Builder)) {
+ return IC.replaceInstUsesWith(II, V);
+ }
+ break;
+ }
+
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane:
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane: {
+ Align MemAlign =
+ getKnownAlignment(II.getArgOperand(0), IC.getDataLayout(), &II,
+ &IC.getAssumptionCache(), &IC.getDominatorTree());
+ unsigned AlignArg = II.getNumArgOperands() - 1;
+ Value *AlignArgOp = II.getArgOperand(AlignArg);
+ MaybeAlign Align = cast<ConstantInt>(AlignArgOp)->getMaybeAlignValue();
+ if (Align && *Align < MemAlign) {
+ return IC.replaceOperand(
+ II, AlignArg,
+ ConstantInt::get(Type::getInt32Ty(II.getContext()), MemAlign.value(),
+ false));
+ }
+ break;
+ }
+
+ case Intrinsic::arm_mve_pred_i2v: {
+ Value *Arg = II.getArgOperand(0);
+ Value *ArgArg;
+ if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(
+ PatternMatch::m_Value(ArgArg))) &&
+ II.getType() == ArgArg->getType()) {
+ return IC.replaceInstUsesWith(II, ArgArg);
+ }
+ Constant *XorMask;
+ if (match(Arg, m_Xor(PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(
+ PatternMatch::m_Value(ArgArg)),
+ PatternMatch::m_Constant(XorMask))) &&
+ II.getType() == ArgArg->getType()) {
+ if (auto *CI = dyn_cast<ConstantInt>(XorMask)) {
+ if (CI->getValue().trunc(16).isAllOnesValue()) {
+ auto TrueVector = IC.Builder.CreateVectorSplat(
+ cast<FixedVectorType>(II.getType())->getNumElements(),
+ IC.Builder.getTrue());
+ return BinaryOperator::Create(Instruction::Xor, ArgArg, TrueVector);
+ }
+ }
+ }
+ KnownBits ScalarKnown(32);
+ if (IC.SimplifyDemandedBits(&II, 0, APInt::getLowBitsSet(32, 16),
+ ScalarKnown, 0)) {
+ return &II;
+ }
+ break;
+ }
+ case Intrinsic::arm_mve_pred_v2i: {
+ Value *Arg = II.getArgOperand(0);
+ Value *ArgArg;
+ if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_i2v>(
+ PatternMatch::m_Value(ArgArg)))) {
+ return IC.replaceInstUsesWith(II, ArgArg);
+ }
+ if (!II.getMetadata(LLVMContext::MD_range)) {
+ Type *IntTy32 = Type::getInt32Ty(II.getContext());
+ Metadata *M[] = {
+ ConstantAsMetadata::get(ConstantInt::get(IntTy32, 0)),
+ ConstantAsMetadata::get(ConstantInt::get(IntTy32, 0xFFFF))};
+ II.setMetadata(LLVMContext::MD_range, MDNode::get(II.getContext(), M));
+ return &II;
+ }
+ break;
+ }
+ case Intrinsic::arm_mve_vadc:
+ case Intrinsic::arm_mve_vadc_predicated: {
+ unsigned CarryOp =
+ (II.getIntrinsicID() == Intrinsic::arm_mve_vadc_predicated) ? 3 : 2;
+ assert(II.getArgOperand(CarryOp)->getType()->getScalarSizeInBits() == 32 &&
+ "Bad type for intrinsic!");
+
+ KnownBits CarryKnown(32);
+ if (IC.SimplifyDemandedBits(&II, CarryOp, APInt::getOneBitSet(32, 29),
+ CarryKnown)) {
+ return &II;
+ }
+ break;
+ }
+ case Intrinsic::arm_mve_vmldava: {
+ Instruction *I = cast<Instruction>(&II);
+ if (I->hasOneUse()) {
+ auto *User = cast<Instruction>(*I->user_begin());
+ Value *OpZ;
+ if (match(User, m_c_Add(m_Specific(I), m_Value(OpZ))) &&
+ match(I->getOperand(3), m_Zero())) {
+ Value *OpX = I->getOperand(4);
+ Value *OpY = I->getOperand(5);
+ Type *OpTy = OpX->getType();
+
+ IC.Builder.SetInsertPoint(User);
+ Value *V =
+ IC.Builder.CreateIntrinsic(Intrinsic::arm_mve_vmldava, {OpTy},
+ {I->getOperand(0), I->getOperand(1),
+ I->getOperand(2), OpZ, OpX, OpY});
+
+ IC.replaceInstUsesWith(*User, V);
+ return IC.eraseInstFromFunction(*User);
+ }
+ }
+ return None;
+ }
+ }
+ return None;
+}
+
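One of the folds above, the arm_mve_vmldava case, sketched with ACLE MVE intrinsics (arm_mve.h, requires +mve; vmladavq/vmladavaq are the assumed ACLE spellings): an add of a zero-accumulator multiply-reduce is merged into the single accumulating form.

#include <arm_mve.h>
#include <cstdint>

int32_t dot_then_add(int32_t acc, int32x4_t x, int32x4_t y) {
  int32_t partial = vmladavq_s32(x, y);  // multiply-add across lanes, zero accumulator
  return acc + partial;                  // the combine rewrites this pair into the
                                         // accumulating form vmladavaq_s32(acc, x, y)
}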
int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy());
@@ -289,43 +289,43 @@ int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
return 1;
}
-// Checks whether Inst is part of a min(max()) or max(min()) pattern
-// that will match to an SSAT instruction
-static bool isSSATMinMaxPattern(Instruction *Inst, const APInt &Imm) {
- Value *LHS, *RHS;
- ConstantInt *C;
- SelectPatternFlavor InstSPF = matchSelectPattern(Inst, LHS, RHS).Flavor;
-
- if (InstSPF == SPF_SMAX &&
- PatternMatch::match(RHS, PatternMatch::m_ConstantInt(C)) &&
- C->getValue() == Imm && Imm.isNegative() && (-Imm).isPowerOf2()) {
-
- auto isSSatMin = [&](Value *MinInst) {
- if (isa<SelectInst>(MinInst)) {
- Value *MinLHS, *MinRHS;
- ConstantInt *MinC;
- SelectPatternFlavor MinSPF =
- matchSelectPattern(MinInst, MinLHS, MinRHS).Flavor;
- if (MinSPF == SPF_SMIN &&
- PatternMatch::match(MinRHS, PatternMatch::m_ConstantInt(MinC)) &&
- MinC->getValue() == ((-Imm) - 1))
- return true;
- }
- return false;
- };
-
- if (isSSatMin(Inst->getOperand(1)) ||
- (Inst->hasNUses(2) && (isSSatMin(*Inst->user_begin()) ||
- isSSatMin(*(++Inst->user_begin())))))
- return true;
- }
- return false;
-}
-
-int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
- const APInt &Imm, Type *Ty,
- TTI::TargetCostKind CostKind,
- Instruction *Inst) {
+// Checks whether Inst is part of a min(max()) or max(min()) pattern
+// that will match to an SSAT instruction
+static bool isSSATMinMaxPattern(Instruction *Inst, const APInt &Imm) {
+ Value *LHS, *RHS;
+ ConstantInt *C;
+ SelectPatternFlavor InstSPF = matchSelectPattern(Inst, LHS, RHS).Flavor;
+
+ if (InstSPF == SPF_SMAX &&
+ PatternMatch::match(RHS, PatternMatch::m_ConstantInt(C)) &&
+ C->getValue() == Imm && Imm.isNegative() && (-Imm).isPowerOf2()) {
+
+ auto isSSatMin = [&](Value *MinInst) {
+ if (isa<SelectInst>(MinInst)) {
+ Value *MinLHS, *MinRHS;
+ ConstantInt *MinC;
+ SelectPatternFlavor MinSPF =
+ matchSelectPattern(MinInst, MinLHS, MinRHS).Flavor;
+ if (MinSPF == SPF_SMIN &&
+ PatternMatch::match(MinRHS, PatternMatch::m_ConstantInt(MinC)) &&
+ MinC->getValue() == ((-Imm) - 1))
+ return true;
+ }
+ return false;
+ };
+
+ if (isSSatMin(Inst->getOperand(1)) ||
+ (Inst->hasNUses(2) && (isSSatMin(*Inst->user_begin()) ||
+ isSSatMin(*(++Inst->user_begin())))))
+ return true;
+ }
+ return false;
+}
+
+int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind,
+ Instruction *Inst) {
// Division by a constant can be turned into multiplication, but only if we
// know it's constant. So it's not so much that the immediate is cheap (it's
// not), but that the alternative is worse.
@@ -364,33 +364,33 @@ int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
if (Opcode == Instruction::Xor && Imm.isAllOnesValue())
return 0;
- // Ensure negative constants of min(max()) or max(min()) patterns that
- // match to SSAT instructions don't get hoisted
- if (Inst && ((ST->hasV6Ops() && !ST->isThumb()) || ST->isThumb2()) &&
- Ty->getIntegerBitWidth() <= 32) {
- if (isSSATMinMaxPattern(Inst, Imm) ||
- (isa<ICmpInst>(Inst) && Inst->hasOneUse() &&
- isSSATMinMaxPattern(cast<Instruction>(*Inst->user_begin()), Imm)))
- return 0;
- }
-
+ // Ensure negative constants of min(max()) or max(min()) patterns that
+ // match to SSAT instructions don't get hoisted
+ if (Inst && ((ST->hasV6Ops() && !ST->isThumb()) || ST->isThumb2()) &&
+ Ty->getIntegerBitWidth() <= 32) {
+ if (isSSATMinMaxPattern(Inst, Imm) ||
+ (isa<ICmpInst>(Inst) && Inst->hasOneUse() &&
+ isSSATMinMaxPattern(cast<Instruction>(*Inst->user_begin()), Imm)))
+ return 0;
+ }
+
return getIntImmCost(Imm, Ty, CostKind);
}
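The min(max()) shape that isSSATMinMaxPattern recognises, written out in C for illustration: a signed clamp to [-2^k, 2^k - 1] selects to a single SSAT on ARM/Thumb2, which is why the negative immediate is kept next to the compare rather than hoisted.

#include <cstdint>

int32_t clamp_s16(int32_t x) {
  if (x < -32768) x = -32768;  // Imm = -32768: negative, power-of-two magnitude
  if (x > 32767)  x = 32767;   // matching SMIN constant, (-Imm) - 1
  return x;                    // expected to select to: ssat r0, #16, r0
}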
-int ARMTTIImpl::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
- if (CostKind == TTI::TCK_RecipThroughput &&
- (ST->hasNEON() || ST->hasMVEIntegerOps())) {
- // FIXME: The vectorizer is highly sensitive to the cost of these
- // instructions, which suggests that it may be using the costs incorrectly.
- // But, for now, just make them free to avoid performance regressions for
- // vector targets.
- return 0;
- }
- return BaseT::getCFInstrCost(Opcode, CostKind);
-}
-
+int ARMTTIImpl::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
+ if (CostKind == TTI::TCK_RecipThroughput &&
+ (ST->hasNEON() || ST->hasMVEIntegerOps())) {
+ // FIXME: The vectorizer is highly sensitive to the cost of these
+ // instructions, which suggests that it may be using the costs incorrectly.
+ // But, for now, just make them free to avoid performance regressions for
+ // vector targets.
+ return 0;
+ }
+ return BaseT::getCFInstrCost(Opcode, CostKind);
+}
+
int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
- TTI::CastContextHint CCH,
+ TTI::CastContextHint CCH,
TTI::TargetCostKind CostKind,
const Instruction *I) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -402,35 +402,35 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
return Cost == 0 ? 0 : 1;
return Cost;
};
- auto IsLegalFPType = [this](EVT VT) {
- EVT EltVT = VT.getScalarType();
- return (EltVT == MVT::f32 && ST->hasVFP2Base()) ||
- (EltVT == MVT::f64 && ST->hasFP64()) ||
- (EltVT == MVT::f16 && ST->hasFullFP16());
- };
+ auto IsLegalFPType = [this](EVT VT) {
+ EVT EltVT = VT.getScalarType();
+ return (EltVT == MVT::f32 && ST->hasVFP2Base()) ||
+ (EltVT == MVT::f64 && ST->hasFP64()) ||
+ (EltVT == MVT::f16 && ST->hasFullFP16());
+ };
EVT SrcTy = TLI->getValueType(DL, Src);
EVT DstTy = TLI->getValueType(DL, Dst);
if (!SrcTy.isSimple() || !DstTy.isSimple())
- return AdjustCost(
- BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
-
- // Extending masked load/Truncating masked stores is expensive because we
- // currently don't split them. This means that we'll likely end up
- // loading/storing each element individually (hence the high cost).
- if ((ST->hasMVEIntegerOps() &&
- (Opcode == Instruction::Trunc || Opcode == Instruction::ZExt ||
- Opcode == Instruction::SExt)) ||
- (ST->hasMVEFloatOps() &&
- (Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc) &&
- IsLegalFPType(SrcTy) && IsLegalFPType(DstTy)))
- if (CCH == TTI::CastContextHint::Masked && DstTy.getSizeInBits() > 128)
- return 2 * DstTy.getVectorNumElements() * ST->getMVEVectorCostFactor();
-
- // The extend of other kinds of load is free
- if (CCH == TTI::CastContextHint::Normal ||
- CCH == TTI::CastContextHint::Masked) {
+ return AdjustCost(
+ BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
+
+ // Extending masked load/Truncating masked stores is expensive because we
+ // currently don't split them. This means that we'll likely end up
+ // loading/storing each element individually (hence the high cost).
+ if ((ST->hasMVEIntegerOps() &&
+ (Opcode == Instruction::Trunc || Opcode == Instruction::ZExt ||
+ Opcode == Instruction::SExt)) ||
+ (ST->hasMVEFloatOps() &&
+ (Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc) &&
+ IsLegalFPType(SrcTy) && IsLegalFPType(DstTy)))
+ if (CCH == TTI::CastContextHint::Masked && DstTy.getSizeInBits() > 128)
+ return 2 * DstTy.getVectorNumElements() * ST->getMVEVectorCostFactor();
+
+ // The extend of other kinds of load is free
+ if (CCH == TTI::CastContextHint::Normal ||
+ CCH == TTI::CastContextHint::Masked) {
static const TypeConversionCostTblEntry LoadConversionTbl[] = {
{ISD::SIGN_EXTEND, MVT::i32, MVT::i16, 0},
{ISD::ZERO_EXTEND, MVT::i32, MVT::i16, 0},
@@ -485,31 +485,31 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor());
}
- // The truncate of a store is free. This is the mirror of extends above.
- static const TypeConversionCostTblEntry MVEStoreConversionTbl[] = {
+ // The truncate of a store is free. This is the mirror of extends above.
+ static const TypeConversionCostTblEntry MVEStoreConversionTbl[] = {
{ISD::TRUNCATE, MVT::v4i32, MVT::v4i16, 0},
{ISD::TRUNCATE, MVT::v4i32, MVT::v4i8, 0},
{ISD::TRUNCATE, MVT::v8i16, MVT::v8i8, 0},
{ISD::TRUNCATE, MVT::v8i32, MVT::v8i16, 1},
- {ISD::TRUNCATE, MVT::v8i32, MVT::v8i8, 1},
+ {ISD::TRUNCATE, MVT::v8i32, MVT::v8i8, 1},
{ISD::TRUNCATE, MVT::v16i32, MVT::v16i8, 3},
{ISD::TRUNCATE, MVT::v16i16, MVT::v16i8, 1},
};
if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
if (const auto *Entry =
- ConvertCostTableLookup(MVEStoreConversionTbl, ISD,
- SrcTy.getSimpleVT(), DstTy.getSimpleVT()))
+ ConvertCostTableLookup(MVEStoreConversionTbl, ISD,
+ SrcTy.getSimpleVT(), DstTy.getSimpleVT()))
return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor());
}
- static const TypeConversionCostTblEntry MVEFStoreConversionTbl[] = {
+ static const TypeConversionCostTblEntry MVEFStoreConversionTbl[] = {
{ISD::FP_ROUND, MVT::v4f32, MVT::v4f16, 1},
{ISD::FP_ROUND, MVT::v8f32, MVT::v8f16, 3},
};
if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
if (const auto *Entry =
- ConvertCostTableLookup(MVEFStoreConversionTbl, ISD,
- SrcTy.getSimpleVT(), DstTy.getSimpleVT()))
+ ConvertCostTableLookup(MVEFStoreConversionTbl, ISD,
+ SrcTy.getSimpleVT(), DstTy.getSimpleVT()))
return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor());
}
}
@@ -746,24 +746,24 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
if (SrcTy.isFixedLengthVector())
Lanes = SrcTy.getVectorNumElements();
- if (IsLegalFPType(SrcTy) && IsLegalFPType(DstTy))
+ if (IsLegalFPType(SrcTy) && IsLegalFPType(DstTy))
return Lanes;
else
return Lanes * CallCost;
}
- if (ISD == ISD::TRUNCATE && ST->hasMVEIntegerOps() &&
- SrcTy.isFixedLengthVector()) {
- // Treat a truncate with larger than legal source (128bits for MVE) as
- // expensive, 2 instructions per lane.
- if ((SrcTy.getScalarType() == MVT::i8 ||
- SrcTy.getScalarType() == MVT::i16 ||
- SrcTy.getScalarType() == MVT::i32) &&
- SrcTy.getSizeInBits() > 128 &&
- SrcTy.getSizeInBits() > DstTy.getSizeInBits())
- return SrcTy.getVectorNumElements() * 2;
- }
-
+ if (ISD == ISD::TRUNCATE && ST->hasMVEIntegerOps() &&
+ SrcTy.isFixedLengthVector()) {
+ // Treat a truncate with larger than legal source (128bits for MVE) as
+ // expensive, 2 instructions per lane.
+ if ((SrcTy.getScalarType() == MVT::i8 ||
+ SrcTy.getScalarType() == MVT::i16 ||
+ SrcTy.getScalarType() == MVT::i32) &&
+ SrcTy.getSizeInBits() > 128 &&
+ SrcTy.getSizeInBits() > DstTy.getSizeInBits())
+ return SrcTy.getVectorNumElements() * 2;
+ }
+
// Scalar integer conversion costs.
static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = {
// i16 -> i64 requires two dependent operations.
@@ -787,7 +787,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
? ST->getMVEVectorCostFactor()
: 1;
return AdjustCost(
- BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
+ BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
}
int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
@@ -827,37 +827,37 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
}
int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
- CmpInst::Predicate VecPred,
+ CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I) {
- int ISD = TLI->InstructionOpcodeToISD(Opcode);
-
- // Thumb scalar code size cost for select.
- if (CostKind == TTI::TCK_CodeSize && ISD == ISD::SELECT &&
- ST->isThumb() && !ValTy->isVectorTy()) {
- // Assume expensive structs.
- if (TLI->getValueType(DL, ValTy, true) == MVT::Other)
- return TTI::TCC_Expensive;
-
- // Select costs can vary because they:
- // - may require one or more conditional mov (including an IT),
- // - can't operate directly on immediates,
- // - require live flags, which we can't copy around easily.
- int Cost = TLI->getTypeLegalizationCost(DL, ValTy).first;
-
- // Possible IT instruction for Thumb2, or more for Thumb1.
- ++Cost;
-
- // i1 values may need rematerialising by using mov immediates and/or
- // flag setting instructions.
- if (ValTy->isIntegerTy(1))
- ++Cost;
-
- return Cost;
- }
-
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+
+ // Thumb scalar code size cost for select.
+ if (CostKind == TTI::TCK_CodeSize && ISD == ISD::SELECT &&
+ ST->isThumb() && !ValTy->isVectorTy()) {
+ // Assume expensive structs.
+ if (TLI->getValueType(DL, ValTy, true) == MVT::Other)
+ return TTI::TCC_Expensive;
+
+ // Select costs can vary because they:
+ // - may require one or more conditional mov (including an IT),
+ // - can't operate directly on immediates,
+ // - require live flags, which we can't copy around easily.
+ int Cost = TLI->getTypeLegalizationCost(DL, ValTy).first;
+
+ // Possible IT instruction for Thumb2, or more for Thumb1.
+ ++Cost;
+
+ // i1 values may need rematerialising by using mov immediates and/or
+ // flag setting instructions.
+ if (ValTy->isIntegerTy(1))
+ ++Cost;
+
+ return Cost;
+ }
+
// On NEON a vector select gets lowered to vbsl.
- if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT && CondTy) {
+ if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT && CondTy) {
// Lowering of some vector selects is currently far from perfect.
static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = {
{ ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
@@ -878,15 +878,15 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
return LT.first;
}
- // Default to cheap (throughput/size of 1 instruction) but adjust throughput
- // for "multiple beats" potentially needed by MVE instructions.
- int BaseCost = 1;
- if (CostKind != TTI::TCK_CodeSize && ST->hasMVEIntegerOps() &&
- ValTy->isVectorTy())
- BaseCost = ST->getMVEVectorCostFactor();
-
- return BaseCost *
- BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
+ // Default to cheap (throughput/size of 1 instruction) but adjust throughput
+ // for "multiple beats" potentially needed by MVE instructions.
+ int BaseCost = 1;
+ if (CostKind != TTI::TCK_CodeSize && ST->hasMVEIntegerOps() &&
+ ValTy->isVectorTy())
+ BaseCost = ST->getMVEVectorCostFactor();
+
+ return BaseCost *
+ BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
}
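The "vector select gets lowered to vbsl" remark above, sketched with ACLE NEON intrinsics (arm_neon.h): the compare produces an all-ones/all-zeros mask per lane, and VBSL performs the bitwise select that the cost table prices.

#include <arm_neon.h>

int32x4_t select_greater(int32x4_t a, int32x4_t b) {
  uint32x4_t mask = vcgtq_s32(a, b);  // 0xFFFFFFFF where a > b, else 0
  return vbslq_s32(mask, a, b);       // (mask & a) | (~mask & b)
}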
int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
@@ -968,85 +968,85 @@ bool ARMTTIImpl::isLegalMaskedGather(Type *Ty, Align Alignment) {
(EltWidth == 16 && Alignment >= 2) || EltWidth == 8);
}
-/// Given a memcpy/memset/memmove instruction, return the number of memory
-/// operations performed, via querying findOptimalMemOpLowering. Returns -1 if a
-/// call is used.
-int ARMTTIImpl::getNumMemOps(const IntrinsicInst *I) const {
- MemOp MOp;
- unsigned DstAddrSpace = ~0u;
- unsigned SrcAddrSpace = ~0u;
- const Function *F = I->getParent()->getParent();
-
- if (const auto *MC = dyn_cast<MemTransferInst>(I)) {
- ConstantInt *C = dyn_cast<ConstantInt>(MC->getLength());
- // If 'size' is not a constant, a library call will be generated.
- if (!C)
- return -1;
-
- const unsigned Size = C->getValue().getZExtValue();
- const Align DstAlign = *MC->getDestAlign();
- const Align SrcAlign = *MC->getSourceAlign();
-
- MOp = MemOp::Copy(Size, /*DstAlignCanChange*/ false, DstAlign, SrcAlign,
- /*IsVolatile*/ false);
- DstAddrSpace = MC->getDestAddressSpace();
- SrcAddrSpace = MC->getSourceAddressSpace();
- }
- else if (const auto *MS = dyn_cast<MemSetInst>(I)) {
- ConstantInt *C = dyn_cast<ConstantInt>(MS->getLength());
- // If 'size' is not a constant, a library call will be generated.
- if (!C)
- return -1;
-
- const unsigned Size = C->getValue().getZExtValue();
- const Align DstAlign = *MS->getDestAlign();
-
- MOp = MemOp::Set(Size, /*DstAlignCanChange*/ false, DstAlign,
- /*IsZeroMemset*/ false, /*IsVolatile*/ false);
- DstAddrSpace = MS->getDestAddressSpace();
- }
- else
- llvm_unreachable("Expected a memcpy/move or memset!");
-
- unsigned Limit, Factor = 2;
- switch(I->getIntrinsicID()) {
- case Intrinsic::memcpy:
- Limit = TLI->getMaxStoresPerMemcpy(F->hasMinSize());
- break;
- case Intrinsic::memmove:
- Limit = TLI->getMaxStoresPerMemmove(F->hasMinSize());
- break;
- case Intrinsic::memset:
- Limit = TLI->getMaxStoresPerMemset(F->hasMinSize());
- Factor = 1;
- break;
- default:
- llvm_unreachable("Expected a memcpy/move or memset!");
- }
-
+/// Given a memcpy/memset/memmove instruction, return the number of memory
+/// operations performed, via querying findOptimalMemOpLowering. Returns -1 if a
+/// call is used.
+int ARMTTIImpl::getNumMemOps(const IntrinsicInst *I) const {
+ MemOp MOp;
+ unsigned DstAddrSpace = ~0u;
+ unsigned SrcAddrSpace = ~0u;
+ const Function *F = I->getParent()->getParent();
+
+ if (const auto *MC = dyn_cast<MemTransferInst>(I)) {
+ ConstantInt *C = dyn_cast<ConstantInt>(MC->getLength());
+ // If 'size' is not a constant, a library call will be generated.
+ if (!C)
+ return -1;
+
+ const unsigned Size = C->getValue().getZExtValue();
+ const Align DstAlign = *MC->getDestAlign();
+ const Align SrcAlign = *MC->getSourceAlign();
+
+ MOp = MemOp::Copy(Size, /*DstAlignCanChange*/ false, DstAlign, SrcAlign,
+ /*IsVolatile*/ false);
+ DstAddrSpace = MC->getDestAddressSpace();
+ SrcAddrSpace = MC->getSourceAddressSpace();
+ }
+ else if (const auto *MS = dyn_cast<MemSetInst>(I)) {
+ ConstantInt *C = dyn_cast<ConstantInt>(MS->getLength());
+ // If 'size' is not a constant, a library call will be generated.
+ if (!C)
+ return -1;
+
+ const unsigned Size = C->getValue().getZExtValue();
+ const Align DstAlign = *MS->getDestAlign();
+
+ MOp = MemOp::Set(Size, /*DstAlignCanChange*/ false, DstAlign,
+ /*IsZeroMemset*/ false, /*IsVolatile*/ false);
+ DstAddrSpace = MS->getDestAddressSpace();
+ }
+ else
+ llvm_unreachable("Expected a memcpy/move or memset!");
+
+ unsigned Limit, Factor = 2;
+ switch(I->getIntrinsicID()) {
+ case Intrinsic::memcpy:
+ Limit = TLI->getMaxStoresPerMemcpy(F->hasMinSize());
+ break;
+ case Intrinsic::memmove:
+ Limit = TLI->getMaxStoresPerMemmove(F->hasMinSize());
+ break;
+ case Intrinsic::memset:
+ Limit = TLI->getMaxStoresPerMemset(F->hasMinSize());
+ Factor = 1;
+ break;
+ default:
+ llvm_unreachable("Expected a memcpy/move or memset!");
+ }
+
 // MemOps will be populated with a list of data types that need to be
// loaded and stored. That's why we multiply the number of elements by 2 to
// get the cost for this memcpy.
- std::vector<EVT> MemOps;
+ std::vector<EVT> MemOps;
if (getTLI()->findOptimalMemOpLowering(
- MemOps, Limit, MOp, DstAddrSpace,
- SrcAddrSpace, F->getAttributes()))
- return MemOps.size() * Factor;
+ MemOps, Limit, MOp, DstAddrSpace,
+ SrcAddrSpace, F->getAttributes()))
+ return MemOps.size() * Factor;
// If we can't find an optimal memop lowering, return the default cost
- return -1;
-}
-
-int ARMTTIImpl::getMemcpyCost(const Instruction *I) {
- int NumOps = getNumMemOps(cast<IntrinsicInst>(I));
-
- // To model the cost of a library call, we assume 1 for the call, and
- // 3 for the argument setup.
- if (NumOps == -1)
- return 4;
- return NumOps;
+ return -1;
}
+int ARMTTIImpl::getMemcpyCost(const Instruction *I) {
+ int NumOps = getNumMemOps(cast<IntrinsicInst>(I));
+
+ // To model the cost of a library call, we assume 1 for the call, and
+ // 3 for the argument setup.
+ if (NumOps == -1)
+ return 4;
+ return NumOps;
+}
+
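A condensed sketch of the cost model implemented by getNumMemOps/getMemcpyCost above (simplified, not the LLVM API): inline-expandable copies are priced per emitted memory operation, and anything that falls back to a library call is charged 1 for the call plus 3 for argument setup.

// NumInlineOps mirrors getNumMemOps(): the number of memory operations an
// inline expansion would need, or -1 when only a library call is possible
// (non-constant length, or more pieces than the per-target store limit allows).
int memcpyCost(int NumInlineOps) {
  if (NumInlineOps == -1)
    return 4;            // the call plus three argument-setup instructions
  return NumInlineOps;   // otherwise one unit per load/store emitted
}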
int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
int Index, VectorType *SubTp) {
if (ST->hasNEON()) {
@@ -1149,21 +1149,21 @@ int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
TTI::OperandValueProperties Opd2PropInfo,
ArrayRef<const Value *> Args,
const Instruction *CxtI) {
- int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
- if (ST->isThumb() && CostKind == TTI::TCK_CodeSize && Ty->isIntegerTy(1)) {
- // Make operations on i1 relatively expensive as this often involves
- // combining predicates. AND and XOR should be easier to handle with IT
- // blocks.
- switch (ISDOpcode) {
- default:
- break;
- case ISD::AND:
- case ISD::XOR:
- return 2;
- case ISD::OR:
- return 3;
- }
- }
+ int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
+ if (ST->isThumb() && CostKind == TTI::TCK_CodeSize && Ty->isIntegerTy(1)) {
+ // Make operations on i1 relatively expensive as this often involves
+ // combining predicates. AND and XOR should be easier to handle with IT
+ // blocks.
+ switch (ISDOpcode) {
+ default:
+ break;
+ case ISD::AND:
+ case ISD::XOR:
+ return 2;
+ case ISD::OR:
+ return 3;
+ }
+ }
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
@@ -1259,12 +1259,12 @@ int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
if (LooksLikeAFreeShift())
return 0;
- // Default to cheap (throughput/size of 1 instruction) but adjust throughput
- // for "multiple beats" potentially needed by MVE instructions.
- int BaseCost = 1;
- if (CostKind != TTI::TCK_CodeSize && ST->hasMVEIntegerOps() &&
- Ty->isVectorTy())
- BaseCost = ST->getMVEVectorCostFactor();
+ // Default to cheap (throughput/size of 1 instruction) but adjust throughput
+ // for "multiple beats" potentially needed by MVE instructions.
+ int BaseCost = 1;
+ if (CostKind != TTI::TCK_CodeSize && ST->hasMVEIntegerOps() &&
+ Ty->isVectorTy())
+ BaseCost = ST->getMVEVectorCostFactor();
// The rest of this mostly follows what is done in BaseT::getArithmeticInstrCost,
// without treating floats as more expensive that scalars or increasing the
@@ -1331,24 +1331,24 @@ int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
CostKind, I);
}
-unsigned ARMTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
- Align Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind) {
- if (ST->hasMVEIntegerOps()) {
- if (Opcode == Instruction::Load && isLegalMaskedLoad(Src, Alignment))
- return ST->getMVEVectorCostFactor();
- if (Opcode == Instruction::Store && isLegalMaskedStore(Src, Alignment))
- return ST->getMVEVectorCostFactor();
- }
- if (!isa<FixedVectorType>(Src))
- return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
- CostKind);
- // Scalar cost, which is currently very high due to the inefficiency of the
- // generated code.
- return cast<FixedVectorType>(Src)->getNumElements() * 8;
-}
-
+unsigned ARMTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
+ Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind) {
+ if (ST->hasMVEIntegerOps()) {
+ if (Opcode == Instruction::Load && isLegalMaskedLoad(Src, Alignment))
+ return ST->getMVEVectorCostFactor();
+ if (Opcode == Instruction::Store && isLegalMaskedStore(Src, Alignment))
+ return ST->getMVEVectorCostFactor();
+ }
+ if (!isa<FixedVectorType>(Src))
+ return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+ CostKind);
+ // Scalar cost, which is currently very high due to the inefficiency of the
+ // generated code.
+ return cast<FixedVectorType>(Src)->getNumElements() * 8;
+}
+
int ARMTTIImpl::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
@@ -1379,8 +1379,8 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(
// promoted differently). The cost of 2 here is then a load and vrev or
// vmovn.
if (ST->hasMVEIntegerOps() && Factor == 2 && NumElts / Factor > 2 &&
- VecTy->isIntOrIntVectorTy() &&
- DL.getTypeSizeInBits(SubVecTy).getFixedSize() <= 64)
+ VecTy->isIntOrIntVectorTy() &&
+ DL.getTypeSizeInBits(SubVecTy).getFixedSize() <= 64)
return 2 * BaseCost;
}
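The Factor == 2 case priced at 2 * BaseCost above corresponds to a simple de-interleaving access pattern, for example:

#include <cstdint>

// Splitting interleaved pairs: the vectorizer turns this into a stride-2
// (vld2-style) interleaved load group, which the comment above prices as
// roughly a load plus a vrev or vmovn when the sub-vector fits in 64 bits.
void deinterleave(const int16_t *in, int16_t *even, int16_t *odd, int n) {
  for (int i = 0; i < n; ++i) {
    even[i] = in[2 * i];
    odd[i]  = in[2 * i + 1];
  }
}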
@@ -1413,13 +1413,13 @@ unsigned ARMTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
// multiplied by the number of elements being loaded. This is possibly very
// conservative, but even so we still end up vectorising loops because the
// cost per iteration for many loops is lower than for scalar loops.
- unsigned VectorCost = NumElems * LT.first * ST->getMVEVectorCostFactor();
+ unsigned VectorCost = NumElems * LT.first * ST->getMVEVectorCostFactor();
// The scalarization cost should be a lot higher. We use the number of vector
// elements plus the scalarization overhead.
unsigned ScalarCost =
NumElems * LT.first + BaseT::getScalarizationOverhead(VTy, {});
- if (EltSize < 8 || Alignment < EltSize / 8)
+ if (EltSize < 8 || Alignment < EltSize / 8)
return ScalarCost;
unsigned ExtSize = EltSize;
@@ -1488,92 +1488,92 @@ unsigned ARMTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
return ScalarCost;
}
-int ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
- bool IsPairwiseForm,
- TTI::TargetCostKind CostKind) {
- EVT ValVT = TLI->getValueType(DL, ValTy);
- int ISD = TLI->InstructionOpcodeToISD(Opcode);
- if (!ST->hasMVEIntegerOps() || !ValVT.isSimple() || ISD != ISD::ADD)
- return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
- CostKind);
-
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
-
- static const CostTblEntry CostTblAdd[]{
- {ISD::ADD, MVT::v16i8, 1},
- {ISD::ADD, MVT::v8i16, 1},
- {ISD::ADD, MVT::v4i32, 1},
- };
- if (const auto *Entry = CostTableLookup(CostTblAdd, ISD, LT.second))
- return Entry->Cost * ST->getMVEVectorCostFactor() * LT.first;
-
- return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
- CostKind);
-}
-
-InstructionCost
-ARMTTIImpl::getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned,
- Type *ResTy, VectorType *ValTy,
- TTI::TargetCostKind CostKind) {
- EVT ValVT = TLI->getValueType(DL, ValTy);
- EVT ResVT = TLI->getValueType(DL, ResTy);
- if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) {
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
- if ((LT.second == MVT::v16i8 && ResVT.getSizeInBits() <= 32) ||
- (LT.second == MVT::v8i16 &&
- ResVT.getSizeInBits() <= (IsMLA ? 64 : 32)) ||
- (LT.second == MVT::v4i32 && ResVT.getSizeInBits() <= 64))
- return ST->getMVEVectorCostFactor() * LT.first;
- }
-
- return BaseT::getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, ValTy,
- CostKind);
-}
-
-int ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
- TTI::TargetCostKind CostKind) {
- switch (ICA.getID()) {
- case Intrinsic::get_active_lane_mask:
- // Currently we make a somewhat optimistic assumption that
- // active_lane_mask's are always free. In reality it may be freely folded
- // into a tail predicated loop, expanded into a VCTP or expanded into a lot
- // of add/icmp code. We may need to improve this in the future, but being
- // able to detect if it is free or not involves looking at a lot of other
- // code. We currently assume that the vectorizer inserted these, and knew
- // what it was doing in adding one.
- if (ST->hasMVEIntegerOps())
- return 0;
- break;
- case Intrinsic::sadd_sat:
- case Intrinsic::ssub_sat:
- case Intrinsic::uadd_sat:
- case Intrinsic::usub_sat: {
- if (!ST->hasMVEIntegerOps())
- break;
- // Get the Return type, either directly or from ICA.ReturnType and ICA.VF.
- Type *VT = ICA.getReturnType();
- if (!VT->isVectorTy() && !ICA.getVectorFactor().isScalar())
- VT = VectorType::get(VT, ICA.getVectorFactor());
-
- std::pair<int, MVT> LT =
- TLI->getTypeLegalizationCost(DL, VT);
- if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
- LT.second == MVT::v16i8) {
- // This is a base cost of 1 for the vadd, plus 3 extract shifts if we
- // need to extend the type, as it uses shr(qadd(shl, shl)).
- unsigned Instrs = LT.second.getScalarSizeInBits() ==
- ICA.getReturnType()->getScalarSizeInBits()
- ? 1
- : 4;
- return LT.first * ST->getMVEVectorCostFactor() * Instrs;
- }
- break;
- }
- }
-
- return BaseT::getIntrinsicInstrCost(ICA, CostKind);
-}
-
+int ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
+ bool IsPairwiseForm,
+ TTI::TargetCostKind CostKind) {
+ EVT ValVT = TLI->getValueType(DL, ValTy);
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ if (!ST->hasMVEIntegerOps() || !ValVT.isSimple() || ISD != ISD::ADD)
+ return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
+ CostKind);
+
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+
+ static const CostTblEntry CostTblAdd[]{
+ {ISD::ADD, MVT::v16i8, 1},
+ {ISD::ADD, MVT::v8i16, 1},
+ {ISD::ADD, MVT::v4i32, 1},
+ };
+ if (const auto *Entry = CostTableLookup(CostTblAdd, ISD, LT.second))
+ return Entry->Cost * ST->getMVEVectorCostFactor() * LT.first;
+
+ return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
+ CostKind);
+}
+
+InstructionCost
+ARMTTIImpl::getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned,
+ Type *ResTy, VectorType *ValTy,
+ TTI::TargetCostKind CostKind) {
+ EVT ValVT = TLI->getValueType(DL, ValTy);
+ EVT ResVT = TLI->getValueType(DL, ResTy);
+ if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) {
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+ if ((LT.second == MVT::v16i8 && ResVT.getSizeInBits() <= 32) ||
+ (LT.second == MVT::v8i16 &&
+ ResVT.getSizeInBits() <= (IsMLA ? 64 : 32)) ||
+ (LT.second == MVT::v4i32 && ResVT.getSizeInBits() <= 64))
+ return ST->getMVEVectorCostFactor() * LT.first;
+ }
+
+ return BaseT::getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, ValTy,
+ CostKind);
+}
+
+int ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind) {
+ switch (ICA.getID()) {
+ case Intrinsic::get_active_lane_mask:
+ // Currently we make a somewhat optimistic assumption that
+ // active_lane_mask's are always free. In reality it may be freely folded
+ // into a tail predicated loop, expanded into a VCTP or expanded into a lot
+ // of add/icmp code. We may need to improve this in the future, but being
+ // able to detect if it is free or not involves looking at a lot of other
+ // code. We currently assume that the vectorizer inserted these, and knew
+ // what it was doing in adding one.
+ if (ST->hasMVEIntegerOps())
+ return 0;
+ break;
+ case Intrinsic::sadd_sat:
+ case Intrinsic::ssub_sat:
+ case Intrinsic::uadd_sat:
+ case Intrinsic::usub_sat: {
+ if (!ST->hasMVEIntegerOps())
+ break;
+ // Get the Return type, either directly or from ICA.ReturnType and ICA.VF.
+ Type *VT = ICA.getReturnType();
+ if (!VT->isVectorTy() && !ICA.getVectorFactor().isScalar())
+ VT = VectorType::get(VT, ICA.getVectorFactor());
+
+ std::pair<int, MVT> LT =
+ TLI->getTypeLegalizationCost(DL, VT);
+ if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
+ LT.second == MVT::v16i8) {
+ // This is a base cost of 1 for the vadd, plus 3 extract shifts if we
+ // need to extend the type, as it uses shr(qadd(shl, shl)).
+ unsigned Instrs = LT.second.getScalarSizeInBits() ==
+ ICA.getReturnType()->getScalarSizeInBits()
+ ? 1
+ : 4;
+ return LT.first * ST->getMVEVectorCostFactor() * Instrs;
+ }
+ break;
+ }
+ }
+
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+}
+
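The "shr(qadd(shl, shl))" expansion mentioned in the sadd_sat case above, sketched for an 8-bit saturating add using the ACLE DSP intrinsic __qadd (arm_acle.h, requires the DSP extension; its availability is assumed here purely for illustration):

#include <arm_acle.h>
#include <cstdint>

// Shift into the top byte (via unsigned arithmetic to avoid shifting a
// negative value), saturate at 32 bits, then shift back down: saturation at
// the top of the register is exactly 8-bit saturation at 127 / -128.
static int32_t shl24(int8_t v) {
  return (int32_t)((uint32_t)(int32_t)v << 24);
}

int8_t sadd_sat_i8(int8_t a, int8_t b) {
  return (int8_t)(__qadd(shl24(a), shl24(b)) >> 24);
}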
bool ARMTTIImpl::isLoweredToCall(const Function *F) {
if (!F->isIntrinsic())
BaseT::isLoweredToCall(F);
@@ -1635,93 +1635,93 @@ bool ARMTTIImpl::isLoweredToCall(const Function *F) {
return BaseT::isLoweredToCall(F);
}
-bool ARMTTIImpl::maybeLoweredToCall(Instruction &I) {
- unsigned ISD = TLI->InstructionOpcodeToISD(I.getOpcode());
- EVT VT = TLI->getValueType(DL, I.getType(), true);
- if (TLI->getOperationAction(ISD, VT) == TargetLowering::LibCall)
- return true;
-
- // Check if an intrinsic will be lowered to a call and assume that any
- // other CallInst will generate a bl.
- if (auto *Call = dyn_cast<CallInst>(&I)) {
- if (auto *II = dyn_cast<IntrinsicInst>(Call)) {
- switch(II->getIntrinsicID()) {
- case Intrinsic::memcpy:
- case Intrinsic::memset:
- case Intrinsic::memmove:
- return getNumMemOps(II) == -1;
- default:
- if (const Function *F = Call->getCalledFunction())
- return isLoweredToCall(F);
- }
- }
- return true;
- }
-
- // FPv5 provides conversions between integer, double-precision,
- // single-precision, and half-precision formats.
- switch (I.getOpcode()) {
- default:
- break;
- case Instruction::FPToSI:
- case Instruction::FPToUI:
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- return !ST->hasFPARMv8Base();
- }
-
- // FIXME: Unfortunately the approach of checking the Operation Action does
- // not catch all cases of Legalization that use library calls. Our
- // Legalization step categorizes some transformations into library calls as
- // Custom, Expand or even Legal when doing type legalization. So for now
- // we have to special case for instance the SDIV of 64bit integers and the
- // use of floating point emulation.
- if (VT.isInteger() && VT.getSizeInBits() >= 64) {
- switch (ISD) {
- default:
- break;
- case ISD::SDIV:
- case ISD::UDIV:
- case ISD::SREM:
- case ISD::UREM:
- case ISD::SDIVREM:
- case ISD::UDIVREM:
- return true;
- }
- }
-
- // Assume all other non-float operations are supported.
- if (!VT.isFloatingPoint())
- return false;
-
- // We'll need a library call to handle most floats when using soft.
- if (TLI->useSoftFloat()) {
- switch (I.getOpcode()) {
- default:
- return true;
- case Instruction::Alloca:
- case Instruction::Load:
- case Instruction::Store:
- case Instruction::Select:
- case Instruction::PHI:
- return false;
- }
- }
-
- // We'll need a libcall to perform double precision operations on a single
- // precision only FPU.
- if (I.getType()->isDoubleTy() && !ST->hasFP64())
- return true;
-
- // Likewise for half precision arithmetic.
- if (I.getType()->isHalfTy() && !ST->hasFullFP16())
- return true;
-
- return false;
-}
-
+bool ARMTTIImpl::maybeLoweredToCall(Instruction &I) {
+ unsigned ISD = TLI->InstructionOpcodeToISD(I.getOpcode());
+ EVT VT = TLI->getValueType(DL, I.getType(), true);
+ if (TLI->getOperationAction(ISD, VT) == TargetLowering::LibCall)
+ return true;
+
+ // Check if an intrinsic will be lowered to a call and assume that any
+ // other CallInst will generate a bl.
+ if (auto *Call = dyn_cast<CallInst>(&I)) {
+ if (auto *II = dyn_cast<IntrinsicInst>(Call)) {
+ switch(II->getIntrinsicID()) {
+ case Intrinsic::memcpy:
+ case Intrinsic::memset:
+ case Intrinsic::memmove:
+ return getNumMemOps(II) == -1;
+ default:
+ if (const Function *F = Call->getCalledFunction())
+ return isLoweredToCall(F);
+ }
+ }
+ return true;
+ }
+
+ // FPv5 provides conversions between integer, double-precision,
+ // single-precision, and half-precision formats.
+ switch (I.getOpcode()) {
+ default:
+ break;
+ case Instruction::FPToSI:
+ case Instruction::FPToUI:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ return !ST->hasFPARMv8Base();
+ }
+
+ // FIXME: Unfortunately the approach of checking the Operation Action does
+ // not catch all cases of Legalization that use library calls. Our
+ // Legalization step categorizes some transformations into library calls as
+ // Custom, Expand or even Legal when doing type legalization. So for now
+ // we have to special-case, for instance, the SDIV of 64-bit integers and the
+ // use of floating point emulation.
+ if (VT.isInteger() && VT.getSizeInBits() >= 64) {
+ switch (ISD) {
+ default:
+ break;
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ return true;
+ }
+ }
+
+ // Assume all other non-float operations are supported.
+ if (!VT.isFloatingPoint())
+ return false;
+
+ // We'll need a library call to handle most floats when using soft float.
+ if (TLI->useSoftFloat()) {
+ switch (I.getOpcode()) {
+ default:
+ return true;
+ case Instruction::Alloca:
+ case Instruction::Load:
+ case Instruction::Store:
+ case Instruction::Select:
+ case Instruction::PHI:
+ return false;
+ }
+ }
+
+ // We'll need a libcall to perform double precision operations on a single
+ // precision only FPU.
+ if (I.getType()->isDoubleTy() && !ST->hasFP64())
+ return true;
+
+ // Likewise for half precision arithmetic.
+ if (I.getType()->isHalfTy() && !ST->hasFullFP16())
+ return true;
+
+ return false;
+}
+
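As a hedged illustration of what maybeLoweredToCall treats as a call on a typical soft-div, single-precision-only configuration (examples only; the exact runtime routine names depend on the target ABI):

// Illustration only:
//   %q = sdiv i64 %a, %b      ; 64-bit division -> library call (e.g. __aeabi_ldivmod)
//   %d = fadd double %x, %y   ; f64 arithmetic without FP64      -> library call
//   %h = fadd half %p, %r     ; f16 arithmetic without FullFP16  -> library call
//   %s = fadd float %u, %v    ; f32 arithmetic on a hard-float, single-precision
//                             ; FPU -> stays inline, so the function returns false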
bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC,
TargetLibraryInfo *LibInfo,
@@ -1762,7 +1762,7 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
switch (Call->getIntrinsicID()) {
default:
break;
- case Intrinsic::start_loop_iterations:
+ case Intrinsic::start_loop_iterations:
case Intrinsic::test_set_loop_iterations:
case Intrinsic::loop_decrement:
case Intrinsic::loop_decrement_reg:
@@ -1773,24 +1773,24 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
};
// Scan the instructions to see if there's any that we know will turn into a
- // call or if this loop is already a low-overhead loop or will become a tail
- // predicated loop.
- bool IsTailPredLoop = false;
+ // call or if this loop is already a low-overhead loop or will become a tail
+ // predicated loop.
+ bool IsTailPredLoop = false;
auto ScanLoop = [&](Loop *L) {
for (auto *BB : L->getBlocks()) {
for (auto &I : *BB) {
- if (maybeLoweredToCall(I) || IsHardwareLoopIntrinsic(I) ||
- isa<InlineAsm>(I)) {
+ if (maybeLoweredToCall(I) || IsHardwareLoopIntrinsic(I) ||
+ isa<InlineAsm>(I)) {
LLVM_DEBUG(dbgs() << "ARMHWLoops: Bad instruction: " << I << "\n");
return false;
}
- if (auto *II = dyn_cast<IntrinsicInst>(&I))
- IsTailPredLoop |=
- II->getIntrinsicID() == Intrinsic::get_active_lane_mask ||
- II->getIntrinsicID() == Intrinsic::arm_mve_vctp8 ||
- II->getIntrinsicID() == Intrinsic::arm_mve_vctp16 ||
- II->getIntrinsicID() == Intrinsic::arm_mve_vctp32 ||
- II->getIntrinsicID() == Intrinsic::arm_mve_vctp64;
+ if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ IsTailPredLoop |=
+ II->getIntrinsicID() == Intrinsic::get_active_lane_mask ||
+ II->getIntrinsicID() == Intrinsic::arm_mve_vctp8 ||
+ II->getIntrinsicID() == Intrinsic::arm_mve_vctp16 ||
+ II->getIntrinsicID() == Intrinsic::arm_mve_vctp32 ||
+ II->getIntrinsicID() == Intrinsic::arm_mve_vctp64;
}
}
return true;
@@ -1811,7 +1811,7 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
LLVMContext &C = L->getHeader()->getContext();
HWLoopInfo.CounterInReg = true;
HWLoopInfo.IsNestingLegal = false;
- HWLoopInfo.PerformEntryTest = AllowWLSLoops && !IsTailPredLoop;
+ HWLoopInfo.PerformEntryTest = AllowWLSLoops && !IsTailPredLoop;
HWLoopInfo.CountType = Type::getInt32Ty(C);
HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1);
return true;
@@ -1859,28 +1859,28 @@ static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
const LoopAccessInfo *LAI) {
LLVM_DEBUG(dbgs() << "Tail-predication: checking allowed instructions\n");
- // If there are live-out values, it is probably a reduction. We can predicate
- // most reduction operations freely under MVE using a combination of
- // prefer-predicated-reduction-select and inloop reductions. We limit this to
- // floating point and integer reductions, but don't check for operators
- // specifically here. If the value ends up not being a reduction (and so the
- // vectorizer cannot tailfold the loop), we should fall back to standard
- // vectorization automatically.
+ // If there are live-out values, it is probably a reduction. We can predicate
+ // most reduction operations freely under MVE using a combination of
+ // prefer-predicated-reduction-select and inloop reductions. We limit this to
+ // floating point and integer reductions, but don't check for operators
+ // specifically here. If the value ends up not being a reduction (and so the
+ // vectorizer cannot tailfold the loop), we should fall back to standard
+ // vectorization automatically.
SmallVector< Instruction *, 8 > LiveOuts;
LiveOuts = llvm::findDefsUsedOutsideOfLoop(L);
- bool ReductionsDisabled =
+ bool ReductionsDisabled =
EnableTailPredication == TailPredication::EnabledNoReductions ||
EnableTailPredication == TailPredication::ForceEnabledNoReductions;
for (auto *I : LiveOuts) {
- if (!I->getType()->isIntegerTy() && !I->getType()->isFloatTy() &&
- !I->getType()->isHalfTy()) {
- LLVM_DEBUG(dbgs() << "Don't tail-predicate loop with non-integer/float "
+ if (!I->getType()->isIntegerTy() && !I->getType()->isFloatTy() &&
+ !I->getType()->isHalfTy()) {
+ LLVM_DEBUG(dbgs() << "Don't tail-predicate loop with non-integer/float "
"live-out value\n");
return false;
}
- if (ReductionsDisabled) {
- LLVM_DEBUG(dbgs() << "Reductions not enabled\n");
+ if (ReductionsDisabled) {
+ LLVM_DEBUG(dbgs() << "Reductions not enabled\n");
return false;
}
}
@@ -1910,35 +1910,35 @@ static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
if (isa<StoreInst>(I) || isa<LoadInst>(I)) {
Value *Ptr = isa<LoadInst>(I) ? I.getOperand(0) : I.getOperand(1);
int64_t NextStride = getPtrStride(PSE, Ptr, L);
- if (NextStride == 1) {
- // TODO: for now only allow consecutive strides of 1. We could support
- // other strides as long as it is uniform, but let's keep it simple
- // for now.
+ if (NextStride == 1) {
+ // TODO: for now only allow consecutive strides of 1. We could support
+ // other strides as long as they are uniform, but let's keep it simple
+ // for now.
continue;
- } else if (NextStride == -1 ||
- (NextStride == 2 && MVEMaxSupportedInterleaveFactor >= 2) ||
- (NextStride == 4 && MVEMaxSupportedInterleaveFactor >= 4)) {
- LLVM_DEBUG(dbgs()
- << "Consecutive strides of 2 found, vld2/vstr2 can't "
- "be tail-predicated\n.");
+ } else if (NextStride == -1 ||
+ (NextStride == 2 && MVEMaxSupportedInterleaveFactor >= 2) ||
+ (NextStride == 4 && MVEMaxSupportedInterleaveFactor >= 4)) {
+ LLVM_DEBUG(dbgs()
+ << "Consecutive strides of 2 found, vld2/vst2 can't "
+ "be tail-predicated.\n");
return false;
- // TODO: don't tail predicate if there is a reversed load?
- } else if (EnableMaskedGatherScatters) {
- // Gather/scatters do allow loading from arbitrary strides, at
- // least if they are loop invariant.
- // TODO: Loop variant strides should in theory work, too, but
- // this requires further testing.
- const SCEV *PtrScev =
- replaceSymbolicStrideSCEV(PSE, llvm::ValueToValueMap(), Ptr);
- if (auto AR = dyn_cast<SCEVAddRecExpr>(PtrScev)) {
- const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
- if (PSE.getSE()->isLoopInvariant(Step, L))
- continue;
- }
+ // TODO: don't tail predicate if there is a reversed load?
+ } else if (EnableMaskedGatherScatters) {
+ // Gather/scatters do allow loading from arbitrary strides, at
+ // least if they are loop invariant.
+ // TODO: Loop variant strides should in theory work, too, but
+ // this requires further testing.
+ const SCEV *PtrScev =
+ replaceSymbolicStrideSCEV(PSE, llvm::ValueToValueMap(), Ptr);
+ if (auto AR = dyn_cast<SCEVAddRecExpr>(PtrScev)) {
+ const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
+ if (PSE.getSE()->isLoopInvariant(Step, L))
+ continue;
+ }
}
- LLVM_DEBUG(dbgs() << "Bad stride found, can't "
- "tail-predicate\n.");
- return false;
+ LLVM_DEBUG(dbgs() << "Bad stride found, can't "
+ "tail-predicate\n.");
+ return false;
}
}
}
@@ -1971,7 +1971,7 @@ bool ARMTTIImpl::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
return false;
}
- assert(L->isInnermost() && "preferPredicateOverEpilogue: inner-loop expected");
+ assert(L->isInnermost() && "preferPredicateOverEpilogue: inner-loop expected");
HardwareLoopInfo HWLoopInfo(L);
if (!HWLoopInfo.canAnalyze(*LI)) {
@@ -2039,10 +2039,10 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)
return;
- // Don't unroll vectorized loops, including the remainder loop
- if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
- return;
-
+ // Don't unroll vectorized loops, including the remainder loop
+ if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
+ return;
+
// Scan the loop: don't unroll loops with calls as this could prevent
// inlining.
unsigned Cost = 0;
@@ -2061,9 +2061,9 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
return;
}
- SmallVector<const Value*, 4> Operands(I.operand_values());
- Cost +=
- getUserCost(&I, Operands, TargetTransformInfo::TCK_SizeAndLatency);
+ SmallVector<const Value*, 4> Operands(I.operand_values());
+ Cost +=
+ getUserCost(&I, Operands, TargetTransformInfo::TCK_SizeAndLatency);
}
}
@@ -2092,24 +2092,24 @@ bool ARMTTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const {
return ST->hasMVEIntegerOps();
}
-
-bool ARMTTIImpl::preferInLoopReduction(unsigned Opcode, Type *Ty,
- TTI::ReductionFlags Flags) const {
- if (!ST->hasMVEIntegerOps())
- return false;
-
- unsigned ScalarBits = Ty->getScalarSizeInBits();
- switch (Opcode) {
- case Instruction::Add:
- return ScalarBits <= 64;
- default:
- return false;
- }
-}
-
-bool ARMTTIImpl::preferPredicatedReductionSelect(
- unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const {
- if (!ST->hasMVEIntegerOps())
- return false;
- return true;
-}
+
+bool ARMTTIImpl::preferInLoopReduction(unsigned Opcode, Type *Ty,
+ TTI::ReductionFlags Flags) const {
+ if (!ST->hasMVEIntegerOps())
+ return false;
+
+ unsigned ScalarBits = Ty->getScalarSizeInBits();
+ switch (Opcode) {
+ case Instruction::Add:
+ return ScalarBits <= 64;
+ default:
+ return false;
+ }
+}
+
+bool ARMTTIImpl::preferPredicatedReductionSelect(
+ unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const {
+ if (!ST->hasMVEIntegerOps())
+ return false;
+ return true;
+}
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/libs/llvm12/lib/Target/ARM/ARMTargetTransformInfo.h
index 7f045080e3..257e325a28 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/contrib/libs/llvm12/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -113,9 +113,9 @@ public:
return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
}
- Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
- IntrinsicInst &II) const;
-
+ Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+ IntrinsicInst &II) const;
+
/// \name Scalar TTI Implementations
/// @{
@@ -126,8 +126,8 @@ public:
int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty, TTI::TargetCostKind CostKind,
- Instruction *Inst = nullptr);
+ Type *Ty, TTI::TargetCostKind CostKind,
+ Instruction *Inst = nullptr);
/// @}
@@ -181,31 +181,31 @@ public:
int getMemcpyCost(const Instruction *I);
- int getNumMemOps(const IntrinsicInst *I) const;
-
+ int getNumMemOps(const IntrinsicInst *I) const;
+
int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
VectorType *SubTp);
bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const;
- bool preferInLoopReduction(unsigned Opcode, Type *Ty,
- TTI::ReductionFlags Flags) const;
-
- bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
- TTI::ReductionFlags Flags) const;
+ bool preferInLoopReduction(unsigned Opcode, Type *Ty,
+ TTI::ReductionFlags Flags) const;
- bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }
+ bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
+ TTI::ReductionFlags Flags) const;
- int getCFInstrCost(unsigned Opcode,
- TTI::TargetCostKind CostKind);
+ bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }
+ int getCFInstrCost(unsigned Opcode,
+ TTI::TargetCostKind CostKind);
+
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
- TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
+ TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
- CmpInst::Predicate VecPred,
+ CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
@@ -229,10 +229,10 @@ public:
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
- unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind);
-
+ unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind);
+
int getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace,
@@ -244,17 +244,17 @@ public:
Align Alignment, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
- int getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
- bool IsPairwiseForm,
- TTI::TargetCostKind CostKind);
- InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned,
- Type *ResTy, VectorType *ValTy,
- TTI::TargetCostKind CostKind);
-
- int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
- TTI::TargetCostKind CostKind);
-
- bool maybeLoweredToCall(Instruction &I);
+ int getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
+ bool IsPairwiseForm,
+ TTI::TargetCostKind CostKind);
+ InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned,
+ Type *ResTy, VectorType *ValTy,
+ TTI::TargetCostKind CostKind);
+
+ int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind);
+
+ bool maybeLoweredToCall(Instruction &I);
bool isLoweredToCall(const Function *F);
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC,
diff --git a/contrib/libs/llvm12/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/libs/llvm12/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 52577d75dd..b65cfc3811 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -6239,9 +6239,9 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
StringRef IDVal = Parser.getTok().getIdentifier();
const auto &Prefix =
- llvm::find_if(PrefixEntries, [&IDVal](const PrefixEntry &PE) {
- return PE.Spelling == IDVal;
- });
+ llvm::find_if(PrefixEntries, [&IDVal](const PrefixEntry &PE) {
+ return PE.Spelling == IDVal;
+ });
if (Prefix == std::end(PrefixEntries)) {
Error(Parser.getTok().getLoc(), "unexpected prefix in operand");
return true;
@@ -10307,14 +10307,14 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
!HasWideQualifier) {
// The operands aren't the same for tMOV[S]r... (no cc_out)
MCInst TmpInst;
- unsigned Op = Inst.getOperand(4).getReg() ? ARM::tMOVSr : ARM::tMOVr;
- TmpInst.setOpcode(Op);
+ unsigned Op = Inst.getOperand(4).getReg() ? ARM::tMOVSr : ARM::tMOVr;
+ TmpInst.setOpcode(Op);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- if (Op == ARM::tMOVr) {
- TmpInst.addOperand(Inst.getOperand(2));
- TmpInst.addOperand(Inst.getOperand(3));
- }
+ if (Op == ARM::tMOVr) {
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ }
Inst = TmpInst;
return true;
}
@@ -10599,12 +10599,12 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
(isThumb() && !hasV8Ops()))
return Match_InvalidOperand;
break;
- case ARM::t2TBB:
- case ARM::t2TBH:
- // Rn = sp is only allowed with ARMv8-A
- if (!hasV8Ops() && (Inst.getOperand(0).getReg() == ARM::SP))
- return Match_RequiresV8;
- break;
+ case ARM::t2TBB:
+ case ARM::t2TBH:
+ // Rn = sp is only allowed with ARMv8-A
+ if (!hasV8Ops() && (Inst.getOperand(0).getReg() == ARM::SP))
+ return Match_RequiresV8;
+ break;
default:
break;
}
@@ -11135,8 +11135,8 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
bool WasThumb = isThumb();
Triple T;
MCSubtargetInfo &STI = copySTI();
- STI.setDefaultFeatures("", /*TuneCPU*/ "",
- ("+" + ARM::getArchName(ID)).str());
+ STI.setDefaultFeatures("", /*TuneCPU*/ "",
+ ("+" + ARM::getArchName(ID)).str());
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
FixModeAfterArchChange(WasThumb, L);
@@ -11249,7 +11249,7 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
bool WasThumb = isThumb();
MCSubtargetInfo &STI = copySTI();
- STI.setDefaultFeatures(CPU, /*TuneCPU*/ CPU, "");
+ STI.setDefaultFeatures(CPU, /*TuneCPU*/ CPU, "");
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
FixModeAfterArchChange(WasThumb, L);
diff --git a/contrib/libs/llvm12/lib/Target/ARM/AsmParser/ya.make b/contrib/libs/llvm12/lib/Target/ARM/AsmParser/ya.make
index 572d301570..ed9aa8099d 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/AsmParser/ya.make
+++ b/contrib/libs/llvm12/lib/Target/ARM/AsmParser/ya.make
@@ -12,20 +12,20 @@ LICENSE(Apache-2.0 WITH LLVM-exception)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
- contrib/libs/llvm12
- contrib/libs/llvm12/include
- contrib/libs/llvm12/lib/MC
- contrib/libs/llvm12/lib/MC/MCParser
- contrib/libs/llvm12/lib/Support
- contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc
- contrib/libs/llvm12/lib/Target/ARM/TargetInfo
- contrib/libs/llvm12/lib/Target/ARM/Utils
+ contrib/libs/llvm12
+ contrib/libs/llvm12/include
+ contrib/libs/llvm12/lib/MC
+ contrib/libs/llvm12/lib/MC/MCParser
+ contrib/libs/llvm12/lib/Support
+ contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc
+ contrib/libs/llvm12/lib/Target/ARM/TargetInfo
+ contrib/libs/llvm12/lib/Target/ARM/Utils
)
ADDINCL(
- ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/ARM
- contrib/libs/llvm12/lib/Target/ARM
- contrib/libs/llvm12/lib/Target/ARM/AsmParser
+ ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/ARM
+ contrib/libs/llvm12/lib/Target/ARM
+ contrib/libs/llvm12/lib/Target/ARM/AsmParser
)
NO_COMPILER_WARNINGS()
diff --git a/contrib/libs/llvm12/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/libs/llvm12/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 8ea323a9ce..7953681421 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -860,8 +860,8 @@ ARMDisassembler::AddThumbPredicate(MCInst &MI) const {
VCCPos + 2, MCOI::TIED_TO);
assert(TiedOp >= 0 &&
"Inactive register in vpred_r is not tied to an output!");
- // Copy the operand to ensure it's not invalidated when MI grows.
- MI.insert(VCCI, MCOperand(MI.getOperand(TiedOp)));
+ // Copy the operand to ensure it's not invalidated when MI grows.
+ MI.insert(VCCI, MCOperand(MI.getOperand(TiedOp)));
}
} else if (VCC != ARMVCC::None) {
Check(S, SoftFail);
@@ -4530,14 +4530,14 @@ static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val,
static DecodeStatus
DecodeThumbTableBranch(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
- const FeatureBitset &FeatureBits =
- ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
+ const FeatureBitset &FeatureBits =
+ ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
unsigned Rm = fieldFromInstruction(Insn, 0, 4);
- if (Rn == 13 && !FeatureBits[ARM::HasV8Ops]) S = MCDisassembler::SoftFail;
+ if (Rn == 13 && !FeatureBits[ARM::HasV8Ops]) S = MCDisassembler::SoftFail;
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder)))
diff --git a/contrib/libs/llvm12/lib/Target/ARM/Disassembler/ya.make b/contrib/libs/llvm12/lib/Target/ARM/Disassembler/ya.make
index f8ce0c24d9..660cfd1063 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/Disassembler/ya.make
+++ b/contrib/libs/llvm12/lib/Target/ARM/Disassembler/ya.make
@@ -12,19 +12,19 @@ LICENSE(Apache-2.0 WITH LLVM-exception)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
- contrib/libs/llvm12
- contrib/libs/llvm12/include
- contrib/libs/llvm12/lib/MC/MCDisassembler
- contrib/libs/llvm12/lib/Support
- contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc
- contrib/libs/llvm12/lib/Target/ARM/TargetInfo
- contrib/libs/llvm12/lib/Target/ARM/Utils
+ contrib/libs/llvm12
+ contrib/libs/llvm12/include
+ contrib/libs/llvm12/lib/MC/MCDisassembler
+ contrib/libs/llvm12/lib/Support
+ contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc
+ contrib/libs/llvm12/lib/Target/ARM/TargetInfo
+ contrib/libs/llvm12/lib/Target/ARM/Utils
)
ADDINCL(
- ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/ARM
- contrib/libs/llvm12/lib/Target/ARM
- contrib/libs/llvm12/lib/Target/ARM/Disassembler
+ ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/ARM
+ contrib/libs/llvm12/lib/Target/ARM
+ contrib/libs/llvm12/lib/Target/ARM/Disassembler
)
NO_COMPILER_WARNINGS()
diff --git a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
index 8459b4ff2a..07376848c4 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
+++ b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -205,20 +205,20 @@ namespace ARM_AM {
return V;
}
- /// isSOImmTwoPartValNeg - Return true if the specified value can be obtained
- /// by two SOImmVal, that -V = First + Second.
- /// "R+V" can be optimized to (sub (sub R, First), Second).
- /// "R=V" can be optimized to (sub (mvn R, ~(-First)), Second).
- inline bool isSOImmTwoPartValNeg(unsigned V) {
- unsigned First;
- if (!isSOImmTwoPartVal(-V))
- return false;
- // Return false if ~(-First) is not a SoImmval.
- First = getSOImmTwoPartFirst(-V);
- First = ~(-First);
- return !(rotr32(~255U, getSOImmValRotate(First)) & First);
- }
-
+ /// isSOImmTwoPartValNeg - Return true if the specified value can be obtained
+ /// by two SOImmVals such that -V = First + Second.
+ /// "R+V" can be optimized to (sub (sub R, First), Second).
+ /// "R=V" can be optimized to (sub (mvn R, ~(-First)), Second).
+ inline bool isSOImmTwoPartValNeg(unsigned V) {
+ unsigned First;
+ if (!isSOImmTwoPartVal(-V))
+ return false;
+ // Return false if ~(-First) is not a SoImmval.
+ First = getSOImmTwoPartFirst(-V);
+ First = ~(-First);
+ return !(rotr32(~255U, getSOImmValRotate(First)) & First);
+ }
+
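A hedged worked example of the two-part negative split documented above; the concrete First/Second values are assumed for illustration rather than taken from getSOImmTwoPartFirst/getSOImmTwoPartSecond.

// Illustration only: take V = -0x102, so -V = 0x102. 0x102 is not a single
// rotated 8-bit ARM immediate, but it splits as First = 0x100, Second = 0x2
// (both valid SO immediates), so "r0 = r0 + (-0x102)" can be emitted as
//   sub r0, r0, #0x100   ; subtract First
//   sub r0, r0, #0x2     ; subtract Second
// isSOImmTwoPartValNeg(-0x102) is the predicate that licenses this rewrite,
// provided the extra ~(-First) condition it checks also holds.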
/// getThumbImmValShift - Try to handle Imm with a 8-bit immediate followed
/// by a left shift. Returns the shift amount to use.
inline unsigned getThumbImmValShift(unsigned Imm) {
@@ -687,18 +687,18 @@ namespace ARM_AM {
return getFP16Imm(FPImm.bitcastToAPInt());
}
- /// If this is a FP16Imm encoded as a fp32 value, return the 8-bit encoding
- /// for it. Otherwise return -1 like getFP16Imm.
- inline int getFP32FP16Imm(const APInt &Imm) {
- if (Imm.getActiveBits() > 16)
- return -1;
- return ARM_AM::getFP16Imm(Imm.trunc(16));
- }
-
- inline int getFP32FP16Imm(const APFloat &FPImm) {
- return getFP32FP16Imm(FPImm.bitcastToAPInt());
- }
-
+ /// If this is an FP16Imm encoded as an fp32 value, return the 8-bit encoding
+ /// for it. Otherwise return -1 like getFP16Imm.
+ inline int getFP32FP16Imm(const APInt &Imm) {
+ if (Imm.getActiveBits() > 16)
+ return -1;
+ return ARM_AM::getFP16Imm(Imm.trunc(16));
+ }
+
+ inline int getFP32FP16Imm(const APFloat &FPImm) {
+ return getFP32FP16Imm(FPImm.bitcastToAPInt());
+ }
+
/// getFP32Imm - Return an 8-bit floating-point version of the 32-bit
/// floating-point value. If the value cannot be represented as an 8-bit
/// floating-point value, then return -1.
diff --git a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index b02aef3c33..697eeab4e5 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -1010,7 +1010,7 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) {
case ARM::fixup_t2_condbranch:
case ARM::fixup_t2_uncondbranch:
case ARM::fixup_t2_pcrel_10:
- case ARM::fixup_t2_pcrel_9:
+ case ARM::fixup_t2_pcrel_9:
case ARM::fixup_t2_adr_pcrel_12:
case ARM::fixup_arm_thumb_bl:
case ARM::fixup_arm_thumb_blx:
diff --git a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index ecd96114e8..5599eaaf2f 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -254,7 +254,7 @@ namespace ARMII {
MO_OPTION_MASK = 0x3,
/// MO_COFFSTUB - On a symbol operand "FOO", this indicates that the
- /// reference is actually to the ".refptr.FOO" symbol. This is used for
+ /// reference is actually to the ".refptr.FOO" symbol. This is used for
/// stub symbols on windows.
MO_COFFSTUB = 0x4,
diff --git a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h
index d975d799e0..ac75bf3fca 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h
+++ b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h
@@ -30,7 +30,7 @@ public:
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
// Autogenerated by tblgen.
- std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
void printInstruction(const MCInst *MI, uint64_t Address,
const MCSubtargetInfo &STI, raw_ostream &O);
virtual bool printAliasInstr(const MCInst *MI, uint64_t Address,
diff --git a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 40e8e244e3..a26944a38f 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -87,7 +87,7 @@ void ARMCOFFMCAsmInfoMicrosoft::anchor() { }
ARMCOFFMCAsmInfoMicrosoft::ARMCOFFMCAsmInfoMicrosoft() {
AlignmentIsInBytes = false;
- SupportsDebugInformation = true;
+ SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::WinEH;
PrivateGlobalPrefix = "$M";
PrivateLabelPrefix = "$M";
diff --git a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 774f2507b8..3da71ade87 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -11,13 +11,13 @@
//===----------------------------------------------------------------------===//
#include "ARMMCTargetDesc.h"
-#include "ARMAddressingModes.h"
+#include "ARMAddressingModes.h"
#include "ARMBaseInfo.h"
#include "ARMInstPrinter.h"
#include "ARMMCAsmInfo.h"
#include "TargetInfo/ARMTargetInfo.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCELFStreamer.h"
@@ -182,23 +182,23 @@ std::string ARM_MC::ParseARMTriple(const Triple &TT, StringRef CPU) {
return ARMArchFeature;
}
-bool ARM_MC::isPredicated(const MCInst &MI, const MCInstrInfo *MCII) {
- const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
- int PredOpIdx = Desc.findFirstPredOperandIdx();
- return PredOpIdx != -1 && MI.getOperand(PredOpIdx).getImm() != ARMCC::AL;
-}
-
-bool ARM_MC::isCPSRDefined(const MCInst &MI, const MCInstrInfo *MCII) {
- const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
- for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
- const MCOperand &MO = MI.getOperand(I);
- if (MO.isReg() && MO.getReg() == ARM::CPSR &&
- Desc.OpInfo[I].isOptionalDef())
- return true;
- }
- return false;
-}
-
+bool ARM_MC::isPredicated(const MCInst &MI, const MCInstrInfo *MCII) {
+ const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
+ int PredOpIdx = Desc.findFirstPredOperandIdx();
+ return PredOpIdx != -1 && MI.getOperand(PredOpIdx).getImm() != ARMCC::AL;
+}
+
+bool ARM_MC::isCPSRDefined(const MCInst &MI, const MCInstrInfo *MCII) {
+ const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
+ for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
+ const MCOperand &MO = MI.getOperand(I);
+ if (MO.isReg() && MO.getReg() == ARM::CPSR &&
+ Desc.OpInfo[I].isOptionalDef())
+ return true;
+ }
+ return false;
+}
+
MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(const Triple &TT,
StringRef CPU, StringRef FS) {
std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU);
@@ -209,7 +209,7 @@ MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(const Triple &TT,
ArchFS = std::string(FS);
}
- return createARMMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, ArchFS);
+ return createARMMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, ArchFS);
}
static MCInstrInfo *createARMMCInstrInfo() {
@@ -218,120 +218,120 @@ static MCInstrInfo *createARMMCInstrInfo() {
return X;
}
-void ARM_MC::initLLVMToCVRegMapping(MCRegisterInfo *MRI) {
- // Mapping from CodeView to MC register id.
- static const struct {
- codeview::RegisterId CVReg;
- MCPhysReg Reg;
- } RegMap[] = {
- {codeview::RegisterId::ARM_R0, ARM::R0},
- {codeview::RegisterId::ARM_R1, ARM::R1},
- {codeview::RegisterId::ARM_R2, ARM::R2},
- {codeview::RegisterId::ARM_R3, ARM::R3},
- {codeview::RegisterId::ARM_R4, ARM::R4},
- {codeview::RegisterId::ARM_R5, ARM::R5},
- {codeview::RegisterId::ARM_R6, ARM::R6},
- {codeview::RegisterId::ARM_R7, ARM::R7},
- {codeview::RegisterId::ARM_R8, ARM::R8},
- {codeview::RegisterId::ARM_R9, ARM::R9},
- {codeview::RegisterId::ARM_R10, ARM::R10},
- {codeview::RegisterId::ARM_R11, ARM::R11},
- {codeview::RegisterId::ARM_R12, ARM::R12},
- {codeview::RegisterId::ARM_SP, ARM::SP},
- {codeview::RegisterId::ARM_LR, ARM::LR},
- {codeview::RegisterId::ARM_PC, ARM::PC},
- {codeview::RegisterId::ARM_CPSR, ARM::CPSR},
- {codeview::RegisterId::ARM_FPSCR, ARM::FPSCR},
- {codeview::RegisterId::ARM_FPEXC, ARM::FPEXC},
- {codeview::RegisterId::ARM_FS0, ARM::S0},
- {codeview::RegisterId::ARM_FS1, ARM::S1},
- {codeview::RegisterId::ARM_FS2, ARM::S2},
- {codeview::RegisterId::ARM_FS3, ARM::S3},
- {codeview::RegisterId::ARM_FS4, ARM::S4},
- {codeview::RegisterId::ARM_FS5, ARM::S5},
- {codeview::RegisterId::ARM_FS6, ARM::S6},
- {codeview::RegisterId::ARM_FS7, ARM::S7},
- {codeview::RegisterId::ARM_FS8, ARM::S8},
- {codeview::RegisterId::ARM_FS9, ARM::S9},
- {codeview::RegisterId::ARM_FS10, ARM::S10},
- {codeview::RegisterId::ARM_FS11, ARM::S11},
- {codeview::RegisterId::ARM_FS12, ARM::S12},
- {codeview::RegisterId::ARM_FS13, ARM::S13},
- {codeview::RegisterId::ARM_FS14, ARM::S14},
- {codeview::RegisterId::ARM_FS15, ARM::S15},
- {codeview::RegisterId::ARM_FS16, ARM::S16},
- {codeview::RegisterId::ARM_FS17, ARM::S17},
- {codeview::RegisterId::ARM_FS18, ARM::S18},
- {codeview::RegisterId::ARM_FS19, ARM::S19},
- {codeview::RegisterId::ARM_FS20, ARM::S20},
- {codeview::RegisterId::ARM_FS21, ARM::S21},
- {codeview::RegisterId::ARM_FS22, ARM::S22},
- {codeview::RegisterId::ARM_FS23, ARM::S23},
- {codeview::RegisterId::ARM_FS24, ARM::S24},
- {codeview::RegisterId::ARM_FS25, ARM::S25},
- {codeview::RegisterId::ARM_FS26, ARM::S26},
- {codeview::RegisterId::ARM_FS27, ARM::S27},
- {codeview::RegisterId::ARM_FS28, ARM::S28},
- {codeview::RegisterId::ARM_FS29, ARM::S29},
- {codeview::RegisterId::ARM_FS30, ARM::S30},
- {codeview::RegisterId::ARM_FS31, ARM::S31},
- {codeview::RegisterId::ARM_ND0, ARM::D0},
- {codeview::RegisterId::ARM_ND1, ARM::D1},
- {codeview::RegisterId::ARM_ND2, ARM::D2},
- {codeview::RegisterId::ARM_ND3, ARM::D3},
- {codeview::RegisterId::ARM_ND4, ARM::D4},
- {codeview::RegisterId::ARM_ND5, ARM::D5},
- {codeview::RegisterId::ARM_ND6, ARM::D6},
- {codeview::RegisterId::ARM_ND7, ARM::D7},
- {codeview::RegisterId::ARM_ND8, ARM::D8},
- {codeview::RegisterId::ARM_ND9, ARM::D9},
- {codeview::RegisterId::ARM_ND10, ARM::D10},
- {codeview::RegisterId::ARM_ND11, ARM::D11},
- {codeview::RegisterId::ARM_ND12, ARM::D12},
- {codeview::RegisterId::ARM_ND13, ARM::D13},
- {codeview::RegisterId::ARM_ND14, ARM::D14},
- {codeview::RegisterId::ARM_ND15, ARM::D15},
- {codeview::RegisterId::ARM_ND16, ARM::D16},
- {codeview::RegisterId::ARM_ND17, ARM::D17},
- {codeview::RegisterId::ARM_ND18, ARM::D18},
- {codeview::RegisterId::ARM_ND19, ARM::D19},
- {codeview::RegisterId::ARM_ND20, ARM::D20},
- {codeview::RegisterId::ARM_ND21, ARM::D21},
- {codeview::RegisterId::ARM_ND22, ARM::D22},
- {codeview::RegisterId::ARM_ND23, ARM::D23},
- {codeview::RegisterId::ARM_ND24, ARM::D24},
- {codeview::RegisterId::ARM_ND25, ARM::D25},
- {codeview::RegisterId::ARM_ND26, ARM::D26},
- {codeview::RegisterId::ARM_ND27, ARM::D27},
- {codeview::RegisterId::ARM_ND28, ARM::D28},
- {codeview::RegisterId::ARM_ND29, ARM::D29},
- {codeview::RegisterId::ARM_ND30, ARM::D30},
- {codeview::RegisterId::ARM_ND31, ARM::D31},
- {codeview::RegisterId::ARM_NQ0, ARM::Q0},
- {codeview::RegisterId::ARM_NQ1, ARM::Q1},
- {codeview::RegisterId::ARM_NQ2, ARM::Q2},
- {codeview::RegisterId::ARM_NQ3, ARM::Q3},
- {codeview::RegisterId::ARM_NQ4, ARM::Q4},
- {codeview::RegisterId::ARM_NQ5, ARM::Q5},
- {codeview::RegisterId::ARM_NQ6, ARM::Q6},
- {codeview::RegisterId::ARM_NQ7, ARM::Q7},
- {codeview::RegisterId::ARM_NQ8, ARM::Q8},
- {codeview::RegisterId::ARM_NQ9, ARM::Q9},
- {codeview::RegisterId::ARM_NQ10, ARM::Q10},
- {codeview::RegisterId::ARM_NQ11, ARM::Q11},
- {codeview::RegisterId::ARM_NQ12, ARM::Q12},
- {codeview::RegisterId::ARM_NQ13, ARM::Q13},
- {codeview::RegisterId::ARM_NQ14, ARM::Q14},
- {codeview::RegisterId::ARM_NQ15, ARM::Q15},
- };
- for (unsigned I = 0; I < array_lengthof(RegMap); ++I)
- MRI->mapLLVMRegToCVReg(RegMap[I].Reg, static_cast<int>(RegMap[I].CVReg));
-}
-
+void ARM_MC::initLLVMToCVRegMapping(MCRegisterInfo *MRI) {
+ // Mapping from CodeView to MC register id.
+ static const struct {
+ codeview::RegisterId CVReg;
+ MCPhysReg Reg;
+ } RegMap[] = {
+ {codeview::RegisterId::ARM_R0, ARM::R0},
+ {codeview::RegisterId::ARM_R1, ARM::R1},
+ {codeview::RegisterId::ARM_R2, ARM::R2},
+ {codeview::RegisterId::ARM_R3, ARM::R3},
+ {codeview::RegisterId::ARM_R4, ARM::R4},
+ {codeview::RegisterId::ARM_R5, ARM::R5},
+ {codeview::RegisterId::ARM_R6, ARM::R6},
+ {codeview::RegisterId::ARM_R7, ARM::R7},
+ {codeview::RegisterId::ARM_R8, ARM::R8},
+ {codeview::RegisterId::ARM_R9, ARM::R9},
+ {codeview::RegisterId::ARM_R10, ARM::R10},
+ {codeview::RegisterId::ARM_R11, ARM::R11},
+ {codeview::RegisterId::ARM_R12, ARM::R12},
+ {codeview::RegisterId::ARM_SP, ARM::SP},
+ {codeview::RegisterId::ARM_LR, ARM::LR},
+ {codeview::RegisterId::ARM_PC, ARM::PC},
+ {codeview::RegisterId::ARM_CPSR, ARM::CPSR},
+ {codeview::RegisterId::ARM_FPSCR, ARM::FPSCR},
+ {codeview::RegisterId::ARM_FPEXC, ARM::FPEXC},
+ {codeview::RegisterId::ARM_FS0, ARM::S0},
+ {codeview::RegisterId::ARM_FS1, ARM::S1},
+ {codeview::RegisterId::ARM_FS2, ARM::S2},
+ {codeview::RegisterId::ARM_FS3, ARM::S3},
+ {codeview::RegisterId::ARM_FS4, ARM::S4},
+ {codeview::RegisterId::ARM_FS5, ARM::S5},
+ {codeview::RegisterId::ARM_FS6, ARM::S6},
+ {codeview::RegisterId::ARM_FS7, ARM::S7},
+ {codeview::RegisterId::ARM_FS8, ARM::S8},
+ {codeview::RegisterId::ARM_FS9, ARM::S9},
+ {codeview::RegisterId::ARM_FS10, ARM::S10},
+ {codeview::RegisterId::ARM_FS11, ARM::S11},
+ {codeview::RegisterId::ARM_FS12, ARM::S12},
+ {codeview::RegisterId::ARM_FS13, ARM::S13},
+ {codeview::RegisterId::ARM_FS14, ARM::S14},
+ {codeview::RegisterId::ARM_FS15, ARM::S15},
+ {codeview::RegisterId::ARM_FS16, ARM::S16},
+ {codeview::RegisterId::ARM_FS17, ARM::S17},
+ {codeview::RegisterId::ARM_FS18, ARM::S18},
+ {codeview::RegisterId::ARM_FS19, ARM::S19},
+ {codeview::RegisterId::ARM_FS20, ARM::S20},
+ {codeview::RegisterId::ARM_FS21, ARM::S21},
+ {codeview::RegisterId::ARM_FS22, ARM::S22},
+ {codeview::RegisterId::ARM_FS23, ARM::S23},
+ {codeview::RegisterId::ARM_FS24, ARM::S24},
+ {codeview::RegisterId::ARM_FS25, ARM::S25},
+ {codeview::RegisterId::ARM_FS26, ARM::S26},
+ {codeview::RegisterId::ARM_FS27, ARM::S27},
+ {codeview::RegisterId::ARM_FS28, ARM::S28},
+ {codeview::RegisterId::ARM_FS29, ARM::S29},
+ {codeview::RegisterId::ARM_FS30, ARM::S30},
+ {codeview::RegisterId::ARM_FS31, ARM::S31},
+ {codeview::RegisterId::ARM_ND0, ARM::D0},
+ {codeview::RegisterId::ARM_ND1, ARM::D1},
+ {codeview::RegisterId::ARM_ND2, ARM::D2},
+ {codeview::RegisterId::ARM_ND3, ARM::D3},
+ {codeview::RegisterId::ARM_ND4, ARM::D4},
+ {codeview::RegisterId::ARM_ND5, ARM::D5},
+ {codeview::RegisterId::ARM_ND6, ARM::D6},
+ {codeview::RegisterId::ARM_ND7, ARM::D7},
+ {codeview::RegisterId::ARM_ND8, ARM::D8},
+ {codeview::RegisterId::ARM_ND9, ARM::D9},
+ {codeview::RegisterId::ARM_ND10, ARM::D10},
+ {codeview::RegisterId::ARM_ND11, ARM::D11},
+ {codeview::RegisterId::ARM_ND12, ARM::D12},
+ {codeview::RegisterId::ARM_ND13, ARM::D13},
+ {codeview::RegisterId::ARM_ND14, ARM::D14},
+ {codeview::RegisterId::ARM_ND15, ARM::D15},
+ {codeview::RegisterId::ARM_ND16, ARM::D16},
+ {codeview::RegisterId::ARM_ND17, ARM::D17},
+ {codeview::RegisterId::ARM_ND18, ARM::D18},
+ {codeview::RegisterId::ARM_ND19, ARM::D19},
+ {codeview::RegisterId::ARM_ND20, ARM::D20},
+ {codeview::RegisterId::ARM_ND21, ARM::D21},
+ {codeview::RegisterId::ARM_ND22, ARM::D22},
+ {codeview::RegisterId::ARM_ND23, ARM::D23},
+ {codeview::RegisterId::ARM_ND24, ARM::D24},
+ {codeview::RegisterId::ARM_ND25, ARM::D25},
+ {codeview::RegisterId::ARM_ND26, ARM::D26},
+ {codeview::RegisterId::ARM_ND27, ARM::D27},
+ {codeview::RegisterId::ARM_ND28, ARM::D28},
+ {codeview::RegisterId::ARM_ND29, ARM::D29},
+ {codeview::RegisterId::ARM_ND30, ARM::D30},
+ {codeview::RegisterId::ARM_ND31, ARM::D31},
+ {codeview::RegisterId::ARM_NQ0, ARM::Q0},
+ {codeview::RegisterId::ARM_NQ1, ARM::Q1},
+ {codeview::RegisterId::ARM_NQ2, ARM::Q2},
+ {codeview::RegisterId::ARM_NQ3, ARM::Q3},
+ {codeview::RegisterId::ARM_NQ4, ARM::Q4},
+ {codeview::RegisterId::ARM_NQ5, ARM::Q5},
+ {codeview::RegisterId::ARM_NQ6, ARM::Q6},
+ {codeview::RegisterId::ARM_NQ7, ARM::Q7},
+ {codeview::RegisterId::ARM_NQ8, ARM::Q8},
+ {codeview::RegisterId::ARM_NQ9, ARM::Q9},
+ {codeview::RegisterId::ARM_NQ10, ARM::Q10},
+ {codeview::RegisterId::ARM_NQ11, ARM::Q11},
+ {codeview::RegisterId::ARM_NQ12, ARM::Q12},
+ {codeview::RegisterId::ARM_NQ13, ARM::Q13},
+ {codeview::RegisterId::ARM_NQ14, ARM::Q14},
+ {codeview::RegisterId::ARM_NQ15, ARM::Q15},
+ };
+ for (unsigned I = 0; I < array_lengthof(RegMap); ++I)
+ MRI->mapLLVMRegToCVReg(RegMap[I].Reg, static_cast<int>(RegMap[I].CVReg));
+}
+
static MCRegisterInfo *createARMMCRegisterInfo(const Triple &Triple) {
MCRegisterInfo *X = new MCRegisterInfo();
InitARMMCRegisterInfo(X, ARM::LR, 0, 0, ARM::PC);
- ARM_MC::initLLVMToCVRegMapping(X);
+ ARM_MC::initLLVMToCVRegMapping(X);
return X;
}
diff --git a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 5a0874f0ef..a84576e757 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -41,22 +41,22 @@ class raw_pwrite_stream;
namespace ARM_MC {
std::string ParseARMTriple(const Triple &TT, StringRef CPU);
-void initLLVMToCVRegMapping(MCRegisterInfo *MRI);
-
-bool isPredicated(const MCInst &MI, const MCInstrInfo *MCII);
-bool isCPSRDefined(const MCInst &MI, const MCInstrInfo *MCII);
-
-template<class Inst>
-bool isLDMBaseRegInList(const Inst &MI) {
- auto BaseReg = MI.getOperand(0).getReg();
- for (unsigned I = 1, E = MI.getNumOperands(); I < E; ++I) {
- const auto &Op = MI.getOperand(I);
- if (Op.isReg() && Op.getReg() == BaseReg)
- return true;
- }
- return false;
-}
-
+void initLLVMToCVRegMapping(MCRegisterInfo *MRI);
+
+bool isPredicated(const MCInst &MI, const MCInstrInfo *MCII);
+bool isCPSRDefined(const MCInst &MI, const MCInstrInfo *MCII);
+
+template<class Inst>
+bool isLDMBaseRegInList(const Inst &MI) {
+ auto BaseReg = MI.getOperand(0).getReg();
+ for (unsigned I = 1, E = MI.getNumOperands(); I < E; ++I) {
+ const auto &Op = MI.getOperand(I);
+ if (Op.isReg() && Op.getReg() == BaseReg)
+ return true;
+ }
+ return false;
+}
+
/// Create a ARM MCSubtargetInfo instance. This is exposed so Asm parser, etc.
/// do not need to go through TargetRegistry.
MCSubtargetInfo *createARMMCSubtargetInfo(const Triple &TT, StringRef CPU,
diff --git a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ya.make b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ya.make
index b92b47d057..0256e1fdac 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ya.make
+++ b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ya.make
@@ -12,20 +12,20 @@ LICENSE(Apache-2.0 WITH LLVM-exception)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
- contrib/libs/llvm12
- contrib/libs/llvm12/include
- contrib/libs/llvm12/lib/BinaryFormat
- contrib/libs/llvm12/lib/MC
- contrib/libs/llvm12/lib/MC/MCDisassembler
- contrib/libs/llvm12/lib/Support
- contrib/libs/llvm12/lib/Target/ARM/TargetInfo
- contrib/libs/llvm12/lib/Target/ARM/Utils
+ contrib/libs/llvm12
+ contrib/libs/llvm12/include
+ contrib/libs/llvm12/lib/BinaryFormat
+ contrib/libs/llvm12/lib/MC
+ contrib/libs/llvm12/lib/MC/MCDisassembler
+ contrib/libs/llvm12/lib/Support
+ contrib/libs/llvm12/lib/Target/ARM/TargetInfo
+ contrib/libs/llvm12/lib/Target/ARM/Utils
)
ADDINCL(
- ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/ARM
- contrib/libs/llvm12/lib/Target/ARM
- contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc
+ ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/ARM
+ contrib/libs/llvm12/lib/Target/ARM
+ contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc
)
NO_COMPILER_WARNINGS()
diff --git a/contrib/libs/llvm12/lib/Target/ARM/MVEGatherScatterLowering.cpp b/contrib/libs/llvm12/lib/Target/ARM/MVEGatherScatterLowering.cpp
index 56823735e2..0b6cdee512 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/MVEGatherScatterLowering.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/MVEGatherScatterLowering.cpp
@@ -44,10 +44,10 @@
using namespace llvm;
-#define DEBUG_TYPE "arm-mve-gather-scatter-lowering"
+#define DEBUG_TYPE "arm-mve-gather-scatter-lowering"
cl::opt<bool> EnableMaskedGatherScatters(
- "enable-arm-maskedgatscat", cl::Hidden, cl::init(true),
+ "enable-arm-maskedgatscat", cl::Hidden, cl::init(true),
cl::desc("Enable the generation of masked gathers and scatters"));
namespace {
@@ -84,7 +84,7 @@ private:
// Check for a getelementptr and deduce base and offsets from it, on success
// returning the base directly and the offsets indirectly using the Offsets
// argument
- Value *checkGEP(Value *&Offsets, FixedVectorType *Ty, GetElementPtrInst *GEP,
+ Value *checkGEP(Value *&Offsets, FixedVectorType *Ty, GetElementPtrInst *GEP,
IRBuilder<> &Builder);
// Compute the scale of this gather/scatter instruction
int computeScale(unsigned GEPElemSize, unsigned MemoryElemSize);
@@ -132,11 +132,11 @@ private:
Value *tryCreateIncrementingWBGatScat(IntrinsicInst *I, Value *BasePtr,
Value *Ptr, unsigned TypeScale,
IRBuilder<> &Builder);
-
- // Optimise the base and offsets of the given address
- bool optimiseAddress(Value *Address, BasicBlock *BB, LoopInfo *LI);
- // Try to fold consecutive geps together into one
- Value *foldGEP(GetElementPtrInst *GEP, Value *&Offsets, IRBuilder<> &Builder);
+
+ // Optimise the base and offsets of the given address
+ bool optimiseAddress(Value *Address, BasicBlock *BB, LoopInfo *LI);
+ // Try to fold consecutive geps together into one
+ Value *foldGEP(GetElementPtrInst *GEP, Value *&Offsets, IRBuilder<> &Builder);
// Check whether these offsets could be moved out of the loop they're in
bool optimiseOffsets(Value *Offsets, BasicBlock *BB, LoopInfo *LI);
// Pushes the given add out of the loop
@@ -172,49 +172,49 @@ bool MVEGatherScatterLowering::isLegalTypeAndAlignment(unsigned NumElements,
return false;
}
-static bool checkOffsetSize(Value *Offsets, unsigned TargetElemCount) {
- // Offsets that are not of type <N x i32> are sign extended by the
- // getelementptr instruction, and MVE gathers/scatters treat the offset as
- // unsigned. Thus, if the element size is smaller than 32, we can only allow
- // positive offsets - i.e., the offsets are not allowed to be variables we
- // can't look into.
- // Additionally, <N x i32> offsets have to either originate from a zext of a
- // vector with element types smaller or equal the type of the gather we're
- // looking at, or consist of constants that we can check are small enough
- // to fit into the gather type.
- // Thus we check that 0 < value < 2^TargetElemSize.
- unsigned TargetElemSize = 128 / TargetElemCount;
- unsigned OffsetElemSize = cast<FixedVectorType>(Offsets->getType())
- ->getElementType()
- ->getScalarSizeInBits();
- if (OffsetElemSize != TargetElemSize || OffsetElemSize != 32) {
- Constant *ConstOff = dyn_cast<Constant>(Offsets);
- if (!ConstOff)
- return false;
- int64_t TargetElemMaxSize = (1ULL << TargetElemSize);
- auto CheckValueSize = [TargetElemMaxSize](Value *OffsetElem) {
- ConstantInt *OConst = dyn_cast<ConstantInt>(OffsetElem);
- if (!OConst)
- return false;
- int SExtValue = OConst->getSExtValue();
- if (SExtValue >= TargetElemMaxSize || SExtValue < 0)
- return false;
- return true;
- };
- if (isa<FixedVectorType>(ConstOff->getType())) {
- for (unsigned i = 0; i < TargetElemCount; i++) {
- if (!CheckValueSize(ConstOff->getAggregateElement(i)))
- return false;
- }
- } else {
- if (!CheckValueSize(ConstOff))
- return false;
- }
- }
- return true;
-}
-
-Value *MVEGatherScatterLowering::checkGEP(Value *&Offsets, FixedVectorType *Ty,
+static bool checkOffsetSize(Value *Offsets, unsigned TargetElemCount) {
+ // Offsets that are not of type <N x i32> are sign extended by the
+ // getelementptr instruction, and MVE gathers/scatters treat the offset as
+ // unsigned. Thus, if the element size is smaller than 32, we can only allow
+ // positive offsets - i.e., the offsets are not allowed to be variables we
+ // can't look into.
+ // Additionally, <N x i32> offsets have to either originate from a zext of a
+ // vector with element types smaller than or equal to the type of the gather we're
+ // looking at, or consist of constants that we can check are small enough
+ // to fit into the gather type.
+ // Thus we check that 0 < value < 2^TargetElemSize.
+ unsigned TargetElemSize = 128 / TargetElemCount;
+ unsigned OffsetElemSize = cast<FixedVectorType>(Offsets->getType())
+ ->getElementType()
+ ->getScalarSizeInBits();
+ if (OffsetElemSize != TargetElemSize || OffsetElemSize != 32) {
+ Constant *ConstOff = dyn_cast<Constant>(Offsets);
+ if (!ConstOff)
+ return false;
+ int64_t TargetElemMaxSize = (1ULL << TargetElemSize);
+ auto CheckValueSize = [TargetElemMaxSize](Value *OffsetElem) {
+ ConstantInt *OConst = dyn_cast<ConstantInt>(OffsetElem);
+ if (!OConst)
+ return false;
+ int SExtValue = OConst->getSExtValue();
+ if (SExtValue >= TargetElemMaxSize || SExtValue < 0)
+ return false;
+ return true;
+ };
+ if (isa<FixedVectorType>(ConstOff->getType())) {
+ for (unsigned i = 0; i < TargetElemCount; i++) {
+ if (!CheckValueSize(ConstOff->getAggregateElement(i)))
+ return false;
+ }
+ } else {
+ if (!CheckValueSize(ConstOff))
+ return false;
+ }
+ }
+ return true;
+}
+
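A minimal standalone sketch of the bound that checkOffsetSize enforces, assuming 128-bit MVE vectors; the helper name and its arguments are invented for illustration and are not part of the patch.

// Illustrative sketch only: the same unsigned-range check, detached from LLVM types.
static bool offsetsFitUnsigned(const int64_t *Offsets, unsigned Count,
                               unsigned TargetElemCount) {
  unsigned TargetElemSize = 128 / TargetElemCount;  // MVE vectors are 128-bit
  int64_t Max = 1LL << TargetElemSize;              // offsets are read as unsigned
  for (unsigned I = 0; I < Count; ++I)
    if (Offsets[I] < 0 || Offsets[I] >= Max)
      return false;
  return true;
}
// e.g. an 8-lane gather (TargetElemSize = 16) accepts constant offsets
// {0, 2, 4, ..., 14}, but any negative (after sign extension) or >= 65536
// element makes the check fail.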
+Value *MVEGatherScatterLowering::checkGEP(Value *&Offsets, FixedVectorType *Ty,
GetElementPtrInst *GEP,
IRBuilder<> &Builder) {
if (!GEP) {
@@ -225,43 +225,43 @@ Value *MVEGatherScatterLowering::checkGEP(Value *&Offsets, FixedVectorType *Ty,
LLVM_DEBUG(dbgs() << "masked gathers/scatters: getelementpointer found."
<< " Looking at intrinsic for base + vector of offsets\n");
Value *GEPPtr = GEP->getPointerOperand();
- Offsets = GEP->getOperand(1);
- if (GEPPtr->getType()->isVectorTy() ||
- !isa<FixedVectorType>(Offsets->getType()))
+ Offsets = GEP->getOperand(1);
+ if (GEPPtr->getType()->isVectorTy() ||
+ !isa<FixedVectorType>(Offsets->getType()))
return nullptr;
-
+
if (GEP->getNumOperands() != 2) {
LLVM_DEBUG(dbgs() << "masked gathers/scatters: getelementptr with too many"
<< " operands. Expanding.\n");
return nullptr;
}
Offsets = GEP->getOperand(1);
- unsigned OffsetsElemCount =
- cast<FixedVectorType>(Offsets->getType())->getNumElements();
+ unsigned OffsetsElemCount =
+ cast<FixedVectorType>(Offsets->getType())->getNumElements();
// Paranoid check whether the number of parallel lanes is the same
- assert(Ty->getNumElements() == OffsetsElemCount);
-
- ZExtInst *ZextOffs = dyn_cast<ZExtInst>(Offsets);
- if (ZextOffs)
+ assert(Ty->getNumElements() == OffsetsElemCount);
+
+ ZExtInst *ZextOffs = dyn_cast<ZExtInst>(Offsets);
+ if (ZextOffs)
Offsets = ZextOffs->getOperand(0);
- FixedVectorType *OffsetType = cast<FixedVectorType>(Offsets->getType());
-
- // If the offsets are already being zext-ed to <N x i32>, that relieves us of
- // having to make sure that they won't overflow.
- if (!ZextOffs || cast<FixedVectorType>(ZextOffs->getDestTy())
- ->getElementType()
- ->getScalarSizeInBits() != 32)
- if (!checkOffsetSize(Offsets, OffsetsElemCount))
- return nullptr;
-
- // The offset sizes have been checked; if any truncating or zext-ing is
- // required to fix them, do that now
+ FixedVectorType *OffsetType = cast<FixedVectorType>(Offsets->getType());
+
+ // If the offsets are already being zext-ed to <N x i32>, that relieves us of
+ // having to make sure that they won't overflow.
+ if (!ZextOffs || cast<FixedVectorType>(ZextOffs->getDestTy())
+ ->getElementType()
+ ->getScalarSizeInBits() != 32)
+ if (!checkOffsetSize(Offsets, OffsetsElemCount))
+ return nullptr;
+
+ // The offset sizes have been checked; if any truncating or zext-ing is
+ // required to fix them, do that now
if (Ty != Offsets->getType()) {
- if ((Ty->getElementType()->getScalarSizeInBits() <
- OffsetType->getElementType()->getScalarSizeInBits())) {
- Offsets = Builder.CreateTrunc(Offsets, Ty);
+ if ((Ty->getElementType()->getScalarSizeInBits() <
+ OffsetType->getElementType()->getScalarSizeInBits())) {
+ Offsets = Builder.CreateTrunc(Offsets, Ty);
} else {
- Offsets = Builder.CreateZExt(Offsets, VectorType::getInteger(Ty));
+ Offsets = Builder.CreateZExt(Offsets, VectorType::getInteger(Ty));
}
}
// If none of the checks failed, return the gep's base pointer
@@ -476,8 +476,8 @@ Value *MVEGatherScatterLowering::tryCreateMaskedGatherOffset(
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
Value *Offsets;
- Value *BasePtr =
- checkGEP(Offsets, cast<FixedVectorType>(ResultTy), GEP, Builder);
+ Value *BasePtr =
+ checkGEP(Offsets, cast<FixedVectorType>(ResultTy), GEP, Builder);
if (!BasePtr)
return nullptr;
// Check whether the offset is a constant increment that could be merged into
@@ -617,8 +617,8 @@ Value *MVEGatherScatterLowering::tryCreateMaskedScatterOffset(
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
Value *Offsets;
- Value *BasePtr =
- checkGEP(Offsets, cast<FixedVectorType>(InputTy), GEP, Builder);
+ Value *BasePtr =
+ checkGEP(Offsets, cast<FixedVectorType>(InputTy), GEP, Builder);
if (!BasePtr)
return nullptr;
// Check whether the offset is a constant increment that could be merged into
@@ -941,7 +941,7 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB,
int IncrementingBlock = -1;
for (int i = 0; i < 2; i++)
- if (auto *Op = dyn_cast<Instruction>(Phi->getIncomingValue(i)))
+ if (auto *Op = dyn_cast<Instruction>(Phi->getIncomingValue(i)))
if (Op->getOpcode() == Instruction::Add &&
(Op->getOperand(0) == Phi || Op->getOperand(1) == Phi))
IncrementingBlock = i;
@@ -960,8 +960,8 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB,
// Get the value that is added to/multiplied with the phi
Value *OffsSecondOperand = Offs->getOperand(OffsSecondOp);
- if (IncrementPerRound->getType() != OffsSecondOperand->getType() ||
- !L->isLoopInvariant(OffsSecondOperand))
+ if (IncrementPerRound->getType() != OffsSecondOperand->getType() ||
+ !L->isLoopInvariant(OffsSecondOperand))
// Something has gone wrong, abort
return false;
@@ -1029,128 +1029,128 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB,
return true;
}
-static Value *CheckAndCreateOffsetAdd(Value *X, Value *Y, Value *GEP,
- IRBuilder<> &Builder) {
- // Splat the non-vector value to a vector of the given type - if the value is
- // a constant (and its value isn't too big), we can even use this opportunity
- // to scale it to the size of the vector elements
- auto FixSummands = [&Builder](FixedVectorType *&VT, Value *&NonVectorVal) {
- ConstantInt *Const;
- if ((Const = dyn_cast<ConstantInt>(NonVectorVal)) &&
- VT->getElementType() != NonVectorVal->getType()) {
- unsigned TargetElemSize = VT->getElementType()->getPrimitiveSizeInBits();
- uint64_t N = Const->getZExtValue();
- if (N < (unsigned)(1 << (TargetElemSize - 1))) {
- NonVectorVal = Builder.CreateVectorSplat(
- VT->getNumElements(), Builder.getIntN(TargetElemSize, N));
- return;
- }
- }
- NonVectorVal =
- Builder.CreateVectorSplat(VT->getNumElements(), NonVectorVal);
- };
-
- FixedVectorType *XElType = dyn_cast<FixedVectorType>(X->getType());
- FixedVectorType *YElType = dyn_cast<FixedVectorType>(Y->getType());
- // If one of X, Y is not a vector, we have to splat it in order
- // to add the two of them.
- if (XElType && !YElType) {
- FixSummands(XElType, Y);
- YElType = cast<FixedVectorType>(Y->getType());
- } else if (YElType && !XElType) {
- FixSummands(YElType, X);
- XElType = cast<FixedVectorType>(X->getType());
- }
- assert(XElType && YElType && "Unknown vector types");
- // Check that the summands are of compatible types
- if (XElType != YElType) {
- LLVM_DEBUG(dbgs() << "masked gathers/scatters: incompatible gep offsets\n");
- return nullptr;
- }
-
- if (XElType->getElementType()->getScalarSizeInBits() != 32) {
- // Check that by adding the vectors we do not accidentally
- // create an overflow
- Constant *ConstX = dyn_cast<Constant>(X);
- Constant *ConstY = dyn_cast<Constant>(Y);
- if (!ConstX || !ConstY)
- return nullptr;
- unsigned TargetElemSize = 128 / XElType->getNumElements();
- for (unsigned i = 0; i < XElType->getNumElements(); i++) {
- ConstantInt *ConstXEl =
- dyn_cast<ConstantInt>(ConstX->getAggregateElement(i));
- ConstantInt *ConstYEl =
- dyn_cast<ConstantInt>(ConstY->getAggregateElement(i));
- if (!ConstXEl || !ConstYEl ||
- ConstXEl->getZExtValue() + ConstYEl->getZExtValue() >=
- (unsigned)(1 << (TargetElemSize - 1)))
- return nullptr;
- }
- }
-
- Value *Add = Builder.CreateAdd(X, Y);
-
- FixedVectorType *GEPType = cast<FixedVectorType>(GEP->getType());
- if (checkOffsetSize(Add, GEPType->getNumElements()))
- return Add;
- else
- return nullptr;
-}
-
-Value *MVEGatherScatterLowering::foldGEP(GetElementPtrInst *GEP,
- Value *&Offsets,
- IRBuilder<> &Builder) {
- Value *GEPPtr = GEP->getPointerOperand();
- Offsets = GEP->getOperand(1);
- // We only merge geps with constant offsets, because only for those
- // we can make sure that we do not cause an overflow
- if (!isa<Constant>(Offsets))
- return nullptr;
- GetElementPtrInst *BaseGEP;
- if ((BaseGEP = dyn_cast<GetElementPtrInst>(GEPPtr))) {
- // Merge the two geps into one
- Value *BaseBasePtr = foldGEP(BaseGEP, Offsets, Builder);
- if (!BaseBasePtr)
- return nullptr;
- Offsets =
- CheckAndCreateOffsetAdd(Offsets, GEP->getOperand(1), GEP, Builder);
- if (Offsets == nullptr)
- return nullptr;
- return BaseBasePtr;
- }
- return GEPPtr;
-}
-
-bool MVEGatherScatterLowering::optimiseAddress(Value *Address, BasicBlock *BB,
- LoopInfo *LI) {
- GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Address);
- if (!GEP)
- return false;
- bool Changed = false;
- if (GEP->hasOneUse() &&
- dyn_cast<GetElementPtrInst>(GEP->getPointerOperand())) {
- IRBuilder<> Builder(GEP->getContext());
- Builder.SetInsertPoint(GEP);
- Builder.SetCurrentDebugLocation(GEP->getDebugLoc());
- Value *Offsets;
- Value *Base = foldGEP(GEP, Offsets, Builder);
- // We only want to merge the geps if there is a real chance that they can be
- // used by an MVE gather; thus the offset has to have the correct size
- // (always i32 if it is not of vector type) and the base has to be a
- // pointer.
- if (Offsets && Base && Base != GEP) {
- PointerType *BaseType = cast<PointerType>(Base->getType());
- GetElementPtrInst *NewAddress = GetElementPtrInst::Create(
- BaseType->getPointerElementType(), Base, Offsets, "gep.merged", GEP);
- GEP->replaceAllUsesWith(NewAddress);
- GEP = NewAddress;
- Changed = true;
- }
- }
- Changed |= optimiseOffsets(GEP->getOperand(1), GEP->getParent(), LI);
- return Changed;
-}
-
+static Value *CheckAndCreateOffsetAdd(Value *X, Value *Y, Value *GEP,
+ IRBuilder<> &Builder) {
+ // Splat the non-vector value to a vector of the given type - if the value is
+ // a constant (and its value isn't too big), we can even use this opportunity
+ // to scale it to the size of the vector elements
+ auto FixSummands = [&Builder](FixedVectorType *&VT, Value *&NonVectorVal) {
+ ConstantInt *Const;
+ if ((Const = dyn_cast<ConstantInt>(NonVectorVal)) &&
+ VT->getElementType() != NonVectorVal->getType()) {
+ unsigned TargetElemSize = VT->getElementType()->getPrimitiveSizeInBits();
+ uint64_t N = Const->getZExtValue();
+ if (N < (unsigned)(1 << (TargetElemSize - 1))) {
+ NonVectorVal = Builder.CreateVectorSplat(
+ VT->getNumElements(), Builder.getIntN(TargetElemSize, N));
+ return;
+ }
+ }
+ NonVectorVal =
+ Builder.CreateVectorSplat(VT->getNumElements(), NonVectorVal);
+ };
+
+ FixedVectorType *XElType = dyn_cast<FixedVectorType>(X->getType());
+ FixedVectorType *YElType = dyn_cast<FixedVectorType>(Y->getType());
+ // If one of X, Y is not a vector, we have to splat it in order
+ // to add the two of them.
+ if (XElType && !YElType) {
+ FixSummands(XElType, Y);
+ YElType = cast<FixedVectorType>(Y->getType());
+ } else if (YElType && !XElType) {
+ FixSummands(YElType, X);
+ XElType = cast<FixedVectorType>(X->getType());
+ }
+ assert(XElType && YElType && "Unknown vector types");
+ // Check that the summands are of compatible types
+ if (XElType != YElType) {
+ LLVM_DEBUG(dbgs() << "masked gathers/scatters: incompatible gep offsets\n");
+ return nullptr;
+ }
+
+ if (XElType->getElementType()->getScalarSizeInBits() != 32) {
+ // Check that by adding the vectors we do not accidentally
+ // create an overflow
+ Constant *ConstX = dyn_cast<Constant>(X);
+ Constant *ConstY = dyn_cast<Constant>(Y);
+ if (!ConstX || !ConstY)
+ return nullptr;
+ unsigned TargetElemSize = 128 / XElType->getNumElements();
+ for (unsigned i = 0; i < XElType->getNumElements(); i++) {
+ ConstantInt *ConstXEl =
+ dyn_cast<ConstantInt>(ConstX->getAggregateElement(i));
+ ConstantInt *ConstYEl =
+ dyn_cast<ConstantInt>(ConstY->getAggregateElement(i));
+ if (!ConstXEl || !ConstYEl ||
+ ConstXEl->getZExtValue() + ConstYEl->getZExtValue() >=
+ (unsigned)(1 << (TargetElemSize - 1)))
+ return nullptr;
+ }
+ }
+
+ Value *Add = Builder.CreateAdd(X, Y);
+
+ FixedVectorType *GEPType = cast<FixedVectorType>(GEP->getType());
+ if (checkOffsetSize(Add, GEPType->getNumElements()))
+ return Add;
+ else
+ return nullptr;
+}
+
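The overflow guard in CheckAndCreateOffsetAdd above only fires for lanes narrower than 32 bits, and then requires both summands to be constant so every per-lane sum can be proven to fit. This is a minimal standalone C++ sketch of just that arithmetic, assuming plain std::vector in place of LLVM constant vectors; the names and the values in main() are illustrative, not part of the pass.

#include <cstdint>
#include <iostream>
#include <vector>

// Returns true if the element-wise sum of two constant offset vectors is
// guaranteed to fit in the signed range of one lane of a 128-bit vector.
static bool sumsFitInLane(const std::vector<uint64_t> &X,
                          const std::vector<uint64_t> &Y) {
  if (X.empty() || X.size() != Y.size())
    return false;
  // A 128-bit MVE vector split into N lanes gives 128/N bits per lane.
  unsigned TargetElemSize = 128 / X.size();
  uint64_t Limit = 1ULL << (TargetElemSize - 1); // signed upper bound
  for (size_t i = 0; i < X.size(); ++i)
    if (X[i] + Y[i] >= Limit)
      return false; // the merged offset could overflow, reject it
  return true;
}

int main() {
  // 8 x 16-bit lanes: the per-lane limit is 2^15 = 32768.
  std::vector<uint64_t> A{1, 2, 3, 4, 5, 6, 7, 8};
  std::vector<uint64_t> B{100, 200, 300, 400, 500, 600, 700, 800};
  std::cout << sumsFitInLane(A, B) << "\n"; // 1: safe to merge
  B[0] = 40000;                             // would overflow a 16-bit lane
  std::cout << sumsFitInLane(A, B) << "\n"; // 0: keep the geps separate
}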
+Value *MVEGatherScatterLowering::foldGEP(GetElementPtrInst *GEP,
+ Value *&Offsets,
+ IRBuilder<> &Builder) {
+ Value *GEPPtr = GEP->getPointerOperand();
+ Offsets = GEP->getOperand(1);
+ // We only merge geps with constant offsets, because only for those
+ // we can make sure that we do not cause an overflow
+ if (!isa<Constant>(Offsets))
+ return nullptr;
+ GetElementPtrInst *BaseGEP;
+ if ((BaseGEP = dyn_cast<GetElementPtrInst>(GEPPtr))) {
+ // Merge the two geps into one
+ Value *BaseBasePtr = foldGEP(BaseGEP, Offsets, Builder);
+ if (!BaseBasePtr)
+ return nullptr;
+ Offsets =
+ CheckAndCreateOffsetAdd(Offsets, GEP->getOperand(1), GEP, Builder);
+ if (Offsets == nullptr)
+ return nullptr;
+ return BaseBasePtr;
+ }
+ return GEPPtr;
+}
+
+bool MVEGatherScatterLowering::optimiseAddress(Value *Address, BasicBlock *BB,
+ LoopInfo *LI) {
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Address);
+ if (!GEP)
+ return false;
+ bool Changed = false;
+ if (GEP->hasOneUse() &&
+ dyn_cast<GetElementPtrInst>(GEP->getPointerOperand())) {
+ IRBuilder<> Builder(GEP->getContext());
+ Builder.SetInsertPoint(GEP);
+ Builder.SetCurrentDebugLocation(GEP->getDebugLoc());
+ Value *Offsets;
+ Value *Base = foldGEP(GEP, Offsets, Builder);
+ // We only want to merge the geps if there is a real chance that they can be
+ // used by an MVE gather; thus the offset has to have the correct size
+ // (always i32 if it is not of vector type) and the base has to be a
+ // pointer.
+ if (Offsets && Base && Base != GEP) {
+ PointerType *BaseType = cast<PointerType>(Base->getType());
+ GetElementPtrInst *NewAddress = GetElementPtrInst::Create(
+ BaseType->getPointerElementType(), Base, Offsets, "gep.merged", GEP);
+ GEP->replaceAllUsesWith(NewAddress);
+ GEP = NewAddress;
+ Changed = true;
+ }
+ }
+ Changed |= optimiseOffsets(GEP->getOperand(1), GEP->getParent(), LI);
+ return Changed;
+}
+
bool MVEGatherScatterLowering::runOnFunction(Function &F) {
if (!EnableMaskedGatherScatters)
return false;
@@ -1166,18 +1166,18 @@ bool MVEGatherScatterLowering::runOnFunction(Function &F) {
bool Changed = false;
for (BasicBlock &BB : F) {
- Changed |= SimplifyInstructionsInBlock(&BB);
-
+ Changed |= SimplifyInstructionsInBlock(&BB);
+
for (Instruction &I : BB) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
- if (II && II->getIntrinsicID() == Intrinsic::masked_gather &&
- isa<FixedVectorType>(II->getType())) {
+ if (II && II->getIntrinsicID() == Intrinsic::masked_gather &&
+ isa<FixedVectorType>(II->getType())) {
Gathers.push_back(II);
- Changed |= optimiseAddress(II->getArgOperand(0), II->getParent(), LI);
- } else if (II && II->getIntrinsicID() == Intrinsic::masked_scatter &&
- isa<FixedVectorType>(II->getArgOperand(0)->getType())) {
+ Changed |= optimiseAddress(II->getArgOperand(0), II->getParent(), LI);
+ } else if (II && II->getIntrinsicID() == Intrinsic::masked_scatter &&
+ isa<FixedVectorType>(II->getArgOperand(0)->getType())) {
Scatters.push_back(II);
- Changed |= optimiseAddress(II->getArgOperand(1), II->getParent(), LI);
+ Changed |= optimiseAddress(II->getArgOperand(1), II->getParent(), LI);
}
}
}
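foldGEP and optimiseAddress above rewrite a chain of geps as a single gep from the outermost base, provided every offset in the chain is constant. A rough standalone C++ analogue of that bottom-up folding, assuming a toy Gep node rather than LLVM's GetElementPtrInst; offsets are plain integers here, so the vector splatting and overflow checks handled by CheckAndCreateOffsetAdd are left out.

#include <cstdint>
#include <iostream>

// Toy model of a gep chain: each node points at its base (another gep, or
// nullptr for the root pointer) and carries a single constant offset.
struct Gep {
  const Gep *Base = nullptr;
  int64_t Offset = 0;
};

// Walk down to the root pointer, accumulating every constant offset on the
// way, mirroring how foldGEP returns the outermost base plus merged offsets.
static const Gep *foldChain(const Gep &G, int64_t &MergedOffset) {
  MergedOffset += G.Offset;
  return G.Base ? foldChain(*G.Base, MergedOffset) : &G;
}

int main() {
  Gep Root{nullptr, 0}; // the plain pointer the chain starts from
  Gep Inner{&Root, 16}; // gep(Root, 16)
  Gep Outer{&Inner, 8}; // gep(gep(Root, 16), 8)
  int64_t Off = 0;
  const Gep *Base = foldChain(Outer, Off);
  std::cout << "base is root: " << (Base == &Root) << ", offset: " << Off
            << "\n"; // base is root: 1, offset: 24
}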
diff --git a/contrib/libs/llvm12/lib/Target/ARM/MVETailPredUtils.h b/contrib/libs/llvm12/lib/Target/ARM/MVETailPredUtils.h
index 9ab5d92729..1bb23cc725 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/MVETailPredUtils.h
+++ b/contrib/libs/llvm12/lib/Target/ARM/MVETailPredUtils.h
@@ -1,157 +1,157 @@
-//===-- MVETailPredUtils.h - Tail predication utility functions -*- C++-*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains utility functions for low overhead and tail predicated
-// loops, shared between the ARMLowOverheadLoops pass and anywhere else that
-// needs them.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_ARM_MVETAILPREDUTILS_H
-#define LLVM_LIB_TARGET_ARM_MVETAILPREDUTILS_H
-
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-
-namespace llvm {
-
-static inline unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop) {
- switch (Opcode) {
- default:
- llvm_unreachable("unhandled vctp opcode");
- break;
- case ARM::MVE_VCTP8:
- return IsDoLoop ? ARM::MVE_DLSTP_8 : ARM::MVE_WLSTP_8;
- case ARM::MVE_VCTP16:
- return IsDoLoop ? ARM::MVE_DLSTP_16 : ARM::MVE_WLSTP_16;
- case ARM::MVE_VCTP32:
- return IsDoLoop ? ARM::MVE_DLSTP_32 : ARM::MVE_WLSTP_32;
- case ARM::MVE_VCTP64:
- return IsDoLoop ? ARM::MVE_DLSTP_64 : ARM::MVE_WLSTP_64;
- }
- return 0;
-}
-
-static inline unsigned getTailPredVectorWidth(unsigned Opcode) {
- switch (Opcode) {
- default:
- llvm_unreachable("unhandled vctp opcode");
- case ARM::MVE_VCTP8:
- return 16;
- case ARM::MVE_VCTP16:
- return 8;
- case ARM::MVE_VCTP32:
- return 4;
- case ARM::MVE_VCTP64:
- return 2;
- }
- return 0;
-}
-
-static inline bool isVCTP(const MachineInstr *MI) {
- switch (MI->getOpcode()) {
- default:
- break;
- case ARM::MVE_VCTP8:
- case ARM::MVE_VCTP16:
- case ARM::MVE_VCTP32:
- case ARM::MVE_VCTP64:
- return true;
- }
- return false;
-}
-
-static inline bool isLoopStart(MachineInstr &MI) {
- return MI.getOpcode() == ARM::t2DoLoopStart ||
- MI.getOpcode() == ARM::t2DoLoopStartTP ||
- MI.getOpcode() == ARM::t2WhileLoopStart;
-}
-
-// WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a
-// beq that branches to the exit branch.
-inline void RevertWhileLoopStart(MachineInstr *MI, const TargetInstrInfo *TII,
- unsigned BrOpc = ARM::t2Bcc) {
- MachineBasicBlock *MBB = MI->getParent();
-
- // Cmp
- MachineInstrBuilder MIB =
- BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2CMPri));
- MIB.add(MI->getOperand(0));
- MIB.addImm(0);
- MIB.addImm(ARMCC::AL);
- MIB.addReg(ARM::NoRegister);
-
- // Branch
- MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
- MIB.add(MI->getOperand(1)); // branch target
- MIB.addImm(ARMCC::EQ); // condition code
- MIB.addReg(ARM::CPSR);
-
- MI->eraseFromParent();
-}
-
-inline void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII) {
- MachineBasicBlock *MBB = MI->getParent();
- BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::tMOVr))
- .add(MI->getOperand(0))
- .add(MI->getOperand(1))
- .add(predOps(ARMCC::AL));
-
- MI->eraseFromParent();
-}
-
-inline void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII,
- bool SetFlags = false) {
- MachineBasicBlock *MBB = MI->getParent();
-
- MachineInstrBuilder MIB =
- BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
- MIB.add(MI->getOperand(0));
- MIB.add(MI->getOperand(1));
- MIB.add(MI->getOperand(2));
- MIB.addImm(ARMCC::AL);
- MIB.addReg(0);
-
- if (SetFlags) {
- MIB.addReg(ARM::CPSR);
- MIB->getOperand(5).setIsDef(true);
- } else
- MIB.addReg(0);
-
- MI->eraseFromParent();
-}
-
-// Generate a subs, or sub and cmp, and a branch instead of an LE.
-inline void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII,
- unsigned BrOpc = ARM::t2Bcc, bool SkipCmp = false) {
- MachineBasicBlock *MBB = MI->getParent();
-
- // Create cmp
- if (!SkipCmp) {
- MachineInstrBuilder MIB =
- BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2CMPri));
- MIB.add(MI->getOperand(0));
- MIB.addImm(0);
- MIB.addImm(ARMCC::AL);
- MIB.addReg(ARM::NoRegister);
- }
-
- // Create bne
- MachineInstrBuilder MIB =
- BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
- MIB.add(MI->getOperand(1)); // branch target
- MIB.addImm(ARMCC::NE); // condition code
- MIB.addReg(ARM::CPSR);
- MI->eraseFromParent();
-}
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_ARM_MVETAILPREDUTILS_H
+//===-- MVETailPredUtils.h - Tail predication utility functions -*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains utility functions for low overhead and tail predicated
+// loops, shared between the ARMLowOverheadLoops pass and anywhere else that
+// needs them.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_MVETAILPREDUTILS_H
+#define LLVM_LIB_TARGET_ARM_MVETAILPREDUTILS_H
+
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+
+namespace llvm {
+
+static inline unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("unhandled vctp opcode");
+ break;
+ case ARM::MVE_VCTP8:
+ return IsDoLoop ? ARM::MVE_DLSTP_8 : ARM::MVE_WLSTP_8;
+ case ARM::MVE_VCTP16:
+ return IsDoLoop ? ARM::MVE_DLSTP_16 : ARM::MVE_WLSTP_16;
+ case ARM::MVE_VCTP32:
+ return IsDoLoop ? ARM::MVE_DLSTP_32 : ARM::MVE_WLSTP_32;
+ case ARM::MVE_VCTP64:
+ return IsDoLoop ? ARM::MVE_DLSTP_64 : ARM::MVE_WLSTP_64;
+ }
+ return 0;
+}
+
+static inline unsigned getTailPredVectorWidth(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("unhandled vctp opcode");
+ case ARM::MVE_VCTP8:
+ return 16;
+ case ARM::MVE_VCTP16:
+ return 8;
+ case ARM::MVE_VCTP32:
+ return 4;
+ case ARM::MVE_VCTP64:
+ return 2;
+ }
+ return 0;
+}
+
+static inline bool isVCTP(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case ARM::MVE_VCTP8:
+ case ARM::MVE_VCTP16:
+ case ARM::MVE_VCTP32:
+ case ARM::MVE_VCTP64:
+ return true;
+ }
+ return false;
+}
+
+static inline bool isLoopStart(MachineInstr &MI) {
+ return MI.getOpcode() == ARM::t2DoLoopStart ||
+ MI.getOpcode() == ARM::t2DoLoopStartTP ||
+ MI.getOpcode() == ARM::t2WhileLoopStart;
+}
+
+// WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a
+// beq that branches to the exit branch.
+inline void RevertWhileLoopStart(MachineInstr *MI, const TargetInstrInfo *TII,
+ unsigned BrOpc = ARM::t2Bcc) {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // Cmp
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2CMPri));
+ MIB.add(MI->getOperand(0));
+ MIB.addImm(0);
+ MIB.addImm(ARMCC::AL);
+ MIB.addReg(ARM::NoRegister);
+
+ // Branch
+ MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
+ MIB.add(MI->getOperand(1)); // branch target
+ MIB.addImm(ARMCC::EQ); // condition code
+ MIB.addReg(ARM::CPSR);
+
+ MI->eraseFromParent();
+}
+
+inline void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII) {
+ MachineBasicBlock *MBB = MI->getParent();
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::tMOVr))
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(1))
+ .add(predOps(ARMCC::AL));
+
+ MI->eraseFromParent();
+}
+
+inline void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII,
+ bool SetFlags = false) {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
+ MIB.add(MI->getOperand(0));
+ MIB.add(MI->getOperand(1));
+ MIB.add(MI->getOperand(2));
+ MIB.addImm(ARMCC::AL);
+ MIB.addReg(0);
+
+ if (SetFlags) {
+ MIB.addReg(ARM::CPSR);
+ MIB->getOperand(5).setIsDef(true);
+ } else
+ MIB.addReg(0);
+
+ MI->eraseFromParent();
+}
+
+// Generate a subs, or sub and cmp, and a branch instead of an LE.
+inline void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII,
+ unsigned BrOpc = ARM::t2Bcc, bool SkipCmp = false) {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // Create cmp
+ if (!SkipCmp) {
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2CMPri));
+ MIB.add(MI->getOperand(0));
+ MIB.addImm(0);
+ MIB.addImm(ARMCC::AL);
+ MIB.addReg(ARM::NoRegister);
+ }
+
+ // Create bne
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
+ MIB.add(MI->getOperand(1)); // branch target
+ MIB.addImm(ARMCC::NE); // condition code
+ MIB.addReg(ARM::CPSR);
+ MI->eraseFromParent();
+}
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_ARM_MVETAILPREDUTILS_H
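These helpers encode a fixed mapping for 128-bit MVE vectors: VCTP8/16/32/64 predicate 16, 8, 4 and 2 lanes respectively, and each pairs with a DLSTP (do-loop) or WLSTP (while-loop) start of the same element size. A standalone C++ sketch of the same table, assuming plain enums and strings rather than the real ARM opcode values:

#include <cstdio>

enum class Vctp { V8, V16, V32, V64 };

// Lanes predicated by each VCTP flavour in a 128-bit vector.
static unsigned lanes(Vctp Op) {
  switch (Op) {
  case Vctp::V8:  return 16; // 128 / 8
  case Vctp::V16: return 8;  // 128 / 16
  case Vctp::V32: return 4;  // 128 / 32
  case Vctp::V64: return 2;  // 128 / 64
  }
  return 0;
}

// Tail-predicated loop start: DLSTP for do-loops, WLSTP for while-loops,
// keeping the element size of the originating VCTP.
static const char *loopStart(Vctp Op, bool IsDoLoop) {
  switch (Op) {
  case Vctp::V8:  return IsDoLoop ? "DLSTP.8"  : "WLSTP.8";
  case Vctp::V16: return IsDoLoop ? "DLSTP.16" : "WLSTP.16";
  case Vctp::V32: return IsDoLoop ? "DLSTP.32" : "WLSTP.32";
  case Vctp::V64: return IsDoLoop ? "DLSTP.64" : "WLSTP.64";
  }
  return "";
}

int main() {
  std::printf("%u lanes -> %s\n", lanes(Vctp::V32), loopStart(Vctp::V32, true));
  // prints: 4 lanes -> DLSTP.32
}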
diff --git a/contrib/libs/llvm12/lib/Target/ARM/MVETailPredication.cpp b/contrib/libs/llvm12/lib/Target/ARM/MVETailPredication.cpp
index cccac55952..94e71f1d60 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/MVETailPredication.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/MVETailPredication.cpp
@@ -22,13 +22,13 @@
/// The HardwareLoops pass inserts intrinsics identifying loops that the
/// backend will attempt to convert into a low-overhead loop. The vectorizer is
/// responsible for generating a vectorized loop in which the lanes are
-/// predicated upon a get.active.lane.mask intrinsic. This pass looks at these
-/// get.active.lane.mask intrinsics and attempts to convert them to VCTP
-/// instructions. This will be picked up by the ARM Low-overhead loop pass later
-/// in the backend, which performs the final transformation to a DLSTP or WLSTP
-/// tail-predicated loop.
-//
-//===----------------------------------------------------------------------===//
+/// predicated upon a get.active.lane.mask intrinsic. This pass looks at these
+/// get.active.lane.mask intrinsics and attempts to convert them to VCTP
+/// instructions. This will be picked up by the ARM Low-overhead loop pass later
+/// in the backend, which performs the final transformation to a DLSTP or WLSTP
+/// tail-predicated loop.
+//
+//===----------------------------------------------------------------------===//
#include "ARM.h"
#include "ARMSubtarget.h"
@@ -47,7 +47,7 @@
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
@@ -57,8 +57,8 @@ using namespace llvm;
#define DESC "Transform predicated vector loops to use MVE tail predication"
cl::opt<TailPredication::Mode> EnableTailPredication(
- "tail-predication", cl::desc("MVE tail-predication pass options"),
- cl::init(TailPredication::Enabled),
+ "tail-predication", cl::desc("MVE tail-predication pass options"),
+ cl::init(TailPredication::Enabled),
cl::values(clEnumValN(TailPredication::Disabled, "disabled",
"Don't tail-predicate loops"),
clEnumValN(TailPredication::EnabledNoReductions,
@@ -103,18 +103,18 @@ public:
bool runOnLoop(Loop *L, LPPassManager&) override;
private:
- /// Perform the relevant checks on the loop and convert active lane masks if
- /// possible.
- bool TryConvertActiveLaneMask(Value *TripCount);
+ /// Perform the relevant checks on the loop and convert active lane masks if
+ /// possible.
+ bool TryConvertActiveLaneMask(Value *TripCount);
- /// Perform several checks on the arguments of @llvm.get.active.lane.mask
- /// intrinsic. E.g., check that the loop induction variable and the element
- /// count are of the form we expect, and also perform overflow checks for
- /// the new expressions that are created.
- bool IsSafeActiveMask(IntrinsicInst *ActiveLaneMask, Value *TripCount);
+ /// Perform several checks on the arguments of @llvm.get.active.lane.mask
+ /// intrinsic. E.g., check that the loop induction variable and the element
+ /// count are of the form we expect, and also perform overflow checks for
+ /// the new expressions that are created.
+ bool IsSafeActiveMask(IntrinsicInst *ActiveLaneMask, Value *TripCount);
/// Insert the intrinsic to represent the effect of tail predication.
- void InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask, Value *TripCount);
+ void InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask, Value *TripCount);
/// Rematerialize the iteration count in exit blocks, which enables
/// ARMLowOverheadLoops to better optimise away loop update statements inside
@@ -155,7 +155,7 @@ bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) {
continue;
Intrinsic::ID ID = Call->getIntrinsicID();
- if (ID == Intrinsic::start_loop_iterations ||
+ if (ID == Intrinsic::start_loop_iterations ||
ID == Intrinsic::test_set_loop_iterations)
return cast<IntrinsicInst>(&I);
}
@@ -174,23 +174,23 @@ bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) {
return false;
}
- LLVM_DEBUG(dbgs() << "ARM TP: Running on Loop: " << *L << *Setup << "\n");
+ LLVM_DEBUG(dbgs() << "ARM TP: Running on Loop: " << *L << *Setup << "\n");
- bool Changed = TryConvertActiveLaneMask(Setup->getArgOperand(0));
+ bool Changed = TryConvertActiveLaneMask(Setup->getArgOperand(0));
- return Changed;
+ return Changed;
}
// The active lane intrinsic has this form:
//
-// @llvm.get.active.lane.mask(IV, TC)
+// @llvm.get.active.lane.mask(IV, TC)
//
// Here we perform checks that this intrinsic behaves as expected,
// which means:
//
-// 1) Check that the TripCount (TC) belongs to this loop (originally).
-// 2) The element count (TC) needs to be sufficiently large that the decrement
-// of element counter doesn't overflow, which means that we need to prove:
+// 1) Check that the TripCount (TC) belongs to this loop (originally).
+// 2) The element count (TC) needs to be sufficiently large that the decrement
+// of element counter doesn't overflow, which means that we need to prove:
// ceil(ElementCount / VectorWidth) >= TripCount
// by rounding up ElementCount up:
// ((ElementCount + (VectorWidth - 1)) / VectorWidth
@@ -199,122 +199,122 @@ bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) {
// 3) The IV must be an induction phi with an increment equal to the
// vector width.
bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
- Value *TripCount) {
+ Value *TripCount) {
bool ForceTailPredication =
EnableTailPredication == TailPredication::ForceEnabledNoReductions ||
EnableTailPredication == TailPredication::ForceEnabled;
- Value *ElemCount = ActiveLaneMask->getOperand(1);
- bool Changed = false;
- if (!L->makeLoopInvariant(ElemCount, Changed))
- return false;
-
-  auto *EC = SE->getSCEV(ElemCount);
- auto *TC = SE->getSCEV(TripCount);
- int VectorWidth =
- cast<FixedVectorType>(ActiveLaneMask->getType())->getNumElements();
- if (VectorWidth != 4 && VectorWidth != 8 && VectorWidth != 16)
- return false;
- ConstantInt *ConstElemCount = nullptr;
-
- // 1) Smoke tests that the original scalar loop TripCount (TC) belongs to
-  // this loop. The scalar tripcount corresponds to the number of elements
- // processed by the loop, so we will refer to that from this point on.
- if (!SE->isLoopInvariant(EC, L)) {
- LLVM_DEBUG(dbgs() << "ARM TP: element count must be loop invariant.\n");
+ Value *ElemCount = ActiveLaneMask->getOperand(1);
+ bool Changed = false;
+ if (!L->makeLoopInvariant(ElemCount, Changed))
+ return false;
+
+  auto *EC = SE->getSCEV(ElemCount);
+ auto *TC = SE->getSCEV(TripCount);
+ int VectorWidth =
+ cast<FixedVectorType>(ActiveLaneMask->getType())->getNumElements();
+ if (VectorWidth != 4 && VectorWidth != 8 && VectorWidth != 16)
+ return false;
+ ConstantInt *ConstElemCount = nullptr;
+
+ // 1) Smoke tests that the original scalar loop TripCount (TC) belongs to
+  // this loop. The scalar tripcount corresponds to the number of elements
+ // processed by the loop, so we will refer to that from this point on.
+ if (!SE->isLoopInvariant(EC, L)) {
+ LLVM_DEBUG(dbgs() << "ARM TP: element count must be loop invariant.\n");
return false;
}
- if ((ConstElemCount = dyn_cast<ConstantInt>(ElemCount))) {
- ConstantInt *TC = dyn_cast<ConstantInt>(TripCount);
- if (!TC) {
- LLVM_DEBUG(dbgs() << "ARM TP: Constant tripcount expected in "
- "set.loop.iterations\n");
- return false;
- }
-
- // Calculate 2 tripcount values and check that they are consistent with
- // each other. The TripCount for a predicated vector loop body is
- // ceil(ElementCount/Width), or floor((ElementCount+Width-1)/Width) as we
- // work it out here.
- uint64_t TC1 = TC->getZExtValue();
- uint64_t TC2 =
- (ConstElemCount->getZExtValue() + VectorWidth - 1) / VectorWidth;
-
- // If the tripcount values are inconsistent, we can't insert the VCTP and
- // trigger tail-predication; keep the intrinsic as a get.active.lane.mask
- // and legalize this.
- if (TC1 != TC2) {
- LLVM_DEBUG(dbgs() << "ARM TP: inconsistent constant tripcount values: "
- << TC1 << " from set.loop.iterations, and "
- << TC2 << " from get.active.lane.mask\n");
- return false;
- }
- } else if (!ForceTailPredication) {
- // 2) We need to prove that the sub expression that we create in the
- // tail-predicated loop body, which calculates the remaining elements to be
- // processed, is non-negative, i.e. it doesn't overflow:
- //
- // ((ElementCount + VectorWidth - 1) / VectorWidth) - TripCount >= 0
- //
- // This is true if:
- //
- // TripCount == (ElementCount + VectorWidth - 1) / VectorWidth
- //
-    // which is what we will be using here.
- //
- auto *VW = SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth));
- // ElementCount + (VW-1):
- auto *ECPlusVWMinus1 = SE->getAddExpr(EC,
- SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth - 1)));
-
-    // Ceil = (ElementCount + (VW-1)) / VW
- auto *Ceil = SE->getUDivExpr(ECPlusVWMinus1, VW);
-
- // Prevent unused variable warnings with TC
- (void)TC;
- LLVM_DEBUG(
- dbgs() << "ARM TP: Analysing overflow behaviour for:\n";
- dbgs() << "ARM TP: - TripCount = "; TC->dump();
- dbgs() << "ARM TP: - ElemCount = "; EC->dump();
- dbgs() << "ARM TP: - VecWidth = " << VectorWidth << "\n";
- dbgs() << "ARM TP: - (ElemCount+VW-1) / VW = "; Ceil->dump();
- );
-
- // As an example, almost all the tripcount expressions (produced by the
- // vectoriser) look like this:
- //
- // TC = ((-4 + (4 * ((3 + %N) /u 4))<nuw>) /u 4)
- //
-    // and "(ElementCount + (VW-1)) / VW":
- //
- // Ceil = ((3 + %N) /u 4)
- //
- // Check for equality of TC and Ceil by calculating SCEV expression
- // TC - Ceil and test it for zero.
- //
- bool Zero = SE->getMinusSCEV(
- SE->getBackedgeTakenCount(L),
- SE->getUDivExpr(SE->getAddExpr(SE->getMulExpr(Ceil, VW),
- SE->getNegativeSCEV(VW)),
- VW))
- ->isZero();
-
- if (!Zero) {
- LLVM_DEBUG(dbgs() << "ARM TP: possible overflow in sub expression.\n");
- return false;
- }
+ if ((ConstElemCount = dyn_cast<ConstantInt>(ElemCount))) {
+ ConstantInt *TC = dyn_cast<ConstantInt>(TripCount);
+ if (!TC) {
+ LLVM_DEBUG(dbgs() << "ARM TP: Constant tripcount expected in "
+ "set.loop.iterations\n");
+ return false;
+ }
+
+ // Calculate 2 tripcount values and check that they are consistent with
+ // each other. The TripCount for a predicated vector loop body is
+ // ceil(ElementCount/Width), or floor((ElementCount+Width-1)/Width) as we
+ // work it out here.
+ uint64_t TC1 = TC->getZExtValue();
+ uint64_t TC2 =
+ (ConstElemCount->getZExtValue() + VectorWidth - 1) / VectorWidth;
+
+ // If the tripcount values are inconsistent, we can't insert the VCTP and
+ // trigger tail-predication; keep the intrinsic as a get.active.lane.mask
+ // and legalize this.
+ if (TC1 != TC2) {
+ LLVM_DEBUG(dbgs() << "ARM TP: inconsistent constant tripcount values: "
+ << TC1 << " from set.loop.iterations, and "
+ << TC2 << " from get.active.lane.mask\n");
+ return false;
+ }
+ } else if (!ForceTailPredication) {
+ // 2) We need to prove that the sub expression that we create in the
+ // tail-predicated loop body, which calculates the remaining elements to be
+ // processed, is non-negative, i.e. it doesn't overflow:
+ //
+ // ((ElementCount + VectorWidth - 1) / VectorWidth) - TripCount >= 0
+ //
+ // This is true if:
+ //
+ // TripCount == (ElementCount + VectorWidth - 1) / VectorWidth
+ //
+    // which is what we will be using here.
+ //
+ auto *VW = SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth));
+ // ElementCount + (VW-1):
+ auto *ECPlusVWMinus1 = SE->getAddExpr(EC,
+ SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth - 1)));
+
+    // Ceil = (ElementCount + (VW-1)) / VW
+ auto *Ceil = SE->getUDivExpr(ECPlusVWMinus1, VW);
+
+ // Prevent unused variable warnings with TC
+ (void)TC;
+ LLVM_DEBUG(
+ dbgs() << "ARM TP: Analysing overflow behaviour for:\n";
+ dbgs() << "ARM TP: - TripCount = "; TC->dump();
+ dbgs() << "ARM TP: - ElemCount = "; EC->dump();
+ dbgs() << "ARM TP: - VecWidth = " << VectorWidth << "\n";
+ dbgs() << "ARM TP: - (ElemCount+VW-1) / VW = "; Ceil->dump();
+ );
+
+ // As an example, almost all the tripcount expressions (produced by the
+ // vectoriser) look like this:
+ //
+ // TC = ((-4 + (4 * ((3 + %N) /u 4))<nuw>) /u 4)
+ //
+    // and "(ElementCount + (VW-1)) / VW":
+ //
+ // Ceil = ((3 + %N) /u 4)
+ //
+ // Check for equality of TC and Ceil by calculating SCEV expression
+ // TC - Ceil and test it for zero.
+ //
+ bool Zero = SE->getMinusSCEV(
+ SE->getBackedgeTakenCount(L),
+ SE->getUDivExpr(SE->getAddExpr(SE->getMulExpr(Ceil, VW),
+ SE->getNegativeSCEV(VW)),
+ VW))
+ ->isZero();
+
+ if (!Zero) {
+ LLVM_DEBUG(dbgs() << "ARM TP: possible overflow in sub expression.\n");
+ return false;
+ }
}
- // 3) Find out if IV is an induction phi. Note that we can't use Loop
+ // 3) Find out if IV is an induction phi. Note that we can't use Loop
// helpers here to get the induction variable, because the hardware loop is
- // no longer in loopsimplify form, and also the hwloop intrinsic uses a
- // different counter. Using SCEV, we check that the induction is of the
+ // no longer in loopsimplify form, and also the hwloop intrinsic uses a
+ // different counter. Using SCEV, we check that the induction is of the
// form i = i + 4, where the increment must be equal to the VectorWidth.
auto *IV = ActiveLaneMask->getOperand(0);
auto *IVExpr = SE->getSCEV(IV);
auto *AddExpr = dyn_cast<SCEVAddRecExpr>(IVExpr);
-
+
if (!AddExpr) {
LLVM_DEBUG(dbgs() << "ARM TP: induction not an add expr: "; IVExpr->dump());
return false;
@@ -324,11 +324,11 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
LLVM_DEBUG(dbgs() << "ARM TP: phi not part of this loop\n");
return false;
}
- auto *Base = dyn_cast<SCEVConstant>(AddExpr->getOperand(0));
- if (!Base || !Base->isZero()) {
- LLVM_DEBUG(dbgs() << "ARM TP: induction base is not 0\n");
- return false;
- }
+ auto *Base = dyn_cast<SCEVConstant>(AddExpr->getOperand(0));
+ if (!Base || !Base->isZero()) {
+ LLVM_DEBUG(dbgs() << "ARM TP: induction base is not 0\n");
+ return false;
+ }
auto *Step = dyn_cast<SCEVConstant>(AddExpr->getOperand(1));
if (!Step) {
LLVM_DEBUG(dbgs() << "ARM TP: induction step is not a constant: ";
@@ -339,29 +339,29 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
if (VectorWidth == StepValue)
return true;
- LLVM_DEBUG(dbgs() << "ARM TP: Step value " << StepValue
- << " doesn't match vector width " << VectorWidth << "\n");
+ LLVM_DEBUG(dbgs() << "ARM TP: Step value " << StepValue
+ << " doesn't match vector width " << VectorWidth << "\n");
return false;
}
void MVETailPredication::InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask,
- Value *TripCount) {
+ Value *TripCount) {
IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
Module *M = L->getHeader()->getModule();
Type *Ty = IntegerType::get(M->getContext(), 32);
- unsigned VectorWidth =
- cast<FixedVectorType>(ActiveLaneMask->getType())->getNumElements();
+ unsigned VectorWidth =
+ cast<FixedVectorType>(ActiveLaneMask->getType())->getNumElements();
// Insert a phi to count the number of elements processed by the loop.
- Builder.SetInsertPoint(L->getHeader()->getFirstNonPHI());
+ Builder.SetInsertPoint(L->getHeader()->getFirstNonPHI());
PHINode *Processed = Builder.CreatePHI(Ty, 2);
- Processed->addIncoming(ActiveLaneMask->getOperand(1), L->getLoopPreheader());
+ Processed->addIncoming(ActiveLaneMask->getOperand(1), L->getLoopPreheader());
-  // Replace @llvm.get.active.lane.mask() with the ARM specific VCTP intrinsic, and
- // thus represent the effect of tail predication.
+  // Replace @llvm.get.active.lane.mask() with the ARM specific VCTP intrinsic, and
+ // thus represent the effect of tail predication.
Builder.SetInsertPoint(ActiveLaneMask);
- ConstantInt *Factor = ConstantInt::get(cast<IntegerType>(Ty), VectorWidth);
+ ConstantInt *Factor = ConstantInt::get(cast<IntegerType>(Ty), VectorWidth);
Intrinsic::ID VCTPID;
switch (VectorWidth) {
@@ -390,36 +390,36 @@ void MVETailPredication::InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask,
<< "ARM TP: Inserted VCTP: " << *VCTPCall << "\n");
}
-bool MVETailPredication::TryConvertActiveLaneMask(Value *TripCount) {
- SmallVector<IntrinsicInst *, 4> ActiveLaneMasks;
- for (auto *BB : L->getBlocks())
- for (auto &I : *BB)
- if (auto *Int = dyn_cast<IntrinsicInst>(&I))
- if (Int->getIntrinsicID() == Intrinsic::get_active_lane_mask)
- ActiveLaneMasks.push_back(Int);
-
- if (ActiveLaneMasks.empty())
+bool MVETailPredication::TryConvertActiveLaneMask(Value *TripCount) {
+ SmallVector<IntrinsicInst *, 4> ActiveLaneMasks;
+ for (auto *BB : L->getBlocks())
+ for (auto &I : *BB)
+ if (auto *Int = dyn_cast<IntrinsicInst>(&I))
+ if (Int->getIntrinsicID() == Intrinsic::get_active_lane_mask)
+ ActiveLaneMasks.push_back(Int);
+
+ if (ActiveLaneMasks.empty())
return false;
LLVM_DEBUG(dbgs() << "ARM TP: Found predicated vector loop.\n");
- for (auto *ActiveLaneMask : ActiveLaneMasks) {
+ for (auto *ActiveLaneMask : ActiveLaneMasks) {
LLVM_DEBUG(dbgs() << "ARM TP: Found active lane mask: "
<< *ActiveLaneMask << "\n");
- if (!IsSafeActiveMask(ActiveLaneMask, TripCount)) {
+ if (!IsSafeActiveMask(ActiveLaneMask, TripCount)) {
LLVM_DEBUG(dbgs() << "ARM TP: Not safe to insert VCTP.\n");
return false;
}
LLVM_DEBUG(dbgs() << "ARM TP: Safe to insert VCTP.\n");
- InsertVCTPIntrinsic(ActiveLaneMask, TripCount);
+ InsertVCTPIntrinsic(ActiveLaneMask, TripCount);
}
- // Remove dead instructions and now dead phis.
- for (auto *II : ActiveLaneMasks)
- RecursivelyDeleteTriviallyDeadInstructions(II);
- for (auto I : L->blocks())
- DeleteDeadPHIs(I);
+ // Remove dead instructions and now dead phis.
+ for (auto *II : ActiveLaneMasks)
+ RecursivelyDeleteTriviallyDeadInstructions(II);
+ for (auto I : L->blocks())
+ DeleteDeadPHIs(I);
return true;
}
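To make the constant-tripcount check in IsSafeActiveMask concrete: a VCTP is only inserted when the hardware-loop tripcount matches ceil(ElementCount / VectorWidth), computed as floor((ElementCount + VectorWidth - 1) / VectorWidth). A small standalone C++ sketch of that comparison; the element count and tripcount below are made up for illustration.

#include <cstdint>
#include <iostream>

// Tripcount of a tail-predicated vector loop body.
static uint64_t predicatedTripCount(uint64_t ElementCount, uint64_t VectorWidth) {
  return (ElementCount + VectorWidth - 1) / VectorWidth; // ceil division
}

int main() {
  uint64_t ElementCount = 1027; // elements processed by the scalar loop
  uint64_t VectorWidth = 4;     // e.g. a VCTP32 loop: 4 x 32-bit lanes
  uint64_t TC1 = 257;           // tripcount given to the hardware-loop setup
  uint64_t TC2 = predicatedTripCount(ElementCount, VectorWidth); // also 257

  // Only when both agree is it safe to rewrite get.active.lane.mask as a
  // VCTP; otherwise the intrinsic is kept and legalized normally.
  std::cout << (TC1 == TC2 ? "consistent: insert VCTP"
                           : "inconsistent: keep the intrinsic")
            << "\n";
}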
diff --git a/contrib/libs/llvm12/lib/Target/ARM/MVEVPTBlockPass.cpp b/contrib/libs/llvm12/lib/Target/ARM/MVEVPTBlockPass.cpp
index c7f451cba1..89183c16ac 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/MVEVPTBlockPass.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/MVEVPTBlockPass.cpp
@@ -107,12 +107,12 @@ static bool StepOverPredicatedInstrs(MachineBasicBlock::instr_iterator &Iter,
NumInstrsSteppedOver = 0;
while (Iter != EndIter) {
- if (Iter->isDebugInstr()) {
- // Skip debug instructions
- ++Iter;
- continue;
- }
-
+ if (Iter->isDebugInstr()) {
+ // Skip debug instructions
+ ++Iter;
+ continue;
+ }
+
NextPred = getVPTInstrPredicate(*Iter, PredReg);
assert(NextPred != ARMVCC::Else &&
"VPT block pass does not expect Else preds");
@@ -176,8 +176,8 @@ CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter,
LLVM_DEBUG(for (MachineBasicBlock::instr_iterator AddedInstIter =
std::next(BlockBeg);
AddedInstIter != Iter; ++AddedInstIter) {
- if (AddedInstIter->isDebugInstr())
- continue;
+ if (AddedInstIter->isDebugInstr())
+ continue;
dbgs() << " adding: ";
AddedInstIter->dump();
});
@@ -205,7 +205,7 @@ CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter,
if (!IsVPRDefinedOrKilledByBlock(Iter, VPNOTBlockEndIter))
break;
- LLVM_DEBUG(dbgs() << " removing VPNOT: "; Iter->dump());
+ LLVM_DEBUG(dbgs() << " removing VPNOT: "; Iter->dump());
// Record the new size of the block
BlockSize += ElseInstCnt;
@@ -219,9 +219,9 @@ CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter,
// Note that we are using "Iter" to iterate over the block so we can update
// it at the same time.
for (; Iter != VPNOTBlockEndIter; ++Iter) {
- if (Iter->isDebugInstr())
- continue;
-
+ if (Iter->isDebugInstr())
+ continue;
+
// Find the register in which the predicate is
int OpIdx = findFirstVPTPredOperandIdx(*Iter);
assert(OpIdx != -1);
@@ -281,27 +281,27 @@ bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
MIBuilder.add(VCMP->getOperand(1));
MIBuilder.add(VCMP->getOperand(2));
MIBuilder.add(VCMP->getOperand(3));
-
- // We need to remove any kill flags between the original VCMP and the new
- // insertion point.
- for (MachineInstr &MII :
- make_range(VCMP->getIterator(), MI->getIterator())) {
- MII.clearRegisterKills(VCMP->getOperand(1).getReg(), TRI);
- MII.clearRegisterKills(VCMP->getOperand(2).getReg(), TRI);
- }
-
+
+ // We need to remove any kill flags between the original VCMP and the new
+ // insertion point.
+ for (MachineInstr &MII :
+ make_range(VCMP->getIterator(), MI->getIterator())) {
+ MII.clearRegisterKills(VCMP->getOperand(1).getReg(), TRI);
+ MII.clearRegisterKills(VCMP->getOperand(2).getReg(), TRI);
+ }
+
VCMP->eraseFromParent();
} else {
MIBuilder = BuildMI(Block, MI, DL, TII->get(ARM::MVE_VPST));
MIBuilder.addImm((uint64_t)BlockMask);
}
- // Erase all dead instructions (VPNOT's). Do that now so that they do not
- // mess with the bundle creation.
- for (MachineInstr *DeadMI : DeadInstructions)
- DeadMI->eraseFromParent();
- DeadInstructions.clear();
-
+ // Erase all dead instructions (VPNOT's). Do that now so that they do not
+ // mess with the bundle creation.
+ for (MachineInstr *DeadMI : DeadInstructions)
+ DeadMI->eraseFromParent();
+ DeadInstructions.clear();
+
finalizeBundle(
Block, MachineBasicBlock::instr_iterator(MIBuilder.getInstr()), MBIter);
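The debug-instruction handling added above matters because a VPT block is sized by real instructions only. A rough standalone C++ analogue of that walk, assuming toy instruction records instead of MachineInstrs and a two-value predicate encoding: debug entries are skipped without being counted, and the walk stops as soon as the predicate changes.

#include <cstddef>
#include <iostream>
#include <vector>

struct Instr {
  bool IsDebug; // debug value, does not occupy a VPT block slot
  int Pred;     // 0 = unpredicated, 1 = "then"-predicated (toy encoding)
};

// Count consecutive instructions carrying the leading predicate, skipping
// debug instructions so they do not consume block slots.
static size_t stepOverPredicated(const std::vector<Instr> &Block, int Pred) {
  size_t Count = 0;
  for (const Instr &I : Block) {
    if (I.IsDebug)
      continue; // skipped, not counted
    if (I.Pred != Pred)
      break;    // predicate changed: the block ends here
    ++Count;
  }
  return Count;
}

int main() {
  std::vector<Instr> Block = {{false, 1}, {true, 1}, {false, 1}, {false, 0}};
  std::cout << stepOverPredicated(Block, 1) << "\n"; // 2 predicated instrs
}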
diff --git a/contrib/libs/llvm12/lib/Target/ARM/MVEVPTOptimisationsPass.cpp b/contrib/libs/llvm12/lib/Target/ARM/MVEVPTOptimisationsPass.cpp
index 00e4449769..70fb8c5383 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/MVEVPTOptimisationsPass.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/MVEVPTOptimisationsPass.cpp
@@ -6,28 +6,28 @@
//
//===----------------------------------------------------------------------===//
//
-/// \file This pass does a few optimisations related to Tail predicated loops
-/// and MVE VPT blocks before register allocation is performed. For VPT blocks
-/// the goal is to maximize the sizes of the blocks that will be created by the
-/// MVE VPT Block Insertion pass (which runs after register allocation). For
-/// tail predicated loops we transform the loop into something that will
-/// hopefully make the backend ARMLowOverheadLoops pass's job easier.
-///
+/// \file This pass does a few optimisations related to Tail predicated loops
+/// and MVE VPT blocks before register allocation is performed. For VPT blocks
+/// the goal is to maximize the sizes of the blocks that will be created by the
+/// MVE VPT Block Insertion pass (which runs after register allocation). For
+/// tail predicated loops we transform the loop into something that will
+/// hopefully make the backend ARMLowOverheadLoops pass's job easier.
+///
//===----------------------------------------------------------------------===//
#include "ARM.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMBaseInfo.h"
-#include "MVETailPredUtils.h"
+#include "MVETailPredUtils.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/InitializePasses.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include <cassert>
@@ -35,11 +35,11 @@ using namespace llvm;
#define DEBUG_TYPE "arm-mve-vpt-opts"
-static cl::opt<bool>
-MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden,
- cl::desc("Enable merging Loop End and Dec instructions."),
- cl::init(true));
-
+static cl::opt<bool>
+MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden,
+ cl::desc("Enable merging Loop End and Dec instructions."),
+ cl::init(true));
+
namespace {
class MVEVPTOptimisations : public MachineFunctionPass {
public:
@@ -51,315 +51,315 @@ public:
bool runOnMachineFunction(MachineFunction &Fn) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
- AU.addRequired<MachineDominatorTree>();
- AU.addPreserved<MachineDominatorTree>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
StringRef getPassName() const override {
- return "ARM MVE TailPred and VPT Optimisation Pass";
+ return "ARM MVE TailPred and VPT Optimisation Pass";
}
private:
- bool MergeLoopEnd(MachineLoop *ML);
- bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT);
+ bool MergeLoopEnd(MachineLoop *ML);
+ bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT);
MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB,
MachineInstr &Instr,
MachineOperand &User,
Register Target);
bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB);
bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB);
- bool ReplaceConstByVPNOTs(MachineBasicBlock &MBB, MachineDominatorTree *DT);
- bool ConvertVPSEL(MachineBasicBlock &MBB);
+ bool ReplaceConstByVPNOTs(MachineBasicBlock &MBB, MachineDominatorTree *DT);
+ bool ConvertVPSEL(MachineBasicBlock &MBB);
};
char MVEVPTOptimisations::ID = 0;
} // end anonymous namespace
-INITIALIZE_PASS_BEGIN(MVEVPTOptimisations, DEBUG_TYPE,
- "ARM MVE TailPred and VPT Optimisations pass", false,
- false)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_END(MVEVPTOptimisations, DEBUG_TYPE,
- "ARM MVE TailPred and VPT Optimisations pass", false, false)
-
-static MachineInstr *LookThroughCOPY(MachineInstr *MI,
- MachineRegisterInfo *MRI) {
- while (MI && MI->getOpcode() == TargetOpcode::COPY &&
- MI->getOperand(1).getReg().isVirtual())
- MI = MRI->getVRegDef(MI->getOperand(1).getReg());
- return MI;
-}
-
-// Given a loop ML, this attempts to find the t2LoopEnd, t2LoopDec and
-// corresponding PHI that make up a low overhead loop. Only handles 'do' loops
-// at the moment, returning a t2DoLoopStart in LoopStart.
-static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI,
- MachineInstr *&LoopStart, MachineInstr *&LoopPhi,
- MachineInstr *&LoopDec, MachineInstr *&LoopEnd) {
- MachineBasicBlock *Header = ML->getHeader();
- MachineBasicBlock *Latch = ML->getLoopLatch();
- if (!Header || !Latch) {
- LLVM_DEBUG(dbgs() << " no Loop Latch or Header\n");
- return false;
- }
-
- // Find the loop end from the terminators.
- LoopEnd = nullptr;
- for (auto &T : Latch->terminators()) {
- if (T.getOpcode() == ARM::t2LoopEnd && T.getOperand(1).getMBB() == Header) {
- LoopEnd = &T;
- break;
- }
- if (T.getOpcode() == ARM::t2LoopEndDec &&
- T.getOperand(2).getMBB() == Header) {
- LoopEnd = &T;
- break;
- }
- }
- if (!LoopEnd) {
- LLVM_DEBUG(dbgs() << " no LoopEnd\n");
- return false;
- }
- LLVM_DEBUG(dbgs() << " found loop end: " << *LoopEnd);
-
- // Find the dec from the use of the end. There may be copies between
-// instructions. We expect the loop to look like:
- // $vs = t2DoLoopStart ...
- // loop:
- // $vp = phi [ $vs ], [ $vd ]
- // ...
- // $vd = t2LoopDec $vp
- // ...
- // t2LoopEnd $vd, loop
- if (LoopEnd->getOpcode() == ARM::t2LoopEndDec)
- LoopDec = LoopEnd;
- else {
- LoopDec =
- LookThroughCOPY(MRI->getVRegDef(LoopEnd->getOperand(0).getReg()), MRI);
- if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) {
- LLVM_DEBUG(dbgs() << " didn't find LoopDec where we expected!\n");
- return false;
- }
- }
- LLVM_DEBUG(dbgs() << " found loop dec: " << *LoopDec);
-
- LoopPhi =
- LookThroughCOPY(MRI->getVRegDef(LoopDec->getOperand(1).getReg()), MRI);
- if (!LoopPhi || LoopPhi->getOpcode() != TargetOpcode::PHI ||
- LoopPhi->getNumOperands() != 5 ||
- (LoopPhi->getOperand(2).getMBB() != Latch &&
- LoopPhi->getOperand(4).getMBB() != Latch)) {
- LLVM_DEBUG(dbgs() << " didn't find PHI where we expected!\n");
- return false;
- }
- LLVM_DEBUG(dbgs() << " found loop phi: " << *LoopPhi);
-
- Register StartReg = LoopPhi->getOperand(2).getMBB() == Latch
- ? LoopPhi->getOperand(3).getReg()
- : LoopPhi->getOperand(1).getReg();
- LoopStart = LookThroughCOPY(MRI->getVRegDef(StartReg), MRI);
- if (!LoopStart || LoopStart->getOpcode() != ARM::t2DoLoopStart) {
- LLVM_DEBUG(dbgs() << " didn't find Start where we expected!\n");
- return false;
- }
- LLVM_DEBUG(dbgs() << " found loop start: " << *LoopStart);
-
- return true;
-}
-
-// This function converts loops with t2LoopEnd and t2LoopDec instructions into
-// a single t2LoopEndDec instruction. To do that it needs to make sure that LR
-// will be valid to be used for the low overhead loop, which means nothing else
-// is using LR (especially calls) and there are no superfluous copies in the
-// loop. The t2LoopEndDec is a branching terminator that produces a value (the
-// decrement) around the loop edge, which means we need to be careful that they
-// will be valid to allocate without any spilling.
-bool MVEVPTOptimisations::MergeLoopEnd(MachineLoop *ML) {
- if (!MergeEndDec)
- return false;
-
- LLVM_DEBUG(dbgs() << "MergeLoopEnd on loop " << ML->getHeader()->getName()
- << "\n");
-
- MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
- if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
- return false;
-
- // Check if there is an illegal instruction (a call) in the low overhead loop
- // and if so revert it now before we get any further.
- for (MachineBasicBlock *MBB : ML->blocks()) {
- for (MachineInstr &MI : *MBB) {
- if (MI.isCall()) {
- LLVM_DEBUG(dbgs() << "Found call in loop, reverting: " << MI);
- RevertDoLoopStart(LoopStart, TII);
- RevertLoopDec(LoopDec, TII);
- RevertLoopEnd(LoopEnd, TII);
- return true;
- }
- }
- }
-
- // Remove any copies from the loop, to ensure the phi that remains is both
- // simpler and contains no extra uses. Because t2LoopEndDec is a terminator
- // that cannot spill, we need to be careful what remains in the loop.
- Register PhiReg = LoopPhi->getOperand(0).getReg();
- Register DecReg = LoopDec->getOperand(0).getReg();
- Register StartReg = LoopStart->getOperand(0).getReg();
- // Ensure the uses are expected, and collect any copies we want to remove.
- SmallVector<MachineInstr *, 4> Copies;
- auto CheckUsers = [&Copies](Register BaseReg,
- ArrayRef<MachineInstr *> ExpectedUsers,
- MachineRegisterInfo *MRI) {
- SmallVector<Register, 4> Worklist;
- Worklist.push_back(BaseReg);
- while (!Worklist.empty()) {
- Register Reg = Worklist.pop_back_val();
- for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
- if (count(ExpectedUsers, &MI))
- continue;
- if (MI.getOpcode() != TargetOpcode::COPY ||
- !MI.getOperand(0).getReg().isVirtual()) {
- LLVM_DEBUG(dbgs() << "Extra users of register found: " << MI);
- return false;
- }
- Worklist.push_back(MI.getOperand(0).getReg());
- Copies.push_back(&MI);
- }
- }
- return true;
- };
- if (!CheckUsers(PhiReg, {LoopDec}, MRI) ||
- !CheckUsers(DecReg, {LoopPhi, LoopEnd}, MRI) ||
- !CheckUsers(StartReg, {LoopPhi}, MRI))
- return false;
-
- MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass);
- MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass);
- MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass);
-
- if (LoopPhi->getOperand(2).getMBB() == ML->getLoopLatch()) {
- LoopPhi->getOperand(3).setReg(StartReg);
- LoopPhi->getOperand(1).setReg(DecReg);
- } else {
- LoopPhi->getOperand(1).setReg(StartReg);
- LoopPhi->getOperand(3).setReg(DecReg);
- }
-
- // Replace the loop dec and loop end as a single instruction.
- MachineInstrBuilder MI =
- BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(),
- TII->get(ARM::t2LoopEndDec), DecReg)
- .addReg(PhiReg)
- .add(LoopEnd->getOperand(1));
- (void)MI;
- LLVM_DEBUG(dbgs() << "Merged LoopDec and End into: " << *MI.getInstr());
-
- LoopDec->eraseFromParent();
- LoopEnd->eraseFromParent();
- for (auto *MI : Copies)
- MI->eraseFromParent();
- return true;
-}
-
-// Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP
-// instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP
-// instruction, making the backend ARMLowOverheadLoops pass's job of finding the
-// VCTP operand much simpler.
-bool MVEVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
- MachineDominatorTree *DT) {
- LLVM_DEBUG(dbgs() << "ConvertTailPredLoop on loop "
- << ML->getHeader()->getName() << "\n");
-
- // Find some loop components including the LoopEnd/Dec/Start, and any VCTP's
- // in the loop.
- MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
- if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
- return false;
- if (LoopDec != LoopEnd)
- return false;
-
- SmallVector<MachineInstr *, 4> VCTPs;
- for (MachineBasicBlock *BB : ML->blocks())
- for (MachineInstr &MI : *BB)
- if (isVCTP(&MI))
- VCTPs.push_back(&MI);
-
- if (VCTPs.empty()) {
- LLVM_DEBUG(dbgs() << " no VCTPs\n");
- return false;
- }
-
- // Check all VCTPs are the same.
- MachineInstr *FirstVCTP = *VCTPs.begin();
- for (MachineInstr *VCTP : VCTPs) {
- LLVM_DEBUG(dbgs() << " with VCTP " << *VCTP);
- if (VCTP->getOpcode() != FirstVCTP->getOpcode() ||
- VCTP->getOperand(0).getReg() != FirstVCTP->getOperand(0).getReg()) {
- LLVM_DEBUG(dbgs() << " VCTP's are not identical\n");
- return false;
- }
- }
-
- // Check for the register being used can be setup before the loop. We expect
- // this to be:
- // $vx = ...
- // loop:
- // $vp = PHI [ $vx ], [ $vd ]
- // ..
- // $vpr = VCTP $vp
- // ..
- // $vd = t2SUBri $vp, #n
- // ..
- Register CountReg = FirstVCTP->getOperand(1).getReg();
- if (!CountReg.isVirtual()) {
- LLVM_DEBUG(dbgs() << " cannot determine VCTP PHI\n");
- return false;
- }
- MachineInstr *Phi = LookThroughCOPY(MRI->getVRegDef(CountReg), MRI);
- if (!Phi || Phi->getOpcode() != TargetOpcode::PHI ||
- Phi->getNumOperands() != 5 ||
- (Phi->getOperand(2).getMBB() != ML->getLoopLatch() &&
- Phi->getOperand(4).getMBB() != ML->getLoopLatch())) {
- LLVM_DEBUG(dbgs() << " cannot determine VCTP Count\n");
- return false;
- }
- CountReg = Phi->getOperand(2).getMBB() == ML->getLoopLatch()
- ? Phi->getOperand(3).getReg()
- : Phi->getOperand(1).getReg();
-
- // Replace the t2DoLoopStart with the t2DoLoopStartTP, move it to the end of
- // the preheader and add the new CountReg to it. We attempt to place it late
- // in the preheader, but may need to move that earlier based on uses.
- MachineBasicBlock *MBB = LoopStart->getParent();
- MachineBasicBlock::iterator InsertPt = MBB->getFirstTerminator();
- for (MachineInstr &Use :
- MRI->use_instructions(LoopStart->getOperand(0).getReg()))
- if ((InsertPt != MBB->end() && !DT->dominates(&*InsertPt, &Use)) ||
- !DT->dominates(ML->getHeader(), Use.getParent())) {
- LLVM_DEBUG(dbgs() << " InsertPt could not be a terminator!\n");
- return false;
- }
-
- MachineInstrBuilder MI = BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(),
- TII->get(ARM::t2DoLoopStartTP))
- .add(LoopStart->getOperand(0))
- .add(LoopStart->getOperand(1))
- .addReg(CountReg);
- (void)MI;
- LLVM_DEBUG(dbgs() << "Replacing " << *LoopStart << " with "
- << *MI.getInstr());
- MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass);
- LoopStart->eraseFromParent();
-
- return true;
-}
-
+INITIALIZE_PASS_BEGIN(MVEVPTOptimisations, DEBUG_TYPE,
+ "ARM MVE TailPred and VPT Optimisations pass", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(MVEVPTOptimisations, DEBUG_TYPE,
+ "ARM MVE TailPred and VPT Optimisations pass", false, false)
+
+static MachineInstr *LookThroughCOPY(MachineInstr *MI,
+ MachineRegisterInfo *MRI) {
+ while (MI && MI->getOpcode() == TargetOpcode::COPY &&
+ MI->getOperand(1).getReg().isVirtual())
+ MI = MRI->getVRegDef(MI->getOperand(1).getReg());
+ return MI;
+}
+
+// Given a loop ML, this attempts to find the t2LoopEnd, t2LoopDec and
+// corresponding PHI that make up a low overhead loop. Only handles 'do' loops
+// at the moment, returning a t2DoLoopStart in LoopStart.
+static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI,
+ MachineInstr *&LoopStart, MachineInstr *&LoopPhi,
+ MachineInstr *&LoopDec, MachineInstr *&LoopEnd) {
+ MachineBasicBlock *Header = ML->getHeader();
+ MachineBasicBlock *Latch = ML->getLoopLatch();
+ if (!Header || !Latch) {
+ LLVM_DEBUG(dbgs() << " no Loop Latch or Header\n");
+ return false;
+ }
+
+ // Find the loop end from the terminators.
+ LoopEnd = nullptr;
+ for (auto &T : Latch->terminators()) {
+ if (T.getOpcode() == ARM::t2LoopEnd && T.getOperand(1).getMBB() == Header) {
+ LoopEnd = &T;
+ break;
+ }
+ if (T.getOpcode() == ARM::t2LoopEndDec &&
+ T.getOperand(2).getMBB() == Header) {
+ LoopEnd = &T;
+ break;
+ }
+ }
+ if (!LoopEnd) {
+ LLVM_DEBUG(dbgs() << " no LoopEnd\n");
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << " found loop end: " << *LoopEnd);
+
+ // Find the dec from the use of the end. There may be copies between
+// instructions. We expect the loop to look like:
+ // $vs = t2DoLoopStart ...
+ // loop:
+ // $vp = phi [ $vs ], [ $vd ]
+ // ...
+ // $vd = t2LoopDec $vp
+ // ...
+ // t2LoopEnd $vd, loop
+ if (LoopEnd->getOpcode() == ARM::t2LoopEndDec)
+ LoopDec = LoopEnd;
+ else {
+ LoopDec =
+ LookThroughCOPY(MRI->getVRegDef(LoopEnd->getOperand(0).getReg()), MRI);
+ if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) {
+ LLVM_DEBUG(dbgs() << " didn't find LoopDec where we expected!\n");
+ return false;
+ }
+ }
+ LLVM_DEBUG(dbgs() << " found loop dec: " << *LoopDec);
+
+ LoopPhi =
+ LookThroughCOPY(MRI->getVRegDef(LoopDec->getOperand(1).getReg()), MRI);
+ if (!LoopPhi || LoopPhi->getOpcode() != TargetOpcode::PHI ||
+ LoopPhi->getNumOperands() != 5 ||
+ (LoopPhi->getOperand(2).getMBB() != Latch &&
+ LoopPhi->getOperand(4).getMBB() != Latch)) {
+ LLVM_DEBUG(dbgs() << " didn't find PHI where we expected!\n");
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << " found loop phi: " << *LoopPhi);
+
+ Register StartReg = LoopPhi->getOperand(2).getMBB() == Latch
+ ? LoopPhi->getOperand(3).getReg()
+ : LoopPhi->getOperand(1).getReg();
+ LoopStart = LookThroughCOPY(MRI->getVRegDef(StartReg), MRI);
+ if (!LoopStart || LoopStart->getOpcode() != ARM::t2DoLoopStart) {
+ LLVM_DEBUG(dbgs() << " didn't find Start where we expected!\n");
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << " found loop start: " << *LoopStart);
+
+ return true;
+}
+
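findLoopComponents above leans on LookThroughCOPY to see through register copies before matching the start/phi/dec/end pattern. A standalone C++ sketch of that idea, assuming a toy register-to-definition map in place of MachineRegisterInfo; register names and opcodes are illustrative.

#include <iostream>
#include <map>
#include <string>

struct Def {
  bool IsCopy;        // models a COPY of a virtual register
  std::string Source; // register the copy reads from (when IsCopy)
  std::string Name;   // label of the real defining instruction
};

// Follow chains of copies until a non-copy definition is reached, mirroring
// how LookThroughCOPY walks getVRegDef across COPY instructions.
static const Def *lookThroughCopies(const std::map<std::string, Def> &Defs,
                                    const std::string &Reg) {
  const Def *D = &Defs.at(Reg);
  while (D->IsCopy)
    D = &Defs.at(D->Source);
  return D;
}

int main() {
  std::map<std::string, Def> Defs = {
      {"r0", {false, "", "t2DoLoopStart"}},
      {"r1", {true, "r0", "copy"}},
      {"r2", {true, "r1", "copy"}},
  };
  std::cout << lookThroughCopies(Defs, "r2")->Name << "\n"; // t2DoLoopStart
}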
+// This function converts loops with t2LoopEnd and t2LoopDec instructions into
+// a single t2LoopEndDec instruction. To do that it needs to make sure that LR
+// will be valid to be used for the low overhead loop, which means nothing else
+// is using LR (especially calls) and there are no superfluous copies in the
+// loop. The t2LoopEndDec is a branching terminator that produces a value (the
+// decrement) around the loop edge, which means we need to be careful that they
+// will be valid to allocate without any spilling.
+bool MVEVPTOptimisations::MergeLoopEnd(MachineLoop *ML) {
+ if (!MergeEndDec)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "MergeLoopEnd on loop " << ML->getHeader()->getName()
+ << "\n");
+
+ MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
+ if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
+ return false;
+
+ // Check if there is an illegal instruction (a call) in the low overhead loop
+ // and if so revert it now before we get any further.
+ for (MachineBasicBlock *MBB : ML->blocks()) {
+ for (MachineInstr &MI : *MBB) {
+ if (MI.isCall()) {
+ LLVM_DEBUG(dbgs() << "Found call in loop, reverting: " << MI);
+ RevertDoLoopStart(LoopStart, TII);
+ RevertLoopDec(LoopDec, TII);
+ RevertLoopEnd(LoopEnd, TII);
+ return true;
+ }
+ }
+ }
+
+ // Remove any copies from the loop, to ensure the phi that remains is
+ // simpler and has no extra uses. Because t2LoopEndDec is a terminator
+ // that cannot spill, we need to be careful what remains in the loop.
+ Register PhiReg = LoopPhi->getOperand(0).getReg();
+ Register DecReg = LoopDec->getOperand(0).getReg();
+ Register StartReg = LoopStart->getOperand(0).getReg();
+ // Ensure the uses are expected, and collect any copies we want to remove.
+ SmallVector<MachineInstr *, 4> Copies;
+ auto CheckUsers = [&Copies](Register BaseReg,
+ ArrayRef<MachineInstr *> ExpectedUsers,
+ MachineRegisterInfo *MRI) {
+ SmallVector<Register, 4> Worklist;
+ Worklist.push_back(BaseReg);
+ while (!Worklist.empty()) {
+ Register Reg = Worklist.pop_back_val();
+ for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
+ if (count(ExpectedUsers, &MI))
+ continue;
+ if (MI.getOpcode() != TargetOpcode::COPY ||
+ !MI.getOperand(0).getReg().isVirtual()) {
+ LLVM_DEBUG(dbgs() << "Extra users of register found: " << MI);
+ return false;
+ }
+ Worklist.push_back(MI.getOperand(0).getReg());
+ Copies.push_back(&MI);
+ }
+ }
+ return true;
+ };
+ if (!CheckUsers(PhiReg, {LoopDec}, MRI) ||
+ !CheckUsers(DecReg, {LoopPhi, LoopEnd}, MRI) ||
+ !CheckUsers(StartReg, {LoopPhi}, MRI))
+ return false;
+
+ MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass);
+ MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass);
+ MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass);
+
+ if (LoopPhi->getOperand(2).getMBB() == ML->getLoopLatch()) {
+ LoopPhi->getOperand(3).setReg(StartReg);
+ LoopPhi->getOperand(1).setReg(DecReg);
+ } else {
+ LoopPhi->getOperand(1).setReg(StartReg);
+ LoopPhi->getOperand(3).setReg(DecReg);
+ }
+
+ // Replace the loop dec and loop end with a single instruction.
+ MachineInstrBuilder MI =
+ BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(),
+ TII->get(ARM::t2LoopEndDec), DecReg)
+ .addReg(PhiReg)
+ .add(LoopEnd->getOperand(1));
+ (void)MI;
+ LLVM_DEBUG(dbgs() << "Merged LoopDec and End into: " << *MI.getInstr());
+
+ LoopDec->eraseFromParent();
+ LoopEnd->eraseFromParent();
+ for (auto *MI : Copies)
+ MI->eraseFromParent();
+ return true;
+}
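The CheckUsers lambda above is the heart of the safety check. As a rough, self-contained model (plain C++ with invented types, not the MachineRegisterInfo API), the same worklist idea looks like this: every user of the register must be one of the expected instructions or a copy, and copies are followed transitively and remembered for removal.

#include <map>
#include <set>
#include <string>
#include <vector>

struct FakeUse {
  std::string User;    // name of the using instruction
  bool IsCopy;         // true if the user is a plain copy
  std::string CopyDst; // register the copy defines, when IsCopy is true
};

// Accept only expected users or copies; follow copies and record them.
bool checkUsers(const std::string &BaseReg,
                const std::set<std::string> &ExpectedUsers,
                const std::multimap<std::string, FakeUse> &Uses,
                std::vector<std::string> &CopiesToRemove) {
  std::vector<std::string> Worklist{BaseReg};
  while (!Worklist.empty()) {
    std::string Reg = Worklist.back();
    Worklist.pop_back();
    auto Range = Uses.equal_range(Reg);
    for (auto It = Range.first; It != Range.second; ++It) {
      if (ExpectedUsers.count(It->second.User))
        continue;
      if (!It->second.IsCopy)
        return false; // unexpected user, bail out as MergeLoopEnd does
      Worklist.push_back(It->second.CopyDst);
      CopiesToRemove.push_back(It->second.User);
    }
  }
  return true;
}

int main() {
  std::multimap<std::string, FakeUse> Uses = {
      {"%phi", {"t2LoopDec", false, ""}},
      {"%phi", {"COPY_A", true, "%c"}},
      {"%c", {"t2LoopEndDec", false, ""}}};
  std::vector<std::string> Copies;
  bool OK = checkUsers("%phi", {"t2LoopDec", "t2LoopEndDec"}, Uses, Copies);
  return OK && Copies.size() == 1 ? 0 : 1;
}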
+
+// Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP
+// instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP
+// instruction, making the backend ARMLowOverheadLoops pass's job of finding the
+// VCTP operand much simpler.
+bool MVEVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
+ MachineDominatorTree *DT) {
+ LLVM_DEBUG(dbgs() << "ConvertTailPredLoop on loop "
+ << ML->getHeader()->getName() << "\n");
+
+ // Find some loop components including the LoopEnd/Dec/Start, and any VCTPs
+ // in the loop.
+ MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
+ if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
+ return false;
+ if (LoopDec != LoopEnd)
+ return false;
+
+ SmallVector<MachineInstr *, 4> VCTPs;
+ for (MachineBasicBlock *BB : ML->blocks())
+ for (MachineInstr &MI : *BB)
+ if (isVCTP(&MI))
+ VCTPs.push_back(&MI);
+
+ if (VCTPs.empty()) {
+ LLVM_DEBUG(dbgs() << " no VCTPs\n");
+ return false;
+ }
+
+ // Check all VCTPs are the same.
+ MachineInstr *FirstVCTP = *VCTPs.begin();
+ for (MachineInstr *VCTP : VCTPs) {
+ LLVM_DEBUG(dbgs() << " with VCTP " << *VCTP);
+ if (VCTP->getOpcode() != FirstVCTP->getOpcode() ||
+ VCTP->getOperand(0).getReg() != FirstVCTP->getOperand(0).getReg()) {
+ LLVM_DEBUG(dbgs() << " VCTP's are not identical\n");
+ return false;
+ }
+ }
+
+ // Check that the register being used can be set up before the loop. We expect
+ // this to be:
+ // $vx = ...
+ // loop:
+ // $vp = PHI [ $vx ], [ $vd ]
+ // ..
+ // $vpr = VCTP $vp
+ // ..
+ // $vd = t2SUBri $vp, #n
+ // ..
+ Register CountReg = FirstVCTP->getOperand(1).getReg();
+ if (!CountReg.isVirtual()) {
+ LLVM_DEBUG(dbgs() << " cannot determine VCTP PHI\n");
+ return false;
+ }
+ MachineInstr *Phi = LookThroughCOPY(MRI->getVRegDef(CountReg), MRI);
+ if (!Phi || Phi->getOpcode() != TargetOpcode::PHI ||
+ Phi->getNumOperands() != 5 ||
+ (Phi->getOperand(2).getMBB() != ML->getLoopLatch() &&
+ Phi->getOperand(4).getMBB() != ML->getLoopLatch())) {
+ LLVM_DEBUG(dbgs() << " cannot determine VCTP Count\n");
+ return false;
+ }
+ CountReg = Phi->getOperand(2).getMBB() == ML->getLoopLatch()
+ ? Phi->getOperand(3).getReg()
+ : Phi->getOperand(1).getReg();
+
+ // Replace the t2DoLoopStart with the t2DoLoopStartTP, move it to the end of
+ // the preheader and add the new CountReg to it. We attempt to place it late
+ // in the preheader, but may need to move that earlier based on uses.
+ MachineBasicBlock *MBB = LoopStart->getParent();
+ MachineBasicBlock::iterator InsertPt = MBB->getFirstTerminator();
+ for (MachineInstr &Use :
+ MRI->use_instructions(LoopStart->getOperand(0).getReg()))
+ if ((InsertPt != MBB->end() && !DT->dominates(&*InsertPt, &Use)) ||
+ !DT->dominates(ML->getHeader(), Use.getParent())) {
+ LLVM_DEBUG(dbgs() << " InsertPt could not be a terminator!\n");
+ return false;
+ }
+
+ MachineInstrBuilder MI = BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(),
+ TII->get(ARM::t2DoLoopStartTP))
+ .add(LoopStart->getOperand(0))
+ .add(LoopStart->getOperand(1))
+ .addReg(CountReg);
+ (void)MI;
+ LLVM_DEBUG(dbgs() << "Replacing " << *LoopStart << " with "
+ << *MI.getInstr());
+ MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass);
+ LoopStart->eraseFromParent();
+
+ return true;
+}
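Both functions above read incoming PHI values by position. A simplified model of that layout (the FakePhi type and helper are invented for illustration, not the MachineInstr API): operand 0 is the def and the rest are (value, block) pairs, so a two-predecessor PHI has five operands and the incoming value for a block is the operand just before that block.

#include <cassert>
#include <string>

struct FakePhi {
  std::string Def;       // operand 0
  std::string Val1, BB1; // operands 1 and 2
  std::string Val2, BB2; // operands 3 and 4
};

// Pick the incoming value that flows in from the given predecessor block.
std::string incomingValueFor(const FakePhi &Phi, const std::string &BB) {
  return Phi.BB1 == BB ? Phi.Val1 : Phi.Val2;
}

int main() {
  FakePhi Phi{"%p", "%start", "preheader", "%dec", "latch"};
  assert(incomingValueFor(Phi, "latch") == "%dec");       // value around the backedge
  assert(incomingValueFor(Phi, "preheader") == "%start"); // value from outside the loop
  return 0;
}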
+
// Returns true if Opcode is any VCMP Opcode.
static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; }
@@ -650,7 +650,7 @@ bool MVEVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) {
}
for (MachineInstr *DeadInstruction : DeadInstructions)
- DeadInstruction->eraseFromParent();
+ DeadInstruction->eraseFromParent();
return Modified;
}
@@ -724,160 +724,160 @@ bool MVEVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) {
}
for (MachineInstr *DeadInstruction : DeadInstructions)
- DeadInstruction->eraseFromParent();
-
- return !DeadInstructions.empty();
-}
-
-bool MVEVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB,
- MachineDominatorTree *DT) {
- // Scan through the block, looking for instructions that use constants moves
- // into VPR that are the negative of one another. These are expected to be
- // COPY's to VCCRRegClass, from a t2MOVi or t2MOVi16. The last seen constant
- // mask is kept it or and VPNOT's of it are added or reused as we scan through
- // the function.
- unsigned LastVPTImm = 0;
- Register LastVPTReg = 0;
- SmallSet<MachineInstr *, 4> DeadInstructions;
-
- for (MachineInstr &Instr : MBB.instrs()) {
- // Look for predicated MVE instructions.
- int PIdx = llvm::findFirstVPTPredOperandIdx(Instr);
- if (PIdx == -1)
- continue;
- Register VPR = Instr.getOperand(PIdx + 1).getReg();
- if (!VPR.isVirtual())
- continue;
-
- // From that we are looking for an instruction like %11:vccr = COPY %9:rgpr.
- MachineInstr *Copy = MRI->getVRegDef(VPR);
- if (!Copy || Copy->getOpcode() != TargetOpcode::COPY ||
- !Copy->getOperand(1).getReg().isVirtual() ||
- MRI->getRegClass(Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) {
- LastVPTReg = 0;
- continue;
- }
- Register GPR = Copy->getOperand(1).getReg();
-
- // Find the Immediate used by the copy.
- auto getImm = [&](Register GPR) -> unsigned {
- MachineInstr *Def = MRI->getVRegDef(GPR);
- if (Def && (Def->getOpcode() == ARM::t2MOVi ||
- Def->getOpcode() == ARM::t2MOVi16))
- return Def->getOperand(1).getImm();
- return -1U;
- };
- unsigned Imm = getImm(GPR);
- if (Imm == -1U) {
- LastVPTReg = 0;
- continue;
- }
-
- unsigned NotImm = ~Imm & 0xffff;
- if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) {
- Instr.getOperand(PIdx + 1).setReg(LastVPTReg);
- if (MRI->use_empty(VPR)) {
- DeadInstructions.insert(Copy);
- if (MRI->hasOneUse(GPR))
- DeadInstructions.insert(MRI->getVRegDef(GPR));
- }
- LLVM_DEBUG(dbgs() << "Reusing predicate: in " << Instr);
- } else if (LastVPTReg != 0 && LastVPTImm == NotImm) {
- // We have found the not of a previous constant. Create a VPNot of the
- // earlier predicate reg and use it instead of the copy.
- Register NewVPR = MRI->createVirtualRegister(&ARM::VCCRRegClass);
- auto VPNot = BuildMI(MBB, &Instr, Instr.getDebugLoc(),
- TII->get(ARM::MVE_VPNOT), NewVPR)
- .addReg(LastVPTReg);
- addUnpredicatedMveVpredNOp(VPNot);
-
- // Use the new register and check if the def is now dead.
- Instr.getOperand(PIdx + 1).setReg(NewVPR);
- if (MRI->use_empty(VPR)) {
- DeadInstructions.insert(Copy);
- if (MRI->hasOneUse(GPR))
- DeadInstructions.insert(MRI->getVRegDef(GPR));
- }
- LLVM_DEBUG(dbgs() << "Adding VPNot: " << *VPNot << " to replace use at "
- << Instr);
- VPR = NewVPR;
- }
-
- LastVPTImm = Imm;
- LastVPTReg = VPR;
- }
-
- for (MachineInstr *DI : DeadInstructions)
- DI->eraseFromParent();
-
- return !DeadInstructions.empty();
-}
-
-// Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a
-// somewhat blunt approximation to allow tail predicated with vpsel
-// instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly
-// different semantics under tail predication. Until that is modelled we just
-// convert to a VMOVT (via a predicated VORR) instead.
-bool MVEVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) {
- bool HasVCTP = false;
- SmallVector<MachineInstr *, 4> DeadInstructions;
-
- for (MachineInstr &MI : MBB.instrs()) {
- if (isVCTP(&MI)) {
- HasVCTP = true;
- continue;
- }
-
- if (!HasVCTP || MI.getOpcode() != ARM::MVE_VPSEL)
- continue;
-
- MachineInstrBuilder MIBuilder =
- BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VORR))
- .add(MI.getOperand(0))
- .add(MI.getOperand(1))
- .add(MI.getOperand(1))
- .addImm(ARMVCC::Then)
- .add(MI.getOperand(4))
- .add(MI.getOperand(2));
- // Silence unused variable warning in release builds.
- (void)MIBuilder;
- LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump();
- dbgs() << " with VMOVT: "; MIBuilder.getInstr()->dump());
- DeadInstructions.push_back(&MI);
- }
-
- for (MachineInstr *DeadInstruction : DeadInstructions)
- DeadInstruction->eraseFromParent();
+ DeadInstruction->eraseFromParent();
return !DeadInstructions.empty();
}
+bool MVEVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB,
+ MachineDominatorTree *DT) {
+ // Scan through the block, looking for instructions that use constant moves
+ // into VPR that are the negative of one another. These are expected to be
+ // COPYs to VCCRRegClass, from a t2MOVi or t2MOVi16. The last seen constant
+ // mask is kept, and VPNOTs of it are added or reused as we scan through
+ // the function.
+ unsigned LastVPTImm = 0;
+ Register LastVPTReg = 0;
+ SmallSet<MachineInstr *, 4> DeadInstructions;
+
+ for (MachineInstr &Instr : MBB.instrs()) {
+ // Look for predicated MVE instructions.
+ int PIdx = llvm::findFirstVPTPredOperandIdx(Instr);
+ if (PIdx == -1)
+ continue;
+ Register VPR = Instr.getOperand(PIdx + 1).getReg();
+ if (!VPR.isVirtual())
+ continue;
+
+ // From that we are looking for an instruction like %11:vccr = COPY %9:rgpr.
+ MachineInstr *Copy = MRI->getVRegDef(VPR);
+ if (!Copy || Copy->getOpcode() != TargetOpcode::COPY ||
+ !Copy->getOperand(1).getReg().isVirtual() ||
+ MRI->getRegClass(Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) {
+ LastVPTReg = 0;
+ continue;
+ }
+ Register GPR = Copy->getOperand(1).getReg();
+
+ // Find the Immediate used by the copy.
+ auto getImm = [&](Register GPR) -> unsigned {
+ MachineInstr *Def = MRI->getVRegDef(GPR);
+ if (Def && (Def->getOpcode() == ARM::t2MOVi ||
+ Def->getOpcode() == ARM::t2MOVi16))
+ return Def->getOperand(1).getImm();
+ return -1U;
+ };
+ unsigned Imm = getImm(GPR);
+ if (Imm == -1U) {
+ LastVPTReg = 0;
+ continue;
+ }
+
+ unsigned NotImm = ~Imm & 0xffff;
+ if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) {
+ Instr.getOperand(PIdx + 1).setReg(LastVPTReg);
+ if (MRI->use_empty(VPR)) {
+ DeadInstructions.insert(Copy);
+ if (MRI->hasOneUse(GPR))
+ DeadInstructions.insert(MRI->getVRegDef(GPR));
+ }
+ LLVM_DEBUG(dbgs() << "Reusing predicate: in " << Instr);
+ } else if (LastVPTReg != 0 && LastVPTImm == NotImm) {
+ // We have found the not of a previous constant. Create a VPNot of the
+ // earlier predicate reg and use it instead of the copy.
+ Register NewVPR = MRI->createVirtualRegister(&ARM::VCCRRegClass);
+ auto VPNot = BuildMI(MBB, &Instr, Instr.getDebugLoc(),
+ TII->get(ARM::MVE_VPNOT), NewVPR)
+ .addReg(LastVPTReg);
+ addUnpredicatedMveVpredNOp(VPNot);
+
+ // Use the new register and check if the def is now dead.
+ Instr.getOperand(PIdx + 1).setReg(NewVPR);
+ if (MRI->use_empty(VPR)) {
+ DeadInstructions.insert(Copy);
+ if (MRI->hasOneUse(GPR))
+ DeadInstructions.insert(MRI->getVRegDef(GPR));
+ }
+ LLVM_DEBUG(dbgs() << "Adding VPNot: " << *VPNot << " to replace use at "
+ << Instr);
+ VPR = NewVPR;
+ }
+
+ LastVPTImm = Imm;
+ LastVPTReg = VPR;
+ }
+
+ for (MachineInstr *DI : DeadInstructions)
+ DI->eraseFromParent();
+
+ return !DeadInstructions.empty();
+}
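The reuse test above relies on VPR predicate constants being 16-bit lane masks, so the complement of the last constant can be produced with a single MVE_VPNOT of the earlier predicate. A tiny self-contained check of that arithmetic (plain C++, not MVE intrinsics):

#include <cassert>

int main() {
  unsigned LastImm = 0x00ff;            // earlier constant: lanes 0-7 active
  unsigned Imm = 0xff00;                // later constant: lanes 8-15 active
  unsigned NotLast = ~LastImm & 0xffff; // complement within the 16-bit mask
  assert(Imm == NotLast); // so a VPNOT of the earlier predicate can be reused
  return 0;
}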
+
+// Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a
+// somewhat blunt approximation to allow tail predication with vpsel
+// instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly
+// different semantics under tail predication. Until that is modelled we just
+// convert to a VMOVT (via a predicated VORR) instead.
+bool MVEVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) {
+ bool HasVCTP = false;
+ SmallVector<MachineInstr *, 4> DeadInstructions;
+
+ for (MachineInstr &MI : MBB.instrs()) {
+ if (isVCTP(&MI)) {
+ HasVCTP = true;
+ continue;
+ }
+
+ if (!HasVCTP || MI.getOpcode() != ARM::MVE_VPSEL)
+ continue;
+
+ MachineInstrBuilder MIBuilder =
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VORR))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(1))
+ .addImm(ARMVCC::Then)
+ .add(MI.getOperand(4))
+ .add(MI.getOperand(2));
+ // Silence unused variable warning in release builds.
+ (void)MIBuilder;
+ LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump();
+ dbgs() << " with VMOVT: "; MIBuilder.getInstr()->dump());
+ DeadInstructions.push_back(&MI);
+ }
+
+ for (MachineInstr *DeadInstruction : DeadInstructions)
+ DeadInstruction->eraseFromParent();
+
+ return !DeadInstructions.empty();
+}
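The VPSEL-to-VORR rewrite above works because ORing a lane with itself is the identity, so a VORR of the first operand with itself, predicated Then on the mask and with the second operand as the inactive value, computes the same lane-wise select. A plain C++ sketch of that equivalence (scalar lanes, not MVE intrinsics):

#include <array>
#include <cassert>

int main() {
  std::array<int, 4> Op1 = {1, 2, 3, 4};
  std::array<int, 4> Op2 = {9, 8, 7, 6};
  std::array<bool, 4> P = {true, false, true, false};
  std::array<int, 4> Out{};
  for (unsigned I = 0; I < 4; ++I)
    Out[I] = P[I] ? (Op1[I] | Op1[I]) : Op2[I]; // predicated VORR, inactive = Op2
  for (unsigned I = 0; I < 4; ++I)
    assert(Out[I] == (P[I] ? Op1[I] : Op2[I])); // matches VPSEL semantics
  return 0;
}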
+
bool MVEVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
const ARMSubtarget &STI =
static_cast<const ARMSubtarget &>(Fn.getSubtarget());
- if (!STI.isThumb2() || !STI.hasLOB())
+ if (!STI.isThumb2() || !STI.hasLOB())
return false;
TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
MRI = &Fn.getRegInfo();
- MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfo>();
- MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
+ MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfo>();
+ MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n"
<< "********** Function: " << Fn.getName() << '\n');
bool Modified = false;
- for (MachineLoop *ML : MLI->getBase().getLoopsInPreorder()) {
- Modified |= MergeLoopEnd(ML);
- Modified |= ConvertTailPredLoop(ML, DT);
- }
-
+ for (MachineLoop *ML : MLI->getBase().getLoopsInPreorder()) {
+ Modified |= MergeLoopEnd(ML);
+ Modified |= ConvertTailPredLoop(ML, DT);
+ }
+
for (MachineBasicBlock &MBB : Fn) {
- Modified |= ReplaceConstByVPNOTs(MBB, DT);
+ Modified |= ReplaceConstByVPNOTs(MBB, DT);
Modified |= ReplaceVCMPsByVPNOTs(MBB);
Modified |= ReduceOldVCCRValueUses(MBB);
- Modified |= ConvertVPSEL(MBB);
+ Modified |= ConvertVPSEL(MBB);
}
LLVM_DEBUG(dbgs() << "**************************************\n");
diff --git a/contrib/libs/llvm12/lib/Target/ARM/TargetInfo/ya.make b/contrib/libs/llvm12/lib/Target/ARM/TargetInfo/ya.make
index 089e7bf206..3f7fdcb6de 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/TargetInfo/ya.make
+++ b/contrib/libs/llvm12/lib/Target/ARM/TargetInfo/ya.make
@@ -12,13 +12,13 @@ LICENSE(Apache-2.0 WITH LLVM-exception)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
- contrib/libs/llvm12
- contrib/libs/llvm12/lib/Support
+ contrib/libs/llvm12
+ contrib/libs/llvm12/lib/Support
)
ADDINCL(
- contrib/libs/llvm12/lib/Target/ARM
- contrib/libs/llvm12/lib/Target/ARM/TargetInfo
+ contrib/libs/llvm12/lib/Target/ARM
+ contrib/libs/llvm12/lib/Target/ARM/TargetInfo
)
NO_COMPILER_WARNINGS()
diff --git a/contrib/libs/llvm12/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/libs/llvm12/lib/Target/ARM/Thumb2InstrInfo.cpp
index d728572e28..9dd389f440 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -12,7 +12,7 @@
#include "Thumb2InstrInfo.h"
#include "ARMMachineFunctionInfo.h"
-#include "ARMSubtarget.h"
+#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -39,11 +39,11 @@ OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden,
cl::desc("Use old-style Thumb2 if-conversion heuristics"),
cl::init(false));
-static cl::opt<bool>
-PreferNoCSEL("prefer-no-csel", cl::Hidden,
- cl::desc("Prefer predicated Move to CSEL"),
- cl::init(false));
-
+static cl::opt<bool>
+PreferNoCSEL("prefer-no-csel", cl::Hidden,
+ cl::desc("Prefer predicated Move to CSEL"),
+ cl::init(false));
+
Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
: ARMBaseInstrInfo(STI) {}
@@ -124,31 +124,31 @@ Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
return getITInstrPredicate(*MBBI, PredReg) == ARMCC::AL;
}
-MachineInstr *
-Thumb2InstrInfo::optimizeSelect(MachineInstr &MI,
- SmallPtrSetImpl<MachineInstr *> &SeenMIs,
- bool PreferFalse) const {
- // Try to use the base optimizeSelect, which uses canFoldIntoMOVCC to fold the
- // MOVCC into another instruction. If that fails on 8.1-M fall back to using a
- // CSEL.
- MachineInstr *RV = ARMBaseInstrInfo::optimizeSelect(MI, SeenMIs, PreferFalse);
- if (!RV && getSubtarget().hasV8_1MMainlineOps() && !PreferNoCSEL) {
- Register DestReg = MI.getOperand(0).getReg();
-
- if (!DestReg.isVirtual())
- return nullptr;
-
- MachineInstrBuilder NewMI = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
- get(ARM::t2CSEL), DestReg)
- .add(MI.getOperand(2))
- .add(MI.getOperand(1))
- .add(MI.getOperand(3));
- SeenMIs.insert(NewMI);
- return NewMI;
- }
- return RV;
-}
-
+MachineInstr *
+Thumb2InstrInfo::optimizeSelect(MachineInstr &MI,
+ SmallPtrSetImpl<MachineInstr *> &SeenMIs,
+ bool PreferFalse) const {
+ // Try to use the base optimizeSelect, which uses canFoldIntoMOVCC to fold the
+ // MOVCC into another instruction. If that fails, on 8.1-M fall back to using a
+ // CSEL.
+ MachineInstr *RV = ARMBaseInstrInfo::optimizeSelect(MI, SeenMIs, PreferFalse);
+ if (!RV && getSubtarget().hasV8_1MMainlineOps() && !PreferNoCSEL) {
+ Register DestReg = MI.getOperand(0).getReg();
+
+ if (!DestReg.isVirtual())
+ return nullptr;
+
+ MachineInstrBuilder NewMI = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
+ get(ARM::t2CSEL), DestReg)
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(3));
+ SeenMIs.insert(NewMI);
+ return NewMI;
+ }
+ return RV;
+}
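When the MOVCC cannot be folded, the code above emits a t2CSEL on 8.1-M Mainline, which selects between two registers under a condition without needing an IT block. A scalar model of what that select computes (illustrative only, ignoring flags and operand encoding):

#include <cassert>

// dest = cond ? op1 : op2, the conditional-select behaviour relied on above.
int cselModel(bool Cond, int Op1, int Op2) { return Cond ? Op1 : Op2; }

int main() {
  assert(cselModel(true, 5, 7) == 5);
  assert(cselModel(false, 5, 7) == 7);
  return 0;
}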
+
void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg,
@@ -258,22 +258,22 @@ void Thumb2InstrInfo::expandLoadStackGuard(
expandLoadStackGuardBase(MI, ARM::t2MOVi32imm, ARM::t2LDRi12);
}
-MachineInstr *Thumb2InstrInfo::commuteInstructionImpl(MachineInstr &MI,
- bool NewMI,
- unsigned OpIdx1,
- unsigned OpIdx2) const {
- switch (MI.getOpcode()) {
- case ARM::MVE_VMAXNMAf16:
- case ARM::MVE_VMAXNMAf32:
- case ARM::MVE_VMINNMAf16:
- case ARM::MVE_VMINNMAf32:
- // Don't allow predicated instructions to be commuted.
- if (getVPTInstrPredicate(MI) != ARMVCC::None)
- return nullptr;
- }
- return ARMBaseInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
-}
-
+MachineInstr *Thumb2InstrInfo::commuteInstructionImpl(MachineInstr &MI,
+ bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const {
+ switch (MI.getOpcode()) {
+ case ARM::MVE_VMAXNMAf16:
+ case ARM::MVE_VMAXNMAf32:
+ case ARM::MVE_VMINNMAf16:
+ case ARM::MVE_VMINNMAf32:
+ // Don't allow predicated instructions to be commuted.
+ if (getVPTInstrPredicate(MI) != ARMVCC::None)
+ return nullptr;
+ }
+ return ARMBaseInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
+}
+
void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
const DebugLoc &dl, Register DestReg,
diff --git a/contrib/libs/llvm12/lib/Target/ARM/Thumb2InstrInfo.h b/contrib/libs/llvm12/lib/Target/ARM/Thumb2InstrInfo.h
index 808167bfdc..6fda236159 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/contrib/libs/llvm12/lib/Target/ARM/Thumb2InstrInfo.h
@@ -60,14 +60,14 @@ public:
///
const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }
- MachineInstr *optimizeSelect(MachineInstr &MI,
- SmallPtrSetImpl<MachineInstr *> &SeenMIs,
- bool) const override;
-
- MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
- unsigned OpIdx1,
- unsigned OpIdx2) const override;
-
+ MachineInstr *optimizeSelect(MachineInstr &MI,
+ SmallPtrSetImpl<MachineInstr *> &SeenMIs,
+ bool) const override;
+
+ MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const override;
+
private:
void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override;
};
diff --git a/contrib/libs/llvm12/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/libs/llvm12/lib/Target/ARM/Thumb2SizeReduction.cpp
index 0f7e190386..a200a5cf35 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/contrib/libs/llvm12/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -43,7 +43,7 @@
using namespace llvm;
-#define DEBUG_TYPE "thumb2-reduce-size"
+#define DEBUG_TYPE "thumb2-reduce-size"
#define THUMB2_SIZE_REDUCE_NAME "Thumb2 instruction size reduce pass"
STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
diff --git a/contrib/libs/llvm12/lib/Target/ARM/Utils/ya.make b/contrib/libs/llvm12/lib/Target/ARM/Utils/ya.make
index 7a980b708c..fed79316b8 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/Utils/ya.make
+++ b/contrib/libs/llvm12/lib/Target/ARM/Utils/ya.make
@@ -12,15 +12,15 @@ LICENSE(Apache-2.0 WITH LLVM-exception)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
- contrib/libs/llvm12
- contrib/libs/llvm12/include
- contrib/libs/llvm12/lib/Support
+ contrib/libs/llvm12
+ contrib/libs/llvm12/include
+ contrib/libs/llvm12/lib/Support
)
ADDINCL(
- ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/ARM
- contrib/libs/llvm12/lib/Target/ARM
- contrib/libs/llvm12/lib/Target/ARM/Utils
+ ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/ARM
+ contrib/libs/llvm12/lib/Target/ARM
+ contrib/libs/llvm12/lib/Target/ARM/Utils
)
NO_COMPILER_WARNINGS()
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ya.make b/contrib/libs/llvm12/lib/Target/ARM/ya.make
index 9551f9f11b..7387bc4532 100644
--- a/contrib/libs/llvm12/lib/Target/ARM/ya.make
+++ b/contrib/libs/llvm12/lib/Target/ARM/ya.make
@@ -12,28 +12,28 @@ LICENSE(Apache-2.0 WITH LLVM-exception)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
- contrib/libs/llvm12
- contrib/libs/llvm12/include
- contrib/libs/llvm12/lib/Analysis
- contrib/libs/llvm12/lib/CodeGen
- contrib/libs/llvm12/lib/CodeGen/AsmPrinter
- contrib/libs/llvm12/lib/CodeGen/GlobalISel
- contrib/libs/llvm12/lib/CodeGen/SelectionDAG
- contrib/libs/llvm12/lib/IR
- contrib/libs/llvm12/lib/MC
- contrib/libs/llvm12/lib/Support
- contrib/libs/llvm12/lib/Target
- contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc
- contrib/libs/llvm12/lib/Target/ARM/TargetInfo
- contrib/libs/llvm12/lib/Target/ARM/Utils
- contrib/libs/llvm12/lib/Transforms/CFGuard
- contrib/libs/llvm12/lib/Transforms/Scalar
- contrib/libs/llvm12/lib/Transforms/Utils
+ contrib/libs/llvm12
+ contrib/libs/llvm12/include
+ contrib/libs/llvm12/lib/Analysis
+ contrib/libs/llvm12/lib/CodeGen
+ contrib/libs/llvm12/lib/CodeGen/AsmPrinter
+ contrib/libs/llvm12/lib/CodeGen/GlobalISel
+ contrib/libs/llvm12/lib/CodeGen/SelectionDAG
+ contrib/libs/llvm12/lib/IR
+ contrib/libs/llvm12/lib/MC
+ contrib/libs/llvm12/lib/Support
+ contrib/libs/llvm12/lib/Target
+ contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc
+ contrib/libs/llvm12/lib/Target/ARM/TargetInfo
+ contrib/libs/llvm12/lib/Target/ARM/Utils
+ contrib/libs/llvm12/lib/Transforms/CFGuard
+ contrib/libs/llvm12/lib/Transforms/Scalar
+ contrib/libs/llvm12/lib/Transforms/Utils
)
ADDINCL(
- ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/ARM
- contrib/libs/llvm12/lib/Target/ARM
+ ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/ARM
+ contrib/libs/llvm12/lib/Target/ARM
)
NO_COMPILER_WARNINGS()
@@ -46,7 +46,7 @@ SRCS(
ARMBaseInstrInfo.cpp
ARMBaseRegisterInfo.cpp
ARMBasicBlockInfo.cpp
- ARMBlockPlacement.cpp
+ ARMBlockPlacement.cpp
ARMCallLowering.cpp
ARMCallingConv.cpp
ARMConstantIslandPass.cpp
@@ -69,7 +69,7 @@ SRCS(
ARMParallelDSP.cpp
ARMRegisterBankInfo.cpp
ARMRegisterInfo.cpp
- ARMSLSHardening.cpp
+ ARMSLSHardening.cpp
ARMSelectionDAGInfo.cpp
ARMSubtarget.cpp
ARMTargetMachine.cpp