author | shadchin <shadchin@yandex-team.ru> | 2022-02-10 16:44:39 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:39 +0300 |
commit | e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (patch) | |
tree | 64175d5cadab313b3e7039ebaa06c5bc3295e274 /contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.cpp | |
parent | 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (diff) | |
download | ydb-e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0.tar.gz | |
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.cpp')
-rw-r--r-- | contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.cpp | 784 |
1 file changed, 392 insertions, 392 deletions
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.cpp index fc3e238182..6b38e216a8 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -107,13 +107,13 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { NumBytes = PatchPointOpers(&MI).getNumPatchBytes(); assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); break; - case TargetOpcode::STATEPOINT: - NumBytes = StatepointOpers(&MI).getNumPatchBytes(); - assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); - // No patch bytes means a normal call inst is emitted - if (NumBytes == 0) - NumBytes = 4; - break; + case TargetOpcode::STATEPOINT: + NumBytes = StatepointOpers(&MI).getNumPatchBytes(); + assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); + // No patch bytes means a normal call inst is emitted + if (NumBytes == 0) + NumBytes = 4; + break; case AArch64::TLSDESC_CALLSEQ: // This gets lowered to an instruction sequence which takes 16 bytes NumBytes = 16; @@ -294,31 +294,31 @@ bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB, } } - // If we're allowed to modify and the block ends in a unconditional branch - // which could simply fallthrough, remove the branch. (Note: This case only - // matters when we can't understand the whole sequence, otherwise it's also - // handled by BranchFolding.cpp.) - if (AllowModify && isUncondBranchOpcode(LastOpc) && - MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) { - LastInst->eraseFromParent(); - LastInst = SecondLastInst; - LastOpc = LastInst->getOpcode(); - if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) { - assert(!isUncondBranchOpcode(LastOpc) && - "unreachable unconditional branches removed above"); - - if (isCondBranchOpcode(LastOpc)) { - // Block ends with fall-through condbranch. - parseCondBranch(LastInst, TBB, Cond); - return false; - } - return true; // Can't handle indirect branch. - } else { - SecondLastInst = &*I; - SecondLastOpc = SecondLastInst->getOpcode(); - } - } - + // If we're allowed to modify and the block ends in a unconditional branch + // which could simply fallthrough, remove the branch. (Note: This case only + // matters when we can't understand the whole sequence, otherwise it's also + // handled by BranchFolding.cpp.) + if (AllowModify && isUncondBranchOpcode(LastOpc) && + MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) { + LastInst->eraseFromParent(); + LastInst = SecondLastInst; + LastOpc = LastInst->getOpcode(); + if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) { + assert(!isUncondBranchOpcode(LastOpc) && + "unreachable unconditional branches removed above"); + + if (isCondBranchOpcode(LastOpc)) { + // Block ends with fall-through condbranch. + parseCondBranch(LastInst, TBB, Cond); + return false; + } + return true; // Can't handle indirect branch. + } else { + SecondLastInst = &*I; + SecondLastOpc = SecondLastInst->getOpcode(); + } + } + // If there are three terminators, we don't know what sort of block this is. 
if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I)) return true; @@ -353,56 +353,56 @@ bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB, return true; } -bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB, - MachineBranchPredicate &MBP, - bool AllowModify) const { - // For the moment, handle only a block which ends with a cb(n)zx followed by - // a fallthrough. Why this? Because it is a common form. - // TODO: Should we handle b.cc? - - MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); - if (I == MBB.end()) - return true; - - // Skip over SpeculationBarrierEndBB terminators - if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB || - I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) { - --I; - } - - if (!isUnpredicatedTerminator(*I)) - return true; - - // Get the last instruction in the block. - MachineInstr *LastInst = &*I; - unsigned LastOpc = LastInst->getOpcode(); - if (!isCondBranchOpcode(LastOpc)) - return true; - - switch (LastOpc) { - default: - return true; - case AArch64::CBZW: - case AArch64::CBZX: - case AArch64::CBNZW: - case AArch64::CBNZX: - break; - }; - - MBP.TrueDest = LastInst->getOperand(1).getMBB(); - assert(MBP.TrueDest && "expected!"); - MBP.FalseDest = MBB.getNextNode(); - - MBP.ConditionDef = nullptr; - MBP.SingleUseCondition = false; - - MBP.LHS = LastInst->getOperand(0); - MBP.RHS = MachineOperand::CreateImm(0); - MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE - : MachineBranchPredicate::PRED_EQ; - return false; -} - +bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB, + MachineBranchPredicate &MBP, + bool AllowModify) const { + // For the moment, handle only a block which ends with a cb(n)zx followed by + // a fallthrough. Why this? Because it is a common form. + // TODO: Should we handle b.cc? + + MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); + if (I == MBB.end()) + return true; + + // Skip over SpeculationBarrierEndBB terminators + if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB || + I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) { + --I; + } + + if (!isUnpredicatedTerminator(*I)) + return true; + + // Get the last instruction in the block. + MachineInstr *LastInst = &*I; + unsigned LastOpc = LastInst->getOpcode(); + if (!isCondBranchOpcode(LastOpc)) + return true; + + switch (LastOpc) { + default: + return true; + case AArch64::CBZW: + case AArch64::CBZX: + case AArch64::CBNZW: + case AArch64::CBNZX: + break; + }; + + MBP.TrueDest = LastInst->getOperand(1).getMBB(); + assert(MBP.TrueDest && "expected!"); + MBP.FalseDest = MBB.getNextNode(); + + MBP.ConditionDef = nullptr; + MBP.SingleUseCondition = false; + + MBP.LHS = LastInst->getOperand(0); + MBP.RHS = MachineOperand::CreateImm(0); + MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE + : MachineBranchPredicate::PRED_EQ; + return false; +} + bool AArch64InstrInfo::reverseBranchCondition( SmallVectorImpl<MachineOperand> &Cond) const { if (Cond[0].getImm() != -1) { @@ -1119,13 +1119,13 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, switch (MI.getOpcode()) { default: break; - case AArch64::PTEST_PP: - SrcReg = MI.getOperand(0).getReg(); - SrcReg2 = MI.getOperand(1).getReg(); - // Not sure about the mask and value for now... 
- CmpMask = ~0; - CmpValue = 0; - return true; + case AArch64::PTEST_PP: + SrcReg = MI.getOperand(0).getReg(); + SrcReg2 = MI.getOperand(1).getReg(); + // Not sure about the mask and value for now... + CmpMask = ~0; + CmpValue = 0; + return true; case AArch64::SUBSWrr: case AArch64::SUBSWrs: case AArch64::SUBSWrx: @@ -1281,9 +1281,9 @@ static bool areCFlagsAccessedBetweenInstrs( return true; // From must be above To. - assert(std::any_of( - ++To.getReverse(), To->getParent()->rend(), - [From](MachineInstr &MI) { return MI.getIterator() == From; })); + assert(std::any_of( + ++To.getReverse(), To->getParent()->rend(), + [From](MachineInstr &MI) { return MI.getIterator() == From; })); // We iterate backward starting at \p To until we hit \p From. for (const MachineInstr &Instr : @@ -1296,127 +1296,127 @@ static bool areCFlagsAccessedBetweenInstrs( return false; } -/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating -/// operation which could set the flags in an identical manner -bool AArch64InstrInfo::optimizePTestInstr( - MachineInstr *PTest, unsigned MaskReg, unsigned PredReg, - const MachineRegisterInfo *MRI) const { - auto *Mask = MRI->getUniqueVRegDef(MaskReg); - auto *Pred = MRI->getUniqueVRegDef(PredReg); - auto NewOp = Pred->getOpcode(); - bool OpChanged = false; - - unsigned MaskOpcode = Mask->getOpcode(); - unsigned PredOpcode = Pred->getOpcode(); - bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode); - bool PredIsWhileLike = isWhileOpcode(PredOpcode); - - if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike)) { - // For PTEST(PTRUE, OTHER_INST), PTEST is redundant when PTRUE doesn't - // deactivate any lanes OTHER_INST might set. - uint64_t MaskElementSize = getElementSizeForOpcode(MaskOpcode); - uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode); - - // Must be an all active predicate of matching element size. - if ((PredElementSize != MaskElementSize) || - (Mask->getOperand(1).getImm() != 31)) - return false; - - // Fallthough to simply remove the PTEST. - } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike)) { - // For PTEST(PG, PG), PTEST is redundant when PG is the result of an - // instruction that sets the flags as PTEST would. - - // Fallthough to simply remove the PTEST. - } else if (PredIsPTestLike) { - // For PTEST(PG_1, PTEST_LIKE(PG2, ...)), PTEST is redundant when both - // instructions use the same predicate. - auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg()); - if (Mask != PTestLikeMask) - return false; - - // Fallthough to simply remove the PTEST. - } else { - switch (Pred->getOpcode()) { - case AArch64::BRKB_PPzP: - case AArch64::BRKPB_PPzPP: { - // Op 0 is chain, 1 is the mask, 2 the previous predicate to - // propagate, 3 the new predicate. - - // Check to see if our mask is the same as the brkpb's. If - // not the resulting flag bits may be different and we - // can't remove the ptest. - auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg()); - if (Mask != PredMask) - return false; - - // Switch to the new opcode - NewOp = Pred->getOpcode() == AArch64::BRKB_PPzP ? 
AArch64::BRKBS_PPzP - : AArch64::BRKPBS_PPzPP; - OpChanged = true; - break; - } - case AArch64::BRKN_PPzP: { - auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg()); - if (Mask != PredMask) - return false; - - NewOp = AArch64::BRKNS_PPzP; - OpChanged = true; - break; - } - default: - // Bail out if we don't recognize the input - return false; - } - } - - const TargetRegisterInfo *TRI = &getRegisterInfo(); - - // If the predicate is in a different block (possibly because its been - // hoisted out), then assume the flags are set in between statements. - if (Pred->getParent() != PTest->getParent()) - return false; - - // If another instruction between the propagation and test sets the - // flags, don't remove the ptest. - MachineBasicBlock::iterator I = Pred, E = PTest; - ++I; // Skip past the predicate op itself. - for (; I != E; ++I) { - const MachineInstr &Inst = *I; - - // TODO: If the ptest flags are unused, we could still remove it. - if (Inst.modifiesRegister(AArch64::NZCV, TRI)) - return false; - } - - // If we pass all the checks, it's safe to remove the PTEST and use the flags - // as they are prior to PTEST. Sometimes this requires the tested PTEST - // operand to be replaced with an equivalent instruction that also sets the - // flags. - Pred->setDesc(get(NewOp)); - PTest->eraseFromParent(); - if (OpChanged) { - bool succeeded = UpdateOperandRegClass(*Pred); - (void)succeeded; - assert(succeeded && "Operands have incompatible register classes!"); - Pred->addRegisterDefined(AArch64::NZCV, TRI); - } - - // Ensure that the flags def is live. - if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) { - unsigned i = 0, e = Pred->getNumOperands(); - for (; i != e; ++i) { - MachineOperand &MO = Pred->getOperand(i); - if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) { - MO.setIsDead(false); - break; - } - } - } - return true; -} - +/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating +/// operation which could set the flags in an identical manner +bool AArch64InstrInfo::optimizePTestInstr( + MachineInstr *PTest, unsigned MaskReg, unsigned PredReg, + const MachineRegisterInfo *MRI) const { + auto *Mask = MRI->getUniqueVRegDef(MaskReg); + auto *Pred = MRI->getUniqueVRegDef(PredReg); + auto NewOp = Pred->getOpcode(); + bool OpChanged = false; + + unsigned MaskOpcode = Mask->getOpcode(); + unsigned PredOpcode = Pred->getOpcode(); + bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode); + bool PredIsWhileLike = isWhileOpcode(PredOpcode); + + if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike)) { + // For PTEST(PTRUE, OTHER_INST), PTEST is redundant when PTRUE doesn't + // deactivate any lanes OTHER_INST might set. + uint64_t MaskElementSize = getElementSizeForOpcode(MaskOpcode); + uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode); + + // Must be an all active predicate of matching element size. + if ((PredElementSize != MaskElementSize) || + (Mask->getOperand(1).getImm() != 31)) + return false; + + // Fallthough to simply remove the PTEST. + } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike)) { + // For PTEST(PG, PG), PTEST is redundant when PG is the result of an + // instruction that sets the flags as PTEST would. + + // Fallthough to simply remove the PTEST. + } else if (PredIsPTestLike) { + // For PTEST(PG_1, PTEST_LIKE(PG2, ...)), PTEST is redundant when both + // instructions use the same predicate. 
+ auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg()); + if (Mask != PTestLikeMask) + return false; + + // Fallthough to simply remove the PTEST. + } else { + switch (Pred->getOpcode()) { + case AArch64::BRKB_PPzP: + case AArch64::BRKPB_PPzPP: { + // Op 0 is chain, 1 is the mask, 2 the previous predicate to + // propagate, 3 the new predicate. + + // Check to see if our mask is the same as the brkpb's. If + // not the resulting flag bits may be different and we + // can't remove the ptest. + auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg()); + if (Mask != PredMask) + return false; + + // Switch to the new opcode + NewOp = Pred->getOpcode() == AArch64::BRKB_PPzP ? AArch64::BRKBS_PPzP + : AArch64::BRKPBS_PPzPP; + OpChanged = true; + break; + } + case AArch64::BRKN_PPzP: { + auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg()); + if (Mask != PredMask) + return false; + + NewOp = AArch64::BRKNS_PPzP; + OpChanged = true; + break; + } + default: + // Bail out if we don't recognize the input + return false; + } + } + + const TargetRegisterInfo *TRI = &getRegisterInfo(); + + // If the predicate is in a different block (possibly because its been + // hoisted out), then assume the flags are set in between statements. + if (Pred->getParent() != PTest->getParent()) + return false; + + // If another instruction between the propagation and test sets the + // flags, don't remove the ptest. + MachineBasicBlock::iterator I = Pred, E = PTest; + ++I; // Skip past the predicate op itself. + for (; I != E; ++I) { + const MachineInstr &Inst = *I; + + // TODO: If the ptest flags are unused, we could still remove it. + if (Inst.modifiesRegister(AArch64::NZCV, TRI)) + return false; + } + + // If we pass all the checks, it's safe to remove the PTEST and use the flags + // as they are prior to PTEST. Sometimes this requires the tested PTEST + // operand to be replaced with an equivalent instruction that also sets the + // flags. + Pred->setDesc(get(NewOp)); + PTest->eraseFromParent(); + if (OpChanged) { + bool succeeded = UpdateOperandRegClass(*Pred); + (void)succeeded; + assert(succeeded && "Operands have incompatible register classes!"); + Pred->addRegisterDefined(AArch64::NZCV, TRI); + } + + // Ensure that the flags def is live. + if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) { + unsigned i = 0, e = Pred->getNumOperands(); + for (; i != e; ++i) { + MachineOperand &MO = Pred->getOperand(i); + if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) { + MO.setIsDead(false); + break; + } + } + } + return true; +} + /// Try to optimize a compare instruction. A compare instruction is an /// instruction which produces AArch64::NZCV. It can be truly compare /// instruction @@ -1455,9 +1455,9 @@ bool AArch64InstrInfo::optimizeCompareInstr( return true; } - if (CmpInstr.getOpcode() == AArch64::PTEST_PP) - return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI); - + if (CmpInstr.getOpcode() == AArch64::PTEST_PP) + return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI); + // Continue only if we have a "ri" where immediate is zero. // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare // function. @@ -2274,24 +2274,24 @@ bool AArch64InstrInfo::getMemOperandsWithOffsetWidth( return true; } -Optional<ExtAddrMode> -AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI, - const TargetRegisterInfo *TRI) const { - const MachineOperand *Base; // Filled with the base operand of MI. - int64_t Offset; // Filled with the offset of MI. 
- bool OffsetIsScalable; - if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI)) - return None; - - if (!Base->isReg()) - return None; - ExtAddrMode AM; - AM.BaseReg = Base->getReg(); - AM.Displacement = Offset; - AM.ScaledReg = 0; - return AM; -} - +Optional<ExtAddrMode> +AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI, + const TargetRegisterInfo *TRI) const { + const MachineOperand *Base; // Filled with the base operand of MI. + int64_t Offset; // Filled with the offset of MI. + bool OffsetIsScalable; + if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI)) + return None; + + if (!Base->isReg()) + return None; + ExtAddrMode AM; + AM.BaseReg = Base->getReg(); + AM.Displacement = Offset; + AM.ScaledReg = 0; + return AM; +} + bool AArch64InstrInfo::getMemOperandWithOffsetWidth( const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, @@ -3290,7 +3290,7 @@ void AArch64InstrInfo::storeRegToStackSlot( else if (AArch64::PPRRegClass.hasSubClassEq(RC)) { assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); Opc = AArch64::STR_PXI; - StackID = TargetStackID::ScalableVector; + StackID = TargetStackID::ScalableVector; } break; case 4: @@ -3334,7 +3334,7 @@ void AArch64InstrInfo::storeRegToStackSlot( } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) { assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); Opc = AArch64::STR_ZXI; - StackID = TargetStackID::ScalableVector; + StackID = TargetStackID::ScalableVector; } break; case 24: @@ -3356,7 +3356,7 @@ void AArch64InstrInfo::storeRegToStackSlot( } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) { assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); Opc = AArch64::STR_ZZXI; - StackID = TargetStackID::ScalableVector; + StackID = TargetStackID::ScalableVector; } break; case 48: @@ -3367,7 +3367,7 @@ void AArch64InstrInfo::storeRegToStackSlot( } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) { assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); Opc = AArch64::STR_ZZZXI; - StackID = TargetStackID::ScalableVector; + StackID = TargetStackID::ScalableVector; } break; case 64: @@ -3378,7 +3378,7 @@ void AArch64InstrInfo::storeRegToStackSlot( } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) { assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); Opc = AArch64::STR_ZZZZXI; - StackID = TargetStackID::ScalableVector; + StackID = TargetStackID::ScalableVector; } break; } @@ -3444,7 +3444,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( else if (AArch64::PPRRegClass.hasSubClassEq(RC)) { assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); Opc = AArch64::LDR_PXI; - StackID = TargetStackID::ScalableVector; + StackID = TargetStackID::ScalableVector; } break; case 4: @@ -3488,7 +3488,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) { assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); Opc = AArch64::LDR_ZXI; - StackID = TargetStackID::ScalableVector; + StackID = TargetStackID::ScalableVector; } break; case 24: @@ -3510,7 +3510,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) { assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); Opc = AArch64::LDR_ZZXI; - StackID = TargetStackID::ScalableVector; + StackID = TargetStackID::ScalableVector; } break; case 48: @@ -3521,7 +3521,7 @@ void 
AArch64InstrInfo::loadRegFromStackSlot( } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) { assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); Opc = AArch64::LDR_ZZZXI; - StackID = TargetStackID::ScalableVector; + StackID = TargetStackID::ScalableVector; } break; case 64: @@ -3532,7 +3532,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) { assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); Opc = AArch64::LDR_ZZZZXI; - StackID = TargetStackID::ScalableVector; + StackID = TargetStackID::ScalableVector; } break; } @@ -3559,47 +3559,47 @@ bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, }); } -void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets( - const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) { - // The smallest scalable element supported by scaled SVE addressing - // modes are predicates, which are 2 scalable bytes in size. So the scalable - // byte offset must always be a multiple of 2. - assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset"); - - // VGSized offsets are divided by '2', because the VG register is the - // the number of 64bit granules as opposed to 128bit vector chunks, - // which is how the 'n' in e.g. MVT::nxv1i8 is modelled. - // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes. - // VG = n * 2 and the dwarf offset must be VG * 8 bytes. - ByteSized = Offset.getFixed(); - VGSized = Offset.getScalable() / 2; -} - -/// Returns the offset in parts to which this frame offset can be -/// decomposed for the purpose of describing a frame offset. -/// For non-scalable offsets this is simply its byte size. -void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets( - const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors, - int64_t &NumDataVectors) { - // The smallest scalable element supported by scaled SVE addressing - // modes are predicates, which are 2 scalable bytes in size. So the scalable - // byte offset must always be a multiple of 2. - assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset"); - - NumBytes = Offset.getFixed(); - NumDataVectors = 0; - NumPredicateVectors = Offset.getScalable() / 2; - // This method is used to get the offsets to adjust the frame offset. - // If the function requires ADDPL to be used and needs more than two ADDPL - // instructions, part of the offset is folded into NumDataVectors so that it - // uses ADDVL for part of it, reducing the number of ADDPL instructions. - if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 || - NumPredicateVectors > 62) { - NumDataVectors = NumPredicateVectors / 8; - NumPredicateVectors -= NumDataVectors * 8; - } -} - +void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets( + const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) { + // The smallest scalable element supported by scaled SVE addressing + // modes are predicates, which are 2 scalable bytes in size. So the scalable + // byte offset must always be a multiple of 2. + assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset"); + + // VGSized offsets are divided by '2', because the VG register is the + // the number of 64bit granules as opposed to 128bit vector chunks, + // which is how the 'n' in e.g. MVT::nxv1i8 is modelled. + // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes. + // VG = n * 2 and the dwarf offset must be VG * 8 bytes. 
+ ByteSized = Offset.getFixed(); + VGSized = Offset.getScalable() / 2; +} + +/// Returns the offset in parts to which this frame offset can be +/// decomposed for the purpose of describing a frame offset. +/// For non-scalable offsets this is simply its byte size. +void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets( + const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors, + int64_t &NumDataVectors) { + // The smallest scalable element supported by scaled SVE addressing + // modes are predicates, which are 2 scalable bytes in size. So the scalable + // byte offset must always be a multiple of 2. + assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset"); + + NumBytes = Offset.getFixed(); + NumDataVectors = 0; + NumPredicateVectors = Offset.getScalable() / 2; + // This method is used to get the offsets to adjust the frame offset. + // If the function requires ADDPL to be used and needs more than two ADDPL + // instructions, part of the offset is folded into NumDataVectors so that it + // uses ADDVL for part of it, reducing the number of ADDPL instructions. + if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 || + NumPredicateVectors > 62) { + NumDataVectors = NumPredicateVectors / 8; + NumPredicateVectors -= NumDataVectors * 8; + } +} + // Helper function to emit a frame offset adjustment from a given // pointer (SrcReg), stored into DestReg. This function is explicit // in that it requires the opcode. @@ -3709,13 +3709,13 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB, MachineInstr::MIFlag Flag, bool SetNZCV, bool NeedsWinCFI, bool *HasWinCFI) { int64_t Bytes, NumPredicateVectors, NumDataVectors; - AArch64InstrInfo::decomposeStackOffsetForFrameOffsets( - Offset, Bytes, NumPredicateVectors, NumDataVectors); + AArch64InstrInfo::decomposeStackOffsetForFrameOffsets( + Offset, Bytes, NumPredicateVectors, NumDataVectors); // First emit non-scalable frame offsets, or a simple 'mov'. if (Bytes || (!Offset && SrcReg != DestReg)) { - assert((DestReg != AArch64::SP || Bytes % 8 == 0) && - "SP increment/decrement not 8-byte aligned"); + assert((DestReg != AArch64::SP || Bytes % 8 == 0) && + "SP increment/decrement not 8-byte aligned"); unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri; if (Bytes < 0) { Bytes = -Bytes; @@ -3970,7 +3970,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, // Construct the complete offset. bool IsMulVL = ScaleValue.isScalable(); unsigned Scale = ScaleValue.getKnownMinSize(); - int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed(); + int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed(); const MachineOperand &ImmOpnd = MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode())); @@ -4012,9 +4012,9 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, *OutUnscaledOp = *UnscaledOp; if (IsMulVL) - SOffset = StackOffset::get(SOffset.getFixed(), Offset); + SOffset = StackOffset::get(SOffset.getFixed(), Offset); else - SOffset = StackOffset::get(Offset, SOffset.getScalable()); + SOffset = StackOffset::get(Offset, SOffset.getScalable()); return AArch64FrameOffsetCanUpdate | (SOffset ? 
0 : AArch64FrameOffsetIsLegal); } @@ -4026,7 +4026,7 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned ImmIdx = FrameRegIdx + 1; if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) { - Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm()); + Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm()); emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(), MI.getOperand(0).getReg(), FrameReg, Offset, TII, MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri)); @@ -4131,7 +4131,7 @@ static bool isCombineInstrCandidate64(unsigned Opc) { return false; } -// FP Opcodes that can be combined with a FMUL. +// FP Opcodes that can be combined with a FMUL. static bool isCombineInstrCandidateFP(const MachineInstr &Inst) { switch (Inst.getOpcode()) { default: @@ -4153,12 +4153,12 @@ static bool isCombineInstrCandidateFP(const MachineInstr &Inst) { case AArch64::FSUBv2f64: case AArch64::FSUBv4f32: TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options; - // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by - // the target options or if FADD/FSUB has the contract fast-math flag. - return Options.UnsafeFPMath || - Options.AllowFPOpFusion == FPOpFusion::Fast || - Inst.getFlag(MachineInstr::FmContract); - return true; + // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by + // the target options or if FADD/FSUB has the contract fast-math flag. + return Options.UnsafeFPMath || + Options.AllowFPOpFusion == FPOpFusion::Fast || + Inst.getFlag(MachineInstr::FmContract); + return true; } return false; } @@ -4638,8 +4638,8 @@ bool AArch64InstrInfo::isThroughputPattern( /// pattern evaluator stops checking as soon as it finds a faster sequence. bool AArch64InstrInfo::getMachineCombinerPatterns( - MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns, - bool DoRegPressureReduce) const { + MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns, + bool DoRegPressureReduce) const { // Integer patterns if (getMaddPatterns(Root, Patterns)) return true; @@ -4647,8 +4647,8 @@ bool AArch64InstrInfo::getMachineCombinerPatterns( if (getFMAPatterns(Root, Patterns)) return true; - return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns, - DoRegPressureReduce); + return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns, + DoRegPressureReduce); } enum class FMAInstKind { Default, Indexed, Accumulator }; @@ -4871,7 +4871,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); - MachineInstr *MUL = nullptr; + MachineInstr *MUL = nullptr; const TargetRegisterClass *RC; unsigned Opc; switch (Pattern) { @@ -5692,9 +5692,9 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } } // end switch (Pattern) // Record MUL and ADD/SUB for deletion - // FIXME: This assertion fails in CodeGen/AArch64/tailmerging_in_mbp.ll and - // CodeGen/AArch64/urem-seteq-nonzero.ll. - // assert(MUL && "MUL was never set"); + // FIXME: This assertion fails in CodeGen/AArch64/tailmerging_in_mbp.ll and + // CodeGen/AArch64/urem-seteq-nonzero.ll. 
+ // assert(MUL && "MUL was never set"); DelInstrs.push_back(MUL); DelInstrs.push_back(&Root); } @@ -6034,20 +6034,20 @@ AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const { static bool outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a, const outliner::Candidate &b) { - const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>(); - const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>(); + const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>(); + const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>(); - return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) && - MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true); + return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) && + MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true); } static bool outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a, const outliner::Candidate &b) { - const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>(); - const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>(); + const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>(); + const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>(); - return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey(); + return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey(); } static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, @@ -6104,9 +6104,9 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( // necessary. However, at this point we don't know if the outlined function // will have a RET instruction so we assume the worst. const TargetRegisterInfo &TRI = getRegisterInfo(); - if (FirstCand.getMF() - ->getInfo<AArch64FunctionInfo>() - ->shouldSignReturnAddress(true)) { + if (FirstCand.getMF() + ->getInfo<AArch64FunctionInfo>() + ->shouldSignReturnAddress(true)) { // One PAC and one AUT instructions NumBytesToCreateFrame += 8; @@ -6163,7 +6163,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( return false; }; // Remove candidates with illegal stack modifying instructions - llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification); + llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification); // If the sequence doesn't have enough candidates left, then we're done. if (RepeatedSequenceLocs.size() < 2) @@ -6206,7 +6206,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( // Erase every candidate that violates the restrictions above. (It could be // true that we have viable candidates, so it's not worth bailing out in // the case that, say, 1 out of 20 candidates violate the restructions.) - llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall); + llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall); // If the sequence doesn't have enough candidates left, then we're done. 
if (RepeatedSequenceLocs.size() < 2) @@ -6229,7 +6229,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( NumBytesToCreateFrame += 4; bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) { - return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement(); + return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement(); }); // We check to see if CFI Instructions are present, and if they are @@ -6398,60 +6398,60 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( FrameID = MachineOutlinerNoLRSave; } else { SetCandidateCallInfo(MachineOutlinerDefault, 12); - - // Bugzilla ID: 46767 - // TODO: Check if fixing up the stack more than once is safe so we can - // outline these. - // - // An outline resulting in a caller that requires stack fixups at the - // callsite to a callee that also requires stack fixups can happen when - // there are no available registers at the candidate callsite for a - // candidate that itself also has calls. - // - // In other words if function_containing_sequence in the following pseudo - // assembly requires that we save LR at the point of the call, but there - // are no available registers: in this case we save using SP and as a - // result the SP offsets requires stack fixups by multiples of 16. - // - // function_containing_sequence: - // ... - // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N - // call OUTLINED_FUNCTION_N - // restore LR from SP - // ... - // - // OUTLINED_FUNCTION_N: - // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N - // ... - // bl foo - // restore LR from SP - // ret - // - // Because the code to handle more than one stack fixup does not - // currently have the proper checks for legality, these cases will assert - // in the AArch64 MachineOutliner. This is because the code to do this - // needs more hardening, testing, better checks that generated code is - // legal, etc and because it is only verified to handle a single pass of - // stack fixup. - // - // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch - // these cases until they are known to be handled. Bugzilla 46767 is - // referenced in comments at the assert site. - // - // To avoid asserting (or generating non-legal code on noassert builds) - // we remove all candidates which would need more than one stack fixup by - // pruning the cases where the candidate has calls while also having no - // available LR and having no available general purpose registers to copy - // LR to (ie one extra stack save/restore). - // - if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) { - erase_if(RepeatedSequenceLocs, [this](outliner::Candidate &C) { - return (std::any_of( - C.front(), std::next(C.back()), - [](const MachineInstr &MI) { return MI.isCall(); })) && - (!C.LRU.available(AArch64::LR) || !findRegisterToSaveLRTo(C)); - }); - } + + // Bugzilla ID: 46767 + // TODO: Check if fixing up the stack more than once is safe so we can + // outline these. + // + // An outline resulting in a caller that requires stack fixups at the + // callsite to a callee that also requires stack fixups can happen when + // there are no available registers at the candidate callsite for a + // candidate that itself also has calls. 
+ // + // In other words if function_containing_sequence in the following pseudo + // assembly requires that we save LR at the point of the call, but there + // are no available registers: in this case we save using SP and as a + // result the SP offsets requires stack fixups by multiples of 16. + // + // function_containing_sequence: + // ... + // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N + // call OUTLINED_FUNCTION_N + // restore LR from SP + // ... + // + // OUTLINED_FUNCTION_N: + // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N + // ... + // bl foo + // restore LR from SP + // ret + // + // Because the code to handle more than one stack fixup does not + // currently have the proper checks for legality, these cases will assert + // in the AArch64 MachineOutliner. This is because the code to do this + // needs more hardening, testing, better checks that generated code is + // legal, etc and because it is only verified to handle a single pass of + // stack fixup. + // + // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch + // these cases until they are known to be handled. Bugzilla 46767 is + // referenced in comments at the assert site. + // + // To avoid asserting (or generating non-legal code on noassert builds) + // we remove all candidates which would need more than one stack fixup by + // pruning the cases where the candidate has calls while also having no + // available LR and having no available general purpose registers to copy + // LR to (ie one extra stack save/restore). + // + if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) { + erase_if(RepeatedSequenceLocs, [this](outliner::Candidate &C) { + return (std::any_of( + C.front(), std::next(C.back()), + [](const MachineInstr &MI) { return MI.isCall(); })) && + (!C.LRU.available(AArch64::LR) || !findRegisterToSaveLRTo(C)); + }); + } } // If we dropped all of the candidates, bail out here. @@ -6820,7 +6820,7 @@ static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, // If v8.3a features are available we can replace a RET instruction by // RETAA or RETAB and omit the AUT instructions - if (Subtarget.hasPAuth() && MBBAUT != MBB.end() && + if (Subtarget.hasPAuth() && MBBAUT != MBB.end() && MBBAUT->getOpcode() == AArch64::RET) { BuildMI(MBB, MBBAUT, DL, TII->get(ShouldSignReturnAddrWithAKey ? AArch64::RETAA @@ -6872,12 +6872,12 @@ void AArch64InstrInfo::buildOutlinedFrame( return MI.isCall() && !MI.isReturn(); }; - if (llvm::any_of(MBB.instrs(), IsNonTailCall)) { + if (llvm::any_of(MBB.instrs(), IsNonTailCall)) { // Fix up the instructions in the range, since we're going to modify the // stack. - - // Bugzilla ID: 46767 - // TODO: Check if fixing up twice is safe so we can outline these. + + // Bugzilla ID: 46767 + // TODO: Check if fixing up twice is safe so we can outline these. assert(OF.FrameConstructionID != MachineOutlinerDefault && "Can only fix up stack references once"); fixupPostOutline(MBB); @@ -6934,11 +6934,11 @@ void AArch64InstrInfo::buildOutlinedFrame( // If a bunch of candidates reach this point they must agree on their return // address signing. 
It is therefore enough to just consider the signing // behaviour of one of them - const auto &MFI = *OF.Candidates.front().getMF()->getInfo<AArch64FunctionInfo>(); - bool ShouldSignReturnAddr = MFI.shouldSignReturnAddress(!IsLeafFunction); + const auto &MFI = *OF.Candidates.front().getMF()->getInfo<AArch64FunctionInfo>(); + bool ShouldSignReturnAddr = MFI.shouldSignReturnAddress(!IsLeafFunction); // a_key is the default - bool ShouldSignReturnAddrWithAKey = !MFI.shouldSignWithBKey(); + bool ShouldSignReturnAddrWithAKey = !MFI.shouldSignWithBKey(); // If this is a tail call outlined function, then there's already a return. if (OF.FrameConstructionID == MachineOutlinerTailCall || @@ -7099,7 +7099,7 @@ Optional<RegImmPair> AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, return None; int Shift = MI.getOperand(3).getImm(); assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12"); - Offset = Sign * (MI.getOperand(2).getImm() << Shift); + Offset = Sign * (MI.getOperand(2).getImm() << Shift); } } return RegImmPair{MI.getOperand(1).getReg(), Offset}; @@ -7175,14 +7175,14 @@ uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const { return get(Opc).TSFlags & AArch64::ElementSizeMask; } -bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const { - return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike; -} - -bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const { - return get(Opc).TSFlags & AArch64::InstrFlagIsWhile; -} - +bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const { + return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike; +} + +bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const { + return get(Opc).TSFlags & AArch64::InstrFlagIsWhile; +} + unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) { if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr()) return AArch64::BLRNoIP; |