diff options
author | shadchin <shadchin@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
commit | 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch) | |
tree | 012bb94d777798f1f56ac1cec429509766d05181 /contrib/libs/llvm12/lib/Target/AArch64/AArch64FrameLowering.cpp | |
parent | 6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff) | |
download | ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz |
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Target/AArch64/AArch64FrameLowering.cpp')
-rw-r--r-- | contrib/libs/llvm12/lib/Target/AArch64/AArch64FrameLowering.cpp | 790 |
1 files changed, 395 insertions, 395 deletions
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64FrameLowering.cpp index 65ee501604..9aa8f7a804 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -175,10 +175,10 @@ static cl::opt<bool> StackTaggingMergeSetTag( cl::desc("merge settag instruction in function epilog"), cl::init(true), cl::Hidden); -static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects", - cl::desc("sort stack allocations"), - cl::init(true), cl::Hidden); - +static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects", + cl::desc("sort stack allocations"), + cl::init(true), cl::Hidden); + STATISTIC(NumRedZoneFunctions, "Number of functions using red zone"); /// Returns the argument pop size. @@ -249,7 +249,7 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF) { TargetStackID::Value AArch64FrameLowering::getStackIDForScalableVectors() const { - return TargetStackID::ScalableVector; + return TargetStackID::ScalableVector; } /// Returns the size of the fixed object area (allocated next to sp on entry) @@ -273,7 +273,7 @@ static unsigned getFixedObjectSize(const MachineFunction &MF, /// Returns the size of the entire SVE stackframe (calleesaves + spills). static StackOffset getSVEStackSize(const MachineFunction &MF) { const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); - return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE()); + return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE()); } bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { @@ -365,15 +365,15 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr( // Most call frames will be allocated at the start of a function so // this is OK, but it is a limitation that needs dealing with. assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large"); - emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(Amount), TII); + emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, + StackOffset::getFixed(Amount), TII); } } else if (CalleePopAmount != 0) { // If the calling convention demands that the callee pops arguments from the // stack, we want to add it back if we have a reserved call frame. assert(CalleePopAmount < 0xffffff && "call frame too large"); emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(-(int64_t)CalleePopAmount), TII); + StackOffset::getFixed(-(int64_t)CalleePopAmount), TII); } return MBB.erase(I); } @@ -413,8 +413,8 @@ static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr, MCCFIInstruction AArch64FrameLowering::createDefCFAExpressionFromSP( const TargetRegisterInfo &TRI, const StackOffset &OffsetFromSP) const { int64_t NumBytes, NumVGScaledBytes; - AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(OffsetFromSP, NumBytes, - NumVGScaledBytes); + AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(OffsetFromSP, NumBytes, + NumVGScaledBytes); std::string CommentBuffer = "sp"; llvm::raw_string_ostream Comment(CommentBuffer); @@ -441,8 +441,8 @@ MCCFIInstruction AArch64FrameLowering::createCfaOffset( const TargetRegisterInfo &TRI, unsigned Reg, const StackOffset &OffsetFromDefCFA) const { int64_t NumBytes, NumVGScaledBytes; - AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets( - OffsetFromDefCFA, NumBytes, NumVGScaledBytes); + AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets( + OffsetFromDefCFA, NumBytes, NumVGScaledBytes); unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true); @@ -496,14 +496,14 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves( continue; StackOffset Offset; - if (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector) { + if (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector) { AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); - Offset = - StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) - - StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI)); + Offset = + StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) - + StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI)); } else { - Offset = StackOffset::getFixed(MFI.getObjectOffset(Info.getFrameIdx()) - - getOffsetOfLocalArea()); + Offset = StackOffset::getFixed(MFI.getObjectOffset(Info.getFrameIdx()) - + getOffsetOfLocalArea()); } unsigned CFIIndex = MF.addFrameInst(createCfaOffset(*TRI, Reg, Offset)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) @@ -584,12 +584,12 @@ static bool windowsRequiresStackProbe(MachineFunction &MF, !F.hasFnAttribute("no-stack-arg-probe"); } -static bool needsWinCFI(const MachineFunction &MF) { - const Function &F = MF.getFunction(); - return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && - F.needsUnwindTableEntry(); -} - +static bool needsWinCFI(const MachineFunction &MF) { + const Function &F = MF.getFunction(); + return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && + F.needsUnwindTableEntry(); +} + bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( MachineFunction &MF, uint64_t StackBumpBytes) const { AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); @@ -600,18 +600,18 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( if (AFI->getLocalStackSize() == 0) return false; - // For WinCFI, if optimizing for size, prefer to not combine the stack bump - // (to force a stp with predecrement) to match the packed unwind format, - // provided that there actually are any callee saved registers to merge the - // decrement with. - // This is potentially marginally slower, but allows using the packed - // unwind format for functions that both have a local area and callee saved - // registers. Using the packed unwind format notably reduces the size of - // the unwind info. - if (needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 && - MF.getFunction().hasOptSize()) - return false; - + // For WinCFI, if optimizing for size, prefer to not combine the stack bump + // (to force a stp with predecrement) to match the packed unwind format, + // provided that there actually are any callee saved registers to merge the + // decrement with. + // This is potentially marginally slower, but allows using the packed + // unwind format for functions that both have a local area and callee saved + // registers. Using the packed unwind format notably reduces the size of + // the unwind info. + if (needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 && + MF.getFunction().hasOptSize()) + return false; + // 512 is the maximum immediate for stp/ldp that will be used for // callee-save save/restores if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes)) @@ -1051,16 +1051,16 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // to determine the end of the prologue. DebugLoc DL; - const auto &MFnI = *MF.getInfo<AArch64FunctionInfo>(); - if (MFnI.shouldSignReturnAddress()) { - if (MFnI.shouldSignWithBKey()) { + const auto &MFnI = *MF.getInfo<AArch64FunctionInfo>(); + if (MFnI.shouldSignReturnAddress()) { + if (MFnI.shouldSignWithBKey()) { BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY)) .setMIFlag(MachineInstr::FrameSetup); BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIBSP)) .setMIFlag(MachineInstr::FrameSetup); - } else { - BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIASP)) - .setMIFlag(MachineInstr::FrameSetup); + } else { + BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIASP)) + .setMIFlag(MachineInstr::FrameSetup); } unsigned CFIIndex = @@ -1075,13 +1075,13 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, if (MF.getFunction().getCallingConv() == CallingConv::GHC) return; - // Set tagged base pointer to the requested stack slot. + // Set tagged base pointer to the requested stack slot. // Ideally it should match SP value after prologue. - Optional<int> TBPI = AFI->getTaggedBasePointerIndex(); - if (TBPI) - AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI)); - else - AFI->setTaggedBasePointerOffset(MFI.getStackSize()); + Optional<int> TBPI = AFI->getTaggedBasePointerIndex(); + if (TBPI) + AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI)); + else + AFI->setTaggedBasePointerOffset(MFI.getStackSize()); const StackOffset &SVEStackSize = getSVEStackSize(MF); @@ -1108,8 +1108,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, ++NumRedZoneFunctions; } else { emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(-NumBytes), TII, - MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); + StackOffset::getFixed(-NumBytes), TII, + MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); if (!NeedsWinCFI && needsFrameMoves) { // Label used to tie together the PROLOG_LABEL and the MachineMoves. MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); @@ -1142,8 +1142,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, if (CombineSPBump) { assert(!SVEStackSize && "Cannot combine SP bump with SVE"); emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(-NumBytes), TII, - MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); + StackOffset::getFixed(-NumBytes), TII, + MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); NumBytes = 0; } else if (PrologueSaveSize != 0) { MBBI = convertCalleeSaveRestoreToSPPrePostIncDec( @@ -1167,7 +1167,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // For funclets the FP belongs to the containing function. if (!IsFunclet && HasFP) { // Only set up FP if we actually need to. - int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset(); + int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset(); if (CombineSPBump) FPOffset += AFI->getLocalStackSize(); @@ -1177,8 +1177,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // Note: All stores of callee-saved registers are marked as "FrameSetup". // This code marks the instruction(s) that set the FP also. emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, - StackOffset::getFixed(FPOffset), TII, - MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); + StackOffset::getFixed(FPOffset), TII, + MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); } if (windowsRequiresStackProbe(MF, NumBytes)) { @@ -1288,7 +1288,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, ++MBBI; CalleeSavesEnd = MBBI; - AllocateBefore = StackOffset::getScalable(CalleeSavedSize); + AllocateBefore = StackOffset::getScalable(CalleeSavedSize); AllocateAfter = SVEStackSize - AllocateBefore; } @@ -1320,8 +1320,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // the correct value here, as NumBytes also includes padding bytes, // which shouldn't be counted here. emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, - StackOffset::getFixed(-NumBytes), TII, - MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); + StackOffset::getFixed(-NumBytes), TII, + MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); if (NeedsRealignment) { const unsigned NrBitsToZero = Log2(MFI.getMaxAlign()); @@ -1458,15 +1458,15 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // .cfi_offset w28, -32 if (HasFP) { - const int OffsetToFirstCalleeSaveFromFP = - AFI->getCalleeSaveBaseToFrameRecordOffset() - - AFI->getCalleeSavedStackSize(); - Register FramePtr = RegInfo->getFrameRegister(MF); - + const int OffsetToFirstCalleeSaveFromFP = + AFI->getCalleeSaveBaseToFrameRecordOffset() - + AFI->getCalleeSavedStackSize(); + Register FramePtr = RegInfo->getFrameRegister(MF); + // Define the current CFA rule to use the provided FP. unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true); unsigned CFIIndex = MF.addFrameInst( - MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP)); + MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); @@ -1476,7 +1476,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, const TargetSubtargetInfo &STI = MF.getSubtarget(); const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); StackOffset TotalSize = - SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize()); + SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize()); CFIIndex = MF.addFrameInst(createDefCFAExpressionFromSP(TRI, TotalSize)); } else { // Encode the stack size of the leaf function. @@ -1496,8 +1496,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, static void InsertReturnAddressAuth(MachineFunction &MF, MachineBasicBlock &MBB) { - const auto &MFI = *MF.getInfo<AArch64FunctionInfo>(); - if (!MFI.shouldSignReturnAddress()) + const auto &MFI = *MF.getInfo<AArch64FunctionInfo>(); + if (!MFI.shouldSignReturnAddress()) return; const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); @@ -1511,16 +1511,16 @@ static void InsertReturnAddressAuth(MachineFunction &MF, // this instruction can safely used for any v8a architecture. // From v8.3a onwards there are optimised authenticate LR and return // instructions, namely RETA{A,B}, that can be used instead. - if (Subtarget.hasPAuth() && MBBI != MBB.end() && + if (Subtarget.hasPAuth() && MBBI != MBB.end() && MBBI->getOpcode() == AArch64::RET_ReallyLR) { BuildMI(MBB, MBBI, DL, - TII->get(MFI.shouldSignWithBKey() ? AArch64::RETAB : AArch64::RETAA)) + TII->get(MFI.shouldSignWithBKey() ? AArch64::RETAB : AArch64::RETAA)) .copyImplicitOps(*MBBI); MBB.erase(MBBI); } else { BuildMI( MBB, MBBI, DL, - TII->get(MFI.shouldSignWithBKey() ? AArch64::AUTIBSP : AArch64::AUTIASP)) + TII->get(MFI.shouldSignWithBKey() ? AArch64::AUTIBSP : AArch64::AUTIASP)) .setMIFlag(MachineInstr::FrameDestroy); } } @@ -1545,7 +1545,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, bool NeedsWinCFI = needsWinCFI(MF); bool HasWinCFI = false; bool IsFunclet = false; - auto WinCFI = make_scope_exit([&]() { assert(HasWinCFI == MF.hasWinCFI()); }); + auto WinCFI = make_scope_exit([&]() { assert(HasWinCFI == MF.hasWinCFI()); }); if (MBB.end() != MBBI) { DL = MBBI->getDebugLoc(); @@ -1645,13 +1645,13 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, NeedsWinCFI, &HasWinCFI); } - if (MF.hasWinCFI()) { - // If the prologue didn't contain any SEH opcodes and didn't set the - // MF.hasWinCFI() flag, assume the epilogue won't either, and skip the - // EpilogStart - to avoid generating CFI for functions that don't need it. - // (And as we didn't generate any prologue at all, it would be asymmetrical - // to the epilogue.) By the end of the function, we assert that - // HasWinCFI is equal to MF.hasWinCFI(), to verify this assumption. + if (MF.hasWinCFI()) { + // If the prologue didn't contain any SEH opcodes and didn't set the + // MF.hasWinCFI() flag, assume the epilogue won't either, and skip the + // EpilogStart - to avoid generating CFI for functions that don't need it. + // (And as we didn't generate any prologue at all, it would be asymmetrical + // to the epilogue.) By the end of the function, we assert that + // HasWinCFI is equal to MF.hasWinCFI(), to verify this assumption. HasWinCFI = true; BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart)) .setMIFlag(MachineInstr::FrameDestroy); @@ -1663,10 +1663,10 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, if (CombineSPBump) { assert(!SVEStackSize && "Cannot combine SP bump with SVE"); emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(NumBytes + (int64_t)AfterCSRPopSize), - TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, - &HasWinCFI); - if (HasWinCFI) + StackOffset::getFixed(NumBytes + (int64_t)AfterCSRPopSize), + TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, + &HasWinCFI); + if (HasWinCFI) BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd)) .setMIFlag(MachineInstr::FrameDestroy); @@ -1689,8 +1689,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, assert(IsSVECalleeSave(RestoreBegin) && IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction"); - StackOffset CalleeSavedSizeAsOffset = - StackOffset::getScalable(CalleeSavedSize); + StackOffset CalleeSavedSizeAsOffset = + StackOffset::getScalable(CalleeSavedSize); DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset; DeallocateAfter = CalleeSavedSizeAsOffset; } @@ -1703,15 +1703,15 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // be reloaded. The code below will deallocate the stack space // space by moving FP -> SP. emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP, - StackOffset::getScalable(-CalleeSavedSize), TII, + StackOffset::getScalable(-CalleeSavedSize), TII, MachineInstr::FrameDestroy); } else { if (AFI->getSVECalleeSavedStackSize()) { // Deallocate the non-SVE locals first before we can deallocate (and // restore callee saves) from the SVE area. emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(NumBytes), TII, - MachineInstr::FrameDestroy); + StackOffset::getFixed(NumBytes), TII, + MachineInstr::FrameDestroy); NumBytes = 0; } @@ -1744,10 +1744,10 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI); emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(StackRestoreBytes), TII, + StackOffset::getFixed(StackRestoreBytes), TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); if (Done) { - if (HasWinCFI) { + if (HasWinCFI) { BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd)) .setMIFlag(MachineInstr::FrameDestroy); @@ -1763,14 +1763,14 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // non-post-indexed loads for the restores if we aren't actually going to // be able to save any instructions. if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) { - emitFrameOffset( - MBB, LastPopI, DL, AArch64::SP, AArch64::FP, - StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()), - TII, MachineInstr::FrameDestroy, false, NeedsWinCFI); + emitFrameOffset( + MBB, LastPopI, DL, AArch64::SP, AArch64::FP, + StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()), + TII, MachineInstr::FrameDestroy, false, NeedsWinCFI); } else if (NumBytes) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(NumBytes), TII, - MachineInstr::FrameDestroy, false, NeedsWinCFI); + StackOffset::getFixed(NumBytes), TII, + MachineInstr::FrameDestroy, false, NeedsWinCFI); // This must be placed after the callee-save restore code because that code // assumes the SP is at the same location as it was after the callee-save save @@ -1791,10 +1791,10 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, adaptForLdStOpt(MBB, FirstSPPopI, LastPopI); emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed((int64_t)AfterCSRPopSize), TII, + StackOffset::getFixed((int64_t)AfterCSRPopSize), TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); } - if (HasWinCFI) + if (HasWinCFI) BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd)) .setMIFlag(MachineInstr::FrameDestroy); } @@ -1803,51 +1803,51 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, /// debug info. It's the same as what we use for resolving the code-gen /// references for now. FIXME: This can go wrong when references are /// SP-relative and simple call frames aren't used. -StackOffset -AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, - Register &FrameReg) const { +StackOffset +AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, + Register &FrameReg) const { return resolveFrameIndexReference( - MF, FI, FrameReg, - /*PreferFP=*/ - MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress), - /*ForSimm=*/false); + MF, FI, FrameReg, + /*PreferFP=*/ + MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress), + /*ForSimm=*/false); } -StackOffset -AArch64FrameLowering::getNonLocalFrameIndexReference(const MachineFunction &MF, - int FI) const { - return StackOffset::getFixed(getSEHFrameIndexOffset(MF, FI)); +StackOffset +AArch64FrameLowering::getNonLocalFrameIndexReference(const MachineFunction &MF, + int FI) const { + return StackOffset::getFixed(getSEHFrameIndexOffset(MF, FI)); } -static StackOffset getFPOffset(const MachineFunction &MF, - int64_t ObjectOffset) { +static StackOffset getFPOffset(const MachineFunction &MF, + int64_t ObjectOffset) { const auto *AFI = MF.getInfo<AArch64FunctionInfo>(); const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, /*IsFunclet=*/false); - int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo()); - int64_t FPAdjust = - CalleeSaveSize - AFI->getCalleeSaveBaseToFrameRecordOffset(); - return StackOffset::getFixed(ObjectOffset + FixedObject + FPAdjust); + int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo()); + int64_t FPAdjust = + CalleeSaveSize - AFI->getCalleeSaveBaseToFrameRecordOffset(); + return StackOffset::getFixed(ObjectOffset + FixedObject + FPAdjust); } -static StackOffset getStackOffset(const MachineFunction &MF, - int64_t ObjectOffset) { +static StackOffset getStackOffset(const MachineFunction &MF, + int64_t ObjectOffset) { const auto &MFI = MF.getFrameInfo(); - return StackOffset::getFixed(ObjectOffset + (int64_t)MFI.getStackSize()); + return StackOffset::getFixed(ObjectOffset + (int64_t)MFI.getStackSize()); } - // TODO: This function currently does not work for scalable vectors. + // TODO: This function currently does not work for scalable vectors. int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF, int FI) const { const auto *RegInfo = static_cast<const AArch64RegisterInfo *>( MF.getSubtarget().getRegisterInfo()); int ObjectOffset = MF.getFrameInfo().getObjectOffset(FI); return RegInfo->getLocalAddressRegister(MF) == AArch64::FP - ? getFPOffset(MF, ObjectOffset).getFixed() - : getStackOffset(MF, ObjectOffset).getFixed(); + ? getFPOffset(MF, ObjectOffset).getFixed() + : getStackOffset(MF, ObjectOffset).getFixed(); } StackOffset AArch64FrameLowering::resolveFrameIndexReference( @@ -1856,7 +1856,7 @@ StackOffset AArch64FrameLowering::resolveFrameIndexReference( const auto &MFI = MF.getFrameInfo(); int64_t ObjectOffset = MFI.getObjectOffset(FI); bool isFixed = MFI.isFixedObjectIndex(FI); - bool isSVE = MFI.getStackID(FI) == TargetStackID::ScalableVector; + bool isSVE = MFI.getStackID(FI) == TargetStackID::ScalableVector; return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, isSVE, FrameReg, PreferFP, ForSimm); } @@ -1870,8 +1870,8 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( const auto *AFI = MF.getInfo<AArch64FunctionInfo>(); const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); - int64_t FPOffset = getFPOffset(MF, ObjectOffset).getFixed(); - int64_t Offset = getStackOffset(MF, ObjectOffset).getFixed(); + int64_t FPOffset = getFPOffset(MF, ObjectOffset).getFixed(); + int64_t Offset = getStackOffset(MF, ObjectOffset).getFixed(); bool isCSR = !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI)); @@ -1946,16 +1946,16 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( "non-argument/CSR objects cannot be accessed through the frame pointer"); if (isSVE) { - StackOffset FPOffset = - StackOffset::get(-AFI->getCalleeSaveBaseToFrameRecordOffset(), ObjectOffset); - StackOffset SPOffset = - SVEStackSize + - StackOffset::get(MFI.getStackSize() - AFI->getCalleeSavedStackSize(), - ObjectOffset); + StackOffset FPOffset = + StackOffset::get(-AFI->getCalleeSaveBaseToFrameRecordOffset(), ObjectOffset); + StackOffset SPOffset = + SVEStackSize + + StackOffset::get(MFI.getStackSize() - AFI->getCalleeSavedStackSize(), + ObjectOffset); // Always use the FP for SVE spills if available and beneficial. if (hasFP(MF) && - (SPOffset.getFixed() || - FPOffset.getScalable() < SPOffset.getScalable() || + (SPOffset.getFixed() || + FPOffset.getScalable() < SPOffset.getScalable() || RegInfo->needsStackRealignment(MF))) { FrameReg = RegInfo->getFrameRegister(MF); return FPOffset; @@ -1974,7 +1974,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( if (UseFP) { FrameReg = RegInfo->getFrameRegister(MF); - return StackOffset::getFixed(FPOffset) + ScalableOffset; + return StackOffset::getFixed(FPOffset) + ScalableOffset; } // Use the base pointer if we have one. @@ -1991,7 +1991,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( Offset -= AFI->getLocalStackSize(); } - return StackOffset::getFixed(Offset) + ScalableOffset; + return StackOffset::getFixed(Offset) + ScalableOffset; } static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) { @@ -2013,12 +2013,12 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) { } static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, - bool NeedsWinCFI, bool IsFirst) { + bool NeedsWinCFI, bool IsFirst) { // If we are generating register pairs for a Windows function that requires // EH support, then pair consecutive registers only. There are no unwind // opcodes for saves/restores of non-consectuve register pairs. - // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_frepg_x, - // save_lrpair. + // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_frepg_x, + // save_lrpair. // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling if (Reg2 == AArch64::FP) @@ -2027,14 +2027,14 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, return false; if (Reg2 == Reg1 + 1) return false; - // If pairing a GPR with LR, the pair can be described by the save_lrpair - // opcode. If this is the first register pair, it would end up with a - // predecrement, but there's no save_lrpair_x opcode, so we can only do this - // if LR is paired with something else than the first register. - // The save_lrpair opcode requires the first register to be an odd one. - if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 && - (Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst) - return false; + // If pairing a GPR with LR, the pair can be described by the save_lrpair + // opcode. If this is the first register pair, it would end up with a + // predecrement, but there's no save_lrpair_x opcode, so we can only do this + // if LR is paired with something else than the first register. + // The save_lrpair opcode requires the first register to be an odd one. + if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 && + (Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst) + return false; return true; } @@ -2043,10 +2043,10 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, /// LR and FP need to be allocated together when the frame needs to save /// the frame-record. This means any other register pairing with LR is invalid. static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2, - bool UsesWinAAPCS, bool NeedsWinCFI, - bool NeedsFrameRecord, bool IsFirst) { + bool UsesWinAAPCS, bool NeedsWinCFI, + bool NeedsFrameRecord, bool IsFirst) { if (UsesWinAAPCS) - return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI, IsFirst); + return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI, IsFirst); // If we need to store the frame record, don't pair any register // with LR other than FP. @@ -2110,22 +2110,22 @@ static void computeCalleeSaveRegisterPairs( (Count & 1) == 0) && "Odd number of callee-saved regs to spill!"); int ByteOffset = AFI->getCalleeSavedStackSize(); - int StackFillDir = -1; - int RegInc = 1; - unsigned FirstReg = 0; - if (NeedsWinCFI) { - // For WinCFI, fill the stack from the bottom up. - ByteOffset = 0; - StackFillDir = 1; - // As the CSI array is reversed to match PrologEpilogInserter, iterate - // backwards, to pair up registers starting from lower numbered registers. - RegInc = -1; - FirstReg = Count - 1; - } + int StackFillDir = -1; + int RegInc = 1; + unsigned FirstReg = 0; + if (NeedsWinCFI) { + // For WinCFI, fill the stack from the bottom up. + ByteOffset = 0; + StackFillDir = 1; + // As the CSI array is reversed to match PrologEpilogInserter, iterate + // backwards, to pair up registers starting from lower numbered registers. + RegInc = -1; + FirstReg = Count - 1; + } int ScalableByteOffset = AFI->getSVECalleeSavedStackSize(); - // When iterating backwards, the loop condition relies on unsigned wraparound. - for (unsigned i = FirstReg; i < Count; i += RegInc) { + // When iterating backwards, the loop condition relies on unsigned wraparound. + for (unsigned i = FirstReg; i < Count; i += RegInc) { RegPairInfo RPI; RPI.Reg1 = CSI[i].getReg(); @@ -2143,20 +2143,20 @@ static void computeCalleeSaveRegisterPairs( llvm_unreachable("Unsupported register class."); // Add the next reg to the pair if it is in the same register class. - if (unsigned(i + RegInc) < Count) { - unsigned NextReg = CSI[i + RegInc].getReg(); - bool IsFirst = i == FirstReg; + if (unsigned(i + RegInc) < Count) { + unsigned NextReg = CSI[i + RegInc].getReg(); + bool IsFirst = i == FirstReg; switch (RPI.Type) { case RegPairInfo::GPR: if (AArch64::GPR64RegClass.contains(NextReg) && - !invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows, - NeedsWinCFI, NeedsFrameRecord, IsFirst)) + !invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows, + NeedsWinCFI, NeedsFrameRecord, IsFirst)) RPI.Reg2 = NextReg; break; case RegPairInfo::FPR64: if (AArch64::FPR64RegClass.contains(NextReg) && - !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI, - IsFirst)) + !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI, + IsFirst)) RPI.Reg2 = NextReg; break; case RegPairInfo::FPR128: @@ -2185,7 +2185,7 @@ static void computeCalleeSaveRegisterPairs( // The order of the registers in the list is controlled by // getCalleeSavedRegs(), so they will always be in-order, as well. assert((!RPI.isPaired() || - (CSI[i].getFrameIdx() + RegInc == CSI[i + RegInc].getFrameIdx())) && + (CSI[i].getFrameIdx() + RegInc == CSI[i + RegInc].getFrameIdx())) && "Out of order callee saved regs!"); assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP || @@ -2207,73 +2207,73 @@ static void computeCalleeSaveRegisterPairs( "Callee-save registers not saved as adjacent register pair!"); RPI.FrameIdx = CSI[i].getFrameIdx(); - if (NeedsWinCFI && - RPI.isPaired()) // RPI.FrameIdx must be the lower index of the pair - RPI.FrameIdx = CSI[i + RegInc].getFrameIdx(); + if (NeedsWinCFI && + RPI.isPaired()) // RPI.FrameIdx must be the lower index of the pair + RPI.FrameIdx = CSI[i + RegInc].getFrameIdx(); int Scale = RPI.getScale(); - - int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset; - assert(OffsetPre % Scale == 0); - + + int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset; + assert(OffsetPre % Scale == 0); + if (RPI.isScalable()) - ScalableByteOffset += StackFillDir * Scale; + ScalableByteOffset += StackFillDir * Scale; else - ByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale); + ByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale); assert(!(RPI.isScalable() && RPI.isPaired()) && "Paired spill/fill instructions don't exist for SVE vectors"); // Round up size of non-pair to pair size if we need to pad the // callee-save area to ensure 16-byte alignment. - if (AFI->hasCalleeSaveStackFreeSpace() && !NeedsWinCFI && + if (AFI->hasCalleeSaveStackFreeSpace() && !NeedsWinCFI && !RPI.isScalable() && RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired()) { - ByteOffset += 8 * StackFillDir; + ByteOffset += 8 * StackFillDir; assert(ByteOffset % 16 == 0); assert(MFI.getObjectAlign(RPI.FrameIdx) <= Align(16)); - // A stack frame with a gap looks like this, bottom up: - // d9, d8. x21, gap, x20, x19. - // Set extra alignment on the x21 object (the only unpaired register) - // to create the gap above it. + // A stack frame with a gap looks like this, bottom up: + // d9, d8. x21, gap, x20, x19. + // Set extra alignment on the x21 object (the only unpaired register) + // to create the gap above it. MFI.setObjectAlignment(RPI.FrameIdx, Align(16)); } - int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset; - assert(OffsetPost % Scale == 0); - // If filling top down (default), we want the offset after incrementing it. - // If fillibg bootom up (WinCFI) we need the original offset. - int Offset = NeedsWinCFI ? OffsetPre : OffsetPost; + int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset; + assert(OffsetPost % Scale == 0); + // If filling top down (default), we want the offset after incrementing it. + // If fillibg bootom up (WinCFI) we need the original offset. + int Offset = NeedsWinCFI ? OffsetPre : OffsetPost; RPI.Offset = Offset / Scale; assert(((!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) || (RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) && "Offset out of bounds for LDP/STP immediate"); - // Save the offset to frame record so that the FP register can point to the - // innermost frame record (spilled FP and LR registers). - if (NeedsFrameRecord && ((!IsWindows && RPI.Reg1 == AArch64::LR && - RPI.Reg2 == AArch64::FP) || - (IsWindows && RPI.Reg1 == AArch64::FP && - RPI.Reg2 == AArch64::LR))) - AFI->setCalleeSaveBaseToFrameRecordOffset(Offset); - + // Save the offset to frame record so that the FP register can point to the + // innermost frame record (spilled FP and LR registers). + if (NeedsFrameRecord && ((!IsWindows && RPI.Reg1 == AArch64::LR && + RPI.Reg2 == AArch64::FP) || + (IsWindows && RPI.Reg1 == AArch64::FP && + RPI.Reg2 == AArch64::LR))) + AFI->setCalleeSaveBaseToFrameRecordOffset(Offset); + RegPairs.push_back(RPI); if (RPI.isPaired()) - i += RegInc; - } - if (NeedsWinCFI) { - // If we need an alignment gap in the stack, align the topmost stack - // object. A stack frame with a gap looks like this, bottom up: - // x19, d8. d9, gap. - // Set extra alignment on the topmost stack object (the first element in - // CSI, which goes top down), to create the gap above it. - if (AFI->hasCalleeSaveStackFreeSpace()) - MFI.setObjectAlignment(CSI[0].getFrameIdx(), Align(16)); - // We iterated bottom up over the registers; flip RegPairs back to top - // down order. - std::reverse(RegPairs.begin(), RegPairs.end()); - } + i += RegInc; + } + if (NeedsWinCFI) { + // If we need an alignment gap in the stack, align the topmost stack + // object. A stack frame with a gap looks like this, bottom up: + // x19, d8. d9, gap. + // Set extra alignment on the topmost stack object (the first element in + // CSI, which goes top down), to create the gap above it. + if (AFI->hasCalleeSaveStackFreeSpace()) + MFI.setObjectAlignment(CSI[0].getFrameIdx(), Align(16)); + // We iterated bottom up over the registers; flip RegPairs back to top + // down order. + std::reverse(RegPairs.begin(), RegPairs.end()); + } } bool AArch64FrameLowering::spillCalleeSavedRegisters( @@ -2412,7 +2412,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( // Update the StackIDs of the SVE stack slots. MachineFrameInfo &MFI = MF.getFrameInfo(); if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR) - MFI.setStackID(RPI.FrameIdx, TargetStackID::ScalableVector); + MFI.setStackID(RPI.FrameIdx, TargetStackID::ScalableVector); } return true; @@ -2704,21 +2704,21 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16)); } -bool AArch64FrameLowering::assignCalleeSavedSpillSlots( - MachineFunction &MF, const TargetRegisterInfo *TRI, - std::vector<CalleeSavedInfo> &CSI) const { - bool NeedsWinCFI = needsWinCFI(MF); - // To match the canonical windows frame layout, reverse the list of - // callee saved registers to get them laid out by PrologEpilogInserter - // in the right order. (PrologEpilogInserter allocates stack objects top - // down. Windows canonical prologs store higher numbered registers at - // the top, thus have the CSI array start from the highest registers.) - if (NeedsWinCFI) - std::reverse(CSI.begin(), CSI.end()); - // Let the generic code do the rest of the setup. - return false; -} - +bool AArch64FrameLowering::assignCalleeSavedSpillSlots( + MachineFunction &MF, const TargetRegisterInfo *TRI, + std::vector<CalleeSavedInfo> &CSI) const { + bool NeedsWinCFI = needsWinCFI(MF); + // To match the canonical windows frame layout, reverse the list of + // callee saved registers to get them laid out by PrologEpilogInserter + // in the right order. (PrologEpilogInserter allocates stack objects top + // down. Windows canonical prologs store higher numbered registers at + // the top, thus have the CSI array start from the highest registers.) + if (NeedsWinCFI) + std::reverse(CSI.begin(), CSI.end()); + // Let the generic code do the rest of the setup. + return false; +} + bool AArch64FrameLowering::enableStackSlotScavenging( const MachineFunction &MF) const { const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); @@ -2761,7 +2761,7 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI, #ifndef NDEBUG // First process all fixed stack objects. for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) - assert(MFI.getStackID(I) != TargetStackID::ScalableVector && + assert(MFI.getStackID(I) != TargetStackID::ScalableVector && "SVE vectors should never be passed on the stack by value, only by " "reference."); #endif @@ -2791,7 +2791,7 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI, SmallVector<int, 8> ObjectsToAllocate; for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) { unsigned StackID = MFI.getStackID(I); - if (StackID != TargetStackID::ScalableVector) + if (StackID != TargetStackID::ScalableVector) continue; if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex) continue; @@ -2945,12 +2945,12 @@ void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) { const int64_t kMaxOffset = 255 * 16; Register BaseReg = FrameReg; - int64_t BaseRegOffsetBytes = FrameRegOffset.getFixed(); + int64_t BaseRegOffsetBytes = FrameRegOffset.getFixed(); if (BaseRegOffsetBytes < kMinOffset || BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset) { Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass); emitFrameOffset(*MBB, InsertI, DL, ScratchReg, BaseReg, - StackOffset::getFixed(BaseRegOffsetBytes), TII); + StackOffset::getFixed(BaseRegOffsetBytes), TII); BaseReg = ScratchReg; BaseRegOffsetBytes = 0; } @@ -3007,7 +3007,7 @@ void TagStoreEdit::emitLoop(MachineBasicBlock::iterator InsertI) { LoopI->setFlags(FrameRegUpdateFlags); int64_t ExtraBaseRegUpdate = - FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0; + FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0; if (LoopSize < Size) { assert(FrameRegUpdate); assert(Size - LoopSize == 16); @@ -3111,7 +3111,7 @@ void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI, // realistically happens in function epilogue. Also, STGloop is expanded // before that pass. if (InsertI != MBB->end() && - canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getFixed() + Size, + canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getFixed() + Size, &TotalOffset)) { UpdateInstr = &*InsertI++; LLVM_DEBUG(dbgs() << "Folding SP update into loop:\n " @@ -3274,7 +3274,7 @@ void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced( /// For Win64 AArch64 EH, the offset to the Unwind object is from the SP /// before the update. This is easily retrieved as it is exactly the offset /// that is set in processFunctionBeforeFrameFinalized. -StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP( +StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP( const MachineFunction &MF, int FI, Register &FrameReg, bool IgnoreSPUpdates) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -3282,7 +3282,7 @@ StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP( LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is " << MFI.getObjectOffset(FI) << "\n"); FrameReg = AArch64::SP; - return StackOffset::getFixed(MFI.getObjectOffset(FI)); + return StackOffset::getFixed(MFI.getObjectOffset(FI)); } return getFrameIndexReference(MF, FI, FrameReg); @@ -3306,162 +3306,162 @@ unsigned AArch64FrameLowering::getWinEHFuncletFrameSize( return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(), getStackAlign()); } - -namespace { -struct FrameObject { - bool IsValid = false; - // Index of the object in MFI. - int ObjectIndex = 0; - // Group ID this object belongs to. - int GroupIndex = -1; - // This object should be placed first (closest to SP). - bool ObjectFirst = false; - // This object's group (which always contains the object with - // ObjectFirst==true) should be placed first. - bool GroupFirst = false; -}; - -class GroupBuilder { - SmallVector<int, 8> CurrentMembers; - int NextGroupIndex = 0; - std::vector<FrameObject> &Objects; - -public: - GroupBuilder(std::vector<FrameObject> &Objects) : Objects(Objects) {} - void AddMember(int Index) { CurrentMembers.push_back(Index); } - void EndCurrentGroup() { - if (CurrentMembers.size() > 1) { - // Create a new group with the current member list. This might remove them - // from their pre-existing groups. That's OK, dealing with overlapping - // groups is too hard and unlikely to make a difference. - LLVM_DEBUG(dbgs() << "group:"); - for (int Index : CurrentMembers) { - Objects[Index].GroupIndex = NextGroupIndex; - LLVM_DEBUG(dbgs() << " " << Index); - } - LLVM_DEBUG(dbgs() << "\n"); - NextGroupIndex++; - } - CurrentMembers.clear(); - } -}; - -bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) { - // Objects at a lower index are closer to FP; objects at a higher index are - // closer to SP. - // - // For consistency in our comparison, all invalid objects are placed - // at the end. This also allows us to stop walking when we hit the - // first invalid item after it's all sorted. - // - // The "first" object goes first (closest to SP), followed by the members of - // the "first" group. - // - // The rest are sorted by the group index to keep the groups together. - // Higher numbered groups are more likely to be around longer (i.e. untagged - // in the function epilogue and not at some earlier point). Place them closer - // to SP. - // - // If all else equal, sort by the object index to keep the objects in the - // original order. - return std::make_tuple(!A.IsValid, A.ObjectFirst, A.GroupFirst, A.GroupIndex, - A.ObjectIndex) < - std::make_tuple(!B.IsValid, B.ObjectFirst, B.GroupFirst, B.GroupIndex, - B.ObjectIndex); -} -} // namespace - -void AArch64FrameLowering::orderFrameObjects( - const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const { - if (!OrderFrameObjects || ObjectsToAllocate.empty()) - return; - - const MachineFrameInfo &MFI = MF.getFrameInfo(); - std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd()); - for (auto &Obj : ObjectsToAllocate) { - FrameObjects[Obj].IsValid = true; - FrameObjects[Obj].ObjectIndex = Obj; - } - - // Identify stack slots that are tagged at the same time. - GroupBuilder GB(FrameObjects); - for (auto &MBB : MF) { - for (auto &MI : MBB) { - if (MI.isDebugInstr()) - continue; - int OpIndex; - switch (MI.getOpcode()) { - case AArch64::STGloop: - case AArch64::STZGloop: - OpIndex = 3; - break; - case AArch64::STGOffset: - case AArch64::STZGOffset: - case AArch64::ST2GOffset: - case AArch64::STZ2GOffset: - OpIndex = 1; - break; - default: - OpIndex = -1; - } - - int TaggedFI = -1; - if (OpIndex >= 0) { - const MachineOperand &MO = MI.getOperand(OpIndex); - if (MO.isFI()) { - int FI = MO.getIndex(); - if (FI >= 0 && FI < MFI.getObjectIndexEnd() && - FrameObjects[FI].IsValid) - TaggedFI = FI; - } - } - - // If this is a stack tagging instruction for a slot that is not part of a - // group yet, either start a new group or add it to the current one. - if (TaggedFI >= 0) - GB.AddMember(TaggedFI); - else - GB.EndCurrentGroup(); - } - // Groups should never span multiple basic blocks. - GB.EndCurrentGroup(); - } - - // If the function's tagged base pointer is pinned to a stack slot, we want to - // put that slot first when possible. This will likely place it at SP + 0, - // and save one instruction when generating the base pointer because IRG does - // not allow an immediate offset. - const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>(); - Optional<int> TBPI = AFI.getTaggedBasePointerIndex(); - if (TBPI) { - FrameObjects[*TBPI].ObjectFirst = true; - FrameObjects[*TBPI].GroupFirst = true; - int FirstGroupIndex = FrameObjects[*TBPI].GroupIndex; - if (FirstGroupIndex >= 0) - for (FrameObject &Object : FrameObjects) - if (Object.GroupIndex == FirstGroupIndex) - Object.GroupFirst = true; - } - - llvm::stable_sort(FrameObjects, FrameObjectCompare); - - int i = 0; - for (auto &Obj : FrameObjects) { - // All invalid items are sorted at the end, so it's safe to stop. - if (!Obj.IsValid) - break; - ObjectsToAllocate[i++] = Obj.ObjectIndex; - } - - LLVM_DEBUG(dbgs() << "Final frame order:\n"; for (auto &Obj - : FrameObjects) { - if (!Obj.IsValid) - break; - dbgs() << " " << Obj.ObjectIndex << ": group " << Obj.GroupIndex; - if (Obj.ObjectFirst) - dbgs() << ", first"; - if (Obj.GroupFirst) - dbgs() << ", group-first"; - dbgs() << "\n"; - }); -} + +namespace { +struct FrameObject { + bool IsValid = false; + // Index of the object in MFI. + int ObjectIndex = 0; + // Group ID this object belongs to. + int GroupIndex = -1; + // This object should be placed first (closest to SP). + bool ObjectFirst = false; + // This object's group (which always contains the object with + // ObjectFirst==true) should be placed first. + bool GroupFirst = false; +}; + +class GroupBuilder { + SmallVector<int, 8> CurrentMembers; + int NextGroupIndex = 0; + std::vector<FrameObject> &Objects; + +public: + GroupBuilder(std::vector<FrameObject> &Objects) : Objects(Objects) {} + void AddMember(int Index) { CurrentMembers.push_back(Index); } + void EndCurrentGroup() { + if (CurrentMembers.size() > 1) { + // Create a new group with the current member list. This might remove them + // from their pre-existing groups. That's OK, dealing with overlapping + // groups is too hard and unlikely to make a difference. + LLVM_DEBUG(dbgs() << "group:"); + for (int Index : CurrentMembers) { + Objects[Index].GroupIndex = NextGroupIndex; + LLVM_DEBUG(dbgs() << " " << Index); + } + LLVM_DEBUG(dbgs() << "\n"); + NextGroupIndex++; + } + CurrentMembers.clear(); + } +}; + +bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) { + // Objects at a lower index are closer to FP; objects at a higher index are + // closer to SP. + // + // For consistency in our comparison, all invalid objects are placed + // at the end. This also allows us to stop walking when we hit the + // first invalid item after it's all sorted. + // + // The "first" object goes first (closest to SP), followed by the members of + // the "first" group. + // + // The rest are sorted by the group index to keep the groups together. + // Higher numbered groups are more likely to be around longer (i.e. untagged + // in the function epilogue and not at some earlier point). Place them closer + // to SP. + // + // If all else equal, sort by the object index to keep the objects in the + // original order. + return std::make_tuple(!A.IsValid, A.ObjectFirst, A.GroupFirst, A.GroupIndex, + A.ObjectIndex) < + std::make_tuple(!B.IsValid, B.ObjectFirst, B.GroupFirst, B.GroupIndex, + B.ObjectIndex); +} +} // namespace + +void AArch64FrameLowering::orderFrameObjects( + const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const { + if (!OrderFrameObjects || ObjectsToAllocate.empty()) + return; + + const MachineFrameInfo &MFI = MF.getFrameInfo(); + std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd()); + for (auto &Obj : ObjectsToAllocate) { + FrameObjects[Obj].IsValid = true; + FrameObjects[Obj].ObjectIndex = Obj; + } + + // Identify stack slots that are tagged at the same time. + GroupBuilder GB(FrameObjects); + for (auto &MBB : MF) { + for (auto &MI : MBB) { + if (MI.isDebugInstr()) + continue; + int OpIndex; + switch (MI.getOpcode()) { + case AArch64::STGloop: + case AArch64::STZGloop: + OpIndex = 3; + break; + case AArch64::STGOffset: + case AArch64::STZGOffset: + case AArch64::ST2GOffset: + case AArch64::STZ2GOffset: + OpIndex = 1; + break; + default: + OpIndex = -1; + } + + int TaggedFI = -1; + if (OpIndex >= 0) { + const MachineOperand &MO = MI.getOperand(OpIndex); + if (MO.isFI()) { + int FI = MO.getIndex(); + if (FI >= 0 && FI < MFI.getObjectIndexEnd() && + FrameObjects[FI].IsValid) + TaggedFI = FI; + } + } + + // If this is a stack tagging instruction for a slot that is not part of a + // group yet, either start a new group or add it to the current one. + if (TaggedFI >= 0) + GB.AddMember(TaggedFI); + else + GB.EndCurrentGroup(); + } + // Groups should never span multiple basic blocks. + GB.EndCurrentGroup(); + } + + // If the function's tagged base pointer is pinned to a stack slot, we want to + // put that slot first when possible. This will likely place it at SP + 0, + // and save one instruction when generating the base pointer because IRG does + // not allow an immediate offset. + const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>(); + Optional<int> TBPI = AFI.getTaggedBasePointerIndex(); + if (TBPI) { + FrameObjects[*TBPI].ObjectFirst = true; + FrameObjects[*TBPI].GroupFirst = true; + int FirstGroupIndex = FrameObjects[*TBPI].GroupIndex; + if (FirstGroupIndex >= 0) + for (FrameObject &Object : FrameObjects) + if (Object.GroupIndex == FirstGroupIndex) + Object.GroupFirst = true; + } + + llvm::stable_sort(FrameObjects, FrameObjectCompare); + + int i = 0; + for (auto &Obj : FrameObjects) { + // All invalid items are sorted at the end, so it's safe to stop. + if (!Obj.IsValid) + break; + ObjectsToAllocate[i++] = Obj.ObjectIndex; + } + + LLVM_DEBUG(dbgs() << "Final frame order:\n"; for (auto &Obj + : FrameObjects) { + if (!Obj.IsValid) + break; + dbgs() << " " << Obj.ObjectIndex << ": group " << Obj.GroupIndex; + if (Obj.ObjectFirst) + dbgs() << ", first"; + if (Obj.GroupFirst) + dbgs() << ", group-first"; + dbgs() << "\n"; + }); +} |