| field | value | date |
|---|---|---|
| author | vitalyisaev <vitalyisaev@yandex-team.com> | 2023-06-29 10:00:50 +0300 |
| committer | vitalyisaev <vitalyisaev@yandex-team.com> | 2023-06-29 10:00:50 +0300 |
| commit | 6ffe9e53658409f212834330e13564e4952558f6 (patch) | |
| tree | 85b1e00183517648b228aafa7c8fb07f5276f419 /contrib/libs/llvm16/lib/Transforms/Utils | |
| parent | 726057070f9c5a91fc10fde0d5024913d10f1ab9 (diff) | |
| download | ydb-6ffe9e53658409f212834330e13564e4952558f6.tar.gz | |
YQ Connector: support managed ClickHouse
From dqrun it is now possible to reach a connector instance running on the streaming stand and extract data from a cloud-hosted ClickHouse.
Diffstat (limited to 'contrib/libs/llvm16/lib/Transforms/Utils')
83 files changed, 63148 insertions, 0 deletions
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp new file mode 100644 index 0000000000..24972db404 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp @@ -0,0 +1,232 @@ +//===- AMDGPUEmitPrintf.cpp -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utility function to lower a printf call into a series of device +// library calls on the AMDGPU target. +// +// WARNING: This file knows about certain library functions. It recognizes them +// by name, and hardwires knowledge of their semantics. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h" +#include "llvm/ADT/SparseBitVector.h" +#include "llvm/Analysis/ValueTracking.h" + +using namespace llvm; + +#define DEBUG_TYPE "amdgpu-emit-printf" + +static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg) { + auto Int64Ty = Builder.getInt64Ty(); + auto Ty = Arg->getType(); + + if (auto IntTy = dyn_cast<IntegerType>(Ty)) { + switch (IntTy->getBitWidth()) { + case 32: + return Builder.CreateZExt(Arg, Int64Ty); + case 64: + return Arg; + } + } + + if (Ty->getTypeID() == Type::DoubleTyID) { + return Builder.CreateBitCast(Arg, Int64Ty); + } + + if (isa<PointerType>(Ty)) { + return Builder.CreatePtrToInt(Arg, Int64Ty); + } + + llvm_unreachable("unexpected type"); +} + +static Value *callPrintfBegin(IRBuilder<> &Builder, Value *Version) { + auto Int64Ty = Builder.getInt64Ty(); + auto M = Builder.GetInsertBlock()->getModule(); + auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty); + return Builder.CreateCall(Fn, Version); +} + +static Value *callAppendArgs(IRBuilder<> &Builder, Value *Desc, int NumArgs, + Value *Arg0, Value *Arg1, Value *Arg2, Value *Arg3, + Value *Arg4, Value *Arg5, Value *Arg6, + bool IsLast) { + auto Int64Ty = Builder.getInt64Ty(); + auto Int32Ty = Builder.getInt32Ty(); + auto M = Builder.GetInsertBlock()->getModule(); + auto Fn = M->getOrInsertFunction("__ockl_printf_append_args", Int64Ty, + Int64Ty, Int32Ty, Int64Ty, Int64Ty, Int64Ty, + Int64Ty, Int64Ty, Int64Ty, Int64Ty, Int32Ty); + auto IsLastValue = Builder.getInt32(IsLast); + auto NumArgsValue = Builder.getInt32(NumArgs); + return Builder.CreateCall(Fn, {Desc, NumArgsValue, Arg0, Arg1, Arg2, Arg3, + Arg4, Arg5, Arg6, IsLastValue}); +} + +static Value *appendArg(IRBuilder<> &Builder, Value *Desc, Value *Arg, + bool IsLast) { + auto Arg0 = fitArgInto64Bits(Builder, Arg); + auto Zero = Builder.getInt64(0); + return callAppendArgs(Builder, Desc, 1, Arg0, Zero, Zero, Zero, Zero, Zero, + Zero, IsLast); +} + +// The device library does not provide strlen, so we build our own loop +// here. While we are at it, we also include the terminating null in the length. +static Value *getStrlenWithNull(IRBuilder<> &Builder, Value *Str) { + auto *Prev = Builder.GetInsertBlock(); + Module *M = Prev->getModule(); + + auto CharZero = Builder.getInt8(0); + auto One = Builder.getInt64(1); + auto Zero = Builder.getInt64(0); + auto Int64Ty = Builder.getInt64Ty(); + + // The length is either zero for a null pointer, or the computed value for an + // actual string. 
We need a join block for a phi that represents the final + // value. + // + // Strictly speaking, the zero does not matter since + // __ockl_printf_append_string_n ignores the length if the pointer is null. + BasicBlock *Join = nullptr; + if (Prev->getTerminator()) { + Join = Prev->splitBasicBlock(Builder.GetInsertPoint(), + "strlen.join"); + Prev->getTerminator()->eraseFromParent(); + } else { + Join = BasicBlock::Create(M->getContext(), "strlen.join", + Prev->getParent()); + } + BasicBlock *While = + BasicBlock::Create(M->getContext(), "strlen.while", + Prev->getParent(), Join); + BasicBlock *WhileDone = BasicBlock::Create( + M->getContext(), "strlen.while.done", + Prev->getParent(), Join); + + // Emit an early return for when the pointer is null. + Builder.SetInsertPoint(Prev); + auto CmpNull = + Builder.CreateICmpEQ(Str, Constant::getNullValue(Str->getType())); + BranchInst::Create(Join, While, CmpNull, Prev); + + // Entry to the while loop. + Builder.SetInsertPoint(While); + + auto PtrPhi = Builder.CreatePHI(Str->getType(), 2); + PtrPhi->addIncoming(Str, Prev); + auto PtrNext = Builder.CreateGEP(Builder.getInt8Ty(), PtrPhi, One); + PtrPhi->addIncoming(PtrNext, While); + + // Condition for the while loop. + auto Data = Builder.CreateLoad(Builder.getInt8Ty(), PtrPhi); + auto Cmp = Builder.CreateICmpEQ(Data, CharZero); + Builder.CreateCondBr(Cmp, WhileDone, While); + + // Add one to the computed length. + Builder.SetInsertPoint(WhileDone, WhileDone->begin()); + auto Begin = Builder.CreatePtrToInt(Str, Int64Ty); + auto End = Builder.CreatePtrToInt(PtrPhi, Int64Ty); + auto Len = Builder.CreateSub(End, Begin); + Len = Builder.CreateAdd(Len, One); + + // Final join. + BranchInst::Create(Join, WhileDone); + Builder.SetInsertPoint(Join, Join->begin()); + auto LenPhi = Builder.CreatePHI(Len->getType(), 2); + LenPhi->addIncoming(Len, WhileDone); + LenPhi->addIncoming(Zero, Prev); + + return LenPhi; +} + +static Value *callAppendStringN(IRBuilder<> &Builder, Value *Desc, Value *Str, + Value *Length, bool isLast) { + auto Int64Ty = Builder.getInt64Ty(); + auto CharPtrTy = Builder.getInt8PtrTy(); + auto Int32Ty = Builder.getInt32Ty(); + auto M = Builder.GetInsertBlock()->getModule(); + auto Fn = M->getOrInsertFunction("__ockl_printf_append_string_n", Int64Ty, + Int64Ty, CharPtrTy, Int64Ty, Int32Ty); + auto IsLastInt32 = Builder.getInt32(isLast); + return Builder.CreateCall(Fn, {Desc, Str, Length, IsLastInt32}); +} + +static Value *appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg, + bool IsLast) { + Arg = Builder.CreateBitCast( + Arg, Builder.getInt8PtrTy(Arg->getType()->getPointerAddressSpace())); + auto Length = getStrlenWithNull(Builder, Arg); + return callAppendStringN(Builder, Desc, Arg, Length, IsLast); +} + +static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg, + bool SpecIsCString, bool IsLast) { + if (SpecIsCString && isa<PointerType>(Arg->getType())) { + return appendString(Builder, Desc, Arg, IsLast); + } + // If the format specifies a string but the argument is not, the frontend will + // have printed a warning. We just rely on undefined behaviour and send the + // argument anyway. + return appendArg(Builder, Desc, Arg, IsLast); +} + +// Scan the format string to locate all specifiers, and mark the ones that +// specify a string, i.e, the "%s" specifier with optional '*' characters. 
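(Editorial aside, not part of the vendored diff: the helpers above lower a printf call into `__ockl_printf_begin` / `__ockl_printf_append_args` / `__ockl_printf_append_string_n` device-library calls. Below is a minimal sketch of driving the public entry point `emitAMDGPUPrintfCall`; the wrapper `lowerExamplePrintf` and the values `IntArg`/`StrArg` are hypothetical, and the sketch assumes `Builder` already has an insertion point inside a function of an AMDGPU module.)

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Value.h"
#include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"

using namespace llvm;

// Hypothetical driver: lower printf("%d: %s\n", IntArg, StrArg) in place.
// IntArg is an existing i32 value, StrArg an existing i8* value.
static Value *lowerExamplePrintf(IRBuilder<> &Builder, Value *IntArg,
                                 Value *StrArg) {
  // The format string is an ordinary constant; locateCStrings() reads it back
  // via getConstantStringInfo() to find which arguments are "%s" strings.
  Value *Fmt = Builder.CreateGlobalStringPtr("%d: %s\n");
  // emitAMDGPUPrintfCall() emits the __ockl_printf_* call chain and returns
  // the descriptor truncated to i32, mirroring printf's return type.
  return emitAMDGPUPrintfCall(Builder, {Fmt, IntArg, StrArg});
}
```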
+static void locateCStrings(SparseBitVector<8> &BV, Value *Fmt) { + StringRef Str; + if (!getConstantStringInfo(Fmt, Str) || Str.empty()) + return; + + static const char ConvSpecifiers[] = "diouxXfFeEgGaAcspn"; + size_t SpecPos = 0; + // Skip the first argument, the format string. + unsigned ArgIdx = 1; + + while ((SpecPos = Str.find_first_of('%', SpecPos)) != StringRef::npos) { + if (Str[SpecPos + 1] == '%') { + SpecPos += 2; + continue; + } + auto SpecEnd = Str.find_first_of(ConvSpecifiers, SpecPos); + if (SpecEnd == StringRef::npos) + return; + auto Spec = Str.slice(SpecPos, SpecEnd + 1); + ArgIdx += Spec.count('*'); + if (Str[SpecEnd] == 's') { + BV.set(ArgIdx); + } + SpecPos = SpecEnd + 1; + ++ArgIdx; + } +} + +Value *llvm::emitAMDGPUPrintfCall(IRBuilder<> &Builder, + ArrayRef<Value *> Args) { + auto NumOps = Args.size(); + assert(NumOps >= 1); + + auto Fmt = Args[0]; + SparseBitVector<8> SpecIsCString; + locateCStrings(SpecIsCString, Fmt); + + auto Desc = callPrintfBegin(Builder, Builder.getIntN(64, 0)); + Desc = appendString(Builder, Desc, Fmt, NumOps == 1); + + // FIXME: This invokes hostcall once for each argument. We can pack up to + // seven scalar printf arguments in a single hostcall. See the signature of + // callAppendArgs(). + for (unsigned int i = 1; i != NumOps; ++i) { + bool IsLast = i == NumOps - 1; + bool IsCString = SpecIsCString.test(i); + Desc = processArg(Builder, Desc, Args[i], IsCString, IsLast); + } + + return Builder.CreateTrunc(Desc, Builder.getInt32Ty()); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/ASanStackFrameLayout.cpp new file mode 100644 index 0000000000..0318429a76 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/ASanStackFrameLayout.cpp @@ -0,0 +1,151 @@ +//===-- ASanStackFrameLayout.cpp - helper for AddressSanitizer ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Definition of ComputeASanStackFrameLayout (see ASanStackFrameLayout.h). +// +//===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/ASanStackFrameLayout.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/ScopedPrinter.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> + +namespace llvm { + +// We sort the stack variables by alignment (largest first) to minimize +// unnecessary large gaps due to alignment. +// It is tempting to also sort variables by size so that larger variables +// have larger redzones at both ends. But reordering will make report analysis +// harder, especially when temporary unnamed variables are present. +// So, until we can provide more information (type, line number, etc) +// for the stack variables we avoid reordering them too much. +static inline bool CompareVars(const ASanStackVariableDescription &a, + const ASanStackVariableDescription &b) { + return a.Alignment > b.Alignment; +} + +// We also force minimal alignment for all vars to kMinAlignment so that vars +// with e.g. alignment 1 and alignment 16 do not get reordered by CompareVars. +static const uint64_t kMinAlignment = 16; + +// We want to add a full redzone after every variable. 
+// The larger the variable Size the larger is the redzone. +// The resulting frame size is a multiple of Alignment. +static uint64_t VarAndRedzoneSize(uint64_t Size, uint64_t Granularity, + uint64_t Alignment) { + uint64_t Res = 0; + if (Size <= 4) Res = 16; + else if (Size <= 16) Res = 32; + else if (Size <= 128) Res = Size + 32; + else if (Size <= 512) Res = Size + 64; + else if (Size <= 4096) Res = Size + 128; + else Res = Size + 256; + return alignTo(std::max(Res, 2 * Granularity), Alignment); +} + +ASanStackFrameLayout +ComputeASanStackFrameLayout(SmallVectorImpl<ASanStackVariableDescription> &Vars, + uint64_t Granularity, uint64_t MinHeaderSize) { + assert(Granularity >= 8 && Granularity <= 64 && + (Granularity & (Granularity - 1)) == 0); + assert(MinHeaderSize >= 16 && (MinHeaderSize & (MinHeaderSize - 1)) == 0 && + MinHeaderSize >= Granularity); + const size_t NumVars = Vars.size(); + assert(NumVars > 0); + for (size_t i = 0; i < NumVars; i++) + Vars[i].Alignment = std::max(Vars[i].Alignment, kMinAlignment); + + llvm::stable_sort(Vars, CompareVars); + + ASanStackFrameLayout Layout; + Layout.Granularity = Granularity; + Layout.FrameAlignment = std::max(Granularity, Vars[0].Alignment); + uint64_t Offset = + std::max(std::max(MinHeaderSize, Granularity), Vars[0].Alignment); + assert((Offset % Granularity) == 0); + for (size_t i = 0; i < NumVars; i++) { + bool IsLast = i == NumVars - 1; + uint64_t Alignment = std::max(Granularity, Vars[i].Alignment); + (void)Alignment; // Used only in asserts. + uint64_t Size = Vars[i].Size; + assert((Alignment & (Alignment - 1)) == 0); + assert(Layout.FrameAlignment >= Alignment); + assert((Offset % Alignment) == 0); + assert(Size > 0); + uint64_t NextAlignment = + IsLast ? Granularity : std::max(Granularity, Vars[i + 1].Alignment); + uint64_t SizeWithRedzone = + VarAndRedzoneSize(Size, Granularity, NextAlignment); + Vars[i].Offset = Offset; + Offset += SizeWithRedzone; + } + if (Offset % MinHeaderSize) { + Offset += MinHeaderSize - (Offset % MinHeaderSize); + } + Layout.FrameSize = Offset; + assert((Layout.FrameSize % MinHeaderSize) == 0); + return Layout; +} + +SmallString<64> ComputeASanStackFrameDescription( + const SmallVectorImpl<ASanStackVariableDescription> &Vars) { + SmallString<2048> StackDescriptionStorage; + raw_svector_ostream StackDescription(StackDescriptionStorage); + StackDescription << Vars.size(); + + for (const auto &Var : Vars) { + std::string Name = Var.Name; + if (Var.Line) { + Name += ":"; + Name += to_string(Var.Line); + } + StackDescription << " " << Var.Offset << " " << Var.Size << " " + << Name.size() << " " << Name; + } + return StackDescription.str(); +} + +SmallVector<uint8_t, 64> +GetShadowBytes(const SmallVectorImpl<ASanStackVariableDescription> &Vars, + const ASanStackFrameLayout &Layout) { + assert(Vars.size() > 0); + SmallVector<uint8_t, 64> SB; + SB.clear(); + const uint64_t Granularity = Layout.Granularity; + SB.resize(Vars[0].Offset / Granularity, kAsanStackLeftRedzoneMagic); + for (const auto &Var : Vars) { + SB.resize(Var.Offset / Granularity, kAsanStackMidRedzoneMagic); + + SB.resize(SB.size() + Var.Size / Granularity, 0); + if (Var.Size % Granularity) + SB.push_back(Var.Size % Granularity); + } + SB.resize(Layout.FrameSize / Granularity, kAsanStackRightRedzoneMagic); + return SB; +} + +SmallVector<uint8_t, 64> GetShadowBytesAfterScope( + const SmallVectorImpl<ASanStackVariableDescription> &Vars, + const ASanStackFrameLayout &Layout) { + SmallVector<uint8_t, 64> SB = GetShadowBytes(Vars, Layout); + const 
uint64_t Granularity = Layout.Granularity; + + for (const auto &Var : Vars) { + assert(Var.LifetimeSize <= Var.Size); + const uint64_t LifetimeShadowSize = + (Var.LifetimeSize + Granularity - 1) / Granularity; + const uint64_t Offset = Var.Offset / Granularity; + std::fill(SB.begin() + Offset, SB.begin() + Offset + LifetimeShadowSize, + kAsanStackUseAfterScopeMagic); + } + + return SB; +} + +} // llvm namespace diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/AddDiscriminators.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/AddDiscriminators.cpp new file mode 100644 index 0000000000..56acdcc0bc --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/AddDiscriminators.cpp @@ -0,0 +1,283 @@ +//===- AddDiscriminators.cpp - Insert DWARF path discriminators -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file adds DWARF discriminators to the IR. Path discriminators are +// used to decide what CFG path was taken inside sub-graphs whose instructions +// share the same line and column number information. +// +// The main user of this is the sample profiler. Instruction samples are +// mapped to line number information. Since a single line may be spread +// out over several basic blocks, discriminators add more precise location +// for the samples. +// +// For example, +// +// 1 #define ASSERT(P) +// 2 if (!(P)) +// 3 abort() +// ... +// 100 while (true) { +// 101 ASSERT (sum < 0); +// 102 ... +// 130 } +// +// when converted to IR, this snippet looks something like: +// +// while.body: ; preds = %entry, %if.end +// %0 = load i32* %sum, align 4, !dbg !15 +// %cmp = icmp slt i32 %0, 0, !dbg !15 +// br i1 %cmp, label %if.end, label %if.then, !dbg !15 +// +// if.then: ; preds = %while.body +// call void @abort(), !dbg !15 +// br label %if.end, !dbg !15 +// +// Notice that all the instructions in blocks 'while.body' and 'if.then' +// have exactly the same debug information. When this program is sampled +// at runtime, the profiler will assume that all these instructions are +// equally frequent. This, in turn, will consider the edge while.body->if.then +// to be frequently taken (which is incorrect). +// +// By adding a discriminator value to the instructions in block 'if.then', +// we can distinguish instructions at line 101 with discriminator 0 from +// the instructions at line 101 with discriminator 1. 
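(Editorial aside, not part of the vendored diff: the header comment above motivates discriminators with the line-101 ASSERT example. A minimal sketch of running the `AddDiscriminatorsPass` declared in this file over a single function with the new pass manager follows; the wrapper `runAddDiscriminators` is hypothetical.)

```cpp
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/Utils/AddDiscriminators.h"

using namespace llvm;

// Run AddDiscriminatorsPass on one function with a freshly built analysis
// manager (normally this is wired up inside a full optimization pipeline).
static void runAddDiscriminators(Function &F) {
  FunctionAnalysisManager FAM;
  PassBuilder PB;
  PB.registerFunctionAnalyses(FAM); // the pass needs no analyses, but the
                                    // manager must still be initialized
  FunctionPassManager FPM;
  FPM.addPass(AddDiscriminatorsPass());
  FPM.run(F, FAM);
}
```

The same pass can be requested from the command line with `opt -passes=add-discriminators`.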
+// +// For more details about DWARF discriminators, please visit +// http://wiki.dwarfstd.org/index.php?title=Path_Discriminators +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/AddDiscriminators.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h" +#include <utility> + +using namespace llvm; +using namespace sampleprofutil; + +#define DEBUG_TYPE "add-discriminators" + +// Command line option to disable discriminator generation even in the +// presence of debug information. This is only needed when debugging +// debug info generation issues. +static cl::opt<bool> NoDiscriminators( + "no-discriminators", cl::init(false), + cl::desc("Disable generation of discriminator information.")); + +namespace { + +// The legacy pass of AddDiscriminators. +struct AddDiscriminatorsLegacyPass : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + + AddDiscriminatorsLegacyPass() : FunctionPass(ID) { + initializeAddDiscriminatorsLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; +}; + +} // end anonymous namespace + +char AddDiscriminatorsLegacyPass::ID = 0; + +INITIALIZE_PASS_BEGIN(AddDiscriminatorsLegacyPass, "add-discriminators", + "Add DWARF path discriminators", false, false) +INITIALIZE_PASS_END(AddDiscriminatorsLegacyPass, "add-discriminators", + "Add DWARF path discriminators", false, false) + +// Create the legacy AddDiscriminatorsPass. +FunctionPass *llvm::createAddDiscriminatorsPass() { + return new AddDiscriminatorsLegacyPass(); +} + +static bool shouldHaveDiscriminator(const Instruction *I) { + return !isa<IntrinsicInst>(I) || isa<MemIntrinsic>(I); +} + +/// Assign DWARF discriminators. +/// +/// To assign discriminators, we examine the boundaries of every +/// basic block and its successors. Suppose there is a basic block B1 +/// with successor B2. The last instruction I1 in B1 and the first +/// instruction I2 in B2 are located at the same file and line number. +/// This situation is illustrated in the following code snippet: +/// +/// if (i < 10) x = i; +/// +/// entry: +/// br i1 %cmp, label %if.then, label %if.end, !dbg !10 +/// if.then: +/// %1 = load i32* %i.addr, align 4, !dbg !10 +/// store i32 %1, i32* %x, align 4, !dbg !10 +/// br label %if.end, !dbg !10 +/// if.end: +/// ret void, !dbg !12 +/// +/// Notice how the branch instruction in block 'entry' and all the +/// instructions in block 'if.then' have the exact same debug location +/// information (!dbg !10). +/// +/// To distinguish instructions in block 'entry' from instructions in +/// block 'if.then', we generate a new lexical block for all the +/// instruction in block 'if.then' that share the same file and line +/// location with the last instruction of block 'entry'. 
+/// +/// This new lexical block will have the same location information as +/// the previous one, but with a new DWARF discriminator value. +/// +/// One of the main uses of this discriminator value is in runtime +/// sample profilers. It allows the profiler to distinguish instructions +/// at location !dbg !10 that execute on different basic blocks. This is +/// important because while the predicate 'if (x < 10)' may have been +/// executed millions of times, the assignment 'x = i' may have only +/// executed a handful of times (meaning that the entry->if.then edge is +/// seldom taken). +/// +/// If we did not have discriminator information, the profiler would +/// assign the same weight to both blocks 'entry' and 'if.then', which +/// in turn will make it conclude that the entry->if.then edge is very +/// hot. +/// +/// To decide where to create new discriminator values, this function +/// traverses the CFG and examines instruction at basic block boundaries. +/// If the last instruction I1 of a block B1 is at the same file and line +/// location as instruction I2 of successor B2, then it creates a new +/// lexical block for I2 and all the instruction in B2 that share the same +/// file and line location as I2. This new lexical block will have a +/// different discriminator number than I1. +static bool addDiscriminators(Function &F) { + // If the function has debug information, but the user has disabled + // discriminators, do nothing. + // Simlarly, if the function has no debug info, do nothing. + if (NoDiscriminators || !F.getSubprogram()) + return false; + + // Create FSDiscriminatorVariable if flow sensitive discriminators are used. + if (EnableFSDiscriminator) + createFSDiscriminatorVariable(F.getParent()); + + bool Changed = false; + + using Location = std::pair<StringRef, unsigned>; + using BBSet = DenseSet<const BasicBlock *>; + using LocationBBMap = DenseMap<Location, BBSet>; + using LocationDiscriminatorMap = DenseMap<Location, unsigned>; + using LocationSet = DenseSet<Location>; + + LocationBBMap LBM; + LocationDiscriminatorMap LDM; + + // Traverse all instructions in the function. If the source line location + // of the instruction appears in other basic block, assign a new + // discriminator for this instruction. + for (BasicBlock &B : F) { + for (auto &I : B) { + // Not all intrinsic calls should have a discriminator. + // We want to avoid a non-deterministic assignment of discriminators at + // different debug levels. We still allow discriminators on memory + // intrinsic calls because those can be early expanded by SROA into + // pairs of loads and stores, and the expanded load/store instructions + // should have a valid discriminator. + if (!shouldHaveDiscriminator(&I)) + continue; + const DILocation *DIL = I.getDebugLoc(); + if (!DIL) + continue; + Location L = std::make_pair(DIL->getFilename(), DIL->getLine()); + auto &BBMap = LBM[L]; + auto R = BBMap.insert(&B); + if (BBMap.size() == 1) + continue; + // If we could insert more than one block with the same line+file, a + // discriminator is needed to distinguish both instructions. + // Only the lowest 7 bits are used to represent a discriminator to fit + // it in 1 byte ULEB128 representation. + unsigned Discriminator = R.second ? 
++LDM[L] : LDM[L]; + auto NewDIL = DIL->cloneWithBaseDiscriminator(Discriminator); + if (!NewDIL) { + LLVM_DEBUG(dbgs() << "Could not encode discriminator: " + << DIL->getFilename() << ":" << DIL->getLine() << ":" + << DIL->getColumn() << ":" << Discriminator << " " + << I << "\n"); + } else { + I.setDebugLoc(*NewDIL); + LLVM_DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":" + << DIL->getColumn() << ":" << Discriminator << " " << I + << "\n"); + } + Changed = true; + } + } + + // Traverse all instructions and assign new discriminators to call + // instructions with the same lineno that are in the same basic block. + // Sample base profile needs to distinguish different function calls within + // a same source line for correct profile annotation. + for (BasicBlock &B : F) { + LocationSet CallLocations; + for (auto &I : B) { + // We bypass intrinsic calls for the following two reasons: + // 1) We want to avoid a non-deterministic assignment of + // discriminators. + // 2) We want to minimize the number of base discriminators used. + if (!isa<InvokeInst>(I) && (!isa<CallInst>(I) || isa<IntrinsicInst>(I))) + continue; + + DILocation *CurrentDIL = I.getDebugLoc(); + if (!CurrentDIL) + continue; + Location L = + std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine()); + if (!CallLocations.insert(L).second) { + unsigned Discriminator = ++LDM[L]; + auto NewDIL = CurrentDIL->cloneWithBaseDiscriminator(Discriminator); + if (!NewDIL) { + LLVM_DEBUG(dbgs() + << "Could not encode discriminator: " + << CurrentDIL->getFilename() << ":" + << CurrentDIL->getLine() << ":" << CurrentDIL->getColumn() + << ":" << Discriminator << " " << I << "\n"); + } else { + I.setDebugLoc(*NewDIL); + Changed = true; + } + } + } + } + return Changed; +} + +bool AddDiscriminatorsLegacyPass::runOnFunction(Function &F) { + return addDiscriminators(F); +} + +PreservedAnalyses AddDiscriminatorsPass::run(Function &F, + FunctionAnalysisManager &AM) { + if (!addDiscriminators(F)) + return PreservedAnalyses::all(); + + // FIXME: should be all() + return PreservedAnalyses::none(); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/AssumeBundleBuilder.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/AssumeBundleBuilder.cpp new file mode 100644 index 0000000000..d17c399ba7 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/AssumeBundleBuilder.cpp @@ -0,0 +1,651 @@ +//===- AssumeBundleBuilder.cpp - tools to preserve informations -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/AssumeBundleBuilder.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumeBundleQueries.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/DebugCounter.h" +#include "llvm/Transforms/Utils/Local.h" + +using namespace llvm; + +namespace llvm { +cl::opt<bool> ShouldPreserveAllAttributes( + "assume-preserve-all", cl::init(false), cl::Hidden, + cl::desc("enable preservation of all attrbitues. even those that are " + "unlikely to be usefull")); + +cl::opt<bool> EnableKnowledgeRetention( + "enable-knowledge-retention", cl::init(false), cl::Hidden, + cl::desc( + "enable preservation of attributes throughout code transformation")); +} // namespace llvm + +#define DEBUG_TYPE "assume-builder" + +STATISTIC(NumAssumeBuilt, "Number of assume built by the assume builder"); +STATISTIC(NumBundlesInAssumes, "Total number of Bundles in the assume built"); +STATISTIC(NumAssumesMerged, + "Number of assume merged by the assume simplify pass"); +STATISTIC(NumAssumesRemoved, + "Number of assume removed by the assume simplify pass"); + +DEBUG_COUNTER(BuildAssumeCounter, "assume-builder-counter", + "Controls which assumes gets created"); + +namespace { + +bool isUsefullToPreserve(Attribute::AttrKind Kind) { + switch (Kind) { + case Attribute::NonNull: + case Attribute::NoUndef: + case Attribute::Alignment: + case Attribute::Dereferenceable: + case Attribute::DereferenceableOrNull: + case Attribute::Cold: + return true; + default: + return false; + } +} + +/// This function will try to transform the given knowledge into a more +/// canonical one. the canonical knowledge maybe the given one. +RetainedKnowledge canonicalizedKnowledge(RetainedKnowledge RK, + const DataLayout &DL) { + switch (RK.AttrKind) { + default: + return RK; + case Attribute::NonNull: + RK.WasOn = getUnderlyingObject(RK.WasOn); + return RK; + case Attribute::Alignment: { + Value *V = RK.WasOn->stripInBoundsOffsets([&](const Value *Strip) { + if (auto *GEP = dyn_cast<GEPOperator>(Strip)) + RK.ArgValue = + MinAlign(RK.ArgValue, GEP->getMaxPreservedAlignment(DL).value()); + }); + RK.WasOn = V; + return RK; + } + case Attribute::Dereferenceable: + case Attribute::DereferenceableOrNull: { + int64_t Offset = 0; + Value *V = GetPointerBaseWithConstantOffset(RK.WasOn, Offset, DL, + /*AllowNonInBounds*/ false); + if (Offset < 0) + return RK; + RK.ArgValue = RK.ArgValue + Offset; + RK.WasOn = V; + } + } + return RK; +} + +/// This class contain all knowledge that have been gather while building an +/// llvm.assume and the function to manipulate it. 
+struct AssumeBuilderState { + Module *M; + + using MapKey = std::pair<Value *, Attribute::AttrKind>; + SmallMapVector<MapKey, uint64_t, 8> AssumedKnowledgeMap; + Instruction *InstBeingModified = nullptr; + AssumptionCache* AC = nullptr; + DominatorTree* DT = nullptr; + + AssumeBuilderState(Module *M, Instruction *I = nullptr, + AssumptionCache *AC = nullptr, DominatorTree *DT = nullptr) + : M(M), InstBeingModified(I), AC(AC), DT(DT) {} + + bool tryToPreserveWithoutAddingAssume(RetainedKnowledge RK) { + if (!InstBeingModified || !RK.WasOn) + return false; + bool HasBeenPreserved = false; + Use* ToUpdate = nullptr; + getKnowledgeForValue( + RK.WasOn, {RK.AttrKind}, AC, + [&](RetainedKnowledge RKOther, Instruction *Assume, + const CallInst::BundleOpInfo *Bundle) { + if (!isValidAssumeForContext(Assume, InstBeingModified, DT)) + return false; + if (RKOther.ArgValue >= RK.ArgValue) { + HasBeenPreserved = true; + return true; + } else if (isValidAssumeForContext(InstBeingModified, Assume, DT)) { + HasBeenPreserved = true; + IntrinsicInst *Intr = cast<IntrinsicInst>(Assume); + ToUpdate = &Intr->op_begin()[Bundle->Begin + ABA_Argument]; + return true; + } + return false; + }); + if (ToUpdate) + ToUpdate->set( + ConstantInt::get(Type::getInt64Ty(M->getContext()), RK.ArgValue)); + return HasBeenPreserved; + } + + bool isKnowledgeWorthPreserving(RetainedKnowledge RK) { + if (!RK) + return false; + if (!RK.WasOn) + return true; + if (RK.WasOn->getType()->isPointerTy()) { + Value *UnderlyingPtr = getUnderlyingObject(RK.WasOn); + if (isa<AllocaInst>(UnderlyingPtr) || isa<GlobalValue>(UnderlyingPtr)) + return false; + } + if (auto *Arg = dyn_cast<Argument>(RK.WasOn)) { + if (Arg->hasAttribute(RK.AttrKind) && + (!Attribute::isIntAttrKind(RK.AttrKind) || + Arg->getAttribute(RK.AttrKind).getValueAsInt() >= RK.ArgValue)) + return false; + return true; + } + if (auto *Inst = dyn_cast<Instruction>(RK.WasOn)) + if (wouldInstructionBeTriviallyDead(Inst)) { + if (RK.WasOn->use_empty()) + return false; + Use *SingleUse = RK.WasOn->getSingleUndroppableUse(); + if (SingleUse && SingleUse->getUser() == InstBeingModified) + return false; + } + return true; + } + + void addKnowledge(RetainedKnowledge RK) { + RK = canonicalizedKnowledge(RK, M->getDataLayout()); + + if (!isKnowledgeWorthPreserving(RK)) + return; + + if (tryToPreserveWithoutAddingAssume(RK)) + return; + MapKey Key{RK.WasOn, RK.AttrKind}; + auto Lookup = AssumedKnowledgeMap.find(Key); + if (Lookup == AssumedKnowledgeMap.end()) { + AssumedKnowledgeMap[Key] = RK.ArgValue; + return; + } + assert(((Lookup->second == 0 && RK.ArgValue == 0) || + (Lookup->second != 0 && RK.ArgValue != 0)) && + "inconsistent argument value"); + + /// This is only desirable because for all attributes taking an argument + /// higher is better. 
+ Lookup->second = std::max(Lookup->second, RK.ArgValue); + } + + void addAttribute(Attribute Attr, Value *WasOn) { + if (Attr.isTypeAttribute() || Attr.isStringAttribute() || + (!ShouldPreserveAllAttributes && + !isUsefullToPreserve(Attr.getKindAsEnum()))) + return; + uint64_t AttrArg = 0; + if (Attr.isIntAttribute()) + AttrArg = Attr.getValueAsInt(); + addKnowledge({Attr.getKindAsEnum(), AttrArg, WasOn}); + } + + void addCall(const CallBase *Call) { + auto addAttrList = [&](AttributeList AttrList, unsigned NumArgs) { + for (unsigned Idx = 0; Idx < NumArgs; Idx++) + for (Attribute Attr : AttrList.getParamAttrs(Idx)) { + bool IsPoisonAttr = Attr.hasAttribute(Attribute::NonNull) || + Attr.hasAttribute(Attribute::Alignment); + if (!IsPoisonAttr || Call->isPassingUndefUB(Idx)) + addAttribute(Attr, Call->getArgOperand(Idx)); + } + for (Attribute Attr : AttrList.getFnAttrs()) + addAttribute(Attr, nullptr); + }; + addAttrList(Call->getAttributes(), Call->arg_size()); + if (Function *Fn = Call->getCalledFunction()) + addAttrList(Fn->getAttributes(), Fn->arg_size()); + } + + AssumeInst *build() { + if (AssumedKnowledgeMap.empty()) + return nullptr; + if (!DebugCounter::shouldExecute(BuildAssumeCounter)) + return nullptr; + Function *FnAssume = Intrinsic::getDeclaration(M, Intrinsic::assume); + LLVMContext &C = M->getContext(); + SmallVector<OperandBundleDef, 8> OpBundle; + for (auto &MapElem : AssumedKnowledgeMap) { + SmallVector<Value *, 2> Args; + if (MapElem.first.first) + Args.push_back(MapElem.first.first); + + /// This is only valid because for all attribute that currently exist a + /// value of 0 is useless. and should not be preserved. + if (MapElem.second) + Args.push_back(ConstantInt::get(Type::getInt64Ty(M->getContext()), + MapElem.second)); + OpBundle.push_back(OperandBundleDefT<Value *>( + std::string(Attribute::getNameFromAttrKind(MapElem.first.second)), + Args)); + NumBundlesInAssumes++; + } + NumAssumeBuilt++; + return cast<AssumeInst>(CallInst::Create( + FnAssume, ArrayRef<Value *>({ConstantInt::getTrue(C)}), OpBundle)); + } + + void addAccessedPtr(Instruction *MemInst, Value *Pointer, Type *AccType, + MaybeAlign MA) { + unsigned DerefSize = MemInst->getModule() + ->getDataLayout() + .getTypeStoreSize(AccType) + .getKnownMinValue(); + if (DerefSize != 0) { + addKnowledge({Attribute::Dereferenceable, DerefSize, Pointer}); + if (!NullPointerIsDefined(MemInst->getFunction(), + Pointer->getType()->getPointerAddressSpace())) + addKnowledge({Attribute::NonNull, 0u, Pointer}); + } + if (MA.valueOrOne() > 1) + addKnowledge({Attribute::Alignment, MA.valueOrOne().value(), Pointer}); + } + + void addInstruction(Instruction *I) { + if (auto *Call = dyn_cast<CallBase>(I)) + return addCall(Call); + if (auto *Load = dyn_cast<LoadInst>(I)) + return addAccessedPtr(I, Load->getPointerOperand(), Load->getType(), + Load->getAlign()); + if (auto *Store = dyn_cast<StoreInst>(I)) + return addAccessedPtr(I, Store->getPointerOperand(), + Store->getValueOperand()->getType(), + Store->getAlign()); + // TODO: Add support for the other Instructions. + // TODO: Maybe we should look around and merge with other llvm.assume. 
+ } +}; + +} // namespace + +AssumeInst *llvm::buildAssumeFromInst(Instruction *I) { + if (!EnableKnowledgeRetention) + return nullptr; + AssumeBuilderState Builder(I->getModule()); + Builder.addInstruction(I); + return Builder.build(); +} + +void llvm::salvageKnowledge(Instruction *I, AssumptionCache *AC, + DominatorTree *DT) { + if (!EnableKnowledgeRetention || I->isTerminator()) + return; + AssumeBuilderState Builder(I->getModule(), I, AC, DT); + Builder.addInstruction(I); + if (auto *Intr = Builder.build()) { + Intr->insertBefore(I); + if (AC) + AC->registerAssumption(Intr); + } +} + +AssumeInst * +llvm::buildAssumeFromKnowledge(ArrayRef<RetainedKnowledge> Knowledge, + Instruction *CtxI, AssumptionCache *AC, + DominatorTree *DT) { + AssumeBuilderState Builder(CtxI->getModule(), CtxI, AC, DT); + for (const RetainedKnowledge &RK : Knowledge) + Builder.addKnowledge(RK); + return Builder.build(); +} + +RetainedKnowledge llvm::simplifyRetainedKnowledge(AssumeInst *Assume, + RetainedKnowledge RK, + AssumptionCache *AC, + DominatorTree *DT) { + AssumeBuilderState Builder(Assume->getModule(), Assume, AC, DT); + RK = canonicalizedKnowledge(RK, Assume->getModule()->getDataLayout()); + + if (!Builder.isKnowledgeWorthPreserving(RK)) + return RetainedKnowledge::none(); + + if (Builder.tryToPreserveWithoutAddingAssume(RK)) + return RetainedKnowledge::none(); + return RK; +} + +namespace { + +struct AssumeSimplify { + Function &F; + AssumptionCache &AC; + DominatorTree *DT; + LLVMContext &C; + SmallDenseSet<IntrinsicInst *> CleanupToDo; + StringMapEntry<uint32_t> *IgnoreTag; + SmallDenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 4>, 8> BBToAssume; + bool MadeChange = false; + + AssumeSimplify(Function &F, AssumptionCache &AC, DominatorTree *DT, + LLVMContext &C) + : F(F), AC(AC), DT(DT), C(C), + IgnoreTag(C.getOrInsertBundleTag(IgnoreBundleTag)) {} + + void buildMapping(bool FilterBooleanArgument) { + BBToAssume.clear(); + for (Value *V : AC.assumptions()) { + if (!V) + continue; + IntrinsicInst *Assume = cast<IntrinsicInst>(V); + if (FilterBooleanArgument) { + auto *Arg = dyn_cast<ConstantInt>(Assume->getOperand(0)); + if (!Arg || Arg->isZero()) + continue; + } + BBToAssume[Assume->getParent()].push_back(Assume); + } + + for (auto &Elem : BBToAssume) { + llvm::sort(Elem.second, + [](const IntrinsicInst *LHS, const IntrinsicInst *RHS) { + return LHS->comesBefore(RHS); + }); + } + } + + /// Remove all asumes in CleanupToDo if there boolean argument is true and + /// ForceCleanup is set or the assume doesn't hold valuable knowledge. + void RunCleanup(bool ForceCleanup) { + for (IntrinsicInst *Assume : CleanupToDo) { + auto *Arg = dyn_cast<ConstantInt>(Assume->getOperand(0)); + if (!Arg || Arg->isZero() || + (!ForceCleanup && + !isAssumeWithEmptyBundle(cast<AssumeInst>(*Assume)))) + continue; + MadeChange = true; + if (ForceCleanup) + NumAssumesMerged++; + else + NumAssumesRemoved++; + Assume->eraseFromParent(); + } + CleanupToDo.clear(); + } + + /// Remove knowledge stored in assume when it is already know by an attribute + /// or an other assume. This can when valid update an existing knowledge in an + /// attribute or an other assume. 
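(Editorial aside, not part of the vendored diff: a minimal sketch of how a transform can use `salvageKnowledge`, defined above, to preserve attribute knowledge as `llvm.assume` operand bundles before the instructions carrying it are rewritten. The wrapper `preserveKnowledge` is hypothetical, and nothing is emitted unless the `-enable-knowledge-retention` flag introduced above is enabled.)

```cpp
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"

using namespace llvm;

// Walk every instruction and, where worthwhile, emit an llvm.assume carrying
// the knowledge (nonnull, align, dereferenceable, ...) that the instruction
// implies, so later passes can still query it once the instruction is gone.
static void preserveKnowledge(Function &F, AssumptionCache &AC,
                              DominatorTree *DT) {
  for (Instruction &I : instructions(F))
    salvageKnowledge(&I, &AC, DT);
}
```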
+ void dropRedundantKnowledge() { + struct MapValue { + IntrinsicInst *Assume; + uint64_t ArgValue; + CallInst::BundleOpInfo *BOI; + }; + buildMapping(false); + SmallDenseMap<std::pair<Value *, Attribute::AttrKind>, + SmallVector<MapValue, 2>, 16> + Knowledge; + for (BasicBlock *BB : depth_first(&F)) + for (Value *V : BBToAssume[BB]) { + if (!V) + continue; + IntrinsicInst *Assume = cast<IntrinsicInst>(V); + for (CallInst::BundleOpInfo &BOI : Assume->bundle_op_infos()) { + auto RemoveFromAssume = [&]() { + CleanupToDo.insert(Assume); + if (BOI.Begin != BOI.End) { + Use *U = &Assume->op_begin()[BOI.Begin + ABA_WasOn]; + U->set(UndefValue::get(U->get()->getType())); + } + BOI.Tag = IgnoreTag; + }; + if (BOI.Tag == IgnoreTag) { + CleanupToDo.insert(Assume); + continue; + } + RetainedKnowledge RK = + getKnowledgeFromBundle(cast<AssumeInst>(*Assume), BOI); + if (auto *Arg = dyn_cast_or_null<Argument>(RK.WasOn)) { + bool HasSameKindAttr = Arg->hasAttribute(RK.AttrKind); + if (HasSameKindAttr) + if (!Attribute::isIntAttrKind(RK.AttrKind) || + Arg->getAttribute(RK.AttrKind).getValueAsInt() >= + RK.ArgValue) { + RemoveFromAssume(); + continue; + } + if (isValidAssumeForContext( + Assume, &*F.getEntryBlock().getFirstInsertionPt()) || + Assume == &*F.getEntryBlock().getFirstInsertionPt()) { + if (HasSameKindAttr) + Arg->removeAttr(RK.AttrKind); + Arg->addAttr(Attribute::get(C, RK.AttrKind, RK.ArgValue)); + MadeChange = true; + RemoveFromAssume(); + continue; + } + } + auto &Lookup = Knowledge[{RK.WasOn, RK.AttrKind}]; + for (MapValue &Elem : Lookup) { + if (!isValidAssumeForContext(Elem.Assume, Assume, DT)) + continue; + if (Elem.ArgValue >= RK.ArgValue) { + RemoveFromAssume(); + continue; + } else if (isValidAssumeForContext(Assume, Elem.Assume, DT)) { + Elem.Assume->op_begin()[Elem.BOI->Begin + ABA_Argument].set( + ConstantInt::get(Type::getInt64Ty(C), RK.ArgValue)); + MadeChange = true; + RemoveFromAssume(); + continue; + } + } + Lookup.push_back({Assume, RK.ArgValue, &BOI}); + } + } + } + + using MergeIterator = SmallVectorImpl<IntrinsicInst *>::iterator; + + /// Merge all Assumes from Begin to End in and insert the resulting assume as + /// high as possible in the basicblock. + void mergeRange(BasicBlock *BB, MergeIterator Begin, MergeIterator End) { + if (Begin == End || std::next(Begin) == End) + return; + /// Provide no additional information so that AssumeBuilderState doesn't + /// try to do any punning since it already has been done better. + AssumeBuilderState Builder(F.getParent()); + + /// For now it is initialized to the best value it could have + Instruction *InsertPt = BB->getFirstNonPHI(); + if (isa<LandingPadInst>(InsertPt)) + InsertPt = InsertPt->getNextNode(); + for (IntrinsicInst *I : make_range(Begin, End)) { + CleanupToDo.insert(I); + for (CallInst::BundleOpInfo &BOI : I->bundle_op_infos()) { + RetainedKnowledge RK = + getKnowledgeFromBundle(cast<AssumeInst>(*I), BOI); + if (!RK) + continue; + Builder.addKnowledge(RK); + if (auto *I = dyn_cast_or_null<Instruction>(RK.WasOn)) + if (I->getParent() == InsertPt->getParent() && + (InsertPt->comesBefore(I) || InsertPt == I)) + InsertPt = I->getNextNode(); + } + } + + /// Adjust InsertPt if it is before Begin, since mergeAssumes only + /// guarantees we can place the resulting assume between Begin and End. 
+ if (InsertPt->comesBefore(*Begin)) + for (auto It = (*Begin)->getIterator(), E = InsertPt->getIterator(); + It != E; --It) + if (!isGuaranteedToTransferExecutionToSuccessor(&*It)) { + InsertPt = It->getNextNode(); + break; + } + auto *MergedAssume = Builder.build(); + if (!MergedAssume) + return; + MadeChange = true; + MergedAssume->insertBefore(InsertPt); + AC.registerAssumption(MergedAssume); + } + + /// Merge assume when they are in the same BasicBlock and for all instruction + /// between them isGuaranteedToTransferExecutionToSuccessor returns true. + void mergeAssumes() { + buildMapping(true); + + SmallVector<MergeIterator, 4> SplitPoints; + for (auto &Elem : BBToAssume) { + SmallVectorImpl<IntrinsicInst *> &AssumesInBB = Elem.second; + if (AssumesInBB.size() < 2) + continue; + /// AssumesInBB is already sorted by order in the block. + + BasicBlock::iterator It = AssumesInBB.front()->getIterator(); + BasicBlock::iterator E = AssumesInBB.back()->getIterator(); + SplitPoints.push_back(AssumesInBB.begin()); + MergeIterator LastSplit = AssumesInBB.begin(); + for (; It != E; ++It) + if (!isGuaranteedToTransferExecutionToSuccessor(&*It)) { + for (; (*LastSplit)->comesBefore(&*It); ++LastSplit) + ; + if (SplitPoints.back() != LastSplit) + SplitPoints.push_back(LastSplit); + } + SplitPoints.push_back(AssumesInBB.end()); + for (auto SplitIt = SplitPoints.begin(); + SplitIt != std::prev(SplitPoints.end()); SplitIt++) { + mergeRange(Elem.first, *SplitIt, *(SplitIt + 1)); + } + SplitPoints.clear(); + } + } +}; + +bool simplifyAssumes(Function &F, AssumptionCache *AC, DominatorTree *DT) { + AssumeSimplify AS(F, *AC, DT, F.getContext()); + + /// Remove knowledge that is already known by a dominating other assume or an + /// attribute. + AS.dropRedundantKnowledge(); + + /// Remove assume that are empty. + AS.RunCleanup(false); + + /// Merge assume in the same basicblock when possible. + AS.mergeAssumes(); + + /// Remove assume that were merged. + AS.RunCleanup(true); + return AS.MadeChange; +} + +} // namespace + +PreservedAnalyses AssumeSimplifyPass::run(Function &F, + FunctionAnalysisManager &AM) { + if (!EnableKnowledgeRetention) + return PreservedAnalyses::all(); + simplifyAssumes(F, &AM.getResult<AssumptionAnalysis>(F), + AM.getCachedResult<DominatorTreeAnalysis>(F)); + return PreservedAnalyses::all(); +} + +namespace { +class AssumeSimplifyPassLegacyPass : public FunctionPass { +public: + static char ID; + + AssumeSimplifyPassLegacyPass() : FunctionPass(ID) { + initializeAssumeSimplifyPassLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + bool runOnFunction(Function &F) override { + if (skipFunction(F) || !EnableKnowledgeRetention) + return false; + AssumptionCache &AC = + getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + DominatorTreeWrapperPass *DTWP = + getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + return simplifyAssumes(F, &AC, DTWP ? 
&DTWP->getDomTree() : nullptr); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AssumptionCacheTracker>(); + + AU.setPreservesAll(); + } +}; +} // namespace + +char AssumeSimplifyPassLegacyPass::ID = 0; + +INITIALIZE_PASS_BEGIN(AssumeSimplifyPassLegacyPass, "assume-simplify", + "Assume Simplify", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_END(AssumeSimplifyPassLegacyPass, "assume-simplify", + "Assume Simplify", false, false) + +FunctionPass *llvm::createAssumeSimplifyPass() { + return new AssumeSimplifyPassLegacyPass(); +} + +PreservedAnalyses AssumeBuilderPass::run(Function &F, + FunctionAnalysisManager &AM) { + AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F); + DominatorTree* DT = AM.getCachedResult<DominatorTreeAnalysis>(F); + for (Instruction &I : instructions(F)) + salvageKnowledge(&I, AC, DT); + return PreservedAnalyses::all(); +} + +namespace { +class AssumeBuilderPassLegacyPass : public FunctionPass { +public: + static char ID; + + AssumeBuilderPassLegacyPass() : FunctionPass(ID) { + initializeAssumeBuilderPassLegacyPassPass(*PassRegistry::getPassRegistry()); + } + bool runOnFunction(Function &F) override { + AssumptionCache &AC = + getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + DominatorTreeWrapperPass *DTWP = + getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + for (Instruction &I : instructions(F)) + salvageKnowledge(&I, &AC, DTWP ? &DTWP->getDomTree() : nullptr); + return true; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AssumptionCacheTracker>(); + + AU.setPreservesAll(); + } +}; +} // namespace + +char AssumeBuilderPassLegacyPass::ID = 0; + +INITIALIZE_PASS_BEGIN(AssumeBuilderPassLegacyPass, "assume-builder", + "Assume Builder", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_END(AssumeBuilderPassLegacyPass, "assume-builder", + "Assume Builder", false, false) diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/BasicBlockUtils.cpp new file mode 100644 index 0000000000..58a226fc60 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -0,0 +1,1999 @@ +//===- BasicBlockUtils.cpp - BasicBlock Utilities --------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This family of functions perform manipulations on basic blocks, and +// instructions contained within basic blocks. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/DomTreeUpdater.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/Local.h" +#include <cassert> +#include <cstdint> +#include <string> +#include <utility> +#include <vector> + +using namespace llvm; + +#define DEBUG_TYPE "basicblock-utils" + +static cl::opt<unsigned> MaxDeoptOrUnreachableSuccessorCheckDepth( + "max-deopt-or-unreachable-succ-check-depth", cl::init(8), cl::Hidden, + cl::desc("Set the maximum path length when checking whether a basic block " + "is followed by a block that either has a terminating " + "deoptimizing call or is terminated with an unreachable")); + +void llvm::detachDeadBlocks( + ArrayRef<BasicBlock *> BBs, + SmallVectorImpl<DominatorTree::UpdateType> *Updates, + bool KeepOneInputPHIs) { + for (auto *BB : BBs) { + // Loop through all of our successors and make sure they know that one + // of their predecessors is going away. + SmallPtrSet<BasicBlock *, 4> UniqueSuccessors; + for (BasicBlock *Succ : successors(BB)) { + Succ->removePredecessor(BB, KeepOneInputPHIs); + if (Updates && UniqueSuccessors.insert(Succ).second) + Updates->push_back({DominatorTree::Delete, BB, Succ}); + } + + // Zap all the instructions in the block. + while (!BB->empty()) { + Instruction &I = BB->back(); + // If this instruction is used, replace uses with an arbitrary value. + // Because control flow can't get here, we don't care what we replace the + // value with. Note that since this block is unreachable, and all values + // contained within it must dominate their uses, that all uses will + // eventually be removed (they are themselves dead). + if (!I.use_empty()) + I.replaceAllUsesWith(PoisonValue::get(I.getType())); + BB->back().eraseFromParent(); + } + new UnreachableInst(BB->getContext(), BB); + assert(BB->size() == 1 && + isa<UnreachableInst>(BB->getTerminator()) && + "The successor list of BB isn't empty before " + "applying corresponding DTU updates."); + } +} + +void llvm::DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU, + bool KeepOneInputPHIs) { + DeleteDeadBlocks({BB}, DTU, KeepOneInputPHIs); +} + +void llvm::DeleteDeadBlocks(ArrayRef <BasicBlock *> BBs, DomTreeUpdater *DTU, + bool KeepOneInputPHIs) { +#ifndef NDEBUG + // Make sure that all predecessors of each dead block is also dead. 
+ SmallPtrSet<BasicBlock *, 4> Dead(BBs.begin(), BBs.end()); + assert(Dead.size() == BBs.size() && "Duplicating blocks?"); + for (auto *BB : Dead) + for (BasicBlock *Pred : predecessors(BB)) + assert(Dead.count(Pred) && "All predecessors must be dead!"); +#endif + + SmallVector<DominatorTree::UpdateType, 4> Updates; + detachDeadBlocks(BBs, DTU ? &Updates : nullptr, KeepOneInputPHIs); + + if (DTU) + DTU->applyUpdates(Updates); + + for (BasicBlock *BB : BBs) + if (DTU) + DTU->deleteBB(BB); + else + BB->eraseFromParent(); +} + +bool llvm::EliminateUnreachableBlocks(Function &F, DomTreeUpdater *DTU, + bool KeepOneInputPHIs) { + df_iterator_default_set<BasicBlock*> Reachable; + + // Mark all reachable blocks. + for (BasicBlock *BB : depth_first_ext(&F, Reachable)) + (void)BB/* Mark all reachable blocks */; + + // Collect all dead blocks. + std::vector<BasicBlock*> DeadBlocks; + for (BasicBlock &BB : F) + if (!Reachable.count(&BB)) + DeadBlocks.push_back(&BB); + + // Delete the dead blocks. + DeleteDeadBlocks(DeadBlocks, DTU, KeepOneInputPHIs); + + return !DeadBlocks.empty(); +} + +bool llvm::FoldSingleEntryPHINodes(BasicBlock *BB, + MemoryDependenceResults *MemDep) { + if (!isa<PHINode>(BB->begin())) + return false; + + while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { + if (PN->getIncomingValue(0) != PN) + PN->replaceAllUsesWith(PN->getIncomingValue(0)); + else + PN->replaceAllUsesWith(PoisonValue::get(PN->getType())); + + if (MemDep) + MemDep->removeInstruction(PN); // Memdep updates AA itself. + + PN->eraseFromParent(); + } + return true; +} + +bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI, + MemorySSAUpdater *MSSAU) { + // Recursively deleting a PHI may cause multiple PHIs to be deleted + // or RAUW'd undef, so use an array of WeakTrackingVH for the PHIs to delete. + SmallVector<WeakTrackingVH, 8> PHIs; + for (PHINode &PN : BB->phis()) + PHIs.push_back(&PN); + + bool Changed = false; + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) + if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i].operator Value*())) + Changed |= RecursivelyDeleteDeadPHINode(PN, TLI, MSSAU); + + return Changed; +} + +bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, + LoopInfo *LI, MemorySSAUpdater *MSSAU, + MemoryDependenceResults *MemDep, + bool PredecessorWithTwoSuccessors, + DominatorTree *DT) { + if (BB->hasAddressTaken()) + return false; + + // Can't merge if there are multiple predecessors, or no predecessors. + BasicBlock *PredBB = BB->getUniquePredecessor(); + if (!PredBB) return false; + + // Don't break self-loops. + if (PredBB == BB) return false; + + // Don't break unwinding instructions or terminators with other side-effects. + Instruction *PTI = PredBB->getTerminator(); + if (PTI->isExceptionalTerminator() || PTI->mayHaveSideEffects()) + return false; + + // Can't merge if there are multiple distinct successors. + if (!PredecessorWithTwoSuccessors && PredBB->getUniqueSuccessor() != BB) + return false; + + // Currently only allow PredBB to have two predecessors, one being BB. + // Update BI to branch to BB's only successor instead of BB. + BranchInst *PredBB_BI; + BasicBlock *NewSucc = nullptr; + unsigned FallThruPath; + if (PredecessorWithTwoSuccessors) { + if (!(PredBB_BI = dyn_cast<BranchInst>(PTI))) + return false; + BranchInst *BB_JmpI = dyn_cast<BranchInst>(BB->getTerminator()); + if (!BB_JmpI || !BB_JmpI->isUnconditional()) + return false; + NewSucc = BB_JmpI->getSuccessor(0); + FallThruPath = PredBB_BI->getSuccessor(0) == BB ? 
0 : 1; + } + + // Can't merge if there is PHI loop. + for (PHINode &PN : BB->phis()) + if (llvm::is_contained(PN.incoming_values(), &PN)) + return false; + + LLVM_DEBUG(dbgs() << "Merging: " << BB->getName() << " into " + << PredBB->getName() << "\n"); + + // Begin by getting rid of unneeded PHIs. + SmallVector<AssertingVH<Value>, 4> IncomingValues; + if (isa<PHINode>(BB->front())) { + for (PHINode &PN : BB->phis()) + if (!isa<PHINode>(PN.getIncomingValue(0)) || + cast<PHINode>(PN.getIncomingValue(0))->getParent() != BB) + IncomingValues.push_back(PN.getIncomingValue(0)); + FoldSingleEntryPHINodes(BB, MemDep); + } + + if (DT) { + assert(!DTU && "cannot use both DT and DTU for updates"); + DomTreeNode *PredNode = DT->getNode(PredBB); + DomTreeNode *BBNode = DT->getNode(BB); + if (PredNode) { + assert(BBNode && "PredNode unreachable but BBNode reachable?"); + for (DomTreeNode *C : to_vector(BBNode->children())) + C->setIDom(PredNode); + } + } + // DTU update: Collect all the edges that exit BB. + // These dominator edges will be redirected from Pred. + std::vector<DominatorTree::UpdateType> Updates; + if (DTU) { + assert(!DT && "cannot use both DT and DTU for updates"); + // To avoid processing the same predecessor more than once. + SmallPtrSet<BasicBlock *, 8> SeenSuccs; + SmallPtrSet<BasicBlock *, 2> SuccsOfPredBB(succ_begin(PredBB), + succ_end(PredBB)); + Updates.reserve(Updates.size() + 2 * succ_size(BB) + 1); + // Add insert edges first. Experimentally, for the particular case of two + // blocks that can be merged, with a single successor and single predecessor + // respectively, it is beneficial to have all insert updates first. Deleting + // edges first may lead to unreachable blocks, followed by inserting edges + // making the blocks reachable again. Such DT updates lead to high compile + // times. We add inserts before deletes here to reduce compile time. + for (BasicBlock *SuccOfBB : successors(BB)) + // This successor of BB may already be a PredBB's successor. + if (!SuccsOfPredBB.contains(SuccOfBB)) + if (SeenSuccs.insert(SuccOfBB).second) + Updates.push_back({DominatorTree::Insert, PredBB, SuccOfBB}); + SeenSuccs.clear(); + for (BasicBlock *SuccOfBB : successors(BB)) + if (SeenSuccs.insert(SuccOfBB).second) + Updates.push_back({DominatorTree::Delete, BB, SuccOfBB}); + Updates.push_back({DominatorTree::Delete, PredBB, BB}); + } + + Instruction *STI = BB->getTerminator(); + Instruction *Start = &*BB->begin(); + // If there's nothing to move, mark the starting instruction as the last + // instruction in the block. Terminator instruction is handled separately. + if (Start == STI) + Start = PTI; + + // Move all definitions in the successor to the predecessor... + PredBB->splice(PTI->getIterator(), BB, BB->begin(), STI->getIterator()); + + if (MSSAU) + MSSAU->moveAllAfterMergeBlocks(BB, PredBB, Start); + + // Make all PHI nodes that referred to BB now refer to Pred as their + // source... + BB->replaceAllUsesWith(PredBB); + + if (PredecessorWithTwoSuccessors) { + // Delete the unconditional branch from BB. + BB->back().eraseFromParent(); + + // Update branch in the predecessor. + PredBB_BI->setSuccessor(FallThruPath, NewSucc); + } else { + // Delete the unconditional branch from the predecessor. + PredBB->back().eraseFromParent(); + + // Move terminator instruction. + PredBB->splice(PredBB->end(), BB); + + // Terminator may be a memory accessing instruction too. 
+ if (MSSAU) + if (MemoryUseOrDef *MUD = cast_or_null<MemoryUseOrDef>( + MSSAU->getMemorySSA()->getMemoryAccess(PredBB->getTerminator()))) + MSSAU->moveToPlace(MUD, PredBB, MemorySSA::End); + } + // Add unreachable to now empty BB. + new UnreachableInst(BB->getContext(), BB); + + // Inherit predecessors name if it exists. + if (!PredBB->hasName()) + PredBB->takeName(BB); + + if (LI) + LI->removeBlock(BB); + + if (MemDep) + MemDep->invalidateCachedPredecessors(); + + if (DTU) + DTU->applyUpdates(Updates); + + if (DT) { + assert(succ_empty(BB) && + "successors should have been transferred to PredBB"); + DT->eraseNode(BB); + } + + // Finally, erase the old block and update dominator info. + DeleteDeadBlock(BB, DTU); + + return true; +} + +bool llvm::MergeBlockSuccessorsIntoGivenBlocks( + SmallPtrSetImpl<BasicBlock *> &MergeBlocks, Loop *L, DomTreeUpdater *DTU, + LoopInfo *LI) { + assert(!MergeBlocks.empty() && "MergeBlocks should not be empty"); + + bool BlocksHaveBeenMerged = false; + while (!MergeBlocks.empty()) { + BasicBlock *BB = *MergeBlocks.begin(); + BasicBlock *Dest = BB->getSingleSuccessor(); + if (Dest && (!L || L->contains(Dest))) { + BasicBlock *Fold = Dest->getUniquePredecessor(); + (void)Fold; + if (MergeBlockIntoPredecessor(Dest, DTU, LI)) { + assert(Fold == BB && + "Expecting BB to be unique predecessor of the Dest block"); + MergeBlocks.erase(Dest); + BlocksHaveBeenMerged = true; + } else + MergeBlocks.erase(BB); + } else + MergeBlocks.erase(BB); + } + return BlocksHaveBeenMerged; +} + +/// Remove redundant instructions within sequences of consecutive dbg.value +/// instructions. This is done using a backward scan to keep the last dbg.value +/// describing a specific variable/fragment. +/// +/// BackwardScan strategy: +/// ---------------------- +/// Given a sequence of consecutive DbgValueInst like this +/// +/// dbg.value ..., "x", FragmentX1 (*) +/// dbg.value ..., "y", FragmentY1 +/// dbg.value ..., "x", FragmentX2 +/// dbg.value ..., "x", FragmentX1 (**) +/// +/// then the instruction marked with (*) can be removed (it is guaranteed to be +/// obsoleted by the instruction marked with (**) as the latter instruction is +/// describing the same variable using the same fragment info). +/// +/// Possible improvements: +/// - Check fully overlapping fragments and not only identical fragments. +/// - Support dbg.addr, dbg.declare. dbg.label, and possibly other meta +/// instructions being part of the sequence of consecutive instructions. +static bool removeRedundantDbgInstrsUsingBackwardScan(BasicBlock *BB) { + SmallVector<DbgValueInst *, 8> ToBeRemoved; + SmallDenseSet<DebugVariable> VariableSet; + for (auto &I : reverse(*BB)) { + if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(&I)) { + DebugVariable Key(DVI->getVariable(), + DVI->getExpression(), + DVI->getDebugLoc()->getInlinedAt()); + auto R = VariableSet.insert(Key); + // If the variable fragment hasn't been seen before then we don't want + // to remove this dbg intrinsic. + if (R.second) + continue; + + if (auto *DAI = dyn_cast<DbgAssignIntrinsic>(DVI)) { + // Don't delete dbg.assign intrinsics that are linked to instructions. + if (!at::getAssignmentInsts(DAI).empty()) + continue; + // Unlinked dbg.assign intrinsics can be treated like dbg.values. + } + + // If the same variable fragment is described more than once it is enough + // to keep the last one (i.e. the first found since we for reverse + // iteration). + ToBeRemoved.push_back(DVI); + continue; + } + // Sequence with consecutive dbg.value instrs ended. 
Clear the map to
+    // restart identifying redundant instructions in case we find another
+    // dbg.value sequence.
+    VariableSet.clear();
+  }
+
+  for (auto &Instr : ToBeRemoved)
+    Instr->eraseFromParent();
+
+  return !ToBeRemoved.empty();
+}
+
+/// Remove redundant dbg.value instructions using a forward scan. This can
+/// remove a dbg.value instruction that is redundant due to indicating that a
+/// variable has the same value as already being indicated by an earlier
+/// dbg.value.
+///
+/// ForwardScan strategy:
+/// ---------------------
+/// Given two identical dbg.value instructions, separated by a block of
+/// instructions that isn't describing the same variable, like this
+///
+///   dbg.value X1, "x", FragmentX1 (**)
+///   <block of instructions, none being "dbg.value ..., "x", ...">
+///   dbg.value X1, "x", FragmentX1 (*)
+///
+/// then the instruction marked with (*) can be removed. Variable "x" is already
+/// described as being mapped to the SSA value X1.
+///
+/// Possible improvements:
+/// - Keep track of non-overlapping fragments.
+static bool removeRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) {
+  SmallVector<DbgValueInst *, 8> ToBeRemoved;
+  DenseMap<DebugVariable, std::pair<SmallVector<Value *, 4>, DIExpression *>>
+      VariableMap;
+  for (auto &I : *BB) {
+    if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(&I)) {
+      DebugVariable Key(DVI->getVariable(), std::nullopt,
+                        DVI->getDebugLoc()->getInlinedAt());
+      auto VMI = VariableMap.find(Key);
+      auto *DAI = dyn_cast<DbgAssignIntrinsic>(DVI);
+      // A dbg.assign with no linked instructions can be treated like a
+      // dbg.value (i.e. can be deleted).
+      bool IsDbgValueKind = (!DAI || at::getAssignmentInsts(DAI).empty());
+
+      // Update the map if we found a new value/expression describing the
+      // variable, or if the variable wasn't mapped already.
+      SmallVector<Value *, 4> Values(DVI->getValues());
+      if (VMI == VariableMap.end() || VMI->second.first != Values ||
+          VMI->second.second != DVI->getExpression()) {
+        // Use a sentinel value (nullptr) for the DIExpression when we see a
+        // linked dbg.assign so that the next debug intrinsic will never match
+        // it (i.e. always treat linked dbg.assigns as if they're unique).
+        if (IsDbgValueKind)
+          VariableMap[Key] = {Values, DVI->getExpression()};
+        else
+          VariableMap[Key] = {Values, nullptr};
+        continue;
+      }
+
+      // Don't delete dbg.assign intrinsics that are linked to instructions.
+      if (!IsDbgValueKind)
+        continue;
+      ToBeRemoved.push_back(DVI);
+    }
+  }
+
+  for (auto &Instr : ToBeRemoved)
+    Instr->eraseFromParent();
+
+  return !ToBeRemoved.empty();
+}
+
+/// Remove redundant undef dbg.assign intrinsics from an entry block using a
+/// forward scan.
+/// Strategy:
+/// ---------------------
+/// Scanning forward, delete dbg.assign intrinsics iff they are undef, not
+/// linked to an instruction, and don't share an aggregate variable with a
+/// debug intrinsic that didn't meet the criteria. In other words, undef
+/// dbg.assigns that come before non-undef debug intrinsics for the variable
+/// are deleted. Given:
+///
+///   dbg.assign undef, "x", FragmentX1 (*)
+///   <block of instructions, none being "dbg.value ..., "x", ...">
+///   dbg.value %V, "x", FragmentX2
+///   <block of instructions, none being "dbg.value ..., "x", ...">
+///   dbg.assign undef, "x", FragmentX1
+///
+/// then (only) the instruction marked with (*) can be removed.
+/// Possible improvements:
+/// - Keep track of non-overlapping fragments.
+static bool removeUndefDbgAssignsFromEntryBlock(BasicBlock *BB) {
+  assert(BB->isEntryBlock() && "expected entry block");
+  SmallVector<DbgAssignIntrinsic *, 8> ToBeRemoved;
+  DenseSet<DebugVariable> SeenDefForAggregate;
+  // Returns the DebugVariable for DVI with no fragment info.
+  auto GetAggregateVariable = [](DbgValueInst *DVI) {
+    return DebugVariable(DVI->getVariable(), std::nullopt,
+                         DVI->getDebugLoc()->getInlinedAt());
+  };
+
+  // Remove undef dbg.assign intrinsics that are encountered before
+  // any non-undef intrinsics from the entry block.
+  for (auto &I : *BB) {
+    DbgValueInst *DVI = dyn_cast<DbgValueInst>(&I);
+    if (!DVI)
+      continue;
+    auto *DAI = dyn_cast<DbgAssignIntrinsic>(DVI);
+    bool IsDbgValueKind = (!DAI || at::getAssignmentInsts(DAI).empty());
+    DebugVariable Aggregate = GetAggregateVariable(DVI);
+    if (!SeenDefForAggregate.contains(Aggregate)) {
+      bool IsKill = DVI->isKillLocation() && IsDbgValueKind;
+      if (!IsKill) {
+        SeenDefForAggregate.insert(Aggregate);
+      } else if (DAI) {
+        ToBeRemoved.push_back(DAI);
+      }
+    }
+  }
+
+  for (DbgAssignIntrinsic *DAI : ToBeRemoved)
+    DAI->eraseFromParent();
+
+  return !ToBeRemoved.empty();
+}
+
+bool llvm::RemoveRedundantDbgInstrs(BasicBlock *BB) {
+  bool MadeChanges = false;
+  // By using the "backward scan" strategy before the "forward scan" strategy we
+  // can remove both dbg.value (2) and (3) in a situation like this:
+  //
+  // (1) dbg.value V1, "x", DIExpression()
+  // ...
+  // (2) dbg.value V2, "x", DIExpression()
+  // (3) dbg.value V1, "x", DIExpression()
+  //
+  // The backward scan will remove (2); it is made obsolete by (3). After
+  // getting (2) out of the way, the forward scan will remove (3) since "x"
+  // already is described as having the value V1 at (1).
+  MadeChanges |= removeRedundantDbgInstrsUsingBackwardScan(BB);
+  if (BB->isEntryBlock() &&
+      isAssignmentTrackingEnabled(*BB->getParent()->getParent()))
+    MadeChanges |= removeUndefDbgAssignsFromEntryBlock(BB);
+  MadeChanges |= removeRedundantDbgInstrsUsingForwardScan(BB);
+
+  if (MadeChanges)
+    LLVM_DEBUG(dbgs() << "Removed redundant dbg instrs from: "
+                      << BB->getName() << "\n");
+  return MadeChanges;
+}
+
+void llvm::ReplaceInstWithValue(BasicBlock::iterator &BI, Value *V) {
+  Instruction &I = *BI;
+  // Replaces all of the uses of the instruction with uses of the value.
+  I.replaceAllUsesWith(V);
+
+  // Make sure to propagate a name if there is one already.
+  if (I.hasName() && !V->hasName())
+    V->takeName(&I);
+
+  // Delete the unnecessary instruction now...
+  BI = BI->eraseFromParent();
+}
+
+void llvm::ReplaceInstWithInst(BasicBlock *BB, BasicBlock::iterator &BI,
+                               Instruction *I) {
+  assert(I->getParent() == nullptr &&
+         "ReplaceInstWithInst: Instruction already inserted into basic block!");
+
+  // Copy debug location to newly added instruction, if it wasn't already set
+  // by the caller.
+  if (!I->getDebugLoc())
+    I->setDebugLoc(BI->getDebugLoc());
+
+  // Insert the new instruction into the basic block...
+  BasicBlock::iterator New = I->insertInto(BB, BI);
+
+  // Replace all uses of the old instruction, and delete it. 
+ ReplaceInstWithValue(BI, I); + + // Move BI back to point to the newly inserted instruction + BI = New; +} + +bool llvm::IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB) { + // Remember visited blocks to avoid infinite loop + SmallPtrSet<const BasicBlock *, 8> VisitedBlocks; + unsigned Depth = 0; + while (BB && Depth++ < MaxDeoptOrUnreachableSuccessorCheckDepth && + VisitedBlocks.insert(BB).second) { + if (BB->getTerminatingDeoptimizeCall() || + isa<UnreachableInst>(BB->getTerminator())) + return true; + BB = BB->getUniqueSuccessor(); + } + return false; +} + +void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) { + BasicBlock::iterator BI(From); + ReplaceInstWithInst(From->getParent(), BI, To); +} + +BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT, + LoopInfo *LI, MemorySSAUpdater *MSSAU, + const Twine &BBName) { + unsigned SuccNum = GetSuccessorNumber(BB, Succ); + + Instruction *LatchTerm = BB->getTerminator(); + + CriticalEdgeSplittingOptions Options = + CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA(); + + if ((isCriticalEdge(LatchTerm, SuccNum, Options.MergeIdenticalEdges))) { + // If it is a critical edge, and the succesor is an exception block, handle + // the split edge logic in this specific function + if (Succ->isEHPad()) + return ehAwareSplitEdge(BB, Succ, nullptr, nullptr, Options, BBName); + + // If this is a critical edge, let SplitKnownCriticalEdge do it. + return SplitKnownCriticalEdge(LatchTerm, SuccNum, Options, BBName); + } + + // If the edge isn't critical, then BB has a single successor or Succ has a + // single pred. Split the block. + if (BasicBlock *SP = Succ->getSinglePredecessor()) { + // If the successor only has a single pred, split the top of the successor + // block. + assert(SP == BB && "CFG broken"); + SP = nullptr; + return SplitBlock(Succ, &Succ->front(), DT, LI, MSSAU, BBName, + /*Before=*/true); + } + + // Otherwise, if BB has a single successor, split it at the bottom of the + // block. + assert(BB->getTerminator()->getNumSuccessors() == 1 && + "Should have a single succ!"); + return SplitBlock(BB, BB->getTerminator(), DT, LI, MSSAU, BBName); +} + +void llvm::setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) { + if (auto *II = dyn_cast<InvokeInst>(TI)) + II->setUnwindDest(Succ); + else if (auto *CS = dyn_cast<CatchSwitchInst>(TI)) + CS->setUnwindDest(Succ); + else if (auto *CR = dyn_cast<CleanupReturnInst>(TI)) + CR->setUnwindDest(Succ); + else + llvm_unreachable("unexpected terminator instruction"); +} + +void llvm::updatePhiNodes(BasicBlock *DestBB, BasicBlock *OldPred, + BasicBlock *NewPred, PHINode *Until) { + int BBIdx = 0; + for (PHINode &PN : DestBB->phis()) { + // We manually update the LandingPadReplacement PHINode and it is the last + // PHI Node. So, if we find it, we are done. + if (Until == &PN) + break; + + // Reuse the previous value of BBIdx if it lines up. In cases where we + // have multiple phi nodes with *lots* of predecessors, this is a speed + // win because we don't have to scan the PHI looking for TIBB. This + // happens because the BB list of PHI nodes are usually in the same + // order. 
+ if (PN.getIncomingBlock(BBIdx) != OldPred) + BBIdx = PN.getBasicBlockIndex(OldPred); + + assert(BBIdx != -1 && "Invalid PHI Index!"); + PN.setIncomingBlock(BBIdx, NewPred); + } +} + +BasicBlock *llvm::ehAwareSplitEdge(BasicBlock *BB, BasicBlock *Succ, + LandingPadInst *OriginalPad, + PHINode *LandingPadReplacement, + const CriticalEdgeSplittingOptions &Options, + const Twine &BBName) { + + auto *PadInst = Succ->getFirstNonPHI(); + if (!LandingPadReplacement && !PadInst->isEHPad()) + return SplitEdge(BB, Succ, Options.DT, Options.LI, Options.MSSAU, BBName); + + auto *LI = Options.LI; + SmallVector<BasicBlock *, 4> LoopPreds; + // Check if extra modifications will be required to preserve loop-simplify + // form after splitting. If it would require splitting blocks with IndirectBr + // terminators, bail out if preserving loop-simplify form is requested. + if (Options.PreserveLoopSimplify && LI) { + if (Loop *BBLoop = LI->getLoopFor(BB)) { + + // The only way that we can break LoopSimplify form by splitting a + // critical edge is when there exists some edge from BBLoop to Succ *and* + // the only edge into Succ from outside of BBLoop is that of NewBB after + // the split. If the first isn't true, then LoopSimplify still holds, + // NewBB is the new exit block and it has no non-loop predecessors. If the + // second isn't true, then Succ was not in LoopSimplify form prior to + // the split as it had a non-loop predecessor. In both of these cases, + // the predecessor must be directly in BBLoop, not in a subloop, or again + // LoopSimplify doesn't hold. + for (BasicBlock *P : predecessors(Succ)) { + if (P == BB) + continue; // The new block is known. + if (LI->getLoopFor(P) != BBLoop) { + // Loop is not in LoopSimplify form, no need to re simplify after + // splitting edge. + LoopPreds.clear(); + break; + } + LoopPreds.push_back(P); + } + // Loop-simplify form can be preserved, if we can split all in-loop + // predecessors. 
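+      // Splitting such a predecessor is impossible when it ends in an
+      // indirectbr, so bail out instead of breaking loop-simplify form.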
+ if (any_of(LoopPreds, [](BasicBlock *Pred) { + return isa<IndirectBrInst>(Pred->getTerminator()); + })) { + return nullptr; + } + } + } + + auto *NewBB = + BasicBlock::Create(BB->getContext(), BBName, BB->getParent(), Succ); + setUnwindEdgeTo(BB->getTerminator(), NewBB); + updatePhiNodes(Succ, BB, NewBB, LandingPadReplacement); + + if (LandingPadReplacement) { + auto *NewLP = OriginalPad->clone(); + auto *Terminator = BranchInst::Create(Succ, NewBB); + NewLP->insertBefore(Terminator); + LandingPadReplacement->addIncoming(NewLP, NewBB); + } else { + Value *ParentPad = nullptr; + if (auto *FuncletPad = dyn_cast<FuncletPadInst>(PadInst)) + ParentPad = FuncletPad->getParentPad(); + else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(PadInst)) + ParentPad = CatchSwitch->getParentPad(); + else if (auto *CleanupPad = dyn_cast<CleanupPadInst>(PadInst)) + ParentPad = CleanupPad->getParentPad(); + else if (auto *LandingPad = dyn_cast<LandingPadInst>(PadInst)) + ParentPad = LandingPad->getParent(); + else + llvm_unreachable("handling for other EHPads not implemented yet"); + + auto *NewCleanupPad = CleanupPadInst::Create(ParentPad, {}, BBName, NewBB); + CleanupReturnInst::Create(NewCleanupPad, Succ, NewBB); + } + + auto *DT = Options.DT; + auto *MSSAU = Options.MSSAU; + if (!DT && !LI) + return NewBB; + + if (DT) { + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); + SmallVector<DominatorTree::UpdateType, 3> Updates; + + Updates.push_back({DominatorTree::Insert, BB, NewBB}); + Updates.push_back({DominatorTree::Insert, NewBB, Succ}); + Updates.push_back({DominatorTree::Delete, BB, Succ}); + + DTU.applyUpdates(Updates); + DTU.flush(); + + if (MSSAU) { + MSSAU->applyUpdates(Updates, *DT); + if (VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + } + } + + if (LI) { + if (Loop *BBLoop = LI->getLoopFor(BB)) { + // If one or the other blocks were not in a loop, the new block is not + // either, and thus LI doesn't need to be updated. + if (Loop *SuccLoop = LI->getLoopFor(Succ)) { + if (BBLoop == SuccLoop) { + // Both in the same loop, the NewBB joins loop. + SuccLoop->addBasicBlockToLoop(NewBB, *LI); + } else if (BBLoop->contains(SuccLoop)) { + // Edge from an outer loop to an inner loop. Add to the outer loop. + BBLoop->addBasicBlockToLoop(NewBB, *LI); + } else if (SuccLoop->contains(BBLoop)) { + // Edge from an inner loop to an outer loop. Add to the outer loop. + SuccLoop->addBasicBlockToLoop(NewBB, *LI); + } else { + // Edge from two loops with no containment relation. Because these + // are natural loops, we know that the destination block must be the + // header of its loop (adding a branch into a loop elsewhere would + // create an irreducible loop). + assert(SuccLoop->getHeader() == Succ && + "Should not create irreducible loops!"); + if (Loop *P = SuccLoop->getParentLoop()) + P->addBasicBlockToLoop(NewBB, *LI); + } + } + + // If BB is in a loop and Succ is outside of that loop, we may need to + // update LoopSimplify form and LCSSA form. + if (!BBLoop->contains(Succ)) { + assert(!BBLoop->contains(NewBB) && + "Split point for loop exit is contained in loop!"); + + // Update LCSSA form in the newly created exit block. 
+ if (Options.PreserveLCSSA) { + createPHIsForSplitLoopExit(BB, NewBB, Succ); + } + + if (!LoopPreds.empty()) { + BasicBlock *NewExitBB = SplitBlockPredecessors( + Succ, LoopPreds, "split", DT, LI, MSSAU, Options.PreserveLCSSA); + if (Options.PreserveLCSSA) + createPHIsForSplitLoopExit(LoopPreds, NewExitBB, Succ); + } + } + } + } + + return NewBB; +} + +void llvm::createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds, + BasicBlock *SplitBB, BasicBlock *DestBB) { + // SplitBB shouldn't have anything non-trivial in it yet. + assert((SplitBB->getFirstNonPHI() == SplitBB->getTerminator() || + SplitBB->isLandingPad()) && + "SplitBB has non-PHI nodes!"); + + // For each PHI in the destination block. + for (PHINode &PN : DestBB->phis()) { + int Idx = PN.getBasicBlockIndex(SplitBB); + assert(Idx >= 0 && "Invalid Block Index"); + Value *V = PN.getIncomingValue(Idx); + + // If the input is a PHI which already satisfies LCSSA, don't create + // a new one. + if (const PHINode *VP = dyn_cast<PHINode>(V)) + if (VP->getParent() == SplitBB) + continue; + + // Otherwise a new PHI is needed. Create one and populate it. + PHINode *NewPN = PHINode::Create( + PN.getType(), Preds.size(), "split", + SplitBB->isLandingPad() ? &SplitBB->front() : SplitBB->getTerminator()); + for (BasicBlock *BB : Preds) + NewPN->addIncoming(V, BB); + + // Update the original PHI. + PN.setIncomingValue(Idx, NewPN); + } +} + +unsigned +llvm::SplitAllCriticalEdges(Function &F, + const CriticalEdgeSplittingOptions &Options) { + unsigned NumBroken = 0; + for (BasicBlock &BB : F) { + Instruction *TI = BB.getTerminator(); + if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI)) + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + if (SplitCriticalEdge(TI, i, Options)) + ++NumBroken; + } + return NumBroken; +} + +static BasicBlock *SplitBlockImpl(BasicBlock *Old, Instruction *SplitPt, + DomTreeUpdater *DTU, DominatorTree *DT, + LoopInfo *LI, MemorySSAUpdater *MSSAU, + const Twine &BBName, bool Before) { + if (Before) { + DomTreeUpdater LocalDTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); + return splitBlockBefore(Old, SplitPt, + DTU ? DTU : (DT ? &LocalDTU : nullptr), LI, MSSAU, + BBName); + } + BasicBlock::iterator SplitIt = SplitPt->getIterator(); + while (isa<PHINode>(SplitIt) || SplitIt->isEHPad()) { + ++SplitIt; + assert(SplitIt != SplitPt->getParent()->end()); + } + std::string Name = BBName.str(); + BasicBlock *New = Old->splitBasicBlock( + SplitIt, Name.empty() ? Old->getName() + ".split" : Name); + + // The new block lives in whichever loop the old one did. This preserves + // LCSSA as well, because we force the split point to be after any PHI nodes. + if (LI) + if (Loop *L = LI->getLoopFor(Old)) + L->addBasicBlockToLoop(New, *LI); + + if (DTU) { + SmallVector<DominatorTree::UpdateType, 8> Updates; + // Old dominates New. New node dominates all other nodes dominated by Old. + SmallPtrSet<BasicBlock *, 8> UniqueSuccessorsOfOld; + Updates.push_back({DominatorTree::Insert, Old, New}); + Updates.reserve(Updates.size() + 2 * succ_size(New)); + for (BasicBlock *SuccessorOfOld : successors(New)) + if (UniqueSuccessorsOfOld.insert(SuccessorOfOld).second) { + Updates.push_back({DominatorTree::Insert, New, SuccessorOfOld}); + Updates.push_back({DominatorTree::Delete, Old, SuccessorOfOld}); + } + + DTU->applyUpdates(Updates); + } else if (DT) + // Old dominates New. New node dominates all other nodes dominated by Old. 
+ if (DomTreeNode *OldNode = DT->getNode(Old)) { + std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end()); + + DomTreeNode *NewNode = DT->addNewBlock(New, Old); + for (DomTreeNode *I : Children) + DT->changeImmediateDominator(I, NewNode); + } + + // Move MemoryAccesses still tracked in Old, but part of New now. + // Update accesses in successor blocks accordingly. + if (MSSAU) + MSSAU->moveAllAfterSpliceBlocks(Old, New, &*(New->begin())); + + return New; +} + +BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, + DominatorTree *DT, LoopInfo *LI, + MemorySSAUpdater *MSSAU, const Twine &BBName, + bool Before) { + return SplitBlockImpl(Old, SplitPt, /*DTU=*/nullptr, DT, LI, MSSAU, BBName, + Before); +} +BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, + DomTreeUpdater *DTU, LoopInfo *LI, + MemorySSAUpdater *MSSAU, const Twine &BBName, + bool Before) { + return SplitBlockImpl(Old, SplitPt, DTU, /*DT=*/nullptr, LI, MSSAU, BBName, + Before); +} + +BasicBlock *llvm::splitBlockBefore(BasicBlock *Old, Instruction *SplitPt, + DomTreeUpdater *DTU, LoopInfo *LI, + MemorySSAUpdater *MSSAU, + const Twine &BBName) { + + BasicBlock::iterator SplitIt = SplitPt->getIterator(); + while (isa<PHINode>(SplitIt) || SplitIt->isEHPad()) + ++SplitIt; + std::string Name = BBName.str(); + BasicBlock *New = Old->splitBasicBlock( + SplitIt, Name.empty() ? Old->getName() + ".split" : Name, + /* Before=*/true); + + // The new block lives in whichever loop the old one did. This preserves + // LCSSA as well, because we force the split point to be after any PHI nodes. + if (LI) + if (Loop *L = LI->getLoopFor(Old)) + L->addBasicBlockToLoop(New, *LI); + + if (DTU) { + SmallVector<DominatorTree::UpdateType, 8> DTUpdates; + // New dominates Old. The predecessor nodes of the Old node dominate + // New node. + SmallPtrSet<BasicBlock *, 8> UniquePredecessorsOfOld; + DTUpdates.push_back({DominatorTree::Insert, New, Old}); + DTUpdates.reserve(DTUpdates.size() + 2 * pred_size(New)); + for (BasicBlock *PredecessorOfOld : predecessors(New)) + if (UniquePredecessorsOfOld.insert(PredecessorOfOld).second) { + DTUpdates.push_back({DominatorTree::Insert, PredecessorOfOld, New}); + DTUpdates.push_back({DominatorTree::Delete, PredecessorOfOld, Old}); + } + + DTU->applyUpdates(DTUpdates); + + // Move MemoryAccesses still tracked in Old, but part of New now. + // Update accesses in successor blocks accordingly. + if (MSSAU) { + MSSAU->applyUpdates(DTUpdates, DTU->getDomTree()); + if (VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + } + } + return New; +} + +/// Update DominatorTree, LoopInfo, and LCCSA analysis information. +static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, + ArrayRef<BasicBlock *> Preds, + DomTreeUpdater *DTU, DominatorTree *DT, + LoopInfo *LI, MemorySSAUpdater *MSSAU, + bool PreserveLCSSA, bool &HasLoopExit) { + // Update dominator tree if available. + if (DTU) { + // Recalculation of DomTree is needed when updating a forward DomTree and + // the Entry BB is replaced. + if (NewBB->isEntryBlock() && DTU->hasDomTree()) { + // The entry block was removed and there is no external interface for + // the dominator tree to be notified of this change. In this corner-case + // we recalculate the entire tree. + DTU->recalculate(*NewBB->getParent()); + } else { + // Split block expects NewBB to have a non-empty set of predecessors. 
+ SmallVector<DominatorTree::UpdateType, 8> Updates; + SmallPtrSet<BasicBlock *, 8> UniquePreds; + Updates.push_back({DominatorTree::Insert, NewBB, OldBB}); + Updates.reserve(Updates.size() + 2 * Preds.size()); + for (auto *Pred : Preds) + if (UniquePreds.insert(Pred).second) { + Updates.push_back({DominatorTree::Insert, Pred, NewBB}); + Updates.push_back({DominatorTree::Delete, Pred, OldBB}); + } + DTU->applyUpdates(Updates); + } + } else if (DT) { + if (OldBB == DT->getRootNode()->getBlock()) { + assert(NewBB->isEntryBlock()); + DT->setNewRoot(NewBB); + } else { + // Split block expects NewBB to have a non-empty set of predecessors. + DT->splitBlock(NewBB); + } + } + + // Update MemoryPhis after split if MemorySSA is available + if (MSSAU) + MSSAU->wireOldPredecessorsToNewImmediatePredecessor(OldBB, NewBB, Preds); + + // The rest of the logic is only relevant for updating the loop structures. + if (!LI) + return; + + if (DTU && DTU->hasDomTree()) + DT = &DTU->getDomTree(); + assert(DT && "DT should be available to update LoopInfo!"); + Loop *L = LI->getLoopFor(OldBB); + + // If we need to preserve loop analyses, collect some information about how + // this split will affect loops. + bool IsLoopEntry = !!L; + bool SplitMakesNewLoopHeader = false; + for (BasicBlock *Pred : Preds) { + // Preds that are not reachable from entry should not be used to identify if + // OldBB is a loop entry or if SplitMakesNewLoopHeader. Unreachable blocks + // are not within any loops, so we incorrectly mark SplitMakesNewLoopHeader + // as true and make the NewBB the header of some loop. This breaks LI. + if (!DT->isReachableFromEntry(Pred)) + continue; + // If we need to preserve LCSSA, determine if any of the preds is a loop + // exit. + if (PreserveLCSSA) + if (Loop *PL = LI->getLoopFor(Pred)) + if (!PL->contains(OldBB)) + HasLoopExit = true; + + // If we need to preserve LoopInfo, note whether any of the preds crosses + // an interesting loop boundary. + if (!L) + continue; + if (L->contains(Pred)) + IsLoopEntry = false; + else + SplitMakesNewLoopHeader = true; + } + + // Unless we have a loop for OldBB, nothing else to do here. + if (!L) + return; + + if (IsLoopEntry) { + // Add the new block to the nearest enclosing loop (and not an adjacent + // loop). To find this, examine each of the predecessors and determine which + // loops enclose them, and select the most-nested loop which contains the + // loop containing the block being split. + Loop *InnermostPredLoop = nullptr; + for (BasicBlock *Pred : Preds) { + if (Loop *PredLoop = LI->getLoopFor(Pred)) { + // Seek a loop which actually contains the block being split (to avoid + // adjacent loops). + while (PredLoop && !PredLoop->contains(OldBB)) + PredLoop = PredLoop->getParentLoop(); + + // Select the most-nested of these loops which contains the block. + if (PredLoop && PredLoop->contains(OldBB) && + (!InnermostPredLoop || + InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth())) + InnermostPredLoop = PredLoop; + } + } + + if (InnermostPredLoop) + InnermostPredLoop->addBasicBlockToLoop(NewBB, *LI); + } else { + L->addBasicBlockToLoop(NewBB, *LI); + if (SplitMakesNewLoopHeader) + L->moveToHeader(NewBB); + } +} + +/// Update the PHI nodes in OrigBB to include the values coming from NewBB. +/// This also updates AliasAnalysis, if available. +static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, + ArrayRef<BasicBlock *> Preds, BranchInst *BI, + bool HasLoopExit) { + // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB. 
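+  // (A new PHI is only needed when the incoming values from Preds disagree;
+  // otherwise the existing PHI in OrigBB is simply rewired below.)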
+ SmallPtrSet<BasicBlock *, 16> PredSet(Preds.begin(), Preds.end()); + for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) { + PHINode *PN = cast<PHINode>(I++); + + // Check to see if all of the values coming in are the same. If so, we + // don't need to create a new PHI node, unless it's needed for LCSSA. + Value *InVal = nullptr; + if (!HasLoopExit) { + InVal = PN->getIncomingValueForBlock(Preds[0]); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + if (!PredSet.count(PN->getIncomingBlock(i))) + continue; + if (!InVal) + InVal = PN->getIncomingValue(i); + else if (InVal != PN->getIncomingValue(i)) { + InVal = nullptr; + break; + } + } + } + + if (InVal) { + // If all incoming values for the new PHI would be the same, just don't + // make a new PHI. Instead, just remove the incoming values from the old + // PHI. + + // NOTE! This loop walks backwards for a reason! First off, this minimizes + // the cost of removal if we end up removing a large number of values, and + // second off, this ensures that the indices for the incoming values + // aren't invalidated when we remove one. + for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) + if (PredSet.count(PN->getIncomingBlock(i))) + PN->removeIncomingValue(i, false); + + // Add an incoming value to the PHI node in the loop for the preheader + // edge. + PN->addIncoming(InVal, NewBB); + continue; + } + + // If the values coming into the block are not the same, we need a new + // PHI. + // Create the new PHI node, insert it into NewBB at the end of the block + PHINode *NewPHI = + PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI); + + // NOTE! This loop walks backwards for a reason! First off, this minimizes + // the cost of removal if we end up removing a large number of values, and + // second off, this ensures that the indices for the incoming values aren't + // invalidated when we remove one. + for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) { + BasicBlock *IncomingBB = PN->getIncomingBlock(i); + if (PredSet.count(IncomingBB)) { + Value *V = PN->removeIncomingValue(i, false); + NewPHI->addIncoming(V, IncomingBB); + } + } + + PN->addIncoming(NewPHI, NewBB); + } +} + +static void SplitLandingPadPredecessorsImpl( + BasicBlock *OrigBB, ArrayRef<BasicBlock *> Preds, const char *Suffix1, + const char *Suffix2, SmallVectorImpl<BasicBlock *> &NewBBs, + DomTreeUpdater *DTU, DominatorTree *DT, LoopInfo *LI, + MemorySSAUpdater *MSSAU, bool PreserveLCSSA); + +static BasicBlock * +SplitBlockPredecessorsImpl(BasicBlock *BB, ArrayRef<BasicBlock *> Preds, + const char *Suffix, DomTreeUpdater *DTU, + DominatorTree *DT, LoopInfo *LI, + MemorySSAUpdater *MSSAU, bool PreserveLCSSA) { + // Do not attempt to split that which cannot be split. + if (!BB->canSplitPredecessors()) + return nullptr; + + // For the landingpads we need to act a bit differently. + // Delegate this work to the SplitLandingPadPredecessors. + if (BB->isLandingPad()) { + SmallVector<BasicBlock*, 2> NewBBs; + std::string NewName = std::string(Suffix) + ".split-lp"; + + SplitLandingPadPredecessorsImpl(BB, Preds, Suffix, NewName.c_str(), NewBBs, + DTU, DT, LI, MSSAU, PreserveLCSSA); + return NewBBs[0]; + } + + // Create new basic block, insert right before the original block. + BasicBlock *NewBB = BasicBlock::Create( + BB->getContext(), BB->getName() + Suffix, BB->getParent(), BB); + + // The new block unconditionally branches to the old block. 
+ BranchInst *BI = BranchInst::Create(BB, NewBB); + + Loop *L = nullptr; + BasicBlock *OldLatch = nullptr; + // Splitting the predecessors of a loop header creates a preheader block. + if (LI && LI->isLoopHeader(BB)) { + L = LI->getLoopFor(BB); + // Using the loop start line number prevents debuggers stepping into the + // loop body for this instruction. + BI->setDebugLoc(L->getStartLoc()); + + // If BB is the header of the Loop, it is possible that the loop is + // modified, such that the current latch does not remain the latch of the + // loop. If that is the case, the loop metadata from the current latch needs + // to be applied to the new latch. + OldLatch = L->getLoopLatch(); + } else + BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc()); + + // Move the edges from Preds to point to NewBB instead of BB. + for (BasicBlock *Pred : Preds) { + // This is slightly more strict than necessary; the minimum requirement + // is that there be no more than one indirectbr branching to BB. And + // all BlockAddress uses would need to be updated. + assert(!isa<IndirectBrInst>(Pred->getTerminator()) && + "Cannot split an edge from an IndirectBrInst"); + Pred->getTerminator()->replaceSuccessorWith(BB, NewBB); + } + + // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI + // node becomes an incoming value for BB's phi node. However, if the Preds + // list is empty, we need to insert dummy entries into the PHI nodes in BB to + // account for the newly created predecessor. + if (Preds.empty()) { + // Insert dummy values as the incoming value. + for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) + cast<PHINode>(I)->addIncoming(PoisonValue::get(I->getType()), NewBB); + } + + // Update DominatorTree, LoopInfo, and LCCSA analysis information. + bool HasLoopExit = false; + UpdateAnalysisInformation(BB, NewBB, Preds, DTU, DT, LI, MSSAU, PreserveLCSSA, + HasLoopExit); + + if (!Preds.empty()) { + // Update the PHI nodes in BB with the values coming from NewBB. + UpdatePHINodes(BB, NewBB, Preds, BI, HasLoopExit); + } + + if (OldLatch) { + BasicBlock *NewLatch = L->getLoopLatch(); + if (NewLatch != OldLatch) { + MDNode *MD = OldLatch->getTerminator()->getMetadata("llvm.loop"); + NewLatch->getTerminator()->setMetadata("llvm.loop", MD); + // It's still possible that OldLatch is the latch of another inner loop, + // in which case we do not remove the metadata. 
+ Loop *IL = LI->getLoopFor(OldLatch); + if (IL && IL->getLoopLatch() != OldLatch) + OldLatch->getTerminator()->setMetadata("llvm.loop", nullptr); + } + } + + return NewBB; +} + +BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, + ArrayRef<BasicBlock *> Preds, + const char *Suffix, DominatorTree *DT, + LoopInfo *LI, MemorySSAUpdater *MSSAU, + bool PreserveLCSSA) { + return SplitBlockPredecessorsImpl(BB, Preds, Suffix, /*DTU=*/nullptr, DT, LI, + MSSAU, PreserveLCSSA); +} +BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, + ArrayRef<BasicBlock *> Preds, + const char *Suffix, + DomTreeUpdater *DTU, LoopInfo *LI, + MemorySSAUpdater *MSSAU, + bool PreserveLCSSA) { + return SplitBlockPredecessorsImpl(BB, Preds, Suffix, DTU, + /*DT=*/nullptr, LI, MSSAU, PreserveLCSSA); +} + +static void SplitLandingPadPredecessorsImpl( + BasicBlock *OrigBB, ArrayRef<BasicBlock *> Preds, const char *Suffix1, + const char *Suffix2, SmallVectorImpl<BasicBlock *> &NewBBs, + DomTreeUpdater *DTU, DominatorTree *DT, LoopInfo *LI, + MemorySSAUpdater *MSSAU, bool PreserveLCSSA) { + assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!"); + + // Create a new basic block for OrigBB's predecessors listed in Preds. Insert + // it right before the original block. + BasicBlock *NewBB1 = BasicBlock::Create(OrigBB->getContext(), + OrigBB->getName() + Suffix1, + OrigBB->getParent(), OrigBB); + NewBBs.push_back(NewBB1); + + // The new block unconditionally branches to the old block. + BranchInst *BI1 = BranchInst::Create(OrigBB, NewBB1); + BI1->setDebugLoc(OrigBB->getFirstNonPHI()->getDebugLoc()); + + // Move the edges from Preds to point to NewBB1 instead of OrigBB. + for (BasicBlock *Pred : Preds) { + // This is slightly more strict than necessary; the minimum requirement + // is that there be no more than one indirectbr branching to BB. And + // all BlockAddress uses would need to be updated. + assert(!isa<IndirectBrInst>(Pred->getTerminator()) && + "Cannot split an edge from an IndirectBrInst"); + Pred->getTerminator()->replaceUsesOfWith(OrigBB, NewBB1); + } + + bool HasLoopExit = false; + UpdateAnalysisInformation(OrigBB, NewBB1, Preds, DTU, DT, LI, MSSAU, + PreserveLCSSA, HasLoopExit); + + // Update the PHI nodes in OrigBB with the values coming from NewBB1. + UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, HasLoopExit); + + // Move the remaining edges from OrigBB to point to NewBB2. + SmallVector<BasicBlock*, 8> NewBB2Preds; + for (pred_iterator i = pred_begin(OrigBB), e = pred_end(OrigBB); + i != e; ) { + BasicBlock *Pred = *i++; + if (Pred == NewBB1) continue; + assert(!isa<IndirectBrInst>(Pred->getTerminator()) && + "Cannot split an edge from an IndirectBrInst"); + NewBB2Preds.push_back(Pred); + e = pred_end(OrigBB); + } + + BasicBlock *NewBB2 = nullptr; + if (!NewBB2Preds.empty()) { + // Create another basic block for the rest of OrigBB's predecessors. + NewBB2 = BasicBlock::Create(OrigBB->getContext(), + OrigBB->getName() + Suffix2, + OrigBB->getParent(), OrigBB); + NewBBs.push_back(NewBB2); + + // The new block unconditionally branches to the old block. + BranchInst *BI2 = BranchInst::Create(OrigBB, NewBB2); + BI2->setDebugLoc(OrigBB->getFirstNonPHI()->getDebugLoc()); + + // Move the remaining edges from OrigBB to point to NewBB2. + for (BasicBlock *NewBB2Pred : NewBB2Preds) + NewBB2Pred->getTerminator()->replaceUsesOfWith(OrigBB, NewBB2); + + // Update DominatorTree, LoopInfo, and LCCSA analysis information. 
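+    // HasLoopExit is recomputed here for NewBB2's predecessors before the
+    // PHI nodes in OrigBB are updated a second time.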
+ HasLoopExit = false; + UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, DTU, DT, LI, MSSAU, + PreserveLCSSA, HasLoopExit); + + // Update the PHI nodes in OrigBB with the values coming from NewBB2. + UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, HasLoopExit); + } + + LandingPadInst *LPad = OrigBB->getLandingPadInst(); + Instruction *Clone1 = LPad->clone(); + Clone1->setName(Twine("lpad") + Suffix1); + Clone1->insertInto(NewBB1, NewBB1->getFirstInsertionPt()); + + if (NewBB2) { + Instruction *Clone2 = LPad->clone(); + Clone2->setName(Twine("lpad") + Suffix2); + Clone2->insertInto(NewBB2, NewBB2->getFirstInsertionPt()); + + // Create a PHI node for the two cloned landingpad instructions only + // if the original landingpad instruction has some uses. + if (!LPad->use_empty()) { + assert(!LPad->getType()->isTokenTy() && + "Split cannot be applied if LPad is token type. Otherwise an " + "invalid PHINode of token type would be created."); + PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad); + PN->addIncoming(Clone1, NewBB1); + PN->addIncoming(Clone2, NewBB2); + LPad->replaceAllUsesWith(PN); + } + LPad->eraseFromParent(); + } else { + // There is no second clone. Just replace the landing pad with the first + // clone. + LPad->replaceAllUsesWith(Clone1); + LPad->eraseFromParent(); + } +} + +void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, + ArrayRef<BasicBlock *> Preds, + const char *Suffix1, const char *Suffix2, + SmallVectorImpl<BasicBlock *> &NewBBs, + DominatorTree *DT, LoopInfo *LI, + MemorySSAUpdater *MSSAU, + bool PreserveLCSSA) { + return SplitLandingPadPredecessorsImpl( + OrigBB, Preds, Suffix1, Suffix2, NewBBs, + /*DTU=*/nullptr, DT, LI, MSSAU, PreserveLCSSA); +} +void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, + ArrayRef<BasicBlock *> Preds, + const char *Suffix1, const char *Suffix2, + SmallVectorImpl<BasicBlock *> &NewBBs, + DomTreeUpdater *DTU, LoopInfo *LI, + MemorySSAUpdater *MSSAU, + bool PreserveLCSSA) { + return SplitLandingPadPredecessorsImpl(OrigBB, Preds, Suffix1, Suffix2, + NewBBs, DTU, /*DT=*/nullptr, LI, MSSAU, + PreserveLCSSA); +} + +ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, + BasicBlock *Pred, + DomTreeUpdater *DTU) { + Instruction *UncondBranch = Pred->getTerminator(); + // Clone the return and add it to the end of the predecessor. + Instruction *NewRet = RI->clone(); + NewRet->insertInto(Pred, Pred->end()); + + // If the return instruction returns a value, and if the value was a + // PHI node in "BB", propagate the right value into the return. + for (Use &Op : NewRet->operands()) { + Value *V = Op; + Instruction *NewBC = nullptr; + if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) { + // Return value might be bitcasted. Clone and insert it before the + // return instruction. 
+ V = BCI->getOperand(0); + NewBC = BCI->clone(); + NewBC->insertInto(Pred, NewRet->getIterator()); + Op = NewBC; + } + + Instruction *NewEV = nullptr; + if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(V)) { + V = EVI->getOperand(0); + NewEV = EVI->clone(); + if (NewBC) { + NewBC->setOperand(0, NewEV); + NewEV->insertInto(Pred, NewBC->getIterator()); + } else { + NewEV->insertInto(Pred, NewRet->getIterator()); + Op = NewEV; + } + } + + if (PHINode *PN = dyn_cast<PHINode>(V)) { + if (PN->getParent() == BB) { + if (NewEV) { + NewEV->setOperand(0, PN->getIncomingValueForBlock(Pred)); + } else if (NewBC) + NewBC->setOperand(0, PN->getIncomingValueForBlock(Pred)); + else + Op = PN->getIncomingValueForBlock(Pred); + } + } + } + + // Update any PHI nodes in the returning block to realize that we no + // longer branch to them. + BB->removePredecessor(Pred); + UncondBranch->eraseFromParent(); + + if (DTU) + DTU->applyUpdates({{DominatorTree::Delete, Pred, BB}}); + + return cast<ReturnInst>(NewRet); +} + +static Instruction * +SplitBlockAndInsertIfThenImpl(Value *Cond, Instruction *SplitBefore, + bool Unreachable, MDNode *BranchWeights, + DomTreeUpdater *DTU, DominatorTree *DT, + LoopInfo *LI, BasicBlock *ThenBlock) { + SmallVector<DominatorTree::UpdateType, 8> Updates; + BasicBlock *Head = SplitBefore->getParent(); + BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator()); + if (DTU) { + SmallPtrSet<BasicBlock *, 8> UniqueSuccessorsOfHead; + Updates.push_back({DominatorTree::Insert, Head, Tail}); + Updates.reserve(Updates.size() + 2 * succ_size(Tail)); + for (BasicBlock *SuccessorOfHead : successors(Tail)) + if (UniqueSuccessorsOfHead.insert(SuccessorOfHead).second) { + Updates.push_back({DominatorTree::Insert, Tail, SuccessorOfHead}); + Updates.push_back({DominatorTree::Delete, Head, SuccessorOfHead}); + } + } + Instruction *HeadOldTerm = Head->getTerminator(); + LLVMContext &C = Head->getContext(); + Instruction *CheckTerm; + bool CreateThenBlock = (ThenBlock == nullptr); + if (CreateThenBlock) { + ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); + if (Unreachable) + CheckTerm = new UnreachableInst(C, ThenBlock); + else { + CheckTerm = BranchInst::Create(Tail, ThenBlock); + if (DTU) + Updates.push_back({DominatorTree::Insert, ThenBlock, Tail}); + } + CheckTerm->setDebugLoc(SplitBefore->getDebugLoc()); + } else + CheckTerm = ThenBlock->getTerminator(); + BranchInst *HeadNewTerm = + BranchInst::Create(/*ifTrue*/ ThenBlock, /*ifFalse*/ Tail, Cond); + if (DTU) + Updates.push_back({DominatorTree::Insert, Head, ThenBlock}); + HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights); + ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); + + if (DTU) + DTU->applyUpdates(Updates); + else if (DT) { + if (DomTreeNode *OldNode = DT->getNode(Head)) { + std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end()); + + DomTreeNode *NewNode = DT->addNewBlock(Tail, Head); + for (DomTreeNode *Child : Children) + DT->changeImmediateDominator(Child, NewNode); + + // Head dominates ThenBlock. 
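+      // A freshly created ThenBlock needs a new tree node, while a
+      // caller-provided one only needs its immediate dominator updated.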
+ if (CreateThenBlock) + DT->addNewBlock(ThenBlock, Head); + else + DT->changeImmediateDominator(ThenBlock, Head); + } + } + + if (LI) { + if (Loop *L = LI->getLoopFor(Head)) { + L->addBasicBlockToLoop(ThenBlock, *LI); + L->addBasicBlockToLoop(Tail, *LI); + } + } + + return CheckTerm; +} + +Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond, + Instruction *SplitBefore, + bool Unreachable, + MDNode *BranchWeights, + DominatorTree *DT, LoopInfo *LI, + BasicBlock *ThenBlock) { + return SplitBlockAndInsertIfThenImpl(Cond, SplitBefore, Unreachable, + BranchWeights, + /*DTU=*/nullptr, DT, LI, ThenBlock); +} +Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond, + Instruction *SplitBefore, + bool Unreachable, + MDNode *BranchWeights, + DomTreeUpdater *DTU, LoopInfo *LI, + BasicBlock *ThenBlock) { + return SplitBlockAndInsertIfThenImpl(Cond, SplitBefore, Unreachable, + BranchWeights, DTU, /*DT=*/nullptr, LI, + ThenBlock); +} + +void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore, + Instruction **ThenTerm, + Instruction **ElseTerm, + MDNode *BranchWeights, + DomTreeUpdater *DTU) { + BasicBlock *Head = SplitBefore->getParent(); + + SmallPtrSet<BasicBlock *, 8> UniqueOrigSuccessors; + if (DTU) + UniqueOrigSuccessors.insert(succ_begin(Head), succ_end(Head)); + + BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator()); + Instruction *HeadOldTerm = Head->getTerminator(); + LLVMContext &C = Head->getContext(); + BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); + BasicBlock *ElseBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); + *ThenTerm = BranchInst::Create(Tail, ThenBlock); + (*ThenTerm)->setDebugLoc(SplitBefore->getDebugLoc()); + *ElseTerm = BranchInst::Create(Tail, ElseBlock); + (*ElseTerm)->setDebugLoc(SplitBefore->getDebugLoc()); + BranchInst *HeadNewTerm = + BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/ElseBlock, Cond); + HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights); + ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); + if (DTU) { + SmallVector<DominatorTree::UpdateType, 8> Updates; + Updates.reserve(4 + 2 * UniqueOrigSuccessors.size()); + for (BasicBlock *Succ : successors(Head)) { + Updates.push_back({DominatorTree::Insert, Head, Succ}); + Updates.push_back({DominatorTree::Insert, Succ, Tail}); + } + for (BasicBlock *UniqueOrigSuccessor : UniqueOrigSuccessors) + Updates.push_back({DominatorTree::Insert, Tail, UniqueOrigSuccessor}); + for (BasicBlock *UniqueOrigSuccessor : UniqueOrigSuccessors) + Updates.push_back({DominatorTree::Delete, Head, UniqueOrigSuccessor}); + DTU->applyUpdates(Updates); + } +} + +BranchInst *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, + BasicBlock *&IfFalse) { + PHINode *SomePHI = dyn_cast<PHINode>(BB->begin()); + BasicBlock *Pred1 = nullptr; + BasicBlock *Pred2 = nullptr; + + if (SomePHI) { + if (SomePHI->getNumIncomingValues() != 2) + return nullptr; + Pred1 = SomePHI->getIncomingBlock(0); + Pred2 = SomePHI->getIncomingBlock(1); + } else { + pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + if (PI == PE) // No predecessor + return nullptr; + Pred1 = *PI++; + if (PI == PE) // Only one predecessor + return nullptr; + Pred2 = *PI++; + if (PI != PE) // More than two predecessors + return nullptr; + } + + // We can only handle branches. Other control flow will be lowered to + // branches if possible anyway. 
+ BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator()); + BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator()); + if (!Pred1Br || !Pred2Br) + return nullptr; + + // Eliminate code duplication by ensuring that Pred1Br is conditional if + // either are. + if (Pred2Br->isConditional()) { + // If both branches are conditional, we don't have an "if statement". In + // reality, we could transform this case, but since the condition will be + // required anyway, we stand no chance of eliminating it, so the xform is + // probably not profitable. + if (Pred1Br->isConditional()) + return nullptr; + + std::swap(Pred1, Pred2); + std::swap(Pred1Br, Pred2Br); + } + + if (Pred1Br->isConditional()) { + // The only thing we have to watch out for here is to make sure that Pred2 + // doesn't have incoming edges from other blocks. If it does, the condition + // doesn't dominate BB. + if (!Pred2->getSinglePredecessor()) + return nullptr; + + // If we found a conditional branch predecessor, make sure that it branches + // to BB and Pred2Br. If it doesn't, this isn't an "if statement". + if (Pred1Br->getSuccessor(0) == BB && + Pred1Br->getSuccessor(1) == Pred2) { + IfTrue = Pred1; + IfFalse = Pred2; + } else if (Pred1Br->getSuccessor(0) == Pred2 && + Pred1Br->getSuccessor(1) == BB) { + IfTrue = Pred2; + IfFalse = Pred1; + } else { + // We know that one arm of the conditional goes to BB, so the other must + // go somewhere unrelated, and this must not be an "if statement". + return nullptr; + } + + return Pred1Br; + } + + // Ok, if we got here, both predecessors end with an unconditional branch to + // BB. Don't panic! If both blocks only have a single (identical) + // predecessor, and THAT is a conditional branch, then we're all ok! + BasicBlock *CommonPred = Pred1->getSinglePredecessor(); + if (CommonPred == nullptr || CommonPred != Pred2->getSinglePredecessor()) + return nullptr; + + // Otherwise, if this is a conditional branch, then we can use it! + BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator()); + if (!BI) return nullptr; + + assert(BI->isConditional() && "Two successors but not conditional?"); + if (BI->getSuccessor(0) == Pred1) { + IfTrue = Pred1; + IfFalse = Pred2; + } else { + IfTrue = Pred2; + IfFalse = Pred1; + } + return BI; +} + +// After creating a control flow hub, the operands of PHINodes in an outgoing +// block Out no longer match the predecessors of that block. Predecessors of Out +// that are incoming blocks to the hub are now replaced by just one edge from +// the hub. To match this new control flow, the corresponding values from each +// PHINode must now be moved a new PHINode in the first guard block of the hub. +// +// This operation cannot be performed with SSAUpdater, because it involves one +// new use: If the block Out is in the list of Incoming blocks, then the newly +// created PHI in the Hub will use itself along that edge from Out to Hub. 
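+// For example (using made-up value names), with Incoming = {A, B} and a PHI in
+// Out of the form 'phi [%va, A], [%vb, B], [%vc, C]', reconnectPhis creates
+// '%phi.moved = phi [%va, A], [%vb, B]' in the first guard block, and the PHI
+// in Out becomes 'phi [%vc, C], [%phi.moved, GuardBlock]'.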
+static void reconnectPhis(BasicBlock *Out, BasicBlock *GuardBlock, + const SetVector<BasicBlock *> &Incoming, + BasicBlock *FirstGuardBlock) { + auto I = Out->begin(); + while (I != Out->end() && isa<PHINode>(I)) { + auto Phi = cast<PHINode>(I); + auto NewPhi = + PHINode::Create(Phi->getType(), Incoming.size(), + Phi->getName() + ".moved", &FirstGuardBlock->front()); + for (auto *In : Incoming) { + Value *V = UndefValue::get(Phi->getType()); + if (In == Out) { + V = NewPhi; + } else if (Phi->getBasicBlockIndex(In) != -1) { + V = Phi->removeIncomingValue(In, false); + } + NewPhi->addIncoming(V, In); + } + assert(NewPhi->getNumIncomingValues() == Incoming.size()); + if (Phi->getNumOperands() == 0) { + Phi->replaceAllUsesWith(NewPhi); + I = Phi->eraseFromParent(); + continue; + } + Phi->addIncoming(NewPhi, GuardBlock); + ++I; + } +} + +using BBPredicates = DenseMap<BasicBlock *, Instruction *>; +using BBSetVector = SetVector<BasicBlock *>; + +// Redirects the terminator of the incoming block to the first guard +// block in the hub. The condition of the original terminator (if it +// was conditional) and its original successors are returned as a +// tuple <condition, succ0, succ1>. The function additionally filters +// out successors that are not in the set of outgoing blocks. +// +// - condition is non-null iff the branch is conditional. +// - Succ1 is non-null iff the sole/taken target is an outgoing block. +// - Succ2 is non-null iff condition is non-null and the fallthrough +// target is an outgoing block. +static std::tuple<Value *, BasicBlock *, BasicBlock *> +redirectToHub(BasicBlock *BB, BasicBlock *FirstGuardBlock, + const BBSetVector &Outgoing) { + assert(isa<BranchInst>(BB->getTerminator()) && + "Only support branch terminator."); + auto Branch = cast<BranchInst>(BB->getTerminator()); + auto Condition = Branch->isConditional() ? Branch->getCondition() : nullptr; + + BasicBlock *Succ0 = Branch->getSuccessor(0); + BasicBlock *Succ1 = nullptr; + Succ0 = Outgoing.count(Succ0) ? Succ0 : nullptr; + + if (Branch->isUnconditional()) { + Branch->setSuccessor(0, FirstGuardBlock); + assert(Succ0); + } else { + Succ1 = Branch->getSuccessor(1); + Succ1 = Outgoing.count(Succ1) ? Succ1 : nullptr; + assert(Succ0 || Succ1); + if (Succ0 && !Succ1) { + Branch->setSuccessor(0, FirstGuardBlock); + } else if (Succ1 && !Succ0) { + Branch->setSuccessor(1, FirstGuardBlock); + } else { + Branch->eraseFromParent(); + BranchInst::Create(FirstGuardBlock, BB); + } + } + + assert(Succ0 || Succ1); + return std::make_tuple(Condition, Succ0, Succ1); +} +// Setup the branch instructions for guard blocks. +// +// Each guard block terminates in a conditional branch that transfers +// control to the corresponding outgoing block or the next guard +// block. The last guard block has two outgoing blocks as successors +// since the condition for the final outgoing block is trivially +// true. So we create one less block (including the first guard block) +// than the number of outgoing blocks. +static void setupBranchForGuard(SmallVectorImpl<BasicBlock *> &GuardBlocks, + const BBSetVector &Outgoing, + BBPredicates &GuardPredicates) { + // To help keep the loop simple, temporarily append the last + // outgoing block to the list of guard blocks. 
+ GuardBlocks.push_back(Outgoing.back()); + + for (int i = 0, e = GuardBlocks.size() - 1; i != e; ++i) { + auto Out = Outgoing[i]; + assert(GuardPredicates.count(Out)); + BranchInst::Create(Out, GuardBlocks[i + 1], GuardPredicates[Out], + GuardBlocks[i]); + } + + // Remove the last block from the guard list. + GuardBlocks.pop_back(); +} + +/// We are using one integer to represent the block we are branching to. Then at +/// each guard block, the predicate was calcuated using a simple `icmp eq`. +static void calcPredicateUsingInteger( + const BBSetVector &Incoming, const BBSetVector &Outgoing, + SmallVectorImpl<BasicBlock *> &GuardBlocks, BBPredicates &GuardPredicates) { + auto &Context = Incoming.front()->getContext(); + auto FirstGuardBlock = GuardBlocks.front(); + + auto Phi = PHINode::Create(Type::getInt32Ty(Context), Incoming.size(), + "merged.bb.idx", FirstGuardBlock); + + for (auto In : Incoming) { + Value *Condition; + BasicBlock *Succ0; + BasicBlock *Succ1; + std::tie(Condition, Succ0, Succ1) = + redirectToHub(In, FirstGuardBlock, Outgoing); + Value *IncomingId = nullptr; + if (Succ0 && Succ1) { + // target_bb_index = Condition ? index_of_succ0 : index_of_succ1. + auto Succ0Iter = find(Outgoing, Succ0); + auto Succ1Iter = find(Outgoing, Succ1); + Value *Id0 = ConstantInt::get(Type::getInt32Ty(Context), + std::distance(Outgoing.begin(), Succ0Iter)); + Value *Id1 = ConstantInt::get(Type::getInt32Ty(Context), + std::distance(Outgoing.begin(), Succ1Iter)); + IncomingId = SelectInst::Create(Condition, Id0, Id1, "target.bb.idx", + In->getTerminator()); + } else { + // Get the index of the non-null successor. + auto SuccIter = Succ0 ? find(Outgoing, Succ0) : find(Outgoing, Succ1); + IncomingId = ConstantInt::get(Type::getInt32Ty(Context), + std::distance(Outgoing.begin(), SuccIter)); + } + Phi->addIncoming(IncomingId, In); + } + + for (int i = 0, e = Outgoing.size() - 1; i != e; ++i) { + auto Out = Outgoing[i]; + auto Cmp = ICmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ, Phi, + ConstantInt::get(Type::getInt32Ty(Context), i), + Out->getName() + ".predicate", GuardBlocks[i]); + GuardPredicates[Out] = Cmp; + } +} + +/// We record the predicate of each outgoing block using a phi of boolean. +static void calcPredicateUsingBooleans( + const BBSetVector &Incoming, const BBSetVector &Outgoing, + SmallVectorImpl<BasicBlock *> &GuardBlocks, BBPredicates &GuardPredicates, + SmallVectorImpl<WeakVH> &DeletionCandidates) { + auto &Context = Incoming.front()->getContext(); + auto BoolTrue = ConstantInt::getTrue(Context); + auto BoolFalse = ConstantInt::getFalse(Context); + auto FirstGuardBlock = GuardBlocks.front(); + + // The predicate for the last outgoing is trivially true, and so we + // process only the first N-1 successors. + for (int i = 0, e = Outgoing.size() - 1; i != e; ++i) { + auto Out = Outgoing[i]; + LLVM_DEBUG(dbgs() << "Creating guard for " << Out->getName() << "\n"); + + auto Phi = + PHINode::Create(Type::getInt1Ty(Context), Incoming.size(), + StringRef("Guard.") + Out->getName(), FirstGuardBlock); + GuardPredicates[Out] = Phi; + } + + for (auto *In : Incoming) { + Value *Condition; + BasicBlock *Succ0; + BasicBlock *Succ1; + std::tie(Condition, Succ0, Succ1) = + redirectToHub(In, FirstGuardBlock, Outgoing); + + // Optimization: Consider an incoming block A with both successors + // Succ0 and Succ1 in the set of outgoing blocks. The predicates + // for Succ0 and Succ1 complement each other. 
If Succ0 is visited + // first in the loop below, control will branch to Succ0 using the + // corresponding predicate. But if that branch is not taken, then + // control must reach Succ1, which means that the incoming value of + // the predicate from `In` is true for Succ1. + bool OneSuccessorDone = false; + for (int i = 0, e = Outgoing.size() - 1; i != e; ++i) { + auto Out = Outgoing[i]; + PHINode *Phi = cast<PHINode>(GuardPredicates[Out]); + if (Out != Succ0 && Out != Succ1) { + Phi->addIncoming(BoolFalse, In); + } else if (!Succ0 || !Succ1 || OneSuccessorDone) { + // Optimization: When only one successor is an outgoing block, + // the incoming predicate from `In` is always true. + Phi->addIncoming(BoolTrue, In); + } else { + assert(Succ0 && Succ1); + if (Out == Succ0) { + Phi->addIncoming(Condition, In); + } else { + auto Inverted = invertCondition(Condition); + DeletionCandidates.push_back(Condition); + Phi->addIncoming(Inverted, In); + } + OneSuccessorDone = true; + } + } + } +} + +// Capture the existing control flow as guard predicates, and redirect +// control flow from \p Incoming block through the \p GuardBlocks to the +// \p Outgoing blocks. +// +// There is one guard predicate for each outgoing block OutBB. The +// predicate represents whether the hub should transfer control flow +// to OutBB. These predicates are NOT ORTHOGONAL. The Hub evaluates +// them in the same order as the Outgoing set-vector, and control +// branches to the first outgoing block whose predicate evaluates to true. +static void +convertToGuardPredicates(SmallVectorImpl<BasicBlock *> &GuardBlocks, + SmallVectorImpl<WeakVH> &DeletionCandidates, + const BBSetVector &Incoming, + const BBSetVector &Outgoing, const StringRef Prefix, + std::optional<unsigned> MaxControlFlowBooleans) { + BBPredicates GuardPredicates; + auto F = Incoming.front()->getParent(); + + for (int i = 0, e = Outgoing.size() - 1; i != e; ++i) + GuardBlocks.push_back( + BasicBlock::Create(F->getContext(), Prefix + ".guard", F)); + + // When we are using an integer to record which target block to jump to, we + // are creating less live values, actually we are using one single integer to + // store the index of the target block. When we are using booleans to store + // the branching information, we need (N-1) boolean values, where N is the + // number of outgoing block. + if (!MaxControlFlowBooleans || Outgoing.size() <= *MaxControlFlowBooleans) + calcPredicateUsingBooleans(Incoming, Outgoing, GuardBlocks, GuardPredicates, + DeletionCandidates); + else + calcPredicateUsingInteger(Incoming, Outgoing, GuardBlocks, GuardPredicates); + + setupBranchForGuard(GuardBlocks, Outgoing, GuardPredicates); +} + +BasicBlock *llvm::CreateControlFlowHub( + DomTreeUpdater *DTU, SmallVectorImpl<BasicBlock *> &GuardBlocks, + const BBSetVector &Incoming, const BBSetVector &Outgoing, + const StringRef Prefix, std::optional<unsigned> MaxControlFlowBooleans) { + if (Outgoing.size() < 2) + return Outgoing.front(); + + SmallVector<DominatorTree::UpdateType, 16> Updates; + if (DTU) { + for (auto *In : Incoming) { + for (auto Succ : successors(In)) + if (Outgoing.count(Succ)) + Updates.push_back({DominatorTree::Delete, In, Succ}); + } + } + + SmallVector<WeakVH, 8> DeletionCandidates; + convertToGuardPredicates(GuardBlocks, DeletionCandidates, Incoming, Outgoing, + Prefix, MaxControlFlowBooleans); + auto FirstGuardBlock = GuardBlocks.front(); + + // Update the PHINodes in each outgoing block to match the new control flow. 
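For orientation, here is a minimal sketch of how a client might drive the entry point whose signature appears just above, once it has gathered the incoming blocks (whose terminators must be plain branches) and the outgoing blocks. The helper name and the analysis plumbing are illustrative, not taken from this file.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <optional>
using namespace llvm;

// Illustrative helper: route every edge from the blocks in Ins to the blocks
// in Outs through one hub, so that later passes see a single branch point.
static BasicBlock *routeThroughHub(DomTreeUpdater &DTU,
                                   ArrayRef<BasicBlock *> Ins,
                                   ArrayRef<BasicBlock *> Outs) {
  SetVector<BasicBlock *> Incoming(Ins.begin(), Ins.end());
  SetVector<BasicBlock *> Outgoing(Outs.begin(), Outs.end());
  SmallVector<BasicBlock *, 8> GuardBlocks;
  // The callee fills GuardBlocks, rewires PHIs in the outgoing blocks, and
  // returns the first guard block (or the sole outgoing block if fewer than
  // two were given).
  return CreateControlFlowHub(&DTU, GuardBlocks, Incoming, Outgoing,
                              "example.hub", std::nullopt);
}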
+ for (int i = 0, e = GuardBlocks.size(); i != e; ++i) + reconnectPhis(Outgoing[i], GuardBlocks[i], Incoming, FirstGuardBlock); + + reconnectPhis(Outgoing.back(), GuardBlocks.back(), Incoming, FirstGuardBlock); + + if (DTU) { + int NumGuards = GuardBlocks.size(); + assert((int)Outgoing.size() == NumGuards + 1); + + for (auto In : Incoming) + Updates.push_back({DominatorTree::Insert, In, FirstGuardBlock}); + + for (int i = 0; i != NumGuards - 1; ++i) { + Updates.push_back({DominatorTree::Insert, GuardBlocks[i], Outgoing[i]}); + Updates.push_back( + {DominatorTree::Insert, GuardBlocks[i], GuardBlocks[i + 1]}); + } + Updates.push_back({DominatorTree::Insert, GuardBlocks[NumGuards - 1], + Outgoing[NumGuards - 1]}); + Updates.push_back({DominatorTree::Insert, GuardBlocks[NumGuards - 1], + Outgoing[NumGuards]}); + DTU->applyUpdates(Updates); + } + + for (auto I : DeletionCandidates) { + if (I->use_empty()) + if (auto Inst = dyn_cast_or_null<Instruction>(I)) + Inst->eraseFromParent(); + } + + return FirstGuardBlock; +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/BreakCriticalEdges.cpp new file mode 100644 index 0000000000..ddb3575603 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -0,0 +1,465 @@ +//===- BreakCriticalEdges.cpp - Critical Edge Elimination Pass ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// BreakCriticalEdges pass - Break all of the critical edges in the CFG by +// inserting a dummy basic block. This pass may be "required" by passes that +// cannot deal with critical edges. For this usage, the structure type is +// forward declared. This pass obviously invalidates the CFG, but can update +// dominator trees. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/BreakCriticalEdges.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" +#include "llvm/InitializePasses.h" +#include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +using namespace llvm; + +#define DEBUG_TYPE "break-crit-edges" + +STATISTIC(NumBroken, "Number of blocks inserted"); + +namespace { + struct BreakCriticalEdges : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + BreakCriticalEdges() : FunctionPass(ID) { + initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; + + auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>(); + auto *PDT = PDTWP ? 
&PDTWP->getPostDomTree() : nullptr; + + auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); + auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; + unsigned N = + SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI, nullptr, PDT)); + NumBroken += N; + return N > 0; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); + + // No loop canonicalization guarantees are broken by this pass. + AU.addPreservedID(LoopSimplifyID); + } + }; +} + +char BreakCriticalEdges::ID = 0; +INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges", + "Break critical edges in CFG", false, false) + +// Publicly exposed interface to pass... +char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID; +FunctionPass *llvm::createBreakCriticalEdgesPass() { + return new BreakCriticalEdges(); +} + +PreservedAnalyses BreakCriticalEdgesPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F); + auto *LI = AM.getCachedResult<LoopAnalysis>(F); + unsigned N = SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI)); + NumBroken += N; + if (N == 0) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve<DominatorTreeAnalysis>(); + PA.preserve<LoopAnalysis>(); + return PA; +} + +//===----------------------------------------------------------------------===// +// Implementation of the external critical edge manipulation functions +//===----------------------------------------------------------------------===// + +BasicBlock *llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum, + const CriticalEdgeSplittingOptions &Options, + const Twine &BBName) { + if (!isCriticalEdge(TI, SuccNum, Options.MergeIdenticalEdges)) + return nullptr; + + return SplitKnownCriticalEdge(TI, SuccNum, Options, BBName); +} + +BasicBlock * +llvm::SplitKnownCriticalEdge(Instruction *TI, unsigned SuccNum, + const CriticalEdgeSplittingOptions &Options, + const Twine &BBName) { + assert(!isa<IndirectBrInst>(TI) && + "Cannot split critical edge from IndirectBrInst"); + + BasicBlock *TIBB = TI->getParent(); + BasicBlock *DestBB = TI->getSuccessor(SuccNum); + + // Splitting the critical edge to a pad block is non-trivial. Don't do + // it in this generic function. + if (DestBB->isEHPad()) return nullptr; + + if (Options.IgnoreUnreachableDests && + isa<UnreachableInst>(DestBB->getFirstNonPHIOrDbgOrLifetime())) + return nullptr; + + auto *LI = Options.LI; + SmallVector<BasicBlock *, 4> LoopPreds; + // Check if extra modifications will be required to preserve loop-simplify + // form after splitting. If it would require splitting blocks with IndirectBr + // terminators, bail out if preserving loop-simplify form is requested. + if (LI) { + if (Loop *TIL = LI->getLoopFor(TIBB)) { + + // The only way that we can break LoopSimplify form by splitting a + // critical edge is if after the split there exists some edge from TIL to + // DestBB *and* the only edge into DestBB from outside of TIL is that of + // NewBB. If the first isn't true, then LoopSimplify still holds, NewBB + // is the new exit block and it has no non-loop predecessors. If the + // second isn't true, then DestBB was not in LoopSimplify form prior to + // the split as it had a non-loop predecessor. In both of these cases, + // the predecessor must be directly in TIL, not in a subloop, or again + // LoopSimplify doesn't hold. 
+ for (BasicBlock *P : predecessors(DestBB)) { + if (P == TIBB) + continue; // The new block is known. + if (LI->getLoopFor(P) != TIL) { + // No need to re-simplify, it wasn't to start with. + LoopPreds.clear(); + break; + } + LoopPreds.push_back(P); + } + // Loop-simplify form can be preserved, if we can split all in-loop + // predecessors. + if (any_of(LoopPreds, [](BasicBlock *Pred) { + return isa<IndirectBrInst>(Pred->getTerminator()); + })) { + if (Options.PreserveLoopSimplify) + return nullptr; + LoopPreds.clear(); + } + } + } + + // Create a new basic block, linking it into the CFG. + BasicBlock *NewBB = nullptr; + if (BBName.str() != "") + NewBB = BasicBlock::Create(TI->getContext(), BBName); + else + NewBB = BasicBlock::Create(TI->getContext(), TIBB->getName() + "." + + DestBB->getName() + + "_crit_edge"); + // Create our unconditional branch. + BranchInst *NewBI = BranchInst::Create(DestBB, NewBB); + NewBI->setDebugLoc(TI->getDebugLoc()); + + // Insert the block into the function... right after the block TI lives in. + Function &F = *TIBB->getParent(); + Function::iterator FBBI = TIBB->getIterator(); + F.insert(++FBBI, NewBB); + + // Branch to the new block, breaking the edge. + TI->setSuccessor(SuccNum, NewBB); + + // If there are any PHI nodes in DestBB, we need to update them so that they + // merge incoming values from NewBB instead of from TIBB. + { + unsigned BBIdx = 0; + for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { + // We no longer enter through TIBB, now we come in through NewBB. + // Revector exactly one entry in the PHI node that used to come from + // TIBB to come from NewBB. + PHINode *PN = cast<PHINode>(I); + + // Reuse the previous value of BBIdx if it lines up. In cases where we + // have multiple phi nodes with *lots* of predecessors, this is a speed + // win because we don't have to scan the PHI looking for TIBB. This + // happens because the BB list of PHI nodes are usually in the same + // order. + if (PN->getIncomingBlock(BBIdx) != TIBB) + BBIdx = PN->getBasicBlockIndex(TIBB); + PN->setIncomingBlock(BBIdx, NewBB); + } + } + + // If there are any other edges from TIBB to DestBB, update those to go + // through the split block, making those edges non-critical as well (and + // reducing the number of phi entries in the DestBB if relevant). + if (Options.MergeIdenticalEdges) { + for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) { + if (TI->getSuccessor(i) != DestBB) continue; + + // Remove an entry for TIBB from DestBB phi nodes. + DestBB->removePredecessor(TIBB, Options.KeepOneInputPHIs); + + // We found another edge to DestBB, go to NewBB instead. + TI->setSuccessor(i, NewBB); + } + } + + // If we have nothing to update, just return. + auto *DT = Options.DT; + auto *PDT = Options.PDT; + auto *MSSAU = Options.MSSAU; + if (MSSAU) + MSSAU->wireOldPredecessorsToNewImmediatePredecessor( + DestBB, NewBB, {TIBB}, Options.MergeIdenticalEdges); + + if (!DT && !PDT && !LI) + return NewBB; + + if (DT || PDT) { + // Update the DominatorTree. + // ---> NewBB -----\ + // / V + // TIBB -------\\------> DestBB + // + // First, inform the DT about the new path from TIBB to DestBB via NewBB, + // then delete the old edge from TIBB to DestBB. By doing this in that order + // DestBB stays reachable in the DT the whole time and its subtree doesn't + // get disconnected. 
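As a usage reference for the entry points defined above, a small sketch (the helper itself is illustrative): it walks one block's successors and lets SplitCriticalEdge decide which edges are actually critical, while keeping whatever analyses the caller has on hand up to date.

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;

// Illustrative helper: split every critical edge leaving BB. DT and LI may be
// null; SplitCriticalEdge returns nullptr for edges that are not critical.
static unsigned splitCriticalEdgesFrom(BasicBlock &BB, DominatorTree *DT,
                                       LoopInfo *LI) {
  Instruction *TI = BB.getTerminator();
  // Indirectbr edges need the dedicated routine further down in this file.
  if (isa<IndirectBrInst>(TI))
    return 0;
  unsigned NumSplit = 0;
  CriticalEdgeSplittingOptions Options(DT, LI);
  for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I)
    if (SplitCriticalEdge(TI, I, Options))
      ++NumSplit;
  return NumSplit;
}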
+ SmallVector<DominatorTree::UpdateType, 3> Updates; + Updates.push_back({DominatorTree::Insert, TIBB, NewBB}); + Updates.push_back({DominatorTree::Insert, NewBB, DestBB}); + if (!llvm::is_contained(successors(TIBB), DestBB)) + Updates.push_back({DominatorTree::Delete, TIBB, DestBB}); + + if (DT) + DT->applyUpdates(Updates); + if (PDT) + PDT->applyUpdates(Updates); + } + + // Update LoopInfo if it is around. + if (LI) { + if (Loop *TIL = LI->getLoopFor(TIBB)) { + // If one or the other blocks were not in a loop, the new block is not + // either, and thus LI doesn't need to be updated. + if (Loop *DestLoop = LI->getLoopFor(DestBB)) { + if (TIL == DestLoop) { + // Both in the same loop, the NewBB joins loop. + DestLoop->addBasicBlockToLoop(NewBB, *LI); + } else if (TIL->contains(DestLoop)) { + // Edge from an outer loop to an inner loop. Add to the outer loop. + TIL->addBasicBlockToLoop(NewBB, *LI); + } else if (DestLoop->contains(TIL)) { + // Edge from an inner loop to an outer loop. Add to the outer loop. + DestLoop->addBasicBlockToLoop(NewBB, *LI); + } else { + // Edge from two loops with no containment relation. Because these + // are natural loops, we know that the destination block must be the + // header of its loop (adding a branch into a loop elsewhere would + // create an irreducible loop). + assert(DestLoop->getHeader() == DestBB && + "Should not create irreducible loops!"); + if (Loop *P = DestLoop->getParentLoop()) + P->addBasicBlockToLoop(NewBB, *LI); + } + } + + // If TIBB is in a loop and DestBB is outside of that loop, we may need + // to update LoopSimplify form and LCSSA form. + if (!TIL->contains(DestBB)) { + assert(!TIL->contains(NewBB) && + "Split point for loop exit is contained in loop!"); + + // Update LCSSA form in the newly created exit block. + if (Options.PreserveLCSSA) { + createPHIsForSplitLoopExit(TIBB, NewBB, DestBB); + } + + if (!LoopPreds.empty()) { + assert(!DestBB->isEHPad() && "We don't split edges to EH pads!"); + BasicBlock *NewExitBB = SplitBlockPredecessors( + DestBB, LoopPreds, "split", DT, LI, MSSAU, Options.PreserveLCSSA); + if (Options.PreserveLCSSA) + createPHIsForSplitLoopExit(LoopPreds, NewExitBB, DestBB); + } + } + } + } + + return NewBB; +} + +// Return the unique indirectbr predecessor of a block. This may return null +// even if such a predecessor exists, if it's not useful for splitting. +// If a predecessor is found, OtherPreds will contain all other (non-indirectbr) +// predecessors of BB. +static BasicBlock * +findIBRPredecessor(BasicBlock *BB, SmallVectorImpl<BasicBlock *> &OtherPreds) { + // Verify we have exactly one IBR predecessor. + // Conservatively bail out if one of the other predecessors is not a "regular" + // terminator (that is, not a switch or a br). + BasicBlock *IBB = nullptr; + for (BasicBlock *PredBB : predecessors(BB)) { + Instruction *PredTerm = PredBB->getTerminator(); + switch (PredTerm->getOpcode()) { + case Instruction::IndirectBr: + if (IBB) + return nullptr; + IBB = PredBB; + break; + case Instruction::Br: + case Instruction::Switch: + OtherPreds.push_back(PredBB); + continue; + default: + return nullptr; + } + } + + return IBB; +} + +bool llvm::SplitIndirectBrCriticalEdges(Function &F, + bool IgnoreBlocksWithoutPHI, + BranchProbabilityInfo *BPI, + BlockFrequencyInfo *BFI) { + // Check whether the function has any indirectbrs, and collect which blocks + // they may jump to. Since most functions don't have indirect branches, + // this lowers the common case's overhead to O(Blocks) instead of O(Edges). 
+ SmallSetVector<BasicBlock *, 16> Targets; + for (auto &BB : F) { + auto *IBI = dyn_cast<IndirectBrInst>(BB.getTerminator()); + if (!IBI) + continue; + + for (unsigned Succ = 0, E = IBI->getNumSuccessors(); Succ != E; ++Succ) + Targets.insert(IBI->getSuccessor(Succ)); + } + + if (Targets.empty()) + return false; + + bool ShouldUpdateAnalysis = BPI && BFI; + bool Changed = false; + for (BasicBlock *Target : Targets) { + if (IgnoreBlocksWithoutPHI && Target->phis().empty()) + continue; + + SmallVector<BasicBlock *, 16> OtherPreds; + BasicBlock *IBRPred = findIBRPredecessor(Target, OtherPreds); + // If we did not found an indirectbr, or the indirectbr is the only + // incoming edge, this isn't the kind of edge we're looking for. + if (!IBRPred || OtherPreds.empty()) + continue; + + // Don't even think about ehpads/landingpads. + Instruction *FirstNonPHI = Target->getFirstNonPHI(); + if (FirstNonPHI->isEHPad() || Target->isLandingPad()) + continue; + + // Remember edge probabilities if needed. + SmallVector<BranchProbability, 4> EdgeProbabilities; + if (ShouldUpdateAnalysis) { + EdgeProbabilities.reserve(Target->getTerminator()->getNumSuccessors()); + for (unsigned I = 0, E = Target->getTerminator()->getNumSuccessors(); + I < E; ++I) + EdgeProbabilities.emplace_back(BPI->getEdgeProbability(Target, I)); + BPI->eraseBlock(Target); + } + + BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split"); + if (ShouldUpdateAnalysis) { + // Copy the BFI/BPI from Target to BodyBlock. + BPI->setEdgeProbability(BodyBlock, EdgeProbabilities); + BFI->setBlockFreq(BodyBlock, BFI->getBlockFreq(Target).getFrequency()); + } + // It's possible Target was its own successor through an indirectbr. + // In this case, the indirectbr now comes from BodyBlock. + if (IBRPred == Target) + IBRPred = BodyBlock; + + // At this point Target only has PHIs, and BodyBlock has the rest of the + // block's body. Create a copy of Target that will be used by the "direct" + // preds. + ValueToValueMapTy VMap; + BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F); + + BlockFrequency BlockFreqForDirectSucc; + for (BasicBlock *Pred : OtherPreds) { + // If the target is a loop to itself, then the terminator of the split + // block (BodyBlock) needs to be updated. + BasicBlock *Src = Pred != Target ? Pred : BodyBlock; + Src->getTerminator()->replaceUsesOfWith(Target, DirectSucc); + if (ShouldUpdateAnalysis) + BlockFreqForDirectSucc += BFI->getBlockFreq(Src) * + BPI->getEdgeProbability(Src, DirectSucc); + } + if (ShouldUpdateAnalysis) { + BFI->setBlockFreq(DirectSucc, BlockFreqForDirectSucc.getFrequency()); + BlockFrequency NewBlockFreqForTarget = + BFI->getBlockFreq(Target) - BlockFreqForDirectSucc; + BFI->setBlockFreq(Target, NewBlockFreqForTarget.getFrequency()); + } + + // Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that + // they are clones, so the number of PHIs are the same. + // (a) Remove the edge coming from IBRPred from the "Direct" PHI + // (b) Leave that as the only edge in the "Indirect" PHI. + // (c) Merge the two in the body block. 
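Concretely, for a PHI in Target that has one incoming value from IBRPred and one from a direct predecessor, the loop below leaves the clone in DirectSucc with only the direct entry, replaces the original in Target with a one-entry PHI carrying just the value that came from IBRPred, and creates a two-entry PHI in BodyBlock that merges those two values (one per predecessor, Target and DirectSucc); all uses of the original PHI are redirected to that merge PHI.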
+ BasicBlock::iterator Indirect = Target->begin(), + End = Target->getFirstNonPHI()->getIterator(); + BasicBlock::iterator Direct = DirectSucc->begin(); + BasicBlock::iterator MergeInsert = BodyBlock->getFirstInsertionPt(); + + assert(&*End == Target->getTerminator() && + "Block was expected to only contain PHIs"); + + while (Indirect != End) { + PHINode *DirPHI = cast<PHINode>(Direct); + PHINode *IndPHI = cast<PHINode>(Indirect); + + // Now, clean up - the direct block shouldn't get the indirect value, + // and vice versa. + DirPHI->removeIncomingValue(IBRPred); + Direct++; + + // Advance the pointer here, to avoid invalidation issues when the old + // PHI is erased. + Indirect++; + + PHINode *NewIndPHI = PHINode::Create(IndPHI->getType(), 1, "ind", IndPHI); + NewIndPHI->addIncoming(IndPHI->getIncomingValueForBlock(IBRPred), + IBRPred); + + // Create a PHI in the body block, to merge the direct and indirect + // predecessors. + PHINode *MergePHI = + PHINode::Create(IndPHI->getType(), 2, "merge", &*MergeInsert); + MergePHI->addIncoming(NewIndPHI, Target); + MergePHI->addIncoming(DirPHI, DirectSucc); + + IndPHI->replaceAllUsesWith(MergePHI); + IndPHI->eraseFromParent(); + } + + Changed = true; + } + + return Changed; +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/BuildLibCalls.cpp new file mode 100644 index 0000000000..1e21a2f854 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/BuildLibCalls.cpp @@ -0,0 +1,1939 @@ +//===- BuildLibCalls.cpp - Utility builder for libcalls -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements some functions that will create standard C libcalls. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/TypeSize.h" +#include <optional> + +using namespace llvm; + +#define DEBUG_TYPE "build-libcalls" + +//- Infer Attributes ---------------------------------------------------------// + +STATISTIC(NumReadNone, "Number of functions inferred as readnone"); +STATISTIC(NumInaccessibleMemOnly, + "Number of functions inferred as inaccessiblememonly"); +STATISTIC(NumReadOnly, "Number of functions inferred as readonly"); +STATISTIC(NumWriteOnly, "Number of functions inferred as writeonly"); +STATISTIC(NumArgMemOnly, "Number of functions inferred as argmemonly"); +STATISTIC(NumInaccessibleMemOrArgMemOnly, + "Number of functions inferred as inaccessiblemem_or_argmemonly"); +STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind"); +STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture"); +STATISTIC(NumWriteOnlyArg, "Number of arguments inferred as writeonly"); +STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly"); +STATISTIC(NumNoAlias, "Number of function returns inferred as noalias"); +STATISTIC(NumNoUndef, "Number of function returns inferred as noundef returns"); +STATISTIC(NumReturnedArg, "Number of arguments inferred as returned"); +STATISTIC(NumWillReturn, "Number of functions inferred as willreturn"); + +static bool setDoesNotAccessMemory(Function &F) { + if (F.doesNotAccessMemory()) + return false; + F.setDoesNotAccessMemory(); + ++NumReadNone; + return true; +} + +static bool setOnlyAccessesInaccessibleMemory(Function &F) { + if (F.onlyAccessesInaccessibleMemory()) + return false; + F.setOnlyAccessesInaccessibleMemory(); + ++NumInaccessibleMemOnly; + return true; +} + +static bool setOnlyReadsMemory(Function &F) { + if (F.onlyReadsMemory()) + return false; + F.setOnlyReadsMemory(); + ++NumReadOnly; + return true; +} + +static bool setOnlyWritesMemory(Function &F) { + if (F.onlyWritesMemory()) // writeonly or readnone + return false; + ++NumWriteOnly; + F.setOnlyWritesMemory(); + return true; +} + +static bool setOnlyAccessesArgMemory(Function &F) { + if (F.onlyAccessesArgMemory()) + return false; + F.setOnlyAccessesArgMemory(); + ++NumArgMemOnly; + return true; +} + +static bool setOnlyAccessesInaccessibleMemOrArgMem(Function &F) { + if (F.onlyAccessesInaccessibleMemOrArgMem()) + return false; + F.setOnlyAccessesInaccessibleMemOrArgMem(); + ++NumInaccessibleMemOrArgMemOnly; + return true; +} + +static bool setDoesNotThrow(Function &F) { + if (F.doesNotThrow()) + return false; + F.setDoesNotThrow(); + ++NumNoUnwind; + return true; +} + +static bool setRetDoesNotAlias(Function &F) { + if (F.hasRetAttribute(Attribute::NoAlias)) + return false; + F.addRetAttr(Attribute::NoAlias); + ++NumNoAlias; + return true; +} + +static bool setDoesNotCapture(Function &F, unsigned ArgNo) { + if (F.hasParamAttribute(ArgNo, Attribute::NoCapture)) + return false; + F.addParamAttr(ArgNo, Attribute::NoCapture); + ++NumNoCapture; + return true; +} + +static bool setDoesNotAlias(Function &F, unsigned ArgNo) { + if 
(F.hasParamAttribute(ArgNo, Attribute::NoAlias)) + return false; + F.addParamAttr(ArgNo, Attribute::NoAlias); + ++NumNoAlias; + return true; +} + +static bool setOnlyReadsMemory(Function &F, unsigned ArgNo) { + if (F.hasParamAttribute(ArgNo, Attribute::ReadOnly)) + return false; + F.addParamAttr(ArgNo, Attribute::ReadOnly); + ++NumReadOnlyArg; + return true; +} + +static bool setOnlyWritesMemory(Function &F, unsigned ArgNo) { + if (F.hasParamAttribute(ArgNo, Attribute::WriteOnly)) + return false; + F.addParamAttr(ArgNo, Attribute::WriteOnly); + ++NumWriteOnlyArg; + return true; +} + +static bool setRetNoUndef(Function &F) { + if (!F.getReturnType()->isVoidTy() && + !F.hasRetAttribute(Attribute::NoUndef)) { + F.addRetAttr(Attribute::NoUndef); + ++NumNoUndef; + return true; + } + return false; +} + +static bool setArgsNoUndef(Function &F) { + bool Changed = false; + for (unsigned ArgNo = 0; ArgNo < F.arg_size(); ++ArgNo) { + if (!F.hasParamAttribute(ArgNo, Attribute::NoUndef)) { + F.addParamAttr(ArgNo, Attribute::NoUndef); + ++NumNoUndef; + Changed = true; + } + } + return Changed; +} + +static bool setArgNoUndef(Function &F, unsigned ArgNo) { + if (F.hasParamAttribute(ArgNo, Attribute::NoUndef)) + return false; + F.addParamAttr(ArgNo, Attribute::NoUndef); + ++NumNoUndef; + return true; +} + +static bool setRetAndArgsNoUndef(Function &F) { + bool UndefAdded = false; + UndefAdded |= setRetNoUndef(F); + UndefAdded |= setArgsNoUndef(F); + return UndefAdded; +} + +static bool setReturnedArg(Function &F, unsigned ArgNo) { + if (F.hasParamAttribute(ArgNo, Attribute::Returned)) + return false; + F.addParamAttr(ArgNo, Attribute::Returned); + ++NumReturnedArg; + return true; +} + +static bool setNonLazyBind(Function &F) { + if (F.hasFnAttribute(Attribute::NonLazyBind)) + return false; + F.addFnAttr(Attribute::NonLazyBind); + return true; +} + +static bool setDoesNotFreeMemory(Function &F) { + if (F.hasFnAttribute(Attribute::NoFree)) + return false; + F.addFnAttr(Attribute::NoFree); + return true; +} + +static bool setWillReturn(Function &F) { + if (F.hasFnAttribute(Attribute::WillReturn)) + return false; + F.addFnAttr(Attribute::WillReturn); + ++NumWillReturn; + return true; +} + +static bool setAlignedAllocParam(Function &F, unsigned ArgNo) { + if (F.hasParamAttribute(ArgNo, Attribute::AllocAlign)) + return false; + F.addParamAttr(ArgNo, Attribute::AllocAlign); + return true; +} + +static bool setAllocatedPointerParam(Function &F, unsigned ArgNo) { + if (F.hasParamAttribute(ArgNo, Attribute::AllocatedPointer)) + return false; + F.addParamAttr(ArgNo, Attribute::AllocatedPointer); + return true; +} + +static bool setAllocSize(Function &F, unsigned ElemSizeArg, + std::optional<unsigned> NumElemsArg) { + if (F.hasFnAttribute(Attribute::AllocSize)) + return false; + F.addFnAttr(Attribute::getWithAllocSizeArgs(F.getContext(), ElemSizeArg, + NumElemsArg)); + return true; +} + +static bool setAllocFamily(Function &F, StringRef Family) { + if (F.hasFnAttribute("alloc-family")) + return false; + F.addFnAttr("alloc-family", Family); + return true; +} + +static bool setAllocKind(Function &F, AllocFnKind K) { + if (F.hasFnAttribute(Attribute::AllocKind)) + return false; + F.addFnAttr( + Attribute::get(F.getContext(), Attribute::AllocKind, uint64_t(K))); + return true; +} + +bool llvm::inferNonMandatoryLibFuncAttrs(Module *M, StringRef Name, + const TargetLibraryInfo &TLI) { + Function *F = M->getFunction(Name); + if (!F) + return false; + return inferNonMandatoryLibFuncAttrs(*F, TLI); +} + +bool 
llvm::inferNonMandatoryLibFuncAttrs(Function &F, + const TargetLibraryInfo &TLI) { + LibFunc TheLibFunc; + if (!(TLI.getLibFunc(F, TheLibFunc) && TLI.has(TheLibFunc))) + return false; + + bool Changed = false; + + if (F.getParent() != nullptr && F.getParent()->getRtLibUseGOT()) + Changed |= setNonLazyBind(F); + + switch (TheLibFunc) { + case LibFunc_strlen: + case LibFunc_strnlen: + case LibFunc_wcslen: + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setOnlyAccessesArgMemory(F); + Changed |= setWillReturn(F); + Changed |= setDoesNotCapture(F, 0); + break; + case LibFunc_strchr: + case LibFunc_strrchr: + Changed |= setOnlyAccessesArgMemory(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setWillReturn(F); + break; + case LibFunc_strtol: + case LibFunc_strtod: + case LibFunc_strtof: + case LibFunc_strtoul: + case LibFunc_strtoll: + case LibFunc_strtold: + case LibFunc_strtoull: + Changed |= setDoesNotThrow(F); + Changed |= setWillReturn(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 0); + break; + case LibFunc_strcat: + case LibFunc_strncat: + Changed |= setOnlyAccessesArgMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setWillReturn(F); + Changed |= setReturnedArg(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotAlias(F, 1); + break; + case LibFunc_strcpy: + case LibFunc_strncpy: + Changed |= setReturnedArg(F, 0); + [[fallthrough]]; + case LibFunc_stpcpy: + case LibFunc_stpncpy: + Changed |= setOnlyAccessesArgMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setWillReturn(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyWritesMemory(F, 0); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotAlias(F, 1); + break; + case LibFunc_strxfrm: + Changed |= setDoesNotThrow(F); + Changed |= setWillReturn(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + break; + case LibFunc_strcmp: // 0,1 + case LibFunc_strspn: // 0,1 + case LibFunc_strncmp: // 0,1 + case LibFunc_strcspn: // 0,1 + Changed |= setDoesNotThrow(F); + Changed |= setOnlyAccessesArgMemory(F); + Changed |= setWillReturn(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 1); + break; + case LibFunc_strcoll: + case LibFunc_strcasecmp: // 0,1 + case LibFunc_strncasecmp: // + // Those functions may depend on the locale, which may be accessed through + // global memory. 
+ Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setWillReturn(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 1); + break; + case LibFunc_strstr: + case LibFunc_strpbrk: + Changed |= setOnlyAccessesArgMemory(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setWillReturn(F); + Changed |= setDoesNotCapture(F, 1); + break; + case LibFunc_strtok: + case LibFunc_strtok_r: + Changed |= setDoesNotThrow(F); + Changed |= setWillReturn(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + break; + case LibFunc_scanf: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); + break; + case LibFunc_setbuf: + case LibFunc_setvbuf: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + break; + case LibFunc_strndup: + Changed |= setArgNoUndef(F, 1); + [[fallthrough]]; + case LibFunc_strdup: + Changed |= setAllocFamily(F, "malloc"); + Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F); + Changed |= setDoesNotThrow(F); + Changed |= setRetDoesNotAlias(F); + Changed |= setWillReturn(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); + break; + case LibFunc_stat: + case LibFunc_statvfs: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 0); + break; + case LibFunc_sscanf: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 0); + Changed |= setOnlyReadsMemory(F, 1); + break; + case LibFunc_sprintf: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotAlias(F, 0); + Changed |= setOnlyWritesMemory(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + break; + case LibFunc_snprintf: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotAlias(F, 0); + Changed |= setOnlyWritesMemory(F, 0); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + break; + case LibFunc_setitimer: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setWillReturn(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + break; + case LibFunc_system: + // May throw; "system" is a valid pthread cancellation point. + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); + break; + case LibFunc_aligned_alloc: + Changed |= setAlignedAllocParam(F, 0); + Changed |= setAllocSize(F, 1, std::nullopt); + Changed |= setAllocKind(F, AllocFnKind::Alloc | AllocFnKind::Uninitialized | AllocFnKind::Aligned); + [[fallthrough]]; + case LibFunc_valloc: + case LibFunc_malloc: + case LibFunc_vec_malloc: + Changed |= setAllocFamily(F, TheLibFunc == LibFunc_vec_malloc ? 
"vec_malloc" + : "malloc"); + Changed |= setAllocKind(F, AllocFnKind::Alloc | AllocFnKind::Uninitialized); + Changed |= setAllocSize(F, 0, std::nullopt); + Changed |= setOnlyAccessesInaccessibleMemory(F); + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setRetDoesNotAlias(F); + Changed |= setWillReturn(F); + break; + case LibFunc_memcmp: + Changed |= setOnlyAccessesArgMemory(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setWillReturn(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 1); + break; + case LibFunc_memchr: + case LibFunc_memrchr: + Changed |= setDoesNotThrow(F); + Changed |= setOnlyAccessesArgMemory(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setWillReturn(F); + break; + case LibFunc_modf: + case LibFunc_modff: + case LibFunc_modfl: + Changed |= setDoesNotThrow(F); + Changed |= setWillReturn(F); + Changed |= setDoesNotCapture(F, 1); + break; + case LibFunc_memcpy: + Changed |= setDoesNotThrow(F); + Changed |= setOnlyAccessesArgMemory(F); + Changed |= setWillReturn(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setReturnedArg(F, 0); + Changed |= setOnlyWritesMemory(F, 0); + Changed |= setDoesNotAlias(F, 1); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + break; + case LibFunc_memmove: + Changed |= setDoesNotThrow(F); + Changed |= setOnlyAccessesArgMemory(F); + Changed |= setWillReturn(F); + Changed |= setReturnedArg(F, 0); + Changed |= setOnlyWritesMemory(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + break; + case LibFunc_mempcpy: + case LibFunc_memccpy: + Changed |= setWillReturn(F); + [[fallthrough]]; + case LibFunc_memcpy_chk: + Changed |= setDoesNotThrow(F); + Changed |= setOnlyAccessesArgMemory(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setOnlyWritesMemory(F, 0); + Changed |= setDoesNotAlias(F, 1); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + break; + case LibFunc_memalign: + Changed |= setAllocFamily(F, "malloc"); + Changed |= setAllocKind(F, AllocFnKind::Alloc | AllocFnKind::Aligned | + AllocFnKind::Uninitialized); + Changed |= setAllocSize(F, 1, std::nullopt); + Changed |= setAlignedAllocParam(F, 0); + Changed |= setOnlyAccessesInaccessibleMemory(F); + Changed |= setRetNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setRetDoesNotAlias(F); + Changed |= setWillReturn(F); + break; + case LibFunc_mkdir: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); + break; + case LibFunc_mktime: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setWillReturn(F); + Changed |= setDoesNotCapture(F, 0); + break; + case LibFunc_realloc: + case LibFunc_reallocf: + case LibFunc_vec_realloc: + Changed |= setAllocFamily( + F, TheLibFunc == LibFunc_vec_realloc ? "vec_malloc" : "malloc"); + Changed |= setAllocKind(F, AllocFnKind::Realloc); + Changed |= setAllocatedPointerParam(F, 0); + Changed |= setAllocSize(F, 1, std::nullopt); + Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F); + Changed |= setRetNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setRetDoesNotAlias(F); + Changed |= setWillReturn(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setArgNoUndef(F, 1); + break; + case LibFunc_read: + // May throw; "read" is a valid pthread cancellation point. 
+ Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotCapture(F, 1); + break; + case LibFunc_rewind: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + break; + case LibFunc_rmdir: + case LibFunc_remove: + case LibFunc_realpath: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); + break; + case LibFunc_rename: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 0); + Changed |= setOnlyReadsMemory(F, 1); + break; + case LibFunc_readlink: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 0); + break; + case LibFunc_write: + // May throw; "write" is a valid pthread cancellation point. + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + break; + case LibFunc_bcopy: + Changed |= setDoesNotThrow(F); + Changed |= setOnlyAccessesArgMemory(F); + Changed |= setWillReturn(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); + Changed |= setOnlyWritesMemory(F, 1); + Changed |= setDoesNotCapture(F, 1); + break; + case LibFunc_bcmp: + Changed |= setDoesNotThrow(F); + Changed |= setOnlyAccessesArgMemory(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setWillReturn(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 1); + break; + case LibFunc_bzero: + Changed |= setDoesNotThrow(F); + Changed |= setOnlyAccessesArgMemory(F); + Changed |= setWillReturn(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyWritesMemory(F, 0); + break; + case LibFunc_calloc: + case LibFunc_vec_calloc: + Changed |= setAllocFamily(F, TheLibFunc == LibFunc_vec_calloc ? 
"vec_malloc" + : "malloc"); + Changed |= setAllocKind(F, AllocFnKind::Alloc | AllocFnKind::Zeroed); + Changed |= setAllocSize(F, 0, 1); + Changed |= setOnlyAccessesInaccessibleMemory(F); + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setRetDoesNotAlias(F); + Changed |= setWillReturn(F); + break; + case LibFunc_chmod: + case LibFunc_chown: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); + break; + case LibFunc_ctermid: + case LibFunc_clearerr: + case LibFunc_closedir: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + break; + case LibFunc_atoi: + case LibFunc_atol: + case LibFunc_atof: + case LibFunc_atoll: + Changed |= setDoesNotThrow(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setWillReturn(F); + Changed |= setDoesNotCapture(F, 0); + break; + case LibFunc_access: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); + break; + case LibFunc_fopen: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 0); + Changed |= setOnlyReadsMemory(F, 1); + break; + case LibFunc_fdopen: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + break; + case LibFunc_feof: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + break; + case LibFunc_free: + case LibFunc_vec_free: + Changed |= setAllocFamily(F, TheLibFunc == LibFunc_vec_free ? 
"vec_malloc" + : "malloc"); + Changed |= setAllocKind(F, AllocFnKind::Free); + Changed |= setAllocatedPointerParam(F, 0); + Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F); + Changed |= setArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setWillReturn(F); + Changed |= setDoesNotCapture(F, 0); + break; + case LibFunc_fseek: + case LibFunc_ftell: + case LibFunc_fgetc: + case LibFunc_fgetc_unlocked: + case LibFunc_fseeko: + case LibFunc_ftello: + case LibFunc_fileno: + case LibFunc_fflush: + case LibFunc_fclose: + case LibFunc_fsetpos: + case LibFunc_flockfile: + case LibFunc_funlockfile: + case LibFunc_ftrylockfile: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + break; + case LibFunc_ferror: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F); + break; + case LibFunc_fputc: + case LibFunc_fputc_unlocked: + case LibFunc_fstat: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + break; + case LibFunc_frexp: + case LibFunc_frexpf: + case LibFunc_frexpl: + Changed |= setDoesNotThrow(F); + Changed |= setWillReturn(F); + Changed |= setDoesNotCapture(F, 1); + break; + case LibFunc_fstatvfs: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + break; + case LibFunc_fgets: + case LibFunc_fgets_unlocked: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + break; + case LibFunc_fread: + case LibFunc_fread_unlocked: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 3); + break; + case LibFunc_fwrite: + case LibFunc_fwrite_unlocked: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 3); + // FIXME: readonly #1? 
+ break; + case LibFunc_fputs: + case LibFunc_fputs_unlocked: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 0); + break; + case LibFunc_fscanf: + case LibFunc_fprintf: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + break; + case LibFunc_fgetpos: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 1); + break; + case LibFunc_getc: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + break; + case LibFunc_getlogin_r: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + break; + case LibFunc_getc_unlocked: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + break; + case LibFunc_getenv: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotCapture(F, 0); + break; + case LibFunc_gets: + case LibFunc_getchar: + case LibFunc_getchar_unlocked: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + break; + case LibFunc_getitimer: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + break; + case LibFunc_getpwnam: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); + break; + case LibFunc_ungetc: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + break; + case LibFunc_uname: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + break; + case LibFunc_unlink: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); + break; + case LibFunc_unsetenv: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); + break; + case LibFunc_utime: + case LibFunc_utimes: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 0); + Changed |= setOnlyReadsMemory(F, 1); + break; + case LibFunc_putc: + case LibFunc_putc_unlocked: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + break; + case LibFunc_puts: + case LibFunc_printf: + case LibFunc_perror: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); + break; + case LibFunc_pread: + // May throw; "pread" is a valid pthread cancellation point. + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotCapture(F, 1); + break; + case LibFunc_pwrite: + // May throw; "pwrite" is a valid pthread cancellation point. 
+ Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + break; + case LibFunc_putchar: + case LibFunc_putchar_unlocked: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + break; + case LibFunc_popen: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 0); + Changed |= setOnlyReadsMemory(F, 1); + break; + case LibFunc_pclose: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + break; + case LibFunc_vscanf: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); + break; + case LibFunc_vsscanf: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 0); + Changed |= setOnlyReadsMemory(F, 1); + break; + case LibFunc_vfscanf: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + break; + case LibFunc_vprintf: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); + break; + case LibFunc_vfprintf: + case LibFunc_vsprintf: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + break; + case LibFunc_vsnprintf: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + break; + case LibFunc_open: + // May throw; "open" is a valid pthread cancellation point. + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); + break; + case LibFunc_opendir: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); + break; + case LibFunc_tmpfile: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setRetDoesNotAlias(F); + break; + case LibFunc_times: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + break; + case LibFunc_htonl: + case LibFunc_htons: + case LibFunc_ntohl: + case LibFunc_ntohs: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAccessMemory(F); + break; + case LibFunc_lstat: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 0); + break; + case LibFunc_lchown: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); + break; + case LibFunc_qsort: + // May throw; places call through function pointer. 
+ // Cannot give undef pointer/size + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotCapture(F, 3); + break; + case LibFunc_dunder_strndup: + Changed |= setArgNoUndef(F, 1); + [[fallthrough]]; + case LibFunc_dunder_strdup: + Changed |= setDoesNotThrow(F); + Changed |= setRetDoesNotAlias(F); + Changed |= setWillReturn(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); + break; + case LibFunc_dunder_strtok_r: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + break; + case LibFunc_under_IO_getc: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + break; + case LibFunc_under_IO_putc: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + break; + case LibFunc_dunder_isoc99_scanf: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); + break; + case LibFunc_stat64: + case LibFunc_lstat64: + case LibFunc_statvfs64: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 0); + break; + case LibFunc_dunder_isoc99_sscanf: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 0); + Changed |= setOnlyReadsMemory(F, 1); + break; + case LibFunc_fopen64: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 0); + Changed |= setOnlyReadsMemory(F, 1); + break; + case LibFunc_fseeko64: + case LibFunc_ftello64: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + break; + case LibFunc_tmpfile64: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setRetDoesNotAlias(F); + break; + case LibFunc_fstat64: + case LibFunc_fstatvfs64: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + break; + case LibFunc_open64: + // May throw; "open" is a valid pthread cancellation point. + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); + break; + case LibFunc_gettimeofday: + // Currently some platforms have the restrict keyword on the arguments to + // gettimeofday. To be conservative, do not add noalias to gettimeofday's + // arguments. 
+ Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 1); + break; + case LibFunc_memset_pattern4: + case LibFunc_memset_pattern8: + case LibFunc_memset_pattern16: + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + [[fallthrough]]; + case LibFunc_memset: + Changed |= setWillReturn(F); + [[fallthrough]]; + case LibFunc_memset_chk: + Changed |= setOnlyAccessesArgMemory(F); + Changed |= setOnlyWritesMemory(F, 0); + Changed |= setDoesNotThrow(F); + break; + // int __nvvm_reflect(const char *) + case LibFunc_nvvm_reflect: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotAccessMemory(F); + Changed |= setDoesNotThrow(F); + break; + case LibFunc_ldexp: + case LibFunc_ldexpf: + case LibFunc_ldexpl: + Changed |= setWillReturn(F); + break; + case LibFunc_abs: + case LibFunc_acos: + case LibFunc_acosf: + case LibFunc_acosh: + case LibFunc_acoshf: + case LibFunc_acoshl: + case LibFunc_acosl: + case LibFunc_asin: + case LibFunc_asinf: + case LibFunc_asinh: + case LibFunc_asinhf: + case LibFunc_asinhl: + case LibFunc_asinl: + case LibFunc_atan: + case LibFunc_atan2: + case LibFunc_atan2f: + case LibFunc_atan2l: + case LibFunc_atanf: + case LibFunc_atanh: + case LibFunc_atanhf: + case LibFunc_atanhl: + case LibFunc_atanl: + case LibFunc_cbrt: + case LibFunc_cbrtf: + case LibFunc_cbrtl: + case LibFunc_ceil: + case LibFunc_ceilf: + case LibFunc_ceill: + case LibFunc_copysign: + case LibFunc_copysignf: + case LibFunc_copysignl: + case LibFunc_cos: + case LibFunc_cosh: + case LibFunc_coshf: + case LibFunc_coshl: + case LibFunc_cosf: + case LibFunc_cosl: + case LibFunc_cospi: + case LibFunc_cospif: + case LibFunc_exp: + case LibFunc_expf: + case LibFunc_expl: + case LibFunc_exp2: + case LibFunc_exp2f: + case LibFunc_exp2l: + case LibFunc_expm1: + case LibFunc_expm1f: + case LibFunc_expm1l: + case LibFunc_fabs: + case LibFunc_fabsf: + case LibFunc_fabsl: + case LibFunc_ffs: + case LibFunc_ffsl: + case LibFunc_ffsll: + case LibFunc_floor: + case LibFunc_floorf: + case LibFunc_floorl: + case LibFunc_fls: + case LibFunc_flsl: + case LibFunc_flsll: + case LibFunc_fmax: + case LibFunc_fmaxf: + case LibFunc_fmaxl: + case LibFunc_fmin: + case LibFunc_fminf: + case LibFunc_fminl: + case LibFunc_fmod: + case LibFunc_fmodf: + case LibFunc_fmodl: + case LibFunc_isascii: + case LibFunc_isdigit: + case LibFunc_labs: + case LibFunc_llabs: + case LibFunc_log: + case LibFunc_log10: + case LibFunc_log10f: + case LibFunc_log10l: + case LibFunc_log1p: + case LibFunc_log1pf: + case LibFunc_log1pl: + case LibFunc_log2: + case LibFunc_log2f: + case LibFunc_log2l: + case LibFunc_logb: + case LibFunc_logbf: + case LibFunc_logbl: + case LibFunc_logf: + case LibFunc_logl: + case LibFunc_nearbyint: + case LibFunc_nearbyintf: + case LibFunc_nearbyintl: + case LibFunc_pow: + case LibFunc_powf: + case LibFunc_powl: + case LibFunc_rint: + case LibFunc_rintf: + case LibFunc_rintl: + case LibFunc_round: + case LibFunc_roundf: + case LibFunc_roundl: + case LibFunc_sin: + case LibFunc_sincospif_stret: + case LibFunc_sinf: + case LibFunc_sinh: + case LibFunc_sinhf: + case LibFunc_sinhl: + case LibFunc_sinl: + case LibFunc_sinpi: + case LibFunc_sinpif: + case LibFunc_sqrt: + case LibFunc_sqrtf: + case LibFunc_sqrtl: + case LibFunc_tan: + case LibFunc_tanf: + case LibFunc_tanh: + case LibFunc_tanhf: + case LibFunc_tanhl: + case LibFunc_tanl: + case LibFunc_toascii: + case 
LibFunc_trunc: + case LibFunc_truncf: + case LibFunc_truncl: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotFreeMemory(F); + Changed |= setOnlyWritesMemory(F); + Changed |= setWillReturn(F); + break; + default: + // FIXME: It'd be really nice to cover all the library functions we're + // aware of here. + break; + } + // We have to do this step after AllocKind has been inferred on functions so + // we can reliably identify free-like and realloc-like functions. + if (!isLibFreeFunction(&F, TheLibFunc) && !isReallocLikeFn(&F)) + Changed |= setDoesNotFreeMemory(F); + return Changed; +} + +static void setArgExtAttr(Function &F, unsigned ArgNo, + const TargetLibraryInfo &TLI, bool Signed = true) { + Attribute::AttrKind ExtAttr = TLI.getExtAttrForI32Param(Signed); + if (ExtAttr != Attribute::None && !F.hasParamAttribute(ArgNo, ExtAttr)) + F.addParamAttr(ArgNo, ExtAttr); +} + +static void setRetExtAttr(Function &F, + const TargetLibraryInfo &TLI, bool Signed = true) { + Attribute::AttrKind ExtAttr = TLI.getExtAttrForI32Return(Signed); + if (ExtAttr != Attribute::None && !F.hasRetAttribute(ExtAttr)) + F.addRetAttr(ExtAttr); +} + +// Modeled after X86TargetLowering::markLibCallAttributes. +static void markRegisterParameterAttributes(Function *F) { + if (!F->arg_size() || F->isVarArg()) + return; + + const CallingConv::ID CC = F->getCallingConv(); + if (CC != CallingConv::C && CC != CallingConv::X86_StdCall) + return; + + const Module *M = F->getParent(); + unsigned N = M->getNumberRegisterParameters(); + if (!N) + return; + + const DataLayout &DL = M->getDataLayout(); + + for (Argument &A : F->args()) { + Type *T = A.getType(); + if (!T->isIntOrPtrTy()) + continue; + + const TypeSize &TS = DL.getTypeAllocSize(T); + if (TS > 8) + continue; + + assert(TS <= 4 && "Need to account for parameters larger than word size"); + const unsigned NumRegs = TS > 4 ? 2 : 1; + if (N < NumRegs) + return; + + N -= NumRegs; + F->addParamAttr(A.getArgNo(), Attribute::InReg); + } +} + +FunctionCallee llvm::getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI, + LibFunc TheLibFunc, FunctionType *T, + AttributeList AttributeList) { + assert(TLI.has(TheLibFunc) && + "Creating call to non-existing library function."); + StringRef Name = TLI.getName(TheLibFunc); + FunctionCallee C = M->getOrInsertFunction(Name, T, AttributeList); + + // Make sure any mandatory argument attributes are added. + + // Any outgoing i32 argument should be handled with setArgExtAttr() which + // will add an extension attribute if the target ABI requires it. Adding + // argument extensions is typically done by the front end but when an + // optimizer is building a library call on its own it has to take care of + // this. Each such generated function must be handled here with sign or + // zero extensions as needed. F is retreived with cast<> because we demand + // of the caller to have called isLibFuncEmittable() first. 
+ Function *F = cast<Function>(C.getCallee()); + assert(F->getFunctionType() == T && "Function type does not match."); + switch (TheLibFunc) { + case LibFunc_fputc: + case LibFunc_putchar: + setArgExtAttr(*F, 0, TLI); + break; + case LibFunc_ldexp: + case LibFunc_ldexpf: + case LibFunc_ldexpl: + case LibFunc_memchr: + case LibFunc_memrchr: + case LibFunc_strchr: + setArgExtAttr(*F, 1, TLI); + break; + case LibFunc_memccpy: + setArgExtAttr(*F, 2, TLI); + break; + + // These are functions that are known to not need any argument extension + // on any target: A size_t argument (which may be an i32 on some targets) + // should not trigger the assert below. + case LibFunc_bcmp: + setRetExtAttr(*F, TLI); + break; + case LibFunc_calloc: + case LibFunc_fwrite: + case LibFunc_malloc: + case LibFunc_memcmp: + case LibFunc_memcpy_chk: + case LibFunc_mempcpy: + case LibFunc_memset_pattern16: + case LibFunc_snprintf: + case LibFunc_stpncpy: + case LibFunc_strlcat: + case LibFunc_strlcpy: + case LibFunc_strncat: + case LibFunc_strncmp: + case LibFunc_strncpy: + case LibFunc_vsnprintf: + break; + + default: +#ifndef NDEBUG + for (unsigned i = 0; i < T->getNumParams(); i++) + assert(!isa<IntegerType>(T->getParamType(i)) && + "Unhandled integer argument."); +#endif + break; + } + + markRegisterParameterAttributes(F); + + return C; +} + +FunctionCallee llvm::getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI, + LibFunc TheLibFunc, FunctionType *T) { + return getOrInsertLibFunc(M, TLI, TheLibFunc, T, AttributeList()); +} + +bool llvm::isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI, + LibFunc TheLibFunc) { + StringRef FuncName = TLI->getName(TheLibFunc); + if (!TLI->has(TheLibFunc)) + return false; + + // Check if the Module already has a GlobalValue with the same name, in + // which case it must be a Function with the expected type. 
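(Illustrative aside, not part of the patched file: the GlobalValue check described in the comment above follows right after this sketch. Shown here is how a transform would typically combine isLibFuncEmittable() and getOrInsertLibFunc() to build a well-formed strlen declaration; the helper name and the surrounding pass context are assumptions.)

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"

static llvm::FunctionCallee
declareStrlen(llvm::Module &M, const llvm::TargetLibraryInfo &TLI) {
  using namespace llvm;
  if (!isLibFuncEmittable(&M, &TLI, LibFunc_strlen))
    return FunctionCallee();                  // target has no usable strlen
  LLVMContext &Ctx = M.getContext();
  Type *SizeTy = Type::getIntNTy(Ctx, TLI.getSizeTSize(M));
  FunctionType *FTy =
      FunctionType::get(SizeTy, {Type::getInt8PtrTy(Ctx)}, /*isVarArg=*/false);
  // getOrInsertLibFunc adds any mandatory ABI attributes (argument
  // extension, inreg) that the plain getOrInsertFunction would miss.
  return getOrInsertLibFunc(&M, TLI, LibFunc_strlen, FTy);
}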
+ if (GlobalValue *GV = M->getNamedValue(FuncName)) { + if (auto *F = dyn_cast<Function>(GV)) + return TLI->isValidProtoForLibFunc(*F->getFunctionType(), TheLibFunc, *M); + return false; + } + + return true; +} + +bool llvm::isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI, + StringRef Name) { + LibFunc TheLibFunc; + return TLI->getLibFunc(Name, TheLibFunc) && + isLibFuncEmittable(M, TLI, TheLibFunc); +} + +bool llvm::hasFloatFn(const Module *M, const TargetLibraryInfo *TLI, Type *Ty, + LibFunc DoubleFn, LibFunc FloatFn, LibFunc LongDoubleFn) { + switch (Ty->getTypeID()) { + case Type::HalfTyID: + return false; + case Type::FloatTyID: + return isLibFuncEmittable(M, TLI, FloatFn); + case Type::DoubleTyID: + return isLibFuncEmittable(M, TLI, DoubleFn); + default: + return isLibFuncEmittable(M, TLI, LongDoubleFn); + } +} + +StringRef llvm::getFloatFn(const Module *M, const TargetLibraryInfo *TLI, + Type *Ty, LibFunc DoubleFn, LibFunc FloatFn, + LibFunc LongDoubleFn, LibFunc &TheLibFunc) { + assert(hasFloatFn(M, TLI, Ty, DoubleFn, FloatFn, LongDoubleFn) && + "Cannot get name for unavailable function!"); + + switch (Ty->getTypeID()) { + case Type::HalfTyID: + llvm_unreachable("No name for HalfTy!"); + case Type::FloatTyID: + TheLibFunc = FloatFn; + return TLI->getName(FloatFn); + case Type::DoubleTyID: + TheLibFunc = DoubleFn; + return TLI->getName(DoubleFn); + default: + TheLibFunc = LongDoubleFn; + return TLI->getName(LongDoubleFn); + } +} + +//- Emit LibCalls ------------------------------------------------------------// + +Value *llvm::castToCStr(Value *V, IRBuilderBase &B) { + unsigned AS = V->getType()->getPointerAddressSpace(); + return B.CreateBitCast(V, B.getInt8PtrTy(AS), "cstr"); +} + +static IntegerType *getIntTy(IRBuilderBase &B, const TargetLibraryInfo *TLI) { + return B.getIntNTy(TLI->getIntSize()); +} + +static IntegerType *getSizeTTy(IRBuilderBase &B, const TargetLibraryInfo *TLI) { + const Module *M = B.GetInsertBlock()->getModule(); + return B.getIntNTy(TLI->getSizeTSize(*M)); +} + +static Value *emitLibCall(LibFunc TheLibFunc, Type *ReturnType, + ArrayRef<Type *> ParamTypes, + ArrayRef<Value *> Operands, IRBuilderBase &B, + const TargetLibraryInfo *TLI, + bool IsVaArgs = false) { + Module *M = B.GetInsertBlock()->getModule(); + if (!isLibFuncEmittable(M, TLI, TheLibFunc)) + return nullptr; + + StringRef FuncName = TLI->getName(TheLibFunc); + FunctionType *FuncType = FunctionType::get(ReturnType, ParamTypes, IsVaArgs); + FunctionCallee Callee = getOrInsertLibFunc(M, *TLI, TheLibFunc, FuncType); + inferNonMandatoryLibFuncAttrs(M, FuncName, *TLI); + CallInst *CI = B.CreateCall(Callee, Operands, FuncName); + if (const Function *F = + dyn_cast<Function>(Callee.getCallee()->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +Value *llvm::emitStrLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL, + const TargetLibraryInfo *TLI) { + Type *SizeTTy = getSizeTTy(B, TLI); + return emitLibCall(LibFunc_strlen, SizeTTy, + B.getInt8PtrTy(), castToCStr(Ptr, B), B, TLI); +} + +Value *llvm::emitStrDup(Value *Ptr, IRBuilderBase &B, + const TargetLibraryInfo *TLI) { + return emitLibCall(LibFunc_strdup, B.getInt8PtrTy(), B.getInt8PtrTy(), + castToCStr(Ptr, B), B, TLI); +} + +Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilderBase &B, + const TargetLibraryInfo *TLI) { + Type *I8Ptr = B.getInt8PtrTy(); + Type *IntTy = getIntTy(B, TLI); + return emitLibCall(LibFunc_strchr, I8Ptr, {I8Ptr, IntTy}, + {castToCStr(Ptr, B), ConstantInt::get(IntTy, C)}, 
B, TLI); +} + +Value *llvm::emitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B, + const DataLayout &DL, const TargetLibraryInfo *TLI) { + Type *IntTy = getIntTy(B, TLI); + Type *SizeTTy = getSizeTTy(B, TLI); + return emitLibCall( + LibFunc_strncmp, IntTy, + {B.getInt8PtrTy(), B.getInt8PtrTy(), SizeTTy}, + {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI); +} + +Value *llvm::emitStrCpy(Value *Dst, Value *Src, IRBuilderBase &B, + const TargetLibraryInfo *TLI) { + Type *I8Ptr = Dst->getType(); + return emitLibCall(LibFunc_strcpy, I8Ptr, {I8Ptr, I8Ptr}, + {castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI); +} + +Value *llvm::emitStpCpy(Value *Dst, Value *Src, IRBuilderBase &B, + const TargetLibraryInfo *TLI) { + Type *I8Ptr = B.getInt8PtrTy(); + return emitLibCall(LibFunc_stpcpy, I8Ptr, {I8Ptr, I8Ptr}, + {castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI); +} + +Value *llvm::emitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B, + const TargetLibraryInfo *TLI) { + Type *I8Ptr = B.getInt8PtrTy(); + Type *SizeTTy = getSizeTTy(B, TLI); + return emitLibCall(LibFunc_strncpy, I8Ptr, {I8Ptr, I8Ptr, SizeTTy}, + {castToCStr(Dst, B), castToCStr(Src, B), Len}, B, TLI); +} + +Value *llvm::emitStpNCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B, + const TargetLibraryInfo *TLI) { + Type *I8Ptr = B.getInt8PtrTy(); + Type *SizeTTy = getSizeTTy(B, TLI); + return emitLibCall(LibFunc_stpncpy, I8Ptr, {I8Ptr, I8Ptr, SizeTTy}, + {castToCStr(Dst, B), castToCStr(Src, B), Len}, B, TLI); +} + +Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, + IRBuilderBase &B, const DataLayout &DL, + const TargetLibraryInfo *TLI) { + Module *M = B.GetInsertBlock()->getModule(); + if (!isLibFuncEmittable(M, TLI, LibFunc_memcpy_chk)) + return nullptr; + + AttributeList AS; + AS = AttributeList::get(M->getContext(), AttributeList::FunctionIndex, + Attribute::NoUnwind); + Type *I8Ptr = B.getInt8PtrTy(); + Type *SizeTTy = getSizeTTy(B, TLI); + FunctionCallee MemCpy = getOrInsertLibFunc(M, *TLI, LibFunc_memcpy_chk, + AttributeList::get(M->getContext(), AS), I8Ptr, + I8Ptr, I8Ptr, SizeTTy, SizeTTy); + Dst = castToCStr(Dst, B); + Src = castToCStr(Src, B); + CallInst *CI = B.CreateCall(MemCpy, {Dst, Src, Len, ObjSize}); + if (const Function *F = + dyn_cast<Function>(MemCpy.getCallee()->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +Value *llvm::emitMemPCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B, + const DataLayout &DL, const TargetLibraryInfo *TLI) { + Type *I8Ptr = B.getInt8PtrTy(); + Type *SizeTTy = getSizeTTy(B, TLI); + return emitLibCall(LibFunc_mempcpy, I8Ptr, + {I8Ptr, I8Ptr, SizeTTy}, + {Dst, Src, Len}, B, TLI); +} + +Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B, + const DataLayout &DL, const TargetLibraryInfo *TLI) { + Type *I8Ptr = B.getInt8PtrTy(); + Type *IntTy = getIntTy(B, TLI); + Type *SizeTTy = getSizeTTy(B, TLI); + return emitLibCall(LibFunc_memchr, I8Ptr, + {I8Ptr, IntTy, SizeTTy}, + {castToCStr(Ptr, B), Val, Len}, B, TLI); +} + +Value *llvm::emitMemRChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B, + const DataLayout &DL, const TargetLibraryInfo *TLI) { + Type *I8Ptr = B.getInt8PtrTy(); + Type *IntTy = getIntTy(B, TLI); + Type *SizeTTy = getSizeTTy(B, TLI); + return emitLibCall(LibFunc_memrchr, I8Ptr, + {I8Ptr, IntTy, SizeTTy}, + {castToCStr(Ptr, B), Val, Len}, B, TLI); +} + +Value *llvm::emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B, + const 
DataLayout &DL, const TargetLibraryInfo *TLI) { + Type *I8Ptr = B.getInt8PtrTy(); + Type *IntTy = getIntTy(B, TLI); + Type *SizeTTy = getSizeTTy(B, TLI); + return emitLibCall(LibFunc_memcmp, IntTy, + {I8Ptr, I8Ptr, SizeTTy}, + {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI); +} + +Value *llvm::emitBCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B, + const DataLayout &DL, const TargetLibraryInfo *TLI) { + Type *I8Ptr = B.getInt8PtrTy(); + Type *IntTy = getIntTy(B, TLI); + Type *SizeTTy = getSizeTTy(B, TLI); + return emitLibCall(LibFunc_bcmp, IntTy, + {I8Ptr, I8Ptr, SizeTTy}, + {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI); +} + +Value *llvm::emitMemCCpy(Value *Ptr1, Value *Ptr2, Value *Val, Value *Len, + IRBuilderBase &B, const TargetLibraryInfo *TLI) { + Type *I8Ptr = B.getInt8PtrTy(); + Type *IntTy = getIntTy(B, TLI); + Type *SizeTTy = getSizeTTy(B, TLI); + return emitLibCall(LibFunc_memccpy, I8Ptr, + {I8Ptr, I8Ptr, IntTy, SizeTTy}, + {Ptr1, Ptr2, Val, Len}, B, TLI); +} + +Value *llvm::emitSNPrintf(Value *Dest, Value *Size, Value *Fmt, + ArrayRef<Value *> VariadicArgs, IRBuilderBase &B, + const TargetLibraryInfo *TLI) { + Type *I8Ptr = B.getInt8PtrTy(); + Type *IntTy = getIntTy(B, TLI); + Type *SizeTTy = getSizeTTy(B, TLI); + SmallVector<Value *, 8> Args{castToCStr(Dest, B), Size, castToCStr(Fmt, B)}; + llvm::append_range(Args, VariadicArgs); + return emitLibCall(LibFunc_snprintf, IntTy, + {I8Ptr, SizeTTy, I8Ptr}, + Args, B, TLI, /*IsVaArgs=*/true); +} + +Value *llvm::emitSPrintf(Value *Dest, Value *Fmt, + ArrayRef<Value *> VariadicArgs, IRBuilderBase &B, + const TargetLibraryInfo *TLI) { + Type *I8Ptr = B.getInt8PtrTy(); + Type *IntTy = getIntTy(B, TLI); + SmallVector<Value *, 8> Args{castToCStr(Dest, B), castToCStr(Fmt, B)}; + llvm::append_range(Args, VariadicArgs); + return emitLibCall(LibFunc_sprintf, IntTy, + {I8Ptr, I8Ptr}, Args, B, TLI, + /*IsVaArgs=*/true); +} + +Value *llvm::emitStrCat(Value *Dest, Value *Src, IRBuilderBase &B, + const TargetLibraryInfo *TLI) { + return emitLibCall(LibFunc_strcat, B.getInt8PtrTy(), + {B.getInt8PtrTy(), B.getInt8PtrTy()}, + {castToCStr(Dest, B), castToCStr(Src, B)}, B, TLI); +} + +Value *llvm::emitStrLCpy(Value *Dest, Value *Src, Value *Size, IRBuilderBase &B, + const TargetLibraryInfo *TLI) { + Type *I8Ptr = B.getInt8PtrTy(); + Type *SizeTTy = getSizeTTy(B, TLI); + return emitLibCall(LibFunc_strlcpy, SizeTTy, + {I8Ptr, I8Ptr, SizeTTy}, + {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI); +} + +Value *llvm::emitStrLCat(Value *Dest, Value *Src, Value *Size, IRBuilderBase &B, + const TargetLibraryInfo *TLI) { + Type *I8Ptr = B.getInt8PtrTy(); + Type *SizeTTy = getSizeTTy(B, TLI); + return emitLibCall(LibFunc_strlcat, SizeTTy, + {I8Ptr, I8Ptr, SizeTTy}, + {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI); +} + +Value *llvm::emitStrNCat(Value *Dest, Value *Src, Value *Size, IRBuilderBase &B, + const TargetLibraryInfo *TLI) { + Type *I8Ptr = B.getInt8PtrTy(); + Type *SizeTTy = getSizeTTy(B, TLI); + return emitLibCall(LibFunc_strncat, I8Ptr, + {I8Ptr, I8Ptr, SizeTTy}, + {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI); +} + +Value *llvm::emitVSNPrintf(Value *Dest, Value *Size, Value *Fmt, Value *VAList, + IRBuilderBase &B, const TargetLibraryInfo *TLI) { + Type *I8Ptr = B.getInt8PtrTy(); + Type *IntTy = getIntTy(B, TLI); + Type *SizeTTy = getSizeTTy(B, TLI); + return emitLibCall( + LibFunc_vsnprintf, IntTy, + {I8Ptr, SizeTTy, I8Ptr, VAList->getType()}, + {castToCStr(Dest, B), Size, castToCStr(Fmt, B), 
VAList}, B, TLI); +} + +Value *llvm::emitVSPrintf(Value *Dest, Value *Fmt, Value *VAList, + IRBuilderBase &B, const TargetLibraryInfo *TLI) { + Type *I8Ptr = B.getInt8PtrTy(); + Type *IntTy = getIntTy(B, TLI); + return emitLibCall(LibFunc_vsprintf, IntTy, + {I8Ptr, I8Ptr, VAList->getType()}, + {castToCStr(Dest, B), castToCStr(Fmt, B), VAList}, B, TLI); +} + +/// Append a suffix to the function name according to the type of 'Op'. +static void appendTypeSuffix(Value *Op, StringRef &Name, + SmallString<20> &NameBuffer) { + if (!Op->getType()->isDoubleTy()) { + NameBuffer += Name; + + if (Op->getType()->isFloatTy()) + NameBuffer += 'f'; + else + NameBuffer += 'l'; + + Name = NameBuffer; + } +} + +static Value *emitUnaryFloatFnCallHelper(Value *Op, LibFunc TheLibFunc, + StringRef Name, IRBuilderBase &B, + const AttributeList &Attrs, + const TargetLibraryInfo *TLI) { + assert((Name != "") && "Must specify Name to emitUnaryFloatFnCall"); + + Module *M = B.GetInsertBlock()->getModule(); + FunctionCallee Callee = getOrInsertLibFunc(M, *TLI, TheLibFunc, Op->getType(), + Op->getType()); + CallInst *CI = B.CreateCall(Callee, Op, Name); + + // The incoming attribute set may have come from a speculatable intrinsic, but + // is being replaced with a library call which is not allowed to be + // speculatable. + CI->setAttributes( + Attrs.removeFnAttribute(B.getContext(), Attribute::Speculatable)); + if (const Function *F = + dyn_cast<Function>(Callee.getCallee()->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI, + StringRef Name, IRBuilderBase &B, + const AttributeList &Attrs) { + SmallString<20> NameBuffer; + appendTypeSuffix(Op, Name, NameBuffer); + + LibFunc TheLibFunc; + TLI->getLibFunc(Name, TheLibFunc); + + return emitUnaryFloatFnCallHelper(Op, TheLibFunc, Name, B, Attrs, TLI); +} + +Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI, + LibFunc DoubleFn, LibFunc FloatFn, + LibFunc LongDoubleFn, IRBuilderBase &B, + const AttributeList &Attrs) { + // Get the name of the function according to TLI. + Module *M = B.GetInsertBlock()->getModule(); + LibFunc TheLibFunc; + StringRef Name = getFloatFn(M, TLI, Op->getType(), DoubleFn, FloatFn, + LongDoubleFn, TheLibFunc); + + return emitUnaryFloatFnCallHelper(Op, TheLibFunc, Name, B, Attrs, TLI); +} + +static Value *emitBinaryFloatFnCallHelper(Value *Op1, Value *Op2, + LibFunc TheLibFunc, + StringRef Name, IRBuilderBase &B, + const AttributeList &Attrs, + const TargetLibraryInfo *TLI) { + assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall"); + + Module *M = B.GetInsertBlock()->getModule(); + FunctionCallee Callee = getOrInsertLibFunc(M, *TLI, TheLibFunc, Op1->getType(), + Op1->getType(), Op2->getType()); + inferNonMandatoryLibFuncAttrs(M, Name, *TLI); + CallInst *CI = B.CreateCall(Callee, { Op1, Op2 }, Name); + + // The incoming attribute set may have come from a speculatable intrinsic, but + // is being replaced with a library call which is not allowed to be + // speculatable. 
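(Illustrative aside, not part of the patched file: the setAttributes() call that the comment above refers to follows right after this sketch. A minimal use of the unary float helpers defined above, picking sqrt/sqrtf/sqrtl by operand type; the helper name is hypothetical and failure is reduced to returning nullptr.)

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"

static llvm::Value *emitSqrtFor(llvm::Value *Op, llvm::IRBuilderBase &B,
                                const llvm::TargetLibraryInfo &TLI) {
  using namespace llvm;
  Module *M = B.GetInsertBlock()->getModule();
  // Only proceed if the target provides the right flavor for Op's type.
  if (!hasFloatFn(M, &TLI, Op->getType(), LibFunc_sqrt, LibFunc_sqrtf,
                  LibFunc_sqrtl))
    return nullptr;
  // Emits a call to sqrt, sqrtf or sqrtl and copies the callee's calling
  // convention onto the call, as the helper above does.
  return emitUnaryFloatFnCall(Op, &TLI, LibFunc_sqrt, LibFunc_sqrtf,
                              LibFunc_sqrtl, B, AttributeList());
}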
+ CI->setAttributes( + Attrs.removeFnAttribute(B.getContext(), Attribute::Speculatable)); + if (const Function *F = + dyn_cast<Function>(Callee.getCallee()->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, + const TargetLibraryInfo *TLI, + StringRef Name, IRBuilderBase &B, + const AttributeList &Attrs) { + assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall"); + + SmallString<20> NameBuffer; + appendTypeSuffix(Op1, Name, NameBuffer); + + LibFunc TheLibFunc; + TLI->getLibFunc(Name, TheLibFunc); + + return emitBinaryFloatFnCallHelper(Op1, Op2, TheLibFunc, Name, B, Attrs, TLI); +} + +Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, + const TargetLibraryInfo *TLI, + LibFunc DoubleFn, LibFunc FloatFn, + LibFunc LongDoubleFn, IRBuilderBase &B, + const AttributeList &Attrs) { + // Get the name of the function according to TLI. + Module *M = B.GetInsertBlock()->getModule(); + LibFunc TheLibFunc; + StringRef Name = getFloatFn(M, TLI, Op1->getType(), DoubleFn, FloatFn, + LongDoubleFn, TheLibFunc); + + return emitBinaryFloatFnCallHelper(Op1, Op2, TheLibFunc, Name, B, Attrs, TLI); +} + +// Emit a call to putchar(int) with Char as the argument. Char must have +// the same precision as int, which need not be 32 bits. +Value *llvm::emitPutChar(Value *Char, IRBuilderBase &B, + const TargetLibraryInfo *TLI) { + Module *M = B.GetInsertBlock()->getModule(); + if (!isLibFuncEmittable(M, TLI, LibFunc_putchar)) + return nullptr; + + Type *IntTy = getIntTy(B, TLI); + StringRef PutCharName = TLI->getName(LibFunc_putchar); + FunctionCallee PutChar = getOrInsertLibFunc(M, *TLI, LibFunc_putchar, + IntTy, IntTy); + inferNonMandatoryLibFuncAttrs(M, PutCharName, *TLI); + CallInst *CI = B.CreateCall(PutChar, Char, PutCharName); + + if (const Function *F = + dyn_cast<Function>(PutChar.getCallee()->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +Value *llvm::emitPutS(Value *Str, IRBuilderBase &B, + const TargetLibraryInfo *TLI) { + Module *M = B.GetInsertBlock()->getModule(); + if (!isLibFuncEmittable(M, TLI, LibFunc_puts)) + return nullptr; + + Type *IntTy = getIntTy(B, TLI); + StringRef PutsName = TLI->getName(LibFunc_puts); + FunctionCallee PutS = getOrInsertLibFunc(M, *TLI, LibFunc_puts, IntTy, + B.getInt8PtrTy()); + inferNonMandatoryLibFuncAttrs(M, PutsName, *TLI); + CallInst *CI = B.CreateCall(PutS, castToCStr(Str, B), PutsName); + if (const Function *F = + dyn_cast<Function>(PutS.getCallee()->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilderBase &B, + const TargetLibraryInfo *TLI) { + Module *M = B.GetInsertBlock()->getModule(); + if (!isLibFuncEmittable(M, TLI, LibFunc_fputc)) + return nullptr; + + Type *IntTy = getIntTy(B, TLI); + StringRef FPutcName = TLI->getName(LibFunc_fputc); + FunctionCallee F = getOrInsertLibFunc(M, *TLI, LibFunc_fputc, IntTy, + IntTy, File->getType()); + if (File->getType()->isPointerTy()) + inferNonMandatoryLibFuncAttrs(M, FPutcName, *TLI); + CallInst *CI = B.CreateCall(F, {Char, File}, FPutcName); + + if (const Function *Fn = + dyn_cast<Function>(F.getCallee()->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); + return CI; +} + +Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilderBase &B, + const TargetLibraryInfo *TLI) { + Module *M = B.GetInsertBlock()->getModule(); + if (!isLibFuncEmittable(M, TLI, LibFunc_fputs)) + 
return nullptr; + + Type *IntTy = getIntTy(B, TLI); + StringRef FPutsName = TLI->getName(LibFunc_fputs); + FunctionCallee F = getOrInsertLibFunc(M, *TLI, LibFunc_fputs, IntTy, + B.getInt8PtrTy(), File->getType()); + if (File->getType()->isPointerTy()) + inferNonMandatoryLibFuncAttrs(M, FPutsName, *TLI); + CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, FPutsName); + + if (const Function *Fn = + dyn_cast<Function>(F.getCallee()->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); + return CI; +} + +Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilderBase &B, + const DataLayout &DL, const TargetLibraryInfo *TLI) { + Module *M = B.GetInsertBlock()->getModule(); + if (!isLibFuncEmittable(M, TLI, LibFunc_fwrite)) + return nullptr; + + Type *SizeTTy = getSizeTTy(B, TLI); + StringRef FWriteName = TLI->getName(LibFunc_fwrite); + FunctionCallee F = getOrInsertLibFunc(M, *TLI, LibFunc_fwrite, + SizeTTy, B.getInt8PtrTy(), SizeTTy, + SizeTTy, File->getType()); + + if (File->getType()->isPointerTy()) + inferNonMandatoryLibFuncAttrs(M, FWriteName, *TLI); + CallInst *CI = + B.CreateCall(F, {castToCStr(Ptr, B), Size, + ConstantInt::get(SizeTTy, 1), File}); + + if (const Function *Fn = + dyn_cast<Function>(F.getCallee()->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); + return CI; +} + +Value *llvm::emitMalloc(Value *Num, IRBuilderBase &B, const DataLayout &DL, + const TargetLibraryInfo *TLI) { + Module *M = B.GetInsertBlock()->getModule(); + if (!isLibFuncEmittable(M, TLI, LibFunc_malloc)) + return nullptr; + + StringRef MallocName = TLI->getName(LibFunc_malloc); + Type *SizeTTy = getSizeTTy(B, TLI); + FunctionCallee Malloc = getOrInsertLibFunc(M, *TLI, LibFunc_malloc, + B.getInt8PtrTy(), SizeTTy); + inferNonMandatoryLibFuncAttrs(M, MallocName, *TLI); + CallInst *CI = B.CreateCall(Malloc, Num, MallocName); + + if (const Function *F = + dyn_cast<Function>(Malloc.getCallee()->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +Value *llvm::emitCalloc(Value *Num, Value *Size, IRBuilderBase &B, + const TargetLibraryInfo &TLI) { + Module *M = B.GetInsertBlock()->getModule(); + if (!isLibFuncEmittable(M, &TLI, LibFunc_calloc)) + return nullptr; + + StringRef CallocName = TLI.getName(LibFunc_calloc); + Type *SizeTTy = getSizeTTy(B, &TLI); + FunctionCallee Calloc = getOrInsertLibFunc(M, TLI, LibFunc_calloc, + B.getInt8PtrTy(), SizeTTy, SizeTTy); + inferNonMandatoryLibFuncAttrs(M, CallocName, TLI); + CallInst *CI = B.CreateCall(Calloc, {Num, Size}, CallocName); + + if (const auto *F = + dyn_cast<Function>(Calloc.getCallee()->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/BypassSlowDivision.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/BypassSlowDivision.cpp new file mode 100644 index 0000000000..930a0bcbfa --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -0,0 +1,480 @@ +//===- BypassSlowDivision.cpp - Bypass slow division ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains an optimization for div and rem on architectures that +// execute short instructions significantly faster than longer instructions. 
+// For example, on Intel Atom 32-bit divides are slow enough that during +// runtime it is profitable to check the value of the operands, and if they are +// positive and less than 256 use an unsigned 8-bit divide. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/BypassSlowDivision.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/KnownBits.h" +#include <cassert> +#include <cstdint> + +using namespace llvm; + +#define DEBUG_TYPE "bypass-slow-division" + +namespace { + + struct QuotRemPair { + Value *Quotient; + Value *Remainder; + + QuotRemPair(Value *InQuotient, Value *InRemainder) + : Quotient(InQuotient), Remainder(InRemainder) {} + }; + + /// A quotient and remainder, plus a BB from which they logically "originate". + /// If you use Quotient or Remainder in a Phi node, you should use BB as its + /// corresponding predecessor. + struct QuotRemWithBB { + BasicBlock *BB = nullptr; + Value *Quotient = nullptr; + Value *Remainder = nullptr; + }; + +using DivCacheTy = DenseMap<DivRemMapKey, QuotRemPair>; +using BypassWidthsTy = DenseMap<unsigned, unsigned>; +using VisitedSetTy = SmallPtrSet<Instruction *, 4>; + +enum ValueRange { + /// Operand definitely fits into BypassType. No runtime checks are needed. + VALRNG_KNOWN_SHORT, + /// A runtime check is required, as value range is unknown. + VALRNG_UNKNOWN, + /// Operand is unlikely to fit into BypassType. The bypassing should be + /// disabled. + VALRNG_LIKELY_LONG +}; + +class FastDivInsertionTask { + bool IsValidTask = false; + Instruction *SlowDivOrRem = nullptr; + IntegerType *BypassType = nullptr; + BasicBlock *MainBB = nullptr; + + bool isHashLikeValue(Value *V, VisitedSetTy &Visited); + ValueRange getValueRange(Value *Op, VisitedSetTy &Visited); + QuotRemWithBB createSlowBB(BasicBlock *Successor); + QuotRemWithBB createFastBB(BasicBlock *Successor); + QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS, + BasicBlock *PhiBB); + Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2); + std::optional<QuotRemPair> insertFastDivAndRem(); + + bool isSignedOp() { + return SlowDivOrRem->getOpcode() == Instruction::SDiv || + SlowDivOrRem->getOpcode() == Instruction::SRem; + } + + bool isDivisionOp() { + return SlowDivOrRem->getOpcode() == Instruction::SDiv || + SlowDivOrRem->getOpcode() == Instruction::UDiv; + } + + Type *getSlowType() { return SlowDivOrRem->getType(); } + +public: + FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths); + + Value *getReplacement(DivCacheTy &Cache); +}; + +} // end anonymous namespace + +FastDivInsertionTask::FastDivInsertionTask(Instruction *I, + const BypassWidthsTy &BypassWidths) { + switch (I->getOpcode()) { + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + SlowDivOrRem = I; + break; + default: + // I is not a div/rem operation. + return; + } + + // Skip division on vector types. Only optimize integer instructions. 
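(Illustrative aside, not part of the patched file: the constructor body continues right after this sketch. A C-level analogue of the rewrite this pass performs, using the 32-bit-to-8-bit bypass mentioned in the file header; the function name is hypothetical.)

#include <cstdint>

// 32-bit unsigned division, bypassed with an 8-bit divide when both operands
// are found at run time to fit in a byte (the Intel Atom case cited above).
static uint32_t div32_bypassed(uint32_t A, uint32_t B) {
  if (((A | B) >> 8) == 0)                     // runtime operand check
    return uint32_t(uint8_t(A) / uint8_t(B));  // short, fast divide
  return A / B;                                // original slow divide
}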
+ IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType()); + if (!SlowType) + return; + + // Skip if this bitwidth is not bypassed. + auto BI = BypassWidths.find(SlowType->getBitWidth()); + if (BI == BypassWidths.end()) + return; + + // Get type for div/rem instruction with bypass bitwidth. + IntegerType *BT = IntegerType::get(I->getContext(), BI->second); + BypassType = BT; + + // The original basic block. + MainBB = I->getParent(); + + // The instruction is indeed a slow div or rem operation. + IsValidTask = true; +} + +/// Reuses previously-computed dividend or remainder from the current BB if +/// operands and operation are identical. Otherwise calls insertFastDivAndRem to +/// perform the optimization and caches the resulting dividend and remainder. +/// If no replacement can be generated, nullptr is returned. +Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) { + // First, make sure that the task is valid. + if (!IsValidTask) + return nullptr; + + // Then, look for a value in Cache. + Value *Dividend = SlowDivOrRem->getOperand(0); + Value *Divisor = SlowDivOrRem->getOperand(1); + DivRemMapKey Key(isSignedOp(), Dividend, Divisor); + auto CacheI = Cache.find(Key); + + if (CacheI == Cache.end()) { + // If previous instance does not exist, try to insert fast div. + std::optional<QuotRemPair> OptResult = insertFastDivAndRem(); + // Bail out if insertFastDivAndRem has failed. + if (!OptResult) + return nullptr; + CacheI = Cache.insert({Key, *OptResult}).first; + } + + QuotRemPair &Value = CacheI->second; + return isDivisionOp() ? Value.Quotient : Value.Remainder; +} + +/// Check if a value looks like a hash. +/// +/// The routine is expected to detect values computed using the most common hash +/// algorithms. Typically, hash computations end with one of the following +/// instructions: +/// +/// 1) MUL with a constant wider than BypassType +/// 2) XOR instruction +/// +/// And even if we are wrong and the value is not a hash, it is still quite +/// unlikely that such values will fit into BypassType. +/// +/// To detect string hash algorithms like FNV we have to look through PHI-nodes. +/// It is implemented as a depth-first search for values that look neither long +/// nor hash-like. +bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) { + Instruction *I = dyn_cast<Instruction>(V); + if (!I) + return false; + + switch (I->getOpcode()) { + case Instruction::Xor: + return true; + case Instruction::Mul: { + // After Constant Hoisting pass, long constants may be represented as + // bitcast instructions. As a result, some constants may look like an + // instruction at first, and an additional check is necessary to find out if + // an operand is actually a constant. + Value *Op1 = I->getOperand(1); + ConstantInt *C = dyn_cast<ConstantInt>(Op1); + if (!C && isa<BitCastInst>(Op1)) + C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0)); + return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth(); + } + case Instruction::PHI: + // Stop IR traversal in case of a crazy input code. This limits recursion + // depth. + if (Visited.size() >= 16) + return false; + // Do not visit nodes that have been visited already. We return true because + // it means that we couldn't find any value that doesn't look hash-like. + if (!Visited.insert(I).second) + return true; + return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) { + // Ignore undef values as they probably don't affect the division + // operands. 
+ return getValueRange(V, Visited) == VALRNG_LIKELY_LONG || + isa<UndefValue>(V); + }); + default: + return false; + } +} + +/// Check if an integer value fits into our bypass type. +ValueRange FastDivInsertionTask::getValueRange(Value *V, + VisitedSetTy &Visited) { + unsigned ShortLen = BypassType->getBitWidth(); + unsigned LongLen = V->getType()->getIntegerBitWidth(); + + assert(LongLen > ShortLen && "Value type must be wider than BypassType"); + unsigned HiBits = LongLen - ShortLen; + + const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout(); + KnownBits Known(LongLen); + + computeKnownBits(V, Known, DL); + + if (Known.countMinLeadingZeros() >= HiBits) + return VALRNG_KNOWN_SHORT; + + if (Known.countMaxLeadingZeros() < HiBits) + return VALRNG_LIKELY_LONG; + + // Long integer divisions are often used in hashtable implementations. It's + // not worth bypassing such divisions because hash values are extremely + // unlikely to have enough leading zeros. The call below tries to detect + // values that are unlikely to fit BypassType (including hashes). + if (isHashLikeValue(V, Visited)) + return VALRNG_LIKELY_LONG; + + return VALRNG_UNKNOWN; +} + +/// Add new basic block for slow div and rem operations and put it before +/// SuccessorBB. +QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) { + QuotRemWithBB DivRemPair; + DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "", + MainBB->getParent(), SuccessorBB); + IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin()); + Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc()); + + Value *Dividend = SlowDivOrRem->getOperand(0); + Value *Divisor = SlowDivOrRem->getOperand(1); + + if (isSignedOp()) { + DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor); + DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor); + } else { + DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor); + DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor); + } + + Builder.CreateBr(SuccessorBB); + return DivRemPair; +} + +/// Add new basic block for fast div and rem operations and put it before +/// SuccessorBB. +QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) { + QuotRemWithBB DivRemPair; + DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "", + MainBB->getParent(), SuccessorBB); + IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin()); + Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc()); + + Value *Dividend = SlowDivOrRem->getOperand(0); + Value *Divisor = SlowDivOrRem->getOperand(1); + Value *ShortDivisorV = + Builder.CreateCast(Instruction::Trunc, Divisor, BypassType); + Value *ShortDividendV = + Builder.CreateCast(Instruction::Trunc, Dividend, BypassType); + + // udiv/urem because this optimization only handles positive numbers. + Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV); + Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV); + DivRemPair.Quotient = + Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType()); + DivRemPair.Remainder = + Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType()); + Builder.CreateBr(SuccessorBB); + + return DivRemPair; +} + +/// Creates Phi nodes for result of Div and Rem. 
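(Illustrative aside, not part of the patched file: createDivRemPhiNodes(), whose one-line description appears just above, follows right after this sketch. Shown standalone is the KnownBits test that getValueRange() builds on, specialized to a 64-bit value and a 32-bit bypass type; the helper name is hypothetical.)

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/KnownBits.h"

static bool knownToFitIn32Bits(const llvm::Value *V,
                               const llvm::DataLayout &DL) {
  llvm::KnownBits Known = llvm::computeKnownBits(V, DL);
  // VALRNG_KNOWN_SHORT: the top 32 bits are provably zero.
  return Known.getBitWidth() == 64 && Known.countMinLeadingZeros() >= 32;
}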
+QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS, + QuotRemWithBB &RHS, + BasicBlock *PhiBB) { + IRBuilder<> Builder(PhiBB, PhiBB->begin()); + Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc()); + PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2); + QuoPhi->addIncoming(LHS.Quotient, LHS.BB); + QuoPhi->addIncoming(RHS.Quotient, RHS.BB); + PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2); + RemPhi->addIncoming(LHS.Remainder, LHS.BB); + RemPhi->addIncoming(RHS.Remainder, RHS.BB); + return QuotRemPair(QuoPhi, RemPhi); +} + +/// Creates a runtime check to test whether both the divisor and dividend fit +/// into BypassType. The check is inserted at the end of MainBB. True return +/// value means that the operands fit. Either of the operands may be NULL if it +/// doesn't need a runtime check. +Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) { + assert((Op1 || Op2) && "Nothing to check"); + IRBuilder<> Builder(MainBB, MainBB->end()); + Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc()); + + Value *OrV; + if (Op1 && Op2) + OrV = Builder.CreateOr(Op1, Op2); + else + OrV = Op1 ? Op1 : Op2; + + // BitMask is inverted to check if the operands are + // larger than the bypass type + uint64_t BitMask = ~BypassType->getBitMask(); + Value *AndV = Builder.CreateAnd(OrV, BitMask); + + // Compare operand values + Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0); + return Builder.CreateICmpEQ(AndV, ZeroV); +} + +/// Substitutes the div/rem instruction with code that checks the value of the +/// operands and uses a shorter-faster div/rem instruction when possible. +std::optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() { + Value *Dividend = SlowDivOrRem->getOperand(0); + Value *Divisor = SlowDivOrRem->getOperand(1); + + VisitedSetTy SetL; + ValueRange DividendRange = getValueRange(Dividend, SetL); + if (DividendRange == VALRNG_LIKELY_LONG) + return std::nullopt; + + VisitedSetTy SetR; + ValueRange DivisorRange = getValueRange(Divisor, SetR); + if (DivisorRange == VALRNG_LIKELY_LONG) + return std::nullopt; + + bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT); + bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT); + + if (DividendShort && DivisorShort) { + // If both operands are known to be short then just replace the long + // division with a short one in-place. Since we're not introducing control + // flow in this case, narrowing the division is always a win, even if the + // divisor is a constant (and will later get replaced by a multiplication). + + IRBuilder<> Builder(SlowDivOrRem); + Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType); + Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType); + Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor); + Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor); + Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType()); + Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType()); + return QuotRemPair(ExtDiv, ExtRem); + } + + if (isa<ConstantInt>(Divisor)) { + // If the divisor is not a constant, DAGCombiner will convert it to a + // multiplication by a magic constant. It isn't clear if it is worth + // introducing control flow to get a narrower multiply. + return std::nullopt; + } + + // After Constant Hoisting pass, long constants may be represented as + // bitcast instructions. 
As a result, some constants may look like an + // instruction at first, and an additional check is necessary to find out if + // an operand is actually a constant. + if (auto *BCI = dyn_cast<BitCastInst>(Divisor)) + if (BCI->getParent() == SlowDivOrRem->getParent() && + isa<ConstantInt>(BCI->getOperand(0))) + return std::nullopt; + + IRBuilder<> Builder(MainBB, MainBB->end()); + Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc()); + + if (DividendShort && !isSignedOp()) { + // If the division is unsigned and Dividend is known to be short, then + // either + // 1) Divisor is less or equal to Dividend, and the result can be computed + // with a short division. + // 2) Divisor is greater than Dividend. In this case, no division is needed + // at all: The quotient is 0 and the remainder is equal to Dividend. + // + // So instead of checking at runtime whether Divisor fits into BypassType, + // we emit a runtime check to differentiate between these two cases. This + // lets us entirely avoid a long div. + + // Split the basic block before the div/rem. + BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem); + // Remove the unconditional branch from MainBB to SuccessorBB. + MainBB->back().eraseFromParent(); + QuotRemWithBB Long; + Long.BB = MainBB; + Long.Quotient = ConstantInt::get(getSlowType(), 0); + Long.Remainder = Dividend; + QuotRemWithBB Fast = createFastBB(SuccessorBB); + QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB); + Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor); + Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB); + return Result; + } else { + // General case. Create both slow and fast div/rem pairs and choose one of + // them at runtime. + + // Split the basic block before the div/rem. + BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem); + // Remove the unconditional branch from MainBB to SuccessorBB. + MainBB->back().eraseFromParent(); + QuotRemWithBB Fast = createFastBB(SuccessorBB); + QuotRemWithBB Slow = createSlowBB(SuccessorBB); + QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB); + Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend, + DivisorShort ? nullptr : Divisor); + Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB); + return Result; + } +} + +/// This optimization identifies DIV/REM instructions in a BB that can be +/// profitably bypassed and carried out with a shorter, faster divide. +bool llvm::bypassSlowDivision(BasicBlock *BB, + const BypassWidthsTy &BypassWidths) { + DivCacheTy PerBBDivCache; + + bool MadeChange = false; + Instruction *Next = &*BB->begin(); + while (Next != nullptr) { + // We may add instructions immediately after I, but we want to skip over + // them. + Instruction *I = Next; + Next = Next->getNextNode(); + + // Ignore dead code to save time and avoid bugs. + if (I->hasNUses(0)) + continue; + + FastDivInsertionTask Task(I, BypassWidths); + if (Value *Replacement = Task.getReplacement(PerBBDivCache)) { + I->replaceAllUsesWith(Replacement); + I->eraseFromParent(); + MadeChange = true; + } + } + + // Above we eagerly create divs and rems, as pairs, so that we can efficiently + // create divrem machine instructions. Now erase any unused divs / rems so we + // don't leave extra instructions sitting around. 
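(Illustrative aside, not part of the patched file: the cleanup loop described in the comment above follows right after this sketch. This is roughly how a caller such as CodeGenPrepare drives bypassSlowDivision(); the 64-to-32 mapping and the helper name are illustrative assumptions, since real targets supply their own bypass widths.)

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"

static bool bypassSlowDivIn(llvm::Function &F) {
  llvm::DenseMap<unsigned, unsigned> BypassWidths;
  BypassWidths[64] = 32;          // rewrite i64 div/rem via i32 when profitable
  // Snapshot the block list first: bypassSlowDivision splits blocks as it goes.
  llvm::SmallVector<llvm::BasicBlock *, 16> Blocks;
  for (llvm::BasicBlock &BB : F)
    Blocks.push_back(&BB);
  bool Changed = false;
  for (llvm::BasicBlock *BB : Blocks)
    Changed |= llvm::bypassSlowDivision(BB, BypassWidths);
  return Changed;
}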
+ for (auto &KV : PerBBDivCache) + for (Value *V : {KV.second.Quotient, KV.second.Remainder}) + RecursivelyDeleteTriviallyDeadInstructions(V); + + return MadeChange; +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/CallGraphUpdater.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/CallGraphUpdater.cpp new file mode 100644 index 0000000000..d0b89ba260 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/CallGraphUpdater.cpp @@ -0,0 +1,170 @@ +//===- CallGraphUpdater.cpp - A (lazy) call graph update helper -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file provides interfaces used to manipulate a call graph, regardless +/// if it is a "old style" CallGraph or an "new style" LazyCallGraph. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/CallGraphUpdater.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/IR/Constants.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +using namespace llvm; + +bool CallGraphUpdater::finalize() { + if (!DeadFunctionsInComdats.empty()) { + filterDeadComdatFunctions(DeadFunctionsInComdats); + DeadFunctions.append(DeadFunctionsInComdats.begin(), + DeadFunctionsInComdats.end()); + } + + if (CG) { + // First remove all references, e.g., outgoing via called functions. This is + // necessary as we can delete functions that have circular references. + for (Function *DeadFn : DeadFunctions) { + DeadFn->removeDeadConstantUsers(); + CallGraphNode *DeadCGN = (*CG)[DeadFn]; + DeadCGN->removeAllCalledFunctions(); + CG->getExternalCallingNode()->removeAnyCallEdgeTo(DeadCGN); + DeadFn->replaceAllUsesWith(PoisonValue::get(DeadFn->getType())); + } + + // Then remove the node and function from the module. + for (Function *DeadFn : DeadFunctions) { + CallGraphNode *DeadCGN = CG->getOrInsertFunction(DeadFn); + assert(DeadCGN->getNumReferences() == 0 && + "References should have been handled by now"); + delete CG->removeFunctionFromModule(DeadCGN); + } + } else { + // This is the code path for the new lazy call graph and for the case were + // no call graph was provided. + for (Function *DeadFn : DeadFunctions) { + DeadFn->removeDeadConstantUsers(); + DeadFn->replaceAllUsesWith(PoisonValue::get(DeadFn->getType())); + + if (LCG && !ReplacedFunctions.count(DeadFn)) { + // Taken mostly from the inliner: + LazyCallGraph::Node &N = LCG->get(*DeadFn); + auto *DeadSCC = LCG->lookupSCC(N); + assert(DeadSCC && DeadSCC->size() == 1 && + &DeadSCC->begin()->getFunction() == DeadFn); + auto &DeadRC = DeadSCC->getOuterRefSCC(); + + FunctionAnalysisManager &FAM = + AM->getResult<FunctionAnalysisManagerCGSCCProxy>(*DeadSCC, *LCG) + .getManager(); + + FAM.clear(*DeadFn, DeadFn->getName()); + AM->clear(*DeadSCC, DeadSCC->getName()); + LCG->removeDeadFunction(*DeadFn); + + // Mark the relevant parts of the call graph as invalid so we don't + // visit them. + UR->InvalidatedSCCs.insert(DeadSCC); + UR->InvalidatedRefSCCs.insert(&DeadRC); + } + + // The function is now really dead and de-attached from everything. 
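(Illustrative aside, not part of the patched file: the eraseFromParent() call that the comment above announces follows right after this sketch. The sketch shows the usual lifecycle of CallGraphUpdater inside a new-PM CGSCC pass, modelled on in-tree users such as OpenMPOpt; the initialize() signature is taken from the class header rather than from this diff, and the helper name is hypothetical.)

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"

static bool eraseProvablyDeadFunctions(llvm::LazyCallGraph::SCC &C,
                                       llvm::CGSCCAnalysisManager &AM,
                                       llvm::LazyCallGraph &CG,
                                       llvm::CGSCCUpdateResult &UR,
                                       llvm::ArrayRef<llvm::Function *> Dead) {
  llvm::CallGraphUpdater CGUpdater;
  CGUpdater.initialize(CG, C, AM, UR); // bind to the new-style lazy call graph
  for (llvm::Function *F : Dead)
    CGUpdater.removeFunction(*F);      // queued; the body is dropped right away
  return CGUpdater.finalize();         // actually erases and updates the graph
}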
+ DeadFn->eraseFromParent(); + } + } + + bool Changed = !DeadFunctions.empty(); + DeadFunctionsInComdats.clear(); + DeadFunctions.clear(); + return Changed; +} + +void CallGraphUpdater::reanalyzeFunction(Function &Fn) { + if (CG) { + CallGraphNode *OldCGN = CG->getOrInsertFunction(&Fn); + OldCGN->removeAllCalledFunctions(); + CG->populateCallGraphNode(OldCGN); + } else if (LCG) { + LazyCallGraph::Node &N = LCG->get(Fn); + LazyCallGraph::SCC *C = LCG->lookupSCC(N); + updateCGAndAnalysisManagerForCGSCCPass(*LCG, *C, N, *AM, *UR, *FAM); + } +} + +void CallGraphUpdater::registerOutlinedFunction(Function &OriginalFn, + Function &NewFn) { + if (CG) + CG->addToCallGraph(&NewFn); + else if (LCG) + LCG->addSplitFunction(OriginalFn, NewFn); +} + +void CallGraphUpdater::removeFunction(Function &DeadFn) { + DeadFn.deleteBody(); + DeadFn.setLinkage(GlobalValue::ExternalLinkage); + if (DeadFn.hasComdat()) + DeadFunctionsInComdats.push_back(&DeadFn); + else + DeadFunctions.push_back(&DeadFn); + + // For the old call graph we remove the function from the SCC right away. + if (CG && !ReplacedFunctions.count(&DeadFn)) { + CallGraphNode *DeadCGN = (*CG)[&DeadFn]; + DeadCGN->removeAllCalledFunctions(); + CGSCC->DeleteNode(DeadCGN); + } +} + +void CallGraphUpdater::replaceFunctionWith(Function &OldFn, Function &NewFn) { + OldFn.removeDeadConstantUsers(); + ReplacedFunctions.insert(&OldFn); + if (CG) { + // Update the call graph for the newly promoted function. + CallGraphNode *OldCGN = (*CG)[&OldFn]; + CallGraphNode *NewCGN = CG->getOrInsertFunction(&NewFn); + NewCGN->stealCalledFunctionsFrom(OldCGN); + CG->ReplaceExternalCallEdge(OldCGN, NewCGN); + + // And update the SCC we're iterating as well. + CGSCC->ReplaceNode(OldCGN, NewCGN); + } else if (LCG) { + // Directly substitute the functions in the call graph. + LazyCallGraph::Node &OldLCGN = LCG->get(OldFn); + SCC->getOuterRefSCC().replaceNodeFunction(OldLCGN, NewFn); + } + removeFunction(OldFn); +} + +bool CallGraphUpdater::replaceCallSite(CallBase &OldCS, CallBase &NewCS) { + // This is only necessary in the (old) CG. + if (!CG) + return true; + + Function *Caller = OldCS.getCaller(); + CallGraphNode *NewCalleeNode = + CG->getOrInsertFunction(NewCS.getCalledFunction()); + CallGraphNode *CallerNode = (*CG)[Caller]; + if (llvm::none_of(*CallerNode, [&OldCS](const CallGraphNode::CallRecord &CR) { + return CR.first && *CR.first == &OldCS; + })) + return false; + CallerNode->replaceCallEdge(OldCS, NewCS, NewCalleeNode); + return true; +} + +void CallGraphUpdater::removeCallSite(CallBase &CS) { + // This is only necessary in the (old) CG. + if (!CG) + return; + + Function *Caller = CS.getCaller(); + CallGraphNode *CallerNode = (*CG)[Caller]; + CallerNode->removeCallEdgeFor(CS); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/CallPromotionUtils.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/CallPromotionUtils.cpp new file mode 100644 index 0000000000..4a82f9606d --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/CallPromotionUtils.cpp @@ -0,0 +1,620 @@ +//===- CallPromotionUtils.cpp - Utilities for call promotion ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements utilities useful for promoting indirect call sites to +// direct call sites. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/CallPromotionUtils.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/TypeMetadataUtils.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "call-promotion-utils" + +/// Fix-up phi nodes in an invoke instruction's normal destination. +/// +/// After versioning an invoke instruction, values coming from the original +/// block will now be coming from the "merge" block. For example, in the code +/// below: +/// +/// then_bb: +/// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst +/// +/// else_bb: +/// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst +/// +/// merge_bb: +/// %t2 = phi i32 [ %t0, %then_bb ], [ %t1, %else_bb ] +/// br %normal_dst +/// +/// normal_dst: +/// %t3 = phi i32 [ %x, %orig_bb ], ... +/// +/// "orig_bb" is no longer a predecessor of "normal_dst", so the phi nodes in +/// "normal_dst" must be fixed to refer to "merge_bb": +/// +/// normal_dst: +/// %t3 = phi i32 [ %x, %merge_bb ], ... +/// +static void fixupPHINodeForNormalDest(InvokeInst *Invoke, BasicBlock *OrigBlock, + BasicBlock *MergeBlock) { + for (PHINode &Phi : Invoke->getNormalDest()->phis()) { + int Idx = Phi.getBasicBlockIndex(OrigBlock); + if (Idx == -1) + continue; + Phi.setIncomingBlock(Idx, MergeBlock); + } +} + +/// Fix-up phi nodes in an invoke instruction's unwind destination. +/// +/// After versioning an invoke instruction, values coming from the original +/// block will now be coming from either the "then" block or the "else" block. +/// For example, in the code below: +/// +/// then_bb: +/// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst +/// +/// else_bb: +/// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst +/// +/// unwind_dst: +/// %t3 = phi i32 [ %x, %orig_bb ], ... +/// +/// "orig_bb" is no longer a predecessor of "unwind_dst", so the phi nodes in +/// "unwind_dst" must be fixed to refer to "then_bb" and "else_bb": +/// +/// unwind_dst: +/// %t3 = phi i32 [ %x, %then_bb ], [ %x, %else_bb ], ... +/// +static void fixupPHINodeForUnwindDest(InvokeInst *Invoke, BasicBlock *OrigBlock, + BasicBlock *ThenBlock, + BasicBlock *ElseBlock) { + for (PHINode &Phi : Invoke->getUnwindDest()->phis()) { + int Idx = Phi.getBasicBlockIndex(OrigBlock); + if (Idx == -1) + continue; + auto *V = Phi.getIncomingValue(Idx); + Phi.setIncomingBlock(Idx, ThenBlock); + Phi.addIncoming(V, ElseBlock); + } +} + +/// Create a phi node for the returned value of a call or invoke instruction. +/// +/// After versioning a call or invoke instruction that returns a value, we have +/// to merge the value of the original and new instructions. We do this by +/// creating a phi node and replacing uses of the original instruction with this +/// phi node. +/// +/// For example, if \p OrigInst is defined in "else_bb" and \p NewInst is +/// defined in "then_bb", we create the following phi node: +/// +/// ; Uses of the original instruction are replaced by uses of the phi node. 
+/// %t0 = phi i32 [ %orig_inst, %else_bb ], [ %new_inst, %then_bb ], +/// +static void createRetPHINode(Instruction *OrigInst, Instruction *NewInst, + BasicBlock *MergeBlock, IRBuilder<> &Builder) { + + if (OrigInst->getType()->isVoidTy() || OrigInst->use_empty()) + return; + + Builder.SetInsertPoint(&MergeBlock->front()); + PHINode *Phi = Builder.CreatePHI(OrigInst->getType(), 0); + SmallVector<User *, 16> UsersToUpdate(OrigInst->users()); + for (User *U : UsersToUpdate) + U->replaceUsesOfWith(OrigInst, Phi); + Phi->addIncoming(OrigInst, OrigInst->getParent()); + Phi->addIncoming(NewInst, NewInst->getParent()); +} + +/// Cast a call or invoke instruction to the given type. +/// +/// When promoting a call site, the return type of the call site might not match +/// that of the callee. If this is the case, we have to cast the returned value +/// to the correct type. The location of the cast depends on if we have a call +/// or invoke instruction. +/// +/// For example, if the call instruction below requires a bitcast after +/// promotion: +/// +/// orig_bb: +/// %t0 = call i32 @func() +/// ... +/// +/// The bitcast is placed after the call instruction: +/// +/// orig_bb: +/// ; Uses of the original return value are replaced by uses of the bitcast. +/// %t0 = call i32 @func() +/// %t1 = bitcast i32 %t0 to ... +/// ... +/// +/// A similar transformation is performed for invoke instructions. However, +/// since invokes are terminating, a new block is created for the bitcast. For +/// example, if the invoke instruction below requires a bitcast after promotion: +/// +/// orig_bb: +/// %t0 = invoke i32 @func() to label %normal_dst unwind label %unwind_dst +/// +/// The edge between the original block and the invoke's normal destination is +/// split, and the bitcast is placed there: +/// +/// orig_bb: +/// %t0 = invoke i32 @func() to label %split_bb unwind label %unwind_dst +/// +/// split_bb: +/// ; Uses of the original return value are replaced by uses of the bitcast. +/// %t1 = bitcast i32 %t0 to ... +/// br label %normal_dst +/// +static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) { + + // Save the users of the calling instruction. These uses will be changed to + // use the bitcast after we create it. + SmallVector<User *, 16> UsersToUpdate(CB.users()); + + // Determine an appropriate location to create the bitcast for the return + // value. The location depends on if we have a call or invoke instruction. + Instruction *InsertBefore = nullptr; + if (auto *Invoke = dyn_cast<InvokeInst>(&CB)) + InsertBefore = + &SplitEdge(Invoke->getParent(), Invoke->getNormalDest())->front(); + else + InsertBefore = &*std::next(CB.getIterator()); + + // Bitcast the return value to the correct type. + auto *Cast = CastInst::CreateBitOrPointerCast(&CB, RetTy, "", InsertBefore); + if (RetBitCast) + *RetBitCast = Cast; + + // Replace all the original uses of the calling instruction with the bitcast. + for (User *U : UsersToUpdate) + U->replaceUsesOfWith(&CB, Cast); +} + +/// Predicate and clone the given call site. +/// +/// This function creates an if-then-else structure at the location of the call +/// site. The "if" condition compares the call site's called value to the given +/// callee. The original call site is moved into the "else" block, and a clone +/// of the call site is placed in the "then" block. The cloned instruction is +/// returned. +/// +/// For example, the call instruction below: +/// +/// orig_bb: +/// %t0 = call i32 %ptr() +/// ... 
+/// +/// Is replace by the following: +/// +/// orig_bb: +/// %cond = icmp eq i32 ()* %ptr, @func +/// br i1 %cond, %then_bb, %else_bb +/// +/// then_bb: +/// ; The clone of the original call instruction is placed in the "then" +/// ; block. It is not yet promoted. +/// %t1 = call i32 %ptr() +/// br merge_bb +/// +/// else_bb: +/// ; The original call instruction is moved to the "else" block. +/// %t0 = call i32 %ptr() +/// br merge_bb +/// +/// merge_bb: +/// ; Uses of the original call instruction are replaced by uses of the phi +/// ; node. +/// %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ] +/// ... +/// +/// A similar transformation is performed for invoke instructions. However, +/// since invokes are terminating, more work is required. For example, the +/// invoke instruction below: +/// +/// orig_bb: +/// %t0 = invoke %ptr() to label %normal_dst unwind label %unwind_dst +/// +/// Is replace by the following: +/// +/// orig_bb: +/// %cond = icmp eq i32 ()* %ptr, @func +/// br i1 %cond, %then_bb, %else_bb +/// +/// then_bb: +/// ; The clone of the original invoke instruction is placed in the "then" +/// ; block, and its normal destination is set to the "merge" block. It is +/// ; not yet promoted. +/// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst +/// +/// else_bb: +/// ; The original invoke instruction is moved into the "else" block, and +/// ; its normal destination is set to the "merge" block. +/// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst +/// +/// merge_bb: +/// ; Uses of the original invoke instruction are replaced by uses of the +/// ; phi node, and the merge block branches to the normal destination. +/// %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ] +/// br %normal_dst +/// +/// An indirect musttail call is processed slightly differently in that: +/// 1. No merge block needed for the orginal and the cloned callsite, since +/// either one ends the flow. No phi node is needed either. +/// 2. The return statement following the original call site is duplicated too +/// and placed immediately after the cloned call site per the IR convention. +/// +/// For example, the musttail call instruction below: +/// +/// orig_bb: +/// %t0 = musttail call i32 %ptr() +/// ... +/// +/// Is replaced by the following: +/// +/// cond_bb: +/// %cond = icmp eq i32 ()* %ptr, @func +/// br i1 %cond, %then_bb, %orig_bb +/// +/// then_bb: +/// ; The clone of the original call instruction is placed in the "then" +/// ; block. It is not yet promoted. +/// %t1 = musttail call i32 %ptr() +/// ret %t1 +/// +/// orig_bb: +/// ; The original call instruction stays in its original block. +/// %t0 = musttail call i32 %ptr() +/// ret %t0 +CallBase &llvm::versionCallSite(CallBase &CB, Value *Callee, + MDNode *BranchWeights) { + + IRBuilder<> Builder(&CB); + CallBase *OrigInst = &CB; + BasicBlock *OrigBlock = OrigInst->getParent(); + + // Create the compare. The called value and callee must have the same type to + // be compared. + if (CB.getCalledOperand()->getType() != Callee->getType()) + Callee = Builder.CreateBitCast(Callee, CB.getCalledOperand()->getType()); + auto *Cond = Builder.CreateICmpEQ(CB.getCalledOperand(), Callee); + + if (OrigInst->isMustTailCall()) { + // Create an if-then structure. The original instruction stays in its block, + // and a clone of the original instruction is placed in the "then" block. 
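(Illustrative aside, not part of the patched file: the musttail handling that the comment above introduces continues right after this sketch. A source-level analogue of the if-then-else versioning built by this function; func stands for the profiled likely target.)

// A plain C++ analogue of the versioned call site built below.
using FnPtr = int (*)();
extern int func();   // stands in for @func, the expected callee

static int call_site(FnPtr Ptr) {
  if (Ptr == &func)  // "if.true.direct_targ": compare emitted in orig_bb
    return func();   // the cloned, now-direct call (inlinable later)
  return Ptr();      // "if.false.orig_indirect": the original indirect call
}                    // the two returns model the phi in "if.end.icp"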
+ Instruction *ThenTerm =
+ SplitBlockAndInsertIfThen(Cond, &CB, false, BranchWeights);
+ BasicBlock *ThenBlock = ThenTerm->getParent();
+ ThenBlock->setName("if.true.direct_targ");
+ CallBase *NewInst = cast<CallBase>(OrigInst->clone());
+ NewInst->insertBefore(ThenTerm);
+
+ // Place a clone of the optional bitcast after the new call site.
+ Value *NewRetVal = NewInst;
+ auto Next = OrigInst->getNextNode();
+ if (auto *BitCast = dyn_cast_or_null<BitCastInst>(Next)) {
+ assert(BitCast->getOperand(0) == OrigInst &&
+ "bitcast following musttail call must use the call");
+ auto NewBitCast = BitCast->clone();
+ NewBitCast->replaceUsesOfWith(OrigInst, NewInst);
+ NewBitCast->insertBefore(ThenTerm);
+ NewRetVal = NewBitCast;
+ Next = BitCast->getNextNode();
+ }
+
+ // Place a clone of the return instruction after the new call site.
+ ReturnInst *Ret = dyn_cast_or_null<ReturnInst>(Next);
+ assert(Ret && "musttail call must precede a ret with an optional bitcast");
+ auto NewRet = Ret->clone();
+ if (Ret->getReturnValue())
+ NewRet->replaceUsesOfWith(Ret->getReturnValue(), NewRetVal);
+ NewRet->insertBefore(ThenTerm);
+
+ // A return instruction is terminating, so we don't need the terminator
+ // instruction just created.
+ ThenTerm->eraseFromParent();
+
+ return *NewInst;
+ }
+
+ // Create an if-then-else structure. The original instruction is moved into
+ // the "else" block, and a clone of the original instruction is placed in the
+ // "then" block.
+ Instruction *ThenTerm = nullptr;
+ Instruction *ElseTerm = nullptr;
+ SplitBlockAndInsertIfThenElse(Cond, &CB, &ThenTerm, &ElseTerm, BranchWeights);
+ BasicBlock *ThenBlock = ThenTerm->getParent();
+ BasicBlock *ElseBlock = ElseTerm->getParent();
+ BasicBlock *MergeBlock = OrigInst->getParent();
+
+ ThenBlock->setName("if.true.direct_targ");
+ ElseBlock->setName("if.false.orig_indirect");
+ MergeBlock->setName("if.end.icp");
+
+ CallBase *NewInst = cast<CallBase>(OrigInst->clone());
+ OrigInst->moveBefore(ElseTerm);
+ NewInst->insertBefore(ThenTerm);
+
+ // If the original call site is an invoke instruction, we have extra work to
+ // do since invoke instructions are terminating. We have to fix up phi nodes
+ // in the invoke's normal and unwind destinations.
+ if (auto *OrigInvoke = dyn_cast<InvokeInst>(OrigInst)) {
+ auto *NewInvoke = cast<InvokeInst>(NewInst);
+
+ // Invoke instructions are terminating, so we don't need the terminator
+ // instructions that were just created.
+ ThenTerm->eraseFromParent();
+ ElseTerm->eraseFromParent();
+
+ // Branch from the "merge" block to the original normal destination.
+ Builder.SetInsertPoint(MergeBlock);
+ Builder.CreateBr(OrigInvoke->getNormalDest());
+
+ // Fix up phi nodes in the original invoke's normal and unwind destinations.
+ fixupPHINodeForNormalDest(OrigInvoke, OrigBlock, MergeBlock);
+ fixupPHINodeForUnwindDest(OrigInvoke, MergeBlock, ThenBlock, ElseBlock);
+
+ // Now set the normal destinations of the invoke instructions to be the
+ // "merge" block.
+ OrigInvoke->setNormalDest(MergeBlock);
+ NewInvoke->setNormalDest(MergeBlock);
+ }
+
+ // Create a phi node for the returned value of the call site.
+ createRetPHINode(OrigInst, NewInst, MergeBlock, Builder);
+
+ return *NewInst;
+}
+
+bool llvm::isLegalToPromote(const CallBase &CB, Function *Callee,
+ const char **FailureReason) {
+ assert(!CB.getCalledFunction() && "Only indirect call sites can be promoted");
+
+ auto &DL = Callee->getParent()->getDataLayout();
+
+ // Check the return type. The callee's return value type must be bitcast
+ // compatible with the call site's type.
+ Type *CallRetTy = CB.getType();
+ Type *FuncRetTy = Callee->getReturnType();
+ if (CallRetTy != FuncRetTy)
+ if (!CastInst::isBitOrNoopPointerCastable(FuncRetTy, CallRetTy, DL)) {
+ if (FailureReason)
+ *FailureReason = "Return type mismatch";
+ return false;
+ }
+
+ // The number of formal arguments of the callee.
+ unsigned NumParams = Callee->getFunctionType()->getNumParams();
+
+ // The number of actual arguments in the call.
+ unsigned NumArgs = CB.arg_size();
+
+ // Check the number of arguments. The callee and call site must agree on the
+ // number of arguments.
+ if (NumArgs != NumParams && !Callee->isVarArg()) {
+ if (FailureReason)
+ *FailureReason = "The number of arguments mismatch";
+ return false;
+ }
+
+ // Check the argument types. The callee's formal argument types must be
+ // bitcast compatible with the corresponding actual argument types of the call
+ // site.
+ unsigned I = 0;
+ for (; I < NumParams; ++I) {
+ // Make sure that the callee and call agree on byval/inalloca. The types do
+ // not have to match.
+ if (Callee->hasParamAttribute(I, Attribute::ByVal) !=
+ CB.getAttributes().hasParamAttr(I, Attribute::ByVal)) {
+ if (FailureReason)
+ *FailureReason = "byval mismatch";
+ return false;
+ }
+ if (Callee->hasParamAttribute(I, Attribute::InAlloca) !=
+ CB.getAttributes().hasParamAttr(I, Attribute::InAlloca)) {
+ if (FailureReason)
+ *FailureReason = "inalloca mismatch";
+ return false;
+ }
+
+ Type *FormalTy = Callee->getFunctionType()->getFunctionParamType(I);
+ Type *ActualTy = CB.getArgOperand(I)->getType();
+ if (FormalTy == ActualTy)
+ continue;
+ if (!CastInst::isBitOrNoopPointerCastable(ActualTy, FormalTy, DL)) {
+ if (FailureReason)
+ *FailureReason = "Argument type mismatch";
+ return false;
+ }
+
+ // MustTail call needs stricter type match. See
+ // Verifier::verifyMustTailCall().
+ if (CB.isMustTailCall()) {
+ PointerType *PF = dyn_cast<PointerType>(FormalTy);
+ PointerType *PA = dyn_cast<PointerType>(ActualTy);
+ if (!PF || !PA || PF->getAddressSpace() != PA->getAddressSpace()) {
+ if (FailureReason)
+ *FailureReason = "Musttail call Argument type mismatch";
+ return false;
+ }
+ }
+ }
+ for (; I < NumArgs; I++) {
+ // Vararg functions can have more arguments than parameters.
+ assert(Callee->isVarArg());
+ if (CB.paramHasAttr(I, Attribute::StructRet)) {
+ if (FailureReason)
+ *FailureReason = "SRet arg to vararg function";
+ return false;
+ }
+ }
+
+ return true;
+}
+
+CallBase &llvm::promoteCall(CallBase &CB, Function *Callee,
+ CastInst **RetBitCast) {
+ assert(!CB.getCalledFunction() && "Only indirect call sites can be promoted");
+
+ // Set the called function of the call site to be the given callee (but don't
+ // change the type).
+ CB.setCalledOperand(Callee);
+
+ // Since the call site will no longer be indirect, we must clear metadata that
+ // is only appropriate for indirect calls. This includes !prof and !callees
+ // metadata.
+ CB.setMetadata(LLVMContext::MD_prof, nullptr);
+ CB.setMetadata(LLVMContext::MD_callees, nullptr);
+
+ // If the function type of the call site matches that of the callee, no
+ // additional work is required.
+ if (CB.getFunctionType() == Callee->getFunctionType())
+ return CB;
+
+ // Save the return types of the call site and callee.
+ Type *CallSiteRetTy = CB.getType();
+ Type *CalleeRetTy = Callee->getReturnType();
+
+ // Change the function type of the call site to match that of the callee.
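+ // For example (illustrative, with typed pointers): promoting
+ // %r = call i8* %fp(i8* %p)
+ // to a callee declared as 'i8* (%struct.S*)' keeps the operand %p but
+ // inserts '%cast = bitcast i8* %p to %struct.S*' before the call and
+ // rewrites the argument to use it; a mismatched return type is handled the
+ // same way via createRetBitCast below.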
+ CB.mutateFunctionType(Callee->getFunctionType()); + + // Inspect the arguments of the call site. If an argument's type doesn't + // match the corresponding formal argument's type in the callee, bitcast it + // to the correct type. + auto CalleeType = Callee->getFunctionType(); + auto CalleeParamNum = CalleeType->getNumParams(); + + LLVMContext &Ctx = Callee->getContext(); + const AttributeList &CallerPAL = CB.getAttributes(); + // The new list of argument attributes. + SmallVector<AttributeSet, 4> NewArgAttrs; + bool AttributeChanged = false; + + for (unsigned ArgNo = 0; ArgNo < CalleeParamNum; ++ArgNo) { + auto *Arg = CB.getArgOperand(ArgNo); + Type *FormalTy = CalleeType->getParamType(ArgNo); + Type *ActualTy = Arg->getType(); + if (FormalTy != ActualTy) { + auto *Cast = CastInst::CreateBitOrPointerCast(Arg, FormalTy, "", &CB); + CB.setArgOperand(ArgNo, Cast); + + // Remove any incompatible attributes for the argument. + AttrBuilder ArgAttrs(Ctx, CallerPAL.getParamAttrs(ArgNo)); + ArgAttrs.remove(AttributeFuncs::typeIncompatible(FormalTy)); + + // We may have a different byval/inalloca type. + if (ArgAttrs.getByValType()) + ArgAttrs.addByValAttr(Callee->getParamByValType(ArgNo)); + if (ArgAttrs.getInAllocaType()) + ArgAttrs.addInAllocaAttr(Callee->getParamInAllocaType(ArgNo)); + + NewArgAttrs.push_back(AttributeSet::get(Ctx, ArgAttrs)); + AttributeChanged = true; + } else + NewArgAttrs.push_back(CallerPAL.getParamAttrs(ArgNo)); + } + + // If the return type of the call site doesn't match that of the callee, cast + // the returned value to the appropriate type. + // Remove any incompatible return value attribute. + AttrBuilder RAttrs(Ctx, CallerPAL.getRetAttrs()); + if (!CallSiteRetTy->isVoidTy() && CallSiteRetTy != CalleeRetTy) { + createRetBitCast(CB, CallSiteRetTy, RetBitCast); + RAttrs.remove(AttributeFuncs::typeIncompatible(CalleeRetTy)); + AttributeChanged = true; + } + + // Set the new callsite attribute. + if (AttributeChanged) + CB.setAttributes(AttributeList::get(Ctx, CallerPAL.getFnAttrs(), + AttributeSet::get(Ctx, RAttrs), + NewArgAttrs)); + + return CB; +} + +CallBase &llvm::promoteCallWithIfThenElse(CallBase &CB, Function *Callee, + MDNode *BranchWeights) { + + // Version the indirect call site. If the called value is equal to the given + // callee, 'NewInst' will be executed, otherwise the original call site will + // be executed. + CallBase &NewInst = versionCallSite(CB, Callee, BranchWeights); + + // Promote 'NewInst' so that it directly calls the desired function. + return promoteCall(NewInst, Callee); +} + +bool llvm::tryPromoteCall(CallBase &CB) { + assert(!CB.getCalledFunction()); + Module *M = CB.getCaller()->getParent(); + const DataLayout &DL = M->getDataLayout(); + Value *Callee = CB.getCalledOperand(); + + LoadInst *VTableEntryLoad = dyn_cast<LoadInst>(Callee); + if (!VTableEntryLoad) + return false; // Not a vtable entry load. + Value *VTableEntryPtr = VTableEntryLoad->getPointerOperand(); + APInt VTableOffset(DL.getTypeSizeInBits(VTableEntryPtr->getType()), 0); + Value *VTableBasePtr = VTableEntryPtr->stripAndAccumulateConstantOffsets( + DL, VTableOffset, /* AllowNonInbounds */ true); + LoadInst *VTablePtrLoad = dyn_cast<LoadInst>(VTableBasePtr); + if (!VTablePtrLoad) + return false; // Not a vtable load. 
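+ // At this point the matched chain looks roughly like (illustrative, with
+ // opaque pointers):
+ // %vtable = load ptr, ptr %object ; VTablePtrLoad
+ // %slot = getelementptr inbounds i8, ptr %vtable, i64 <const> ; VTableEntryPtr
+ // %fp = load ptr, ptr %slot ; VTableEntryLoad (the called value)
+ // call ... %fp(...)
+ // Next, require %object to be a local alloca whose vtable pointer can be
+ // found by scanning backwards for the store emitted by the constructor.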
+ Value *Object = VTablePtrLoad->getPointerOperand(); + APInt ObjectOffset(DL.getTypeSizeInBits(Object->getType()), 0); + Value *ObjectBase = Object->stripAndAccumulateConstantOffsets( + DL, ObjectOffset, /* AllowNonInbounds */ true); + if (!(isa<AllocaInst>(ObjectBase) && ObjectOffset == 0)) + // Not an Alloca or the offset isn't zero. + return false; + + // Look for the vtable pointer store into the object by the ctor. + BasicBlock::iterator BBI(VTablePtrLoad); + Value *VTablePtr = FindAvailableLoadedValue( + VTablePtrLoad, VTablePtrLoad->getParent(), BBI, 0, nullptr, nullptr); + if (!VTablePtr) + return false; // No vtable found. + APInt VTableOffsetGVBase(DL.getTypeSizeInBits(VTablePtr->getType()), 0); + Value *VTableGVBase = VTablePtr->stripAndAccumulateConstantOffsets( + DL, VTableOffsetGVBase, /* AllowNonInbounds */ true); + GlobalVariable *GV = dyn_cast<GlobalVariable>(VTableGVBase); + if (!(GV && GV->isConstant() && GV->hasDefinitiveInitializer())) + // Not in the form of a global constant variable with an initializer. + return false; + + Constant *VTableGVInitializer = GV->getInitializer(); + APInt VTableGVOffset = VTableOffsetGVBase + VTableOffset; + if (!(VTableGVOffset.getActiveBits() <= 64)) + return false; // Out of range. + Constant *Ptr = getPointerAtOffset(VTableGVInitializer, + VTableGVOffset.getZExtValue(), + *M); + if (!Ptr) + return false; // No constant (function) pointer found. + Function *DirectCallee = dyn_cast<Function>(Ptr->stripPointerCasts()); + if (!DirectCallee) + return false; // No function pointer found. + + if (!isLegalToPromote(CB, DirectCallee)) + return false; + + // Success. + promoteCall(CB, DirectCallee); + return true; +} + +#undef DEBUG_TYPE diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/CanonicalizeAliases.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/CanonicalizeAliases.cpp new file mode 100644 index 0000000000..4d622679db --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/CanonicalizeAliases.cpp @@ -0,0 +1,76 @@ +//===- CanonicalizeAliases.cpp - ThinLTO Support: Canonicalize Aliases ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Currently this file implements partial alias canonicalization, to +// flatten chains of aliases (also done by GlobalOpt, but not on for +// O0 compiles). E.g. +// @a = alias i8, i8 *@b +// @b = alias i8, i8 *@g +// +// will be converted to: +// @a = alias i8, i8 *@g <-- @a is now an alias to base object @g +// @b = alias i8, i8 *@g +// +// Eventually this file will implement full alias canonicalization, so that +// all aliasees are private anonymous values. E.g. +// @a = alias i8, i8 *@g +// @g = global i8 0 +// +// will be converted to: +// @0 = private global +// @a = alias i8, i8* @0 +// @g = alias i8, i8* @0 +// +// This simplifies optimization and ThinLTO linking of the original symbols. 
+//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/CanonicalizeAliases.h" +#include "llvm/IR/Constants.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" + +using namespace llvm; + +namespace { + +static Constant *canonicalizeAlias(Constant *C, bool &Changed) { + if (auto *GA = dyn_cast<GlobalAlias>(C)) { + auto *NewAliasee = canonicalizeAlias(GA->getAliasee(), Changed); + if (NewAliasee != GA->getAliasee()) { + GA->setAliasee(NewAliasee); + Changed = true; + } + return NewAliasee; + } + + auto *CE = dyn_cast<ConstantExpr>(C); + if (!CE) + return C; + + std::vector<Constant *> Ops; + for (Use &U : CE->operands()) + Ops.push_back(canonicalizeAlias(cast<Constant>(U), Changed)); + return CE->getWithOperands(Ops); +} + +/// Convert aliases to canonical form. +static bool canonicalizeAliases(Module &M) { + bool Changed = false; + for (auto &GA : M.aliases()) + canonicalizeAlias(&GA, Changed); + return Changed; +} +} // anonymous namespace + +PreservedAnalyses CanonicalizeAliasesPass::run(Module &M, + ModuleAnalysisManager &AM) { + if (!canonicalizeAliases(M)) + return PreservedAnalyses::all(); + + return PreservedAnalyses::none(); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp new file mode 100644 index 0000000000..a1ee3df907 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp @@ -0,0 +1,248 @@ +//==- CanonicalizeFreezeInLoops - Canonicalize freezes in a loop-*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass canonicalizes freeze instructions in a loop by pushing them out to +// the preheader. +// +// loop: +// i = phi init, i.next +// i.next = add nsw i, 1 +// i.next.fr = freeze i.next // push this out of this loop +// use(i.next.fr) +// br i1 (i.next <= N), loop, exit +// => +// init.fr = freeze init +// loop: +// i = phi init.fr, i.next +// i.next = add i, 1 // nsw is dropped here +// use(i.next) +// br i1 (i.next <= N), loop, exit +// +// Removing freezes from these chains help scalar evolution successfully analyze +// expressions. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/IVDescriptors.h" +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Dominators.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/Utils.h" + +using namespace llvm; + +#define DEBUG_TYPE "canon-freeze" + +namespace { + +class CanonicalizeFreezeInLoops : public LoopPass { +public: + static char ID; + + CanonicalizeFreezeInLoops(); + +private: + bool runOnLoop(Loop *L, LPPassManager &LPM) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +class CanonicalizeFreezeInLoopsImpl { + Loop *L; + ScalarEvolution &SE; + DominatorTree &DT; + + struct FrozenIndPHIInfo { + // A freeze instruction that uses an induction phi + FreezeInst *FI = nullptr; + // The induction phi, step instruction, the operand idx of StepInst which is + // a step value + PHINode *PHI; + BinaryOperator *StepInst; + unsigned StepValIdx = 0; + + FrozenIndPHIInfo(PHINode *PHI, BinaryOperator *StepInst) + : PHI(PHI), StepInst(StepInst) {} + }; + + // Can freeze instruction be pushed into operands of I? + // In order to do this, I should not create a poison after I's flags are + // stripped. + bool canHandleInst(const Instruction *I) { + auto Opc = I->getOpcode(); + // If add/sub/mul, drop nsw/nuw flags. + return Opc == Instruction::Add || Opc == Instruction::Sub || + Opc == Instruction::Mul; + } + + void InsertFreezeAndForgetFromSCEV(Use &U); + +public: + CanonicalizeFreezeInLoopsImpl(Loop *L, ScalarEvolution &SE, DominatorTree &DT) + : L(L), SE(SE), DT(DT) {} + bool run(); +}; + +} // anonymous namespace + +// Given U = (value, user), replace value with freeze(value), and let +// SCEV forget user. The inserted freeze is placed in the preheader. +void CanonicalizeFreezeInLoopsImpl::InsertFreezeAndForgetFromSCEV(Use &U) { + auto *PH = L->getLoopPreheader(); + + auto *UserI = cast<Instruction>(U.getUser()); + auto *ValueToFr = U.get(); + assert(L->contains(UserI->getParent()) && + "Should not process an instruction that isn't inside the loop"); + if (isGuaranteedNotToBeUndefOrPoison(ValueToFr, nullptr, UserI, &DT)) + return; + + LLVM_DEBUG(dbgs() << "canonfr: inserting freeze:\n"); + LLVM_DEBUG(dbgs() << "\tUser: " << *U.getUser() << "\n"); + LLVM_DEBUG(dbgs() << "\tOperand: " << *U.get() << "\n"); + + U.set(new FreezeInst(ValueToFr, ValueToFr->getName() + ".frozen", + PH->getTerminator())); + + SE.forgetValue(UserI); +} + +bool CanonicalizeFreezeInLoopsImpl::run() { + // The loop should be in LoopSimplify form. + if (!L->isLoopSimplifyForm()) + return false; + + SmallVector<FrozenIndPHIInfo, 4> Candidates; + + for (auto &PHI : L->getHeader()->phis()) { + InductionDescriptor ID; + if (!InductionDescriptor::isInductionPHI(&PHI, L, &SE, ID)) + continue; + + LLVM_DEBUG(dbgs() << "canonfr: PHI: " << PHI << "\n"); + FrozenIndPHIInfo Info(&PHI, ID.getInductionBinOp()); + if (!Info.StepInst || !canHandleInst(Info.StepInst)) { + // The stepping instruction has unknown form. + // Ignore this PHI. 
+ continue; + } + + Info.StepValIdx = Info.StepInst->getOperand(0) == &PHI; + Value *StepV = Info.StepInst->getOperand(Info.StepValIdx); + if (auto *StepI = dyn_cast<Instruction>(StepV)) { + if (L->contains(StepI->getParent())) { + // The step value is inside the loop. Freezing step value will introduce + // another freeze into the loop, so skip this PHI. + continue; + } + } + + auto Visit = [&](User *U) { + if (auto *FI = dyn_cast<FreezeInst>(U)) { + LLVM_DEBUG(dbgs() << "canonfr: found: " << *FI << "\n"); + Info.FI = FI; + Candidates.push_back(Info); + } + }; + for_each(PHI.users(), Visit); + for_each(Info.StepInst->users(), Visit); + } + + if (Candidates.empty()) + return false; + + SmallSet<PHINode *, 8> ProcessedPHIs; + for (const auto &Info : Candidates) { + PHINode *PHI = Info.PHI; + if (!ProcessedPHIs.insert(Info.PHI).second) + continue; + + BinaryOperator *StepI = Info.StepInst; + assert(StepI && "Step instruction should have been found"); + + // Drop flags from the step instruction. + if (!isGuaranteedNotToBeUndefOrPoison(StepI, nullptr, StepI, &DT)) { + LLVM_DEBUG(dbgs() << "canonfr: drop flags: " << *StepI << "\n"); + StepI->dropPoisonGeneratingFlags(); + SE.forgetValue(StepI); + } + + InsertFreezeAndForgetFromSCEV(StepI->getOperandUse(Info.StepValIdx)); + + unsigned OperandIdx = + PHI->getOperandNumForIncomingValue(PHI->getIncomingValue(0) == StepI); + InsertFreezeAndForgetFromSCEV(PHI->getOperandUse(OperandIdx)); + } + + // Finally, remove the old freeze instructions. + for (const auto &Item : Candidates) { + auto *FI = Item.FI; + LLVM_DEBUG(dbgs() << "canonfr: removing " << *FI << "\n"); + SE.forgetValue(FI); + FI->replaceAllUsesWith(FI->getOperand(0)); + FI->eraseFromParent(); + } + + return true; +} + +CanonicalizeFreezeInLoops::CanonicalizeFreezeInLoops() : LoopPass(ID) { + initializeCanonicalizeFreezeInLoopsPass(*PassRegistry::getPassRegistry()); +} + +void CanonicalizeFreezeInLoops::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreservedID(LoopSimplifyID); + AU.addRequired<LoopInfoWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); + AU.addRequiredID(LoopSimplifyID); + AU.addRequired<ScalarEvolutionWrapperPass>(); + AU.addPreserved<ScalarEvolutionWrapperPass>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); +} + +bool CanonicalizeFreezeInLoops::runOnLoop(Loop *L, LPPassManager &) { + if (skipLoop(L)) + return false; + + auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + return CanonicalizeFreezeInLoopsImpl(L, SE, DT).run(); +} + +PreservedAnalyses +CanonicalizeFreezeInLoopsPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + if (!CanonicalizeFreezeInLoopsImpl(&L, AR.SE, AR.DT).run()) + return PreservedAnalyses::all(); + + return getLoopPassPreservedAnalyses(); +} + +INITIALIZE_PASS_BEGIN(CanonicalizeFreezeInLoops, "canon-freeze", + "Canonicalize Freeze Instructions in Loops", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_END(CanonicalizeFreezeInLoops, "canon-freeze", + "Canonicalize Freeze Instructions in Loops", false, false) + +Pass *llvm::createCanonicalizeFreezeInLoopsPass() { + return new CanonicalizeFreezeInLoops(); +} + +char CanonicalizeFreezeInLoops::ID = 0; diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/CloneFunction.cpp 
b/contrib/libs/llvm16/lib/Transforms/Utils/CloneFunction.cpp new file mode 100644 index 0000000000..87822ee85c --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/CloneFunction.cpp @@ -0,0 +1,1194 @@ +//===- CloneFunction.cpp - Clone a function into another function ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the CloneFunctionInto interface, which is used as the +// low-level function cloner. This is used by the CloneFunction and function +// inliner to do the dirty work of copying the body of a function around. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/DomTreeUpdater.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include <map> +#include <optional> +using namespace llvm; + +#define DEBUG_TYPE "clone-function" + +/// See comments in Cloning.h. +BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, + const Twine &NameSuffix, Function *F, + ClonedCodeInfo *CodeInfo, + DebugInfoFinder *DIFinder) { + BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F); + if (BB->hasName()) + NewBB->setName(BB->getName() + NameSuffix); + + bool hasCalls = false, hasDynamicAllocas = false, hasMemProfMetadata = false; + Module *TheModule = F ? F->getParent() : nullptr; + + // Loop over all instructions, and copy them over. + for (const Instruction &I : *BB) { + if (DIFinder && TheModule) + DIFinder->processInstruction(*TheModule, I); + + Instruction *NewInst = I.clone(); + if (I.hasName()) + NewInst->setName(I.getName() + NameSuffix); + NewInst->insertInto(NewBB, NewBB->end()); + VMap[&I] = NewInst; // Add instruction map to value. + + if (isa<CallInst>(I) && !I.isDebugOrPseudoInst()) { + hasCalls = true; + hasMemProfMetadata |= I.hasMetadata(LLVMContext::MD_memprof); + } + if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) { + if (!AI->isStaticAlloca()) { + hasDynamicAllocas = true; + } + } + } + + if (CodeInfo) { + CodeInfo->ContainsCalls |= hasCalls; + CodeInfo->ContainsMemProfMetadata |= hasMemProfMetadata; + CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; + } + return NewBB; +} + +// Clone OldFunc into NewFunc, transforming the old arguments into references to +// VMap values. 
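+// A typical call (illustrative) seeds VMap with the argument mapping first,
+// mirroring what CloneFunction() below does:
+//
+// ValueToValueMapTy VMap;
+// for (const Argument &A : OldF->args())
+// VMap[&A] = /* corresponding value (e.g. argument) in NewF */;
+// SmallVector<ReturnInst *, 8> Returns;
+// CloneFunctionInto(NewF, OldF, VMap,
+// CloneFunctionChangeType::LocalChangesOnly, Returns, "");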
+// +void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, + ValueToValueMapTy &VMap, + CloneFunctionChangeType Changes, + SmallVectorImpl<ReturnInst *> &Returns, + const char *NameSuffix, ClonedCodeInfo *CodeInfo, + ValueMapTypeRemapper *TypeMapper, + ValueMaterializer *Materializer) { + assert(NameSuffix && "NameSuffix cannot be null!"); + +#ifndef NDEBUG + for (const Argument &I : OldFunc->args()) + assert(VMap.count(&I) && "No mapping from source argument specified!"); +#endif + + bool ModuleLevelChanges = Changes > CloneFunctionChangeType::LocalChangesOnly; + + // Copy all attributes other than those stored in the AttributeList. We need + // to remap the parameter indices of the AttributeList. + AttributeList NewAttrs = NewFunc->getAttributes(); + NewFunc->copyAttributesFrom(OldFunc); + NewFunc->setAttributes(NewAttrs); + + const RemapFlags FuncGlobalRefFlags = + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges; + + // Fix up the personality function that got copied over. + if (OldFunc->hasPersonalityFn()) + NewFunc->setPersonalityFn(MapValue(OldFunc->getPersonalityFn(), VMap, + FuncGlobalRefFlags, TypeMapper, + Materializer)); + + if (OldFunc->hasPrefixData()) { + NewFunc->setPrefixData(MapValue(OldFunc->getPrefixData(), VMap, + FuncGlobalRefFlags, TypeMapper, + Materializer)); + } + + if (OldFunc->hasPrologueData()) { + NewFunc->setPrologueData(MapValue(OldFunc->getPrologueData(), VMap, + FuncGlobalRefFlags, TypeMapper, + Materializer)); + } + + SmallVector<AttributeSet, 4> NewArgAttrs(NewFunc->arg_size()); + AttributeList OldAttrs = OldFunc->getAttributes(); + + // Clone any argument attributes that are present in the VMap. + for (const Argument &OldArg : OldFunc->args()) { + if (Argument *NewArg = dyn_cast<Argument>(VMap[&OldArg])) { + NewArgAttrs[NewArg->getArgNo()] = + OldAttrs.getParamAttrs(OldArg.getArgNo()); + } + } + + NewFunc->setAttributes( + AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttrs(), + OldAttrs.getRetAttrs(), NewArgAttrs)); + + // Everything else beyond this point deals with function instructions, + // so if we are dealing with a function declaration, we're done. + if (OldFunc->isDeclaration()) + return; + + // When we remap instructions within the same module, we want to avoid + // duplicating inlined DISubprograms, so record all subprograms we find as we + // duplicate instructions and then freeze them in the MD map. We also record + // information about dbg.value and dbg.declare to avoid duplicating the + // types. + std::optional<DebugInfoFinder> DIFinder; + + // Track the subprogram attachment that needs to be cloned to fine-tune the + // mapping within the same module. + DISubprogram *SPClonedWithinModule = nullptr; + if (Changes < CloneFunctionChangeType::DifferentModule) { + assert((NewFunc->getParent() == nullptr || + NewFunc->getParent() == OldFunc->getParent()) && + "Expected NewFunc to have the same parent, or no parent"); + + // Need to find subprograms, types, and compile units. + DIFinder.emplace(); + + SPClonedWithinModule = OldFunc->getSubprogram(); + if (SPClonedWithinModule) + DIFinder->processSubprogram(SPClonedWithinModule); + } else { + assert((NewFunc->getParent() == nullptr || + NewFunc->getParent() != OldFunc->getParent()) && + "Expected NewFunc to have different parents, or no parent"); + + if (Changes == CloneFunctionChangeType::DifferentModule) { + assert(NewFunc->getParent() && + "Need parent of new function to maintain debug info invariants"); + + // Need to find all the compile units. 
+ DIFinder.emplace(); + } + } + + // Loop over all of the basic blocks in the function, cloning them as + // appropriate. Note that we save BE this way in order to handle cloning of + // recursive functions into themselves. + for (const BasicBlock &BB : *OldFunc) { + + // Create a new basic block and copy instructions into it! + BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo, + DIFinder ? &*DIFinder : nullptr); + + // Add basic block mapping. + VMap[&BB] = CBB; + + // It is only legal to clone a function if a block address within that + // function is never referenced outside of the function. Given that, we + // want to map block addresses from the old function to block addresses in + // the clone. (This is different from the generic ValueMapper + // implementation, which generates an invalid blockaddress when + // cloning a function.) + if (BB.hasAddressTaken()) { + Constant *OldBBAddr = BlockAddress::get(const_cast<Function *>(OldFunc), + const_cast<BasicBlock *>(&BB)); + VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB); + } + + // Note return instructions for the caller. + if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator())) + Returns.push_back(RI); + } + + if (Changes < CloneFunctionChangeType::DifferentModule && + DIFinder->subprogram_count() > 0) { + // Turn on module-level changes, since we need to clone (some of) the + // debug info metadata. + // + // FIXME: Metadata effectively owned by a function should be made + // local, and only that local metadata should be cloned. + ModuleLevelChanges = true; + + auto mapToSelfIfNew = [&VMap](MDNode *N) { + // Avoid clobbering an existing mapping. + (void)VMap.MD().try_emplace(N, N); + }; + + // Avoid cloning types, compile units, and (other) subprograms. + SmallPtrSet<const DISubprogram *, 16> MappedToSelfSPs; + for (DISubprogram *ISP : DIFinder->subprograms()) { + if (ISP != SPClonedWithinModule) { + mapToSelfIfNew(ISP); + MappedToSelfSPs.insert(ISP); + } + } + + // If a subprogram isn't going to be cloned skip its lexical blocks as well. + for (DIScope *S : DIFinder->scopes()) { + auto *LScope = dyn_cast<DILocalScope>(S); + if (LScope && MappedToSelfSPs.count(LScope->getSubprogram())) + mapToSelfIfNew(S); + } + + for (DICompileUnit *CU : DIFinder->compile_units()) + mapToSelfIfNew(CU); + + for (DIType *Type : DIFinder->types()) + mapToSelfIfNew(Type); + } else { + assert(!SPClonedWithinModule && + "Subprogram should be in DIFinder->subprogram_count()..."); + } + + const auto RemapFlag = ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges; + // Duplicate the metadata that is attached to the cloned function. + // Subprograms/CUs/types that were already mapped to themselves won't be + // duplicated. + SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; + OldFunc->getAllMetadata(MDs); + for (auto MD : MDs) { + NewFunc->addMetadata(MD.first, *MapMetadata(MD.second, VMap, RemapFlag, + TypeMapper, Materializer)); + } + + // Loop over all of the instructions in the new function, fixing up operand + // references as we go. This uses VMap to do all the hard work. + for (Function::iterator + BB = cast<BasicBlock>(VMap[&OldFunc->front()])->getIterator(), + BE = NewFunc->end(); + BB != BE; ++BB) + // Loop over all instructions, fixing each one as we find it... + for (Instruction &II : *BB) + RemapInstruction(&II, VMap, RemapFlag, TypeMapper, Materializer); + + // Only update !llvm.dbg.cu for DifferentModule (not CloneModule). In the + // same module, the compile unit will already be listed (or not). 
When
+ // cloning a module, CloneModule() will handle creating the named metadata.
+ if (Changes != CloneFunctionChangeType::DifferentModule)
+ return;
+
+ // Update !llvm.dbg.cu with compile units added to the new module if this
+ // function is being cloned in isolation.
+ //
+ // FIXME: This is making global / module-level changes, which doesn't seem
+ // like the right encapsulation. Consider dropping the requirement to update
+ // !llvm.dbg.cu (either obsoleting the node, or restricting it to
+ // non-discardable compile units) instead of discovering compile units by
+ // visiting the metadata attached to global values, which would allow this
+ // code to be deleted. Alternatively, perhaps give responsibility for this
+ // update to CloneFunctionInto's callers.
+ auto *NewModule = NewFunc->getParent();
+ auto *NMD = NewModule->getOrInsertNamedMetadata("llvm.dbg.cu");
+ // Avoid multiple insertions of the same DICompileUnit to NMD.
+ SmallPtrSet<const void *, 8> Visited;
+ for (auto *Operand : NMD->operands())
+ Visited.insert(Operand);
+ for (auto *Unit : DIFinder->compile_units()) {
+ MDNode *MappedUnit =
+ MapMetadata(Unit, VMap, RF_None, TypeMapper, Materializer);
+ if (Visited.insert(MappedUnit).second)
+ NMD->addOperand(MappedUnit);
+ }
+}
+
+/// Return a copy of the specified function and add it to that function's
+/// module. Also, any references specified in the VMap are changed to refer to
+/// their mapped value instead of the original one. If any of the arguments to
+/// the function are in the VMap, the arguments are deleted from the resultant
+/// function. The VMap is updated to include mappings from all of the
+/// instructions and basic blocks in the function from their old to new values.
+///
+Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap,
+ ClonedCodeInfo *CodeInfo) {
+ std::vector<Type *> ArgTypes;
+
+ // The user might be deleting arguments to the function by specifying them in
+ // the VMap. If so, we should not add those arguments to the argument type
+ // vector.
+ for (const Argument &I : F->args())
+ if (VMap.count(&I) == 0) // Haven't mapped the argument to anything yet?
+ ArgTypes.push_back(I.getType());
+
+ // Create a new function type...
+ FunctionType *FTy =
+ FunctionType::get(F->getFunctionType()->getReturnType(), ArgTypes,
+ F->getFunctionType()->isVarArg());
+
+ // Create the new function...
+ Function *NewF = Function::Create(FTy, F->getLinkage(), F->getAddressSpace(),
+ F->getName(), F->getParent());
+
+ // Loop over the arguments, copying the names of the mapped arguments over...
+ Function::arg_iterator DestI = NewF->arg_begin();
+ for (const Argument &I : F->args())
+ if (VMap.count(&I) == 0) { // Is this argument preserved?
+ DestI->setName(I.getName()); // Copy the name over...
+ VMap[&I] = &*DestI++; // Add mapping to VMap
+ }
+
+ SmallVector<ReturnInst *, 8> Returns; // Ignore returns cloned.
+ CloneFunctionInto(NewF, F, VMap, CloneFunctionChangeType::LocalChangesOnly,
+ Returns, "", CodeInfo);
+
+ return NewF;
+}
+
+namespace {
+/// This is a private class used to implement CloneAndPruneFunctionInto.
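+/// Unlike CloneFunctionInto above, the pruning cloner constant-folds
+/// conditional branches and switches whose condition becomes a known constant
+/// during cloning, and it only clones blocks that remain reachable, so the
+/// copy can be significantly smaller than the original.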
+struct PruningFunctionCloner {
+ Function *NewFunc;
+ const Function *OldFunc;
+ ValueToValueMapTy &VMap;
+ bool ModuleLevelChanges;
+ const char *NameSuffix;
+ ClonedCodeInfo *CodeInfo;
+ bool HostFuncIsStrictFP;
+
+ Instruction *cloneInstruction(BasicBlock::const_iterator II);
+
+public:
+ PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
+ ValueToValueMapTy &valueMap, bool moduleLevelChanges,
+ const char *nameSuffix, ClonedCodeInfo *codeInfo)
+ : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap),
+ ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix),
+ CodeInfo(codeInfo) {
+ HostFuncIsStrictFP =
+ newFunc->getAttributes().hasFnAttr(Attribute::StrictFP);
+ }
+
+ /// The specified block is found to be reachable, clone it and
+ /// anything that it can reach.
+ void CloneBlock(const BasicBlock *BB, BasicBlock::const_iterator StartingInst,
+ std::vector<const BasicBlock *> &ToClone);
+};
+} // namespace
+
+static bool hasRoundingModeOperand(Intrinsic::ID CIID) {
+ switch (CIID) {
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
+ case Intrinsic::INTRINSIC: \
+ return ROUND_MODE == 1;
+#define FUNCTION INSTRUCTION
+#include "llvm/IR/ConstrainedOps.def"
+ default:
+ llvm_unreachable("Unexpected constrained intrinsic id");
+ }
+}
+
+Instruction *
+PruningFunctionCloner::cloneInstruction(BasicBlock::const_iterator II) {
+ const Instruction &OldInst = *II;
+ Instruction *NewInst = nullptr;
+ if (HostFuncIsStrictFP) {
+ Intrinsic::ID CIID = getConstrainedIntrinsicID(OldInst);
+ if (CIID != Intrinsic::not_intrinsic) {
+ // Instead of cloning the instruction, a call to the constrained intrinsic
+ // should be created.
+ // Assume the first arguments of constrained intrinsics are the same as
+ // the operands of the original instruction.
+
+ // Determine overloaded types of the intrinsic.
+ SmallVector<Type *, 2> TParams;
+ SmallVector<Intrinsic::IITDescriptor, 8> Descriptor;
+ getIntrinsicInfoTableEntries(CIID, Descriptor);
+ for (unsigned I = 0, E = Descriptor.size(); I != E; ++I) {
+ Intrinsic::IITDescriptor Operand = Descriptor[I];
+ switch (Operand.Kind) {
+ case Intrinsic::IITDescriptor::Argument:
+ if (Operand.getArgumentKind() !=
+ Intrinsic::IITDescriptor::AK_MatchType) {
+ if (I == 0)
+ TParams.push_back(OldInst.getType());
+ else
+ TParams.push_back(OldInst.getOperand(I - 1)->getType());
+ }
+ break;
+ case Intrinsic::IITDescriptor::SameVecWidthArgument:
+ ++I;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Create intrinsic call.
+ LLVMContext &Ctx = NewFunc->getContext();
+ Function *IFn =
+ Intrinsic::getDeclaration(NewFunc->getParent(), CIID, TParams);
+ SmallVector<Value *, 4> Args;
+ unsigned NumOperands = OldInst.getNumOperands();
+ if (isa<CallInst>(OldInst))
+ --NumOperands;
+ for (unsigned I = 0; I < NumOperands; ++I) {
+ Value *Op = OldInst.getOperand(I);
+ Args.push_back(Op);
+ }
+ if (const auto *CmpI = dyn_cast<FCmpInst>(&OldInst)) {
+ FCmpInst::Predicate Pred = CmpI->getPredicate();
+ StringRef PredName = FCmpInst::getPredicateName(Pred);
+ Args.push_back(MetadataAsValue::get(Ctx, MDString::get(Ctx, PredName)));
+ }
+
+ // The last arguments of a constrained intrinsic are metadata that
+ // represent rounding mode (absent from some intrinsics) and exception
+ // behavior. The inlined function uses default settings.
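+ // For example (illustrative), an 'fadd double %a, %b' cloned into a
+ // strictfp host function becomes:
+ // %s = call double @llvm.experimental.constrained.fadd.f64(
+ // double %a, double %b,
+ // metadata !"round.tonearest", metadata !"fpexcept.ignore")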
+ if (hasRoundingModeOperand(CIID)) + Args.push_back( + MetadataAsValue::get(Ctx, MDString::get(Ctx, "round.tonearest"))); + Args.push_back( + MetadataAsValue::get(Ctx, MDString::get(Ctx, "fpexcept.ignore"))); + + NewInst = CallInst::Create(IFn, Args, OldInst.getName() + ".strict"); + } + } + if (!NewInst) + NewInst = II->clone(); + return NewInst; +} + +/// The specified block is found to be reachable, clone it and +/// anything that it can reach. +void PruningFunctionCloner::CloneBlock( + const BasicBlock *BB, BasicBlock::const_iterator StartingInst, + std::vector<const BasicBlock *> &ToClone) { + WeakTrackingVH &BBEntry = VMap[BB]; + + // Have we already cloned this block? + if (BBEntry) + return; + + // Nope, clone it now. + BasicBlock *NewBB; + BBEntry = NewBB = BasicBlock::Create(BB->getContext()); + if (BB->hasName()) + NewBB->setName(BB->getName() + NameSuffix); + + // It is only legal to clone a function if a block address within that + // function is never referenced outside of the function. Given that, we + // want to map block addresses from the old function to block addresses in + // the clone. (This is different from the generic ValueMapper + // implementation, which generates an invalid blockaddress when + // cloning a function.) + // + // Note that we don't need to fix the mapping for unreachable blocks; + // the default mapping there is safe. + if (BB->hasAddressTaken()) { + Constant *OldBBAddr = BlockAddress::get(const_cast<Function *>(OldFunc), + const_cast<BasicBlock *>(BB)); + VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB); + } + + bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; + bool hasMemProfMetadata = false; + + // Loop over all instructions, and copy them over, DCE'ing as we go. This + // loop doesn't include the terminator. + for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end(); II != IE; + ++II) { + + Instruction *NewInst = cloneInstruction(II); + + if (HostFuncIsStrictFP) { + // All function calls in the inlined function must get 'strictfp' + // attribute to prevent undesirable optimizations. + if (auto *Call = dyn_cast<CallInst>(NewInst)) + Call->addFnAttr(Attribute::StrictFP); + } + + // Eagerly remap operands to the newly cloned instruction, except for PHI + // nodes for which we defer processing until we update the CFG. Also defer + // debug intrinsic processing because they may contain use-before-defs. + if (!isa<PHINode>(NewInst) && !isa<DbgVariableIntrinsic>(NewInst)) { + RemapInstruction(NewInst, VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); + + // If we can simplify this instruction to some other value, simply add + // a mapping to that value rather than inserting a new instruction into + // the basic block. + if (Value *V = + simplifyInstruction(NewInst, BB->getModule()->getDataLayout())) { + // On the off-chance that this simplifies to an instruction in the old + // function, map it back into the new function. + if (NewFunc != OldFunc) + if (Value *MappedV = VMap.lookup(V)) + V = MappedV; + + if (!NewInst->mayHaveSideEffects()) { + VMap[&*II] = V; + NewInst->deleteValue(); + continue; + } + } + } + + if (II->hasName()) + NewInst->setName(II->getName() + NameSuffix); + VMap[&*II] = NewInst; // Add instruction map to value. 
+ NewInst->insertInto(NewBB, NewBB->end()); + if (isa<CallInst>(II) && !II->isDebugOrPseudoInst()) { + hasCalls = true; + hasMemProfMetadata |= II->hasMetadata(LLVMContext::MD_memprof); + } + + if (CodeInfo) { + CodeInfo->OrigVMap[&*II] = NewInst; + if (auto *CB = dyn_cast<CallBase>(&*II)) + if (CB->hasOperandBundles()) + CodeInfo->OperandBundleCallSites.push_back(NewInst); + } + + if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { + if (isa<ConstantInt>(AI->getArraySize())) + hasStaticAllocas = true; + else + hasDynamicAllocas = true; + } + } + + // Finally, clone over the terminator. + const Instruction *OldTI = BB->getTerminator(); + bool TerminatorDone = false; + if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) { + if (BI->isConditional()) { + // If the condition was a known constant in the callee... + ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition()); + // Or is a known constant in the caller... + if (!Cond) { + Value *V = VMap.lookup(BI->getCondition()); + Cond = dyn_cast_or_null<ConstantInt>(V); + } + + // Constant fold to uncond branch! + if (Cond) { + BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue()); + VMap[OldTI] = BranchInst::Create(Dest, NewBB); + ToClone.push_back(Dest); + TerminatorDone = true; + } + } + } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) { + // If switching on a value known constant in the caller. + ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition()); + if (!Cond) { // Or known constant after constant prop in the callee... + Value *V = VMap.lookup(SI->getCondition()); + Cond = dyn_cast_or_null<ConstantInt>(V); + } + if (Cond) { // Constant fold to uncond branch! + SwitchInst::ConstCaseHandle Case = *SI->findCaseValue(Cond); + BasicBlock *Dest = const_cast<BasicBlock *>(Case.getCaseSuccessor()); + VMap[OldTI] = BranchInst::Create(Dest, NewBB); + ToClone.push_back(Dest); + TerminatorDone = true; + } + } + + if (!TerminatorDone) { + Instruction *NewInst = OldTI->clone(); + if (OldTI->hasName()) + NewInst->setName(OldTI->getName() + NameSuffix); + NewInst->insertInto(NewBB, NewBB->end()); + VMap[OldTI] = NewInst; // Add instruction map to value. + + if (CodeInfo) { + CodeInfo->OrigVMap[OldTI] = NewInst; + if (auto *CB = dyn_cast<CallBase>(OldTI)) + if (CB->hasOperandBundles()) + CodeInfo->OperandBundleCallSites.push_back(NewInst); + } + + // Recursively clone any reachable successor blocks. + append_range(ToClone, successors(BB->getTerminator())); + } + + if (CodeInfo) { + CodeInfo->ContainsCalls |= hasCalls; + CodeInfo->ContainsMemProfMetadata |= hasMemProfMetadata; + CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; + CodeInfo->ContainsDynamicAllocas |= + hasStaticAllocas && BB != &BB->getParent()->front(); + } +} + +/// This works like CloneAndPruneFunctionInto, except that it does not clone the +/// entire function. Instead it starts at an instruction provided by the caller +/// and copies (and prunes) only the code reachable from that instruction. +void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, + const Instruction *StartingInst, + ValueToValueMapTy &VMap, + bool ModuleLevelChanges, + SmallVectorImpl<ReturnInst *> &Returns, + const char *NameSuffix, + ClonedCodeInfo *CodeInfo) { + assert(NameSuffix && "NameSuffix cannot be null!"); + + ValueMapTypeRemapper *TypeMapper = nullptr; + ValueMaterializer *Materializer = nullptr; + +#ifndef NDEBUG + // If the cloning starts at the beginning of the function, verify that + // the function arguments are mapped. 
+ if (!StartingInst) + for (const Argument &II : OldFunc->args()) + assert(VMap.count(&II) && "No mapping from source argument specified!"); +#endif + + PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, + NameSuffix, CodeInfo); + const BasicBlock *StartingBB; + if (StartingInst) + StartingBB = StartingInst->getParent(); + else { + StartingBB = &OldFunc->getEntryBlock(); + StartingInst = &StartingBB->front(); + } + + // Collect debug intrinsics for remapping later. + SmallVector<const DbgVariableIntrinsic *, 8> DbgIntrinsics; + for (const auto &BB : *OldFunc) { + for (const auto &I : BB) { + if (const auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I)) + DbgIntrinsics.push_back(DVI); + } + } + + // Clone the entry block, and anything recursively reachable from it. + std::vector<const BasicBlock *> CloneWorklist; + PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist); + while (!CloneWorklist.empty()) { + const BasicBlock *BB = CloneWorklist.back(); + CloneWorklist.pop_back(); + PFC.CloneBlock(BB, BB->begin(), CloneWorklist); + } + + // Loop over all of the basic blocks in the old function. If the block was + // reachable, we have cloned it and the old block is now in the value map: + // insert it into the new function in the right order. If not, ignore it. + // + // Defer PHI resolution until rest of function is resolved. + SmallVector<const PHINode *, 16> PHIToResolve; + for (const BasicBlock &BI : *OldFunc) { + Value *V = VMap.lookup(&BI); + BasicBlock *NewBB = cast_or_null<BasicBlock>(V); + if (!NewBB) + continue; // Dead block. + + // Add the new block to the new function. + NewFunc->insert(NewFunc->end(), NewBB); + + // Handle PHI nodes specially, as we have to remove references to dead + // blocks. + for (const PHINode &PN : BI.phis()) { + // PHI nodes may have been remapped to non-PHI nodes by the caller or + // during the cloning process. + if (isa<PHINode>(VMap[&PN])) + PHIToResolve.push_back(&PN); + else + break; + } + + // Finally, remap the terminator instructions, as those can't be remapped + // until all BBs are mapped. + RemapInstruction(NewBB->getTerminator(), VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, + TypeMapper, Materializer); + } + + // Defer PHI resolution until rest of function is resolved, PHI resolution + // requires the CFG to be up-to-date. + for (unsigned phino = 0, e = PHIToResolve.size(); phino != e;) { + const PHINode *OPN = PHIToResolve[phino]; + unsigned NumPreds = OPN->getNumIncomingValues(); + const BasicBlock *OldBB = OPN->getParent(); + BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]); + + // Map operands for blocks that are live and remove operands for blocks + // that are dead. + for (; phino != PHIToResolve.size() && + PHIToResolve[phino]->getParent() == OldBB; + ++phino) { + OPN = PHIToResolve[phino]; + PHINode *PN = cast<PHINode>(VMap[OPN]); + for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) { + Value *V = VMap.lookup(PN->getIncomingBlock(pred)); + if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) { + Value *InVal = + MapValue(PN->getIncomingValue(pred), VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); + assert(InVal && "Unknown input value?"); + PN->setIncomingValue(pred, InVal); + PN->setIncomingBlock(pred, MappedBlock); + } else { + PN->removeIncomingValue(pred, false); + --pred; // Revisit the next entry. + --e; + } + } + } + + // The loop above has removed PHI entries for those blocks that are dead + // and has updated others. 
However, if a block is live (i.e. copied over) + // but its terminator has been changed to not go to this block, then our + // phi nodes will have invalid entries. Update the PHI nodes in this + // case. + PHINode *PN = cast<PHINode>(NewBB->begin()); + NumPreds = pred_size(NewBB); + if (NumPreds != PN->getNumIncomingValues()) { + assert(NumPreds < PN->getNumIncomingValues()); + // Count how many times each predecessor comes to this block. + std::map<BasicBlock *, unsigned> PredCount; + for (BasicBlock *Pred : predecessors(NewBB)) + --PredCount[Pred]; + + // Figure out how many entries to remove from each PHI. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + ++PredCount[PN->getIncomingBlock(i)]; + + // At this point, the excess predecessor entries are positive in the + // map. Loop over all of the PHIs and remove excess predecessor + // entries. + BasicBlock::iterator I = NewBB->begin(); + for (; (PN = dyn_cast<PHINode>(I)); ++I) { + for (const auto &PCI : PredCount) { + BasicBlock *Pred = PCI.first; + for (unsigned NumToRemove = PCI.second; NumToRemove; --NumToRemove) + PN->removeIncomingValue(Pred, false); + } + } + } + + // If the loops above have made these phi nodes have 0 or 1 operand, + // replace them with poison or the input value. We must do this for + // correctness, because 0-operand phis are not valid. + PN = cast<PHINode>(NewBB->begin()); + if (PN->getNumIncomingValues() == 0) { + BasicBlock::iterator I = NewBB->begin(); + BasicBlock::const_iterator OldI = OldBB->begin(); + while ((PN = dyn_cast<PHINode>(I++))) { + Value *NV = PoisonValue::get(PN->getType()); + PN->replaceAllUsesWith(NV); + assert(VMap[&*OldI] == PN && "VMap mismatch"); + VMap[&*OldI] = NV; + PN->eraseFromParent(); + ++OldI; + } + } + } + + // Make a second pass over the PHINodes now that all of them have been + // remapped into the new function, simplifying the PHINode and performing any + // recursive simplifications exposed. This will transparently update the + // WeakTrackingVH in the VMap. Notably, we rely on that so that if we coalesce + // two PHINodes, the iteration over the old PHIs remains valid, and the + // mapping will just map us to the new node (which may not even be a PHI + // node). + const DataLayout &DL = NewFunc->getParent()->getDataLayout(); + SmallSetVector<const Value *, 8> Worklist; + for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx) + if (isa<PHINode>(VMap[PHIToResolve[Idx]])) + Worklist.insert(PHIToResolve[Idx]); + + // Note that we must test the size on each iteration, the worklist can grow. + for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) { + const Value *OrigV = Worklist[Idx]; + auto *I = dyn_cast_or_null<Instruction>(VMap.lookup(OrigV)); + if (!I) + continue; + + // Skip over non-intrinsic callsites, we don't want to remove any nodes from + // the CGSCC. + CallBase *CB = dyn_cast<CallBase>(I); + if (CB && CB->getCalledFunction() && + !CB->getCalledFunction()->isIntrinsic()) + continue; + + // See if this instruction simplifies. + Value *SimpleV = simplifyInstruction(I, DL); + if (!SimpleV) + continue; + + // Stash away all the uses of the old instruction so we can check them for + // recursive simplifications after a RAUW. This is cheaper than checking all + // uses of To on the recursive step in most cases. + for (const User *U : OrigV->users()) + Worklist.insert(cast<Instruction>(U)); + + // Replace the instruction with its simplified value. 
+ I->replaceAllUsesWith(SimpleV); + + // If the original instruction had no side effects, remove it. + if (isInstructionTriviallyDead(I)) + I->eraseFromParent(); + else + VMap[OrigV] = I; + } + + // Remap debug intrinsic operands now that all values have been mapped. + // Doing this now (late) preserves use-before-defs in debug intrinsics. If + // we didn't do this, ValueAsMetadata(use-before-def) operands would be + // replaced by empty metadata. This would signal later cleanup passes to + // remove the debug intrinsics, potentially causing incorrect locations. + for (const auto *DVI : DbgIntrinsics) { + if (DbgVariableIntrinsic *NewDVI = + cast_or_null<DbgVariableIntrinsic>(VMap.lookup(DVI))) + RemapInstruction(NewDVI, VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, + TypeMapper, Materializer); + } + + // Simplify conditional branches and switches with a constant operand. We try + // to prune these out when cloning, but if the simplification required + // looking through PHI nodes, those are only available after forming the full + // basic block. That may leave some here, and we still want to prune the dead + // code as early as possible. + Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator(); + for (BasicBlock &BB : make_range(Begin, NewFunc->end())) + ConstantFoldTerminator(&BB); + + // Some blocks may have become unreachable as a result. Find and delete them. + { + SmallPtrSet<BasicBlock *, 16> ReachableBlocks; + SmallVector<BasicBlock *, 16> Worklist; + Worklist.push_back(&*Begin); + while (!Worklist.empty()) { + BasicBlock *BB = Worklist.pop_back_val(); + if (ReachableBlocks.insert(BB).second) + append_range(Worklist, successors(BB)); + } + + SmallVector<BasicBlock *, 16> UnreachableBlocks; + for (BasicBlock &BB : make_range(Begin, NewFunc->end())) + if (!ReachableBlocks.contains(&BB)) + UnreachableBlocks.push_back(&BB); + DeleteDeadBlocks(UnreachableBlocks); + } + + // Now that the inlined function body has been fully constructed, go through + // and zap unconditional fall-through branches. This happens all the time when + // specializing code: code specialization turns conditional branches into + // uncond branches, and this code folds them. + Function::iterator I = Begin; + while (I != NewFunc->end()) { + BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator()); + if (!BI || BI->isConditional()) { + ++I; + continue; + } + + BasicBlock *Dest = BI->getSuccessor(0); + if (!Dest->getSinglePredecessor()) { + ++I; + continue; + } + + // We shouldn't be able to get single-entry PHI nodes here, as instsimplify + // above should have zapped all of them.. + assert(!isa<PHINode>(Dest->begin())); + + // We know all single-entry PHI nodes in the inlined function have been + // removed, so we just need to splice the blocks. + BI->eraseFromParent(); + + // Make all PHI nodes that referred to Dest now refer to I as their source. + Dest->replaceAllUsesWith(&*I); + + // Move all the instructions in the succ to the pred. + I->splice(I->end(), Dest); + + // Remove the dest block. + Dest->eraseFromParent(); + + // Do not increment I, iteratively merge all things this block branches to. + } + + // Make a final pass over the basic blocks from the old function to gather + // any return instructions which survived folding. We have to do this here + // because we can iteratively remove and merge returns above. 
+ for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB])->getIterator(), + E = NewFunc->end(); + I != E; ++I) + if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) + Returns.push_back(RI); +} + +/// This works exactly like CloneFunctionInto, +/// except that it does some simple constant prop and DCE on the fly. The +/// effect of this is to copy significantly less code in cases where (for +/// example) a function call with constant arguments is inlined, and those +/// constant arguments cause a significant amount of code in the callee to be +/// dead. Since this doesn't produce an exact copy of the input, it can't be +/// used for things like CloneFunction or CloneModule. +void llvm::CloneAndPruneFunctionInto( + Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, + bool ModuleLevelChanges, SmallVectorImpl<ReturnInst *> &Returns, + const char *NameSuffix, ClonedCodeInfo *CodeInfo) { + CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap, + ModuleLevelChanges, Returns, NameSuffix, CodeInfo); +} + +/// Remaps instructions in \p Blocks using the mapping in \p VMap. +void llvm::remapInstructionsInBlocks( + const SmallVectorImpl<BasicBlock *> &Blocks, ValueToValueMapTy &VMap) { + // Rewrite the code to refer to itself. + for (auto *BB : Blocks) + for (auto &Inst : *BB) + RemapInstruction(&Inst, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); +} + +/// Clones a loop \p OrigLoop. Returns the loop and the blocks in \p +/// Blocks. +/// +/// Updates LoopInfo and DominatorTree assuming the loop is dominated by block +/// \p LoopDomBB. Insert the new blocks before block specified in \p Before. +Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, + Loop *OrigLoop, ValueToValueMapTy &VMap, + const Twine &NameSuffix, LoopInfo *LI, + DominatorTree *DT, + SmallVectorImpl<BasicBlock *> &Blocks) { + Function *F = OrigLoop->getHeader()->getParent(); + Loop *ParentLoop = OrigLoop->getParentLoop(); + DenseMap<Loop *, Loop *> LMap; + + Loop *NewLoop = LI->AllocateLoop(); + LMap[OrigLoop] = NewLoop; + if (ParentLoop) + ParentLoop->addChildLoop(NewLoop); + else + LI->addTopLevelLoop(NewLoop); + + BasicBlock *OrigPH = OrigLoop->getLoopPreheader(); + assert(OrigPH && "No preheader"); + BasicBlock *NewPH = CloneBasicBlock(OrigPH, VMap, NameSuffix, F); + // To rename the loop PHIs. + VMap[OrigPH] = NewPH; + Blocks.push_back(NewPH); + + // Update LoopInfo. + if (ParentLoop) + ParentLoop->addBasicBlockToLoop(NewPH, *LI); + + // Update DominatorTree. + DT->addNewBlock(NewPH, LoopDomBB); + + for (Loop *CurLoop : OrigLoop->getLoopsInPreorder()) { + Loop *&NewLoop = LMap[CurLoop]; + if (!NewLoop) { + NewLoop = LI->AllocateLoop(); + + // Establish the parent/child relationship. + Loop *OrigParent = CurLoop->getParentLoop(); + assert(OrigParent && "Could not find the original parent loop"); + Loop *NewParentLoop = LMap[OrigParent]; + assert(NewParentLoop && "Could not find the new parent loop"); + + NewParentLoop->addChildLoop(NewLoop); + } + } + + for (BasicBlock *BB : OrigLoop->getBlocks()) { + Loop *CurLoop = LI->getLoopFor(BB); + Loop *&NewLoop = LMap[CurLoop]; + assert(NewLoop && "Expecting new loop to be allocated"); + + BasicBlock *NewBB = CloneBasicBlock(BB, VMap, NameSuffix, F); + VMap[BB] = NewBB; + + // Update LoopInfo. + NewLoop->addBasicBlockToLoop(NewBB, *LI); + + // Add DominatorTree node. After seeing all blocks, update to correct + // IDom. 
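+ // (Using NewPH as the parent here is only a placeholder so the node exists;
+ // the real immediate dominators are copied from the original loop in the
+ // second loop below via changeImmediateDominator.)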
+ DT->addNewBlock(NewBB, NewPH); + + Blocks.push_back(NewBB); + } + + for (BasicBlock *BB : OrigLoop->getBlocks()) { + // Update loop headers. + Loop *CurLoop = LI->getLoopFor(BB); + if (BB == CurLoop->getHeader()) + LMap[CurLoop]->moveToHeader(cast<BasicBlock>(VMap[BB])); + + // Update DominatorTree. + BasicBlock *IDomBB = DT->getNode(BB)->getIDom()->getBlock(); + DT->changeImmediateDominator(cast<BasicBlock>(VMap[BB]), + cast<BasicBlock>(VMap[IDomBB])); + } + + // Move them physically from the end of the block list. + F->splice(Before->getIterator(), F, NewPH->getIterator()); + F->splice(Before->getIterator(), F, NewLoop->getHeader()->getIterator(), + F->end()); + + return NewLoop; +} + +/// Duplicate non-Phi instructions from the beginning of block up to +/// StopAt instruction into a split block between BB and its predecessor. +BasicBlock *llvm::DuplicateInstructionsInSplitBetween( + BasicBlock *BB, BasicBlock *PredBB, Instruction *StopAt, + ValueToValueMapTy &ValueMapping, DomTreeUpdater &DTU) { + + assert(count(successors(PredBB), BB) == 1 && + "There must be a single edge between PredBB and BB!"); + // We are going to have to map operands from the original BB block to the new + // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to + // account for entry from PredBB. + BasicBlock::iterator BI = BB->begin(); + for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) + ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB); + + BasicBlock *NewBB = SplitEdge(PredBB, BB); + NewBB->setName(PredBB->getName() + ".split"); + Instruction *NewTerm = NewBB->getTerminator(); + + // FIXME: SplitEdge does not yet take a DTU, so we include the split edge + // in the update set here. + DTU.applyUpdates({{DominatorTree::Delete, PredBB, BB}, + {DominatorTree::Insert, PredBB, NewBB}, + {DominatorTree::Insert, NewBB, BB}}); + + // Clone the non-phi instructions of BB into NewBB, keeping track of the + // mapping and using it to remap operands in the cloned instructions. + // Stop once we see the terminator too. This covers the case where BB's + // terminator gets replaced and StopAt == BB's terminator. + for (; StopAt != &*BI && BB->getTerminator() != &*BI; ++BI) { + Instruction *New = BI->clone(); + New->setName(BI->getName()); + New->insertBefore(NewTerm); + ValueMapping[&*BI] = New; + + // Remap operands to patch up intra-block references. 
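+ // For example, if BB contained "%a = add i32 %x, 1" followed by
+ // "%b = mul i32 %a, 2", the cloned mul is rewritten here to use the clone of
+ // %a in NewBB rather than the original %a, which stays behind in BB.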
+ for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) + if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) { + auto I = ValueMapping.find(Inst); + if (I != ValueMapping.end()) + New->setOperand(i, I->second); + } + } + + return NewBB; +} + +void llvm::cloneNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes, + DenseMap<MDNode *, MDNode *> &ClonedScopes, + StringRef Ext, LLVMContext &Context) { + MDBuilder MDB(Context); + + for (auto *ScopeList : NoAliasDeclScopes) { + for (const auto &MDOperand : ScopeList->operands()) { + if (MDNode *MD = dyn_cast<MDNode>(MDOperand)) { + AliasScopeNode SNANode(MD); + + std::string Name; + auto ScopeName = SNANode.getName(); + if (!ScopeName.empty()) + Name = (Twine(ScopeName) + ":" + Ext).str(); + else + Name = std::string(Ext); + + MDNode *NewScope = MDB.createAnonymousAliasScope( + const_cast<MDNode *>(SNANode.getDomain()), Name); + ClonedScopes.insert(std::make_pair(MD, NewScope)); + } + } + } +} + +void llvm::adaptNoAliasScopes(Instruction *I, + const DenseMap<MDNode *, MDNode *> &ClonedScopes, + LLVMContext &Context) { + auto CloneScopeList = [&](const MDNode *ScopeList) -> MDNode * { + bool NeedsReplacement = false; + SmallVector<Metadata *, 8> NewScopeList; + for (const auto &MDOp : ScopeList->operands()) { + if (MDNode *MD = dyn_cast<MDNode>(MDOp)) { + if (auto *NewMD = ClonedScopes.lookup(MD)) { + NewScopeList.push_back(NewMD); + NeedsReplacement = true; + continue; + } + NewScopeList.push_back(MD); + } + } + if (NeedsReplacement) + return MDNode::get(Context, NewScopeList); + return nullptr; + }; + + if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(I)) + if (auto *NewScopeList = CloneScopeList(Decl->getScopeList())) + Decl->setScopeList(NewScopeList); + + auto replaceWhenNeeded = [&](unsigned MD_ID) { + if (const MDNode *CSNoAlias = I->getMetadata(MD_ID)) + if (auto *NewScopeList = CloneScopeList(CSNoAlias)) + I->setMetadata(MD_ID, NewScopeList); + }; + replaceWhenNeeded(LLVMContext::MD_noalias); + replaceWhenNeeded(LLVMContext::MD_alias_scope); +} + +void llvm::cloneAndAdaptNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes, + ArrayRef<BasicBlock *> NewBlocks, + LLVMContext &Context, StringRef Ext) { + if (NoAliasDeclScopes.empty()) + return; + + DenseMap<MDNode *, MDNode *> ClonedScopes; + LLVM_DEBUG(dbgs() << "cloneAndAdaptNoAliasScopes: cloning " + << NoAliasDeclScopes.size() << " node(s)\n"); + + cloneNoAliasScopes(NoAliasDeclScopes, ClonedScopes, Ext, Context); + // Identify instructions using metadata that needs adaptation + for (BasicBlock *NewBlock : NewBlocks) + for (Instruction &I : *NewBlock) + adaptNoAliasScopes(&I, ClonedScopes, Context); +} + +void llvm::cloneAndAdaptNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes, + Instruction *IStart, Instruction *IEnd, + LLVMContext &Context, StringRef Ext) { + if (NoAliasDeclScopes.empty()) + return; + + DenseMap<MDNode *, MDNode *> ClonedScopes; + LLVM_DEBUG(dbgs() << "cloneAndAdaptNoAliasScopes: cloning " + << NoAliasDeclScopes.size() << " node(s)\n"); + + cloneNoAliasScopes(NoAliasDeclScopes, ClonedScopes, Ext, Context); + // Identify instructions using metadata that needs adaptation + assert(IStart->getParent() == IEnd->getParent() && "different basic block ?"); + auto ItStart = IStart->getIterator(); + auto ItEnd = IEnd->getIterator(); + ++ItEnd; // IEnd is included, increment ItEnd to get the end of the range + for (auto &I : llvm::make_range(ItStart, ItEnd)) + adaptNoAliasScopes(&I, ClonedScopes, Context); +} + +void llvm::identifyNoAliasScopesToClone( + 
ArrayRef<BasicBlock *> BBs, SmallVectorImpl<MDNode *> &NoAliasDeclScopes) { + for (BasicBlock *BB : BBs) + for (Instruction &I : *BB) + if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) + NoAliasDeclScopes.push_back(Decl->getScopeList()); +} + +void llvm::identifyNoAliasScopesToClone( + BasicBlock::iterator Start, BasicBlock::iterator End, + SmallVectorImpl<MDNode *> &NoAliasDeclScopes) { + for (Instruction &I : make_range(Start, End)) + if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) + NoAliasDeclScopes.push_back(Decl->getScopeList()); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/CloneModule.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/CloneModule.cpp new file mode 100644 index 0000000000..55e051298a --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/CloneModule.cpp @@ -0,0 +1,218 @@ +//===- CloneModule.cpp - Clone an entire module ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the CloneModule interface which makes a copy of an +// entire module. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +using namespace llvm; + +namespace llvm { +class Constant; +} + +static void copyComdat(GlobalObject *Dst, const GlobalObject *Src) { + const Comdat *SC = Src->getComdat(); + if (!SC) + return; + Comdat *DC = Dst->getParent()->getOrInsertComdat(SC->getName()); + DC->setSelectionKind(SC->getSelectionKind()); + Dst->setComdat(DC); +} + +/// This is not as easy as it might seem because we have to worry about making +/// copies of global variables and functions, and making their (initializers and +/// references, respectively) refer to the right globals. +/// +std::unique_ptr<Module> llvm::CloneModule(const Module &M) { + // Create the value map that maps things from the old module over to the new + // module. + ValueToValueMapTy VMap; + return CloneModule(M, VMap); +} + +std::unique_ptr<Module> llvm::CloneModule(const Module &M, + ValueToValueMapTy &VMap) { + return CloneModule(M, VMap, [](const GlobalValue *GV) { return true; }); +} + +std::unique_ptr<Module> llvm::CloneModule( + const Module &M, ValueToValueMapTy &VMap, + function_ref<bool(const GlobalValue *)> ShouldCloneDefinition) { + // First off, we need to create the new module. + std::unique_ptr<Module> New = + std::make_unique<Module>(M.getModuleIdentifier(), M.getContext()); + New->setSourceFileName(M.getSourceFileName()); + New->setDataLayout(M.getDataLayout()); + New->setTargetTriple(M.getTargetTriple()); + New->setModuleInlineAsm(M.getModuleInlineAsm()); + + // Loop over all of the global variables, making corresponding globals in the + // new module. Here we add them to the VMap and to the new Module. We + // don't worry about attributes or initializers, they will come later. 
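+ // The clone is built in two passes: declarations for every global variable,
+ // function, alias and ifunc are created and recorded in VMap first, and only
+ // once every value has a mapping are initializers, bodies, aliasees and
+ // resolvers filled in. A typical caller (illustrative):
+ //   ValueToValueMapTy VMap;
+ //   std::unique_ptr<Module> Copy = CloneModule(M, VMap);
+ //   auto *NewF = cast<Function>(VMap[&OldF]);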
+ // + for (const GlobalVariable &I : M.globals()) { + GlobalVariable *NewGV = new GlobalVariable( + *New, I.getValueType(), I.isConstant(), I.getLinkage(), + (Constant *)nullptr, I.getName(), (GlobalVariable *)nullptr, + I.getThreadLocalMode(), I.getType()->getAddressSpace()); + NewGV->copyAttributesFrom(&I); + VMap[&I] = NewGV; + } + + // Loop over the functions in the module, making external functions as before + for (const Function &I : M) { + Function *NF = + Function::Create(cast<FunctionType>(I.getValueType()), I.getLinkage(), + I.getAddressSpace(), I.getName(), New.get()); + NF->copyAttributesFrom(&I); + VMap[&I] = NF; + } + + // Loop over the aliases in the module + for (const GlobalAlias &I : M.aliases()) { + if (!ShouldCloneDefinition(&I)) { + // An alias cannot act as an external reference, so we need to create + // either a function or a global variable depending on the value type. + // FIXME: Once pointee types are gone we can probably pick one or the + // other. + GlobalValue *GV; + if (I.getValueType()->isFunctionTy()) + GV = Function::Create(cast<FunctionType>(I.getValueType()), + GlobalValue::ExternalLinkage, I.getAddressSpace(), + I.getName(), New.get()); + else + GV = new GlobalVariable(*New, I.getValueType(), false, + GlobalValue::ExternalLinkage, nullptr, + I.getName(), nullptr, I.getThreadLocalMode(), + I.getType()->getAddressSpace()); + VMap[&I] = GV; + // We do not copy attributes (mainly because copying between different + // kinds of globals is forbidden), but this is generally not required for + // correctness. + continue; + } + auto *GA = GlobalAlias::create(I.getValueType(), + I.getType()->getPointerAddressSpace(), + I.getLinkage(), I.getName(), New.get()); + GA->copyAttributesFrom(&I); + VMap[&I] = GA; + } + + for (const GlobalIFunc &I : M.ifuncs()) { + // Defer setting the resolver function until after functions are cloned. + auto *GI = + GlobalIFunc::create(I.getValueType(), I.getAddressSpace(), + I.getLinkage(), I.getName(), nullptr, New.get()); + GI->copyAttributesFrom(&I); + VMap[&I] = GI; + } + + // Now that all of the things that global variable initializer can refer to + // have been created, loop through and copy the global variable referrers + // over... We also set the attributes on the global now. + // + for (const GlobalVariable &G : M.globals()) { + GlobalVariable *GV = cast<GlobalVariable>(VMap[&G]); + + SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; + G.getAllMetadata(MDs); + for (auto MD : MDs) + GV->addMetadata(MD.first, *MapMetadata(MD.second, VMap)); + + if (G.isDeclaration()) + continue; + + if (!ShouldCloneDefinition(&G)) { + // Skip after setting the correct linkage for an external reference. + GV->setLinkage(GlobalValue::ExternalLinkage); + continue; + } + if (G.hasInitializer()) + GV->setInitializer(MapValue(G.getInitializer(), VMap)); + + copyComdat(GV, &G); + } + + // Similarly, copy over function bodies now... + // + for (const Function &I : M) { + Function *F = cast<Function>(VMap[&I]); + + if (I.isDeclaration()) { + // Copy over metadata for declarations since we're not doing it below in + // CloneFunctionInto(). + SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; + I.getAllMetadata(MDs); + for (auto MD : MDs) + F->addMetadata(MD.first, *MapMetadata(MD.second, VMap)); + continue; + } + + if (!ShouldCloneDefinition(&I)) { + // Skip after setting the correct linkage for an external reference. + F->setLinkage(GlobalValue::ExternalLinkage); + // Personality function is not valid on a declaration. 
+ F->setPersonalityFn(nullptr); + continue; + } + + Function::arg_iterator DestI = F->arg_begin(); + for (const Argument &J : I.args()) { + DestI->setName(J.getName()); + VMap[&J] = &*DestI++; + } + + SmallVector<ReturnInst *, 8> Returns; // Ignore returns cloned. + CloneFunctionInto(F, &I, VMap, CloneFunctionChangeType::ClonedModule, + Returns); + + if (I.hasPersonalityFn()) + F->setPersonalityFn(MapValue(I.getPersonalityFn(), VMap)); + + copyComdat(F, &I); + } + + // And aliases + for (const GlobalAlias &I : M.aliases()) { + // We already dealt with undefined aliases above. + if (!ShouldCloneDefinition(&I)) + continue; + GlobalAlias *GA = cast<GlobalAlias>(VMap[&I]); + if (const Constant *C = I.getAliasee()) + GA->setAliasee(MapValue(C, VMap)); + } + + for (const GlobalIFunc &I : M.ifuncs()) { + GlobalIFunc *GI = cast<GlobalIFunc>(VMap[&I]); + if (const Constant *Resolver = I.getResolver()) + GI->setResolver(MapValue(Resolver, VMap)); + } + + // And named metadata.... + for (const NamedMDNode &NMD : M.named_metadata()) { + NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName()); + for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) + NewNMD->addOperand(MapMetadata(NMD.getOperand(i), VMap)); + } + + return New; +} + +extern "C" { + +LLVMModuleRef LLVMCloneModule(LLVMModuleRef M) { + return wrap(CloneModule(*unwrap(M)).release()); +} + +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/CodeExtractor.cpp new file mode 100644 index 0000000000..c1fe10504e --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/CodeExtractor.cpp @@ -0,0 +1,1894 @@ +//===- CodeExtractor.cpp - Pull code region into a new function -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the interface to tear out a code region, such as an +// individual loop or a parallel section, into a new function, replacing it with +// a call to the new function. 
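+//
+// Typical use (illustrative; BlocksToOutline, DT and ParentFunction are
+// placeholders for the caller's own values):
+//   CodeExtractor CE(BlocksToOutline, &DT);
+//   if (CE.isEligible()) {
+//     CodeExtractorAnalysisCache CEAC(ParentFunction);
+//     Function *Outlined = CE.extractCodeRegion(CEAC);
+//   }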
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/CodeExtractor.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/BlockFrequency.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include <cassert> +#include <cstdint> +#include <iterator> +#include <map> +#include <utility> +#include <vector> + +using namespace llvm; +using namespace llvm::PatternMatch; +using ProfileCount = Function::ProfileCount; + +#define DEBUG_TYPE "code-extractor" + +// Provide a command-line option to aggregate function arguments into a struct +// for functions produced by the code extractor. This is useful when converting +// extracted functions to pthread-based code, as only one argument (void*) can +// be passed in to pthread_create(). +static cl::opt<bool> +AggregateArgsOpt("aggregate-extracted-args", cl::Hidden, + cl::desc("Aggregate arguments to code-extracted functions")); + +/// Test whether a block is valid for extraction. 
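+/// A block is rejected if its address is taken, if any instruction in it
+/// (transitively through its operands) refers to a blockaddress, if it
+/// contains an alloca (unless AllowAlloca) or a call to llvm.va_start (unless
+/// AllowVarArgs), if it calls llvm.eh.typeid.for, or if any EH-related
+/// successor (invoke unwind destination, catchswitch handlers and unwind
+/// destination, catch/cleanup return targets) lies outside the candidate
+/// region.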
+static bool isBlockValidForExtraction(const BasicBlock &BB, + const SetVector<BasicBlock *> &Result, + bool AllowVarArgs, bool AllowAlloca) { + // taking the address of a basic block moved to another function is illegal + if (BB.hasAddressTaken()) + return false; + + // don't hoist code that uses another basicblock address, as it's likely to + // lead to unexpected behavior, like cross-function jumps + SmallPtrSet<User const *, 16> Visited; + SmallVector<User const *, 16> ToVisit; + + for (Instruction const &Inst : BB) + ToVisit.push_back(&Inst); + + while (!ToVisit.empty()) { + User const *Curr = ToVisit.pop_back_val(); + if (!Visited.insert(Curr).second) + continue; + if (isa<BlockAddress const>(Curr)) + return false; // even a reference to self is likely to be not compatible + + if (isa<Instruction>(Curr) && cast<Instruction>(Curr)->getParent() != &BB) + continue; + + for (auto const &U : Curr->operands()) { + if (auto *UU = dyn_cast<User>(U)) + ToVisit.push_back(UU); + } + } + + // If explicitly requested, allow vastart and alloca. For invoke instructions + // verify that extraction is valid. + for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) { + if (isa<AllocaInst>(I)) { + if (!AllowAlloca) + return false; + continue; + } + + if (const auto *II = dyn_cast<InvokeInst>(I)) { + // Unwind destination (either a landingpad, catchswitch, or cleanuppad) + // must be a part of the subgraph which is being extracted. + if (auto *UBB = II->getUnwindDest()) + if (!Result.count(UBB)) + return false; + continue; + } + + // All catch handlers of a catchswitch instruction as well as the unwind + // destination must be in the subgraph. + if (const auto *CSI = dyn_cast<CatchSwitchInst>(I)) { + if (auto *UBB = CSI->getUnwindDest()) + if (!Result.count(UBB)) + return false; + for (const auto *HBB : CSI->handlers()) + if (!Result.count(const_cast<BasicBlock*>(HBB))) + return false; + continue; + } + + // Make sure that entire catch handler is within subgraph. It is sufficient + // to check that catch return's block is in the list. + if (const auto *CPI = dyn_cast<CatchPadInst>(I)) { + for (const auto *U : CPI->users()) + if (const auto *CRI = dyn_cast<CatchReturnInst>(U)) + if (!Result.count(const_cast<BasicBlock*>(CRI->getParent()))) + return false; + continue; + } + + // And do similar checks for cleanup handler - the entire handler must be + // in subgraph which is going to be extracted. For cleanup return should + // additionally check that the unwind destination is also in the subgraph. + if (const auto *CPI = dyn_cast<CleanupPadInst>(I)) { + for (const auto *U : CPI->users()) + if (const auto *CRI = dyn_cast<CleanupReturnInst>(U)) + if (!Result.count(const_cast<BasicBlock*>(CRI->getParent()))) + return false; + continue; + } + if (const auto *CRI = dyn_cast<CleanupReturnInst>(I)) { + if (auto *UBB = CRI->getUnwindDest()) + if (!Result.count(UBB)) + return false; + continue; + } + + if (const CallInst *CI = dyn_cast<CallInst>(I)) { + if (const Function *F = CI->getCalledFunction()) { + auto IID = F->getIntrinsicID(); + if (IID == Intrinsic::vastart) { + if (AllowVarArgs) + continue; + else + return false; + } + + // Currently, we miscompile outlined copies of eh_typid_for. There are + // proposals for fixing this in llvm.org/PR39545. + if (IID == Intrinsic::eh_typeid_for) + return false; + } + } + } + + return true; +} + +/// Build a set of blocks to extract if the input blocks are viable. 
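+/// Returns an empty set if any block fails isBlockValidForExtraction, if the
+/// first block is an EH pad, or if any other block has a predecessor outside
+/// the region; blocks unreachable from the entry are silently dropped when a
+/// DominatorTree is available.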
+static SetVector<BasicBlock *> +buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, + bool AllowVarArgs, bool AllowAlloca) { + assert(!BBs.empty() && "The set of blocks to extract must be non-empty"); + SetVector<BasicBlock *> Result; + + // Loop over the blocks, adding them to our set-vector, and aborting with an + // empty set if we encounter invalid blocks. + for (BasicBlock *BB : BBs) { + // If this block is dead, don't process it. + if (DT && !DT->isReachableFromEntry(BB)) + continue; + + if (!Result.insert(BB)) + llvm_unreachable("Repeated basic blocks in extraction input"); + } + + LLVM_DEBUG(dbgs() << "Region front block: " << Result.front()->getName() + << '\n'); + + for (auto *BB : Result) { + if (!isBlockValidForExtraction(*BB, Result, AllowVarArgs, AllowAlloca)) + return {}; + + // Make sure that the first block is not a landing pad. + if (BB == Result.front()) { + if (BB->isEHPad()) { + LLVM_DEBUG(dbgs() << "The first block cannot be an unwind block\n"); + return {}; + } + continue; + } + + // All blocks other than the first must not have predecessors outside of + // the subgraph which is being extracted. + for (auto *PBB : predecessors(BB)) + if (!Result.count(PBB)) { + LLVM_DEBUG(dbgs() << "No blocks in this region may have entries from " + "outside the region except for the first block!\n" + << "Problematic source BB: " << BB->getName() << "\n" + << "Problematic destination BB: " << PBB->getName() + << "\n"); + return {}; + } + } + + return Result; +} + +CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, + bool AggregateArgs, BlockFrequencyInfo *BFI, + BranchProbabilityInfo *BPI, AssumptionCache *AC, + bool AllowVarArgs, bool AllowAlloca, + BasicBlock *AllocationBlock, std::string Suffix) + : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), + BPI(BPI), AC(AC), AllocationBlock(AllocationBlock), + AllowVarArgs(AllowVarArgs), + Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)), + Suffix(Suffix) {} + +CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, + BlockFrequencyInfo *BFI, + BranchProbabilityInfo *BPI, AssumptionCache *AC, + std::string Suffix) + : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), + BPI(BPI), AC(AC), AllocationBlock(nullptr), AllowVarArgs(false), + Blocks(buildExtractionBlockSet(L.getBlocks(), &DT, + /* AllowVarArgs */ false, + /* AllowAlloca */ false)), + Suffix(Suffix) {} + +/// definedInRegion - Return true if the specified value is defined in the +/// extracted region. +static bool definedInRegion(const SetVector<BasicBlock *> &Blocks, Value *V) { + if (Instruction *I = dyn_cast<Instruction>(V)) + if (Blocks.count(I->getParent())) + return true; + return false; +} + +/// definedInCaller - Return true if the specified value is defined in the +/// function being code extracted, but not in the region being extracted. +/// These values must be passed in as live-ins to the function. +static bool definedInCaller(const SetVector<BasicBlock *> &Blocks, Value *V) { + if (isa<Argument>(V)) return true; + if (Instruction *I = dyn_cast<Instruction>(V)) + if (!Blocks.count(I->getParent())) + return true; + return false; +} + +static BasicBlock *getCommonExitBlock(const SetVector<BasicBlock *> &Blocks) { + BasicBlock *CommonExitBlock = nullptr; + auto hasNonCommonExitSucc = [&](BasicBlock *Block) { + for (auto *Succ : successors(Block)) { + // Internal edges, ok. 
+ if (Blocks.count(Succ)) + continue; + if (!CommonExitBlock) { + CommonExitBlock = Succ; + continue; + } + if (CommonExitBlock != Succ) + return true; + } + return false; + }; + + if (any_of(Blocks, hasNonCommonExitSucc)) + return nullptr; + + return CommonExitBlock; +} + +CodeExtractorAnalysisCache::CodeExtractorAnalysisCache(Function &F) { + for (BasicBlock &BB : F) { + for (Instruction &II : BB.instructionsWithoutDebug()) + if (auto *AI = dyn_cast<AllocaInst>(&II)) + Allocas.push_back(AI); + + findSideEffectInfoForBlock(BB); + } +} + +void CodeExtractorAnalysisCache::findSideEffectInfoForBlock(BasicBlock &BB) { + for (Instruction &II : BB.instructionsWithoutDebug()) { + unsigned Opcode = II.getOpcode(); + Value *MemAddr = nullptr; + switch (Opcode) { + case Instruction::Store: + case Instruction::Load: { + if (Opcode == Instruction::Store) { + StoreInst *SI = cast<StoreInst>(&II); + MemAddr = SI->getPointerOperand(); + } else { + LoadInst *LI = cast<LoadInst>(&II); + MemAddr = LI->getPointerOperand(); + } + // Global variable can not be aliased with locals. + if (isa<Constant>(MemAddr)) + break; + Value *Base = MemAddr->stripInBoundsConstantOffsets(); + if (!isa<AllocaInst>(Base)) { + SideEffectingBlocks.insert(&BB); + return; + } + BaseMemAddrs[&BB].insert(Base); + break; + } + default: { + IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II); + if (IntrInst) { + if (IntrInst->isLifetimeStartOrEnd()) + break; + SideEffectingBlocks.insert(&BB); + return; + } + // Treat all the other cases conservatively if it has side effects. + if (II.mayHaveSideEffects()) { + SideEffectingBlocks.insert(&BB); + return; + } + } + } + } +} + +bool CodeExtractorAnalysisCache::doesBlockContainClobberOfAddr( + BasicBlock &BB, AllocaInst *Addr) const { + if (SideEffectingBlocks.count(&BB)) + return true; + auto It = BaseMemAddrs.find(&BB); + if (It != BaseMemAddrs.end()) + return It->second.count(Addr); + return false; +} + +bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers( + const CodeExtractorAnalysisCache &CEAC, Instruction *Addr) const { + AllocaInst *AI = cast<AllocaInst>(Addr->stripInBoundsConstantOffsets()); + Function *Func = (*Blocks.begin())->getParent(); + for (BasicBlock &BB : *Func) { + if (Blocks.count(&BB)) + continue; + if (CEAC.doesBlockContainClobberOfAddr(BB, AI)) + return false; + } + return true; +} + +BasicBlock * +CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) { + BasicBlock *SinglePredFromOutlineRegion = nullptr; + assert(!Blocks.count(CommonExitBlock) && + "Expect a block outside the region!"); + for (auto *Pred : predecessors(CommonExitBlock)) { + if (!Blocks.count(Pred)) + continue; + if (!SinglePredFromOutlineRegion) { + SinglePredFromOutlineRegion = Pred; + } else if (SinglePredFromOutlineRegion != Pred) { + SinglePredFromOutlineRegion = nullptr; + break; + } + } + + if (SinglePredFromOutlineRegion) + return SinglePredFromOutlineRegion; + +#ifndef NDEBUG + auto getFirstPHI = [](BasicBlock *BB) { + BasicBlock::iterator I = BB->begin(); + PHINode *FirstPhi = nullptr; + while (I != BB->end()) { + PHINode *Phi = dyn_cast<PHINode>(I); + if (!Phi) + break; + if (!FirstPhi) { + FirstPhi = Phi; + break; + } + } + return FirstPhi; + }; + // If there are any phi nodes, the single pred either exists or has already + // be created before code extraction. 
+ assert(!getFirstPHI(CommonExitBlock) && "Phi not expected"); +#endif + + BasicBlock *NewExitBlock = CommonExitBlock->splitBasicBlock( + CommonExitBlock->getFirstNonPHI()->getIterator()); + + for (BasicBlock *Pred : + llvm::make_early_inc_range(predecessors(CommonExitBlock))) { + if (Blocks.count(Pred)) + continue; + Pred->getTerminator()->replaceUsesOfWith(CommonExitBlock, NewExitBlock); + } + // Now add the old exit block to the outline region. + Blocks.insert(CommonExitBlock); + OldTargets.push_back(NewExitBlock); + return CommonExitBlock; +} + +// Find the pair of life time markers for address 'Addr' that are either +// defined inside the outline region or can legally be shrinkwrapped into the +// outline region. If there are not other untracked uses of the address, return +// the pair of markers if found; otherwise return a pair of nullptr. +CodeExtractor::LifetimeMarkerInfo +CodeExtractor::getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC, + Instruction *Addr, + BasicBlock *ExitBlock) const { + LifetimeMarkerInfo Info; + + for (User *U : Addr->users()) { + IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U); + if (IntrInst) { + // We don't model addresses with multiple start/end markers, but the + // markers do not need to be in the region. + if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) { + if (Info.LifeStart) + return {}; + Info.LifeStart = IntrInst; + continue; + } + if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) { + if (Info.LifeEnd) + return {}; + Info.LifeEnd = IntrInst; + continue; + } + // At this point, permit debug uses outside of the region. + // This is fixed in a later call to fixupDebugInfoPostExtraction(). + if (isa<DbgInfoIntrinsic>(IntrInst)) + continue; + } + // Find untracked uses of the address, bail. + if (!definedInRegion(Blocks, U)) + return {}; + } + + if (!Info.LifeStart || !Info.LifeEnd) + return {}; + + Info.SinkLifeStart = !definedInRegion(Blocks, Info.LifeStart); + Info.HoistLifeEnd = !definedInRegion(Blocks, Info.LifeEnd); + // Do legality check. + if ((Info.SinkLifeStart || Info.HoistLifeEnd) && + !isLegalToShrinkwrapLifetimeMarkers(CEAC, Addr)) + return {}; + + // Check to see if we have a place to do hoisting, if not, bail. + if (Info.HoistLifeEnd && !ExitBlock) + return {}; + + return Info; +} + +void CodeExtractor::findAllocas(const CodeExtractorAnalysisCache &CEAC, + ValueSet &SinkCands, ValueSet &HoistCands, + BasicBlock *&ExitBlock) const { + Function *Func = (*Blocks.begin())->getParent(); + ExitBlock = getCommonExitBlock(Blocks); + + auto moveOrIgnoreLifetimeMarkers = + [&](const LifetimeMarkerInfo &LMI) -> bool { + if (!LMI.LifeStart) + return false; + if (LMI.SinkLifeStart) { + LLVM_DEBUG(dbgs() << "Sinking lifetime.start: " << *LMI.LifeStart + << "\n"); + SinkCands.insert(LMI.LifeStart); + } + if (LMI.HoistLifeEnd) { + LLVM_DEBUG(dbgs() << "Hoisting lifetime.end: " << *LMI.LifeEnd << "\n"); + HoistCands.insert(LMI.LifeEnd); + } + return true; + }; + + // Look up allocas in the original function in CodeExtractorAnalysisCache, as + // this is much faster than walking all the instructions. + for (AllocaInst *AI : CEAC.getAllocas()) { + BasicBlock *BB = AI->getParent(); + if (Blocks.count(BB)) + continue; + + // As a prior call to extractCodeRegion() may have shrinkwrapped the alloca, + // check whether it is actually still in the original function. 
+ Function *AIFunc = BB->getParent(); + if (AIFunc != Func) + continue; + + LifetimeMarkerInfo MarkerInfo = getLifetimeMarkers(CEAC, AI, ExitBlock); + bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo); + if (Moved) { + LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n"); + SinkCands.insert(AI); + continue; + } + + // Find bitcasts in the outlined region that have lifetime marker users + // outside that region. Replace the lifetime marker use with an + // outside region bitcast to avoid unnecessary alloca/reload instructions + // and extra lifetime markers. + SmallVector<Instruction *, 2> LifetimeBitcastUsers; + for (User *U : AI->users()) { + if (!definedInRegion(Blocks, U)) + continue; + + if (U->stripInBoundsConstantOffsets() != AI) + continue; + + Instruction *Bitcast = cast<Instruction>(U); + for (User *BU : Bitcast->users()) { + IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(BU); + if (!IntrInst) + continue; + + if (!IntrInst->isLifetimeStartOrEnd()) + continue; + + if (definedInRegion(Blocks, IntrInst)) + continue; + + LLVM_DEBUG(dbgs() << "Replace use of extracted region bitcast" + << *Bitcast << " in out-of-region lifetime marker " + << *IntrInst << "\n"); + LifetimeBitcastUsers.push_back(IntrInst); + } + } + + for (Instruction *I : LifetimeBitcastUsers) { + Module *M = AIFunc->getParent(); + LLVMContext &Ctx = M->getContext(); + auto *Int8PtrTy = Type::getInt8PtrTy(Ctx); + CastInst *CastI = + CastInst::CreatePointerCast(AI, Int8PtrTy, "lt.cast", I); + I->replaceUsesOfWith(I->getOperand(1), CastI); + } + + // Follow any bitcasts. + SmallVector<Instruction *, 2> Bitcasts; + SmallVector<LifetimeMarkerInfo, 2> BitcastLifetimeInfo; + for (User *U : AI->users()) { + if (U->stripInBoundsConstantOffsets() == AI) { + Instruction *Bitcast = cast<Instruction>(U); + LifetimeMarkerInfo LMI = getLifetimeMarkers(CEAC, Bitcast, ExitBlock); + if (LMI.LifeStart) { + Bitcasts.push_back(Bitcast); + BitcastLifetimeInfo.push_back(LMI); + continue; + } + } + + // Found unknown use of AI. + if (!definedInRegion(Blocks, U)) { + Bitcasts.clear(); + break; + } + } + + // Either no bitcasts reference the alloca or there are unknown uses. + if (Bitcasts.empty()) + continue; + + LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI << "\n"); + SinkCands.insert(AI); + for (unsigned I = 0, E = Bitcasts.size(); I != E; ++I) { + Instruction *BitcastAddr = Bitcasts[I]; + const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo[I]; + assert(LMI.LifeStart && + "Unsafe to sink bitcast without lifetime markers"); + moveOrIgnoreLifetimeMarkers(LMI); + if (!definedInRegion(Blocks, BitcastAddr)) { + LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr + << "\n"); + SinkCands.insert(BitcastAddr); + } + } + } +} + +bool CodeExtractor::isEligible() const { + if (Blocks.empty()) + return false; + BasicBlock *Header = *Blocks.begin(); + Function *F = Header->getParent(); + + // For functions with varargs, check that varargs handling is only done in the + // outlined function, i.e vastart and vaend are only used in outlined blocks. 
+ if (AllowVarArgs && F->getFunctionType()->isVarArg()) { + auto containsVarArgIntrinsic = [](const Instruction &I) { + if (const CallInst *CI = dyn_cast<CallInst>(&I)) + if (const Function *Callee = CI->getCalledFunction()) + return Callee->getIntrinsicID() == Intrinsic::vastart || + Callee->getIntrinsicID() == Intrinsic::vaend; + return false; + }; + + for (auto &BB : *F) { + if (Blocks.count(&BB)) + continue; + if (llvm::any_of(BB, containsVarArgIntrinsic)) + return false; + } + } + return true; +} + +void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, + const ValueSet &SinkCands) const { + for (BasicBlock *BB : Blocks) { + // If a used value is defined outside the region, it's an input. If an + // instruction is used outside the region, it's an output. + for (Instruction &II : *BB) { + for (auto &OI : II.operands()) { + Value *V = OI; + if (!SinkCands.count(V) && definedInCaller(Blocks, V)) + Inputs.insert(V); + } + + for (User *U : II.users()) + if (!definedInRegion(Blocks, U)) { + Outputs.insert(&II); + break; + } + } + } +} + +/// severSplitPHINodesOfEntry - If a PHI node has multiple inputs from outside +/// of the region, we need to split the entry block of the region so that the +/// PHI node is easier to deal with. +void CodeExtractor::severSplitPHINodesOfEntry(BasicBlock *&Header) { + unsigned NumPredsFromRegion = 0; + unsigned NumPredsOutsideRegion = 0; + + if (Header != &Header->getParent()->getEntryBlock()) { + PHINode *PN = dyn_cast<PHINode>(Header->begin()); + if (!PN) return; // No PHI nodes. + + // If the header node contains any PHI nodes, check to see if there is more + // than one entry from outside the region. If so, we need to sever the + // header block into two. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (Blocks.count(PN->getIncomingBlock(i))) + ++NumPredsFromRegion; + else + ++NumPredsOutsideRegion; + + // If there is one (or fewer) predecessor from outside the region, we don't + // need to do anything special. + if (NumPredsOutsideRegion <= 1) return; + } + + // Otherwise, we need to split the header block into two pieces: one + // containing PHI nodes merging values from outside of the region, and a + // second that contains all of the code for the block and merges back any + // incoming values from inside of the region. + BasicBlock *NewBB = SplitBlock(Header, Header->getFirstNonPHI(), DT); + + // We only want to code extract the second block now, and it becomes the new + // header of the region. + BasicBlock *OldPred = Header; + Blocks.remove(OldPred); + Blocks.insert(NewBB); + Header = NewBB; + + // Okay, now we need to adjust the PHI nodes and any branches from within the + // region to go to the new header block instead of the old header block. + if (NumPredsFromRegion) { + PHINode *PN = cast<PHINode>(OldPred->begin()); + // Loop over all of the predecessors of OldPred that are in the region, + // changing them to branch to NewBB instead. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (Blocks.count(PN->getIncomingBlock(i))) { + Instruction *TI = PN->getIncomingBlock(i)->getTerminator(); + TI->replaceUsesOfWith(OldPred, NewBB); + } + + // Okay, everything within the region is now branching to the right block, we + // just have to update the PHI nodes now, inserting PHI nodes into NewBB. 
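+ // For example, given a header PHI
+ //   %v = phi i32 [ %a, %out1 ], [ %b, %out2 ], [ %c, %in ]
+ // where %out1/%out2 are outside the region and %in is inside, the
+ // out-of-region inputs stay in the PHI left in OldPred, while a new PHI
+ // "%v.ce" in NewBB receives %v from OldPred plus %c from %in, and every
+ // former user of %v is rewritten to use %v.ce.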
+ BasicBlock::iterator AfterPHIs; + for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) { + PHINode *PN = cast<PHINode>(AfterPHIs); + // Create a new PHI node in the new region, which has an incoming value + // from OldPred of PN. + PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion, + PN->getName() + ".ce", &NewBB->front()); + PN->replaceAllUsesWith(NewPN); + NewPN->addIncoming(PN, OldPred); + + // Loop over all of the incoming value in PN, moving them to NewPN if they + // are from the extracted region. + for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { + if (Blocks.count(PN->getIncomingBlock(i))) { + NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i)); + PN->removeIncomingValue(i); + --i; + } + } + } + } +} + +/// severSplitPHINodesOfExits - if PHI nodes in exit blocks have inputs from +/// outlined region, we split these PHIs on two: one with inputs from region +/// and other with remaining incoming blocks; then first PHIs are placed in +/// outlined region. +void CodeExtractor::severSplitPHINodesOfExits( + const SmallPtrSetImpl<BasicBlock *> &Exits) { + for (BasicBlock *ExitBB : Exits) { + BasicBlock *NewBB = nullptr; + + for (PHINode &PN : ExitBB->phis()) { + // Find all incoming values from the outlining region. + SmallVector<unsigned, 2> IncomingVals; + for (unsigned i = 0; i < PN.getNumIncomingValues(); ++i) + if (Blocks.count(PN.getIncomingBlock(i))) + IncomingVals.push_back(i); + + // Do not process PHI if there is one (or fewer) predecessor from region. + // If PHI has exactly one predecessor from region, only this one incoming + // will be replaced on codeRepl block, so it should be safe to skip PHI. + if (IncomingVals.size() <= 1) + continue; + + // Create block for new PHIs and add it to the list of outlined if it + // wasn't done before. + if (!NewBB) { + NewBB = BasicBlock::Create(ExitBB->getContext(), + ExitBB->getName() + ".split", + ExitBB->getParent(), ExitBB); + SmallVector<BasicBlock *, 4> Preds(predecessors(ExitBB)); + for (BasicBlock *PredBB : Preds) + if (Blocks.count(PredBB)) + PredBB->getTerminator()->replaceUsesOfWith(ExitBB, NewBB); + BranchInst::Create(ExitBB, NewBB); + Blocks.insert(NewBB); + } + + // Split this PHI. + PHINode *NewPN = + PHINode::Create(PN.getType(), IncomingVals.size(), + PN.getName() + ".ce", NewBB->getFirstNonPHI()); + for (unsigned i : IncomingVals) + NewPN->addIncoming(PN.getIncomingValue(i), PN.getIncomingBlock(i)); + for (unsigned i : reverse(IncomingVals)) + PN.removeIncomingValue(i, false); + PN.addIncoming(NewPN, NewBB); + } + } +} + +void CodeExtractor::splitReturnBlocks() { + for (BasicBlock *Block : Blocks) + if (ReturnInst *RI = dyn_cast<ReturnInst>(Block->getTerminator())) { + BasicBlock *New = + Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret"); + if (DT) { + // Old dominates New. New node dominates all other nodes dominated + // by Old. 
+ DomTreeNode *OldNode = DT->getNode(Block); + SmallVector<DomTreeNode *, 8> Children(OldNode->begin(), + OldNode->end()); + + DomTreeNode *NewNode = DT->addNewBlock(New, Block); + + for (DomTreeNode *I : Children) + DT->changeImmediateDominator(I, NewNode); + } + } +} + +/// constructFunction - make a function based on inputs and outputs, as follows: +/// f(in0, ..., inN, out0, ..., outN) +Function *CodeExtractor::constructFunction(const ValueSet &inputs, + const ValueSet &outputs, + BasicBlock *header, + BasicBlock *newRootNode, + BasicBlock *newHeader, + Function *oldFunction, + Module *M) { + LLVM_DEBUG(dbgs() << "inputs: " << inputs.size() << "\n"); + LLVM_DEBUG(dbgs() << "outputs: " << outputs.size() << "\n"); + + // This function returns unsigned, outputs will go back by reference. + switch (NumExitBlocks) { + case 0: + case 1: RetTy = Type::getVoidTy(header->getContext()); break; + case 2: RetTy = Type::getInt1Ty(header->getContext()); break; + default: RetTy = Type::getInt16Ty(header->getContext()); break; + } + + std::vector<Type *> ParamTy; + std::vector<Type *> AggParamTy; + ValueSet StructValues; + const DataLayout &DL = M->getDataLayout(); + + // Add the types of the input values to the function's argument list + for (Value *value : inputs) { + LLVM_DEBUG(dbgs() << "value used in func: " << *value << "\n"); + if (AggregateArgs && !ExcludeArgsFromAggregate.contains(value)) { + AggParamTy.push_back(value->getType()); + StructValues.insert(value); + } else + ParamTy.push_back(value->getType()); + } + + // Add the types of the output values to the function's argument list. + for (Value *output : outputs) { + LLVM_DEBUG(dbgs() << "instr used in func: " << *output << "\n"); + if (AggregateArgs && !ExcludeArgsFromAggregate.contains(output)) { + AggParamTy.push_back(output->getType()); + StructValues.insert(output); + } else + ParamTy.push_back( + PointerType::get(output->getType(), DL.getAllocaAddrSpace())); + } + + assert( + (ParamTy.size() + AggParamTy.size()) == + (inputs.size() + outputs.size()) && + "Number of scalar and aggregate params does not match inputs, outputs"); + assert((StructValues.empty() || AggregateArgs) && + "Expeced StructValues only with AggregateArgs set"); + + // Concatenate scalar and aggregate params in ParamTy. + size_t NumScalarParams = ParamTy.size(); + StructType *StructTy = nullptr; + if (AggregateArgs && !AggParamTy.empty()) { + StructTy = StructType::get(M->getContext(), AggParamTy); + ParamTy.push_back(PointerType::get(StructTy, DL.getAllocaAddrSpace())); + } + + LLVM_DEBUG({ + dbgs() << "Function type: " << *RetTy << " f("; + for (Type *i : ParamTy) + dbgs() << *i << ", "; + dbgs() << ")\n"; + }); + + FunctionType *funcType = FunctionType::get( + RetTy, ParamTy, AllowVarArgs && oldFunction->isVarArg()); + + std::string SuffixToUse = + Suffix.empty() + ? (header->getName().empty() ? "extracted" : header->getName().str()) + : Suffix; + // Create the new function + Function *newFunction = Function::Create( + funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(), + oldFunction->getName() + "." + SuffixToUse, M); + + // Inherit all of the target dependent attributes and white-listed + // target independent attributes. + // (e.g. If the extracted region contains a call to an x86.sse + // instruction we need to make sure that the extracted region has the + // "target-features" attribute allowing it to be lowered. + // FIXME: This should be changed to check to see if a specific + // attribute can not be inherited. 
+ for (const auto &Attr : oldFunction->getAttributes().getFnAttrs()) { + if (Attr.isStringAttribute()) { + if (Attr.getKindAsString() == "thunk") + continue; + } else + switch (Attr.getKindAsEnum()) { + // Those attributes cannot be propagated safely. Explicitly list them + // here so we get a warning if new attributes are added. + case Attribute::AllocSize: + case Attribute::Builtin: + case Attribute::Convergent: + case Attribute::JumpTable: + case Attribute::Naked: + case Attribute::NoBuiltin: + case Attribute::NoMerge: + case Attribute::NoReturn: + case Attribute::NoSync: + case Attribute::ReturnsTwice: + case Attribute::Speculatable: + case Attribute::StackAlignment: + case Attribute::WillReturn: + case Attribute::AllocKind: + case Attribute::PresplitCoroutine: + case Attribute::Memory: + continue; + // Those attributes should be safe to propagate to the extracted function. + case Attribute::AlwaysInline: + case Attribute::Cold: + case Attribute::DisableSanitizerInstrumentation: + case Attribute::FnRetThunkExtern: + case Attribute::Hot: + case Attribute::NoRecurse: + case Attribute::InlineHint: + case Attribute::MinSize: + case Attribute::NoCallback: + case Attribute::NoDuplicate: + case Attribute::NoFree: + case Attribute::NoImplicitFloat: + case Attribute::NoInline: + case Attribute::NonLazyBind: + case Attribute::NoRedZone: + case Attribute::NoUnwind: + case Attribute::NoSanitizeBounds: + case Attribute::NoSanitizeCoverage: + case Attribute::NullPointerIsValid: + case Attribute::OptForFuzzing: + case Attribute::OptimizeNone: + case Attribute::OptimizeForSize: + case Attribute::SafeStack: + case Attribute::ShadowCallStack: + case Attribute::SanitizeAddress: + case Attribute::SanitizeMemory: + case Attribute::SanitizeThread: + case Attribute::SanitizeHWAddress: + case Attribute::SanitizeMemTag: + case Attribute::SpeculativeLoadHardening: + case Attribute::StackProtect: + case Attribute::StackProtectReq: + case Attribute::StackProtectStrong: + case Attribute::StrictFP: + case Attribute::UWTable: + case Attribute::VScaleRange: + case Attribute::NoCfCheck: + case Attribute::MustProgress: + case Attribute::NoProfile: + case Attribute::SkipProfile: + break; + // These attributes cannot be applied to functions. + case Attribute::Alignment: + case Attribute::AllocatedPointer: + case Attribute::AllocAlign: + case Attribute::ByVal: + case Attribute::Dereferenceable: + case Attribute::DereferenceableOrNull: + case Attribute::ElementType: + case Attribute::InAlloca: + case Attribute::InReg: + case Attribute::Nest: + case Attribute::NoAlias: + case Attribute::NoCapture: + case Attribute::NoUndef: + case Attribute::NonNull: + case Attribute::Preallocated: + case Attribute::ReadNone: + case Attribute::ReadOnly: + case Attribute::Returned: + case Attribute::SExt: + case Attribute::StructRet: + case Attribute::SwiftError: + case Attribute::SwiftSelf: + case Attribute::SwiftAsync: + case Attribute::ZExt: + case Attribute::ImmArg: + case Attribute::ByRef: + case Attribute::WriteOnly: + // These are not really attributes. + case Attribute::None: + case Attribute::EndAttrKinds: + case Attribute::EmptyKey: + case Attribute::TombstoneKey: + llvm_unreachable("Not a function attribute"); + } + + newFunction->addFnAttr(Attr); + } + newFunction->insert(newFunction->end(), newRootNode); + + // Create scalar and aggregate iterators to name all of the arguments we + // inserted. 
+ Function::arg_iterator ScalarAI = newFunction->arg_begin(); + Function::arg_iterator AggAI = std::next(ScalarAI, NumScalarParams); + + // Rewrite all users of the inputs in the extracted region to use the + // arguments (or appropriate addressing into struct) instead. + for (unsigned i = 0, e = inputs.size(), aggIdx = 0; i != e; ++i) { + Value *RewriteVal; + if (AggregateArgs && StructValues.contains(inputs[i])) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); + Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), aggIdx); + Instruction *TI = newFunction->begin()->getTerminator(); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructTy, &*AggAI, Idx, "gep_" + inputs[i]->getName(), TI); + RewriteVal = new LoadInst(StructTy->getElementType(aggIdx), GEP, + "loadgep_" + inputs[i]->getName(), TI); + ++aggIdx; + } else + RewriteVal = &*ScalarAI++; + + std::vector<User *> Users(inputs[i]->user_begin(), inputs[i]->user_end()); + for (User *use : Users) + if (Instruction *inst = dyn_cast<Instruction>(use)) + if (Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(inputs[i], RewriteVal); + } + + // Set names for input and output arguments. + if (NumScalarParams) { + ScalarAI = newFunction->arg_begin(); + for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++ScalarAI) + if (!StructValues.contains(inputs[i])) + ScalarAI->setName(inputs[i]->getName()); + for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++ScalarAI) + if (!StructValues.contains(outputs[i])) + ScalarAI->setName(outputs[i]->getName() + ".out"); + } + + // Rewrite branches to basic blocks outside of the loop to new dummy blocks + // within the new function. This must be done before we lose track of which + // blocks were originally in the code region. + std::vector<User *> Users(header->user_begin(), header->user_end()); + for (auto &U : Users) + // The BasicBlock which contains the branch is not in the region + // modify the branch target to a new block + if (Instruction *I = dyn_cast<Instruction>(U)) + if (I->isTerminator() && I->getFunction() == oldFunction && + !Blocks.count(I->getParent())) + I->replaceUsesOfWith(header, newHeader); + + return newFunction; +} + +/// Erase lifetime.start markers which reference inputs to the extraction +/// region, and insert the referenced memory into \p LifetimesStart. +/// +/// The extraction region is defined by a set of blocks (\p Blocks), and a set +/// of allocas which will be moved from the caller function into the extracted +/// function (\p SunkAllocas). +static void eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks, + const SetVector<Value *> &SunkAllocas, + SetVector<Value *> &LifetimesStart) { + for (BasicBlock *BB : Blocks) { + for (Instruction &I : llvm::make_early_inc_range(*BB)) { + auto *II = dyn_cast<IntrinsicInst>(&I); + if (!II || !II->isLifetimeStartOrEnd()) + continue; + + // Get the memory operand of the lifetime marker. If the underlying + // object is a sunk alloca, or is otherwise defined in the extraction + // region, the lifetime marker must not be erased. + Value *Mem = II->getOperand(1)->stripInBoundsOffsets(); + if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem)) + continue; + + if (II->getIntrinsicID() == Intrinsic::lifetime_start) + LifetimesStart.insert(Mem); + II->eraseFromParent(); + } + } +} + +/// Insert lifetime start/end markers surrounding the call to the new function +/// for objects defined in the caller. 
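+/// Roughly, for each such object the caller side ends up as (illustrative,
+/// opaque-pointer syntax):
+///   call void @llvm.lifetime.start.p0(i64 -1, ptr %obj)
+///   %t = call i16 @foo.outlined(ptr %obj, ...)
+///   ...
+///   call void @llvm.lifetime.end.p0(i64 -1, ptr %obj)
+/// with the start marker inserted immediately before the call and the end
+/// marker immediately before the terminator of the call's block.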
+static void insertLifetimeMarkersSurroundingCall( + Module *M, ArrayRef<Value *> LifetimesStart, ArrayRef<Value *> LifetimesEnd, + CallInst *TheCall) { + LLVMContext &Ctx = M->getContext(); + auto Int8PtrTy = Type::getInt8PtrTy(Ctx); + auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1); + Instruction *Term = TheCall->getParent()->getTerminator(); + + // The memory argument to a lifetime marker must be a i8*. Cache any bitcasts + // needed to satisfy this requirement so they may be reused. + DenseMap<Value *, Value *> Bitcasts; + + // Emit lifetime markers for the pointers given in \p Objects. Insert the + // markers before the call if \p InsertBefore, and after the call otherwise. + auto insertMarkers = [&](Function *MarkerFunc, ArrayRef<Value *> Objects, + bool InsertBefore) { + for (Value *Mem : Objects) { + assert((!isa<Instruction>(Mem) || cast<Instruction>(Mem)->getFunction() == + TheCall->getFunction()) && + "Input memory not defined in original function"); + Value *&MemAsI8Ptr = Bitcasts[Mem]; + if (!MemAsI8Ptr) { + if (Mem->getType() == Int8PtrTy) + MemAsI8Ptr = Mem; + else + MemAsI8Ptr = + CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall); + } + + auto Marker = CallInst::Create(MarkerFunc, {NegativeOne, MemAsI8Ptr}); + if (InsertBefore) + Marker->insertBefore(TheCall); + else + Marker->insertBefore(Term); + } + }; + + if (!LifetimesStart.empty()) { + auto StartFn = llvm::Intrinsic::getDeclaration( + M, llvm::Intrinsic::lifetime_start, Int8PtrTy); + insertMarkers(StartFn, LifetimesStart, /*InsertBefore=*/true); + } + + if (!LifetimesEnd.empty()) { + auto EndFn = llvm::Intrinsic::getDeclaration( + M, llvm::Intrinsic::lifetime_end, Int8PtrTy); + insertMarkers(EndFn, LifetimesEnd, /*InsertBefore=*/false); + } +} + +/// emitCallAndSwitchStatement - This method sets up the caller side by adding +/// the call instruction, splitting any PHI nodes in the header block as +/// necessary. 
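+/// After this runs, the replacement block looks roughly like (illustrative,
+/// for a region with more than two exit blocks):
+///   %targetBlock = call i16 @foo.outlined(...)
+///   switch i16 %targetBlock, label %exit.2 [ i16 0, label %exit.0
+///                                            i16 1, label %exit.1 ]
+/// With zero or one exit the switch is folded into a return or an
+/// unconditional branch, and with exactly two exits into a conditional branch
+/// on the i1 return value.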
+CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, + BasicBlock *codeReplacer, + ValueSet &inputs, + ValueSet &outputs) { + // Emit a call to the new function, passing in: *pointer to struct (if + // aggregating parameters), or plan inputs and allocated memory for outputs + std::vector<Value *> params, ReloadOutputs, Reloads; + ValueSet StructValues; + + Module *M = newFunction->getParent(); + LLVMContext &Context = M->getContext(); + const DataLayout &DL = M->getDataLayout(); + CallInst *call = nullptr; + + // Add inputs as params, or to be filled into the struct + unsigned ScalarInputArgNo = 0; + SmallVector<unsigned, 1> SwiftErrorArgs; + for (Value *input : inputs) { + if (AggregateArgs && !ExcludeArgsFromAggregate.contains(input)) + StructValues.insert(input); + else { + params.push_back(input); + if (input->isSwiftError()) + SwiftErrorArgs.push_back(ScalarInputArgNo); + } + ++ScalarInputArgNo; + } + + // Create allocas for the outputs + unsigned ScalarOutputArgNo = 0; + for (Value *output : outputs) { + if (AggregateArgs && !ExcludeArgsFromAggregate.contains(output)) { + StructValues.insert(output); + } else { + AllocaInst *alloca = + new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), + nullptr, output->getName() + ".loc", + &codeReplacer->getParent()->front().front()); + ReloadOutputs.push_back(alloca); + params.push_back(alloca); + ++ScalarOutputArgNo; + } + } + + StructType *StructArgTy = nullptr; + AllocaInst *Struct = nullptr; + unsigned NumAggregatedInputs = 0; + if (AggregateArgs && !StructValues.empty()) { + std::vector<Type *> ArgTypes; + for (Value *V : StructValues) + ArgTypes.push_back(V->getType()); + + // Allocate a struct at the beginning of this function + StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); + Struct = new AllocaInst( + StructArgTy, DL.getAllocaAddrSpace(), nullptr, "structArg", + AllocationBlock ? &*AllocationBlock->getFirstInsertionPt() + : &codeReplacer->getParent()->front().front()); + params.push_back(Struct); + + // Store aggregated inputs in the struct. + for (unsigned i = 0, e = StructValues.size(); i != e; ++i) { + if (inputs.contains(StructValues[i])) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); + GEP->insertInto(codeReplacer, codeReplacer->end()); + new StoreInst(StructValues[i], GEP, codeReplacer); + NumAggregatedInputs++; + } + } + } + + // Emit the call to the function + call = CallInst::Create(newFunction, params, + NumExitBlocks > 1 ? "targetBlock" : ""); + // Add debug location to the new call, if the original function has debug + // info. In that case, the terminator of the entry block of the extracted + // function contains the first debug location of the extracted function, + // set in extractCodeRegion. + if (codeReplacer->getParent()->getSubprogram()) { + if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) + call->setDebugLoc(DL); + } + call->insertInto(codeReplacer, codeReplacer->end()); + + // Set swifterror parameter attributes. + for (unsigned SwiftErrArgNo : SwiftErrorArgs) { + call->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); + newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); + } + + // Reload the outputs passed in by reference, use the struct if output is in + // the aggregate or reload from the scalar argument. 
+ for (unsigned i = 0, e = outputs.size(), scalarIdx = 0, + aggIdx = NumAggregatedInputs; + i != e; ++i) { + Value *Output = nullptr; + if (AggregateArgs && StructValues.contains(outputs[i])) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), aggIdx); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); + GEP->insertInto(codeReplacer, codeReplacer->end()); + Output = GEP; + ++aggIdx; + } else { + Output = ReloadOutputs[scalarIdx]; + ++scalarIdx; + } + LoadInst *load = new LoadInst(outputs[i]->getType(), Output, + outputs[i]->getName() + ".reload", + codeReplacer); + Reloads.push_back(load); + std::vector<User *> Users(outputs[i]->user_begin(), outputs[i]->user_end()); + for (User *U : Users) { + Instruction *inst = cast<Instruction>(U); + if (!Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(outputs[i], load); + } + } + + // Now we can emit a switch statement using the call as a value. + SwitchInst *TheSwitch = + SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), + codeReplacer, 0, codeReplacer); + + // Since there may be multiple exits from the original region, make the new + // function return an unsigned, switch on that number. This loop iterates + // over all of the blocks in the extracted region, updating any terminator + // instructions in the to-be-extracted region that branch to blocks that are + // not in the region to be extracted. + std::map<BasicBlock *, BasicBlock *> ExitBlockMap; + + // Iterate over the previously collected targets, and create new blocks inside + // the function to branch to. + unsigned switchVal = 0; + for (BasicBlock *OldTarget : OldTargets) { + if (Blocks.count(OldTarget)) + continue; + BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; + if (NewTarget) + continue; + + // If we don't already have an exit stub for this non-extracted + // destination, create one now! + NewTarget = BasicBlock::Create(Context, + OldTarget->getName() + ".exitStub", + newFunction); + unsigned SuccNum = switchVal++; + + Value *brVal = nullptr; + assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); + switch (NumExitBlocks) { + case 0: + case 1: break; // No value needed. + case 2: // Conditional branch, return a bool + brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); + break; + default: + brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); + break; + } + + ReturnInst::Create(Context, brVal, NewTarget); + + // Update the switch instruction. + TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), + SuccNum), + OldTarget); + } + + for (BasicBlock *Block : Blocks) { + Instruction *TI = Block->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { + if (Blocks.count(TI->getSuccessor(i))) + continue; + BasicBlock *OldTarget = TI->getSuccessor(i); + // add a new basic block which returns the appropriate value + BasicBlock *NewTarget = ExitBlockMap[OldTarget]; + assert(NewTarget && "Unknown target block!"); + + // rewrite the original branch instruction with this new target + TI->setSuccessor(i, NewTarget); + } + } + + // Store the arguments right after the definition of output value. + // This should be proceeded after creating exit stubs to be ensure that invoke + // result restore will be placed in the outlined function. 
+ Function::arg_iterator ScalarOutputArgBegin = newFunction->arg_begin(); + std::advance(ScalarOutputArgBegin, ScalarInputArgNo); + Function::arg_iterator AggOutputArgBegin = newFunction->arg_begin(); + std::advance(AggOutputArgBegin, ScalarInputArgNo + ScalarOutputArgNo); + + for (unsigned i = 0, e = outputs.size(), aggIdx = NumAggregatedInputs; i != e; + ++i) { + auto *OutI = dyn_cast<Instruction>(outputs[i]); + if (!OutI) + continue; + + // Find proper insertion point. + BasicBlock::iterator InsertPt; + // In case OutI is an invoke, we insert the store at the beginning in the + // 'normal destination' BB. Otherwise we insert the store right after OutI. + if (auto *InvokeI = dyn_cast<InvokeInst>(OutI)) + InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); + else if (auto *Phi = dyn_cast<PHINode>(OutI)) + InsertPt = Phi->getParent()->getFirstInsertionPt(); + else + InsertPt = std::next(OutI->getIterator()); + + Instruction *InsertBefore = &*InsertPt; + assert((InsertBefore->getFunction() == newFunction || + Blocks.count(InsertBefore->getParent())) && + "InsertPt should be in new function"); + if (AggregateArgs && StructValues.contains(outputs[i])) { + assert(AggOutputArgBegin != newFunction->arg_end() && + "Number of aggregate output arguments should match " + "the number of defined values"); + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), aggIdx); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, &*AggOutputArgBegin, Idx, "gep_" + outputs[i]->getName(), + InsertBefore); + new StoreInst(outputs[i], GEP, InsertBefore); + ++aggIdx; + // Since there should be only one struct argument aggregating + // all the output values, we shouldn't increment AggOutputArgBegin, which + // always points to the struct argument, in this case. + } else { + assert(ScalarOutputArgBegin != newFunction->arg_end() && + "Number of scalar output arguments should match " + "the number of defined values"); + new StoreInst(outputs[i], &*ScalarOutputArgBegin, InsertBefore); + ++ScalarOutputArgBegin; + } + } + + // Now that we've done the deed, simplify the switch instruction. + Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); + switch (NumExitBlocks) { + case 0: + // There are no successors (the block containing the switch itself), which + // means that previously this was the last part of the function, and hence + // this should be rewritten as a `ret' + + // Check if the function should return a value + if (OldFnRetTy->isVoidTy()) { + ReturnInst::Create(Context, nullptr, TheSwitch); // Return void + } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { + // return what we have + ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); + } else { + // Otherwise we must have code extracted an unwind or something, just + // return whatever we want. + ReturnInst::Create(Context, + Constant::getNullValue(OldFnRetTy), TheSwitch); + } + + TheSwitch->eraseFromParent(); + break; + case 1: + // Only a single destination, change the switch into an unconditional + // branch. + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch); + TheSwitch->eraseFromParent(); + break; + case 2: + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), + call, TheSwitch); + TheSwitch->eraseFromParent(); + break; + default: + // Otherwise, make the default destination of the switch instruction be one + // of the other successors. 
+ TheSwitch->setCondition(call); + TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); + // Remove redundant case + TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1)); + break; + } + + // Insert lifetime markers around the reloads of any output values. The + // allocas output values are stored in are only in-use in the codeRepl block. + insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); + + return call; +} + +void CodeExtractor::moveCodeToFunction(Function *newFunction) { + auto newFuncIt = newFunction->front().getIterator(); + for (BasicBlock *Block : Blocks) { + // Delete the basic block from the old function, and the list of blocks + Block->removeFromParent(); + + // Insert this basic block into the new function + // Insert the original blocks after the entry block created + // for the new function. The entry block may be followed + // by a set of exit blocks at this point, but these exit + // blocks better be placed at the end of the new function. + newFuncIt = newFunction->insert(std::next(newFuncIt), Block); + } +} + +void CodeExtractor::calculateNewCallTerminatorWeights( + BasicBlock *CodeReplacer, + DenseMap<BasicBlock *, BlockFrequency> &ExitWeights, + BranchProbabilityInfo *BPI) { + using Distribution = BlockFrequencyInfoImplBase::Distribution; + using BlockNode = BlockFrequencyInfoImplBase::BlockNode; + + // Update the branch weights for the exit block. + Instruction *TI = CodeReplacer->getTerminator(); + SmallVector<unsigned, 8> BranchWeights(TI->getNumSuccessors(), 0); + + // Block Frequency distribution with dummy node. + Distribution BranchDist; + + SmallVector<BranchProbability, 4> EdgeProbabilities( + TI->getNumSuccessors(), BranchProbability::getUnknown()); + + // Add each of the frequencies of the successors. + for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) { + BlockNode ExitNode(i); + uint64_t ExitFreq = ExitWeights[TI->getSuccessor(i)].getFrequency(); + if (ExitFreq != 0) + BranchDist.addExit(ExitNode, ExitFreq); + else + EdgeProbabilities[i] = BranchProbability::getZero(); + } + + // Check for no total weight. + if (BranchDist.Total == 0) { + BPI->setEdgeProbability(CodeReplacer, EdgeProbabilities); + return; + } + + // Normalize the distribution so that they can fit in unsigned. + BranchDist.normalize(); + + // Create normalized branch weights and set the metadata. + for (unsigned I = 0, E = BranchDist.Weights.size(); I < E; ++I) { + const auto &Weight = BranchDist.Weights[I]; + + // Get the weight and update the current BFI. + BranchWeights[Weight.TargetNode.Index] = Weight.Amount; + BranchProbability BP(Weight.Amount, BranchDist.Total); + EdgeProbabilities[Weight.TargetNode.Index] = BP; + } + BPI->setEdgeProbability(CodeReplacer, EdgeProbabilities); + TI->setMetadata( + LLVMContext::MD_prof, + MDBuilder(TI->getContext()).createBranchWeights(BranchWeights)); +} + +/// Erase debug info intrinsics which refer to values in \p F but aren't in +/// \p F. +static void eraseDebugIntrinsicsWithNonLocalRefs(Function &F) { + for (Instruction &I : instructions(F)) { + SmallVector<DbgVariableIntrinsic *, 4> DbgUsers; + findDbgUsers(DbgUsers, &I); + for (DbgVariableIntrinsic *DVI : DbgUsers) + if (DVI->getFunction() != &F) + DVI->eraseFromParent(); + } +} + +/// Fix up the debug info in the old and new functions by pointing line +/// locations and debug intrinsics to the new subprogram scope, and by deleting +/// intrinsics which point to values outside of the new function. 
+static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc, + CallInst &TheCall) { + DISubprogram *OldSP = OldFunc.getSubprogram(); + LLVMContext &Ctx = OldFunc.getContext(); + + if (!OldSP) { + // Erase any debug info the new function contains. + stripDebugInfo(NewFunc); + // Make sure the old function doesn't contain any non-local metadata refs. + eraseDebugIntrinsicsWithNonLocalRefs(NewFunc); + return; + } + + // Create a subprogram for the new function. Leave out a description of the + // function arguments, as the parameters don't correspond to anything at the + // source level. + assert(OldSP->getUnit() && "Missing compile unit for subprogram"); + DIBuilder DIB(*OldFunc.getParent(), /*AllowUnresolved=*/false, + OldSP->getUnit()); + auto SPType = + DIB.createSubroutineType(DIB.getOrCreateTypeArray(std::nullopt)); + DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagDefinition | + DISubprogram::SPFlagOptimized | + DISubprogram::SPFlagLocalToUnit; + auto NewSP = DIB.createFunction( + OldSP->getUnit(), NewFunc.getName(), NewFunc.getName(), OldSP->getFile(), + /*LineNo=*/0, SPType, /*ScopeLine=*/0, DINode::FlagZero, SPFlags); + NewFunc.setSubprogram(NewSP); + + // Debug intrinsics in the new function need to be updated in one of two + // ways: + // 1) They need to be deleted, because they describe a value in the old + // function. + // 2) They need to point to fresh metadata, e.g. because they currently + // point to a variable in the wrong scope. + SmallDenseMap<DINode *, DINode *> RemappedMetadata; + SmallVector<Instruction *, 4> DebugIntrinsicsToDelete; + DenseMap<const MDNode *, MDNode *> Cache; + for (Instruction &I : instructions(NewFunc)) { + auto *DII = dyn_cast<DbgInfoIntrinsic>(&I); + if (!DII) + continue; + + // Point the intrinsic to a fresh label within the new function if the + // intrinsic was not inlined from some other function. + if (auto *DLI = dyn_cast<DbgLabelInst>(&I)) { + if (DLI->getDebugLoc().getInlinedAt()) + continue; + DILabel *OldLabel = DLI->getLabel(); + DINode *&NewLabel = RemappedMetadata[OldLabel]; + if (!NewLabel) { + DILocalScope *NewScope = DILocalScope::cloneScopeForSubprogram( + *OldLabel->getScope(), *NewSP, Ctx, Cache); + NewLabel = DILabel::get(Ctx, NewScope, OldLabel->getName(), + OldLabel->getFile(), OldLabel->getLine()); + } + DLI->setArgOperand(0, MetadataAsValue::get(Ctx, NewLabel)); + continue; + } + + auto IsInvalidLocation = [&NewFunc](Value *Location) { + // Location is invalid if it isn't a constant or an instruction, or is an + // instruction but isn't in the new function. + if (!Location || + (!isa<Constant>(Location) && !isa<Instruction>(Location))) + return true; + Instruction *LocationInst = dyn_cast<Instruction>(Location); + return LocationInst && LocationInst->getFunction() != &NewFunc; + }; + + auto *DVI = cast<DbgVariableIntrinsic>(DII); + // If any of the used locations are invalid, delete the intrinsic. + if (any_of(DVI->location_ops(), IsInvalidLocation)) { + DebugIntrinsicsToDelete.push_back(DVI); + continue; + } + // If the variable was in the scope of the old function, i.e. it was not + // inlined, point the intrinsic to a fresh variable within the new function. 
+ if (!DVI->getDebugLoc().getInlinedAt()) { + DILocalVariable *OldVar = DVI->getVariable(); + DINode *&NewVar = RemappedMetadata[OldVar]; + if (!NewVar) { + DILocalScope *NewScope = DILocalScope::cloneScopeForSubprogram( + *OldVar->getScope(), *NewSP, Ctx, Cache); + NewVar = DIB.createAutoVariable( + NewScope, OldVar->getName(), OldVar->getFile(), OldVar->getLine(), + OldVar->getType(), /*AlwaysPreserve=*/false, DINode::FlagZero, + OldVar->getAlignInBits()); + } + DVI->setVariable(cast<DILocalVariable>(NewVar)); + } + } + + for (auto *DII : DebugIntrinsicsToDelete) + DII->eraseFromParent(); + DIB.finalizeSubprogram(NewSP); + + // Fix up the scope information attached to the line locations in the new + // function. + for (Instruction &I : instructions(NewFunc)) { + if (const DebugLoc &DL = I.getDebugLoc()) + I.setDebugLoc( + DebugLoc::replaceInlinedAtSubprogram(DL, *NewSP, Ctx, Cache)); + + // Loop info metadata may contain line locations. Fix them up. + auto updateLoopInfoLoc = [&Ctx, &Cache, NewSP](Metadata *MD) -> Metadata * { + if (auto *Loc = dyn_cast_or_null<DILocation>(MD)) + return DebugLoc::replaceInlinedAtSubprogram(Loc, *NewSP, Ctx, Cache); + return MD; + }; + updateLoopMetadataDebugLocations(I, updateLoopInfoLoc); + } + if (!TheCall.getDebugLoc()) + TheCall.setDebugLoc(DILocation::get(Ctx, 0, 0, OldSP)); + + eraseDebugIntrinsicsWithNonLocalRefs(NewFunc); +} + +Function * +CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) { + ValueSet Inputs, Outputs; + return extractCodeRegion(CEAC, Inputs, Outputs); +} + +Function * +CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, + ValueSet &inputs, ValueSet &outputs) { + if (!isEligible()) + return nullptr; + + // Assumption: this is a single-entry code region, and the header is the first + // block in the region. + BasicBlock *header = *Blocks.begin(); + Function *oldFunction = header->getParent(); + + // Calculate the entry frequency of the new function before we change the root + // block. + BlockFrequency EntryFreq; + if (BFI) { + assert(BPI && "Both BPI and BFI are required to preserve profile info"); + for (BasicBlock *Pred : predecessors(header)) { + if (Blocks.count(Pred)) + continue; + EntryFreq += + BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header); + } + } + + // Remove CondGuardInsts that will be moved to the new function from the old + // function's assumption cache. + for (BasicBlock *Block : Blocks) { + for (Instruction &I : llvm::make_early_inc_range(*Block)) { + if (auto *CI = dyn_cast<CondGuardInst>(&I)) { + if (AC) + AC->unregisterAssumption(CI); + CI->eraseFromParent(); + } + } + } + + // If we have any return instructions in the region, split those blocks so + // that the return is not in the region. + splitReturnBlocks(); + + // Calculate the exit blocks for the extracted region and the total exit + // weights for each of those blocks. + DenseMap<BasicBlock *, BlockFrequency> ExitWeights; + SmallPtrSet<BasicBlock *, 1> ExitBlocks; + for (BasicBlock *Block : Blocks) { + for (BasicBlock *Succ : successors(Block)) { + if (!Blocks.count(Succ)) { + // Update the branch weight for this successor. 
+ if (BFI) { + BlockFrequency &BF = ExitWeights[Succ]; + BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, Succ); + } + ExitBlocks.insert(Succ); + } + } + } + NumExitBlocks = ExitBlocks.size(); + + for (BasicBlock *Block : Blocks) { + Instruction *TI = Block->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { + if (Blocks.count(TI->getSuccessor(i))) + continue; + BasicBlock *OldTarget = TI->getSuccessor(i); + OldTargets.push_back(OldTarget); + } + } + + // If we have to split PHI nodes of the entry or exit blocks, do so now. + severSplitPHINodesOfEntry(header); + severSplitPHINodesOfExits(ExitBlocks); + + // This takes place of the original loop + BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), + "codeRepl", oldFunction, + header); + + // The new function needs a root node because other nodes can branch to the + // head of the region, but the entry node of a function cannot have preds. + BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), + "newFuncRoot"); + auto *BranchI = BranchInst::Create(header); + // If the original function has debug info, we have to add a debug location + // to the new branch instruction from the artificial entry block. + // We use the debug location of the first instruction in the extracted + // blocks, as there is no other equivalent line in the source code. + if (oldFunction->getSubprogram()) { + any_of(Blocks, [&BranchI](const BasicBlock *BB) { + return any_of(*BB, [&BranchI](const Instruction &I) { + if (!I.getDebugLoc()) + return false; + BranchI->setDebugLoc(I.getDebugLoc()); + return true; + }); + }); + } + BranchI->insertInto(newFuncRoot, newFuncRoot->end()); + + ValueSet SinkingCands, HoistingCands; + BasicBlock *CommonExit = nullptr; + findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit); + assert(HoistingCands.empty() || CommonExit); + + // Find inputs to, outputs from the code region. + findInputsOutputs(inputs, outputs, SinkingCands); + + // Now sink all instructions which only have non-phi uses inside the region. + // Group the allocas at the start of the block, so that any bitcast uses of + // the allocas are well-defined. + AllocaInst *FirstSunkAlloca = nullptr; + for (auto *II : SinkingCands) { + if (auto *AI = dyn_cast<AllocaInst>(II)) { + AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt()); + if (!FirstSunkAlloca) + FirstSunkAlloca = AI; + } + } + assert((SinkingCands.empty() || FirstSunkAlloca) && + "Did not expect a sink candidate without any allocas"); + for (auto *II : SinkingCands) { + if (!isa<AllocaInst>(II)) { + cast<Instruction>(II)->moveAfter(FirstSunkAlloca); + } + } + + if (!HoistingCands.empty()) { + auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit); + Instruction *TI = HoistToBlock->getTerminator(); + for (auto *II : HoistingCands) + cast<Instruction>(II)->moveBefore(TI); + } + + // Collect objects which are inputs to the extraction region and also + // referenced by lifetime start markers within it. The effects of these + // markers must be replicated in the calling function to prevent the stack + // coloring pass from merging slots which store input objects. + ValueSet LifetimesStart; + eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart); + + // Construct new function based on inputs/outputs & add allocas for all defs. + Function *newFunction = + constructFunction(inputs, outputs, header, newFuncRoot, codeReplacer, + oldFunction, oldFunction->getParent()); + + // Update the entry count of the function. 
+ if (BFI) { + auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); + if (Count) + newFunction->setEntryCount( + ProfileCount(*Count, Function::PCT_Real)); // FIXME + BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); + } + + CallInst *TheCall = + emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs); + + moveCodeToFunction(newFunction); + + // Replicate the effects of any lifetime start/end markers which referenced + // input objects in the extraction region by placing markers around the call. + insertLifetimeMarkersSurroundingCall( + oldFunction->getParent(), LifetimesStart.getArrayRef(), {}, TheCall); + + // Propagate personality info to the new function if there is one. + if (oldFunction->hasPersonalityFn()) + newFunction->setPersonalityFn(oldFunction->getPersonalityFn()); + + // Update the branch weights for the exit block. + if (BFI && NumExitBlocks > 1) + calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI); + + // Loop over all of the PHI nodes in the header and exit blocks, and change + // any references to the old incoming edge to be the new incoming edge. + for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (!Blocks.count(PN->getIncomingBlock(i))) + PN->setIncomingBlock(i, newFuncRoot); + } + + for (BasicBlock *ExitBB : ExitBlocks) + for (PHINode &PN : ExitBB->phis()) { + Value *IncomingCodeReplacerVal = nullptr; + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { + // Ignore incoming values from outside of the extracted region. + if (!Blocks.count(PN.getIncomingBlock(i))) + continue; + + // Ensure that there is only one incoming value from codeReplacer. + if (!IncomingCodeReplacerVal) { + PN.setIncomingBlock(i, codeReplacer); + IncomingCodeReplacerVal = PN.getIncomingValue(i); + } else + assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) && + "PHI has two incompatbile incoming values from codeRepl"); + } + } + + fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall); + + // Mark the new function `noreturn` if applicable. Terminators which resume + // exception propagation are treated as returning instructions. This is to + // avoid inserting traps after calls to outlined functions which unwind. + bool doesNotReturn = none_of(*newFunction, [](const BasicBlock &BB) { + const Instruction *Term = BB.getTerminator(); + return isa<ReturnInst>(Term) || isa<ResumeInst>(Term); + }); + if (doesNotReturn) + newFunction->setDoesNotReturn(); + + LLVM_DEBUG(if (verifyFunction(*newFunction, &errs())) { + newFunction->dump(); + report_fatal_error("verification of newFunction failed!"); + }); + LLVM_DEBUG(if (verifyFunction(*oldFunction)) + report_fatal_error("verification of oldFunction failed!")); + LLVM_DEBUG(if (AC && verifyAssumptionCache(*oldFunction, *newFunction, AC)) + report_fatal_error("Stale Asumption cache for old Function!")); + return newFunction; +} + +bool CodeExtractor::verifyAssumptionCache(const Function &OldFunc, + const Function &NewFunc, + AssumptionCache *AC) { + for (auto AssumeVH : AC->assumptions()) { + auto *I = dyn_cast_or_null<CondGuardInst>(AssumeVH); + if (!I) + continue; + + // There shouldn't be any llvm.assume intrinsics in the new function. 
+    if (I->getFunction() != &OldFunc)
+      return true;
+
+    // There shouldn't be any stale affected values in the assumption cache
+    // that were previously in the old function, but that have now been moved
+    // to the new function.
+    for (auto AffectedValVH : AC->assumptionsFor(I->getOperand(0))) {
+      auto *AffectedCI = dyn_cast_or_null<CondGuardInst>(AffectedValVH);
+      if (!AffectedCI)
+        continue;
+      if (AffectedCI->getFunction() != &OldFunc)
+        return true;
+      auto *AssumedInst = cast<Instruction>(AffectedCI->getOperand(0));
+      if (AssumedInst->getFunction() != &OldFunc)
+        return true;
+    }
+  }
+  return false;
+}
+
+void CodeExtractor::excludeArgFromAggregate(Value *Arg) {
+  ExcludeArgsFromAggregate.insert(Arg);
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/CodeLayout.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/CodeLayout.cpp
new file mode 100644
index 0000000000..9eb3aff3ff
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/CodeLayout.cpp
@@ -0,0 +1,1014 @@
+//===- CodeLayout.cpp - Implementation of code layout algorithms ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// ExtTSP - layout of basic blocks with i-cache optimization.
+//
+// The algorithm tries to find a layout of the nodes (basic blocks) of a given
+// CFG that optimizes jump locality and thus processor I-cache utilization.
+// This is achieved by increasing the number of fall-through jumps and by
+// co-locating frequently executed nodes. The name follows the underlying
+// optimization problem, Extended-TSP, which is a generalization of the
+// classical (maximum) Traveling Salesman Problem.
+//
+// The algorithm is a greedy heuristic that works with chains (ordered lists)
+// of basic blocks. Initially all chains are isolated basic blocks. On every
+// iteration, we pick a pair of chains whose merging yields the biggest increase
+// in the ExtTSP score, which models how i-cache "friendly" a specific chain is.
+// A pair of chains giving the maximum gain is merged into a new chain. The
+// procedure stops when there is only one chain left, or when merging does not
+// increase ExtTSP. In the latter case, the remaining chains are sorted by
+// density in decreasing order.
+//
+// An important aspect is the way two chains are merged. Unlike earlier
+// algorithms (e.g., those based on the approach of Pettis-Hansen), the two
+// chains, X and Y, are turned into three: X is split into X1 and X2, while Y
+// is kept intact. Then we consider all possible ways of gluing the three
+// chains (e.g., X1YX2, X1X2Y, X2X1Y, X2YX1, YX1X2, YX2X1) and choose the one
+// producing the largest score. This improves the quality of the final result
+// (the search space is larger) while keeping the implementation sufficiently fast.
+//
+// Reference:
+// * A. Newell and S. Pupyrev, Improved Basic Block Reordering,
+//   IEEE Transactions on Computers, 2020
+//   https://arxiv.org/abs/1809.04676
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CodeLayout.h"
+#include "llvm/Support/CommandLine.h"
+
+#include <cmath>
+
+using namespace llvm;
+#define DEBUG_TYPE "code-layout"
+
+cl::opt<bool> EnableExtTspBlockPlacement(
+    "enable-ext-tsp-block-placement", cl::Hidden, cl::init(false),
+    cl::desc("Enable machine block placement based on the ext-tsp model, "
+             "optimizing I-cache utilization."));
+
+cl::opt<bool> ApplyExtTspWithoutProfile(
+    "ext-tsp-apply-without-profile",
+    cl::desc("Whether to apply ext-tsp placement for instances w/o profile"),
+    cl::init(true), cl::Hidden);
+
+// Algorithm-specific params. The values are tuned for the best performance
+// of large-scale front-end bound binaries.
+static cl::opt<double> ForwardWeightCond(
+    "ext-tsp-forward-weight-cond", cl::ReallyHidden, cl::init(0.1),
+    cl::desc("The weight of conditional forward jumps for ExtTSP value"));
+
+static cl::opt<double> ForwardWeightUncond(
+    "ext-tsp-forward-weight-uncond", cl::ReallyHidden, cl::init(0.1),
+    cl::desc("The weight of unconditional forward jumps for ExtTSP value"));
+
+static cl::opt<double> BackwardWeightCond(
+    "ext-tsp-backward-weight-cond", cl::ReallyHidden, cl::init(0.1),
+    cl::desc("The weight of conditional backward jumps for ExtTSP value"));
+
+static cl::opt<double> BackwardWeightUncond(
+    "ext-tsp-backward-weight-uncond", cl::ReallyHidden, cl::init(0.1),
+    cl::desc("The weight of unconditional backward jumps for ExtTSP value"));
+
+static cl::opt<double> FallthroughWeightCond(
+    "ext-tsp-fallthrough-weight-cond", cl::ReallyHidden, cl::init(1.0),
+    cl::desc("The weight of conditional fallthrough jumps for ExtTSP value"));
+
+static cl::opt<double> FallthroughWeightUncond(
+    "ext-tsp-fallthrough-weight-uncond", cl::ReallyHidden, cl::init(1.05),
+    cl::desc("The weight of unconditional fallthrough jumps for ExtTSP value"));
+
+static cl::opt<unsigned> ForwardDistance(
+    "ext-tsp-forward-distance", cl::ReallyHidden, cl::init(1024),
+    cl::desc("The maximum distance (in bytes) of a forward jump for ExtTSP"));
+
+static cl::opt<unsigned> BackwardDistance(
+    "ext-tsp-backward-distance", cl::ReallyHidden, cl::init(640),
+    cl::desc("The maximum distance (in bytes) of a backward jump for ExtTSP"));
+
+// The maximum size of a chain created by the algorithm. The size is bounded
+// so that the algorithm can efficiently process extremely large instances.
+static cl::opt<unsigned>
+    MaxChainSize("ext-tsp-max-chain-size", cl::ReallyHidden, cl::init(4096),
+                 cl::desc("The maximum size of a chain to create."));
+
+// The maximum size of a chain for splitting. Larger values of the threshold
+// may yield better quality at the cost of worse run-time.
+static cl::opt<unsigned> ChainSplitThreshold(
+    "ext-tsp-chain-split-threshold", cl::ReallyHidden, cl::init(128),
+    cl::desc("The maximum size of a chain to apply splitting"));
+
+// The option enables splitting (large) chains along in-coming and out-going
+// jumps. This typically results in better quality.
+static cl::opt<bool> EnableChainSplitAlongJumps(
+    "ext-tsp-enable-chain-split-along-jumps", cl::ReallyHidden, cl::init(true),
+    cl::desc("Enable splitting (large) chains along in-coming and out-going "
+             "jumps"));
+
+namespace {
+
+// Epsilon for comparison of doubles.
+constexpr double EPS = 1e-8;
+
+// Compute the Ext-TSP score for a given jump.
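+// For intuition with the default parameters: a conditional forward jump of
+// 256 bytes taken Count times contributes roughly
+//   ForwardWeightCond * (1 - 256 / 1024) * Count = 0.075 * Count,
+// whereas the same jump laid out as a fallthrough (distance 0) contributes
+//   FallthroughWeightCond * 1.0 * Count = 1.0 * Count,
+// so the objective strongly rewards turning hot jumps into fallthroughs.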
+double jumpExtTSPScore(uint64_t JumpDist, uint64_t JumpMaxDist, uint64_t Count, + double Weight) { + if (JumpDist > JumpMaxDist) + return 0; + double Prob = 1.0 - static_cast<double>(JumpDist) / JumpMaxDist; + return Weight * Prob * Count; +} + +// Compute the Ext-TSP score for a jump between a given pair of blocks, +// using their sizes, (estimated) addresses and the jump execution count. +double extTSPScore(uint64_t SrcAddr, uint64_t SrcSize, uint64_t DstAddr, + uint64_t Count, bool IsConditional) { + // Fallthrough + if (SrcAddr + SrcSize == DstAddr) { + return jumpExtTSPScore(0, 1, Count, + IsConditional ? FallthroughWeightCond + : FallthroughWeightUncond); + } + // Forward + if (SrcAddr + SrcSize < DstAddr) { + const uint64_t Dist = DstAddr - (SrcAddr + SrcSize); + return jumpExtTSPScore(Dist, ForwardDistance, Count, + IsConditional ? ForwardWeightCond + : ForwardWeightUncond); + } + // Backward + const uint64_t Dist = SrcAddr + SrcSize - DstAddr; + return jumpExtTSPScore(Dist, BackwardDistance, Count, + IsConditional ? BackwardWeightCond + : BackwardWeightUncond); +} + +/// A type of merging two chains, X and Y. The former chain is split into +/// X1 and X2 and then concatenated with Y in the order specified by the type. +enum class MergeTypeTy : int { X_Y, X1_Y_X2, Y_X2_X1, X2_X1_Y }; + +/// The gain of merging two chains, that is, the Ext-TSP score of the merge +/// together with the corresponfiding merge 'type' and 'offset'. +class MergeGainTy { +public: + explicit MergeGainTy() = default; + explicit MergeGainTy(double Score, size_t MergeOffset, MergeTypeTy MergeType) + : Score(Score), MergeOffset(MergeOffset), MergeType(MergeType) {} + + double score() const { return Score; } + + size_t mergeOffset() const { return MergeOffset; } + + MergeTypeTy mergeType() const { return MergeType; } + + // Returns 'true' iff Other is preferred over this. + bool operator<(const MergeGainTy &Other) const { + return (Other.Score > EPS && Other.Score > Score + EPS); + } + + // Update the current gain if Other is preferred over this. + void updateIfLessThan(const MergeGainTy &Other) { + if (*this < Other) + *this = Other; + } + +private: + double Score{-1.0}; + size_t MergeOffset{0}; + MergeTypeTy MergeType{MergeTypeTy::X_Y}; +}; + +class Jump; +class Chain; +class ChainEdge; + +/// A node in the graph, typically corresponding to a basic block in CFG. +class Block { +public: + Block(const Block &) = delete; + Block(Block &&) = default; + Block &operator=(const Block &) = delete; + Block &operator=(Block &&) = default; + + // The original index of the block in CFG. + size_t Index{0}; + // The index of the block in the current chain. + size_t CurIndex{0}; + // Size of the block in the binary. + uint64_t Size{0}; + // Execution count of the block in the profile data. + uint64_t ExecutionCount{0}; + // Current chain of the node. + Chain *CurChain{nullptr}; + // An offset of the block in the current chain. + mutable uint64_t EstimatedAddr{0}; + // Forced successor of the block in CFG. + Block *ForcedSucc{nullptr}; + // Forced predecessor of the block in CFG. + Block *ForcedPred{nullptr}; + // Outgoing jumps from the block. + std::vector<Jump *> OutJumps; + // Incoming jumps to the block. + std::vector<Jump *> InJumps; + +public: + explicit Block(size_t Index, uint64_t Size, uint64_t EC) + : Index(Index), Size(Size), ExecutionCount(EC) {} + bool isEntry() const { return Index == 0; } +}; + +/// An arc in the graph, typically corresponding to a jump between two blocks. 
+class Jump { +public: + Jump(const Jump &) = delete; + Jump(Jump &&) = default; + Jump &operator=(const Jump &) = delete; + Jump &operator=(Jump &&) = default; + + // Source block of the jump. + Block *Source; + // Target block of the jump. + Block *Target; + // Execution count of the arc in the profile data. + uint64_t ExecutionCount{0}; + // Whether the jump corresponds to a conditional branch. + bool IsConditional{false}; + +public: + explicit Jump(Block *Source, Block *Target, uint64_t ExecutionCount) + : Source(Source), Target(Target), ExecutionCount(ExecutionCount) {} +}; + +/// A chain (ordered sequence) of blocks. +class Chain { +public: + Chain(const Chain &) = delete; + Chain(Chain &&) = default; + Chain &operator=(const Chain &) = delete; + Chain &operator=(Chain &&) = default; + + explicit Chain(uint64_t Id, Block *Block) + : Id(Id), Score(0), Blocks(1, Block) {} + + uint64_t id() const { return Id; } + + bool isEntry() const { return Blocks[0]->Index == 0; } + + bool isCold() const { + for (auto *Block : Blocks) { + if (Block->ExecutionCount > 0) + return false; + } + return true; + } + + double score() const { return Score; } + + void setScore(double NewScore) { Score = NewScore; } + + const std::vector<Block *> &blocks() const { return Blocks; } + + size_t numBlocks() const { return Blocks.size(); } + + const std::vector<std::pair<Chain *, ChainEdge *>> &edges() const { + return Edges; + } + + ChainEdge *getEdge(Chain *Other) const { + for (auto It : Edges) { + if (It.first == Other) + return It.second; + } + return nullptr; + } + + void removeEdge(Chain *Other) { + auto It = Edges.begin(); + while (It != Edges.end()) { + if (It->first == Other) { + Edges.erase(It); + return; + } + It++; + } + } + + void addEdge(Chain *Other, ChainEdge *Edge) { + Edges.push_back(std::make_pair(Other, Edge)); + } + + void merge(Chain *Other, const std::vector<Block *> &MergedBlocks) { + Blocks = MergedBlocks; + // Update the block's chains + for (size_t Idx = 0; Idx < Blocks.size(); Idx++) { + Blocks[Idx]->CurChain = this; + Blocks[Idx]->CurIndex = Idx; + } + } + + void mergeEdges(Chain *Other); + + void clear() { + Blocks.clear(); + Blocks.shrink_to_fit(); + Edges.clear(); + Edges.shrink_to_fit(); + } + +private: + // Unique chain identifier. + uint64_t Id; + // Cached ext-tsp score for the chain. + double Score; + // Blocks of the chain. + std::vector<Block *> Blocks; + // Adjacent chains and corresponding edges (lists of jumps). + std::vector<std::pair<Chain *, ChainEdge *>> Edges; +}; + +/// An edge in CFG representing jumps between two chains. 
+/// When blocks are merged into chains, the edges are combined too so that +/// there is always at most one edge between a pair of chains +class ChainEdge { +public: + ChainEdge(const ChainEdge &) = delete; + ChainEdge(ChainEdge &&) = default; + ChainEdge &operator=(const ChainEdge &) = delete; + ChainEdge &operator=(ChainEdge &&) = default; + + explicit ChainEdge(Jump *Jump) + : SrcChain(Jump->Source->CurChain), DstChain(Jump->Target->CurChain), + Jumps(1, Jump) {} + + const std::vector<Jump *> &jumps() const { return Jumps; } + + void changeEndpoint(Chain *From, Chain *To) { + if (From == SrcChain) + SrcChain = To; + if (From == DstChain) + DstChain = To; + } + + void appendJump(Jump *Jump) { Jumps.push_back(Jump); } + + void moveJumps(ChainEdge *Other) { + Jumps.insert(Jumps.end(), Other->Jumps.begin(), Other->Jumps.end()); + Other->Jumps.clear(); + Other->Jumps.shrink_to_fit(); + } + + bool hasCachedMergeGain(Chain *Src, Chain *Dst) const { + return Src == SrcChain ? CacheValidForward : CacheValidBackward; + } + + MergeGainTy getCachedMergeGain(Chain *Src, Chain *Dst) const { + return Src == SrcChain ? CachedGainForward : CachedGainBackward; + } + + void setCachedMergeGain(Chain *Src, Chain *Dst, MergeGainTy MergeGain) { + if (Src == SrcChain) { + CachedGainForward = MergeGain; + CacheValidForward = true; + } else { + CachedGainBackward = MergeGain; + CacheValidBackward = true; + } + } + + void invalidateCache() { + CacheValidForward = false; + CacheValidBackward = false; + } + +private: + // Source chain. + Chain *SrcChain{nullptr}; + // Destination chain. + Chain *DstChain{nullptr}; + // Original jumps in the binary with correspinding execution counts. + std::vector<Jump *> Jumps; + // Cached ext-tsp value for merging the pair of chains. + // Since the gain of merging (Src, Dst) and (Dst, Src) might be different, + // we store both values here. + MergeGainTy CachedGainForward; + MergeGainTy CachedGainBackward; + // Whether the cached value must be recomputed. + bool CacheValidForward{false}; + bool CacheValidBackward{false}; +}; + +void Chain::mergeEdges(Chain *Other) { + assert(this != Other && "cannot merge a chain with itself"); + + // Update edges adjacent to chain Other + for (auto EdgeIt : Other->Edges) { + Chain *DstChain = EdgeIt.first; + ChainEdge *DstEdge = EdgeIt.second; + Chain *TargetChain = DstChain == Other ? this : DstChain; + ChainEdge *CurEdge = getEdge(TargetChain); + if (CurEdge == nullptr) { + DstEdge->changeEndpoint(Other, this); + this->addEdge(TargetChain, DstEdge); + if (DstChain != this && DstChain != Other) { + DstChain->addEdge(this, DstEdge); + } + } else { + CurEdge->moveJumps(DstEdge); + } + // Cleanup leftover edge + if (DstChain != Other) { + DstChain->removeEdge(Other); + } + } +} + +using BlockIter = std::vector<Block *>::const_iterator; + +/// A wrapper around three chains of blocks; it is used to avoid extra +/// instantiation of the vectors. 
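+///
+/// For example, merging X = [x1, x2, x3] and Y = [y1, y2] at offset 2 with
+/// merge type X1_Y_X2 is represented, without copying any blocks, as
+///   MergedChain(X.begin(), X.begin() + 2, Y.begin(), Y.end(),
+///               X.begin() + 2, X.end())
+/// and enumerates the blocks in the order x1, x2, y1, y2, x3.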
+class MergedChain { +public: + MergedChain(BlockIter Begin1, BlockIter End1, BlockIter Begin2 = BlockIter(), + BlockIter End2 = BlockIter(), BlockIter Begin3 = BlockIter(), + BlockIter End3 = BlockIter()) + : Begin1(Begin1), End1(End1), Begin2(Begin2), End2(End2), Begin3(Begin3), + End3(End3) {} + + template <typename F> void forEach(const F &Func) const { + for (auto It = Begin1; It != End1; It++) + Func(*It); + for (auto It = Begin2; It != End2; It++) + Func(*It); + for (auto It = Begin3; It != End3; It++) + Func(*It); + } + + std::vector<Block *> getBlocks() const { + std::vector<Block *> Result; + Result.reserve(std::distance(Begin1, End1) + std::distance(Begin2, End2) + + std::distance(Begin3, End3)); + Result.insert(Result.end(), Begin1, End1); + Result.insert(Result.end(), Begin2, End2); + Result.insert(Result.end(), Begin3, End3); + return Result; + } + + const Block *getFirstBlock() const { return *Begin1; } + +private: + BlockIter Begin1; + BlockIter End1; + BlockIter Begin2; + BlockIter End2; + BlockIter Begin3; + BlockIter End3; +}; + +/// The implementation of the ExtTSP algorithm. +class ExtTSPImpl { + using EdgeT = std::pair<uint64_t, uint64_t>; + using EdgeCountMap = std::vector<std::pair<EdgeT, uint64_t>>; + +public: + ExtTSPImpl(size_t NumNodes, const std::vector<uint64_t> &NodeSizes, + const std::vector<uint64_t> &NodeCounts, + const EdgeCountMap &EdgeCounts) + : NumNodes(NumNodes) { + initialize(NodeSizes, NodeCounts, EdgeCounts); + } + + /// Run the algorithm and return an optimized ordering of blocks. + void run(std::vector<uint64_t> &Result) { + // Pass 1: Merge blocks with their mutually forced successors + mergeForcedPairs(); + + // Pass 2: Merge pairs of chains while improving the ExtTSP objective + mergeChainPairs(); + + // Pass 3: Merge cold blocks to reduce code size + mergeColdChains(); + + // Collect blocks from all chains + concatChains(Result); + } + +private: + /// Initialize the algorithm's data structures. 
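+  ///
+  /// NodeSizes and NodeCounts are indexed by the original block index, and
+  /// EdgeCounts holds ((Pred, Succ), Count) pairs. For instance, a function
+  /// whose block 1 is a hot self-loop entered once from block 0 could be
+  /// described (sizes in bytes) as
+  ///   NodeSizes = {16, 32}, NodeCounts = {1, 100},
+  ///   EdgeCounts = {{{0, 1}, 1}, {{1, 1}, 99}};
+  /// self-edges such as {1, 1} are not turned into jumps but still count
+  /// towards the out-degree used to mark jumps as conditional.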
+ void initialize(const std::vector<uint64_t> &NodeSizes, + const std::vector<uint64_t> &NodeCounts, + const EdgeCountMap &EdgeCounts) { + // Initialize blocks + AllBlocks.reserve(NumNodes); + for (uint64_t Node = 0; Node < NumNodes; Node++) { + uint64_t Size = std::max<uint64_t>(NodeSizes[Node], 1ULL); + uint64_t ExecutionCount = NodeCounts[Node]; + // The execution count of the entry block is set to at least 1 + if (Node == 0 && ExecutionCount == 0) + ExecutionCount = 1; + AllBlocks.emplace_back(Node, Size, ExecutionCount); + } + + // Initialize jumps between blocks + SuccNodes.resize(NumNodes); + PredNodes.resize(NumNodes); + std::vector<uint64_t> OutDegree(NumNodes, 0); + AllJumps.reserve(EdgeCounts.size()); + for (auto It : EdgeCounts) { + auto Pred = It.first.first; + auto Succ = It.first.second; + OutDegree[Pred]++; + // Ignore self-edges + if (Pred == Succ) + continue; + + SuccNodes[Pred].push_back(Succ); + PredNodes[Succ].push_back(Pred); + auto ExecutionCount = It.second; + if (ExecutionCount > 0) { + auto &Block = AllBlocks[Pred]; + auto &SuccBlock = AllBlocks[Succ]; + AllJumps.emplace_back(&Block, &SuccBlock, ExecutionCount); + SuccBlock.InJumps.push_back(&AllJumps.back()); + Block.OutJumps.push_back(&AllJumps.back()); + } + } + for (auto &Jump : AllJumps) { + assert(OutDegree[Jump.Source->Index] > 0); + Jump.IsConditional = OutDegree[Jump.Source->Index] > 1; + } + + // Initialize chains + AllChains.reserve(NumNodes); + HotChains.reserve(NumNodes); + for (Block &Block : AllBlocks) { + AllChains.emplace_back(Block.Index, &Block); + Block.CurChain = &AllChains.back(); + if (Block.ExecutionCount > 0) { + HotChains.push_back(&AllChains.back()); + } + } + + // Initialize chain edges + AllEdges.reserve(AllJumps.size()); + for (Block &Block : AllBlocks) { + for (auto &Jump : Block.OutJumps) { + auto SuccBlock = Jump->Target; + ChainEdge *CurEdge = Block.CurChain->getEdge(SuccBlock->CurChain); + // this edge is already present in the graph + if (CurEdge != nullptr) { + assert(SuccBlock->CurChain->getEdge(Block.CurChain) != nullptr); + CurEdge->appendJump(Jump); + continue; + } + // this is a new edge + AllEdges.emplace_back(Jump); + Block.CurChain->addEdge(SuccBlock->CurChain, &AllEdges.back()); + SuccBlock->CurChain->addEdge(Block.CurChain, &AllEdges.back()); + } + } + } + + /// For a pair of blocks, A and B, block B is the forced successor of A, + /// if (i) all jumps (based on profile) from A goes to B and (ii) all jumps + /// to B are from A. Such blocks should be adjacent in the optimal ordering; + /// the method finds and merges such pairs of blocks. + void mergeForcedPairs() { + // Find fallthroughs based on edge weights + for (auto &Block : AllBlocks) { + if (SuccNodes[Block.Index].size() == 1 && + PredNodes[SuccNodes[Block.Index][0]].size() == 1 && + SuccNodes[Block.Index][0] != 0) { + size_t SuccIndex = SuccNodes[Block.Index][0]; + Block.ForcedSucc = &AllBlocks[SuccIndex]; + AllBlocks[SuccIndex].ForcedPred = &Block; + } + } + + // There might be 'cycles' in the forced dependencies, since profile + // data isn't 100% accurate. Typically this is observed in loops, when the + // loop edges are the hottest successors for the basic blocks of the loop. + // Break the cycles by choosing the block with the smallest index as the + // head. This helps to keep the original order of the loops, which likely + // have already been rotated in the optimized manner. 
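+    // For example, in a hot two-block loop whose non-entry blocks b1 and b2
+    // are each other's only profiled successor and predecessor, b1 and b2
+    // become mutually forced and form the cycle b1 -> b2 -> b1. The loop
+    // below keeps b1 (the smaller index) as the head by dropping b2's forced
+    // successor link back to b1.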
+ for (auto &Block : AllBlocks) { + if (Block.ForcedSucc == nullptr || Block.ForcedPred == nullptr) + continue; + + auto SuccBlock = Block.ForcedSucc; + while (SuccBlock != nullptr && SuccBlock != &Block) { + SuccBlock = SuccBlock->ForcedSucc; + } + if (SuccBlock == nullptr) + continue; + // Break the cycle + AllBlocks[Block.ForcedPred->Index].ForcedSucc = nullptr; + Block.ForcedPred = nullptr; + } + + // Merge blocks with their fallthrough successors + for (auto &Block : AllBlocks) { + if (Block.ForcedPred == nullptr && Block.ForcedSucc != nullptr) { + auto CurBlock = &Block; + while (CurBlock->ForcedSucc != nullptr) { + const auto NextBlock = CurBlock->ForcedSucc; + mergeChains(Block.CurChain, NextBlock->CurChain, 0, MergeTypeTy::X_Y); + CurBlock = NextBlock; + } + } + } + } + + /// Merge pairs of chains while improving the ExtTSP objective. + void mergeChainPairs() { + /// Deterministically compare pairs of chains + auto compareChainPairs = [](const Chain *A1, const Chain *B1, + const Chain *A2, const Chain *B2) { + if (A1 != A2) + return A1->id() < A2->id(); + return B1->id() < B2->id(); + }; + + while (HotChains.size() > 1) { + Chain *BestChainPred = nullptr; + Chain *BestChainSucc = nullptr; + auto BestGain = MergeGainTy(); + // Iterate over all pairs of chains + for (Chain *ChainPred : HotChains) { + // Get candidates for merging with the current chain + for (auto EdgeIter : ChainPred->edges()) { + Chain *ChainSucc = EdgeIter.first; + class ChainEdge *ChainEdge = EdgeIter.second; + // Ignore loop edges + if (ChainPred == ChainSucc) + continue; + + // Stop early if the combined chain violates the maximum allowed size + if (ChainPred->numBlocks() + ChainSucc->numBlocks() >= MaxChainSize) + continue; + + // Compute the gain of merging the two chains + MergeGainTy CurGain = + getBestMergeGain(ChainPred, ChainSucc, ChainEdge); + if (CurGain.score() <= EPS) + continue; + + if (BestGain < CurGain || + (std::abs(CurGain.score() - BestGain.score()) < EPS && + compareChainPairs(ChainPred, ChainSucc, BestChainPred, + BestChainSucc))) { + BestGain = CurGain; + BestChainPred = ChainPred; + BestChainSucc = ChainSucc; + } + } + } + + // Stop merging when there is no improvement + if (BestGain.score() <= EPS) + break; + + // Merge the best pair of chains + mergeChains(BestChainPred, BestChainSucc, BestGain.mergeOffset(), + BestGain.mergeType()); + } + } + + /// Merge remaining blocks into chains w/o taking jump counts into + /// consideration. This allows to maintain the original block order in the + /// absense of profile data + void mergeColdChains() { + for (size_t SrcBB = 0; SrcBB < NumNodes; SrcBB++) { + // Iterating in reverse order to make sure original fallthrough jumps are + // merged first; this might be beneficial for code size. + size_t NumSuccs = SuccNodes[SrcBB].size(); + for (size_t Idx = 0; Idx < NumSuccs; Idx++) { + auto DstBB = SuccNodes[SrcBB][NumSuccs - Idx - 1]; + auto SrcChain = AllBlocks[SrcBB].CurChain; + auto DstChain = AllBlocks[DstBB].CurChain; + if (SrcChain != DstChain && !DstChain->isEntry() && + SrcChain->blocks().back()->Index == SrcBB && + DstChain->blocks().front()->Index == DstBB && + SrcChain->isCold() == DstChain->isCold()) { + mergeChains(SrcChain, DstChain, 0, MergeTypeTy::X_Y); + } + } + } + } + + /// Compute the Ext-TSP score for a given block order and a list of jumps. 
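+  ///
+  /// Addresses are estimated by laying out the blocks of MergedBlocks
+  /// contiguously from address zero in the given order, so the score depends
+  /// only on the relative order and the sizes of the blocks.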
+ double extTSPScore(const MergedChain &MergedBlocks, + const std::vector<Jump *> &Jumps) const { + if (Jumps.empty()) + return 0.0; + uint64_t CurAddr = 0; + MergedBlocks.forEach([&](const Block *BB) { + BB->EstimatedAddr = CurAddr; + CurAddr += BB->Size; + }); + + double Score = 0; + for (auto &Jump : Jumps) { + const Block *SrcBlock = Jump->Source; + const Block *DstBlock = Jump->Target; + Score += ::extTSPScore(SrcBlock->EstimatedAddr, SrcBlock->Size, + DstBlock->EstimatedAddr, Jump->ExecutionCount, + Jump->IsConditional); + } + return Score; + } + + /// Compute the gain of merging two chains. + /// + /// The function considers all possible ways of merging two chains and + /// computes the one having the largest increase in ExtTSP objective. The + /// result is a pair with the first element being the gain and the second + /// element being the corresponding merging type. + MergeGainTy getBestMergeGain(Chain *ChainPred, Chain *ChainSucc, + ChainEdge *Edge) const { + if (Edge->hasCachedMergeGain(ChainPred, ChainSucc)) { + return Edge->getCachedMergeGain(ChainPred, ChainSucc); + } + + // Precompute jumps between ChainPred and ChainSucc + auto Jumps = Edge->jumps(); + ChainEdge *EdgePP = ChainPred->getEdge(ChainPred); + if (EdgePP != nullptr) { + Jumps.insert(Jumps.end(), EdgePP->jumps().begin(), EdgePP->jumps().end()); + } + assert(!Jumps.empty() && "trying to merge chains w/o jumps"); + + // The object holds the best currently chosen gain of merging the two chains + MergeGainTy Gain = MergeGainTy(); + + /// Given a merge offset and a list of merge types, try to merge two chains + /// and update Gain with a better alternative + auto tryChainMerging = [&](size_t Offset, + const std::vector<MergeTypeTy> &MergeTypes) { + // Skip merging corresponding to concatenation w/o splitting + if (Offset == 0 || Offset == ChainPred->blocks().size()) + return; + // Skip merging if it breaks Forced successors + auto BB = ChainPred->blocks()[Offset - 1]; + if (BB->ForcedSucc != nullptr) + return; + // Apply the merge, compute the corresponding gain, and update the best + // value, if the merge is beneficial + for (const auto &MergeType : MergeTypes) { + Gain.updateIfLessThan( + computeMergeGain(ChainPred, ChainSucc, Jumps, Offset, MergeType)); + } + }; + + // Try to concatenate two chains w/o splitting + Gain.updateIfLessThan( + computeMergeGain(ChainPred, ChainSucc, Jumps, 0, MergeTypeTy::X_Y)); + + if (EnableChainSplitAlongJumps) { + // Attach (a part of) ChainPred before the first block of ChainSucc + for (auto &Jump : ChainSucc->blocks().front()->InJumps) { + const auto SrcBlock = Jump->Source; + if (SrcBlock->CurChain != ChainPred) + continue; + size_t Offset = SrcBlock->CurIndex + 1; + tryChainMerging(Offset, {MergeTypeTy::X1_Y_X2, MergeTypeTy::X2_X1_Y}); + } + + // Attach (a part of) ChainPred after the last block of ChainSucc + for (auto &Jump : ChainSucc->blocks().back()->OutJumps) { + const auto DstBlock = Jump->Source; + if (DstBlock->CurChain != ChainPred) + continue; + size_t Offset = DstBlock->CurIndex; + tryChainMerging(Offset, {MergeTypeTy::X1_Y_X2, MergeTypeTy::Y_X2_X1}); + } + } + + // Try to break ChainPred in various ways and concatenate with ChainSucc + if (ChainPred->blocks().size() <= ChainSplitThreshold) { + for (size_t Offset = 1; Offset < ChainPred->blocks().size(); Offset++) { + // Try to split the chain in different ways. 
In practice, applying + // X2_Y_X1 merging is almost never provides benefits; thus, we exclude + // it from consideration to reduce the search space + tryChainMerging(Offset, {MergeTypeTy::X1_Y_X2, MergeTypeTy::Y_X2_X1, + MergeTypeTy::X2_X1_Y}); + } + } + Edge->setCachedMergeGain(ChainPred, ChainSucc, Gain); + return Gain; + } + + /// Compute the score gain of merging two chains, respecting a given + /// merge 'type' and 'offset'. + /// + /// The two chains are not modified in the method. + MergeGainTy computeMergeGain(const Chain *ChainPred, const Chain *ChainSucc, + const std::vector<Jump *> &Jumps, + size_t MergeOffset, + MergeTypeTy MergeType) const { + auto MergedBlocks = mergeBlocks(ChainPred->blocks(), ChainSucc->blocks(), + MergeOffset, MergeType); + + // Do not allow a merge that does not preserve the original entry block + if ((ChainPred->isEntry() || ChainSucc->isEntry()) && + !MergedBlocks.getFirstBlock()->isEntry()) + return MergeGainTy(); + + // The gain for the new chain + auto NewGainScore = extTSPScore(MergedBlocks, Jumps) - ChainPred->score(); + return MergeGainTy(NewGainScore, MergeOffset, MergeType); + } + + /// Merge two chains of blocks respecting a given merge 'type' and 'offset'. + /// + /// If MergeType == 0, then the result is a concatenation of two chains. + /// Otherwise, the first chain is cut into two sub-chains at the offset, + /// and merged using all possible ways of concatenating three chains. + MergedChain mergeBlocks(const std::vector<Block *> &X, + const std::vector<Block *> &Y, size_t MergeOffset, + MergeTypeTy MergeType) const { + // Split the first chain, X, into X1 and X2 + BlockIter BeginX1 = X.begin(); + BlockIter EndX1 = X.begin() + MergeOffset; + BlockIter BeginX2 = X.begin() + MergeOffset; + BlockIter EndX2 = X.end(); + BlockIter BeginY = Y.begin(); + BlockIter EndY = Y.end(); + + // Construct a new chain from the three existing ones + switch (MergeType) { + case MergeTypeTy::X_Y: + return MergedChain(BeginX1, EndX2, BeginY, EndY); + case MergeTypeTy::X1_Y_X2: + return MergedChain(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2); + case MergeTypeTy::Y_X2_X1: + return MergedChain(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1); + case MergeTypeTy::X2_X1_Y: + return MergedChain(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY); + } + llvm_unreachable("unexpected chain merge type"); + } + + /// Merge chain From into chain Into, update the list of active chains, + /// adjacency information, and the corresponding cached values. + void mergeChains(Chain *Into, Chain *From, size_t MergeOffset, + MergeTypeTy MergeType) { + assert(Into != From && "a chain cannot be merged with itself"); + + // Merge the blocks + MergedChain MergedBlocks = + mergeBlocks(Into->blocks(), From->blocks(), MergeOffset, MergeType); + Into->merge(From, MergedBlocks.getBlocks()); + Into->mergeEdges(From); + From->clear(); + + // Update cached ext-tsp score for the new chain + ChainEdge *SelfEdge = Into->getEdge(Into); + if (SelfEdge != nullptr) { + MergedBlocks = MergedChain(Into->blocks().begin(), Into->blocks().end()); + Into->setScore(extTSPScore(MergedBlocks, SelfEdge->jumps())); + } + + // Remove chain From from the list of active chains + llvm::erase_value(HotChains, From); + + // Invalidate caches + for (auto EdgeIter : Into->edges()) { + EdgeIter.second->invalidateCache(); + } + } + + /// Concatenate all chains into a final order of blocks. 
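+  ///
+  /// Chains are emitted in decreasing order of density, i.e. the total
+  /// execution count of the chain divided by its total size in bytes (for
+  /// example, a chain with counts {100, 50} and sizes {16, 16} has density
+  /// 150 / 32). The chain containing the original entry block always comes
+  /// first, and ties are broken by chain id to keep the order deterministic.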
+ void concatChains(std::vector<uint64_t> &Order) { + // Collect chains and calculate some stats for their sorting + std::vector<Chain *> SortedChains; + DenseMap<const Chain *, double> ChainDensity; + for (auto &Chain : AllChains) { + if (!Chain.blocks().empty()) { + SortedChains.push_back(&Chain); + // Using doubles to avoid overflow of ExecutionCount + double Size = 0; + double ExecutionCount = 0; + for (auto *Block : Chain.blocks()) { + Size += static_cast<double>(Block->Size); + ExecutionCount += static_cast<double>(Block->ExecutionCount); + } + assert(Size > 0 && "a chain of zero size"); + ChainDensity[&Chain] = ExecutionCount / Size; + } + } + + // Sorting chains by density in the decreasing order + std::stable_sort(SortedChains.begin(), SortedChains.end(), + [&](const Chain *C1, const Chain *C2) { + // Make sure the original entry block is at the + // beginning of the order + if (C1->isEntry() != C2->isEntry()) { + return C1->isEntry(); + } + + const double D1 = ChainDensity[C1]; + const double D2 = ChainDensity[C2]; + // Compare by density and break ties by chain identifiers + return (D1 != D2) ? (D1 > D2) : (C1->id() < C2->id()); + }); + + // Collect the blocks in the order specified by their chains + Order.reserve(NumNodes); + for (Chain *Chain : SortedChains) { + for (Block *Block : Chain->blocks()) { + Order.push_back(Block->Index); + } + } + } + +private: + /// The number of nodes in the graph. + const size_t NumNodes; + + /// Successors of each node. + std::vector<std::vector<uint64_t>> SuccNodes; + + /// Predecessors of each node. + std::vector<std::vector<uint64_t>> PredNodes; + + /// All basic blocks. + std::vector<Block> AllBlocks; + + /// All jumps between blocks. + std::vector<Jump> AllJumps; + + /// All chains of basic blocks. + std::vector<Chain> AllChains; + + /// All edges between chains. + std::vector<ChainEdge> AllEdges; + + /// Active chains. The vector gets updated at runtime when chains are merged. + std::vector<Chain *> HotChains; +}; + +} // end of anonymous namespace + +std::vector<uint64_t> llvm::applyExtTspLayout( + const std::vector<uint64_t> &NodeSizes, + const std::vector<uint64_t> &NodeCounts, + const std::vector<std::pair<EdgeT, uint64_t>> &EdgeCounts) { + size_t NumNodes = NodeSizes.size(); + + // Verify correctness of the input data. + assert(NodeCounts.size() == NodeSizes.size() && "Incorrect input"); + assert(NumNodes > 2 && "Incorrect input"); + + // Apply the reordering algorithm. + auto Alg = ExtTSPImpl(NumNodes, NodeSizes, NodeCounts, EdgeCounts); + std::vector<uint64_t> Result; + Alg.run(Result); + + // Verify correctness of the output. 
+ assert(Result.front() == 0 && "Original entry point is not preserved"); + assert(Result.size() == NumNodes && "Incorrect size of reordered layout"); + return Result; +} + +double llvm::calcExtTspScore( + const std::vector<uint64_t> &Order, const std::vector<uint64_t> &NodeSizes, + const std::vector<uint64_t> &NodeCounts, + const std::vector<std::pair<EdgeT, uint64_t>> &EdgeCounts) { + // Estimate addresses of the blocks in memory + std::vector<uint64_t> Addr(NodeSizes.size(), 0); + for (size_t Idx = 1; Idx < Order.size(); Idx++) { + Addr[Order[Idx]] = Addr[Order[Idx - 1]] + NodeSizes[Order[Idx - 1]]; + } + std::vector<uint64_t> OutDegree(NodeSizes.size(), 0); + for (auto It : EdgeCounts) { + auto Pred = It.first.first; + OutDegree[Pred]++; + } + + // Increase the score for each jump + double Score = 0; + for (auto It : EdgeCounts) { + auto Pred = It.first.first; + auto Succ = It.first.second; + uint64_t Count = It.second; + bool IsConditional = OutDegree[Pred] > 1; + Score += ::extTSPScore(Addr[Pred], NodeSizes[Pred], Addr[Succ], Count, + IsConditional); + } + return Score; +} + +double llvm::calcExtTspScore( + const std::vector<uint64_t> &NodeSizes, + const std::vector<uint64_t> &NodeCounts, + const std::vector<std::pair<EdgeT, uint64_t>> &EdgeCounts) { + std::vector<uint64_t> Order(NodeSizes.size()); + for (size_t Idx = 0; Idx < NodeSizes.size(); Idx++) { + Order[Idx] = Idx; + } + return calcExtTspScore(Order, NodeSizes, NodeCounts, EdgeCounts); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/CodeMoverUtils.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/CodeMoverUtils.cpp new file mode 100644 index 0000000000..4a67197417 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/CodeMoverUtils.cpp @@ -0,0 +1,478 @@ +//===- CodeMoverUtils.cpp - CodeMover Utilities ----------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This family of functions perform movements on basic blocks, and instructions +// contained within a function. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/CodeMoverUtils.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Dominators.h" + +using namespace llvm; + +#define DEBUG_TYPE "codemover-utils" + +STATISTIC(HasDependences, + "Cannot move across instructions that has memory dependences"); +STATISTIC(MayThrowException, "Cannot move across instructions that may throw"); +STATISTIC(NotControlFlowEquivalent, + "Instructions are not control flow equivalent"); +STATISTIC(NotMovedPHINode, "Movement of PHINodes are not supported"); +STATISTIC(NotMovedTerminator, "Movement of Terminator are not supported"); + +namespace { +/// Represent a control condition. A control condition is a condition of a +/// terminator to decide which successors to execute. The pointer field +/// represents the address of the condition of the terminator. The integer field +/// is a bool, it is true when the basic block is executed when V is true. 
For +/// example, `br %cond, bb0, bb1` %cond is a control condition of bb0 with the +/// integer field equals to true, while %cond is a control condition of bb1 with +/// the integer field equals to false. +using ControlCondition = PointerIntPair<Value *, 1, bool>; +#ifndef NDEBUG +raw_ostream &operator<<(raw_ostream &OS, const ControlCondition &C) { + OS << "[" << *C.getPointer() << ", " << (C.getInt() ? "true" : "false") + << "]"; + return OS; +} +#endif + +/// Represent a set of control conditions required to execute ToBB from FromBB. +class ControlConditions { + using ConditionVectorTy = SmallVector<ControlCondition, 6>; + + /// A SmallVector of control conditions. + ConditionVectorTy Conditions; + +public: + /// Return a ControlConditions which stores all conditions required to execute + /// \p BB from \p Dominator. If \p MaxLookup is non-zero, it limits the + /// number of conditions to collect. Return std::nullopt if not all conditions + /// are collected successfully, or we hit the limit. + static const std::optional<ControlConditions> + collectControlConditions(const BasicBlock &BB, const BasicBlock &Dominator, + const DominatorTree &DT, + const PostDominatorTree &PDT, + unsigned MaxLookup = 6); + + /// Return true if there exists no control conditions required to execute ToBB + /// from FromBB. + bool isUnconditional() const { return Conditions.empty(); } + + /// Return a constant reference of Conditions. + const ConditionVectorTy &getControlConditions() const { return Conditions; } + + /// Add \p V as one of the ControlCondition in Condition with IsTrueCondition + /// equals to \p True. Return true if inserted successfully. + bool addControlCondition(ControlCondition C); + + /// Return true if for all control conditions in Conditions, there exists an + /// equivalent control condition in \p Other.Conditions. + bool isEquivalent(const ControlConditions &Other) const; + + /// Return true if \p C1 and \p C2 are equivalent. + static bool isEquivalent(const ControlCondition &C1, + const ControlCondition &C2); + +private: + ControlConditions() = default; + + static bool isEquivalent(const Value &V1, const Value &V2); + static bool isInverse(const Value &V1, const Value &V2); +}; +} // namespace + +static bool domTreeLevelBefore(DominatorTree *DT, const Instruction *InstA, + const Instruction *InstB) { + // Use ordered basic block in case the 2 instructions are in the same + // block. + if (InstA->getParent() == InstB->getParent()) + return InstA->comesBefore(InstB); + + DomTreeNode *DA = DT->getNode(InstA->getParent()); + DomTreeNode *DB = DT->getNode(InstB->getParent()); + return DA->getLevel() < DB->getLevel(); +} + +const std::optional<ControlConditions> +ControlConditions::collectControlConditions(const BasicBlock &BB, + const BasicBlock &Dominator, + const DominatorTree &DT, + const PostDominatorTree &PDT, + unsigned MaxLookup) { + assert(DT.dominates(&Dominator, &BB) && "Expecting Dominator to dominate BB"); + + ControlConditions Conditions; + unsigned NumConditions = 0; + + // BB is executed unconditional from itself. + if (&Dominator == &BB) + return Conditions; + + const BasicBlock *CurBlock = &BB; + // Walk up the dominator tree from the associated DT node for BB to the + // associated DT node for Dominator. 
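+  // Worked example (hypothetical CFG, same notation as the class comment): if
+  // Dominator ends in `br %c1, %mid, %other` and %mid ends in `br %c2, %bb,
+  // %exit`, then walking up from BB == %bb collects {(%c2, true), (%c1, true)},
+  // assuming neither %bb nor %mid post-dominates the block it is reached from.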
+ do { + assert(DT.getNode(CurBlock) && "Expecting a valid DT node for CurBlock"); + BasicBlock *IDom = DT.getNode(CurBlock)->getIDom()->getBlock(); + assert(DT.dominates(&Dominator, IDom) && + "Expecting Dominator to dominate IDom"); + + // Limitation: can only handle branch instruction currently. + const BranchInst *BI = dyn_cast<BranchInst>(IDom->getTerminator()); + if (!BI) + return std::nullopt; + + bool Inserted = false; + if (PDT.dominates(CurBlock, IDom)) { + LLVM_DEBUG(dbgs() << CurBlock->getName() + << " is executed unconditionally from " + << IDom->getName() << "\n"); + } else if (PDT.dominates(CurBlock, BI->getSuccessor(0))) { + LLVM_DEBUG(dbgs() << CurBlock->getName() << " is executed when \"" + << *BI->getCondition() << "\" is true from " + << IDom->getName() << "\n"); + Inserted = Conditions.addControlCondition( + ControlCondition(BI->getCondition(), true)); + } else if (PDT.dominates(CurBlock, BI->getSuccessor(1))) { + LLVM_DEBUG(dbgs() << CurBlock->getName() << " is executed when \"" + << *BI->getCondition() << "\" is false from " + << IDom->getName() << "\n"); + Inserted = Conditions.addControlCondition( + ControlCondition(BI->getCondition(), false)); + } else + return std::nullopt; + + if (Inserted) + ++NumConditions; + + if (MaxLookup != 0 && NumConditions > MaxLookup) + return std::nullopt; + + CurBlock = IDom; + } while (CurBlock != &Dominator); + + return Conditions; +} + +bool ControlConditions::addControlCondition(ControlCondition C) { + bool Inserted = false; + if (none_of(Conditions, [&](ControlCondition &Exists) { + return ControlConditions::isEquivalent(C, Exists); + })) { + Conditions.push_back(C); + Inserted = true; + } + + LLVM_DEBUG(dbgs() << (Inserted ? "Inserted " : "Not inserted ") << C << "\n"); + return Inserted; +} + +bool ControlConditions::isEquivalent(const ControlConditions &Other) const { + if (Conditions.empty() && Other.Conditions.empty()) + return true; + + if (Conditions.size() != Other.Conditions.size()) + return false; + + return all_of(Conditions, [&](const ControlCondition &C) { + return any_of(Other.Conditions, [&](const ControlCondition &OtherC) { + return ControlConditions::isEquivalent(C, OtherC); + }); + }); +} + +bool ControlConditions::isEquivalent(const ControlCondition &C1, + const ControlCondition &C2) { + if (C1.getInt() == C2.getInt()) { + if (isEquivalent(*C1.getPointer(), *C2.getPointer())) + return true; + } else if (isInverse(*C1.getPointer(), *C2.getPointer())) + return true; + + return false; +} + +// FIXME: Use SCEV and reuse GVN/CSE logic to check for equivalence between +// Values. +// Currently, isEquivalent rely on other passes to ensure equivalent conditions +// have the same value, e.g. GVN. 
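+// For example, two distinct but structurally identical `icmp slt i32 %a, %b`
+// instructions are *not* considered equivalent by the pointer comparison
+// below; they only compare equal once a pass such as GVN has merged them into
+// a single Value.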
+bool ControlConditions::isEquivalent(const Value &V1, const Value &V2) { + return &V1 == &V2; +} + +bool ControlConditions::isInverse(const Value &V1, const Value &V2) { + if (const CmpInst *Cmp1 = dyn_cast<CmpInst>(&V1)) + if (const CmpInst *Cmp2 = dyn_cast<CmpInst>(&V2)) { + if (Cmp1->getPredicate() == Cmp2->getInversePredicate() && + Cmp1->getOperand(0) == Cmp2->getOperand(0) && + Cmp1->getOperand(1) == Cmp2->getOperand(1)) + return true; + + if (Cmp1->getPredicate() == + CmpInst::getSwappedPredicate(Cmp2->getInversePredicate()) && + Cmp1->getOperand(0) == Cmp2->getOperand(1) && + Cmp1->getOperand(1) == Cmp2->getOperand(0)) + return true; + } + return false; +} + +bool llvm::isControlFlowEquivalent(const Instruction &I0, const Instruction &I1, + const DominatorTree &DT, + const PostDominatorTree &PDT) { + return isControlFlowEquivalent(*I0.getParent(), *I1.getParent(), DT, PDT); +} + +bool llvm::isControlFlowEquivalent(const BasicBlock &BB0, const BasicBlock &BB1, + const DominatorTree &DT, + const PostDominatorTree &PDT) { + if (&BB0 == &BB1) + return true; + + if ((DT.dominates(&BB0, &BB1) && PDT.dominates(&BB1, &BB0)) || + (PDT.dominates(&BB0, &BB1) && DT.dominates(&BB1, &BB0))) + return true; + + // If the set of conditions required to execute BB0 and BB1 from their common + // dominator are the same, then BB0 and BB1 are control flow equivalent. + const BasicBlock *CommonDominator = DT.findNearestCommonDominator(&BB0, &BB1); + LLVM_DEBUG(dbgs() << "The nearest common dominator of " << BB0.getName() + << " and " << BB1.getName() << " is " + << CommonDominator->getName() << "\n"); + + const std::optional<ControlConditions> BB0Conditions = + ControlConditions::collectControlConditions(BB0, *CommonDominator, DT, + PDT); + if (BB0Conditions == std::nullopt) + return false; + + const std::optional<ControlConditions> BB1Conditions = + ControlConditions::collectControlConditions(BB1, *CommonDominator, DT, + PDT); + if (BB1Conditions == std::nullopt) + return false; + + return BB0Conditions->isEquivalent(*BB1Conditions); +} + +static bool reportInvalidCandidate(const Instruction &I, + llvm::Statistic &Stat) { + ++Stat; + LLVM_DEBUG(dbgs() << "Unable to move instruction: " << I << ". " + << Stat.getDesc()); + return false; +} + +/// Collect all instructions in between \p StartInst and \p EndInst, and store +/// them in \p InBetweenInsts. +static void +collectInstructionsInBetween(Instruction &StartInst, const Instruction &EndInst, + SmallPtrSetImpl<Instruction *> &InBetweenInsts) { + assert(InBetweenInsts.empty() && "Expecting InBetweenInsts to be empty"); + + /// Get the next instructions of \p I, and push them to \p WorkList. 
+ auto getNextInsts = [](Instruction &I, + SmallPtrSetImpl<Instruction *> &WorkList) { + if (Instruction *NextInst = I.getNextNode()) + WorkList.insert(NextInst); + else { + assert(I.isTerminator() && "Expecting a terminator instruction"); + for (BasicBlock *Succ : successors(&I)) + WorkList.insert(&Succ->front()); + } + }; + + SmallPtrSet<Instruction *, 10> WorkList; + getNextInsts(StartInst, WorkList); + while (!WorkList.empty()) { + Instruction *CurInst = *WorkList.begin(); + WorkList.erase(CurInst); + + if (CurInst == &EndInst) + continue; + + if (!InBetweenInsts.insert(CurInst).second) + continue; + + getNextInsts(*CurInst, WorkList); + } +} + +bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint, + DominatorTree &DT, const PostDominatorTree *PDT, + DependenceInfo *DI, bool CheckForEntireBlock) { + // Skip tests when we don't have PDT or DI + if (!PDT || !DI) + return false; + + // Cannot move itself before itself. + if (&I == &InsertPoint) + return false; + + // Not moved. + if (I.getNextNode() == &InsertPoint) + return true; + + if (isa<PHINode>(I) || isa<PHINode>(InsertPoint)) + return reportInvalidCandidate(I, NotMovedPHINode); + + if (I.isTerminator()) + return reportInvalidCandidate(I, NotMovedTerminator); + + // TODO remove this limitation. + if (!isControlFlowEquivalent(I, InsertPoint, DT, *PDT)) + return reportInvalidCandidate(I, NotControlFlowEquivalent); + + if (isReachedBefore(&I, &InsertPoint, &DT, PDT)) + for (const Use &U : I.uses()) + if (auto *UserInst = dyn_cast<Instruction>(U.getUser())) + if (UserInst != &InsertPoint && !DT.dominates(&InsertPoint, U)) + return false; + if (isReachedBefore(&InsertPoint, &I, &DT, PDT)) + for (const Value *Op : I.operands()) + if (auto *OpInst = dyn_cast<Instruction>(Op)) { + if (&InsertPoint == OpInst) + return false; + // If OpInst is an instruction that appears earlier in the same BB as + // I, then it is okay to move since OpInst will still be available. + if (CheckForEntireBlock && I.getParent() == OpInst->getParent() && + DT.dominates(OpInst, &I)) + continue; + if (!DT.dominates(OpInst, &InsertPoint)) + return false; + } + + DT.updateDFSNumbers(); + const bool MoveForward = domTreeLevelBefore(&DT, &I, &InsertPoint); + Instruction &StartInst = (MoveForward ? I : InsertPoint); + Instruction &EndInst = (MoveForward ? InsertPoint : I); + SmallPtrSet<Instruction *, 10> InstsToCheck; + collectInstructionsInBetween(StartInst, EndInst, InstsToCheck); + if (!MoveForward) + InstsToCheck.insert(&InsertPoint); + + // Check if there exists instructions which may throw, may synchonize, or may + // never return, from I to InsertPoint. + if (!isSafeToSpeculativelyExecute(&I)) + if (llvm::any_of(InstsToCheck, [](Instruction *I) { + if (I->mayThrow()) + return true; + + const CallBase *CB = dyn_cast<CallBase>(I); + if (!CB) + return false; + if (!CB->hasFnAttr(Attribute::WillReturn)) + return true; + if (!CB->hasFnAttr(Attribute::NoSync)) + return true; + + return false; + })) { + return reportInvalidCandidate(I, MayThrowException); + } + + // Check if I has any output/flow/anti dependences with instructions from \p + // StartInst to \p EndInst. 
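+  // For example (hypothetical IR): if I is `%v = load i32, ptr %p` and a
+  // `store i32 0, ptr %p` lies between I and InsertPoint, moving the load
+  // across the store would violate a memory dependence (anti or flow,
+  // depending on the direction of the move), so the candidate is rejected.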
+ if (llvm::any_of(InstsToCheck, [&DI, &I](Instruction *CurInst) { + auto DepResult = DI->depends(&I, CurInst, true); + if (DepResult && (DepResult->isOutput() || DepResult->isFlow() || + DepResult->isAnti())) + return true; + return false; + })) + return reportInvalidCandidate(I, HasDependences); + + return true; +} + +bool llvm::isSafeToMoveBefore(BasicBlock &BB, Instruction &InsertPoint, + DominatorTree &DT, const PostDominatorTree *PDT, + DependenceInfo *DI) { + return llvm::all_of(BB, [&](Instruction &I) { + if (BB.getTerminator() == &I) + return true; + + return isSafeToMoveBefore(I, InsertPoint, DT, PDT, DI, + /*CheckForEntireBlock=*/true); + }); +} + +void llvm::moveInstructionsToTheBeginning(BasicBlock &FromBB, BasicBlock &ToBB, + DominatorTree &DT, + const PostDominatorTree &PDT, + DependenceInfo &DI) { + for (Instruction &I : + llvm::make_early_inc_range(llvm::drop_begin(llvm::reverse(FromBB)))) { + Instruction *MovePos = ToBB.getFirstNonPHIOrDbg(); + + if (isSafeToMoveBefore(I, *MovePos, DT, &PDT, &DI)) + I.moveBefore(MovePos); + } +} + +void llvm::moveInstructionsToTheEnd(BasicBlock &FromBB, BasicBlock &ToBB, + DominatorTree &DT, + const PostDominatorTree &PDT, + DependenceInfo &DI) { + Instruction *MovePos = ToBB.getTerminator(); + while (FromBB.size() > 1) { + Instruction &I = FromBB.front(); + if (isSafeToMoveBefore(I, *MovePos, DT, &PDT, &DI)) + I.moveBefore(MovePos); + } +} + +bool llvm::nonStrictlyPostDominate(const BasicBlock *ThisBlock, + const BasicBlock *OtherBlock, + const DominatorTree *DT, + const PostDominatorTree *PDT) { + assert(isControlFlowEquivalent(*ThisBlock, *OtherBlock, *DT, *PDT) && + "ThisBlock and OtherBlock must be CFG equivalent!"); + const BasicBlock *CommonDominator = + DT->findNearestCommonDominator(ThisBlock, OtherBlock); + if (CommonDominator == nullptr) + return false; + + /// Recursively check the predecessors of \p ThisBlock up to + /// their common dominator, and see if any of them post-dominates + /// \p OtherBlock. + SmallVector<const BasicBlock *, 8> WorkList; + SmallPtrSet<const BasicBlock *, 8> Visited; + WorkList.push_back(ThisBlock); + while (!WorkList.empty()) { + const BasicBlock *CurBlock = WorkList.back(); + WorkList.pop_back(); + Visited.insert(CurBlock); + if (PDT->dominates(CurBlock, OtherBlock)) + return true; + + for (const auto *Pred : predecessors(CurBlock)) { + if (Pred == CommonDominator || Visited.count(Pred)) + continue; + WorkList.push_back(Pred); + } + } + return false; +} + +bool llvm::isReachedBefore(const Instruction *I0, const Instruction *I1, + const DominatorTree *DT, + const PostDominatorTree *PDT) { + const BasicBlock *BB0 = I0->getParent(); + const BasicBlock *BB1 = I1->getParent(); + if (BB0 == BB1) + return DT->dominates(I0, I1); + + return nonStrictlyPostDominate(BB1, BB0, DT, PDT); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/CtorUtils.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/CtorUtils.cpp new file mode 100644 index 0000000000..c997f39508 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/CtorUtils.cpp @@ -0,0 +1,154 @@ +//===- CtorUtils.cpp - Helpers for working with global_ctors ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines functions that are used to process llvm.global_ctors. 
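+// A typical entry of that list looks like (illustrative IR, not taken from
+// this file):
+//   { i32 65535, ptr @ctor, ptr null }
+// i.e. a priority, the constructor function, and an optional pointer to a
+// global the constructor is associated with.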
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/CtorUtils.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <numeric> + +#define DEBUG_TYPE "ctor_utils" + +using namespace llvm; + +/// Given a specified llvm.global_ctors list, remove the listed elements. +static void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemove) { + // Filter out the initializer elements to remove. + ConstantArray *OldCA = cast<ConstantArray>(GCL->getInitializer()); + SmallVector<Constant *, 10> CAList; + for (unsigned I = 0, E = OldCA->getNumOperands(); I < E; ++I) + if (!CtorsToRemove.test(I)) + CAList.push_back(OldCA->getOperand(I)); + + // Create the new array initializer. + ArrayType *ATy = + ArrayType::get(OldCA->getType()->getElementType(), CAList.size()); + Constant *CA = ConstantArray::get(ATy, CAList); + + // If we didn't change the number of elements, don't create a new GV. + if (CA->getType() == OldCA->getType()) { + GCL->setInitializer(CA); + return; + } + + // Create the new global and insert it next to the existing list. + GlobalVariable *NGV = + new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(), + CA, "", GCL->getThreadLocalMode()); + GCL->getParent()->getGlobalList().insert(GCL->getIterator(), NGV); + NGV->takeName(GCL); + + // Nuke the old list, replacing any uses with the new one. + if (!GCL->use_empty()) { + Constant *V = NGV; + if (V->getType() != GCL->getType()) + V = ConstantExpr::getBitCast(V, GCL->getType()); + GCL->replaceAllUsesWith(V); + } + GCL->eraseFromParent(); +} + +/// Given a llvm.global_ctors list that we can understand, +/// return a list of the functions and null terminator as a vector. +static std::vector<std::pair<uint32_t, Function *>> +parseGlobalCtors(GlobalVariable *GV) { + ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); + std::vector<std::pair<uint32_t, Function *>> Result; + Result.reserve(CA->getNumOperands()); + for (auto &V : CA->operands()) { + ConstantStruct *CS = cast<ConstantStruct>(V); + Result.emplace_back(cast<ConstantInt>(CS->getOperand(0))->getZExtValue(), + dyn_cast<Function>(CS->getOperand(1))); + } + return Result; +} + +/// Find the llvm.global_ctors list. +static GlobalVariable *findGlobalCtors(Module &M) { + GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); + if (!GV) + return nullptr; + + // Verify that the initializer is simple enough for us to handle. We are + // only allowed to optimize the initializer if it is unique. + if (!GV->hasUniqueInitializer()) + return nullptr; + + // If there are no ctors, then the initializer might be null/undef/poison. + // Ignore anything but an array. + ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer()); + if (!CA) + return nullptr; + + for (auto &V : CA->operands()) { + if (isa<ConstantAggregateZero>(V)) + continue; + ConstantStruct *CS = cast<ConstantStruct>(V); + if (isa<ConstantPointerNull>(CS->getOperand(1))) + continue; + + // Can only handle global constructors with no arguments. + Function *F = dyn_cast<Function>(CS->getOperand(1)); + if (!F || F->arg_size() != 0) + return nullptr; + } + return GV; +} + +/// Call "ShouldRemove" for every entry in M's global_ctor list and remove the +/// entries for which it returns true. Return true if anything changed. 
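+///
+/// Illustrative call site (hypothetical; the lambda and KnownNoOpCtors are not
+/// part of this file):
+/// \code
+///   bool Changed = optimizeGlobalCtorsList(
+///       M, [&](uint32_t Priority, Function *F) {
+///         // Drop default-priority ctors the caller has proven to be no-ops.
+///         return Priority == 65535 && KnownNoOpCtors.count(F);
+///       });
+/// \endcode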
+bool llvm::optimizeGlobalCtorsList( + Module &M, function_ref<bool(uint32_t, Function *)> ShouldRemove) { + GlobalVariable *GlobalCtors = findGlobalCtors(M); + if (!GlobalCtors) + return false; + + std::vector<std::pair<uint32_t, Function *>> Ctors = + parseGlobalCtors(GlobalCtors); + if (Ctors.empty()) + return false; + + bool MadeChange = false; + // Loop over global ctors, optimizing them when we can. + BitVector CtorsToRemove(Ctors.size()); + std::vector<size_t> CtorsByPriority(Ctors.size()); + std::iota(CtorsByPriority.begin(), CtorsByPriority.end(), 0); + stable_sort(CtorsByPriority, [&](size_t LHS, size_t RHS) { + return Ctors[LHS].first < Ctors[RHS].first; + }); + for (unsigned CtorIndex : CtorsByPriority) { + const uint32_t Priority = Ctors[CtorIndex].first; + Function *F = Ctors[CtorIndex].second; + if (!F) + continue; + + LLVM_DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n"); + + // If we can evaluate the ctor at compile time, do. + if (ShouldRemove(Priority, F)) { + Ctors[CtorIndex].second = nullptr; + CtorsToRemove.set(CtorIndex); + MadeChange = true; + continue; + } + } + + if (!MadeChange) + return false; + + removeGlobalCtors(GlobalCtors, CtorsToRemove); + return true; +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/Debugify.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/Debugify.cpp new file mode 100644 index 0000000000..989473693a --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/Debugify.cpp @@ -0,0 +1,1085 @@ +//===- Debugify.cpp - Check debug info preservation in optimizations ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file In the `synthetic` mode, the `-debugify` attaches synthetic debug info +/// to everything. It can be used to create targeted tests for debug info +/// preservation. In addition, when using the `original` mode, it can check +/// original debug info preservation. The `synthetic` mode is default one. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Debugify.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassInstrumentation.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/JSON.h" +#include <optional> + +#define DEBUG_TYPE "debugify" + +using namespace llvm; + +namespace { + +cl::opt<bool> Quiet("debugify-quiet", + cl::desc("Suppress verbose debugify output")); + +cl::opt<uint64_t> DebugifyFunctionsLimit( + "debugify-func-limit", + cl::desc("Set max number of processed functions per pass."), + cl::init(UINT_MAX)); + +enum class Level { + Locations, + LocationsAndVariables +}; + +cl::opt<Level> DebugifyLevel( + "debugify-level", cl::desc("Kind of debug info to add"), + cl::values(clEnumValN(Level::Locations, "locations", "Locations only"), + clEnumValN(Level::LocationsAndVariables, "location+variables", + "Locations and Variables")), + cl::init(Level::LocationsAndVariables)); + +raw_ostream &dbg() { return Quiet ? 
nulls() : errs(); } + +uint64_t getAllocSizeInBits(Module &M, Type *Ty) { + return Ty->isSized() ? M.getDataLayout().getTypeAllocSizeInBits(Ty) : 0; +} + +bool isFunctionSkipped(Function &F) { + return F.isDeclaration() || !F.hasExactDefinition(); +} + +/// Find the basic block's terminating instruction. +/// +/// Special care is needed to handle musttail and deopt calls, as these behave +/// like (but are in fact not) terminators. +Instruction *findTerminatingInstruction(BasicBlock &BB) { + if (auto *I = BB.getTerminatingMustTailCall()) + return I; + if (auto *I = BB.getTerminatingDeoptimizeCall()) + return I; + return BB.getTerminator(); +} +} // end anonymous namespace + +bool llvm::applyDebugifyMetadata( + Module &M, iterator_range<Module::iterator> Functions, StringRef Banner, + std::function<bool(DIBuilder &DIB, Function &F)> ApplyToMF) { + // Skip modules with debug info. + if (M.getNamedMetadata("llvm.dbg.cu")) { + dbg() << Banner << "Skipping module with debug info\n"; + return false; + } + + DIBuilder DIB(M); + LLVMContext &Ctx = M.getContext(); + auto *Int32Ty = Type::getInt32Ty(Ctx); + + // Get a DIType which corresponds to Ty. + DenseMap<uint64_t, DIType *> TypeCache; + auto getCachedDIType = [&](Type *Ty) -> DIType * { + uint64_t Size = getAllocSizeInBits(M, Ty); + DIType *&DTy = TypeCache[Size]; + if (!DTy) { + std::string Name = "ty" + utostr(Size); + DTy = DIB.createBasicType(Name, Size, dwarf::DW_ATE_unsigned); + } + return DTy; + }; + + unsigned NextLine = 1; + unsigned NextVar = 1; + auto File = DIB.createFile(M.getName(), "/"); + auto CU = DIB.createCompileUnit(dwarf::DW_LANG_C, File, "debugify", + /*isOptimized=*/true, "", 0); + + // Visit each instruction. + for (Function &F : Functions) { + if (isFunctionSkipped(F)) + continue; + + bool InsertedDbgVal = false; + auto SPType = + DIB.createSubroutineType(DIB.getOrCreateTypeArray(std::nullopt)); + DISubprogram::DISPFlags SPFlags = + DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized; + if (F.hasPrivateLinkage() || F.hasInternalLinkage()) + SPFlags |= DISubprogram::SPFlagLocalToUnit; + auto SP = DIB.createFunction(CU, F.getName(), F.getName(), File, NextLine, + SPType, NextLine, DINode::FlagZero, SPFlags); + F.setSubprogram(SP); + + // Helper that inserts a dbg.value before \p InsertBefore, copying the + // location (and possibly the type, if it's non-void) from \p TemplateInst. + auto insertDbgVal = [&](Instruction &TemplateInst, + Instruction *InsertBefore) { + std::string Name = utostr(NextVar++); + Value *V = &TemplateInst; + if (TemplateInst.getType()->isVoidTy()) + V = ConstantInt::get(Int32Ty, 0); + const DILocation *Loc = TemplateInst.getDebugLoc().get(); + auto LocalVar = DIB.createAutoVariable(SP, Name, File, Loc->getLine(), + getCachedDIType(V->getType()), + /*AlwaysPreserve=*/true); + DIB.insertDbgValueIntrinsic(V, LocalVar, DIB.createExpression(), Loc, + InsertBefore); + }; + + for (BasicBlock &BB : F) { + // Attach debug locations. + for (Instruction &I : BB) + I.setDebugLoc(DILocation::get(Ctx, NextLine++, 1, SP)); + + if (DebugifyLevel < Level::LocationsAndVariables) + continue; + + // Inserting debug values into EH pads can break IR invariants. + if (BB.isEHPad()) + continue; + + // Find the terminating instruction, after which no debug values are + // attached. + Instruction *LastInst = findTerminatingInstruction(BB); + assert(LastInst && "Expected basic block with a terminator"); + + // Maintain an insertion point which can't be invalidated when updates + // are made. 
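+      // Concretely (illustrative): for a block containing
+      //   %x = add i32 %a, %b
+      //   ret i32 %x
+      // the loop below inserts a call to llvm.dbg.value describing %x through
+      // an auto variable named "1" of the synthetic type "ty32", placed after
+      // the add and before the terminator.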
+ BasicBlock::iterator InsertPt = BB.getFirstInsertionPt(); + assert(InsertPt != BB.end() && "Expected to find an insertion point"); + Instruction *InsertBefore = &*InsertPt; + + // Attach debug values. + for (Instruction *I = &*BB.begin(); I != LastInst; I = I->getNextNode()) { + // Skip void-valued instructions. + if (I->getType()->isVoidTy()) + continue; + + // Phis and EH pads must be grouped at the beginning of the block. + // Only advance the insertion point when we finish visiting these. + if (!isa<PHINode>(I) && !I->isEHPad()) + InsertBefore = I->getNextNode(); + + insertDbgVal(*I, InsertBefore); + InsertedDbgVal = true; + } + } + // Make sure we emit at least one dbg.value, otherwise MachineDebugify may + // not have anything to work with as it goes about inserting DBG_VALUEs. + // (It's common for MIR tests to be written containing skeletal IR with + // empty functions -- we're still interested in debugifying the MIR within + // those tests, and this helps with that.) + if (DebugifyLevel == Level::LocationsAndVariables && !InsertedDbgVal) { + auto *Term = findTerminatingInstruction(F.getEntryBlock()); + insertDbgVal(*Term, Term); + } + if (ApplyToMF) + ApplyToMF(DIB, F); + DIB.finalizeSubprogram(SP); + } + DIB.finalize(); + + // Track the number of distinct lines and variables. + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.debugify"); + auto addDebugifyOperand = [&](unsigned N) { + NMD->addOperand(MDNode::get( + Ctx, ValueAsMetadata::getConstant(ConstantInt::get(Int32Ty, N)))); + }; + addDebugifyOperand(NextLine - 1); // Original number of lines. + addDebugifyOperand(NextVar - 1); // Original number of variables. + assert(NMD->getNumOperands() == 2 && + "llvm.debugify should have exactly 2 operands!"); + + // Claim that this synthetic debug info is valid. + StringRef DIVersionKey = "Debug Info Version"; + if (!M.getModuleFlag(DIVersionKey)) + M.addModuleFlag(Module::Warning, DIVersionKey, DEBUG_METADATA_VERSION); + + return true; +} + +static bool +applyDebugify(Function &F, + enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo, + DebugInfoPerPass *DebugInfoBeforePass = nullptr, + StringRef NameOfWrappedPass = "") { + Module &M = *F.getParent(); + auto FuncIt = F.getIterator(); + if (Mode == DebugifyMode::SyntheticDebugInfo) + return applyDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)), + "FunctionDebugify: ", /*ApplyToMF*/ nullptr); + assert(DebugInfoBeforePass); + return collectDebugInfoMetadata(M, M.functions(), *DebugInfoBeforePass, + "FunctionDebugify (original debuginfo)", + NameOfWrappedPass); +} + +static bool +applyDebugify(Module &M, + enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo, + DebugInfoPerPass *DebugInfoBeforePass = nullptr, + StringRef NameOfWrappedPass = "") { + if (Mode == DebugifyMode::SyntheticDebugInfo) + return applyDebugifyMetadata(M, M.functions(), + "ModuleDebugify: ", /*ApplyToMF*/ nullptr); + return collectDebugInfoMetadata(M, M.functions(), *DebugInfoBeforePass, + "ModuleDebugify (original debuginfo)", + NameOfWrappedPass); +} + +bool llvm::stripDebugifyMetadata(Module &M) { + bool Changed = false; + + // Remove the llvm.debugify and llvm.mir.debugify module-level named metadata. 
+ NamedMDNode *DebugifyMD = M.getNamedMetadata("llvm.debugify"); + if (DebugifyMD) { + M.eraseNamedMetadata(DebugifyMD); + Changed = true; + } + + if (auto *MIRDebugifyMD = M.getNamedMetadata("llvm.mir.debugify")) { + M.eraseNamedMetadata(MIRDebugifyMD); + Changed = true; + } + + // Strip out all debug intrinsics and supporting metadata (subprograms, types, + // variables, etc). + Changed |= StripDebugInfo(M); + + // Strip out the dead dbg.value prototype. + Function *DbgValF = M.getFunction("llvm.dbg.value"); + if (DbgValF) { + assert(DbgValF->isDeclaration() && DbgValF->use_empty() && + "Not all debug info stripped?"); + DbgValF->eraseFromParent(); + Changed = true; + } + + // Strip out the module-level Debug Info Version metadata. + // FIXME: There must be an easier way to remove an operand from a NamedMDNode. + NamedMDNode *NMD = M.getModuleFlagsMetadata(); + if (!NMD) + return Changed; + SmallVector<MDNode *, 4> Flags(NMD->operands()); + NMD->clearOperands(); + for (MDNode *Flag : Flags) { + auto *Key = cast<MDString>(Flag->getOperand(1)); + if (Key->getString() == "Debug Info Version") { + Changed = true; + continue; + } + NMD->addOperand(Flag); + } + // If we left it empty we might as well remove it. + if (NMD->getNumOperands() == 0) + NMD->eraseFromParent(); + + return Changed; +} + +bool llvm::collectDebugInfoMetadata(Module &M, + iterator_range<Module::iterator> Functions, + DebugInfoPerPass &DebugInfoBeforePass, + StringRef Banner, + StringRef NameOfWrappedPass) { + LLVM_DEBUG(dbgs() << Banner << ": (before) " << NameOfWrappedPass << '\n'); + + if (!M.getNamedMetadata("llvm.dbg.cu")) { + dbg() << Banner << ": Skipping module without debug info\n"; + return false; + } + + uint64_t FunctionsCnt = DebugInfoBeforePass.DIFunctions.size(); + // Visit each instruction. + for (Function &F : Functions) { + // Use DI collected after previous Pass (when -debugify-each is used). + if (DebugInfoBeforePass.DIFunctions.count(&F)) + continue; + + if (isFunctionSkipped(F)) + continue; + + // Stop collecting DI if the Functions number reached the limit. + if (++FunctionsCnt >= DebugifyFunctionsLimit) + break; + // Collect the DISubprogram. + auto *SP = F.getSubprogram(); + DebugInfoBeforePass.DIFunctions.insert({&F, SP}); + if (SP) { + LLVM_DEBUG(dbgs() << " Collecting subprogram: " << *SP << '\n'); + for (const DINode *DN : SP->getRetainedNodes()) { + if (const auto *DV = dyn_cast<DILocalVariable>(DN)) { + DebugInfoBeforePass.DIVariables[DV] = 0; + } + } + } + + for (BasicBlock &BB : F) { + // Collect debug locations (!dbg) and debug variable intrinsics. + for (Instruction &I : BB) { + // Skip PHIs. + if (isa<PHINode>(I)) + continue; + + // Cllect dbg.values and dbg.declare. + if (DebugifyLevel > Level::Locations) { + if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I)) { + if (!SP) + continue; + // Skip inlined variables. + if (I.getDebugLoc().getInlinedAt()) + continue; + // Skip undef values. + if (DVI->isKillLocation()) + continue; + + auto *Var = DVI->getVariable(); + DebugInfoBeforePass.DIVariables[Var]++; + continue; + } + } + + // Skip debug instructions other than dbg.value and dbg.declare. 
+ if (isa<DbgInfoIntrinsic>(&I)) + continue; + + LLVM_DEBUG(dbgs() << " Collecting info for inst: " << I << '\n'); + DebugInfoBeforePass.InstToDelete.insert({&I, &I}); + + const DILocation *Loc = I.getDebugLoc().get(); + bool HasLoc = Loc != nullptr; + DebugInfoBeforePass.DILocations.insert({&I, HasLoc}); + } + } + } + + return true; +} + +// This checks the preservation of original debug info attached to functions. +static bool checkFunctions(const DebugFnMap &DIFunctionsBefore, + const DebugFnMap &DIFunctionsAfter, + StringRef NameOfWrappedPass, + StringRef FileNameFromCU, bool ShouldWriteIntoJSON, + llvm::json::Array &Bugs) { + bool Preserved = true; + for (const auto &F : DIFunctionsAfter) { + if (F.second) + continue; + auto SPIt = DIFunctionsBefore.find(F.first); + if (SPIt == DIFunctionsBefore.end()) { + if (ShouldWriteIntoJSON) + Bugs.push_back(llvm::json::Object({{"metadata", "DISubprogram"}, + {"name", F.first->getName()}, + {"action", "not-generate"}})); + else + dbg() << "ERROR: " << NameOfWrappedPass + << " did not generate DISubprogram for " << F.first->getName() + << " from " << FileNameFromCU << '\n'; + Preserved = false; + } else { + auto SP = SPIt->second; + if (!SP) + continue; + // If the function had the SP attached before the pass, consider it as + // a debug info bug. + if (ShouldWriteIntoJSON) + Bugs.push_back(llvm::json::Object({{"metadata", "DISubprogram"}, + {"name", F.first->getName()}, + {"action", "drop"}})); + else + dbg() << "ERROR: " << NameOfWrappedPass << " dropped DISubprogram of " + << F.first->getName() << " from " << FileNameFromCU << '\n'; + Preserved = false; + } + } + + return Preserved; +} + +// This checks the preservation of the original debug info attached to +// instructions. +static bool checkInstructions(const DebugInstMap &DILocsBefore, + const DebugInstMap &DILocsAfter, + const WeakInstValueMap &InstToDelete, + StringRef NameOfWrappedPass, + StringRef FileNameFromCU, + bool ShouldWriteIntoJSON, + llvm::json::Array &Bugs) { + bool Preserved = true; + for (const auto &L : DILocsAfter) { + if (L.second) + continue; + auto Instr = L.first; + + // In order to avoid pointer reuse/recycling, skip the values that might + // have been deleted during a pass. + auto WeakInstrPtr = InstToDelete.find(Instr); + if (WeakInstrPtr != InstToDelete.end() && !WeakInstrPtr->second) + continue; + + auto FnName = Instr->getFunction()->getName(); + auto BB = Instr->getParent(); + auto BBName = BB->hasName() ? BB->getName() : "no-name"; + auto InstName = Instruction::getOpcodeName(Instr->getOpcode()); + + auto InstrIt = DILocsBefore.find(Instr); + if (InstrIt == DILocsBefore.end()) { + if (ShouldWriteIntoJSON) + Bugs.push_back(llvm::json::Object({{"metadata", "DILocation"}, + {"fn-name", FnName.str()}, + {"bb-name", BBName.str()}, + {"instr", InstName}, + {"action", "not-generate"}})); + else + dbg() << "WARNING: " << NameOfWrappedPass + << " did not generate DILocation for " << *Instr + << " (BB: " << BBName << ", Fn: " << FnName + << ", File: " << FileNameFromCU << ")\n"; + Preserved = false; + } else { + if (!InstrIt->second) + continue; + // If the instr had the !dbg attached before the pass, consider it as + // a debug info issue. 
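+      // With JSON output enabled, the record emitted below looks roughly like
+      // (illustrative values):
+      //   {"metadata":"DILocation","fn-name":"foo","bb-name":"entry",
+      //    "instr":"add","action":"drop"}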
+ if (ShouldWriteIntoJSON) + Bugs.push_back(llvm::json::Object({{"metadata", "DILocation"}, + {"fn-name", FnName.str()}, + {"bb-name", BBName.str()}, + {"instr", InstName}, + {"action", "drop"}})); + else + dbg() << "WARNING: " << NameOfWrappedPass << " dropped DILocation of " + << *Instr << " (BB: " << BBName << ", Fn: " << FnName + << ", File: " << FileNameFromCU << ")\n"; + Preserved = false; + } + } + + return Preserved; +} + +// This checks the preservation of original debug variable intrinsics. +static bool checkVars(const DebugVarMap &DIVarsBefore, + const DebugVarMap &DIVarsAfter, + StringRef NameOfWrappedPass, StringRef FileNameFromCU, + bool ShouldWriteIntoJSON, llvm::json::Array &Bugs) { + bool Preserved = true; + for (const auto &V : DIVarsBefore) { + auto VarIt = DIVarsAfter.find(V.first); + if (VarIt == DIVarsAfter.end()) + continue; + + unsigned NumOfDbgValsAfter = VarIt->second; + + if (V.second > NumOfDbgValsAfter) { + if (ShouldWriteIntoJSON) + Bugs.push_back(llvm::json::Object( + {{"metadata", "dbg-var-intrinsic"}, + {"name", V.first->getName()}, + {"fn-name", V.first->getScope()->getSubprogram()->getName()}, + {"action", "drop"}})); + else + dbg() << "WARNING: " << NameOfWrappedPass + << " drops dbg.value()/dbg.declare() for " << V.first->getName() + << " from " + << "function " << V.first->getScope()->getSubprogram()->getName() + << " (file " << FileNameFromCU << ")\n"; + Preserved = false; + } + } + + return Preserved; +} + +// Write the json data into the specifed file. +static void writeJSON(StringRef OrigDIVerifyBugsReportFilePath, + StringRef FileNameFromCU, StringRef NameOfWrappedPass, + llvm::json::Array &Bugs) { + std::error_code EC; + raw_fd_ostream OS_FILE{OrigDIVerifyBugsReportFilePath, EC, + sys::fs::OF_Append | sys::fs::OF_TextWithCRLF}; + if (EC) { + errs() << "Could not open file: " << EC.message() << ", " + << OrigDIVerifyBugsReportFilePath << '\n'; + return; + } + + if (auto L = OS_FILE.lock()) { + OS_FILE << "{\"file\":\"" << FileNameFromCU << "\", "; + + StringRef PassName = + NameOfWrappedPass != "" ? NameOfWrappedPass : "no-name"; + OS_FILE << "\"pass\":\"" << PassName << "\", "; + + llvm::json::Value BugsToPrint{std::move(Bugs)}; + OS_FILE << "\"bugs\": " << BugsToPrint; + + OS_FILE << "}\n"; + } + OS_FILE.close(); +} + +bool llvm::checkDebugInfoMetadata(Module &M, + iterator_range<Module::iterator> Functions, + DebugInfoPerPass &DebugInfoBeforePass, + StringRef Banner, StringRef NameOfWrappedPass, + StringRef OrigDIVerifyBugsReportFilePath) { + LLVM_DEBUG(dbgs() << Banner << ": (after) " << NameOfWrappedPass << '\n'); + + if (!M.getNamedMetadata("llvm.dbg.cu")) { + dbg() << Banner << ": Skipping module without debug info\n"; + return false; + } + + // Map the debug info holding DIs after a pass. + DebugInfoPerPass DebugInfoAfterPass; + + // Visit each instruction. + for (Function &F : Functions) { + if (isFunctionSkipped(F)) + continue; + + // Don't process functions without DI collected before the Pass. + if (!DebugInfoBeforePass.DIFunctions.count(&F)) + continue; + // TODO: Collect metadata other than DISubprograms. + // Collect the DISubprogram. 
+ auto *SP = F.getSubprogram(); + DebugInfoAfterPass.DIFunctions.insert({&F, SP}); + + if (SP) { + LLVM_DEBUG(dbgs() << " Collecting subprogram: " << *SP << '\n'); + for (const DINode *DN : SP->getRetainedNodes()) { + if (const auto *DV = dyn_cast<DILocalVariable>(DN)) { + DebugInfoAfterPass.DIVariables[DV] = 0; + } + } + } + + for (BasicBlock &BB : F) { + // Collect debug locations (!dbg) and debug variable intrinsics. + for (Instruction &I : BB) { + // Skip PHIs. + if (isa<PHINode>(I)) + continue; + + // Collect dbg.values and dbg.declares. + if (DebugifyLevel > Level::Locations) { + if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I)) { + if (!SP) + continue; + // Skip inlined variables. + if (I.getDebugLoc().getInlinedAt()) + continue; + // Skip undef values. + if (DVI->isKillLocation()) + continue; + + auto *Var = DVI->getVariable(); + DebugInfoAfterPass.DIVariables[Var]++; + continue; + } + } + + // Skip debug instructions other than dbg.value and dbg.declare. + if (isa<DbgInfoIntrinsic>(&I)) + continue; + + LLVM_DEBUG(dbgs() << " Collecting info for inst: " << I << '\n'); + + const DILocation *Loc = I.getDebugLoc().get(); + bool HasLoc = Loc != nullptr; + + DebugInfoAfterPass.DILocations.insert({&I, HasLoc}); + } + } + } + + // TODO: The name of the module could be read better? + StringRef FileNameFromCU = + (cast<DICompileUnit>(M.getNamedMetadata("llvm.dbg.cu")->getOperand(0))) + ->getFilename(); + + auto DIFunctionsBefore = DebugInfoBeforePass.DIFunctions; + auto DIFunctionsAfter = DebugInfoAfterPass.DIFunctions; + + auto DILocsBefore = DebugInfoBeforePass.DILocations; + auto DILocsAfter = DebugInfoAfterPass.DILocations; + + auto InstToDelete = DebugInfoBeforePass.InstToDelete; + + auto DIVarsBefore = DebugInfoBeforePass.DIVariables; + auto DIVarsAfter = DebugInfoAfterPass.DIVariables; + + bool ShouldWriteIntoJSON = !OrigDIVerifyBugsReportFilePath.empty(); + llvm::json::Array Bugs; + + bool ResultForFunc = + checkFunctions(DIFunctionsBefore, DIFunctionsAfter, NameOfWrappedPass, + FileNameFromCU, ShouldWriteIntoJSON, Bugs); + bool ResultForInsts = checkInstructions( + DILocsBefore, DILocsAfter, InstToDelete, NameOfWrappedPass, + FileNameFromCU, ShouldWriteIntoJSON, Bugs); + + bool ResultForVars = checkVars(DIVarsBefore, DIVarsAfter, NameOfWrappedPass, + FileNameFromCU, ShouldWriteIntoJSON, Bugs); + + bool Result = ResultForFunc && ResultForInsts && ResultForVars; + + StringRef ResultBanner = NameOfWrappedPass != "" ? NameOfWrappedPass : Banner; + if (ShouldWriteIntoJSON && !Bugs.empty()) + writeJSON(OrigDIVerifyBugsReportFilePath, FileNameFromCU, NameOfWrappedPass, + Bugs); + + if (Result) + dbg() << ResultBanner << ": PASS\n"; + else + dbg() << ResultBanner << ": FAIL\n"; + + // In the case of the `debugify-each`, no need to go over all the instructions + // again in the collectDebugInfoMetadata(), since as an input we can use + // the debugging information from the previous pass. + DebugInfoBeforePass = DebugInfoAfterPass; + + LLVM_DEBUG(dbgs() << "\n\n"); + return Result; +} + +namespace { +/// Return true if a mis-sized diagnostic is issued for \p DVI. +bool diagnoseMisSizedDbgValue(Module &M, DbgValueInst *DVI) { + // The size of a dbg.value's value operand should match the size of the + // variable it corresponds to. + // + // TODO: This, along with a check for non-null value operands, should be + // promoted to verifier failures. + + // For now, don't try to interpret anything more complicated than an empty + // DIExpression. 
Eventually we should try to handle OP_deref and fragments. + if (DVI->getExpression()->getNumElements()) + return false; + + Value *V = DVI->getVariableLocationOp(0); + if (!V) + return false; + + Type *Ty = V->getType(); + uint64_t ValueOperandSize = getAllocSizeInBits(M, Ty); + std::optional<uint64_t> DbgVarSize = DVI->getFragmentSizeInBits(); + if (!ValueOperandSize || !DbgVarSize) + return false; + + bool HasBadSize = false; + if (Ty->isIntegerTy()) { + auto Signedness = DVI->getVariable()->getSignedness(); + if (Signedness && *Signedness == DIBasicType::Signedness::Signed) + HasBadSize = ValueOperandSize < *DbgVarSize; + } else { + HasBadSize = ValueOperandSize != *DbgVarSize; + } + + if (HasBadSize) { + dbg() << "ERROR: dbg.value operand has size " << ValueOperandSize + << ", but its variable has size " << *DbgVarSize << ": "; + DVI->print(dbg()); + dbg() << "\n"; + } + return HasBadSize; +} + +bool checkDebugifyMetadata(Module &M, + iterator_range<Module::iterator> Functions, + StringRef NameOfWrappedPass, StringRef Banner, + bool Strip, DebugifyStatsMap *StatsMap) { + // Skip modules without debugify metadata. + NamedMDNode *NMD = M.getNamedMetadata("llvm.debugify"); + if (!NMD) { + dbg() << Banner << ": Skipping module without debugify metadata\n"; + return false; + } + + auto getDebugifyOperand = [&](unsigned Idx) -> unsigned { + return mdconst::extract<ConstantInt>(NMD->getOperand(Idx)->getOperand(0)) + ->getZExtValue(); + }; + assert(NMD->getNumOperands() == 2 && + "llvm.debugify should have exactly 2 operands!"); + unsigned OriginalNumLines = getDebugifyOperand(0); + unsigned OriginalNumVars = getDebugifyOperand(1); + bool HasErrors = false; + + // Track debug info loss statistics if able. + DebugifyStatistics *Stats = nullptr; + if (StatsMap && !NameOfWrappedPass.empty()) + Stats = &StatsMap->operator[](NameOfWrappedPass); + + BitVector MissingLines{OriginalNumLines, true}; + BitVector MissingVars{OriginalNumVars, true}; + for (Function &F : Functions) { + if (isFunctionSkipped(F)) + continue; + + // Find missing lines. + for (Instruction &I : instructions(F)) { + if (isa<DbgValueInst>(&I)) + continue; + + auto DL = I.getDebugLoc(); + if (DL && DL.getLine() != 0) { + MissingLines.reset(DL.getLine() - 1); + continue; + } + + if (!isa<PHINode>(&I) && !DL) { + dbg() << "WARNING: Instruction with empty DebugLoc in function "; + dbg() << F.getName() << " --"; + I.print(dbg()); + dbg() << "\n"; + } + } + + // Find missing variables and mis-sized debug values. + for (Instruction &I : instructions(F)) { + auto *DVI = dyn_cast<DbgValueInst>(&I); + if (!DVI) + continue; + + unsigned Var = ~0U; + (void)to_integer(DVI->getVariable()->getName(), Var, 10); + assert(Var <= OriginalNumVars && "Unexpected name for DILocalVariable"); + bool HasBadSize = diagnoseMisSizedDbgValue(M, DVI); + if (!HasBadSize) + MissingVars.reset(Var - 1); + HasErrors |= HasBadSize; + } + } + + // Print the results. + for (unsigned Idx : MissingLines.set_bits()) + dbg() << "WARNING: Missing line " << Idx + 1 << "\n"; + + for (unsigned Idx : MissingVars.set_bits()) + dbg() << "WARNING: Missing variable " << Idx + 1 << "\n"; + + // Update DI loss statistics. + if (Stats) { + Stats->NumDbgLocsExpected += OriginalNumLines; + Stats->NumDbgLocsMissing += MissingLines.count(); + Stats->NumDbgValuesExpected += OriginalNumVars; + Stats->NumDbgValuesMissing += MissingVars.count(); + } + + dbg() << Banner; + if (!NameOfWrappedPass.empty()) + dbg() << " [" << NameOfWrappedPass << "]"; + dbg() << ": " << (HasErrors ? 
"FAIL" : "PASS") << '\n'; + + // Strip debugify metadata if required. + if (Strip) + return stripDebugifyMetadata(M); + + return false; +} + +/// ModulePass for attaching synthetic debug info to everything, used with the +/// legacy module pass manager. +struct DebugifyModulePass : public ModulePass { + bool runOnModule(Module &M) override { + return applyDebugify(M, Mode, DebugInfoBeforePass, NameOfWrappedPass); + } + + DebugifyModulePass(enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo, + StringRef NameOfWrappedPass = "", + DebugInfoPerPass *DebugInfoBeforePass = nullptr) + : ModulePass(ID), NameOfWrappedPass(NameOfWrappedPass), + DebugInfoBeforePass(DebugInfoBeforePass), Mode(Mode) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + + static char ID; // Pass identification. + +private: + StringRef NameOfWrappedPass; + DebugInfoPerPass *DebugInfoBeforePass; + enum DebugifyMode Mode; +}; + +/// FunctionPass for attaching synthetic debug info to instructions within a +/// single function, used with the legacy module pass manager. +struct DebugifyFunctionPass : public FunctionPass { + bool runOnFunction(Function &F) override { + return applyDebugify(F, Mode, DebugInfoBeforePass, NameOfWrappedPass); + } + + DebugifyFunctionPass( + enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo, + StringRef NameOfWrappedPass = "", + DebugInfoPerPass *DebugInfoBeforePass = nullptr) + : FunctionPass(ID), NameOfWrappedPass(NameOfWrappedPass), + DebugInfoBeforePass(DebugInfoBeforePass), Mode(Mode) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + + static char ID; // Pass identification. + +private: + StringRef NameOfWrappedPass; + DebugInfoPerPass *DebugInfoBeforePass; + enum DebugifyMode Mode; +}; + +/// ModulePass for checking debug info inserted by -debugify, used with the +/// legacy module pass manager. +struct CheckDebugifyModulePass : public ModulePass { + bool runOnModule(Module &M) override { + if (Mode == DebugifyMode::SyntheticDebugInfo) + return checkDebugifyMetadata(M, M.functions(), NameOfWrappedPass, + "CheckModuleDebugify", Strip, StatsMap); + return checkDebugInfoMetadata( + M, M.functions(), *DebugInfoBeforePass, + "CheckModuleDebugify (original debuginfo)", NameOfWrappedPass, + OrigDIVerifyBugsReportFilePath); + } + + CheckDebugifyModulePass( + bool Strip = false, StringRef NameOfWrappedPass = "", + DebugifyStatsMap *StatsMap = nullptr, + enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo, + DebugInfoPerPass *DebugInfoBeforePass = nullptr, + StringRef OrigDIVerifyBugsReportFilePath = "") + : ModulePass(ID), NameOfWrappedPass(NameOfWrappedPass), + OrigDIVerifyBugsReportFilePath(OrigDIVerifyBugsReportFilePath), + StatsMap(StatsMap), DebugInfoBeforePass(DebugInfoBeforePass), Mode(Mode), + Strip(Strip) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + + static char ID; // Pass identification. + +private: + StringRef NameOfWrappedPass; + StringRef OrigDIVerifyBugsReportFilePath; + DebugifyStatsMap *StatsMap; + DebugInfoPerPass *DebugInfoBeforePass; + enum DebugifyMode Mode; + bool Strip; +}; + +/// FunctionPass for checking debug info inserted by -debugify-function, used +/// with the legacy module pass manager. 
+struct CheckDebugifyFunctionPass : public FunctionPass { + bool runOnFunction(Function &F) override { + Module &M = *F.getParent(); + auto FuncIt = F.getIterator(); + if (Mode == DebugifyMode::SyntheticDebugInfo) + return checkDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)), + NameOfWrappedPass, "CheckFunctionDebugify", + Strip, StatsMap); + return checkDebugInfoMetadata( + M, make_range(FuncIt, std::next(FuncIt)), *DebugInfoBeforePass, + "CheckFunctionDebugify (original debuginfo)", NameOfWrappedPass, + OrigDIVerifyBugsReportFilePath); + } + + CheckDebugifyFunctionPass( + bool Strip = false, StringRef NameOfWrappedPass = "", + DebugifyStatsMap *StatsMap = nullptr, + enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo, + DebugInfoPerPass *DebugInfoBeforePass = nullptr, + StringRef OrigDIVerifyBugsReportFilePath = "") + : FunctionPass(ID), NameOfWrappedPass(NameOfWrappedPass), + OrigDIVerifyBugsReportFilePath(OrigDIVerifyBugsReportFilePath), + StatsMap(StatsMap), DebugInfoBeforePass(DebugInfoBeforePass), Mode(Mode), + Strip(Strip) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + + static char ID; // Pass identification. + +private: + StringRef NameOfWrappedPass; + StringRef OrigDIVerifyBugsReportFilePath; + DebugifyStatsMap *StatsMap; + DebugInfoPerPass *DebugInfoBeforePass; + enum DebugifyMode Mode; + bool Strip; +}; + +} // end anonymous namespace + +void llvm::exportDebugifyStats(StringRef Path, const DebugifyStatsMap &Map) { + std::error_code EC; + raw_fd_ostream OS{Path, EC}; + if (EC) { + errs() << "Could not open file: " << EC.message() << ", " << Path << '\n'; + return; + } + + OS << "Pass Name" << ',' << "# of missing debug values" << ',' + << "# of missing locations" << ',' << "Missing/Expected value ratio" << ',' + << "Missing/Expected location ratio" << '\n'; + for (const auto &Entry : Map) { + StringRef Pass = Entry.first; + DebugifyStatistics Stats = Entry.second; + + OS << Pass << ',' << Stats.NumDbgValuesMissing << ',' + << Stats.NumDbgLocsMissing << ',' << Stats.getMissingValueRatio() << ',' + << Stats.getEmptyLocationRatio() << '\n'; + } +} + +ModulePass *createDebugifyModulePass(enum DebugifyMode Mode, + llvm::StringRef NameOfWrappedPass, + DebugInfoPerPass *DebugInfoBeforePass) { + if (Mode == DebugifyMode::SyntheticDebugInfo) + return new DebugifyModulePass(); + assert(Mode == DebugifyMode::OriginalDebugInfo && "Must be original mode"); + return new DebugifyModulePass(Mode, NameOfWrappedPass, DebugInfoBeforePass); +} + +FunctionPass * +createDebugifyFunctionPass(enum DebugifyMode Mode, + llvm::StringRef NameOfWrappedPass, + DebugInfoPerPass *DebugInfoBeforePass) { + if (Mode == DebugifyMode::SyntheticDebugInfo) + return new DebugifyFunctionPass(); + assert(Mode == DebugifyMode::OriginalDebugInfo && "Must be original mode"); + return new DebugifyFunctionPass(Mode, NameOfWrappedPass, DebugInfoBeforePass); +} + +PreservedAnalyses NewPMDebugifyPass::run(Module &M, ModuleAnalysisManager &) { + if (Mode == DebugifyMode::SyntheticDebugInfo) + applyDebugifyMetadata(M, M.functions(), + "ModuleDebugify: ", /*ApplyToMF*/ nullptr); + else + collectDebugInfoMetadata(M, M.functions(), *DebugInfoBeforePass, + "ModuleDebugify (original debuginfo)", + NameOfWrappedPass); + return PreservedAnalyses::all(); +} + +ModulePass *createCheckDebugifyModulePass( + bool Strip, StringRef NameOfWrappedPass, DebugifyStatsMap *StatsMap, + enum DebugifyMode Mode, DebugInfoPerPass *DebugInfoBeforePass, + StringRef 
OrigDIVerifyBugsReportFilePath) { + if (Mode == DebugifyMode::SyntheticDebugInfo) + return new CheckDebugifyModulePass(Strip, NameOfWrappedPass, StatsMap); + assert(Mode == DebugifyMode::OriginalDebugInfo && "Must be original mode"); + return new CheckDebugifyModulePass(false, NameOfWrappedPass, nullptr, Mode, + DebugInfoBeforePass, + OrigDIVerifyBugsReportFilePath); +} + +FunctionPass *createCheckDebugifyFunctionPass( + bool Strip, StringRef NameOfWrappedPass, DebugifyStatsMap *StatsMap, + enum DebugifyMode Mode, DebugInfoPerPass *DebugInfoBeforePass, + StringRef OrigDIVerifyBugsReportFilePath) { + if (Mode == DebugifyMode::SyntheticDebugInfo) + return new CheckDebugifyFunctionPass(Strip, NameOfWrappedPass, StatsMap); + assert(Mode == DebugifyMode::OriginalDebugInfo && "Must be original mode"); + return new CheckDebugifyFunctionPass(false, NameOfWrappedPass, nullptr, Mode, + DebugInfoBeforePass, + OrigDIVerifyBugsReportFilePath); +} + +PreservedAnalyses NewPMCheckDebugifyPass::run(Module &M, + ModuleAnalysisManager &) { + if (Mode == DebugifyMode::SyntheticDebugInfo) + checkDebugifyMetadata(M, M.functions(), NameOfWrappedPass, + "CheckModuleDebugify", Strip, StatsMap); + else + checkDebugInfoMetadata( + M, M.functions(), *DebugInfoBeforePass, + "CheckModuleDebugify (original debuginfo)", NameOfWrappedPass, + OrigDIVerifyBugsReportFilePath); + return PreservedAnalyses::all(); +} + +static bool isIgnoredPass(StringRef PassID) { + return isSpecialPass(PassID, {"PassManager", "PassAdaptor", + "AnalysisManagerProxy", "PrintFunctionPass", + "PrintModulePass", "BitcodeWriterPass", + "ThinLTOBitcodeWriterPass", "VerifierPass"}); +} + +void DebugifyEachInstrumentation::registerCallbacks( + PassInstrumentationCallbacks &PIC) { + PIC.registerBeforeNonSkippedPassCallback([this](StringRef P, Any IR) { + if (isIgnoredPass(P)) + return; + if (const auto **F = any_cast<const Function *>(&IR)) + applyDebugify(*const_cast<Function *>(*F), + Mode, DebugInfoBeforePass, P); + else if (const auto **M = any_cast<const Module *>(&IR)) + applyDebugify(*const_cast<Module *>(*M), + Mode, DebugInfoBeforePass, P); + }); + PIC.registerAfterPassCallback([this](StringRef P, Any IR, + const PreservedAnalyses &PassPA) { + if (isIgnoredPass(P)) + return; + if (const auto **CF = any_cast<const Function *>(&IR)) { + auto &F = *const_cast<Function *>(*CF); + Module &M = *F.getParent(); + auto It = F.getIterator(); + if (Mode == DebugifyMode::SyntheticDebugInfo) + checkDebugifyMetadata(M, make_range(It, std::next(It)), P, + "CheckFunctionDebugify", /*Strip=*/true, DIStatsMap); + else + checkDebugInfoMetadata( + M, make_range(It, std::next(It)), *DebugInfoBeforePass, + "CheckModuleDebugify (original debuginfo)", + P, OrigDIVerifyBugsReportFilePath); + } else if (const auto **CM = any_cast<const Module *>(&IR)) { + auto &M = *const_cast<Module *>(*CM); + if (Mode == DebugifyMode::SyntheticDebugInfo) + checkDebugifyMetadata(M, M.functions(), P, "CheckModuleDebugify", + /*Strip=*/true, DIStatsMap); + else + checkDebugInfoMetadata( + M, M.functions(), *DebugInfoBeforePass, + "CheckModuleDebugify (original debuginfo)", + P, OrigDIVerifyBugsReportFilePath); + } + }); +} + +char DebugifyModulePass::ID = 0; +static RegisterPass<DebugifyModulePass> DM("debugify", + "Attach debug info to everything"); + +char CheckDebugifyModulePass::ID = 0; +static RegisterPass<CheckDebugifyModulePass> + CDM("check-debugify", "Check debug info from -debugify"); + +char DebugifyFunctionPass::ID = 0; +static RegisterPass<DebugifyFunctionPass> 
DF("debugify-function", + "Attach debug info to a function"); + +char CheckDebugifyFunctionPass::ID = 0; +static RegisterPass<CheckDebugifyFunctionPass> + CDF("check-debugify-function", "Check debug info from -debugify-function"); diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/DemoteRegToStack.cpp new file mode 100644 index 0000000000..086ea088dc --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -0,0 +1,172 @@ +//===- DemoteRegToStack.cpp - Move a virtual register to the stack --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +using namespace llvm; + +/// DemoteRegToStack - This function takes a virtual register computed by an +/// Instruction and replaces it with a slot in the stack frame, allocated via +/// alloca. This allows the CFG to be changed around without fear of +/// invalidating the SSA information for the value. It returns the pointer to +/// the alloca inserted to create a stack slot for I. +AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, + Instruction *AllocaPoint) { + if (I.use_empty()) { + I.eraseFromParent(); + return nullptr; + } + + Function *F = I.getParent()->getParent(); + const DataLayout &DL = F->getParent()->getDataLayout(); + + // Create a stack slot to hold the value. + AllocaInst *Slot; + if (AllocaPoint) { + Slot = new AllocaInst(I.getType(), DL.getAllocaAddrSpace(), nullptr, + I.getName()+".reg2mem", AllocaPoint); + } else { + Slot = new AllocaInst(I.getType(), DL.getAllocaAddrSpace(), nullptr, + I.getName() + ".reg2mem", &F->getEntryBlock().front()); + } + + // We cannot demote invoke instructions to the stack if their normal edge + // is critical. Therefore, split the critical edge and create a basic block + // into which the store can be inserted. + if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) { + if (!II->getNormalDest()->getSinglePredecessor()) { + unsigned SuccNum = GetSuccessorNumber(II->getParent(), II->getNormalDest()); + assert(isCriticalEdge(II, SuccNum) && "Expected a critical edge!"); + BasicBlock *BB = SplitCriticalEdge(II, SuccNum); + assert(BB && "Unable to split critical edge."); + (void)BB; + } + } + + // Change all of the users of the instruction to read from the stack slot. + while (!I.use_empty()) { + Instruction *U = cast<Instruction>(I.user_back()); + if (PHINode *PN = dyn_cast<PHINode>(U)) { + // If this is a PHI node, we can't insert a load of the value before the + // use. Instead insert the load in the predecessor block corresponding + // to the incoming value. + // + // Note that if there are multiple edges from a basic block to this PHI + // node that we cannot have multiple loads. The problem is that the + // resulting PHI node will have multiple values (from each load) coming in + // from the same block, which is illegal SSA form. For this reason, we + // keep track of and reuse loads we insert. 
+ DenseMap<BasicBlock*, Value*> Loads; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == &I) { + Value *&V = Loads[PN->getIncomingBlock(i)]; + if (!V) { + // Insert the load into the predecessor block + V = new LoadInst(I.getType(), Slot, I.getName() + ".reload", + VolatileLoads, + PN->getIncomingBlock(i)->getTerminator()); + } + PN->setIncomingValue(i, V); + } + + } else { + // If this is a normal instruction, just insert a load. + Value *V = new LoadInst(I.getType(), Slot, I.getName() + ".reload", + VolatileLoads, U); + U->replaceUsesOfWith(&I, V); + } + } + + // Insert stores of the computed value into the stack slot. We have to be + // careful if I is an invoke instruction, because we can't insert the store + // AFTER the terminator instruction. + BasicBlock::iterator InsertPt; + if (!I.isTerminator()) { + InsertPt = ++I.getIterator(); + // Don't insert before PHI nodes or landingpad instrs. + for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt) + if (isa<CatchSwitchInst>(InsertPt)) + break; + if (isa<CatchSwitchInst>(InsertPt)) { + for (BasicBlock *Handler : successors(&*InsertPt)) + new StoreInst(&I, Slot, &*Handler->getFirstInsertionPt()); + return Slot; + } + } else { + InvokeInst &II = cast<InvokeInst>(I); + InsertPt = II.getNormalDest()->getFirstInsertionPt(); + } + + new StoreInst(&I, Slot, &*InsertPt); + return Slot; +} + +/// DemotePHIToStack - This function takes a virtual register computed by a PHI +/// node and replaces it with a slot in the stack frame allocated via alloca. +/// The PHI node is deleted. It returns the pointer to the alloca inserted. +AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { + if (P->use_empty()) { + P->eraseFromParent(); + return nullptr; + } + + const DataLayout &DL = P->getModule()->getDataLayout(); + + // Create a stack slot to hold the value. + AllocaInst *Slot; + if (AllocaPoint) { + Slot = new AllocaInst(P->getType(), DL.getAllocaAddrSpace(), nullptr, + P->getName()+".reg2mem", AllocaPoint); + } else { + Function *F = P->getParent()->getParent(); + Slot = new AllocaInst(P->getType(), DL.getAllocaAddrSpace(), nullptr, + P->getName() + ".reg2mem", + &F->getEntryBlock().front()); + } + + // Iterate over each operand inserting a store in each predecessor. + for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) { + if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) { + assert(II->getParent() != P->getIncomingBlock(i) && + "Invoke edge not supported yet"); (void)II; + } + new StoreInst(P->getIncomingValue(i), Slot, + P->getIncomingBlock(i)->getTerminator()); + } + + // Insert a load in place of the PHI and replace all uses. + BasicBlock::iterator InsertPt = P->getIterator(); + // Don't insert before PHI nodes or landingpad instrs. + for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt) + if (isa<CatchSwitchInst>(InsertPt)) + break; + if (isa<CatchSwitchInst>(InsertPt)) { + // We need a separate load before each actual use of the PHI + SmallVector<Instruction *, 4> Users; + for (User *U : P->users()) { + Instruction *User = cast<Instruction>(U); + Users.push_back(User); + } + for (Instruction *User : Users) { + Value *V = + new LoadInst(P->getType(), Slot, P->getName() + ".reload", User); + User->replaceUsesOfWith(P, V); + } + } else { + Value *V = + new LoadInst(P->getType(), Slot, P->getName() + ".reload", &*InsertPt); + P->replaceAllUsesWith(V); + } + // Delete PHI. 
+ P->eraseFromParent(); + return Slot; +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/EntryExitInstrumenter.cpp new file mode 100644 index 0000000000..53af1b1969 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/EntryExitInstrumenter.cpp @@ -0,0 +1,152 @@ +//===- EntryExitInstrumenter.cpp - Function Entry/Exit Instrumentation ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/EntryExitInstrumenter.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils.h" + +using namespace llvm; + +static void insertCall(Function &CurFn, StringRef Func, + Instruction *InsertionPt, DebugLoc DL) { + Module &M = *InsertionPt->getParent()->getParent()->getParent(); + LLVMContext &C = InsertionPt->getParent()->getContext(); + + if (Func == "mcount" || + Func == ".mcount" || + Func == "llvm.arm.gnu.eabi.mcount" || + Func == "\01_mcount" || + Func == "\01mcount" || + Func == "__mcount" || + Func == "_mcount" || + Func == "__cyg_profile_func_enter_bare") { + Triple TargetTriple(M.getTargetTriple()); + if (TargetTriple.isOSAIX() && Func == "__mcount") { + Type *SizeTy = M.getDataLayout().getIntPtrType(C); + Type *SizePtrTy = SizeTy->getPointerTo(); + GlobalVariable *GV = new GlobalVariable(M, SizeTy, /*isConstant=*/false, + GlobalValue::InternalLinkage, + ConstantInt::get(SizeTy, 0)); + CallInst *Call = CallInst::Create( + M.getOrInsertFunction(Func, + FunctionType::get(Type::getVoidTy(C), {SizePtrTy}, + /*isVarArg=*/false)), + {GV}, "", InsertionPt); + Call->setDebugLoc(DL); + } else { + FunctionCallee Fn = M.getOrInsertFunction(Func, Type::getVoidTy(C)); + CallInst *Call = CallInst::Create(Fn, "", InsertionPt); + Call->setDebugLoc(DL); + } + return; + } + + if (Func == "__cyg_profile_func_enter" || Func == "__cyg_profile_func_exit") { + Type *ArgTypes[] = {Type::getInt8PtrTy(C), Type::getInt8PtrTy(C)}; + + FunctionCallee Fn = M.getOrInsertFunction( + Func, FunctionType::get(Type::getVoidTy(C), ArgTypes, false)); + + Instruction *RetAddr = CallInst::Create( + Intrinsic::getDeclaration(&M, Intrinsic::returnaddress), + ArrayRef<Value *>(ConstantInt::get(Type::getInt32Ty(C), 0)), "", + InsertionPt); + RetAddr->setDebugLoc(DL); + + Value *Args[] = {ConstantExpr::getBitCast(&CurFn, Type::getInt8PtrTy(C)), + RetAddr}; + + CallInst *Call = + CallInst::Create(Fn, ArrayRef<Value *>(Args), "", InsertionPt); + Call->setDebugLoc(DL); + return; + } + + // We only know how to call a fixed set of instrumentation functions, because + // they all expect different arguments, etc. + report_fatal_error(Twine("Unknown instrumentation function: '") + Func + "'"); +} + +static bool runOnFunction(Function &F, bool PostInlining) { + StringRef EntryAttr = PostInlining ? "instrument-function-entry-inlined" + : "instrument-function-entry"; + + StringRef ExitAttr = PostInlining ? 
"instrument-function-exit-inlined" + : "instrument-function-exit"; + + StringRef EntryFunc = F.getFnAttribute(EntryAttr).getValueAsString(); + StringRef ExitFunc = F.getFnAttribute(ExitAttr).getValueAsString(); + + bool Changed = false; + + // If the attribute is specified, insert instrumentation and then "consume" + // the attribute so that it's not inserted again if the pass should happen to + // run later for some reason. + + if (!EntryFunc.empty()) { + DebugLoc DL; + if (auto SP = F.getSubprogram()) + DL = DILocation::get(SP->getContext(), SP->getScopeLine(), 0, SP); + + insertCall(F, EntryFunc, &*F.begin()->getFirstInsertionPt(), DL); + Changed = true; + F.removeFnAttr(EntryAttr); + } + + if (!ExitFunc.empty()) { + for (BasicBlock &BB : F) { + Instruction *T = BB.getTerminator(); + if (!isa<ReturnInst>(T)) + continue; + + // If T is preceded by a musttail call, that's the real terminator. + if (CallInst *CI = BB.getTerminatingMustTailCall()) + T = CI; + + DebugLoc DL; + if (DebugLoc TerminatorDL = T->getDebugLoc()) + DL = TerminatorDL; + else if (auto SP = F.getSubprogram()) + DL = DILocation::get(SP->getContext(), 0, 0, SP); + + insertCall(F, ExitFunc, T, DL); + Changed = true; + } + F.removeFnAttr(ExitAttr); + } + + return Changed; +} + +PreservedAnalyses +llvm::EntryExitInstrumenterPass::run(Function &F, FunctionAnalysisManager &AM) { + runOnFunction(F, PostInlining); + PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); + return PA; +} + +void llvm::EntryExitInstrumenterPass::printPipeline( + raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { + static_cast<PassInfoMixin<llvm::EntryExitInstrumenterPass> *>(this) + ->printPipeline(OS, MapClassName2PassName); + OS << "<"; + if (PostInlining) + OS << "post-inline"; + OS << ">"; +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/EscapeEnumerator.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/EscapeEnumerator.cpp new file mode 100644 index 0000000000..91053338df --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/EscapeEnumerator.cpp @@ -0,0 +1,98 @@ +//===- EscapeEnumerator.cpp -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines a helper class that enumerates all possible exits from a function, +// including exception handling. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/EscapeEnumerator.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/Local.h" + +using namespace llvm; + +static FunctionCallee getDefaultPersonalityFn(Module *M) { + LLVMContext &C = M->getContext(); + Triple T(M->getTargetTriple()); + EHPersonality Pers = getDefaultEHPersonality(T); + return M->getOrInsertFunction(getEHPersonalityName(Pers), + FunctionType::get(Type::getInt32Ty(C), true)); +} + +IRBuilder<> *EscapeEnumerator::Next() { + if (Done) + return nullptr; + + // Find all 'return', 'resume', and 'unwind' instructions. + while (StateBB != StateE) { + BasicBlock *CurBB = &*StateBB++; + + // Branches and invokes do not escape, only unwind, resume, and return + // do. 
+ Instruction *TI = CurBB->getTerminator(); + if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI)) + continue; + + if (CallInst *CI = CurBB->getTerminatingMustTailCall()) + TI = CI; + Builder.SetInsertPoint(TI); + return &Builder; + } + + Done = true; + + if (!HandleExceptions) + return nullptr; + + if (F.doesNotThrow()) + return nullptr; + + // Find all 'call' instructions that may throw. + // We cannot tranform calls with musttail tag. + SmallVector<Instruction *, 16> Calls; + for (BasicBlock &BB : F) + for (Instruction &II : BB) + if (CallInst *CI = dyn_cast<CallInst>(&II)) + if (!CI->doesNotThrow() && !CI->isMustTailCall()) + Calls.push_back(CI); + + if (Calls.empty()) + return nullptr; + + // Create a cleanup block. + LLVMContext &C = F.getContext(); + BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F); + Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C)); + if (!F.hasPersonalityFn()) { + FunctionCallee PersFn = getDefaultPersonalityFn(F.getParent()); + F.setPersonalityFn(cast<Constant>(PersFn.getCallee())); + } + + if (isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) { + report_fatal_error("Scoped EH not supported"); + } + + LandingPadInst *LPad = + LandingPadInst::Create(ExnTy, 1, "cleanup.lpad", CleanupBB); + LPad->setCleanup(true); + ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB); + + // Transform the 'call' instructions into 'invoke's branching to the + // cleanup block. Go in reverse order to make prettier BB names. + SmallVector<Value *, 16> Args; + for (unsigned I = Calls.size(); I != 0;) { + CallInst *CI = cast<CallInst>(Calls[--I]); + changeToInvokeAndSplitBasicBlock(CI, CleanupBB, DTU); + } + + Builder.SetInsertPoint(RI); + return &Builder; +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/Evaluator.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/Evaluator.cpp new file mode 100644 index 0000000000..dc58bebd72 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/Evaluator.cpp @@ -0,0 +1,688 @@ +//===- Evaluator.cpp - LLVM IR evaluator ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Function evaluator for LLVM IR. 
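+// (Editorial note: the main client is static-constructor evaluation, e.g. in
+// GlobalOpt, which runs initializer code at compile time and commits the
+// resulting memory state back into global initializers.)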
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Evaluator.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "evaluator" + +using namespace llvm; + +static inline bool +isSimpleEnoughValueToCommit(Constant *C, + SmallPtrSetImpl<Constant *> &SimpleConstants, + const DataLayout &DL); + +/// Return true if the specified constant can be handled by the code generator. +/// We don't want to generate something like: +/// void *X = &X/42; +/// because the code generator doesn't have a relocation that can handle that. +/// +/// This function should be called if C was not found (but just got inserted) +/// in SimpleConstants to avoid having to rescan the same constants all the +/// time. +static bool +isSimpleEnoughValueToCommitHelper(Constant *C, + SmallPtrSetImpl<Constant *> &SimpleConstants, + const DataLayout &DL) { + // Simple global addresses are supported, do not allow dllimport or + // thread-local globals. + if (auto *GV = dyn_cast<GlobalValue>(C)) + return !GV->hasDLLImportStorageClass() && !GV->isThreadLocal(); + + // Simple integer, undef, constant aggregate zero, etc are all supported. + if (C->getNumOperands() == 0 || isa<BlockAddress>(C)) + return true; + + // Aggregate values are safe if all their elements are. + if (isa<ConstantAggregate>(C)) { + for (Value *Op : C->operands()) + if (!isSimpleEnoughValueToCommit(cast<Constant>(Op), SimpleConstants, DL)) + return false; + return true; + } + + // We don't know exactly what relocations are allowed in constant expressions, + // so we allow &global+constantoffset, which is safe and uniformly supported + // across targets. + ConstantExpr *CE = cast<ConstantExpr>(C); + switch (CE->getOpcode()) { + case Instruction::BitCast: + // Bitcast is fine if the casted value is fine. + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); + + case Instruction::IntToPtr: + case Instruction::PtrToInt: + // int <=> ptr is fine if the int type is the same size as the + // pointer type. + if (DL.getTypeSizeInBits(CE->getType()) != + DL.getTypeSizeInBits(CE->getOperand(0)->getType())) + return false; + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); + + // GEP is fine if it is simple + constant offset. + case Instruction::GetElementPtr: + for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i) + if (!isa<ConstantInt>(CE->getOperand(i))) + return false; + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); + + case Instruction::Add: + // We allow simple+cst. 
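+ // (Editorial example: a constant expression such as
+ //   add (i64 ptrtoint (ptr @g to i64), i64 8)
+ // is accepted, since operand 1 is a ConstantInt and operand 0 is itself
+ // simple enough to commit.)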
+ if (!isa<ConstantInt>(CE->getOperand(1))) + return false; + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); + } + return false; +} + +static inline bool +isSimpleEnoughValueToCommit(Constant *C, + SmallPtrSetImpl<Constant *> &SimpleConstants, + const DataLayout &DL) { + // If we already checked this constant, we win. + if (!SimpleConstants.insert(C).second) + return true; + // Check the constant. + return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, DL); +} + +void Evaluator::MutableValue::clear() { + if (auto *Agg = Val.dyn_cast<MutableAggregate *>()) + delete Agg; + Val = nullptr; +} + +Constant *Evaluator::MutableValue::read(Type *Ty, APInt Offset, + const DataLayout &DL) const { + TypeSize TySize = DL.getTypeStoreSize(Ty); + const MutableValue *V = this; + while (const auto *Agg = V->Val.dyn_cast<MutableAggregate *>()) { + Type *AggTy = Agg->Ty; + std::optional<APInt> Index = DL.getGEPIndexForOffset(AggTy, Offset); + if (!Index || Index->uge(Agg->Elements.size()) || + !TypeSize::isKnownLE(TySize, DL.getTypeStoreSize(AggTy))) + return nullptr; + + V = &Agg->Elements[Index->getZExtValue()]; + } + + return ConstantFoldLoadFromConst(V->Val.get<Constant *>(), Ty, Offset, DL); +} + +bool Evaluator::MutableValue::makeMutable() { + Constant *C = Val.get<Constant *>(); + Type *Ty = C->getType(); + unsigned NumElements; + if (auto *VT = dyn_cast<FixedVectorType>(Ty)) { + NumElements = VT->getNumElements(); + } else if (auto *AT = dyn_cast<ArrayType>(Ty)) + NumElements = AT->getNumElements(); + else if (auto *ST = dyn_cast<StructType>(Ty)) + NumElements = ST->getNumElements(); + else + return false; + + MutableAggregate *MA = new MutableAggregate(Ty); + MA->Elements.reserve(NumElements); + for (unsigned I = 0; I < NumElements; ++I) + MA->Elements.push_back(C->getAggregateElement(I)); + Val = MA; + return true; +} + +bool Evaluator::MutableValue::write(Constant *V, APInt Offset, + const DataLayout &DL) { + Type *Ty = V->getType(); + TypeSize TySize = DL.getTypeStoreSize(Ty); + MutableValue *MV = this; + while (Offset != 0 || + !CastInst::isBitOrNoopPointerCastable(Ty, MV->getType(), DL)) { + if (MV->Val.is<Constant *>() && !MV->makeMutable()) + return false; + + MutableAggregate *Agg = MV->Val.get<MutableAggregate *>(); + Type *AggTy = Agg->Ty; + std::optional<APInt> Index = DL.getGEPIndexForOffset(AggTy, Offset); + if (!Index || Index->uge(Agg->Elements.size()) || + !TypeSize::isKnownLE(TySize, DL.getTypeStoreSize(AggTy))) + return false; + + MV = &Agg->Elements[Index->getZExtValue()]; + } + + Type *MVType = MV->getType(); + MV->clear(); + if (Ty->isIntegerTy() && MVType->isPointerTy()) + MV->Val = ConstantExpr::getIntToPtr(V, MVType); + else if (Ty->isPointerTy() && MVType->isIntegerTy()) + MV->Val = ConstantExpr::getPtrToInt(V, MVType); + else if (Ty != MVType) + MV->Val = ConstantExpr::getBitCast(V, MVType); + else + MV->Val = V; + return true; +} + +Constant *Evaluator::MutableAggregate::toConstant() const { + SmallVector<Constant *, 32> Consts; + for (const MutableValue &MV : Elements) + Consts.push_back(MV.toConstant()); + + if (auto *ST = dyn_cast<StructType>(Ty)) + return ConstantStruct::get(ST, Consts); + if (auto *AT = dyn_cast<ArrayType>(Ty)) + return ConstantArray::get(AT, Consts); + assert(isa<FixedVectorType>(Ty) && "Must be vector"); + return ConstantVector::get(Consts); +} + +/// Return the value that would be computed by a load from P after the stores +/// reflected by 'memory' have been performed. If we can't decide, return null. 
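+/// (Editorial example: once this evaluation has executed
+///   store i32 7, ptr @g
+/// a later i32 load from @g is answered out of MutatedMemory and yields
+/// i32 7 rather than @g's original initializer.)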
+Constant *Evaluator::ComputeLoadResult(Constant *P, Type *Ty) { + APInt Offset(DL.getIndexTypeSizeInBits(P->getType()), 0); + P = cast<Constant>(P->stripAndAccumulateConstantOffsets( + DL, Offset, /* AllowNonInbounds */ true)); + Offset = Offset.sextOrTrunc(DL.getIndexTypeSizeInBits(P->getType())); + if (auto *GV = dyn_cast<GlobalVariable>(P)) + return ComputeLoadResult(GV, Ty, Offset); + return nullptr; +} + +Constant *Evaluator::ComputeLoadResult(GlobalVariable *GV, Type *Ty, + const APInt &Offset) { + auto It = MutatedMemory.find(GV); + if (It != MutatedMemory.end()) + return It->second.read(Ty, Offset, DL); + + if (!GV->hasDefinitiveInitializer()) + return nullptr; + return ConstantFoldLoadFromConst(GV->getInitializer(), Ty, Offset, DL); +} + +static Function *getFunction(Constant *C) { + if (auto *Fn = dyn_cast<Function>(C)) + return Fn; + + if (auto *Alias = dyn_cast<GlobalAlias>(C)) + if (auto *Fn = dyn_cast<Function>(Alias->getAliasee())) + return Fn; + return nullptr; +} + +Function * +Evaluator::getCalleeWithFormalArgs(CallBase &CB, + SmallVectorImpl<Constant *> &Formals) { + auto *V = CB.getCalledOperand()->stripPointerCasts(); + if (auto *Fn = getFunction(getVal(V))) + return getFormalParams(CB, Fn, Formals) ? Fn : nullptr; + return nullptr; +} + +bool Evaluator::getFormalParams(CallBase &CB, Function *F, + SmallVectorImpl<Constant *> &Formals) { + if (!F) + return false; + + auto *FTy = F->getFunctionType(); + if (FTy->getNumParams() > CB.arg_size()) { + LLVM_DEBUG(dbgs() << "Too few arguments for function.\n"); + return false; + } + + auto ArgI = CB.arg_begin(); + for (Type *PTy : FTy->params()) { + auto *ArgC = ConstantFoldLoadThroughBitcast(getVal(*ArgI), PTy, DL); + if (!ArgC) { + LLVM_DEBUG(dbgs() << "Can not convert function argument.\n"); + return false; + } + Formals.push_back(ArgC); + ++ArgI; + } + return true; +} + +/// If call expression contains bitcast then we may need to cast +/// evaluated return value to a type of the call expression. +Constant *Evaluator::castCallResultIfNeeded(Type *ReturnType, Constant *RV) { + if (!RV || RV->getType() == ReturnType) + return RV; + + RV = ConstantFoldLoadThroughBitcast(RV, ReturnType, DL); + if (!RV) + LLVM_DEBUG(dbgs() << "Failed to fold bitcast call expr\n"); + return RV; +} + +/// Evaluate all instructions in block BB, returning true if successful, false +/// if we can't evaluate it. NewBB returns the next BB that control flows into, +/// or null upon return. StrippedPointerCastsForAliasAnalysis is set to true if +/// we looked through pointer casts to evaluate something. +bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB, + bool &StrippedPointerCastsForAliasAnalysis) { + // This is the main evaluation loop. + while (true) { + Constant *InstResult = nullptr; + + LLVM_DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n"); + + if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) { + if (SI->isVolatile()) { + LLVM_DEBUG(dbgs() << "Store is volatile! Can not evaluate.\n"); + return false; // no volatile accesses. 
+ } + Constant *Ptr = getVal(SI->getOperand(1)); + Constant *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI); + if (Ptr != FoldedPtr) { + LLVM_DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr); + Ptr = FoldedPtr; + LLVM_DEBUG(dbgs() << "; To: " << *Ptr << "\n"); + } + + APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); + Ptr = cast<Constant>(Ptr->stripAndAccumulateConstantOffsets( + DL, Offset, /* AllowNonInbounds */ true)); + Offset = Offset.sextOrTrunc(DL.getIndexTypeSizeInBits(Ptr->getType())); + auto *GV = dyn_cast<GlobalVariable>(Ptr); + if (!GV || !GV->hasUniqueInitializer()) { + LLVM_DEBUG(dbgs() << "Store is not to global with unique initializer: " + << *Ptr << "\n"); + return false; + } + + // If this might be too difficult for the backend to handle (e.g. the addr + // of one global variable divided by another) then we can't commit it. + Constant *Val = getVal(SI->getOperand(0)); + if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, DL)) { + LLVM_DEBUG(dbgs() << "Store value is too complex to evaluate store. " + << *Val << "\n"); + return false; + } + + auto Res = MutatedMemory.try_emplace(GV, GV->getInitializer()); + if (!Res.first->second.write(Val, Offset, DL)) + return false; + } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { + if (LI->isVolatile()) { + LLVM_DEBUG( + dbgs() << "Found a Load! Volatile load, can not evaluate.\n"); + return false; // no volatile accesses. + } + + Constant *Ptr = getVal(LI->getOperand(0)); + Constant *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI); + if (Ptr != FoldedPtr) { + Ptr = FoldedPtr; + LLVM_DEBUG(dbgs() << "Found a constant pointer expression, constant " + "folding: " + << *Ptr << "\n"); + } + InstResult = ComputeLoadResult(Ptr, LI->getType()); + if (!InstResult) { + LLVM_DEBUG( + dbgs() << "Failed to compute load result. Can not evaluate load." + "\n"); + return false; // Could not evaluate load. + } + + LLVM_DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n"); + } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) { + if (AI->isArrayAllocation()) { + LLVM_DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n"); + return false; // Cannot handle array allocs. + } + Type *Ty = AI->getAllocatedType(); + AllocaTmps.push_back(std::make_unique<GlobalVariable>( + Ty, false, GlobalValue::InternalLinkage, UndefValue::get(Ty), + AI->getName(), /*TLMode=*/GlobalValue::NotThreadLocal, + AI->getType()->getPointerAddressSpace())); + InstResult = AllocaTmps.back().get(); + LLVM_DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n"); + } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) { + CallBase &CB = *cast<CallBase>(&*CurInst); + + // Debug info can safely be ignored here. + if (isa<DbgInfoIntrinsic>(CB)) { + LLVM_DEBUG(dbgs() << "Ignoring debug info.\n"); + ++CurInst; + continue; + } + + // Cannot handle inline asm. 
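+ // (Editorial note: inline asm may have arbitrary side effects and has no
+ // constant-foldable semantics, so evaluation must give up here.)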
+ if (CB.isInlineAsm()) { + LLVM_DEBUG(dbgs() << "Found inline asm, can not evaluate.\n"); + return false; + } + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CB)) { + if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) { + if (MSI->isVolatile()) { + LLVM_DEBUG(dbgs() << "Can not optimize a volatile memset " + << "intrinsic.\n"); + return false; + } + + auto *LenC = dyn_cast<ConstantInt>(getVal(MSI->getLength())); + if (!LenC) { + LLVM_DEBUG(dbgs() << "Memset with unknown length.\n"); + return false; + } + + Constant *Ptr = getVal(MSI->getDest()); + APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); + Ptr = cast<Constant>(Ptr->stripAndAccumulateConstantOffsets( + DL, Offset, /* AllowNonInbounds */ true)); + auto *GV = dyn_cast<GlobalVariable>(Ptr); + if (!GV) { + LLVM_DEBUG(dbgs() << "Memset with unknown base.\n"); + return false; + } + + Constant *Val = getVal(MSI->getValue()); + APInt Len = LenC->getValue(); + while (Len != 0) { + Constant *DestVal = ComputeLoadResult(GV, Val->getType(), Offset); + if (DestVal != Val) { + LLVM_DEBUG(dbgs() << "Memset is not a no-op at offset " + << Offset << " of " << *GV << ".\n"); + return false; + } + ++Offset; + --Len; + } + + LLVM_DEBUG(dbgs() << "Ignoring no-op memset.\n"); + ++CurInst; + continue; + } + + if (II->isLifetimeStartOrEnd()) { + LLVM_DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n"); + ++CurInst; + continue; + } + + if (II->getIntrinsicID() == Intrinsic::invariant_start) { + // We don't insert an entry into Values, as it doesn't have a + // meaningful return value. + if (!II->use_empty()) { + LLVM_DEBUG(dbgs() + << "Found unused invariant_start. Can't evaluate.\n"); + return false; + } + ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0)); + Value *PtrArg = getVal(II->getArgOperand(1)); + Value *Ptr = PtrArg->stripPointerCasts(); + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) { + Type *ElemTy = GV->getValueType(); + if (!Size->isMinusOne() && + Size->getValue().getLimitedValue() >= + DL.getTypeStoreSize(ElemTy)) { + Invariants.insert(GV); + LLVM_DEBUG(dbgs() << "Found a global var that is an invariant: " + << *GV << "\n"); + } else { + LLVM_DEBUG(dbgs() + << "Found a global var, but can not treat it as an " + "invariant.\n"); + } + } + // Continue even if we do nothing. + ++CurInst; + continue; + } else if (II->getIntrinsicID() == Intrinsic::assume) { + LLVM_DEBUG(dbgs() << "Skipping assume intrinsic.\n"); + ++CurInst; + continue; + } else if (II->getIntrinsicID() == Intrinsic::sideeffect) { + LLVM_DEBUG(dbgs() << "Skipping sideeffect intrinsic.\n"); + ++CurInst; + continue; + } else if (II->getIntrinsicID() == Intrinsic::pseudoprobe) { + LLVM_DEBUG(dbgs() << "Skipping pseudoprobe intrinsic.\n"); + ++CurInst; + continue; + } else { + Value *Stripped = CurInst->stripPointerCastsForAliasAnalysis(); + // Only attempt to getVal() if we've actually managed to strip + // anything away, or else we'll call getVal() on the current + // instruction. + if (Stripped != &*CurInst) { + InstResult = getVal(Stripped); + } + if (InstResult) { + LLVM_DEBUG(dbgs() + << "Stripped pointer casts for alias analysis for " + "intrinsic call.\n"); + StrippedPointerCastsForAliasAnalysis = true; + InstResult = ConstantExpr::getBitCast(InstResult, II->getType()); + } else { + LLVM_DEBUG(dbgs() << "Unknown intrinsic. Cannot evaluate.\n"); + return false; + } + } + } + + if (!InstResult) { + // Resolve function pointers. 
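+ // (Editorial note: getCalleeWithFormalArgs strips pointer casts from the
+ // called operand and looks through values already computed by this
+ // evaluation, so direct calls through constant function pointers and
+ // aliases can still be resolved.)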
+ SmallVector<Constant *, 8> Formals; + Function *Callee = getCalleeWithFormalArgs(CB, Formals); + if (!Callee || Callee->isInterposable()) { + LLVM_DEBUG(dbgs() << "Can not resolve function pointer.\n"); + return false; // Cannot resolve. + } + + if (Callee->isDeclaration()) { + // If this is a function we can constant fold, do it. + if (Constant *C = ConstantFoldCall(&CB, Callee, Formals, TLI)) { + InstResult = castCallResultIfNeeded(CB.getType(), C); + if (!InstResult) + return false; + LLVM_DEBUG(dbgs() << "Constant folded function call. Result: " + << *InstResult << "\n"); + } else { + LLVM_DEBUG(dbgs() << "Can not constant fold function call.\n"); + return false; + } + } else { + if (Callee->getFunctionType()->isVarArg()) { + LLVM_DEBUG(dbgs() + << "Can not constant fold vararg function call.\n"); + return false; + } + + Constant *RetVal = nullptr; + // Execute the call, if successful, use the return value. + ValueStack.emplace_back(); + if (!EvaluateFunction(Callee, RetVal, Formals)) { + LLVM_DEBUG(dbgs() << "Failed to evaluate function.\n"); + return false; + } + ValueStack.pop_back(); + InstResult = castCallResultIfNeeded(CB.getType(), RetVal); + if (RetVal && !InstResult) + return false; + + if (InstResult) { + LLVM_DEBUG(dbgs() << "Successfully evaluated function. Result: " + << *InstResult << "\n\n"); + } else { + LLVM_DEBUG(dbgs() + << "Successfully evaluated function. Result: 0\n\n"); + } + } + } + } else if (CurInst->isTerminator()) { + LLVM_DEBUG(dbgs() << "Found a terminator instruction.\n"); + + if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) { + if (BI->isUnconditional()) { + NextBB = BI->getSuccessor(0); + } else { + ConstantInt *Cond = + dyn_cast<ConstantInt>(getVal(BI->getCondition())); + if (!Cond) return false; // Cannot determine. + + NextBB = BI->getSuccessor(!Cond->getZExtValue()); + } + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) { + ConstantInt *Val = + dyn_cast<ConstantInt>(getVal(SI->getCondition())); + if (!Val) return false; // Cannot determine. + NextBB = SI->findCaseValue(Val)->getCaseSuccessor(); + } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) { + Value *Val = getVal(IBI->getAddress())->stripPointerCasts(); + if (BlockAddress *BA = dyn_cast<BlockAddress>(Val)) + NextBB = BA->getBasicBlock(); + else + return false; // Cannot determine. + } else if (isa<ReturnInst>(CurInst)) { + NextBB = nullptr; + } else { + // invoke, unwind, resume, unreachable. + LLVM_DEBUG(dbgs() << "Can not handle terminator."); + return false; // Cannot handle this terminator. + } + + // We succeeded at evaluating this block! + LLVM_DEBUG(dbgs() << "Successfully evaluated block.\n"); + return true; + } else { + SmallVector<Constant *> Ops; + for (Value *Op : CurInst->operands()) + Ops.push_back(getVal(Op)); + InstResult = ConstantFoldInstOperands(&*CurInst, Ops, DL, TLI); + if (!InstResult) { + LLVM_DEBUG(dbgs() << "Cannot fold instruction: " << *CurInst << "\n"); + return false; + } + LLVM_DEBUG(dbgs() << "Folded instruction " << *CurInst << " to " + << *InstResult << "\n"); + } + + if (!CurInst->use_empty()) { + InstResult = ConstantFoldConstant(InstResult, DL, TLI); + setVal(&*CurInst, InstResult); + } + + // If we just processed an invoke, we finished evaluating the block. + if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) { + NextBB = II->getNormalDest(); + LLVM_DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n"); + return true; + } + + // Advance program counter. 
+ ++CurInst; + } +} + +/// Evaluate a call to function F, returning true if successful, false if we +/// can't evaluate it. ActualArgs contains the formal arguments for the +/// function. +bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, + const SmallVectorImpl<Constant*> &ActualArgs) { + assert(ActualArgs.size() == F->arg_size() && "wrong number of arguments"); + + // Check to see if this function is already executing (recursion). If so, + // bail out. TODO: we might want to accept limited recursion. + if (is_contained(CallStack, F)) + return false; + + CallStack.push_back(F); + + // Initialize arguments to the incoming values specified. + for (const auto &[ArgNo, Arg] : llvm::enumerate(F->args())) + setVal(&Arg, ActualArgs[ArgNo]); + + // ExecutedBlocks - We only handle non-looping, non-recursive code. As such, + // we can only evaluate any one basic block at most once. This set keeps + // track of what we have executed so we can detect recursive cases etc. + SmallPtrSet<BasicBlock*, 32> ExecutedBlocks; + + // CurBB - The current basic block we're evaluating. + BasicBlock *CurBB = &F->front(); + + BasicBlock::iterator CurInst = CurBB->begin(); + + while (true) { + BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings. + LLVM_DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n"); + + bool StrippedPointerCastsForAliasAnalysis = false; + + if (!EvaluateBlock(CurInst, NextBB, StrippedPointerCastsForAliasAnalysis)) + return false; + + if (!NextBB) { + // Successfully running until there's no next block means that we found + // the return. Fill it the return value and pop the call stack. + ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator()); + if (RI->getNumOperands()) { + // The Evaluator can look through pointer casts as long as alias + // analysis holds because it's just a simple interpreter and doesn't + // skip memory accesses due to invariant group metadata, but we can't + // let users of Evaluator use a value that's been gleaned looking + // through stripping pointer casts. + if (StrippedPointerCastsForAliasAnalysis && + !RI->getReturnValue()->getType()->isVoidTy()) { + return false; + } + RetVal = getVal(RI->getOperand(0)); + } + CallStack.pop_back(); + return true; + } + + // Okay, we succeeded in evaluating this control flow. See if we have + // executed the new block before. If so, we have a looping function, + // which we cannot evaluate in reasonable time. + if (!ExecutedBlocks.insert(NextBB).second) + return false; // looped! + + // Okay, we have never been in this block before. Check to see if there + // are any PHI nodes. If so, evaluate them with information about where + // we came from. + PHINode *PN = nullptr; + for (CurInst = NextBB->begin(); + (PN = dyn_cast<PHINode>(CurInst)); ++CurInst) + setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB))); + + // Advance to the next block. + CurBB = NextBB; + } +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/FixIrreducible.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/FixIrreducible.cpp new file mode 100644 index 0000000000..dda2361673 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/FixIrreducible.cpp @@ -0,0 +1,359 @@ +//===- FixIrreducible.cpp - Convert irreducible control-flow into loops ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// An irreducible SCC is one which has multiple "header" blocks, i.e., blocks +// with control-flow edges incident from outside the SCC. This pass converts a +// irreducible SCC into a natural loop by applying the following transformation: +// +// 1. Collect the set of headers H of the SCC. +// 2. Collect the set of predecessors P of these headers. These may be inside as +// well as outside the SCC. +// 3. Create block N and redirect every edge from set P to set H through N. +// +// This converts the SCC into a natural loop with N as the header: N is the only +// block with edges incident from outside the SCC, and all backedges in the SCC +// are incident on N, i.e., for every backedge, the head now dominates the tail. +// +// INPUT CFG: The blocks A and B form an irreducible loop with two headers. +// +// Entry +// / \ +// v v +// A ----> B +// ^ /| +// `----' | +// v +// Exit +// +// OUTPUT CFG: Edges incident on A and B are now redirected through a +// new block N, forming a natural loop consisting of N, A and B. +// +// Entry +// | +// v +// .---> N <---. +// / / \ \ +// | / \ | +// \ v v / +// `-- A B --' +// | +// v +// Exit +// +// The transformation is applied to every maximal SCC that is not already +// recognized as a loop. The pass operates on all maximal SCCs found in the +// function body outside of any loop, as well as those found inside each loop, +// including inside any newly created loops. This ensures that any SCC hidden +// inside a maximal SCC is also transformed. +// +// The actual transformation is handled by function CreateControlFlowHub, which +// takes a set of incoming blocks (the predecessors) and outgoing blocks (the +// headers). The function also moves every PHINode in an outgoing block to the +// hub. Since the hub dominates all the outgoing blocks, each such PHINode +// continues to dominate its uses. Since every header in an SCC has at least two +// predecessors, every value used in the header (or later) but defined in a +// predecessor (or earlier) is represented by a PHINode in a header. Hence the +// above handling of PHINodes is sufficient and no further processing is +// required to restore SSA. +// +// Limitation: The pass cannot handle switch statements and indirect +// branches. Both must be lowered to plain branches first. 
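+//
+// (Editorial note: in the legacy pass manager this shows up as the required
+// LowerSwitchID below, i.e. a pipeline is expected to lower switches, for
+// example with -lowerswitch, before running -fix-irreducible; exact
+// spellings may differ between pass managers.)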
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/FixIrreducible.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/Analysis/DomTreeUpdater.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +#define DEBUG_TYPE "fix-irreducible" + +using namespace llvm; + +namespace { +struct FixIrreducible : public FunctionPass { + static char ID; + FixIrreducible() : FunctionPass(ID) { + initializeFixIrreduciblePass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequiredID(LowerSwitchID); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); + AU.addPreservedID(LowerSwitchID); + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); + } + + bool runOnFunction(Function &F) override; +}; +} // namespace + +char FixIrreducible::ID = 0; + +FunctionPass *llvm::createFixIrreduciblePass() { return new FixIrreducible(); } + +INITIALIZE_PASS_BEGIN(FixIrreducible, "fix-irreducible", + "Convert irreducible control-flow into natural loops", + false /* Only looks at CFG */, false /* Analysis Pass */) +INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_END(FixIrreducible, "fix-irreducible", + "Convert irreducible control-flow into natural loops", + false /* Only looks at CFG */, false /* Analysis Pass */) + +// When a new loop is created, existing children of the parent loop may now be +// fully inside the new loop. Reconnect these as children of the new loop. +static void reconnectChildLoops(LoopInfo &LI, Loop *ParentLoop, Loop *NewLoop, + SetVector<BasicBlock *> &Blocks, + SetVector<BasicBlock *> &Headers) { + auto &CandidateLoops = ParentLoop ? ParentLoop->getSubLoopsVector() + : LI.getTopLevelLoopsVector(); + // The new loop cannot be its own child, and any candidate is a + // child iff its header is owned by the new loop. Move all the + // children to a new vector. + auto FirstChild = std::partition( + CandidateLoops.begin(), CandidateLoops.end(), [&](Loop *L) { + return L == NewLoop || !Blocks.contains(L->getHeader()); + }); + SmallVector<Loop *, 8> ChildLoops(FirstChild, CandidateLoops.end()); + CandidateLoops.erase(FirstChild, CandidateLoops.end()); + + for (Loop *Child : ChildLoops) { + LLVM_DEBUG(dbgs() << "child loop: " << Child->getHeader()->getName() + << "\n"); + // TODO: A child loop whose header is also a header in the current + // SCC gets destroyed since its backedges are removed. That may + // not be necessary if we can retain such backedges. 
+ if (Headers.count(Child->getHeader())) { + for (auto *BB : Child->blocks()) { + if (LI.getLoopFor(BB) != Child) + continue; + LI.changeLoopFor(BB, NewLoop); + LLVM_DEBUG(dbgs() << "moved block from child: " << BB->getName() + << "\n"); + } + std::vector<Loop *> GrandChildLoops; + std::swap(GrandChildLoops, Child->getSubLoopsVector()); + for (auto *GrandChildLoop : GrandChildLoops) { + GrandChildLoop->setParentLoop(nullptr); + NewLoop->addChildLoop(GrandChildLoop); + } + LI.destroy(Child); + LLVM_DEBUG(dbgs() << "subsumed child loop (common header)\n"); + continue; + } + + Child->setParentLoop(nullptr); + NewLoop->addChildLoop(Child); + LLVM_DEBUG(dbgs() << "added child loop to new loop\n"); + } +} + +// Given a set of blocks and headers in an irreducible SCC, convert it into a +// natural loop. Also insert this new loop at its appropriate place in the +// hierarchy of loops. +static void createNaturalLoopInternal(LoopInfo &LI, DominatorTree &DT, + Loop *ParentLoop, + SetVector<BasicBlock *> &Blocks, + SetVector<BasicBlock *> &Headers) { +#ifndef NDEBUG + // All headers are part of the SCC + for (auto *H : Headers) { + assert(Blocks.count(H)); + } +#endif + + SetVector<BasicBlock *> Predecessors; + for (auto *H : Headers) { + for (auto *P : predecessors(H)) { + Predecessors.insert(P); + } + } + + LLVM_DEBUG( + dbgs() << "Found predecessors:"; + for (auto P : Predecessors) { + dbgs() << " " << P->getName(); + } + dbgs() << "\n"); + + // Redirect all the backedges through a "hub" consisting of a series + // of guard blocks that manage the flow of control from the + // predecessors to the headers. + SmallVector<BasicBlock *, 8> GuardBlocks; + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); + CreateControlFlowHub(&DTU, GuardBlocks, Predecessors, Headers, "irr"); +#if defined(EXPENSIVE_CHECKS) + assert(DT.verify(DominatorTree::VerificationLevel::Full)); +#else + assert(DT.verify(DominatorTree::VerificationLevel::Fast)); +#endif + + // Create a new loop from the now-transformed cycle + auto NewLoop = LI.AllocateLoop(); + if (ParentLoop) { + ParentLoop->addChildLoop(NewLoop); + } else { + LI.addTopLevelLoop(NewLoop); + } + + // Add the guard blocks to the new loop. The first guard block is + // the head of all the backedges, and it is the first to be inserted + // in the loop. This ensures that it is recognized as the + // header. Since the new loop is already in LoopInfo, the new blocks + // are also propagated up the chain of parent loops. + for (auto *G : GuardBlocks) { + LLVM_DEBUG(dbgs() << "added guard block: " << G->getName() << "\n"); + NewLoop->addBasicBlockToLoop(G, LI); + } + + // Add the SCC blocks to the new loop. + for (auto *BB : Blocks) { + NewLoop->addBlockEntry(BB); + if (LI.getLoopFor(BB) == ParentLoop) { + LLVM_DEBUG(dbgs() << "moved block from parent: " << BB->getName() + << "\n"); + LI.changeLoopFor(BB, NewLoop); + } else { + LLVM_DEBUG(dbgs() << "added block from child: " << BB->getName() << "\n"); + } + } + LLVM_DEBUG(dbgs() << "header for new loop: " + << NewLoop->getHeader()->getName() << "\n"); + + reconnectChildLoops(LI, ParentLoop, NewLoop, Blocks, Headers); + + NewLoop->verifyLoop(); + if (ParentLoop) { + ParentLoop->verifyLoop(); + } +#if defined(EXPENSIVE_CHECKS) + LI.verify(DT); +#endif // EXPENSIVE_CHECKS +} + +namespace llvm { +// Enable the graph traits required for traversing a Loop body. +template <> struct GraphTraits<Loop> : LoopBodyTraits {}; +} // namespace llvm + +// Overloaded wrappers to go with the function template below. 
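+// (Editorial note: makeReducible below is instantiated both for a Function*,
+// where the SCC iterator yields BasicBlock* nodes directly, and for a Loop,
+// where LoopBodyTraits yields pairs whose second element is the block;
+// unwrapBlock hides that difference.)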
+static BasicBlock *unwrapBlock(BasicBlock *B) { return B; } +static BasicBlock *unwrapBlock(LoopBodyTraits::NodeRef &N) { return N.second; } + +static void createNaturalLoop(LoopInfo &LI, DominatorTree &DT, Function *F, + SetVector<BasicBlock *> &Blocks, + SetVector<BasicBlock *> &Headers) { + createNaturalLoopInternal(LI, DT, nullptr, Blocks, Headers); +} + +static void createNaturalLoop(LoopInfo &LI, DominatorTree &DT, Loop &L, + SetVector<BasicBlock *> &Blocks, + SetVector<BasicBlock *> &Headers) { + createNaturalLoopInternal(LI, DT, &L, Blocks, Headers); +} + +// Convert irreducible SCCs; Graph G may be a Function* or a Loop&. +template <class Graph> +static bool makeReducible(LoopInfo &LI, DominatorTree &DT, Graph &&G) { + bool Changed = false; + for (auto Scc = scc_begin(G); !Scc.isAtEnd(); ++Scc) { + if (Scc->size() < 2) + continue; + SetVector<BasicBlock *> Blocks; + LLVM_DEBUG(dbgs() << "Found SCC:"); + for (auto N : *Scc) { + auto BB = unwrapBlock(N); + LLVM_DEBUG(dbgs() << " " << BB->getName()); + Blocks.insert(BB); + } + LLVM_DEBUG(dbgs() << "\n"); + + // Minor optimization: The SCC blocks are usually discovered in an order + // that is the opposite of the order in which these blocks appear as branch + // targets. This results in a lot of condition inversions in the control + // flow out of the new ControlFlowHub, which can be mitigated if the orders + // match. So we discover the headers using the reverse of the block order. + SetVector<BasicBlock *> Headers; + LLVM_DEBUG(dbgs() << "Found headers:"); + for (auto *BB : reverse(Blocks)) { + for (const auto P : predecessors(BB)) { + // Skip unreachable predecessors. + if (!DT.isReachableFromEntry(P)) + continue; + if (!Blocks.count(P)) { + LLVM_DEBUG(dbgs() << " " << BB->getName()); + Headers.insert(BB); + break; + } + } + } + LLVM_DEBUG(dbgs() << "\n"); + + if (Headers.size() == 1) { + assert(LI.isLoopHeader(Headers.front())); + LLVM_DEBUG(dbgs() << "Natural loop with a single header: skipped\n"); + continue; + } + createNaturalLoop(LI, DT, G, Blocks, Headers); + Changed = true; + } + return Changed; +} + +static bool FixIrreducibleImpl(Function &F, LoopInfo &LI, DominatorTree &DT) { + LLVM_DEBUG(dbgs() << "===== Fix irreducible control-flow in function: " + << F.getName() << "\n"); + + bool Changed = false; + SmallVector<Loop *, 8> WorkList; + + LLVM_DEBUG(dbgs() << "visiting top-level\n"); + Changed |= makeReducible(LI, DT, &F); + + // Any SCCs reduced are now already in the list of top-level loops, so simply + // add them all to the worklist. + append_range(WorkList, LI); + + while (!WorkList.empty()) { + auto L = WorkList.pop_back_val(); + LLVM_DEBUG(dbgs() << "visiting loop with header " + << L->getHeader()->getName() << "\n"); + Changed |= makeReducible(LI, DT, *L); + // Any SCCs reduced are now already in the list of child loops, so simply + // add them all to the worklist. 
+ WorkList.append(L->begin(), L->end()); + } + + return Changed; +} + +bool FixIrreducible::runOnFunction(Function &F) { + auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + return FixIrreducibleImpl(F, LI, DT); +} + +PreservedAnalyses FixIrreduciblePass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &LI = AM.getResult<LoopAnalysis>(F); + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + if (!FixIrreducibleImpl(F, LI, DT)) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve<LoopAnalysis>(); + PA.preserve<DominatorTreeAnalysis>(); + return PA; +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/FlattenCFG.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/FlattenCFG.cpp new file mode 100644 index 0000000000..2fb2ab82e4 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/FlattenCFG.cpp @@ -0,0 +1,548 @@ +//===- FlatternCFG.cpp - Code to perform CFG flattening -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Reduce conditional branches in CFG. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include <cassert> + +using namespace llvm; + +#define DEBUG_TYPE "flattencfg" + +namespace { + +class FlattenCFGOpt { + AliasAnalysis *AA; + + /// Use parallel-and or parallel-or to generate conditions for + /// conditional branches. + bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder); + + /// If \param BB is the merge block of an if-region, attempt to merge + /// the if-region with an adjacent if-region upstream if two if-regions + /// contain identical instructions. + bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder); + + /// Compare a pair of blocks: \p Block1 and \p Block2, which + /// are from two if-regions, where \p Head2 is the entry block of the 2nd + /// if-region. \returns true if \p Block1 and \p Block2 contain identical + /// instructions, and have no memory reference alias with \p Head2. + /// This is used as a legality check for merging if-regions. + bool CompareIfRegionBlock(BasicBlock *Block1, BasicBlock *Block2, + BasicBlock *Head2); + +public: + FlattenCFGOpt(AliasAnalysis *AA) : AA(AA) {} + + bool run(BasicBlock *BB); +}; + +} // end anonymous namespace + +/// If \param [in] BB has more than one predecessor that is a conditional +/// branch, attempt to use parallel and/or for the branch condition. \returns +/// true on success. +/// +/// Before: +/// ...... +/// %cmp10 = fcmp une float %tmp1, %tmp2 +/// br i1 %cmp10, label %if.then, label %lor.rhs +/// +/// lor.rhs: +/// ...... +/// %cmp11 = fcmp une float %tmp3, %tmp4 +/// br i1 %cmp11, label %if.then, label %ifend +/// +/// if.end: // the merge block +/// ...... 
+/// +/// if.then: // has two predecessors, both of them contains conditional branch. +/// ...... +/// br label %if.end; +/// +/// After: +/// ...... +/// %cmp10 = fcmp une float %tmp1, %tmp2 +/// ...... +/// %cmp11 = fcmp une float %tmp3, %tmp4 +/// %cmp12 = or i1 %cmp10, %cmp11 // parallel-or mode. +/// br i1 %cmp12, label %if.then, label %ifend +/// +/// if.end: +/// ...... +/// +/// if.then: +/// ...... +/// br label %if.end; +/// +/// Current implementation handles two cases. +/// Case 1: BB is on the else-path. +/// +/// BB1 +/// / | +/// BB2 | +/// / \ | +/// BB3 \ | where, BB1, BB2 contain conditional branches. +/// \ | / BB3 contains unconditional branch. +/// \ | / BB4 corresponds to BB which is also the merge. +/// BB => BB4 +/// +/// +/// Corresponding source code: +/// +/// if (a == b && c == d) +/// statement; // BB3 +/// +/// Case 2: BB is on the then-path. +/// +/// BB1 +/// / | +/// | BB2 +/// \ / | where BB1, BB2 contain conditional branches. +/// BB => BB3 | BB3 contains unconditiona branch and corresponds +/// \ / to BB. BB4 is the merge. +/// BB4 +/// +/// Corresponding source code: +/// +/// if (a == b || c == d) +/// statement; // BB3 +/// +/// In both cases, BB is the common successor of conditional branches. +/// In Case 1, BB (BB4) has an unconditional branch (BB3) as +/// its predecessor. In Case 2, BB (BB3) only has conditional branches +/// as its predecessors. +bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) { + PHINode *PHI = dyn_cast<PHINode>(BB->begin()); + if (PHI) + return false; // For simplicity, avoid cases containing PHI nodes. + + BasicBlock *LastCondBlock = nullptr; + BasicBlock *FirstCondBlock = nullptr; + BasicBlock *UnCondBlock = nullptr; + int Idx = -1; + + // Check predecessors of \param BB. + SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); + for (BasicBlock *Pred : Preds) { + BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()); + + // All predecessors should terminate with a branch. + if (!PBI) + return false; + + BasicBlock *PP = Pred->getSinglePredecessor(); + + if (PBI->isUnconditional()) { + // Case 1: Pred (BB3) is an unconditional block, it should + // have a single predecessor (BB2) that is also a predecessor + // of \param BB (BB4) and should not have address-taken. + // There should exist only one such unconditional + // branch among the predecessors. + if (UnCondBlock || !PP || !Preds.contains(PP) || + Pred->hasAddressTaken()) + return false; + + UnCondBlock = Pred; + continue; + } + + // Only conditional branches are allowed beyond this point. + assert(PBI->isConditional()); + + // Condition's unique use should be the branch instruction. + Value *PC = PBI->getCondition(); + if (!PC || !PC->hasOneUse()) + return false; + + if (PP && Preds.count(PP)) { + // These are internal condition blocks to be merged from, e.g., + // BB2 in both cases. + // Should not be address-taken. + if (Pred->hasAddressTaken()) + return false; + + // Instructions in the internal condition blocks should be safe + // to hoist up. + for (BasicBlock::iterator BI = Pred->begin(), BE = PBI->getIterator(); + BI != BE;) { + Instruction *CI = &*BI++; + if (isa<PHINode>(CI) || !isSafeToSpeculativelyExecute(CI)) + return false; + } + } else { + // This is the condition block to be merged into, e.g. BB1 in + // both cases. + if (FirstCondBlock) + return false; + FirstCondBlock = Pred; + } + + // Find whether BB is uniformly on the true (or false) path + // for all of its predecessors. 
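+ // (Editorial note: Idx records whether BB is successor 0 or successor 1 of
+ // its conditional predecessors; if different predecessors disagree, a
+ // single parallel-and or parallel-or cannot express the merged condition,
+ // so the transform bails out below.)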
+ BasicBlock *PS1 = PBI->getSuccessor(0); + BasicBlock *PS2 = PBI->getSuccessor(1); + BasicBlock *PS = (PS1 == BB) ? PS2 : PS1; + int CIdx = (PS1 == BB) ? 0 : 1; + + if (Idx == -1) + Idx = CIdx; + else if (CIdx != Idx) + return false; + + // PS is the successor which is not BB. Check successors to identify + // the last conditional branch. + if (!Preds.contains(PS)) { + // Case 2. + LastCondBlock = Pred; + } else { + // Case 1 + BranchInst *BPS = dyn_cast<BranchInst>(PS->getTerminator()); + if (BPS && BPS->isUnconditional()) { + // Case 1: PS(BB3) should be an unconditional branch. + LastCondBlock = Pred; + } + } + } + + if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock)) + return false; + + Instruction *TBB = LastCondBlock->getTerminator(); + BasicBlock *PS1 = TBB->getSuccessor(0); + BasicBlock *PS2 = TBB->getSuccessor(1); + BranchInst *PBI1 = dyn_cast<BranchInst>(PS1->getTerminator()); + BranchInst *PBI2 = dyn_cast<BranchInst>(PS2->getTerminator()); + + // If PS1 does not jump into PS2, but PS2 jumps into PS1, + // attempt branch inversion. + if (!PBI1 || !PBI1->isUnconditional() || + (PS1->getTerminator()->getSuccessor(0) != PS2)) { + // Check whether PS2 jumps into PS1. + if (!PBI2 || !PBI2->isUnconditional() || + (PS2->getTerminator()->getSuccessor(0) != PS1)) + return false; + + // Do branch inversion. + BasicBlock *CurrBlock = LastCondBlock; + bool EverChanged = false; + for (; CurrBlock != FirstCondBlock; + CurrBlock = CurrBlock->getSinglePredecessor()) { + auto *BI = cast<BranchInst>(CurrBlock->getTerminator()); + auto *CI = dyn_cast<CmpInst>(BI->getCondition()); + if (!CI) + continue; + + CmpInst::Predicate Predicate = CI->getPredicate(); + // Canonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq + if ((Predicate == CmpInst::ICMP_NE) || (Predicate == CmpInst::FCMP_ONE)) { + CI->setPredicate(ICmpInst::getInversePredicate(Predicate)); + BI->swapSuccessors(); + EverChanged = true; + } + } + return EverChanged; + } + + // PS1 must have a conditional branch. + if (!PBI1 || !PBI1->isUnconditional()) + return false; + + // PS2 should not contain PHI node. + PHI = dyn_cast<PHINode>(PS2->begin()); + if (PHI) + return false; + + // Do the transformation. + BasicBlock *CB; + BranchInst *PBI = cast<BranchInst>(FirstCondBlock->getTerminator()); + bool Iteration = true; + IRBuilder<>::InsertPointGuard Guard(Builder); + Value *PC = PBI->getCondition(); + + do { + CB = PBI->getSuccessor(1 - Idx); + // Delete the conditional branch. + FirstCondBlock->back().eraseFromParent(); + FirstCondBlock->splice(FirstCondBlock->end(), CB); + PBI = cast<BranchInst>(FirstCondBlock->getTerminator()); + Value *CC = PBI->getCondition(); + // Merge conditions. + Builder.SetInsertPoint(PBI); + Value *NC; + if (Idx == 0) + // Case 2, use parallel or. + NC = Builder.CreateOr(PC, CC); + else + // Case 1, use parallel and. + NC = Builder.CreateAnd(PC, CC); + + PBI->replaceUsesOfWith(CC, NC); + PC = NC; + if (CB == LastCondBlock) + Iteration = false; + // Remove internal conditional branches. + CB->dropAllReferences(); + // make CB unreachable and let downstream to delete the block. + new UnreachableInst(CB->getContext(), CB); + } while (Iteration); + + LLVM_DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock); + return true; +} + +/// Compare blocks from two if-regions, where \param Head2 is the entry of the +/// 2nd if-region. \param Block1 is a block in the 1st if-region to compare. +/// \param Block2 is a block in the 2nd if-region to compare. 
\returns true if +/// Block1 and Block2 have identical instructions and do not have +/// memory reference alias with Head2. +bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Block1, BasicBlock *Block2, + BasicBlock *Head2) { + Instruction *PTI2 = Head2->getTerminator(); + Instruction *PBI2 = &Head2->front(); + + // Check whether instructions in Block1 and Block2 are identical + // and do not alias with instructions in Head2. + BasicBlock::iterator iter1 = Block1->begin(); + BasicBlock::iterator end1 = Block1->getTerminator()->getIterator(); + BasicBlock::iterator iter2 = Block2->begin(); + BasicBlock::iterator end2 = Block2->getTerminator()->getIterator(); + + while (true) { + if (iter1 == end1) { + if (iter2 != end2) + return false; + break; + } + + if (!iter1->isIdenticalTo(&*iter2)) + return false; + + // Illegal to remove instructions with side effects except + // non-volatile stores. + if (iter1->mayHaveSideEffects()) { + Instruction *CurI = &*iter1; + StoreInst *SI = dyn_cast<StoreInst>(CurI); + if (!SI || SI->isVolatile()) + return false; + } + + // For simplicity and speed, data dependency check can be + // avoided if read from memory doesn't exist. + if (iter1->mayReadFromMemory()) + return false; + + if (iter1->mayWriteToMemory()) { + for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) { + if (BI->mayReadFromMemory() || BI->mayWriteToMemory()) { + // Check alias with Head2. + if (!AA || !AA->isNoAlias(&*iter1, &*BI)) + return false; + } + } + } + ++iter1; + ++iter2; + } + + return true; +} + +/// Check whether \param BB is the merge block of a if-region. If yes, check +/// whether there exists an adjacent if-region upstream, the two if-regions +/// contain identical instructions and can be legally merged. \returns true if +/// the two if-regions are merged. +/// +/// From: +/// if (a) +/// statement; +/// if (b) +/// statement; +/// +/// To: +/// if (a || b) +/// statement; +/// +/// +/// And from: +/// if (a) +/// ; +/// else +/// statement; +/// if (b) +/// ; +/// else +/// statement; +/// +/// To: +/// if (a && b) +/// ; +/// else +/// statement; +/// +/// We always take the form of the first if-region. This means that if the +/// statement in the first if-region, is in the "then-path", while in the second +/// if-region it is in the "else-path", then we convert the second to the first +/// form, by inverting the condition and the branch successors. The same +/// approach goes for the opposite case. +bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) { + BasicBlock *IfTrue2, *IfFalse2; + BranchInst *DomBI2 = GetIfCondition(BB, IfTrue2, IfFalse2); + if (!DomBI2) + return false; + Instruction *CInst2 = dyn_cast<Instruction>(DomBI2->getCondition()); + if (!CInst2) + return false; + + BasicBlock *SecondEntryBlock = CInst2->getParent(); + if (SecondEntryBlock->hasAddressTaken()) + return false; + + BasicBlock *IfTrue1, *IfFalse1; + BranchInst *DomBI1 = GetIfCondition(SecondEntryBlock, IfTrue1, IfFalse1); + if (!DomBI1) + return false; + Instruction *CInst1 = dyn_cast<Instruction>(DomBI1->getCondition()); + if (!CInst1) + return false; + + BasicBlock *FirstEntryBlock = CInst1->getParent(); + // Don't die trying to process degenerate/unreachable code. + if (FirstEntryBlock == SecondEntryBlock) + return false; + + // Either then-path or else-path should be empty. 
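+ // Hedged source-level illustration (assumed, not part of this patch):
+ //   if (a) x = 1;           // first region: else-path empty -> use "or"
+ //   if (b) ; else x = 1;    // second region: then-path empty
+ // The second condition is inverted (InvertCond2) so both regions take the
+ // same form, and the pair is merged below into: if (a || !b) x = 1;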
+ bool InvertCond2 = false; + BinaryOperator::BinaryOps CombineOp; + if (IfFalse1 == FirstEntryBlock) { + // The else-path is empty, so we must use "or" operation to combine the + // conditions. + CombineOp = BinaryOperator::Or; + if (IfFalse2 != SecondEntryBlock) { + if (IfTrue2 != SecondEntryBlock) + return false; + + InvertCond2 = true; + std::swap(IfTrue2, IfFalse2); + } + + if (!CompareIfRegionBlock(IfTrue1, IfTrue2, SecondEntryBlock)) + return false; + } else if (IfTrue1 == FirstEntryBlock) { + // The then-path is empty, so we must use "and" operation to combine the + // conditions. + CombineOp = BinaryOperator::And; + if (IfTrue2 != SecondEntryBlock) { + if (IfFalse2 != SecondEntryBlock) + return false; + + InvertCond2 = true; + std::swap(IfTrue2, IfFalse2); + } + + if (!CompareIfRegionBlock(IfFalse1, IfFalse2, SecondEntryBlock)) + return false; + } else + return false; + + Instruction *PTI2 = SecondEntryBlock->getTerminator(); + Instruction *PBI2 = &SecondEntryBlock->front(); + + // Check whether \param SecondEntryBlock has side-effect and is safe to + // speculate. + for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) { + Instruction *CI = &*BI; + if (isa<PHINode>(CI) || CI->mayHaveSideEffects() || + !isSafeToSpeculativelyExecute(CI)) + return false; + } + + // Merge \param SecondEntryBlock into \param FirstEntryBlock. + FirstEntryBlock->back().eraseFromParent(); + FirstEntryBlock->splice(FirstEntryBlock->end(), SecondEntryBlock); + BranchInst *PBI = cast<BranchInst>(FirstEntryBlock->getTerminator()); + assert(PBI->getCondition() == CInst2); + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + Builder.SetInsertPoint(PBI); + if (InvertCond2) { + // If this is a "cmp" instruction, only used for branching (and nowhere + // else), then we can simply invert the predicate. + auto Cmp2 = dyn_cast<CmpInst>(CInst2); + if (Cmp2 && Cmp2->hasOneUse()) + Cmp2->setPredicate(Cmp2->getInversePredicate()); + else + CInst2 = cast<Instruction>(Builder.CreateNot(CInst2)); + PBI->swapSuccessors(); + } + Value *NC = Builder.CreateBinOp(CombineOp, CInst1, CInst2); + PBI->replaceUsesOfWith(CInst2, NC); + Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); + + // Handle PHI node to replace its predecessors to FirstEntryBlock. + for (BasicBlock *Succ : successors(PBI)) { + for (PHINode &Phi : Succ->phis()) { + for (unsigned i = 0, e = Phi.getNumIncomingValues(); i != e; ++i) { + if (Phi.getIncomingBlock(i) == SecondEntryBlock) + Phi.setIncomingBlock(i, FirstEntryBlock); + } + } + } + + // Remove IfTrue1 + if (IfTrue1 != FirstEntryBlock) { + IfTrue1->dropAllReferences(); + IfTrue1->eraseFromParent(); + } + + // Remove IfFalse1 + if (IfFalse1 != FirstEntryBlock) { + IfFalse1->dropAllReferences(); + IfFalse1->eraseFromParent(); + } + + // Remove \param SecondEntryBlock + SecondEntryBlock->dropAllReferences(); + SecondEntryBlock->eraseFromParent(); + LLVM_DEBUG(dbgs() << "If conditions merged into:\n" << *FirstEntryBlock); + return true; +} + +bool FlattenCFGOpt::run(BasicBlock *BB) { + assert(BB && BB->getParent() && "Block not embedded in function!"); + assert(BB->getTerminator() && "Degenerate basic block encountered!"); + + IRBuilder<> Builder(BB); + + if (FlattenParallelAndOr(BB, Builder) || MergeIfRegion(BB, Builder)) + return true; + return false; +} + +/// FlattenCFG - This function is used to flatten a CFG. 
For +/// example, it uses parallel-and and parallel-or mode to collapse +/// if-conditions and merge if-regions with identical statements. +bool llvm::FlattenCFG(BasicBlock *BB, AAResults *AA) { + return FlattenCFGOpt(AA).run(BB); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/FunctionComparator.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/FunctionComparator.cpp new file mode 100644 index 0000000000..3fa61ec68c --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/FunctionComparator.cpp @@ -0,0 +1,991 @@ +//===- FunctionComparator.h - Function Comparator -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the FunctionComparator and GlobalNumberState classes +// which are used by the MergeFunctions pass for comparing functions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/FunctionComparator.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <utility> + +using namespace llvm; + +#define DEBUG_TYPE "functioncomparator" + +int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const { + if (L < R) + return -1; + if (L > R) + return 1; + return 0; +} + +int FunctionComparator::cmpAligns(Align L, Align R) const { + if (L.value() < R.value()) + return -1; + if (L.value() > R.value()) + return 1; + return 0; +} + +int FunctionComparator::cmpOrderings(AtomicOrdering L, AtomicOrdering R) const { + if ((int)L < (int)R) + return -1; + if ((int)L > (int)R) + return 1; + return 0; +} + +int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const { + if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth())) + return Res; + if (L.ugt(R)) + return 1; + if (R.ugt(L)) + return -1; + return 0; +} + +int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const { + // Floats are ordered first by semantics (i.e. float, double, half, etc.), + // then by value interpreted as a bitstring (aka APInt). 
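+ // Hedged example (not from this patch): ConstantFP 1.0 of type float and
+ // 1.0 of type double compare unequal here because their fltSemantics differ
+ // (precision 24 vs. 53); the bit patterns are only consulted when all of
+ // the semantics fields match.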
+ const fltSemantics &SL = L.getSemantics(), &SR = R.getSemantics(); + if (int Res = cmpNumbers(APFloat::semanticsPrecision(SL), + APFloat::semanticsPrecision(SR))) + return Res; + if (int Res = cmpNumbers(APFloat::semanticsMaxExponent(SL), + APFloat::semanticsMaxExponent(SR))) + return Res; + if (int Res = cmpNumbers(APFloat::semanticsMinExponent(SL), + APFloat::semanticsMinExponent(SR))) + return Res; + if (int Res = cmpNumbers(APFloat::semanticsSizeInBits(SL), + APFloat::semanticsSizeInBits(SR))) + return Res; + return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt()); +} + +int FunctionComparator::cmpMem(StringRef L, StringRef R) const { + // Prevent heavy comparison, compare sizes first. + if (int Res = cmpNumbers(L.size(), R.size())) + return Res; + + // Compare strings lexicographically only when it is necessary: only when + // strings are equal in size. + return std::clamp(L.compare(R), -1, 1); +} + +int FunctionComparator::cmpAttrs(const AttributeList L, + const AttributeList R) const { + if (int Res = cmpNumbers(L.getNumAttrSets(), R.getNumAttrSets())) + return Res; + + for (unsigned i : L.indexes()) { + AttributeSet LAS = L.getAttributes(i); + AttributeSet RAS = R.getAttributes(i); + AttributeSet::iterator LI = LAS.begin(), LE = LAS.end(); + AttributeSet::iterator RI = RAS.begin(), RE = RAS.end(); + for (; LI != LE && RI != RE; ++LI, ++RI) { + Attribute LA = *LI; + Attribute RA = *RI; + if (LA.isTypeAttribute() && RA.isTypeAttribute()) { + if (LA.getKindAsEnum() != RA.getKindAsEnum()) + return cmpNumbers(LA.getKindAsEnum(), RA.getKindAsEnum()); + + Type *TyL = LA.getValueAsType(); + Type *TyR = RA.getValueAsType(); + if (TyL && TyR) { + if (int Res = cmpTypes(TyL, TyR)) + return Res; + continue; + } + + // Two pointers, at least one null, so the comparison result is + // independent of the value of a real pointer. + if (int Res = cmpNumbers((uint64_t)TyL, (uint64_t)TyR)) + return Res; + continue; + } + if (LA < RA) + return -1; + if (RA < LA) + return 1; + } + if (LI != LE) + return 1; + if (RI != RE) + return -1; + } + return 0; +} + +int FunctionComparator::cmpRangeMetadata(const MDNode *L, + const MDNode *R) const { + if (L == R) + return 0; + if (!L) + return -1; + if (!R) + return 1; + // Range metadata is a sequence of numbers. Make sure they are the same + // sequence. + // TODO: Note that as this is metadata, it is possible to drop and/or merge + // this data when considering functions to merge. Thus this comparison would + // return 0 (i.e. equivalent), but merging would become more complicated + // because the ranges would need to be unioned. It is not likely that + // functions differ ONLY in this metadata if they are actually the same + // function semantically. 
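+ // Hedged illustration (metadata nodes are hypothetical, not from this
+ // patch): two distinct but identical nodes
+ //   !0 = !{i8 0, i8 2}
+ //   !1 = !{i8 0, i8 2}
+ // compare equal below, while !{i8 0, i8 2} vs. !{i8 0, i8 3} is ordered by
+ // the first operand pair that differs.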
+ if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands())) + return Res; + for (size_t I = 0; I < L->getNumOperands(); ++I) { + ConstantInt *LLow = mdconst::extract<ConstantInt>(L->getOperand(I)); + ConstantInt *RLow = mdconst::extract<ConstantInt>(R->getOperand(I)); + if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue())) + return Res; + } + return 0; +} + +int FunctionComparator::cmpOperandBundlesSchema(const CallBase &LCS, + const CallBase &RCS) const { + assert(LCS.getOpcode() == RCS.getOpcode() && "Can't compare otherwise!"); + + if (int Res = + cmpNumbers(LCS.getNumOperandBundles(), RCS.getNumOperandBundles())) + return Res; + + for (unsigned I = 0, E = LCS.getNumOperandBundles(); I != E; ++I) { + auto OBL = LCS.getOperandBundleAt(I); + auto OBR = RCS.getOperandBundleAt(I); + + if (int Res = OBL.getTagName().compare(OBR.getTagName())) + return Res; + + if (int Res = cmpNumbers(OBL.Inputs.size(), OBR.Inputs.size())) + return Res; + } + + return 0; +} + +/// Constants comparison: +/// 1. Check whether type of L constant could be losslessly bitcasted to R +/// type. +/// 2. Compare constant contents. +/// For more details see declaration comments. +int FunctionComparator::cmpConstants(const Constant *L, + const Constant *R) const { + Type *TyL = L->getType(); + Type *TyR = R->getType(); + + // Check whether types are bitcastable. This part is just re-factored + // Type::canLosslesslyBitCastTo method, but instead of returning true/false, + // we also pack into result which type is "less" for us. + int TypesRes = cmpTypes(TyL, TyR); + if (TypesRes != 0) { + // Types are different, but check whether we can bitcast them. + if (!TyL->isFirstClassType()) { + if (TyR->isFirstClassType()) + return -1; + // Neither TyL nor TyR are values of first class type. Return the result + // of comparing the types + return TypesRes; + } + if (!TyR->isFirstClassType()) { + if (TyL->isFirstClassType()) + return 1; + return TypesRes; + } + + // Vector -> Vector conversions are always lossless if the two vector types + // have the same size, otherwise not. + unsigned TyLWidth = 0; + unsigned TyRWidth = 0; + + if (auto *VecTyL = dyn_cast<VectorType>(TyL)) + TyLWidth = VecTyL->getPrimitiveSizeInBits().getFixedValue(); + if (auto *VecTyR = dyn_cast<VectorType>(TyR)) + TyRWidth = VecTyR->getPrimitiveSizeInBits().getFixedValue(); + + if (TyLWidth != TyRWidth) + return cmpNumbers(TyLWidth, TyRWidth); + + // Zero bit-width means neither TyL nor TyR are vectors. + if (!TyLWidth) { + PointerType *PTyL = dyn_cast<PointerType>(TyL); + PointerType *PTyR = dyn_cast<PointerType>(TyR); + if (PTyL && PTyR) { + unsigned AddrSpaceL = PTyL->getAddressSpace(); + unsigned AddrSpaceR = PTyR->getAddressSpace(); + if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR)) + return Res; + } + if (PTyL) + return 1; + if (PTyR) + return -1; + + // TyL and TyR aren't vectors, nor pointers. We don't know how to + // bitcast them. + return TypesRes; + } + } + + // OK, types are bitcastable, now check constant contents. 
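+ // Hedged examples of the resulting order (assumed, not from this patch):
+ //   cmpConstants(i32 0, i32 7) returns 1, because null values sort after
+ //   non-null values, while
+ //   cmpConstants(i32 5, i32 7) returns -1 via cmpAPInts further below.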
+ + if (L->isNullValue() && R->isNullValue()) + return TypesRes; + if (L->isNullValue() && !R->isNullValue()) + return 1; + if (!L->isNullValue() && R->isNullValue()) + return -1; + + auto GlobalValueL = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(L)); + auto GlobalValueR = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(R)); + if (GlobalValueL && GlobalValueR) { + return cmpGlobalValues(GlobalValueL, GlobalValueR); + } + + if (int Res = cmpNumbers(L->getValueID(), R->getValueID())) + return Res; + + if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) { + const auto *SeqR = cast<ConstantDataSequential>(R); + // This handles ConstantDataArray and ConstantDataVector. Note that we + // compare the two raw data arrays, which might differ depending on the host + // endianness. This isn't a problem though, because the endiness of a module + // will affect the order of the constants, but this order is the same + // for a given input module and host platform. + return cmpMem(SeqL->getRawDataValues(), SeqR->getRawDataValues()); + } + + switch (L->getValueID()) { + case Value::UndefValueVal: + case Value::PoisonValueVal: + case Value::ConstantTokenNoneVal: + return TypesRes; + case Value::ConstantIntVal: { + const APInt &LInt = cast<ConstantInt>(L)->getValue(); + const APInt &RInt = cast<ConstantInt>(R)->getValue(); + return cmpAPInts(LInt, RInt); + } + case Value::ConstantFPVal: { + const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF(); + const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF(); + return cmpAPFloats(LAPF, RAPF); + } + case Value::ConstantArrayVal: { + const ConstantArray *LA = cast<ConstantArray>(L); + const ConstantArray *RA = cast<ConstantArray>(R); + uint64_t NumElementsL = cast<ArrayType>(TyL)->getNumElements(); + uint64_t NumElementsR = cast<ArrayType>(TyR)->getNumElements(); + if (int Res = cmpNumbers(NumElementsL, NumElementsR)) + return Res; + for (uint64_t i = 0; i < NumElementsL; ++i) { + if (int Res = cmpConstants(cast<Constant>(LA->getOperand(i)), + cast<Constant>(RA->getOperand(i)))) + return Res; + } + return 0; + } + case Value::ConstantStructVal: { + const ConstantStruct *LS = cast<ConstantStruct>(L); + const ConstantStruct *RS = cast<ConstantStruct>(R); + unsigned NumElementsL = cast<StructType>(TyL)->getNumElements(); + unsigned NumElementsR = cast<StructType>(TyR)->getNumElements(); + if (int Res = cmpNumbers(NumElementsL, NumElementsR)) + return Res; + for (unsigned i = 0; i != NumElementsL; ++i) { + if (int Res = cmpConstants(cast<Constant>(LS->getOperand(i)), + cast<Constant>(RS->getOperand(i)))) + return Res; + } + return 0; + } + case Value::ConstantVectorVal: { + const ConstantVector *LV = cast<ConstantVector>(L); + const ConstantVector *RV = cast<ConstantVector>(R); + unsigned NumElementsL = cast<FixedVectorType>(TyL)->getNumElements(); + unsigned NumElementsR = cast<FixedVectorType>(TyR)->getNumElements(); + if (int Res = cmpNumbers(NumElementsL, NumElementsR)) + return Res; + for (uint64_t i = 0; i < NumElementsL; ++i) { + if (int Res = cmpConstants(cast<Constant>(LV->getOperand(i)), + cast<Constant>(RV->getOperand(i)))) + return Res; + } + return 0; + } + case Value::ConstantExprVal: { + const ConstantExpr *LE = cast<ConstantExpr>(L); + const ConstantExpr *RE = cast<ConstantExpr>(R); + unsigned NumOperandsL = LE->getNumOperands(); + unsigned NumOperandsR = RE->getNumOperands(); + if (int Res = cmpNumbers(NumOperandsL, NumOperandsR)) + return Res; + for (unsigned i = 0; i < NumOperandsL; ++i) { + if (int Res = 
cmpConstants(cast<Constant>(LE->getOperand(i)), + cast<Constant>(RE->getOperand(i)))) + return Res; + } + return 0; + } + case Value::BlockAddressVal: { + const BlockAddress *LBA = cast<BlockAddress>(L); + const BlockAddress *RBA = cast<BlockAddress>(R); + if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction())) + return Res; + if (LBA->getFunction() == RBA->getFunction()) { + // They are BBs in the same function. Order by which comes first in the + // BB order of the function. This order is deterministic. + Function *F = LBA->getFunction(); + BasicBlock *LBB = LBA->getBasicBlock(); + BasicBlock *RBB = RBA->getBasicBlock(); + if (LBB == RBB) + return 0; + for (BasicBlock &BB : *F) { + if (&BB == LBB) { + assert(&BB != RBB); + return -1; + } + if (&BB == RBB) + return 1; + } + llvm_unreachable("Basic Block Address does not point to a basic block in " + "its function."); + return -1; + } else { + // cmpValues said the functions are the same. So because they aren't + // literally the same pointer, they must respectively be the left and + // right functions. + assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR); + // cmpValues will tell us if these are equivalent BasicBlocks, in the + // context of their respective functions. + return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock()); + } + } + case Value::DSOLocalEquivalentVal: { + // dso_local_equivalent is functionally equivalent to whatever it points to. + // This means the behavior of the IR should be the exact same as if the + // function was referenced directly rather than through a + // dso_local_equivalent. + const auto *LEquiv = cast<DSOLocalEquivalent>(L); + const auto *REquiv = cast<DSOLocalEquivalent>(R); + return cmpGlobalValues(LEquiv->getGlobalValue(), REquiv->getGlobalValue()); + } + default: // Unknown constant, abort. + LLVM_DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n"); + llvm_unreachable("Constant ValueID not recognized."); + return -1; + } +} + +int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue *R) const { + uint64_t LNumber = GlobalNumbers->getNumber(L); + uint64_t RNumber = GlobalNumbers->getNumber(R); + return cmpNumbers(LNumber, RNumber); +} + +/// cmpType - compares two types, +/// defines total ordering among the types set. +/// See method declaration comments for more details. +int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const { + PointerType *PTyL = dyn_cast<PointerType>(TyL); + PointerType *PTyR = dyn_cast<PointerType>(TyR); + + const DataLayout &DL = FnL->getParent()->getDataLayout(); + if (PTyL && PTyL->getAddressSpace() == 0) + TyL = DL.getIntPtrType(TyL); + if (PTyR && PTyR->getAddressSpace() == 0) + TyR = DL.getIntPtrType(TyR); + + if (TyL == TyR) + return 0; + + if (int Res = cmpNumbers(TyL->getTypeID(), TyR->getTypeID())) + return Res; + + switch (TyL->getTypeID()) { + default: + llvm_unreachable("Unknown type!"); + case Type::IntegerTyID: + return cmpNumbers(cast<IntegerType>(TyL)->getBitWidth(), + cast<IntegerType>(TyR)->getBitWidth()); + // TyL == TyR would have returned true earlier, because types are uniqued. 
+ case Type::VoidTyID: + case Type::FloatTyID: + case Type::DoubleTyID: + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + case Type::LabelTyID: + case Type::MetadataTyID: + case Type::TokenTyID: + return 0; + + case Type::PointerTyID: + assert(PTyL && PTyR && "Both types must be pointers here."); + return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace()); + + case Type::StructTyID: { + StructType *STyL = cast<StructType>(TyL); + StructType *STyR = cast<StructType>(TyR); + if (STyL->getNumElements() != STyR->getNumElements()) + return cmpNumbers(STyL->getNumElements(), STyR->getNumElements()); + + if (STyL->isPacked() != STyR->isPacked()) + return cmpNumbers(STyL->isPacked(), STyR->isPacked()); + + for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) { + if (int Res = cmpTypes(STyL->getElementType(i), STyR->getElementType(i))) + return Res; + } + return 0; + } + + case Type::FunctionTyID: { + FunctionType *FTyL = cast<FunctionType>(TyL); + FunctionType *FTyR = cast<FunctionType>(TyR); + if (FTyL->getNumParams() != FTyR->getNumParams()) + return cmpNumbers(FTyL->getNumParams(), FTyR->getNumParams()); + + if (FTyL->isVarArg() != FTyR->isVarArg()) + return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg()); + + if (int Res = cmpTypes(FTyL->getReturnType(), FTyR->getReturnType())) + return Res; + + for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) { + if (int Res = cmpTypes(FTyL->getParamType(i), FTyR->getParamType(i))) + return Res; + } + return 0; + } + + case Type::ArrayTyID: { + auto *STyL = cast<ArrayType>(TyL); + auto *STyR = cast<ArrayType>(TyR); + if (STyL->getNumElements() != STyR->getNumElements()) + return cmpNumbers(STyL->getNumElements(), STyR->getNumElements()); + return cmpTypes(STyL->getElementType(), STyR->getElementType()); + } + case Type::FixedVectorTyID: + case Type::ScalableVectorTyID: { + auto *STyL = cast<VectorType>(TyL); + auto *STyR = cast<VectorType>(TyR); + if (STyL->getElementCount().isScalable() != + STyR->getElementCount().isScalable()) + return cmpNumbers(STyL->getElementCount().isScalable(), + STyR->getElementCount().isScalable()); + if (STyL->getElementCount() != STyR->getElementCount()) + return cmpNumbers(STyL->getElementCount().getKnownMinValue(), + STyR->getElementCount().getKnownMinValue()); + return cmpTypes(STyL->getElementType(), STyR->getElementType()); + } + } +} + +// Determine whether the two operations are the same except that pointer-to-A +// and pointer-to-B are equivalent. This should be kept in sync with +// Instruction::isSameOperationAs. +// Read method declaration comments for more details. +int FunctionComparator::cmpOperations(const Instruction *L, + const Instruction *R, + bool &needToCmpOperands) const { + needToCmpOperands = true; + if (int Res = cmpValues(L, R)) + return Res; + + // Differences from Instruction::isSameOperationAs: + // * replace type comparison with calls to cmpTypes. + // * we test for I->getRawSubclassOptionalData (nuw/nsw/tail) at the top. + // * because of the above, we don't test for the tail bit on calls later on. 
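+ // Hedged example (assumed, not from this patch):
+ //   %x = add nsw i32 %a, %b    vs.    %y = add i32 %a, %b
+ // compare unequal solely because getRawSubclassOptionalData() differs (the
+ // nsw bit), even though opcode, type and operands all match.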
+ if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode())) + return Res; + + if (const GetElementPtrInst *GEPL = dyn_cast<GetElementPtrInst>(L)) { + needToCmpOperands = false; + const GetElementPtrInst *GEPR = cast<GetElementPtrInst>(R); + if (int Res = + cmpValues(GEPL->getPointerOperand(), GEPR->getPointerOperand())) + return Res; + return cmpGEPs(GEPL, GEPR); + } + + if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands())) + return Res; + + if (int Res = cmpTypes(L->getType(), R->getType())) + return Res; + + if (int Res = cmpNumbers(L->getRawSubclassOptionalData(), + R->getRawSubclassOptionalData())) + return Res; + + // We have two instructions of identical opcode and #operands. Check to see + // if all operands are the same type + for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) { + if (int Res = + cmpTypes(L->getOperand(i)->getType(), R->getOperand(i)->getType())) + return Res; + } + + // Check special state that is a part of some instructions. + if (const AllocaInst *AI = dyn_cast<AllocaInst>(L)) { + if (int Res = cmpTypes(AI->getAllocatedType(), + cast<AllocaInst>(R)->getAllocatedType())) + return Res; + return cmpAligns(AI->getAlign(), cast<AllocaInst>(R)->getAlign()); + } + if (const LoadInst *LI = dyn_cast<LoadInst>(L)) { + if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile())) + return Res; + if (int Res = cmpAligns(LI->getAlign(), cast<LoadInst>(R)->getAlign())) + return Res; + if (int Res = + cmpOrderings(LI->getOrdering(), cast<LoadInst>(R)->getOrdering())) + return Res; + if (int Res = cmpNumbers(LI->getSyncScopeID(), + cast<LoadInst>(R)->getSyncScopeID())) + return Res; + return cmpRangeMetadata( + LI->getMetadata(LLVMContext::MD_range), + cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range)); + } + if (const StoreInst *SI = dyn_cast<StoreInst>(L)) { + if (int Res = + cmpNumbers(SI->isVolatile(), cast<StoreInst>(R)->isVolatile())) + return Res; + if (int Res = cmpAligns(SI->getAlign(), cast<StoreInst>(R)->getAlign())) + return Res; + if (int Res = + cmpOrderings(SI->getOrdering(), cast<StoreInst>(R)->getOrdering())) + return Res; + return cmpNumbers(SI->getSyncScopeID(), + cast<StoreInst>(R)->getSyncScopeID()); + } + if (const CmpInst *CI = dyn_cast<CmpInst>(L)) + return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate()); + if (auto *CBL = dyn_cast<CallBase>(L)) { + auto *CBR = cast<CallBase>(R); + if (int Res = cmpNumbers(CBL->getCallingConv(), CBR->getCallingConv())) + return Res; + if (int Res = cmpAttrs(CBL->getAttributes(), CBR->getAttributes())) + return Res; + if (int Res = cmpOperandBundlesSchema(*CBL, *CBR)) + return Res; + if (const CallInst *CI = dyn_cast<CallInst>(L)) + if (int Res = cmpNumbers(CI->getTailCallKind(), + cast<CallInst>(R)->getTailCallKind())) + return Res; + return cmpRangeMetadata(L->getMetadata(LLVMContext::MD_range), + R->getMetadata(LLVMContext::MD_range)); + } + if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) { + ArrayRef<unsigned> LIndices = IVI->getIndices(); + ArrayRef<unsigned> RIndices = cast<InsertValueInst>(R)->getIndices(); + if (int Res = cmpNumbers(LIndices.size(), RIndices.size())) + return Res; + for (size_t i = 0, e = LIndices.size(); i != e; ++i) { + if (int Res = cmpNumbers(LIndices[i], RIndices[i])) + return Res; + } + return 0; + } + if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) { + ArrayRef<unsigned> LIndices = EVI->getIndices(); + ArrayRef<unsigned> RIndices = cast<ExtractValueInst>(R)->getIndices(); + if (int Res = 
cmpNumbers(LIndices.size(), RIndices.size())) + return Res; + for (size_t i = 0, e = LIndices.size(); i != e; ++i) { + if (int Res = cmpNumbers(LIndices[i], RIndices[i])) + return Res; + } + } + if (const FenceInst *FI = dyn_cast<FenceInst>(L)) { + if (int Res = + cmpOrderings(FI->getOrdering(), cast<FenceInst>(R)->getOrdering())) + return Res; + return cmpNumbers(FI->getSyncScopeID(), + cast<FenceInst>(R)->getSyncScopeID()); + } + if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) { + if (int Res = cmpNumbers(CXI->isVolatile(), + cast<AtomicCmpXchgInst>(R)->isVolatile())) + return Res; + if (int Res = + cmpNumbers(CXI->isWeak(), cast<AtomicCmpXchgInst>(R)->isWeak())) + return Res; + if (int Res = + cmpOrderings(CXI->getSuccessOrdering(), + cast<AtomicCmpXchgInst>(R)->getSuccessOrdering())) + return Res; + if (int Res = + cmpOrderings(CXI->getFailureOrdering(), + cast<AtomicCmpXchgInst>(R)->getFailureOrdering())) + return Res; + return cmpNumbers(CXI->getSyncScopeID(), + cast<AtomicCmpXchgInst>(R)->getSyncScopeID()); + } + if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) { + if (int Res = cmpNumbers(RMWI->getOperation(), + cast<AtomicRMWInst>(R)->getOperation())) + return Res; + if (int Res = cmpNumbers(RMWI->isVolatile(), + cast<AtomicRMWInst>(R)->isVolatile())) + return Res; + if (int Res = cmpOrderings(RMWI->getOrdering(), + cast<AtomicRMWInst>(R)->getOrdering())) + return Res; + return cmpNumbers(RMWI->getSyncScopeID(), + cast<AtomicRMWInst>(R)->getSyncScopeID()); + } + if (const ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(L)) { + ArrayRef<int> LMask = SVI->getShuffleMask(); + ArrayRef<int> RMask = cast<ShuffleVectorInst>(R)->getShuffleMask(); + if (int Res = cmpNumbers(LMask.size(), RMask.size())) + return Res; + for (size_t i = 0, e = LMask.size(); i != e; ++i) { + if (int Res = cmpNumbers(LMask[i], RMask[i])) + return Res; + } + } + if (const PHINode *PNL = dyn_cast<PHINode>(L)) { + const PHINode *PNR = cast<PHINode>(R); + // Ensure that in addition to the incoming values being identical + // (checked by the caller of this function), the incoming blocks + // are also identical. + for (unsigned i = 0, e = PNL->getNumIncomingValues(); i != e; ++i) { + if (int Res = + cmpValues(PNL->getIncomingBlock(i), PNR->getIncomingBlock(i))) + return Res; + } + } + return 0; +} + +// Determine whether two GEP operations perform the same underlying arithmetic. +// Read method declaration comments for more details. +int FunctionComparator::cmpGEPs(const GEPOperator *GEPL, + const GEPOperator *GEPR) const { + unsigned int ASL = GEPL->getPointerAddressSpace(); + unsigned int ASR = GEPR->getPointerAddressSpace(); + + if (int Res = cmpNumbers(ASL, ASR)) + return Res; + + // When we have target data, we can reduce the GEP down to the value in bytes + // added to the address. 
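+ // Hedged example (assumed, not from this patch): with a 64-bit DataLayout,
+ //   getelementptr i32, ptr %p, i64 1
+ //   getelementptr i8, ptr %p, i64 4
+ // both accumulate a constant offset of 4 bytes and therefore compare equal
+ // here; GEPs with non-constant indices fall through to the operand-by-
+ // operand comparison below.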
+ const DataLayout &DL = FnL->getParent()->getDataLayout(); + unsigned BitWidth = DL.getPointerSizeInBits(ASL); + APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0); + if (GEPL->accumulateConstantOffset(DL, OffsetL) && + GEPR->accumulateConstantOffset(DL, OffsetR)) + return cmpAPInts(OffsetL, OffsetR); + if (int Res = + cmpTypes(GEPL->getSourceElementType(), GEPR->getSourceElementType())) + return Res; + + if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands())) + return Res; + + for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) { + if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i))) + return Res; + } + + return 0; +} + +int FunctionComparator::cmpInlineAsm(const InlineAsm *L, + const InlineAsm *R) const { + // InlineAsm's are uniqued. If they are the same pointer, obviously they are + // the same, otherwise compare the fields. + if (L == R) + return 0; + if (int Res = cmpTypes(L->getFunctionType(), R->getFunctionType())) + return Res; + if (int Res = cmpMem(L->getAsmString(), R->getAsmString())) + return Res; + if (int Res = cmpMem(L->getConstraintString(), R->getConstraintString())) + return Res; + if (int Res = cmpNumbers(L->hasSideEffects(), R->hasSideEffects())) + return Res; + if (int Res = cmpNumbers(L->isAlignStack(), R->isAlignStack())) + return Res; + if (int Res = cmpNumbers(L->getDialect(), R->getDialect())) + return Res; + assert(L->getFunctionType() != R->getFunctionType()); + return 0; +} + +/// Compare two values used by the two functions under pair-wise comparison. If +/// this is the first time the values are seen, they're added to the mapping so +/// that we will detect mismatches on next use. +/// See comments in declaration for more details. +int FunctionComparator::cmpValues(const Value *L, const Value *R) const { + // Catch self-reference case. + if (L == FnL) { + if (R == FnR) + return 0; + return -1; + } + if (R == FnR) { + if (L == FnL) + return 0; + return 1; + } + + const Constant *ConstL = dyn_cast<Constant>(L); + const Constant *ConstR = dyn_cast<Constant>(R); + if (ConstL && ConstR) { + if (L == R) + return 0; + return cmpConstants(ConstL, ConstR); + } + + if (ConstL) + return 1; + if (ConstR) + return -1; + + const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L); + const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R); + + if (InlineAsmL && InlineAsmR) + return cmpInlineAsm(InlineAsmL, InlineAsmR); + if (InlineAsmL) + return 1; + if (InlineAsmR) + return -1; + + auto LeftSN = sn_mapL.insert(std::make_pair(L, sn_mapL.size())), + RightSN = sn_mapR.insert(std::make_pair(R, sn_mapR.size())); + + return cmpNumbers(LeftSN.first->second, RightSN.first->second); +} + +// Test whether two basic blocks have equivalent behaviour. +int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL, + const BasicBlock *BBR) const { + BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end(); + BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end(); + + do { + bool needToCmpOperands = true; + if (int Res = cmpOperations(&*InstL, &*InstR, needToCmpOperands)) + return Res; + if (needToCmpOperands) { + assert(InstL->getNumOperands() == InstR->getNumOperands()); + + for (unsigned i = 0, e = InstL->getNumOperands(); i != e; ++i) { + Value *OpL = InstL->getOperand(i); + Value *OpR = InstR->getOperand(i); + if (int Res = cmpValues(OpL, OpR)) + return Res; + // cmpValues should ensure this is true. 
+ assert(cmpTypes(OpL->getType(), OpR->getType()) == 0); + } + } + + ++InstL; + ++InstR; + } while (InstL != InstLE && InstR != InstRE); + + if (InstL != InstLE && InstR == InstRE) + return 1; + if (InstL == InstLE && InstR != InstRE) + return -1; + return 0; +} + +int FunctionComparator::compareSignature() const { + if (int Res = cmpAttrs(FnL->getAttributes(), FnR->getAttributes())) + return Res; + + if (int Res = cmpNumbers(FnL->hasGC(), FnR->hasGC())) + return Res; + + if (FnL->hasGC()) { + if (int Res = cmpMem(FnL->getGC(), FnR->getGC())) + return Res; + } + + if (int Res = cmpNumbers(FnL->hasSection(), FnR->hasSection())) + return Res; + + if (FnL->hasSection()) { + if (int Res = cmpMem(FnL->getSection(), FnR->getSection())) + return Res; + } + + if (int Res = cmpNumbers(FnL->isVarArg(), FnR->isVarArg())) + return Res; + + // TODO: if it's internal and only used in direct calls, we could handle this + // case too. + if (int Res = cmpNumbers(FnL->getCallingConv(), FnR->getCallingConv())) + return Res; + + if (int Res = cmpTypes(FnL->getFunctionType(), FnR->getFunctionType())) + return Res; + + assert(FnL->arg_size() == FnR->arg_size() && + "Identically typed functions have different numbers of args!"); + + // Visit the arguments so that they get enumerated in the order they're + // passed in. + for (Function::const_arg_iterator ArgLI = FnL->arg_begin(), + ArgRI = FnR->arg_begin(), + ArgLE = FnL->arg_end(); + ArgLI != ArgLE; ++ArgLI, ++ArgRI) { + if (cmpValues(&*ArgLI, &*ArgRI) != 0) + llvm_unreachable("Arguments repeat!"); + } + return 0; +} + +// Test whether the two functions have equivalent behaviour. +int FunctionComparator::compare() { + beginCompare(); + + if (int Res = compareSignature()) + return Res; + + // We do a CFG-ordered walk since the actual ordering of the blocks in the + // linked list is immaterial. Our walk starts at the entry block for both + // functions, then takes each block from each terminator in order. As an + // artifact, this also means that unreachable blocks are ignored. + SmallVector<const BasicBlock *, 8> FnLBBs, FnRBBs; + SmallPtrSet<const BasicBlock *, 32> VisitedBBs; // in terms of F1. + + FnLBBs.push_back(&FnL->getEntryBlock()); + FnRBBs.push_back(&FnR->getEntryBlock()); + + VisitedBBs.insert(FnLBBs[0]); + while (!FnLBBs.empty()) { + const BasicBlock *BBL = FnLBBs.pop_back_val(); + const BasicBlock *BBR = FnRBBs.pop_back_val(); + + if (int Res = cmpValues(BBL, BBR)) + return Res; + + if (int Res = cmpBasicBlocks(BBL, BBR)) + return Res; + + const Instruction *TermL = BBL->getTerminator(); + const Instruction *TermR = BBR->getTerminator(); + + assert(TermL->getNumSuccessors() == TermR->getNumSuccessors()); + for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) { + if (!VisitedBBs.insert(TermL->getSuccessor(i)).second) + continue; + + FnLBBs.push_back(TermL->getSuccessor(i)); + FnRBBs.push_back(TermR->getSuccessor(i)); + } + } + return 0; +} + +namespace { + +// Accumulate the hash of a sequence of 64-bit integers. This is similar to a +// hash of a sequence of 64bit ints, but the entire input does not need to be +// available at once. This interface is necessary for functionHash because it +// needs to accumulate the hash as the structure of the function is traversed +// without saving these values to an intermediate buffer. This form of hashing +// is not often needed, as usually the object to hash is just read from a +// buffer. 
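+// Hedged usage sketch (FL and FR are hypothetical Function pointers, not
+// from this patch):
+//   uint64_t HL = FunctionComparator::functionHash(*FL);
+//   uint64_t HR = FunctionComparator::functionHash(*FR);
+// Differing hashes prove the functions differ; equal hashes still require a
+// full compare(), since operands, constants and call targets are not hashed.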
+class HashAccumulator64 { + uint64_t Hash; + +public: + // Initialize to random constant, so the state isn't zero. + HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; } + + void add(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); } + + // No finishing is required, because the entire hash value is used. + uint64_t getHash() { return Hash; } +}; + +} // end anonymous namespace + +// A function hash is calculated by considering only the number of arguments and +// whether a function is varargs, the order of basic blocks (given by the +// successors of each basic block in depth first order), and the order of +// opcodes of each instruction within each of these basic blocks. This mirrors +// the strategy compare() uses to compare functions by walking the BBs in depth +// first order and comparing each instruction in sequence. Because this hash +// does not look at the operands, it is insensitive to things such as the +// target of calls and the constants used in the function, which makes it useful +// when possibly merging functions which are the same modulo constants and call +// targets. +FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) { + HashAccumulator64 H; + H.add(F.isVarArg()); + H.add(F.arg_size()); + + SmallVector<const BasicBlock *, 8> BBs; + SmallPtrSet<const BasicBlock *, 16> VisitedBBs; + + // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(), + // accumulating the hash of the function "structure." (BB and opcode sequence) + BBs.push_back(&F.getEntryBlock()); + VisitedBBs.insert(BBs[0]); + while (!BBs.empty()) { + const BasicBlock *BB = BBs.pop_back_val(); + // This random value acts as a block header, as otherwise the partition of + // opcodes into BBs wouldn't affect the hash, only the order of the opcodes + H.add(45798); + for (const auto &Inst : *BB) { + H.add(Inst.getOpcode()); + } + const Instruction *Term = BB->getTerminator(); + for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { + if (!VisitedBBs.insert(Term->getSuccessor(i)).second) + continue; + BBs.push_back(Term->getSuccessor(i)); + } + } + return H.getHash(); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/FunctionImportUtils.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/FunctionImportUtils.cpp new file mode 100644 index 0000000000..87be6be018 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -0,0 +1,361 @@ +//===- lib/Transforms/Utils/FunctionImportUtils.cpp - Importing utilities -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the FunctionImportGlobalProcessing class, used +// to perform the necessary global value handling for function importing. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/FunctionImportUtils.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +/// Uses the "source_filename" instead of a Module hash ID for the suffix of +/// promoted locals during LTO. NOTE: This requires that the source filename +/// has a unique name / path to avoid name collisions. 
+static cl::opt<bool> UseSourceFilenameForPromotedLocals( + "use-source-filename-for-promoted-locals", cl::Hidden, + cl::desc("Uses the source file name instead of the Module hash. " + "This requires that the source filename has a unique name / " + "path to avoid name collisions.")); + +/// Checks if we should import SGV as a definition, otherwise import as a +/// declaration. +bool FunctionImportGlobalProcessing::doImportAsDefinition( + const GlobalValue *SGV) { + if (!isPerformingImport()) + return false; + + // Only import the globals requested for importing. + if (!GlobalsToImport->count(const_cast<GlobalValue *>(SGV))) + return false; + + assert(!isa<GlobalAlias>(SGV) && + "Unexpected global alias in the import list."); + + // Otherwise yes. + return true; +} + +bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal( + const GlobalValue *SGV, ValueInfo VI) { + assert(SGV->hasLocalLinkage()); + + // Ifuncs and ifunc alias does not have summary. + if (isa<GlobalIFunc>(SGV) || + (isa<GlobalAlias>(SGV) && + isa<GlobalIFunc>(cast<GlobalAlias>(SGV)->getAliaseeObject()))) + return false; + + // Both the imported references and the original local variable must + // be promoted. + if (!isPerformingImport() && !isModuleExporting()) + return false; + + if (isPerformingImport()) { + assert((!GlobalsToImport->count(const_cast<GlobalValue *>(SGV)) || + !isNonRenamableLocal(*SGV)) && + "Attempting to promote non-renamable local"); + // We don't know for sure yet if we are importing this value (as either + // a reference or a def), since we are simply walking all values in the + // module. But by necessity if we end up importing it and it is local, + // it must be promoted, so unconditionally promote all values in the + // importing module. + return true; + } + + // When exporting, consult the index. We can have more than one local + // with the same GUID, in the case of same-named locals in different but + // same-named source files that were compiled in their respective directories + // (so the source file name and resulting GUID is the same). Find the one + // in this module. + auto Summary = ImportIndex.findSummaryInModule( + VI, SGV->getParent()->getModuleIdentifier()); + assert(Summary && "Missing summary for global value when exporting"); + auto Linkage = Summary->linkage(); + if (!GlobalValue::isLocalLinkage(Linkage)) { + assert(!isNonRenamableLocal(*SGV) && + "Attempting to promote non-renamable local"); + return true; + } + + return false; +} + +#ifndef NDEBUG +bool FunctionImportGlobalProcessing::isNonRenamableLocal( + const GlobalValue &GV) const { + if (!GV.hasLocalLinkage()) + return false; + // This needs to stay in sync with the logic in buildModuleSummaryIndex. + if (GV.hasSection()) + return true; + if (Used.count(const_cast<GlobalValue *>(&GV))) + return true; + return false; +} +#endif + +std::string +FunctionImportGlobalProcessing::getPromotedName(const GlobalValue *SGV) { + assert(SGV->hasLocalLinkage()); + + // For locals that must be promoted to global scope, ensure that + // the promoted name uniquely identifies the copy in the original module, + // using the ID assigned during combined index creation. 
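+ // Hedged example (names are hypothetical, not from this patch): a local
+ // "foo" defined in source file "a/b.c" is renamed to something like
+ // "foo.llvm.a_b_c" when -use-source-filename-for-promoted-locals is set,
+ // and to "foo.llvm.<module-hash>" otherwise, so same-named locals from
+ // different modules cannot collide after promotion.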
+ if (UseSourceFilenameForPromotedLocals && + !SGV->getParent()->getSourceFileName().empty()) { + SmallString<256> Suffix(SGV->getParent()->getSourceFileName()); + std::replace_if(std::begin(Suffix), std::end(Suffix), + [&](char ch) { return !isAlnum(ch); }, '_'); + return ModuleSummaryIndex::getGlobalNameForLocal( + SGV->getName(), Suffix); + } + + return ModuleSummaryIndex::getGlobalNameForLocal( + SGV->getName(), + ImportIndex.getModuleHash(SGV->getParent()->getModuleIdentifier())); +} + +GlobalValue::LinkageTypes +FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV, + bool DoPromote) { + // Any local variable that is referenced by an exported function needs + // to be promoted to global scope. Since we don't currently know which + // functions reference which local variables/functions, we must treat + // all as potentially exported if this module is exporting anything. + if (isModuleExporting()) { + if (SGV->hasLocalLinkage() && DoPromote) + return GlobalValue::ExternalLinkage; + return SGV->getLinkage(); + } + + // Otherwise, if we aren't importing, no linkage change is needed. + if (!isPerformingImport()) + return SGV->getLinkage(); + + switch (SGV->getLinkage()) { + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::ExternalLinkage: + // External and linkonce definitions are converted to available_externally + // definitions upon import, so that they are available for inlining + // and/or optimization, but are turned into declarations later + // during the EliminateAvailableExternally pass. + if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV)) + return GlobalValue::AvailableExternallyLinkage; + // An imported external declaration stays external. + return SGV->getLinkage(); + + case GlobalValue::AvailableExternallyLinkage: + // An imported available_externally definition converts + // to external if imported as a declaration. + if (!doImportAsDefinition(SGV)) + return GlobalValue::ExternalLinkage; + // An imported available_externally declaration stays that way. + return SGV->getLinkage(); + + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::WeakAnyLinkage: + // Can't import linkonce_any/weak_any definitions correctly, or we might + // change the program semantics, since the linker will pick the first + // linkonce_any/weak_any definition and importing would change the order + // they are seen by the linker. The module linking caller needs to enforce + // this. + assert(!doImportAsDefinition(SGV)); + // If imported as a declaration, it becomes external_weak. + return SGV->getLinkage(); + + case GlobalValue::WeakODRLinkage: + // For weak_odr linkage, there is a guarantee that all copies will be + // equivalent, so the issue described above for weak_any does not exist, + // and the definition can be imported. It can be treated similarly + // to an imported externally visible global value. + if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV)) + return GlobalValue::AvailableExternallyLinkage; + else + return GlobalValue::ExternalLinkage; + + case GlobalValue::AppendingLinkage: + // It would be incorrect to import an appending linkage variable, + // since it would cause global constructors/destructors to be + // executed multiple times. This should have already been handled + // by linkIfNeeded, and we will assert in shouldLinkFromSource + // if we try to import, so we simply return AppendingLinkage. 
+ return GlobalValue::AppendingLinkage; + + case GlobalValue::InternalLinkage: + case GlobalValue::PrivateLinkage: + // If we are promoting the local to global scope, it is handled + // similarly to a normal externally visible global. + if (DoPromote) { + if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV)) + return GlobalValue::AvailableExternallyLinkage; + else + return GlobalValue::ExternalLinkage; + } + // A non-promoted imported local definition stays local. + // The ThinLTO pass will eventually force-import their definitions. + return SGV->getLinkage(); + + case GlobalValue::ExternalWeakLinkage: + // External weak doesn't apply to definitions, must be a declaration. + assert(!doImportAsDefinition(SGV)); + // Linkage stays external_weak. + return SGV->getLinkage(); + + case GlobalValue::CommonLinkage: + // Linkage stays common on definitions. + // The ThinLTO pass will eventually force-import their definitions. + return SGV->getLinkage(); + } + + llvm_unreachable("unknown linkage type"); +} + +void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) { + + ValueInfo VI; + if (GV.hasName()) { + VI = ImportIndex.getValueInfo(GV.getGUID()); + // Set synthetic function entry counts. + if (VI && ImportIndex.hasSyntheticEntryCounts()) { + if (Function *F = dyn_cast<Function>(&GV)) { + if (!F->isDeclaration()) { + for (const auto &S : VI.getSummaryList()) { + auto *FS = cast<FunctionSummary>(S->getBaseObject()); + if (FS->modulePath() == M.getModuleIdentifier()) { + F->setEntryCount(Function::ProfileCount(FS->entryCount(), + Function::PCT_Synthetic)); + break; + } + } + } + } + } + } + + // We should always have a ValueInfo (i.e. GV in index) for definitions when + // we are exporting, and also when importing that value. + assert(VI || GV.isDeclaration() || + (isPerformingImport() && !doImportAsDefinition(&GV))); + + // Mark read/write-only variables which can be imported with specific + // attribute. We can't internalize them now because IRMover will fail + // to link variable definitions to their external declarations during + // ThinLTO import. We'll internalize read-only variables later, after + // import is finished. See internalizeGVsAfterImport. + // + // If global value dead stripping is not enabled in summary then + // propagateConstants hasn't been run. We can't internalize GV + // in such case. + if (!GV.isDeclaration() && VI && ImportIndex.withAttributePropagation()) { + if (GlobalVariable *V = dyn_cast<GlobalVariable>(&GV)) { + // We can have more than one local with the same GUID, in the case of + // same-named locals in different but same-named source files that were + // compiled in their respective directories (so the source file name + // and resulting GUID is the same). Find the one in this module. + // Handle the case where there is no summary found in this module. That + // can happen in the distributed ThinLTO backend, because the index only + // contains summaries from the source modules if they are being imported. + // We might have a non-null VI and get here even in that case if the name + // matches one in this module (e.g. weak or appending linkage). 
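+ // Hedged illustration (globals are hypothetical, not from this patch): a
+ // write-only variable such as
+ //   @cache = internal global ptr @impl
+ // is tagged "thinlto-internalize" and has its initializer replaced with
+ // "ptr null" below, so the reference to @impl is dropped from this module
+ // and @impl is neither promoted nor exported on behalf of @cache.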
+ auto *GVS = dyn_cast_or_null<GlobalVarSummary>( + ImportIndex.findSummaryInModule(VI, M.getModuleIdentifier())); + if (GVS && + (ImportIndex.isReadOnly(GVS) || ImportIndex.isWriteOnly(GVS))) { + V->addAttribute("thinlto-internalize"); + // Objects referenced by writeonly GV initializer should not be + // promoted, because there is no any kind of read access to them + // on behalf of this writeonly GV. To avoid promotion we convert + // GV initializer to 'zeroinitializer'. This effectively drops + // references in IR module (not in combined index), so we can + // ignore them when computing import. We do not export references + // of writeonly object. See computeImportForReferencedGlobals + if (ImportIndex.isWriteOnly(GVS)) + V->setInitializer(Constant::getNullValue(V->getValueType())); + } + } + } + + if (GV.hasLocalLinkage() && shouldPromoteLocalToGlobal(&GV, VI)) { + // Save the original name string before we rename GV below. + auto Name = GV.getName().str(); + GV.setName(getPromotedName(&GV)); + GV.setLinkage(getLinkage(&GV, /* DoPromote */ true)); + assert(!GV.hasLocalLinkage()); + GV.setVisibility(GlobalValue::HiddenVisibility); + + // If we are renaming a COMDAT leader, ensure that we record the COMDAT + // for later renaming as well. This is required for COFF. + if (const auto *C = GV.getComdat()) + if (C->getName() == Name) + RenamedComdats.try_emplace(C, M.getOrInsertComdat(GV.getName())); + } else + GV.setLinkage(getLinkage(&GV, /* DoPromote */ false)); + + // When ClearDSOLocalOnDeclarations is true, clear dso_local if GV is + // converted to a declaration, to disable direct access. Don't do this if GV + // is implicitly dso_local due to a non-default visibility. + if (ClearDSOLocalOnDeclarations && + (GV.isDeclarationForLinker() || + (isPerformingImport() && !doImportAsDefinition(&GV))) && + !GV.isImplicitDSOLocal()) { + GV.setDSOLocal(false); + } else if (VI && VI.isDSOLocal(ImportIndex.withDSOLocalPropagation())) { + // If all summaries are dso_local, symbol gets resolved to a known local + // definition. + GV.setDSOLocal(true); + if (GV.hasDLLImportStorageClass()) + GV.setDLLStorageClass(GlobalValue::DefaultStorageClass); + } + + // Remove functions imported as available externally defs from comdats, + // as this is a declaration for the linker, and will be dropped eventually. + // It is illegal for comdats to contain declarations. + auto *GO = dyn_cast<GlobalObject>(&GV); + if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) { + // The IRMover should not have placed any imported declarations in + // a comdat, so the only declaration that should be in a comdat + // at this point would be a definition imported as available_externally. + assert(GO->hasAvailableExternallyLinkage() && + "Expected comdat on definition (possibly available external)"); + GO->setComdat(nullptr); + } +} + +void FunctionImportGlobalProcessing::processGlobalsForThinLTO() { + for (GlobalVariable &GV : M.globals()) + processGlobalForThinLTO(GV); + for (Function &SF : M) + processGlobalForThinLTO(SF); + for (GlobalAlias &GA : M.aliases()) + processGlobalForThinLTO(GA); + + // Replace any COMDATS that required renaming (because the COMDAT leader was + // promoted and renamed). 
+ if (!RenamedComdats.empty()) + for (auto &GO : M.global_objects()) + if (auto *C = GO.getComdat()) { + auto Replacement = RenamedComdats.find(C); + if (Replacement != RenamedComdats.end()) + GO.setComdat(Replacement->second); + } +} + +bool FunctionImportGlobalProcessing::run() { + processGlobalsForThinLTO(); + return false; +} + +bool llvm::renameModuleForThinLTO(Module &M, const ModuleSummaryIndex &Index, + bool ClearDSOLocalOnDeclarations, + SetVector<GlobalValue *> *GlobalsToImport) { + FunctionImportGlobalProcessing ThinLTOProcessing(M, Index, GlobalsToImport, + ClearDSOLocalOnDeclarations); + return ThinLTOProcessing.run(); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/GlobalStatus.cpp new file mode 100644 index 0000000000..c5aded3c45 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/GlobalStatus.cpp @@ -0,0 +1,195 @@ +//===-- GlobalStatus.cpp - Compute status info for globals -----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/GlobalStatus.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Casting.h" +#include <algorithm> +#include <cassert> + +using namespace llvm; + +/// Return the stronger of the two ordering. If the two orderings are acquire +/// and release, then return AcquireRelease. +/// +static AtomicOrdering strongerOrdering(AtomicOrdering X, AtomicOrdering Y) { + if ((X == AtomicOrdering::Acquire && Y == AtomicOrdering::Release) || + (Y == AtomicOrdering::Acquire && X == AtomicOrdering::Release)) + return AtomicOrdering::AcquireRelease; + return (AtomicOrdering)std::max((unsigned)X, (unsigned)Y); +} + +/// It is safe to destroy a constant iff it is only used by constants itself. +/// Note that while constants cannot be cyclic, they can be tree-like, so we +/// should keep a visited set to avoid exponential runtime. 
+bool llvm::isSafeToDestroyConstant(const Constant *C) { + SmallVector<const Constant *, 8> Worklist; + SmallPtrSet<const Constant *, 8> Visited; + Worklist.push_back(C); + while (!Worklist.empty()) { + const Constant *C = Worklist.pop_back_val(); + if (!Visited.insert(C).second) + continue; + if (isa<GlobalValue>(C) || isa<ConstantData>(C)) + return false; + + for (const User *U : C->users()) { + if (const Constant *CU = dyn_cast<Constant>(U)) + Worklist.push_back(CU); + else + return false; + } + } + return true; +} + +static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, + SmallPtrSetImpl<const Value *> &VisitedUsers) { + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) + if (GV->isExternallyInitialized()) + GS.StoredType = GlobalStatus::StoredOnce; + + for (const Use &U : V->uses()) { + const User *UR = U.getUser(); + if (const Constant *C = dyn_cast<Constant>(UR)) { + const ConstantExpr *CE = dyn_cast<ConstantExpr>(C); + if (CE && isa<PointerType>(CE->getType())) { + // Recursively analyze pointer-typed constant expressions. + // FIXME: Do we need to add constexpr selects to VisitedUsers? + if (analyzeGlobalAux(CE, GS, VisitedUsers)) + return true; + } else { + // Ignore dead constant users. + if (!isSafeToDestroyConstant(C)) + return true; + } + } else if (const Instruction *I = dyn_cast<Instruction>(UR)) { + if (!GS.HasMultipleAccessingFunctions) { + const Function *F = I->getParent()->getParent(); + if (!GS.AccessingFunction) + GS.AccessingFunction = F; + else if (GS.AccessingFunction != F) + GS.HasMultipleAccessingFunctions = true; + } + if (const LoadInst *LI = dyn_cast<LoadInst>(I)) { + GS.IsLoaded = true; + // Don't hack on volatile loads. + if (LI->isVolatile()) + return true; + GS.Ordering = strongerOrdering(GS.Ordering, LI->getOrdering()); + } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) { + // Don't allow a store OF the address, only stores TO the address. + if (SI->getOperand(0) == V) + return true; + + // Don't hack on volatile stores. + if (SI->isVolatile()) + return true; + + ++GS.NumStores; + + GS.Ordering = strongerOrdering(GS.Ordering, SI->getOrdering()); + + // If this is a direct store to the global (i.e., the global is a scalar + // value, not an aggregate), keep more specific information about + // stores. + if (GS.StoredType != GlobalStatus::Stored) { + const Value *Ptr = SI->getPointerOperand()->stripPointerCasts(); + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) { + Value *StoredVal = SI->getOperand(0); + + if (Constant *C = dyn_cast<Constant>(StoredVal)) { + if (C->isThreadDependent()) { + // The stored value changes between threads; don't track it. + return true; + } + } + + if (GV->hasInitializer() && StoredVal == GV->getInitializer()) { + if (GS.StoredType < GlobalStatus::InitializerStored) + GS.StoredType = GlobalStatus::InitializerStored; + } else if (isa<LoadInst>(StoredVal) && + cast<LoadInst>(StoredVal)->getOperand(0) == GV) { + if (GS.StoredType < GlobalStatus::InitializerStored) + GS.StoredType = GlobalStatus::InitializerStored; + } else if (GS.StoredType < GlobalStatus::StoredOnce) { + GS.StoredType = GlobalStatus::StoredOnce; + GS.StoredOnceStore = SI; + } else if (GS.StoredType == GlobalStatus::StoredOnce && + GS.getStoredOnceValue() == StoredVal) { + // noop. 
+ } else { + GS.StoredType = GlobalStatus::Stored; + } + } else { + GS.StoredType = GlobalStatus::Stored; + } + } + } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I) || + isa<AddrSpaceCastInst>(I)) { + // Skip over bitcasts and GEPs; we don't care about the type or offset + // of the pointer. + if (analyzeGlobalAux(I, GS, VisitedUsers)) + return true; + } else if (isa<SelectInst>(I) || isa<PHINode>(I)) { + // Look through selects and PHIs to find if the pointer is + // conditionally accessed. Make sure we only visit an instruction + // once; otherwise, we can get infinite recursion or exponential + // compile time. + if (VisitedUsers.insert(I).second) + if (analyzeGlobalAux(I, GS, VisitedUsers)) + return true; + } else if (isa<CmpInst>(I)) { + GS.IsCompared = true; + } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) { + if (MTI->isVolatile()) + return true; + if (MTI->getArgOperand(0) == V) + GS.StoredType = GlobalStatus::Stored; + if (MTI->getArgOperand(1) == V) + GS.IsLoaded = true; + } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) { + assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!"); + if (MSI->isVolatile()) + return true; + GS.StoredType = GlobalStatus::Stored; + } else if (const auto *CB = dyn_cast<CallBase>(I)) { + if (!CB->isCallee(&U)) + return true; + GS.IsLoaded = true; + } else { + return true; // Any other non-load instruction might take address! + } + } else { + // Otherwise must be some other user. + return true; + } + } + + return false; +} + +GlobalStatus::GlobalStatus() = default; + +bool GlobalStatus::analyzeGlobal(const Value *V, GlobalStatus &GS) { + SmallPtrSet<const Value *, 16> VisitedUsers; + return analyzeGlobalAux(V, GS, VisitedUsers); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/GuardUtils.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/GuardUtils.cpp new file mode 100644 index 0000000000..7c310f16d4 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/GuardUtils.cpp @@ -0,0 +1,126 @@ +//===-- GuardUtils.cpp - Utils for work with guards -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Utils that are used to perform transformations related to guards and their +// conditions. 
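+//
+// For example, makeGuardControlFlowExplicit below lowers a call to
+// @llvm.experimental.guard(i1 %cond, ...) into an explicit branch: the
+// passing path continues in a "guarded" block, the failing path calls
+// @llvm.experimental.deoptimize in a "deopt" block, and (optionally) an
+// @llvm.experimental.widenable.condition call is AND-ed into the branch
+// condition so the resulting branch remains widenable.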
+//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/GuardUtils.h" +#include "llvm/Analysis/GuardUtils.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; +using namespace llvm::PatternMatch; + +static cl::opt<uint32_t> PredicatePassBranchWeight( + "guards-predicate-pass-branch-weight", cl::Hidden, cl::init(1 << 20), + cl::desc("The probability of a guard failing is assumed to be the " + "reciprocal of this value (default = 1 << 20)")); + +void llvm::makeGuardControlFlowExplicit(Function *DeoptIntrinsic, + CallInst *Guard, bool UseWC) { + OperandBundleDef DeoptOB(*Guard->getOperandBundle(LLVMContext::OB_deopt)); + SmallVector<Value *, 4> Args(drop_begin(Guard->args())); + + auto *CheckBB = Guard->getParent(); + auto *DeoptBlockTerm = + SplitBlockAndInsertIfThen(Guard->getArgOperand(0), Guard, true); + + auto *CheckBI = cast<BranchInst>(CheckBB->getTerminator()); + + // SplitBlockAndInsertIfThen inserts control flow that branches to + // DeoptBlockTerm if the condition is true. We want the opposite. + CheckBI->swapSuccessors(); + + CheckBI->getSuccessor(0)->setName("guarded"); + CheckBI->getSuccessor(1)->setName("deopt"); + + if (auto *MD = Guard->getMetadata(LLVMContext::MD_make_implicit)) + CheckBI->setMetadata(LLVMContext::MD_make_implicit, MD); + + MDBuilder MDB(Guard->getContext()); + CheckBI->setMetadata(LLVMContext::MD_prof, + MDB.createBranchWeights(PredicatePassBranchWeight, 1)); + + IRBuilder<> B(DeoptBlockTerm); + auto *DeoptCall = B.CreateCall(DeoptIntrinsic, Args, {DeoptOB}, ""); + + if (DeoptIntrinsic->getReturnType()->isVoidTy()) { + B.CreateRetVoid(); + } else { + DeoptCall->setName("deoptcall"); + B.CreateRet(DeoptCall); + } + + DeoptCall->setCallingConv(Guard->getCallingConv()); + DeoptBlockTerm->eraseFromParent(); + + if (UseWC) { + // We want the guard to be expressed as explicit control flow, but still be + // widenable. For that, we add Widenable Condition intrinsic call to the + // guard's condition. + IRBuilder<> B(CheckBI); + auto *WC = B.CreateIntrinsic(Intrinsic::experimental_widenable_condition, + {}, {}, nullptr, "widenable_cond"); + CheckBI->setCondition(B.CreateAnd(CheckBI->getCondition(), WC, + "exiplicit_guard_cond")); + assert(isWidenableBranch(CheckBI) && "Branch must be widenable."); + } +} + + +void llvm::widenWidenableBranch(BranchInst *WidenableBR, Value *NewCond) { + assert(isWidenableBranch(WidenableBR) && "precondition"); + + // The tempting trivially option is to produce something like this: + // br (and oldcond, newcond) where oldcond is assumed to contain a widenable + // condition, but that doesn't match the pattern parseWidenableBranch expects + // so we have to be more sophisticated. + + Use *C, *WC; + BasicBlock *IfTrueBB, *IfFalseBB; + parseWidenableBranch(WidenableBR, C, WC, IfTrueBB, IfFalseBB); + if (!C) { + // br (wc()), ... form + IRBuilder<> B(WidenableBR); + WidenableBR->setCondition(B.CreateAnd(NewCond, WC->get())); + } else { + // br (wc & C), ... 
form + IRBuilder<> B(WidenableBR); + C->set(B.CreateAnd(NewCond, C->get())); + Instruction *WCAnd = cast<Instruction>(WidenableBR->getCondition()); + // Condition is only guaranteed to dominate branch + WCAnd->moveBefore(WidenableBR); + } + assert(isWidenableBranch(WidenableBR) && "preserve widenabiliy"); +} + +void llvm::setWidenableBranchCond(BranchInst *WidenableBR, Value *NewCond) { + assert(isWidenableBranch(WidenableBR) && "precondition"); + + Use *C, *WC; + BasicBlock *IfTrueBB, *IfFalseBB; + parseWidenableBranch(WidenableBR, C, WC, IfTrueBB, IfFalseBB); + if (!C) { + // br (wc()), ... form + IRBuilder<> B(WidenableBR); + WidenableBR->setCondition(B.CreateAnd(NewCond, WC->get())); + } else { + // br (wc & C), ... form + Instruction *WCAnd = cast<Instruction>(WidenableBR->getCondition()); + // Condition is only guaranteed to dominate branch + WCAnd->moveBefore(WidenableBR); + C->set(NewCond); + } + assert(isWidenableBranch(WidenableBR) && "preserve widenabiliy"); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/HelloWorld.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/HelloWorld.cpp new file mode 100644 index 0000000000..7019e9e445 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/HelloWorld.cpp @@ -0,0 +1,17 @@ +//===-- HelloWorld.cpp - Example Transformations --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/HelloWorld.h" + +using namespace llvm; + +PreservedAnalyses HelloWorldPass::run(Function &F, + FunctionAnalysisManager &AM) { + errs() << F.getName() << "\n"; + return PreservedAnalyses::all(); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/InjectTLIMappings.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/InjectTLIMappings.cpp new file mode 100644 index 0000000000..55bcb6f3b1 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/InjectTLIMappings.cpp @@ -0,0 +1,176 @@ +//===- InjectTLIMAppings.cpp - TLI to VFABI attribute injection ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Populates the VFABI attribute with the scalar-to-vector mappings +// from the TargetLibraryInfo. 
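+//
+// For example, a call to `sinf` for which the TLI knows a 4-lane variant is
+// annotated with an attribute of roughly the form
+//   "vector-function-abi-variant"="_ZGV_LLVM_N4v_sinf(<variant name>)"
+// (see VFABI::mangleTLIVectorName for the exact mangling), and a matching
+// vector declaration is added to the module and to @llvm.compiler.used.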
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/InjectTLIMappings.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/DemandedBits.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/VectorUtils.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "inject-tli-mappings" + +STATISTIC(NumCallInjected, + "Number of calls in which the mappings have been injected."); + +STATISTIC(NumVFDeclAdded, + "Number of function declarations that have been added."); +STATISTIC(NumCompUsedAdded, + "Number of `@llvm.compiler.used` operands that have been added."); + +/// A helper function that adds the vector function declaration that +/// vectorizes the CallInst CI with a vectorization factor of VF +/// lanes. The TLI assumes that all parameters and the return type of +/// CI (other than void) need to be widened to a VectorType of VF +/// lanes. +static void addVariantDeclaration(CallInst &CI, const ElementCount &VF, + const StringRef VFName) { + Module *M = CI.getModule(); + + // Add function declaration. + Type *RetTy = ToVectorTy(CI.getType(), VF); + SmallVector<Type *, 4> Tys; + for (Value *ArgOperand : CI.args()) + Tys.push_back(ToVectorTy(ArgOperand->getType(), VF)); + assert(!CI.getFunctionType()->isVarArg() && + "VarArg functions are not supported."); + FunctionType *FTy = FunctionType::get(RetTy, Tys, /*isVarArg=*/false); + Function *VectorF = + Function::Create(FTy, Function::ExternalLinkage, VFName, M); + VectorF->copyAttributesFrom(CI.getCalledFunction()); + ++NumVFDeclAdded; + LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added to the module: `" << VFName + << "` of type " << *(VectorF->getType()) << "\n"); + + // Make function declaration (without a body) "sticky" in the IR by + // listing it in the @llvm.compiler.used intrinsic. + assert(!VectorF->size() && "VFABI attribute requires `@llvm.compiler.used` " + "only on declarations."); + appendToCompilerUsed(*M, {VectorF}); + LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << VFName + << "` to `@llvm.compiler.used`.\n"); + ++NumCompUsedAdded; +} + +static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) { + // This is needed to make sure we don't query the TLI for calls to + // bitcast of function pointers, like `%call = call i32 (i32*, ...) + // bitcast (i32 (...)* @goo to i32 (i32*, ...)*)(i32* nonnull %i)`, + // as such calls make the `isFunctionVectorizable` raise an + // exception. + if (CI.isNoBuiltin() || !CI.getCalledFunction()) + return; + + StringRef ScalarName = CI.getCalledFunction()->getName(); + + // Nothing to be done if the TLI thinks the function is not + // vectorizable. 
+ if (!TLI.isFunctionVectorizable(ScalarName)) + return; + SmallVector<std::string, 8> Mappings; + VFABI::getVectorVariantNames(CI, Mappings); + Module *M = CI.getModule(); + const SetVector<StringRef> OriginalSetOfMappings(Mappings.begin(), + Mappings.end()); + + auto AddVariantDecl = [&](const ElementCount &VF) { + const std::string TLIName = + std::string(TLI.getVectorizedFunction(ScalarName, VF)); + if (!TLIName.empty()) { + std::string MangledName = + VFABI::mangleTLIVectorName(TLIName, ScalarName, CI.arg_size(), VF); + if (!OriginalSetOfMappings.count(MangledName)) { + Mappings.push_back(MangledName); + ++NumCallInjected; + } + Function *VariantF = M->getFunction(TLIName); + if (!VariantF) + addVariantDeclaration(CI, VF, TLIName); + } + }; + + // All VFs in the TLI are powers of 2. + ElementCount WidestFixedVF, WidestScalableVF; + TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF); + + for (ElementCount VF = ElementCount::getFixed(2); + ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2) + AddVariantDecl(VF); + + // TODO: Add scalable variants once we're able to test them. + assert(WidestScalableVF.isZero() && + "Scalable vector mappings not yet supported"); + + VFABI::setVectorVariantNames(&CI, Mappings); +} + +static bool runImpl(const TargetLibraryInfo &TLI, Function &F) { + for (auto &I : instructions(F)) + if (auto CI = dyn_cast<CallInst>(&I)) + addMappingsFromTLI(TLI, *CI); + // Even if the pass adds IR attributes, the analyses are preserved. + return false; +} + +//////////////////////////////////////////////////////////////////////////////// +// New pass manager implementation. +//////////////////////////////////////////////////////////////////////////////// +PreservedAnalyses InjectTLIMappings::run(Function &F, + FunctionAnalysisManager &AM) { + const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F); + runImpl(TLI, F); + // Even if the pass adds IR attributes, the analyses are preserved. + return PreservedAnalyses::all(); +} + +//////////////////////////////////////////////////////////////////////////////// +// Legacy PM Implementation. 
+//////////////////////////////////////////////////////////////////////////////// +bool InjectTLIMappingsLegacy::runOnFunction(Function &F) { + const TargetLibraryInfo &TLI = + getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + return runImpl(TLI, F); +} + +void InjectTLIMappingsLegacy::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addPreserved<TargetLibraryInfoWrapperPass>(); + AU.addPreserved<ScalarEvolutionWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addPreserved<LoopAccessLegacyAnalysis>(); + AU.addPreserved<DemandedBitsWrapperPass>(); + AU.addPreserved<OptimizationRemarkEmitterWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); +} + +//////////////////////////////////////////////////////////////////////////////// +// Legacy Pass manager initialization +//////////////////////////////////////////////////////////////////////////////// +char InjectTLIMappingsLegacy::ID = 0; + +INITIALIZE_PASS_BEGIN(InjectTLIMappingsLegacy, DEBUG_TYPE, + "Inject TLI Mappings", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(InjectTLIMappingsLegacy, DEBUG_TYPE, "Inject TLI Mappings", + false, false) + +FunctionPass *llvm::createInjectTLIMappingsLegacyPass() { + return new InjectTLIMappingsLegacy(); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/InlineFunction.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/InlineFunction.cpp new file mode 100644 index 0000000000..399c9a4379 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/InlineFunction.cpp @@ -0,0 +1,2915 @@ +//===- InlineFunction.cpp - Code to perform function inlining -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements inlining of a function into a call site, resolving +// parameters and the return value as appropriate. 
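+//
+// Much of the code below is support machinery: fixing up exception handling
+// (both landingpad- and funclet-based), converting calls that may throw into
+// invokes when inlining through an invoke site, and propagating call-site
+// metadata such as !callsite/!memprof and scoped-alias
+// (!alias.scope/!noalias) metadata onto the cloned callee body.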
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemoryProfileInfo.h" +#include "llvm/Analysis/ObjCARCAnalysisUtils.h" +#include "llvm/Analysis/ObjCARCUtil.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Transforms/Utils/AssumeBundleBuilder.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <limits> +#include <optional> +#include <string> +#include <utility> +#include <vector> + +#define DEBUG_TYPE "inline-function" + +using namespace llvm; +using namespace llvm::memprof; +using ProfileCount = Function::ProfileCount; + +static cl::opt<bool> +EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true), + cl::Hidden, + cl::desc("Convert noalias attributes to metadata during inlining.")); + +static cl::opt<bool> + UseNoAliasIntrinsic("use-noalias-intrinsic-during-inlining", cl::Hidden, + cl::init(true), + cl::desc("Use the llvm.experimental.noalias.scope.decl " + "intrinsic during inlining.")); + +// Disabled by default, because the added alignment assumptions may increase +// compile-time and block optimizations. This option is not suitable for use +// with frontends that emit comprehensive parameter alignment annotations. 
+static cl::opt<bool> +PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining", + cl::init(false), cl::Hidden, + cl::desc("Convert align attributes to assumptions during inlining.")); + +static cl::opt<bool> UpdateReturnAttributes( + "update-return-attrs", cl::init(true), cl::Hidden, + cl::desc("Update return attributes on calls within inlined body")); + +static cl::opt<unsigned> InlinerAttributeWindow( + "max-inst-checked-for-throw-during-inlining", cl::Hidden, + cl::desc("the maximum number of instructions analyzed for may throw during " + "attribute inference in inlined body"), + cl::init(4)); + +namespace { + + /// A class for recording information about inlining a landing pad. + class LandingPadInliningInfo { + /// Destination of the invoke's unwind. + BasicBlock *OuterResumeDest; + + /// Destination for the callee's resume. + BasicBlock *InnerResumeDest = nullptr; + + /// LandingPadInst associated with the invoke. + LandingPadInst *CallerLPad = nullptr; + + /// PHI for EH values from landingpad insts. + PHINode *InnerEHValuesPHI = nullptr; + + SmallVector<Value*, 8> UnwindDestPHIValues; + + public: + LandingPadInliningInfo(InvokeInst *II) + : OuterResumeDest(II->getUnwindDest()) { + // If there are PHI nodes in the unwind destination block, we need to keep + // track of which values came into them from the invoke before removing + // the edge from this block. + BasicBlock *InvokeBB = II->getParent(); + BasicBlock::iterator I = OuterResumeDest->begin(); + for (; isa<PHINode>(I); ++I) { + // Save the value to use for this edge. + PHINode *PHI = cast<PHINode>(I); + UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB)); + } + + CallerLPad = cast<LandingPadInst>(I); + } + + /// The outer unwind destination is the target of + /// unwind edges introduced for calls within the inlined function. + BasicBlock *getOuterResumeDest() const { + return OuterResumeDest; + } + + BasicBlock *getInnerResumeDest(); + + LandingPadInst *getLandingPadInst() const { return CallerLPad; } + + /// Forward the 'resume' instruction to the caller's landing pad block. + /// When the landing pad block has only one predecessor, this is + /// a simple branch. When there is more than one predecessor, we need to + /// split the landing pad block after the landingpad instruction and jump + /// to there. + void forwardResume(ResumeInst *RI, + SmallPtrSetImpl<LandingPadInst*> &InlinedLPads); + + /// Add incoming-PHI values to the unwind destination block for the given + /// basic block, using the values for the original invoke's source block. + void addIncomingPHIValuesFor(BasicBlock *BB) const { + addIncomingPHIValuesForInto(BB, OuterResumeDest); + } + + void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const { + BasicBlock::iterator I = dest->begin(); + for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { + PHINode *phi = cast<PHINode>(I); + phi->addIncoming(UnwindDestPHIValues[i], src); + } + } + }; + +} // end anonymous namespace + +/// Get or create a target for the branch from ResumeInsts. +BasicBlock *LandingPadInliningInfo::getInnerResumeDest() { + if (InnerResumeDest) return InnerResumeDest; + + // Split the landing pad. + BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator(); + InnerResumeDest = + OuterResumeDest->splitBasicBlock(SplitPoint, + OuterResumeDest->getName() + ".body"); + + // The number of incoming edges we expect to the inner landing pad. 
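+ // (One edge comes from the split outer resume block and one from each
+ // forwarded 'resume'; this is only the reservation hint passed to
+ // PHINode::Create, not a hard limit.)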
+ const unsigned PHICapacity = 2; + + // Create corresponding new PHIs for all the PHIs in the outer landing pad. + Instruction *InsertPoint = &InnerResumeDest->front(); + BasicBlock::iterator I = OuterResumeDest->begin(); + for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { + PHINode *OuterPHI = cast<PHINode>(I); + PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity, + OuterPHI->getName() + ".lpad-body", + InsertPoint); + OuterPHI->replaceAllUsesWith(InnerPHI); + InnerPHI->addIncoming(OuterPHI, OuterResumeDest); + } + + // Create a PHI for the exception values. + InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity, + "eh.lpad-body", InsertPoint); + CallerLPad->replaceAllUsesWith(InnerEHValuesPHI); + InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest); + + // All done. + return InnerResumeDest; +} + +/// Forward the 'resume' instruction to the caller's landing pad block. +/// When the landing pad block has only one predecessor, this is a simple +/// branch. When there is more than one predecessor, we need to split the +/// landing pad block after the landingpad instruction and jump to there. +void LandingPadInliningInfo::forwardResume( + ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) { + BasicBlock *Dest = getInnerResumeDest(); + BasicBlock *Src = RI->getParent(); + + BranchInst::Create(Dest, Src); + + // Update the PHIs in the destination. They were inserted in an order which + // makes this work. + addIncomingPHIValuesForInto(Src, Dest); + + InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src); + RI->eraseFromParent(); +} + +/// Helper for getUnwindDestToken/getUnwindDestTokenHelper. +static Value *getParentPad(Value *EHPad) { + if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad)) + return FPI->getParentPad(); + return cast<CatchSwitchInst>(EHPad)->getParentPad(); +} + +using UnwindDestMemoTy = DenseMap<Instruction *, Value *>; + +/// Helper for getUnwindDestToken that does the descendant-ward part of +/// the search. +static Value *getUnwindDestTokenHelper(Instruction *EHPad, + UnwindDestMemoTy &MemoMap) { + SmallVector<Instruction *, 8> Worklist(1, EHPad); + + while (!Worklist.empty()) { + Instruction *CurrentPad = Worklist.pop_back_val(); + // We only put pads on the worklist that aren't in the MemoMap. When + // we find an unwind dest for a pad we may update its ancestors, but + // the queue only ever contains uncles/great-uncles/etc. of CurrentPad, + // so they should never get updated while queued on the worklist. + assert(!MemoMap.count(CurrentPad)); + Value *UnwindDestToken = nullptr; + if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(CurrentPad)) { + if (CatchSwitch->hasUnwindDest()) { + UnwindDestToken = CatchSwitch->getUnwindDest()->getFirstNonPHI(); + } else { + // Catchswitch doesn't have a 'nounwind' variant, and one might be + // annotated as "unwinds to caller" when really it's nounwind (see + // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the + // parent's unwind dest from this. We can check its catchpads' + // descendants, since they might include a cleanuppad with an + // "unwinds to caller" cleanupret, which can be trusted. 
+ for (auto HI = CatchSwitch->handler_begin(), + HE = CatchSwitch->handler_end(); + HI != HE && !UnwindDestToken; ++HI) { + BasicBlock *HandlerBlock = *HI; + auto *CatchPad = cast<CatchPadInst>(HandlerBlock->getFirstNonPHI()); + for (User *Child : CatchPad->users()) { + // Intentionally ignore invokes here -- since the catchswitch is + // marked "unwind to caller", it would be a verifier error if it + // contained an invoke which unwinds out of it, so any invoke we'd + // encounter must unwind to some child of the catch. + if (!isa<CleanupPadInst>(Child) && !isa<CatchSwitchInst>(Child)) + continue; + + Instruction *ChildPad = cast<Instruction>(Child); + auto Memo = MemoMap.find(ChildPad); + if (Memo == MemoMap.end()) { + // Haven't figured out this child pad yet; queue it. + Worklist.push_back(ChildPad); + continue; + } + // We've already checked this child, but might have found that + // it offers no proof either way. + Value *ChildUnwindDestToken = Memo->second; + if (!ChildUnwindDestToken) + continue; + // We already know the child's unwind dest, which can either + // be ConstantTokenNone to indicate unwind to caller, or can + // be another child of the catchpad. Only the former indicates + // the unwind dest of the catchswitch. + if (isa<ConstantTokenNone>(ChildUnwindDestToken)) { + UnwindDestToken = ChildUnwindDestToken; + break; + } + assert(getParentPad(ChildUnwindDestToken) == CatchPad); + } + } + } + } else { + auto *CleanupPad = cast<CleanupPadInst>(CurrentPad); + for (User *U : CleanupPad->users()) { + if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) { + if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest()) + UnwindDestToken = RetUnwindDest->getFirstNonPHI(); + else + UnwindDestToken = ConstantTokenNone::get(CleanupPad->getContext()); + break; + } + Value *ChildUnwindDestToken; + if (auto *Invoke = dyn_cast<InvokeInst>(U)) { + ChildUnwindDestToken = Invoke->getUnwindDest()->getFirstNonPHI(); + } else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) { + Instruction *ChildPad = cast<Instruction>(U); + auto Memo = MemoMap.find(ChildPad); + if (Memo == MemoMap.end()) { + // Haven't resolved this child yet; queue it and keep searching. + Worklist.push_back(ChildPad); + continue; + } + // We've checked this child, but still need to ignore it if it + // had no proof either way. + ChildUnwindDestToken = Memo->second; + if (!ChildUnwindDestToken) + continue; + } else { + // Not a relevant user of the cleanuppad + continue; + } + // In a well-formed program, the child/invoke must either unwind to + // an(other) child of the cleanup, or exit the cleanup. In the + // first case, continue searching. + if (isa<Instruction>(ChildUnwindDestToken) && + getParentPad(ChildUnwindDestToken) == CleanupPad) + continue; + UnwindDestToken = ChildUnwindDestToken; + break; + } + } + // If we haven't found an unwind dest for CurrentPad, we may have queued its + // children, so move on to the next in the worklist. + if (!UnwindDestToken) + continue; + + // Now we know that CurrentPad unwinds to UnwindDestToken. It also exits + // any ancestors of CurrentPad up to but not including UnwindDestToken's + // parent pad. Record this in the memo map, and check to see if the + // original EHPad being queried is one of the ones exited. 
+ Value *UnwindParent; + if (auto *UnwindPad = dyn_cast<Instruction>(UnwindDestToken)) + UnwindParent = getParentPad(UnwindPad); + else + UnwindParent = nullptr; + bool ExitedOriginalPad = false; + for (Instruction *ExitedPad = CurrentPad; + ExitedPad && ExitedPad != UnwindParent; + ExitedPad = dyn_cast<Instruction>(getParentPad(ExitedPad))) { + // Skip over catchpads since they just follow their catchswitches. + if (isa<CatchPadInst>(ExitedPad)) + continue; + MemoMap[ExitedPad] = UnwindDestToken; + ExitedOriginalPad |= (ExitedPad == EHPad); + } + + if (ExitedOriginalPad) + return UnwindDestToken; + + // Continue the search. + } + + // No definitive information is contained within this funclet. + return nullptr; +} + +/// Given an EH pad, find where it unwinds. If it unwinds to an EH pad, +/// return that pad instruction. If it unwinds to caller, return +/// ConstantTokenNone. If it does not have a definitive unwind destination, +/// return nullptr. +/// +/// This routine gets invoked for calls in funclets in inlinees when inlining +/// an invoke. Since many funclets don't have calls inside them, it's queried +/// on-demand rather than building a map of pads to unwind dests up front. +/// Determining a funclet's unwind dest may require recursively searching its +/// descendants, and also ancestors and cousins if the descendants don't provide +/// an answer. Since most funclets will have their unwind dest immediately +/// available as the unwind dest of a catchswitch or cleanupret, this routine +/// searches top-down from the given pad and then up. To avoid worst-case +/// quadratic run-time given that approach, it uses a memo map to avoid +/// re-processing funclet trees. The callers that rewrite the IR as they go +/// take advantage of this, for correctness, by checking/forcing rewritten +/// pads' entries to match the original callee view. +static Value *getUnwindDestToken(Instruction *EHPad, + UnwindDestMemoTy &MemoMap) { + // Catchpads unwind to the same place as their catchswitch; + // redirct any queries on catchpads so the code below can + // deal with just catchswitches and cleanuppads. + if (auto *CPI = dyn_cast<CatchPadInst>(EHPad)) + EHPad = CPI->getCatchSwitch(); + + // Check if we've already determined the unwind dest for this pad. + auto Memo = MemoMap.find(EHPad); + if (Memo != MemoMap.end()) + return Memo->second; + + // Search EHPad and, if necessary, its descendants. + Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap); + assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0)); + if (UnwindDestToken) + return UnwindDestToken; + + // No information is available for this EHPad from itself or any of its + // descendants. An unwind all the way out to a pad in the caller would + // need also to agree with the unwind dest of the parent funclet, so + // search up the chain to try to find a funclet with information. Put + // null entries in the memo map to avoid re-processing as we go up. + MemoMap[EHPad] = nullptr; +#ifndef NDEBUG + SmallPtrSet<Instruction *, 4> TempMemos; + TempMemos.insert(EHPad); +#endif + Instruction *LastUselessPad = EHPad; + Value *AncestorToken; + for (AncestorToken = getParentPad(EHPad); + auto *AncestorPad = dyn_cast<Instruction>(AncestorToken); + AncestorToken = getParentPad(AncestorToken)) { + // Skip over catchpads since they just follow their catchswitches. 
+ if (isa<CatchPadInst>(AncestorPad)) + continue; + // If the MemoMap had an entry mapping AncestorPad to nullptr, since we + // haven't yet called getUnwindDestTokenHelper for AncestorPad in this + // call to getUnwindDestToken, that would mean that AncestorPad had no + // information in itself, its descendants, or its ancestors. If that + // were the case, then we should also have recorded the lack of information + // for the descendant that we're coming from. So assert that we don't + // find a null entry in the MemoMap for AncestorPad. + assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]); + auto AncestorMemo = MemoMap.find(AncestorPad); + if (AncestorMemo == MemoMap.end()) { + UnwindDestToken = getUnwindDestTokenHelper(AncestorPad, MemoMap); + } else { + UnwindDestToken = AncestorMemo->second; + } + if (UnwindDestToken) + break; + LastUselessPad = AncestorPad; + MemoMap[LastUselessPad] = nullptr; +#ifndef NDEBUG + TempMemos.insert(LastUselessPad); +#endif + } + + // We know that getUnwindDestTokenHelper was called on LastUselessPad and + // returned nullptr (and likewise for EHPad and any of its ancestors up to + // LastUselessPad), so LastUselessPad has no information from below. Since + // getUnwindDestTokenHelper must investigate all downward paths through + // no-information nodes to prove that a node has no information like this, + // and since any time it finds information it records it in the MemoMap for + // not just the immediately-containing funclet but also any ancestors also + // exited, it must be the case that, walking downward from LastUselessPad, + // visiting just those nodes which have not been mapped to an unwind dest + // by getUnwindDestTokenHelper (the nullptr TempMemos notwithstanding, since + // they are just used to keep getUnwindDestTokenHelper from repeating work), + // any node visited must have been exhaustively searched with no information + // for it found. + SmallVector<Instruction *, 8> Worklist(1, LastUselessPad); + while (!Worklist.empty()) { + Instruction *UselessPad = Worklist.pop_back_val(); + auto Memo = MemoMap.find(UselessPad); + if (Memo != MemoMap.end() && Memo->second) { + // Here the name 'UselessPad' is a bit of a misnomer, because we've found + // that it is a funclet that does have information about unwinding to + // a particular destination; its parent was a useless pad. + // Since its parent has no information, the unwind edge must not escape + // the parent, and must target a sibling of this pad. This local unwind + // gives us no information about EHPad. Leave it and the subtree rooted + // at it alone. + assert(getParentPad(Memo->second) == getParentPad(UselessPad)); + continue; + } + // We know we don't have information for UselesPad. If it has an entry in + // the MemoMap (mapping it to nullptr), it must be one of the TempMemos + // added on this invocation of getUnwindDestToken; if a previous invocation + // recorded nullptr, it would have had to prove that the ancestors of + // UselessPad, which include LastUselessPad, had no information, and that + // in turn would have required proving that the descendants of + // LastUselesPad, which include EHPad, have no information about + // LastUselessPad, which would imply that EHPad was mapped to nullptr in + // the MemoMap on that invocation, which isn't the case if we got here. 
+ assert(!MemoMap.count(UselessPad) || TempMemos.count(UselessPad)); + // Assert as we enumerate users that 'UselessPad' doesn't have any unwind + // information that we'd be contradicting by making a map entry for it + // (which is something that getUnwindDestTokenHelper must have proved for + // us to get here). Just assert on is direct users here; the checks in + // this downward walk at its descendants will verify that they don't have + // any unwind edges that exit 'UselessPad' either (i.e. they either have no + // unwind edges or unwind to a sibling). + MemoMap[UselessPad] = UnwindDestToken; + if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UselessPad)) { + assert(CatchSwitch->getUnwindDest() == nullptr && "Expected useless pad"); + for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) { + auto *CatchPad = HandlerBlock->getFirstNonPHI(); + for (User *U : CatchPad->users()) { + assert( + (!isa<InvokeInst>(U) || + (getParentPad( + cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) == + CatchPad)) && + "Expected useless pad"); + if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U)) + Worklist.push_back(cast<Instruction>(U)); + } + } + } else { + assert(isa<CleanupPadInst>(UselessPad)); + for (User *U : UselessPad->users()) { + assert(!isa<CleanupReturnInst>(U) && "Expected useless pad"); + assert((!isa<InvokeInst>(U) || + (getParentPad( + cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) == + UselessPad)) && + "Expected useless pad"); + if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U)) + Worklist.push_back(cast<Instruction>(U)); + } + } + } + + return UnwindDestToken; +} + +/// When we inline a basic block into an invoke, +/// we have to turn all of the calls that can throw into invokes. +/// This function analyze BB to see if there are any calls, and if so, +/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI +/// nodes in that block with the values specified in InvokeDestPHIValues. +static BasicBlock *HandleCallsInBlockInlinedThroughInvoke( + BasicBlock *BB, BasicBlock *UnwindEdge, + UnwindDestMemoTy *FuncletUnwindMap = nullptr) { + for (Instruction &I : llvm::make_early_inc_range(*BB)) { + // We only need to check for function calls: inlined invoke + // instructions require no special handling. + CallInst *CI = dyn_cast<CallInst>(&I); + + if (!CI || CI->doesNotThrow()) + continue; + + // We do not need to (and in fact, cannot) convert possibly throwing calls + // to @llvm.experimental_deoptimize (resp. @llvm.experimental.guard) into + // invokes. The caller's "segment" of the deoptimization continuation + // attached to the newly inlined @llvm.experimental_deoptimize + // (resp. @llvm.experimental.guard) call should contain the exception + // handling logic, if any. + if (auto *F = CI->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize || + F->getIntrinsicID() == Intrinsic::experimental_guard) + continue; + + if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) { + // This call is nested inside a funclet. If that funclet has an unwind + // destination within the inlinee, then unwinding out of this call would + // be UB. Rewriting this call to an invoke which targets the inlined + // invoke's unwind dest would give the call's parent funclet multiple + // unwind destinations, which is something that subsequent EH table + // generation can't handle and that the veirifer rejects. So when we + // see such a call, leave it as a call. 
+ auto *FuncletPad = cast<Instruction>(FuncletBundle->Inputs[0]); + Value *UnwindDestToken = + getUnwindDestToken(FuncletPad, *FuncletUnwindMap); + if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken)) + continue; +#ifndef NDEBUG + Instruction *MemoKey; + if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad)) + MemoKey = CatchPad->getCatchSwitch(); + else + MemoKey = FuncletPad; + assert(FuncletUnwindMap->count(MemoKey) && + (*FuncletUnwindMap)[MemoKey] == UnwindDestToken && + "must get memoized to avoid confusing later searches"); +#endif // NDEBUG + } + + changeToInvokeAndSplitBasicBlock(CI, UnwindEdge); + return BB; + } + return nullptr; +} + +/// If we inlined an invoke site, we need to convert calls +/// in the body of the inlined function into invokes. +/// +/// II is the invoke instruction being inlined. FirstNewBlock is the first +/// block of the inlined code (the last block is the end of the function), +/// and InlineCodeInfo is information about the code that got inlined. +static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock, + ClonedCodeInfo &InlinedCodeInfo) { + BasicBlock *InvokeDest = II->getUnwindDest(); + + Function *Caller = FirstNewBlock->getParent(); + + // The inlined code is currently at the end of the function, scan from the + // start of the inlined code to its end, checking for stuff we need to + // rewrite. + LandingPadInliningInfo Invoke(II); + + // Get all of the inlined landing pad instructions. + SmallPtrSet<LandingPadInst*, 16> InlinedLPads; + for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end(); + I != E; ++I) + if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) + InlinedLPads.insert(II->getLandingPadInst()); + + // Append the clauses from the outer landing pad instruction into the inlined + // landing pad instructions. + LandingPadInst *OuterLPad = Invoke.getLandingPadInst(); + for (LandingPadInst *InlinedLPad : InlinedLPads) { + unsigned OuterNum = OuterLPad->getNumClauses(); + InlinedLPad->reserveClauses(OuterNum); + for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx) + InlinedLPad->addClause(OuterLPad->getClause(OuterIdx)); + if (OuterLPad->isCleanup()) + InlinedLPad->setCleanup(true); + } + + for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); + BB != E; ++BB) { + if (InlinedCodeInfo.ContainsCalls) + if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( + &*BB, Invoke.getOuterResumeDest())) + // Update any PHI nodes in the exceptional block to indicate that there + // is now a new entry in them. + Invoke.addIncomingPHIValuesFor(NewBB); + + // Forward any resumes that are remaining here. + if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) + Invoke.forwardResume(RI, InlinedLPads); + } + + // Now that everything is happy, we have one final detail. The PHI nodes in + // the exception destination block still have entries due to the original + // invoke instruction. Eliminate these entries (which might even delete the + // PHI node) now. + InvokeDest->removePredecessor(II->getParent()); +} + +/// If we inlined an invoke site, we need to convert calls +/// in the body of the inlined function into invokes. +/// +/// II is the invoke instruction being inlined. FirstNewBlock is the first +/// block of the inlined code (the last block is the end of the function), +/// and InlineCodeInfo is information about the code that got inlined. 
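+/// This is the funclet-based (e.g. WinEH) counterpart of
+/// HandleInlinedLandingPad above.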
+static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, + ClonedCodeInfo &InlinedCodeInfo) { + BasicBlock *UnwindDest = II->getUnwindDest(); + Function *Caller = FirstNewBlock->getParent(); + + assert(UnwindDest->getFirstNonPHI()->isEHPad() && "unexpected BasicBlock!"); + + // If there are PHI nodes in the unwind destination block, we need to keep + // track of which values came into them from the invoke before removing the + // edge from this block. + SmallVector<Value *, 8> UnwindDestPHIValues; + BasicBlock *InvokeBB = II->getParent(); + for (PHINode &PHI : UnwindDest->phis()) { + // Save the value to use for this edge. + UnwindDestPHIValues.push_back(PHI.getIncomingValueForBlock(InvokeBB)); + } + + // Add incoming-PHI values to the unwind destination block for the given basic + // block, using the values for the original invoke's source block. + auto UpdatePHINodes = [&](BasicBlock *Src) { + BasicBlock::iterator I = UnwindDest->begin(); + for (Value *V : UnwindDestPHIValues) { + PHINode *PHI = cast<PHINode>(I); + PHI->addIncoming(V, Src); + ++I; + } + }; + + // This connects all the instructions which 'unwind to caller' to the invoke + // destination. + UnwindDestMemoTy FuncletUnwindMap; + for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); + BB != E; ++BB) { + if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) { + if (CRI->unwindsToCaller()) { + auto *CleanupPad = CRI->getCleanupPad(); + CleanupReturnInst::Create(CleanupPad, UnwindDest, CRI); + CRI->eraseFromParent(); + UpdatePHINodes(&*BB); + // Finding a cleanupret with an unwind destination would confuse + // subsequent calls to getUnwindDestToken, so map the cleanuppad + // to short-circuit any such calls and recognize this as an "unwind + // to caller" cleanup. + assert(!FuncletUnwindMap.count(CleanupPad) || + isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad])); + FuncletUnwindMap[CleanupPad] = + ConstantTokenNone::get(Caller->getContext()); + } + } + + Instruction *I = BB->getFirstNonPHI(); + if (!I->isEHPad()) + continue; + + Instruction *Replacement = nullptr; + if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) { + if (CatchSwitch->unwindsToCaller()) { + Value *UnwindDestToken; + if (auto *ParentPad = + dyn_cast<Instruction>(CatchSwitch->getParentPad())) { + // This catchswitch is nested inside another funclet. If that + // funclet has an unwind destination within the inlinee, then + // unwinding out of this catchswitch would be UB. Rewriting this + // catchswitch to unwind to the inlined invoke's unwind dest would + // give the parent funclet multiple unwind destinations, which is + // something that subsequent EH table generation can't handle and + // that the veirifer rejects. So when we see such a call, leave it + // as "unwind to caller". + UnwindDestToken = getUnwindDestToken(ParentPad, FuncletUnwindMap); + if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken)) + continue; + } else { + // This catchswitch has no parent to inherit constraints from, and + // none of its descendants can have an unwind edge that exits it and + // targets another funclet in the inlinee. It may or may not have a + // descendant that definitively has an unwind to caller. In either + // case, we'll have to assume that any unwinds out of it may need to + // be routed to the caller, so treat it as though it has a definitive + // unwind to caller. 
+ UnwindDestToken = ConstantTokenNone::get(Caller->getContext()); + } + auto *NewCatchSwitch = CatchSwitchInst::Create( + CatchSwitch->getParentPad(), UnwindDest, + CatchSwitch->getNumHandlers(), CatchSwitch->getName(), + CatchSwitch); + for (BasicBlock *PadBB : CatchSwitch->handlers()) + NewCatchSwitch->addHandler(PadBB); + // Propagate info for the old catchswitch over to the new one in + // the unwind map. This also serves to short-circuit any subsequent + // checks for the unwind dest of this catchswitch, which would get + // confused if they found the outer handler in the callee. + FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken; + Replacement = NewCatchSwitch; + } + } else if (!isa<FuncletPadInst>(I)) { + llvm_unreachable("unexpected EHPad!"); + } + + if (Replacement) { + Replacement->takeName(I); + I->replaceAllUsesWith(Replacement); + I->eraseFromParent(); + UpdatePHINodes(&*BB); + } + } + + if (InlinedCodeInfo.ContainsCalls) + for (Function::iterator BB = FirstNewBlock->getIterator(), + E = Caller->end(); + BB != E; ++BB) + if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( + &*BB, UnwindDest, &FuncletUnwindMap)) + // Update any PHI nodes in the exceptional block to indicate that there + // is now a new entry in them. + UpdatePHINodes(NewBB); + + // Now that everything is happy, we have one final detail. The PHI nodes in + // the exception destination block still have entries due to the original + // invoke instruction. Eliminate these entries (which might even delete the + // PHI node) now. + UnwindDest->removePredecessor(InvokeBB); +} + +static bool haveCommonPrefix(MDNode *MIBStackContext, + MDNode *CallsiteStackContext) { + assert(MIBStackContext->getNumOperands() > 0 && + CallsiteStackContext->getNumOperands() > 0); + // Because of the context trimming performed during matching, the callsite + // context could have more stack ids than the MIB. We match up to the end of + // the shortest stack context. + for (auto MIBStackIter = MIBStackContext->op_begin(), + CallsiteStackIter = CallsiteStackContext->op_begin(); + MIBStackIter != MIBStackContext->op_end() && + CallsiteStackIter != CallsiteStackContext->op_end(); + MIBStackIter++, CallsiteStackIter++) { + auto *Val1 = mdconst::dyn_extract<ConstantInt>(*MIBStackIter); + auto *Val2 = mdconst::dyn_extract<ConstantInt>(*CallsiteStackIter); + assert(Val1 && Val2); + if (Val1->getZExtValue() != Val2->getZExtValue()) + return false; + } + return true; +} + +static void removeMemProfMetadata(CallBase *Call) { + Call->setMetadata(LLVMContext::MD_memprof, nullptr); +} + +static void removeCallsiteMetadata(CallBase *Call) { + Call->setMetadata(LLVMContext::MD_callsite, nullptr); +} + +static void updateMemprofMetadata(CallBase *CI, + const std::vector<Metadata *> &MIBList) { + assert(!MIBList.empty()); + // Remove existing memprof, which will either be replaced or may not be needed + // if we are able to use a single allocation type function attribute. + removeMemProfMetadata(CI); + CallStackTrie CallStack; + for (Metadata *MIB : MIBList) + CallStack.addCallStack(cast<MDNode>(MIB)); + bool MemprofMDAttached = CallStack.buildAndAttachMIBMetadata(CI); + assert(MemprofMDAttached == CI->hasMetadata(LLVMContext::MD_memprof)); + if (!MemprofMDAttached) + // If we used a function attribute remove the callsite metadata as well. 
+ removeCallsiteMetadata(CI); +} + +// Update the metadata on the inlined copy ClonedCall of a call OrigCall in the +// inlined callee body, based on the callsite metadata InlinedCallsiteMD from +// the call that was inlined. +static void propagateMemProfHelper(const CallBase *OrigCall, + CallBase *ClonedCall, + MDNode *InlinedCallsiteMD) { + MDNode *OrigCallsiteMD = ClonedCall->getMetadata(LLVMContext::MD_callsite); + MDNode *ClonedCallsiteMD = nullptr; + // Check if the call originally had callsite metadata, and update it for the + // new call in the inlined body. + if (OrigCallsiteMD) { + // The cloned call's context is now the concatenation of the original call's + // callsite metadata and the callsite metadata on the call where it was + // inlined. + ClonedCallsiteMD = MDNode::concatenate(OrigCallsiteMD, InlinedCallsiteMD); + ClonedCall->setMetadata(LLVMContext::MD_callsite, ClonedCallsiteMD); + } + + // Update any memprof metadata on the cloned call. + MDNode *OrigMemProfMD = ClonedCall->getMetadata(LLVMContext::MD_memprof); + if (!OrigMemProfMD) + return; + // We currently expect that allocations with memprof metadata also have + // callsite metadata for the allocation's part of the context. + assert(OrigCallsiteMD); + + // New call's MIB list. + std::vector<Metadata *> NewMIBList; + + // For each MIB metadata, check if its call stack context starts with the + // new clone's callsite metadata. If so, that MIB goes onto the cloned call in + // the inlined body. If not, it stays on the out-of-line original call. + for (auto &MIBOp : OrigMemProfMD->operands()) { + MDNode *MIB = dyn_cast<MDNode>(MIBOp); + // Stack is first operand of MIB. + MDNode *StackMD = getMIBStackNode(MIB); + assert(StackMD); + // See if the new cloned callsite context matches this profiled context. + if (haveCommonPrefix(StackMD, ClonedCallsiteMD)) + // Add it to the cloned call's MIB list. + NewMIBList.push_back(MIB); + } + if (NewMIBList.empty()) { + removeMemProfMetadata(ClonedCall); + removeCallsiteMetadata(ClonedCall); + return; + } + if (NewMIBList.size() < OrigMemProfMD->getNumOperands()) + updateMemprofMetadata(ClonedCall, NewMIBList); +} + +// Update memprof related metadata (!memprof and !callsite) based on the +// inlining of Callee into the callsite at CB. The updates include merging the +// inlined callee's callsite metadata with that of the inlined call, +// and moving the subset of any memprof contexts to the inlined callee +// allocations if they match the new inlined call stack. +// FIXME: Replace memprof metadata with function attribute if all MIB end up +// having the same behavior. Do other context trimming/merging optimizations +// too. +static void +propagateMemProfMetadata(Function *Callee, CallBase &CB, + bool ContainsMemProfMetadata, + const ValueMap<const Value *, WeakTrackingVH> &VMap) { + MDNode *CallsiteMD = CB.getMetadata(LLVMContext::MD_callsite); + // Only need to update if the inlined callsite had callsite metadata, or if + // there was any memprof metadata inlined. + if (!CallsiteMD && !ContainsMemProfMetadata) + return; + + // Propagate metadata onto the cloned calls in the inlined callee. + for (const auto &Entry : VMap) { + // See if this is a call that has been inlined and remapped, and not + // simplified away in the process. 
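+ // (Entry.first is the original value in the callee; Entry.second is its
+ // clone in the caller, or null if the clone was simplified away.)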
+ auto *OrigCall = dyn_cast_or_null<CallBase>(Entry.first); + auto *ClonedCall = dyn_cast_or_null<CallBase>(Entry.second); + if (!OrigCall || !ClonedCall) + continue; + // If the inlined callsite did not have any callsite metadata, then it isn't + // involved in any profiled call contexts, and we can remove any memprof + // metadata on the cloned call. + if (!CallsiteMD) { + removeMemProfMetadata(ClonedCall); + removeCallsiteMetadata(ClonedCall); + continue; + } + propagateMemProfHelper(OrigCall, ClonedCall, CallsiteMD); + } +} + +/// When inlining a call site that has !llvm.mem.parallel_loop_access, +/// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should +/// be propagated to all memory-accessing cloned instructions. +static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart, + Function::iterator FEnd) { + MDNode *MemParallelLoopAccess = + CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access); + MDNode *AccessGroup = CB.getMetadata(LLVMContext::MD_access_group); + MDNode *AliasScope = CB.getMetadata(LLVMContext::MD_alias_scope); + MDNode *NoAlias = CB.getMetadata(LLVMContext::MD_noalias); + if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias) + return; + + for (BasicBlock &BB : make_range(FStart, FEnd)) { + for (Instruction &I : BB) { + // This metadata is only relevant for instructions that access memory. + if (!I.mayReadOrWriteMemory()) + continue; + + if (MemParallelLoopAccess) { + // TODO: This probably should not overwrite MemParalleLoopAccess. + MemParallelLoopAccess = MDNode::concatenate( + I.getMetadata(LLVMContext::MD_mem_parallel_loop_access), + MemParallelLoopAccess); + I.setMetadata(LLVMContext::MD_mem_parallel_loop_access, + MemParallelLoopAccess); + } + + if (AccessGroup) + I.setMetadata(LLVMContext::MD_access_group, uniteAccessGroups( + I.getMetadata(LLVMContext::MD_access_group), AccessGroup)); + + if (AliasScope) + I.setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate( + I.getMetadata(LLVMContext::MD_alias_scope), AliasScope)); + + if (NoAlias) + I.setMetadata(LLVMContext::MD_noalias, MDNode::concatenate( + I.getMetadata(LLVMContext::MD_noalias), NoAlias)); + } + } +} + +/// Bundle operands of the inlined function must be added to inlined call sites. +static void PropagateOperandBundles(Function::iterator InlinedBB, + Instruction *CallSiteEHPad) { + for (Instruction &II : llvm::make_early_inc_range(*InlinedBB)) { + CallBase *I = dyn_cast<CallBase>(&II); + if (!I) + continue; + // Skip call sites which already have a "funclet" bundle. + if (I->getOperandBundle(LLVMContext::OB_funclet)) + continue; + // Skip call sites which are nounwind intrinsics (as long as they don't + // lower into regular function calls in the course of IR transformations). + auto *CalledFn = + dyn_cast<Function>(I->getCalledOperand()->stripPointerCasts()); + if (CalledFn && CalledFn->isIntrinsic() && I->doesNotThrow() && + !IntrinsicInst::mayLowerToFunctionCall(CalledFn->getIntrinsicID())) + continue; + + SmallVector<OperandBundleDef, 1> OpBundles; + I->getOperandBundlesAsDefs(OpBundles); + OpBundles.emplace_back("funclet", CallSiteEHPad); + + Instruction *NewInst = CallBase::Create(I, OpBundles, I); + NewInst->takeName(I); + I->replaceAllUsesWith(NewInst); + I->eraseFromParent(); + } +} + +namespace { +/// Utility for cloning !noalias and !alias.scope metadata. When a code region +/// using scoped alias metadata is inlined, the aliasing relationships may not +/// hold between the two version. 
It is necessary to create a deep clone of the +/// metadata, putting the two versions in separate scope domains. +class ScopedAliasMetadataDeepCloner { + using MetadataMap = DenseMap<const MDNode *, TrackingMDNodeRef>; + SetVector<const MDNode *> MD; + MetadataMap MDMap; + void addRecursiveMetadataUses(); + +public: + ScopedAliasMetadataDeepCloner(const Function *F); + + /// Create a new clone of the scoped alias metadata, which will be used by + /// subsequent remap() calls. + void clone(); + + /// Remap instructions in the given range from the original to the cloned + /// metadata. + void remap(Function::iterator FStart, Function::iterator FEnd); +}; +} // namespace + +ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner( + const Function *F) { + for (const BasicBlock &BB : *F) { + for (const Instruction &I : BB) { + if (const MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope)) + MD.insert(M); + if (const MDNode *M = I.getMetadata(LLVMContext::MD_noalias)) + MD.insert(M); + + // We also need to clone the metadata in noalias intrinsics. + if (const auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) + MD.insert(Decl->getScopeList()); + } + } + addRecursiveMetadataUses(); +} + +void ScopedAliasMetadataDeepCloner::addRecursiveMetadataUses() { + SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end()); + while (!Queue.empty()) { + const MDNode *M = cast<MDNode>(Queue.pop_back_val()); + for (const Metadata *Op : M->operands()) + if (const MDNode *OpMD = dyn_cast<MDNode>(Op)) + if (MD.insert(OpMD)) + Queue.push_back(OpMD); + } +} + +void ScopedAliasMetadataDeepCloner::clone() { + assert(MDMap.empty() && "clone() already called ?"); + + SmallVector<TempMDTuple, 16> DummyNodes; + for (const MDNode *I : MD) { + DummyNodes.push_back(MDTuple::getTemporary(I->getContext(), std::nullopt)); + MDMap[I].reset(DummyNodes.back().get()); + } + + // Create new metadata nodes to replace the dummy nodes, replacing old + // metadata references with either a dummy node or an already-created new + // node. + SmallVector<Metadata *, 4> NewOps; + for (const MDNode *I : MD) { + for (const Metadata *Op : I->operands()) { + if (const MDNode *M = dyn_cast<MDNode>(Op)) + NewOps.push_back(MDMap[M]); + else + NewOps.push_back(const_cast<Metadata *>(Op)); + } + + MDNode *NewM = MDNode::get(I->getContext(), NewOps); + MDTuple *TempM = cast<MDTuple>(MDMap[I]); + assert(TempM->isTemporary() && "Expected temporary node"); + + TempM->replaceAllUsesWith(NewM); + NewOps.clear(); + } +} + +void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart, + Function::iterator FEnd) { + if (MDMap.empty()) + return; // Nothing to do. + + for (BasicBlock &BB : make_range(FStart, FEnd)) { + for (Instruction &I : BB) { + // TODO: The null checks for the MDMap.lookup() results should no longer + // be necessary. 
+ if (MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope)) + if (MDNode *MNew = MDMap.lookup(M)) + I.setMetadata(LLVMContext::MD_alias_scope, MNew); + + if (MDNode *M = I.getMetadata(LLVMContext::MD_noalias)) + if (MDNode *MNew = MDMap.lookup(M)) + I.setMetadata(LLVMContext::MD_noalias, MNew); + + if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) + if (MDNode *MNew = MDMap.lookup(Decl->getScopeList())) + Decl->setScopeList(MNew); + } + } +} + +/// If the inlined function has noalias arguments, +/// then add new alias scopes for each noalias argument, tag the mapped noalias +/// parameters with noalias metadata specifying the new scope, and tag all +/// non-derived loads, stores and memory intrinsics with the new alias scopes. +static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap, + const DataLayout &DL, AAResults *CalleeAAR, + ClonedCodeInfo &InlinedFunctionInfo) { + if (!EnableNoAliasConversion) + return; + + const Function *CalledFunc = CB.getCalledFunction(); + SmallVector<const Argument *, 4> NoAliasArgs; + + for (const Argument &Arg : CalledFunc->args()) + if (CB.paramHasAttr(Arg.getArgNo(), Attribute::NoAlias) && !Arg.use_empty()) + NoAliasArgs.push_back(&Arg); + + if (NoAliasArgs.empty()) + return; + + // To do a good job, if a noalias variable is captured, we need to know if + // the capture point dominates the particular use we're considering. + DominatorTree DT; + DT.recalculate(const_cast<Function&>(*CalledFunc)); + + // noalias indicates that pointer values based on the argument do not alias + // pointer values which are not based on it. So we add a new "scope" for each + // noalias function argument. Accesses using pointers based on that argument + // become part of that alias scope, accesses using pointers not based on that + // argument are tagged as noalias with that scope. + + DenseMap<const Argument *, MDNode *> NewScopes; + MDBuilder MDB(CalledFunc->getContext()); + + // Create a new scope domain for this function. + MDNode *NewDomain = + MDB.createAnonymousAliasScopeDomain(CalledFunc->getName()); + for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) { + const Argument *A = NoAliasArgs[i]; + + std::string Name = std::string(CalledFunc->getName()); + if (A->hasName()) { + Name += ": %"; + Name += A->getName(); + } else { + Name += ": argument "; + Name += utostr(i); + } + + // Note: We always create a new anonymous root here. This is true regardless + // of the linkage of the callee because the aliasing "scope" is not just a + // property of the callee, but also all control dependencies in the caller. + MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); + NewScopes.insert(std::make_pair(A, NewScope)); + + if (UseNoAliasIntrinsic) { + // Introduce a llvm.experimental.noalias.scope.decl for the noalias + // argument. + MDNode *AScopeList = MDNode::get(CalledFunc->getContext(), NewScope); + auto *NoAliasDecl = + IRBuilder<>(&CB).CreateNoAliasScopeDeclaration(AScopeList); + // Ignore the result for now. The result will be used when the + // llvm.noalias intrinsic is introduced. + (void)NoAliasDecl; + } + } + + // Iterate over all new instructions in the map; for all memory-access + // instructions, add the alias scope metadata. 
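+  // Illustrative example (hypothetical IR, names are placeholders): for an
+  // inlined callee  define void @f(ptr noalias %p, ptr %q)  an access whose
+  // only underlying object is %p is typically tagged
+  //   %v = load i32, ptr %p, !alias.scope !1
+  // while accesses known not to be derived from %p receive  !noalias !1  for
+  // the same freshly created anonymous scope rooted in the new domain.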
+ for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); + VMI != VMIE; ++VMI) { + if (const Instruction *I = dyn_cast<Instruction>(VMI->first)) { + if (!VMI->second) + continue; + + Instruction *NI = dyn_cast<Instruction>(VMI->second); + if (!NI || InlinedFunctionInfo.isSimplified(I, NI)) + continue; + + bool IsArgMemOnlyCall = false, IsFuncCall = false; + SmallVector<const Value *, 2> PtrArgs; + + if (const LoadInst *LI = dyn_cast<LoadInst>(I)) + PtrArgs.push_back(LI->getPointerOperand()); + else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) + PtrArgs.push_back(SI->getPointerOperand()); + else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(I)) + PtrArgs.push_back(VAAI->getPointerOperand()); + else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I)) + PtrArgs.push_back(CXI->getPointerOperand()); + else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) + PtrArgs.push_back(RMWI->getPointerOperand()); + else if (const auto *Call = dyn_cast<CallBase>(I)) { + // If we know that the call does not access memory, then we'll still + // know that about the inlined clone of this call site, and we don't + // need to add metadata. + if (Call->doesNotAccessMemory()) + continue; + + IsFuncCall = true; + if (CalleeAAR) { + MemoryEffects ME = CalleeAAR->getMemoryEffects(Call); + + // We'll retain this knowledge without additional metadata. + if (ME.onlyAccessesInaccessibleMem()) + continue; + + if (ME.onlyAccessesArgPointees()) + IsArgMemOnlyCall = true; + } + + for (Value *Arg : Call->args()) { + // Only care about pointer arguments. If a noalias argument is + // accessed through a non-pointer argument, it must be captured + // first (e.g. via ptrtoint), and we protect against captures below. + if (!Arg->getType()->isPointerTy()) + continue; + + PtrArgs.push_back(Arg); + } + } + + // If we found no pointers, then this instruction is not suitable for + // pairing with an instruction to receive aliasing metadata. + // However, if this is a call, this we might just alias with none of the + // noalias arguments. + if (PtrArgs.empty() && !IsFuncCall) + continue; + + // It is possible that there is only one underlying object, but you + // need to go through several PHIs to see it, and thus could be + // repeated in the Objects list. + SmallPtrSet<const Value *, 4> ObjSet; + SmallVector<Metadata *, 4> Scopes, NoAliases; + + SmallSetVector<const Argument *, 4> NAPtrArgs; + for (const Value *V : PtrArgs) { + SmallVector<const Value *, 4> Objects; + getUnderlyingObjects(V, Objects, /* LI = */ nullptr); + + for (const Value *O : Objects) + ObjSet.insert(O); + } + + // Figure out if we're derived from anything that is not a noalias + // argument. + bool RequiresNoCaptureBefore = false, UsesAliasingPtr = false, + UsesUnknownObject = false; + for (const Value *V : ObjSet) { + // Is this value a constant that cannot be derived from any pointer + // value (we need to exclude constant expressions, for example, that + // are formed from arithmetic on global symbols). + bool IsNonPtrConst = isa<ConstantInt>(V) || isa<ConstantFP>(V) || + isa<ConstantPointerNull>(V) || + isa<ConstantDataVector>(V) || isa<UndefValue>(V); + if (IsNonPtrConst) + continue; + + // If this is anything other than a noalias argument, then we cannot + // completely describe the aliasing properties using alias.scope + // metadata (and, thus, won't add any). 
+ if (const Argument *A = dyn_cast<Argument>(V)) { + if (!CB.paramHasAttr(A->getArgNo(), Attribute::NoAlias)) + UsesAliasingPtr = true; + } else { + UsesAliasingPtr = true; + } + + if (isEscapeSource(V)) { + // An escape source can only alias with a noalias argument if it has + // been captured beforehand. + RequiresNoCaptureBefore = true; + } else if (!isa<Argument>(V) && !isIdentifiedObject(V)) { + // If this is neither an escape source, nor some identified object + // (which cannot directly alias a noalias argument), nor some other + // argument (which, by definition, also cannot alias a noalias + // argument), conservatively do not make any assumptions. + UsesUnknownObject = true; + } + } + + // Nothing we can do if the used underlying object cannot be reliably + // determined. + if (UsesUnknownObject) + continue; + + // A function call can always get captured noalias pointers (via other + // parameters, globals, etc.). + if (IsFuncCall && !IsArgMemOnlyCall) + RequiresNoCaptureBefore = true; + + // First, we want to figure out all of the sets with which we definitely + // don't alias. Iterate over all noalias set, and add those for which: + // 1. The noalias argument is not in the set of objects from which we + // definitely derive. + // 2. The noalias argument has not yet been captured. + // An arbitrary function that might load pointers could see captured + // noalias arguments via other noalias arguments or globals, and so we + // must always check for prior capture. + for (const Argument *A : NoAliasArgs) { + if (ObjSet.contains(A)) + continue; // May be based on a noalias argument. + + // It might be tempting to skip the PointerMayBeCapturedBefore check if + // A->hasNoCaptureAttr() is true, but this is incorrect because + // nocapture only guarantees that no copies outlive the function, not + // that the value cannot be locally captured. + if (!RequiresNoCaptureBefore || + !PointerMayBeCapturedBefore(A, /* ReturnCaptures */ false, + /* StoreCaptures */ false, I, &DT)) + NoAliases.push_back(NewScopes[A]); + } + + if (!NoAliases.empty()) + NI->setMetadata(LLVMContext::MD_noalias, + MDNode::concatenate( + NI->getMetadata(LLVMContext::MD_noalias), + MDNode::get(CalledFunc->getContext(), NoAliases))); + + // Next, we want to figure out all of the sets to which we might belong. + // We might belong to a set if the noalias argument is in the set of + // underlying objects. If there is some non-noalias argument in our list + // of underlying objects, then we cannot add a scope because the fact + // that some access does not alias with any set of our noalias arguments + // cannot itself guarantee that it does not alias with this access + // (because there is some pointer of unknown origin involved and the + // other access might also depend on this pointer). We also cannot add + // scopes to arbitrary functions unless we know they don't access any + // non-parameter pointer-values. 
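+      // Descriptive note: scopes are only added when every underlying object
+      // is either a noalias argument or a non-pointer constant, and for calls
+      // additionally only when the callee accesses at most its argument
+      // pointees.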
+ bool CanAddScopes = !UsesAliasingPtr; + if (CanAddScopes && IsFuncCall) + CanAddScopes = IsArgMemOnlyCall; + + if (CanAddScopes) + for (const Argument *A : NoAliasArgs) { + if (ObjSet.count(A)) + Scopes.push_back(NewScopes[A]); + } + + if (!Scopes.empty()) + NI->setMetadata( + LLVMContext::MD_alias_scope, + MDNode::concatenate(NI->getMetadata(LLVMContext::MD_alias_scope), + MDNode::get(CalledFunc->getContext(), Scopes))); + } + } +} + +static bool MayContainThrowingOrExitingCall(Instruction *Begin, + Instruction *End) { + + assert(Begin->getParent() == End->getParent() && + "Expected to be in same basic block!"); + return !llvm::isGuaranteedToTransferExecutionToSuccessor( + Begin->getIterator(), End->getIterator(), InlinerAttributeWindow + 1); +} + +static AttrBuilder IdentifyValidAttributes(CallBase &CB) { + + AttrBuilder AB(CB.getContext(), CB.getAttributes().getRetAttrs()); + if (!AB.hasAttributes()) + return AB; + AttrBuilder Valid(CB.getContext()); + // Only allow these white listed attributes to be propagated back to the + // callee. This is because other attributes may only be valid on the call + // itself, i.e. attributes such as signext and zeroext. + if (auto DerefBytes = AB.getDereferenceableBytes()) + Valid.addDereferenceableAttr(DerefBytes); + if (auto DerefOrNullBytes = AB.getDereferenceableOrNullBytes()) + Valid.addDereferenceableOrNullAttr(DerefOrNullBytes); + if (AB.contains(Attribute::NoAlias)) + Valid.addAttribute(Attribute::NoAlias); + if (AB.contains(Attribute::NonNull)) + Valid.addAttribute(Attribute::NonNull); + return Valid; +} + +static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) { + if (!UpdateReturnAttributes) + return; + + AttrBuilder Valid = IdentifyValidAttributes(CB); + if (!Valid.hasAttributes()) + return; + auto *CalledFunction = CB.getCalledFunction(); + auto &Context = CalledFunction->getContext(); + + for (auto &BB : *CalledFunction) { + auto *RI = dyn_cast<ReturnInst>(BB.getTerminator()); + if (!RI || !isa<CallBase>(RI->getOperand(0))) + continue; + auto *RetVal = cast<CallBase>(RI->getOperand(0)); + // Check that the cloned RetVal exists and is a call, otherwise we cannot + // add the attributes on the cloned RetVal. Simplification during inlining + // could have transformed the cloned instruction. + auto *NewRetVal = dyn_cast_or_null<CallBase>(VMap.lookup(RetVal)); + if (!NewRetVal) + continue; + // Backward propagation of attributes to the returned value may be incorrect + // if it is control flow dependent. + // Consider: + // @callee { + // %rv = call @foo() + // %rv2 = call @bar() + // if (%rv2 != null) + // return %rv2 + // if (%rv == null) + // exit() + // return %rv + // } + // caller() { + // %val = call nonnull @callee() + // } + // Here we cannot add the nonnull attribute on either foo or bar. So, we + // limit the check to both RetVal and RI are in the same basic block and + // there are no throwing/exiting instructions between these instructions. + if (RI->getParent() != RetVal->getParent() || + MayContainThrowingOrExitingCall(RetVal, RI)) + continue; + // Add to the existing attributes of NewRetVal, i.e. the cloned call + // instruction. + // NB! When we have the same attribute already existing on NewRetVal, but + // with a differing value, the AttributeList's merge API honours the already + // existing attribute value (i.e. attributes such as dereferenceable, + // dereferenceable_or_null etc). See AttrBuilder::merge for more details. 
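+    // Illustrative example (hypothetical IR): inlining
+    //   %v = call nonnull dereferenceable(8) ptr @callee()
+    // allows nonnull and dereferenceable(8) to be re-attached to the cloned
+    // call feeding the callee's return, since both attributes are in the
+    // white-listed set built by IdentifyValidAttributes.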
+ AttributeList AL = NewRetVal->getAttributes(); + AttributeList NewAL = AL.addRetAttributes(Context, Valid); + NewRetVal->setAttributes(NewAL); + } +} + +/// If the inlined function has non-byval align arguments, then +/// add @llvm.assume-based alignment assumptions to preserve this information. +static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) { + if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache) + return; + + AssumptionCache *AC = &IFI.GetAssumptionCache(*CB.getCaller()); + auto &DL = CB.getCaller()->getParent()->getDataLayout(); + + // To avoid inserting redundant assumptions, we should check for assumptions + // already in the caller. To do this, we might need a DT of the caller. + DominatorTree DT; + bool DTCalculated = false; + + Function *CalledFunc = CB.getCalledFunction(); + for (Argument &Arg : CalledFunc->args()) { + if (!Arg.getType()->isPointerTy() || Arg.hasPassPointeeByValueCopyAttr() || + Arg.hasNUses(0)) + continue; + MaybeAlign Alignment = Arg.getParamAlign(); + if (!Alignment) + continue; + + if (!DTCalculated) { + DT.recalculate(*CB.getCaller()); + DTCalculated = true; + } + // If we can already prove the asserted alignment in the context of the + // caller, then don't bother inserting the assumption. + Value *ArgVal = CB.getArgOperand(Arg.getArgNo()); + if (getKnownAlignment(ArgVal, DL, &CB, AC, &DT) >= *Alignment) + continue; + + CallInst *NewAsmp = IRBuilder<>(&CB).CreateAlignmentAssumption( + DL, ArgVal, Alignment->value()); + AC->registerAssumption(cast<AssumeInst>(NewAsmp)); + } +} + +/// Once we have cloned code over from a callee into the caller, +/// update the specified callgraph to reflect the changes we made. +/// Note that it's possible that not all code was copied over, so only +/// some edges of the callgraph may remain. +static void UpdateCallGraphAfterInlining(CallBase &CB, + Function::iterator FirstNewBlock, + ValueToValueMapTy &VMap, + InlineFunctionInfo &IFI) { + CallGraph &CG = *IFI.CG; + const Function *Caller = CB.getCaller(); + const Function *Callee = CB.getCalledFunction(); + CallGraphNode *CalleeNode = CG[Callee]; + CallGraphNode *CallerNode = CG[Caller]; + + // Since we inlined some uninlined call sites in the callee into the caller, + // add edges from the caller to all of the callees of the callee. + CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end(); + + // Consider the case where CalleeNode == CallerNode. + CallGraphNode::CalledFunctionsVector CallCache; + if (CalleeNode == CallerNode) { + CallCache.assign(I, E); + I = CallCache.begin(); + E = CallCache.end(); + } + + for (; I != E; ++I) { + // Skip 'refererence' call records. + if (!I->first) + continue; + + const Value *OrigCall = *I->first; + + ValueToValueMapTy::iterator VMI = VMap.find(OrigCall); + // Only copy the edge if the call was inlined! + if (VMI == VMap.end() || VMI->second == nullptr) + continue; + + // If the call was inlined, but then constant folded, there is no edge to + // add. Check for this case. + auto *NewCall = dyn_cast<CallBase>(VMI->second); + if (!NewCall) + continue; + + // We do not treat intrinsic calls like real function calls because we + // expect them to become inline code; do not add an edge for an intrinsic. + if (NewCall->getCalledFunction() && + NewCall->getCalledFunction()->isIntrinsic()) + continue; + + // Remember that this call site got inlined for the client of + // InlineFunction. 
+ IFI.InlinedCalls.push_back(NewCall); + + // It's possible that inlining the callsite will cause it to go from an + // indirect to a direct call by resolving a function pointer. If this + // happens, set the callee of the new call site to a more precise + // destination. This can also happen if the call graph node of the caller + // was just unnecessarily imprecise. + if (!I->second->getFunction()) + if (Function *F = NewCall->getCalledFunction()) { + // Indirect call site resolved to direct call. + CallerNode->addCalledFunction(NewCall, CG[F]); + + continue; + } + + CallerNode->addCalledFunction(NewCall, I->second); + } + + // Update the call graph by deleting the edge from Callee to Caller. We must + // do this after the loop above in case Caller and Callee are the same. + CallerNode->removeCallEdgeFor(*cast<CallBase>(&CB)); +} + +static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src, + Module *M, BasicBlock *InsertBlock, + InlineFunctionInfo &IFI) { + IRBuilder<> Builder(InsertBlock, InsertBlock->begin()); + + Value *Size = + Builder.getInt64(M->getDataLayout().getTypeStoreSize(ByValType)); + + // Always generate a memcpy of alignment 1 here because we don't know + // the alignment of the src pointer. Other optimizations can infer + // better alignment. + Builder.CreateMemCpy(Dst, /*DstAlign*/ Align(1), Src, + /*SrcAlign*/ Align(1), Size); +} + +/// When inlining a call site that has a byval argument, +/// we have to make the implicit memcpy explicit by adding it. +static Value *HandleByValArgument(Type *ByValType, Value *Arg, + Instruction *TheCall, + const Function *CalledFunc, + InlineFunctionInfo &IFI, + MaybeAlign ByValAlignment) { + assert(cast<PointerType>(Arg->getType()) + ->isOpaqueOrPointeeTypeMatches(ByValType)); + Function *Caller = TheCall->getFunction(); + const DataLayout &DL = Caller->getParent()->getDataLayout(); + + // If the called function is readonly, then it could not mutate the caller's + // copy of the byval'd memory. In this case, it is safe to elide the copy and + // temporary. + if (CalledFunc->onlyReadsMemory()) { + // If the byval argument has a specified alignment that is greater than the + // passed in pointer, then we either have to round up the input pointer or + // give up on this transformation. + if (ByValAlignment.valueOrOne() == 1) + return Arg; + + AssumptionCache *AC = + IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr; + + // If the pointer is already known to be sufficiently aligned, or if we can + // round it up to a larger alignment, then we don't need a temporary. + if (getOrEnforceKnownAlignment(Arg, *ByValAlignment, DL, TheCall, AC) >= + *ByValAlignment) + return Arg; + + // Otherwise, we have to make a memcpy to get a safe alignment. This is bad + // for code quality, but rarely happens and is required for correctness. + } + + // Create the alloca. If we have DataLayout, use nice alignment. + Align Alignment = DL.getPrefTypeAlign(ByValType); + + // If the byval had an alignment specified, we *must* use at least that + // alignment, as it is required by the byval argument (and uses of the + // pointer inside the callee). + if (ByValAlignment) + Alignment = std::max(Alignment, *ByValAlignment); + + Value *NewAlloca = + new AllocaInst(ByValType, DL.getAllocaAddrSpace(), nullptr, Alignment, + Arg->getName(), &*Caller->begin()->begin()); + IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca)); + + // Uses of the argument in the function should use our new alloca + // instead. 
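+  // Illustrative example (hypothetical IR, names are placeholders): for a
+  // call site  call void @g(ptr byval(%struct.S) align 8 %p)  this creates
+  //   %p.copy = alloca %struct.S, align 8
+  // in the caller's entry block, and HandleByValArgumentInit later emits an
+  // @llvm.memcpy from %p into that alloca, making the implicit byval copy
+  // explicit.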
+ return NewAlloca; +} + +// Check whether this Value is used by a lifetime intrinsic. +static bool isUsedByLifetimeMarker(Value *V) { + for (User *U : V->users()) + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) + if (II->isLifetimeStartOrEnd()) + return true; + return false; +} + +// Check whether the given alloca already has +// lifetime.start or lifetime.end intrinsics. +static bool hasLifetimeMarkers(AllocaInst *AI) { + Type *Ty = AI->getType(); + Type *Int8PtrTy = Type::getInt8PtrTy(Ty->getContext(), + Ty->getPointerAddressSpace()); + if (Ty == Int8PtrTy) + return isUsedByLifetimeMarker(AI); + + // Do a scan to find all the casts to i8*. + for (User *U : AI->users()) { + if (U->getType() != Int8PtrTy) continue; + if (U->stripPointerCasts() != AI) continue; + if (isUsedByLifetimeMarker(U)) + return true; + } + return false; +} + +/// Return the result of AI->isStaticAlloca() if AI were moved to the entry +/// block. Allocas used in inalloca calls and allocas of dynamic array size +/// cannot be static. +static bool allocaWouldBeStaticInEntry(const AllocaInst *AI ) { + return isa<Constant>(AI->getArraySize()) && !AI->isUsedWithInAlloca(); +} + +/// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL +/// inlined at \p InlinedAt. \p IANodes is an inlined-at cache. +static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt, + LLVMContext &Ctx, + DenseMap<const MDNode *, MDNode *> &IANodes) { + auto IA = DebugLoc::appendInlinedAt(OrigDL, InlinedAt, Ctx, IANodes); + return DILocation::get(Ctx, OrigDL.getLine(), OrigDL.getCol(), + OrigDL.getScope(), IA); +} + +/// Update inlined instructions' line numbers to +/// to encode location where these instructions are inlined. +static void fixupLineNumbers(Function *Fn, Function::iterator FI, + Instruction *TheCall, bool CalleeHasDebugInfo) { + const DebugLoc &TheCallDL = TheCall->getDebugLoc(); + if (!TheCallDL) + return; + + auto &Ctx = Fn->getContext(); + DILocation *InlinedAtNode = TheCallDL; + + // Create a unique call site, not to be confused with any other call from the + // same location. + InlinedAtNode = DILocation::getDistinct( + Ctx, InlinedAtNode->getLine(), InlinedAtNode->getColumn(), + InlinedAtNode->getScope(), InlinedAtNode->getInlinedAt()); + + // Cache the inlined-at nodes as they're built so they are reused, without + // this every instruction's inlined-at chain would become distinct from each + // other. + DenseMap<const MDNode *, MDNode *> IANodes; + + // Check if we are not generating inline line tables and want to use + // the call site location instead. + bool NoInlineLineTables = Fn->hasFnAttribute("no-inline-line-tables"); + + for (; FI != Fn->end(); ++FI) { + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); + BI != BE; ++BI) { + // Loop metadata needs to be updated so that the start and end locs + // reference inlined-at locations. 
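+      // Descriptive note: the lambda below remaps DILocations referenced from
+      // the instruction's loop metadata so that their inlined-at chain ends at
+      // the distinct call-site location created above.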
+ auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode, + &IANodes](Metadata *MD) -> Metadata * { + if (auto *Loc = dyn_cast_or_null<DILocation>(MD)) + return inlineDebugLoc(Loc, InlinedAtNode, Ctx, IANodes).get(); + return MD; + }; + updateLoopMetadataDebugLocations(*BI, updateLoopInfoLoc); + + if (!NoInlineLineTables) + if (DebugLoc DL = BI->getDebugLoc()) { + DebugLoc IDL = + inlineDebugLoc(DL, InlinedAtNode, BI->getContext(), IANodes); + BI->setDebugLoc(IDL); + continue; + } + + if (CalleeHasDebugInfo && !NoInlineLineTables) + continue; + + // If the inlined instruction has no line number, or if inline info + // is not being generated, make it look as if it originates from the call + // location. This is important for ((__always_inline, __nodebug__)) + // functions which must use caller location for all instructions in their + // function body. + + // Don't update static allocas, as they may get moved later. + if (auto *AI = dyn_cast<AllocaInst>(BI)) + if (allocaWouldBeStaticInEntry(AI)) + continue; + + BI->setDebugLoc(TheCallDL); + } + + // Remove debug info intrinsics if we're not keeping inline info. + if (NoInlineLineTables) { + BasicBlock::iterator BI = FI->begin(); + while (BI != FI->end()) { + if (isa<DbgInfoIntrinsic>(BI)) { + BI = BI->eraseFromParent(); + continue; + } + ++BI; + } + } + + } +} + +#undef DEBUG_TYPE +#define DEBUG_TYPE "assignment-tracking" +/// Find Alloca and linked DbgAssignIntrinsic for locals escaped by \p CB. +static at::StorageToVarsMap collectEscapedLocals(const DataLayout &DL, + const CallBase &CB) { + at::StorageToVarsMap EscapedLocals; + SmallPtrSet<const Value *, 4> SeenBases; + + LLVM_DEBUG( + errs() << "# Finding caller local variables escaped by callee\n"); + for (const Value *Arg : CB.args()) { + LLVM_DEBUG(errs() << "INSPECT: " << *Arg << "\n"); + if (!Arg->getType()->isPointerTy()) { + LLVM_DEBUG(errs() << " | SKIP: Not a pointer\n"); + continue; + } + + const Instruction *I = dyn_cast<Instruction>(Arg); + if (!I) { + LLVM_DEBUG(errs() << " | SKIP: Not result of instruction\n"); + continue; + } + + // Walk back to the base storage. + assert(Arg->getType()->isPtrOrPtrVectorTy()); + APInt TmpOffset(DL.getIndexTypeSizeInBits(Arg->getType()), 0, false); + const AllocaInst *Base = dyn_cast<AllocaInst>( + Arg->stripAndAccumulateConstantOffsets(DL, TmpOffset, true)); + if (!Base) { + LLVM_DEBUG(errs() << " | SKIP: Couldn't walk back to base storage\n"); + continue; + } + + assert(Base); + LLVM_DEBUG(errs() << " | BASE: " << *Base << "\n"); + // We only need to process each base address once - skip any duplicates. + if (!SeenBases.insert(Base).second) + continue; + + // Find all local variables associated with the backing storage. + for (auto *DAI : at::getAssignmentMarkers(Base)) { + // Skip variables from inlined functions - they are not local variables. + if (DAI->getDebugLoc().getInlinedAt()) + continue; + LLVM_DEBUG(errs() << " > DEF : " << *DAI << "\n"); + EscapedLocals[Base].insert(at::VarRecord(DAI)); + } + } + return EscapedLocals; +} + +static void trackInlinedStores(Function::iterator Start, Function::iterator End, + const CallBase &CB) { + LLVM_DEBUG(errs() << "trackInlinedStores into " + << Start->getParent()->getName() << " from " + << CB.getCalledFunction()->getName() << "\n"); + std::unique_ptr<DataLayout> DL = std::make_unique<DataLayout>(CB.getModule()); + at::trackAssignments(Start, End, collectEscapedLocals(*DL, CB), *DL); +} + +/// Update inlined instructions' DIAssignID metadata. 
We need to do this +/// otherwise a function inlined more than once into the same function +/// will cause DIAssignID to be shared by many instructions. +static void fixupAssignments(Function::iterator Start, Function::iterator End) { + // Map {Old, New} metadata. Not used directly - use GetNewID. + DenseMap<DIAssignID *, DIAssignID *> Map; + auto GetNewID = [&Map](Metadata *Old) { + DIAssignID *OldID = cast<DIAssignID>(Old); + if (DIAssignID *NewID = Map.lookup(OldID)) + return NewID; + DIAssignID *NewID = DIAssignID::getDistinct(OldID->getContext()); + Map[OldID] = NewID; + return NewID; + }; + // Loop over all the inlined instructions. If we find a DIAssignID + // attachment or use, replace it with a new version. + for (auto BBI = Start; BBI != End; ++BBI) { + for (Instruction &I : *BBI) { + if (auto *ID = I.getMetadata(LLVMContext::MD_DIAssignID)) + I.setMetadata(LLVMContext::MD_DIAssignID, GetNewID(ID)); + else if (auto *DAI = dyn_cast<DbgAssignIntrinsic>(&I)) + DAI->setAssignId(GetNewID(DAI->getAssignID())); + } + } +} +#undef DEBUG_TYPE +#define DEBUG_TYPE "inline-function" + +/// Update the block frequencies of the caller after a callee has been inlined. +/// +/// Each block cloned into the caller has its block frequency scaled by the +/// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of +/// callee's entry block gets the same frequency as the callsite block and the +/// relative frequencies of all cloned blocks remain the same after cloning. +static void updateCallerBFI(BasicBlock *CallSiteBlock, + const ValueToValueMapTy &VMap, + BlockFrequencyInfo *CallerBFI, + BlockFrequencyInfo *CalleeBFI, + const BasicBlock &CalleeEntryBlock) { + SmallPtrSet<BasicBlock *, 16> ClonedBBs; + for (auto Entry : VMap) { + if (!isa<BasicBlock>(Entry.first) || !Entry.second) + continue; + auto *OrigBB = cast<BasicBlock>(Entry.first); + auto *ClonedBB = cast<BasicBlock>(Entry.second); + uint64_t Freq = CalleeBFI->getBlockFreq(OrigBB).getFrequency(); + if (!ClonedBBs.insert(ClonedBB).second) { + // Multiple blocks in the callee might get mapped to one cloned block in + // the caller since we prune the callee as we clone it. When that happens, + // we want to use the maximum among the original blocks' frequencies. + uint64_t NewFreq = CallerBFI->getBlockFreq(ClonedBB).getFrequency(); + if (NewFreq > Freq) + Freq = NewFreq; + } + CallerBFI->setBlockFreq(ClonedBB, Freq); + } + BasicBlock *EntryClone = cast<BasicBlock>(VMap.lookup(&CalleeEntryBlock)); + CallerBFI->setBlockFreqAndScale( + EntryClone, CallerBFI->getBlockFreq(CallSiteBlock).getFrequency(), + ClonedBBs); +} + +/// Update the branch metadata for cloned call instructions. +static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, + const ProfileCount &CalleeEntryCount, + const CallBase &TheCall, ProfileSummaryInfo *PSI, + BlockFrequencyInfo *CallerBFI) { + if (CalleeEntryCount.isSynthetic() || CalleeEntryCount.getCount() < 1) + return; + auto CallSiteCount = + PSI ? 
PSI->getProfileCount(TheCall, CallerBFI) : std::nullopt; + int64_t CallCount = + std::min(CallSiteCount.value_or(0), CalleeEntryCount.getCount()); + updateProfileCallee(Callee, -CallCount, &VMap); +} + +void llvm::updateProfileCallee( + Function *Callee, int64_t EntryDelta, + const ValueMap<const Value *, WeakTrackingVH> *VMap) { + auto CalleeCount = Callee->getEntryCount(); + if (!CalleeCount) + return; + + const uint64_t PriorEntryCount = CalleeCount->getCount(); + + // Since CallSiteCount is an estimate, it could exceed the original callee + // count and has to be set to 0 so guard against underflow. + const uint64_t NewEntryCount = + (EntryDelta < 0 && static_cast<uint64_t>(-EntryDelta) > PriorEntryCount) + ? 0 + : PriorEntryCount + EntryDelta; + + // During inlining ? + if (VMap) { + uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount; + for (auto Entry : *VMap) + if (isa<CallInst>(Entry.first)) + if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second)) + CI->updateProfWeight(CloneEntryCount, PriorEntryCount); + } + + if (EntryDelta) { + Callee->setEntryCount(NewEntryCount); + + for (BasicBlock &BB : *Callee) + // No need to update the callsite if it is pruned during inlining. + if (!VMap || VMap->count(&BB)) + for (Instruction &I : BB) + if (CallInst *CI = dyn_cast<CallInst>(&I)) + CI->updateProfWeight(NewEntryCount, PriorEntryCount); + } +} + +/// An operand bundle "clang.arc.attachedcall" on a call indicates the call +/// result is implicitly consumed by a call to retainRV or claimRV immediately +/// after the call. This function inlines the retainRV/claimRV calls. +/// +/// There are three cases to consider: +/// +/// 1. If there is a call to autoreleaseRV that takes a pointer to the returned +/// object in the callee return block, the autoreleaseRV call and the +/// retainRV/claimRV call in the caller cancel out. If the call in the caller +/// is a claimRV call, a call to objc_release is emitted. +/// +/// 2. If there is a call in the callee return block that doesn't have operand +/// bundle "clang.arc.attachedcall", the operand bundle on the original call +/// is transferred to the call in the callee. +/// +/// 3. Otherwise, a call to objc_retain is inserted if the call in the caller is +/// a retainRV call. +static void +inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind, + const SmallVectorImpl<ReturnInst *> &Returns) { + Module *Mod = CB.getModule(); + assert(objcarc::isRetainOrClaimRV(RVCallKind) && "unexpected ARC function"); + bool IsRetainRV = RVCallKind == objcarc::ARCInstKind::RetainRV, + IsUnsafeClaimRV = !IsRetainRV; + + for (auto *RI : Returns) { + Value *RetOpnd = objcarc::GetRCIdentityRoot(RI->getOperand(0)); + bool InsertRetainCall = IsRetainRV; + IRBuilder<> Builder(RI->getContext()); + + // Walk backwards through the basic block looking for either a matching + // autoreleaseRV call or an unannotated call. + auto InstRange = llvm::make_range(++(RI->getIterator().getReverse()), + RI->getParent()->rend()); + for (Instruction &I : llvm::make_early_inc_range(InstRange)) { + // Ignore casts. + if (isa<CastInst>(I)) + continue; + + if (auto *II = dyn_cast<IntrinsicInst>(&I)) { + if (II->getIntrinsicID() != Intrinsic::objc_autoreleaseReturnValue || + !II->hasNUses(0) || + objcarc::GetRCIdentityRoot(II->getOperand(0)) != RetOpnd) + break; + + // If we've found a matching authoreleaseRV call: + // - If claimRV is attached to the call, insert a call to objc_release + // and erase the autoreleaseRV call. 
+ // - If retainRV is attached to the call, just erase the autoreleaseRV + // call. + if (IsUnsafeClaimRV) { + Builder.SetInsertPoint(II); + Function *IFn = + Intrinsic::getDeclaration(Mod, Intrinsic::objc_release); + Value *BC = Builder.CreateBitCast(RetOpnd, IFn->getArg(0)->getType()); + Builder.CreateCall(IFn, BC, ""); + } + II->eraseFromParent(); + InsertRetainCall = false; + break; + } + + auto *CI = dyn_cast<CallInst>(&I); + + if (!CI) + break; + + if (objcarc::GetRCIdentityRoot(CI) != RetOpnd || + objcarc::hasAttachedCallOpBundle(CI)) + break; + + // If we've found an unannotated call that defines RetOpnd, add a + // "clang.arc.attachedcall" operand bundle. + Value *BundleArgs[] = {*objcarc::getAttachedARCFunction(&CB)}; + OperandBundleDef OB("clang.arc.attachedcall", BundleArgs); + auto *NewCall = CallBase::addOperandBundle( + CI, LLVMContext::OB_clang_arc_attachedcall, OB, CI); + NewCall->copyMetadata(*CI); + CI->replaceAllUsesWith(NewCall); + CI->eraseFromParent(); + InsertRetainCall = false; + break; + } + + if (InsertRetainCall) { + // The retainRV is attached to the call and we've failed to find a + // matching autoreleaseRV or an annotated call in the callee. Emit a call + // to objc_retain. + Builder.SetInsertPoint(RI); + Function *IFn = Intrinsic::getDeclaration(Mod, Intrinsic::objc_retain); + Value *BC = Builder.CreateBitCast(RetOpnd, IFn->getArg(0)->getType()); + Builder.CreateCall(IFn, BC, ""); + } + } +} + +/// This function inlines the called function into the basic block of the +/// caller. This returns false if it is not possible to inline this call. +/// The program is still in a well defined state if this occurs though. +/// +/// Note that this only does one level of inlining. For example, if the +/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now +/// exists in the instruction stream. Similarly this will inline a recursive +/// function by one level. +llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, + bool MergeAttributes, + AAResults *CalleeAAR, + bool InsertLifetime, + Function *ForwardVarArgsTo) { + assert(CB.getParent() && CB.getFunction() && "Instruction not in function!"); + + // FIXME: we don't inline callbr yet. + if (isa<CallBrInst>(CB)) + return InlineResult::failure("We don't inline callbr yet."); + + // If IFI has any state in it, zap it before we fill it in. + IFI.reset(); + + Function *CalledFunc = CB.getCalledFunction(); + if (!CalledFunc || // Can't inline external function or indirect + CalledFunc->isDeclaration()) // call! + return InlineResult::failure("external or indirect"); + + // The inliner does not know how to inline through calls with operand bundles + // in general ... + if (CB.hasOperandBundles()) { + for (int i = 0, e = CB.getNumOperandBundles(); i != e; ++i) { + uint32_t Tag = CB.getOperandBundleAt(i).getTagID(); + // ... but it knows how to inline through "deopt" operand bundles ... + if (Tag == LLVMContext::OB_deopt) + continue; + // ... and "funclet" operand bundles. + if (Tag == LLVMContext::OB_funclet) + continue; + if (Tag == LLVMContext::OB_clang_arc_attachedcall) + continue; + if (Tag == LLVMContext::OB_kcfi) + continue; + + return InlineResult::failure("unsupported operand bundle"); + } + } + + // If the call to the callee cannot throw, set the 'nounwind' flag on any + // calls that we inline. 
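+  // Descriptive note: the flag computed below is consulted again after
+  // cloning, when inlined calls that are not already nounwind are marked
+  // nounwind.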
+ bool MarkNoUnwind = CB.doesNotThrow(); + + BasicBlock *OrigBB = CB.getParent(); + Function *Caller = OrigBB->getParent(); + + // Do not inline strictfp function into non-strictfp one. It would require + // conversion of all FP operations in host function to constrained intrinsics. + if (CalledFunc->getAttributes().hasFnAttr(Attribute::StrictFP) && + !Caller->getAttributes().hasFnAttr(Attribute::StrictFP)) { + return InlineResult::failure("incompatible strictfp attributes"); + } + + // GC poses two hazards to inlining, which only occur when the callee has GC: + // 1. If the caller has no GC, then the callee's GC must be propagated to the + // caller. + // 2. If the caller has a differing GC, it is invalid to inline. + if (CalledFunc->hasGC()) { + if (!Caller->hasGC()) + Caller->setGC(CalledFunc->getGC()); + else if (CalledFunc->getGC() != Caller->getGC()) + return InlineResult::failure("incompatible GC"); + } + + // Get the personality function from the callee if it contains a landing pad. + Constant *CalledPersonality = + CalledFunc->hasPersonalityFn() + ? CalledFunc->getPersonalityFn()->stripPointerCasts() + : nullptr; + + // Find the personality function used by the landing pads of the caller. If it + // exists, then check to see that it matches the personality function used in + // the callee. + Constant *CallerPersonality = + Caller->hasPersonalityFn() + ? Caller->getPersonalityFn()->stripPointerCasts() + : nullptr; + if (CalledPersonality) { + if (!CallerPersonality) + Caller->setPersonalityFn(CalledPersonality); + // If the personality functions match, then we can perform the + // inlining. Otherwise, we can't inline. + // TODO: This isn't 100% true. Some personality functions are proper + // supersets of others and can be used in place of the other. + else if (CalledPersonality != CallerPersonality) + return InlineResult::failure("incompatible personality"); + } + + // We need to figure out which funclet the callsite was in so that we may + // properly nest the callee. + Instruction *CallSiteEHPad = nullptr; + if (CallerPersonality) { + EHPersonality Personality = classifyEHPersonality(CallerPersonality); + if (isScopedEHPersonality(Personality)) { + std::optional<OperandBundleUse> ParentFunclet = + CB.getOperandBundle(LLVMContext::OB_funclet); + if (ParentFunclet) + CallSiteEHPad = cast<FuncletPadInst>(ParentFunclet->Inputs.front()); + + // OK, the inlining site is legal. What about the target function? + + if (CallSiteEHPad) { + if (Personality == EHPersonality::MSVC_CXX) { + // The MSVC personality cannot tolerate catches getting inlined into + // cleanup funclets. + if (isa<CleanupPadInst>(CallSiteEHPad)) { + // Ok, the call site is within a cleanuppad. Let's check the callee + // for catchpads. + for (const BasicBlock &CalledBB : *CalledFunc) { + if (isa<CatchSwitchInst>(CalledBB.getFirstNonPHI())) + return InlineResult::failure("catch in cleanup funclet"); + } + } + } else if (isAsynchronousEHPersonality(Personality)) { + // SEH is even less tolerant, there may not be any sort of exceptional + // funclet in the callee. + for (const BasicBlock &CalledBB : *CalledFunc) { + if (CalledBB.isEHPad()) + return InlineResult::failure("SEH in cleanup funclet"); + } + } + } + } + } + + // Determine if we are dealing with a call in an EHPad which does not unwind + // to caller. 
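+  // Descriptive note: this flag is later used to replace any inlined
+  // cleanupret that unwinds to the caller with unreachable, because that
+  // unwind edge must be dynamically unreachable when the enclosing EH pad
+  // unwinds locally.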
+ bool EHPadForCallUnwindsLocally = false; + if (CallSiteEHPad && isa<CallInst>(CB)) { + UnwindDestMemoTy FuncletUnwindMap; + Value *CallSiteUnwindDestToken = + getUnwindDestToken(CallSiteEHPad, FuncletUnwindMap); + + EHPadForCallUnwindsLocally = + CallSiteUnwindDestToken && + !isa<ConstantTokenNone>(CallSiteUnwindDestToken); + } + + // Get an iterator to the last basic block in the function, which will have + // the new function inlined after it. + Function::iterator LastBlock = --Caller->end(); + + // Make sure to capture all of the return instructions from the cloned + // function. + SmallVector<ReturnInst*, 8> Returns; + ClonedCodeInfo InlinedFunctionInfo; + Function::iterator FirstNewBlock; + + { // Scope to destroy VMap after cloning. + ValueToValueMapTy VMap; + struct ByValInit { + Value *Dst; + Value *Src; + Type *Ty; + }; + // Keep a list of pair (dst, src) to emit byval initializations. + SmallVector<ByValInit, 4> ByValInits; + + // When inlining a function that contains noalias scope metadata, + // this metadata needs to be cloned so that the inlined blocks + // have different "unique scopes" at every call site. + // Track the metadata that must be cloned. Do this before other changes to + // the function, so that we do not get in trouble when inlining caller == + // callee. + ScopedAliasMetadataDeepCloner SAMetadataCloner(CB.getCalledFunction()); + + auto &DL = Caller->getParent()->getDataLayout(); + + // Calculate the vector of arguments to pass into the function cloner, which + // matches up the formal to the actual argument values. + auto AI = CB.arg_begin(); + unsigned ArgNo = 0; + for (Function::arg_iterator I = CalledFunc->arg_begin(), + E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { + Value *ActualArg = *AI; + + // When byval arguments actually inlined, we need to make the copy implied + // by them explicit. However, we don't do this if the callee is readonly + // or readnone, because the copy would be unneeded: the callee doesn't + // modify the struct. + if (CB.isByValArgument(ArgNo)) { + ActualArg = HandleByValArgument(CB.getParamByValType(ArgNo), ActualArg, + &CB, CalledFunc, IFI, + CalledFunc->getParamAlign(ArgNo)); + if (ActualArg != *AI) + ByValInits.push_back( + {ActualArg, (Value *)*AI, CB.getParamByValType(ArgNo)}); + } + + VMap[&*I] = ActualArg; + } + + // TODO: Remove this when users have been updated to the assume bundles. + // Add alignment assumptions if necessary. We do this before the inlined + // instructions are actually cloned into the caller so that we can easily + // check what will be known at the start of the inlined code. + AddAlignmentAssumptions(CB, IFI); + + AssumptionCache *AC = + IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr; + + /// Preserve all attributes on of the call and its parameters. + salvageKnowledge(&CB, AC); + + // We want the inliner to prune the code as it copies. We would LOVE to + // have no dead or constant instructions leftover after inlining occurs + // (which can happen, e.g., because an argument was constant), but we'll be + // happy with whatever the cloner can do. + CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, + /*ModuleLevelChanges=*/false, Returns, ".i", + &InlinedFunctionInfo); + // Remember the first block that is newly cloned over. + FirstNewBlock = LastBlock; ++FirstNewBlock; + + // Insert retainRV/clainRV runtime calls. 
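+    // Illustrative example (hypothetical IR): the attached-call form looks
+    // roughly like
+    //   %r = call ptr @callee() [ "clang.arc.attachedcall"(ptr @llvm.objc.retainAutoreleasedReturnValue) ]
+    // and inlineRetainOrClaimRVCalls pairs it with a matching autoreleaseRV in
+    // the callee's return blocks, or falls back to the other cases documented
+    // on that function.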
+ objcarc::ARCInstKind RVCallKind = objcarc::getAttachedARCFunctionKind(&CB); + if (RVCallKind != objcarc::ARCInstKind::None) + inlineRetainOrClaimRVCalls(CB, RVCallKind, Returns); + + // Updated caller/callee profiles only when requested. For sample loader + // inlining, the context-sensitive inlinee profile doesn't need to be + // subtracted from callee profile, and the inlined clone also doesn't need + // to be scaled based on call site count. + if (IFI.UpdateProfile) { + if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr) + // Update the BFI of blocks cloned into the caller. + updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI, + CalledFunc->front()); + + if (auto Profile = CalledFunc->getEntryCount()) + updateCallProfile(CalledFunc, VMap, *Profile, CB, IFI.PSI, + IFI.CallerBFI); + } + + // Inject byval arguments initialization. + for (ByValInit &Init : ByValInits) + HandleByValArgumentInit(Init.Ty, Init.Dst, Init.Src, Caller->getParent(), + &*FirstNewBlock, IFI); + + std::optional<OperandBundleUse> ParentDeopt = + CB.getOperandBundle(LLVMContext::OB_deopt); + if (ParentDeopt) { + SmallVector<OperandBundleDef, 2> OpDefs; + + for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) { + CallBase *ICS = dyn_cast_or_null<CallBase>(VH); + if (!ICS) + continue; // instruction was DCE'd or RAUW'ed to undef + + OpDefs.clear(); + + OpDefs.reserve(ICS->getNumOperandBundles()); + + for (unsigned COBi = 0, COBe = ICS->getNumOperandBundles(); COBi < COBe; + ++COBi) { + auto ChildOB = ICS->getOperandBundleAt(COBi); + if (ChildOB.getTagID() != LLVMContext::OB_deopt) { + // If the inlined call has other operand bundles, let them be + OpDefs.emplace_back(ChildOB); + continue; + } + + // It may be useful to separate this logic (of handling operand + // bundles) out to a separate "policy" component if this gets crowded. + // Prepend the parent's deoptimization continuation to the newly + // inlined call's deoptimization continuation. + std::vector<Value *> MergedDeoptArgs; + MergedDeoptArgs.reserve(ParentDeopt->Inputs.size() + + ChildOB.Inputs.size()); + + llvm::append_range(MergedDeoptArgs, ParentDeopt->Inputs); + llvm::append_range(MergedDeoptArgs, ChildOB.Inputs); + + OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs)); + } + + Instruction *NewI = CallBase::Create(ICS, OpDefs, ICS); + + // Note: the RAUW does the appropriate fixup in VMap, so we need to do + // this even if the call returns void. + ICS->replaceAllUsesWith(NewI); + + VH = nullptr; + ICS->eraseFromParent(); + } + } + + // Update the callgraph if requested. + if (IFI.CG) + UpdateCallGraphAfterInlining(CB, FirstNewBlock, VMap, IFI); + + // For 'nodebug' functions, the associated DISubprogram is always null. + // Conservatively avoid propagating the callsite debug location to + // instructions inlined from a function whose DISubprogram is not null. + fixupLineNumbers(Caller, FirstNewBlock, &CB, + CalledFunc->getSubprogram() != nullptr); + + if (isAssignmentTrackingEnabled(*Caller->getParent())) { + // Interpret inlined stores to caller-local variables as assignments. + trackInlinedStores(FirstNewBlock, Caller->end(), CB); + + // Update DIAssignID metadata attachments and uses so that they are + // unique to this inlined instance. + fixupAssignments(FirstNewBlock, Caller->end()); + } + + // Now clone the inlined noalias scope metadata. + SAMetadataCloner.clone(); + SAMetadataCloner.remap(FirstNewBlock, Caller->end()); + + // Add noalias metadata if necessary. 
+ AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR, InlinedFunctionInfo); + + // Clone return attributes on the callsite into the calls within the inlined + // function which feed into its return value. + AddReturnAttributes(CB, VMap); + + propagateMemProfMetadata(CalledFunc, CB, + InlinedFunctionInfo.ContainsMemProfMetadata, VMap); + + // Propagate metadata on the callsite if necessary. + PropagateCallSiteMetadata(CB, FirstNewBlock, Caller->end()); + + // Register any cloned assumptions. + if (IFI.GetAssumptionCache) + for (BasicBlock &NewBlock : + make_range(FirstNewBlock->getIterator(), Caller->end())) + for (Instruction &I : NewBlock) + if (auto *II = dyn_cast<CondGuardInst>(&I)) + IFI.GetAssumptionCache(*Caller).registerAssumption(II); + } + + // If there are any alloca instructions in the block that used to be the entry + // block for the callee, move them to the entry block of the caller. First + // calculate which instruction they should be inserted before. We insert the + // instructions at the end of the current alloca list. + { + BasicBlock::iterator InsertPoint = Caller->begin()->begin(); + for (BasicBlock::iterator I = FirstNewBlock->begin(), + E = FirstNewBlock->end(); I != E; ) { + AllocaInst *AI = dyn_cast<AllocaInst>(I++); + if (!AI) continue; + + // If the alloca is now dead, remove it. This often occurs due to code + // specialization. + if (AI->use_empty()) { + AI->eraseFromParent(); + continue; + } + + if (!allocaWouldBeStaticInEntry(AI)) + continue; + + // Keep track of the static allocas that we inline into the caller. + IFI.StaticAllocas.push_back(AI); + + // Scan for the block of allocas that we can move over, and move them + // all at once. + while (isa<AllocaInst>(I) && + !cast<AllocaInst>(I)->use_empty() && + allocaWouldBeStaticInEntry(cast<AllocaInst>(I))) { + IFI.StaticAllocas.push_back(cast<AllocaInst>(I)); + ++I; + } + + // Transfer all of the allocas over in a block. Using splice means + // that the instructions aren't removed from the symbol table, then + // reinserted. + Caller->getEntryBlock().splice(InsertPoint, &*FirstNewBlock, + AI->getIterator(), I); + } + } + + SmallVector<Value*,4> VarArgsToForward; + SmallVector<AttributeSet, 4> VarArgsAttrs; + for (unsigned i = CalledFunc->getFunctionType()->getNumParams(); + i < CB.arg_size(); i++) { + VarArgsToForward.push_back(CB.getArgOperand(i)); + VarArgsAttrs.push_back(CB.getAttributes().getParamAttrs(i)); + } + + bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false; + if (InlinedFunctionInfo.ContainsCalls) { + CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None; + if (CallInst *CI = dyn_cast<CallInst>(&CB)) + CallSiteTailKind = CI->getTailCallKind(); + + // For inlining purposes, the "notail" marker is the same as no marker. + if (CallSiteTailKind == CallInst::TCK_NoTail) + CallSiteTailKind = CallInst::TCK_None; + + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; + ++BB) { + for (Instruction &I : llvm::make_early_inc_range(*BB)) { + CallInst *CI = dyn_cast<CallInst>(&I); + if (!CI) + continue; + + // Forward varargs from inlined call site to calls to the + // ForwardVarArgsTo function, if requested, and to musttail calls. + if (!VarArgsToForward.empty() && + ((ForwardVarArgsTo && + CI->getCalledFunction() == ForwardVarArgsTo) || + CI->isMustTailCall())) { + // Collect attributes for non-vararg parameters. 
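+          // Descriptive note: the fixed parameters keep their existing
+          // attribute sets, the forwarded varargs are appended together with
+          // the attribute sets captured from the original call site, and a new
+          // call is created with the combined argument and attribute lists.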
+ AttributeList Attrs = CI->getAttributes(); + SmallVector<AttributeSet, 8> ArgAttrs; + if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) { + for (unsigned ArgNo = 0; + ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo) + ArgAttrs.push_back(Attrs.getParamAttrs(ArgNo)); + } + + // Add VarArg attributes. + ArgAttrs.append(VarArgsAttrs.begin(), VarArgsAttrs.end()); + Attrs = AttributeList::get(CI->getContext(), Attrs.getFnAttrs(), + Attrs.getRetAttrs(), ArgAttrs); + // Add VarArgs to existing parameters. + SmallVector<Value *, 6> Params(CI->args()); + Params.append(VarArgsToForward.begin(), VarArgsToForward.end()); + CallInst *NewCI = CallInst::Create( + CI->getFunctionType(), CI->getCalledOperand(), Params, "", CI); + NewCI->setDebugLoc(CI->getDebugLoc()); + NewCI->setAttributes(Attrs); + NewCI->setCallingConv(CI->getCallingConv()); + CI->replaceAllUsesWith(NewCI); + CI->eraseFromParent(); + CI = NewCI; + } + + if (Function *F = CI->getCalledFunction()) + InlinedDeoptimizeCalls |= + F->getIntrinsicID() == Intrinsic::experimental_deoptimize; + + // We need to reduce the strength of any inlined tail calls. For + // musttail, we have to avoid introducing potential unbounded stack + // growth. For example, if functions 'f' and 'g' are mutually recursive + // with musttail, we can inline 'g' into 'f' so long as we preserve + // musttail on the cloned call to 'f'. If either the inlined call site + // or the cloned call site is *not* musttail, the program already has + // one frame of stack growth, so it's safe to remove musttail. Here is + // a table of example transformations: + // + // f -> musttail g -> musttail f ==> f -> musttail f + // f -> musttail g -> tail f ==> f -> tail f + // f -> g -> musttail f ==> f -> f + // f -> g -> tail f ==> f -> f + // + // Inlined notail calls should remain notail calls. + CallInst::TailCallKind ChildTCK = CI->getTailCallKind(); + if (ChildTCK != CallInst::TCK_NoTail) + ChildTCK = std::min(CallSiteTailKind, ChildTCK); + CI->setTailCallKind(ChildTCK); + InlinedMustTailCalls |= CI->isMustTailCall(); + + // Call sites inlined through a 'nounwind' call site should be + // 'nounwind' as well. However, avoid marking call sites explicitly + // where possible. This helps expose more opportunities for CSE after + // inlining, commonly when the callee is an intrinsic. + if (MarkNoUnwind && !CI->doesNotThrow()) + CI->setDoesNotThrow(); + } + } + } + + // Leave lifetime markers for the static alloca's, scoping them to the + // function we just inlined. + // We need to insert lifetime intrinsics even at O0 to avoid invalid + // access caused by multithreaded coroutines. The check + // `Caller->isPresplitCoroutine()` would affect AlwaysInliner at O0 only. + if ((InsertLifetime || Caller->isPresplitCoroutine()) && + !IFI.StaticAllocas.empty()) { + IRBuilder<> builder(&FirstNewBlock->front()); + for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) { + AllocaInst *AI = IFI.StaticAllocas[ai]; + // Don't mark swifterror allocas. They can't have bitcast uses. + if (AI->isSwiftError()) + continue; + + // If the alloca is already scoped to something smaller than the whole + // function then there's no need to add redundant, less accurate markers. + if (hasLifetimeMarkers(AI)) + continue; + + // Try to determine the size of the allocation. 
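+      // Descriptive note (names are placeholders): when a constant size can be
+      // computed it becomes the size operand of the emitted markers, e.g.
+      //   call void @llvm.lifetime.start.p0(i64 16, ptr %buf)
+      // at the start of the inlined code with a matching @llvm.lifetime.end
+      // before each return; otherwise the size is left unknown.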
+ ConstantInt *AllocaSize = nullptr; + if (ConstantInt *AIArraySize = + dyn_cast<ConstantInt>(AI->getArraySize())) { + auto &DL = Caller->getParent()->getDataLayout(); + Type *AllocaType = AI->getAllocatedType(); + TypeSize AllocaTypeSize = DL.getTypeAllocSize(AllocaType); + uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); + + // Don't add markers for zero-sized allocas. + if (AllocaArraySize == 0) + continue; + + // Check that array size doesn't saturate uint64_t and doesn't + // overflow when it's multiplied by type size. + if (!AllocaTypeSize.isScalable() && + AllocaArraySize != std::numeric_limits<uint64_t>::max() && + std::numeric_limits<uint64_t>::max() / AllocaArraySize >= + AllocaTypeSize.getFixedValue()) { + AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), + AllocaArraySize * AllocaTypeSize); + } + } + + builder.CreateLifetimeStart(AI, AllocaSize); + for (ReturnInst *RI : Returns) { + // Don't insert llvm.lifetime.end calls between a musttail or deoptimize + // call and a return. The return kills all local allocas. + if (InlinedMustTailCalls && + RI->getParent()->getTerminatingMustTailCall()) + continue; + if (InlinedDeoptimizeCalls && + RI->getParent()->getTerminatingDeoptimizeCall()) + continue; + IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize); + } + } + } + + // If the inlined code contained dynamic alloca instructions, wrap the inlined + // code with llvm.stacksave/llvm.stackrestore intrinsics. + if (InlinedFunctionInfo.ContainsDynamicAllocas) { + Module *M = Caller->getParent(); + // Get the two intrinsics we care about. + Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); + Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore); + + // Insert the llvm.stacksave. + CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin()) + .CreateCall(StackSave, {}, "savedstack"); + + // Insert a call to llvm.stackrestore before any return instructions in the + // inlined function. + for (ReturnInst *RI : Returns) { + // Don't insert llvm.stackrestore calls between a musttail or deoptimize + // call and a return. The return will restore the stack pointer. + if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall()) + continue; + if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall()) + continue; + IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr); + } + } + + // If we are inlining for an invoke instruction, we must make sure to rewrite + // any call instructions into invoke instructions. This is sensitive to which + // funclet pads were top-level in the inlinee, so must be done before + // rewriting the "parent pad" links. + if (auto *II = dyn_cast<InvokeInst>(&CB)) { + BasicBlock *UnwindDest = II->getUnwindDest(); + Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI(); + if (isa<LandingPadInst>(FirstNonPHI)) { + HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo); + } else { + HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo); + } + } + + // Update the lexical scopes of the new funclets and callsites. + // Anything that had 'none' as its parent is now nested inside the callsite's + // EHPad. + if (CallSiteEHPad) { + for (Function::iterator BB = FirstNewBlock->getIterator(), + E = Caller->end(); + BB != E; ++BB) { + // Add bundle operands to inlined call sites. 
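+      // Illustrative example (hypothetical IR, names are placeholders): a call
+      // cloned into the call site's funclet gains an operand bundle such as
+      //   call void @g() [ "funclet"(token %cleanup.pad) ]
+      // so the EH nesting of the inlinee stays consistent with the caller.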
+ PropagateOperandBundles(BB, CallSiteEHPad); + + // It is problematic if the inlinee has a cleanupret which unwinds to + // caller and we inline it into a call site which doesn't unwind but into + // an EH pad that does. Such an edge must be dynamically unreachable. + // As such, we replace the cleanupret with unreachable. + if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(BB->getTerminator())) + if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally) + changeToUnreachable(CleanupRet); + + Instruction *I = BB->getFirstNonPHI(); + if (!I->isEHPad()) + continue; + + if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) { + if (isa<ConstantTokenNone>(CatchSwitch->getParentPad())) + CatchSwitch->setParentPad(CallSiteEHPad); + } else { + auto *FPI = cast<FuncletPadInst>(I); + if (isa<ConstantTokenNone>(FPI->getParentPad())) + FPI->setParentPad(CallSiteEHPad); + } + } + } + + if (InlinedDeoptimizeCalls) { + // We need to at least remove the deoptimizing returns from the Return set, + // so that the control flow from those returns does not get merged into the + // caller (but terminate it instead). If the caller's return type does not + // match the callee's return type, we also need to change the return type of + // the intrinsic. + if (Caller->getReturnType() == CB.getType()) { + llvm::erase_if(Returns, [](ReturnInst *RI) { + return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr; + }); + } else { + SmallVector<ReturnInst *, 8> NormalReturns; + Function *NewDeoptIntrinsic = Intrinsic::getDeclaration( + Caller->getParent(), Intrinsic::experimental_deoptimize, + {Caller->getReturnType()}); + + for (ReturnInst *RI : Returns) { + CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall(); + if (!DeoptCall) { + NormalReturns.push_back(RI); + continue; + } + + // The calling convention on the deoptimize call itself may be bogus, + // since the code we're inlining may have undefined behavior (and may + // never actually execute at runtime); but all + // @llvm.experimental.deoptimize declarations have to have the same + // calling convention in a well-formed module. + auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv(); + NewDeoptIntrinsic->setCallingConv(CallingConv); + auto *CurBB = RI->getParent(); + RI->eraseFromParent(); + + SmallVector<Value *, 4> CallArgs(DeoptCall->args()); + + SmallVector<OperandBundleDef, 1> OpBundles; + DeoptCall->getOperandBundlesAsDefs(OpBundles); + auto DeoptAttributes = DeoptCall->getAttributes(); + DeoptCall->eraseFromParent(); + assert(!OpBundles.empty() && + "Expected at least the deopt operand bundle"); + + IRBuilder<> Builder(CurBB); + CallInst *NewDeoptCall = + Builder.CreateCall(NewDeoptIntrinsic, CallArgs, OpBundles); + NewDeoptCall->setCallingConv(CallingConv); + NewDeoptCall->setAttributes(DeoptAttributes); + if (NewDeoptCall->getType()->isVoidTy()) + Builder.CreateRetVoid(); + else + Builder.CreateRet(NewDeoptCall); + } + + // Leave behind the normal returns so we can merge control flow. + std::swap(Returns, NormalReturns); + } + } + + // Handle any inlined musttail call sites. In order for a new call site to be + // musttail, the source of the clone and the inlined call site must have been + // musttail. Therefore it's safe to return without merging control into the + // phi below. + if (InlinedMustTailCalls) { + // Check if we need to bitcast the result of any musttail calls. 
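+    // With typed pointers this can happen when, for example, the caller
+    // returns i8* while the cloned musttail call produces i32*; the return is
+    // then rebuilt along the lines of
+    //   %v = musttail call i32* @callee(...)
+    //   %v.cast = bitcast i32* %v to i8*
+    //   ret i8* %v.cast
+    // (%v and %v.cast are illustrative names).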
+ Type *NewRetTy = Caller->getReturnType(); + bool NeedBitCast = !CB.use_empty() && CB.getType() != NewRetTy; + + // Handle the returns preceded by musttail calls separately. + SmallVector<ReturnInst *, 8> NormalReturns; + for (ReturnInst *RI : Returns) { + CallInst *ReturnedMustTail = + RI->getParent()->getTerminatingMustTailCall(); + if (!ReturnedMustTail) { + NormalReturns.push_back(RI); + continue; + } + if (!NeedBitCast) + continue; + + // Delete the old return and any preceding bitcast. + BasicBlock *CurBB = RI->getParent(); + auto *OldCast = dyn_cast_or_null<BitCastInst>(RI->getReturnValue()); + RI->eraseFromParent(); + if (OldCast) + OldCast->eraseFromParent(); + + // Insert a new bitcast and return with the right type. + IRBuilder<> Builder(CurBB); + Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy)); + } + + // Leave behind the normal returns so we can merge control flow. + std::swap(Returns, NormalReturns); + } + + // Now that all of the transforms on the inlined code have taken place but + // before we splice the inlined code into the CFG and lose track of which + // blocks were actually inlined, collect the call sites. We only do this if + // call graph updates weren't requested, as those provide value handle based + // tracking of inlined call sites instead. Calls to intrinsics are not + // collected because they are not inlineable. + if (InlinedFunctionInfo.ContainsCalls && !IFI.CG) { + // Otherwise just collect the raw call sites that were inlined. + for (BasicBlock &NewBB : + make_range(FirstNewBlock->getIterator(), Caller->end())) + for (Instruction &I : NewBB) + if (auto *CB = dyn_cast<CallBase>(&I)) + if (!(CB->getCalledFunction() && + CB->getCalledFunction()->isIntrinsic())) + IFI.InlinedCallSites.push_back(CB); + } + + // If we cloned in _exactly one_ basic block, and if that block ends in a + // return instruction, we splice the body of the inlined callee directly into + // the calling basic block. + if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { + // Move all of the instructions right before the call. + OrigBB->splice(CB.getIterator(), &*FirstNewBlock, FirstNewBlock->begin(), + FirstNewBlock->end()); + // Remove the cloned basic block. + Caller->back().eraseFromParent(); + + // If the call site was an invoke instruction, add a branch to the normal + // destination. + if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) { + BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), &CB); + NewBr->setDebugLoc(Returns[0]->getDebugLoc()); + } + + // If the return instruction returned a value, replace uses of the call with + // uses of the returned value. + if (!CB.use_empty()) { + ReturnInst *R = Returns[0]; + if (&CB == R->getReturnValue()) + CB.replaceAllUsesWith(UndefValue::get(CB.getType())); + else + CB.replaceAllUsesWith(R->getReturnValue()); + } + // Since we are now done with the Call/Invoke, we can delete it. + CB.eraseFromParent(); + + // Since we are now done with the return instruction, delete it also. + Returns[0]->eraseFromParent(); + + if (MergeAttributes) + AttributeFuncs::mergeAttributesForInlining(*Caller, *CalledFunc); + + // We are now done with the inlining. + return InlineResult::success(); + } + + // Otherwise, we have the normal case, of more than one block to inline or + // multiple return sites. + + // We want to clone the entire callee function into the hole between the + // "starter" and "ender" blocks. 
How we accomplish this depends on whether + // this is an invoke instruction or a call instruction. + BasicBlock *AfterCallBB; + BranchInst *CreatedBranchToNormalDest = nullptr; + if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) { + + // Add an unconditional branch to make this look like the CallInst case... + CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), &CB); + + // Split the basic block. This guarantees that no PHI nodes will have to be + // updated due to new incoming edges, and make the invoke case more + // symmetric to the call case. + AfterCallBB = + OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(), + CalledFunc->getName() + ".exit"); + + } else { // It's a call + // If this is a call instruction, we need to split the basic block that + // the call lives in. + // + AfterCallBB = OrigBB->splitBasicBlock(CB.getIterator(), + CalledFunc->getName() + ".exit"); + } + + if (IFI.CallerBFI) { + // Copy original BB's block frequency to AfterCallBB + IFI.CallerBFI->setBlockFreq( + AfterCallBB, IFI.CallerBFI->getBlockFreq(OrigBB).getFrequency()); + } + + // Change the branch that used to go to AfterCallBB to branch to the first + // basic block of the inlined function. + // + Instruction *Br = OrigBB->getTerminator(); + assert(Br && Br->getOpcode() == Instruction::Br && + "splitBasicBlock broken!"); + Br->setOperand(0, &*FirstNewBlock); + + // Now that the function is correct, make it a little bit nicer. In + // particular, move the basic blocks inserted from the end of the function + // into the space made by splitting the source basic block. + Caller->splice(AfterCallBB->getIterator(), Caller, FirstNewBlock, + Caller->end()); + + // Handle all of the return instructions that we just cloned in, and eliminate + // any users of the original call/invoke instruction. + Type *RTy = CalledFunc->getReturnType(); + + PHINode *PHI = nullptr; + if (Returns.size() > 1) { + // The PHI node should go at the front of the new basic block to merge all + // possible incoming values. + if (!CB.use_empty()) { + PHI = PHINode::Create(RTy, Returns.size(), CB.getName(), + &AfterCallBB->front()); + // Anything that used the result of the function call should now use the + // PHI node as their operand. + CB.replaceAllUsesWith(PHI); + } + + // Loop over all of the return instructions adding entries to the PHI node + // as appropriate. + if (PHI) { + for (unsigned i = 0, e = Returns.size(); i != e; ++i) { + ReturnInst *RI = Returns[i]; + assert(RI->getReturnValue()->getType() == PHI->getType() && + "Ret value not consistent in function!"); + PHI->addIncoming(RI->getReturnValue(), RI->getParent()); + } + } + + // Add a branch to the merge points and remove return instructions. + DebugLoc Loc; + for (unsigned i = 0, e = Returns.size(); i != e; ++i) { + ReturnInst *RI = Returns[i]; + BranchInst* BI = BranchInst::Create(AfterCallBB, RI); + Loc = RI->getDebugLoc(); + BI->setDebugLoc(Loc); + RI->eraseFromParent(); + } + // We need to set the debug location to *somewhere* inside the + // inlined function. The line number may be nonsensical, but the + // instruction will at least be associated with the right + // function. + if (CreatedBranchToNormalDest) + CreatedBranchToNormalDest->setDebugLoc(Loc); + } else if (!Returns.empty()) { + // Otherwise, if there is exactly one return value, just replace anything + // using the return value of the call with the computed value. 
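+    // For example, if the inlined callee ends in "ret i32 %sum", every user
+    // of the original call/invoke simply becomes a user of %sum, and the
+    // callee's return block is spliced into the block that followed the call;
+    // no PHI node is needed in this case.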
+ if (!CB.use_empty()) { + if (&CB == Returns[0]->getReturnValue()) + CB.replaceAllUsesWith(UndefValue::get(CB.getType())); + else + CB.replaceAllUsesWith(Returns[0]->getReturnValue()); + } + + // Update PHI nodes that use the ReturnBB to use the AfterCallBB. + BasicBlock *ReturnBB = Returns[0]->getParent(); + ReturnBB->replaceAllUsesWith(AfterCallBB); + + // Splice the code from the return block into the block that it will return + // to, which contains the code that was after the call. + AfterCallBB->splice(AfterCallBB->begin(), ReturnBB); + + if (CreatedBranchToNormalDest) + CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc()); + + // Delete the return instruction now and empty ReturnBB now. + Returns[0]->eraseFromParent(); + ReturnBB->eraseFromParent(); + } else if (!CB.use_empty()) { + // No returns, but something is using the return value of the call. Just + // nuke the result. + CB.replaceAllUsesWith(PoisonValue::get(CB.getType())); + } + + // Since we are now done with the Call/Invoke, we can delete it. + CB.eraseFromParent(); + + // If we inlined any musttail calls and the original return is now + // unreachable, delete it. It can only contain a bitcast and ret. + if (InlinedMustTailCalls && pred_empty(AfterCallBB)) + AfterCallBB->eraseFromParent(); + + // We should always be able to fold the entry block of the function into the + // single predecessor of the block... + assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!"); + BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0); + + // Splice the code entry block into calling block, right before the + // unconditional branch. + CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes + OrigBB->splice(Br->getIterator(), CalleeEntry); + + // Remove the unconditional branch. + Br->eraseFromParent(); + + // Now we can remove the CalleeEntry block, which is now empty. + CalleeEntry->eraseFromParent(); + + // If we inserted a phi node, check to see if it has a single value (e.g. all + // the entries are the same or undef). If so, remove the PHI so it doesn't + // block other optimizations. + if (PHI) { + AssumptionCache *AC = + IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr; + auto &DL = Caller->getParent()->getDataLayout(); + if (Value *V = simplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) { + PHI->replaceAllUsesWith(V); + PHI->eraseFromParent(); + } + } + + if (MergeAttributes) + AttributeFuncs::mergeAttributesForInlining(*Caller, *CalledFunc); + + return InlineResult::success(); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/InstructionNamer.cpp new file mode 100644 index 0000000000..f3499c9c8a --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/InstructionNamer.cpp @@ -0,0 +1,78 @@ +//===- InstructionNamer.cpp - Give anonymous instructions names -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is a little utility pass that gives instructions names, this is mostly +// useful when diffing the effect of an optimization because deleting an +// unnamed instruction can change all other instruction numbering, making the +// diff very noisy. 
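+// For example, with this pass the anonymous IR
+//   %0 = load i32, ptr %p
+//   %1 = add i32 %0, 1
+// is printed as something like
+//   %i = load i32, ptr %p
+//   %i1 = add i32 %i, 1
+// so inserting or deleting one instruction no longer renumbers all the others.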
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/InstructionNamer.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils.h" + +using namespace llvm; + +namespace { +void nameInstructions(Function &F) { + for (auto &Arg : F.args()) { + if (!Arg.hasName()) + Arg.setName("arg"); + } + + for (BasicBlock &BB : F) { + if (!BB.hasName()) + BB.setName("bb"); + + for (Instruction &I : BB) { + if (!I.hasName() && !I.getType()->isVoidTy()) + I.setName("i"); + } + } +} + +struct InstNamer : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + InstNamer() : FunctionPass(ID) { + initializeInstNamerPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &Info) const override { + Info.setPreservesAll(); + } + + bool runOnFunction(Function &F) override { + nameInstructions(F); + return true; + } +}; + + char InstNamer::ID = 0; + } // namespace + +INITIALIZE_PASS(InstNamer, "instnamer", + "Assign names to anonymous instructions", false, false) +char &llvm::InstructionNamerID = InstNamer::ID; +//===----------------------------------------------------------------------===// +// +// InstructionNamer - Give any unnamed non-void instructions "tmp" names. +// +FunctionPass *llvm::createInstructionNamerPass() { + return new InstNamer(); +} + +PreservedAnalyses InstructionNamerPass::run(Function &F, + FunctionAnalysisManager &FAM) { + nameInstructions(F); + return PreservedAnalyses::all(); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/IntegerDivision.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/IntegerDivision.cpp new file mode 100644 index 0000000000..cea095408b --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/IntegerDivision.cpp @@ -0,0 +1,639 @@ +//===-- IntegerDivision.cpp - Expand integer division ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains an implementation of 32bit and 64bit scalar integer +// division for targets that don't have native support. It's largely derived +// from compiler-rt's implementations of __udivsi3 and __udivmoddi4, +// but hand-tuned for targets that prefer less control flow. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/IntegerDivision.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" + +using namespace llvm; + +#define DEBUG_TYPE "integer-division" + +/// Generate code to compute the remainder of two signed integers. Returns the +/// remainder, which will have the sign of the dividend. Builder's insert point +/// should be pointing where the caller wants code generated, e.g. at the srem +/// instruction. This will generate a urem in the process, and Builder's insert +/// point will be pointing at the uren (if present, i.e. 
not folded), ready to +/// be expanded if the user wishes +static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + unsigned BitWidth = Dividend->getType()->getIntegerBitWidth(); + ConstantInt *Shift = Builder.getIntN(BitWidth, BitWidth - 1); + + // Following instructions are generated for both i32 (shift 31) and + // i64 (shift 63). + + // ; %dividend_sgn = ashr i32 %dividend, 31 + // ; %divisor_sgn = ashr i32 %divisor, 31 + // ; %dvd_xor = xor i32 %dividend, %dividend_sgn + // ; %dvs_xor = xor i32 %divisor, %divisor_sgn + // ; %u_dividend = sub i32 %dvd_xor, %dividend_sgn + // ; %u_divisor = sub i32 %dvs_xor, %divisor_sgn + // ; %urem = urem i32 %dividend, %divisor + // ; %xored = xor i32 %urem, %dividend_sgn + // ; %srem = sub i32 %xored, %dividend_sgn + Dividend = Builder.CreateFreeze(Dividend); + Divisor = Builder.CreateFreeze(Divisor); + Value *DividendSign = Builder.CreateAShr(Dividend, Shift); + Value *DivisorSign = Builder.CreateAShr(Divisor, Shift); + Value *DvdXor = Builder.CreateXor(Dividend, DividendSign); + Value *DvsXor = Builder.CreateXor(Divisor, DivisorSign); + Value *UDividend = Builder.CreateSub(DvdXor, DividendSign); + Value *UDivisor = Builder.CreateSub(DvsXor, DivisorSign); + Value *URem = Builder.CreateURem(UDividend, UDivisor); + Value *Xored = Builder.CreateXor(URem, DividendSign); + Value *SRem = Builder.CreateSub(Xored, DividendSign); + + if (Instruction *URemInst = dyn_cast<Instruction>(URem)) + Builder.SetInsertPoint(URemInst); + + return SRem; +} + + +/// Generate code to compute the remainder of two unsigned integers. Returns the +/// remainder. Builder's insert point should be pointing where the caller wants +/// code generated, e.g. at the urem instruction. This will generate a udiv in +/// the process, and Builder's insert point will be pointing at the udiv (if +/// present, i.e. not folded), ready to be expanded if the user wishes +static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + // Remainder = Dividend - Quotient*Divisor + + // Following instructions are generated for both i32 and i64 + + // ; %quotient = udiv i32 %dividend, %divisor + // ; %product = mul i32 %divisor, %quotient + // ; %remainder = sub i32 %dividend, %product + Dividend = Builder.CreateFreeze(Dividend); + Divisor = Builder.CreateFreeze(Divisor); + Value *Quotient = Builder.CreateUDiv(Dividend, Divisor); + Value *Product = Builder.CreateMul(Divisor, Quotient); + Value *Remainder = Builder.CreateSub(Dividend, Product); + + if (Instruction *UDiv = dyn_cast<Instruction>(Quotient)) + Builder.SetInsertPoint(UDiv); + + return Remainder; +} + +/// Generate code to divide two signed integers. Returns the quotient, rounded +/// towards 0. Builder's insert point should be pointing where the caller wants +/// code generated, e.g. at the sdiv instruction. This will generate a udiv in +/// the process, and Builder's insert point will be pointing at the udiv (if +/// present, i.e. not folded), ready to be expanded if the user wishes. +static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + // Implementation taken from compiler-rt's __divsi3 and __divdi3 + + unsigned BitWidth = Dividend->getType()->getIntegerBitWidth(); + ConstantInt *Shift = Builder.getIntN(BitWidth, BitWidth - 1); + + // Following instructions are generated for both i32 (shift 31) and + // i64 (shift 63). 
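+  // As a concrete i32 example, dividend = -7 and divisor = 2 yields
+  // %tmp = -1, %tmp1 = 0, %u_dvnd = 7, %u_dvsr = 2, %q_sgn = -1, %q_mag = 3
+  // and finally %q = -3, i.e. the quotient rounded towards zero.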
+ + // ; %tmp = ashr i32 %dividend, 31 + // ; %tmp1 = ashr i32 %divisor, 31 + // ; %tmp2 = xor i32 %tmp, %dividend + // ; %u_dvnd = sub nsw i32 %tmp2, %tmp + // ; %tmp3 = xor i32 %tmp1, %divisor + // ; %u_dvsr = sub nsw i32 %tmp3, %tmp1 + // ; %q_sgn = xor i32 %tmp1, %tmp + // ; %q_mag = udiv i32 %u_dvnd, %u_dvsr + // ; %tmp4 = xor i32 %q_mag, %q_sgn + // ; %q = sub i32 %tmp4, %q_sgn + Dividend = Builder.CreateFreeze(Dividend); + Divisor = Builder.CreateFreeze(Divisor); + Value *Tmp = Builder.CreateAShr(Dividend, Shift); + Value *Tmp1 = Builder.CreateAShr(Divisor, Shift); + Value *Tmp2 = Builder.CreateXor(Tmp, Dividend); + Value *U_Dvnd = Builder.CreateSub(Tmp2, Tmp); + Value *Tmp3 = Builder.CreateXor(Tmp1, Divisor); + Value *U_Dvsr = Builder.CreateSub(Tmp3, Tmp1); + Value *Q_Sgn = Builder.CreateXor(Tmp1, Tmp); + Value *Q_Mag = Builder.CreateUDiv(U_Dvnd, U_Dvsr); + Value *Tmp4 = Builder.CreateXor(Q_Mag, Q_Sgn); + Value *Q = Builder.CreateSub(Tmp4, Q_Sgn); + + if (Instruction *UDiv = dyn_cast<Instruction>(Q_Mag)) + Builder.SetInsertPoint(UDiv); + + return Q; +} + +/// Generates code to divide two unsigned scalar 32-bit or 64-bit integers. +/// Returns the quotient, rounded towards 0. Builder's insert point should +/// point where the caller wants code generated, e.g. at the udiv instruction. +static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + // The basic algorithm can be found in the compiler-rt project's + // implementation of __udivsi3.c. Here, we do a lower-level IR based approach + // that's been hand-tuned to lessen the amount of control flow involved. + + // Some helper values + IntegerType *DivTy = cast<IntegerType>(Dividend->getType()); + unsigned BitWidth = DivTy->getBitWidth(); + + ConstantInt *Zero = ConstantInt::get(DivTy, 0); + ConstantInt *One = ConstantInt::get(DivTy, 1); + ConstantInt *NegOne = ConstantInt::getSigned(DivTy, -1); + ConstantInt *MSB = ConstantInt::get(DivTy, BitWidth - 1); + + ConstantInt *True = Builder.getTrue(); + + BasicBlock *IBB = Builder.GetInsertBlock(); + Function *F = IBB->getParent(); + Function *CTLZ = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, + DivTy); + + // Our CFG is going to look like: + // +---------------------+ + // | special-cases | + // | ... | + // +---------------------+ + // | | + // | +----------+ + // | | bb1 | + // | | ... | + // | +----------+ + // | | | + // | | +------------+ + // | | | preheader | + // | | | ... | + // | | +------------+ + // | | | + // | | | +---+ + // | | | | | + // | | +------------+ | + // | | | do-while | | + // | | | ... | | + // | | +------------+ | + // | | | | | + // | +-----------+ +---+ + // | | loop-exit | + // | | ... | + // | +-----------+ + // | | + // +-------+ + // | ... 
| + // | end | + // +-------+ + BasicBlock *SpecialCases = Builder.GetInsertBlock(); + SpecialCases->setName(Twine(SpecialCases->getName(), "_udiv-special-cases")); + BasicBlock *End = SpecialCases->splitBasicBlock(Builder.GetInsertPoint(), + "udiv-end"); + BasicBlock *LoopExit = BasicBlock::Create(Builder.getContext(), + "udiv-loop-exit", F, End); + BasicBlock *DoWhile = BasicBlock::Create(Builder.getContext(), + "udiv-do-while", F, End); + BasicBlock *Preheader = BasicBlock::Create(Builder.getContext(), + "udiv-preheader", F, End); + BasicBlock *BB1 = BasicBlock::Create(Builder.getContext(), + "udiv-bb1", F, End); + + // We'll be overwriting the terminator to insert our extra blocks + SpecialCases->getTerminator()->eraseFromParent(); + + // Same instructions are generated for both i32 (msb 31) and i64 (msb 63). + + // First off, check for special cases: dividend or divisor is zero, divisor + // is greater than dividend, and divisor is 1. + // ; special-cases: + // ; %ret0_1 = icmp eq i32 %divisor, 0 + // ; %ret0_2 = icmp eq i32 %dividend, 0 + // ; %ret0_3 = or i1 %ret0_1, %ret0_2 + // ; %tmp0 = tail call i32 @llvm.ctlz.i32(i32 %divisor, i1 true) + // ; %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %dividend, i1 true) + // ; %sr = sub nsw i32 %tmp0, %tmp1 + // ; %ret0_4 = icmp ugt i32 %sr, 31 + // ; %ret0 = select i1 %ret0_3, i1 true, i1 %ret0_4 + // ; %retDividend = icmp eq i32 %sr, 31 + // ; %retVal = select i1 %ret0, i32 0, i32 %dividend + // ; %earlyRet = select i1 %ret0, i1 true, %retDividend + // ; br i1 %earlyRet, label %end, label %bb1 + Builder.SetInsertPoint(SpecialCases); + Divisor = Builder.CreateFreeze(Divisor); + Dividend = Builder.CreateFreeze(Dividend); + Value *Ret0_1 = Builder.CreateICmpEQ(Divisor, Zero); + Value *Ret0_2 = Builder.CreateICmpEQ(Dividend, Zero); + Value *Ret0_3 = Builder.CreateOr(Ret0_1, Ret0_2); + Value *Tmp0 = Builder.CreateCall(CTLZ, {Divisor, True}); + Value *Tmp1 = Builder.CreateCall(CTLZ, {Dividend, True}); + Value *SR = Builder.CreateSub(Tmp0, Tmp1); + Value *Ret0_4 = Builder.CreateICmpUGT(SR, MSB); + Value *Ret0 = Builder.CreateLogicalOr(Ret0_3, Ret0_4); + Value *RetDividend = Builder.CreateICmpEQ(SR, MSB); + Value *RetVal = Builder.CreateSelect(Ret0, Zero, Dividend); + Value *EarlyRet = Builder.CreateLogicalOr(Ret0, RetDividend); + Builder.CreateCondBr(EarlyRet, End, BB1); + + // ; bb1: ; preds = %special-cases + // ; %sr_1 = add i32 %sr, 1 + // ; %tmp2 = sub i32 31, %sr + // ; %q = shl i32 %dividend, %tmp2 + // ; %skipLoop = icmp eq i32 %sr_1, 0 + // ; br i1 %skipLoop, label %loop-exit, label %preheader + Builder.SetInsertPoint(BB1); + Value *SR_1 = Builder.CreateAdd(SR, One); + Value *Tmp2 = Builder.CreateSub(MSB, SR); + Value *Q = Builder.CreateShl(Dividend, Tmp2); + Value *SkipLoop = Builder.CreateICmpEQ(SR_1, Zero); + Builder.CreateCondBr(SkipLoop, LoopExit, Preheader); + + // ; preheader: ; preds = %bb1 + // ; %tmp3 = lshr i32 %dividend, %sr_1 + // ; %tmp4 = add i32 %divisor, -1 + // ; br label %do-while + Builder.SetInsertPoint(Preheader); + Value *Tmp3 = Builder.CreateLShr(Dividend, SR_1); + Value *Tmp4 = Builder.CreateAdd(Divisor, NegOne); + Builder.CreateBr(DoWhile); + + // ; do-while: ; preds = %do-while, %preheader + // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ] + // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ] + // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ] + // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ] + // ; %tmp5 = shl i32 %r_1, 1 + // ; %tmp6 = lshr i32 %q_2, 31 + // ; 
%tmp7 = or i32 %tmp5, %tmp6 + // ; %tmp8 = shl i32 %q_2, 1 + // ; %q_1 = or i32 %carry_1, %tmp8 + // ; %tmp9 = sub i32 %tmp4, %tmp7 + // ; %tmp10 = ashr i32 %tmp9, 31 + // ; %carry = and i32 %tmp10, 1 + // ; %tmp11 = and i32 %tmp10, %divisor + // ; %r = sub i32 %tmp7, %tmp11 + // ; %sr_2 = add i32 %sr_3, -1 + // ; %tmp12 = icmp eq i32 %sr_2, 0 + // ; br i1 %tmp12, label %loop-exit, label %do-while + Builder.SetInsertPoint(DoWhile); + PHINode *Carry_1 = Builder.CreatePHI(DivTy, 2); + PHINode *SR_3 = Builder.CreatePHI(DivTy, 2); + PHINode *R_1 = Builder.CreatePHI(DivTy, 2); + PHINode *Q_2 = Builder.CreatePHI(DivTy, 2); + Value *Tmp5 = Builder.CreateShl(R_1, One); + Value *Tmp6 = Builder.CreateLShr(Q_2, MSB); + Value *Tmp7 = Builder.CreateOr(Tmp5, Tmp6); + Value *Tmp8 = Builder.CreateShl(Q_2, One); + Value *Q_1 = Builder.CreateOr(Carry_1, Tmp8); + Value *Tmp9 = Builder.CreateSub(Tmp4, Tmp7); + Value *Tmp10 = Builder.CreateAShr(Tmp9, MSB); + Value *Carry = Builder.CreateAnd(Tmp10, One); + Value *Tmp11 = Builder.CreateAnd(Tmp10, Divisor); + Value *R = Builder.CreateSub(Tmp7, Tmp11); + Value *SR_2 = Builder.CreateAdd(SR_3, NegOne); + Value *Tmp12 = Builder.CreateICmpEQ(SR_2, Zero); + Builder.CreateCondBr(Tmp12, LoopExit, DoWhile); + + // ; loop-exit: ; preds = %do-while, %bb1 + // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ] + // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ] + // ; %tmp13 = shl i32 %q_3, 1 + // ; %q_4 = or i32 %carry_2, %tmp13 + // ; br label %end + Builder.SetInsertPoint(LoopExit); + PHINode *Carry_2 = Builder.CreatePHI(DivTy, 2); + PHINode *Q_3 = Builder.CreatePHI(DivTy, 2); + Value *Tmp13 = Builder.CreateShl(Q_3, One); + Value *Q_4 = Builder.CreateOr(Carry_2, Tmp13); + Builder.CreateBr(End); + + // ; end: ; preds = %loop-exit, %special-cases + // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ] + // ; ret i32 %q_5 + Builder.SetInsertPoint(End, End->begin()); + PHINode *Q_5 = Builder.CreatePHI(DivTy, 2); + + // Populate the Phis, since all values have now been created. Our Phis were: + // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ] + Carry_1->addIncoming(Zero, Preheader); + Carry_1->addIncoming(Carry, DoWhile); + // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ] + SR_3->addIncoming(SR_1, Preheader); + SR_3->addIncoming(SR_2, DoWhile); + // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ] + R_1->addIncoming(Tmp3, Preheader); + R_1->addIncoming(R, DoWhile); + // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ] + Q_2->addIncoming(Q, Preheader); + Q_2->addIncoming(Q_1, DoWhile); + // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ] + Carry_2->addIncoming(Zero, BB1); + Carry_2->addIncoming(Carry, DoWhile); + // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ] + Q_3->addIncoming(Q, BB1); + Q_3->addIncoming(Q_1, DoWhile); + // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ] + Q_5->addIncoming(Q_4, LoopExit); + Q_5->addIncoming(RetVal, SpecialCases); + + return Q_5; +} + +/// Generate code to calculate the remainder of two integers, replacing Rem with +/// the generated code. This currently generates code using the udiv expansion, +/// but future work includes generating more specialized code, e.g. when more +/// information about the operands are known. +/// +/// Replace Rem with generated code. 
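+/// A typical caller (sketch only, assuming a Function &F and a local
+/// SmallVector<BinaryOperator *, 8> Worklist) first collects the remainder
+/// instructions and only then expands them, so the function is not mutated
+/// while being iterated:
+///   for (Instruction &I : instructions(F))
+///     if (auto *BO = dyn_cast<BinaryOperator>(&I))
+///       if (BO->getOpcode() == Instruction::SRem ||
+///           BO->getOpcode() == Instruction::URem)
+///         Worklist.push_back(BO);
+///   for (BinaryOperator *Rem : Worklist)
+///     expandRemainder(Rem);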
+bool llvm::expandRemainder(BinaryOperator *Rem) { + assert((Rem->getOpcode() == Instruction::SRem || + Rem->getOpcode() == Instruction::URem) && + "Trying to expand remainder from a non-remainder function"); + + IRBuilder<> Builder(Rem); + + assert(!Rem->getType()->isVectorTy() && "Div over vectors not supported"); + + // First prepare the sign if it's a signed remainder + if (Rem->getOpcode() == Instruction::SRem) { + Value *Remainder = generateSignedRemainderCode(Rem->getOperand(0), + Rem->getOperand(1), Builder); + + // Check whether this is the insert point while Rem is still valid. + bool IsInsertPoint = Rem->getIterator() == Builder.GetInsertPoint(); + Rem->replaceAllUsesWith(Remainder); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + // If we didn't actually generate an urem instruction, we're done + // This happens for example if the input were constant. In this case the + // Builder insertion point was unchanged + if (IsInsertPoint) + return true; + + BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint()); + Rem = BO; + } + + Value *Remainder = generatedUnsignedRemainderCode(Rem->getOperand(0), + Rem->getOperand(1), + Builder); + + Rem->replaceAllUsesWith(Remainder); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + // Expand the udiv + if (BinaryOperator *UDiv = dyn_cast<BinaryOperator>(Builder.GetInsertPoint())) { + assert(UDiv->getOpcode() == Instruction::UDiv && "Non-udiv in expansion?"); + expandDivision(UDiv); + } + + return true; +} + +/// Generate code to divide two integers, replacing Div with the generated +/// code. This currently generates code similarly to compiler-rt's +/// implementations, but future work includes generating more specialized code +/// when more information about the operands are known. +/// +/// Replace Div with generated code. +bool llvm::expandDivision(BinaryOperator *Div) { + assert((Div->getOpcode() == Instruction::SDiv || + Div->getOpcode() == Instruction::UDiv) && + "Trying to expand division from a non-division function"); + + IRBuilder<> Builder(Div); + + assert(!Div->getType()->isVectorTy() && "Div over vectors not supported"); + + // First prepare the sign if it's a signed division + if (Div->getOpcode() == Instruction::SDiv) { + // Lower the code to unsigned division, and reset Div to point to the udiv. + Value *Quotient = generateSignedDivisionCode(Div->getOperand(0), + Div->getOperand(1), Builder); + + // Check whether this is the insert point while Div is still valid. + bool IsInsertPoint = Div->getIterator() == Builder.GetInsertPoint(); + Div->replaceAllUsesWith(Quotient); + Div->dropAllReferences(); + Div->eraseFromParent(); + + // If we didn't actually generate an udiv instruction, we're done + // This happens for example if the input were constant. In this case the + // Builder insertion point was unchanged + if (IsInsertPoint) + return true; + + BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint()); + Div = BO; + } + + // Insert the unsigned division code + Value *Quotient = generateUnsignedDivisionCode(Div->getOperand(0), + Div->getOperand(1), + Builder); + Div->replaceAllUsesWith(Quotient); + Div->dropAllReferences(); + Div->eraseFromParent(); + + return true; +} + +/// Generate code to compute the remainder of two integers of bitwidth up to +/// 32 bits. 
Uses the above routines and extends the inputs/truncates the +/// outputs to operate in 32 bits; that is, these routines are good for targets +/// that have no or very little suppport for smaller than 32 bit integer +/// arithmetic. +/// +/// Replace Rem with emulation code. +bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) { + assert((Rem->getOpcode() == Instruction::SRem || + Rem->getOpcode() == Instruction::URem) && + "Trying to expand remainder from a non-remainder function"); + + Type *RemTy = Rem->getType(); + assert(!RemTy->isVectorTy() && "Div over vectors not supported"); + + unsigned RemTyBitWidth = RemTy->getIntegerBitWidth(); + + assert(RemTyBitWidth <= 32 && + "Div of bitwidth greater than 32 not supported"); + + if (RemTyBitWidth == 32) + return expandRemainder(Rem); + + // If bitwidth smaller than 32 extend inputs, extend output and proceed + // with 32 bit division. + IRBuilder<> Builder(Rem); + + Value *ExtDividend; + Value *ExtDivisor; + Value *ExtRem; + Value *Trunc; + Type *Int32Ty = Builder.getInt32Ty(); + + if (Rem->getOpcode() == Instruction::SRem) { + ExtDividend = Builder.CreateSExt(Rem->getOperand(0), Int32Ty); + ExtDivisor = Builder.CreateSExt(Rem->getOperand(1), Int32Ty); + ExtRem = Builder.CreateSRem(ExtDividend, ExtDivisor); + } else { + ExtDividend = Builder.CreateZExt(Rem->getOperand(0), Int32Ty); + ExtDivisor = Builder.CreateZExt(Rem->getOperand(1), Int32Ty); + ExtRem = Builder.CreateURem(ExtDividend, ExtDivisor); + } + Trunc = Builder.CreateTrunc(ExtRem, RemTy); + + Rem->replaceAllUsesWith(Trunc); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + return expandRemainder(cast<BinaryOperator>(ExtRem)); +} + +/// Generate code to compute the remainder of two integers of bitwidth up to +/// 64 bits. Uses the above routines and extends the inputs/truncates the +/// outputs to operate in 64 bits. +/// +/// Replace Rem with emulation code. +bool llvm::expandRemainderUpTo64Bits(BinaryOperator *Rem) { + assert((Rem->getOpcode() == Instruction::SRem || + Rem->getOpcode() == Instruction::URem) && + "Trying to expand remainder from a non-remainder function"); + + Type *RemTy = Rem->getType(); + assert(!RemTy->isVectorTy() && "Div over vectors not supported"); + + unsigned RemTyBitWidth = RemTy->getIntegerBitWidth(); + + if (RemTyBitWidth >= 64) + return expandRemainder(Rem); + + // If bitwidth smaller than 64 extend inputs, extend output and proceed + // with 64 bit division. + IRBuilder<> Builder(Rem); + + Value *ExtDividend; + Value *ExtDivisor; + Value *ExtRem; + Value *Trunc; + Type *Int64Ty = Builder.getInt64Ty(); + + if (Rem->getOpcode() == Instruction::SRem) { + ExtDividend = Builder.CreateSExt(Rem->getOperand(0), Int64Ty); + ExtDivisor = Builder.CreateSExt(Rem->getOperand(1), Int64Ty); + ExtRem = Builder.CreateSRem(ExtDividend, ExtDivisor); + } else { + ExtDividend = Builder.CreateZExt(Rem->getOperand(0), Int64Ty); + ExtDivisor = Builder.CreateZExt(Rem->getOperand(1), Int64Ty); + ExtRem = Builder.CreateURem(ExtDividend, ExtDivisor); + } + Trunc = Builder.CreateTrunc(ExtRem, RemTy); + + Rem->replaceAllUsesWith(Trunc); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + return expandRemainder(cast<BinaryOperator>(ExtRem)); +} + +/// Generate code to divide two integers of bitwidth up to 32 bits. Uses the +/// above routines and extends the inputs/truncates the outputs to operate +/// in 32 bits; that is, these routines are good for targets that have no +/// or very little support for smaller than 32 bit integer arithmetic. 
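+/// For an i16 udiv, for example, the expansion zero-extends both operands to
+/// i32, emits a 32-bit udiv (which is then expanded by expandDivision), and
+/// truncates the result back:
+///   %a32 = zext i16 %a to i32
+///   %b32 = zext i16 %b to i32
+///   %q32 = udiv i32 %a32, %b32   ; subsequently expanded
+///   %q = trunc i32 %q32 to i16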
+/// +/// Replace Div with emulation code. +bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) { + assert((Div->getOpcode() == Instruction::SDiv || + Div->getOpcode() == Instruction::UDiv) && + "Trying to expand division from a non-division function"); + + Type *DivTy = Div->getType(); + assert(!DivTy->isVectorTy() && "Div over vectors not supported"); + + unsigned DivTyBitWidth = DivTy->getIntegerBitWidth(); + + assert(DivTyBitWidth <= 32 && "Div of bitwidth greater than 32 not supported"); + + if (DivTyBitWidth == 32) + return expandDivision(Div); + + // If bitwidth smaller than 32 extend inputs, extend output and proceed + // with 32 bit division. + IRBuilder<> Builder(Div); + + Value *ExtDividend; + Value *ExtDivisor; + Value *ExtDiv; + Value *Trunc; + Type *Int32Ty = Builder.getInt32Ty(); + + if (Div->getOpcode() == Instruction::SDiv) { + ExtDividend = Builder.CreateSExt(Div->getOperand(0), Int32Ty); + ExtDivisor = Builder.CreateSExt(Div->getOperand(1), Int32Ty); + ExtDiv = Builder.CreateSDiv(ExtDividend, ExtDivisor); + } else { + ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int32Ty); + ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int32Ty); + ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor); + } + Trunc = Builder.CreateTrunc(ExtDiv, DivTy); + + Div->replaceAllUsesWith(Trunc); + Div->dropAllReferences(); + Div->eraseFromParent(); + + return expandDivision(cast<BinaryOperator>(ExtDiv)); +} + +/// Generate code to divide two integers of bitwidth up to 64 bits. Uses the +/// above routines and extends the inputs/truncates the outputs to operate +/// in 64 bits. +/// +/// Replace Div with emulation code. +bool llvm::expandDivisionUpTo64Bits(BinaryOperator *Div) { + assert((Div->getOpcode() == Instruction::SDiv || + Div->getOpcode() == Instruction::UDiv) && + "Trying to expand division from a non-division function"); + + Type *DivTy = Div->getType(); + assert(!DivTy->isVectorTy() && "Div over vectors not supported"); + + unsigned DivTyBitWidth = DivTy->getIntegerBitWidth(); + + if (DivTyBitWidth >= 64) + return expandDivision(Div); + + // If bitwidth smaller than 64 extend inputs, extend output and proceed + // with 64 bit division. + IRBuilder<> Builder(Div); + + Value *ExtDividend; + Value *ExtDivisor; + Value *ExtDiv; + Value *Trunc; + Type *Int64Ty = Builder.getInt64Ty(); + + if (Div->getOpcode() == Instruction::SDiv) { + ExtDividend = Builder.CreateSExt(Div->getOperand(0), Int64Ty); + ExtDivisor = Builder.CreateSExt(Div->getOperand(1), Int64Ty); + ExtDiv = Builder.CreateSDiv(ExtDividend, ExtDivisor); + } else { + ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int64Ty); + ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int64Ty); + ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor); + } + Trunc = Builder.CreateTrunc(ExtDiv, DivTy); + + Div->replaceAllUsesWith(Trunc); + Div->dropAllReferences(); + Div->eraseFromParent(); + + return expandDivision(cast<BinaryOperator>(ExtDiv)); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LCSSA.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LCSSA.cpp new file mode 100644 index 0000000000..af79dc456e --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/LCSSA.cpp @@ -0,0 +1,519 @@ +//===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass transforms loops by placing phi nodes at the end of the loops for +// all values that are live across the loop boundary. For example, it turns +// the left into the right code: +// +// for (...) for (...) +// if (c) if (c) +// X1 = ... X1 = ... +// else else +// X2 = ... X2 = ... +// X3 = phi(X1, X2) X3 = phi(X1, X2) +// ... = X3 + 4 X4 = phi(X3) +// ... = X4 + 4 +// +// This is still valid LLVM; the extra phi nodes are purely redundant, and will +// be trivially eliminated by InstCombine. The major benefit of this +// transformation is that it makes many other loop optimizations, such as +// LoopUnswitching, simpler. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LCSSA.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/PredIteratorCache.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" +using namespace llvm; + +#define DEBUG_TYPE "lcssa" + +STATISTIC(NumLCSSA, "Number of live out of a loop variables"); + +#ifdef EXPENSIVE_CHECKS +static bool VerifyLoopLCSSA = true; +#else +static bool VerifyLoopLCSSA = false; +#endif +static cl::opt<bool, true> + VerifyLoopLCSSAFlag("verify-loop-lcssa", cl::location(VerifyLoopLCSSA), + cl::Hidden, + cl::desc("Verify loop lcssa form (time consuming)")); + +/// Return true if the specified block is in the list. +static bool isExitBlock(BasicBlock *BB, + const SmallVectorImpl<BasicBlock *> &ExitBlocks) { + return is_contained(ExitBlocks, BB); +} + +/// For every instruction from the worklist, check to see if it has any uses +/// that are outside the current loop. If so, insert LCSSA PHI nodes and +/// rewrite the uses. +bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, + const DominatorTree &DT, const LoopInfo &LI, + ScalarEvolution *SE, IRBuilderBase &Builder, + SmallVectorImpl<PHINode *> *PHIsToRemove) { + SmallVector<Use *, 16> UsesToRewrite; + SmallSetVector<PHINode *, 16> LocalPHIsToRemove; + PredIteratorCache PredCache; + bool Changed = false; + + IRBuilderBase::InsertPointGuard InsertPtGuard(Builder); + + // Cache the Loop ExitBlocks across this loop. We expect to get a lot of + // instructions within the same loops, computing the exit blocks is + // expensive, and we're not mutating the loop structure. 
+ SmallDenseMap<Loop*, SmallVector<BasicBlock *,1>> LoopExitBlocks; + + while (!Worklist.empty()) { + UsesToRewrite.clear(); + + Instruction *I = Worklist.pop_back_val(); + assert(!I->getType()->isTokenTy() && "Tokens shouldn't be in the worklist"); + BasicBlock *InstBB = I->getParent(); + Loop *L = LI.getLoopFor(InstBB); + assert(L && "Instruction belongs to a BB that's not part of a loop"); + if (!LoopExitBlocks.count(L)) + L->getExitBlocks(LoopExitBlocks[L]); + assert(LoopExitBlocks.count(L)); + const SmallVectorImpl<BasicBlock *> &ExitBlocks = LoopExitBlocks[L]; + + if (ExitBlocks.empty()) + continue; + + for (Use &U : make_early_inc_range(I->uses())) { + Instruction *User = cast<Instruction>(U.getUser()); + BasicBlock *UserBB = User->getParent(); + + // Skip uses in unreachable blocks. + if (!DT.isReachableFromEntry(UserBB)) { + U.set(PoisonValue::get(I->getType())); + continue; + } + + // For practical purposes, we consider that the use in a PHI + // occurs in the respective predecessor block. For more info, + // see the `phi` doc in LangRef and the LCSSA doc. + if (auto *PN = dyn_cast<PHINode>(User)) + UserBB = PN->getIncomingBlock(U); + + if (InstBB != UserBB && !L->contains(UserBB)) + UsesToRewrite.push_back(&U); + } + + // If there are no uses outside the loop, exit with no change. + if (UsesToRewrite.empty()) + continue; + + ++NumLCSSA; // We are applying the transformation + + // Invoke instructions are special in that their result value is not + // available along their unwind edge. The code below tests to see whether + // DomBB dominates the value, so adjust DomBB to the normal destination + // block, which is effectively where the value is first usable. + BasicBlock *DomBB = InstBB; + if (auto *Inv = dyn_cast<InvokeInst>(I)) + DomBB = Inv->getNormalDest(); + + const DomTreeNode *DomNode = DT.getNode(DomBB); + + SmallVector<PHINode *, 16> AddedPHIs; + SmallVector<PHINode *, 8> PostProcessPHIs; + + SmallVector<PHINode *, 4> InsertedPHIs; + SSAUpdater SSAUpdate(&InsertedPHIs); + SSAUpdate.Initialize(I->getType(), I->getName()); + + // Force re-computation of I, as some users now need to use the new PHI + // node. + if (SE) + SE->forgetValue(I); + + // Insert the LCSSA phi's into all of the exit blocks dominated by the + // value, and add them to the Phi's map. + for (BasicBlock *ExitBB : ExitBlocks) { + if (!DT.dominates(DomNode, DT.getNode(ExitBB))) + continue; + + // If we already inserted something for this BB, don't reprocess it. + if (SSAUpdate.HasValueForBlock(ExitBB)) + continue; + Builder.SetInsertPoint(&ExitBB->front()); + PHINode *PN = Builder.CreatePHI(I->getType(), PredCache.size(ExitBB), + I->getName() + ".lcssa"); + // Get the debug location from the original instruction. + PN->setDebugLoc(I->getDebugLoc()); + + // Add inputs from inside the loop for this PHI. This is valid + // because `I` dominates `ExitBB` (checked above). This implies + // that every incoming block/edge is dominated by `I` as well, + // i.e. we can add uses of `I` to those incoming edges/append to the incoming + // blocks without violating the SSA dominance property. + for (BasicBlock *Pred : PredCache.get(ExitBB)) { + PN->addIncoming(I, Pred); + + // If the exit block has a predecessor not within the loop, arrange for + // the incoming value use corresponding to that predecessor to be + // rewritten in terms of a different LCSSA PHI. 
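+        // This happens when the exit block is not dedicated, i.e. it can also
+        // be reached from outside the loop; the incoming value we just added
+        // along that outside edge is itself a use of I outside the loop, so
+        // it is queued to be rewritten like any other such use.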
+ if (!L->contains(Pred)) + UsesToRewrite.push_back( + &PN->getOperandUse(PN->getOperandNumForIncomingValue( + PN->getNumIncomingValues() - 1))); + } + + AddedPHIs.push_back(PN); + + // Remember that this phi makes the value alive in this block. + SSAUpdate.AddAvailableValue(ExitBB, PN); + + // LoopSimplify might fail to simplify some loops (e.g. when indirect + // branches are involved). In such situations, it might happen that an + // exit for Loop L1 is the header of a disjoint Loop L2. Thus, when we + // create PHIs in such an exit block, we are also inserting PHIs into L2's + // header. This could break LCSSA form for L2 because these inserted PHIs + // can also have uses outside of L2. Remember all PHIs in such situation + // as to revisit than later on. FIXME: Remove this if indirectbr support + // into LoopSimplify gets improved. + if (auto *OtherLoop = LI.getLoopFor(ExitBB)) + if (!L->contains(OtherLoop)) + PostProcessPHIs.push_back(PN); + } + + // Rewrite all uses outside the loop in terms of the new PHIs we just + // inserted. + for (Use *UseToRewrite : UsesToRewrite) { + Instruction *User = cast<Instruction>(UseToRewrite->getUser()); + BasicBlock *UserBB = User->getParent(); + + // For practical purposes, we consider that the use in a PHI + // occurs in the respective predecessor block. For more info, + // see the `phi` doc in LangRef and the LCSSA doc. + if (auto *PN = dyn_cast<PHINode>(User)) + UserBB = PN->getIncomingBlock(*UseToRewrite); + + // If this use is in an exit block, rewrite to use the newly inserted PHI. + // This is required for correctness because SSAUpdate doesn't handle uses + // in the same block. It assumes the PHI we inserted is at the end of the + // block. + if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) { + UseToRewrite->set(&UserBB->front()); + continue; + } + + // If we added a single PHI, it must dominate all uses and we can directly + // rename it. + if (AddedPHIs.size() == 1) { + UseToRewrite->set(AddedPHIs[0]); + continue; + } + + // Otherwise, do full PHI insertion. + SSAUpdate.RewriteUse(*UseToRewrite); + } + + SmallVector<DbgValueInst *, 4> DbgValues; + llvm::findDbgValues(DbgValues, I); + + // Update pre-existing debug value uses that reside outside the loop. + for (auto *DVI : DbgValues) { + BasicBlock *UserBB = DVI->getParent(); + if (InstBB == UserBB || L->contains(UserBB)) + continue; + // We currently only handle debug values residing in blocks that were + // traversed while rewriting the uses. If we inserted just a single PHI, + // we will handle all relevant debug values. + Value *V = AddedPHIs.size() == 1 ? AddedPHIs[0] + : SSAUpdate.FindValueForBlock(UserBB); + if (V) + DVI->replaceVariableLocationOp(I, V); + } + + // SSAUpdater might have inserted phi-nodes inside other loops. We'll need + // to post-process them to keep LCSSA form. + for (PHINode *InsertedPN : InsertedPHIs) { + if (auto *OtherLoop = LI.getLoopFor(InsertedPN->getParent())) + if (!L->contains(OtherLoop)) + PostProcessPHIs.push_back(InsertedPN); + } + + // Post process PHI instructions that were inserted into another disjoint + // loop and update their exits properly. + for (auto *PostProcessPN : PostProcessPHIs) + if (!PostProcessPN->use_empty()) + Worklist.push_back(PostProcessPN); + + // Keep track of PHI nodes that we want to remove because they did not have + // any uses rewritten. 
+    for (PHINode *PN : AddedPHIs)
+      if (PN->use_empty())
+        LocalPHIsToRemove.insert(PN);
+
+    Changed = true;
+  }
+
+  // Remove PHI nodes that did not have any uses rewritten or add them to
+  // PHIsToRemove, so the caller can remove them after some additional cleanup.
+  // We need to redo the use_empty() check here, because even if the PHI node
+  // wasn't used when added to LocalPHIsToRemove, later added PHI nodes can be
+  // using it. This cleanup is not guaranteed to handle trees/cycles of PHI
+  // nodes that are only used by each other. Such situations have only been
+  // noticed when the input IR contains unreachable code, and leaving some extra
+  // redundant PHI nodes in such situations is considered a minor problem.
+  if (PHIsToRemove) {
+    PHIsToRemove->append(LocalPHIsToRemove.begin(), LocalPHIsToRemove.end());
+  } else {
+    for (PHINode *PN : LocalPHIsToRemove)
+      if (PN->use_empty())
+        PN->eraseFromParent();
+  }
+  return Changed;
+}
+
+// Compute the set of BasicBlocks in the loop `L` dominating at least one exit.
+static void computeBlocksDominatingExits(
+    Loop &L, const DominatorTree &DT, SmallVector<BasicBlock *, 8> &ExitBlocks,
+    SmallSetVector<BasicBlock *, 8> &BlocksDominatingExits) {
+  // We start from the exit blocks, as every block trivially dominates itself
+  // (not strictly).
+  SmallVector<BasicBlock *, 8> BBWorklist(ExitBlocks);
+
+  while (!BBWorklist.empty()) {
+    BasicBlock *BB = BBWorklist.pop_back_val();
+
+    // Check if this is a loop header. If this is the case, we're done.
+    if (L.getHeader() == BB)
+      continue;
+
+    // Otherwise, add its immediate predecessor in the dominator tree to the
+    // worklist, unless we visited it already.
+    BasicBlock *IDomBB = DT.getNode(BB)->getIDom()->getBlock();
+
+    // Exit blocks can have an immediate dominator not belonging to the
+    // loop. If an exit block is immediately dominated by a block outside the
+    // loop, then not all paths from that dominator to the exit block go
+    // through the loop.
+    // Example:
+    //
+    // |---- A
+    // |    |
+    // |    B<--
+    // |    |  |
+    // |---> C --
+    // |
+    // D
+    //
+    // C is the exit block of the loop and it's immediately dominated by A,
+    // which doesn't belong to the loop.
+    if (!L.contains(IDomBB))
+      continue;
+
+    if (BlocksDominatingExits.insert(IDomBB))
+      BBWorklist.push_back(IDomBB);
+  }
+}
+
+bool llvm::formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
+                     ScalarEvolution *SE) {
+  bool Changed = false;
+
+#ifdef EXPENSIVE_CHECKS
+  // Verify all sub-loops are in LCSSA form already.
+  for (Loop *SubLoop: L) {
+    (void)SubLoop; // Silence unused variable warning.
+    assert(SubLoop->isRecursivelyLCSSAForm(DT, *LI) && "Subloop not in LCSSA!");
+  }
+#endif
+
+  SmallVector<BasicBlock *, 8> ExitBlocks;
+  L.getExitBlocks(ExitBlocks);
+  if (ExitBlocks.empty())
+    return false;
+
+  SmallSetVector<BasicBlock *, 8> BlocksDominatingExits;
+
+  // We want to avoid use-scanning by leveraging dominance information: if a
+  // block doesn't dominate any of the loop exits, then none of the values
+  // defined in the loop can be used outside of it. We compute the set of
+  // blocks fulfilling that condition in advance by walking the dominator tree
+  // upwards until we hit a loop header.
+  computeBlocksDominatingExits(L, DT, ExitBlocks, BlocksDominatingExits);
+
+  SmallVector<Instruction *, 8> Worklist;
+
+  // Look at all the instructions in the loop, checking to see if they have uses
+  // outside the loop. If so, put them into the worklist to rewrite those uses.
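+  // For example, given
+  //   loop:
+  //     %v = add i32 %a, %b
+  //     br i1 %c, label %loop, label %exit
+  //   exit:
+  //     %use = mul i32 %v, 2
+  // %v is live out of the loop, so it ends up on the worklist and
+  // formLCSSAForInstructions later inserts "%v.lcssa = phi i32 [ %v, %loop ]"
+  // in %exit and rewrites %use to be "mul i32 %v.lcssa, 2".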
+ for (BasicBlock *BB : BlocksDominatingExits) { + // Skip blocks that are part of any sub-loops, they must be in LCSSA + // already. + if (LI->getLoopFor(BB) != &L) + continue; + for (Instruction &I : *BB) { + // Reject two common cases fast: instructions with no uses (like stores) + // and instructions with one use that is in the same block as this. + if (I.use_empty() || + (I.hasOneUse() && I.user_back()->getParent() == BB && + !isa<PHINode>(I.user_back()))) + continue; + + // Tokens cannot be used in PHI nodes, so we skip over them. + // We can run into tokens which are live out of a loop with catchswitch + // instructions in Windows EH if the catchswitch has one catchpad which + // is inside the loop and another which is not. + if (I.getType()->isTokenTy()) + continue; + + Worklist.push_back(&I); + } + } + + IRBuilder<> Builder(L.getHeader()->getContext()); + Changed = formLCSSAForInstructions(Worklist, DT, *LI, SE, Builder); + + // If we modified the code, remove any caches about the loop from SCEV to + // avoid dangling entries. + // FIXME: This is a big hammer, can we clear the cache more selectively? + if (SE && Changed) + SE->forgetLoop(&L); + + assert(L.isLCSSAForm(DT)); + + return Changed; +} + +/// Process a loop nest depth first. +bool llvm::formLCSSARecursively(Loop &L, const DominatorTree &DT, + const LoopInfo *LI, ScalarEvolution *SE) { + bool Changed = false; + + // Recurse depth-first through inner loops. + for (Loop *SubLoop : L.getSubLoops()) + Changed |= formLCSSARecursively(*SubLoop, DT, LI, SE); + + Changed |= formLCSSA(L, DT, LI, SE); + return Changed; +} + +/// Process all loops in the function, inner-most out. +static bool formLCSSAOnAllLoops(const LoopInfo *LI, const DominatorTree &DT, + ScalarEvolution *SE) { + bool Changed = false; + for (const auto &L : *LI) + Changed |= formLCSSARecursively(*L, DT, LI, SE); + return Changed; +} + +namespace { +struct LCSSAWrapperPass : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + LCSSAWrapperPass() : FunctionPass(ID) { + initializeLCSSAWrapperPassPass(*PassRegistry::getPassRegistry()); + } + + // Cached analysis information for the current function. + DominatorTree *DT; + LoopInfo *LI; + ScalarEvolution *SE; + + bool runOnFunction(Function &F) override; + void verifyAnalysis() const override { + // This check is very expensive. On the loop intensive compiles it may cause + // up to 10x slowdown. Currently it's disabled by default. LPPassManager + // always does limited form of the LCSSA verification. Similar reasoning + // was used for the LoopInfo verifier. + if (VerifyLoopLCSSA) { + assert(all_of(*LI, + [&](Loop *L) { + return L->isRecursivelyLCSSAForm(*DT, *LI); + }) && + "LCSSA form is broken!"); + } + }; + + /// This transformation requires natural loop information & requires that + /// loop preheaders be inserted into the CFG. It maintains both of these, + /// as well as the CFG. It also requires dominator information. 
+ void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); + AU.addPreservedID(LoopSimplifyID); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addPreserved<BasicAAWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addPreserved<ScalarEvolutionWrapperPass>(); + AU.addPreserved<SCEVAAWrapperPass>(); + AU.addPreserved<BranchProbabilityInfoWrapperPass>(); + AU.addPreserved<MemorySSAWrapperPass>(); + + // This is needed to perform LCSSA verification inside LPPassManager. + AU.addRequired<LCSSAVerificationPass>(); + AU.addPreserved<LCSSAVerificationPass>(); + } +}; +} + +char LCSSAWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass", + false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LCSSAVerificationPass) +INITIALIZE_PASS_END(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass", + false, false) + +Pass *llvm::createLCSSAPass() { return new LCSSAWrapperPass(); } +char &llvm::LCSSAID = LCSSAWrapperPass::ID; + +/// Transform \p F into loop-closed SSA form. +bool LCSSAWrapperPass::runOnFunction(Function &F) { + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); + SE = SEWP ? &SEWP->getSE() : nullptr; + + return formLCSSAOnAllLoops(LI, *DT, SE); +} + +PreservedAnalyses LCSSAPass::run(Function &F, FunctionAnalysisManager &AM) { + auto &LI = AM.getResult<LoopAnalysis>(F); + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + auto *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F); + if (!formLCSSAOnAllLoops(&LI, DT, SE)) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); + PA.preserve<ScalarEvolutionAnalysis>(); + // BPI maps terminators to probabilities; since we don't modify the CFG, no + // updates are needed to preserve it. + PA.preserve<BranchProbabilityAnalysis>(); + PA.preserve<MemorySSAAnalysis>(); + return PA; +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LibCallsShrinkWrap.cpp new file mode 100644 index 0000000000..5dd469c7af --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/LibCallsShrinkWrap.cpp @@ -0,0 +1,562 @@ +//===-- LibCallsShrinkWrap.cpp ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass shrink-wraps a call to a function if the result is not used. +// The call can set errno but is otherwise side effect free. For example: +// sqrt(val); +// is transformed to +// if (val < 0) +// sqrt(val); +// Even if the result of the library call is not being used, the compiler cannot +// safely delete the call because the function can set errno on error +// conditions. +// Note that in many functions, the error condition depends solely on the +// incoming parameter. In this optimization, we generate the condition that +// leads to the errno being set and use it to shrink-wrap the call. Since the +// chances of hitting the error condition are low, the runtime call is +// effectively eliminated.
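+//
+// For illustration only (the value names and the !0 metadata id below are
+// invented; the block names match those created by shrinkWrapCI later in this
+// file), the guarded sqrt call above roughly corresponds to IR of the form:
+//
+//     %cond = fcmp olt double %val, 0.000000e+00
+//     br i1 %cond, label %cdce.call, label %cdce.end, !prof !0
+//   cdce.call:
+//     call double @sqrt(double %val)
+//     br label %cdce.end
+//   cdce.end:
+//
+// where the branch weights in !0 mark the cdce.call block as cold.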
+// +// These partially dead calls are usually results of C++ abstraction penalty +// exposed by inlining. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +#include <cmath> + +using namespace llvm; + +#define DEBUG_TYPE "libcalls-shrinkwrap" + +STATISTIC(NumWrappedOneCond, "Number of One-Condition Wrappers Inserted"); +STATISTIC(NumWrappedTwoCond, "Number of Two-Condition Wrappers Inserted"); + +namespace { +class LibCallsShrinkWrapLegacyPass : public FunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + explicit LibCallsShrinkWrapLegacyPass() : FunctionPass(ID) { + initializeLibCallsShrinkWrapLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnFunction(Function &F) override; +}; +} + +char LibCallsShrinkWrapLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(LibCallsShrinkWrapLegacyPass, "libcalls-shrinkwrap", + "Conditionally eliminate dead library calls", false, + false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(LibCallsShrinkWrapLegacyPass, "libcalls-shrinkwrap", + "Conditionally eliminate dead library calls", false, false) + +namespace { +class LibCallsShrinkWrap : public InstVisitor<LibCallsShrinkWrap> { +public: + LibCallsShrinkWrap(const TargetLibraryInfo &TLI, DominatorTree *DT) + : TLI(TLI), DT(DT){}; + void visitCallInst(CallInst &CI) { checkCandidate(CI); } + bool perform() { + bool Changed = false; + for (auto &CI : WorkList) { + LLVM_DEBUG(dbgs() << "CDCE calls: " << CI->getCalledFunction()->getName() + << "\n"); + if (perform(CI)) { + Changed = true; + LLVM_DEBUG(dbgs() << "Transformed\n"); + } + } + return Changed; + } + +private: + bool perform(CallInst *CI); + void checkCandidate(CallInst &CI); + void shrinkWrapCI(CallInst *CI, Value *Cond); + bool performCallDomainErrorOnly(CallInst *CI, const LibFunc &Func); + bool performCallErrors(CallInst *CI, const LibFunc &Func); + bool performCallRangeErrorOnly(CallInst *CI, const LibFunc &Func); + Value *generateOneRangeCond(CallInst *CI, const LibFunc &Func); + Value *generateTwoRangeCond(CallInst *CI, const LibFunc &Func); + Value *generateCondForPow(CallInst *CI, const LibFunc &Func); + + // Create an OR of two conditions. + Value *createOrCond(CallInst *CI, CmpInst::Predicate Cmp, float Val, + CmpInst::Predicate Cmp2, float Val2) { + IRBuilder<> BBBuilder(CI); + Value *Arg = CI->getArgOperand(0); + auto Cond2 = createCond(BBBuilder, Arg, Cmp2, Val2); + auto Cond1 = createCond(BBBuilder, Arg, Cmp, Val); + return BBBuilder.CreateOr(Cond1, Cond2); + } + + // Create a single condition using IRBuilder. 
+ Value *createCond(IRBuilder<> &BBBuilder, Value *Arg, CmpInst::Predicate Cmp, + float Val) { + Constant *V = ConstantFP::get(BBBuilder.getContext(), APFloat(Val)); + if (!Arg->getType()->isFloatTy()) + V = ConstantExpr::getFPExtend(V, Arg->getType()); + return BBBuilder.CreateFCmp(Cmp, Arg, V); + } + + // Create a single condition. + Value *createCond(CallInst *CI, CmpInst::Predicate Cmp, float Val) { + IRBuilder<> BBBuilder(CI); + Value *Arg = CI->getArgOperand(0); + return createCond(BBBuilder, Arg, Cmp, Val); + } + + const TargetLibraryInfo &TLI; + DominatorTree *DT; + SmallVector<CallInst *, 16> WorkList; +}; +} // end anonymous namespace + +// Perform the transformation to calls with errno set by domain error. +bool LibCallsShrinkWrap::performCallDomainErrorOnly(CallInst *CI, + const LibFunc &Func) { + Value *Cond = nullptr; + + switch (Func) { + case LibFunc_acos: // DomainError: (x < -1 || x > 1) + case LibFunc_acosf: // Same as acos + case LibFunc_acosl: // Same as acos + case LibFunc_asin: // DomainError: (x < -1 || x > 1) + case LibFunc_asinf: // Same as asin + case LibFunc_asinl: // Same as asin + { + ++NumWrappedTwoCond; + Cond = createOrCond(CI, CmpInst::FCMP_OLT, -1.0f, CmpInst::FCMP_OGT, 1.0f); + break; + } + case LibFunc_cos: // DomainError: (x == +inf || x == -inf) + case LibFunc_cosf: // Same as cos + case LibFunc_cosl: // Same as cos + case LibFunc_sin: // DomainError: (x == +inf || x == -inf) + case LibFunc_sinf: // Same as sin + case LibFunc_sinl: // Same as sin + { + ++NumWrappedTwoCond; + Cond = createOrCond(CI, CmpInst::FCMP_OEQ, INFINITY, CmpInst::FCMP_OEQ, + -INFINITY); + break; + } + case LibFunc_acosh: // DomainError: (x < 1) + case LibFunc_acoshf: // Same as acosh + case LibFunc_acoshl: // Same as acosh + { + ++NumWrappedOneCond; + Cond = createCond(CI, CmpInst::FCMP_OLT, 1.0f); + break; + } + case LibFunc_sqrt: // DomainError: (x < 0) + case LibFunc_sqrtf: // Same as sqrt + case LibFunc_sqrtl: // Same as sqrt + { + ++NumWrappedOneCond; + Cond = createCond(CI, CmpInst::FCMP_OLT, 0.0f); + break; + } + default: + return false; + } + shrinkWrapCI(CI, Cond); + return true; +} + +// Perform the transformation to calls with errno set by range error. +bool LibCallsShrinkWrap::performCallRangeErrorOnly(CallInst *CI, + const LibFunc &Func) { + Value *Cond = nullptr; + + switch (Func) { + case LibFunc_cosh: + case LibFunc_coshf: + case LibFunc_coshl: + case LibFunc_exp: + case LibFunc_expf: + case LibFunc_expl: + case LibFunc_exp10: + case LibFunc_exp10f: + case LibFunc_exp10l: + case LibFunc_exp2: + case LibFunc_exp2f: + case LibFunc_exp2l: + case LibFunc_sinh: + case LibFunc_sinhf: + case LibFunc_sinhl: { + Cond = generateTwoRangeCond(CI, Func); + break; + } + case LibFunc_expm1: // RangeError: (709, inf) + case LibFunc_expm1f: // RangeError: (88, inf) + case LibFunc_expm1l: // RangeError: (11356, inf) + { + Cond = generateOneRangeCond(CI, Func); + break; + } + default: + return false; + } + shrinkWrapCI(CI, Cond); + return true; +} + +// Perform the transformation to calls with errno set by combination of errors. 
+bool LibCallsShrinkWrap::performCallErrors(CallInst *CI, + const LibFunc &Func) { + Value *Cond = nullptr; + + switch (Func) { + case LibFunc_atanh: // DomainError: (x < -1 || x > 1) + // PoleError: (x == -1 || x == 1) + // Overall Cond: (x <= -1 || x >= 1) + case LibFunc_atanhf: // Same as atanh + case LibFunc_atanhl: // Same as atanh + { + ++NumWrappedTwoCond; + Cond = createOrCond(CI, CmpInst::FCMP_OLE, -1.0f, CmpInst::FCMP_OGE, 1.0f); + break; + } + case LibFunc_log: // DomainError: (x < 0) + // PoleError: (x == 0) + // Overall Cond: (x <= 0) + case LibFunc_logf: // Same as log + case LibFunc_logl: // Same as log + case LibFunc_log10: // Same as log + case LibFunc_log10f: // Same as log + case LibFunc_log10l: // Same as log + case LibFunc_log2: // Same as log + case LibFunc_log2f: // Same as log + case LibFunc_log2l: // Same as log + case LibFunc_logb: // Same as log + case LibFunc_logbf: // Same as log + case LibFunc_logbl: // Same as log + { + ++NumWrappedOneCond; + Cond = createCond(CI, CmpInst::FCMP_OLE, 0.0f); + break; + } + case LibFunc_log1p: // DomainError: (x < -1) + // PoleError: (x == -1) + // Overall Cond: (x <= -1) + case LibFunc_log1pf: // Same as log1p + case LibFunc_log1pl: // Same as log1p + { + ++NumWrappedOneCond; + Cond = createCond(CI, CmpInst::FCMP_OLE, -1.0f); + break; + } + case LibFunc_pow: // DomainError: x < 0 and y is noninteger + // PoleError: x == 0 and y < 0 + // RangeError: overflow or underflow + case LibFunc_powf: + case LibFunc_powl: { + Cond = generateCondForPow(CI, Func); + if (Cond == nullptr) + return false; + break; + } + default: + return false; + } + assert(Cond && "performCallErrors should not see an empty condition"); + shrinkWrapCI(CI, Cond); + return true; +} + +// Checks if CI is a candidate for shrinkwrapping and put it into work list if +// true. +void LibCallsShrinkWrap::checkCandidate(CallInst &CI) { + if (CI.isNoBuiltin()) + return; + // A possible improvement is to handle the calls with the return value being + // used. If there is API for fast libcall implementation without setting + // errno, we can use the same framework to direct/wrap the call to the fast + // API in the error free path, and leave the original call in the slow path. + if (!CI.use_empty()) + return; + + LibFunc Func; + Function *Callee = CI.getCalledFunction(); + if (!Callee) + return; + if (!TLI.getLibFunc(*Callee, Func) || !TLI.has(Func)) + return; + + if (CI.arg_empty()) + return; + // TODO: Handle long double in other formats. + Type *ArgType = CI.getArgOperand(0)->getType(); + if (!(ArgType->isFloatTy() || ArgType->isDoubleTy() || + ArgType->isX86_FP80Ty())) + return; + + WorkList.push_back(&CI); +} + +// Generate the upper bound condition for RangeError. +Value *LibCallsShrinkWrap::generateOneRangeCond(CallInst *CI, + const LibFunc &Func) { + float UpperBound; + switch (Func) { + case LibFunc_expm1: // RangeError: (709, inf) + UpperBound = 709.0f; + break; + case LibFunc_expm1f: // RangeError: (88, inf) + UpperBound = 88.0f; + break; + case LibFunc_expm1l: // RangeError: (11356, inf) + UpperBound = 11356.0f; + break; + default: + llvm_unreachable("Unhandled library call!"); + } + + ++NumWrappedOneCond; + return createCond(CI, CmpInst::FCMP_OGT, UpperBound); +} + +// Generate the lower and upper bound condition for RangeError. 
+Value *LibCallsShrinkWrap::generateTwoRangeCond(CallInst *CI, + const LibFunc &Func) { + float UpperBound, LowerBound; + switch (Func) { + case LibFunc_cosh: // RangeError: (x < -710 || x > 710) + case LibFunc_sinh: // Same as cosh + LowerBound = -710.0f; + UpperBound = 710.0f; + break; + case LibFunc_coshf: // RangeError: (x < -89 || x > 89) + case LibFunc_sinhf: // Same as coshf + LowerBound = -89.0f; + UpperBound = 89.0f; + break; + case LibFunc_coshl: // RangeError: (x < -11357 || x > 11357) + case LibFunc_sinhl: // Same as coshl + LowerBound = -11357.0f; + UpperBound = 11357.0f; + break; + case LibFunc_exp: // RangeError: (x < -745 || x > 709) + LowerBound = -745.0f; + UpperBound = 709.0f; + break; + case LibFunc_expf: // RangeError: (x < -103 || x > 88) + LowerBound = -103.0f; + UpperBound = 88.0f; + break; + case LibFunc_expl: // RangeError: (x < -11399 || x > 11356) + LowerBound = -11399.0f; + UpperBound = 11356.0f; + break; + case LibFunc_exp10: // RangeError: (x < -323 || x > 308) + LowerBound = -323.0f; + UpperBound = 308.0f; + break; + case LibFunc_exp10f: // RangeError: (x < -45 || x > 38) + LowerBound = -45.0f; + UpperBound = 38.0f; + break; + case LibFunc_exp10l: // RangeError: (x < -4950 || x > 4932) + LowerBound = -4950.0f; + UpperBound = 4932.0f; + break; + case LibFunc_exp2: // RangeError: (x < -1074 || x > 1023) + LowerBound = -1074.0f; + UpperBound = 1023.0f; + break; + case LibFunc_exp2f: // RangeError: (x < -149 || x > 127) + LowerBound = -149.0f; + UpperBound = 127.0f; + break; + case LibFunc_exp2l: // RangeError: (x < -16445 || x > 11383) + LowerBound = -16445.0f; + UpperBound = 11383.0f; + break; + default: + llvm_unreachable("Unhandled library call!"); + } + + ++NumWrappedTwoCond; + return createOrCond(CI, CmpInst::FCMP_OGT, UpperBound, CmpInst::FCMP_OLT, + LowerBound); +} + +// For pow(x,y), We only handle the following cases: +// (1) x is a constant && (x >= 1) && (x < MaxUInt8) +// Cond is: (y > 127) +// (2) x is a value coming from an integer type. +// (2.1) if x's bit_size == 8 +// Cond: (x <= 0 || y > 128) +// (2.2) if x's bit_size is 16 +// Cond: (x <= 0 || y > 64) +// (2.3) if x's bit_size is 32 +// Cond: (x <= 0 || y > 32) +// Support for powl(x,y) and powf(x,y) are TBD. +// +// Note that condition can be more conservative than the actual condition +// (i.e. we might invoke the calls that will not set the errno.). +// +Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI, + const LibFunc &Func) { + // FIXME: LibFunc_powf and powl TBD. + if (Func != LibFunc_pow) { + LLVM_DEBUG(dbgs() << "Not handled powf() and powl()\n"); + return nullptr; + } + + Value *Base = CI->getArgOperand(0); + Value *Exp = CI->getArgOperand(1); + IRBuilder<> BBBuilder(CI); + + // Constant Base case. + if (ConstantFP *CF = dyn_cast<ConstantFP>(Base)) { + double D = CF->getValueAPF().convertToDouble(); + if (D < 1.0f || D > APInt::getMaxValue(8).getZExtValue()) { + LLVM_DEBUG(dbgs() << "Not handled pow(): constant base out of range\n"); + return nullptr; + } + + ++NumWrappedOneCond; + Constant *V = ConstantFP::get(CI->getContext(), APFloat(127.0f)); + if (!Exp->getType()->isFloatTy()) + V = ConstantExpr::getFPExtend(V, Exp->getType()); + return BBBuilder.CreateFCmp(CmpInst::FCMP_OGT, Exp, V); + } + + // If the Base value coming from an integer type. 
+ Instruction *I = dyn_cast<Instruction>(Base); + if (!I) { + LLVM_DEBUG(dbgs() << "Not handled pow(): FP type base\n"); + return nullptr; + } + unsigned Opcode = I->getOpcode(); + if (Opcode == Instruction::UIToFP || Opcode == Instruction::SIToFP) { + unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits(); + float UpperV = 0.0f; + if (BW == 8) + UpperV = 128.0f; + else if (BW == 16) + UpperV = 64.0f; + else if (BW == 32) + UpperV = 32.0f; + else { + LLVM_DEBUG(dbgs() << "Not handled pow(): type too wide\n"); + return nullptr; + } + + ++NumWrappedTwoCond; + Constant *V = ConstantFP::get(CI->getContext(), APFloat(UpperV)); + Constant *V0 = ConstantFP::get(CI->getContext(), APFloat(0.0f)); + if (!Exp->getType()->isFloatTy()) + V = ConstantExpr::getFPExtend(V, Exp->getType()); + if (!Base->getType()->isFloatTy()) + V0 = ConstantExpr::getFPExtend(V0, Exp->getType()); + + Value *Cond = BBBuilder.CreateFCmp(CmpInst::FCMP_OGT, Exp, V); + Value *Cond0 = BBBuilder.CreateFCmp(CmpInst::FCMP_OLE, Base, V0); + return BBBuilder.CreateOr(Cond0, Cond); + } + LLVM_DEBUG(dbgs() << "Not handled pow(): base not from integer convert\n"); + return nullptr; +} + +// Wrap conditions that can potentially generate errno to the library call. +void LibCallsShrinkWrap::shrinkWrapCI(CallInst *CI, Value *Cond) { + assert(Cond != nullptr && "ShrinkWrapCI is not expecting an empty call inst"); + MDNode *BranchWeights = + MDBuilder(CI->getContext()).createBranchWeights(1, 2000); + + Instruction *NewInst = + SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights, DT); + BasicBlock *CallBB = NewInst->getParent(); + CallBB->setName("cdce.call"); + BasicBlock *SuccBB = CallBB->getSingleSuccessor(); + assert(SuccBB && "The split block should have a single successor"); + SuccBB->setName("cdce.end"); + CI->removeFromParent(); + CI->insertInto(CallBB, CallBB->getFirstInsertionPt()); + LLVM_DEBUG(dbgs() << "== Basic Block After =="); + LLVM_DEBUG(dbgs() << *CallBB->getSinglePredecessor() << *CallBB + << *CallBB->getSingleSuccessor() << "\n"); +} + +// Perform the transformation to a single candidate. +bool LibCallsShrinkWrap::perform(CallInst *CI) { + LibFunc Func; + Function *Callee = CI->getCalledFunction(); + assert(Callee && "perform() should apply to a non-empty callee"); + TLI.getLibFunc(*Callee, Func); + assert(Func && "perform() is not expecting an empty function"); + + if (performCallDomainErrorOnly(CI, Func) || performCallRangeErrorOnly(CI, Func)) + return true; + return performCallErrors(CI, Func); +} + +void LibCallsShrinkWrapLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); +} + +static bool runImpl(Function &F, const TargetLibraryInfo &TLI, + DominatorTree *DT) { + if (F.hasFnAttribute(Attribute::OptimizeForSize)) + return false; + LibCallsShrinkWrap CCDCE(TLI, DT); + CCDCE.visit(F); + bool Changed = CCDCE.perform(); + +// Verify the dominator after we've updated it locally. + assert(!DT || DT->verify(DominatorTree::VerificationLevel::Fast)); + return Changed; +} + +bool LibCallsShrinkWrapLegacyPass::runOnFunction(Function &F) { + auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + auto *DT = DTWP ? 
&DTWP->getDomTree() : nullptr; + return runImpl(F, TLI, DT); +} + +namespace llvm { +char &LibCallsShrinkWrapPassID = LibCallsShrinkWrapLegacyPass::ID; + +// Public interface to LibCallsShrinkWrap pass. +FunctionPass *createLibCallsShrinkWrapPass() { + return new LibCallsShrinkWrapLegacyPass(); +} + +PreservedAnalyses LibCallsShrinkWrapPass::run(Function &F, + FunctionAnalysisManager &FAM) { + auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F); + auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F); + if (!runImpl(F, TLI, DT)) + return PreservedAnalyses::all(); + auto PA = PreservedAnalyses(); + PA.preserve<DominatorTreeAnalysis>(); + return PA; +} +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/Local.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/Local.cpp new file mode 100644 index 0000000000..31cdd2ee56 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/Local.cpp @@ -0,0 +1,3518 @@ +//===- Local.cpp - Functions to perform local transformations -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This family of functions perform various local transformations to the +// program. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumeBundleQueries.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/DomTreeUpdater.h" +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/ConstantRange.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsWebAssembly.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/ProfDataUtils.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" 
+#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <map> +#include <optional> +#include <utility> + +using namespace llvm; +using namespace llvm::PatternMatch; + +#define DEBUG_TYPE "local" + +STATISTIC(NumRemoved, "Number of unreachable basic blocks removed"); +STATISTIC(NumPHICSEs, "Number of PHI's that got CSE'd"); + +static cl::opt<bool> PHICSEDebugHash( + "phicse-debug-hash", +#ifdef EXPENSIVE_CHECKS + cl::init(true), +#else + cl::init(false), +#endif + cl::Hidden, + cl::desc("Perform extra assertion checking to verify that PHINodes's hash " + "function is well-behaved w.r.t. its isEqual predicate")); + +static cl::opt<unsigned> PHICSENumPHISmallSize( + "phicse-num-phi-smallsize", cl::init(32), cl::Hidden, + cl::desc( + "When the basic block contains not more than this number of PHI nodes, " + "perform a (faster!) exhaustive search instead of set-driven one.")); + +// Max recursion depth for collectBitParts used when detecting bswap and +// bitreverse idioms. +static const unsigned BitPartRecursionMaxDepth = 48; + +//===----------------------------------------------------------------------===// +// Local constant propagation. +// + +/// ConstantFoldTerminator - If a terminator instruction is predicated on a +/// constant value, convert it into an unconditional branch to the constant +/// destination. This is a nontrivial operation because the successors of this +/// basic block must have their PHI nodes updated. +/// Also calls RecursivelyDeleteTriviallyDeadInstructions() on any branch/switch +/// conditions and indirectbr addresses this might make dead if +/// DeleteDeadConditions is true. +bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, + const TargetLibraryInfo *TLI, + DomTreeUpdater *DTU) { + Instruction *T = BB->getTerminator(); + IRBuilder<> Builder(T); + + // Branch - See if we are conditional jumping on constant + if (auto *BI = dyn_cast<BranchInst>(T)) { + if (BI->isUnconditional()) return false; // Can't optimize uncond branch + + BasicBlock *Dest1 = BI->getSuccessor(0); + BasicBlock *Dest2 = BI->getSuccessor(1); + + if (Dest2 == Dest1) { // Conditional branch to same location? + // This branch matches something like this: + // br bool %cond, label %Dest, label %Dest + // and changes it into: br label %Dest + + // Let the basic block know that we are letting go of one copy of it. + assert(BI->getParent() && "Terminator not inserted in block!"); + Dest1->removePredecessor(BI->getParent()); + + // Replace the conditional branch with an unconditional one. + BranchInst *NewBI = Builder.CreateBr(Dest1); + + // Transfer the metadata to the new branch instruction. + NewBI->copyMetadata(*BI, {LLVMContext::MD_loop, LLVMContext::MD_dbg, + LLVMContext::MD_annotation}); + + Value *Cond = BI->getCondition(); + BI->eraseFromParent(); + if (DeleteDeadConditions) + RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); + return true; + } + + if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) { + // Are we branching on constant? + // YES. Change to unconditional branch... + BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2; + BasicBlock *OldDest = Cond->getZExtValue() ? Dest2 : Dest1; + + // Let the basic block know that we are letting go of it. 
Based on this, + // it will adjust it's PHI nodes. + OldDest->removePredecessor(BB); + + // Replace the conditional branch with an unconditional one. + BranchInst *NewBI = Builder.CreateBr(Destination); + + // Transfer the metadata to the new branch instruction. + NewBI->copyMetadata(*BI, {LLVMContext::MD_loop, LLVMContext::MD_dbg, + LLVMContext::MD_annotation}); + + BI->eraseFromParent(); + if (DTU) + DTU->applyUpdates({{DominatorTree::Delete, BB, OldDest}}); + return true; + } + + return false; + } + + if (auto *SI = dyn_cast<SwitchInst>(T)) { + // If we are switching on a constant, we can convert the switch to an + // unconditional branch. + auto *CI = dyn_cast<ConstantInt>(SI->getCondition()); + BasicBlock *DefaultDest = SI->getDefaultDest(); + BasicBlock *TheOnlyDest = DefaultDest; + + // If the default is unreachable, ignore it when searching for TheOnlyDest. + if (isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg()) && + SI->getNumCases() > 0) { + TheOnlyDest = SI->case_begin()->getCaseSuccessor(); + } + + bool Changed = false; + + // Figure out which case it goes to. + for (auto i = SI->case_begin(), e = SI->case_end(); i != e;) { + // Found case matching a constant operand? + if (i->getCaseValue() == CI) { + TheOnlyDest = i->getCaseSuccessor(); + break; + } + + // Check to see if this branch is going to the same place as the default + // dest. If so, eliminate it as an explicit compare. + if (i->getCaseSuccessor() == DefaultDest) { + MDNode *MD = getValidBranchWeightMDNode(*SI); + unsigned NCases = SI->getNumCases(); + // Fold the case metadata into the default if there will be any branches + // left, unless the metadata doesn't match the switch. + if (NCases > 1 && MD) { + // Collect branch weights into a vector. + SmallVector<uint32_t, 8> Weights; + extractBranchWeights(MD, Weights); + + // Merge weight of this case to the default weight. + unsigned idx = i->getCaseIndex(); + // TODO: Add overflow check. + Weights[0] += Weights[idx+1]; + // Remove weight for this case. + std::swap(Weights[idx+1], Weights.back()); + Weights.pop_back(); + SI->setMetadata(LLVMContext::MD_prof, + MDBuilder(BB->getContext()). + createBranchWeights(Weights)); + } + // Remove this entry. + BasicBlock *ParentBB = SI->getParent(); + DefaultDest->removePredecessor(ParentBB); + i = SI->removeCase(i); + e = SI->case_end(); + + // Removing this case may have made the condition constant. In that + // case, update CI and restart iteration through the cases. + if (auto *NewCI = dyn_cast<ConstantInt>(SI->getCondition())) { + CI = NewCI; + i = SI->case_begin(); + } + + Changed = true; + continue; + } + + // Otherwise, check to see if the switch only branches to one destination. + // We do this by reseting "TheOnlyDest" to null when we find two non-equal + // destinations. + if (i->getCaseSuccessor() != TheOnlyDest) + TheOnlyDest = nullptr; + + // Increment this iterator as we haven't removed the case. + ++i; + } + + if (CI && !TheOnlyDest) { + // Branching on a constant, but not any of the cases, go to the default + // successor. + TheOnlyDest = SI->getDefaultDest(); + } + + // If we found a single destination that we can fold the switch into, do so + // now. + if (TheOnlyDest) { + // Insert the new branch. + Builder.CreateBr(TheOnlyDest); + BasicBlock *BB = SI->getParent(); + + SmallSet<BasicBlock *, 8> RemovedSuccessors; + + // Remove entries from PHI nodes which we no longer branch to... 
+ BasicBlock *SuccToKeep = TheOnlyDest; + for (BasicBlock *Succ : successors(SI)) { + if (DTU && Succ != TheOnlyDest) + RemovedSuccessors.insert(Succ); + // Found case matching a constant operand? + if (Succ == SuccToKeep) { + SuccToKeep = nullptr; // Don't modify the first branch to TheOnlyDest + } else { + Succ->removePredecessor(BB); + } + } + + // Delete the old switch. + Value *Cond = SI->getCondition(); + SI->eraseFromParent(); + if (DeleteDeadConditions) + RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); + if (DTU) { + std::vector<DominatorTree::UpdateType> Updates; + Updates.reserve(RemovedSuccessors.size()); + for (auto *RemovedSuccessor : RemovedSuccessors) + Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor}); + DTU->applyUpdates(Updates); + } + return true; + } + + if (SI->getNumCases() == 1) { + // Otherwise, we can fold this switch into a conditional branch + // instruction if it has only one non-default destination. + auto FirstCase = *SI->case_begin(); + Value *Cond = Builder.CreateICmpEQ(SI->getCondition(), + FirstCase.getCaseValue(), "cond"); + + // Insert the new branch. + BranchInst *NewBr = Builder.CreateCondBr(Cond, + FirstCase.getCaseSuccessor(), + SI->getDefaultDest()); + SmallVector<uint32_t> Weights; + if (extractBranchWeights(*SI, Weights) && Weights.size() == 2) { + uint32_t DefWeight = Weights[0]; + uint32_t CaseWeight = Weights[1]; + // The TrueWeight should be the weight for the single case of SI. + NewBr->setMetadata(LLVMContext::MD_prof, + MDBuilder(BB->getContext()) + .createBranchWeights(CaseWeight, DefWeight)); + } + + // Update make.implicit metadata to the newly-created conditional branch. + MDNode *MakeImplicitMD = SI->getMetadata(LLVMContext::MD_make_implicit); + if (MakeImplicitMD) + NewBr->setMetadata(LLVMContext::MD_make_implicit, MakeImplicitMD); + + // Delete the old switch. + SI->eraseFromParent(); + return true; + } + return Changed; + } + + if (auto *IBI = dyn_cast<IndirectBrInst>(T)) { + // indirectbr blockaddress(@F, @BB) -> br label @BB + if (auto *BA = + dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) { + BasicBlock *TheOnlyDest = BA->getBasicBlock(); + SmallSet<BasicBlock *, 8> RemovedSuccessors; + + // Insert the new branch. + Builder.CreateBr(TheOnlyDest); + + BasicBlock *SuccToKeep = TheOnlyDest; + for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { + BasicBlock *DestBB = IBI->getDestination(i); + if (DTU && DestBB != TheOnlyDest) + RemovedSuccessors.insert(DestBB); + if (IBI->getDestination(i) == SuccToKeep) { + SuccToKeep = nullptr; + } else { + DestBB->removePredecessor(BB); + } + } + Value *Address = IBI->getAddress(); + IBI->eraseFromParent(); + if (DeleteDeadConditions) + // Delete pointer cast instructions. + RecursivelyDeleteTriviallyDeadInstructions(Address, TLI); + + // Also zap the blockaddress constant if there are no users remaining, + // otherwise the destination is still marked as having its address taken. + if (BA->use_empty()) + BA->destroyConstant(); + + // If we didn't find our destination in the IBI successor list, then we + // have undefined behavior. Replace the unconditional branch with an + // 'unreachable' instruction. 
+ if (SuccToKeep) { + BB->getTerminator()->eraseFromParent(); + new UnreachableInst(BB->getContext(), BB); + } + + if (DTU) { + std::vector<DominatorTree::UpdateType> Updates; + Updates.reserve(RemovedSuccessors.size()); + for (auto *RemovedSuccessor : RemovedSuccessors) + Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor}); + DTU->applyUpdates(Updates); + } + return true; + } + } + + return false; +} + +//===----------------------------------------------------------------------===// +// Local dead code elimination. +// + +/// isInstructionTriviallyDead - Return true if the result produced by the +/// instruction is not used, and the instruction has no side effects. +/// +bool llvm::isInstructionTriviallyDead(Instruction *I, + const TargetLibraryInfo *TLI) { + if (!I->use_empty()) + return false; + return wouldInstructionBeTriviallyDead(I, TLI); +} + +bool llvm::wouldInstructionBeTriviallyDeadOnUnusedPaths( + Instruction *I, const TargetLibraryInfo *TLI) { + // Instructions that are "markers" and have implied meaning on code around + // them (without explicit uses), are not dead on unused paths. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) + if (II->getIntrinsicID() == Intrinsic::stacksave || + II->getIntrinsicID() == Intrinsic::launder_invariant_group || + II->isLifetimeStartOrEnd()) + return false; + return wouldInstructionBeTriviallyDead(I, TLI); +} + +bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, + const TargetLibraryInfo *TLI) { + if (I->isTerminator()) + return false; + + // We don't want the landingpad-like instructions removed by anything this + // general. + if (I->isEHPad()) + return false; + + // We don't want debug info removed by anything this general, unless + // debug info is empty. + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) { + if (DDI->getAddress()) + return false; + return true; + } + if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) { + if (DVI->hasArgList() || DVI->getValue(0)) + return false; + return true; + } + if (DbgLabelInst *DLI = dyn_cast<DbgLabelInst>(I)) { + if (DLI->getLabel()) + return false; + return true; + } + + if (auto *CB = dyn_cast<CallBase>(I)) + if (isRemovableAlloc(CB, TLI)) + return true; + + if (!I->willReturn()) { + auto *II = dyn_cast<IntrinsicInst>(I); + if (!II) + return false; + + // TODO: These intrinsics are not safe to remove, because this may remove + // a well-defined trap. + switch (II->getIntrinsicID()) { + case Intrinsic::wasm_trunc_signed: + case Intrinsic::wasm_trunc_unsigned: + case Intrinsic::ptrauth_auth: + case Intrinsic::ptrauth_resign: + return true; + default: + return false; + } + } + + if (!I->mayHaveSideEffects()) + return true; + + // Special case intrinsics that "may have side effects" but can be deleted + // when dead. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + // Safe to delete llvm.stacksave and launder.invariant.group if dead. + if (II->getIntrinsicID() == Intrinsic::stacksave || + II->getIntrinsicID() == Intrinsic::launder_invariant_group) + return true; + + if (II->isLifetimeStartOrEnd()) { + auto *Arg = II->getArgOperand(1); + // Lifetime intrinsics are dead when their right-hand is undef. + if (isa<UndefValue>(Arg)) + return true; + // If the right-hand is an alloc, global, or argument and the only uses + // are lifetime intrinsics then the intrinsics are dead. 
+ if (isa<AllocaInst>(Arg) || isa<GlobalValue>(Arg) || isa<Argument>(Arg)) + return llvm::all_of(Arg->uses(), [](Use &Use) { + if (IntrinsicInst *IntrinsicUse = + dyn_cast<IntrinsicInst>(Use.getUser())) + return IntrinsicUse->isLifetimeStartOrEnd(); + return false; + }); + return false; + } + + // Assumptions are dead if their condition is trivially true. Guards on + // true are operationally no-ops. In the future we can consider more + // sophisticated tradeoffs for guards considering potential for check + // widening, but for now we keep things simple. + if ((II->getIntrinsicID() == Intrinsic::assume && + isAssumeWithEmptyBundle(cast<AssumeInst>(*II))) || + II->getIntrinsicID() == Intrinsic::experimental_guard) { + if (ConstantInt *Cond = dyn_cast<ConstantInt>(II->getArgOperand(0))) + return !Cond->isZero(); + + return false; + } + + if (auto *FPI = dyn_cast<ConstrainedFPIntrinsic>(I)) { + std::optional<fp::ExceptionBehavior> ExBehavior = + FPI->getExceptionBehavior(); + return *ExBehavior != fp::ebStrict; + } + } + + if (auto *Call = dyn_cast<CallBase>(I)) { + if (Value *FreedOp = getFreedOperand(Call, TLI)) + if (Constant *C = dyn_cast<Constant>(FreedOp)) + return C->isNullValue() || isa<UndefValue>(C); + if (isMathLibCallNoop(Call, TLI)) + return true; + } + + // Non-volatile atomic loads from constants can be removed. + if (auto *LI = dyn_cast<LoadInst>(I)) + if (auto *GV = dyn_cast<GlobalVariable>( + LI->getPointerOperand()->stripPointerCasts())) + if (!LI->isVolatile() && GV->isConstant()) + return true; + + return false; +} + +/// RecursivelyDeleteTriviallyDeadInstructions - If the specified value is a +/// trivially dead instruction, delete it. If that makes any of its operands +/// trivially dead, delete them too, recursively. Return true if any +/// instructions were deleted. +bool llvm::RecursivelyDeleteTriviallyDeadInstructions( + Value *V, const TargetLibraryInfo *TLI, MemorySSAUpdater *MSSAU, + std::function<void(Value *)> AboutToDeleteCallback) { + Instruction *I = dyn_cast<Instruction>(V); + if (!I || !isInstructionTriviallyDead(I, TLI)) + return false; + + SmallVector<WeakTrackingVH, 16> DeadInsts; + DeadInsts.push_back(I); + RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU, + AboutToDeleteCallback); + + return true; +} + +bool llvm::RecursivelyDeleteTriviallyDeadInstructionsPermissive( + SmallVectorImpl<WeakTrackingVH> &DeadInsts, const TargetLibraryInfo *TLI, + MemorySSAUpdater *MSSAU, + std::function<void(Value *)> AboutToDeleteCallback) { + unsigned S = 0, E = DeadInsts.size(), Alive = 0; + for (; S != E; ++S) { + auto *I = dyn_cast<Instruction>(DeadInsts[S]); + if (!I || !isInstructionTriviallyDead(I)) { + DeadInsts[S] = nullptr; + ++Alive; + } + } + if (Alive == E) + return false; + RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU, + AboutToDeleteCallback); + return true; +} + +void llvm::RecursivelyDeleteTriviallyDeadInstructions( + SmallVectorImpl<WeakTrackingVH> &DeadInsts, const TargetLibraryInfo *TLI, + MemorySSAUpdater *MSSAU, + std::function<void(Value *)> AboutToDeleteCallback) { + // Process the dead instruction list until empty. + while (!DeadInsts.empty()) { + Value *V = DeadInsts.pop_back_val(); + Instruction *I = cast_or_null<Instruction>(V); + if (!I) + continue; + assert(isInstructionTriviallyDead(I, TLI) && + "Live instruction found in dead worklist!"); + assert(I->use_empty() && "Instructions with uses are not dead."); + + // Don't lose the debug info while deleting the instructions. 
+ salvageDebugInfo(*I); + + if (AboutToDeleteCallback) + AboutToDeleteCallback(I); + + // Null out all of the instruction's operands to see if any operand becomes + // dead as we go. + for (Use &OpU : I->operands()) { + Value *OpV = OpU.get(); + OpU.set(nullptr); + + if (!OpV->use_empty()) + continue; + + // If the operand is an instruction that became dead as we nulled out the + // operand, and if it is 'trivially' dead, delete it in a future loop + // iteration. + if (Instruction *OpI = dyn_cast<Instruction>(OpV)) + if (isInstructionTriviallyDead(OpI, TLI)) + DeadInsts.push_back(OpI); + } + if (MSSAU) + MSSAU->removeMemoryAccess(I); + + I->eraseFromParent(); + } +} + +bool llvm::replaceDbgUsesWithUndef(Instruction *I) { + SmallVector<DbgVariableIntrinsic *, 1> DbgUsers; + findDbgUsers(DbgUsers, I); + for (auto *DII : DbgUsers) + DII->setKillLocation(); + return !DbgUsers.empty(); +} + +/// areAllUsesEqual - Check whether the uses of a value are all the same. +/// This is similar to Instruction::hasOneUse() except this will also return +/// true when there are no uses or multiple uses that all refer to the same +/// value. +static bool areAllUsesEqual(Instruction *I) { + Value::user_iterator UI = I->user_begin(); + Value::user_iterator UE = I->user_end(); + if (UI == UE) + return true; + + User *TheUse = *UI; + for (++UI; UI != UE; ++UI) { + if (*UI != TheUse) + return false; + } + return true; +} + +/// RecursivelyDeleteDeadPHINode - If the specified value is an effectively +/// dead PHI node, due to being a def-use chain of single-use nodes that +/// either forms a cycle or is terminated by a trivially dead instruction, +/// delete it. If that makes any of its operands trivially dead, delete them +/// too, recursively. Return true if a change was made. +bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN, + const TargetLibraryInfo *TLI, + llvm::MemorySSAUpdater *MSSAU) { + SmallPtrSet<Instruction*, 4> Visited; + for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects(); + I = cast<Instruction>(*I->user_begin())) { + if (I->use_empty()) + return RecursivelyDeleteTriviallyDeadInstructions(I, TLI, MSSAU); + + // If we find an instruction more than once, we're on a cycle that + // won't prove fruitful. + if (!Visited.insert(I).second) { + // Break the cycle and delete the instruction and its operands. + I->replaceAllUsesWith(PoisonValue::get(I->getType())); + (void)RecursivelyDeleteTriviallyDeadInstructions(I, TLI, MSSAU); + return true; + } + } + return false; +} + +static bool +simplifyAndDCEInstruction(Instruction *I, + SmallSetVector<Instruction *, 16> &WorkList, + const DataLayout &DL, + const TargetLibraryInfo *TLI) { + if (isInstructionTriviallyDead(I, TLI)) { + salvageDebugInfo(*I); + + // Null out all of the instruction's operands to see if any operand becomes + // dead as we go. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + Value *OpV = I->getOperand(i); + I->setOperand(i, nullptr); + + if (!OpV->use_empty() || I == OpV) + continue; + + // If the operand is an instruction that became dead as we nulled out the + // operand, and if it is 'trivially' dead, delete it in a future loop + // iteration. + if (Instruction *OpI = dyn_cast<Instruction>(OpV)) + if (isInstructionTriviallyDead(OpI, TLI)) + WorkList.insert(OpI); + } + + I->eraseFromParent(); + + return true; + } + + if (Value *SimpleV = simplifyInstruction(I, DL)) { + // Add the users to the worklist. CAREFUL: an instruction can use itself, + // in the case of a phi node. 
+ for (User *U : I->users()) { + if (U != I) { + WorkList.insert(cast<Instruction>(U)); + } + } + + // Replace the instruction with its simplified value. + bool Changed = false; + if (!I->use_empty()) { + I->replaceAllUsesWith(SimpleV); + Changed = true; + } + if (isInstructionTriviallyDead(I, TLI)) { + I->eraseFromParent(); + Changed = true; + } + return Changed; + } + return false; +} + +/// SimplifyInstructionsInBlock - Scan the specified basic block and try to +/// simplify any instructions in it and recursively delete dead instructions. +/// +/// This returns true if it changed the code, note that it can delete +/// instructions in other blocks as well in this block. +bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, + const TargetLibraryInfo *TLI) { + bool MadeChange = false; + const DataLayout &DL = BB->getModule()->getDataLayout(); + +#ifndef NDEBUG + // In debug builds, ensure that the terminator of the block is never replaced + // or deleted by these simplifications. The idea of simplification is that it + // cannot introduce new instructions, and there is no way to replace the + // terminator of a block without introducing a new instruction. + AssertingVH<Instruction> TerminatorVH(&BB->back()); +#endif + + SmallSetVector<Instruction *, 16> WorkList; + // Iterate over the original function, only adding insts to the worklist + // if they actually need to be revisited. This avoids having to pre-init + // the worklist with the entire function's worth of instructions. + for (BasicBlock::iterator BI = BB->begin(), E = std::prev(BB->end()); + BI != E;) { + assert(!BI->isTerminator()); + Instruction *I = &*BI; + ++BI; + + // We're visiting this instruction now, so make sure it's not in the + // worklist from an earlier visit. + if (!WorkList.count(I)) + MadeChange |= simplifyAndDCEInstruction(I, WorkList, DL, TLI); + } + + while (!WorkList.empty()) { + Instruction *I = WorkList.pop_back_val(); + MadeChange |= simplifyAndDCEInstruction(I, WorkList, DL, TLI); + } + return MadeChange; +} + +//===----------------------------------------------------------------------===// +// Control Flow Graph Restructuring. +// + +void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, + DomTreeUpdater *DTU) { + + // If BB has single-entry PHI nodes, fold them. + while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { + Value *NewVal = PN->getIncomingValue(0); + // Replace self referencing PHI with poison, it must be dead. + if (NewVal == PN) NewVal = PoisonValue::get(PN->getType()); + PN->replaceAllUsesWith(NewVal); + PN->eraseFromParent(); + } + + BasicBlock *PredBB = DestBB->getSinglePredecessor(); + assert(PredBB && "Block doesn't have a single predecessor!"); + + bool ReplaceEntryBB = PredBB->isEntryBlock(); + + // DTU updates: Collect all the edges that enter + // PredBB. These dominator edges will be redirected to DestBB. + SmallVector<DominatorTree::UpdateType, 32> Updates; + + if (DTU) { + // To avoid processing the same predecessor more than once. + SmallPtrSet<BasicBlock *, 2> SeenPreds; + Updates.reserve(Updates.size() + 2 * pred_size(PredBB) + 1); + for (BasicBlock *PredOfPredBB : predecessors(PredBB)) + // This predecessor of PredBB may already have DestBB as a successor. 
+ if (PredOfPredBB != PredBB) + if (SeenPreds.insert(PredOfPredBB).second) + Updates.push_back({DominatorTree::Insert, PredOfPredBB, DestBB}); + SeenPreds.clear(); + for (BasicBlock *PredOfPredBB : predecessors(PredBB)) + if (SeenPreds.insert(PredOfPredBB).second) + Updates.push_back({DominatorTree::Delete, PredOfPredBB, PredBB}); + Updates.push_back({DominatorTree::Delete, PredBB, DestBB}); + } + + // Zap anything that took the address of DestBB. Not doing this will give the + // address an invalid value. + if (DestBB->hasAddressTaken()) { + BlockAddress *BA = BlockAddress::get(DestBB); + Constant *Replacement = + ConstantInt::get(Type::getInt32Ty(BA->getContext()), 1); + BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement, + BA->getType())); + BA->destroyConstant(); + } + + // Anything that branched to PredBB now branches to DestBB. + PredBB->replaceAllUsesWith(DestBB); + + // Splice all the instructions from PredBB to DestBB. + PredBB->getTerminator()->eraseFromParent(); + DestBB->splice(DestBB->begin(), PredBB); + new UnreachableInst(PredBB->getContext(), PredBB); + + // If the PredBB is the entry block of the function, move DestBB up to + // become the entry block after we erase PredBB. + if (ReplaceEntryBB) + DestBB->moveAfter(PredBB); + + if (DTU) { + assert(PredBB->size() == 1 && + isa<UnreachableInst>(PredBB->getTerminator()) && + "The successor list of PredBB isn't empty before " + "applying corresponding DTU updates."); + DTU->applyUpdatesPermissive(Updates); + DTU->deleteBB(PredBB); + // Recalculation of DomTree is needed when updating a forward DomTree and + // the Entry BB is replaced. + if (ReplaceEntryBB && DTU->hasDomTree()) { + // The entry block was removed and there is no external interface for + // the dominator tree to be notified of this change. In this corner-case + // we recalculate the entire tree. + DTU->recalculate(*(DestBB->getParent())); + } + } + + else { + PredBB->eraseFromParent(); // Nuke BB if DTU is nullptr. + } +} + +/// Return true if we can choose one of these values to use in place of the +/// other. Note that we will always choose the non-undef value to keep. +static bool CanMergeValues(Value *First, Value *Second) { + return First == Second || isa<UndefValue>(First) || isa<UndefValue>(Second); +} + +/// Return true if we can fold BB, an almost-empty BB ending in an unconditional +/// branch to Succ, into Succ. +/// +/// Assumption: Succ is the single successor for BB. 
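+///
+/// As a sketch of the conflict being checked for (block and value names are
+/// invented for the example): suppose P is a predecessor of both BB and Succ,
+/// and Succ contains
+///
+///   %phi = phi i32 [ %x, %BB ], [ %y, %P ]
+///
+/// After folding BB into Succ, the phi could keep only a single incoming
+/// value for P, yet it would have to represent both %x (along the former path
+/// through BB) and %y (along the direct edge), so the fold is rejected unless
+/// the two values can be merged, i.e. they are equal or one of them is undef.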
+static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { + assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!"); + + LLVM_DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into " + << Succ->getName() << "\n"); + // Shortcut, if there is only a single predecessor it must be BB and merging + // is always safe + if (Succ->getSinglePredecessor()) return true; + + // Make a list of the predecessors of BB + SmallPtrSet<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB)); + + // Look at all the phi nodes in Succ, to see if they present a conflict when + // merging these blocks + for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + + // If the incoming value from BB is again a PHINode in + // BB which has the same incoming value for *PI as PN does, we can + // merge the phi nodes and then the blocks can still be merged + PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB)); + if (BBPN && BBPN->getParent() == BB) { + for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) { + BasicBlock *IBB = PN->getIncomingBlock(PI); + if (BBPreds.count(IBB) && + !CanMergeValues(BBPN->getIncomingValueForBlock(IBB), + PN->getIncomingValue(PI))) { + LLVM_DEBUG(dbgs() + << "Can't fold, phi node " << PN->getName() << " in " + << Succ->getName() << " is conflicting with " + << BBPN->getName() << " with regard to common predecessor " + << IBB->getName() << "\n"); + return false; + } + } + } else { + Value* Val = PN->getIncomingValueForBlock(BB); + for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) { + // See if the incoming value for the common predecessor is equal to the + // one for BB, in which case this phi node will not prevent the merging + // of the block. + BasicBlock *IBB = PN->getIncomingBlock(PI); + if (BBPreds.count(IBB) && + !CanMergeValues(Val, PN->getIncomingValue(PI))) { + LLVM_DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() + << " in " << Succ->getName() + << " is conflicting with regard to common " + << "predecessor " << IBB->getName() << "\n"); + return false; + } + } + } + } + + return true; +} + +using PredBlockVector = SmallVector<BasicBlock *, 16>; +using IncomingValueMap = DenseMap<BasicBlock *, Value *>; + +/// Determines the value to use as the phi node input for a block. +/// +/// Select between \p OldVal any value that we know flows from \p BB +/// to a particular phi on the basis of which one (if either) is not +/// undef. Update IncomingValues based on the selected value. +/// +/// \param OldVal The value we are considering selecting. +/// \param BB The block that the value flows in from. +/// \param IncomingValues A map from block-to-value for other phi inputs +/// that we have examined. +/// +/// \returns the selected value. +static Value *selectIncomingValueForBlock(Value *OldVal, BasicBlock *BB, + IncomingValueMap &IncomingValues) { + if (!isa<UndefValue>(OldVal)) { + assert((!IncomingValues.count(BB) || + IncomingValues.find(BB)->second == OldVal) && + "Expected OldVal to match incoming value from BB!"); + + IncomingValues.insert(std::make_pair(BB, OldVal)); + return OldVal; + } + + IncomingValueMap::const_iterator It = IncomingValues.find(BB); + if (It != IncomingValues.end()) return It->second; + + return OldVal; +} + +/// Create a map from block to value for the operands of a +/// given phi. +/// +/// Create a map from block to value for each non-undef value flowing +/// into \p PN. 
+/// +/// \param PN The phi we are collecting the map for. +/// \param IncomingValues [out] The map from block to value for this phi. +static void gatherIncomingValuesToPhi(PHINode *PN, + IncomingValueMap &IncomingValues) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *BB = PN->getIncomingBlock(i); + Value *V = PN->getIncomingValue(i); + + if (!isa<UndefValue>(V)) + IncomingValues.insert(std::make_pair(BB, V)); + } +} + +/// Replace the incoming undef values to a phi with the values +/// from a block-to-value map. +/// +/// \param PN The phi we are replacing the undefs in. +/// \param IncomingValues A map from block to value. +static void replaceUndefValuesInPhi(PHINode *PN, + const IncomingValueMap &IncomingValues) { + SmallVector<unsigned> TrueUndefOps; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *V = PN->getIncomingValue(i); + + if (!isa<UndefValue>(V)) continue; + + BasicBlock *BB = PN->getIncomingBlock(i); + IncomingValueMap::const_iterator It = IncomingValues.find(BB); + + // Keep track of undef/poison incoming values. Those must match, so we fix + // them up below if needed. + // Note: this is conservatively correct, but we could try harder and group + // the undef values per incoming basic block. + if (It == IncomingValues.end()) { + TrueUndefOps.push_back(i); + continue; + } + + // There is a defined value for this incoming block, so map this undef + // incoming value to the defined value. + PN->setIncomingValue(i, It->second); + } + + // If there are both undef and poison values incoming, then convert those + // values to undef. It is invalid to have different values for the same + // incoming block. + unsigned PoisonCount = count_if(TrueUndefOps, [&](unsigned i) { + return isa<PoisonValue>(PN->getIncomingValue(i)); + }); + if (PoisonCount != 0 && PoisonCount != TrueUndefOps.size()) { + for (unsigned i : TrueUndefOps) + PN->setIncomingValue(i, UndefValue::get(PN->getType())); + } +} + +/// Replace a value flowing from a block to a phi with +/// potentially multiple instances of that value flowing from the +/// block's predecessors to the phi. +/// +/// \param BB The block with the value flowing into the phi. +/// \param BBPreds The predecessors of BB. +/// \param PN The phi that we are updating. +static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB, + const PredBlockVector &BBPreds, + PHINode *PN) { + Value *OldVal = PN->removeIncomingValue(BB, false); + assert(OldVal && "No entry in PHI for Pred BB!"); + + IncomingValueMap IncomingValues; + + // We are merging two blocks - BB, and the block containing PN - and + // as a result we need to redirect edges from the predecessors of BB + // to go to the block containing PN, and update PN + // accordingly. Since we allow merging blocks in the case where the + // predecessor and successor blocks both share some predecessors, + // and where some of those common predecessors might have undef + // values flowing into PN, we want to rewrite those values to be + // consistent with the non-undef values. + + gatherIncomingValuesToPhi(PN, IncomingValues); + + // If this incoming value is one of the PHI nodes in BB, the new entries + // in the PHI node are the entries from the old PHI. 
+ if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) { + PHINode *OldValPN = cast<PHINode>(OldVal); + for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i) { + // Note that, since we are merging phi nodes and BB and Succ might + // have common predecessors, we could end up with a phi node with + // identical incoming branches. This will be cleaned up later (and + // will trigger asserts if we try to clean it up now, without also + // simplifying the corresponding conditional branch). + BasicBlock *PredBB = OldValPN->getIncomingBlock(i); + Value *PredVal = OldValPN->getIncomingValue(i); + Value *Selected = selectIncomingValueForBlock(PredVal, PredBB, + IncomingValues); + + // And add a new incoming value for this predecessor for the + // newly retargeted branch. + PN->addIncoming(Selected, PredBB); + } + } else { + for (unsigned i = 0, e = BBPreds.size(); i != e; ++i) { + // Update existing incoming values in PN for this + // predecessor of BB. + BasicBlock *PredBB = BBPreds[i]; + Value *Selected = selectIncomingValueForBlock(OldVal, PredBB, + IncomingValues); + + // And add a new incoming value for this predecessor for the + // newly retargeted branch. + PN->addIncoming(Selected, PredBB); + } + } + + replaceUndefValuesInPhi(PN, IncomingValues); +} + +bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, + DomTreeUpdater *DTU) { + assert(BB != &BB->getParent()->getEntryBlock() && + "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!"); + + // We can't eliminate infinite loops. + BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0); + if (BB == Succ) return false; + + // Check to see if merging these blocks would cause conflicts for any of the + // phi nodes in BB or Succ. If not, we can safely merge. + if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false; + + // Check for cases where Succ has multiple predecessors and a PHI node in BB + // has uses which will not disappear when the PHI nodes are merged. It is + // possible to handle such cases, but difficult: it requires checking whether + // BB dominates Succ, which is non-trivial to calculate in the case where + // Succ has multiple predecessors. Also, it requires checking whether + // constructing the necessary self-referential PHI node doesn't introduce any + // conflicts; this isn't too difficult, but the previous code for doing this + // was incorrect. + // + // Note that if this check finds a live use, BB dominates Succ, so BB is + // something like a loop pre-header (or rarely, a part of an irreducible CFG); + // folding the branch isn't profitable in that case anyway. + if (!Succ->getSinglePredecessor()) { + BasicBlock::iterator BBI = BB->begin(); + while (isa<PHINode>(*BBI)) { + for (Use &U : BBI->uses()) { + if (PHINode* PN = dyn_cast<PHINode>(U.getUser())) { + if (PN->getIncomingBlock(U) != BB) + return false; + } else { + return false; + } + } + ++BBI; + } + } + + // 'BB' and 'BB->Pred' are loop latches, bail out to presrve inner loop + // metadata. + // + // FIXME: This is a stop-gap solution to preserve inner-loop metadata given + // current status (that loop metadata is implemented as metadata attached to + // the branch instruction in the loop latch block). To quote from review + // comments, "the current representation of loop metadata (using a loop latch + // terminator attachment) is known to be fundamentally broken. 
Loop latches + // are not uniquely associated with loops (both in that a latch can be part of + // multiple loops and a loop may have multiple latches). Loop headers are. The + // solution to this problem is also known: Add support for basic block + // metadata, and attach loop metadata to the loop header." + // + // Why bail out: + // In this case, we expect 'BB' is the latch for outer-loop and 'BB->Pred' is + // the latch for inner-loop (see reason below), so bail out to prerserve + // inner-loop metadata rather than eliminating 'BB' and attaching its metadata + // to this inner-loop. + // - The reason we believe 'BB' and 'BB->Pred' have different inner-most + // loops: assuming 'BB' and 'BB->Pred' are from the same inner-most loop L, + // then 'BB' is the header and latch of 'L' and thereby 'L' must consist of + // one self-looping basic block, which is contradictory with the assumption. + // + // To illustrate how inner-loop metadata is dropped: + // + // CFG Before + // + // BB is while.cond.exit, attached with loop metdata md2. + // BB->Pred is for.body, attached with loop metadata md1. + // + // entry + // | + // v + // ---> while.cond -------------> while.end + // | | + // | v + // | while.body + // | | + // | v + // | for.body <---- (md1) + // | | |______| + // | v + // | while.cond.exit (md2) + // | | + // |_______| + // + // CFG After + // + // while.cond1 is the merge of while.cond.exit and while.cond above. + // for.body is attached with md2, and md1 is dropped. + // If LoopSimplify runs later (as a part of loop pass), it could create + // dedicated exits for inner-loop (essentially adding `while.cond.exit` + // back), but won't it won't see 'md1' nor restore it for the inner-loop. + // + // entry + // | + // v + // ---> while.cond1 -------------> while.end + // | | + // | v + // | while.body + // | | + // | v + // | for.body <---- (md2) + // |_______| |______| + if (Instruction *TI = BB->getTerminator()) + if (TI->hasMetadata(LLVMContext::MD_loop)) + for (BasicBlock *Pred : predecessors(BB)) + if (Instruction *PredTI = Pred->getTerminator()) + if (PredTI->hasMetadata(LLVMContext::MD_loop)) + return false; + + LLVM_DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB); + + SmallVector<DominatorTree::UpdateType, 32> Updates; + if (DTU) { + // To avoid processing the same predecessor more than once. + SmallPtrSet<BasicBlock *, 8> SeenPreds; + // All predecessors of BB will be moved to Succ. + SmallPtrSet<BasicBlock *, 8> PredsOfSucc(pred_begin(Succ), pred_end(Succ)); + Updates.reserve(Updates.size() + 2 * pred_size(BB) + 1); + for (auto *PredOfBB : predecessors(BB)) + // This predecessor of BB may already have Succ as a successor. + if (!PredsOfSucc.contains(PredOfBB)) + if (SeenPreds.insert(PredOfBB).second) + Updates.push_back({DominatorTree::Insert, PredOfBB, Succ}); + SeenPreds.clear(); + for (auto *PredOfBB : predecessors(BB)) + if (SeenPreds.insert(PredOfBB).second) + Updates.push_back({DominatorTree::Delete, PredOfBB, BB}); + Updates.push_back({DominatorTree::Delete, BB, Succ}); + } + + if (isa<PHINode>(Succ->begin())) { + // If there is more than one pred of succ, and there are PHI nodes in + // the successor, then we need to add incoming edges for the PHI nodes + // + const PredBlockVector BBPreds(predecessors(BB)); + + // Loop over all of the PHI nodes in the successor of BB. 
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + + redirectValuesFromPredecessorsToPhi(BB, BBPreds, PN); + } + } + + if (Succ->getSinglePredecessor()) { + // BB is the only predecessor of Succ, so Succ will end up with exactly + // the same predecessors BB had. + + // Copy over any phi, debug or lifetime instruction. + BB->getTerminator()->eraseFromParent(); + Succ->splice(Succ->getFirstNonPHI()->getIterator(), BB); + } else { + while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) { + // We explicitly check for such uses in CanPropagatePredecessorsForPHIs. + assert(PN->use_empty() && "There shouldn't be any uses here!"); + PN->eraseFromParent(); + } + } + + // If the unconditional branch we replaced contains llvm.loop metadata, we + // add the metadata to the branch instructions in the predecessors. + unsigned LoopMDKind = BB->getContext().getMDKindID("llvm.loop"); + Instruction *TI = BB->getTerminator(); + if (TI) + if (MDNode *LoopMD = TI->getMetadata(LoopMDKind)) + for (BasicBlock *Pred : predecessors(BB)) + Pred->getTerminator()->setMetadata(LoopMDKind, LoopMD); + + // Everything that jumped to BB now goes to Succ. + BB->replaceAllUsesWith(Succ); + if (!Succ->hasName()) Succ->takeName(BB); + + // Clear the successor list of BB to match updates applying to DTU later. + if (BB->getTerminator()) + BB->back().eraseFromParent(); + new UnreachableInst(BB->getContext(), BB); + assert(succ_empty(BB) && "The successor list of BB isn't empty before " + "applying corresponding DTU updates."); + + if (DTU) + DTU->applyUpdates(Updates); + + DeleteDeadBlock(BB, DTU); + + return true; +} + +static bool EliminateDuplicatePHINodesNaiveImpl(BasicBlock *BB) { + // This implementation doesn't currently consider undef operands + // specially. Theoretically, two phis which are identical except for + // one having an undef where the other doesn't could be collapsed. + + bool Changed = false; + + // Examine each PHI. + // Note that increment of I must *NOT* be in the iteration_expression, since + // we don't want to immediately advance when we restart from the beginning. + for (auto I = BB->begin(); PHINode *PN = dyn_cast<PHINode>(I);) { + ++I; + // Is there an identical PHI node in this basic block? + // Note that we only look in the upper square's triangle, + // we already checked that the lower triangle PHI's aren't identical. + for (auto J = I; PHINode *DuplicatePN = dyn_cast<PHINode>(J); ++J) { + if (!DuplicatePN->isIdenticalToWhenDefined(PN)) + continue; + // A duplicate. Replace this PHI with the base PHI. + ++NumPHICSEs; + DuplicatePN->replaceAllUsesWith(PN); + DuplicatePN->eraseFromParent(); + Changed = true; + + // The RAUW can change PHIs that we already visited. + I = BB->begin(); + break; // Start over from the beginning. + } + } + return Changed; +} + +static bool EliminateDuplicatePHINodesSetBasedImpl(BasicBlock *BB) { + // This implementation doesn't currently consider undef operands + // specially. Theoretically, two phis which are identical except for + // one having an undef where the other doesn't could be collapsed. + + struct PHIDenseMapInfo { + static PHINode *getEmptyKey() { + return DenseMapInfo<PHINode *>::getEmptyKey(); + } + + static PHINode *getTombstoneKey() { + return DenseMapInfo<PHINode *>::getTombstoneKey(); + } + + static bool isSentinel(PHINode *PN) { + return PN == getEmptyKey() || PN == getTombstoneKey(); + } + + // WARNING: this logic must be kept in sync with + // Instruction::isIdenticalToWhenDefined()! 
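+    // For illustration (hypothetical IR), the two phis
+    //   %a = phi i32 [ %x, %p ], [ %y, %q ]
+    //   %b = phi i32 [ %x, %p ], [ %y, %q ]
+    // hash identically (same value and block operand sequences) and compare
+    // equal via isIdenticalTo(), so the set-based CSE below folds %b into %a.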
+ static unsigned getHashValueImpl(PHINode *PN) { + // Compute a hash value on the operands. Instcombine will likely have + // sorted them, which helps expose duplicates, but we have to check all + // the operands to be safe in case instcombine hasn't run. + return static_cast<unsigned>(hash_combine( + hash_combine_range(PN->value_op_begin(), PN->value_op_end()), + hash_combine_range(PN->block_begin(), PN->block_end()))); + } + + static unsigned getHashValue(PHINode *PN) { +#ifndef NDEBUG + // If -phicse-debug-hash was specified, return a constant -- this + // will force all hashing to collide, so we'll exhaustively search + // the table for a match, and the assertion in isEqual will fire if + // there's a bug causing equal keys to hash differently. + if (PHICSEDebugHash) + return 0; +#endif + return getHashValueImpl(PN); + } + + static bool isEqualImpl(PHINode *LHS, PHINode *RHS) { + if (isSentinel(LHS) || isSentinel(RHS)) + return LHS == RHS; + return LHS->isIdenticalTo(RHS); + } + + static bool isEqual(PHINode *LHS, PHINode *RHS) { + // These comparisons are nontrivial, so assert that equality implies + // hash equality (DenseMap demands this as an invariant). + bool Result = isEqualImpl(LHS, RHS); + assert(!Result || (isSentinel(LHS) && LHS == RHS) || + getHashValueImpl(LHS) == getHashValueImpl(RHS)); + return Result; + } + }; + + // Set of unique PHINodes. + DenseSet<PHINode *, PHIDenseMapInfo> PHISet; + PHISet.reserve(4 * PHICSENumPHISmallSize); + + // Examine each PHI. + bool Changed = false; + for (auto I = BB->begin(); PHINode *PN = dyn_cast<PHINode>(I++);) { + auto Inserted = PHISet.insert(PN); + if (!Inserted.second) { + // A duplicate. Replace this PHI with its duplicate. + ++NumPHICSEs; + PN->replaceAllUsesWith(*Inserted.first); + PN->eraseFromParent(); + Changed = true; + + // The RAUW can change PHIs that we already visited. Start over from the + // beginning. + PHISet.clear(); + I = BB->begin(); + } + } + + return Changed; +} + +bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { + if ( +#ifndef NDEBUG + !PHICSEDebugHash && +#endif + hasNItemsOrLess(BB->phis(), PHICSENumPHISmallSize)) + return EliminateDuplicatePHINodesNaiveImpl(BB); + return EliminateDuplicatePHINodesSetBasedImpl(BB); +} + +/// If the specified pointer points to an object that we control, try to modify +/// the object's alignment to PrefAlign. Returns a minimum known alignment of +/// the value after the operation, which may be lower than PrefAlign. +/// +/// Increating value alignment isn't often possible though. If alignment is +/// important, a more reliable approach is to simply align all global variables +/// and allocation instructions to their preferred alignment from the beginning. +static Align tryEnforceAlignment(Value *V, Align PrefAlign, + const DataLayout &DL) { + V = V->stripPointerCasts(); + + if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + // TODO: Ideally, this function would not be called if PrefAlign is smaller + // than the current alignment, as the known bits calculation should have + // already taken it into account. However, this is not always the case, + // as computeKnownBits() has a depth limit, while stripPointerCasts() + // doesn't. + Align CurrentAlign = AI->getAlign(); + if (PrefAlign <= CurrentAlign) + return CurrentAlign; + + // If the preferred alignment is greater than the natural stack alignment + // then don't round up. This avoids dynamic stack realignment. 
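+    // For example (values are illustrative): with a natural stack alignment
+    // of 16 bytes, a request for Align(32) on an alloca is left at the
+    // alloca's current alignment rather than forcing a realigned stack frame.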
+ if (DL.exceedsNaturalStackAlignment(PrefAlign)) + return CurrentAlign; + AI->setAlignment(PrefAlign); + return PrefAlign; + } + + if (auto *GO = dyn_cast<GlobalObject>(V)) { + // TODO: as above, this shouldn't be necessary. + Align CurrentAlign = GO->getPointerAlignment(DL); + if (PrefAlign <= CurrentAlign) + return CurrentAlign; + + // If there is a large requested alignment and we can, bump up the alignment + // of the global. If the memory we set aside for the global may not be the + // memory used by the final program then it is impossible for us to reliably + // enforce the preferred alignment. + if (!GO->canIncreaseAlignment()) + return CurrentAlign; + + GO->setAlignment(PrefAlign); + return PrefAlign; + } + + return Align(1); +} + +Align llvm::getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, + const DataLayout &DL, + const Instruction *CxtI, + AssumptionCache *AC, + const DominatorTree *DT) { + assert(V->getType()->isPointerTy() && + "getOrEnforceKnownAlignment expects a pointer!"); + + KnownBits Known = computeKnownBits(V, DL, 0, AC, CxtI, DT); + unsigned TrailZ = Known.countMinTrailingZeros(); + + // Avoid trouble with ridiculously large TrailZ values, such as + // those computed from a null pointer. + // LLVM doesn't support alignments larger than (1 << MaxAlignmentExponent). + TrailZ = std::min(TrailZ, +Value::MaxAlignmentExponent); + + Align Alignment = Align(1ull << std::min(Known.getBitWidth() - 1, TrailZ)); + + if (PrefAlign && *PrefAlign > Alignment) + Alignment = std::max(Alignment, tryEnforceAlignment(V, *PrefAlign, DL)); + + // We don't need to make any adjustment. + return Alignment; +} + +///===---------------------------------------------------------------------===// +/// Dbg Intrinsic utilities +/// + +/// See if there is a dbg.value intrinsic for DIVar for the PHI node. +static bool PhiHasDebugValue(DILocalVariable *DIVar, + DIExpression *DIExpr, + PHINode *APN) { + // Since we can't guarantee that the original dbg.declare intrinsic + // is removed by LowerDbgDeclare(), we need to make sure that we are + // not inserting the same dbg.value intrinsic over and over. + SmallVector<DbgValueInst *, 1> DbgValues; + findDbgValues(DbgValues, APN); + for (auto *DVI : DbgValues) { + assert(is_contained(DVI->getValues(), APN)); + if ((DVI->getVariable() == DIVar) && (DVI->getExpression() == DIExpr)) + return true; + } + return false; +} + +/// Check if the alloc size of \p ValTy is large enough to cover the variable +/// (or fragment of the variable) described by \p DII. +/// +/// This is primarily intended as a helper for the different +/// ConvertDebugDeclareToDebugValue functions. The dbg.declare/dbg.addr that is +/// converted describes an alloca'd variable, so we need to use the +/// alloc size of the value when doing the comparison. E.g. an i1 value will be +/// identified as covering an n-bit fragment, if the store size of i1 is at +/// least n bits. +static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) { + const DataLayout &DL = DII->getModule()->getDataLayout(); + TypeSize ValueSize = DL.getTypeAllocSizeInBits(ValTy); + if (std::optional<uint64_t> FragmentSize = DII->getFragmentSizeInBits()) { + assert(!ValueSize.isScalable() && + "Fragments don't work on scalable types."); + return ValueSize.getFixedValue() >= *FragmentSize; + } + // We can't always calculate the size of the DI variable (e.g. if it is a + // VLA). Try to use the size of the alloca that the dbg intrinsic describes + // intead. 
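+  // For instance, for a hypothetical `%buf = alloca i64` described by a
+  // dbg.declare with no known variable size, the 64-bit allocation size of
+  // the alloca stands in for the fragment size in the comparison below.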
+ if (DII->isAddressOfVariable()) { + // DII should have exactly 1 location when it is an address. + assert(DII->getNumVariableLocationOps() == 1 && + "address of variable must have exactly 1 location operand."); + if (auto *AI = + dyn_cast_or_null<AllocaInst>(DII->getVariableLocationOp(0))) { + if (std::optional<TypeSize> FragmentSize = + AI->getAllocationSizeInBits(DL)) { + return TypeSize::isKnownGE(ValueSize, *FragmentSize); + } + } + } + // Could not determine size of variable. Conservatively return false. + return false; +} + +/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value +/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic. +void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, + StoreInst *SI, DIBuilder &Builder) { + assert(DII->isAddressOfVariable() || isa<DbgAssignIntrinsic>(DII)); + auto *DIVar = DII->getVariable(); + assert(DIVar && "Missing variable"); + auto *DIExpr = DII->getExpression(); + Value *DV = SI->getValueOperand(); + + DebugLoc NewLoc = getDebugValueLoc(DII); + + if (!valueCoversEntireFragment(DV->getType(), DII)) { + // FIXME: If storing to a part of the variable described by the dbg.declare, + // then we want to insert a dbg.value for the corresponding fragment. + LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: " + << *DII << '\n'); + // For now, when there is a store to parts of the variable (but we do not + // know which part) we insert an dbg.value intrinsic to indicate that we + // know nothing about the variable's content. + DV = UndefValue::get(DV->getType()); + Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI); + return; + } + + Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI); +} + +/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value +/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic. +void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, + LoadInst *LI, DIBuilder &Builder) { + auto *DIVar = DII->getVariable(); + auto *DIExpr = DII->getExpression(); + assert(DIVar && "Missing variable"); + + if (!valueCoversEntireFragment(LI->getType(), DII)) { + // FIXME: If only referring to a part of the variable described by the + // dbg.declare, then we want to insert a dbg.value for the corresponding + // fragment. + LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: " + << *DII << '\n'); + return; + } + + DebugLoc NewLoc = getDebugValueLoc(DII); + + // We are now tracking the loaded value instead of the address. In the + // future if multi-location support is added to the IR, it might be + // preferable to keep tracking both the loaded value and the original + // address in case the alloca can not be elided. + Instruction *DbgValue = Builder.insertDbgValueIntrinsic( + LI, DIVar, DIExpr, NewLoc, (Instruction *)nullptr); + DbgValue->insertAfter(LI); +} + +/// Inserts a llvm.dbg.value intrinsic after a phi that has an associated +/// llvm.dbg.declare or llvm.dbg.addr intrinsic. +void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, + PHINode *APN, DIBuilder &Builder) { + auto *DIVar = DII->getVariable(); + auto *DIExpr = DII->getExpression(); + assert(DIVar && "Missing variable"); + + if (PhiHasDebugValue(DIVar, DIExpr, APN)) + return; + + if (!valueCoversEntireFragment(APN->getType(), DII)) { + // FIXME: If only referring to a part of the variable described by the + // dbg.declare, then we want to insert a dbg.value for the corresponding + // fragment. 
+ LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: " + << *DII << '\n'); + return; + } + + BasicBlock *BB = APN->getParent(); + auto InsertionPt = BB->getFirstInsertionPt(); + + DebugLoc NewLoc = getDebugValueLoc(DII); + + // The block may be a catchswitch block, which does not have a valid + // insertion point. + // FIXME: Insert dbg.value markers in the successors when appropriate. + if (InsertionPt != BB->end()) + Builder.insertDbgValueIntrinsic(APN, DIVar, DIExpr, NewLoc, &*InsertionPt); +} + +/// Determine whether this alloca is either a VLA or an array. +static bool isArray(AllocaInst *AI) { + return AI->isArrayAllocation() || + (AI->getAllocatedType() && AI->getAllocatedType()->isArrayTy()); +} + +/// Determine whether this alloca is a structure. +static bool isStructure(AllocaInst *AI) { + return AI->getAllocatedType() && AI->getAllocatedType()->isStructTy(); +} + +/// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set +/// of llvm.dbg.value intrinsics. +bool llvm::LowerDbgDeclare(Function &F) { + bool Changed = false; + DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false); + SmallVector<DbgDeclareInst *, 4> Dbgs; + for (auto &FI : F) + for (Instruction &BI : FI) + if (auto DDI = dyn_cast<DbgDeclareInst>(&BI)) + Dbgs.push_back(DDI); + + if (Dbgs.empty()) + return Changed; + + for (auto &I : Dbgs) { + DbgDeclareInst *DDI = I; + AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress()); + // If this is an alloca for a scalar variable, insert a dbg.value + // at each load and store to the alloca and erase the dbg.declare. + // The dbg.values allow tracking a variable even if it is not + // stored on the stack, while the dbg.declare can only describe + // the stack slot (and at a lexical-scope granularity). Later + // passes will attempt to elide the stack slot. + if (!AI || isArray(AI) || isStructure(AI)) + continue; + + // A volatile load/store means that the alloca can't be elided anyway. + if (llvm::any_of(AI->users(), [](User *U) -> bool { + if (LoadInst *LI = dyn_cast<LoadInst>(U)) + return LI->isVolatile(); + if (StoreInst *SI = dyn_cast<StoreInst>(U)) + return SI->isVolatile(); + return false; + })) + continue; + + SmallVector<const Value *, 8> WorkList; + WorkList.push_back(AI); + while (!WorkList.empty()) { + const Value *V = WorkList.pop_back_val(); + for (const auto &AIUse : V->uses()) { + User *U = AIUse.getUser(); + if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + if (AIUse.getOperandNo() == 1) + ConvertDebugDeclareToDebugValue(DDI, SI, DIB); + } else if (LoadInst *LI = dyn_cast<LoadInst>(U)) { + ConvertDebugDeclareToDebugValue(DDI, LI, DIB); + } else if (CallInst *CI = dyn_cast<CallInst>(U)) { + // This is a call by-value or some other instruction that takes a + // pointer to the variable. Insert a *value* intrinsic that describes + // the variable by dereferencing the alloca. + if (!CI->isLifetimeStartOrEnd()) { + DebugLoc NewLoc = getDebugValueLoc(DDI); + auto *DerefExpr = + DIExpression::append(DDI->getExpression(), dwarf::DW_OP_deref); + DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), DerefExpr, + NewLoc, CI); + } + } else if (BitCastInst *BI = dyn_cast<BitCastInst>(U)) { + if (BI->getType()->isPointerTy()) + WorkList.push_back(BI); + } + } + } + DDI->eraseFromParent(); + Changed = true; + } + + if (Changed) + for (BasicBlock &BB : F) + RemoveRedundantDbgInstrs(&BB); + + return Changed; +} + +/// Propagate dbg.value intrinsics through the newly inserted PHIs. 
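+///
+/// As an illustrative sketch (names are hypothetical): if the block being
+/// updated contains
+///   %v = phi i32 ...
+///   call void @llvm.dbg.value(metadata i32 %v, ...)
+/// and SSA updating inserts %v.new = phi i32 [ %v, ... ] into a successor,
+/// a clone of that dbg.value is placed after the phis of the successor and
+/// rewritten to point at %v.new, so the variable stays described along the
+/// new path.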
+void llvm::insertDebugValuesForPHIs(BasicBlock *BB,
+                                    SmallVectorImpl<PHINode *> &InsertedPHIs) {
+  assert(BB && "No BasicBlock to clone dbg.value(s) from.");
+  if (InsertedPHIs.size() == 0)
+    return;
+
+  // Map existing PHI nodes to their dbg.values.
+  ValueToValueMapTy DbgValueMap;
+  for (auto &I : *BB) {
+    if (auto DbgII = dyn_cast<DbgVariableIntrinsic>(&I)) {
+      for (Value *V : DbgII->location_ops())
+        if (auto *Loc = dyn_cast_or_null<PHINode>(V))
+          DbgValueMap.insert({Loc, DbgII});
+    }
+  }
+  if (DbgValueMap.size() == 0)
+    return;
+
+  // Map a pair of the destination BB and old dbg.value to the new dbg.value,
+  // so that if a dbg.value is being rewritten to use more than one of the
+  // inserted PHIs in the same destination BB, we can update the same dbg.value
+  // with all the new PHIs instead of creating one copy for each.
+  MapVector<std::pair<BasicBlock *, DbgVariableIntrinsic *>,
+            DbgVariableIntrinsic *>
+      NewDbgValueMap;
+  // Then iterate through the new PHIs and look to see if they use one of the
+  // previously mapped PHIs. If so, create a new dbg.value intrinsic that will
+  // propagate the info through the new PHI. If we use more than one new PHI in
+  // a single destination BB with the same old dbg.value, merge the updates so
+  // that we get a single new dbg.value with all the new PHIs.
+  for (auto *PHI : InsertedPHIs) {
+    BasicBlock *Parent = PHI->getParent();
+    // Avoid inserting an intrinsic into an EH block.
+    if (Parent->getFirstNonPHI()->isEHPad())
+      continue;
+    for (auto *VI : PHI->operand_values()) {
+      auto V = DbgValueMap.find(VI);
+      if (V != DbgValueMap.end()) {
+        auto *DbgII = cast<DbgVariableIntrinsic>(V->second);
+        auto NewDI = NewDbgValueMap.find({Parent, DbgII});
+        if (NewDI == NewDbgValueMap.end()) {
+          auto *NewDbgII = cast<DbgVariableIntrinsic>(DbgII->clone());
+          NewDI = NewDbgValueMap.insert({{Parent, DbgII}, NewDbgII}).first;
+        }
+        DbgVariableIntrinsic *NewDbgII = NewDI->second;
+        // If PHI contains VI as an operand more than once, we may have
+        // replaced it in NewDbgII already; confirm that it is still present.
+        if (is_contained(NewDbgII->location_ops(), VI))
+          NewDbgII->replaceVariableLocationOp(VI, PHI);
+      }
+    }
+  }
+  // Insert the new dbg.values into their destination blocks.
+  for (auto DI : NewDbgValueMap) {
+    BasicBlock *Parent = DI.first.first;
+    auto *NewDbgII = DI.second;
+    auto InsertionPt = Parent->getFirstInsertionPt();
+    assert(InsertionPt != Parent->end() && "Ill-formed basic block");
+    NewDbgII->insertBefore(&*InsertionPt);
+  }
+}
+
+bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
+                             DIBuilder &Builder, uint8_t DIExprFlags,
+                             int Offset) {
+  auto DbgAddrs = FindDbgAddrUses(Address);
+  for (DbgVariableIntrinsic *DII : DbgAddrs) {
+    const DebugLoc &Loc = DII->getDebugLoc();
+    auto *DIVar = DII->getVariable();
+    auto *DIExpr = DII->getExpression();
+    assert(DIVar && "Missing variable");
+    DIExpr = DIExpression::prepend(DIExpr, DIExprFlags, Offset);
+    // Insert llvm.dbg.declare immediately before DII, and remove old
+    // llvm.dbg.declare.
+    Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, DII);
+    DII->eraseFromParent();
+  }
+  return !DbgAddrs.empty();
+}
+
+static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress,
+                                        DIBuilder &Builder, int Offset) {
+  const DebugLoc &Loc = DVI->getDebugLoc();
+  auto *DIVar = DVI->getVariable();
+  auto *DIExpr = DVI->getExpression();
+  assert(DIVar && "Missing variable");
+
+  // This is an alloca-based llvm.dbg.value.
The first thing it should do with + // the alloca pointer is dereference it. Otherwise we don't know how to handle + // it and give up. + if (!DIExpr || DIExpr->getNumElements() < 1 || + DIExpr->getElement(0) != dwarf::DW_OP_deref) + return; + + // Insert the offset before the first deref. + // We could just change the offset argument of dbg.value, but it's unsigned... + if (Offset) + DIExpr = DIExpression::prepend(DIExpr, 0, Offset); + + Builder.insertDbgValueIntrinsic(NewAddress, DIVar, DIExpr, Loc, DVI); + DVI->eraseFromParent(); +} + +void llvm::replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress, + DIBuilder &Builder, int Offset) { + if (auto *L = LocalAsMetadata::getIfExists(AI)) + if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L)) + for (Use &U : llvm::make_early_inc_range(MDV->uses())) + if (auto *DVI = dyn_cast<DbgValueInst>(U.getUser())) + replaceOneDbgValueForAlloca(DVI, NewAllocaAddress, Builder, Offset); +} + +/// Where possible to salvage debug information for \p I do so. +/// If not possible mark undef. +void llvm::salvageDebugInfo(Instruction &I) { + SmallVector<DbgVariableIntrinsic *, 1> DbgUsers; + findDbgUsers(DbgUsers, &I); + salvageDebugInfoForDbgValues(I, DbgUsers); +} + +/// Salvage the address component of \p DAI. +static void salvageDbgAssignAddress(DbgAssignIntrinsic *DAI) { + Instruction *I = dyn_cast<Instruction>(DAI->getAddress()); + // Only instructions can be salvaged at the moment. + if (!I) + return; + + assert(!DAI->getAddressExpression()->getFragmentInfo().has_value() && + "address-expression shouldn't have fragment info"); + + // The address component of a dbg.assign cannot be variadic. + uint64_t CurrentLocOps = 0; + SmallVector<Value *, 4> AdditionalValues; + SmallVector<uint64_t, 16> Ops; + Value *NewV = salvageDebugInfoImpl(*I, CurrentLocOps, Ops, AdditionalValues); + + // Check if the salvage failed. + if (!NewV) + return; + + DIExpression *SalvagedExpr = DIExpression::appendOpsToArg( + DAI->getAddressExpression(), Ops, 0, /*StackValue=*/false); + assert(!SalvagedExpr->getFragmentInfo().has_value() && + "address-expression shouldn't have fragment info"); + + // Salvage succeeds if no additional values are required. + if (AdditionalValues.empty()) { + DAI->setAddress(NewV); + DAI->setAddressExpression(SalvagedExpr); + } else { + DAI->setKillAddress(); + } +} + +void llvm::salvageDebugInfoForDbgValues( + Instruction &I, ArrayRef<DbgVariableIntrinsic *> DbgUsers) { + // These are arbitrary chosen limits on the maximum number of values and the + // maximum size of a debug expression we can salvage up to, used for + // performance reasons. + const unsigned MaxDebugArgs = 16; + const unsigned MaxExpressionSize = 128; + bool Salvaged = false; + + for (auto *DII : DbgUsers) { + if (auto *DAI = dyn_cast<DbgAssignIntrinsic>(DII)) { + if (DAI->getAddress() == &I) { + salvageDbgAssignAddress(DAI); + Salvaged = true; + } + if (DAI->getValue() != &I) + continue; + } + + // Do not add DW_OP_stack_value for DbgDeclare and DbgAddr, because they + // are implicitly pointing out the value as a DWARF memory location + // description. 
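+    // As a hedged example: salvaging a removed `%a = add i64 %x, 42` that a
+    // dbg.value refers to appends DW_OP_plus_uconst 42 (see
+    // getSalvageOpsForBinOp below) and retargets the location at %x; for a
+    // dbg.value the rewritten expression is additionally marked as an
+    // implicit (stack value) computation, while a dbg.declare keeps
+    // describing a memory location.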
+ bool StackValue = isa<DbgValueInst>(DII); + auto DIILocation = DII->location_ops(); + assert( + is_contained(DIILocation, &I) && + "DbgVariableIntrinsic must use salvaged instruction as its location"); + SmallVector<Value *, 4> AdditionalValues; + // `I` may appear more than once in DII's location ops, and each use of `I` + // must be updated in the DIExpression and potentially have additional + // values added; thus we call salvageDebugInfoImpl for each `I` instance in + // DIILocation. + Value *Op0 = nullptr; + DIExpression *SalvagedExpr = DII->getExpression(); + auto LocItr = find(DIILocation, &I); + while (SalvagedExpr && LocItr != DIILocation.end()) { + SmallVector<uint64_t, 16> Ops; + unsigned LocNo = std::distance(DIILocation.begin(), LocItr); + uint64_t CurrentLocOps = SalvagedExpr->getNumLocationOperands(); + Op0 = salvageDebugInfoImpl(I, CurrentLocOps, Ops, AdditionalValues); + if (!Op0) + break; + SalvagedExpr = + DIExpression::appendOpsToArg(SalvagedExpr, Ops, LocNo, StackValue); + LocItr = std::find(++LocItr, DIILocation.end(), &I); + } + // salvageDebugInfoImpl should fail on examining the first element of + // DbgUsers, or none of them. + if (!Op0) + break; + + DII->replaceVariableLocationOp(&I, Op0); + bool IsValidSalvageExpr = SalvagedExpr->getNumElements() <= MaxExpressionSize; + if (AdditionalValues.empty() && IsValidSalvageExpr) { + DII->setExpression(SalvagedExpr); + } else if (isa<DbgValueInst>(DII) && !isa<DbgAssignIntrinsic>(DII) && + IsValidSalvageExpr && + DII->getNumVariableLocationOps() + AdditionalValues.size() <= + MaxDebugArgs) { + DII->addVariableLocationOps(AdditionalValues, SalvagedExpr); + } else { + // Do not salvage using DIArgList for dbg.addr/dbg.declare, as it is + // not currently supported in those instructions. Do not salvage using + // DIArgList for dbg.assign yet. FIXME: support this. + // Also do not salvage if the resulting DIArgList would contain an + // unreasonably large number of values. + DII->setKillLocation(); + } + LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n'); + Salvaged = true; + } + + if (Salvaged) + return; + + for (auto *DII : DbgUsers) + DII->setKillLocation(); +} + +Value *getSalvageOpsForGEP(GetElementPtrInst *GEP, const DataLayout &DL, + uint64_t CurrentLocOps, + SmallVectorImpl<uint64_t> &Opcodes, + SmallVectorImpl<Value *> &AdditionalValues) { + unsigned BitWidth = DL.getIndexSizeInBits(GEP->getPointerAddressSpace()); + // Rewrite a GEP into a DIExpression. 
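+  // Illustrative sketch (hypothetical IR): for
+  //   %p = getelementptr i32, ptr %base, i64 %i
+  // collectOffset() yields a variable offset of 4 * %i and a constant offset
+  // of 0, so with no prior location operands the emitted ops are roughly
+  //   DW_OP_LLVM_arg 0, DW_OP_LLVM_arg 1, DW_OP_constu 4, DW_OP_mul,
+  //   DW_OP_plus
+  // with %base returned as the primary location and %i appended to
+  // AdditionalValues.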
+ MapVector<Value *, APInt> VariableOffsets; + APInt ConstantOffset(BitWidth, 0); + if (!GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) + return nullptr; + if (!VariableOffsets.empty() && !CurrentLocOps) { + Opcodes.insert(Opcodes.begin(), {dwarf::DW_OP_LLVM_arg, 0}); + CurrentLocOps = 1; + } + for (auto Offset : VariableOffsets) { + AdditionalValues.push_back(Offset.first); + assert(Offset.second.isStrictlyPositive() && + "Expected strictly positive multiplier for offset."); + Opcodes.append({dwarf::DW_OP_LLVM_arg, CurrentLocOps++, dwarf::DW_OP_constu, + Offset.second.getZExtValue(), dwarf::DW_OP_mul, + dwarf::DW_OP_plus}); + } + DIExpression::appendOffset(Opcodes, ConstantOffset.getSExtValue()); + return GEP->getOperand(0); +} + +uint64_t getDwarfOpForBinOp(Instruction::BinaryOps Opcode) { + switch (Opcode) { + case Instruction::Add: + return dwarf::DW_OP_plus; + case Instruction::Sub: + return dwarf::DW_OP_minus; + case Instruction::Mul: + return dwarf::DW_OP_mul; + case Instruction::SDiv: + return dwarf::DW_OP_div; + case Instruction::SRem: + return dwarf::DW_OP_mod; + case Instruction::Or: + return dwarf::DW_OP_or; + case Instruction::And: + return dwarf::DW_OP_and; + case Instruction::Xor: + return dwarf::DW_OP_xor; + case Instruction::Shl: + return dwarf::DW_OP_shl; + case Instruction::LShr: + return dwarf::DW_OP_shr; + case Instruction::AShr: + return dwarf::DW_OP_shra; + default: + // TODO: Salvage from each kind of binop we know about. + return 0; + } +} + +Value *getSalvageOpsForBinOp(BinaryOperator *BI, uint64_t CurrentLocOps, + SmallVectorImpl<uint64_t> &Opcodes, + SmallVectorImpl<Value *> &AdditionalValues) { + // Handle binary operations with constant integer operands as a special case. + auto *ConstInt = dyn_cast<ConstantInt>(BI->getOperand(1)); + // Values wider than 64 bits cannot be represented within a DIExpression. + if (ConstInt && ConstInt->getBitWidth() > 64) + return nullptr; + + Instruction::BinaryOps BinOpcode = BI->getOpcode(); + // Push any Constant Int operand onto the expression stack. + if (ConstInt) { + uint64_t Val = ConstInt->getSExtValue(); + // Add or Sub Instructions with a constant operand can potentially be + // simplified. + if (BinOpcode == Instruction::Add || BinOpcode == Instruction::Sub) { + uint64_t Offset = BinOpcode == Instruction::Add ? Val : -int64_t(Val); + DIExpression::appendOffset(Opcodes, Offset); + return BI->getOperand(0); + } + Opcodes.append({dwarf::DW_OP_constu, Val}); + } else { + if (!CurrentLocOps) { + Opcodes.append({dwarf::DW_OP_LLVM_arg, 0}); + CurrentLocOps = 1; + } + Opcodes.append({dwarf::DW_OP_LLVM_arg, CurrentLocOps}); + AdditionalValues.push_back(BI->getOperand(1)); + } + + // Add salvaged binary operator to expression stack, if it has a valid + // representation in a DIExpression. + uint64_t DwarfBinOp = getDwarfOpForBinOp(BinOpcode); + if (!DwarfBinOp) + return nullptr; + Opcodes.push_back(DwarfBinOp); + return BI->getOperand(0); +} + +Value *llvm::salvageDebugInfoImpl(Instruction &I, uint64_t CurrentLocOps, + SmallVectorImpl<uint64_t> &Ops, + SmallVectorImpl<Value *> &AdditionalValues) { + auto &M = *I.getModule(); + auto &DL = M.getDataLayout(); + + if (auto *CI = dyn_cast<CastInst>(&I)) { + Value *FromValue = CI->getOperand(0); + // No-op casts are irrelevant for debug info. + if (CI->isNoopCast(DL)) { + return FromValue; + } + + Type *Type = CI->getType(); + if (Type->isPointerTy()) + Type = DL.getIntPtrType(Type); + // Casts other than Trunc, SExt, or ZExt to scalar types cannot be salvaged. 
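+    // For instance, a hypothetical `%w = zext i32 %x to i64` is salvaged by
+    // appending DIExpression::getExtOps(32, 64, /*Signed=*/false) and
+    // reporting %x as the new location; a same-size bitcast is handled above
+    // as a no-op, and a vector cast falls through to the bail-out below.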
+ if (Type->isVectorTy() || + !(isa<TruncInst>(&I) || isa<SExtInst>(&I) || isa<ZExtInst>(&I) || + isa<IntToPtrInst>(&I) || isa<PtrToIntInst>(&I))) + return nullptr; + + llvm::Type *FromType = FromValue->getType(); + if (FromType->isPointerTy()) + FromType = DL.getIntPtrType(FromType); + + unsigned FromTypeBitSize = FromType->getScalarSizeInBits(); + unsigned ToTypeBitSize = Type->getScalarSizeInBits(); + + auto ExtOps = DIExpression::getExtOps(FromTypeBitSize, ToTypeBitSize, + isa<SExtInst>(&I)); + Ops.append(ExtOps.begin(), ExtOps.end()); + return FromValue; + } + + if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) + return getSalvageOpsForGEP(GEP, DL, CurrentLocOps, Ops, AdditionalValues); + if (auto *BI = dyn_cast<BinaryOperator>(&I)) + return getSalvageOpsForBinOp(BI, CurrentLocOps, Ops, AdditionalValues); + + // *Not* to do: we should not attempt to salvage load instructions, + // because the validity and lifetime of a dbg.value containing + // DW_OP_deref becomes difficult to analyze. See PR40628 for examples. + return nullptr; +} + +/// A replacement for a dbg.value expression. +using DbgValReplacement = std::optional<DIExpression *>; + +/// Point debug users of \p From to \p To using exprs given by \p RewriteExpr, +/// possibly moving/undefing users to prevent use-before-def. Returns true if +/// changes are made. +static bool rewriteDebugUsers( + Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT, + function_ref<DbgValReplacement(DbgVariableIntrinsic &DII)> RewriteExpr) { + // Find debug users of From. + SmallVector<DbgVariableIntrinsic *, 1> Users; + findDbgUsers(Users, &From); + if (Users.empty()) + return false; + + // Prevent use-before-def of To. + bool Changed = false; + SmallPtrSet<DbgVariableIntrinsic *, 1> UndefOrSalvage; + if (isa<Instruction>(&To)) { + bool DomPointAfterFrom = From.getNextNonDebugInstruction() == &DomPoint; + + for (auto *DII : Users) { + // It's common to see a debug user between From and DomPoint. Move it + // after DomPoint to preserve the variable update without any reordering. + if (DomPointAfterFrom && DII->getNextNonDebugInstruction() == &DomPoint) { + LLVM_DEBUG(dbgs() << "MOVE: " << *DII << '\n'); + DII->moveAfter(&DomPoint); + Changed = true; + + // Users which otherwise aren't dominated by the replacement value must + // be salvaged or deleted. + } else if (!DT.dominates(&DomPoint, DII)) { + UndefOrSalvage.insert(DII); + } + } + } + + // Update debug users without use-before-def risk. + for (auto *DII : Users) { + if (UndefOrSalvage.count(DII)) + continue; + + DbgValReplacement DVR = RewriteExpr(*DII); + if (!DVR) + continue; + + DII->replaceVariableLocationOp(&From, &To); + DII->setExpression(*DVR); + LLVM_DEBUG(dbgs() << "REWRITE: " << *DII << '\n'); + Changed = true; + } + + if (!UndefOrSalvage.empty()) { + // Try to salvage the remaining debug users. + salvageDebugInfo(From); + Changed = true; + } + + return Changed; +} + +/// Check if a bitcast between a value of type \p FromTy to type \p ToTy would +/// losslessly preserve the bits and semantics of the value. This predicate is +/// symmetric, i.e swapping \p FromTy and \p ToTy should give the same result. +/// +/// Note that Type::canLosslesslyBitCastTo is not suitable here because it +/// allows semantically unequivalent bitcasts, such as <2 x i64> -> <4 x i32>, +/// and also does not allow lossless pointer <-> integer conversions. +static bool isBitCastSemanticsPreserving(const DataLayout &DL, Type *FromTy, + Type *ToTy) { + // Trivially compatible types. 
+ if (FromTy == ToTy) + return true; + + // Handle compatible pointer <-> integer conversions. + if (FromTy->isIntOrPtrTy() && ToTy->isIntOrPtrTy()) { + bool SameSize = DL.getTypeSizeInBits(FromTy) == DL.getTypeSizeInBits(ToTy); + bool LosslessConversion = !DL.isNonIntegralPointerType(FromTy) && + !DL.isNonIntegralPointerType(ToTy); + return SameSize && LosslessConversion; + } + + // TODO: This is not exhaustive. + return false; +} + +bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To, + Instruction &DomPoint, DominatorTree &DT) { + // Exit early if From has no debug users. + if (!From.isUsedByMetadata()) + return false; + + assert(&From != &To && "Can't replace something with itself"); + + Type *FromTy = From.getType(); + Type *ToTy = To.getType(); + + auto Identity = [&](DbgVariableIntrinsic &DII) -> DbgValReplacement { + return DII.getExpression(); + }; + + // Handle no-op conversions. + Module &M = *From.getModule(); + const DataLayout &DL = M.getDataLayout(); + if (isBitCastSemanticsPreserving(DL, FromTy, ToTy)) + return rewriteDebugUsers(From, To, DomPoint, DT, Identity); + + // Handle integer-to-integer widening and narrowing. + // FIXME: Use DW_OP_convert when it's available everywhere. + if (FromTy->isIntegerTy() && ToTy->isIntegerTy()) { + uint64_t FromBits = FromTy->getPrimitiveSizeInBits(); + uint64_t ToBits = ToTy->getPrimitiveSizeInBits(); + assert(FromBits != ToBits && "Unexpected no-op conversion"); + + // When the width of the result grows, assume that a debugger will only + // access the low `FromBits` bits when inspecting the source variable. + if (FromBits < ToBits) + return rewriteDebugUsers(From, To, DomPoint, DT, Identity); + + // The width of the result has shrunk. Use sign/zero extension to describe + // the source variable's high bits. + auto SignOrZeroExt = [&](DbgVariableIntrinsic &DII) -> DbgValReplacement { + DILocalVariable *Var = DII.getVariable(); + + // Without knowing signedness, sign/zero extension isn't possible. + auto Signedness = Var->getSignedness(); + if (!Signedness) + return std::nullopt; + + bool Signed = *Signedness == DIBasicType::Signedness::Signed; + return DIExpression::appendExt(DII.getExpression(), ToBits, FromBits, + Signed); + }; + return rewriteDebugUsers(From, To, DomPoint, DT, SignOrZeroExt); + } + + // TODO: Floating-point conversions, vectors. + return false; +} + +std::pair<unsigned, unsigned> +llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) { + unsigned NumDeadInst = 0; + unsigned NumDeadDbgInst = 0; + // Delete the instructions backwards, as it has a reduced likelihood of + // having to update as many def-use and use-def chains. + Instruction *EndInst = BB->getTerminator(); // Last not to be deleted. + while (EndInst != &BB->front()) { + // Delete the next to last instruction. 
+ Instruction *Inst = &*--EndInst->getIterator(); + if (!Inst->use_empty() && !Inst->getType()->isTokenTy()) + Inst->replaceAllUsesWith(PoisonValue::get(Inst->getType())); + if (Inst->isEHPad() || Inst->getType()->isTokenTy()) { + EndInst = Inst; + continue; + } + if (isa<DbgInfoIntrinsic>(Inst)) + ++NumDeadDbgInst; + else + ++NumDeadInst; + Inst->eraseFromParent(); + } + return {NumDeadInst, NumDeadDbgInst}; +} + +unsigned llvm::changeToUnreachable(Instruction *I, bool PreserveLCSSA, + DomTreeUpdater *DTU, + MemorySSAUpdater *MSSAU) { + BasicBlock *BB = I->getParent(); + + if (MSSAU) + MSSAU->changeToUnreachable(I); + + SmallSet<BasicBlock *, 8> UniqueSuccessors; + + // Loop over all of the successors, removing BB's entry from any PHI + // nodes. + for (BasicBlock *Successor : successors(BB)) { + Successor->removePredecessor(BB, PreserveLCSSA); + if (DTU) + UniqueSuccessors.insert(Successor); + } + auto *UI = new UnreachableInst(I->getContext(), I); + UI->setDebugLoc(I->getDebugLoc()); + + // All instructions after this are dead. + unsigned NumInstrsRemoved = 0; + BasicBlock::iterator BBI = I->getIterator(), BBE = BB->end(); + while (BBI != BBE) { + if (!BBI->use_empty()) + BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType())); + BBI++->eraseFromParent(); + ++NumInstrsRemoved; + } + if (DTU) { + SmallVector<DominatorTree::UpdateType, 8> Updates; + Updates.reserve(UniqueSuccessors.size()); + for (BasicBlock *UniqueSuccessor : UniqueSuccessors) + Updates.push_back({DominatorTree::Delete, BB, UniqueSuccessor}); + DTU->applyUpdates(Updates); + } + return NumInstrsRemoved; +} + +CallInst *llvm::createCallMatchingInvoke(InvokeInst *II) { + SmallVector<Value *, 8> Args(II->args()); + SmallVector<OperandBundleDef, 1> OpBundles; + II->getOperandBundlesAsDefs(OpBundles); + CallInst *NewCall = CallInst::Create(II->getFunctionType(), + II->getCalledOperand(), Args, OpBundles); + NewCall->setCallingConv(II->getCallingConv()); + NewCall->setAttributes(II->getAttributes()); + NewCall->setDebugLoc(II->getDebugLoc()); + NewCall->copyMetadata(*II); + + // If the invoke had profile metadata, try converting them for CallInst. + uint64_t TotalWeight; + if (NewCall->extractProfTotalWeight(TotalWeight)) { + // Set the total weight if it fits into i32, otherwise reset. + MDBuilder MDB(NewCall->getContext()); + auto NewWeights = uint32_t(TotalWeight) != TotalWeight + ? nullptr + : MDB.createBranchWeights({uint32_t(TotalWeight)}); + NewCall->setMetadata(LLVMContext::MD_prof, NewWeights); + } + + return NewCall; +} + +// changeToCall - Convert the specified invoke into a normal call. +CallInst *llvm::changeToCall(InvokeInst *II, DomTreeUpdater *DTU) { + CallInst *NewCall = createCallMatchingInvoke(II); + NewCall->takeName(II); + NewCall->insertBefore(II); + II->replaceAllUsesWith(NewCall); + + // Follow the call by a branch to the normal destination. + BasicBlock *NormalDestBB = II->getNormalDest(); + BranchInst::Create(NormalDestBB, II); + + // Update PHI nodes in the unwind destination + BasicBlock *BB = II->getParent(); + BasicBlock *UnwindDestBB = II->getUnwindDest(); + UnwindDestBB->removePredecessor(BB); + II->eraseFromParent(); + if (DTU) + DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDestBB}}); + return NewCall; +} + +BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, + BasicBlock *UnwindEdge, + DomTreeUpdater *DTU) { + BasicBlock *BB = CI->getParent(); + + // Convert this function call into an invoke instruction. First, split the + // basic block. 
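+  // Rough sketch with an illustrative call %r = call i32 @f() in BB:
+  //
+  //   BB:                             BB:
+  //     %r = call i32 @f()     ==>      %r = invoke i32 @f()
+  //     <rest of BB>                        to label %r.noexc unwind label %UnwindEdge
+  //                                     r.noexc:                ; the split block
+  //                                       <rest of BB, now using the invoke's result>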
+ BasicBlock *Split = SplitBlock(BB, CI, DTU, /*LI=*/nullptr, /*MSSAU*/ nullptr, + CI->getName() + ".noexc"); + + // Delete the unconditional branch inserted by SplitBlock + BB->back().eraseFromParent(); + + // Create the new invoke instruction. + SmallVector<Value *, 8> InvokeArgs(CI->args()); + SmallVector<OperandBundleDef, 1> OpBundles; + + CI->getOperandBundlesAsDefs(OpBundles); + + // Note: we're round tripping operand bundles through memory here, and that + // can potentially be avoided with a cleverer API design that we do not have + // as of this time. + + InvokeInst *II = + InvokeInst::Create(CI->getFunctionType(), CI->getCalledOperand(), Split, + UnwindEdge, InvokeArgs, OpBundles, CI->getName(), BB); + II->setDebugLoc(CI->getDebugLoc()); + II->setCallingConv(CI->getCallingConv()); + II->setAttributes(CI->getAttributes()); + II->setMetadata(LLVMContext::MD_prof, CI->getMetadata(LLVMContext::MD_prof)); + + if (DTU) + DTU->applyUpdates({{DominatorTree::Insert, BB, UnwindEdge}}); + + // Make sure that anything using the call now uses the invoke! This also + // updates the CallGraph if present, because it uses a WeakTrackingVH. + CI->replaceAllUsesWith(II); + + // Delete the original call + Split->front().eraseFromParent(); + return Split; +} + +static bool markAliveBlocks(Function &F, + SmallPtrSetImpl<BasicBlock *> &Reachable, + DomTreeUpdater *DTU = nullptr) { + SmallVector<BasicBlock*, 128> Worklist; + BasicBlock *BB = &F.front(); + Worklist.push_back(BB); + Reachable.insert(BB); + bool Changed = false; + do { + BB = Worklist.pop_back_val(); + + // Do a quick scan of the basic block, turning any obviously unreachable + // instructions into LLVM unreachable insts. The instruction combining pass + // canonicalizes unreachable insts into stores to null or undef. + for (Instruction &I : *BB) { + if (auto *CI = dyn_cast<CallInst>(&I)) { + Value *Callee = CI->getCalledOperand(); + // Handle intrinsic calls. + if (Function *F = dyn_cast<Function>(Callee)) { + auto IntrinsicID = F->getIntrinsicID(); + // Assumptions that are known to be false are equivalent to + // unreachable. Also, if the condition is undefined, then we make the + // choice most beneficial to the optimizer, and choose that to also be + // unreachable. + if (IntrinsicID == Intrinsic::assume) { + if (match(CI->getArgOperand(0), m_CombineOr(m_Zero(), m_Undef()))) { + // Don't insert a call to llvm.trap right before the unreachable. + changeToUnreachable(CI, false, DTU); + Changed = true; + break; + } + } else if (IntrinsicID == Intrinsic::experimental_guard) { + // A call to the guard intrinsic bails out of the current + // compilation unit if the predicate passed to it is false. If the + // predicate is a constant false, then we know the guard will bail + // out of the current compile unconditionally, so all code following + // it is dead. + // + // Note: unlike in llvm.assume, it is not "obviously profitable" for + // guards to treat `undef` as `false` since a guard on `undef` can + // still be useful for widening. 
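+            // Concretely (illustrative IR): after
+            //   call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+            // the guard always bails out to the deoptimization path, so the
+            // instruction following it is turned into an unreachable and the
+            // rest of the block is dropped (the guard call itself is kept).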
+ if (match(CI->getArgOperand(0), m_Zero())) + if (!isa<UnreachableInst>(CI->getNextNode())) { + changeToUnreachable(CI->getNextNode(), false, DTU); + Changed = true; + break; + } + } + } else if ((isa<ConstantPointerNull>(Callee) && + !NullPointerIsDefined(CI->getFunction(), + cast<PointerType>(Callee->getType()) + ->getAddressSpace())) || + isa<UndefValue>(Callee)) { + changeToUnreachable(CI, false, DTU); + Changed = true; + break; + } + if (CI->doesNotReturn() && !CI->isMustTailCall()) { + // If we found a call to a no-return function, insert an unreachable + // instruction after it. Make sure there isn't *already* one there + // though. + if (!isa<UnreachableInst>(CI->getNextNode())) { + // Don't insert a call to llvm.trap right before the unreachable. + changeToUnreachable(CI->getNextNode(), false, DTU); + Changed = true; + } + break; + } + } else if (auto *SI = dyn_cast<StoreInst>(&I)) { + // Store to undef and store to null are undefined and used to signal + // that they should be changed to unreachable by passes that can't + // modify the CFG. + + // Don't touch volatile stores. + if (SI->isVolatile()) continue; + + Value *Ptr = SI->getOperand(1); + + if (isa<UndefValue>(Ptr) || + (isa<ConstantPointerNull>(Ptr) && + !NullPointerIsDefined(SI->getFunction(), + SI->getPointerAddressSpace()))) { + changeToUnreachable(SI, false, DTU); + Changed = true; + break; + } + } + } + + Instruction *Terminator = BB->getTerminator(); + if (auto *II = dyn_cast<InvokeInst>(Terminator)) { + // Turn invokes that call 'nounwind' functions into ordinary calls. + Value *Callee = II->getCalledOperand(); + if ((isa<ConstantPointerNull>(Callee) && + !NullPointerIsDefined(BB->getParent())) || + isa<UndefValue>(Callee)) { + changeToUnreachable(II, false, DTU); + Changed = true; + } else { + if (II->doesNotReturn() && + !isa<UnreachableInst>(II->getNormalDest()->front())) { + // If we found an invoke of a no-return function, + // create a new empty basic block with an `unreachable` terminator, + // and set it as the normal destination for the invoke, + // unless that is already the case. + // Note that the original normal destination could have other uses. + BasicBlock *OrigNormalDest = II->getNormalDest(); + OrigNormalDest->removePredecessor(II->getParent()); + LLVMContext &Ctx = II->getContext(); + BasicBlock *UnreachableNormalDest = BasicBlock::Create( + Ctx, OrigNormalDest->getName() + ".unreachable", + II->getFunction(), OrigNormalDest); + new UnreachableInst(Ctx, UnreachableNormalDest); + II->setNormalDest(UnreachableNormalDest); + if (DTU) + DTU->applyUpdates( + {{DominatorTree::Delete, BB, OrigNormalDest}, + {DominatorTree::Insert, BB, UnreachableNormalDest}}); + Changed = true; + } + if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(&F)) { + if (II->use_empty() && !II->mayHaveSideEffects()) { + // jump to the normal destination branch. + BasicBlock *NormalDestBB = II->getNormalDest(); + BasicBlock *UnwindDestBB = II->getUnwindDest(); + BranchInst::Create(NormalDestBB, II); + UnwindDestBB->removePredecessor(II->getParent()); + II->eraseFromParent(); + if (DTU) + DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDestBB}}); + } else + changeToCall(II, DTU); + Changed = true; + } + } + } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Terminator)) { + // Remove catchpads which cannot be reached. 
+ struct CatchPadDenseMapInfo { + static CatchPadInst *getEmptyKey() { + return DenseMapInfo<CatchPadInst *>::getEmptyKey(); + } + + static CatchPadInst *getTombstoneKey() { + return DenseMapInfo<CatchPadInst *>::getTombstoneKey(); + } + + static unsigned getHashValue(CatchPadInst *CatchPad) { + return static_cast<unsigned>(hash_combine_range( + CatchPad->value_op_begin(), CatchPad->value_op_end())); + } + + static bool isEqual(CatchPadInst *LHS, CatchPadInst *RHS) { + if (LHS == getEmptyKey() || LHS == getTombstoneKey() || + RHS == getEmptyKey() || RHS == getTombstoneKey()) + return LHS == RHS; + return LHS->isIdenticalTo(RHS); + } + }; + + SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases; + // Set of unique CatchPads. + SmallDenseMap<CatchPadInst *, detail::DenseSetEmpty, 4, + CatchPadDenseMapInfo, detail::DenseSetPair<CatchPadInst *>> + HandlerSet; + detail::DenseSetEmpty Empty; + for (CatchSwitchInst::handler_iterator I = CatchSwitch->handler_begin(), + E = CatchSwitch->handler_end(); + I != E; ++I) { + BasicBlock *HandlerBB = *I; + if (DTU) + ++NumPerSuccessorCases[HandlerBB]; + auto *CatchPad = cast<CatchPadInst>(HandlerBB->getFirstNonPHI()); + if (!HandlerSet.insert({CatchPad, Empty}).second) { + if (DTU) + --NumPerSuccessorCases[HandlerBB]; + CatchSwitch->removeHandler(I); + --I; + --E; + Changed = true; + } + } + if (DTU) { + std::vector<DominatorTree::UpdateType> Updates; + for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases) + if (I.second == 0) + Updates.push_back({DominatorTree::Delete, BB, I.first}); + DTU->applyUpdates(Updates); + } + } + + Changed |= ConstantFoldTerminator(BB, true, nullptr, DTU); + for (BasicBlock *Successor : successors(BB)) + if (Reachable.insert(Successor).second) + Worklist.push_back(Successor); + } while (!Worklist.empty()); + return Changed; +} + +Instruction *llvm::removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU) { + Instruction *TI = BB->getTerminator(); + + if (auto *II = dyn_cast<InvokeInst>(TI)) + return changeToCall(II, DTU); + + Instruction *NewTI; + BasicBlock *UnwindDest; + + if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) { + NewTI = CleanupReturnInst::Create(CRI->getCleanupPad(), nullptr, CRI); + UnwindDest = CRI->getUnwindDest(); + } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(TI)) { + auto *NewCatchSwitch = CatchSwitchInst::Create( + CatchSwitch->getParentPad(), nullptr, CatchSwitch->getNumHandlers(), + CatchSwitch->getName(), CatchSwitch); + for (BasicBlock *PadBB : CatchSwitch->handlers()) + NewCatchSwitch->addHandler(PadBB); + + NewTI = NewCatchSwitch; + UnwindDest = CatchSwitch->getUnwindDest(); + } else { + llvm_unreachable("Could not find unwind successor"); + } + + NewTI->takeName(TI); + NewTI->setDebugLoc(TI->getDebugLoc()); + UnwindDest->removePredecessor(BB); + TI->replaceAllUsesWith(NewTI); + TI->eraseFromParent(); + if (DTU) + DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDest}}); + return NewTI; +} + +/// removeUnreachableBlocks - Remove blocks that are not reachable, even +/// if they are in a dead cycle. Return true if a change was made, false +/// otherwise. +bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU, + MemorySSAUpdater *MSSAU) { + SmallPtrSet<BasicBlock *, 16> Reachable; + bool Changed = markAliveBlocks(F, Reachable, DTU); + + // If there are unreachable blocks in the CFG... + if (Reachable.size() == F.size()) + return Changed; + + assert(Reachable.size() < F.size()); + + // Are there any blocks left to actually delete? 
+ SmallSetVector<BasicBlock *, 8> BlocksToRemove; + for (BasicBlock &BB : F) { + // Skip reachable basic blocks + if (Reachable.count(&BB)) + continue; + // Skip already-deleted blocks + if (DTU && DTU->isBBPendingDeletion(&BB)) + continue; + BlocksToRemove.insert(&BB); + } + + if (BlocksToRemove.empty()) + return Changed; + + Changed = true; + NumRemoved += BlocksToRemove.size(); + + if (MSSAU) + MSSAU->removeBlocks(BlocksToRemove); + + DeleteDeadBlocks(BlocksToRemove.takeVector(), DTU); + + return Changed; +} + +void llvm::combineMetadata(Instruction *K, const Instruction *J, + ArrayRef<unsigned> KnownIDs, bool DoesKMove) { + SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata; + K->dropUnknownNonDebugMetadata(KnownIDs); + K->getAllMetadataOtherThanDebugLoc(Metadata); + for (const auto &MD : Metadata) { + unsigned Kind = MD.first; + MDNode *JMD = J->getMetadata(Kind); + MDNode *KMD = MD.second; + + switch (Kind) { + default: + K->setMetadata(Kind, nullptr); // Remove unknown metadata + break; + case LLVMContext::MD_dbg: + llvm_unreachable("getAllMetadataOtherThanDebugLoc returned a MD_dbg"); + case LLVMContext::MD_DIAssignID: + K->mergeDIAssignID(J); + break; + case LLVMContext::MD_tbaa: + K->setMetadata(Kind, MDNode::getMostGenericTBAA(JMD, KMD)); + break; + case LLVMContext::MD_alias_scope: + K->setMetadata(Kind, MDNode::getMostGenericAliasScope(JMD, KMD)); + break; + case LLVMContext::MD_noalias: + case LLVMContext::MD_mem_parallel_loop_access: + K->setMetadata(Kind, MDNode::intersect(JMD, KMD)); + break; + case LLVMContext::MD_access_group: + K->setMetadata(LLVMContext::MD_access_group, + intersectAccessGroups(K, J)); + break; + case LLVMContext::MD_range: + + // If K does move, use most generic range. Otherwise keep the range of + // K. + if (DoesKMove) + // FIXME: If K does move, we should drop the range info and nonnull. + // Currently this function is used with DoesKMove in passes + // doing hoisting/sinking and the current behavior of using the + // most generic range is correct in those cases. + K->setMetadata(Kind, MDNode::getMostGenericRange(JMD, KMD)); + break; + case LLVMContext::MD_fpmath: + K->setMetadata(Kind, MDNode::getMostGenericFPMath(JMD, KMD)); + break; + case LLVMContext::MD_invariant_load: + // Only set the !invariant.load if it is present in both instructions. + K->setMetadata(Kind, JMD); + break; + case LLVMContext::MD_nonnull: + // If K does move, keep nonull if it is present in both instructions. + if (DoesKMove) + K->setMetadata(Kind, JMD); + break; + case LLVMContext::MD_invariant_group: + // Preserve !invariant.group in K. + break; + case LLVMContext::MD_align: + K->setMetadata(Kind, + MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD)); + break; + case LLVMContext::MD_dereferenceable: + case LLVMContext::MD_dereferenceable_or_null: + K->setMetadata(Kind, + MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD)); + break; + case LLVMContext::MD_preserve_access_index: + // Preserve !preserve.access.index in K. + break; + } + } + // Set !invariant.group from J if J has it. If both instructions have it + // then we will just pick it from J - even when they are different. + // Also make sure that K is load or store - f.e. combining bitcast with load + // could produce bitcast with invariant.group metadata, which is invalid. + // FIXME: we should try to preserve both invariant.group md if they are + // different, but right now instruction can only have one invariant.group. 
+ if (auto *JMD = J->getMetadata(LLVMContext::MD_invariant_group)) + if (isa<LoadInst>(K) || isa<StoreInst>(K)) + K->setMetadata(LLVMContext::MD_invariant_group, JMD); +} + +void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J, + bool KDominatesJ) { + unsigned KnownIDs[] = { + LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, + LLVMContext::MD_noalias, LLVMContext::MD_range, + LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull, + LLVMContext::MD_invariant_group, LLVMContext::MD_align, + LLVMContext::MD_dereferenceable, + LLVMContext::MD_dereferenceable_or_null, + LLVMContext::MD_access_group, LLVMContext::MD_preserve_access_index}; + combineMetadata(K, J, KnownIDs, KDominatesJ); +} + +void llvm::copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source) { + SmallVector<std::pair<unsigned, MDNode *>, 8> MD; + Source.getAllMetadata(MD); + MDBuilder MDB(Dest.getContext()); + Type *NewType = Dest.getType(); + const DataLayout &DL = Source.getModule()->getDataLayout(); + for (const auto &MDPair : MD) { + unsigned ID = MDPair.first; + MDNode *N = MDPair.second; + // Note, essentially every kind of metadata should be preserved here! This + // routine is supposed to clone a load instruction changing *only its type*. + // The only metadata it makes sense to drop is metadata which is invalidated + // when the pointer type changes. This should essentially never be the case + // in LLVM, but we explicitly switch over only known metadata to be + // conservatively correct. If you are adding metadata to LLVM which pertains + // to loads, you almost certainly want to add it here. + switch (ID) { + case LLVMContext::MD_dbg: + case LLVMContext::MD_tbaa: + case LLVMContext::MD_prof: + case LLVMContext::MD_fpmath: + case LLVMContext::MD_tbaa_struct: + case LLVMContext::MD_invariant_load: + case LLVMContext::MD_alias_scope: + case LLVMContext::MD_noalias: + case LLVMContext::MD_nontemporal: + case LLVMContext::MD_mem_parallel_loop_access: + case LLVMContext::MD_access_group: + case LLVMContext::MD_noundef: + // All of these directly apply. + Dest.setMetadata(ID, N); + break; + + case LLVMContext::MD_nonnull: + copyNonnullMetadata(Source, N, Dest); + break; + + case LLVMContext::MD_align: + case LLVMContext::MD_dereferenceable: + case LLVMContext::MD_dereferenceable_or_null: + // These only directly apply if the new type is also a pointer. + if (NewType->isPointerTy()) + Dest.setMetadata(ID, N); + break; + + case LLVMContext::MD_range: + copyRangeMetadata(DL, Source, N, Dest); + break; + } + } +} + +void llvm::patchReplacementInstruction(Instruction *I, Value *Repl) { + auto *ReplInst = dyn_cast<Instruction>(Repl); + if (!ReplInst) + return; + + // Patch the replacement so that it is not more restrictive than the value + // being replaced. + // Note that if 'I' is a load being replaced by some operation, + // for example, by an arithmetic operation, then andIRFlags() + // would just erase all math flags from the original arithmetic + // operation, which is clearly not wanted and not needed. + if (!isa<LoadInst>(I)) + ReplInst->andIRFlags(I); + + // FIXME: If both the original and replacement value are part of the + // same control-flow region (meaning that the execution of one + // guarantees the execution of the other), then we can combine the + // noalias scopes here and do better than the general conservative + // answer used in combineMetadata(). 
+ + // In general, GVN unifies expressions over different control-flow + // regions, and so we need a conservative combination of the noalias + // scopes. + static const unsigned KnownIDs[] = { + LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, + LLVMContext::MD_noalias, LLVMContext::MD_range, + LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load, + LLVMContext::MD_invariant_group, LLVMContext::MD_nonnull, + LLVMContext::MD_access_group, LLVMContext::MD_preserve_access_index}; + combineMetadata(ReplInst, I, KnownIDs, false); +} + +template <typename RootType, typename DominatesFn> +static unsigned replaceDominatedUsesWith(Value *From, Value *To, + const RootType &Root, + const DominatesFn &Dominates) { + assert(From->getType() == To->getType()); + + unsigned Count = 0; + for (Use &U : llvm::make_early_inc_range(From->uses())) { + if (!Dominates(Root, U)) + continue; + U.set(To); + LLVM_DEBUG(dbgs() << "Replace dominated use of '" << From->getName() + << "' as " << *To << " in " << *U << "\n"); + ++Count; + } + return Count; +} + +unsigned llvm::replaceNonLocalUsesWith(Instruction *From, Value *To) { + assert(From->getType() == To->getType()); + auto *BB = From->getParent(); + unsigned Count = 0; + + for (Use &U : llvm::make_early_inc_range(From->uses())) { + auto *I = cast<Instruction>(U.getUser()); + if (I->getParent() == BB) + continue; + U.set(To); + ++Count; + } + return Count; +} + +unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, + DominatorTree &DT, + const BasicBlockEdge &Root) { + auto Dominates = [&DT](const BasicBlockEdge &Root, const Use &U) { + return DT.dominates(Root, U); + }; + return ::replaceDominatedUsesWith(From, To, Root, Dominates); +} + +unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, + DominatorTree &DT, + const BasicBlock *BB) { + auto Dominates = [&DT](const BasicBlock *BB, const Use &U) { + return DT.dominates(BB, U); + }; + return ::replaceDominatedUsesWith(From, To, BB, Dominates); +} + +bool llvm::callsGCLeafFunction(const CallBase *Call, + const TargetLibraryInfo &TLI) { + // Check if the function is specifically marked as a gc leaf function. + if (Call->hasFnAttr("gc-leaf-function")) + return true; + if (const Function *F = Call->getCalledFunction()) { + if (F->hasFnAttribute("gc-leaf-function")) + return true; + + if (auto IID = F->getIntrinsicID()) { + // Most LLVM intrinsics do not take safepoints. + return IID != Intrinsic::experimental_gc_statepoint && + IID != Intrinsic::experimental_deoptimize && + IID != Intrinsic::memcpy_element_unordered_atomic && + IID != Intrinsic::memmove_element_unordered_atomic; + } + } + + // Lib calls can be materialized by some passes, and won't be + // marked as 'gc-leaf-function.' All available Libcalls are + // GC-leaf. + LibFunc LF; + if (TLI.getLibFunc(*Call, LF)) { + return TLI.has(LF); + } + + return false; +} + +void llvm::copyNonnullMetadata(const LoadInst &OldLI, MDNode *N, + LoadInst &NewLI) { + auto *NewTy = NewLI.getType(); + + // This only directly applies if the new type is also a pointer. + if (NewTy->isPointerTy()) { + NewLI.setMetadata(LLVMContext::MD_nonnull, N); + return; + } + + // The only other translation we can do is to integral loads with !range + // metadata. 
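+  // For example (illustrative): a pointer load carrying !nonnull that is
+  // rewritten as an i64 load can instead carry !range !{i64 1, i64 0},
+  // i.e. "any value except 0", which is what is constructed below.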
+ if (!NewTy->isIntegerTy()) + return; + + MDBuilder MDB(NewLI.getContext()); + const Value *Ptr = OldLI.getPointerOperand(); + auto *ITy = cast<IntegerType>(NewTy); + auto *NullInt = ConstantExpr::getPtrToInt( + ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy); + auto *NonNullInt = ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1)); + NewLI.setMetadata(LLVMContext::MD_range, + MDB.createRange(NonNullInt, NullInt)); +} + +void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI, + MDNode *N, LoadInst &NewLI) { + auto *NewTy = NewLI.getType(); + // Simply copy the metadata if the type did not change. + if (NewTy == OldLI.getType()) { + NewLI.setMetadata(LLVMContext::MD_range, N); + return; + } + + // Give up unless it is converted to a pointer where there is a single very + // valuable mapping we can do reliably. + // FIXME: It would be nice to propagate this in more ways, but the type + // conversions make it hard. + if (!NewTy->isPointerTy()) + return; + + unsigned BitWidth = DL.getPointerTypeSizeInBits(NewTy); + if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) { + MDNode *NN = MDNode::get(OldLI.getContext(), std::nullopt); + NewLI.setMetadata(LLVMContext::MD_nonnull, NN); + } +} + +void llvm::dropDebugUsers(Instruction &I) { + SmallVector<DbgVariableIntrinsic *, 1> DbgUsers; + findDbgUsers(DbgUsers, &I); + for (auto *DII : DbgUsers) + DII->eraseFromParent(); +} + +void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, + BasicBlock *BB) { + // Since we are moving the instructions out of its basic block, we do not + // retain their original debug locations (DILocations) and debug intrinsic + // instructions. + // + // Doing so would degrade the debugging experience and adversely affect the + // accuracy of profiling information. + // + // Currently, when hoisting the instructions, we take the following actions: + // - Remove their debug intrinsic instructions. + // - Set their debug locations to the values from the insertion point. + // + // As per PR39141 (comment #8), the more fundamental reason why the dbg.values + // need to be deleted, is because there will not be any instructions with a + // DILocation in either branch left after performing the transformation. We + // can only insert a dbg.value after the two branches are joined again. + // + // See PR38762, PR39243 for more details. + // + // TODO: Extend llvm.dbg.value to take more than one SSA Value (PR39141) to + // encode predicated DIExpressions that yield different results on different + // code paths. + + for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) { + Instruction *I = &*II; + I->dropUndefImplyingAttrsAndUnknownMetadata(); + if (I->isUsedByMetadata()) + dropDebugUsers(*I); + if (I->isDebugOrPseudoInst()) { + // Remove DbgInfo and pseudo probe Intrinsics. + II = I->eraseFromParent(); + continue; + } + I->setDebugLoc(InsertPt->getDebugLoc()); + ++II; + } + DomBlock->splice(InsertPt->getIterator(), BB, BB->begin(), + BB->getTerminator()->getIterator()); +} + +namespace { + +/// A potential constituent of a bitreverse or bswap expression. See +/// collectBitParts for a fuller explanation. +struct BitPart { + BitPart(Value *P, unsigned BW) : Provider(P) { + Provenance.resize(BW); + } + + /// The Value that this is a bitreverse/bswap of. + Value *Provider; + + /// The "provenance" of each bit. Provenance[A] = B means that bit A + /// in Provider becomes bit B in the result of this expression. 
+ SmallVector<int8_t, 32> Provenance; // int8_t means max size is i128. + + enum { Unset = -1 }; +}; + +} // end anonymous namespace + +/// Analyze the specified subexpression and see if it is capable of providing +/// pieces of a bswap or bitreverse. The subexpression provides a potential +/// piece of a bswap or bitreverse if it can be proved that each non-zero bit in +/// the output of the expression came from a corresponding bit in some other +/// value. This function is recursive, and the end result is a mapping of +/// bitnumber to bitnumber. It is the caller's responsibility to validate that +/// the bitnumber to bitnumber mapping is correct for a bswap or bitreverse. +/// +/// For example, if the current subexpression if "(shl i32 %X, 24)" then we know +/// that the expression deposits the low byte of %X into the high byte of the +/// result and that all other bits are zero. This expression is accepted and a +/// BitPart is returned with Provider set to %X and Provenance[24-31] set to +/// [0-7]. +/// +/// For vector types, all analysis is performed at the per-element level. No +/// cross-element analysis is supported (shuffle/insertion/reduction), and all +/// constant masks must be splatted across all elements. +/// +/// To avoid revisiting values, the BitPart results are memoized into the +/// provided map. To avoid unnecessary copying of BitParts, BitParts are +/// constructed in-place in the \c BPS map. Because of this \c BPS needs to +/// store BitParts objects, not pointers. As we need the concept of a nullptr +/// BitParts (Value has been analyzed and the analysis failed), we an Optional +/// type instead to provide the same functionality. +/// +/// Because we pass around references into \c BPS, we must use a container that +/// does not invalidate internal references (std::map instead of DenseMap). +static const std::optional<BitPart> & +collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, + std::map<Value *, std::optional<BitPart>> &BPS, int Depth, + bool &FoundRoot) { + auto I = BPS.find(V); + if (I != BPS.end()) + return I->second; + + auto &Result = BPS[V] = std::nullopt; + auto BitWidth = V->getType()->getScalarSizeInBits(); + + // Can't do integer/elements > 128 bits. + if (BitWidth > 128) + return Result; + + // Prevent stack overflow by limiting the recursion depth + if (Depth == BitPartRecursionMaxDepth) { + LLVM_DEBUG(dbgs() << "collectBitParts max recursion depth reached.\n"); + return Result; + } + + if (auto *I = dyn_cast<Instruction>(V)) { + Value *X, *Y; + const APInt *C; + + // If this is an or instruction, it may be an inner node of the bswap. + if (match(V, m_Or(m_Value(X), m_Value(Y)))) { + // Check we have both sources and they are from the same provider. + const auto &A = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, + Depth + 1, FoundRoot); + if (!A || !A->Provider) + return Result; + + const auto &B = collectBitParts(Y, MatchBSwaps, MatchBitReversals, BPS, + Depth + 1, FoundRoot); + if (!B || A->Provider != B->Provider) + return Result; + + // Try and merge the two together. 
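+      // For example (illustrative): for an i16 bswap written as
+      // (x << 8) | (x >> 8), one operand provides bits 0-7 of x as result
+      // bits 8-15 and the other provides bits 8-15 of x as result bits 0-7;
+      // the merge below combines the two disjoint provenances into the full
+      // byte-swapped mapping.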
+ Result = BitPart(A->Provider, BitWidth); + for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx) { + if (A->Provenance[BitIdx] != BitPart::Unset && + B->Provenance[BitIdx] != BitPart::Unset && + A->Provenance[BitIdx] != B->Provenance[BitIdx]) + return Result = std::nullopt; + + if (A->Provenance[BitIdx] == BitPart::Unset) + Result->Provenance[BitIdx] = B->Provenance[BitIdx]; + else + Result->Provenance[BitIdx] = A->Provenance[BitIdx]; + } + + return Result; + } + + // If this is a logical shift by a constant, recurse then shift the result. + if (match(V, m_LogicalShift(m_Value(X), m_APInt(C)))) { + const APInt &BitShift = *C; + + // Ensure the shift amount is defined. + if (BitShift.uge(BitWidth)) + return Result; + + // For bswap-only, limit shift amounts to whole bytes, for an early exit. + if (!MatchBitReversals && (BitShift.getZExtValue() % 8) != 0) + return Result; + + const auto &Res = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, + Depth + 1, FoundRoot); + if (!Res) + return Result; + Result = Res; + + // Perform the "shift" on BitProvenance. + auto &P = Result->Provenance; + if (I->getOpcode() == Instruction::Shl) { + P.erase(std::prev(P.end(), BitShift.getZExtValue()), P.end()); + P.insert(P.begin(), BitShift.getZExtValue(), BitPart::Unset); + } else { + P.erase(P.begin(), std::next(P.begin(), BitShift.getZExtValue())); + P.insert(P.end(), BitShift.getZExtValue(), BitPart::Unset); + } + + return Result; + } + + // If this is a logical 'and' with a mask that clears bits, recurse then + // unset the appropriate bits. + if (match(V, m_And(m_Value(X), m_APInt(C)))) { + const APInt &AndMask = *C; + + // Check that the mask allows a multiple of 8 bits for a bswap, for an + // early exit. + unsigned NumMaskedBits = AndMask.countPopulation(); + if (!MatchBitReversals && (NumMaskedBits % 8) != 0) + return Result; + + const auto &Res = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, + Depth + 1, FoundRoot); + if (!Res) + return Result; + Result = Res; + + for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx) + // If the AndMask is zero for this bit, clear the bit. + if (AndMask[BitIdx] == 0) + Result->Provenance[BitIdx] = BitPart::Unset; + return Result; + } + + // If this is a zext instruction zero extend the result. + if (match(V, m_ZExt(m_Value(X)))) { + const auto &Res = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, + Depth + 1, FoundRoot); + if (!Res) + return Result; + + Result = BitPart(Res->Provider, BitWidth); + auto NarrowBitWidth = X->getType()->getScalarSizeInBits(); + for (unsigned BitIdx = 0; BitIdx < NarrowBitWidth; ++BitIdx) + Result->Provenance[BitIdx] = Res->Provenance[BitIdx]; + for (unsigned BitIdx = NarrowBitWidth; BitIdx < BitWidth; ++BitIdx) + Result->Provenance[BitIdx] = BitPart::Unset; + return Result; + } + + // If this is a truncate instruction, extract the lower bits. + if (match(V, m_Trunc(m_Value(X)))) { + const auto &Res = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, + Depth + 1, FoundRoot); + if (!Res) + return Result; + + Result = BitPart(Res->Provider, BitWidth); + for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx) + Result->Provenance[BitIdx] = Res->Provenance[BitIdx]; + return Result; + } + + // BITREVERSE - most likely due to us previous matching a partial + // bitreverse. 
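+    // (Illustrative: the loop below flips the operand's provenance
+    // end-for-end, so a bitreverse of an already-reversed value maps back to
+    // the identity.)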
+ if (match(V, m_BitReverse(m_Value(X)))) { + const auto &Res = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, + Depth + 1, FoundRoot); + if (!Res) + return Result; + + Result = BitPart(Res->Provider, BitWidth); + for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx) + Result->Provenance[(BitWidth - 1) - BitIdx] = Res->Provenance[BitIdx]; + return Result; + } + + // BSWAP - most likely due to us previous matching a partial bswap. + if (match(V, m_BSwap(m_Value(X)))) { + const auto &Res = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, + Depth + 1, FoundRoot); + if (!Res) + return Result; + + unsigned ByteWidth = BitWidth / 8; + Result = BitPart(Res->Provider, BitWidth); + for (unsigned ByteIdx = 0; ByteIdx < ByteWidth; ++ByteIdx) { + unsigned ByteBitOfs = ByteIdx * 8; + for (unsigned BitIdx = 0; BitIdx < 8; ++BitIdx) + Result->Provenance[(BitWidth - 8 - ByteBitOfs) + BitIdx] = + Res->Provenance[ByteBitOfs + BitIdx]; + } + return Result; + } + + // Funnel 'double' shifts take 3 operands, 2 inputs and the shift + // amount (modulo). + // fshl(X,Y,Z): (X << (Z % BW)) | (Y >> (BW - (Z % BW))) + // fshr(X,Y,Z): (X << (BW - (Z % BW))) | (Y >> (Z % BW)) + if (match(V, m_FShl(m_Value(X), m_Value(Y), m_APInt(C))) || + match(V, m_FShr(m_Value(X), m_Value(Y), m_APInt(C)))) { + // We can treat fshr as a fshl by flipping the modulo amount. + unsigned ModAmt = C->urem(BitWidth); + if (cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::fshr) + ModAmt = BitWidth - ModAmt; + + // For bswap-only, limit shift amounts to whole bytes, for an early exit. + if (!MatchBitReversals && (ModAmt % 8) != 0) + return Result; + + // Check we have both sources and they are from the same provider. + const auto &LHS = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, + Depth + 1, FoundRoot); + if (!LHS || !LHS->Provider) + return Result; + + const auto &RHS = collectBitParts(Y, MatchBSwaps, MatchBitReversals, BPS, + Depth + 1, FoundRoot); + if (!RHS || LHS->Provider != RHS->Provider) + return Result; + + unsigned StartBitRHS = BitWidth - ModAmt; + Result = BitPart(LHS->Provider, BitWidth); + for (unsigned BitIdx = 0; BitIdx < StartBitRHS; ++BitIdx) + Result->Provenance[BitIdx + ModAmt] = LHS->Provenance[BitIdx]; + for (unsigned BitIdx = 0; BitIdx < ModAmt; ++BitIdx) + Result->Provenance[BitIdx] = RHS->Provenance[BitIdx + StartBitRHS]; + return Result; + } + } + + // If we've already found a root input value then we're never going to merge + // these back together. + if (FoundRoot) + return Result; + + // Okay, we got to something that isn't a shift, 'or', 'and', etc. This must + // be the root input value to the bswap/bitreverse. + FoundRoot = true; + Result = BitPart(V, BitWidth); + for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx) + Result->Provenance[BitIdx] = BitIdx; + return Result; +} + +static bool bitTransformIsCorrectForBSwap(unsigned From, unsigned To, + unsigned BitWidth) { + if (From % 8 != To % 8) + return false; + // Convert from bit indices to byte indices and check for a byte reversal. 
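+  // Worked example (illustrative): for i32, result bit 24 sourced from
+  // provider bit 0 gives From = 0, To = 24, so byte 0 vs byte 3, and
+  // 0 == 4 - 3 - 1 holds, which is consistent with a bswap.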
+ From >>= 3; + To >>= 3; + BitWidth >>= 3; + return From == BitWidth - To - 1; +} + +static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To, + unsigned BitWidth) { + return From == BitWidth - To - 1; +} + +bool llvm::recognizeBSwapOrBitReverseIdiom( + Instruction *I, bool MatchBSwaps, bool MatchBitReversals, + SmallVectorImpl<Instruction *> &InsertedInsts) { + if (!match(I, m_Or(m_Value(), m_Value())) && + !match(I, m_FShl(m_Value(), m_Value(), m_Value())) && + !match(I, m_FShr(m_Value(), m_Value(), m_Value()))) + return false; + if (!MatchBSwaps && !MatchBitReversals) + return false; + Type *ITy = I->getType(); + if (!ITy->isIntOrIntVectorTy() || ITy->getScalarSizeInBits() > 128) + return false; // Can't do integer/elements > 128 bits. + + // Try to find all the pieces corresponding to the bswap. + bool FoundRoot = false; + std::map<Value *, std::optional<BitPart>> BPS; + const auto &Res = + collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS, 0, FoundRoot); + if (!Res) + return false; + ArrayRef<int8_t> BitProvenance = Res->Provenance; + assert(all_of(BitProvenance, + [](int8_t I) { return I == BitPart::Unset || 0 <= I; }) && + "Illegal bit provenance index"); + + // If the upper bits are zero, then attempt to perform as a truncated op. + Type *DemandedTy = ITy; + if (BitProvenance.back() == BitPart::Unset) { + while (!BitProvenance.empty() && BitProvenance.back() == BitPart::Unset) + BitProvenance = BitProvenance.drop_back(); + if (BitProvenance.empty()) + return false; // TODO - handle null value? + DemandedTy = Type::getIntNTy(I->getContext(), BitProvenance.size()); + if (auto *IVecTy = dyn_cast<VectorType>(ITy)) + DemandedTy = VectorType::get(DemandedTy, IVecTy); + } + + // Check BitProvenance hasn't found a source larger than the result type. + unsigned DemandedBW = DemandedTy->getScalarSizeInBits(); + if (DemandedBW > ITy->getScalarSizeInBits()) + return false; + + // Now, is the bit permutation correct for a bswap or a bitreverse? We can + // only byteswap values with an even number of bytes. + APInt DemandedMask = APInt::getAllOnes(DemandedBW); + bool OKForBSwap = MatchBSwaps && (DemandedBW % 16) == 0; + bool OKForBitReverse = MatchBitReversals; + for (unsigned BitIdx = 0; + (BitIdx < DemandedBW) && (OKForBSwap || OKForBitReverse); ++BitIdx) { + if (BitProvenance[BitIdx] == BitPart::Unset) { + DemandedMask.clearBit(BitIdx); + continue; + } + OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[BitIdx], BitIdx, + DemandedBW); + OKForBitReverse &= bitTransformIsCorrectForBitReverse(BitProvenance[BitIdx], + BitIdx, DemandedBW); + } + + Intrinsic::ID Intrin; + if (OKForBSwap) + Intrin = Intrinsic::bswap; + else if (OKForBitReverse) + Intrin = Intrinsic::bitreverse; + else + return false; + + Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, DemandedTy); + Value *Provider = Res->Provider; + + // We may need to truncate the provider. + if (DemandedTy != Provider->getType()) { + auto *Trunc = + CastInst::CreateIntegerCast(Provider, DemandedTy, false, "trunc", I); + InsertedInsts.push_back(Trunc); + Provider = Trunc; + } + + Instruction *Result = CallInst::Create(F, Provider, "rev", I); + InsertedInsts.push_back(Result); + + if (!DemandedMask.isAllOnes()) { + auto *Mask = ConstantInt::get(DemandedTy, DemandedMask); + Result = BinaryOperator::Create(Instruction::And, Result, Mask, "mask", I); + InsertedInsts.push_back(Result); + } + + // We may need to zeroextend back to the result type. 
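+  // For example (illustrative): if only the low 16 bits of an i32 expression
+  // carry provenance, DemandedTy is i16, the provider is truncated, the
+  // intrinsic is emitted at i16, and the cast below zero-extends the result
+  // back to i32.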
+ if (ITy != Result->getType()) { + auto *ExtInst = CastInst::CreateIntegerCast(Result, ITy, false, "zext", I); + InsertedInsts.push_back(ExtInst); + } + + return true; +} + +// CodeGen has special handling for some string functions that may replace +// them with target-specific intrinsics. Since that'd skip our interceptors +// in ASan/MSan/TSan/DFSan, and thus make us miss some memory accesses, +// we mark affected calls as NoBuiltin, which will disable optimization +// in CodeGen. +void llvm::maybeMarkSanitizerLibraryCallNoBuiltin( + CallInst *CI, const TargetLibraryInfo *TLI) { + Function *F = CI->getCalledFunction(); + LibFunc Func; + if (F && !F->hasLocalLinkage() && F->hasName() && + TLI->getLibFunc(F->getName(), Func) && TLI->hasOptimizedCodeGen(Func) && + !F->doesNotAccessMemory()) + CI->addFnAttr(Attribute::NoBuiltin); +} + +bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) { + // We can't have a PHI with a metadata type. + if (I->getOperand(OpIdx)->getType()->isMetadataTy()) + return false; + + // Early exit. + if (!isa<Constant>(I->getOperand(OpIdx))) + return true; + + switch (I->getOpcode()) { + default: + return true; + case Instruction::Call: + case Instruction::Invoke: { + const auto &CB = cast<CallBase>(*I); + + // Can't handle inline asm. Skip it. + if (CB.isInlineAsm()) + return false; + + // Constant bundle operands may need to retain their constant-ness for + // correctness. + if (CB.isBundleOperand(OpIdx)) + return false; + + if (OpIdx < CB.arg_size()) { + // Some variadic intrinsics require constants in the variadic arguments, + // which currently aren't markable as immarg. + if (isa<IntrinsicInst>(CB) && + OpIdx >= CB.getFunctionType()->getNumParams()) { + // This is known to be OK for stackmap. + return CB.getIntrinsicID() == Intrinsic::experimental_stackmap; + } + + // gcroot is a special case, since it requires a constant argument which + // isn't also required to be a simple ConstantInt. + if (CB.getIntrinsicID() == Intrinsic::gcroot) + return false; + + // Some intrinsic operands are required to be immediates. + return !CB.paramHasAttr(OpIdx, Attribute::ImmArg); + } + + // It is never allowed to replace the call argument to an intrinsic, but it + // may be possible for a call. + return !isa<IntrinsicInst>(CB); + } + case Instruction::ShuffleVector: + // Shufflevector masks are constant. + return OpIdx != 2; + case Instruction::Switch: + case Instruction::ExtractValue: + // All operands apart from the first are constant. + return OpIdx == 0; + case Instruction::InsertValue: + // All operands apart from the first and the second are constant. + return OpIdx < 2; + case Instruction::Alloca: + // Static allocas (constant size in the entry block) are handled by + // prologue/epilogue insertion so they're free anyway. We definitely don't + // want to make them non-constant. 
+ return !cast<AllocaInst>(I)->isStaticAlloca(); + case Instruction::GetElementPtr: + if (OpIdx == 0) + return true; + gep_type_iterator It = gep_type_begin(I); + for (auto E = std::next(It, OpIdx); It != E; ++It) + if (It.isStruct()) + return false; + return true; + } +} + +Value *llvm::invertCondition(Value *Condition) { + // First: Check if it's a constant + if (Constant *C = dyn_cast<Constant>(Condition)) + return ConstantExpr::getNot(C); + + // Second: If the condition is already inverted, return the original value + Value *NotCondition; + if (match(Condition, m_Not(m_Value(NotCondition)))) + return NotCondition; + + BasicBlock *Parent = nullptr; + Instruction *Inst = dyn_cast<Instruction>(Condition); + if (Inst) + Parent = Inst->getParent(); + else if (Argument *Arg = dyn_cast<Argument>(Condition)) + Parent = &Arg->getParent()->getEntryBlock(); + assert(Parent && "Unsupported condition to invert"); + + // Third: Check all the users for an invert + for (User *U : Condition->users()) + if (Instruction *I = dyn_cast<Instruction>(U)) + if (I->getParent() == Parent && match(I, m_Not(m_Specific(Condition)))) + return I; + + // Last option: Create a new instruction + auto *Inverted = + BinaryOperator::CreateNot(Condition, Condition->getName() + ".inv"); + if (Inst && !isa<PHINode>(Inst)) + Inverted->insertAfter(Inst); + else + Inverted->insertBefore(&*Parent->getFirstInsertionPt()); + return Inverted; +} + +bool llvm::inferAttributesFromOthers(Function &F) { + // Note: We explicitly check for attributes rather than using cover functions + // because some of the cover functions include the logic being implemented. + + bool Changed = false; + // readnone + not convergent implies nosync + if (!F.hasFnAttribute(Attribute::NoSync) && + F.doesNotAccessMemory() && !F.isConvergent()) { + F.setNoSync(); + Changed = true; + } + + // readonly implies nofree + if (!F.hasFnAttribute(Attribute::NoFree) && F.onlyReadsMemory()) { + F.setDoesNotFreeMemory(); + Changed = true; + } + + // willreturn implies mustprogress + if (!F.hasFnAttribute(Attribute::MustProgress) && F.willReturn()) { + F.setMustProgress(); + Changed = true; + } + + // TODO: There are a bunch of cases of restrictive memory effects we + // can infer by inspecting arguments of argmemonly-ish functions. + + return Changed; +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LoopPeel.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LoopPeel.cpp new file mode 100644 index 0000000000..2acbe90023 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/LoopPeel.cpp @@ -0,0 +1,1040 @@ +//===- LoopPeel.cpp -------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Loop Peeling Utilities. 
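+//
+// Peeling clones the first few iterations of a loop in front of it so that
+// those iterations (and often the remaining loop) can be simplified, e.g.
+// when header PHIs become invariant or conditions fold after a known number
+// of iterations.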
+//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LoopPeel.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/ProfDataUtils.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <optional> + +using namespace llvm; +using namespace llvm::PatternMatch; + +#define DEBUG_TYPE "loop-peel" + +STATISTIC(NumPeeled, "Number of loops peeled"); + +static cl::opt<unsigned> UnrollPeelCount( + "unroll-peel-count", cl::Hidden, + cl::desc("Set the unroll peeling count, for testing purposes")); + +static cl::opt<bool> + UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, + cl::desc("Allows loops to be peeled when the dynamic " + "trip count is known to be low.")); + +static cl::opt<bool> + UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling", + cl::init(false), cl::Hidden, + cl::desc("Allows loop nests to be peeled.")); + +static cl::opt<unsigned> UnrollPeelMaxCount( + "unroll-peel-max-count", cl::init(7), cl::Hidden, + cl::desc("Max average trip count which will cause loop peeling.")); + +static cl::opt<unsigned> UnrollForcePeelCount( + "unroll-force-peel-count", cl::init(0), cl::Hidden, + cl::desc("Force a peel count regardless of profiling information.")); + +static cl::opt<bool> DisableAdvancedPeeling( + "disable-advanced-peeling", cl::init(false), cl::Hidden, + cl::desc( + "Disable advance peeling. Issues for convergent targets (D134803).")); + +static const char *PeeledCountMetaData = "llvm.loop.peeled.count"; + +// Check whether we are capable of peeling this loop. +bool llvm::canPeel(const Loop *L) { + // Make sure the loop is in simplified form + if (!L->isLoopSimplifyForm()) + return false; + if (!DisableAdvancedPeeling) + return true; + + SmallVector<BasicBlock *, 4> Exits; + L->getUniqueNonLatchExitBlocks(Exits); + // The latch must either be the only exiting block or all non-latch exit + // blocks have either a deopt or unreachable terminator or compose a chain of + // blocks where the last one is either deopt or unreachable terminated. Both + // deopt and unreachable terminators are a strong indication they are not + // taken. Note that this is a profitability check, not a legality check. Also + // note that LoopPeeling currently can only update the branch weights of latch + // blocks and branch weights to blocks with deopt or unreachable do not need + // updating. 
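+  // For example (illustrative): a loop whose only non-latch exit blocks end
+  // in unreachable (such as trap paths) is still considered peelable by the
+  // check below.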
+ return llvm::all_of(Exits, IsBlockFollowedByDeoptOrUnreachable); +} + +namespace { + +// As a loop is peeled, it may be the case that Phi nodes become +// loop-invariant (ie, known because there is only one choice). +// For example, consider the following function: +// void g(int); +// void binary() { +// int x = 0; +// int y = 0; +// int a = 0; +// for(int i = 0; i <100000; ++i) { +// g(x); +// x = y; +// g(a); +// y = a + 1; +// a = 5; +// } +// } +// Peeling 3 iterations is beneficial because the values for x, y and a +// become known. The IR for this loop looks something like the following: +// +// %i = phi i32 [ 0, %entry ], [ %inc, %if.end ] +// %a = phi i32 [ 0, %entry ], [ 5, %if.end ] +// %y = phi i32 [ 0, %entry ], [ %add, %if.end ] +// %x = phi i32 [ 0, %entry ], [ %y, %if.end ] +// ... +// tail call void @_Z1gi(i32 signext %x) +// tail call void @_Z1gi(i32 signext %a) +// %add = add nuw nsw i32 %a, 1 +// %inc = add nuw nsw i32 %i, 1 +// %exitcond = icmp eq i32 %inc, 100000 +// br i1 %exitcond, label %for.cond.cleanup, label %for.body +// +// The arguments for the calls to g will become known after 3 iterations +// of the loop, because the phi nodes values become known after 3 iterations +// of the loop (ie, they are known on the 4th iteration, so peel 3 iterations). +// The first iteration has g(0), g(0); the second has g(0), g(5); the +// third has g(1), g(5) and the fourth (and all subsequent) have g(6), g(5). +// Now consider the phi nodes: +// %a is a phi with constants so it is determined after iteration 1. +// %y is a phi based on a constant and %a so it is determined on +// the iteration after %a is determined, so iteration 2. +// %x is a phi based on a constant and %y so it is determined on +// the iteration after %y, so iteration 3. +// %i is based on itself (and is an induction variable) so it is +// never determined. +// This means that peeling off 3 iterations will result in being able to +// remove the phi nodes for %a, %y, and %x. The arguments for the +// corresponding calls to g are determined and the code for computing +// x, y, and a can be removed. +// +// The PhiAnalyzer class calculates how many times a loop should be +// peeled based on the above analysis of the phi nodes in the loop while +// respecting the maximum specified. +class PhiAnalyzer { +public: + PhiAnalyzer(const Loop &L, unsigned MaxIterations); + + // Calculate the sufficient minimum number of iterations of the loop to peel + // such that phi instructions become determined (subject to allowable limits) + std::optional<unsigned> calculateIterationsToPeel(); + +protected: + using PeelCounter = std::optional<unsigned>; + const PeelCounter Unknown = std::nullopt; + + // Add 1 respecting Unknown and return Unknown if result over MaxIterations + PeelCounter addOne(PeelCounter PC) const { + if (PC == Unknown) + return Unknown; + return (*PC + 1 <= MaxIterations) ? PeelCounter{*PC + 1} : Unknown; + } + + // Calculate the number of iterations after which the given value + // becomes an invariant. 
+ PeelCounter calculate(const Value &); + + const Loop &L; + const unsigned MaxIterations; + + // Map of Values to number of iterations to invariance + SmallDenseMap<const Value *, PeelCounter> IterationsToInvariance; +}; + +PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations) + : L(L), MaxIterations(MaxIterations) { + assert(canPeel(&L) && "loop is not suitable for peeling"); + assert(MaxIterations > 0 && "no peeling is allowed?"); +} + +// This function calculates the number of iterations after which the value +// becomes an invariant. The pre-calculated values are memorized in a map. +// N.B. This number will be Unknown or <= MaxIterations. +// The function is calculated according to the following definition: +// Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge]. +// F(%x) = G(%y) + 1 (N.B. [MaxIterations | Unknown] + 1 => Unknown) +// G(%y) = 0 if %y is a loop invariant +// G(%y) = G(%BackEdgeValue) if %y is a phi in the header block +// G(%y) = TODO: if %y is an expression based on phis and loop invariants +// The example looks like: +// %x = phi(0, %a) <-- becomes invariant starting from 3rd iteration. +// %y = phi(0, 5) +// %a = %y + 1 +// G(%y) = Unknown otherwise (including phi not in header block) +PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) { + // If we already know the answer, take it from the map. + auto I = IterationsToInvariance.find(&V); + if (I != IterationsToInvariance.end()) + return I->second; + + // Place Unknown to map to avoid infinite recursion. Such + // cycles can never stop on an invariant. + IterationsToInvariance[&V] = Unknown; + + if (L.isLoopInvariant(&V)) + // Loop invariant so known at start. + return (IterationsToInvariance[&V] = 0); + if (const PHINode *Phi = dyn_cast<PHINode>(&V)) { + if (Phi->getParent() != L.getHeader()) { + // Phi is not in header block so Unknown. + assert(IterationsToInvariance[&V] == Unknown && "unexpected value saved"); + return Unknown; + } + // We need to analyze the input from the back edge and add 1. + Value *Input = Phi->getIncomingValueForBlock(L.getLoopLatch()); + PeelCounter Iterations = calculate(*Input); + assert(IterationsToInvariance[Input] == Iterations && + "unexpected value saved"); + return (IterationsToInvariance[Phi] = addOne(Iterations)); + } + if (const Instruction *I = dyn_cast<Instruction>(&V)) { + if (isa<CmpInst>(I) || I->isBinaryOp()) { + // Binary instructions get the max of the operands. + PeelCounter LHS = calculate(*I->getOperand(0)); + if (LHS == Unknown) + return Unknown; + PeelCounter RHS = calculate(*I->getOperand(1)); + if (RHS == Unknown) + return Unknown; + return (IterationsToInvariance[I] = {std::max(*LHS, *RHS)}); + } + if (I->isCast()) + // Cast instructions get the value of the operand. + return (IterationsToInvariance[I] = calculate(*I->getOperand(0))); + } + // TODO: handle more expressions + + // Everything else is Unknown. + assert(IterationsToInvariance[&V] == Unknown && "unexpected value saved"); + return Unknown; +} + +std::optional<unsigned> PhiAnalyzer::calculateIterationsToPeel() { + unsigned Iterations = 0; + for (auto &PHI : L.getHeader()->phis()) { + PeelCounter ToInvariance = calculate(PHI); + if (ToInvariance != Unknown) { + assert(*ToInvariance <= MaxIterations && "bad result in phi analysis"); + Iterations = std::max(Iterations, *ToInvariance); + if (Iterations == MaxIterations) + break; + } + } + assert((Iterations <= MaxIterations) && "bad result in phi analysis"); + return Iterations ? 
std::optional<unsigned>(Iterations) : std::nullopt; +} + +} // unnamed namespace + +// Try to find any invariant memory reads that will become dereferenceable in +// the remainder loop after peeling. The load must also be used (transitively) +// by an exit condition. Returns the number of iterations to peel off (at the +// moment either 0 or 1). +static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L, + DominatorTree &DT, + AssumptionCache *AC) { + // Skip loops with a single exiting block, because there should be no benefit + // for the heuristic below. + if (L.getExitingBlock()) + return 0; + + // All non-latch exit blocks must have an UnreachableInst terminator. + // Otherwise the heuristic below may not be profitable. + SmallVector<BasicBlock *, 4> Exits; + L.getUniqueNonLatchExitBlocks(Exits); + if (any_of(Exits, [](const BasicBlock *BB) { + return !isa<UnreachableInst>(BB->getTerminator()); + })) + return 0; + + // Now look for invariant loads that dominate the latch and are not known to + // be dereferenceable. If there are such loads and no writes, they will become + // dereferenceable in the loop if the first iteration is peeled off. Also + // collect the set of instructions controlled by such loads. Only peel if an + // exit condition uses (transitively) such a load. + BasicBlock *Header = L.getHeader(); + BasicBlock *Latch = L.getLoopLatch(); + SmallPtrSet<Value *, 8> LoadUsers; + const DataLayout &DL = L.getHeader()->getModule()->getDataLayout(); + for (BasicBlock *BB : L.blocks()) { + for (Instruction &I : *BB) { + if (I.mayWriteToMemory()) + return 0; + + auto Iter = LoadUsers.find(&I); + if (Iter != LoadUsers.end()) { + for (Value *U : I.users()) + LoadUsers.insert(U); + } + // Do not look for reads in the header; they can already be hoisted + // without peeling. + if (BB == Header) + continue; + if (auto *LI = dyn_cast<LoadInst>(&I)) { + Value *Ptr = LI->getPointerOperand(); + if (DT.dominates(BB, Latch) && L.isLoopInvariant(Ptr) && + !isDereferenceablePointer(Ptr, LI->getType(), DL, LI, AC, &DT)) + for (Value *U : I.users()) + LoadUsers.insert(U); + } + } + } + SmallVector<BasicBlock *> ExitingBlocks; + L.getExitingBlocks(ExitingBlocks); + if (any_of(ExitingBlocks, [&LoadUsers](BasicBlock *Exiting) { + return LoadUsers.contains(Exiting->getTerminator()); + })) + return 1; + return 0; +} + +// Return the number of iterations to peel off that make conditions in the +// body true/false. For example, if we peel 2 iterations off the loop below, +// the condition i < 2 can be evaluated at compile time. +// for (i = 0; i < n; i++) +// if (i < 2) +// .. +// else +// .. +// } +static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, + ScalarEvolution &SE) { + assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form"); + unsigned DesiredPeelCount = 0; + + for (auto *BB : L.blocks()) { + auto *BI = dyn_cast<BranchInst>(BB->getTerminator()); + if (!BI || BI->isUnconditional()) + continue; + + // Ignore loop exit condition. + if (L.getLoopLatch() == BB) + continue; + + Value *Condition = BI->getCondition(); + Value *LeftVal, *RightVal; + CmpInst::Predicate Pred; + if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal)))) + continue; + + const SCEV *LeftSCEV = SE.getSCEV(LeftVal); + const SCEV *RightSCEV = SE.getSCEV(RightVal); + + // Do not consider predicates that are known to be true or false + // independently of the loop iteration. 
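+    // For the loop in the comment above this function (illustrative), the
+    // scan below determines that after peeling 2 iterations the branch on
+    // "i < 2" is statically known both in the peeled copies and in the
+    // remaining loop.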
+ if (SE.evaluatePredicate(Pred, LeftSCEV, RightSCEV)) + continue; + + // Check if we have a condition with one AddRec and one non AddRec + // expression. Normalize LeftSCEV to be the AddRec. + if (!isa<SCEVAddRecExpr>(LeftSCEV)) { + if (isa<SCEVAddRecExpr>(RightSCEV)) { + std::swap(LeftSCEV, RightSCEV); + Pred = ICmpInst::getSwappedPredicate(Pred); + } else + continue; + } + + const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV); + + // Avoid huge SCEV computations in the loop below, make sure we only + // consider AddRecs of the loop we are trying to peel. + if (!LeftAR->isAffine() || LeftAR->getLoop() != &L) + continue; + if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) && + !SE.getMonotonicPredicateType(LeftAR, Pred)) + continue; + + // Check if extending the current DesiredPeelCount lets us evaluate Pred + // or !Pred in the loop body statically. + unsigned NewPeelCount = DesiredPeelCount; + + const SCEV *IterVal = LeftAR->evaluateAtIteration( + SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE); + + // If the original condition is not known, get the negated predicate + // (which holds on the else branch) and check if it is known. This allows + // us to peel of iterations that make the original condition false. + if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV)) + Pred = ICmpInst::getInversePredicate(Pred); + + const SCEV *Step = LeftAR->getStepRecurrence(SE); + const SCEV *NextIterVal = SE.getAddExpr(IterVal, Step); + auto PeelOneMoreIteration = [&IterVal, &NextIterVal, &SE, Step, + &NewPeelCount]() { + IterVal = NextIterVal; + NextIterVal = SE.getAddExpr(IterVal, Step); + NewPeelCount++; + }; + + auto CanPeelOneMoreIteration = [&NewPeelCount, &MaxPeelCount]() { + return NewPeelCount < MaxPeelCount; + }; + + while (CanPeelOneMoreIteration() && + SE.isKnownPredicate(Pred, IterVal, RightSCEV)) + PeelOneMoreIteration(); + + // With *that* peel count, does the predicate !Pred become known in the + // first iteration of the loop body after peeling? + if (!SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal, + RightSCEV)) + continue; // If not, give up. + + // However, for equality comparisons, that isn't always sufficient to + // eliminate the comparsion in loop body, we may need to peel one more + // iteration. See if that makes !Pred become unknown again. + if (ICmpInst::isEquality(Pred) && + !SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), NextIterVal, + RightSCEV) && + !SE.isKnownPredicate(Pred, IterVal, RightSCEV) && + SE.isKnownPredicate(Pred, NextIterVal, RightSCEV)) { + if (!CanPeelOneMoreIteration()) + continue; // Need to peel one more iteration, but can't. Give up. + PeelOneMoreIteration(); // Great! + } + + DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount); + } + + return DesiredPeelCount; +} + +/// This "heuristic" exactly matches implicit behavior which used to exist +/// inside getLoopEstimatedTripCount. It was added here to keep an +/// improvement inside that API from causing peeling to become more aggressive. +/// This should probably be removed. 
+static bool violatesLegacyMultiExitLoopCheck(Loop *L) { + BasicBlock *Latch = L->getLoopLatch(); + if (!Latch) + return true; + + BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator()); + if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch)) + return true; + + assert((LatchBR->getSuccessor(0) == L->getHeader() || + LatchBR->getSuccessor(1) == L->getHeader()) && + "At least one edge out of the latch must go to the header"); + + SmallVector<BasicBlock *, 4> ExitBlocks; + L->getUniqueNonLatchExitBlocks(ExitBlocks); + return any_of(ExitBlocks, [](const BasicBlock *EB) { + return !EB->getTerminatingDeoptimizeCall(); + }); +} + + +// Return the number of iterations we want to peel off. +void llvm::computePeelCount(Loop *L, unsigned LoopSize, + TargetTransformInfo::PeelingPreferences &PP, + unsigned TripCount, DominatorTree &DT, + ScalarEvolution &SE, AssumptionCache *AC, + unsigned Threshold) { + assert(LoopSize > 0 && "Zero loop size is not allowed!"); + // Save the PP.PeelCount value set by the target in + // TTI.getPeelingPreferences or by the flag -unroll-peel-count. + unsigned TargetPeelCount = PP.PeelCount; + PP.PeelCount = 0; + if (!canPeel(L)) + return; + + // Only try to peel innermost loops by default. + // The constraint can be relaxed by the target in TTI.getPeelingPreferences + // or by the flag -unroll-allow-loop-nests-peeling. + if (!PP.AllowLoopNestsPeeling && !L->isInnermost()) + return; + + // If the user provided a peel count, use that. + bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0; + if (UserPeelCount) { + LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount + << " iterations.\n"); + PP.PeelCount = UnrollForcePeelCount; + PP.PeelProfiledIterations = true; + return; + } + + // Skip peeling if it's disabled. + if (!PP.AllowPeeling) + return; + + // Check that we can peel at least one iteration. + if (2 * LoopSize > Threshold) + return; + + unsigned AlreadyPeeled = 0; + if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData)) + AlreadyPeeled = *Peeled; + // Stop if we already peeled off the maximum number of iterations. + if (AlreadyPeeled >= UnrollPeelMaxCount) + return; + + // Pay respect to limitations implied by loop size and the max peel count. + unsigned MaxPeelCount = UnrollPeelMaxCount; + MaxPeelCount = std::min(MaxPeelCount, Threshold / LoopSize - 1); + + // Start the max computation with the PP.PeelCount value set by the target + // in TTI.getPeelingPreferences or by the flag -unroll-peel-count. + unsigned DesiredPeelCount = TargetPeelCount; + + // Here we try to get rid of Phis which become invariants after 1, 2, ..., N + // iterations of the loop. For this we compute the number for iterations after + // which every Phi is guaranteed to become an invariant, and try to peel the + // maximum number of iterations among these values, thus turning all those + // Phis into invariants. + if (MaxPeelCount > DesiredPeelCount) { + // Check how many iterations are useful for resolving Phis + auto NumPeels = PhiAnalyzer(*L, MaxPeelCount).calculateIterationsToPeel(); + if (NumPeels) + DesiredPeelCount = std::max(DesiredPeelCount, *NumPeels); + } + + DesiredPeelCount = std::max(DesiredPeelCount, + countToEliminateCompares(*L, MaxPeelCount, SE)); + + if (DesiredPeelCount == 0) + DesiredPeelCount = peelToTurnInvariantLoadsDerefencebale(*L, DT, AC); + + if (DesiredPeelCount > 0) { + DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount); + // Consider max peel count limitation. 
+ assert(DesiredPeelCount > 0 && "Wrong loop size estimation?"); + if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) { + LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount + << " iteration(s) to turn" + << " some Phis into invariants.\n"); + PP.PeelCount = DesiredPeelCount; + PP.PeelProfiledIterations = false; + return; + } + } + + // Bail if we know the statically calculated trip count. + // In this case we rather prefer partial unrolling. + if (TripCount) + return; + + // Do not apply profile base peeling if it is disabled. + if (!PP.PeelProfiledIterations) + return; + // If we don't know the trip count, but have reason to believe the average + // trip count is low, peeling should be beneficial, since we will usually + // hit the peeled section. + // We only do this in the presence of profile information, since otherwise + // our estimates of the trip count are not reliable enough. + if (L->getHeader()->getParent()->hasProfileData()) { + if (violatesLegacyMultiExitLoopCheck(L)) + return; + std::optional<unsigned> EstimatedTripCount = getLoopEstimatedTripCount(L); + if (!EstimatedTripCount) + return; + + LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is " + << *EstimatedTripCount << "\n"); + + if (*EstimatedTripCount) { + if (*EstimatedTripCount + AlreadyPeeled <= MaxPeelCount) { + unsigned PeelCount = *EstimatedTripCount; + LLVM_DEBUG(dbgs() << "Peeling first " << PeelCount << " iterations.\n"); + PP.PeelCount = PeelCount; + return; + } + LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n"); + LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n"); + LLVM_DEBUG(dbgs() << "Loop cost: " << LoopSize << "\n"); + LLVM_DEBUG(dbgs() << "Max peel cost: " << Threshold << "\n"); + LLVM_DEBUG(dbgs() << "Max peel count by cost: " + << (Threshold / LoopSize - 1) << "\n"); + } + } +} + +struct WeightInfo { + // Weights for current iteration. + SmallVector<uint32_t> Weights; + // Weights to subtract after each iteration. + const SmallVector<uint32_t> SubWeights; +}; + +/// Update the branch weights of an exiting block of a peeled-off loop +/// iteration. +/// Let F is a weight of the edge to continue (fallthrough) into the loop. +/// Let E is a weight of the edge to an exit. +/// F/(F+E) is a probability to go to loop and E/(F+E) is a probability to +/// go to exit. +/// Then, Estimated ExitCount = F / E. +/// For I-th (counting from 0) peeled off iteration we set the the weights for +/// the peeled exit as (EC - I, 1). It gives us reasonable distribution, +/// The probability to go to exit 1/(EC-I) increases. At the same time +/// the estimated exit count in the remainder loop reduces by I. +/// To avoid dealing with division rounding we can just multiple both part +/// of weights to E and use weight as (F - I * E, E). +static void updateBranchWeights(Instruction *Term, WeightInfo &Info) { + MDBuilder MDB(Term->getContext()); + Term->setMetadata(LLVMContext::MD_prof, + MDB.createBranchWeights(Info.Weights)); + for (auto [Idx, SubWeight] : enumerate(Info.SubWeights)) + if (SubWeight != 0) + Info.Weights[Idx] = Info.Weights[Idx] > SubWeight + ? Info.Weights[Idx] - SubWeight + : 1; +} + +/// Initialize the weights for all exiting blocks. 
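+/// For example (illustrative): latch weights {fallthrough: 90, exit: 10}
+/// give SubWeights of {10, 0}, so successive peeled copies of the latch
+/// carry weights {90, 10}, {80, 10}, {70, 10}, ..., matching the
+/// (F - I * E, E) scheme described above updateBranchWeights().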
+static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos, + Loop *L) { + SmallVector<BasicBlock *> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (BasicBlock *ExitingBlock : ExitingBlocks) { + Instruction *Term = ExitingBlock->getTerminator(); + SmallVector<uint32_t> Weights; + if (!extractBranchWeights(*Term, Weights)) + continue; + + // See the comment on updateBranchWeights() for an explanation of what we + // do here. + uint32_t FallThroughWeights = 0; + uint32_t ExitWeights = 0; + for (auto [Succ, Weight] : zip(successors(Term), Weights)) { + if (L->contains(Succ)) + FallThroughWeights += Weight; + else + ExitWeights += Weight; + } + + // Don't try to update weights for degenerate case. + if (FallThroughWeights == 0) + continue; + + SmallVector<uint32_t> SubWeights; + for (auto [Succ, Weight] : zip(successors(Term), Weights)) { + if (!L->contains(Succ)) { + // Exit weights stay the same. + SubWeights.push_back(0); + continue; + } + + // Subtract exit weights on each iteration, distributed across all + // fallthrough edges. + double W = (double)Weight / (double)FallThroughWeights; + SubWeights.push_back((uint32_t)(ExitWeights * W)); + } + + WeightInfos.insert({Term, {std::move(Weights), std::move(SubWeights)}}); + } +} + +/// Update the weights of original exiting block after peeling off all +/// iterations. +static void fixupBranchWeights(Instruction *Term, const WeightInfo &Info) { + MDBuilder MDB(Term->getContext()); + Term->setMetadata(LLVMContext::MD_prof, + MDB.createBranchWeights(Info.Weights)); +} + +/// Clones the body of the loop L, putting it between \p InsertTop and \p +/// InsertBot. +/// \param IterNumber The serial number of the iteration currently being +/// peeled off. +/// \param ExitEdges The exit edges of the original loop. +/// \param[out] NewBlocks A list of the blocks in the newly created clone +/// \param[out] VMap The value map between the loop and the new clone. +/// \param LoopBlocks A helper for DFS-traversal of the loop. +/// \param LVMap A value-map that maps instructions from the original loop to +/// instructions in the last peeled-off iteration. +static void cloneLoopBlocks( + Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot, + SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges, + SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, + ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, + LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes, + ScalarEvolution &SE) { + BasicBlock *Header = L->getHeader(); + BasicBlock *Latch = L->getLoopLatch(); + BasicBlock *PreHeader = L->getLoopPreheader(); + + Function *F = Header->getParent(); + LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); + LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); + Loop *ParentLoop = L->getParentLoop(); + + // For each block in the original loop, create a new copy, + // and update the value map with the newly created values. + for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { + BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F); + NewBlocks.push_back(NewBB); + + // If an original block is an immediate child of the loop L, its copy + // is a child of a ParentLoop after peeling. If a block is a child of + // a nested loop, it is handled in the cloneLoop() call below. 
+ if (ParentLoop && LI->getLoopFor(*BB) == L) + ParentLoop->addBasicBlockToLoop(NewBB, *LI); + + VMap[*BB] = NewBB; + + // If dominator tree is available, insert nodes to represent cloned blocks. + if (DT) { + if (Header == *BB) + DT->addNewBlock(NewBB, InsertTop); + else { + DomTreeNode *IDom = DT->getNode(*BB)->getIDom(); + // VMap must contain entry for IDom, as the iteration order is RPO. + DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()])); + } + } + } + + { + // Identify what other metadata depends on the cloned version. After + // cloning, replace the metadata with the corrected version for both + // memory instructions and noalias intrinsics. + std::string Ext = (Twine("Peel") + Twine(IterNumber)).str(); + cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks, + Header->getContext(), Ext); + } + + // Recursively create the new Loop objects for nested loops, if any, + // to preserve LoopInfo. + for (Loop *ChildLoop : *L) { + cloneLoop(ChildLoop, ParentLoop, VMap, LI, nullptr); + } + + // Hook-up the control flow for the newly inserted blocks. + // The new header is hooked up directly to the "top", which is either + // the original loop preheader (for the first iteration) or the previous + // iteration's exiting block (for every other iteration) + InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header])); + + // Similarly, for the latch: + // The original exiting edge is still hooked up to the loop exit. + // The backedge now goes to the "bottom", which is either the loop's real + // header (for the last peeled iteration) or the copied header of the next + // iteration (for every other iteration) + BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]); + auto *LatchTerm = cast<Instruction>(NewLatch->getTerminator()); + for (unsigned idx = 0, e = LatchTerm->getNumSuccessors(); idx < e; ++idx) + if (LatchTerm->getSuccessor(idx) == Header) { + LatchTerm->setSuccessor(idx, InsertBot); + break; + } + if (DT) + DT->changeImmediateDominator(InsertBot, NewLatch); + + // The new copy of the loop body starts with a bunch of PHI nodes + // that pick an incoming value from either the preheader, or the previous + // loop iteration. Since this copy is no longer part of the loop, we + // resolve this statically: + // For the first iteration, we use the value from the preheader directly. + // For any other iteration, we replace the phi with the value generated by + // the immediately preceding clone of the loop body (which represents + // the previous iteration). + for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { + PHINode *NewPHI = cast<PHINode>(VMap[&*I]); + if (IterNumber == 0) { + VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader); + } else { + Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch); + Instruction *LatchInst = dyn_cast<Instruction>(LatchVal); + if (LatchInst && L->contains(LatchInst)) + VMap[&*I] = LVMap[LatchInst]; + else + VMap[&*I] = LatchVal; + } + NewPHI->eraseFromParent(); + } + + // Fix up the outgoing values - we need to add a value for the iteration + // we've just created. Note that this must happen *after* the incoming + // values are adjusted, since the value going out of the latch may also be + // a value coming into the header. 
+ for (auto Edge : ExitEdges) + for (PHINode &PHI : Edge.second->phis()) { + Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first); + Instruction *LatchInst = dyn_cast<Instruction>(LatchVal); + if (LatchInst && L->contains(LatchInst)) + LatchVal = VMap[LatchVal]; + PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first])); + SE.forgetValue(&PHI); + } + + // LastValueMap is updated with the values for the current loop + // which are used the next time this function is called. + for (auto KV : VMap) + LVMap[KV.first] = KV.second; +} + +TargetTransformInfo::PeelingPreferences +llvm::gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, + const TargetTransformInfo &TTI, + std::optional<bool> UserAllowPeeling, + std::optional<bool> UserAllowProfileBasedPeeling, + bool UnrollingSpecficValues) { + TargetTransformInfo::PeelingPreferences PP; + + // Set the default values. + PP.PeelCount = 0; + PP.AllowPeeling = true; + PP.AllowLoopNestsPeeling = false; + PP.PeelProfiledIterations = true; + + // Get the target specifc values. + TTI.getPeelingPreferences(L, SE, PP); + + // User specified values using cl::opt. + if (UnrollingSpecficValues) { + if (UnrollPeelCount.getNumOccurrences() > 0) + PP.PeelCount = UnrollPeelCount; + if (UnrollAllowPeeling.getNumOccurrences() > 0) + PP.AllowPeeling = UnrollAllowPeeling; + if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0) + PP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling; + } + + // User specifed values provided by argument. + if (UserAllowPeeling) + PP.AllowPeeling = *UserAllowPeeling; + if (UserAllowProfileBasedPeeling) + PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling; + + return PP; +} + +/// Peel off the first \p PeelCount iterations of loop \p L. +/// +/// Note that this does not peel them off as a single straight-line block. +/// Rather, each iteration is peeled off separately, and needs to check the +/// exit condition. +/// For loops that dynamically execute \p PeelCount iterations or less +/// this provides a benefit, since the peeled off iterations, which account +/// for the bulk of dynamic execution, can be further simplified by scalar +/// optimizations. +bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, + ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, + bool PreserveLCSSA, ValueToValueMapTy &LVMap) { + assert(PeelCount > 0 && "Attempt to peel out zero iterations?"); + assert(canPeel(L) && "Attempt to peel a loop which is not peelable?"); + + LoopBlocksDFS LoopBlocks(L); + LoopBlocks.perform(LI); + + BasicBlock *Header = L->getHeader(); + BasicBlock *PreHeader = L->getLoopPreheader(); + BasicBlock *Latch = L->getLoopLatch(); + SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitEdges; + L->getExitEdges(ExitEdges); + + // Remember dominators of blocks we might reach through exits to change them + // later. Immediate dominator of such block might change, because we add more + // routes which can lead to the exit: we can reach it from the peeled + // iterations too. + DenseMap<BasicBlock *, BasicBlock *> NonLoopBlocksIDom; + for (auto *BB : L->blocks()) { + auto *BBDomNode = DT.getNode(BB); + SmallVector<BasicBlock *, 16> ChildrenToUpdate; + for (auto *ChildDomNode : BBDomNode->children()) { + auto *ChildBB = ChildDomNode->getBlock(); + if (!L->contains(ChildBB)) + ChildrenToUpdate.push_back(ChildBB); + } + // The new idom of the block will be the nearest common dominator + // of all copies of the previous idom. 
This is equivalent to the + // nearest common dominator of the previous idom and the first latch, + // which dominates all copies of the previous idom. + BasicBlock *NewIDom = DT.findNearestCommonDominator(BB, Latch); + for (auto *ChildBB : ChildrenToUpdate) + NonLoopBlocksIDom[ChildBB] = NewIDom; + } + + Function *F = Header->getParent(); + + // Set up all the necessary basic blocks. It is convenient to split the + // preheader into 3 parts - two blocks to anchor the peeled copy of the loop + // body, and a new preheader for the "real" loop. + + // Peeling the first iteration transforms. + // + // PreHeader: + // ... + // Header: + // LoopBody + // If (cond) goto Header + // Exit: + // + // into + // + // InsertTop: + // LoopBody + // If (!cond) goto Exit + // InsertBot: + // NewPreHeader: + // ... + // Header: + // LoopBody + // If (cond) goto Header + // Exit: + // + // Each following iteration will split the current bottom anchor in two, + // and put the new copy of the loop body between these two blocks. That is, + // after peeling another iteration from the example above, we'll split + // InsertBot, and get: + // + // InsertTop: + // LoopBody + // If (!cond) goto Exit + // InsertBot: + // LoopBody + // If (!cond) goto Exit + // InsertBot.next: + // NewPreHeader: + // ... + // Header: + // LoopBody + // If (cond) goto Header + // Exit: + + BasicBlock *InsertTop = SplitEdge(PreHeader, Header, &DT, LI); + BasicBlock *InsertBot = + SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI); + BasicBlock *NewPreHeader = + SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI); + + InsertTop->setName(Header->getName() + ".peel.begin"); + InsertBot->setName(Header->getName() + ".peel.next"); + NewPreHeader->setName(PreHeader->getName() + ".peel.newph"); + + Instruction *LatchTerm = + cast<Instruction>(cast<BasicBlock>(Latch)->getTerminator()); + + // If we have branch weight information, we'll want to update it for the + // newly created branches. + DenseMap<Instruction *, WeightInfo> Weights; + initBranchWeights(Weights, L); + + // Identify what noalias metadata is inside the loop: if it is inside the + // loop, the associated metadata must be cloned for each iteration. + SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes; + identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes); + + // For each peeled-off iteration, make a copy of the loop. + for (unsigned Iter = 0; Iter < PeelCount; ++Iter) { + SmallVector<BasicBlock *, 8> NewBlocks; + ValueToValueMapTy VMap; + + cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks, + LoopBlocks, VMap, LVMap, &DT, LI, + LoopLocalNoAliasDeclScopes, *SE); + + // Remap to use values from the current iteration instead of the + // previous one. + remapInstructionsInBlocks(NewBlocks, VMap); + + // Update IDoms of the blocks reachable through exits. + if (Iter == 0) + for (auto BBIDom : NonLoopBlocksIDom) + DT.changeImmediateDominator(BBIDom.first, + cast<BasicBlock>(LVMap[BBIDom.second])); +#ifdef EXPENSIVE_CHECKS + assert(DT.verify(DominatorTree::VerificationLevel::Fast)); +#endif + + for (auto &[Term, Info] : Weights) { + auto *TermCopy = cast<Instruction>(VMap[Term]); + updateBranchWeights(TermCopy, Info); + } + + // Remove Loop metadata from the latch branch instruction + // because it is not the Loop's latch branch anymore. 
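Looking back at the InsertTop/InsertBot diagram earlier in this function, the peeled form corresponds to the following hand-written source (a hypothetical example, shown only to make the anchoring of the peeled copy concrete): the copy sits between the old preheader and the new one, and a failed exit test in the copy jumps straight to the exit.

int count_leading_nonzero(const int *a, int n) {
  int i = 0;
  // InsertTop: peeled copy of the loop body with the inverted exit test,
  // "If (!cond) goto Exit".
  if (!(i < n && a[i] != 0))
    return i;                         // Exit
  ++i;
  // InsertBot / NewPreHeader: fall through into the remaining loop.
  while (i < n && a[i] != 0)          // Header: "If (cond) goto Header"
    ++i;
  return i;                           // Exit
}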
+ auto *LatchTermCopy = cast<Instruction>(VMap[LatchTerm]); + LatchTermCopy->setMetadata(LLVMContext::MD_loop, nullptr); + + InsertTop = InsertBot; + InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI); + InsertBot->setName(Header->getName() + ".peel.next"); + + F->splice(InsertTop->getIterator(), F, NewBlocks[0]->getIterator(), + F->end()); + } + + // Now adjust the phi nodes in the loop header to get their initial values + // from the last peeled-off iteration instead of the preheader. + for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { + PHINode *PHI = cast<PHINode>(I); + Value *NewVal = PHI->getIncomingValueForBlock(Latch); + Instruction *LatchInst = dyn_cast<Instruction>(NewVal); + if (LatchInst && L->contains(LatchInst)) + NewVal = LVMap[LatchInst]; + + PHI->setIncomingValueForBlock(NewPreHeader, NewVal); + } + + for (const auto &[Term, Info] : Weights) + fixupBranchWeights(Term, Info); + + // Update Metadata for count of peeled off iterations. + unsigned AlreadyPeeled = 0; + if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData)) + AlreadyPeeled = *Peeled; + addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount); + + if (Loop *ParentLoop = L->getParentLoop()) + L = ParentLoop; + + // We modified the loop, update SE. + SE->forgetTopmostLoop(L); + +#ifdef EXPENSIVE_CHECKS + // Finally DomtTree must be correct. + assert(DT.verify(DominatorTree::VerificationLevel::Fast)); +#endif + + // FIXME: Incrementally update loop-simplify + simplifyLoop(L, &DT, LI, SE, AC, nullptr, PreserveLCSSA); + + NumPeeled++; + + return true; +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LoopRotationUtils.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LoopRotationUtils.cpp new file mode 100644 index 0000000000..1a9eaf2421 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -0,0 +1,845 @@ +//===----------------- LoopRotationUtils.cpp -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides utilities to convert a loop into a loop with bottom test. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LoopRotationUtils.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/DomTreeUpdater.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +using namespace llvm; + +#define DEBUG_TYPE "loop-rotate" + +STATISTIC(NumNotRotatedDueToHeaderSize, + "Number of loops not rotated due to the header size"); +STATISTIC(NumInstrsHoisted, + "Number of instructions hoisted into loop preheader"); +STATISTIC(NumInstrsDuplicated, + "Number of instructions cloned into loop preheader"); +STATISTIC(NumRotated, "Number of loops rotated"); + +static cl::opt<bool> + MultiRotate("loop-rotate-multi", cl::init(false), cl::Hidden, + cl::desc("Allow loop rotation multiple times in order to reach " + "a better latch exit")); + +namespace { +/// A simple loop rotation transformation. +class LoopRotate { + const unsigned MaxHeaderSize; + LoopInfo *LI; + const TargetTransformInfo *TTI; + AssumptionCache *AC; + DominatorTree *DT; + ScalarEvolution *SE; + MemorySSAUpdater *MSSAU; + const SimplifyQuery &SQ; + bool RotationOnly; + bool IsUtilMode; + bool PrepareForLTO; + +public: + LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI, + const TargetTransformInfo *TTI, AssumptionCache *AC, + DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, + const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode, + bool PrepareForLTO) + : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE), + MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly), + IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO) {} + bool processLoop(Loop *L); + +private: + bool rotateLoop(Loop *L, bool SimplifiedLatch); + bool simplifyLoopLatch(Loop *L); +}; +} // end anonymous namespace + +/// Insert (K, V) pair into the ValueToValueMap, and verify the key did not +/// previously exist in the map, and the value was inserted. +static void InsertNewValueIntoMap(ValueToValueMapTy &VM, Value *K, Value *V) { + bool Inserted = VM.insert({K, V}).second; + assert(Inserted); + (void)Inserted; +} +/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the +/// old header into the preheader. If there were uses of the values produced by +/// these instruction that were outside of the loop, we have to insert PHI nodes +/// to merge the two values. Do this now. +static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader, + BasicBlock *OrigPreheader, + ValueToValueMapTy &ValueMap, + ScalarEvolution *SE, + SmallVectorImpl<PHINode*> *InsertedPHIs) { + // Remove PHI node entries that are no longer live. 
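A hypothetical example of the situation RewriteUsesOfClonedInstructions deals with (sketch only, not part of the patch): a value computed in the old header is used after the loop, so once rotation clones the header into the preheader, that use has to merge the preheader copy with the in-loop value, which is exactly the PHI the SSAUpdater inserts.

int first_square_at_least(int limit) {
  long long sq = 0;
  for (long long i = 0;; ++i) {
    sq = i * i;                       // computed in the loop header
    if (sq >= limit)                  // header exit test
      break;
  }
  return (int)sq;                     // use outside the loop: after rotation it
                                      // needs a PHI merging the preheader clone
                                      // of sq with the value from the loop body
}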
+ BasicBlock::iterator I, E = OrigHeader->end(); + for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I) + PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader)); + + // Now fix up users of the instructions in OrigHeader, inserting PHI nodes + // as necessary. + SSAUpdater SSA(InsertedPHIs); + for (I = OrigHeader->begin(); I != E; ++I) { + Value *OrigHeaderVal = &*I; + + // If there are no uses of the value (e.g. because it returns void), there + // is nothing to rewrite. + if (OrigHeaderVal->use_empty()) + continue; + + Value *OrigPreHeaderVal = ValueMap.lookup(OrigHeaderVal); + + // The value now exits in two versions: the initial value in the preheader + // and the loop "next" value in the original header. + SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName()); + // Force re-computation of OrigHeaderVal, as some users now need to use the + // new PHI node. + if (SE) + SE->forgetValue(OrigHeaderVal); + SSA.AddAvailableValue(OrigHeader, OrigHeaderVal); + SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal); + + // Visit each use of the OrigHeader instruction. + for (Use &U : llvm::make_early_inc_range(OrigHeaderVal->uses())) { + // SSAUpdater can't handle a non-PHI use in the same block as an + // earlier def. We can easily handle those cases manually. + Instruction *UserInst = cast<Instruction>(U.getUser()); + if (!isa<PHINode>(UserInst)) { + BasicBlock *UserBB = UserInst->getParent(); + + // The original users in the OrigHeader are already using the + // original definitions. + if (UserBB == OrigHeader) + continue; + + // Users in the OrigPreHeader need to use the value to which the + // original definitions are mapped. + if (UserBB == OrigPreheader) { + U = OrigPreHeaderVal; + continue; + } + } + + // Anything else can be handled by SSAUpdater. + SSA.RewriteUse(U); + } + + // Replace MetadataAsValue(ValueAsMetadata(OrigHeaderVal)) uses in debug + // intrinsics. + SmallVector<DbgValueInst *, 1> DbgValues; + llvm::findDbgValues(DbgValues, OrigHeaderVal); + for (auto &DbgValue : DbgValues) { + // The original users in the OrigHeader are already using the original + // definitions. + BasicBlock *UserBB = DbgValue->getParent(); + if (UserBB == OrigHeader) + continue; + + // Users in the OrigPreHeader need to use the value to which the + // original definitions are mapped and anything else can be handled by + // the SSAUpdater. To avoid adding PHINodes, check if the value is + // available in UserBB, if not substitute undef. + Value *NewVal; + if (UserBB == OrigPreheader) + NewVal = OrigPreHeaderVal; + else if (SSA.HasValueForBlock(UserBB)) + NewVal = SSA.GetValueInMiddleOfBlock(UserBB); + else + NewVal = UndefValue::get(OrigHeaderVal->getType()); + DbgValue->replaceVariableLocationOp(OrigHeaderVal, NewVal); + } + } +} + +// Assuming both header and latch are exiting, look for a phi which is only +// used outside the loop (via a LCSSA phi) in the exit from the header. +// This means that rotating the loop can remove the phi. +static bool profitableToRotateLoopExitingLatch(Loop *L) { + BasicBlock *Header = L->getHeader(); + BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator()); + assert(BI && BI->isConditional() && "need header with conditional exit"); + BasicBlock *HeaderExit = BI->getSuccessor(0); + if (L->contains(HeaderExit)) + HeaderExit = BI->getSuccessor(1); + + for (auto &Phi : Header->phis()) { + // Look for uses of this phi in the loop/via exits other than the header. 
+ if (llvm::any_of(Phi.users(), [HeaderExit](const User *U) { + return cast<Instruction>(U)->getParent() != HeaderExit; + })) + continue; + return true; + } + return false; +} + +// Check that latch exit is deoptimizing (which means - very unlikely to happen) +// and there is another exit from the loop which is non-deoptimizing. +// If we rotate latch to that exit our loop has a better chance of being fully +// canonical. +// +// It can give false positives in some rare cases. +static bool canRotateDeoptimizingLatchExit(Loop *L) { + BasicBlock *Latch = L->getLoopLatch(); + assert(Latch && "need latch"); + BranchInst *BI = dyn_cast<BranchInst>(Latch->getTerminator()); + // Need normal exiting latch. + if (!BI || !BI->isConditional()) + return false; + + BasicBlock *Exit = BI->getSuccessor(1); + if (L->contains(Exit)) + Exit = BI->getSuccessor(0); + + // Latch exit is non-deoptimizing, no need to rotate. + if (!Exit->getPostdominatingDeoptimizeCall()) + return false; + + SmallVector<BasicBlock *, 4> Exits; + L->getUniqueExitBlocks(Exits); + if (!Exits.empty()) { + // There is at least one non-deoptimizing exit. + // + // Note, that BasicBlock::getPostdominatingDeoptimizeCall is not exact, + // as it can conservatively return false for deoptimizing exits with + // complex enough control flow down to deoptimize call. + // + // That means here we can report success for a case where + // all exits are deoptimizing but one of them has complex enough + // control flow (e.g. with loops). + // + // That should be a very rare case and false positives for this function + // have compile-time effect only. + return any_of(Exits, [](const BasicBlock *BB) { + return !BB->getPostdominatingDeoptimizeCall(); + }); + } + return false; +} + +/// Rotate loop LP. Return true if the loop is rotated. +/// +/// \param SimplifiedLatch is true if the latch was just folded into the final +/// loop exit. In this case we may want to rotate even though the new latch is +/// now an exiting branch. This rotation would have happened had the latch not +/// been simplified. However, if SimplifiedLatch is false, then we avoid +/// rotating loops in which the latch exits to avoid excessive or endless +/// rotation. LoopRotate should be repeatable and converge to a canonical +/// form. This property is satisfied because simplifying the loop latch can only +/// happen once across multiple invocations of the LoopRotate pass. +/// +/// If -loop-rotate-multi is enabled we can do multiple rotations in one go +/// so to reach a suitable (non-deoptimizing) exit. +bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { + // If the loop has only one block then there is not much to rotate. + if (L->getBlocks().size() == 1) + return false; + + bool Rotated = false; + do { + BasicBlock *OrigHeader = L->getHeader(); + BasicBlock *OrigLatch = L->getLoopLatch(); + + BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator()); + if (!BI || BI->isUnconditional()) + return Rotated; + + // If the loop header is not one of the loop exiting blocks then + // either this loop is already rotated or it is not + // suitable for loop rotation transformations. + if (!L->isLoopExiting(OrigHeader)) + return Rotated; + + // If the loop latch already contains a branch that leaves the loop then the + // loop is already rotated. + if (!OrigLatch) + return Rotated; + + // Rotate if either the loop latch does *not* exit the loop, or if the loop + // latch was just simplified. Or if we think it will be profitable. 
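Before the profitability and size checks that follow, a source-level sketch of what a successful rotation produces (hypothetical example, not part of the patch): the exit test moves from the header to the latch, guarded by one cloned copy of the test in the preheader.

// Before rotation: the exit test sits in the loop header.
int count_nonzero(const int *a, int n) {
  int c = 0, i = 0;
  while (i < n) {          // header is the exiting block
    c += (a[i] != 0);
    ++i;                   // latch
  }
  return c;
}

// After rotation: the header test is cloned into the preheader as a guard and
// the loop itself becomes a bottom-tested do/while.
int count_nonzero_rotated(const int *a, int n) {
  int c = 0, i = 0;
  if (i < n) {             // guard: cloned header test
    do {
      c += (a[i] != 0);
      ++i;
    } while (i < n);       // latch is now the exiting block
  }
  return c;
}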
+ if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode == false && + !profitableToRotateLoopExitingLatch(L) && + !canRotateDeoptimizingLatchExit(L)) + return Rotated; + + // Check size of original header and reject loop if it is very big or we can't + // duplicate blocks inside it. + { + SmallPtrSet<const Value *, 32> EphValues; + CodeMetrics::collectEphemeralValues(L, AC, EphValues); + + CodeMetrics Metrics; + Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues, PrepareForLTO); + if (Metrics.notDuplicatable) { + LLVM_DEBUG( + dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable" + << " instructions: "; + L->dump()); + return Rotated; + } + if (Metrics.convergent) { + LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains convergent " + "instructions: "; + L->dump()); + return Rotated; + } + if (!Metrics.NumInsts.isValid()) { + LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains instructions" + " with invalid cost: "; + L->dump()); + return Rotated; + } + if (Metrics.NumInsts > MaxHeaderSize) { + LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains " + << Metrics.NumInsts + << " instructions, which is more than the threshold (" + << MaxHeaderSize << " instructions): "; + L->dump()); + ++NumNotRotatedDueToHeaderSize; + return Rotated; + } + + // When preparing for LTO, avoid rotating loops with calls that could be + // inlined during the LTO stage. + if (PrepareForLTO && Metrics.NumInlineCandidates > 0) + return Rotated; + } + + // Now, this loop is suitable for rotation. + BasicBlock *OrigPreheader = L->getLoopPreheader(); + + // If the loop could not be converted to canonical form, it must have an + // indirectbr in it, just give up. + if (!OrigPreheader || !L->hasDedicatedExits()) + return Rotated; + + // Anything ScalarEvolution may know about this loop or the PHI nodes + // in its header will soon be invalidated. We should also invalidate + // all outer loops because insertion and deletion of blocks that happens + // during the rotation may violate invariants related to backedge taken + // infos in them. + if (SE) { + SE->forgetTopmostLoop(L); + // We may hoist some instructions out of loop. In case if they were cached + // as "loop variant" or "loop computable", these caches must be dropped. + // We also may fold basic blocks, so cached block dispositions also need + // to be dropped. + SE->forgetBlockAndLoopDispositions(); + } + + LLVM_DEBUG(dbgs() << "LoopRotation: rotating "; L->dump()); + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + + // Find new Loop header. NewHeader is a Header's one and only successor + // that is inside loop. Header's other successor is outside the + // loop. Otherwise loop is not suitable for rotation. + BasicBlock *Exit = BI->getSuccessor(0); + BasicBlock *NewHeader = BI->getSuccessor(1); + if (L->contains(Exit)) + std::swap(Exit, NewHeader); + assert(NewHeader && "Unable to determine new loop header"); + assert(L->contains(NewHeader) && !L->contains(Exit) && + "Unable to determine loop header and exit blocks"); + + // This code assumes that the new header has exactly one predecessor. + // Remove any single-entry PHI nodes in it. + assert(NewHeader->getSinglePredecessor() && + "New header doesn't have one pred!"); + FoldSingleEntryPHINodes(NewHeader); + + // Begin by walking OrigHeader and populating ValueMap with an entry for + // each Instruction. 
+ BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end(); + ValueToValueMapTy ValueMap, ValueMapMSSA; + + // For PHI nodes, the value available in OldPreHeader is just the + // incoming value from OldPreHeader. + for (; PHINode *PN = dyn_cast<PHINode>(I); ++I) + InsertNewValueIntoMap(ValueMap, PN, + PN->getIncomingValueForBlock(OrigPreheader)); + + // For the rest of the instructions, either hoist to the OrigPreheader if + // possible or create a clone in the OldPreHeader if not. + Instruction *LoopEntryBranch = OrigPreheader->getTerminator(); + + // Record all debug intrinsics preceding LoopEntryBranch to avoid + // duplication. + using DbgIntrinsicHash = + std::pair<std::pair<hash_code, DILocalVariable *>, DIExpression *>; + auto makeHash = [](DbgVariableIntrinsic *D) -> DbgIntrinsicHash { + auto VarLocOps = D->location_ops(); + return {{hash_combine_range(VarLocOps.begin(), VarLocOps.end()), + D->getVariable()}, + D->getExpression()}; + }; + SmallDenseSet<DbgIntrinsicHash, 8> DbgIntrinsics; + for (Instruction &I : llvm::drop_begin(llvm::reverse(*OrigPreheader))) { + if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I)) + DbgIntrinsics.insert(makeHash(DII)); + else + break; + } + + // Remember the local noalias scope declarations in the header. After the + // rotation, they must be duplicated and the scope must be cloned. This + // avoids unwanted interaction across iterations. + SmallVector<NoAliasScopeDeclInst *, 6> NoAliasDeclInstructions; + for (Instruction &I : *OrigHeader) + if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) + NoAliasDeclInstructions.push_back(Decl); + + while (I != E) { + Instruction *Inst = &*I++; + + // If the instruction's operands are invariant and it doesn't read or write + // memory, then it is safe to hoist. Doing this doesn't change the order of + // execution in the preheader, but does prevent the instruction from + // executing in each iteration of the loop. This means it is safe to hoist + // something that might trap, but isn't safe to hoist something that reads + // memory (without proving that the loop doesn't write). + if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() && + !Inst->mayWriteToMemory() && !Inst->isTerminator() && + !isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) { + Inst->moveBefore(LoopEntryBranch); + ++NumInstrsHoisted; + continue; + } + + // Otherwise, create a duplicate of the instruction. + Instruction *C = Inst->clone(); + ++NumInstrsDuplicated; + + // Eagerly remap the operands of the instruction. + RemapInstruction(C, ValueMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + + // Avoid inserting the same intrinsic twice. + if (auto *DII = dyn_cast<DbgVariableIntrinsic>(C)) + if (DbgIntrinsics.count(makeHash(DII))) { + C->deleteValue(); + continue; + } + + // With the operands remapped, see if the instruction constant folds or is + // otherwise simplifyable. This commonly occurs because the entry from PHI + // nodes allows icmps and other instructions to fold. + Value *V = simplifyInstruction(C, SQ); + if (V && LI->replacementPreservesLCSSAForm(C, V)) { + // If so, then delete the temporary instruction and stick the folded value + // in the map. + InsertNewValueIntoMap(ValueMap, Inst, V); + if (!C->mayHaveSideEffects()) { + C->deleteValue(); + C = nullptr; + } + } else { + InsertNewValueIntoMap(ValueMap, Inst, C); + } + if (C) { + // Otherwise, stick the new instruction into the new block! 
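A hypothetical illustration of the hoist-or-clone criterion above: a computation whose operands do not change in the loop and which neither reads nor writes memory can be executed once in the preheader, whereas a load has to be cloned rather than hoisted.

int count_above_threshold(const int *a, int n, int k) {
  int c = 0;
  for (int i = 0; i < n; ++i) {
    int limit = k * 4;               // invariant operands, no memory access:
                                     // safe to execute once in the preheader
    if (a[i] > limit)                // reads memory: not hoistable, so the
      ++c;                           // rotated copy would be a clone instead
  }
  return c;
}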
+ C->setName(Inst->getName()); + C->insertBefore(LoopEntryBranch); + + if (auto *II = dyn_cast<AssumeInst>(C)) + AC->registerAssumption(II); + // MemorySSA cares whether the cloned instruction was inserted or not, and + // not whether it can be remapped to a simplified value. + if (MSSAU) + InsertNewValueIntoMap(ValueMapMSSA, Inst, C); + } + } + + if (!NoAliasDeclInstructions.empty()) { + // There are noalias scope declarations: + // (general): + // Original: OrigPre { OrigHeader NewHeader ... Latch } + // after: (OrigPre+OrigHeader') { NewHeader ... Latch OrigHeader } + // + // with D: llvm.experimental.noalias.scope.decl, + // U: !noalias or !alias.scope depending on D + // ... { D U1 U2 } can transform into: + // (0) : ... { D U1 U2 } // no relevant rotation for this part + // (1) : ... D' { U1 U2 D } // D is part of OrigHeader + // (2) : ... D' U1' { U2 D U1 } // D, U1 are part of OrigHeader + // + // We now want to transform: + // (1) -> : ... D' { D U1 U2 D'' } + // (2) -> : ... D' U1' { D U2 D'' U1'' } + // D: original llvm.experimental.noalias.scope.decl + // D', U1': duplicate with replaced scopes + // D'', U1'': different duplicate with replaced scopes + // This ensures a safe fallback to 'may_alias' introduced by the rotate, + // as U1'' and U1' scopes will not be compatible wrt to the local restrict + + // Clone the llvm.experimental.noalias.decl again for the NewHeader. + Instruction *NewHeaderInsertionPoint = &(*NewHeader->getFirstNonPHI()); + for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions) { + LLVM_DEBUG(dbgs() << " Cloning llvm.experimental.noalias.scope.decl:" + << *NAD << "\n"); + Instruction *NewNAD = NAD->clone(); + NewNAD->insertBefore(NewHeaderInsertionPoint); + } + + // Scopes must now be duplicated, once for OrigHeader and once for + // OrigPreHeader'. + { + auto &Context = NewHeader->getContext(); + + SmallVector<MDNode *, 8> NoAliasDeclScopes; + for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions) + NoAliasDeclScopes.push_back(NAD->getScopeList()); + + LLVM_DEBUG(dbgs() << " Updating OrigHeader scopes\n"); + cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, {OrigHeader}, Context, + "h.rot"); + LLVM_DEBUG(OrigHeader->dump()); + + // Keep the compile time impact low by only adapting the inserted block + // of instructions in the OrigPreHeader. This might result in slightly + // more aliasing between these instructions and those that were already + // present, but it will be much faster when the original PreHeader is + // large. + LLVM_DEBUG(dbgs() << " Updating part of OrigPreheader scopes\n"); + auto *FirstDecl = + cast<Instruction>(ValueMap[*NoAliasDeclInstructions.begin()]); + auto *LastInst = &OrigPreheader->back(); + cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, FirstDecl, LastInst, + Context, "pre.rot"); + LLVM_DEBUG(OrigPreheader->dump()); + + LLVM_DEBUG(dbgs() << " Updated NewHeader:\n"); + LLVM_DEBUG(NewHeader->dump()); + } + } + + // Along with all the other instructions, we just cloned OrigHeader's + // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's + // successors by duplicating their incoming values for OrigHeader. 
+ for (BasicBlock *SuccBB : successors(OrigHeader)) + for (BasicBlock::iterator BI = SuccBB->begin(); + PHINode *PN = dyn_cast<PHINode>(BI); ++BI) + PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader); + + // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove + // OrigPreHeader's old terminator (the original branch into the loop), and + // remove the corresponding incoming values from the PHI nodes in OrigHeader. + LoopEntryBranch->eraseFromParent(); + + // Update MemorySSA before the rewrite call below changes the 1:1 + // instruction:cloned_instruction_or_value mapping. + if (MSSAU) { + InsertNewValueIntoMap(ValueMapMSSA, OrigHeader, OrigPreheader); + MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader, + ValueMapMSSA); + } + + SmallVector<PHINode*, 2> InsertedPHIs; + // If there were any uses of instructions in the duplicated block outside the + // loop, update them, inserting PHI nodes as required + RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap, SE, + &InsertedPHIs); + + // Attach dbg.value intrinsics to the new phis if that phi uses a value that + // previously had debug metadata attached. This keeps the debug info + // up-to-date in the loop body. + if (!InsertedPHIs.empty()) + insertDebugValuesForPHIs(OrigHeader, InsertedPHIs); + + // NewHeader is now the header of the loop. + L->moveToHeader(NewHeader); + assert(L->getHeader() == NewHeader && "Latch block is our new header"); + + // Inform DT about changes to the CFG. + if (DT) { + // The OrigPreheader branches to the NewHeader and Exit now. Then, inform + // the DT about the removed edge to the OrigHeader (that got removed). + SmallVector<DominatorTree::UpdateType, 3> Updates; + Updates.push_back({DominatorTree::Insert, OrigPreheader, Exit}); + Updates.push_back({DominatorTree::Insert, OrigPreheader, NewHeader}); + Updates.push_back({DominatorTree::Delete, OrigPreheader, OrigHeader}); + + if (MSSAU) { + MSSAU->applyUpdates(Updates, *DT, /*UpdateDT=*/true); + if (VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + } else { + DT->applyUpdates(Updates); + } + } + + // At this point, we've finished our major CFG changes. As part of cloning + // the loop into the preheader we've simplified instructions and the + // duplicated conditional branch may now be branching on a constant. If it is + // branching on a constant and if that constant means that we enter the loop, + // then we fold away the cond branch to an uncond branch. This simplifies the + // loop in cases important for nested loops, and it also means we don't have + // to split as many edges. + BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator()); + assert(PHBI->isConditional() && "Should be clone of BI condbr!"); + if (!isa<ConstantInt>(PHBI->getCondition()) || + PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero()) != + NewHeader) { + // The conditional branch can't be folded, handle the general case. + // Split edges as necessary to preserve LoopSimplify form. + + // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and + // thus is not a preheader anymore. + // Split the edge to form a real preheader. + BasicBlock *NewPH = SplitCriticalEdge( + OrigPreheader, NewHeader, + CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA()); + NewPH->setName(NewHeader->getName() + ".lr.ph"); + + // Preserve canonical loop form, which means that 'Exit' should have only + // one predecessor. 
Note that Exit could be an exit block for multiple + // nested loops, causing both of the edges to now be critical and need to + // be split. + SmallVector<BasicBlock *, 4> ExitPreds(predecessors(Exit)); + bool SplitLatchEdge = false; + for (BasicBlock *ExitPred : ExitPreds) { + // We only need to split loop exit edges. + Loop *PredLoop = LI->getLoopFor(ExitPred); + if (!PredLoop || PredLoop->contains(Exit) || + isa<IndirectBrInst>(ExitPred->getTerminator())) + continue; + SplitLatchEdge |= L->getLoopLatch() == ExitPred; + BasicBlock *ExitSplit = SplitCriticalEdge( + ExitPred, Exit, + CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA()); + ExitSplit->moveBefore(Exit); + } + assert(SplitLatchEdge && + "Despite splitting all preds, failed to split latch exit?"); + (void)SplitLatchEdge; + } else { + // We can fold the conditional branch in the preheader, this makes things + // simpler. The first step is to remove the extra edge to the Exit block. + Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/); + BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI); + NewBI->setDebugLoc(PHBI->getDebugLoc()); + PHBI->eraseFromParent(); + + // With our CFG finalized, update DomTree if it is available. + if (DT) DT->deleteEdge(OrigPreheader, Exit); + + // Update MSSA too, if available. + if (MSSAU) + MSSAU->removeEdge(OrigPreheader, Exit); + } + + assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation"); + assert(L->getLoopLatch() && "Invalid loop latch after loop rotation"); + + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + + // Now that the CFG and DomTree are in a consistent state again, try to merge + // the OrigHeader block into OrigLatch. This will succeed if they are + // connected by an unconditional branch. This is just a cleanup so the + // emitted code isn't too gross in this common case. + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); + BasicBlock *PredBB = OrigHeader->getUniquePredecessor(); + bool DidMerge = MergeBlockIntoPredecessor(OrigHeader, &DTU, LI, MSSAU); + if (DidMerge) + RemoveRedundantDbgInstrs(PredBB); + + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + + LLVM_DEBUG(dbgs() << "LoopRotation: into "; L->dump()); + + ++NumRotated; + + Rotated = true; + SimplifiedLatch = false; + + // Check that new latch is a deoptimizing exit and then repeat rotation if possible. + // Deoptimizing latch exit is not a generally typical case, so we just loop over. + // TODO: if it becomes a performance bottleneck extend rotation algorithm + // to handle multiple rotations in one go. + } while (MultiRotate && canRotateDeoptimizingLatchExit(L)); + + + return true; +} + +/// Determine whether the instructions in this range may be safely and cheaply +/// speculated. This is not an important enough situation to develop complex +/// heuristics. We handle a single arithmetic instruction along with any type +/// conversions. +static bool shouldSpeculateInstrs(BasicBlock::iterator Begin, + BasicBlock::iterator End, Loop *L) { + bool seenIncrement = false; + bool MultiExitLoop = false; + + if (!L->getExitingBlock()) + MultiExitLoop = true; + + for (BasicBlock::iterator I = Begin; I != End; ++I) { + + if (!isSafeToSpeculativelyExecute(&*I)) + return false; + + if (isa<DbgInfoIntrinsic>(I)) + continue; + + switch (I->getOpcode()) { + default: + return false; + case Instruction::GetElementPtr: + // GEPs are cheap if all indices are constant. 
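Stepping back from the case-by-case check for a moment, here is a hypothetical example of the overall shape this speculation test is screening for and that simplifyLoopLatch (further down in this file) folds: the loop tail holds nothing but the induction-variable increment, so it can be speculated into the exiting block.

int find_index(const int *a, int n, int key) {
  int i = 0;
  while (i < n) {
    if (a[i] == key)                 // exiting block
      break;
    ++i;                             // loop tail: a single post-increment,
                                     // cheap and safe to speculate
  }
  return i;
}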
+ if (!cast<GEPOperator>(I)->hasAllConstantIndices()) + return false; + // fall-thru to increment case + [[fallthrough]]; + case Instruction::Add: + case Instruction::Sub: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: { + Value *IVOpnd = + !isa<Constant>(I->getOperand(0)) + ? I->getOperand(0) + : !isa<Constant>(I->getOperand(1)) ? I->getOperand(1) : nullptr; + if (!IVOpnd) + return false; + + // If increment operand is used outside of the loop, this speculation + // could cause extra live range interference. + if (MultiExitLoop) { + for (User *UseI : IVOpnd->users()) { + auto *UserInst = cast<Instruction>(UseI); + if (!L->contains(UserInst)) + return false; + } + } + + if (seenIncrement) + return false; + seenIncrement = true; + break; + } + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + // ignore type conversions + break; + } + } + return true; +} + +/// Fold the loop tail into the loop exit by speculating the loop tail +/// instructions. Typically, this is a single post-increment. In the case of a +/// simple 2-block loop, hoisting the increment can be much better than +/// duplicating the entire loop header. In the case of loops with early exits, +/// rotation will not work anyway, but simplifyLoopLatch will put the loop in +/// canonical form so downstream passes can handle it. +/// +/// I don't believe this invalidates SCEV. +bool LoopRotate::simplifyLoopLatch(Loop *L) { + BasicBlock *Latch = L->getLoopLatch(); + if (!Latch || Latch->hasAddressTaken()) + return false; + + BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator()); + if (!Jmp || !Jmp->isUnconditional()) + return false; + + BasicBlock *LastExit = Latch->getSinglePredecessor(); + if (!LastExit || !L->isLoopExiting(LastExit)) + return false; + + BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator()); + if (!BI) + return false; + + if (!shouldSpeculateInstrs(Latch->begin(), Jmp->getIterator(), L)) + return false; + + LLVM_DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into " + << LastExit->getName() << "\n"); + + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); + MergeBlockIntoPredecessor(Latch, &DTU, LI, MSSAU, nullptr, + /*PredecessorWithTwoSuccessors=*/true); + + if (SE) { + // Merging blocks may remove blocks reference in the block disposition cache. Clear the cache. + SE->forgetBlockAndLoopDispositions(); + } + + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + + return true; +} + +/// Rotate \c L, and return true if any modification was made. +bool LoopRotate::processLoop(Loop *L) { + // Save the loop metadata. + MDNode *LoopMD = L->getLoopID(); + + bool SimplifiedLatch = false; + + // Simplify the loop latch before attempting to rotate the header + // upward. Rotation may not be needed if the loop tail can be folded into the + // loop exit. + if (!RotationOnly) + SimplifiedLatch = simplifyLoopLatch(L); + + bool MadeChange = rotateLoop(L, SimplifiedLatch); + assert((!MadeChange || L->isLoopExiting(L->getLoopLatch())) && + "Loop latch should be exiting after loop-rotate."); + + // Restore the loop metadata. + // NB! We presume LoopRotation DOESN'T ADD its own metadata. + if ((MadeChange || SimplifiedLatch) && LoopMD) + L->setLoopID(LoopMD); + + return MadeChange || SimplifiedLatch; +} + + +/// The utility to convert a loop into a loop with bottom test. 
+bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, + AssumptionCache *AC, DominatorTree *DT, + ScalarEvolution *SE, MemorySSAUpdater *MSSAU, + const SimplifyQuery &SQ, bool RotationOnly = true, + unsigned Threshold = unsigned(-1), + bool IsUtilMode = true, bool PrepareForLTO) { + LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly, + IsUtilMode, PrepareForLTO); + return LR.processLoop(L); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LoopSimplify.cpp new file mode 100644 index 0000000000..87a0e54e27 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/LoopSimplify.cpp @@ -0,0 +1,921 @@ +//===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass performs several transformations to transform natural loops into a +// simpler form, which makes subsequent analyses and transformations simpler and +// more effective. +// +// Loop pre-header insertion guarantees that there is a single, non-critical +// entry edge from outside of the loop to the loop header. This simplifies a +// number of analyses and transformations, such as LICM. +// +// Loop exit-block insertion guarantees that all exit blocks from the loop +// (blocks which are outside of the loop that have predecessors inside of the +// loop) only have predecessors from inside of the loop (and are thus dominated +// by the loop header). This simplifies transformations such as store-sinking +// that are built into LICM. +// +// This pass also guarantees that loops will have exactly one backedge. +// +// Indirectbr instructions introduce several complications. If the loop +// contains or is entered by an indirectbr instruction, it may not be possible +// to transform the loop and make these guarantees. Client code should check +// that these conditions are true before relying on them. +// +// Similar complications arise from callbr instructions, particularly in +// asm-goto where blockaddress expressions are used. +// +// Note that the simplifycfg pass will clean up blocks which are split out but +// end up being unnecessary, so usage of this pass should not pessimize +// generated code. +// +// This pass obviously modifies the CFG, but updates loop information and +// dominator information. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +using namespace llvm; + +#define DEBUG_TYPE "loop-simplify" + +STATISTIC(NumNested , "Number of nested loops split out"); + +// If the block isn't already, move the new block to right after some 'outside +// block' block. This prevents the preheader from being placed inside the loop +// body, e.g. when the loop hasn't been rotated. +static void placeSplitBlockCarefully(BasicBlock *NewBB, + SmallVectorImpl<BasicBlock *> &SplitPreds, + Loop *L) { + // Check to see if NewBB is already well placed. + Function::iterator BBI = --NewBB->getIterator(); + for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { + if (&*BBI == SplitPreds[i]) + return; + } + + // If it isn't already after an outside block, move it after one. This is + // always good as it makes the uncond branch from the outside block into a + // fall-through. + + // Figure out *which* outside block to put this after. Prefer an outside + // block that neighbors a BB actually in the loop. + BasicBlock *FoundBB = nullptr; + for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { + Function::iterator BBI = SplitPreds[i]->getIterator(); + if (++BBI != NewBB->getParent()->end() && L->contains(&*BBI)) { + FoundBB = SplitPreds[i]; + break; + } + } + + // If our heuristic for a *good* bb to place this after doesn't find + // anything, just pick something. It's likely better than leaving it within + // the loop. + if (!FoundBB) + FoundBB = SplitPreds[0]; + NewBB->moveAfter(FoundBB); +} + +/// InsertPreheaderForLoop - Once we discover that a loop doesn't have a +/// preheader, this method is called to insert one. This method has two phases: +/// preheader insertion and analysis updating. +/// +BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT, + LoopInfo *LI, MemorySSAUpdater *MSSAU, + bool PreserveLCSSA) { + BasicBlock *Header = L->getHeader(); + + // Compute the set of predecessors of the loop that are not in the loop. + SmallVector<BasicBlock*, 8> OutsideBlocks; + for (BasicBlock *P : predecessors(Header)) { + if (!L->contains(P)) { // Coming in from outside the loop? + // If the loop is branched to from an indirect terminator, we won't + // be able to fully transform the loop, because it prohibits + // edge splitting. 
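A minimal caller sketch for the routine defined here (a hypothetical helper, not part of the patch; it assumes InsertPreheaderForLoop is visible through llvm/Transforms/Utils/LoopUtils.h and passes a null MemorySSAUpdater on the assumption that MemorySSA is not being preserved):

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/LoopUtils.h"

// Return the existing preheader, or create one when the loop lacks it.
static llvm::BasicBlock *ensurePreheader(llvm::Loop *L, llvm::DominatorTree *DT,
                                         llvm::LoopInfo *LI) {
  if (llvm::BasicBlock *Existing = L->getLoopPreheader())
    return Existing;
  // May return null, e.g. when the header is reached through an indirectbr.
  return llvm::InsertPreheaderForLoop(L, DT, LI, /*MSSAU=*/nullptr,
                                      /*PreserveLCSSA=*/true);
}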
+ if (isa<IndirectBrInst>(P->getTerminator())) + return nullptr; + + // Keep track of it. + OutsideBlocks.push_back(P); + } + } + + // Split out the loop pre-header. + BasicBlock *PreheaderBB; + PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", DT, + LI, MSSAU, PreserveLCSSA); + if (!PreheaderBB) + return nullptr; + + LLVM_DEBUG(dbgs() << "LoopSimplify: Creating pre-header " + << PreheaderBB->getName() << "\n"); + + // Make sure that NewBB is put someplace intelligent, which doesn't mess up + // code layout too horribly. + placeSplitBlockCarefully(PreheaderBB, OutsideBlocks, L); + + return PreheaderBB; +} + +/// Add the specified block, and all of its predecessors, to the specified set, +/// if it's not already in there. Stop predecessor traversal when we reach +/// StopBlock. +static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock, + SmallPtrSetImpl<BasicBlock *> &Blocks) { + SmallVector<BasicBlock *, 8> Worklist; + Worklist.push_back(InputBB); + do { + BasicBlock *BB = Worklist.pop_back_val(); + if (Blocks.insert(BB).second && BB != StopBlock) + // If BB is not already processed and it is not a stop block then + // insert its predecessor in the work list + append_range(Worklist, predecessors(BB)); + } while (!Worklist.empty()); +} + +/// The first part of loop-nestification is to find a PHI node that tells +/// us how to partition the loops. +static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT, + AssumptionCache *AC) { + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) { + PHINode *PN = cast<PHINode>(I); + ++I; + if (Value *V = simplifyInstruction(PN, {DL, nullptr, DT, AC})) { + // This is a degenerate PHI already, don't modify it! + PN->replaceAllUsesWith(V); + PN->eraseFromParent(); + continue; + } + + // Scan this PHI node looking for a use of the PHI node by itself. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == PN && + L->contains(PN->getIncomingBlock(i))) + // We found something tasty to remove. + return PN; + } + return nullptr; +} + +/// If this loop has multiple backedges, try to pull one of them out into +/// a nested loop. +/// +/// This is important for code that looks like +/// this: +/// +/// Loop: +/// ... +/// br cond, Loop, Next +/// ... +/// br cond2, Loop, Out +/// +/// To identify this common case, we look at the PHI nodes in the header of the +/// loop. PHI nodes with unchanging values on one backedge correspond to values +/// that change in the "outer" loop, but not in the "inner" loop. +/// +/// If we are able to separate out a loop, return the new outer loop that was +/// created. +/// +static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, + DominatorTree *DT, LoopInfo *LI, + ScalarEvolution *SE, bool PreserveLCSSA, + AssumptionCache *AC, MemorySSAUpdater *MSSAU) { + // Don't try to separate loops without a preheader. + if (!Preheader) + return nullptr; + + // Treat the presence of convergent functions conservatively. The + // transformation is invalid if calls to certain convergent + // functions (like an AMDGPU barrier) get included in the resulting + // inner loop. But blocks meant for the inner loop will be + // identified later at a point where it's too late to abort the + // transformation. Also, the convergent attribute is not really + // sufficient to express the semantics of functions that are + // affected by this transformation. 
So we choose to back off if such + // a function call is present until a better alternative becomes + // available. This is similar to the conservative treatment of + // convergent function calls in GVNHoist and JumpThreading. + for (auto *BB : L->blocks()) { + for (auto &II : *BB) { + if (auto CI = dyn_cast<CallBase>(&II)) { + if (CI->isConvergent()) { + return nullptr; + } + } + } + } + + // The header is not a landing pad; preheader insertion should ensure this. + BasicBlock *Header = L->getHeader(); + assert(!Header->isEHPad() && "Can't insert backedge to EH pad"); + + PHINode *PN = findPHIToPartitionLoops(L, DT, AC); + if (!PN) return nullptr; // No known way to partition. + + // Pull out all predecessors that have varying values in the loop. This + // handles the case when a PHI node has multiple instances of itself as + // arguments. + SmallVector<BasicBlock*, 8> OuterLoopPreds; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + if (PN->getIncomingValue(i) != PN || + !L->contains(PN->getIncomingBlock(i))) { + // We can't split indirect control flow edges. + if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator())) + return nullptr; + OuterLoopPreds.push_back(PN->getIncomingBlock(i)); + } + } + LLVM_DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n"); + + // If ScalarEvolution is around and knows anything about values in + // this loop, tell it to forget them, because we're about to + // substantially change it. + if (SE) + SE->forgetLoop(L); + + BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", + DT, LI, MSSAU, PreserveLCSSA); + + // Make sure that NewBB is put someplace intelligent, which doesn't mess up + // code layout too horribly. + placeSplitBlockCarefully(NewBB, OuterLoopPreds, L); + + // Create the new outer loop. + Loop *NewOuter = LI->AllocateLoop(); + + // Change the parent loop to use the outer loop as its child now. + if (Loop *Parent = L->getParentLoop()) + Parent->replaceChildLoopWith(L, NewOuter); + else + LI->changeTopLevelLoop(L, NewOuter); + + // L is now a subloop of our outer loop. + NewOuter->addChildLoop(L); + + for (BasicBlock *BB : L->blocks()) + NewOuter->addBlockEntry(BB); + + // Now reset the header in L, which had been moved by + // SplitBlockPredecessors for the outer loop. + L->moveToHeader(Header); + + // Determine which blocks should stay in L and which should be moved out to + // the Outer loop now. + SmallPtrSet<BasicBlock *, 4> BlocksInL; + for (BasicBlock *P : predecessors(Header)) { + if (DT->dominates(Header, P)) + addBlockAndPredsToSet(P, Header, BlocksInL); + } + + // Scan all of the loop children of L, moving them to OuterLoop if they are + // not part of the inner loop. + const std::vector<Loop*> &SubLoops = L->getSubLoops(); + for (size_t I = 0; I != SubLoops.size(); ) + if (BlocksInL.count(SubLoops[I]->getHeader())) + ++I; // Loop remains in L + else + NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I)); + + SmallVector<BasicBlock *, 8> OuterLoopBlocks; + OuterLoopBlocks.push_back(NewBB); + // Now that we know which blocks are in L and which need to be moved to + // OuterLoop, move any blocks that need it. 
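Structured C++ cannot express a single header with two backedges, but a goto-based sketch (hypothetical, illustration only) shows the shape that findPHIToPartitionLoops and separateNestedLoop work on: the variable `i` flows back into the header unchanged along one backedge, so that cycle can be split out as the inner loop.

int stepped_count(int n) {
  int i = 1, steps = 0;
header:
  ++steps;
  if (steps % 3 != 0)
    goto header;                     // backedge 1: the PHI for i gets itself
  ++i;
  if (i < n)
    goto header;                     // backedge 2: i changes, stays in the outer loop
  return steps;
}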
+ for (unsigned i = 0; i != L->getBlocks().size(); ++i) { + BasicBlock *BB = L->getBlocks()[i]; + if (!BlocksInL.count(BB)) { + // Move this block to the parent, updating the exit blocks sets + L->removeBlockFromLoop(BB); + if ((*LI)[BB] == L) { + LI->changeLoopFor(BB, NewOuter); + OuterLoopBlocks.push_back(BB); + } + --i; + } + } + + // Split edges to exit blocks from the inner loop, if they emerged in the + // process of separating the outer one. + formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA); + + if (PreserveLCSSA) { + // Fix LCSSA form for L. Some values, which previously were only used inside + // L, can now be used in NewOuter loop. We need to insert phi-nodes for them + // in corresponding exit blocks. + // We don't need to form LCSSA recursively, because there cannot be uses + // inside a newly created loop of defs from inner loops as those would + // already be a use of an LCSSA phi node. + formLCSSA(*L, *DT, LI, SE); + + assert(NewOuter->isRecursivelyLCSSAForm(*DT, *LI) && + "LCSSA is broken after separating nested loops!"); + } + + return NewOuter; +} + +/// This method is called when the specified loop has more than one +/// backedge in it. +/// +/// If this occurs, revector all of these backedges to target a new basic block +/// and have that block branch to the loop header. This ensures that loops +/// have exactly one backedge. +static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, + DominatorTree *DT, LoopInfo *LI, + MemorySSAUpdater *MSSAU) { + assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!"); + + // Get information about the loop + BasicBlock *Header = L->getHeader(); + Function *F = Header->getParent(); + + // Unique backedge insertion currently depends on having a preheader. + if (!Preheader) + return nullptr; + + // The header is not an EH pad; preheader insertion should ensure this. + assert(!Header->isEHPad() && "Can't insert backedge to EH pad"); + + // Figure out which basic blocks contain back-edges to the loop header. + std::vector<BasicBlock*> BackedgeBlocks; + for (BasicBlock *P : predecessors(Header)) { + // Indirect edges cannot be split, so we must fail if we find one. + if (isa<IndirectBrInst>(P->getTerminator())) + return nullptr; + + if (P != Preheader) BackedgeBlocks.push_back(P); + } + + // Create and insert the new backedge block... + BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(), + Header->getName() + ".backedge", F); + BranchInst *BETerminator = BranchInst::Create(Header, BEBlock); + BETerminator->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc()); + + LLVM_DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block " + << BEBlock->getName() << "\n"); + + // Move the new backedge block to right after the last backedge block. + Function::iterator InsertPos = ++BackedgeBlocks.back()->getIterator(); + F->splice(InsertPos, F, BEBlock->getIterator()); + + // Now that the block has been inserted into the function, create PHI nodes in + // the backedge block which correspond to any PHI nodes in the header block. + for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(), + PN->getName()+".be", BETerminator); + + // Loop over the PHI node, moving all entries except the one for the + // preheader over to the new PHI node. 
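As a source-level picture of what insertUniqueBackedgeBlock achieves, consider a hypothetical loop with two paths back to its header; the transform redirects both through one new `.backedge` block so the loop ends up with a single backedge.

int sum_positive(const int *a, int n) {
  int i = 0, s = 0;
  while (i < n) {
    if (a[i] <= 0) {
      ++i;
      continue;                      // one branch back to the header
    }
    s += a[i];
    ++i;                             // fall-through: a second branch back
  }
  return s;
}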
+ unsigned PreheaderIdx = ~0U; + bool HasUniqueIncomingValue = true; + Value *UniqueValue = nullptr; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *IBB = PN->getIncomingBlock(i); + Value *IV = PN->getIncomingValue(i); + if (IBB == Preheader) { + PreheaderIdx = i; + } else { + NewPN->addIncoming(IV, IBB); + if (HasUniqueIncomingValue) { + if (!UniqueValue) + UniqueValue = IV; + else if (UniqueValue != IV) + HasUniqueIncomingValue = false; + } + } + } + + // Delete all of the incoming values from the old PN except the preheader's + assert(PreheaderIdx != ~0U && "PHI has no preheader entry??"); + if (PreheaderIdx != 0) { + PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx)); + PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx)); + } + // Nuke all entries except the zero'th. + for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i) + PN->removeIncomingValue(e-i, false); + + // Finally, add the newly constructed PHI node as the entry for the BEBlock. + PN->addIncoming(NewPN, BEBlock); + + // As an optimization, if all incoming values in the new PhiNode (which is a + // subset of the incoming values of the old PHI node) have the same value, + // eliminate the PHI Node. + if (HasUniqueIncomingValue) { + NewPN->replaceAllUsesWith(UniqueValue); + NewPN->eraseFromParent(); + } + } + + // Now that all of the PHI nodes have been inserted and adjusted, modify the + // backedge blocks to jump to the BEBlock instead of the header. + // If one of the backedges has llvm.loop metadata attached, we remove + // it from the backedge and add it to BEBlock. + unsigned LoopMDKind = BEBlock->getContext().getMDKindID("llvm.loop"); + MDNode *LoopMD = nullptr; + for (BasicBlock *BB : BackedgeBlocks) { + Instruction *TI = BB->getTerminator(); + if (!LoopMD) + LoopMD = TI->getMetadata(LoopMDKind); + TI->setMetadata(LoopMDKind, nullptr); + TI->replaceSuccessorWith(Header, BEBlock); + } + BEBlock->getTerminator()->setMetadata(LoopMDKind, LoopMD); + + //===--- Update all analyses which we must preserve now -----------------===// + + // Update Loop Information - we know that this block is now in the current + // loop and all parent loops. + L->addBasicBlockToLoop(BEBlock, *LI); + + // Update dominator information + DT->splitBlock(BEBlock); + + if (MSSAU) + MSSAU->updatePhisWhenInsertingUniqueBackedgeBlock(Header, Preheader, + BEBlock); + + return BEBlock; +} + +/// Simplify one loop and queue further loops for simplification. +static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist, + DominatorTree *DT, LoopInfo *LI, + ScalarEvolution *SE, AssumptionCache *AC, + MemorySSAUpdater *MSSAU, bool PreserveLCSSA) { + bool Changed = false; + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + +ReprocessLoop: + + // Check to see that no blocks (other than the header) in this loop have + // predecessors that are not in the loop. This is not valid for natural + // loops, but can occur if the blocks are unreachable. Since they are + // unreachable we can just shamelessly delete those CFG edges! + for (BasicBlock *BB : L->blocks()) { + if (BB == L->getHeader()) + continue; + + SmallPtrSet<BasicBlock*, 4> BadPreds; + for (BasicBlock *P : predecessors(BB)) + if (!L->contains(P)) + BadPreds.insert(P); + + // Delete each unique out-of-loop (and thus dead) predecessor. 
+ for (BasicBlock *P : BadPreds) { + + LLVM_DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor " + << P->getName() << "\n"); + + // Zap the dead pred's terminator and replace it with unreachable. + Instruction *TI = P->getTerminator(); + changeToUnreachable(TI, PreserveLCSSA, + /*DTU=*/nullptr, MSSAU); + Changed = true; + } + } + + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + + // If there are exiting blocks with branches on undef, resolve the undef in + // the direction which will exit the loop. This will help simplify loop + // trip count computations. + SmallVector<BasicBlock*, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (BasicBlock *ExitingBlock : ExitingBlocks) + if (BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator())) + if (BI->isConditional()) { + if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) { + + LLVM_DEBUG(dbgs() + << "LoopSimplify: Resolving \"br i1 undef\" to exit in " + << ExitingBlock->getName() << "\n"); + + BI->setCondition(ConstantInt::get(Cond->getType(), + !L->contains(BI->getSuccessor(0)))); + + Changed = true; + } + } + + // Does the loop already have a preheader? If so, don't insert one. + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) { + Preheader = InsertPreheaderForLoop(L, DT, LI, MSSAU, PreserveLCSSA); + if (Preheader) + Changed = true; + } + + // Next, check to make sure that all exit nodes of the loop only have + // predecessors that are inside of the loop. This check guarantees that the + // loop preheader/header will dominate the exit blocks. If the exit block has + // predecessors from outside of the loop, split the edge now. + if (formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA)) + Changed = true; + + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + + // If the header has more than two predecessors at this point (from the + // preheader and from multiple backedges), we must adjust the loop. + BasicBlock *LoopLatch = L->getLoopLatch(); + if (!LoopLatch) { + // If this is really a nested loop, rip it out into a child loop. Don't do + // this for loops with a giant number of backedges, just factor them into a + // common backedge instead. + if (L->getNumBackEdges() < 8) { + if (Loop *OuterL = separateNestedLoop(L, Preheader, DT, LI, SE, + PreserveLCSSA, AC, MSSAU)) { + ++NumNested; + // Enqueue the outer loop as it should be processed next in our + // depth-first nest walk. + Worklist.push_back(OuterL); + + // This is a big restructuring change, reprocess the whole loop. + Changed = true; + // GCC doesn't tail recursion eliminate this. + // FIXME: It isn't clear we can't rely on LLVM to TRE this. + goto ReprocessLoop; + } + } + + // If we either couldn't, or didn't want to, identify nesting of the loops, + // insert a new block that all backedges target, then make it jump to the + // loop header. + LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI, MSSAU); + if (LoopLatch) + Changed = true; + } + + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + + // Scan over the PHI nodes in the loop header. Since they now have only two + // incoming values (the loop is canonicalized), we may have simplified the PHI + // down to 'X = phi [X, Y]', which should be replaced with 'Y'. 
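A hypothetical example of the degenerate header PHI mentioned above: once the loop has a single backedge, a PHI of the form X = phi [X, latch], [Y, preheader] can only ever hold Y, so it is replaced by Y outright.

int passthrough(int y, int n) {
  int x = y;                         // preheader value Y
  for (int i = 0; i < n; ++i)
    x = x;                           // deliberately degenerate: the backedge
                                     // hands the PHI its own value
  return x;                          // the loop-carried x simplifies to y
}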
+ PHINode *PN;
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ (PN = dyn_cast<PHINode>(I++)); )
+ if (Value *V = simplifyInstruction(PN, {DL, nullptr, DT, AC})) {
+ if (SE) SE->forgetValue(PN);
+ if (!PreserveLCSSA || LI->replacementPreservesLCSSAForm(PN, V)) {
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ Changed = true;
+ }
+ }
+
+ // If this loop has multiple exits and the exits all go to the same
+ // block, attempt to merge the exits. This helps several passes, such
+ // as LoopRotation, which do not support loops with multiple exits.
+ // SimplifyCFG also does this (and this code uses the same utility
+ // function), however this code is loop-aware, where SimplifyCFG is
+ // not. That gives it the advantage of being able to hoist
+ // loop-invariant instructions out of the way to open up more
+ // opportunities, and the disadvantage of having the responsibility
+ // to preserve dominator information.
+ auto HasUniqueExitBlock = [&]() {
+ BasicBlock *UniqueExit = nullptr;
+ for (auto *ExitingBB : ExitingBlocks)
+ for (auto *SuccBB : successors(ExitingBB)) {
+ if (L->contains(SuccBB))
+ continue;
+
+ if (!UniqueExit)
+ UniqueExit = SuccBB;
+ else if (UniqueExit != SuccBB)
+ return false;
+ }
+
+ return true;
+ };
+ if (HasUniqueExitBlock()) {
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitingBlock = ExitingBlocks[i];
+ if (!ExitingBlock->getSinglePredecessor()) continue;
+ BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!BI || !BI->isConditional()) continue;
+ CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
+ if (!CI || CI->getParent() != ExitingBlock) continue;
+
+ // Attempt to hoist out all instructions except for the
+ // comparison and the branch.
+ bool AllInvariant = true;
+ bool AnyInvariant = false;
+ for (auto I = ExitingBlock->instructionsWithoutDebug().begin(); &*I != BI; ) {
+ Instruction *Inst = &*I++;
+ if (Inst == CI)
+ continue;
+ if (!L->makeLoopInvariant(
+ Inst, AnyInvariant,
+ Preheader ? Preheader->getTerminator() : nullptr, MSSAU, SE)) {
+ AllInvariant = false;
+ break;
+ }
+ }
+ if (AnyInvariant)
+ Changed = true;
+ if (!AllInvariant) continue;
+
+ // The block has now been cleared of all instructions except for
+ // a comparison and a conditional branch. SimplifyCFG may be able
+ // to fold it now.
+ if (!FoldBranchToCommonDest(BI, /*DTU=*/nullptr, MSSAU))
+ continue;
+
+ // Success. The block is now dead, so remove it from the loop,
+ // update the dominator tree and delete it.
+ LLVM_DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
+ << ExitingBlock->getName() << "\n");
+
+ assert(pred_empty(ExitingBlock));
+ Changed = true;
+ LI->removeBlock(ExitingBlock);
+
+ DomTreeNode *Node = DT->getNode(ExitingBlock);
+ while (!Node->isLeaf()) {
+ DomTreeNode *Child = Node->back();
+ DT->changeImmediateDominator(Child, Node->getIDom());
+ }
+ DT->eraseNode(ExitingBlock);
+ if (MSSAU) {
+ SmallSetVector<BasicBlock *, 8> ExitBlockSet;
+ ExitBlockSet.insert(ExitingBlock);
+ MSSAU->removeBlocks(ExitBlockSet);
+ }
+
+ BI->getSuccessor(0)->removePredecessor(
+ ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA);
+ BI->getSuccessor(1)->removePredecessor(
+ ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA);
+ ExitingBlock->eraseFromParent();
+ }
+ }
+
+ // Changing exit conditions for blocks may affect exit counts of this loop and
+ // any of its parents, so we must invalidate the entire subtree if we've made
+ // any changes.
+ if (Changed && SE) + SE->forgetTopmostLoop(L); + + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + + return Changed; +} + +bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, + ScalarEvolution *SE, AssumptionCache *AC, + MemorySSAUpdater *MSSAU, bool PreserveLCSSA) { + bool Changed = false; + +#ifndef NDEBUG + // If we're asked to preserve LCSSA, the loop nest needs to start in LCSSA + // form. + if (PreserveLCSSA) { + assert(DT && "DT not available."); + assert(LI && "LI not available."); + assert(L->isRecursivelyLCSSAForm(*DT, *LI) && + "Requested to preserve LCSSA, but it's already broken."); + } +#endif + + // Worklist maintains our depth-first queue of loops in this nest to process. + SmallVector<Loop *, 4> Worklist; + Worklist.push_back(L); + + // Walk the worklist from front to back, pushing newly found sub loops onto + // the back. This will let us process loops from back to front in depth-first + // order. We can use this simple process because loops form a tree. + for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) { + Loop *L2 = Worklist[Idx]; + Worklist.append(L2->begin(), L2->end()); + } + + while (!Worklist.empty()) + Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, DT, LI, SE, + AC, MSSAU, PreserveLCSSA); + + return Changed; +} + +namespace { + struct LoopSimplify : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + LoopSimplify() : FunctionPass(ID) { + initializeLoopSimplifyPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AssumptionCacheTracker>(); + + // We need loop information to identify the loops... + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + + AU.addRequired<LoopInfoWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); + + AU.addPreserved<BasicAAWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addPreserved<ScalarEvolutionWrapperPass>(); + AU.addPreserved<SCEVAAWrapperPass>(); + AU.addPreservedID(LCSSAID); + AU.addPreserved<DependenceAnalysisWrapperPass>(); + AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. + AU.addPreserved<BranchProbabilityInfoWrapperPass>(); + AU.addPreserved<MemorySSAWrapperPass>(); + } + + /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees. + void verifyAnalysis() const override; + }; +} + +char LoopSimplify::ID = 0; +INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify", + "Canonicalize natural loops", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_END(LoopSimplify, "loop-simplify", + "Canonicalize natural loops", false, false) + +// Publicly exposed interface to pass... +char &llvm::LoopSimplifyID = LoopSimplify::ID; +Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } + +/// runOnFunction - Run down all loops in the CFG (recursively, but we could do +/// it in any convenient order) inserting preheaders... +/// +bool LoopSimplify::runOnFunction(Function &F) { + bool Changed = false; + LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); + ScalarEvolution *SE = SEWP ? 
&SEWP->getSE() : nullptr; + AssumptionCache *AC = + &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + MemorySSA *MSSA = nullptr; + std::unique_ptr<MemorySSAUpdater> MSSAU; + auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>(); + if (MSSAAnalysis) { + MSSA = &MSSAAnalysis->getMSSA(); + MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); + } + + bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); + + // Simplify each loop nest in the function. + for (auto *L : *LI) + Changed |= simplifyLoop(L, DT, LI, SE, AC, MSSAU.get(), PreserveLCSSA); + +#ifndef NDEBUG + if (PreserveLCSSA) { + bool InLCSSA = all_of( + *LI, [&](Loop *L) { return L->isRecursivelyLCSSAForm(*DT, *LI); }); + assert(InLCSSA && "LCSSA is broken after loop-simplify."); + } +#endif + return Changed; +} + +PreservedAnalyses LoopSimplifyPass::run(Function &F, + FunctionAnalysisManager &AM) { + bool Changed = false; + LoopInfo *LI = &AM.getResult<LoopAnalysis>(F); + DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); + ScalarEvolution *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F); + AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F); + auto *MSSAAnalysis = AM.getCachedResult<MemorySSAAnalysis>(F); + std::unique_ptr<MemorySSAUpdater> MSSAU; + if (MSSAAnalysis) { + auto *MSSA = &MSSAAnalysis->getMSSA(); + MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); + } + + + // Note that we don't preserve LCSSA in the new PM, if you need it run LCSSA + // after simplifying the loops. MemorySSA is preserved if it exists. + for (auto *L : *LI) + Changed |= + simplifyLoop(L, DT, LI, SE, AC, MSSAU.get(), /*PreserveLCSSA*/ false); + + if (!Changed) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserve<DominatorTreeAnalysis>(); + PA.preserve<LoopAnalysis>(); + PA.preserve<ScalarEvolutionAnalysis>(); + PA.preserve<DependenceAnalysis>(); + if (MSSAAnalysis) + PA.preserve<MemorySSAAnalysis>(); + // BPI maps conditional terminators to probabilities, LoopSimplify can insert + // blocks, but it does so only by splitting existing blocks and edges. This + // results in the interesting property that all new terminators inserted are + // unconditional branches which do not appear in BPI. All deletions are + // handled via ValueHandle callbacks w/in BPI. + PA.preserve<BranchProbabilityAnalysis>(); + return PA; +} + +// FIXME: Restore this code when we re-enable verification in verifyAnalysis +// below. +#if 0 +static void verifyLoop(Loop *L) { + // Verify subloops. + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) + verifyLoop(*I); + + // It used to be possible to just assert L->isLoopSimplifyForm(), however + // with the introduction of indirectbr, there are now cases where it's + // not possible to transform a loop as necessary. We can at least check + // that there is an indirectbr near any time there's trouble. + + // Indirectbr can interfere with preheader and unique backedge insertion. + if (!L->getLoopPreheader() || !L->getLoopLatch()) { + bool HasIndBrPred = false; + for (BasicBlock *Pred : predecessors(L->getHeader())) + if (isa<IndirectBrInst>(Pred->getTerminator())) { + HasIndBrPred = true; + break; + } + assert(HasIndBrPred && + "LoopSimplify has no excuse for missing loop header info!"); + (void)HasIndBrPred; + } + + // Indirectbr can interfere with exit block canonicalization. 
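The indirectbr caveat noted in this disabled verifier is easy to reproduce at the source level. A minimal sketch, not part of this patch, using the GNU labels-as-values extension accepted by Clang and GCC: the loop header's address is taken and the header is reached through an indirect branch, so LoopSimplify cannot split that edge to give the loop a dedicated preheader, which is exactly the situation the commented-out checks tolerate.

// Illustrative only: a loop entered through an indirect branch (GNU extension).
int count_down(int n) {
  void *targets[] = {&&loop, &&done};   // takes the address of the loop header
  goto *targets[n > 0 ? 0 : 1];         // lowers to an indirectbr into the header
loop:
  --n;
  if (n > 0)
    goto loop;
done:
  return n;
}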
+ if (!L->hasDedicatedExits()) { + bool HasIndBrExiting = false; + SmallVector<BasicBlock*, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { + if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) { + HasIndBrExiting = true; + break; + } + } + + assert(HasIndBrExiting && + "LoopSimplify has no excuse for missing exit block info!"); + (void)HasIndBrExiting; + } +} +#endif + +void LoopSimplify::verifyAnalysis() const { + // FIXME: This routine is being called mid-way through the loop pass manager + // as loop passes destroy this analysis. That's actually fine, but we have no + // way of expressing that here. Once all of the passes that destroy this are + // hoisted out of the loop pass manager we can add back verification here. +#if 0 + for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) + verifyLoop(*I); +#endif +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LoopUnroll.cpp new file mode 100644 index 0000000000..e8f585b4a9 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/LoopUnroll.cpp @@ -0,0 +1,908 @@ +//===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements some loop unrolling utilities. It does not define any +// actual pass or policy, but provides a single function to perform loop +// unrolling. +// +// The process of unrolling can produce extraneous basic blocks linked with +// unconditional branches. This will be corrected in the future. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/ADT/ilist_iterator.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/DomTreeUpdater.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/IR/ValueMap.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GenericDomTree.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/SimplifyIndVar.h" +#include "llvm/Transforms/Utils/UnrollLoop.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include <algorithm> +#include <assert.h> +#include <numeric> +#include <type_traits> +#include <vector> + +namespace llvm { +class DataLayout; +class Value; +} // namespace llvm + +using namespace llvm; + +#define DEBUG_TYPE "loop-unroll" + +// TODO: Should these be here or in LoopUnroll? +STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled"); +STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)"); +STATISTIC(NumUnrolledNotLatch, "Number of loops unrolled without a conditional " + "latch (completely or otherwise)"); + +static cl::opt<bool> +UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden, + cl::desc("Allow runtime unrolled loops to be unrolled " + "with epilog instead of prolog.")); + +static cl::opt<bool> +UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden, + cl::desc("Verify domtree after unrolling"), +#ifdef EXPENSIVE_CHECKS + cl::init(true) +#else + cl::init(false) +#endif + ); + +static cl::opt<bool> +UnrollVerifyLoopInfo("unroll-verify-loopinfo", cl::Hidden, + cl::desc("Verify loopinfo after unrolling"), +#ifdef EXPENSIVE_CHECKS + cl::init(true) +#else + cl::init(false) +#endif + ); + + +/// Check if unrolling created a situation where we need to insert phi nodes to +/// preserve LCSSA form. +/// \param Blocks is a vector of basic blocks representing unrolled loop. +/// \param L is the outer loop. +/// It's possible that some of the blocks are in L, and some are not. In this +/// case, if there is a use is outside L, and definition is inside L, we need to +/// insert a phi-node, otherwise LCSSA will be broken. 
+/// The function is just a helper function for llvm::UnrollLoop that returns
+/// true if this situation occurs, indicating that LCSSA needs to be fixed.
+static bool needToInsertPhisForLCSSA(Loop *L,
+ const std::vector<BasicBlock *> &Blocks,
+ LoopInfo *LI) {
+ for (BasicBlock *BB : Blocks) {
+ if (LI->getLoopFor(BB) == L)
+ continue;
+ for (Instruction &I : *BB) {
+ for (Use &U : I.operands()) {
+ if (const auto *Def = dyn_cast<Instruction>(U)) {
+ Loop *DefLoop = LI->getLoopFor(Def->getParent());
+ if (!DefLoop)
+ continue;
+ if (DefLoop->contains(L))
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+}
+
+/// Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary
+/// and adds a mapping from the original loop to the new loop to NewLoops.
+/// Returns nullptr if no new loop was created, and otherwise a pointer to
+/// the original loop that OriginalBB was part of.
+const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB,
+ BasicBlock *ClonedBB, LoopInfo *LI,
+ NewLoopsMap &NewLoops) {
+ // Figure out which loop New is in.
+ const Loop *OldLoop = LI->getLoopFor(OriginalBB);
+ assert(OldLoop && "Should (at least) be in the loop being unrolled!");
+
+ Loop *&NewLoop = NewLoops[OldLoop];
+ if (!NewLoop) {
+ // Found a new sub-loop.
+ assert(OriginalBB == OldLoop->getHeader() &&
+ "Header should be first in RPO");
+
+ NewLoop = LI->AllocateLoop();
+ Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop());
+
+ if (NewLoopParent)
+ NewLoopParent->addChildLoop(NewLoop);
+ else
+ LI->addTopLevelLoop(NewLoop);
+
+ NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
+ return OldLoop;
+ } else {
+ NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
+ return nullptr;
+ }
+}
+
+/// The function chooses which type of unroll (epilog or prolog) is more
+/// profitable.
+/// Epilog unroll is more profitable when there is a PHI that starts from a
+/// constant. In this case the epilog will leave the PHI starting from a
+/// constant, but the prolog will convert it to a non-constant.
+///
+/// loop:
+/// PN = PHI [I, Latch], [CI, PreHeader]
+/// I = foo(PN)
+/// ...
+///
+/// Epilog unroll case.
+/// loop:
+/// PN = PHI [I2, Latch], [CI, PreHeader]
+/// I1 = foo(PN)
+/// I2 = foo(I1)
+/// ...
+/// Prolog unroll case.
+/// NewPN = PHI [PrologI, Prolog], [CI, PreHeader]
+/// loop:
+/// PN = PHI [I2, Latch], [NewPN, PreHeader]
+/// I1 = foo(PN)
+/// I2 = foo(I1)
+/// ...
+///
+static bool isEpilogProfitable(Loop *L) {
+ BasicBlock *PreHeader = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ assert(PreHeader && Header);
+ for (const PHINode &PN : Header->phis()) {
+ if (isa<ConstantInt>(PN.getIncomingValueForBlock(PreHeader)))
+ return true;
+ }
+ return false;
+}
+
+/// Perform some cleanup and simplifications on loops after unrolling. It is
+/// useful to simplify the IV's in the new loop, as well as do a quick
+/// simplify/dce pass of the instructions.
+void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
+ ScalarEvolution *SE, DominatorTree *DT,
+ AssumptionCache *AC,
+ const TargetTransformInfo *TTI) {
+ // Simplify any new induction variables in the partially unrolled loop.
+ if (SE && SimplifyIVs) {
+ SmallVector<WeakTrackingVH, 16> DeadInsts;
+ simplifyLoopIVs(L, SE, DT, LI, TTI, DeadInsts);
+
+ // Aggressively clean up dead instructions that simplifyLoopIVs already
+ // identified. Any remaining should be cleaned up below.
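As a rough source-level picture of what this cleanup buys after a complete unroll (an illustrative sketch, not code from this patch): once the body is replicated, the compares, the increments, and the induction variable itself simplify to constants and are deleted, leaving straight-line code.

// Before: a loop with the constant trip count 3.
int dot3(const int *a, const int *b) {
  int s = 0;
  for (int i = 0; i < 3; ++i)
    s += a[i] * b[i];
  return s;
}

// After complete unrolling plus the simplify/DCE sweep, conceptually:
int dot3_unrolled(const int *a, const int *b) {
  int s = 0;
  s += a[0] * b[0];
  s += a[1] * b[1];
  s += a[2] * b[2];
  return s;
}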
+ while (!DeadInsts.empty()) { + Value *V = DeadInsts.pop_back_val(); + if (Instruction *Inst = dyn_cast_or_null<Instruction>(V)) + RecursivelyDeleteTriviallyDeadInstructions(Inst); + } + } + + // At this point, the code is well formed. Perform constprop, instsimplify, + // and dce. + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + SmallVector<WeakTrackingVH, 16> DeadInsts; + for (BasicBlock *BB : L->getBlocks()) { + for (Instruction &Inst : llvm::make_early_inc_range(*BB)) { + if (Value *V = simplifyInstruction(&Inst, {DL, nullptr, DT, AC})) + if (LI->replacementPreservesLCSSAForm(&Inst, V)) + Inst.replaceAllUsesWith(V); + if (isInstructionTriviallyDead(&Inst)) + DeadInsts.emplace_back(&Inst); + } + // We can't do recursive deletion until we're done iterating, as we might + // have a phi which (potentially indirectly) uses instructions later in + // the block we're iterating through. + RecursivelyDeleteTriviallyDeadInstructions(DeadInsts); + } +} + +/// Unroll the given loop by Count. The loop must be in LCSSA form. Unrolling +/// can only fail when the loop's latch block is not terminated by a conditional +/// branch instruction. However, if the trip count (and multiple) are not known, +/// loop unrolling will mostly produce more code that is no faster. +/// +/// If Runtime is true then UnrollLoop will try to insert a prologue or +/// epilogue that ensures the latch has a trip multiple of Count. UnrollLoop +/// will not runtime-unroll the loop if computing the run-time trip count will +/// be expensive and AllowExpensiveTripCount is false. +/// +/// The LoopInfo Analysis that is passed will be kept consistent. +/// +/// This utility preserves LoopInfo. It will also preserve ScalarEvolution and +/// DominatorTree if they are non-null. +/// +/// If RemainderLoop is non-null, it will receive the remainder loop (if +/// required and not fully unrolled). +LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, + ScalarEvolution *SE, DominatorTree *DT, + AssumptionCache *AC, + const TargetTransformInfo *TTI, + OptimizationRemarkEmitter *ORE, + bool PreserveLCSSA, Loop **RemainderLoop) { + assert(DT && "DomTree is required"); + + if (!L->getLoopPreheader()) { + LLVM_DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); + return LoopUnrollResult::Unmodified; + } + + if (!L->getLoopLatch()) { + LLVM_DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n"); + return LoopUnrollResult::Unmodified; + } + + // Loops with indirectbr cannot be cloned. + if (!L->isSafeToClone()) { + LLVM_DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n"); + return LoopUnrollResult::Unmodified; + } + + if (L->getHeader()->hasAddressTaken()) { + // The loop-rotate pass can be helpful to avoid this in many cases. + LLVM_DEBUG( + dbgs() << " Won't unroll loop: address of header block is taken.\n"); + return LoopUnrollResult::Unmodified; + } + + assert(ULO.Count > 0); + + // All these values should be taken only after peeling because they might have + // changed. 
+ BasicBlock *Preheader = L->getLoopPreheader(); + BasicBlock *Header = L->getHeader(); + BasicBlock *LatchBlock = L->getLoopLatch(); + SmallVector<BasicBlock *, 4> ExitBlocks; + L->getExitBlocks(ExitBlocks); + std::vector<BasicBlock *> OriginalLoopBlocks = L->getBlocks(); + + const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L); + const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L); + + // Effectively "DCE" unrolled iterations that are beyond the max tripcount + // and will never be executed. + if (MaxTripCount && ULO.Count > MaxTripCount) + ULO.Count = MaxTripCount; + + struct ExitInfo { + unsigned TripCount; + unsigned TripMultiple; + unsigned BreakoutTrip; + bool ExitOnTrue; + BasicBlock *FirstExitingBlock = nullptr; + SmallVector<BasicBlock *> ExitingBlocks; + }; + DenseMap<BasicBlock *, ExitInfo> ExitInfos; + SmallVector<BasicBlock *, 4> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (auto *ExitingBlock : ExitingBlocks) { + // The folding code is not prepared to deal with non-branch instructions + // right now. + auto *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); + if (!BI) + continue; + + ExitInfo &Info = ExitInfos.try_emplace(ExitingBlock).first->second; + Info.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock); + Info.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock); + if (Info.TripCount != 0) { + Info.BreakoutTrip = Info.TripCount % ULO.Count; + Info.TripMultiple = 0; + } else { + Info.BreakoutTrip = Info.TripMultiple = + (unsigned)std::gcd(ULO.Count, Info.TripMultiple); + } + Info.ExitOnTrue = !L->contains(BI->getSuccessor(0)); + Info.ExitingBlocks.push_back(ExitingBlock); + LLVM_DEBUG(dbgs() << " Exiting block %" << ExitingBlock->getName() + << ": TripCount=" << Info.TripCount + << ", TripMultiple=" << Info.TripMultiple + << ", BreakoutTrip=" << Info.BreakoutTrip << "\n"); + } + + // Are we eliminating the loop control altogether? Note that we can know + // we're eliminating the backedge without knowing exactly which iteration + // of the unrolled body exits. + const bool CompletelyUnroll = ULO.Count == MaxTripCount; + + const bool PreserveOnlyFirst = CompletelyUnroll && MaxOrZero; + + // There's no point in performing runtime unrolling if this unroll count + // results in a full unroll. + if (CompletelyUnroll) + ULO.Runtime = false; + + // Go through all exits of L and see if there are any phi-nodes there. We just + // conservatively assume that they're inserted to preserve LCSSA form, which + // means that complete unrolling might break this form. We need to either fix + // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For + // now we just recompute LCSSA for the outer loop, but it should be possible + // to fix it in-place. + bool NeedToFixLCSSA = + PreserveLCSSA && CompletelyUnroll && + any_of(ExitBlocks, + [](const BasicBlock *BB) { return isa<PHINode>(BB->begin()); }); + + // The current loop unroll pass can unroll loops that have + // (1) single latch; and + // (2a) latch is unconditional; or + // (2b) latch is conditional and is an exiting block + // FIXME: The implementation can be extended to work with more complicated + // cases, e.g. loops with multiple latches. + BranchInst *LatchBI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); + + // A conditional branch which exits the loop, which can be optimized to an + // unconditional branch in the unrolled loop in some cases. 
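The per-exit bookkeeping above boils down to modular arithmetic. A small self-contained sketch with hypothetical numbers (the Count, TripCount, and TripMultiple values here are illustrative, not taken from the patch):

#include <cstdio>
#include <numeric>

int main() {
  unsigned Count = 4;                          // unroll factor

  // Exit with a known trip count of 10: the exit can only be taken by the
  // copy that sits 10 % 4 == 2 iterations into the unrolled body.
  unsigned TripCount = 10;
  unsigned BreakoutTrip = TripCount % Count;   // 2; TripMultiple is reset to 0
  std::printf("BreakoutTrip = %u\n", BreakoutTrip);

  // Unknown trip count but a known trip multiple of 6: roughly speaking, only
  // copies at a position divisible by gcd(4, 6) == 2 can still be the exit.
  unsigned TripMultiple = 6;
  unsigned G = std::gcd(Count, TripMultiple);  // BreakoutTrip = TripMultiple = 2
  std::printf("BreakoutTrip = TripMultiple = %u\n", G);
  return 0;
}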
+ bool LatchIsExiting = L->isLoopExiting(LatchBlock); + if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) { + LLVM_DEBUG( + dbgs() << "Can't unroll; a conditional latch must exit the loop"); + return LoopUnrollResult::Unmodified; + } + + // Loops containing convergent instructions cannot use runtime unrolling, + // as the prologue/epilogue may add additional control-dependencies to + // convergent operations. + LLVM_DEBUG( + { + bool HasConvergent = false; + for (auto &BB : L->blocks()) + for (auto &I : *BB) + if (auto *CB = dyn_cast<CallBase>(&I)) + HasConvergent |= CB->isConvergent(); + assert((!HasConvergent || !ULO.Runtime) && + "Can't runtime unroll if loop contains a convergent operation."); + }); + + bool EpilogProfitability = + UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog + : isEpilogProfitable(L); + + if (ULO.Runtime && + !UnrollRuntimeLoopRemainder(L, ULO.Count, ULO.AllowExpensiveTripCount, + EpilogProfitability, ULO.UnrollRemainder, + ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI, + PreserveLCSSA, RemainderLoop)) { + if (ULO.Force) + ULO.Runtime = false; + else { + LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be " + "generated when assuming runtime trip count\n"); + return LoopUnrollResult::Unmodified; + } + } + + using namespace ore; + // Report the unrolling decision. + if (CompletelyUnroll) { + LLVM_DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() + << " with trip count " << ULO.Count << "!\n"); + if (ORE) + ORE->emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(), + L->getHeader()) + << "completely unrolled loop with " + << NV("UnrollCount", ULO.Count) << " iterations"; + }); + } else { + LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " + << ULO.Count); + if (ULO.Runtime) + LLVM_DEBUG(dbgs() << " with run-time trip count"); + LLVM_DEBUG(dbgs() << "!\n"); + + if (ORE) + ORE->emit([&]() { + OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(), + L->getHeader()); + Diag << "unrolled loop by a factor of " << NV("UnrollCount", ULO.Count); + if (ULO.Runtime) + Diag << " with run-time trip count"; + return Diag; + }); + } + + // We are going to make changes to this loop. SCEV may be keeping cached info + // about it, in particular about backedge taken count. The changes we make + // are guaranteed to invalidate this information for our loop. It is tempting + // to only invalidate the loop being unrolled, but it is incorrect as long as + // all exiting branches from all inner loops have impact on the outer loops, + // and if something changes inside them then any of outer loops may also + // change. When we forget outermost loop, we also forget all contained loops + // and this is what we need here. + if (SE) { + if (ULO.ForgetAllSCEV) + SE->forgetAllLoops(); + else { + SE->forgetTopmostLoop(L); + SE->forgetBlockAndLoopDispositions(); + } + } + + if (!LatchIsExiting) + ++NumUnrolledNotLatch; + + // For the first iteration of the loop, we should use the precloned values for + // PHI nodes. Insert associations now. 
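For the EpilogProfitability decision made just above, here is a source-level sketch of the two remainder placements for an unroll factor of 2 (hypothetical code, not part of the patch). With an epilog, the main loop's accumulator and counter still start from constants, which is what isEpilogProfitable looks for; with a prolog, they start from whatever the prolog produced.

// Epilog-style remainder: the unrolled loop keeps its constant starting values.
int sum_epilog(const int *a, int n) {
  int s = 0, i = 0;
  for (; i + 1 < n; i += 2) {  // main loop, unrolled by 2
    s += a[i];
    s += a[i + 1];
  }
  for (; i < n; ++i)           // epilog runs the leftover iteration
    s += a[i];
  return s;
}

// Prolog-style remainder: the unrolled loop now starts from the prolog's
// results, so its header PHIs are no longer constants.
int sum_prolog(const int *a, int n) {
  int s = 0, i = 0;
  if (n % 2 != 0)              // prolog peels the leftover iteration first
    s += a[i++];
  for (; i + 1 < n; i += 2) {
    s += a[i];
    s += a[i + 1];
  }
  return s;
}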
+ ValueToValueMapTy LastValueMap; + std::vector<PHINode*> OrigPHINode; + for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { + OrigPHINode.push_back(cast<PHINode>(I)); + } + + std::vector<BasicBlock *> Headers; + std::vector<BasicBlock *> Latches; + Headers.push_back(Header); + Latches.push_back(LatchBlock); + + // The current on-the-fly SSA update requires blocks to be processed in + // reverse postorder so that LastValueMap contains the correct value at each + // exit. + LoopBlocksDFS DFS(L); + DFS.perform(LI); + + // Stash the DFS iterators before adding blocks to the loop. + LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); + LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); + + std::vector<BasicBlock*> UnrolledLoopBlocks = L->getBlocks(); + + // Loop Unrolling might create new loops. While we do preserve LoopInfo, we + // might break loop-simplified form for these loops (as they, e.g., would + // share the same exit blocks). We'll keep track of loops for which we can + // break this so that later we can re-simplify them. + SmallSetVector<Loop *, 4> LoopsToSimplify; + for (Loop *SubLoop : *L) + LoopsToSimplify.insert(SubLoop); + + // When a FSDiscriminator is enabled, we don't need to add the multiply + // factors to the discriminators. + if (Header->getParent()->shouldEmitDebugInfoForProfiling() && + !EnableFSDiscriminator) + for (BasicBlock *BB : L->getBlocks()) + for (Instruction &I : *BB) + if (!isa<DbgInfoIntrinsic>(&I)) + if (const DILocation *DIL = I.getDebugLoc()) { + auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(ULO.Count); + if (NewDIL) + I.setDebugLoc(*NewDIL); + else + LLVM_DEBUG(dbgs() + << "Failed to create new discriminator: " + << DIL->getFilename() << " Line: " << DIL->getLine()); + } + + // Identify what noalias metadata is inside the loop: if it is inside the + // loop, the associated metadata must be cloned for each iteration. + SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes; + identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes); + + // We place the unrolled iterations immediately after the original loop + // latch. This is a reasonable default placement if we don't have block + // frequencies, and if we do, well the layout will be adjusted later. + auto BlockInsertPt = std::next(LatchBlock->getIterator()); + for (unsigned It = 1; It != ULO.Count; ++It) { + SmallVector<BasicBlock *, 8> NewBlocks; + SmallDenseMap<const Loop *, Loop *, 4> NewLoops; + NewLoops[L] = L; + + for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { + ValueToValueMapTy VMap; + BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It)); + Header->getParent()->insert(BlockInsertPt, New); + + assert((*BB != Header || LI->getLoopFor(*BB) == L) && + "Header should not be in a sub-loop"); + // Tell LI about New. + const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops); + if (OldLoop) + LoopsToSimplify.insert(NewLoops[OldLoop]); + + if (*BB == Header) + // Loop over all of the PHI nodes in the block, changing them to use + // the incoming values from the previous block. 
+ for (PHINode *OrigPHI : OrigPHINode) { + PHINode *NewPHI = cast<PHINode>(VMap[OrigPHI]); + Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock); + if (Instruction *InValI = dyn_cast<Instruction>(InVal)) + if (It > 1 && L->contains(InValI)) + InVal = LastValueMap[InValI]; + VMap[OrigPHI] = InVal; + NewPHI->eraseFromParent(); + } + + // Update our running map of newest clones + LastValueMap[*BB] = New; + for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); + VI != VE; ++VI) + LastValueMap[VI->first] = VI->second; + + // Add phi entries for newly created values to all exit blocks. + for (BasicBlock *Succ : successors(*BB)) { + if (L->contains(Succ)) + continue; + for (PHINode &PHI : Succ->phis()) { + Value *Incoming = PHI.getIncomingValueForBlock(*BB); + ValueToValueMapTy::iterator It = LastValueMap.find(Incoming); + if (It != LastValueMap.end()) + Incoming = It->second; + PHI.addIncoming(Incoming, New); + SE->forgetValue(&PHI); + } + } + // Keep track of new headers and latches as we create them, so that + // we can insert the proper branches later. + if (*BB == Header) + Headers.push_back(New); + if (*BB == LatchBlock) + Latches.push_back(New); + + // Keep track of the exiting block and its successor block contained in + // the loop for the current iteration. + auto ExitInfoIt = ExitInfos.find(*BB); + if (ExitInfoIt != ExitInfos.end()) + ExitInfoIt->second.ExitingBlocks.push_back(New); + + NewBlocks.push_back(New); + UnrolledLoopBlocks.push_back(New); + + // Update DomTree: since we just copy the loop body, and each copy has a + // dedicated entry block (copy of the header block), this header's copy + // dominates all copied blocks. That means, dominance relations in the + // copied body are the same as in the original body. + if (*BB == Header) + DT->addNewBlock(New, Latches[It - 1]); + else { + auto BBDomNode = DT->getNode(*BB); + auto BBIDom = BBDomNode->getIDom(); + BasicBlock *OriginalBBIDom = BBIDom->getBlock(); + DT->addNewBlock( + New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)])); + } + } + + // Remap all instructions in the most recent iteration + remapInstructionsInBlocks(NewBlocks, LastValueMap); + for (BasicBlock *NewBlock : NewBlocks) + for (Instruction &I : *NewBlock) + if (auto *II = dyn_cast<AssumeInst>(&I)) + AC->registerAssumption(II); + + { + // Identify what other metadata depends on the cloned version. After + // cloning, replace the metadata with the corrected version for both + // memory instructions and noalias intrinsics. + std::string ext = (Twine("It") + Twine(It)).str(); + cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks, + Header->getContext(), ext); + } + } + + // Loop over the PHI nodes in the original block, setting incoming values. + for (PHINode *PN : OrigPHINode) { + if (CompletelyUnroll) { + PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader)); + PN->eraseFromParent(); + } else if (ULO.Count > 1) { + Value *InVal = PN->removeIncomingValue(LatchBlock, false); + // If this value was defined in the loop, take the value defined by the + // last iteration of the loop. + if (Instruction *InValI = dyn_cast<Instruction>(InVal)) { + if (L->contains(InValI)) + InVal = LastValueMap[InVal]; + } + assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch"); + PN->addIncoming(InVal, Latches.back()); + } + } + + // Connect latches of the unrolled iterations to the headers of the next + // iteration. Currently they point to the header of the same iteration. 
+ for (unsigned i = 0, e = Latches.size(); i != e; ++i) { + unsigned j = (i + 1) % e; + Latches[i]->getTerminator()->replaceSuccessorWith(Headers[i], Headers[j]); + } + + // Update dominators of blocks we might reach through exits. + // Immediate dominator of such block might change, because we add more + // routes which can lead to the exit: we can now reach it from the copied + // iterations too. + if (ULO.Count > 1) { + for (auto *BB : OriginalLoopBlocks) { + auto *BBDomNode = DT->getNode(BB); + SmallVector<BasicBlock *, 16> ChildrenToUpdate; + for (auto *ChildDomNode : BBDomNode->children()) { + auto *ChildBB = ChildDomNode->getBlock(); + if (!L->contains(ChildBB)) + ChildrenToUpdate.push_back(ChildBB); + } + // The new idom of the block will be the nearest common dominator + // of all copies of the previous idom. This is equivalent to the + // nearest common dominator of the previous idom and the first latch, + // which dominates all copies of the previous idom. + BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, LatchBlock); + for (auto *ChildBB : ChildrenToUpdate) + DT->changeImmediateDominator(ChildBB, NewIDom); + } + } + + assert(!UnrollVerifyDomtree || + DT->verify(DominatorTree::VerificationLevel::Fast)); + + SmallVector<DominatorTree::UpdateType> DTUpdates; + auto SetDest = [&](BasicBlock *Src, bool WillExit, bool ExitOnTrue) { + auto *Term = cast<BranchInst>(Src->getTerminator()); + const unsigned Idx = ExitOnTrue ^ WillExit; + BasicBlock *Dest = Term->getSuccessor(Idx); + BasicBlock *DeadSucc = Term->getSuccessor(1-Idx); + + // Remove predecessors from all non-Dest successors. + DeadSucc->removePredecessor(Src, /* KeepOneInputPHIs */ true); + + // Replace the conditional branch with an unconditional one. + BranchInst::Create(Dest, Term); + Term->eraseFromParent(); + + DTUpdates.emplace_back(DominatorTree::Delete, Src, DeadSucc); + }; + + auto WillExit = [&](const ExitInfo &Info, unsigned i, unsigned j, + bool IsLatch) -> std::optional<bool> { + if (CompletelyUnroll) { + if (PreserveOnlyFirst) { + if (i == 0) + return std::nullopt; + return j == 0; + } + // Complete (but possibly inexact) unrolling + if (j == 0) + return true; + if (Info.TripCount && j != Info.TripCount) + return false; + return std::nullopt; + } + + if (ULO.Runtime) { + // If runtime unrolling inserts a prologue, information about non-latch + // exits may be stale. + if (IsLatch && j != 0) + return false; + return std::nullopt; + } + + if (j != Info.BreakoutTrip && + (Info.TripMultiple == 0 || j % Info.TripMultiple != 0)) { + // If we know the trip count or a multiple of it, we can safely use an + // unconditional branch for some iterations. + return false; + } + return std::nullopt; + }; + + // Fold branches for iterations where we know that they will exit or not + // exit. + for (auto &Pair : ExitInfos) { + ExitInfo &Info = Pair.second; + for (unsigned i = 0, e = Info.ExitingBlocks.size(); i != e; ++i) { + // The branch destination. + unsigned j = (i + 1) % e; + bool IsLatch = Pair.first == LatchBlock; + std::optional<bool> KnownWillExit = WillExit(Info, i, j, IsLatch); + if (!KnownWillExit) { + if (!Info.FirstExitingBlock) + Info.FirstExitingBlock = Info.ExitingBlocks[i]; + continue; + } + + // We don't fold known-exiting branches for non-latch exits here, + // because this ensures that both all loop blocks and all exit blocks + // remain reachable in the CFG. + // TODO: We could fold these branches, but it would require much more + // sophisticated updates to LoopInfo. 
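Continuing the hypothetical numbers from the earlier sketch (TripCount = 10, Count = 4, so BreakoutTrip = 2): only the latch copy at position 2 keeps its conditional exit, and every other latch branch is rewritten to fall through unconditionally. Roughly, at the source level (illustrative only; assumes a has at least 10 elements):

int sum10(const int *a) {
  int s = 0, i = 0;
  for (;;) {            // the back-branch after copy 3 is now unconditional
    s += a[i++];        // copy 0: provably cannot be the exiting iteration
    s += a[i++];        // copy 1: the only surviving exit test
    if (i == 10)
      break;            // taken on the third pass, when i reaches 10
    s += a[i++];        // copy 2
    s += a[i++];        // copy 3
  }
  return s;
}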
+ if (*KnownWillExit && !IsLatch) { + if (!Info.FirstExitingBlock) + Info.FirstExitingBlock = Info.ExitingBlocks[i]; + continue; + } + + SetDest(Info.ExitingBlocks[i], *KnownWillExit, Info.ExitOnTrue); + } + } + + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); + DomTreeUpdater *DTUToUse = &DTU; + if (ExitingBlocks.size() == 1 && ExitInfos.size() == 1) { + // Manually update the DT if there's a single exiting node. In that case + // there's a single exit node and it is sufficient to update the nodes + // immediately dominated by the original exiting block. They will become + // dominated by the first exiting block that leaves the loop after + // unrolling. Note that the CFG inside the loop does not change, so there's + // no need to update the DT inside the unrolled loop. + DTUToUse = nullptr; + auto &[OriginalExit, Info] = *ExitInfos.begin(); + if (!Info.FirstExitingBlock) + Info.FirstExitingBlock = Info.ExitingBlocks.back(); + for (auto *C : to_vector(DT->getNode(OriginalExit)->children())) { + if (L->contains(C->getBlock())) + continue; + C->setIDom(DT->getNode(Info.FirstExitingBlock)); + } + } else { + DTU.applyUpdates(DTUpdates); + } + + // When completely unrolling, the last latch becomes unreachable. + if (!LatchIsExiting && CompletelyUnroll) { + // There is no need to update the DT here, because there must be a unique + // latch. Hence if the latch is not exiting it must directly branch back to + // the original loop header and does not dominate any nodes. + assert(LatchBlock->getSingleSuccessor() && "Loop with multiple latches?"); + changeToUnreachable(Latches.back()->getTerminator(), PreserveLCSSA); + } + + // Merge adjacent basic blocks, if possible. + for (BasicBlock *Latch : Latches) { + BranchInst *Term = dyn_cast<BranchInst>(Latch->getTerminator()); + assert((Term || + (CompletelyUnroll && !LatchIsExiting && Latch == Latches.back())) && + "Need a branch as terminator, except when fully unrolling with " + "unconditional latch"); + if (Term && Term->isUnconditional()) { + BasicBlock *Dest = Term->getSuccessor(0); + BasicBlock *Fold = Dest->getUniquePredecessor(); + if (MergeBlockIntoPredecessor(Dest, /*DTU=*/DTUToUse, LI, + /*MSSAU=*/nullptr, /*MemDep=*/nullptr, + /*PredecessorWithTwoSuccessors=*/false, + DTUToUse ? nullptr : DT)) { + // Dest has been folded into Fold. Update our worklists accordingly. + std::replace(Latches.begin(), Latches.end(), Dest, Fold); + llvm::erase_value(UnrolledLoopBlocks, Dest); + } + } + } + + if (DTUToUse) { + // Apply updates to the DomTree. + DT = &DTU.getDomTree(); + } + assert(!UnrollVerifyDomtree || + DT->verify(DominatorTree::VerificationLevel::Fast)); + + // At this point, the code is well formed. We now simplify the unrolled loop, + // doing constant propagation and dead code elimination as we go. + simplifyLoopAfterUnroll(L, !CompletelyUnroll && ULO.Count > 1, LI, SE, DT, AC, + TTI); + + NumCompletelyUnrolled += CompletelyUnroll; + ++NumUnrolled; + + Loop *OuterL = L->getParentLoop(); + // Update LoopInfo if the loop is completely removed. + if (CompletelyUnroll) + LI->erase(L); + + // LoopInfo should not be valid, confirm that. + if (UnrollVerifyLoopInfo) + LI->verify(*DT); + + // After complete unrolling most of the blocks should be contained in OuterL. + // However, some of them might happen to be out of OuterL (e.g. if they + // precede a loop exit). In this case we might need to insert PHI nodes in + // order to preserve LCSSA form. 
+ // We don't need to check this if we already know that we need to fix LCSSA + // form. + // TODO: For now we just recompute LCSSA for the outer loop in this case, but + // it should be possible to fix it in-place. + if (PreserveLCSSA && OuterL && CompletelyUnroll && !NeedToFixLCSSA) + NeedToFixLCSSA |= ::needToInsertPhisForLCSSA(OuterL, UnrolledLoopBlocks, LI); + + // Make sure that loop-simplify form is preserved. We want to simplify + // at least one layer outside of the loop that was unrolled so that any + // changes to the parent loop exposed by the unrolling are considered. + if (OuterL) { + // OuterL includes all loops for which we can break loop-simplify, so + // it's sufficient to simplify only it (it'll recursively simplify inner + // loops too). + if (NeedToFixLCSSA) { + // LCSSA must be performed on the outermost affected loop. The unrolled + // loop's last loop latch is guaranteed to be in the outermost loop + // after LoopInfo's been updated by LoopInfo::erase. + Loop *LatchLoop = LI->getLoopFor(Latches.back()); + Loop *FixLCSSALoop = OuterL; + if (!FixLCSSALoop->contains(LatchLoop)) + while (FixLCSSALoop->getParentLoop() != LatchLoop) + FixLCSSALoop = FixLCSSALoop->getParentLoop(); + + formLCSSARecursively(*FixLCSSALoop, *DT, LI, SE); + } else if (PreserveLCSSA) { + assert(OuterL->isLCSSAForm(*DT) && + "Loops should be in LCSSA form after loop-unroll."); + } + + // TODO: That potentially might be compile-time expensive. We should try + // to fix the loop-simplified form incrementally. + simplifyLoop(OuterL, DT, LI, SE, AC, nullptr, PreserveLCSSA); + } else { + // Simplify loops for which we might've broken loop-simplify form. + for (Loop *SubLoop : LoopsToSimplify) + simplifyLoop(SubLoop, DT, LI, SE, AC, nullptr, PreserveLCSSA); + } + + return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled + : LoopUnrollResult::PartiallyUnrolled; +} + +/// Given an llvm.loop loop id metadata node, returns the loop hint metadata +/// node with the given name (for example, "llvm.loop.unroll.count"). If no +/// such metadata node exists, then nullptr is returned. +MDNode *llvm::GetUnrollMetadata(MDNode *LoopID, StringRef Name) { + // First operand should refer to the loop id itself. + assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); + assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); + + for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) { + MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); + if (!MD) + continue; + + MDString *S = dyn_cast<MDString>(MD->getOperand(0)); + if (!S) + continue; + + if (Name.equals(S->getString())) + return MD; + } + return nullptr; +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LoopUnrollAndJam.cpp new file mode 100644 index 0000000000..b125e952ec --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/LoopUnrollAndJam.cpp @@ -0,0 +1,999 @@ +//===-- LoopUnrollAndJam.cpp - Loop unrolling utilities -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements loop unroll and jam as a routine, much like +// LoopUnroll.cpp implements loop unroll. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/DomTreeUpdater.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/MustExecute.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/IR/ValueMap.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GenericDomTree.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/UnrollLoop.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include <assert.h> +#include <memory> +#include <type_traits> +#include <vector> + +using namespace llvm; + +#define DEBUG_TYPE "loop-unroll-and-jam" + +STATISTIC(NumUnrolledAndJammed, "Number of loops unroll and jammed"); +STATISTIC(NumCompletelyUnrolledAndJammed, "Number of loops unroll and jammed"); + +typedef SmallPtrSet<BasicBlock *, 4> BasicBlockSet; + +// Partition blocks in an outer/inner loop pair into blocks before and after +// the loop +static bool partitionLoopBlocks(Loop &L, BasicBlockSet &ForeBlocks, + BasicBlockSet &AftBlocks, DominatorTree &DT) { + Loop *SubLoop = L.getSubLoops()[0]; + BasicBlock *SubLoopLatch = SubLoop->getLoopLatch(); + + for (BasicBlock *BB : L.blocks()) { + if (!SubLoop->contains(BB)) { + if (DT.dominates(SubLoopLatch, BB)) + AftBlocks.insert(BB); + else + ForeBlocks.insert(BB); + } + } + + // Check that all blocks in ForeBlocks together dominate the subloop + // TODO: This might ideally be done better with a dominator/postdominators. + BasicBlock *SubLoopPreHeader = SubLoop->getLoopPreheader(); + for (BasicBlock *BB : ForeBlocks) { + if (BB == SubLoopPreHeader) + continue; + Instruction *TI = BB->getTerminator(); + for (BasicBlock *Succ : successors(TI)) + if (!ForeBlocks.count(Succ)) + return false; + } + + return true; +} + +/// Partition blocks in a loop nest into blocks before and after each inner +/// loop. +static bool partitionOuterLoopBlocks( + Loop &Root, Loop &JamLoop, BasicBlockSet &JamLoopBlocks, + DenseMap<Loop *, BasicBlockSet> &ForeBlocksMap, + DenseMap<Loop *, BasicBlockSet> &AftBlocksMap, DominatorTree &DT) { + JamLoopBlocks.insert(JamLoop.block_begin(), JamLoop.block_end()); + + for (Loop *L : Root.getLoopsInPreorder()) { + if (L == &JamLoop) + break; + + if (!partitionLoopBlocks(*L, ForeBlocksMap[L], AftBlocksMap[L], DT)) + return false; + } + + return true; +} + +// TODO Remove when UnrollAndJamLoop changed to support unroll and jamming more +// than 2 levels loop. 
+static bool partitionOuterLoopBlocks(Loop *L, Loop *SubLoop,
+ BasicBlockSet &ForeBlocks,
+ BasicBlockSet &SubLoopBlocks,
+ BasicBlockSet &AftBlocks,
+ DominatorTree *DT) {
+ SubLoopBlocks.insert(SubLoop->block_begin(), SubLoop->block_end());
+ return partitionLoopBlocks(*L, ForeBlocks, AftBlocks, *DT);
+}
+
+// Looks at the phi nodes in Header for values coming from Latch. For these
+// instructions and all their operands calls Visit on them, keeping going for
+// all the operands in AftBlocks. Returns false if Visit returns false,
+// otherwise returns true. This is used to process the instructions in the
+// Aft blocks that need to be moved before the subloop. It is used in two
+// places. One to check that the required set of instructions can be moved
+// before the loop. Then to collect the instructions to actually move in
+// moveHeaderPhiOperandsToForeBlocks.
+template <typename T>
+static bool processHeaderPhiOperands(BasicBlock *Header, BasicBlock *Latch,
+ BasicBlockSet &AftBlocks, T Visit) {
+ SmallPtrSet<Instruction *, 8> VisitedInstr;
+
+ std::function<bool(Instruction * I)> ProcessInstr = [&](Instruction *I) {
+ if (VisitedInstr.count(I))
+ return true;
+
+ VisitedInstr.insert(I);
+
+ if (AftBlocks.count(I->getParent()))
+ for (auto &U : I->operands())
+ if (Instruction *II = dyn_cast<Instruction>(U))
+ if (!ProcessInstr(II))
+ return false;
+
+ return Visit(I);
+ };
+
+ for (auto &Phi : Header->phis()) {
+ Value *V = Phi.getIncomingValueForBlock(Latch);
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (!ProcessInstr(I))
+ return false;
+ }
+
+ return true;
+}
+
+// Move the phi operands of Header from Latch out of AftBlocks to InsertLoc.
+static void moveHeaderPhiOperandsToForeBlocks(BasicBlock *Header,
+ BasicBlock *Latch,
+ Instruction *InsertLoc,
+ BasicBlockSet &AftBlocks) {
+ // We need to ensure we move the instructions in the correct order,
+ // starting with the earliest required instruction and moving forward.
+ processHeaderPhiOperands(Header, Latch, AftBlocks,
+ [&AftBlocks, &InsertLoc](Instruction *I) {
+ if (AftBlocks.count(I->getParent()))
+ I->moveBefore(InsertLoc);
+ return true;
+ });
+}
+
+/*
+ This method performs Unroll and Jam. For a simple loop like:
+ for (i = ..)
+ Fore(i)
+ for (j = ..)
+ SubLoop(i, j)
+ Aft(i)
+
+ Instead of doing normal inner or outer unrolling, we do:
+ for (i = .., i+=2)
+ Fore(i)
+ Fore(i+1)
+ for (j = ..)
+ SubLoop(i, j)
+ SubLoop(i+1, j)
+ Aft(i)
+ Aft(i+1)
+
+ So the outer loop is essentially unrolled and then the inner loops are fused
+ ("jammed") together into a single loop. This can increase speed when there
+ are loads in SubLoop that are invariant to i, as they become shared between
+ the now jammed inner loops.
+
+ We do this by splitting the blocks in the loop into Fore, Subloop and Aft.
+ Fore blocks are those before the inner loop, Aft are those after. Normal
+ Unroll code is used to copy each of these sets of blocks and the results are
+ combined together into the final form above.
+
+ isSafeToUnrollAndJam should be used prior to calling this to make sure the
+ unrolling will be valid. Checking profitability is also advisable.
+
+ If EpilogueLoop is non-null, it receives the epilogue loop (if it was
+ necessary to create one and not fully unrolled).
+*/ +LoopUnrollResult +llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, + unsigned TripMultiple, bool UnrollRemainder, + LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, + AssumptionCache *AC, const TargetTransformInfo *TTI, + OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop) { + + // When we enter here we should have already checked that it is safe + BasicBlock *Header = L->getHeader(); + assert(Header && "No header."); + assert(L->getSubLoops().size() == 1); + Loop *SubLoop = *L->begin(); + + // Don't enter the unroll code if there is nothing to do. + if (TripCount == 0 && Count < 2) { + LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; almost nothing to do\n"); + return LoopUnrollResult::Unmodified; + } + + assert(Count > 0); + assert(TripMultiple > 0); + assert(TripCount == 0 || TripCount % TripMultiple == 0); + + // Are we eliminating the loop control altogether? + bool CompletelyUnroll = (Count == TripCount); + + // We use the runtime remainder in cases where we don't know trip multiple + if (TripMultiple % Count != 0) { + if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false, + /*UseEpilogRemainder*/ true, + UnrollRemainder, /*ForgetAllSCEV*/ false, + LI, SE, DT, AC, TTI, true, EpilogueLoop)) { + LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be " + "generated when assuming runtime trip count\n"); + return LoopUnrollResult::Unmodified; + } + } + + // Notify ScalarEvolution that the loop will be substantially changed, + // if not outright eliminated. + if (SE) { + SE->forgetLoop(L); + SE->forgetBlockAndLoopDispositions(); + } + + using namespace ore; + // Report the unrolling decision. + if (CompletelyUnroll) { + LLVM_DEBUG(dbgs() << "COMPLETELY UNROLL AND JAMMING loop %" + << Header->getName() << " with trip count " << TripCount + << "!\n"); + ORE->emit(OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(), + L->getHeader()) + << "completely unroll and jammed loop with " + << NV("UnrollCount", TripCount) << " iterations"); + } else { + auto DiagBuilder = [&]() { + OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(), + L->getHeader()); + return Diag << "unroll and jammed loop by a factor of " + << NV("UnrollCount", Count); + }; + + LLVM_DEBUG(dbgs() << "UNROLL AND JAMMING loop %" << Header->getName() + << " by " << Count); + if (TripMultiple != 1) { + LLVM_DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); + ORE->emit([&]() { + return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple) + << " trips per branch"; + }); + } else { + LLVM_DEBUG(dbgs() << " with run-time trip count"); + ORE->emit([&]() { return DiagBuilder() << " with run-time trip count"; }); + } + LLVM_DEBUG(dbgs() << "!\n"); + } + + BasicBlock *Preheader = L->getLoopPreheader(); + BasicBlock *LatchBlock = L->getLoopLatch(); + assert(Preheader && "No preheader"); + assert(LatchBlock && "No latch block"); + BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); + assert(BI && !BI->isUnconditional()); + bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); + BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue); + bool SubLoopContinueOnTrue = SubLoop->contains( + SubLoop->getLoopLatch()->getTerminator()->getSuccessor(0)); + + // Partition blocks in an outer/inner loop pair into blocks before and after + // the loop + BasicBlockSet SubLoopBlocks; + BasicBlockSet ForeBlocks; + BasicBlockSet AftBlocks; + partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks, AftBlocks, + DT); + + 
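With the Fore/SubLoop/Aft partition now computed, a concrete source-level picture of the transformation this routine performs (hypothetical code, not from the patch; unroll-and-jam factor of 2, with the remainder handled by a separate epilogue as in the earlier sketch):

// Original loop nest, annotated with the block sets.
void mv(float *out, const float *m, const float *v, int n) {
  for (int i = 0; i < n; ++i) {
    float acc = 0.0f;              // Fore(i): before the inner loop
    for (int j = 0; j < n; ++j)    // SubLoop(i, j)
      acc += m[i * n + j] * v[j];
    out[i] = acc;                  // Aft(i): after the inner loop
  }
}

// Unroll-and-jam by 2: the two inner loops are fused, so v[j] is loaded once
// per j and shared by both outer iterations.
void mv_uaj2(float *out, const float *m, const float *v, int n) {
  int i = 0;
  for (; i + 1 < n; i += 2) {
    float acc0 = 0.0f;             // Fore(i) and Fore(i+1)
    float acc1 = 0.0f;
    for (int j = 0; j < n; ++j) {  // jammed SubLoop(i, j) and SubLoop(i+1, j)
      acc0 += m[i * n + j] * v[j];
      acc1 += m[(i + 1) * n + j] * v[j];
    }
    out[i] = acc0;                 // Aft(i) and Aft(i+1)
    out[i + 1] = acc1;
  }
  for (; i < n; ++i) {             // epilogue for odd n
    float acc = 0.0f;
    for (int j = 0; j < n; ++j)
      acc += m[i * n + j] * v[j];
    out[i] = acc;
  }
}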
// We keep track of the entering/first and exiting/last block of each of + // Fore/SubLoop/Aft in each iteration. This helps make the stapling up of + // blocks easier. + std::vector<BasicBlock *> ForeBlocksFirst; + std::vector<BasicBlock *> ForeBlocksLast; + std::vector<BasicBlock *> SubLoopBlocksFirst; + std::vector<BasicBlock *> SubLoopBlocksLast; + std::vector<BasicBlock *> AftBlocksFirst; + std::vector<BasicBlock *> AftBlocksLast; + ForeBlocksFirst.push_back(Header); + ForeBlocksLast.push_back(SubLoop->getLoopPreheader()); + SubLoopBlocksFirst.push_back(SubLoop->getHeader()); + SubLoopBlocksLast.push_back(SubLoop->getExitingBlock()); + AftBlocksFirst.push_back(SubLoop->getExitBlock()); + AftBlocksLast.push_back(L->getExitingBlock()); + // Maps Blocks[0] -> Blocks[It] + ValueToValueMapTy LastValueMap; + + // Move any instructions from fore phi operands from AftBlocks into Fore. + moveHeaderPhiOperandsToForeBlocks( + Header, LatchBlock, ForeBlocksLast[0]->getTerminator(), AftBlocks); + + // The current on-the-fly SSA update requires blocks to be processed in + // reverse postorder so that LastValueMap contains the correct value at each + // exit. + LoopBlocksDFS DFS(L); + DFS.perform(LI); + // Stash the DFS iterators before adding blocks to the loop. + LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); + LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); + + // When a FSDiscriminator is enabled, we don't need to add the multiply + // factors to the discriminators. + if (Header->getParent()->shouldEmitDebugInfoForProfiling() && + !EnableFSDiscriminator) + for (BasicBlock *BB : L->getBlocks()) + for (Instruction &I : *BB) + if (!isa<DbgInfoIntrinsic>(&I)) + if (const DILocation *DIL = I.getDebugLoc()) { + auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(Count); + if (NewDIL) + I.setDebugLoc(*NewDIL); + else + LLVM_DEBUG(dbgs() + << "Failed to create new discriminator: " + << DIL->getFilename() << " Line: " << DIL->getLine()); + } + + // Copy all blocks + for (unsigned It = 1; It != Count; ++It) { + SmallVector<BasicBlock *, 8> NewBlocks; + // Maps Blocks[It] -> Blocks[It-1] + DenseMap<Value *, Value *> PrevItValueMap; + SmallDenseMap<const Loop *, Loop *, 4> NewLoops; + NewLoops[L] = L; + NewLoops[SubLoop] = SubLoop; + + for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { + ValueToValueMapTy VMap; + BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It)); + Header->getParent()->insert(Header->getParent()->end(), New); + + // Tell LI about New. + addClonedBlockToLoopInfo(*BB, New, LI, NewLoops); + + if (ForeBlocks.count(*BB)) { + if (*BB == ForeBlocksFirst[0]) + ForeBlocksFirst.push_back(New); + if (*BB == ForeBlocksLast[0]) + ForeBlocksLast.push_back(New); + } else if (SubLoopBlocks.count(*BB)) { + if (*BB == SubLoopBlocksFirst[0]) + SubLoopBlocksFirst.push_back(New); + if (*BB == SubLoopBlocksLast[0]) + SubLoopBlocksLast.push_back(New); + } else if (AftBlocks.count(*BB)) { + if (*BB == AftBlocksFirst[0]) + AftBlocksFirst.push_back(New); + if (*BB == AftBlocksLast[0]) + AftBlocksLast.push_back(New); + } else { + llvm_unreachable("BB being cloned should be in Fore/Sub/Aft"); + } + + // Update our running maps of newest clones + PrevItValueMap[New] = (It == 1 ? *BB : LastValueMap[*BB]); + LastValueMap[*BB] = New; + for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); + VI != VE; ++VI) { + PrevItValueMap[VI->second] = + const_cast<Value *>(It == 1 ? 
VI->first : LastValueMap[VI->first]); + LastValueMap[VI->first] = VI->second; + } + + NewBlocks.push_back(New); + + // Update DomTree: + if (*BB == ForeBlocksFirst[0]) + DT->addNewBlock(New, ForeBlocksLast[It - 1]); + else if (*BB == SubLoopBlocksFirst[0]) + DT->addNewBlock(New, SubLoopBlocksLast[It - 1]); + else if (*BB == AftBlocksFirst[0]) + DT->addNewBlock(New, AftBlocksLast[It - 1]); + else { + // Each set of blocks (Fore/Sub/Aft) will have the same internal domtree + // structure. + auto BBDomNode = DT->getNode(*BB); + auto BBIDom = BBDomNode->getIDom(); + BasicBlock *OriginalBBIDom = BBIDom->getBlock(); + assert(OriginalBBIDom); + assert(LastValueMap[cast<Value>(OriginalBBIDom)]); + DT->addNewBlock( + New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)])); + } + } + + // Remap all instructions in the most recent iteration + remapInstructionsInBlocks(NewBlocks, LastValueMap); + for (BasicBlock *NewBlock : NewBlocks) { + for (Instruction &I : *NewBlock) { + if (auto *II = dyn_cast<AssumeInst>(&I)) + AC->registerAssumption(II); + } + } + + // Alter the ForeBlocks phi's, pointing them at the latest version of the + // value from the previous iteration's phis + for (PHINode &Phi : ForeBlocksFirst[It]->phis()) { + Value *OldValue = Phi.getIncomingValueForBlock(AftBlocksLast[It]); + assert(OldValue && "should have incoming edge from Aft[It]"); + Value *NewValue = OldValue; + if (Value *PrevValue = PrevItValueMap[OldValue]) + NewValue = PrevValue; + + assert(Phi.getNumOperands() == 2); + Phi.setIncomingBlock(0, ForeBlocksLast[It - 1]); + Phi.setIncomingValue(0, NewValue); + Phi.removeIncomingValue(1); + } + } + + // Now that all the basic blocks for the unrolled iterations are in place, + // finish up connecting the blocks and phi nodes. At this point LastValueMap + // is the last unrolled iterations values. 
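+  // The resulting block order is Fore(1)..Fore(Count), then
+  // SubLoop(1)..SubLoop(Count) chained into one jammed inner loop, then
+  // Aft(1)..Aft(Count), which either branches back to Fore(1) or exits.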
+ + // Update Phis in BB from OldBB to point to NewBB and use the latest value + // from LastValueMap + auto updatePHIBlocksAndValues = [](BasicBlock *BB, BasicBlock *OldBB, + BasicBlock *NewBB, + ValueToValueMapTy &LastValueMap) { + for (PHINode &Phi : BB->phis()) { + for (unsigned b = 0; b < Phi.getNumIncomingValues(); ++b) { + if (Phi.getIncomingBlock(b) == OldBB) { + Value *OldValue = Phi.getIncomingValue(b); + if (Value *LastValue = LastValueMap[OldValue]) + Phi.setIncomingValue(b, LastValue); + Phi.setIncomingBlock(b, NewBB); + break; + } + } + } + }; + // Move all the phis from Src into Dest + auto movePHIs = [](BasicBlock *Src, BasicBlock *Dest) { + Instruction *insertPoint = Dest->getFirstNonPHI(); + while (PHINode *Phi = dyn_cast<PHINode>(Src->begin())) + Phi->moveBefore(insertPoint); + }; + + // Update the PHI values outside the loop to point to the last block + updatePHIBlocksAndValues(LoopExit, AftBlocksLast[0], AftBlocksLast.back(), + LastValueMap); + + // Update ForeBlocks successors and phi nodes + BranchInst *ForeTerm = + cast<BranchInst>(ForeBlocksLast.back()->getTerminator()); + assert(ForeTerm->getNumSuccessors() == 1 && "Expecting one successor"); + ForeTerm->setSuccessor(0, SubLoopBlocksFirst[0]); + + if (CompletelyUnroll) { + while (PHINode *Phi = dyn_cast<PHINode>(ForeBlocksFirst[0]->begin())) { + Phi->replaceAllUsesWith(Phi->getIncomingValueForBlock(Preheader)); + Phi->eraseFromParent(); + } + } else { + // Update the PHI values to point to the last aft block + updatePHIBlocksAndValues(ForeBlocksFirst[0], AftBlocksLast[0], + AftBlocksLast.back(), LastValueMap); + } + + for (unsigned It = 1; It != Count; It++) { + // Remap ForeBlock successors from previous iteration to this + BranchInst *ForeTerm = + cast<BranchInst>(ForeBlocksLast[It - 1]->getTerminator()); + assert(ForeTerm->getNumSuccessors() == 1 && "Expecting one successor"); + ForeTerm->setSuccessor(0, ForeBlocksFirst[It]); + } + + // Subloop successors and phis + BranchInst *SubTerm = + cast<BranchInst>(SubLoopBlocksLast.back()->getTerminator()); + SubTerm->setSuccessor(!SubLoopContinueOnTrue, SubLoopBlocksFirst[0]); + SubTerm->setSuccessor(SubLoopContinueOnTrue, AftBlocksFirst[0]); + SubLoopBlocksFirst[0]->replacePhiUsesWith(ForeBlocksLast[0], + ForeBlocksLast.back()); + SubLoopBlocksFirst[0]->replacePhiUsesWith(SubLoopBlocksLast[0], + SubLoopBlocksLast.back()); + + for (unsigned It = 1; It != Count; It++) { + // Replace the conditional branch of the previous iteration subloop with an + // unconditional one to this one + BranchInst *SubTerm = + cast<BranchInst>(SubLoopBlocksLast[It - 1]->getTerminator()); + BranchInst::Create(SubLoopBlocksFirst[It], SubTerm); + SubTerm->eraseFromParent(); + + SubLoopBlocksFirst[It]->replacePhiUsesWith(ForeBlocksLast[It], + ForeBlocksLast.back()); + SubLoopBlocksFirst[It]->replacePhiUsesWith(SubLoopBlocksLast[It], + SubLoopBlocksLast.back()); + movePHIs(SubLoopBlocksFirst[It], SubLoopBlocksFirst[0]); + } + + // Aft blocks successors and phis + BranchInst *AftTerm = cast<BranchInst>(AftBlocksLast.back()->getTerminator()); + if (CompletelyUnroll) { + BranchInst::Create(LoopExit, AftTerm); + AftTerm->eraseFromParent(); + } else { + AftTerm->setSuccessor(!ContinueOnTrue, ForeBlocksFirst[0]); + assert(AftTerm->getSuccessor(ContinueOnTrue) == LoopExit && + "Expecting the ContinueOnTrue successor of AftTerm to be LoopExit"); + } + AftBlocksFirst[0]->replacePhiUsesWith(SubLoopBlocksLast[0], + SubLoopBlocksLast.back()); + + for (unsigned It = 1; It != Count; It++) { + // Replace the 
conditional branch of the previous iteration subloop with an + // unconditional one to this one + BranchInst *AftTerm = + cast<BranchInst>(AftBlocksLast[It - 1]->getTerminator()); + BranchInst::Create(AftBlocksFirst[It], AftTerm); + AftTerm->eraseFromParent(); + + AftBlocksFirst[It]->replacePhiUsesWith(SubLoopBlocksLast[It], + SubLoopBlocksLast.back()); + movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]); + } + + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); + // Dominator Tree. Remove the old links between Fore, Sub and Aft, adding the + // new ones required. + if (Count != 1) { + SmallVector<DominatorTree::UpdateType, 4> DTUpdates; + DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete, ForeBlocksLast[0], + SubLoopBlocksFirst[0]); + DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete, + SubLoopBlocksLast[0], AftBlocksFirst[0]); + + DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert, + ForeBlocksLast.back(), SubLoopBlocksFirst[0]); + DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert, + SubLoopBlocksLast.back(), AftBlocksFirst[0]); + DTU.applyUpdatesPermissive(DTUpdates); + } + + // Merge adjacent basic blocks, if possible. + SmallPtrSet<BasicBlock *, 16> MergeBlocks; + MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end()); + MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end()); + MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end()); + + MergeBlockSuccessorsIntoGivenBlocks(MergeBlocks, L, &DTU, LI); + + // Apply updates to the DomTree. + DT = &DTU.getDomTree(); + + // At this point, the code is well formed. We now do a quick sweep over the + // inserted code, doing constant propagation and dead code elimination as we + // go. + simplifyLoopAfterUnroll(SubLoop, true, LI, SE, DT, AC, TTI); + simplifyLoopAfterUnroll(L, !CompletelyUnroll && Count > 1, LI, SE, DT, AC, + TTI); + + NumCompletelyUnrolledAndJammed += CompletelyUnroll; + ++NumUnrolledAndJammed; + + // Update LoopInfo if the loop is completely removed. + if (CompletelyUnroll) + LI->erase(L); + +#ifndef NDEBUG + // We shouldn't have done anything to break loop simplify form or LCSSA. + Loop *OutestLoop = SubLoop->getParentLoop() + ? SubLoop->getParentLoop()->getParentLoop() + ? SubLoop->getParentLoop()->getParentLoop() + : SubLoop->getParentLoop() + : SubLoop; + assert(DT->verify()); + LI->verify(*DT); + assert(OutestLoop->isRecursivelyLCSSAForm(*DT, *LI)); + if (!CompletelyUnroll) + assert(L->isLoopSimplifyForm()); + assert(SubLoop->isLoopSimplifyForm()); + SE->verify(); +#endif + + return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled + : LoopUnrollResult::PartiallyUnrolled; +} + +static bool getLoadsAndStores(BasicBlockSet &Blocks, + SmallVector<Instruction *, 4> &MemInstr) { + // Scan the BBs and collect legal loads and stores. + // Returns false if non-simple loads/stores are found. + for (BasicBlock *BB : Blocks) { + for (Instruction &I : *BB) { + if (auto *Ld = dyn_cast<LoadInst>(&I)) { + if (!Ld->isSimple()) + return false; + MemInstr.push_back(&I); + } else if (auto *St = dyn_cast<StoreInst>(&I)) { + if (!St->isSimple()) + return false; + MemInstr.push_back(&I); + } else if (I.mayReadOrWriteMemory()) { + return false; + } + } + } + return true; +} + +static bool preservesForwardDependence(Instruction *Src, Instruction *Dst, + unsigned UnrollLevel, unsigned JamLevel, + bool Sequentialized, Dependence *D) { + // UnrollLevel might carry the dependency Src --> Dst + // Does a different loop after unrolling? 
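+  // That is, check whether some loop level between the unroll level and the
+  // jam level still carries Src --> Dst in the forward (LT) direction before
+  // any backward (GT) component is seen.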
+ for (unsigned CurLoopDepth = UnrollLevel + 1; CurLoopDepth <= JamLevel; + ++CurLoopDepth) { + auto JammedDir = D->getDirection(CurLoopDepth); + if (JammedDir == Dependence::DVEntry::LT) + return true; + + if (JammedDir & Dependence::DVEntry::GT) + return false; + } + + return true; +} + +static bool preservesBackwardDependence(Instruction *Src, Instruction *Dst, + unsigned UnrollLevel, unsigned JamLevel, + bool Sequentialized, Dependence *D) { + // UnrollLevel might carry the dependency Dst --> Src + for (unsigned CurLoopDepth = UnrollLevel + 1; CurLoopDepth <= JamLevel; + ++CurLoopDepth) { + auto JammedDir = D->getDirection(CurLoopDepth); + if (JammedDir == Dependence::DVEntry::GT) + return true; + + if (JammedDir & Dependence::DVEntry::LT) + return false; + } + + // Backward dependencies are only preserved if not interleaved. + return Sequentialized; +} + +// Check whether it is semantically safe Src and Dst considering any potential +// dependency between them. +// +// @param UnrollLevel The level of the loop being unrolled +// @param JamLevel The level of the loop being jammed; if Src and Dst are on +// different levels, the outermost common loop counts as jammed level +// +// @return true if is safe and false if there is a dependency violation. +static bool checkDependency(Instruction *Src, Instruction *Dst, + unsigned UnrollLevel, unsigned JamLevel, + bool Sequentialized, DependenceInfo &DI) { + assert(UnrollLevel <= JamLevel && + "Expecting JamLevel to be at least UnrollLevel"); + + if (Src == Dst) + return true; + // Ignore Input dependencies. + if (isa<LoadInst>(Src) && isa<LoadInst>(Dst)) + return true; + + // Check whether unroll-and-jam may violate a dependency. + // By construction, every dependency will be lexicographically non-negative + // (if it was, it would violate the current execution order), such as + // (0,0,>,*,*) + // Unroll-and-jam changes the GT execution of two executions to the same + // iteration of the chosen unroll level. That is, a GT dependence becomes a GE + // dependence (or EQ, if we fully unrolled the loop) at the loop's position: + // (0,0,>=,*,*) + // Now, the dependency is not necessarily non-negative anymore, i.e. + // unroll-and-jam may violate correctness. + std::unique_ptr<Dependence> D = DI.depends(Src, Dst, true); + if (!D) + return true; + assert(D->isOrdered() && "Expected an output, flow or anti dep."); + + if (D->isConfused()) { + LLVM_DEBUG(dbgs() << " Confused dependency between:\n" + << " " << *Src << "\n" + << " " << *Dst << "\n"); + return false; + } + + // If outer levels (levels enclosing the loop being unroll-and-jammed) have a + // non-equal direction, then the locations accessed in the inner levels cannot + // overlap in memory. We assumes the indexes never overlap into neighboring + // dimensions. + for (unsigned CurLoopDepth = 1; CurLoopDepth < UnrollLevel; ++CurLoopDepth) + if (!(D->getDirection(CurLoopDepth) & Dependence::DVEntry::EQ)) + return true; + + auto UnrollDirection = D->getDirection(UnrollLevel); + + // If the distance carried by the unrolled loop is 0, then after unrolling + // that distance will become non-zero resulting in non-overlapping accesses in + // the inner loops. 
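+  // For example, accesses A[i][j] and A[i][j-1] give a dependence with
+  // direction (=, <) at the (UnrollLevel, JamLevel) pair; the unrolled copies
+  // for i and i+1 touch disjoint rows, so jamming the inner loops is safe.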
+ if (UnrollDirection == Dependence::DVEntry::EQ) + return true; + + if (UnrollDirection & Dependence::DVEntry::LT && + !preservesForwardDependence(Src, Dst, UnrollLevel, JamLevel, + Sequentialized, D.get())) + return false; + + if (UnrollDirection & Dependence::DVEntry::GT && + !preservesBackwardDependence(Src, Dst, UnrollLevel, JamLevel, + Sequentialized, D.get())) + return false; + + return true; +} + +static bool +checkDependencies(Loop &Root, const BasicBlockSet &SubLoopBlocks, + const DenseMap<Loop *, BasicBlockSet> &ForeBlocksMap, + const DenseMap<Loop *, BasicBlockSet> &AftBlocksMap, + DependenceInfo &DI, LoopInfo &LI) { + SmallVector<BasicBlockSet, 8> AllBlocks; + for (Loop *L : Root.getLoopsInPreorder()) + if (ForeBlocksMap.find(L) != ForeBlocksMap.end()) + AllBlocks.push_back(ForeBlocksMap.lookup(L)); + AllBlocks.push_back(SubLoopBlocks); + for (Loop *L : Root.getLoopsInPreorder()) + if (AftBlocksMap.find(L) != AftBlocksMap.end()) + AllBlocks.push_back(AftBlocksMap.lookup(L)); + + unsigned LoopDepth = Root.getLoopDepth(); + SmallVector<Instruction *, 4> EarlierLoadsAndStores; + SmallVector<Instruction *, 4> CurrentLoadsAndStores; + for (BasicBlockSet &Blocks : AllBlocks) { + CurrentLoadsAndStores.clear(); + if (!getLoadsAndStores(Blocks, CurrentLoadsAndStores)) + return false; + + Loop *CurLoop = LI.getLoopFor((*Blocks.begin())->front().getParent()); + unsigned CurLoopDepth = CurLoop->getLoopDepth(); + + for (auto *Earlier : EarlierLoadsAndStores) { + Loop *EarlierLoop = LI.getLoopFor(Earlier->getParent()); + unsigned EarlierDepth = EarlierLoop->getLoopDepth(); + unsigned CommonLoopDepth = std::min(EarlierDepth, CurLoopDepth); + for (auto *Later : CurrentLoadsAndStores) { + if (!checkDependency(Earlier, Later, LoopDepth, CommonLoopDepth, false, + DI)) + return false; + } + } + + size_t NumInsts = CurrentLoadsAndStores.size(); + for (size_t I = 0; I < NumInsts; ++I) { + for (size_t J = I; J < NumInsts; ++J) { + if (!checkDependency(CurrentLoadsAndStores[I], CurrentLoadsAndStores[J], + LoopDepth, CurLoopDepth, true, DI)) + return false; + } + } + + EarlierLoadsAndStores.append(CurrentLoadsAndStores.begin(), + CurrentLoadsAndStores.end()); + } + return true; +} + +static bool isEligibleLoopForm(const Loop &Root) { + // Root must have a child. + if (Root.getSubLoops().size() != 1) + return false; + + const Loop *L = &Root; + do { + // All loops in Root need to be in simplify and rotated form. + if (!L->isLoopSimplifyForm()) + return false; + + if (!L->isRotatedForm()) + return false; + + if (L->getHeader()->hasAddressTaken()) { + LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Address taken\n"); + return false; + } + + unsigned SubLoopsSize = L->getSubLoops().size(); + if (SubLoopsSize == 0) + return true; + + // Only one child is allowed. + if (SubLoopsSize != 1) + return false; + + // Only loops with a single exit block can be unrolled and jammed. + // The function getExitBlock() is used for this check, rather than + // getUniqueExitBlock() to ensure loops with mulitple exit edges are + // disallowed. + if (!L->getExitBlock()) { + LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; only loops with single exit " + "blocks can be unrolled and jammed.\n"); + return false; + } + + // Only loops with a single exiting block can be unrolled and jammed. 
+ if (!L->getExitingBlock()) { + LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; only loops with single " + "exiting blocks can be unrolled and jammed.\n"); + return false; + } + + L = L->getSubLoops()[0]; + } while (L); + + return true; +} + +static Loop *getInnerMostLoop(Loop *L) { + while (!L->getSubLoops().empty()) + L = L->getSubLoops()[0]; + return L; +} + +bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT, + DependenceInfo &DI, LoopInfo &LI) { + if (!isEligibleLoopForm(*L)) { + LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Ineligible loop form\n"); + return false; + } + + /* We currently handle outer loops like this: + | + ForeFirst <------\ } + Blocks | } ForeBlocks of L + ForeLast | } + | | + ... | + | | + ForeFirst <----\ | } + Blocks | | } ForeBlocks of a inner loop of L + ForeLast | | } + | | | + JamLoopFirst <\ | | } + Blocks | | | } JamLoopBlocks of the innermost loop + JamLoopLast -/ | | } + | | | + AftFirst | | } + Blocks | | } AftBlocks of a inner loop of L + AftLast ------/ | } + | | + ... | + | | + AftFirst | } + Blocks | } AftBlocks of L + AftLast --------/ } + | + + There are (theoretically) any number of blocks in ForeBlocks, SubLoopBlocks + and AftBlocks, providing that there is one edge from Fores to SubLoops, + one edge from SubLoops to Afts and a single outer loop exit (from Afts). + In practice we currently limit Aft blocks to a single block, and limit + things further in the profitablility checks of the unroll and jam pass. + + Because of the way we rearrange basic blocks, we also require that + the Fore blocks of L on all unrolled iterations are safe to move before the + blocks of the direct child of L of all iterations. So we require that the + phi node looping operands of ForeHeader can be moved to at least the end of + ForeEnd, so that we can arrange cloned Fore Blocks before the subloop and + match up Phi's correctly. + + i.e. The old order of blocks used to be + (F1)1 (F2)1 J1_1 J1_2 (A2)1 (A1)1 (F1)2 (F2)2 J2_1 J2_2 (A2)2 (A1)2. + It needs to be safe to transform this to + (F1)1 (F1)2 (F2)1 (F2)2 J1_1 J1_2 J2_1 J2_2 (A2)1 (A2)2 (A1)1 (A1)2. + + There are then a number of checks along the lines of no calls, no + exceptions, inner loop IV is consistent, etc. Note that for loops requiring + runtime unrolling, UnrollRuntimeLoopRemainder can also fail in + UnrollAndJamLoop if the trip count cannot be easily calculated. + */ + + // Split blocks into Fore/SubLoop/Aft based on dominators + Loop *JamLoop = getInnerMostLoop(L); + BasicBlockSet SubLoopBlocks; + DenseMap<Loop *, BasicBlockSet> ForeBlocksMap; + DenseMap<Loop *, BasicBlockSet> AftBlocksMap; + if (!partitionOuterLoopBlocks(*L, *JamLoop, SubLoopBlocks, ForeBlocksMap, + AftBlocksMap, DT)) { + LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Incompatible loop layout\n"); + return false; + } + + // Aft blocks may need to move instructions to fore blocks, which becomes more + // difficult if there are multiple (potentially conditionally executed) + // blocks. For now we just exclude loops with multiple aft blocks. 
+ if (AftBlocksMap[L].size() != 1) { + LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Can't currently handle " + "multiple blocks after the loop\n"); + return false; + } + + // Check inner loop backedge count is consistent on all iterations of the + // outer loop + if (any_of(L->getLoopsInPreorder(), [&SE](Loop *SubLoop) { + return !hasIterationCountInvariantInParent(SubLoop, SE); + })) { + LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Inner loop iteration count is " + "not consistent on each iteration\n"); + return false; + } + + // Check the loop safety info for exceptions. + SimpleLoopSafetyInfo LSI; + LSI.computeLoopSafetyInfo(L); + if (LSI.anyBlockMayThrow()) { + LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Something may throw\n"); + return false; + } + + // We've ruled out the easy stuff and now need to check that there are no + // interdependencies which may prevent us from moving the: + // ForeBlocks before Subloop and AftBlocks. + // Subloop before AftBlocks. + // ForeBlock phi operands before the subloop + + // Make sure we can move all instructions we need to before the subloop + BasicBlock *Header = L->getHeader(); + BasicBlock *Latch = L->getLoopLatch(); + BasicBlockSet AftBlocks = AftBlocksMap[L]; + Loop *SubLoop = L->getSubLoops()[0]; + if (!processHeaderPhiOperands( + Header, Latch, AftBlocks, [&AftBlocks, &SubLoop](Instruction *I) { + if (SubLoop->contains(I->getParent())) + return false; + if (AftBlocks.count(I->getParent())) { + // If we hit a phi node in afts we know we are done (probably + // LCSSA) + if (isa<PHINode>(I)) + return false; + // Can't move instructions with side effects or memory + // reads/writes + if (I->mayHaveSideEffects() || I->mayReadOrWriteMemory()) + return false; + } + // Keep going + return true; + })) { + LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't move required " + "instructions after subloop to before it\n"); + return false; + } + + // Check for memory dependencies which prohibit the unrolling we are doing. + // Because of the way we are unrolling Fore/Sub/Aft blocks, we need to check + // there are no dependencies between Fore-Sub, Fore-Aft, Sub-Aft and Sub-Sub. + if (!checkDependencies(*L, SubLoopBlocks, ForeBlocksMap, AftBlocksMap, DI, + LI)) { + LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; failed dependency check\n"); + return false; + } + + return true; +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LoopUnrollRuntime.cpp new file mode 100644 index 0000000000..b19156bcb4 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -0,0 +1,1008 @@ +//===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements some loop unrolling utilities for loops with run-time +// trip counts. See LoopUnroll.cpp for unrolling loops with compile-time +// trip counts. +// +// The functions in this file are used to generate extra code when the +// run-time trip count modulo the unroll factor is not 0. When this is the +// case, we need to generate code to execute these 'left over' iterations. 
+// +// The current strategy generates an if-then-else sequence prior to the +// unrolled loop to execute the 'left over' iterations before or after the +// unrolled loop. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/DomTreeUpdater.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/ProfDataUtils.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" +#include "llvm/Transforms/Utils/UnrollLoop.h" +#include <algorithm> + +using namespace llvm; + +#define DEBUG_TYPE "loop-unroll" + +STATISTIC(NumRuntimeUnrolled, + "Number of loops unrolled with run-time trip counts"); +static cl::opt<bool> UnrollRuntimeMultiExit( + "unroll-runtime-multi-exit", cl::init(false), cl::Hidden, + cl::desc("Allow runtime unrolling for loops with multiple exits, when " + "epilog is generated")); +static cl::opt<bool> UnrollRuntimeOtherExitPredictable( + "unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden, + cl::desc("Assume the non latch exit block to be predictable")); + +/// Connect the unrolling prolog code to the original loop. +/// The unrolling prolog code contains code to execute the +/// 'extra' iterations if the run-time trip count modulo the +/// unroll count is non-zero. +/// +/// This function performs the following: +/// - Create PHI nodes at prolog end block to combine values +/// that exit the prolog code and jump around the prolog. +/// - Add a PHI operand to a PHI node at the loop exit block +/// for values that exit the prolog and go around the loop. +/// - Branch around the original loop if the trip count is less +/// than the unroll factor. +/// +static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, + BasicBlock *PrologExit, + BasicBlock *OriginalLoopLatchExit, + BasicBlock *PreHeader, BasicBlock *NewPreHeader, + ValueToValueMapTy &VMap, DominatorTree *DT, + LoopInfo *LI, bool PreserveLCSSA, + ScalarEvolution &SE) { + // Loop structure should be the following: + // Preheader + // PrologHeader + // ... + // PrologLatch + // PrologExit + // NewPreheader + // Header + // ... + // Latch + // LatchExit + BasicBlock *Latch = L->getLoopLatch(); + assert(Latch && "Loop must have a latch"); + BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]); + + // Create a PHI node for each outgoing value from the original loop + // (which means it is an outgoing value from the prolog code too). + // The new PHI node is inserted in the prolog end basic block. + // The new PHI node value is added as an operand of a PHI node in either + // the loop header or the loop exit block. + for (BasicBlock *Succ : successors(Latch)) { + for (PHINode &PN : Succ->phis()) { + // Add a new PHI node to the prolog end block and add the + // appropriate incoming values. + // TODO: This code assumes that the PrologExit (or the LatchExit block for + // prolog loop) contains only one predecessor from the loop, i.e. the + // PrologLatch. 
When supporting multiple-exiting block loops, we can have + // two or more blocks that have the LatchExit as the target in the + // original loop. + PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr", + PrologExit->getFirstNonPHI()); + // Adding a value to the new PHI node from the original loop preheader. + // This is the value that skips all the prolog code. + if (L->contains(&PN)) { + // Succ is loop header. + NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), + PreHeader); + } else { + // Succ is LatchExit. + NewPN->addIncoming(UndefValue::get(PN.getType()), PreHeader); + } + + Value *V = PN.getIncomingValueForBlock(Latch); + if (Instruction *I = dyn_cast<Instruction>(V)) { + if (L->contains(I)) { + V = VMap.lookup(I); + } + } + // Adding a value to the new PHI node from the last prolog block + // that was created. + NewPN->addIncoming(V, PrologLatch); + + // Update the existing PHI node operand with the value from the + // new PHI node. How this is done depends on if the existing + // PHI node is in the original loop block, or the exit block. + if (L->contains(&PN)) + PN.setIncomingValueForBlock(NewPreHeader, NewPN); + else + PN.addIncoming(NewPN, PrologExit); + SE.forgetValue(&PN); + } + } + + // Make sure that created prolog loop is in simplified form + SmallVector<BasicBlock *, 4> PrologExitPreds; + Loop *PrologLoop = LI->getLoopFor(PrologLatch); + if (PrologLoop) { + for (BasicBlock *PredBB : predecessors(PrologExit)) + if (PrologLoop->contains(PredBB)) + PrologExitPreds.push_back(PredBB); + + SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI, + nullptr, PreserveLCSSA); + } + + // Create a branch around the original loop, which is taken if there are no + // iterations remaining to be executed after running the prologue. + Instruction *InsertPt = PrologExit->getTerminator(); + IRBuilder<> B(InsertPt); + + assert(Count != 0 && "nonsensical Count!"); + + // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1) + // This means %xtraiter is (BECount + 1) and all of the iterations of this + // loop were executed by the prologue. Note that if BECount <u (Count - 1) + // then (BECount + 1) cannot unsigned-overflow. + Value *BrLoopExit = + B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)); + // Split the exit to maintain loop canonicalization guarantees + SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit)); + SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI, + nullptr, PreserveLCSSA); + // Add the branch to the exit block (around the unrolled loop) + B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader); + InsertPt->eraseFromParent(); + if (DT) { + auto *NewDom = DT->findNearestCommonDominator(OriginalLoopLatchExit, + PrologExit); + DT->changeImmediateDominator(OriginalLoopLatchExit, NewDom); + } +} + +/// Connect the unrolling epilog code to the original loop. +/// The unrolling epilog code contains code to execute the +/// 'extra' iterations if the run-time trip count modulo the +/// unroll count is non-zero. +/// +/// This function performs the following: +/// - Update PHI nodes at the unrolling loop exit and epilog loop exit +/// - Create PHI nodes at the unrolling loop exit to combine +/// values that exit the unrolling loop code and jump around it. +/// - Update PHI operands in the epilog loop by the new PHI nodes +/// - Branch around the epilog loop if extra iters (ModVal) is zero. 
+/// +static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, + BasicBlock *Exit, BasicBlock *PreHeader, + BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, + ValueToValueMapTy &VMap, DominatorTree *DT, + LoopInfo *LI, bool PreserveLCSSA, + ScalarEvolution &SE) { + BasicBlock *Latch = L->getLoopLatch(); + assert(Latch && "Loop must have a latch"); + BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]); + + // Loop structure should be the following: + // + // PreHeader + // NewPreHeader + // Header + // ... + // Latch + // NewExit (PN) + // EpilogPreHeader + // EpilogHeader + // ... + // EpilogLatch + // Exit (EpilogPN) + + // Update PHI nodes at NewExit and Exit. + for (PHINode &PN : NewExit->phis()) { + // PN should be used in another PHI located in Exit block as + // Exit was split by SplitBlockPredecessors into Exit and NewExit + // Basically it should look like: + // NewExit: + // PN = PHI [I, Latch] + // ... + // Exit: + // EpilogPN = PHI [PN, EpilogPreHeader], [X, Exit2], [Y, Exit2.epil] + // + // Exits from non-latch blocks point to the original exit block and the + // epilogue edges have already been added. + // + // There is EpilogPreHeader incoming block instead of NewExit as + // NewExit was spilt 1 more time to get EpilogPreHeader. + assert(PN.hasOneUse() && "The phi should have 1 use"); + PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser()); + assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block"); + + // Add incoming PreHeader from branch around the Loop + PN.addIncoming(UndefValue::get(PN.getType()), PreHeader); + SE.forgetValue(&PN); + + Value *V = PN.getIncomingValueForBlock(Latch); + Instruction *I = dyn_cast<Instruction>(V); + if (I && L->contains(I)) + // If value comes from an instruction in the loop add VMap value. + V = VMap.lookup(I); + // For the instruction out of the loop, constant or undefined value + // insert value itself. + EpilogPN->addIncoming(V, EpilogLatch); + + assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 && + "EpilogPN should have EpilogPreHeader incoming block"); + // Change EpilogPreHeader incoming block to NewExit. + EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader), + NewExit); + // Now PHIs should look like: + // NewExit: + // PN = PHI [I, Latch], [undef, PreHeader] + // ... + // Exit: + // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch] + } + + // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader). + // Update corresponding PHI nodes in epilog loop. + for (BasicBlock *Succ : successors(Latch)) { + // Skip this as we already updated phis in exit blocks. + if (!L->contains(Succ)) + continue; + for (PHINode &PN : Succ->phis()) { + // Add new PHI nodes to the loop exit block and update epilog + // PHIs with the new PHI values. + PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr", + NewExit->getFirstNonPHI()); + // Adding a value to the new PHI node from the unrolling loop preheader. + NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader); + // Adding a value to the new PHI node from the unrolling loop latch. + NewPN->addIncoming(PN.getIncomingValueForBlock(Latch), Latch); + + // Update the existing PHI node operand with the value from the new PHI + // node. Corresponding instruction in epilog loop should be PHI. 
+ PHINode *VPN = cast<PHINode>(VMap[&PN]); + VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN); + } + } + + Instruction *InsertPt = NewExit->getTerminator(); + IRBuilder<> B(InsertPt); + Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod"); + assert(Exit && "Loop must have a single exit block only"); + // Split the epilogue exit to maintain loop canonicalization guarantees + SmallVector<BasicBlock*, 4> Preds(predecessors(Exit)); + SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr, + PreserveLCSSA); + // Add the branch to the exit block (around the unrolling loop) + B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit); + InsertPt->eraseFromParent(); + if (DT) { + auto *NewDom = DT->findNearestCommonDominator(Exit, NewExit); + DT->changeImmediateDominator(Exit, NewDom); + } + + // Split the main loop exit to maintain canonicalization guarantees. + SmallVector<BasicBlock*, 4> NewExitPreds{Latch}; + SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, nullptr, + PreserveLCSSA); +} + +/// Create a clone of the blocks in a loop and connect them together. A new +/// loop will be created including all cloned blocks, and the iterator of the +/// new loop switched to count NewIter down to 0. +/// The cloned blocks should be inserted between InsertTop and InsertBot. +/// InsertTop should be new preheader, InsertBot new loop exit. +/// Returns the new cloned loop that is created. +static Loop * +CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder, + const bool UnrollRemainder, + BasicBlock *InsertTop, + BasicBlock *InsertBot, BasicBlock *Preheader, + std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, + ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) { + StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; + BasicBlock *Header = L->getHeader(); + BasicBlock *Latch = L->getLoopLatch(); + Function *F = Header->getParent(); + LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); + LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); + Loop *ParentLoop = L->getParentLoop(); + NewLoopsMap NewLoops; + NewLoops[ParentLoop] = ParentLoop; + + // For each block in the original loop, create a new copy, + // and update the value map with the newly created values. + for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { + BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F); + NewBlocks.push_back(NewBB); + + addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops); + + VMap[*BB] = NewBB; + if (Header == *BB) { + // For the first block, add a CFG connection to this newly + // created block. + InsertTop->getTerminator()->setSuccessor(0, NewBB); + } + + if (DT) { + if (Header == *BB) { + // The header is dominated by the preheader. + DT->addNewBlock(NewBB, InsertTop); + } else { + // Copy information from original loop to unrolled loop. + BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock(); + DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB])); + } + } + + if (Latch == *BB) { + // For the last block, create a loop back to cloned head. + VMap.erase((*BB)->getTerminator()); + // Use an incrementing IV. Pre-incr/post-incr is backedge/trip count. + // Subtle: NewIter can be 0 if we wrapped when computing the trip count, + // thus we must compare the post-increment (wrapping) value. 
+ BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]); + BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator()); + IRBuilder<> Builder(LatchBR); + PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, + suffix + ".iter", + FirstLoopBB->getFirstNonPHI()); + auto *Zero = ConstantInt::get(NewIdx->getType(), 0); + auto *One = ConstantInt::get(NewIdx->getType(), 1); + Value *IdxNext = Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next"); + Value *IdxCmp = Builder.CreateICmpNE(IdxNext, NewIter, NewIdx->getName() + ".cmp"); + Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot); + NewIdx->addIncoming(Zero, InsertTop); + NewIdx->addIncoming(IdxNext, NewBB); + LatchBR->eraseFromParent(); + } + } + + // Change the incoming values to the ones defined in the preheader or + // cloned loop. + for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { + PHINode *NewPHI = cast<PHINode>(VMap[&*I]); + unsigned idx = NewPHI->getBasicBlockIndex(Preheader); + NewPHI->setIncomingBlock(idx, InsertTop); + BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]); + idx = NewPHI->getBasicBlockIndex(Latch); + Value *InVal = NewPHI->getIncomingValue(idx); + NewPHI->setIncomingBlock(idx, NewLatch); + if (Value *V = VMap.lookup(InVal)) + NewPHI->setIncomingValue(idx, V); + } + + Loop *NewLoop = NewLoops[L]; + assert(NewLoop && "L should have been cloned"); + MDNode *LoopID = NewLoop->getLoopID(); + + // Only add loop metadata if the loop is not going to be completely + // unrolled. + if (UnrollRemainder) + return NewLoop; + + std::optional<MDNode *> NewLoopID = makeFollowupLoopID( + LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder}); + if (NewLoopID) { + NewLoop->setLoopID(*NewLoopID); + + // Do not setLoopAlreadyUnrolled if loop attributes have been defined + // explicitly. + return NewLoop; + } + + // Add unroll disable metadata to disable future unrolling for this loop. + NewLoop->setLoopAlreadyUnrolled(); + return NewLoop; +} + +/// Returns true if we can profitably unroll the multi-exit loop L. Currently, +/// we return true only if UnrollRuntimeMultiExit is set to true. +static bool canProfitablyUnrollMultiExitLoop( + Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit, + bool UseEpilogRemainder) { + + // Priority goes to UnrollRuntimeMultiExit if it's supplied. + if (UnrollRuntimeMultiExit.getNumOccurrences()) + return UnrollRuntimeMultiExit; + + // The main pain point with multi-exit loop unrolling is that once unrolled, + // we will not be able to merge all blocks into a straight line code. + // There are branches within the unrolled loop that go to the OtherExits. + // The second point is the increase in code size, but this is true + // irrespective of multiple exits. + + // Note: Both the heuristics below are coarse grained. We are essentially + // enabling unrolling of loops that have a single side exit other than the + // normal LatchExit (i.e. exiting into a deoptimize block). + // The heuristics considered are: + // 1. low number of branches in the unrolled version. + // 2. high predictability of these extra branches. + // We avoid unrolling loops that have more than two exiting blocks. This + // limits the total number of branches in the unrolled loop to be atmost + // the unroll factor (since one of the exiting blocks is the latch block). + SmallVector<BasicBlock*, 4> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + if (ExitingBlocks.size() > 2) + return false; + + // Allow unrolling of loops with no non latch exit blocks. 
+ if (OtherExits.size() == 0) + return true; + + // The second heuristic is that L has one exit other than the latchexit and + // that exit is a deoptimize block. We know that deoptimize blocks are rarely + // taken, which also implies the branch leading to the deoptimize block is + // highly predictable. When UnrollRuntimeOtherExitPredictable is specified, we + // assume the other exit branch is predictable even if it has no deoptimize + // call. + return (OtherExits.size() == 1 && + (UnrollRuntimeOtherExitPredictable || + OtherExits[0]->getTerminatingDeoptimizeCall())); + // TODO: These can be fine-tuned further to consider code size or deopt states + // that are captured by the deoptimize exit block. + // Also, we can extend this to support more cases, if we actually + // know of kinds of multiexit loops that would benefit from unrolling. +} + +// Assign the maximum possible trip count as the back edge weight for the +// remainder loop if the original loop comes with a branch weight. +static void updateLatchBranchWeightsForRemainderLoop(Loop *OrigLoop, + Loop *RemainderLoop, + uint64_t UnrollFactor) { + uint64_t TrueWeight, FalseWeight; + BranchInst *LatchBR = + cast<BranchInst>(OrigLoop->getLoopLatch()->getTerminator()); + if (!extractBranchWeights(*LatchBR, TrueWeight, FalseWeight)) + return; + uint64_t ExitWeight = LatchBR->getSuccessor(0) == OrigLoop->getHeader() + ? FalseWeight + : TrueWeight; + assert(UnrollFactor > 1); + uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight; + BasicBlock *Header = RemainderLoop->getHeader(); + BasicBlock *Latch = RemainderLoop->getLoopLatch(); + auto *RemainderLatchBR = cast<BranchInst>(Latch->getTerminator()); + unsigned HeaderIdx = (RemainderLatchBR->getSuccessor(0) == Header ? 0 : 1); + MDBuilder MDB(RemainderLatchBR->getContext()); + MDNode *WeightNode = + HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight) + : MDB.createBranchWeights(BackEdgeWeight, ExitWeight); + RemainderLatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); +} + +/// Calculate ModVal = (BECount + 1) % Count on the abstract integer domain +/// accounting for the possibility of unsigned overflow in the 2s complement +/// domain. Preconditions: +/// 1) TripCount = BECount + 1 (allowing overflow) +/// 2) Log2(Count) <= BitWidth(BECount) +static Value *CreateTripRemainder(IRBuilder<> &B, Value *BECount, + Value *TripCount, unsigned Count) { + // Note that TripCount is BECount + 1. + if (isPowerOf2_32(Count)) + // If the expression is zero, then either: + // 1. There are no iterations to be run in the prolog/epilog loop. + // OR + // 2. The addition computing TripCount overflowed. + // + // If (2) is true, we know that TripCount really is (1 << BEWidth) and so + // the number of iterations that remain to be run in the original loop is a + // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (a + // precondition of this method). + return B.CreateAnd(TripCount, Count - 1, "xtraiter"); + + // As (BECount + 1) can potentially unsigned overflow we count + // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count. + Constant *CountC = ConstantInt::get(BECount->getType(), Count); + Value *ModValTmp = B.CreateURem(BECount, CountC); + Value *ModValAdd = B.CreateAdd(ModValTmp, + ConstantInt::get(ModValTmp->getType(), 1)); + // At that point (BECount % Count) + 1 could be equal to Count. + // To handle this case we need to take mod by Count one more time. 
+ return B.CreateURem(ModValAdd, CountC, "xtraiter"); +} + + +/// Insert code in the prolog/epilog code when unrolling a loop with a +/// run-time trip-count. +/// +/// This method assumes that the loop unroll factor is total number +/// of loop bodies in the loop after unrolling. (Some folks refer +/// to the unroll factor as the number of *extra* copies added). +/// We assume also that the loop unroll factor is a power-of-two. So, after +/// unrolling the loop, the number of loop bodies executed is 2, +/// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch +/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for +/// the switch instruction is generated. +/// +/// ***Prolog case*** +/// extraiters = tripcount % loopfactor +/// if (extraiters == 0) jump Loop: +/// else jump Prol: +/// Prol: LoopBody; +/// extraiters -= 1 // Omitted if unroll factor is 2. +/// if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2. +/// if (tripcount < loopfactor) jump End: +/// Loop: +/// ... +/// End: +/// +/// ***Epilog case*** +/// extraiters = tripcount % loopfactor +/// if (tripcount < loopfactor) jump LoopExit: +/// unroll_iters = tripcount - extraiters +/// Loop: LoopBody; (executes unroll_iter times); +/// unroll_iter -= 1 +/// if (unroll_iter != 0) jump Loop: +/// LoopExit: +/// if (extraiters == 0) jump EpilExit: +/// Epil: LoopBody; (executes extraiters times) +/// extraiters -= 1 // Omitted if unroll factor is 2. +/// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2. +/// EpilExit: + +bool llvm::UnrollRuntimeLoopRemainder( + Loop *L, unsigned Count, bool AllowExpensiveTripCount, + bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV, + LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, + const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop) { + LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n"); + LLVM_DEBUG(L->dump()); + LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n" + : dbgs() << "Using prolog remainder.\n"); + + // Make sure the loop is in canonical form. + if (!L->isLoopSimplifyForm()) { + LLVM_DEBUG(dbgs() << "Not in simplify form!\n"); + return false; + } + + // Guaranteed by LoopSimplifyForm. + BasicBlock *Latch = L->getLoopLatch(); + BasicBlock *Header = L->getHeader(); + + BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); + + if (!LatchBR || LatchBR->isUnconditional()) { + // The loop-rotate pass can be helpful to avoid this in many cases. + LLVM_DEBUG( + dbgs() + << "Loop latch not terminated by a conditional branch.\n"); + return false; + } + + unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0; + BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex); + + if (L->contains(LatchExit)) { + // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the + // targets of the Latch be an exit block out of the loop. + LLVM_DEBUG( + dbgs() + << "One of the loop latch successors must be the exit block.\n"); + return false; + } + + // These are exit blocks other than the target of the latch exiting block. + SmallVector<BasicBlock *, 4> OtherExits; + L->getUniqueNonLatchExitBlocks(OtherExits); + // Support only single exit and exiting block unless multi-exit loop + // unrolling is enabled. + if (!L->getExitingBlock() || OtherExits.size()) { + // We rely on LCSSA form being preserved when the exit blocks are transformed. + // (Note that only an off-by-default mode of the old PM disables PreserveLCCA.) 
+ if (!PreserveLCSSA) + return false; + + if (!canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, + UseEpilogRemainder)) { + LLVM_DEBUG( + dbgs() + << "Multiple exit/exiting blocks in loop and multi-exit unrolling not " + "enabled!\n"); + return false; + } + } + // Use Scalar Evolution to compute the trip count. This allows more loops to + // be unrolled than relying on induction var simplification. + if (!SE) + return false; + + // Only unroll loops with a computable trip count. + // We calculate the backedge count by using getExitCount on the Latch block, + // which is proven to be the only exiting block in this loop. This is same as + // calculating getBackedgeTakenCount on the loop (which computes SCEV for all + // exiting blocks). + const SCEV *BECountSC = SE->getExitCount(L, Latch); + if (isa<SCEVCouldNotCompute>(BECountSC)) { + LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n"); + return false; + } + + unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth(); + + // Add 1 since the backedge count doesn't include the first loop iteration. + // (Note that overflow can occur, this is handled explicitly below) + const SCEV *TripCountSC = + SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); + if (isa<SCEVCouldNotCompute>(TripCountSC)) { + LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n"); + return false; + } + + BasicBlock *PreHeader = L->getLoopPreheader(); + BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); + const DataLayout &DL = Header->getModule()->getDataLayout(); + SCEVExpander Expander(*SE, DL, "loop-unroll"); + if (!AllowExpensiveTripCount && + Expander.isHighCostExpansion(TripCountSC, L, SCEVCheapExpansionBudget, + TTI, PreHeaderBR)) { + LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n"); + return false; + } + + // This constraint lets us deal with an overflowing trip count easily; see the + // comment on ModVal below. + if (Log2_32(Count) > BEWidth) { + LLVM_DEBUG( + dbgs() + << "Count failed constraint on overflow trip count calculation.\n"); + return false; + } + + // Loop structure is the following: + // + // PreHeader + // Header + // ... + // Latch + // LatchExit + + BasicBlock *NewPreHeader; + BasicBlock *NewExit = nullptr; + BasicBlock *PrologExit = nullptr; + BasicBlock *EpilogPreHeader = nullptr; + BasicBlock *PrologPreHeader = nullptr; + + if (UseEpilogRemainder) { + // If epilog remainder + // Split PreHeader to insert a branch around loop for unrolling. + NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI); + NewPreHeader->setName(PreHeader->getName() + ".new"); + // Split LatchExit to create phi nodes from branch above. + NewExit = SplitBlockPredecessors(LatchExit, {Latch}, ".unr-lcssa", DT, LI, + nullptr, PreserveLCSSA); + // NewExit gets its DebugLoc from LatchExit, which is not part of the + // original Loop. + // Fix this by setting Loop's DebugLoc to NewExit. + auto *NewExitTerminator = NewExit->getTerminator(); + NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc()); + // Split NewExit to insert epilog remainder loop. 
+ EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI); + EpilogPreHeader->setName(Header->getName() + ".epil.preheader"); + + // If the latch exits from multiple level of nested loops, then + // by assumption there must be another loop exit which branches to the + // outer loop and we must adjust the loop for the newly inserted blocks + // to account for the fact that our epilogue is still in the same outer + // loop. Note that this leaves loopinfo temporarily out of sync with the + // CFG until the actual epilogue loop is inserted. + if (auto *ParentL = L->getParentLoop()) + if (LI->getLoopFor(LatchExit) != ParentL) { + LI->removeBlock(NewExit); + ParentL->addBasicBlockToLoop(NewExit, *LI); + LI->removeBlock(EpilogPreHeader); + ParentL->addBasicBlockToLoop(EpilogPreHeader, *LI); + } + + } else { + // If prolog remainder + // Split the original preheader twice to insert prolog remainder loop + PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI); + PrologPreHeader->setName(Header->getName() + ".prol.preheader"); + PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(), + DT, LI); + PrologExit->setName(Header->getName() + ".prol.loopexit"); + // Split PrologExit to get NewPreHeader. + NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI); + NewPreHeader->setName(PreHeader->getName() + ".new"); + } + // Loop structure should be the following: + // Epilog Prolog + // + // PreHeader PreHeader + // *NewPreHeader *PrologPreHeader + // Header *PrologExit + // ... *NewPreHeader + // Latch Header + // *NewExit ... + // *EpilogPreHeader Latch + // LatchExit LatchExit + + // Calculate conditions for branch around loop for unrolling + // in epilog case and around prolog remainder loop in prolog case. + // Compute the number of extra iterations required, which is: + // extra iterations = run-time trip count % loop unroll factor + PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); + IRBuilder<> B(PreHeaderBR); + Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), + PreHeaderBR); + Value *BECount; + // If there are other exits before the latch, that may cause the latch exit + // branch to never be executed, and the latch exit count may be poison. + // In this case, freeze the TripCount and base BECount on the frozen + // TripCount. We will introduce two branches using these values, and it's + // important that they see a consistent value (which would not be guaranteed + // if were frozen independently.) + if ((!OtherExits.empty() || !SE->loopHasNoAbnormalExits(L)) && + !isGuaranteedNotToBeUndefOrPoison(TripCount, AC, PreHeaderBR, DT)) { + TripCount = B.CreateFreeze(TripCount); + BECount = + B.CreateAdd(TripCount, ConstantInt::get(TripCount->getType(), -1)); + } else { + // If we don't need to freeze, use SCEVExpander for BECount as well, to + // allow slightly better value reuse. + BECount = + Expander.expandCodeFor(BECountSC, BECountSC->getType(), PreHeaderBR); + } + + Value * const ModVal = CreateTripRemainder(B, BECount, TripCount, Count); + + Value *BranchVal = + UseEpilogRemainder ? B.CreateICmpULT(BECount, + ConstantInt::get(BECount->getType(), + Count - 1)) : + B.CreateIsNotNull(ModVal, "lcmp.mod"); + BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader; + BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit; + // Branch to either remainder (extra iterations) loop or unrolling loop. 
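+  // In the epilog case the condition is "BECount < Count - 1", i.e. the whole
+  // trip count fits in the remainder and the unrolled loop can be skipped; in
+  // the prolog case it is simply "xtraiter != 0".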
+ B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop); + PreHeaderBR->eraseFromParent(); + if (DT) { + if (UseEpilogRemainder) + DT->changeImmediateDominator(NewExit, PreHeader); + else + DT->changeImmediateDominator(PrologExit, PreHeader); + } + Function *F = Header->getParent(); + // Get an ordered list of blocks in the loop to help with the ordering of the + // cloned blocks in the prolog/epilog code + LoopBlocksDFS LoopBlocks(L); + LoopBlocks.perform(LI); + + // + // For each extra loop iteration, create a copy of the loop's basic blocks + // and generate a condition that branches to the copy depending on the + // number of 'left over' iterations. + // + std::vector<BasicBlock *> NewBlocks; + ValueToValueMapTy VMap; + + // Clone all the basic blocks in the loop. If Count is 2, we don't clone + // the loop, otherwise we create a cloned loop to execute the extra + // iterations. This function adds the appropriate CFG connections. + BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit; + BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; + Loop *remainderLoop = CloneLoopBlocks( + L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot, + NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); + + // Assign the maximum possible trip count as the back edge weight for the + // remainder loop if the original loop comes with a branch weight. + if (remainderLoop && !UnrollRemainder) + updateLatchBranchWeightsForRemainderLoop(L, remainderLoop, Count); + + // Insert the cloned blocks into the function. + F->splice(InsertBot->getIterator(), F, NewBlocks[0]->getIterator(), F->end()); + + // Now the loop blocks are cloned and the other exiting blocks from the + // remainder are connected to the original Loop's exit blocks. The remaining + // work is to update the phi nodes in the original loop, and take in the + // values from the cloned region. + for (auto *BB : OtherExits) { + // Given we preserve LCSSA form, we know that the values used outside the + // loop will be used through these phi nodes at the exit blocks that are + // transformed below. + for (PHINode &PN : BB->phis()) { + unsigned oldNumOperands = PN.getNumIncomingValues(); + // Add the incoming values from the remainder code to the end of the phi + // node. + for (unsigned i = 0; i < oldNumOperands; i++){ + auto *PredBB =PN.getIncomingBlock(i); + if (PredBB == Latch) + // The latch exit is handled seperately, see connectX + continue; + if (!L->contains(PredBB)) + // Even if we had dedicated exits, the code above inserted an + // extra branch which can reach the latch exit. + continue; + + auto *V = PN.getIncomingValue(i); + if (Instruction *I = dyn_cast<Instruction>(V)) + if (L->contains(I)) + V = VMap.lookup(I); + PN.addIncoming(V, cast<BasicBlock>(VMap[PredBB])); + } + } +#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG) + for (BasicBlock *SuccBB : successors(BB)) { + assert(!(llvm::is_contained(OtherExits, SuccBB) || SuccBB == LatchExit) && + "Breaks the definition of dedicated exits!"); + } +#endif + } + + // Update the immediate dominator of the exit blocks and blocks that are + // reachable from the exit blocks. This is needed because we now have paths + // from both the original loop and the remainder code reaching the exit + // blocks. While the IDom of these exit blocks were from the original loop, + // now the IDom is the preheader (which decides whether the original loop or + // remainder code should run). 
+ if (DT && !L->getExitingBlock()) { + SmallVector<BasicBlock *, 16> ChildrenToUpdate; + // NB! We have to examine the dom children of all loop blocks, not just + // those which are the IDom of the exit blocks. This is because blocks + // reachable from the exit blocks can have their IDom as the nearest common + // dominator of the exit blocks. + for (auto *BB : L->blocks()) { + auto *DomNodeBB = DT->getNode(BB); + for (auto *DomChild : DomNodeBB->children()) { + auto *DomChildBB = DomChild->getBlock(); + if (!L->contains(LI->getLoopFor(DomChildBB))) + ChildrenToUpdate.push_back(DomChildBB); + } + } + for (auto *BB : ChildrenToUpdate) + DT->changeImmediateDominator(BB, PreHeader); + } + + // Loop structure should be the following: + // Epilog Prolog + // + // PreHeader PreHeader + // NewPreHeader PrologPreHeader + // Header PrologHeader + // ... ... + // Latch PrologLatch + // NewExit PrologExit + // EpilogPreHeader NewPreHeader + // EpilogHeader Header + // ... ... + // EpilogLatch Latch + // LatchExit LatchExit + + // Rewrite the cloned instruction operands to use the values created when the + // clone is created. + for (BasicBlock *BB : NewBlocks) { + for (Instruction &I : *BB) { + RemapInstruction(&I, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + } + } + + if (UseEpilogRemainder) { + // Connect the epilog code to the original loop and update the + // PHI functions. + ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader, + NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE); + + // Update counter in loop for unrolling. + // Use an incrementing IV. Pre-incr/post-incr is backedge/trip count. + // Subtle: TestVal can be 0 if we wrapped when computing the trip count, + // thus we must compare the post-increment (wrapping) value. + IRBuilder<> B2(NewPreHeader->getTerminator()); + Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter"); + BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); + PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter", + Header->getFirstNonPHI()); + B2.SetInsertPoint(LatchBR); + auto *Zero = ConstantInt::get(NewIdx->getType(), 0); + auto *One = ConstantInt::get(NewIdx->getType(), 1); + Value *IdxNext = B2.CreateAdd(NewIdx, One, NewIdx->getName() + ".next"); + auto Pred = LatchBR->getSuccessor(0) == Header ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; + Value *IdxCmp = B2.CreateICmp(Pred, IdxNext, TestVal, NewIdx->getName() + ".ncmp"); + NewIdx->addIncoming(Zero, NewPreHeader); + NewIdx->addIncoming(IdxNext, Latch); + LatchBR->setCondition(IdxCmp); + } else { + // Connect the prolog code to the original loop and update the + // PHI functions. + ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader, + NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE); + } + + // If this loop is nested, then the loop unroller changes the code in the any + // of its parent loops, so the Scalar Evolution pass needs to be run again. + SE->forgetTopmostLoop(L); + + // Verify that the Dom Tree and Loop Info are correct. +#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG) + if (DT) { + assert(DT->verify(DominatorTree::VerificationLevel::Full)); + LI->verify(*DT); + } +#endif + + // For unroll factor 2 remainder loop will have 1 iteration. + if (Count == 2 && DT && LI && SE) { + // TODO: This code could probably be pulled out into a helper function + // (e.g. breakLoopBackedgeAndSimplify) and reused in loop-deletion. 
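+    // With an unroll factor of 2 the remainder runs at most one iteration,
+    // so its backedge is never taken; breaking it lets the blocks below be
+    // simplified and merged into straight-line code.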
+ BasicBlock *RemainderLatch = remainderLoop->getLoopLatch(); + assert(RemainderLatch); + SmallVector<BasicBlock*> RemainderBlocks(remainderLoop->getBlocks().begin(), + remainderLoop->getBlocks().end()); + breakLoopBackedge(remainderLoop, *DT, *SE, *LI, nullptr); + remainderLoop = nullptr; + + // Simplify loop values after breaking the backedge + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + SmallVector<WeakTrackingVH, 16> DeadInsts; + for (BasicBlock *BB : RemainderBlocks) { + for (Instruction &Inst : llvm::make_early_inc_range(*BB)) { + if (Value *V = simplifyInstruction(&Inst, {DL, nullptr, DT, AC})) + if (LI->replacementPreservesLCSSAForm(&Inst, V)) + Inst.replaceAllUsesWith(V); + if (isInstructionTriviallyDead(&Inst)) + DeadInsts.emplace_back(&Inst); + } + // We can't do recursive deletion until we're done iterating, as we might + // have a phi which (potentially indirectly) uses instructions later in + // the block we're iterating through. + RecursivelyDeleteTriviallyDeadInstructions(DeadInsts); + } + + // Merge latch into exit block. + auto *ExitBB = RemainderLatch->getSingleSuccessor(); + assert(ExitBB && "required after breaking cond br backedge"); + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); + MergeBlockIntoPredecessor(ExitBB, &DTU, LI); + } + + // Canonicalize to LoopSimplifyForm both original and remainder loops. We + // cannot rely on the LoopUnrollPass to do this because it only does + // canonicalization for parent/subloops and not the sibling loops. + if (OtherExits.size() > 0) { + // Generate dedicated exit blocks for the original loop, to preserve + // LoopSimplifyForm. + formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA); + // Generate dedicated exit blocks for the remainder loop if one exists, to + // preserve LoopSimplifyForm. + if (remainderLoop) + formDedicatedExitBlocks(remainderLoop, DT, LI, nullptr, PreserveLCSSA); + } + + auto UnrollResult = LoopUnrollResult::Unmodified; + if (remainderLoop && UnrollRemainder) { + LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n"); + UnrollResult = + UnrollLoop(remainderLoop, + {/*Count*/ Count - 1, /*Force*/ false, /*Runtime*/ false, + /*AllowExpensiveTripCount*/ false, + /*UnrollRemainder*/ false, ForgetAllSCEV}, + LI, SE, DT, AC, TTI, /*ORE*/ nullptr, PreserveLCSSA); + } + + if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled) + *ResultLoop = remainderLoop; + NumRuntimeUnrolled++; + return true; +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LoopUtils.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LoopUtils.cpp new file mode 100644 index 0000000000..7df8651ede --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/LoopUtils.cpp @@ -0,0 +1,1877 @@ +//===-- LoopUtils.cpp - Loop Utility functions -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines common loop utility functions. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/PriorityWorklist.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/DomTreeUpdater.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/InstSimplifyFolder.h" +#include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/ProfDataUtils.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" + +using namespace llvm; +using namespace llvm::PatternMatch; + +#define DEBUG_TYPE "loop-utils" + +static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced"; +static const char *LLVMLoopDisableLICM = "llvm.licm.disable"; + +bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, + MemorySSAUpdater *MSSAU, + bool PreserveLCSSA) { + bool Changed = false; + + // We re-use a vector for the in-loop predecesosrs. + SmallVector<BasicBlock *, 4> InLoopPredecessors; + + auto RewriteExit = [&](BasicBlock *BB) { + assert(InLoopPredecessors.empty() && + "Must start with an empty predecessors list!"); + auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); }); + + // See if there are any non-loop predecessors of this exit block and + // keep track of the in-loop predecessors. + bool IsDedicatedExit = true; + for (auto *PredBB : predecessors(BB)) + if (L->contains(PredBB)) { + if (isa<IndirectBrInst>(PredBB->getTerminator())) + // We cannot rewrite exiting edges from an indirectbr. + return false; + + InLoopPredecessors.push_back(PredBB); + } else { + IsDedicatedExit = false; + } + + assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!"); + + // Nothing to do if this is already a dedicated exit. + if (IsDedicatedExit) + return false; + + auto *NewExitBB = SplitBlockPredecessors( + BB, InLoopPredecessors, ".loopexit", DT, LI, MSSAU, PreserveLCSSA); + + if (!NewExitBB) + LLVM_DEBUG( + dbgs() << "WARNING: Can't create a dedicated exit block for loop: " + << *L << "\n"); + else + LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " + << NewExitBB->getName() << "\n"); + return true; + }; + + // Walk the exit blocks directly rather than building up a data structure for + // them, but only visit each one once. + SmallPtrSet<BasicBlock *, 4> Visited; + for (auto *BB : L->blocks()) + for (auto *SuccBB : successors(BB)) { + // We're looking for exit blocks so skip in-loop successors. + if (L->contains(SuccBB)) + continue; + + // Visit each exit block exactly once. 
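For intuition about the property being restored here: an exit block is "dedicated" when every one of its predecessors lies inside the loop, and the walk below splits off a fresh ".loopexit" block whenever that does not hold. A small illustrative C++ fragment in which the shared target would be split (names are invented):

void f(int N, bool Flag) {
  if (Flag)
    goto out;                 // predecessor of 'out' from outside the loop
  for (int I = 0; I != N; ++I)
    if (I == 42)
      goto out;               // predecessor of 'out' from inside the loop
out:                          // not a dedicated exit; the in-loop edges would be
  return;                     // redirected through a new 'out.loopexit' block
}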
+ if (!Visited.insert(SuccBB).second) + continue; + + Changed |= RewriteExit(SuccBB); + } + + return Changed; +} + +/// Returns the instructions that use values defined in the loop. +SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) { + SmallVector<Instruction *, 8> UsedOutside; + + for (auto *Block : L->getBlocks()) + // FIXME: I believe that this could use copy_if if the Inst reference could + // be adapted into a pointer. + for (auto &Inst : *Block) { + auto Users = Inst.users(); + if (any_of(Users, [&](User *U) { + auto *Use = cast<Instruction>(U); + return !L->contains(Use->getParent()); + })) + UsedOutside.push_back(&Inst); + } + + return UsedOutside; +} + +void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) { + // By definition, all loop passes need the LoopInfo analysis and the + // Dominator tree it depends on. Because they all participate in the loop + // pass manager, they must also preserve these. + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); + + // We must also preserve LoopSimplify and LCSSA. We locally access their IDs + // here because users shouldn't directly get them from this header. + extern char &LoopSimplifyID; + extern char &LCSSAID; + AU.addRequiredID(LoopSimplifyID); + AU.addPreservedID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + AU.addPreservedID(LCSSAID); + // This is used in the LPPassManager to perform LCSSA verification on passes + // which preserve lcssa form + AU.addRequired<LCSSAVerificationPass>(); + AU.addPreserved<LCSSAVerificationPass>(); + + // Loop passes are designed to run inside of a loop pass manager which means + // that any function analyses they require must be required by the first loop + // pass in the manager (so that it is computed before the loop pass manager + // runs) and preserved by all loop pasess in the manager. To make this + // reasonably robust, the set needed for most loop passes is maintained here. + // If your loop pass requires an analysis not listed here, you will need to + // carefully audit the loop pass manager nesting structure that results. + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addPreserved<BasicAAWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addPreserved<SCEVAAWrapperPass>(); + AU.addRequired<ScalarEvolutionWrapperPass>(); + AU.addPreserved<ScalarEvolutionWrapperPass>(); + // FIXME: When all loop passes preserve MemorySSA, it can be required and + // preserved here instead of the individual handling in each pass. +} + +/// Manually defined generic "LoopPass" dependency initialization. This is used +/// to initialize the exact set of passes from above in \c +/// getLoopAnalysisUsage. It can be used within a loop pass's initialization +/// with: +/// +/// INITIALIZE_PASS_DEPENDENCY(LoopPass) +/// +/// As-if "LoopPass" were a pass. 
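To make the registration idiom described in the comment above concrete, a hypothetical legacy loop pass would be wired up roughly as follows. MyLoopOptLegacyPass and its strings are invented for illustration; the macros, the LoopPass base class, and getLoopAnalysisUsage are the standard legacy-PM pieces used in this file:

namespace llvm {
void initializeMyLoopOptLegacyPassPass(PassRegistry &); // in-tree this declaration
} // namespace llvm                                      // lives in InitializePasses.h

namespace {
struct MyLoopOptLegacyPass : public LoopPass {
  static char ID;
  MyLoopOptLegacyPass() : LoopPass(ID) {}
  bool runOnLoop(Loop *, LPPassManager &) override { return false; }
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    getLoopAnalysisUsage(AU); // the helper defined above
  }
};
} // namespace
char MyLoopOptLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(MyLoopOptLegacyPass, "my-loop-opt", "Example loop pass",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(MyLoopOptLegacyPass, "my-loop-opt", "Example loop pass",
                    false, false)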
+void llvm::initializeLoopPassPass(PassRegistry &Registry) { + INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) + INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) + INITIALIZE_PASS_DEPENDENCY(LoopSimplify) + INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) + INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) + INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) + INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) + INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) + INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) + INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) +} + +/// Create MDNode for input string. +static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) { + LLVMContext &Context = TheLoop->getHeader()->getContext(); + Metadata *MDs[] = { + MDString::get(Context, Name), + ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))}; + return MDNode::get(Context, MDs); +} + +/// Set input string into loop metadata by keeping other values intact. +/// If the string is already in loop metadata update value if it is +/// different. +void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD, + unsigned V) { + SmallVector<Metadata *, 4> MDs(1); + // If the loop already has metadata, retain it. + MDNode *LoopID = TheLoop->getLoopID(); + if (LoopID) { + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { + MDNode *Node = cast<MDNode>(LoopID->getOperand(i)); + // If it is of form key = value, try to parse it. + if (Node->getNumOperands() == 2) { + MDString *S = dyn_cast<MDString>(Node->getOperand(0)); + if (S && S->getString().equals(StringMD)) { + ConstantInt *IntMD = + mdconst::extract_or_null<ConstantInt>(Node->getOperand(1)); + if (IntMD && IntMD->getSExtValue() == V) + // It is already in place. Do nothing. + return; + // We need to update the value, so just skip it here and it will + // be added after copying other existed nodes. + continue; + } + } + MDs.push_back(Node); + } + } + // Add new metadata. + MDs.push_back(createStringMetadata(TheLoop, StringMD, V)); + // Replace current metadata node with new one. + LLVMContext &Context = TheLoop->getHeader()->getContext(); + MDNode *NewLoopID = MDNode::get(Context, MDs); + // Set operand 0 to refer to the loop id itself. 
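For concreteness, after a call such as the one below the loop's !llvm.loop node carries the self-referential first operand that the next statement establishes, plus one key/value pair per hint (a sketch; the node numbering is illustrative):

// Request an unroll factor of 4 on loop L (illustrative use of the helper above).
addStringMetadataToLoop(L, "llvm.loop.unroll.count", 4);
// Resulting loop ID, roughly:
//   !0 = distinct !{!0, !1}                        ; operand 0 points back at !0
//   !1 = !{!"llvm.loop.unroll.count", i32 4}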
+ NewLoopID->replaceOperandWith(0, NewLoopID); + TheLoop->setLoopID(NewLoopID); +} + +std::optional<ElementCount> +llvm::getOptionalElementCountLoopAttribute(const Loop *TheLoop) { + std::optional<int> Width = + getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width"); + + if (Width) { + std::optional<int> IsScalable = getOptionalIntLoopAttribute( + TheLoop, "llvm.loop.vectorize.scalable.enable"); + return ElementCount::get(*Width, IsScalable.value_or(false)); + } + + return std::nullopt; +} + +std::optional<MDNode *> llvm::makeFollowupLoopID( + MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions, + const char *InheritOptionsExceptPrefix, bool AlwaysNew) { + if (!OrigLoopID) { + if (AlwaysNew) + return nullptr; + return std::nullopt; + } + + assert(OrigLoopID->getOperand(0) == OrigLoopID); + + bool InheritAllAttrs = !InheritOptionsExceptPrefix; + bool InheritSomeAttrs = + InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[0] != '\0'; + SmallVector<Metadata *, 8> MDs; + MDs.push_back(nullptr); + + bool Changed = false; + if (InheritAllAttrs || InheritSomeAttrs) { + for (const MDOperand &Existing : drop_begin(OrigLoopID->operands())) { + MDNode *Op = cast<MDNode>(Existing.get()); + + auto InheritThisAttribute = [InheritSomeAttrs, + InheritOptionsExceptPrefix](MDNode *Op) { + if (!InheritSomeAttrs) + return false; + + // Skip malformatted attribute metadata nodes. + if (Op->getNumOperands() == 0) + return true; + Metadata *NameMD = Op->getOperand(0).get(); + if (!isa<MDString>(NameMD)) + return true; + StringRef AttrName = cast<MDString>(NameMD)->getString(); + + // Do not inherit excluded attributes. + return !AttrName.startswith(InheritOptionsExceptPrefix); + }; + + if (InheritThisAttribute(Op)) + MDs.push_back(Op); + else + Changed = true; + } + } else { + // Modified if we dropped at least one attribute. + Changed = OrigLoopID->getNumOperands() > 1; + } + + bool HasAnyFollowup = false; + for (StringRef OptionName : FollowupOptions) { + MDNode *FollowupNode = findOptionMDForLoopID(OrigLoopID, OptionName); + if (!FollowupNode) + continue; + + HasAnyFollowup = true; + for (const MDOperand &Option : drop_begin(FollowupNode->operands())) { + MDs.push_back(Option.get()); + Changed = true; + } + } + + // Attributes of the followup loop not specified explicity, so signal to the + // transformation pass to add suitable attributes. + if (!AlwaysNew && !HasAnyFollowup) + return std::nullopt; + + // If no attributes were added or remove, the previous loop Id can be reused. + if (!AlwaysNew && !Changed) + return OrigLoopID; + + // No attributes is equivalent to having no !llvm.loop metadata at all. + if (MDs.size() == 1) + return nullptr; + + // Build the new loop ID. + MDTuple *FollowupLoopID = MDNode::get(OrigLoopID->getContext(), MDs); + FollowupLoopID->replaceOperandWith(0, FollowupLoopID); + return FollowupLoopID; +} + +bool llvm::hasDisableAllTransformsHint(const Loop *L) { + return getBooleanLoopAttribute(L, LLVMLoopDisableNonforced); +} + +bool llvm::hasDisableLICMTransformsHint(const Loop *L) { + return getBooleanLoopAttribute(L, LLVMLoopDisableLICM); +} + +TransformationMode llvm::hasUnrollTransformation(const Loop *L) { + if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable")) + return TM_SuppressedByUser; + + std::optional<int> Count = + getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count"); + if (Count) + return *Count == 1 ? 
TM_SuppressedByUser : TM_ForcedByUser; + + if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable")) + return TM_ForcedByUser; + + if (getBooleanLoopAttribute(L, "llvm.loop.unroll.full")) + return TM_ForcedByUser; + + if (hasDisableAllTransformsHint(L)) + return TM_Disable; + + return TM_Unspecified; +} + +TransformationMode llvm::hasUnrollAndJamTransformation(const Loop *L) { + if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable")) + return TM_SuppressedByUser; + + std::optional<int> Count = + getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count"); + if (Count) + return *Count == 1 ? TM_SuppressedByUser : TM_ForcedByUser; + + if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable")) + return TM_ForcedByUser; + + if (hasDisableAllTransformsHint(L)) + return TM_Disable; + + return TM_Unspecified; +} + +TransformationMode llvm::hasVectorizeTransformation(const Loop *L) { + std::optional<bool> Enable = + getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable"); + + if (Enable == false) + return TM_SuppressedByUser; + + std::optional<ElementCount> VectorizeWidth = + getOptionalElementCountLoopAttribute(L); + std::optional<int> InterleaveCount = + getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count"); + + // 'Forcing' vector width and interleave count to one effectively disables + // this tranformation. + if (Enable == true && VectorizeWidth && VectorizeWidth->isScalar() && + InterleaveCount == 1) + return TM_SuppressedByUser; + + if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized")) + return TM_Disable; + + if (Enable == true) + return TM_ForcedByUser; + + if ((VectorizeWidth && VectorizeWidth->isScalar()) && InterleaveCount == 1) + return TM_Disable; + + if ((VectorizeWidth && VectorizeWidth->isVector()) || InterleaveCount > 1) + return TM_Enable; + + if (hasDisableAllTransformsHint(L)) + return TM_Disable; + + return TM_Unspecified; +} + +TransformationMode llvm::hasDistributeTransformation(const Loop *L) { + if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable")) + return TM_ForcedByUser; + + if (hasDisableAllTransformsHint(L)) + return TM_Disable; + + return TM_Unspecified; +} + +TransformationMode llvm::hasLICMVersioningTransformation(const Loop *L) { + if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable")) + return TM_SuppressedByUser; + + if (hasDisableAllTransformsHint(L)) + return TM_Disable; + + return TM_Unspecified; +} + +/// Does a BFS from a given node to all of its children inside a given loop. +/// The returned vector of nodes includes the starting point. +SmallVector<DomTreeNode *, 16> +llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) { + SmallVector<DomTreeNode *, 16> Worklist; + auto AddRegionToWorklist = [&](DomTreeNode *DTN) { + // Only include subregions in the top level loop. 
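The hasUnrollTransformation / hasVectorizeTransformation helpers above consume metadata that the front end attaches for loop hints. As an illustration (assuming Clang and its "#pragma clang loop" spelling), source like the following ends up with the attributes those helpers test:

void scale(float *A, int N) {
#pragma clang loop vectorize(enable) interleave_count(2)
  for (int I = 0; I < N; ++I)
    A[I] *= 2.0f;
}
// The loop carries !llvm.loop entries along the lines of
//   !{!"llvm.loop.vectorize.enable", i1 true}
//   !{!"llvm.loop.interleave.count", i32 2}
// which is what makes hasVectorizeTransformation return TM_ForcedByUser here.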
+ BasicBlock *BB = DTN->getBlock(); + if (CurLoop->contains(BB)) + Worklist.push_back(DTN); + }; + + AddRegionToWorklist(N); + + for (size_t I = 0; I < Worklist.size(); I++) { + for (DomTreeNode *Child : Worklist[I]->children()) + AddRegionToWorklist(Child); + } + + return Worklist; +} + +void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, + LoopInfo *LI, MemorySSA *MSSA) { + assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!"); + auto *Preheader = L->getLoopPreheader(); + assert(Preheader && "Preheader should exist!"); + + std::unique_ptr<MemorySSAUpdater> MSSAU; + if (MSSA) + MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); + + // Now that we know the removal is safe, remove the loop by changing the + // branch from the preheader to go to the single exit block. + // + // Because we're deleting a large chunk of code at once, the sequence in which + // we remove things is very important to avoid invalidation issues. + + // Tell ScalarEvolution that the loop is deleted. Do this before + // deleting the loop so that ScalarEvolution can look at the loop + // to determine what it needs to clean up. + if (SE) { + SE->forgetLoop(L); + SE->forgetBlockAndLoopDispositions(); + } + + Instruction *OldTerm = Preheader->getTerminator(); + assert(!OldTerm->mayHaveSideEffects() && + "Preheader must end with a side-effect-free terminator"); + assert(OldTerm->getNumSuccessors() == 1 && + "Preheader must have a single successor"); + // Connect the preheader to the exit block. Keep the old edge to the header + // around to perform the dominator tree update in two separate steps + // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge + // preheader -> header. + // + // + // 0. Preheader 1. Preheader 2. Preheader + // | | | | + // V | V | + // Header <--\ | Header <--\ | Header <--\ + // | | | | | | | | | | | + // | V | | | V | | | V | + // | Body --/ | | Body --/ | | Body --/ + // V V V V V + // Exit Exit Exit + // + // By doing this is two separate steps we can perform the dominator tree + // update without using the batch update API. + // + // Even when the loop is never executed, we cannot remove the edge from the + // source block to the exit block. Consider the case where the unexecuted loop + // branches back to an outer loop. If we deleted the loop and removed the edge + // coming to this inner loop, this will break the outer loop structure (by + // deleting the backedge of the outer loop). If the outer loop is indeed a + // non-loop, it will be deleted in a future iteration of loop deletion pass. + IRBuilder<> Builder(OldTerm); + + auto *ExitBlock = L->getUniqueExitBlock(); + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); + if (ExitBlock) { + assert(ExitBlock && "Should have a unique exit block!"); + assert(L->hasDedicatedExits() && "Loop should have dedicated exits!"); + + Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock); + // Remove the old branch. The conditional branch becomes a new terminator. + OldTerm->eraseFromParent(); + + // Rewrite phis in the exit block to get their inputs from the Preheader + // instead of the exiting block. + for (PHINode &P : ExitBlock->phis()) { + // Set the zero'th element of Phi to be from the preheader and remove all + // other incoming values. Given the loop has dedicated exits, all other + // incoming values must be from the exiting blocks. 
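The removal loop just below deliberately erases incoming values from the highest index down. A scalar model of why that keeps the remaining indices valid (illustrative only; assumes a non-empty vector):

#include <vector>
// Keep only element 0, erasing by index from the back the way the PHI loop below
// does, so the lower indices still to be visited are never shifted.
inline void keepFirstOnly(std::vector<int> &Vals) {
  for (auto i = Vals.size() - 1; i != 0; --i)
    Vals.erase(Vals.begin() + i);
}
// {10, 20, 30, 40} becomes {10}.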
+ int PredIndex = 0; + P.setIncomingBlock(PredIndex, Preheader); + // Removes all incoming values from all other exiting blocks (including + // duplicate values from an exiting block). + // Nuke all entries except the zero'th entry which is the preheader entry. + // NOTE! We need to remove Incoming Values in the reverse order as done + // below, to keep the indices valid for deletion (removeIncomingValues + // updates getNumIncomingValues and shifts all values down into the + // operand being deleted). + for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i) + P.removeIncomingValue(e - i, false); + + assert((P.getNumIncomingValues() == 1 && + P.getIncomingBlock(PredIndex) == Preheader) && + "Should have exactly one value and that's from the preheader!"); + } + + if (DT) { + DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}}); + if (MSSA) { + MSSAU->applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}}, + *DT); + if (VerifyMemorySSA) + MSSA->verifyMemorySSA(); + } + } + + // Disconnect the loop body by branching directly to its exit. + Builder.SetInsertPoint(Preheader->getTerminator()); + Builder.CreateBr(ExitBlock); + // Remove the old branch. + Preheader->getTerminator()->eraseFromParent(); + } else { + assert(L->hasNoExitBlocks() && + "Loop should have either zero or one exit blocks."); + + Builder.SetInsertPoint(OldTerm); + Builder.CreateUnreachable(); + Preheader->getTerminator()->eraseFromParent(); + } + + if (DT) { + DTU.applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}}); + if (MSSA) { + MSSAU->applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}}, + *DT); + SmallSetVector<BasicBlock *, 8> DeadBlockSet(L->block_begin(), + L->block_end()); + MSSAU->removeBlocks(DeadBlockSet); + if (VerifyMemorySSA) + MSSA->verifyMemorySSA(); + } + } + + // Use a map to unique and a vector to guarantee deterministic ordering. + llvm::SmallDenseSet<DebugVariable, 4> DeadDebugSet; + llvm::SmallVector<DbgVariableIntrinsic *, 4> DeadDebugInst; + + if (ExitBlock) { + // Given LCSSA form is satisfied, we should not have users of instructions + // within the dead loop outside of the loop. However, LCSSA doesn't take + // unreachable uses into account. We handle them here. + // We could do it after drop all references (in this case all users in the + // loop will be already eliminated and we have less work to do but according + // to API doc of User::dropAllReferences only valid operation after dropping + // references, is deletion. So let's substitute all usages of + // instruction from the loop with poison value of corresponding type first. + for (auto *Block : L->blocks()) + for (Instruction &I : *Block) { + auto *Poison = PoisonValue::get(I.getType()); + for (Use &U : llvm::make_early_inc_range(I.uses())) { + if (auto *Usr = dyn_cast<Instruction>(U.getUser())) + if (L->contains(Usr->getParent())) + continue; + // If we have a DT then we can check that uses outside a loop only in + // unreachable block. + if (DT) + assert(!DT->isReachableFromEntry(U) && + "Unexpected user in reachable block"); + U.set(Poison); + } + auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I); + if (!DVI) + continue; + if (!DeadDebugSet.insert(DebugVariable(DVI)).second) + continue; + DeadDebugInst.push_back(DVI); + } + + // After the loop has been deleted all the values defined and modified + // inside the loop are going to be unavailable. 
+ // Since debug values in the loop have been deleted, inserting an undef + // dbg.value truncates the range of any dbg.value before the loop where the + // loop used to be. This is particularly important for constant values. + Instruction *InsertDbgValueBefore = ExitBlock->getFirstNonPHI(); + assert(InsertDbgValueBefore && + "There should be a non-PHI instruction in exit block, else these " + "instructions will have no parent."); + for (auto *DVI : DeadDebugInst) { + DVI->setKillLocation(); + DVI->moveBefore(InsertDbgValueBefore); + } + } + + // Remove the block from the reference counting scheme, so that we can + // delete it freely later. + for (auto *Block : L->blocks()) + Block->dropAllReferences(); + + if (MSSA && VerifyMemorySSA) + MSSA->verifyMemorySSA(); + + if (LI) { + // Erase the instructions and the blocks without having to worry + // about ordering because we already dropped the references. + // NOTE: This iteration is safe because erasing the block does not remove + // its entry from the loop's block list. We do that in the next section. + for (BasicBlock *BB : L->blocks()) + BB->eraseFromParent(); + + // Finally, the blocks from loopinfo. This has to happen late because + // otherwise our loop iterators won't work. + + SmallPtrSet<BasicBlock *, 8> blocks; + blocks.insert(L->block_begin(), L->block_end()); + for (BasicBlock *BB : blocks) + LI->removeBlock(BB); + + // The last step is to update LoopInfo now that we've eliminated this loop. + // Note: LoopInfo::erase remove the given loop and relink its subloops with + // its parent. While removeLoop/removeChildLoop remove the given loop but + // not relink its subloops, which is what we want. + if (Loop *ParentLoop = L->getParentLoop()) { + Loop::iterator I = find(*ParentLoop, L); + assert(I != ParentLoop->end() && "Couldn't find loop"); + ParentLoop->removeChildLoop(I); + } else { + Loop::iterator I = find(*LI, L); + assert(I != LI->end() && "Couldn't find loop"); + LI->removeLoop(I); + } + LI->destroy(L); + } +} + +void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, + LoopInfo &LI, MemorySSA *MSSA) { + auto *Latch = L->getLoopLatch(); + assert(Latch && "multiple latches not yet supported"); + auto *Header = L->getHeader(); + Loop *OutermostLoop = L->getOutermostLoop(); + + SE.forgetLoop(L); + SE.forgetBlockAndLoopDispositions(); + + std::unique_ptr<MemorySSAUpdater> MSSAU; + if (MSSA) + MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); + + // Update the CFG and domtree. We chose to special case a couple of + // of common cases for code quality and test readability reasons. + [&]() -> void { + if (auto *BI = dyn_cast<BranchInst>(Latch->getTerminator())) { + if (!BI->isConditional()) { + DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager); + (void)changeToUnreachable(BI, /*PreserveLCSSA*/ true, &DTU, + MSSAU.get()); + return; + } + + // Conditional latch/exit - note that latch can be shared by inner + // and outer loop so the other target doesn't need to an exit + if (L->isLoopExiting(Latch)) { + // TODO: Generalize ConstantFoldTerminator so that it can be used + // here without invalidating LCSSA or MemorySSA. (Tricky case for + // LCSSA: header is an exit block of a preceeding sibling loop w/o + // dedicated exits.) + const unsigned ExitIdx = L->contains(BI->getSuccessor(0)) ? 
1 : 0; + BasicBlock *ExitBB = BI->getSuccessor(ExitIdx); + + DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager); + Header->removePredecessor(Latch, true); + + IRBuilder<> Builder(BI); + auto *NewBI = Builder.CreateBr(ExitBB); + // Transfer the metadata to the new branch instruction (minus the + // loop info since this is no longer a loop) + NewBI->copyMetadata(*BI, {LLVMContext::MD_dbg, + LLVMContext::MD_annotation}); + + BI->eraseFromParent(); + DTU.applyUpdates({{DominatorTree::Delete, Latch, Header}}); + if (MSSA) + MSSAU->applyUpdates({{DominatorTree::Delete, Latch, Header}}, DT); + return; + } + } + + // General case. By splitting the backedge, and then explicitly making it + // unreachable we gracefully handle corner cases such as switch and invoke + // termiantors. + auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get()); + + DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager); + (void)changeToUnreachable(BackedgeBB->getTerminator(), + /*PreserveLCSSA*/ true, &DTU, MSSAU.get()); + }(); + + // Erase (and destroy) this loop instance. Handles relinking sub-loops + // and blocks within the loop as needed. + LI.erase(L); + + // If the loop we broke had a parent, then changeToUnreachable might have + // caused a block to be removed from the parent loop (see loop_nest_lcssa + // test case in zero-btc.ll for an example), thus changing the parent's + // exit blocks. If that happened, we need to rebuild LCSSA on the outermost + // loop which might have a had a block removed. + if (OutermostLoop != L) + formLCSSARecursively(*OutermostLoop, DT, &LI, &SE); +} + + +/// Checks if \p L has an exiting latch branch. There may also be other +/// exiting blocks. Returns branch instruction terminating the loop +/// latch if above check is successful, nullptr otherwise. +static BranchInst *getExpectedExitLoopLatchBranch(Loop *L) { + BasicBlock *Latch = L->getLoopLatch(); + if (!Latch) + return nullptr; + + BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator()); + if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch)) + return nullptr; + + assert((LatchBR->getSuccessor(0) == L->getHeader() || + LatchBR->getSuccessor(1) == L->getHeader()) && + "At least one edge out of the latch must go to the header"); + + return LatchBR; +} + +/// Return the estimated trip count for any exiting branch which dominates +/// the loop latch. +static std::optional<uint64_t> getEstimatedTripCount(BranchInst *ExitingBranch, + Loop *L, + uint64_t &OrigExitWeight) { + // To estimate the number of times the loop body was executed, we want to + // know the number of times the backedge was taken, vs. the number of times + // we exited the loop. + uint64_t LoopWeight, ExitWeight; + if (!extractBranchWeights(*ExitingBranch, LoopWeight, ExitWeight)) + return std::nullopt; + + if (L->contains(ExitingBranch->getSuccessor(1))) + std::swap(LoopWeight, ExitWeight); + + if (!ExitWeight) + // Don't have a way to return predicated infinite + return std::nullopt; + + OrigExitWeight = ExitWeight; + + // Estimated exit count is a ratio of the loop weight by the weight of the + // edge exiting the loop, rounded to nearest. + uint64_t ExitCount = llvm::divideNearest(LoopWeight, ExitWeight); + // Estimated trip count is one plus estimated exit count. 
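A quick worked example of the estimate computed above, using a scalar stand-in for llvm::divideNearest (the numbers are illustrative): branch weights of 99 for staying in the loop and 1 for exiting give an exit count of 99 and hence an estimated trip count of 100.

// Scalar model of the calculation above; divideNearest(N, D) == (N + D / 2) / D.
inline unsigned long long estimatedTripCount(unsigned long long LoopWeight,
                                             unsigned long long ExitWeight) {
  return (LoopWeight + ExitWeight / 2) / ExitWeight + 1;
}
// estimatedTripCount(99, 1) == 100
// estimatedTripCount(3, 2)  == 3   (ratio rounded to nearest, then +1)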
+ return ExitCount + 1; +} + +std::optional<unsigned> +llvm::getLoopEstimatedTripCount(Loop *L, + unsigned *EstimatedLoopInvocationWeight) { + // Currently we take the estimate exit count only from the loop latch, + // ignoring other exiting blocks. This can overestimate the trip count + // if we exit through another exit, but can never underestimate it. + // TODO: incorporate information from other exits + if (BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L)) { + uint64_t ExitWeight; + if (std::optional<uint64_t> EstTripCount = + getEstimatedTripCount(LatchBranch, L, ExitWeight)) { + if (EstimatedLoopInvocationWeight) + *EstimatedLoopInvocationWeight = ExitWeight; + return *EstTripCount; + } + } + return std::nullopt; +} + +bool llvm::setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, + unsigned EstimatedloopInvocationWeight) { + // At the moment, we currently support changing the estimate trip count of + // the latch branch only. We could extend this API to manipulate estimated + // trip counts for any exit. + BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L); + if (!LatchBranch) + return false; + + // Calculate taken and exit weights. + unsigned LatchExitWeight = 0; + unsigned BackedgeTakenWeight = 0; + + if (EstimatedTripCount > 0) { + LatchExitWeight = EstimatedloopInvocationWeight; + BackedgeTakenWeight = (EstimatedTripCount - 1) * LatchExitWeight; + } + + // Make a swap if back edge is taken when condition is "false". + if (LatchBranch->getSuccessor(0) != L->getHeader()) + std::swap(BackedgeTakenWeight, LatchExitWeight); + + MDBuilder MDB(LatchBranch->getContext()); + + // Set/Update profile metadata. + LatchBranch->setMetadata( + LLVMContext::MD_prof, + MDB.createBranchWeights(BackedgeTakenWeight, LatchExitWeight)); + + return true; +} + +bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop, + ScalarEvolution &SE) { + Loop *OuterL = InnerLoop->getParentLoop(); + if (!OuterL) + return true; + + // Get the backedge taken count for the inner loop + BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); + const SCEV *InnerLoopBECountSC = SE.getExitCount(InnerLoop, InnerLoopLatch); + if (isa<SCEVCouldNotCompute>(InnerLoopBECountSC) || + !InnerLoopBECountSC->getType()->isIntegerTy()) + return false; + + // Get whether count is invariant to the outer loop + ScalarEvolution::LoopDisposition LD = + SE.getLoopDisposition(InnerLoopBECountSC, OuterL); + if (LD != ScalarEvolution::LoopInvariant) + return false; + + return true; +} + +CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) { + switch (RK) { + default: + llvm_unreachable("Unknown min/max recurrence kind"); + case RecurKind::UMin: + return CmpInst::ICMP_ULT; + case RecurKind::UMax: + return CmpInst::ICMP_UGT; + case RecurKind::SMin: + return CmpInst::ICMP_SLT; + case RecurKind::SMax: + return CmpInst::ICMP_SGT; + case RecurKind::FMin: + return CmpInst::FCMP_OLT; + case RecurKind::FMax: + return CmpInst::FCMP_OGT; + } +} + +Value *llvm::createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal, + RecurKind RK, Value *Left, Value *Right) { + if (auto VTy = dyn_cast<VectorType>(Left->getType())) + StartVal = Builder.CreateVectorSplat(VTy->getElementCount(), StartVal); + Value *Cmp = + Builder.CreateCmp(CmpInst::ICMP_NE, Left, StartVal, "rdx.select.cmp"); + return Builder.CreateSelect(Cmp, Left, Right, "rdx.select"); +} + +Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, + Value *Right) { + CmpInst::Predicate Pred = getMinMaxReductionPredicate(RK); + 
Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp"); + Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select"); + return Select; +} + +// Helper to generate an ordered reduction. +Value *llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, + unsigned Op, RecurKind RdxKind) { + unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements(); + + // Extract and apply reduction ops in ascending order: + // e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[VF-1] + Value *Result = Acc; + for (unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) { + Value *Ext = + Builder.CreateExtractElement(Src, Builder.getInt32(ExtractIdx)); + + if (Op != Instruction::ICmp && Op != Instruction::FCmp) { + Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext, + "bin.rdx"); + } else { + assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) && + "Invalid min/max"); + Result = createMinMaxOp(Builder, RdxKind, Result, Ext); + } + } + + return Result; +} + +// Helper to generate a log2 shuffle reduction. +Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src, + unsigned Op, RecurKind RdxKind) { + unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements(); + // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles + // and vector ops, reducing the set of values being computed by half each + // round. + assert(isPowerOf2_32(VF) && + "Reduction emission only supported for pow2 vectors!"); + // Note: fast-math-flags flags are controlled by the builder configuration + // and are assumed to apply to all generated arithmetic instructions. Other + // poison generating flags (nsw/nuw/inbounds/inrange/exact) are not part + // of the builder configuration, and since they're not passed explicitly, + // will never be relevant here. Note that it would be generally unsound to + // propagate these from an intrinsic call to the expansion anyways as we/ + // change the order of operations. + Value *TmpVec = Src; + SmallVector<int, 32> ShuffleMask(VF); + for (unsigned i = VF; i != 1; i >>= 1) { + // Move the upper half of the vector to the lower half. + for (unsigned j = 0; j != i / 2; ++j) + ShuffleMask[j] = i / 2 + j; + + // Fill the rest of the mask with undef. + std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1); + + Value *Shuf = Builder.CreateShuffleVector(TmpVec, ShuffleMask, "rdx.shuf"); + + if (Op != Instruction::ICmp && Op != Instruction::FCmp) { + TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf, + "bin.rdx"); + } else { + assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) && + "Invalid min/max"); + TmpVec = createMinMaxOp(Builder, RdxKind, TmpVec, Shuf); + } + } + // The result is in the first element of the vector. + return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); +} + +Value *llvm::createSelectCmpTargetReduction(IRBuilderBase &Builder, + const TargetTransformInfo *TTI, + Value *Src, + const RecurrenceDescriptor &Desc, + PHINode *OrigPhi) { + assert(RecurrenceDescriptor::isSelectCmpRecurrenceKind( + Desc.getRecurrenceKind()) && + "Unexpected reduction kind"); + Value *InitVal = Desc.getRecurrenceStartValue(); + Value *NewVal = nullptr; + + // First use the original phi to determine the new value we're trying to + // select from in the loop. 
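Before continuing with the select-compare ("any-of") reduction below, here is a concrete view of the log2 shuffle scheme used by getShuffleReduction above, written out on a plain array for VF = 8 and integer addition (purely illustrative):

// Scalar model of the shuffle reduction for VF = 8, Op = add.
inline int shuffleReduceAdd8(const int (&V)[8]) {
  int Tmp[8];
  for (int J = 0; J != 8; ++J)
    Tmp[J] = V[J];
  for (int Width = 8; Width != 1; Width /= 2)   // log2(VF) = 3 rounds
    for (int J = 0; J != Width / 2; ++J)
      Tmp[J] += Tmp[Width / 2 + J];             // fold the upper half onto the lower half
  return Tmp[0];                                // the result ends up in element 0
}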
+ SelectInst *SI = nullptr; + for (auto *U : OrigPhi->users()) { + if ((SI = dyn_cast<SelectInst>(U))) + break; + } + assert(SI && "One user of the original phi should be a select"); + + if (SI->getTrueValue() == OrigPhi) + NewVal = SI->getFalseValue(); + else { + assert(SI->getFalseValue() == OrigPhi && + "At least one input to the select should be the original Phi"); + NewVal = SI->getTrueValue(); + } + + // Create a splat vector with the new value and compare this to the vector + // we want to reduce. + ElementCount EC = cast<VectorType>(Src->getType())->getElementCount(); + Value *Right = Builder.CreateVectorSplat(EC, InitVal); + Value *Cmp = + Builder.CreateCmp(CmpInst::ICMP_NE, Src, Right, "rdx.select.cmp"); + + // If any predicate is true it means that we want to select the new value. + Cmp = Builder.CreateOrReduce(Cmp); + return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select"); +} + +Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, + const TargetTransformInfo *TTI, + Value *Src, RecurKind RdxKind) { + auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType(); + switch (RdxKind) { + case RecurKind::Add: + return Builder.CreateAddReduce(Src); + case RecurKind::Mul: + return Builder.CreateMulReduce(Src); + case RecurKind::And: + return Builder.CreateAndReduce(Src); + case RecurKind::Or: + return Builder.CreateOrReduce(Src); + case RecurKind::Xor: + return Builder.CreateXorReduce(Src); + case RecurKind::FMulAdd: + case RecurKind::FAdd: + return Builder.CreateFAddReduce(ConstantFP::getNegativeZero(SrcVecEltTy), + Src); + case RecurKind::FMul: + return Builder.CreateFMulReduce(ConstantFP::get(SrcVecEltTy, 1.0), Src); + case RecurKind::SMax: + return Builder.CreateIntMaxReduce(Src, true); + case RecurKind::SMin: + return Builder.CreateIntMinReduce(Src, true); + case RecurKind::UMax: + return Builder.CreateIntMaxReduce(Src, false); + case RecurKind::UMin: + return Builder.CreateIntMinReduce(Src, false); + case RecurKind::FMax: + return Builder.CreateFPMaxReduce(Src); + case RecurKind::FMin: + return Builder.CreateFPMinReduce(Src); + default: + llvm_unreachable("Unhandled opcode"); + } +} + +Value *llvm::createTargetReduction(IRBuilderBase &B, + const TargetTransformInfo *TTI, + const RecurrenceDescriptor &Desc, Value *Src, + PHINode *OrigPhi) { + // TODO: Support in-order reductions based on the recurrence descriptor. + // All ops in the reduction inherit fast-math-flags from the recurrence + // descriptor. + IRBuilderBase::FastMathFlagGuard FMFGuard(B); + B.setFastMathFlags(Desc.getFastMathFlags()); + + RecurKind RK = Desc.getRecurrenceKind(); + if (RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) + return createSelectCmpTargetReduction(B, TTI, Src, Desc, OrigPhi); + + return createSimpleTargetReduction(B, TTI, Src, RK); +} + +Value *llvm::createOrderedReduction(IRBuilderBase &B, + const RecurrenceDescriptor &Desc, + Value *Src, Value *Start) { + assert((Desc.getRecurrenceKind() == RecurKind::FAdd || + Desc.getRecurrenceKind() == RecurKind::FMulAdd) && + "Unexpected reduction kind"); + assert(Src->getType()->isVectorTy() && "Expected a vector type"); + assert(!Start->getType()->isVectorTy() && "Expected a scalar type"); + + return B.CreateFAddReduce(Start, Src); +} + +void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue, + bool IncludeWrapFlags) { + auto *VecOp = dyn_cast<Instruction>(I); + if (!VecOp) + return; + auto *Intersection = (OpValue == nullptr) ? 
dyn_cast<Instruction>(VL[0]) + : dyn_cast<Instruction>(OpValue); + if (!Intersection) + return; + const unsigned Opcode = Intersection->getOpcode(); + VecOp->copyIRFlags(Intersection, IncludeWrapFlags); + for (auto *V : VL) { + auto *Instr = dyn_cast<Instruction>(V); + if (!Instr) + continue; + if (OpValue == nullptr || Opcode == Instr->getOpcode()) + VecOp->andIRFlags(V); + } +} + +bool llvm::isKnownNegativeInLoop(const SCEV *S, const Loop *L, + ScalarEvolution &SE) { + const SCEV *Zero = SE.getZero(S->getType()); + return SE.isAvailableAtLoopEntry(S, L) && + SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, S, Zero); +} + +bool llvm::isKnownNonNegativeInLoop(const SCEV *S, const Loop *L, + ScalarEvolution &SE) { + const SCEV *Zero = SE.getZero(S->getType()); + return SE.isAvailableAtLoopEntry(S, L) && + SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGE, S, Zero); +} + +bool llvm::cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, + bool Signed) { + unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth(); + APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) : + APInt::getMinValue(BitWidth); + auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; + return SE.isAvailableAtLoopEntry(S, L) && + SE.isLoopEntryGuardedByCond(L, Predicate, S, + SE.getConstant(Min)); +} + +bool llvm::cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, + bool Signed) { + unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth(); + APInt Max = Signed ? APInt::getSignedMaxValue(BitWidth) : + APInt::getMaxValue(BitWidth); + auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; + return SE.isAvailableAtLoopEntry(S, L) && + SE.isLoopEntryGuardedByCond(L, Predicate, S, + SE.getConstant(Max)); +} + +//===----------------------------------------------------------------------===// +// rewriteLoopExitValues - Optimize IV users outside the loop. +// As a side effect, reduces the amount of IV processing within the loop. +//===----------------------------------------------------------------------===// + +static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) { + SmallPtrSet<const Instruction *, 8> Visited; + SmallVector<const Instruction *, 8> WorkList; + Visited.insert(I); + WorkList.push_back(I); + while (!WorkList.empty()) { + const Instruction *Curr = WorkList.pop_back_val(); + // This use is outside the loop, nothing to do. + if (!L->contains(Curr)) + continue; + // Do we assume it is a "hard" use which will not be eliminated easily? + if (Curr->mayHaveSideEffects()) + return true; + // Otherwise, add all its users to worklist. + for (const auto *U : Curr->users()) { + auto *UI = cast<Instruction>(U); + if (Visited.insert(UI).second) + WorkList.push_back(UI); + } + } + return false; +} + +// Collect information about PHI nodes which can be transformed in +// rewriteLoopExitValues. +struct RewritePhi { + PHINode *PN; // For which PHI node is this replacement? + unsigned Ith; // For which incoming value? + const SCEV *ExpansionSCEV; // The SCEV of the incoming value we are rewriting. + Instruction *ExpansionPoint; // Where we'd like to expand that SCEV? + bool HighCost; // Is this expansion a high-cost? + + RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt, + bool H) + : PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt), + HighCost(H) {} +}; + +// Check whether it is possible to delete the loop after rewriting exit +// value. 
If it is possible, ignore ReplaceExitValue and do rewriting +// aggressively. +static bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) { + BasicBlock *Preheader = L->getLoopPreheader(); + // If there is no preheader, the loop will not be deleted. + if (!Preheader) + return false; + + // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1. + // We obviate multiple ExitingBlocks case for simplicity. + // TODO: If we see testcase with multiple ExitingBlocks can be deleted + // after exit value rewriting, we can enhance the logic here. + SmallVector<BasicBlock *, 4> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + SmallVector<BasicBlock *, 8> ExitBlocks; + L->getUniqueExitBlocks(ExitBlocks); + if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1) + return false; + + BasicBlock *ExitBlock = ExitBlocks[0]; + BasicBlock::iterator BI = ExitBlock->begin(); + while (PHINode *P = dyn_cast<PHINode>(BI)) { + Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]); + + // If the Incoming value of P is found in RewritePhiSet, we know it + // could be rewritten to use a loop invariant value in transformation + // phase later. Skip it in the loop invariant check below. + bool found = false; + for (const RewritePhi &Phi : RewritePhiSet) { + unsigned i = Phi.Ith; + if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) { + found = true; + break; + } + } + + Instruction *I; + if (!found && (I = dyn_cast<Instruction>(Incoming))) + if (!L->hasLoopInvariantOperands(I)) + return false; + + ++BI; + } + + for (auto *BB : L->blocks()) + if (llvm::any_of(*BB, [](Instruction &I) { + return I.mayHaveSideEffects(); + })) + return false; + + return true; +} + +/// Checks if it is safe to call InductionDescriptor::isInductionPHI for \p Phi, +/// and returns true if this Phi is an induction phi in the loop. When +/// isInductionPHI returns true, \p ID will be also be set by isInductionPHI. +static bool checkIsIndPhi(PHINode *Phi, Loop *L, ScalarEvolution *SE, + InductionDescriptor &ID) { + if (!Phi) + return false; + if (!L->getLoopPreheader()) + return false; + if (Phi->getParent() != L->getHeader()) + return false; + return InductionDescriptor::isInductionPHI(Phi, L, SE, ID); +} + +int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, + ScalarEvolution *SE, + const TargetTransformInfo *TTI, + SCEVExpander &Rewriter, DominatorTree *DT, + ReplaceExitVal ReplaceExitValue, + SmallVector<WeakTrackingVH, 16> &DeadInsts) { + // Check a pre-condition. + assert(L->isRecursivelyLCSSAForm(*DT, *LI) && + "Indvars did not preserve LCSSA!"); + + SmallVector<BasicBlock*, 8> ExitBlocks; + L->getUniqueExitBlocks(ExitBlocks); + + SmallVector<RewritePhi, 8> RewritePhiSet; + // Find all values that are computed inside the loop, but used outside of it. + // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan + // the exit blocks of the loop to find them. + for (BasicBlock *ExitBB : ExitBlocks) { + // If there are no PHI nodes in this exit block, then no values defined + // inside the loop are used on this path, skip it. + PHINode *PN = dyn_cast<PHINode>(ExitBB->begin()); + if (!PN) continue; + + unsigned NumPreds = PN->getNumIncomingValues(); + + // Iterate over all of the PHI nodes. 
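Before the per-PHI machinery below, a source-level picture of what a successful exit-value rewrite buys (illustrative only; assumes N is non-negative so the closed form matches the loop):

// Before: the value of Sum after the loop is only available by running the loop.
int sumBefore(int N) {
  int Sum = 0;
  for (int I = 0; I != N; ++I)
    Sum += 4;
  return Sum;          // the use outside the loop, funneled through an LCSSA PHI
}
// After: SCEV evaluates Sum at the exit to 4 * N, so the outside user reads the
// closed form and the loop itself may become dead.
int sumAfter(int N) {
  return 4 * N;
}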
+ BasicBlock::iterator BBI = ExitBB->begin(); + while ((PN = dyn_cast<PHINode>(BBI++))) { + if (PN->use_empty()) + continue; // dead use, don't replace it + + if (!SE->isSCEVable(PN->getType())) + continue; + + // Iterate over all of the values in all the PHI nodes. + for (unsigned i = 0; i != NumPreds; ++i) { + // If the value being merged in is not integer or is not defined + // in the loop, skip it. + Value *InVal = PN->getIncomingValue(i); + if (!isa<Instruction>(InVal)) + continue; + + // If this pred is for a subloop, not L itself, skip it. + if (LI->getLoopFor(PN->getIncomingBlock(i)) != L) + continue; // The Block is in a subloop, skip it. + + // Check that InVal is defined in the loop. + Instruction *Inst = cast<Instruction>(InVal); + if (!L->contains(Inst)) + continue; + + // Find exit values which are induction variables in the loop, and are + // unused in the loop, with the only use being the exit block PhiNode, + // and the induction variable update binary operator. + // The exit value can be replaced with the final value when it is cheap + // to do so. + if (ReplaceExitValue == UnusedIndVarInLoop) { + InductionDescriptor ID; + PHINode *IndPhi = dyn_cast<PHINode>(Inst); + if (IndPhi) { + if (!checkIsIndPhi(IndPhi, L, SE, ID)) + continue; + // This is an induction PHI. Check that the only users are PHI + // nodes, and induction variable update binary operators. + if (llvm::any_of(Inst->users(), [&](User *U) { + if (!isa<PHINode>(U) && !isa<BinaryOperator>(U)) + return true; + BinaryOperator *B = dyn_cast<BinaryOperator>(U); + if (B && B != ID.getInductionBinOp()) + return true; + return false; + })) + continue; + } else { + // If it is not an induction phi, it must be an induction update + // binary operator with an induction phi user. + BinaryOperator *B = dyn_cast<BinaryOperator>(Inst); + if (!B) + continue; + if (llvm::any_of(Inst->users(), [&](User *U) { + PHINode *Phi = dyn_cast<PHINode>(U); + if (Phi != PN && !checkIsIndPhi(Phi, L, SE, ID)) + return true; + return false; + })) + continue; + if (B != ID.getInductionBinOp()) + continue; + } + } + + // Okay, this instruction has a user outside of the current loop + // and varies predictably *inside* the loop. Evaluate the value it + // contains when the loop exits, if possible. We prefer to start with + // expressions which are true for all exits (so as to maximize + // expression reuse by the SCEVExpander), but resort to per-exit + // evaluation if that fails. + const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); + if (isa<SCEVCouldNotCompute>(ExitValue) || + !SE->isLoopInvariant(ExitValue, L) || + !Rewriter.isSafeToExpand(ExitValue)) { + // TODO: This should probably be sunk into SCEV in some way; maybe a + // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for + // most SCEV expressions and other recurrence types (e.g. shift + // recurrences). Is there existing code we can reuse? + const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i)); + if (isa<SCEVCouldNotCompute>(ExitCount)) + continue; + if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst))) + if (AddRec->getLoop() == L) + ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE); + if (isa<SCEVCouldNotCompute>(ExitValue) || + !SE->isLoopInvariant(ExitValue, L) || + !Rewriter.isSafeToExpand(ExitValue)) + continue; + } + + // Computing the value outside of the loop brings no benefit if it is + // definitely used inside the loop in a way which can not be optimized + // away. 
Avoid doing so unless we know we have a value which computes + // the ExitValue already. TODO: This should be merged into SCEV + // expander to leverage its knowledge of existing expressions. + if (ReplaceExitValue != AlwaysRepl && !isa<SCEVConstant>(ExitValue) && + !isa<SCEVUnknown>(ExitValue) && hasHardUserWithinLoop(L, Inst)) + continue; + + // Check if expansions of this SCEV would count as being high cost. + bool HighCost = Rewriter.isHighCostExpansion( + ExitValue, L, SCEVCheapExpansionBudget, TTI, Inst); + + // Note that we must not perform expansions until after + // we query *all* the costs, because if we perform temporary expansion + // inbetween, one that we might not intend to keep, said expansion + // *may* affect cost calculation of the the next SCEV's we'll query, + // and next SCEV may errneously get smaller cost. + + // Collect all the candidate PHINodes to be rewritten. + Instruction *InsertPt = + (isa<PHINode>(Inst) || isa<LandingPadInst>(Inst)) ? + &*Inst->getParent()->getFirstInsertionPt() : Inst; + RewritePhiSet.emplace_back(PN, i, ExitValue, InsertPt, HighCost); + } + } + } + + // TODO: evaluate whether it is beneficial to change how we calculate + // high-cost: if we have SCEV 'A' which we know we will expand, should we + // calculate the cost of other SCEV's after expanding SCEV 'A', thus + // potentially giving cost bonus to those other SCEV's? + + bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet); + int NumReplaced = 0; + + // Transformation. + for (const RewritePhi &Phi : RewritePhiSet) { + PHINode *PN = Phi.PN; + + // Only do the rewrite when the ExitValue can be expanded cheaply. + // If LoopCanBeDel is true, rewrite exit value aggressively. + if ((ReplaceExitValue == OnlyCheapRepl || + ReplaceExitValue == UnusedIndVarInLoop) && + !LoopCanBeDel && Phi.HighCost) + continue; + + Value *ExitVal = Rewriter.expandCodeFor( + Phi.ExpansionSCEV, Phi.PN->getType(), Phi.ExpansionPoint); + + LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = " << *ExitVal + << '\n' + << " LoopVal = " << *(Phi.ExpansionPoint) << "\n"); + +#ifndef NDEBUG + // If we reuse an instruction from a loop which is neither L nor one of + // its containing loops, we end up breaking LCSSA form for this loop by + // creating a new use of its instruction. + if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal)) + if (auto *EVL = LI->getLoopFor(ExitInsn->getParent())) + if (EVL != L) + assert(EVL->contains(L) && "LCSSA breach detected!"); +#endif + + NumReplaced++; + Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith)); + PN->setIncomingValue(Phi.Ith, ExitVal); + // It's necessary to tell ScalarEvolution about this explicitly so that + // it can walk the def-use list and forget all SCEVs, as it may not be + // watching the PHI itself. Once the new exit value is in place, there + // may not be a def-use connection between the loop and every instruction + // which got a SCEVAddRecExpr for that loop. + SE->forgetValue(PN); + + // If this instruction is dead now, delete it. Don't do it now to avoid + // invalidating iterators. + if (isInstructionTriviallyDead(Inst, TLI)) + DeadInsts.push_back(Inst); + + // Replace PN with ExitVal if that is legal and does not break LCSSA. + if (PN->getNumIncomingValues() == 1 && + LI->replacementPreservesLCSSAForm(PN, ExitVal)) { + PN->replaceAllUsesWith(ExitVal); + PN->eraseFromParent(); + } + } + + // The insertion point instruction may have been deleted; clear it out + // so that the rewriter doesn't trip over it later. 
+ Rewriter.clearInsertPoint(); + return NumReplaced; +} + +/// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for +/// \p OrigLoop. +void llvm::setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop, + Loop *RemainderLoop, uint64_t UF) { + assert(UF > 0 && "Zero unrolled factor is not supported"); + assert(UnrolledLoop != RemainderLoop && + "Unrolled and Remainder loops are expected to distinct"); + + // Get number of iterations in the original scalar loop. + unsigned OrigLoopInvocationWeight = 0; + std::optional<unsigned> OrigAverageTripCount = + getLoopEstimatedTripCount(OrigLoop, &OrigLoopInvocationWeight); + if (!OrigAverageTripCount) + return; + + // Calculate number of iterations in unrolled loop. + unsigned UnrolledAverageTripCount = *OrigAverageTripCount / UF; + // Calculate number of iterations for remainder loop. + unsigned RemainderAverageTripCount = *OrigAverageTripCount % UF; + + setLoopEstimatedTripCount(UnrolledLoop, UnrolledAverageTripCount, + OrigLoopInvocationWeight); + setLoopEstimatedTripCount(RemainderLoop, RemainderAverageTripCount, + OrigLoopInvocationWeight); +} + +/// Utility that implements appending of loops onto a worklist. +/// Loops are added in preorder (analogous for reverse postorder for trees), +/// and the worklist is processed LIFO. +template <typename RangeT> +void llvm::appendReversedLoopsToWorklist( + RangeT &&Loops, SmallPriorityWorklist<Loop *, 4> &Worklist) { + // We use an internal worklist to build up the preorder traversal without + // recursion. + SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist; + + // We walk the initial sequence of loops in reverse because we generally want + // to visit defs before uses and the worklist is LIFO. + for (Loop *RootL : Loops) { + assert(PreOrderLoops.empty() && "Must start with an empty preorder walk."); + assert(PreOrderWorklist.empty() && + "Must start with an empty preorder walk worklist."); + PreOrderWorklist.push_back(RootL); + do { + Loop *L = PreOrderWorklist.pop_back_val(); + PreOrderWorklist.append(L->begin(), L->end()); + PreOrderLoops.push_back(L); + } while (!PreOrderWorklist.empty()); + + Worklist.insert(std::move(PreOrderLoops)); + PreOrderLoops.clear(); + } +} + +template <typename RangeT> +void llvm::appendLoopsToWorklist(RangeT &&Loops, + SmallPriorityWorklist<Loop *, 4> &Worklist) { + appendReversedLoopsToWorklist(reverse(Loops), Worklist); +} + +template void llvm::appendLoopsToWorklist<ArrayRef<Loop *> &>( + ArrayRef<Loop *> &Loops, SmallPriorityWorklist<Loop *, 4> &Worklist); + +template void +llvm::appendLoopsToWorklist<Loop &>(Loop &L, + SmallPriorityWorklist<Loop *, 4> &Worklist); + +void llvm::appendLoopsToWorklist(LoopInfo &LI, + SmallPriorityWorklist<Loop *, 4> &Worklist) { + appendReversedLoopsToWorklist(LI, Worklist); +} + +Loop *llvm::cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, + LoopInfo *LI, LPPassManager *LPM) { + Loop &New = *LI->AllocateLoop(); + if (PL) + PL->addChildLoop(&New); + else + LI->addTopLevelLoop(&New); + + if (LPM) + LPM->addLoop(New); + + // Add all of the blocks in L to the new loop. + for (BasicBlock *BB : L->blocks()) + if (LI->getLoopFor(BB) == L) + New.addBasicBlockToLoop(cast<BasicBlock>(VM[BB]), *LI); + + // Add all of the subloops to the new loop. + for (Loop *I : *L) + cloneLoop(I, &New, VM, LI, LPM); + + return &New; +} + +/// IR Values for the lower and upper bounds of a pointer evolution. We +/// need to use value-handles because SCEV expansion can invalidate previously +/// expanded values. 
Thus expansion of a pointer can invalidate the bounds for +/// a previous one. +struct PointerBounds { + TrackingVH<Value> Start; + TrackingVH<Value> End; +}; + +/// Expand code for the lower and upper bound of the pointer group \p CG +/// in \p TheLoop. \return the values for the bounds. +static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG, + Loop *TheLoop, Instruction *Loc, + SCEVExpander &Exp) { + LLVMContext &Ctx = Loc->getContext(); + Type *PtrArithTy = Type::getInt8PtrTy(Ctx, CG->AddressSpace); + + Value *Start = nullptr, *End = nullptr; + LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); + Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc); + End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc); + if (CG->NeedsFreeze) { + IRBuilder<> Builder(Loc); + Start = Builder.CreateFreeze(Start, Start->getName() + ".fr"); + End = Builder.CreateFreeze(End, End->getName() + ".fr"); + } + LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n"); + return {Start, End}; +} + +/// Turns a collection of checks into a collection of expanded upper and +/// lower bounds for both pointers in the check. +static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> +expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L, + Instruction *Loc, SCEVExpander &Exp) { + SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds; + + // Here we're relying on the SCEV Expander's cache to only emit code for the + // same bounds once. + transform(PointerChecks, std::back_inserter(ChecksWithBounds), + [&](const RuntimePointerCheck &Check) { + PointerBounds First = expandBounds(Check.first, L, Loc, Exp), + Second = expandBounds(Check.second, L, Loc, Exp); + return std::make_pair(First, Second); + }); + + return ChecksWithBounds; +} + +Value *llvm::addRuntimeChecks( + Instruction *Loc, Loop *TheLoop, + const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, + SCEVExpander &Exp) { + // TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible. + // TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if possible + auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, Exp); + + LLVMContext &Ctx = Loc->getContext(); + IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx, + Loc->getModule()->getDataLayout()); + ChkBuilder.SetInsertPoint(Loc); + // Our instructions might fold to a constant. + Value *MemoryRuntimeCheck = nullptr; + + for (const auto &Check : ExpandedChecks) { + const PointerBounds &A = Check.first, &B = Check.second; + // Check if two pointers (A and B) conflict where conflict is computed as: + // start(A) <= end(B) && start(B) <= end(A) + unsigned AS0 = A.Start->getType()->getPointerAddressSpace(); + unsigned AS1 = B.Start->getType()->getPointerAddressSpace(); + + assert((AS0 == B.End->getType()->getPointerAddressSpace()) && + (AS1 == A.End->getType()->getPointerAddressSpace()) && + "Trying to bounds check pointers with different address spaces"); + + Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0); + Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1); + + Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc"); + Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc"); + Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc"); + Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc"); + + // [A|B].Start points to the first accessed byte under base [A|B]. + // [A|B].End points to the last accessed byte, plus one. 
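+    //
+    // In plain C terms the check built below is roughly the following sketch
+    // over half-open intervals [Start, End); the names are illustrative only:
+    //
+    //   bool mayConflict(const char *AStart, const char *AEnd,
+    //                    const char *BStart, const char *BEnd) {
+    //     return BStart < AEnd && AStart < BEnd;
+    //   }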
+ // There is no conflict when the intervals are disjoint: + // NoConflict = (B.Start >= A.End) || (A.Start >= B.End) + // + // bound0 = (B.Start < A.End) + // bound1 = (A.Start < B.End) + // IsConflict = bound0 & bound1 + Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0"); + Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1"); + Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict"); + if (MemoryRuntimeCheck) { + IsConflict = + ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx"); + } + MemoryRuntimeCheck = IsConflict; + } + + return MemoryRuntimeCheck; +} + +Value *llvm::addDiffRuntimeChecks( + Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander, + function_ref<Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC) { + + LLVMContext &Ctx = Loc->getContext(); + IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx, + Loc->getModule()->getDataLayout()); + ChkBuilder.SetInsertPoint(Loc); + // Our instructions might fold to a constant. + Value *MemoryRuntimeCheck = nullptr; + + for (const auto &C : Checks) { + Type *Ty = C.SinkStart->getType(); + // Compute VF * IC * AccessSize. + auto *VFTimesUFTimesSize = + ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()), + ConstantInt::get(Ty, IC * C.AccessSize)); + Value *Sink = Expander.expandCodeFor(C.SinkStart, Ty, Loc); + Value *Src = Expander.expandCodeFor(C.SrcStart, Ty, Loc); + if (C.NeedsFreeze) { + IRBuilder<> Builder(Loc); + Sink = Builder.CreateFreeze(Sink, Sink->getName() + ".fr"); + Src = Builder.CreateFreeze(Src, Src->getName() + ".fr"); + } + Value *Diff = ChkBuilder.CreateSub(Sink, Src); + Value *IsConflict = + ChkBuilder.CreateICmpULT(Diff, VFTimesUFTimesSize, "diff.check"); + + if (MemoryRuntimeCheck) { + IsConflict = + ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx"); + } + MemoryRuntimeCheck = IsConflict; + } + + return MemoryRuntimeCheck; +} + +std::optional<IVConditionInfo> +llvm::hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, + const MemorySSA &MSSA, AAResults &AA) { + auto *TI = dyn_cast<BranchInst>(L.getHeader()->getTerminator()); + if (!TI || !TI->isConditional()) + return {}; + + auto *CondI = dyn_cast<CmpInst>(TI->getCondition()); + // The case with the condition outside the loop should already be handled + // earlier. + if (!CondI || !L.contains(CondI)) + return {}; + + SmallVector<Instruction *> InstToDuplicate; + InstToDuplicate.push_back(CondI); + + SmallVector<Value *, 4> WorkList; + WorkList.append(CondI->op_begin(), CondI->op_end()); + + SmallVector<MemoryAccess *, 4> AccessesToCheck; + SmallVector<MemoryLocation, 4> AccessedLocs; + while (!WorkList.empty()) { + Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val()); + if (!I || !L.contains(I)) + continue; + + // TODO: support additional instructions. + if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I)) + return {}; + + // Do not duplicate volatile and atomic loads. + if (auto *LI = dyn_cast<LoadInst>(I)) + if (LI->isVolatile() || LI->isAtomic()) + return {}; + + InstToDuplicate.push_back(I); + if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) { + if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) { + // Queue the defining access to check for alias checks. + AccessesToCheck.push_back(MemUse->getDefiningAccess()); + AccessedLocs.push_back(MemoryLocation::get(I)); + } else { + // MemoryDefs may clobber the location or may be atomic memory + // operations. Bail out. 
+ return {}; + } + } + WorkList.append(I->op_begin(), I->op_end()); + } + + if (InstToDuplicate.empty()) + return {}; + + SmallVector<BasicBlock *, 4> ExitingBlocks; + L.getExitingBlocks(ExitingBlocks); + auto HasNoClobbersOnPath = + [&L, &AA, &AccessedLocs, &ExitingBlocks, &InstToDuplicate, + MSSAThreshold](BasicBlock *Succ, BasicBlock *Header, + SmallVector<MemoryAccess *, 4> AccessesToCheck) + -> std::optional<IVConditionInfo> { + IVConditionInfo Info; + // First, collect all blocks in the loop that are on a patch from Succ + // to the header. + SmallVector<BasicBlock *, 4> WorkList; + WorkList.push_back(Succ); + WorkList.push_back(Header); + SmallPtrSet<BasicBlock *, 4> Seen; + Seen.insert(Header); + Info.PathIsNoop &= + all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); }); + + while (!WorkList.empty()) { + BasicBlock *Current = WorkList.pop_back_val(); + if (!L.contains(Current)) + continue; + const auto &SeenIns = Seen.insert(Current); + if (!SeenIns.second) + continue; + + Info.PathIsNoop &= all_of( + *Current, [](Instruction &I) { return !I.mayHaveSideEffects(); }); + WorkList.append(succ_begin(Current), succ_end(Current)); + } + + // Require at least 2 blocks on a path through the loop. This skips + // paths that directly exit the loop. + if (Seen.size() < 2) + return {}; + + // Next, check if there are any MemoryDefs that are on the path through + // the loop (in the Seen set) and they may-alias any of the locations in + // AccessedLocs. If that is the case, they may modify the condition and + // partial unswitching is not possible. + SmallPtrSet<MemoryAccess *, 4> SeenAccesses; + while (!AccessesToCheck.empty()) { + MemoryAccess *Current = AccessesToCheck.pop_back_val(); + auto SeenI = SeenAccesses.insert(Current); + if (!SeenI.second || !Seen.contains(Current->getBlock())) + continue; + + // Bail out if exceeded the threshold. + if (SeenAccesses.size() >= MSSAThreshold) + return {}; + + // MemoryUse are read-only accesses. + if (isa<MemoryUse>(Current)) + continue; + + // For a MemoryDef, check if is aliases any of the location feeding + // the original condition. + if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) { + if (any_of(AccessedLocs, [&AA, CurrentDef](MemoryLocation &Loc) { + return isModSet( + AA.getModRefInfo(CurrentDef->getMemoryInst(), Loc)); + })) + return {}; + } + + for (Use &U : Current->uses()) + AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser())); + } + + // We could also allow loops with known trip counts without mustprogress, + // but ScalarEvolution may not be available. + Info.PathIsNoop &= isMustProgress(&L); + + // If the path is considered a no-op so far, check if it reaches a + // single exit block without any phis. This ensures no values from the + // loop are used outside of the loop. + if (Info.PathIsNoop) { + for (auto *Exiting : ExitingBlocks) { + if (!Seen.contains(Exiting)) + continue; + for (auto *Succ : successors(Exiting)) { + if (L.contains(Succ)) + continue; + + Info.PathIsNoop &= Succ->phis().empty() && + (!Info.ExitForPath || Info.ExitForPath == Succ); + if (!Info.PathIsNoop) + break; + assert((!Info.ExitForPath || Info.ExitForPath == Succ) && + "cannot have multiple exit blocks"); + Info.ExitForPath = Succ; + } + } + } + if (!Info.ExitForPath) + Info.PathIsNoop = false; + + Info.InstToDuplicate = InstToDuplicate; + return Info; + }; + + // If we branch to the same successor, partial unswitching will not be + // beneficial. 
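+  //
+  // For illustration, the kind of loop this analysis looks for is roughly
+  //
+  //   while (true) {
+  //     if (*p != 0)       // re-loaded each iteration but never stored to
+  //       sum += a[i++];   // path back to the header, does not clobber *p
+  //     else
+  //       break;           // side-effect-free path to the exit
+  //   }
+  //
+  // where the branch is effectively invariant once the first load of *p is
+  // known, so the loop can be partially unswitched on it. With identical
+  // successors there is no such second path, hence the check below.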
+ if (TI->getSuccessor(0) == TI->getSuccessor(1)) + return {}; + + if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L.getHeader(), + AccessesToCheck)) { + Info->KnownValue = ConstantInt::getTrue(TI->getContext()); + return Info; + } + if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L.getHeader(), + AccessesToCheck)) { + Info->KnownValue = ConstantInt::getFalse(TI->getContext()); + return Info; + } + + return {}; +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LoopVersioning.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LoopVersioning.cpp new file mode 100644 index 0000000000..17e71cf5a6 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/LoopVersioning.cpp @@ -0,0 +1,356 @@ +//===- LoopVersioning.cpp - Utility to version a loop ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a utility class to perform loop versioning. The versioned +// loop speculates that otherwise may-aliasing memory accesses don't overlap and +// emits checks to prove this. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LoopVersioning.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/InstSimplifyFolder.h" +#include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" + +using namespace llvm; + +static cl::opt<bool> + AnnotateNoAlias("loop-version-annotate-no-alias", cl::init(true), + cl::Hidden, + cl::desc("Add no-alias annotation for instructions that " + "are disambiguated by memchecks")); + +LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, + ArrayRef<RuntimePointerCheck> Checks, Loop *L, + LoopInfo *LI, DominatorTree *DT, + ScalarEvolution *SE) + : VersionedLoop(L), AliasChecks(Checks.begin(), Checks.end()), + Preds(LAI.getPSE().getPredicate()), LAI(LAI), LI(LI), DT(DT), + SE(SE) { +} + +void LoopVersioning::versionLoop( + const SmallVectorImpl<Instruction *> &DefsUsedOutside) { + assert(VersionedLoop->getUniqueExitBlock() && "No single exit block"); + assert(VersionedLoop->isLoopSimplifyForm() && + "Loop is not in loop-simplify form"); + + Value *MemRuntimeCheck; + Value *SCEVRuntimeCheck; + Value *RuntimeCheck = nullptr; + + // Add the memcheck in the original preheader (this is empty initially). 
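+  //
+  // The overall shape produced below is roughly (block names approximate):
+  //
+  //   <header>.lver.check:
+  //     %check = <memory checks> | <SCEV predicate checks>
+  //     br %check, %orig.ph, %versioned.ph      ; true => take the fall-back
+  //
+  //   versioned.ph -> VersionedLoop    (assumptions hold, may get noalias info)
+  //   orig.ph      -> NonVersionedLoop (".lver.orig" clone, no assumptions)
+  //
+  // and both loops rejoin in the original exit block, where addPHINodes
+  // merges the values listed in DefsUsedOutside.
+  //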
+ BasicBlock *RuntimeCheckBB = VersionedLoop->getLoopPreheader(); + const auto &RtPtrChecking = *LAI.getRuntimePointerChecking(); + + SCEVExpander Exp2(*RtPtrChecking.getSE(), + VersionedLoop->getHeader()->getModule()->getDataLayout(), + "induction"); + MemRuntimeCheck = addRuntimeChecks(RuntimeCheckBB->getTerminator(), + VersionedLoop, AliasChecks, Exp2); + + SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(), + "scev.check"); + SCEVRuntimeCheck = + Exp.expandCodeForPredicate(&Preds, RuntimeCheckBB->getTerminator()); + + IRBuilder<InstSimplifyFolder> Builder( + RuntimeCheckBB->getContext(), + InstSimplifyFolder(RuntimeCheckBB->getModule()->getDataLayout())); + if (MemRuntimeCheck && SCEVRuntimeCheck) { + Builder.SetInsertPoint(RuntimeCheckBB->getTerminator()); + RuntimeCheck = + Builder.CreateOr(MemRuntimeCheck, SCEVRuntimeCheck, "lver.safe"); + } else + RuntimeCheck = MemRuntimeCheck ? MemRuntimeCheck : SCEVRuntimeCheck; + + assert(RuntimeCheck && "called even though we don't need " + "any runtime checks"); + + // Rename the block to make the IR more readable. + RuntimeCheckBB->setName(VersionedLoop->getHeader()->getName() + + ".lver.check"); + + // Create empty preheader for the loop (and after cloning for the + // non-versioned loop). + BasicBlock *PH = + SplitBlock(RuntimeCheckBB, RuntimeCheckBB->getTerminator(), DT, LI, + nullptr, VersionedLoop->getHeader()->getName() + ".ph"); + + // Clone the loop including the preheader. + // + // FIXME: This does not currently preserve SimplifyLoop because the exit + // block is a join between the two loops. + SmallVector<BasicBlock *, 8> NonVersionedLoopBlocks; + NonVersionedLoop = + cloneLoopWithPreheader(PH, RuntimeCheckBB, VersionedLoop, VMap, + ".lver.orig", LI, DT, NonVersionedLoopBlocks); + remapInstructionsInBlocks(NonVersionedLoopBlocks, VMap); + + // Insert the conditional branch based on the result of the memchecks. + Instruction *OrigTerm = RuntimeCheckBB->getTerminator(); + Builder.SetInsertPoint(OrigTerm); + Builder.CreateCondBr(RuntimeCheck, NonVersionedLoop->getLoopPreheader(), + VersionedLoop->getLoopPreheader()); + OrigTerm->eraseFromParent(); + + // The loops merge in the original exit block. This is now dominated by the + // memchecking block. + DT->changeImmediateDominator(VersionedLoop->getExitBlock(), RuntimeCheckBB); + + // Adds the necessary PHI nodes for the versioned loops based on the + // loop-defined values used outside of the loop. + addPHINodes(DefsUsedOutside); + formDedicatedExitBlocks(NonVersionedLoop, DT, LI, nullptr, true); + formDedicatedExitBlocks(VersionedLoop, DT, LI, nullptr, true); + assert(NonVersionedLoop->isLoopSimplifyForm() && + VersionedLoop->isLoopSimplifyForm() && + "The versioned loops should be in simplify form."); +} + +void LoopVersioning::addPHINodes( + const SmallVectorImpl<Instruction *> &DefsUsedOutside) { + BasicBlock *PHIBlock = VersionedLoop->getExitBlock(); + assert(PHIBlock && "No single successor to loop exit block"); + PHINode *PN; + + // First add a single-operand PHI for each DefsUsedOutside if one does not + // exists yet. + for (auto *Inst : DefsUsedOutside) { + // See if we have a single-operand PHI with the value defined by the + // original loop. + for (auto I = PHIBlock->begin(); (PN = dyn_cast<PHINode>(I)); ++I) { + if (PN->getIncomingValue(0) == Inst) { + SE->forgetValue(PN); + break; + } + } + // If not create it. 
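+    // The single-operand PHI keeps the exit block in LCSSA-like shape: users
+    // outside the versioned loop are redirected to it here, and the cloned
+    // loop's value is added as a second incoming operand further below.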
+ if (!PN) { + PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver", + &PHIBlock->front()); + SmallVector<User*, 8> UsersToUpdate; + for (User *U : Inst->users()) + if (!VersionedLoop->contains(cast<Instruction>(U)->getParent())) + UsersToUpdate.push_back(U); + for (User *U : UsersToUpdate) + U->replaceUsesOfWith(Inst, PN); + PN->addIncoming(Inst, VersionedLoop->getExitingBlock()); + } + } + + // Then for each PHI add the operand for the edge from the cloned loop. + for (auto I = PHIBlock->begin(); (PN = dyn_cast<PHINode>(I)); ++I) { + assert(PN->getNumOperands() == 1 && + "Exit block should only have on predecessor"); + + // If the definition was cloned used that otherwise use the same value. + Value *ClonedValue = PN->getIncomingValue(0); + auto Mapped = VMap.find(ClonedValue); + if (Mapped != VMap.end()) + ClonedValue = Mapped->second; + + PN->addIncoming(ClonedValue, NonVersionedLoop->getExitingBlock()); + } +} + +void LoopVersioning::prepareNoAliasMetadata() { + // We need to turn the no-alias relation between pointer checking groups into + // no-aliasing annotations between instructions. + // + // We accomplish this by mapping each pointer checking group (a set of + // pointers memchecked together) to an alias scope and then also mapping each + // group to the list of scopes it can't alias. + + const RuntimePointerChecking *RtPtrChecking = LAI.getRuntimePointerChecking(); + LLVMContext &Context = VersionedLoop->getHeader()->getContext(); + + // First allocate an aliasing scope for each pointer checking group. + // + // While traversing through the checking groups in the loop, also create a + // reverse map from pointers to the pointer checking group they were assigned + // to. + MDBuilder MDB(Context); + MDNode *Domain = MDB.createAnonymousAliasScopeDomain("LVerDomain"); + + for (const auto &Group : RtPtrChecking->CheckingGroups) { + GroupToScope[&Group] = MDB.createAnonymousAliasScope(Domain); + + for (unsigned PtrIdx : Group.Members) + PtrToGroup[RtPtrChecking->getPointerInfo(PtrIdx).PointerValue] = &Group; + } + + // Go through the checks and for each pointer group, collect the scopes for + // each non-aliasing pointer group. + DenseMap<const RuntimeCheckingPtrGroup *, SmallVector<Metadata *, 4>> + GroupToNonAliasingScopes; + + for (const auto &Check : AliasChecks) + GroupToNonAliasingScopes[Check.first].push_back(GroupToScope[Check.second]); + + // Finally, transform the above to actually map to scope list which is what + // the metadata uses. + + for (auto Pair : GroupToNonAliasingScopes) + GroupToNonAliasingScopeList[Pair.first] = MDNode::get(Context, Pair.second); +} + +void LoopVersioning::annotateLoopWithNoAlias() { + if (!AnnotateNoAlias) + return; + + // First prepare the maps. + prepareNoAliasMetadata(); + + // Add the scope and no-alias metadata to the instructions. + for (Instruction *I : LAI.getDepChecker().getMemoryInstructions()) { + annotateInstWithNoAlias(I); + } +} + +void LoopVersioning::annotateInstWithNoAlias(Instruction *VersionedInst, + const Instruction *OrigInst) { + if (!AnnotateNoAlias) + return; + + LLVMContext &Context = VersionedLoop->getHeader()->getContext(); + const Value *Ptr = isa<LoadInst>(OrigInst) + ? cast<LoadInst>(OrigInst)->getPointerOperand() + : cast<StoreInst>(OrigInst)->getPointerOperand(); + + // Find the group for the pointer and then add the scope metadata. 
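+  // Conceptually, an access whose pointer belongs to checking group G ends up
+  // with (sketch):
+  //   !alias.scope  { scope(G) }
+  //   !noalias      { scope(H) : H was runtime-checked against G }
+  // so alias analysis can disambiguate it from the other groups inside the
+  // versioned loop.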
+ auto Group = PtrToGroup.find(Ptr); + if (Group != PtrToGroup.end()) { + VersionedInst->setMetadata( + LLVMContext::MD_alias_scope, + MDNode::concatenate( + VersionedInst->getMetadata(LLVMContext::MD_alias_scope), + MDNode::get(Context, GroupToScope[Group->second]))); + + // Add the no-alias metadata. + auto NonAliasingScopeList = GroupToNonAliasingScopeList.find(Group->second); + if (NonAliasingScopeList != GroupToNonAliasingScopeList.end()) + VersionedInst->setMetadata( + LLVMContext::MD_noalias, + MDNode::concatenate( + VersionedInst->getMetadata(LLVMContext::MD_noalias), + NonAliasingScopeList->second)); + } +} + +namespace { +bool runImpl(LoopInfo *LI, LoopAccessInfoManager &LAIs, DominatorTree *DT, + ScalarEvolution *SE) { + // Build up a worklist of inner-loops to version. This is necessary as the + // act of versioning a loop creates new loops and can invalidate iterators + // across the loops. + SmallVector<Loop *, 8> Worklist; + + for (Loop *TopLevelLoop : *LI) + for (Loop *L : depth_first(TopLevelLoop)) + // We only handle inner-most loops. + if (L->isInnermost()) + Worklist.push_back(L); + + // Now walk the identified inner loops. + bool Changed = false; + for (Loop *L : Worklist) { + if (!L->isLoopSimplifyForm() || !L->isRotatedForm() || + !L->getExitingBlock()) + continue; + const LoopAccessInfo &LAI = LAIs.getInfo(*L); + if (!LAI.hasConvergentOp() && + (LAI.getNumRuntimePointerChecks() || + !LAI.getPSE().getPredicate().isAlwaysTrue())) { + LoopVersioning LVer(LAI, LAI.getRuntimePointerChecking()->getChecks(), L, + LI, DT, SE); + LVer.versionLoop(); + LVer.annotateLoopWithNoAlias(); + Changed = true; + LAIs.clear(); + } + } + + return Changed; +} + +/// Also expose this is a pass. Currently this is only used for +/// unit-testing. It adds all memchecks necessary to remove all may-aliasing +/// array accesses from the loop. 
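+///
+/// For such tests it is typically driven through opt, e.g. (exact spelling of
+/// the pipeline name may vary):
+///   opt -passes=loop-versioning -S in.ll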
+class LoopVersioningLegacyPass : public FunctionPass { +public: + LoopVersioningLegacyPass() : FunctionPass(ID) { + initializeLoopVersioningLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + auto &LAIs = getAnalysis<LoopAccessLegacyAnalysis>().getLAIs(); + auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); + + return runImpl(LI, LAIs, DT, SE); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<LoopInfoWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); + AU.addRequired<LoopAccessLegacyAnalysis>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addRequired<ScalarEvolutionWrapperPass>(); + } + + static char ID; +}; +} + +#define LVER_OPTION "loop-versioning" +#define DEBUG_TYPE LVER_OPTION + +char LoopVersioningLegacyPass::ID; +static const char LVer_name[] = "Loop Versioning"; + +INITIALIZE_PASS_BEGIN(LoopVersioningLegacyPass, LVER_OPTION, LVer_name, false, + false) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_END(LoopVersioningLegacyPass, LVER_OPTION, LVer_name, false, + false) + +namespace llvm { +FunctionPass *createLoopVersioningLegacyPass() { + return new LoopVersioningLegacyPass(); +} + +PreservedAnalyses LoopVersioningPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F); + auto &LI = AM.getResult<LoopAnalysis>(F); + LoopAccessInfoManager &LAIs = AM.getResult<LoopAccessAnalysis>(F); + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + + if (runImpl(&LI, LAIs, &DT, &SE)) + return PreservedAnalyses::none(); + return PreservedAnalyses::all(); +} +} // namespace llvm diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LowerAtomic.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LowerAtomic.cpp new file mode 100644 index 0000000000..b6f40de0da --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/LowerAtomic.cpp @@ -0,0 +1,114 @@ +//===- LowerAtomic.cpp - Lower atomic intrinsics --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass lowers atomic intrinsics to non-atomic form for use in a known +// non-preemptible environment. 
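+//
+// For example, a cmpxchg is rewritten into plain (non-atomic) IR with roughly
+// these semantics:
+//
+//   T old = *ptr;
+//   bool success = (old == cmp);
+//   *ptr = success ? newval : old;
+//   result = { old, success };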
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LowerAtomic.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +using namespace llvm; + +#define DEBUG_TYPE "loweratomic" + +bool llvm::lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) { + IRBuilder<> Builder(CXI); + Value *Ptr = CXI->getPointerOperand(); + Value *Cmp = CXI->getCompareOperand(); + Value *Val = CXI->getNewValOperand(); + + LoadInst *Orig = Builder.CreateLoad(Val->getType(), Ptr); + Value *Equal = Builder.CreateICmpEQ(Orig, Cmp); + Value *Res = Builder.CreateSelect(Equal, Val, Orig); + Builder.CreateStore(Res, Ptr); + + Res = Builder.CreateInsertValue(PoisonValue::get(CXI->getType()), Orig, 0); + Res = Builder.CreateInsertValue(Res, Equal, 1); + + CXI->replaceAllUsesWith(Res); + CXI->eraseFromParent(); + return true; +} + +Value *llvm::buildAtomicRMWValue(AtomicRMWInst::BinOp Op, + IRBuilderBase &Builder, Value *Loaded, + Value *Val) { + Value *NewVal; + switch (Op) { + case AtomicRMWInst::Xchg: + return Val; + case AtomicRMWInst::Add: + return Builder.CreateAdd(Loaded, Val, "new"); + case AtomicRMWInst::Sub: + return Builder.CreateSub(Loaded, Val, "new"); + case AtomicRMWInst::And: + return Builder.CreateAnd(Loaded, Val, "new"); + case AtomicRMWInst::Nand: + return Builder.CreateNot(Builder.CreateAnd(Loaded, Val), "new"); + case AtomicRMWInst::Or: + return Builder.CreateOr(Loaded, Val, "new"); + case AtomicRMWInst::Xor: + return Builder.CreateXor(Loaded, Val, "new"); + case AtomicRMWInst::Max: + NewVal = Builder.CreateICmpSGT(Loaded, Val); + return Builder.CreateSelect(NewVal, Loaded, Val, "new"); + case AtomicRMWInst::Min: + NewVal = Builder.CreateICmpSLE(Loaded, Val); + return Builder.CreateSelect(NewVal, Loaded, Val, "new"); + case AtomicRMWInst::UMax: + NewVal = Builder.CreateICmpUGT(Loaded, Val); + return Builder.CreateSelect(NewVal, Loaded, Val, "new"); + case AtomicRMWInst::UMin: + NewVal = Builder.CreateICmpULE(Loaded, Val); + return Builder.CreateSelect(NewVal, Loaded, Val, "new"); + case AtomicRMWInst::FAdd: + return Builder.CreateFAdd(Loaded, Val, "new"); + case AtomicRMWInst::FSub: + return Builder.CreateFSub(Loaded, Val, "new"); + case AtomicRMWInst::FMax: + return Builder.CreateMaxNum(Loaded, Val); + case AtomicRMWInst::FMin: + return Builder.CreateMinNum(Loaded, Val); + case AtomicRMWInst::UIncWrap: { + Constant *One = ConstantInt::get(Loaded->getType(), 1); + Value *Inc = Builder.CreateAdd(Loaded, One); + Value *Cmp = Builder.CreateICmpUGE(Loaded, Val); + Constant *Zero = ConstantInt::get(Loaded->getType(), 0); + return Builder.CreateSelect(Cmp, Zero, Inc, "new"); + } + case AtomicRMWInst::UDecWrap: { + Constant *Zero = ConstantInt::get(Loaded->getType(), 0); + Constant *One = ConstantInt::get(Loaded->getType(), 1); + + Value *Dec = Builder.CreateSub(Loaded, One); + Value *CmpEq0 = Builder.CreateICmpEQ(Loaded, Zero); + Value *CmpOldGtVal = Builder.CreateICmpUGT(Loaded, Val); + Value *Or = Builder.CreateOr(CmpEq0, CmpOldGtVal); + return Builder.CreateSelect(Or, Val, Dec, "new"); + } + default: + llvm_unreachable("Unknown atomic op"); + } +} + +bool llvm::lowerAtomicRMWInst(AtomicRMWInst *RMWI) { + IRBuilder<> Builder(RMWI); + Value *Ptr = RMWI->getPointerOperand(); + Value *Val = RMWI->getValOperand(); + + LoadInst *Orig = Builder.CreateLoad(Val->getType(), Ptr); + Value *Res = buildAtomicRMWValue(RMWI->getOperation(), Builder, Orig, Val); + Builder.CreateStore(Res, 
Ptr); + RMWI->replaceAllUsesWith(Orig); + RMWI->eraseFromParent(); + return true; +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LowerGlobalDtors.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LowerGlobalDtors.cpp new file mode 100644 index 0000000000..195c274ff1 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/LowerGlobalDtors.cpp @@ -0,0 +1,221 @@ +//===-- LowerGlobalDtors.cpp - Lower @llvm.global_dtors -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Lower @llvm.global_dtors. +/// +/// Implement @llvm.global_dtors by creating wrapper functions that are +/// registered in @llvm.global_ctors and which contain a call to +/// `__cxa_atexit` to register their destructor functions. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LowerGlobalDtors.h" + +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include <map> + +using namespace llvm; + +#define DEBUG_TYPE "lower-global-dtors" + +namespace { +class LowerGlobalDtorsLegacyPass final : public ModulePass { + StringRef getPassName() const override { + return "Lower @llvm.global_dtors via `__cxa_atexit`"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + ModulePass::getAnalysisUsage(AU); + } + + bool runOnModule(Module &M) override; + +public: + static char ID; + LowerGlobalDtorsLegacyPass() : ModulePass(ID) { + initializeLowerGlobalDtorsLegacyPassPass(*PassRegistry::getPassRegistry()); + } +}; +} // End anonymous namespace + +char LowerGlobalDtorsLegacyPass::ID = 0; +INITIALIZE_PASS(LowerGlobalDtorsLegacyPass, DEBUG_TYPE, + "Lower @llvm.global_dtors via `__cxa_atexit`", false, false) + +ModulePass *llvm::createLowerGlobalDtorsLegacyPass() { + return new LowerGlobalDtorsLegacyPass(); +} + +static bool runImpl(Module &M); +bool LowerGlobalDtorsLegacyPass::runOnModule(Module &M) { return runImpl(M); } + +PreservedAnalyses LowerGlobalDtorsPass::run(Module &M, + ModuleAnalysisManager &AM) { + bool Changed = runImpl(M); + if (!Changed) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); + return PA; +} + +static bool runImpl(Module &M) { + GlobalVariable *GV = M.getGlobalVariable("llvm.global_dtors"); + if (!GV || !GV->hasInitializer()) + return false; + + const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); + if (!InitList) + return false; + + // Validate @llvm.global_dtor's type. + auto *ETy = dyn_cast<StructType>(InitList->getType()->getElementType()); + if (!ETy || ETy->getNumElements() != 3 || + !ETy->getTypeAtIndex(0U)->isIntegerTy() || + !ETy->getTypeAtIndex(1U)->isPointerTy() || + !ETy->getTypeAtIndex(2U)->isPointerTy()) + return false; // Not (int, ptr, ptr). + + // Collect the contents of @llvm.global_dtors, ordered by priority. Within a + // priority, sequences of destructors with the same associated object are + // recorded so that we can register them as a group. 
+ std::map< + uint16_t, + std::vector<std::pair<Constant *, std::vector<Constant *>>> + > DtorFuncs; + for (Value *O : InitList->operands()) { + auto *CS = dyn_cast<ConstantStruct>(O); + if (!CS) + continue; // Malformed. + + auto *Priority = dyn_cast<ConstantInt>(CS->getOperand(0)); + if (!Priority) + continue; // Malformed. + uint16_t PriorityValue = Priority->getLimitedValue(UINT16_MAX); + + Constant *DtorFunc = CS->getOperand(1); + if (DtorFunc->isNullValue()) + break; // Found a null terminator, skip the rest. + + Constant *Associated = CS->getOperand(2); + Associated = cast<Constant>(Associated->stripPointerCasts()); + + auto &AtThisPriority = DtorFuncs[PriorityValue]; + if (AtThisPriority.empty() || AtThisPriority.back().first != Associated) { + std::vector<Constant *> NewList; + NewList.push_back(DtorFunc); + AtThisPriority.push_back(std::make_pair(Associated, NewList)); + } else { + AtThisPriority.back().second.push_back(DtorFunc); + } + } + if (DtorFuncs.empty()) + return false; + + // extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d); + LLVMContext &C = M.getContext(); + PointerType *VoidStar = Type::getInt8PtrTy(C); + Type *AtExitFuncArgs[] = {VoidStar}; + FunctionType *AtExitFuncTy = + FunctionType::get(Type::getVoidTy(C), AtExitFuncArgs, + /*isVarArg=*/false); + + FunctionCallee AtExit = M.getOrInsertFunction( + "__cxa_atexit", + FunctionType::get(Type::getInt32Ty(C), + {PointerType::get(AtExitFuncTy, 0), VoidStar, VoidStar}, + /*isVarArg=*/false)); + + // Declare __dso_local. + Type *DsoHandleTy = Type::getInt8Ty(C); + Constant *DsoHandle = M.getOrInsertGlobal("__dso_handle", DsoHandleTy, [&] { + auto *GV = new GlobalVariable(M, DsoHandleTy, /*isConstant=*/true, + GlobalVariable::ExternalWeakLinkage, nullptr, + "__dso_handle"); + GV->setVisibility(GlobalVariable::HiddenVisibility); + return GV; + }); + + // For each unique priority level and associated symbol, generate a function + // to call all the destructors at that level, and a function to register the + // first function with __cxa_atexit. + for (auto &PriorityAndMore : DtorFuncs) { + uint16_t Priority = PriorityAndMore.first; + uint64_t Id = 0; + auto &AtThisPriority = PriorityAndMore.second; + for (auto &AssociatedAndMore : AtThisPriority) { + Constant *Associated = AssociatedAndMore.first; + auto ThisId = Id++; + + Function *CallDtors = Function::Create( + AtExitFuncTy, Function::PrivateLinkage, + "call_dtors" + + (Priority != UINT16_MAX ? (Twine(".") + Twine(Priority)) + : Twine()) + + (AtThisPriority.size() > 1 ? Twine("$") + Twine(ThisId) + : Twine()) + + (!Associated->isNullValue() ? (Twine(".") + Associated->getName()) + : Twine()), + &M); + BasicBlock *BB = BasicBlock::Create(C, "body", CallDtors); + FunctionType *VoidVoid = FunctionType::get(Type::getVoidTy(C), + /*isVarArg=*/false); + + for (auto *Dtor : reverse(AssociatedAndMore.second)) + CallInst::Create(VoidVoid, Dtor, "", BB); + ReturnInst::Create(C, BB); + + Function *RegisterCallDtors = Function::Create( + VoidVoid, Function::PrivateLinkage, + "register_call_dtors" + + (Priority != UINT16_MAX ? (Twine(".") + Twine(Priority)) + : Twine()) + + (AtThisPriority.size() > 1 ? Twine("$") + Twine(ThisId) + : Twine()) + + (!Associated->isNullValue() ? 
(Twine(".") + Associated->getName()) + : Twine()), + &M); + BasicBlock *EntryBB = BasicBlock::Create(C, "entry", RegisterCallDtors); + BasicBlock *FailBB = BasicBlock::Create(C, "fail", RegisterCallDtors); + BasicBlock *RetBB = BasicBlock::Create(C, "return", RegisterCallDtors); + + Value *Null = ConstantPointerNull::get(VoidStar); + Value *Args[] = {CallDtors, Null, DsoHandle}; + Value *Res = CallInst::Create(AtExit, Args, "call", EntryBB); + Value *Cmp = new ICmpInst(*EntryBB, ICmpInst::ICMP_NE, Res, + Constant::getNullValue(Res->getType())); + BranchInst::Create(FailBB, RetBB, Cmp, EntryBB); + + // If `__cxa_atexit` hits out-of-memory, trap, so that we don't misbehave. + // This should be very rare, because if the process is running out of + // memory before main has even started, something is wrong. + CallInst::Create(Intrinsic::getDeclaration(&M, Intrinsic::trap), "", + FailBB); + new UnreachableInst(C, FailBB); + + ReturnInst::Create(C, RetBB); + + // Now register the registration function with @llvm.global_ctors. + appendToGlobalCtors(M, RegisterCallDtors, Priority, Associated); + } + } + + // Now that we've lowered everything, remove @llvm.global_dtors. + GV->eraseFromParent(); + + return true; +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LowerIFunc.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LowerIFunc.cpp new file mode 100644 index 0000000000..18ae0bbe2e --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/LowerIFunc.cpp @@ -0,0 +1,27 @@ +//===- LowerIFunc.cpp -----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements replacing calls to ifuncs by introducing indirect calls. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LowerIFunc.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +using namespace llvm; + +/// Replace all call users of ifuncs in the module. +PreservedAnalyses LowerIFuncPass::run(Module &M, ModuleAnalysisManager &AM) { + if (M.ifunc_empty()) + return PreservedAnalyses::all(); + + lowerGlobalIFuncUsersAsGlobalCtor(M, {}); + return PreservedAnalyses::none(); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LowerInvoke.cpp new file mode 100644 index 0000000000..6d788857c1 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/LowerInvoke.cpp @@ -0,0 +1,95 @@ +//===- LowerInvoke.cpp - Eliminate Invoke instructions --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This transformation is designed for use by code generators which do not yet +// support stack unwinding. This pass converts 'invoke' instructions to 'call' +// instructions, so that any exception-handling 'landingpad' blocks become dead +// code (which can be removed by running the '-simplifycfg' pass afterwards). 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LowerInvoke.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Instructions.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils.h" +using namespace llvm; + +#define DEBUG_TYPE "lowerinvoke" + +STATISTIC(NumInvokes, "Number of invokes replaced"); + +namespace { + class LowerInvokeLegacyPass : public FunctionPass { + public: + static char ID; // Pass identification, replacement for typeid + explicit LowerInvokeLegacyPass() : FunctionPass(ID) { + initializeLowerInvokeLegacyPassPass(*PassRegistry::getPassRegistry()); + } + bool runOnFunction(Function &F) override; + }; +} + +char LowerInvokeLegacyPass::ID = 0; +INITIALIZE_PASS(LowerInvokeLegacyPass, "lowerinvoke", + "Lower invoke and unwind, for unwindless code generators", + false, false) + +static bool runImpl(Function &F) { + bool Changed = false; + for (BasicBlock &BB : F) + if (InvokeInst *II = dyn_cast<InvokeInst>(BB.getTerminator())) { + SmallVector<Value *, 16> CallArgs(II->args()); + SmallVector<OperandBundleDef, 1> OpBundles; + II->getOperandBundlesAsDefs(OpBundles); + // Insert a normal call instruction... + CallInst *NewCall = + CallInst::Create(II->getFunctionType(), II->getCalledOperand(), + CallArgs, OpBundles, "", II); + NewCall->takeName(II); + NewCall->setCallingConv(II->getCallingConv()); + NewCall->setAttributes(II->getAttributes()); + NewCall->setDebugLoc(II->getDebugLoc()); + II->replaceAllUsesWith(NewCall); + + // Insert an unconditional branch to the normal destination. + BranchInst::Create(II->getNormalDest(), II); + + // Remove any PHI node entries from the exception destination. + II->getUnwindDest()->removePredecessor(&BB); + + // Remove the invoke instruction now. + II->eraseFromParent(); + + ++NumInvokes; + Changed = true; + } + return Changed; +} + +bool LowerInvokeLegacyPass::runOnFunction(Function &F) { + return runImpl(F); +} + +namespace llvm { +char &LowerInvokePassID = LowerInvokeLegacyPass::ID; + +// Public Interface To the LowerInvoke pass. +FunctionPass *createLowerInvokePass() { return new LowerInvokeLegacyPass(); } + +PreservedAnalyses LowerInvokePass::run(Function &F, + FunctionAnalysisManager &AM) { + bool Changed = runImpl(F); + if (!Changed) + return PreservedAnalyses::all(); + + return PreservedAnalyses::none(); +} +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LowerMemIntrinsics.cpp new file mode 100644 index 0000000000..165740b552 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/LowerMemIntrinsics.cpp @@ -0,0 +1,605 @@ +//===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LowerMemIntrinsics.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include <optional> + +using namespace llvm; + +void llvm::createMemCpyLoopKnownSize( + Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, + ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile, + bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI, + std::optional<uint32_t> AtomicElementSize) { + // No need to expand zero length copies. + if (CopyLen->isZero()) + return; + + BasicBlock *PreLoopBB = InsertBefore->getParent(); + BasicBlock *PostLoopBB = nullptr; + Function *ParentFunc = PreLoopBB->getParent(); + LLVMContext &Ctx = PreLoopBB->getContext(); + const DataLayout &DL = ParentFunc->getParent()->getDataLayout(); + MDBuilder MDB(Ctx); + MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain"); + StringRef Name = "MemCopyAliasScope"; + MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); + + unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace(); + unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); + + Type *TypeOfCopyLen = CopyLen->getType(); + Type *LoopOpType = TTI.getMemcpyLoopLoweringType( + Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(), + AtomicElementSize); + assert((!AtomicElementSize || !LoopOpType->isVectorTy()) && + "Atomic memcpy lowering is not supported for vector operand type"); + + unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType); + assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) && + "Atomic memcpy lowering is not supported for selected operand size"); + + uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize; + + if (LoopEndCount != 0) { + // Split + PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split"); + BasicBlock *LoopBB = + BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB); + PreLoopBB->getTerminator()->setSuccessor(0, LoopBB); + + IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); + + // Cast the Src and Dst pointers to pointers to the loop operand type (if + // needed). + PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS); + PointerType *DstOpType = PointerType::get(LoopOpType, DstAS); + if (SrcAddr->getType() != SrcOpType) { + SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType); + } + if (DstAddr->getType() != DstOpType) { + DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType); + } + + Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize)); + Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize)); + + IRBuilder<> LoopBuilder(LoopBB); + PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index"); + LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB); + // Loop Body + Value *SrcGEP = + LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); + LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP, + PartSrcAlign, SrcIsVolatile); + if (!CanOverlap) { + // Set alias scope for loads. 
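+      // (Paired with the !noalias list placed on the store below, this
+      // records that the source and destination of the expanded memcpy do
+      // not overlap.)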
+ Load->setMetadata(LLVMContext::MD_alias_scope, + MDNode::get(Ctx, NewScope)); + } + Value *DstGEP = + LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); + StoreInst *Store = LoopBuilder.CreateAlignedStore( + Load, DstGEP, PartDstAlign, DstIsVolatile); + if (!CanOverlap) { + // Indicate that stores don't overlap loads. + Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope)); + } + if (AtomicElementSize) { + Load->setAtomic(AtomicOrdering::Unordered); + Store->setAtomic(AtomicOrdering::Unordered); + } + Value *NewIndex = + LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U)); + LoopIndex->addIncoming(NewIndex, LoopBB); + + // Create the loop branch condition. + Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount); + LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI), + LoopBB, PostLoopBB); + } + + uint64_t BytesCopied = LoopEndCount * LoopOpSize; + uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied; + if (RemainingBytes) { + IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI() + : InsertBefore); + + SmallVector<Type *, 5> RemainingOps; + TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes, + SrcAS, DstAS, SrcAlign.value(), + DstAlign.value(), AtomicElementSize); + + for (auto *OpTy : RemainingOps) { + Align PartSrcAlign(commonAlignment(SrcAlign, BytesCopied)); + Align PartDstAlign(commonAlignment(DstAlign, BytesCopied)); + + // Calculate the new index + unsigned OperandSize = DL.getTypeStoreSize(OpTy); + assert( + (!AtomicElementSize || OperandSize % *AtomicElementSize == 0) && + "Atomic memcpy lowering is not supported for selected operand size"); + + uint64_t GepIndex = BytesCopied / OperandSize; + assert(GepIndex * OperandSize == BytesCopied && + "Division should have no Remainder!"); + // Cast source to operand type and load + PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS); + Value *CastedSrc = SrcAddr->getType() == SrcPtrType + ? SrcAddr + : RBuilder.CreateBitCast(SrcAddr, SrcPtrType); + Value *SrcGEP = RBuilder.CreateInBoundsGEP( + OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex)); + LoadInst *Load = + RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile); + if (!CanOverlap) { + // Set alias scope for loads. + Load->setMetadata(LLVMContext::MD_alias_scope, + MDNode::get(Ctx, NewScope)); + } + // Cast destination to operand type and store. + PointerType *DstPtrType = PointerType::get(OpTy, DstAS); + Value *CastedDst = DstAddr->getType() == DstPtrType + ? DstAddr + : RBuilder.CreateBitCast(DstAddr, DstPtrType); + Value *DstGEP = RBuilder.CreateInBoundsGEP( + OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex)); + StoreInst *Store = RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, + DstIsVolatile); + if (!CanOverlap) { + // Indicate that stores don't overlap loads. 
+ Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope)); + } + if (AtomicElementSize) { + Load->setAtomic(AtomicOrdering::Unordered); + Store->setAtomic(AtomicOrdering::Unordered); + } + BytesCopied += OperandSize; + } + } + assert(BytesCopied == CopyLen->getZExtValue() && + "Bytes copied should match size in the call!"); +} + +void llvm::createMemCpyLoopUnknownSize( + Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen, + Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile, + bool CanOverlap, const TargetTransformInfo &TTI, + std::optional<uint32_t> AtomicElementSize) { + BasicBlock *PreLoopBB = InsertBefore->getParent(); + BasicBlock *PostLoopBB = + PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion"); + + Function *ParentFunc = PreLoopBB->getParent(); + const DataLayout &DL = ParentFunc->getParent()->getDataLayout(); + LLVMContext &Ctx = PreLoopBB->getContext(); + MDBuilder MDB(Ctx); + MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain"); + StringRef Name = "MemCopyAliasScope"; + MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); + + unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace(); + unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); + + Type *LoopOpType = TTI.getMemcpyLoopLoweringType( + Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(), + AtomicElementSize); + assert((!AtomicElementSize || !LoopOpType->isVectorTy()) && + "Atomic memcpy lowering is not supported for vector operand type"); + unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType); + assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) && + "Atomic memcpy lowering is not supported for selected operand size"); + + IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); + + PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS); + PointerType *DstOpType = PointerType::get(LoopOpType, DstAS); + if (SrcAddr->getType() != SrcOpType) { + SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType); + } + if (DstAddr->getType() != DstOpType) { + DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType); + } + + // Calculate the loop trip count, and remaining bytes to copy after the loop. + Type *CopyLenType = CopyLen->getType(); + IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType); + assert(ILengthType && + "expected size argument to memcpy to be an integer type!"); + Type *Int8Type = Type::getInt8Ty(Ctx); + bool LoopOpIsInt8 = LoopOpType == Int8Type; + ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize); + Value *RuntimeLoopCount = LoopOpIsInt8 ? + CopyLen : + PLBuilder.CreateUDiv(CopyLen, CILoopOpSize); + BasicBlock *LoopBB = + BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB); + IRBuilder<> LoopBuilder(LoopBB); + + Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize)); + Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize)); + + PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index"); + LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB); + + Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); + LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP, + PartSrcAlign, SrcIsVolatile); + if (!CanOverlap) { + // Set alias scope for loads. 
+ Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope)); + } + Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); + StoreInst *Store = + LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile); + if (!CanOverlap) { + // Indicate that stores don't overlap loads. + Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope)); + } + if (AtomicElementSize) { + Load->setAtomic(AtomicOrdering::Unordered); + Store->setAtomic(AtomicOrdering::Unordered); + } + Value *NewIndex = + LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U)); + LoopIndex->addIncoming(NewIndex, LoopBB); + + bool requiresResidual = + !LoopOpIsInt8 && !(AtomicElementSize && LoopOpSize == AtomicElementSize); + if (requiresResidual) { + Type *ResLoopOpType = AtomicElementSize + ? Type::getIntNTy(Ctx, *AtomicElementSize * 8) + : Int8Type; + unsigned ResLoopOpSize = DL.getTypeStoreSize(ResLoopOpType); + assert((ResLoopOpSize == AtomicElementSize ? *AtomicElementSize : 1) && + "Store size is expected to match type size"); + + // Add in the + Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize); + Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual); + + // Loop body for the residual copy. + BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual", + PreLoopBB->getParent(), + PostLoopBB); + // Residual loop header. + BasicBlock *ResHeaderBB = BasicBlock::Create( + Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr); + + // Need to update the pre-loop basic block to branch to the correct place. + // branch to the main loop if the count is non-zero, branch to the residual + // loop if the copy size is smaller then 1 iteration of the main loop but + // non-zero and finally branch to after the residual loop if the memcpy + // size is zero. + ConstantInt *Zero = ConstantInt::get(ILengthType, 0U); + PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero), + LoopBB, ResHeaderBB); + PreLoopBB->getTerminator()->eraseFromParent(); + + LoopBuilder.CreateCondBr( + LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB, + ResHeaderBB); + + // Determine if we need to branch to the residual loop or bypass it. + IRBuilder<> RHBuilder(ResHeaderBB); + RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero), + ResLoopBB, PostLoopBB); + + // Copy the residual with single byte load/store loop. + IRBuilder<> ResBuilder(ResLoopBB); + PHINode *ResidualIndex = + ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index"); + ResidualIndex->addIncoming(Zero, ResHeaderBB); + + Value *SrcAsResLoopOpType = ResBuilder.CreateBitCast( + SrcAddr, PointerType::get(ResLoopOpType, SrcAS)); + Value *DstAsResLoopOpType = ResBuilder.CreateBitCast( + DstAddr, PointerType::get(ResLoopOpType, DstAS)); + Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex); + Value *SrcGEP = ResBuilder.CreateInBoundsGEP( + ResLoopOpType, SrcAsResLoopOpType, FullOffset); + LoadInst *Load = ResBuilder.CreateAlignedLoad(ResLoopOpType, SrcGEP, + PartSrcAlign, SrcIsVolatile); + if (!CanOverlap) { + // Set alias scope for loads. + Load->setMetadata(LLVMContext::MD_alias_scope, + MDNode::get(Ctx, NewScope)); + } + Value *DstGEP = ResBuilder.CreateInBoundsGEP( + ResLoopOpType, DstAsResLoopOpType, FullOffset); + StoreInst *Store = ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, + DstIsVolatile); + if (!CanOverlap) { + // Indicate that stores don't overlap loads. 
+ Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope)); + } + if (AtomicElementSize) { + Load->setAtomic(AtomicOrdering::Unordered); + Store->setAtomic(AtomicOrdering::Unordered); + } + Value *ResNewIndex = ResBuilder.CreateAdd( + ResidualIndex, ConstantInt::get(CopyLenType, ResLoopOpSize)); + ResidualIndex->addIncoming(ResNewIndex, ResLoopBB); + + // Create the loop branch condition. + ResBuilder.CreateCondBr( + ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB, + PostLoopBB); + } else { + // In this case the loop operand type was a byte, and there is no need for a + // residual loop to copy the remaining memory after the main loop. + // We do however need to patch up the control flow by creating the + // terminators for the preloop block and the memcpy loop. + ConstantInt *Zero = ConstantInt::get(ILengthType, 0U); + PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero), + LoopBB, PostLoopBB); + PreLoopBB->getTerminator()->eraseFromParent(); + LoopBuilder.CreateCondBr( + LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB, + PostLoopBB); + } +} + +// Lower memmove to IR. memmove is required to correctly copy overlapping memory +// regions; therefore, it has to check the relative positions of the source and +// destination pointers and choose the copy direction accordingly. +// +// The code below is an IR rendition of this C function: +// +// void* memmove(void* dst, const void* src, size_t n) { +// unsigned char* d = dst; +// const unsigned char* s = src; +// if (s < d) { +// // copy backwards +// while (n--) { +// d[n] = s[n]; +// } +// } else { +// // copy forward +// for (size_t i = 0; i < n; ++i) { +// d[i] = s[i]; +// } +// } +// return dst; +// } +static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr, + Value *DstAddr, Value *CopyLen, Align SrcAlign, + Align DstAlign, bool SrcIsVolatile, + bool DstIsVolatile) { + Type *TypeOfCopyLen = CopyLen->getType(); + BasicBlock *OrigBB = InsertBefore->getParent(); + Function *F = OrigBB->getParent(); + const DataLayout &DL = F->getParent()->getDataLayout(); + + // TODO: Use different element type if possible? + IRBuilder<> CastBuilder(InsertBefore); + Type *EltTy = CastBuilder.getInt8Ty(); + Type *PtrTy = + CastBuilder.getInt8PtrTy(SrcAddr->getType()->getPointerAddressSpace()); + SrcAddr = CastBuilder.CreateBitCast(SrcAddr, PtrTy); + DstAddr = CastBuilder.CreateBitCast(DstAddr, PtrTy); + + // Create the a comparison of src and dst, based on which we jump to either + // the forward-copy part of the function (if src >= dst) or the backwards-copy + // part (if src < dst). + // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else + // structure. Its block terminators (unconditional branches) are replaced by + // the appropriate conditional branches when the loop is built. 
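+  //
+  // The resulting control flow is roughly:
+  //
+  //   OrigBB:          %cmp = icmp ult %src, %dst
+  //                    br %cmp, %copy_backwards, %copy_forward
+  //   copy_backwards:  skip the loop when n == 0, otherwise run
+  //                    copy_backwards_loop (indices n-1 .. 0)
+  //   copy_forward:    skip the loop when n == 0, otherwise run
+  //                    copy_forward_loop (indices 0 .. n-1)
+  //   memmove_done:    common exit
+  //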
+ ICmpInst *PtrCompare = new ICmpInst(InsertBefore, ICmpInst::ICMP_ULT, + SrcAddr, DstAddr, "compare_src_dst"); + Instruction *ThenTerm, *ElseTerm; + SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore, &ThenTerm, + &ElseTerm); + + // Each part of the function consists of two blocks: + // copy_backwards: used to skip the loop when n == 0 + // copy_backwards_loop: the actual backwards loop BB + // copy_forward: used to skip the loop when n == 0 + // copy_forward_loop: the actual forward loop BB + BasicBlock *CopyBackwardsBB = ThenTerm->getParent(); + CopyBackwardsBB->setName("copy_backwards"); + BasicBlock *CopyForwardBB = ElseTerm->getParent(); + CopyForwardBB->setName("copy_forward"); + BasicBlock *ExitBB = InsertBefore->getParent(); + ExitBB->setName("memmove_done"); + + unsigned PartSize = DL.getTypeStoreSize(EltTy); + Align PartSrcAlign(commonAlignment(SrcAlign, PartSize)); + Align PartDstAlign(commonAlignment(DstAlign, PartSize)); + + // Initial comparison of n == 0 that lets us skip the loops altogether. Shared + // between both backwards and forward copy clauses. + ICmpInst *CompareN = + new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen, + ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0"); + + // Copying backwards. + BasicBlock *LoopBB = + BasicBlock::Create(F->getContext(), "copy_backwards_loop", F, CopyForwardBB); + IRBuilder<> LoopBuilder(LoopBB); + PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); + Value *IndexPtr = LoopBuilder.CreateSub( + LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr"); + Value *Element = LoopBuilder.CreateAlignedLoad( + EltTy, LoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, IndexPtr), + PartSrcAlign, "element"); + LoopBuilder.CreateAlignedStore( + Element, LoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, IndexPtr), + PartDstAlign); + LoopBuilder.CreateCondBr( + LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)), + ExitBB, LoopBB); + LoopPhi->addIncoming(IndexPtr, LoopBB); + LoopPhi->addIncoming(CopyLen, CopyBackwardsBB); + BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm); + ThenTerm->eraseFromParent(); + + // Copying forward. 
+ BasicBlock *FwdLoopBB = + BasicBlock::Create(F->getContext(), "copy_forward_loop", F, ExitBB); + IRBuilder<> FwdLoopBuilder(FwdLoopBB); + PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr"); + Value *SrcGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, FwdCopyPhi); + Value *FwdElement = + FwdLoopBuilder.CreateAlignedLoad(EltTy, SrcGEP, PartSrcAlign, "element"); + Value *DstGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, FwdCopyPhi); + FwdLoopBuilder.CreateAlignedStore(FwdElement, DstGEP, PartDstAlign); + Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd( + FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment"); + FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen), + ExitBB, FwdLoopBB); + FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB); + FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB); + + BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm); + ElseTerm->eraseFromParent(); +} + +static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr, + Value *CopyLen, Value *SetValue, Align DstAlign, + bool IsVolatile) { + Type *TypeOfCopyLen = CopyLen->getType(); + BasicBlock *OrigBB = InsertBefore->getParent(); + Function *F = OrigBB->getParent(); + const DataLayout &DL = F->getParent()->getDataLayout(); + BasicBlock *NewBB = + OrigBB->splitBasicBlock(InsertBefore, "split"); + BasicBlock *LoopBB + = BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB); + + IRBuilder<> Builder(OrigBB->getTerminator()); + + // Cast pointer to the type of value getting stored + unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); + DstAddr = Builder.CreateBitCast(DstAddr, + PointerType::get(SetValue->getType(), dstAS)); + + Builder.CreateCondBr( + Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB, + LoopBB); + OrigBB->getTerminator()->eraseFromParent(); + + unsigned PartSize = DL.getTypeStoreSize(SetValue->getType()); + Align PartAlign(commonAlignment(DstAlign, PartSize)); + + IRBuilder<> LoopBuilder(LoopBB); + PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); + LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB); + + LoopBuilder.CreateAlignedStore( + SetValue, + LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex), + PartAlign, IsVolatile); + + Value *NewIndex = + LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1)); + LoopIndex->addIncoming(NewIndex, LoopBB); + + LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, + NewBB); +} + +template <typename T> +static bool canOverlap(MemTransferBase<T> *Memcpy, ScalarEvolution *SE) { + if (SE) { + auto *SrcSCEV = SE->getSCEV(Memcpy->getRawSource()); + auto *DestSCEV = SE->getSCEV(Memcpy->getRawDest()); + if (SE->isKnownPredicateAt(CmpInst::ICMP_NE, SrcSCEV, DestSCEV, Memcpy)) + return false; + } + return true; +} + +void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy, + const TargetTransformInfo &TTI, + ScalarEvolution *SE) { + bool CanOverlap = canOverlap(Memcpy, SE); + if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) { + createMemCpyLoopKnownSize( + /* InsertBefore */ Memcpy, + /* SrcAddr */ Memcpy->getRawSource(), + /* DstAddr */ Memcpy->getRawDest(), + /* CopyLen */ CI, + /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(), + /* DestAlign */ Memcpy->getDestAlign().valueOrOne(), + /* SrcIsVolatile */ Memcpy->isVolatile(), + /* DstIsVolatile */ Memcpy->isVolatile(), + /* CanOverlap */ CanOverlap, + /* 
TargetTransformInfo */ TTI); + } else { + createMemCpyLoopUnknownSize( + /* InsertBefore */ Memcpy, + /* SrcAddr */ Memcpy->getRawSource(), + /* DstAddr */ Memcpy->getRawDest(), + /* CopyLen */ Memcpy->getLength(), + /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(), + /* DestAlign */ Memcpy->getDestAlign().valueOrOne(), + /* SrcIsVolatile */ Memcpy->isVolatile(), + /* DstIsVolatile */ Memcpy->isVolatile(), + /* CanOverlap */ CanOverlap, + /* TargetTransformInfo */ TTI); + } +} + +void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) { + createMemMoveLoop(/* InsertBefore */ Memmove, + /* SrcAddr */ Memmove->getRawSource(), + /* DstAddr */ Memmove->getRawDest(), + /* CopyLen */ Memmove->getLength(), + /* SrcAlign */ Memmove->getSourceAlign().valueOrOne(), + /* DestAlign */ Memmove->getDestAlign().valueOrOne(), + /* SrcIsVolatile */ Memmove->isVolatile(), + /* DstIsVolatile */ Memmove->isVolatile()); +} + +void llvm::expandMemSetAsLoop(MemSetInst *Memset) { + createMemSetLoop(/* InsertBefore */ Memset, + /* DstAddr */ Memset->getRawDest(), + /* CopyLen */ Memset->getLength(), + /* SetValue */ Memset->getValue(), + /* Alignment */ Memset->getDestAlign().valueOrOne(), + Memset->isVolatile()); +} + +void llvm::expandAtomicMemCpyAsLoop(AtomicMemCpyInst *AtomicMemcpy, + const TargetTransformInfo &TTI, + ScalarEvolution *SE) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(AtomicMemcpy->getLength())) { + createMemCpyLoopKnownSize( + /* InsertBefore */ AtomicMemcpy, + /* SrcAddr */ AtomicMemcpy->getRawSource(), + /* DstAddr */ AtomicMemcpy->getRawDest(), + /* CopyLen */ CI, + /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(), + /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(), + /* SrcIsVolatile */ AtomicMemcpy->isVolatile(), + /* DstIsVolatile */ AtomicMemcpy->isVolatile(), + /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec. + /* TargetTransformInfo */ TTI, + /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes()); + } else { + createMemCpyLoopUnknownSize( + /* InsertBefore */ AtomicMemcpy, + /* SrcAddr */ AtomicMemcpy->getRawSource(), + /* DstAddr */ AtomicMemcpy->getRawDest(), + /* CopyLen */ AtomicMemcpy->getLength(), + /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(), + /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(), + /* SrcIsVolatile */ AtomicMemcpy->isVolatile(), + /* DstIsVolatile */ AtomicMemcpy->isVolatile(), + /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec. + /* TargetTransformInfo */ TTI, + /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes()); + } +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LowerSwitch.cpp new file mode 100644 index 0000000000..227de425ff --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/LowerSwitch.cpp @@ -0,0 +1,611 @@ +//===- LowerSwitch.cpp - Eliminate Switch instructions --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The LowerSwitch transformation rewrites switch instructions with a sequence +// of branches, which allows targets to get away with not implementing the +// switch instruction until it is convenient. 
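To make the effect concrete at the source level (made-up case values; the pass itself rewrites IR switch instructions, not C++), a small switch and its lowered counterpart look roughly like this:

#include <cassert>

// A switch the way a frontend would emit it.
static int before(int v) {
  switch (v) {
  case 1:  return 10;
  case 2:  return 20;
  case 5:  return 50;
  default: return -1;
  }
}

// The same logic after a LowerSwitch-style rewrite: a balanced tree of
// comparisons that pivots on the middle case, ending in leaf equality tests.
static int after(int v) {
  if (v < 2) {                // NodeBlock: pivot on case value 2
    if (v == 1) return 10;    // LeafBlock
  } else {
    if (v == 2) return 20;    // LeafBlock
    if (v == 5) return 50;    // LeafBlock
  }
  return -1;                  // default destination
}

int main() {
  for (int v = -3; v < 8; ++v)
    assert(before(v) == after(v));
  return 0;
}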
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LowerSwitch.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/ConstantRange.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <limits> +#include <vector> + +using namespace llvm; + +#define DEBUG_TYPE "lower-switch" + +namespace { + +struct IntRange { + APInt Low, High; +}; + +} // end anonymous namespace + +namespace { +// Return true iff R is covered by Ranges. +bool IsInRanges(const IntRange &R, const std::vector<IntRange> &Ranges) { + // Note: Ranges must be sorted, non-overlapping and non-adjacent. + + // Find the first range whose High field is >= R.High, + // then check if the Low field is <= R.Low. If so, we + // have a Range that covers R. + auto I = llvm::lower_bound( + Ranges, R, [](IntRange A, IntRange B) { return A.High.slt(B.High); }); + return I != Ranges.end() && I->Low.sle(R.Low); +} + +struct CaseRange { + ConstantInt *Low; + ConstantInt *High; + BasicBlock *BB; + + CaseRange(ConstantInt *low, ConstantInt *high, BasicBlock *bb) + : Low(low), High(high), BB(bb) {} +}; + +using CaseVector = std::vector<CaseRange>; +using CaseItr = std::vector<CaseRange>::iterator; + +/// The comparison function for sorting the switch case values in the vector. +/// WARNING: Case ranges should be disjoint! +struct CaseCmp { + bool operator()(const CaseRange &C1, const CaseRange &C2) { + const ConstantInt *CI1 = cast<const ConstantInt>(C1.Low); + const ConstantInt *CI2 = cast<const ConstantInt>(C2.High); + return CI1->getValue().slt(CI2->getValue()); + } +}; + +/// Used for debugging purposes. +LLVM_ATTRIBUTE_USED +raw_ostream &operator<<(raw_ostream &O, const CaseVector &C) { + O << "["; + + for (CaseVector::const_iterator B = C.begin(), E = C.end(); B != E;) { + O << "[" << B->Low->getValue() << ", " << B->High->getValue() << "]"; + if (++B != E) + O << ", "; + } + + return O << "]"; +} + +/// Update the first occurrence of the "switch statement" BB in the PHI +/// node with the "new" BB. The other occurrences will: +/// +/// 1) Be updated by subsequent calls to this function. Switch statements may +/// have more than one outcoming edge into the same BB if they all have the same +/// value. When the switch statement is converted these incoming edges are now +/// coming from multiple BBs. +/// 2) Removed if subsequent incoming values now share the same case, i.e., +/// multiple outcome edges are condensed into one. This is necessary to keep the +/// number of phi values equal to the number of branches to SuccBB. 
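FixPhis below applies this rule to real PHI nodes. As a standalone sketch of the same bookkeeping, using a plain vector of predecessor names instead of PHINode incoming entries (all names illustrative):

#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

// Model: the incoming-predecessor list of one PHI node in SuccBB.
using IncomingList = std::vector<std::string>;

// Retarget the first occurrence of OrigBB to NewBB (if NewBB is non-empty),
// then drop up to NumMergedCases further occurrences of OrigBB so the number
// of incoming entries matches the number of branches again.
static void fixPhisModel(IncomingList &Incoming, const std::string &OrigBB,
                         const std::string &NewBB, unsigned NumMergedCases) {
  std::size_t Idx = 0;
  if (!NewBB.empty()) {
    for (; Idx < Incoming.size(); ++Idx)
      if (Incoming[Idx] == OrigBB) {
        Incoming[Idx] = NewBB;
        ++Idx;                       // skip the entry we just updated
        break;
      }
  }
  while (NumMergedCases > 0 && Idx < Incoming.size()) {
    if (Incoming[Idx] == OrigBB) {
      Incoming.erase(Incoming.begin() + Idx); // condensed case: drop the entry
      --NumMergedCases;
    } else {
      ++Idx;
    }
  }
}

int main() {
  // Three cases branched from "switch.bb" to this successor; after lowering
  // they are reached through a single new leaf block.
  IncomingList Incoming = {"switch.bb", "other.bb", "switch.bb", "switch.bb"};
  fixPhisModel(Incoming, "switch.bb", "leaf.bb", /*NumMergedCases=*/2);
  assert((Incoming == IncomingList{"leaf.bb", "other.bb"}));
  return 0;
}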
+void FixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB, + const APInt &NumMergedCases) { + for (auto &I : SuccBB->phis()) { + PHINode *PN = cast<PHINode>(&I); + + // Only update the first occurrence if NewBB exists. + unsigned Idx = 0, E = PN->getNumIncomingValues(); + APInt LocalNumMergedCases = NumMergedCases; + for (; Idx != E && NewBB; ++Idx) { + if (PN->getIncomingBlock(Idx) == OrigBB) { + PN->setIncomingBlock(Idx, NewBB); + break; + } + } + + // Skip the updated incoming block so that it will not be removed. + if (NewBB) + ++Idx; + + // Remove additional occurrences coming from condensed cases and keep the + // number of incoming values equal to the number of branches to SuccBB. + SmallVector<unsigned, 8> Indices; + for (; LocalNumMergedCases.ugt(0) && Idx < E; ++Idx) + if (PN->getIncomingBlock(Idx) == OrigBB) { + Indices.push_back(Idx); + LocalNumMergedCases -= 1; + } + // Remove incoming values in the reverse order to prevent invalidating + // *successive* index. + for (unsigned III : llvm::reverse(Indices)) + PN->removeIncomingValue(III); + } +} + +/// Create a new leaf block for the binary lookup tree. It checks if the +/// switch's value == the case's value. If not, then it jumps to the default +/// branch. At this point in the tree, the value can't be another valid case +/// value, so the jump to the "default" branch is warranted. +BasicBlock *NewLeafBlock(CaseRange &Leaf, Value *Val, ConstantInt *LowerBound, + ConstantInt *UpperBound, BasicBlock *OrigBlock, + BasicBlock *Default) { + Function *F = OrigBlock->getParent(); + BasicBlock *NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock"); + F->insert(++OrigBlock->getIterator(), NewLeaf); + + // Emit comparison + ICmpInst *Comp = nullptr; + if (Leaf.Low == Leaf.High) { + // Make the seteq instruction... + Comp = + new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val, Leaf.Low, "SwitchLeaf"); + } else { + // Make range comparison + if (Leaf.Low == LowerBound) { + // Val >= Min && Val <= Hi --> Val <= Hi + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High, + "SwitchLeaf"); + } else if (Leaf.High == UpperBound) { + // Val <= Max && Val >= Lo --> Val >= Lo + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SGE, Val, Leaf.Low, + "SwitchLeaf"); + } else if (Leaf.Low->isZero()) { + // Val >= 0 && Val <= Hi --> Val <=u Hi + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High, + "SwitchLeaf"); + } else { + // Emit V-Lo <=u Hi-Lo + Constant *NegLo = ConstantExpr::getNeg(Leaf.Low); + Instruction *Add = BinaryOperator::CreateAdd( + Val, NegLo, Val->getName() + ".off", NewLeaf); + Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High); + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound, + "SwitchLeaf"); + } + } + + // Make the conditional branch... + BasicBlock *Succ = Leaf.BB; + BranchInst::Create(Succ, Default, Comp, NewLeaf); + + // Update the PHI incoming value/block for the default. + for (auto &I : Default->phis()) { + PHINode *PN = cast<PHINode>(&I); + auto *V = PN->getIncomingValueForBlock(OrigBlock); + PN->addIncoming(V, NewLeaf); + } + + // If there were any PHI nodes in this successor, rewrite one entry + // from OrigBlock to come from NewLeaf. 
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + // Remove all but one incoming entries from the cluster + APInt Range = Leaf.High->getValue() - Leaf.Low->getValue(); + for (APInt j(Range.getBitWidth(), 0, true); j.slt(Range); ++j) { + PN->removeIncomingValue(OrigBlock); + } + + int BlockIdx = PN->getBasicBlockIndex(OrigBlock); + assert(BlockIdx != -1 && "Switch didn't go to this successor??"); + PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf); + } + + return NewLeaf; +} + +/// Convert the switch statement into a binary lookup of the case values. +/// The function recursively builds this tree. LowerBound and UpperBound are +/// used to keep track of the bounds for Val that have already been checked by +/// a block emitted by one of the previous calls to switchConvert in the call +/// stack. +BasicBlock *SwitchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, + ConstantInt *UpperBound, Value *Val, + BasicBlock *Predecessor, BasicBlock *OrigBlock, + BasicBlock *Default, + const std::vector<IntRange> &UnreachableRanges) { + assert(LowerBound && UpperBound && "Bounds must be initialized"); + unsigned Size = End - Begin; + + if (Size == 1) { + // Check if the Case Range is perfectly squeezed in between + // already checked Upper and Lower bounds. If it is then we can avoid + // emitting the code that checks if the value actually falls in the range + // because the bounds already tell us so. + if (Begin->Low == LowerBound && Begin->High == UpperBound) { + APInt NumMergedCases = UpperBound->getValue() - LowerBound->getValue(); + FixPhis(Begin->BB, OrigBlock, Predecessor, NumMergedCases); + return Begin->BB; + } + return NewLeafBlock(*Begin, Val, LowerBound, UpperBound, OrigBlock, + Default); + } + + unsigned Mid = Size / 2; + std::vector<CaseRange> LHS(Begin, Begin + Mid); + LLVM_DEBUG(dbgs() << "LHS: " << LHS << "\n"); + std::vector<CaseRange> RHS(Begin + Mid, End); + LLVM_DEBUG(dbgs() << "RHS: " << RHS << "\n"); + + CaseRange &Pivot = *(Begin + Mid); + LLVM_DEBUG(dbgs() << "Pivot ==> [" << Pivot.Low->getValue() << ", " + << Pivot.High->getValue() << "]\n"); + + // NewLowerBound here should never be the integer minimal value. + // This is because it is computed from a case range that is never + // the smallest, so there is always a case range that has at least + // a smaller value. + ConstantInt *NewLowerBound = Pivot.Low; + + // Because NewLowerBound is never the smallest representable integer + // it is safe here to subtract one. + ConstantInt *NewUpperBound = ConstantInt::get(NewLowerBound->getContext(), + NewLowerBound->getValue() - 1); + + if (!UnreachableRanges.empty()) { + // Check if the gap between LHS's highest and NewLowerBound is unreachable. + APInt GapLow = LHS.back().High->getValue() + 1; + APInt GapHigh = NewLowerBound->getValue() - 1; + IntRange Gap = {GapLow, GapHigh}; + if (GapHigh.sge(GapLow) && IsInRanges(Gap, UnreachableRanges)) + NewUpperBound = LHS.back().High; + } + + LLVM_DEBUG(dbgs() << "LHS Bounds ==> [" << LowerBound->getValue() << ", " + << NewUpperBound->getValue() << "]\n" + << "RHS Bounds ==> [" << NewLowerBound->getValue() << ", " + << UpperBound->getValue() << "]\n"); + + // Create a new node that checks if the value is < pivot. Go to the + // left branch if it is and right branch if not. 
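The recursion and the per-leaf test can be pictured with a scalar model (standalone C++, not the IRBuilder calls used here): sorted, disjoint case ranges are split at a pivot, and an interior leaf decides membership with the unsigned "V - Lo <= Hi - Lo" comparison that NewLeafBlock emits. Types and names below are illustrative.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

struct CaseRangeModel {
  int64_t Low, High; // inclusive, disjoint, sorted by Low
  int Label;         // stands in for the successor basic block
};

// Scalar model of switchConvert: binary search over sorted case ranges.
static int lowerSwitchModel(const std::vector<CaseRangeModel> &Cases,
                            std::size_t Begin, std::size_t End, int64_t Val,
                            int DefaultLabel) {
  std::size_t Size = End - Begin;
  if (Size == 0)
    return DefaultLabel;
  if (Size == 1) {
    const CaseRangeModel &Leaf = Cases[Begin];
    // Unsigned range check: V - Lo <=u Hi - Lo covers [Lo, Hi] in one compare.
    if ((uint64_t)(Val - Leaf.Low) <= (uint64_t)(Leaf.High - Leaf.Low))
      return Leaf.Label;
    return DefaultLabel;
  }
  std::size_t Mid = Begin + Size / 2;
  if (Val < Cases[Mid].Low)   // NodeBlock: compare against the pivot's Low
    return lowerSwitchModel(Cases, Begin, Mid, Val, DefaultLabel);
  return lowerSwitchModel(Cases, Mid, End, Val, DefaultLabel);
}

int main() {
  std::vector<CaseRangeModel> Cases = {{0, 0, 1}, {2, 4, 2}, {10, 10, 3}};
  assert(lowerSwitchModel(Cases, 0, Cases.size(), 3, -1) == 2);
  assert(lowerSwitchModel(Cases, 0, Cases.size(), 5, -1) == -1);
  assert(lowerSwitchModel(Cases, 0, Cases.size(), 10, -1) == 3);
  return 0;
}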
+ Function *F = OrigBlock->getParent(); + BasicBlock *NewNode = BasicBlock::Create(Val->getContext(), "NodeBlock"); + + ICmpInst *Comp = new ICmpInst(ICmpInst::ICMP_SLT, Val, Pivot.Low, "Pivot"); + + BasicBlock *LBranch = + SwitchConvert(LHS.begin(), LHS.end(), LowerBound, NewUpperBound, Val, + NewNode, OrigBlock, Default, UnreachableRanges); + BasicBlock *RBranch = + SwitchConvert(RHS.begin(), RHS.end(), NewLowerBound, UpperBound, Val, + NewNode, OrigBlock, Default, UnreachableRanges); + + F->insert(++OrigBlock->getIterator(), NewNode); + Comp->insertInto(NewNode, NewNode->end()); + + BranchInst::Create(LBranch, RBranch, Comp, NewNode); + return NewNode; +} + +/// Transform simple list of \p SI's cases into list of CaseRange's \p Cases. +/// \post \p Cases wouldn't contain references to \p SI's default BB. +/// \returns Number of \p SI's cases that do not reference \p SI's default BB. +unsigned Clusterify(CaseVector &Cases, SwitchInst *SI) { + unsigned NumSimpleCases = 0; + + // Start with "simple" cases + for (auto Case : SI->cases()) { + if (Case.getCaseSuccessor() == SI->getDefaultDest()) + continue; + Cases.push_back(CaseRange(Case.getCaseValue(), Case.getCaseValue(), + Case.getCaseSuccessor())); + ++NumSimpleCases; + } + + llvm::sort(Cases, CaseCmp()); + + // Merge case into clusters + if (Cases.size() >= 2) { + CaseItr I = Cases.begin(); + for (CaseItr J = std::next(I), E = Cases.end(); J != E; ++J) { + const APInt &nextValue = J->Low->getValue(); + const APInt ¤tValue = I->High->getValue(); + BasicBlock *nextBB = J->BB; + BasicBlock *currentBB = I->BB; + + // If the two neighboring cases go to the same destination, merge them + // into a single case. + assert(nextValue.sgt(currentValue) && + "Cases should be strictly ascending"); + if ((nextValue == currentValue + 1) && (currentBB == nextBB)) { + I->High = J->High; + // FIXME: Combine branch weights. + } else if (++I != J) { + *I = *J; + } + } + Cases.erase(std::next(I), Cases.end()); + } + + return NumSimpleCases; +} + +/// Replace the specified switch instruction with a sequence of chained if-then +/// insts in a balanced binary search. +void ProcessSwitchInst(SwitchInst *SI, + SmallPtrSetImpl<BasicBlock *> &DeleteList, + AssumptionCache *AC, LazyValueInfo *LVI) { + BasicBlock *OrigBlock = SI->getParent(); + Function *F = OrigBlock->getParent(); + Value *Val = SI->getCondition(); // The value we are switching on... + BasicBlock *Default = SI->getDefaultDest(); + + // Don't handle unreachable blocks. If there are successors with phis, this + // would leave them behind with missing predecessors. + if ((OrigBlock != &F->getEntryBlock() && pred_empty(OrigBlock)) || + OrigBlock->getSinglePredecessor() == OrigBlock) { + DeleteList.insert(OrigBlock); + return; + } + + // Prepare cases vector. + CaseVector Cases; + const unsigned NumSimpleCases = Clusterify(Cases, SI); + IntegerType *IT = cast<IntegerType>(SI->getCondition()->getType()); + const unsigned BitWidth = IT->getBitWidth(); + // Explictly use higher precision to prevent unsigned overflow where + // `UnsignedMax - 0 + 1 == 0` + APInt UnsignedZero(BitWidth + 1, 0); + APInt UnsignedMax = APInt::getMaxValue(BitWidth); + LLVM_DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size() + << ". Total non-default cases: " << NumSimpleCases + << "\nCase clusters: " << Cases << "\n"); + + // If there is only the default destination, just branch. 
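The neighbour-merging step in Clusterify above can also be modelled in isolation (plain integers instead of ConstantInt, illustrative names): after sorting, adjacent cases collapse into one range when their values are consecutive and they branch to the same successor.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

struct CaseModel {
  int64_t Low, High;
  int BB; // stands in for the successor block
};

// Sort by case value, then merge runs of consecutive values that share a
// destination, mirroring the clustering loop in Clusterify().
static std::vector<CaseModel> clusterifyModel(std::vector<CaseModel> Cases) {
  std::sort(Cases.begin(), Cases.end(),
            [](const CaseModel &A, const CaseModel &B) { return A.Low < B.Low; });
  std::vector<CaseModel> Out;
  for (const CaseModel &C : Cases) {
    if (!Out.empty() && Out.back().High + 1 == C.Low && Out.back().BB == C.BB)
      Out.back().High = C.High; // extend the previous cluster
    else
      Out.push_back(C);
  }
  return Out;
}

int main() {
  // Cases 1, 2, 3 go to block 7; case 5 goes to block 9.
  std::vector<CaseModel> In = {{3, 3, 7}, {1, 1, 7}, {2, 2, 7}, {5, 5, 9}};
  std::vector<CaseModel> Out = clusterifyModel(In);
  assert(Out.size() == 2);
  assert(Out[0].Low == 1 && Out[0].High == 3 && Out[0].BB == 7);
  assert(Out[1].Low == 5 && Out[1].High == 5 && Out[1].BB == 9);
  return 0;
}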
+ if (Cases.empty()) { + BranchInst::Create(Default, OrigBlock); + // Remove all the references from Default's PHIs to OrigBlock, but one. + FixPhis(Default, OrigBlock, OrigBlock, UnsignedMax); + SI->eraseFromParent(); + return; + } + + ConstantInt *LowerBound = nullptr; + ConstantInt *UpperBound = nullptr; + bool DefaultIsUnreachableFromSwitch = false; + + if (isa<UnreachableInst>(Default->getFirstNonPHIOrDbg())) { + // Make the bounds tightly fitted around the case value range, because we + // know that the value passed to the switch must be exactly one of the case + // values. + LowerBound = Cases.front().Low; + UpperBound = Cases.back().High; + DefaultIsUnreachableFromSwitch = true; + } else { + // Constraining the range of the value being switched over helps eliminating + // unreachable BBs and minimizing the number of `add` instructions + // newLeafBlock ends up emitting. Running CorrelatedValuePropagation after + // LowerSwitch isn't as good, and also much more expensive in terms of + // compile time for the following reasons: + // 1. it processes many kinds of instructions, not just switches; + // 2. even if limited to icmp instructions only, it will have to process + // roughly C icmp's per switch, where C is the number of cases in the + // switch, while LowerSwitch only needs to call LVI once per switch. + const DataLayout &DL = F->getParent()->getDataLayout(); + KnownBits Known = computeKnownBits(Val, DL, /*Depth=*/0, AC, SI); + // TODO Shouldn't this create a signed range? + ConstantRange KnownBitsRange = + ConstantRange::fromKnownBits(Known, /*IsSigned=*/false); + const ConstantRange LVIRange = LVI->getConstantRange(Val, SI); + ConstantRange ValRange = KnownBitsRange.intersectWith(LVIRange); + // We delegate removal of unreachable non-default cases to other passes. In + // the unlikely event that some of them survived, we just conservatively + // maintain the invariant that all the cases lie between the bounds. This + // may, however, still render the default case effectively unreachable. + const APInt &Low = Cases.front().Low->getValue(); + const APInt &High = Cases.back().High->getValue(); + APInt Min = APIntOps::smin(ValRange.getSignedMin(), Low); + APInt Max = APIntOps::smax(ValRange.getSignedMax(), High); + + LowerBound = ConstantInt::get(SI->getContext(), Min); + UpperBound = ConstantInt::get(SI->getContext(), Max); + DefaultIsUnreachableFromSwitch = (Min + (NumSimpleCases - 1) == Max); + } + + std::vector<IntRange> UnreachableRanges; + + if (DefaultIsUnreachableFromSwitch) { + DenseMap<BasicBlock *, APInt> Popularity; + APInt MaxPop(UnsignedZero); + BasicBlock *PopSucc = nullptr; + + APInt SignedMax = APInt::getSignedMaxValue(BitWidth); + APInt SignedMin = APInt::getSignedMinValue(BitWidth); + IntRange R = {SignedMin, SignedMax}; + UnreachableRanges.push_back(R); + for (const auto &I : Cases) { + const APInt &Low = I.Low->getValue(); + const APInt &High = I.High->getValue(); + + IntRange &LastRange = UnreachableRanges.back(); + if (LastRange.Low.eq(Low)) { + // There is nothing left of the previous range. + UnreachableRanges.pop_back(); + } else { + // Terminate the previous range. + assert(Low.sgt(LastRange.Low)); + LastRange.High = Low - 1; + } + if (High.ne(SignedMax)) { + IntRange R = {High + 1, SignedMax}; + UnreachableRanges.push_back(R); + } + + // Count popularity. 
+ assert(High.sge(Low) && "Popularity shouldn't be negative."); + APInt N = High.sext(BitWidth + 1) - Low.sext(BitWidth + 1) + 1; + // Explict insert to make sure the bitwidth of APInts match + APInt &Pop = Popularity.insert({I.BB, APInt(UnsignedZero)}).first->second; + if ((Pop += N).ugt(MaxPop)) { + MaxPop = Pop; + PopSucc = I.BB; + } + } +#ifndef NDEBUG + /* UnreachableRanges should be sorted and the ranges non-adjacent. */ + for (auto I = UnreachableRanges.begin(), E = UnreachableRanges.end(); + I != E; ++I) { + assert(I->Low.sle(I->High)); + auto Next = I + 1; + if (Next != E) { + assert(Next->Low.sgt(I->High)); + } + } +#endif + + // As the default block in the switch is unreachable, update the PHI nodes + // (remove all of the references to the default block) to reflect this. + const unsigned NumDefaultEdges = SI->getNumCases() + 1 - NumSimpleCases; + for (unsigned I = 0; I < NumDefaultEdges; ++I) + Default->removePredecessor(OrigBlock); + + // Use the most popular block as the new default, reducing the number of + // cases. + Default = PopSucc; + llvm::erase_if(Cases, + [PopSucc](const CaseRange &R) { return R.BB == PopSucc; }); + + // If there are no cases left, just branch. + if (Cases.empty()) { + BranchInst::Create(Default, OrigBlock); + SI->eraseFromParent(); + // As all the cases have been replaced with a single branch, only keep + // one entry in the PHI nodes. + if (!MaxPop.isZero()) + for (APInt I(UnsignedZero); I.ult(MaxPop - 1); ++I) + PopSucc->removePredecessor(OrigBlock); + return; + } + + // If the condition was a PHI node with the switch block as a predecessor + // removing predecessors may have caused the condition to be erased. + // Getting the condition value again here protects against that. + Val = SI->getCondition(); + } + + BasicBlock *SwitchBlock = + SwitchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val, + OrigBlock, OrigBlock, Default, UnreachableRanges); + + // We have added incoming values for newly-created predecessors in + // NewLeafBlock(). The only meaningful work we offload to FixPhis() is to + // remove the incoming values from OrigBlock. There might be a special case + // that SwitchBlock is the same as Default, under which the PHIs in Default + // are fixed inside SwitchConvert(). + if (SwitchBlock != Default) + FixPhis(Default, OrigBlock, nullptr, UnsignedMax); + + // Branch to our shiny new if-then stuff... + BranchInst::Create(SwitchBlock, OrigBlock); + + // We are now done with the switch instruction, delete it. + BasicBlock *OldDefault = SI->getDefaultDest(); + SI->eraseFromParent(); + + // If the Default block has no more predecessors just add it to DeleteList. + if (pred_empty(OldDefault)) + DeleteList.insert(OldDefault); +} + +bool LowerSwitch(Function &F, LazyValueInfo *LVI, AssumptionCache *AC) { + bool Changed = false; + SmallPtrSet<BasicBlock *, 8> DeleteList; + + // We use make_early_inc_range here so that we don't traverse new blocks. + for (BasicBlock &Cur : llvm::make_early_inc_range(F)) { + // If the block is a dead Default block that will be deleted later, don't + // waste time processing it. + if (DeleteList.count(&Cur)) + continue; + + if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur.getTerminator())) { + Changed = true; + ProcessSwitchInst(SI, DeleteList, AC, LVI); + } + } + + for (BasicBlock *BB : DeleteList) { + LVI->eraseBlock(BB); + DeleteDeadBlock(BB); + } + + return Changed; +} + +/// Replace all SwitchInst instructions with chained branch instructions. 
+class LowerSwitchLegacyPass : public FunctionPass { +public: + // Pass identification, replacement for typeid + static char ID; + + LowerSwitchLegacyPass() : FunctionPass(ID) { + initializeLowerSwitchLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<LazyValueInfoWrapperPass>(); + } +}; + +} // end anonymous namespace + +char LowerSwitchLegacyPass::ID = 0; + +// Publicly exposed interface to pass... +char &llvm::LowerSwitchID = LowerSwitchLegacyPass::ID; + +INITIALIZE_PASS_BEGIN(LowerSwitchLegacyPass, "lowerswitch", + "Lower SwitchInst's to branches", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass) +INITIALIZE_PASS_END(LowerSwitchLegacyPass, "lowerswitch", + "Lower SwitchInst's to branches", false, false) + +// createLowerSwitchPass - Interface to this file... +FunctionPass *llvm::createLowerSwitchPass() { + return new LowerSwitchLegacyPass(); +} + +bool LowerSwitchLegacyPass::runOnFunction(Function &F) { + LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI(); + auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>(); + AssumptionCache *AC = ACT ? &ACT->getAssumptionCache(F) : nullptr; + return LowerSwitch(F, LVI, AC); +} + +PreservedAnalyses LowerSwitchPass::run(Function &F, + FunctionAnalysisManager &AM) { + LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F); + AssumptionCache *AC = AM.getCachedResult<AssumptionAnalysis>(F); + return LowerSwitch(F, LVI, AC) ? PreservedAnalyses::none() + : PreservedAnalyses::all(); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/MatrixUtils.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/MatrixUtils.cpp new file mode 100644 index 0000000000..e218773cf5 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/MatrixUtils.cpp @@ -0,0 +1,104 @@ +//===- MatrixUtils.cpp - Utilities to lower matrix intrinsics ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utilities for generating tiled loops for matrix operations. 
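For orientation, the loop nest these helpers build in IR corresponds to the hand-written C++ skeleton below (illustrative names, with a block body filled in purely for the demonstration). The loops generated further down terminate with a != comparison against the bound, so this sketch likewise assumes the tile size divides each dimension evenly.

#include <cassert>
#include <vector>

// C++ skeleton of the tiled loop nest: cols -> rows -> inner, each loop
// advancing by TileSize, with one TileSize x TileSize block handled per body.
static void tiledMatMul(const std::vector<double> &A, // NumRows x NumInner
                        const std::vector<double> &B, // NumInner x NumColumns
                        std::vector<double> &C,       // NumRows x NumColumns
                        unsigned NumRows, unsigned NumColumns,
                        unsigned NumInner, unsigned TileSize) {
  for (unsigned Col = 0; Col != NumColumns; Col += TileSize)
    for (unsigned Row = 0; Row != NumRows; Row += TileSize)
      for (unsigned K = 0; K != NumInner; K += TileSize)
        // Body: multiply-accumulate one tile of C from tiles of A and B.
        for (unsigned r = Row; r < Row + TileSize; ++r)
          for (unsigned c = Col; c < Col + TileSize; ++c)
            for (unsigned k = K; k < K + TileSize; ++k)
              C[r * NumColumns + c] +=
                  A[r * NumInner + k] * B[k * NumColumns + c];
}

int main() {
  const unsigned N = 4, Tile = 2;
  std::vector<double> A(N * N, 1.0), B(N * N, 2.0), C(N * N, 0.0);
  tiledMatMul(A, B, C, N, N, N, Tile);
  assert(C[0] == 8.0); // each entry is the sum of four 1.0 * 2.0 products
  return 0;
}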
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/MatrixUtils.h" +#include "llvm/Analysis/DomTreeUpdater.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Type.h" + +using namespace llvm; + +BasicBlock *TileInfo::CreateLoop(BasicBlock *Preheader, BasicBlock *Exit, + Value *Bound, Value *Step, StringRef Name, + IRBuilderBase &B, DomTreeUpdater &DTU, Loop *L, + LoopInfo &LI) { + LLVMContext &Ctx = Preheader->getContext(); + BasicBlock *Header = BasicBlock::Create( + Preheader->getContext(), Name + ".header", Preheader->getParent(), Exit); + BasicBlock *Body = BasicBlock::Create(Header->getContext(), Name + ".body", + Header->getParent(), Exit); + BasicBlock *Latch = BasicBlock::Create(Header->getContext(), Name + ".latch", + Header->getParent(), Exit); + + Type *I32Ty = Type::getInt64Ty(Ctx); + BranchInst::Create(Body, Header); + BranchInst::Create(Latch, Body); + PHINode *IV = + PHINode::Create(I32Ty, 2, Name + ".iv", Header->getTerminator()); + IV->addIncoming(ConstantInt::get(I32Ty, 0), Preheader); + + B.SetInsertPoint(Latch); + Value *Inc = B.CreateAdd(IV, Step, Name + ".step"); + Value *Cond = B.CreateICmpNE(Inc, Bound, Name + ".cond"); + BranchInst::Create(Header, Exit, Cond, Latch); + IV->addIncoming(Inc, Latch); + + BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator()); + BasicBlock *Tmp = PreheaderBr->getSuccessor(0); + PreheaderBr->setSuccessor(0, Header); + DTU.applyUpdatesPermissive({ + {DominatorTree::Delete, Preheader, Tmp}, + {DominatorTree::Insert, Header, Body}, + {DominatorTree::Insert, Body, Latch}, + {DominatorTree::Insert, Latch, Header}, + {DominatorTree::Insert, Latch, Exit}, + {DominatorTree::Insert, Preheader, Header}, + }); + + L->addBasicBlockToLoop(Header, LI); + L->addBasicBlockToLoop(Body, LI); + L->addBasicBlockToLoop(Latch, LI); + return Body; +} + +// Creates the following loop nest skeleton: +// for C = 0; C < NumColumns; C += TileSize +// for R = 0; R < NumRows; R += TileSize +// for K = 0; K < Inner ; K += TileSize +BasicBlock *TileInfo::CreateTiledLoops(BasicBlock *Start, BasicBlock *End, + IRBuilderBase &B, DomTreeUpdater &DTU, + LoopInfo &LI) { + Loop *ColumnLoopInfo = LI.AllocateLoop(); + Loop *RowLoopInfo = LI.AllocateLoop(); + Loop *KLoopInfo = LI.AllocateLoop(); + RowLoopInfo->addChildLoop(KLoopInfo); + ColumnLoopInfo->addChildLoop(RowLoopInfo); + if (Loop *ParentL = LI.getLoopFor(Start)) + ParentL->addChildLoop(ColumnLoopInfo); + else + LI.addTopLevelLoop(ColumnLoopInfo); + + BasicBlock *ColBody = + CreateLoop(Start, End, B.getInt64(NumColumns), B.getInt64(TileSize), + "cols", B, DTU, ColumnLoopInfo, LI); + ColumnLoop.Latch = ColBody->getSingleSuccessor(); + BasicBlock *RowBody = + CreateLoop(ColBody, ColumnLoop.Latch, B.getInt64(NumRows), + B.getInt64(TileSize), "rows", B, DTU, RowLoopInfo, LI); + RowLoop.Latch = RowBody->getSingleSuccessor(); + + BasicBlock *InnerBody = + CreateLoop(RowBody, RowLoop.Latch, B.getInt64(NumInner), + B.getInt64(TileSize), "inner", B, DTU, KLoopInfo, LI); + KLoop.Latch = InnerBody->getSingleSuccessor(); + ColumnLoop.Header = ColBody->getSinglePredecessor(); + RowLoop.Header = RowBody->getSinglePredecessor(); + KLoop.Header = InnerBody->getSinglePredecessor(); + RowLoop.Index = &*RowLoop.Header->begin(); + ColumnLoop.Index = &*ColumnLoop.Header->begin(); + KLoop.Index = &*KLoop.Header->begin(); + + return InnerBody; +} diff 
--git a/contrib/libs/llvm16/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/Mem2Reg.cpp new file mode 100644 index 0000000000..5ad7aeb463 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/Mem2Reg.cpp @@ -0,0 +1,116 @@ +//===- Mem2Reg.cpp - The -mem2reg pass, a wrapper around the Utils lib ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass is a simple pass wrapper around the PromoteMemToReg function call +// exposed by the Utils library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Mem2Reg.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include <vector> + +using namespace llvm; + +#define DEBUG_TYPE "mem2reg" + +STATISTIC(NumPromoted, "Number of alloca's promoted"); + +static bool promoteMemoryToRegister(Function &F, DominatorTree &DT, + AssumptionCache &AC) { + std::vector<AllocaInst *> Allocas; + BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function + bool Changed = false; + + while (true) { + Allocas.clear(); + + // Find allocas that are safe to promote, by looking at all instructions in + // the entry node + for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) + if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca? + if (isAllocaPromotable(AI)) + Allocas.push_back(AI); + + if (Allocas.empty()) + break; + + PromoteMemToReg(Allocas, DT, &AC); + NumPromoted += Allocas.size(); + Changed = true; + } + return Changed; +} + +PreservedAnalyses PromotePass::run(Function &F, FunctionAnalysisManager &AM) { + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + auto &AC = AM.getResult<AssumptionAnalysis>(F); + if (!promoteMemoryToRegister(F, DT, AC)) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); + return PA; +} + +namespace { + +struct PromoteLegacyPass : public FunctionPass { + // Pass identification, replacement for typeid + static char ID; + + PromoteLegacyPass() : FunctionPass(ID) { + initializePromoteLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + // runOnFunction - To run this pass, first we calculate the alloca + // instructions that are safe for promotion, then we promote each one. 
+ bool runOnFunction(Function &F) override { + if (skipFunction(F)) + return false; + + DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + AssumptionCache &AC = + getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + return promoteMemoryToRegister(F, DT, AC); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.setPreservesCFG(); + } +}; + +} // end anonymous namespace + +char PromoteLegacyPass::ID = 0; + +INITIALIZE_PASS_BEGIN(PromoteLegacyPass, "mem2reg", "Promote Memory to " + "Register", + false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(PromoteLegacyPass, "mem2reg", "Promote Memory to Register", + false, false) + +// createPromoteMemoryToRegister - Provide an entry point to create this pass. +FunctionPass *llvm::createPromoteMemoryToRegisterPass() { + return new PromoteLegacyPass(); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/MemoryOpRemark.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/MemoryOpRemark.cpp new file mode 100644 index 0000000000..899928c085 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/MemoryOpRemark.cpp @@ -0,0 +1,410 @@ +//===-- MemoryOpRemark.cpp - Auto-init remark analysis---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implementation of the analysis for the "auto-init" remark. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/MemoryOpRemark.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include <optional> + +using namespace llvm; +using namespace llvm::ore; + +MemoryOpRemark::~MemoryOpRemark() = default; + +bool MemoryOpRemark::canHandle(const Instruction *I, const TargetLibraryInfo &TLI) { + if (isa<StoreInst>(I)) + return true; + + if (auto *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + case Intrinsic::memcpy_inline: + case Intrinsic::memcpy: + case Intrinsic::memmove: + case Intrinsic::memset: + case Intrinsic::memcpy_element_unordered_atomic: + case Intrinsic::memmove_element_unordered_atomic: + case Intrinsic::memset_element_unordered_atomic: + return true; + default: + return false; + } + } + + if (auto *CI = dyn_cast<CallInst>(I)) { + auto *CF = CI->getCalledFunction(); + if (!CF) + return false; + + if (!CF->hasName()) + return false; + + LibFunc LF; + bool KnownLibCall = TLI.getLibFunc(*CF, LF) && TLI.has(LF); + if (!KnownLibCall) + return false; + + switch (LF) { + case LibFunc_memcpy_chk: + case LibFunc_mempcpy_chk: + case LibFunc_memset_chk: + case LibFunc_memmove_chk: + case LibFunc_memcpy: + case LibFunc_mempcpy: + case LibFunc_memset: + case LibFunc_memmove: + case LibFunc_bzero: + case LibFunc_bcopy: + return true; + default: + return false; + } + } + + return false; +} + +void MemoryOpRemark::visit(const Instruction *I) { + // For some of them, we can provide more information: + + // For stores: + // * size + // * volatile / atomic + if (auto *SI = 
dyn_cast<StoreInst>(I)) { + visitStore(*SI); + return; + } + + // For intrinsics: + // * user-friendly name + // * size + if (auto *II = dyn_cast<IntrinsicInst>(I)) { + visitIntrinsicCall(*II); + return; + } + + // For calls: + // * known/unknown function (e.g. the compiler knows bzero, but it doesn't + // know my_bzero) + // * memory operation size + if (auto *CI = dyn_cast<CallInst>(I)) { + visitCall(*CI); + return; + } + + visitUnknown(*I); +} + +std::string MemoryOpRemark::explainSource(StringRef Type) const { + return (Type + ".").str(); +} + +StringRef MemoryOpRemark::remarkName(RemarkKind RK) const { + switch (RK) { + case RK_Store: + return "MemoryOpStore"; + case RK_Unknown: + return "MemoryOpUnknown"; + case RK_IntrinsicCall: + return "MemoryOpIntrinsicCall"; + case RK_Call: + return "MemoryOpCall"; + } + llvm_unreachable("missing RemarkKind case"); +} + +static void inlineVolatileOrAtomicWithExtraArgs(bool *Inline, bool Volatile, + bool Atomic, + DiagnosticInfoIROptimization &R) { + if (Inline && *Inline) + R << " Inlined: " << NV("StoreInlined", true) << "."; + if (Volatile) + R << " Volatile: " << NV("StoreVolatile", true) << "."; + if (Atomic) + R << " Atomic: " << NV("StoreAtomic", true) << "."; + // Emit the false cases under ExtraArgs. This won't show them in the remark + // message but will end up in the serialized remarks. + if ((Inline && !*Inline) || !Volatile || !Atomic) + R << setExtraArgs(); + if (Inline && !*Inline) + R << " Inlined: " << NV("StoreInlined", false) << "."; + if (!Volatile) + R << " Volatile: " << NV("StoreVolatile", false) << "."; + if (!Atomic) + R << " Atomic: " << NV("StoreAtomic", false) << "."; +} + +static std::optional<uint64_t> +getSizeInBytes(std::optional<uint64_t> SizeInBits) { + if (!SizeInBits || *SizeInBits % 8 != 0) + return std::nullopt; + return *SizeInBits / 8; +} + +template<typename ...Ts> +std::unique_ptr<DiagnosticInfoIROptimization> +MemoryOpRemark::makeRemark(Ts... 
Args) { + switch (diagnosticKind()) { + case DK_OptimizationRemarkAnalysis: + return std::make_unique<OptimizationRemarkAnalysis>(Args...); + case DK_OptimizationRemarkMissed: + return std::make_unique<OptimizationRemarkMissed>(Args...); + default: + llvm_unreachable("unexpected DiagnosticKind"); + } +} + +void MemoryOpRemark::visitStore(const StoreInst &SI) { + bool Volatile = SI.isVolatile(); + bool Atomic = SI.isAtomic(); + int64_t Size = DL.getTypeStoreSize(SI.getOperand(0)->getType()); + + auto R = makeRemark(RemarkPass.data(), remarkName(RK_Store), &SI); + *R << explainSource("Store") << "\nStore size: " << NV("StoreSize", Size) + << " bytes."; + visitPtr(SI.getOperand(1), /*IsRead=*/false, *R); + inlineVolatileOrAtomicWithExtraArgs(nullptr, Volatile, Atomic, *R); + ORE.emit(*R); +} + +void MemoryOpRemark::visitUnknown(const Instruction &I) { + auto R = makeRemark(RemarkPass.data(), remarkName(RK_Unknown), &I); + *R << explainSource("Initialization"); + ORE.emit(*R); +} + +void MemoryOpRemark::visitIntrinsicCall(const IntrinsicInst &II) { + SmallString<32> CallTo; + bool Atomic = false; + bool Inline = false; + switch (II.getIntrinsicID()) { + case Intrinsic::memcpy_inline: + CallTo = "memcpy"; + Inline = true; + break; + case Intrinsic::memcpy: + CallTo = "memcpy"; + break; + case Intrinsic::memmove: + CallTo = "memmove"; + break; + case Intrinsic::memset: + CallTo = "memset"; + break; + case Intrinsic::memcpy_element_unordered_atomic: + CallTo = "memcpy"; + Atomic = true; + break; + case Intrinsic::memmove_element_unordered_atomic: + CallTo = "memmove"; + Atomic = true; + break; + case Intrinsic::memset_element_unordered_atomic: + CallTo = "memset"; + Atomic = true; + break; + default: + return visitUnknown(II); + } + + auto R = makeRemark(RemarkPass.data(), remarkName(RK_IntrinsicCall), &II); + visitCallee(CallTo.str(), /*KnownLibCall=*/true, *R); + visitSizeOperand(II.getOperand(2), *R); + + auto *CIVolatile = dyn_cast<ConstantInt>(II.getOperand(3)); + // No such thing as a memory intrinsic that is both atomic and volatile. 
+ bool Volatile = !Atomic && CIVolatile && CIVolatile->getZExtValue(); + switch (II.getIntrinsicID()) { + case Intrinsic::memcpy_inline: + case Intrinsic::memcpy: + case Intrinsic::memmove: + case Intrinsic::memcpy_element_unordered_atomic: + visitPtr(II.getOperand(1), /*IsRead=*/true, *R); + visitPtr(II.getOperand(0), /*IsRead=*/false, *R); + break; + case Intrinsic::memset: + case Intrinsic::memset_element_unordered_atomic: + visitPtr(II.getOperand(0), /*IsRead=*/false, *R); + break; + } + inlineVolatileOrAtomicWithExtraArgs(&Inline, Volatile, Atomic, *R); + ORE.emit(*R); +} + +void MemoryOpRemark::visitCall(const CallInst &CI) { + Function *F = CI.getCalledFunction(); + if (!F) + return visitUnknown(CI); + + LibFunc LF; + bool KnownLibCall = TLI.getLibFunc(*F, LF) && TLI.has(LF); + auto R = makeRemark(RemarkPass.data(), remarkName(RK_Call), &CI); + visitCallee(F, KnownLibCall, *R); + visitKnownLibCall(CI, LF, *R); + ORE.emit(*R); +} + +template <typename FTy> +void MemoryOpRemark::visitCallee(FTy F, bool KnownLibCall, + DiagnosticInfoIROptimization &R) { + R << "Call to "; + if (!KnownLibCall) + R << NV("UnknownLibCall", "unknown") << " function "; + R << NV("Callee", F) << explainSource(""); +} + +void MemoryOpRemark::visitKnownLibCall(const CallInst &CI, LibFunc LF, + DiagnosticInfoIROptimization &R) { + switch (LF) { + default: + return; + case LibFunc_memset_chk: + case LibFunc_memset: + visitSizeOperand(CI.getOperand(2), R); + visitPtr(CI.getOperand(0), /*IsRead=*/false, R); + break; + case LibFunc_bzero: + visitSizeOperand(CI.getOperand(1), R); + visitPtr(CI.getOperand(0), /*IsRead=*/false, R); + break; + case LibFunc_memcpy_chk: + case LibFunc_mempcpy_chk: + case LibFunc_memmove_chk: + case LibFunc_memcpy: + case LibFunc_mempcpy: + case LibFunc_memmove: + case LibFunc_bcopy: + visitSizeOperand(CI.getOperand(2), R); + visitPtr(CI.getOperand(1), /*IsRead=*/true, R); + visitPtr(CI.getOperand(0), /*IsRead=*/false, R); + break; + } +} + +void MemoryOpRemark::visitSizeOperand(Value *V, DiagnosticInfoIROptimization &R) { + if (auto *Len = dyn_cast<ConstantInt>(V)) { + uint64_t Size = Len->getZExtValue(); + R << " Memory operation size: " << NV("StoreSize", Size) << " bytes."; + } +} + +static std::optional<StringRef> nameOrNone(const Value *V) { + if (V->hasName()) + return V->getName(); + return std::nullopt; +} + +void MemoryOpRemark::visitVariable(const Value *V, + SmallVectorImpl<VariableInfo> &Result) { + if (auto *GV = dyn_cast<GlobalVariable>(V)) { + auto *Ty = GV->getValueType(); + uint64_t Size = DL.getTypeSizeInBits(Ty).getFixedValue(); + VariableInfo Var{nameOrNone(GV), Size}; + if (!Var.isEmpty()) + Result.push_back(std::move(Var)); + return; + } + + // If we find some information in the debug info, take that. + bool FoundDI = false; + // Try to get an llvm.dbg.declare, which has a DILocalVariable giving us the + // real debug info name and size of the variable. + for (const DbgVariableIntrinsic *DVI : + FindDbgAddrUses(const_cast<Value *>(V))) { + if (DILocalVariable *DILV = DVI->getVariable()) { + std::optional<uint64_t> DISize = getSizeInBytes(DILV->getSizeInBits()); + VariableInfo Var{DILV->getName(), DISize}; + if (!Var.isEmpty()) { + Result.push_back(std::move(Var)); + FoundDI = true; + } + } + } + if (FoundDI) { + assert(!Result.empty()); + return; + } + + const auto *AI = dyn_cast<AllocaInst>(V); + if (!AI) + return; + + // If not, get it from the alloca. + std::optional<TypeSize> TySize = AI->getAllocationSize(DL); + std::optional<uint64_t> Size = + TySize ? 
std::optional(TySize->getFixedValue()) : std::nullopt; + VariableInfo Var{nameOrNone(AI), Size}; + if (!Var.isEmpty()) + Result.push_back(std::move(Var)); +} + +void MemoryOpRemark::visitPtr(Value *Ptr, bool IsRead, DiagnosticInfoIROptimization &R) { + // Find if Ptr is a known variable we can give more information on. + SmallVector<Value *, 2> Objects; + getUnderlyingObjectsForCodeGen(Ptr, Objects); + SmallVector<VariableInfo, 2> VIs; + for (const Value *V : Objects) + visitVariable(V, VIs); + + if (VIs.empty()) { + bool CanBeNull; + bool CanBeFreed; + uint64_t Size = Ptr->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed); + if (!Size) + return; + VIs.push_back({std::nullopt, Size}); + } + + R << (IsRead ? "\n Read Variables: " : "\n Written Variables: "); + for (unsigned i = 0; i < VIs.size(); ++i) { + const VariableInfo &VI = VIs[i]; + assert(!VI.isEmpty() && "No extra content to display."); + if (i != 0) + R << ", "; + if (VI.Name) + R << NV(IsRead ? "RVarName" : "WVarName", *VI.Name); + else + R << NV(IsRead ? "RVarName" : "WVarName", "<unknown>"); + if (VI.Size) + R << " (" << NV(IsRead ? "RVarSize" : "WVarSize", *VI.Size) << " bytes)"; + } + R << "."; +} + +bool AutoInitRemark::canHandle(const Instruction *I) { + if (!I->hasMetadata(LLVMContext::MD_annotation)) + return false; + return any_of(I->getMetadata(LLVMContext::MD_annotation)->operands(), + [](const MDOperand &Op) { + return cast<MDString>(Op.get())->getString() == "auto-init"; + }); +} + +std::string AutoInitRemark::explainSource(StringRef Type) const { + return (Type + " inserted by -ftrivial-auto-var-init.").str(); +} + +StringRef AutoInitRemark::remarkName(RemarkKind RK) const { + switch (RK) { + case RK_Store: + return "AutoInitStore"; + case RK_Unknown: + return "AutoInitUnknownInstruction"; + case RK_IntrinsicCall: + return "AutoInitIntrinsicCall"; + case RK_Call: + return "AutoInitCall"; + } + llvm_unreachable("missing RemarkKind case"); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/MemoryTaggingSupport.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/MemoryTaggingSupport.cpp new file mode 100644 index 0000000000..1e42d74916 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/MemoryTaggingSupport.cpp @@ -0,0 +1,219 @@ +//== MemoryTaggingSupport.cpp - helpers for memory tagging implementations ===// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares common infrastructure for HWAddressSanitizer and +// Aarch64StackTagging. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/MemoryTaggingSupport.h" + +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/StackSafetyAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" + +namespace llvm { +namespace memtag { +namespace { +bool maybeReachableFromEachOther(const SmallVectorImpl<IntrinsicInst *> &Insts, + const DominatorTree *DT, const LoopInfo *LI, + size_t MaxLifetimes) { + // If we have too many lifetime ends, give up, as the algorithm below is N^2. 
+ if (Insts.size() > MaxLifetimes) + return true; + for (size_t I = 0; I < Insts.size(); ++I) { + for (size_t J = 0; J < Insts.size(); ++J) { + if (I == J) + continue; + if (isPotentiallyReachable(Insts[I], Insts[J], nullptr, DT, LI)) + return true; + } + } + return false; +} +} // namespace + +bool forAllReachableExits(const DominatorTree &DT, const PostDominatorTree &PDT, + const LoopInfo &LI, const Instruction *Start, + const SmallVectorImpl<IntrinsicInst *> &Ends, + const SmallVectorImpl<Instruction *> &RetVec, + llvm::function_ref<void(Instruction *)> Callback) { + if (Ends.size() == 1 && PDT.dominates(Ends[0], Start)) { + Callback(Ends[0]); + return true; + } + SmallPtrSet<BasicBlock *, 2> EndBlocks; + for (auto *End : Ends) { + EndBlocks.insert(End->getParent()); + } + SmallVector<Instruction *, 8> ReachableRetVec; + unsigned NumCoveredExits = 0; + for (auto *RI : RetVec) { + if (!isPotentiallyReachable(Start, RI, nullptr, &DT, &LI)) + continue; + ReachableRetVec.push_back(RI); + // If there is an end in the same basic block as the return, we know for + // sure that the return is covered. Otherwise, we can check whether there + // is a way to reach the RI from the start of the lifetime without passing + // through an end. + if (EndBlocks.count(RI->getParent()) > 0 || + !isPotentiallyReachable(Start, RI, &EndBlocks, &DT, &LI)) { + ++NumCoveredExits; + } + } + // If there's a mix of covered and non-covered exits, just put the untag + // on exits, so we avoid the redundancy of untagging twice. + if (NumCoveredExits == ReachableRetVec.size()) { + for (auto *End : Ends) + Callback(End); + } else { + for (auto *RI : ReachableRetVec) + Callback(RI); + // We may have inserted untag outside of the lifetime interval. + // Signal the caller to remove the lifetime end call for this alloca. + return false; + } + return true; +} + +bool isStandardLifetime(const SmallVectorImpl<IntrinsicInst *> &LifetimeStart, + const SmallVectorImpl<IntrinsicInst *> &LifetimeEnd, + const DominatorTree *DT, const LoopInfo *LI, + size_t MaxLifetimes) { + // An alloca that has exactly one start and end in every possible execution. + // If it has multiple ends, they have to be unreachable from each other, so + // at most one of them is actually used for each execution of the function. 
+ return LifetimeStart.size() == 1 && + (LifetimeEnd.size() == 1 || + (LifetimeEnd.size() > 0 && + !maybeReachableFromEachOther(LifetimeEnd, DT, LI, MaxLifetimes))); +} + +Instruction *getUntagLocationIfFunctionExit(Instruction &Inst) { + if (isa<ReturnInst>(Inst)) { + if (CallInst *CI = Inst.getParent()->getTerminatingMustTailCall()) + return CI; + return &Inst; + } + if (isa<ResumeInst, CleanupReturnInst>(Inst)) { + return &Inst; + } + return nullptr; +} + +void StackInfoBuilder::visit(Instruction &Inst) { + if (CallInst *CI = dyn_cast<CallInst>(&Inst)) { + if (CI->canReturnTwice()) { + Info.CallsReturnTwice = true; + } + } + if (AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) { + if (isInterestingAlloca(*AI)) { + Info.AllocasToInstrument[AI].AI = AI; + } + return; + } + auto *II = dyn_cast<IntrinsicInst>(&Inst); + if (II && (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end)) { + AllocaInst *AI = findAllocaForValue(II->getArgOperand(1)); + if (!AI) { + Info.UnrecognizedLifetimes.push_back(&Inst); + return; + } + if (!isInterestingAlloca(*AI)) + return; + if (II->getIntrinsicID() == Intrinsic::lifetime_start) + Info.AllocasToInstrument[AI].LifetimeStart.push_back(II); + else + Info.AllocasToInstrument[AI].LifetimeEnd.push_back(II); + return; + } + if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&Inst)) { + for (Value *V : DVI->location_ops()) { + if (auto *AI = dyn_cast_or_null<AllocaInst>(V)) { + if (!isInterestingAlloca(*AI)) + continue; + AllocaInfo &AInfo = Info.AllocasToInstrument[AI]; + auto &DVIVec = AInfo.DbgVariableIntrinsics; + if (DVIVec.empty() || DVIVec.back() != DVI) + DVIVec.push_back(DVI); + } + } + } + Instruction *ExitUntag = getUntagLocationIfFunctionExit(Inst); + if (ExitUntag) + Info.RetVec.push_back(ExitUntag); +} + +bool StackInfoBuilder::isInterestingAlloca(const AllocaInst &AI) { + return (AI.getAllocatedType()->isSized() && + // FIXME: instrument dynamic allocas, too + AI.isStaticAlloca() && + // alloca() may be called with 0 size, ignore it. + memtag::getAllocaSizeInBytes(AI) > 0 && + // We are only interested in allocas not promotable to registers. + // Promotable allocas are common under -O0. + !isAllocaPromotable(&AI) && + // inalloca allocas are not treated as static, and we don't want + // dynamic alloca instrumentation for them as well. + !AI.isUsedWithInAlloca() && + // swifterror allocas are register promoted by ISel + !AI.isSwiftError()) && + // safe allocas are not interesting + !(SSI && SSI->isSafe(AI)); +} + +uint64_t getAllocaSizeInBytes(const AllocaInst &AI) { + auto DL = AI.getModule()->getDataLayout(); + return *AI.getAllocationSize(DL); +} + +void alignAndPadAlloca(memtag::AllocaInfo &Info, llvm::Align Alignment) { + const Align NewAlignment = std::max(Info.AI->getAlign(), Alignment); + Info.AI->setAlignment(NewAlignment); + auto &Ctx = Info.AI->getFunction()->getContext(); + + uint64_t Size = getAllocaSizeInBytes(*Info.AI); + uint64_t AlignedSize = alignTo(Size, Alignment); + if (Size == AlignedSize) + return; + + // Add padding to the alloca. + Type *AllocatedType = + Info.AI->isArrayAllocation() + ? 
ArrayType::get( + Info.AI->getAllocatedType(), + cast<ConstantInt>(Info.AI->getArraySize())->getZExtValue()) + : Info.AI->getAllocatedType(); + Type *PaddingType = ArrayType::get(Type::getInt8Ty(Ctx), AlignedSize - Size); + Type *TypeWithPadding = StructType::get(AllocatedType, PaddingType); + auto *NewAI = new AllocaInst(TypeWithPadding, Info.AI->getAddressSpace(), + nullptr, "", Info.AI); + NewAI->takeName(Info.AI); + NewAI->setAlignment(Info.AI->getAlign()); + NewAI->setUsedWithInAlloca(Info.AI->isUsedWithInAlloca()); + NewAI->setSwiftError(Info.AI->isSwiftError()); + NewAI->copyMetadata(*Info.AI); + + Value *NewPtr = NewAI; + + // TODO: Remove when typed pointers dropped + if (Info.AI->getType() != NewAI->getType()) + NewPtr = new BitCastInst(NewAI, Info.AI->getType(), "", Info.AI); + + Info.AI->replaceAllUsesWith(NewPtr); + Info.AI->eraseFromParent(); + Info.AI = NewAI; +} + +} // namespace memtag +} // namespace llvm diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/MetaRenamer.cpp new file mode 100644 index 0000000000..0ea210671b --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/MetaRenamer.cpp @@ -0,0 +1,251 @@ +//===- MetaRenamer.cpp - Rename everything with metasyntatic names --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass renames everything with metasyntatic names. The intent is to use +// this pass after bugpoint reduction to conceal the nature of the original +// program. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/MetaRenamer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/TypeFinder.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils.h" + +using namespace llvm; + +static cl::opt<std::string> RenameExcludeFunctionPrefixes( + "rename-exclude-function-prefixes", + cl::desc("Prefixes for functions that don't need to be renamed, separated " + "by a comma"), + cl::Hidden); + +static cl::opt<std::string> RenameExcludeAliasPrefixes( + "rename-exclude-alias-prefixes", + cl::desc("Prefixes for aliases that don't need to be renamed, separated " + "by a comma"), + cl::Hidden); + +static cl::opt<std::string> RenameExcludeGlobalPrefixes( + "rename-exclude-global-prefixes", + cl::desc( + "Prefixes for global values that don't need to be renamed, separated " + "by a comma"), + cl::Hidden); + +static cl::opt<std::string> RenameExcludeStructPrefixes( + "rename-exclude-struct-prefixes", + cl::desc("Prefixes for structs that don't need to be renamed, separated " + "by a comma"), + cl::Hidden); + +static const char *const metaNames[] = { + // See 
http://en.wikipedia.org/wiki/Metasyntactic_variable + "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge", + "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam" +}; + +namespace { +// This PRNG is from the ISO C spec. It is intentionally simple and +// unsuitable for cryptographic use. We're just looking for enough +// variety to surprise and delight users. +struct PRNG { + unsigned long next; + + void srand(unsigned int seed) { next = seed; } + + int rand() { + next = next * 1103515245 + 12345; + return (unsigned int)(next / 65536) % 32768; + } +}; + +struct Renamer { + Renamer(unsigned int seed) { prng.srand(seed); } + + const char *newName() { + return metaNames[prng.rand() % std::size(metaNames)]; + } + + PRNG prng; +}; + +static void +parseExcludedPrefixes(StringRef PrefixesStr, + SmallVectorImpl<StringRef> &ExcludedPrefixes) { + for (;;) { + auto PrefixesSplit = PrefixesStr.split(','); + if (PrefixesSplit.first.empty()) + break; + ExcludedPrefixes.push_back(PrefixesSplit.first); + PrefixesStr = PrefixesSplit.second; + } +} + +void MetaRename(Function &F) { + for (Argument &Arg : F.args()) + if (!Arg.getType()->isVoidTy()) + Arg.setName("arg"); + + for (auto &BB : F) { + BB.setName("bb"); + + for (auto &I : BB) + if (!I.getType()->isVoidTy()) + I.setName("tmp"); + } +} + +void MetaRename(Module &M, + function_ref<TargetLibraryInfo &(Function &)> GetTLI) { + // Seed our PRNG with simple additive sum of ModuleID. We're looking to + // simply avoid always having the same function names, and we need to + // remain deterministic. + unsigned int randSeed = 0; + for (auto C : M.getModuleIdentifier()) + randSeed += C; + + Renamer renamer(randSeed); + + SmallVector<StringRef, 8> ExcludedAliasesPrefixes; + SmallVector<StringRef, 8> ExcludedGlobalsPrefixes; + SmallVector<StringRef, 8> ExcludedStructsPrefixes; + SmallVector<StringRef, 8> ExcludedFuncPrefixes; + parseExcludedPrefixes(RenameExcludeAliasPrefixes, ExcludedAliasesPrefixes); + parseExcludedPrefixes(RenameExcludeGlobalPrefixes, ExcludedGlobalsPrefixes); + parseExcludedPrefixes(RenameExcludeStructPrefixes, ExcludedStructsPrefixes); + parseExcludedPrefixes(RenameExcludeFunctionPrefixes, ExcludedFuncPrefixes); + + auto IsNameExcluded = [](StringRef &Name, + SmallVectorImpl<StringRef> &ExcludedPrefixes) { + return any_of(ExcludedPrefixes, + [&Name](auto &Prefix) { return Name.startswith(Prefix); }); + }; + + // Rename all aliases + for (GlobalAlias &GA : M.aliases()) { + StringRef Name = GA.getName(); + if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) || + IsNameExcluded(Name, ExcludedAliasesPrefixes)) + continue; + + GA.setName("alias"); + } + + // Rename all global variables + for (GlobalVariable &GV : M.globals()) { + StringRef Name = GV.getName(); + if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) || + IsNameExcluded(Name, ExcludedGlobalsPrefixes)) + continue; + + GV.setName("global"); + } + + // Rename all struct types + TypeFinder StructTypes; + StructTypes.run(M, true); + for (StructType *STy : StructTypes) { + StringRef Name = STy->getName(); + if (STy->isLiteral() || Name.empty() || + IsNameExcluded(Name, ExcludedStructsPrefixes)) + continue; + + SmallString<128> NameStorage; + STy->setName( + (Twine("struct.") + renamer.newName()).toStringRef(NameStorage)); + } + + // Rename all functions + for (auto &F : M) { + StringRef Name = F.getName(); + LibFunc Tmp; + // Leave library functions alone because their presence or absence could + // affect the behavior of other 
passes. + if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) || + GetTLI(F).getLibFunc(F, Tmp) || + IsNameExcluded(Name, ExcludedFuncPrefixes)) + continue; + + // Leave @main alone. The output of -metarenamer might be passed to + // lli for execution and the latter needs a main entry point. + if (Name != "main") + F.setName(renamer.newName()); + + MetaRename(F); + } +} + +struct MetaRenamer : public ModulePass { + // Pass identification, replacement for typeid + static char ID; + + MetaRenamer() : ModulePass(ID) { + initializeMetaRenamerPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.setPreservesAll(); + } + + bool runOnModule(Module &M) override { + auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + }; + MetaRename(M, GetTLI); + return true; + } +}; + +} // end anonymous namespace + +char MetaRenamer::ID = 0; + +INITIALIZE_PASS_BEGIN(MetaRenamer, "metarenamer", + "Assign new names to everything", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(MetaRenamer, "metarenamer", + "Assign new names to everything", false, false) + +//===----------------------------------------------------------------------===// +// +// MetaRenamer - Rename everything with metasyntactic names. +// +ModulePass *llvm::createMetaRenamerPass() { + return new MetaRenamer(); +} + +PreservedAnalyses MetaRenamerPass::run(Module &M, ModuleAnalysisManager &AM) { + FunctionAnalysisManager &FAM = + AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult<TargetLibraryAnalysis>(F); + }; + MetaRename(M, GetTLI); + + return PreservedAnalyses::all(); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/MisExpect.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/MisExpect.cpp new file mode 100644 index 0000000000..6f5a25a268 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/MisExpect.cpp @@ -0,0 +1,214 @@ +//===--- MisExpect.cpp - Check the use of llvm.expect with PGO data -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit warnings for potentially incorrect usage of the +// llvm.expect intrinsic. This utility extracts the threshold values from +// metadata associated with the instrumented Branch or Switch instruction. The +// threshold values are then used to determine if a warning should be emmited. +// +// MisExpect's implementation relies on two assumptions about how branch weights +// are managed in LLVM. +// +// 1) Frontend profiling weights are always in place before llvm.expect is +// lowered in LowerExpectIntrinsic.cpp. Frontend based instrumentation therefore +// needs to extract the branch weights and then compare them to the weights +// being added by the llvm.expect intrinsic lowering. +// +// 2) Sampling and IR based profiles will *only* have branch weight metadata +// before profiling data is consulted if they are from a lowered llvm.expect +// intrinsic. 
These profiles thus always extract the expected weights and then +// compare them to the weights collected during profiling to determine if a +// diagnostic message is warranted. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/MisExpect.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/ProfDataUtils.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FormatVariadic.h" +#include <algorithm> +#include <cstdint> +#include <functional> +#include <numeric> + +#define DEBUG_TYPE "misexpect" + +using namespace llvm; +using namespace misexpect; + +namespace llvm { + +// Command line option to enable/disable the warning when profile data suggests +// a mismatch with the use of the llvm.expect intrinsic +static cl::opt<bool> PGOWarnMisExpect( + "pgo-warn-misexpect", cl::init(false), cl::Hidden, + cl::desc("Use this option to turn on/off " + "warnings about incorrect usage of llvm.expect intrinsics.")); + +static cl::opt<uint32_t> MisExpectTolerance( + "misexpect-tolerance", cl::init(0), + cl::desc("Prevents emiting diagnostics when profile counts are " + "within N% of the threshold..")); + +} // namespace llvm + +namespace { + +bool isMisExpectDiagEnabled(LLVMContext &Ctx) { + return PGOWarnMisExpect || Ctx.getMisExpectWarningRequested(); +} + +uint32_t getMisExpectTolerance(LLVMContext &Ctx) { + return std::max(static_cast<uint32_t>(MisExpectTolerance), + Ctx.getDiagnosticsMisExpectTolerance()); +} + +Instruction *getInstCondition(Instruction *I) { + assert(I != nullptr && "MisExpect target Instruction cannot be nullptr"); + Instruction *Ret = nullptr; + if (auto *B = dyn_cast<BranchInst>(I)) { + Ret = dyn_cast<Instruction>(B->getCondition()); + } + // TODO: Find a way to resolve condition location for switches + // Using the condition of the switch seems to often resolve to an earlier + // point in the program, i.e. the calculation of the switch condition, rather + // than the switch's location in the source code. Thus, we should use the + // instruction to get source code locations rather than the condition to + // improve diagnostic output, such as the caret. If the same problem exists + // for branch instructions, then we should remove this function and directly + // use the instruction + // + else if (auto *S = dyn_cast<SwitchInst>(I)) { + Ret = dyn_cast<Instruction>(S->getCondition()); + } + return Ret ? 
Ret : I; +} + +void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx, + uint64_t ProfCount, uint64_t TotalCount) { + double PercentageCorrect = (double)ProfCount / TotalCount; + auto PerString = + formatv("{0:P} ({1} / {2})", PercentageCorrect, ProfCount, TotalCount); + auto RemStr = formatv( + "Potential performance regression from use of the llvm.expect intrinsic: " + "Annotation was correct on {0} of profiled executions.", + PerString); + Twine Msg(PerString); + Instruction *Cond = getInstCondition(I); + if (isMisExpectDiagEnabled(Ctx)) + Ctx.diagnose(DiagnosticInfoMisExpect(Cond, Msg)); + OptimizationRemarkEmitter ORE(I->getParent()->getParent()); + ORE.emit(OptimizationRemark(DEBUG_TYPE, "misexpect", Cond) << RemStr.str()); +} + +} // namespace + +namespace llvm { +namespace misexpect { + +void verifyMisExpect(Instruction &I, ArrayRef<uint32_t> RealWeights, + ArrayRef<uint32_t> ExpectedWeights) { + // To determine if we emit a diagnostic, we need to compare the branch weights + // from the profile to those added by the llvm.expect intrinsic. + // So first, we extract the "likely" and "unlikely" weights from + // ExpectedWeights And determine the correct weight in the profile to compare + // against. + uint64_t LikelyBranchWeight = 0, + UnlikelyBranchWeight = std::numeric_limits<uint32_t>::max(); + size_t MaxIndex = 0; + for (size_t Idx = 0, End = ExpectedWeights.size(); Idx < End; Idx++) { + uint32_t V = ExpectedWeights[Idx]; + if (LikelyBranchWeight < V) { + LikelyBranchWeight = V; + MaxIndex = Idx; + } + if (UnlikelyBranchWeight > V) { + UnlikelyBranchWeight = V; + } + } + + const uint64_t ProfiledWeight = RealWeights[MaxIndex]; + const uint64_t RealWeightsTotal = + std::accumulate(RealWeights.begin(), RealWeights.end(), (uint64_t)0, + std::plus<uint64_t>()); + const uint64_t NumUnlikelyTargets = RealWeights.size() - 1; + + uint64_t TotalBranchWeight = + LikelyBranchWeight + (UnlikelyBranchWeight * NumUnlikelyTargets); + + // FIXME: When we've addressed sample profiling, restore the assertion + // + // We cannot calculate branch probability if either of these invariants aren't + // met. However, MisExpect diagnostics should not prevent code from compiling, + // so we simply forgo emitting diagnostics here, and return early. + // assert((TotalBranchWeight >= LikelyBranchWeight) && (TotalBranchWeight > 0) + // && "TotalBranchWeight is less than the Likely branch weight"); + if ((TotalBranchWeight == 0) || (TotalBranchWeight <= LikelyBranchWeight)) + return; + + // To determine our threshold value we need to obtain the branch probability + // for the weights added by llvm.expect and use that proportion to calculate + // our threshold based on the collected profile data. 
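+  // Worked example (illustrative numbers only): __builtin_expect is commonly
+  // lowered to ExpectedWeights of {2000, 1}, so LikelyBranchWeight = 2000,
+  // UnlikelyBranchWeight = 1, and for a two-way branch TotalBranchWeight =
+  // 2000 + 1 * 1 = 2001. The likely probability is then 2000/2001, and with a
+  // profiled total of 10000 executions the scaled threshold is roughly 9995;
+  // a ProfiledWeight below that threshold (after applying any tolerance)
+  // emits the diagnostic.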
+ auto LikelyProbablilty = BranchProbability::getBranchProbability( + LikelyBranchWeight, TotalBranchWeight); + + uint64_t ScaledThreshold = LikelyProbablilty.scale(RealWeightsTotal); + + // clamp tolerance range to [0, 100) + auto Tolerance = getMisExpectTolerance(I.getContext()); + Tolerance = std::clamp(Tolerance, 0u, 99u); + + // Allow users to relax checking by N% i.e., if they use a 5% tolerance, + // then we check against 0.95*ScaledThreshold + if (Tolerance > 0) + ScaledThreshold *= (1.0 - Tolerance / 100.0); + + // When the profile weight is below the threshold, we emit the diagnostic + if (ProfiledWeight < ScaledThreshold) + emitMisexpectDiagnostic(&I, I.getContext(), ProfiledWeight, + RealWeightsTotal); +} + +void checkBackendInstrumentation(Instruction &I, + const ArrayRef<uint32_t> RealWeights) { + SmallVector<uint32_t> ExpectedWeights; + if (!extractBranchWeights(I, ExpectedWeights)) + return; + verifyMisExpect(I, RealWeights, ExpectedWeights); +} + +void checkFrontendInstrumentation(Instruction &I, + const ArrayRef<uint32_t> ExpectedWeights) { + SmallVector<uint32_t> RealWeights; + if (!extractBranchWeights(I, RealWeights)) + return; + verifyMisExpect(I, RealWeights, ExpectedWeights); +} + +void checkExpectAnnotations(Instruction &I, + const ArrayRef<uint32_t> ExistingWeights, + bool IsFrontend) { + if (IsFrontend) { + checkFrontendInstrumentation(I, ExistingWeights); + } else { + checkBackendInstrumentation(I, ExistingWeights); + } +} + +} // namespace misexpect +} // namespace llvm +#undef DEBUG_TYPE diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/ModuleUtils.cpp new file mode 100644 index 0000000000..6d17a46695 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/ModuleUtils.cpp @@ -0,0 +1,475 @@ +//===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This family of functions perform manipulations on Modules. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/Analysis/VectorUtils.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/xxhash.h" +using namespace llvm; + +#define DEBUG_TYPE "moduleutils" + +static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F, + int Priority, Constant *Data) { + IRBuilder<> IRB(M.getContext()); + FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false); + + // Get the current set of static global constructors and add the new ctor + // to the list. 
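+  // Each element of the array is a { i32 priority, ctor pointer, data pointer }
+  // struct; e.g. (illustrative, typed-pointer syntax) appending a constructor
+  // at priority 65535 with no data yields an element like
+  //   { i32 65535, void ()* @ctor, i8* null }.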
+ SmallVector<Constant *, 16> CurrentCtors; + StructType *EltTy = StructType::get( + IRB.getInt32Ty(), PointerType::get(FnTy, F->getAddressSpace()), + IRB.getInt8PtrTy()); + + if (GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName)) { + if (Constant *Init = GVCtor->getInitializer()) { + unsigned n = Init->getNumOperands(); + CurrentCtors.reserve(n + 1); + for (unsigned i = 0; i != n; ++i) + CurrentCtors.push_back(cast<Constant>(Init->getOperand(i))); + } + GVCtor->eraseFromParent(); + } + + // Build a 3 field global_ctor entry. We don't take a comdat key. + Constant *CSVals[3]; + CSVals[0] = IRB.getInt32(Priority); + CSVals[1] = F; + CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy()) + : Constant::getNullValue(IRB.getInt8PtrTy()); + Constant *RuntimeCtorInit = + ConstantStruct::get(EltTy, ArrayRef(CSVals, EltTy->getNumElements())); + + CurrentCtors.push_back(RuntimeCtorInit); + + // Create a new initializer. + ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size()); + Constant *NewInit = ConstantArray::get(AT, CurrentCtors); + + // Create the new global variable and replace all uses of + // the old global variable with the new one. + (void)new GlobalVariable(M, NewInit->getType(), false, + GlobalValue::AppendingLinkage, NewInit, ArrayName); +} + +void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) { + appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data); +} + +void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) { + appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data); +} + +static void collectUsedGlobals(GlobalVariable *GV, + SmallSetVector<Constant *, 16> &Init) { + if (!GV || !GV->hasInitializer()) + return; + + auto *CA = cast<ConstantArray>(GV->getInitializer()); + for (Use &Op : CA->operands()) + Init.insert(cast<Constant>(Op)); +} + +static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) { + GlobalVariable *GV = M.getGlobalVariable(Name); + + SmallSetVector<Constant *, 16> Init; + collectUsedGlobals(GV, Init); + if (GV) + GV->eraseFromParent(); + + Type *ArrayEltTy = llvm::Type::getInt8PtrTy(M.getContext()); + for (auto *V : Values) + Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy)); + + if (Init.empty()) + return; + + ArrayType *ATy = ArrayType::get(ArrayEltTy, Init.size()); + GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage, + ConstantArray::get(ATy, Init.getArrayRef()), + Name); + GV->setSection("llvm.metadata"); +} + +void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) { + appendToUsedList(M, "llvm.used", Values); +} + +void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) { + appendToUsedList(M, "llvm.compiler.used", Values); +} + +static void removeFromUsedList(Module &M, StringRef Name, + function_ref<bool(Constant *)> ShouldRemove) { + GlobalVariable *GV = M.getNamedGlobal(Name); + if (!GV) + return; + + SmallSetVector<Constant *, 16> Init; + collectUsedGlobals(GV, Init); + + Type *ArrayEltTy = cast<ArrayType>(GV->getValueType())->getElementType(); + + SmallVector<Constant *, 16> NewInit; + for (Constant *MaybeRemoved : Init) { + if (!ShouldRemove(MaybeRemoved->stripPointerCasts())) + NewInit.push_back(MaybeRemoved); + } + + if (!NewInit.empty()) { + ArrayType *ATy = ArrayType::get(ArrayEltTy, NewInit.size()); + GlobalVariable *NewGV = + new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage, + ConstantArray::get(ATy, NewInit), "", GV, + 
GV->getThreadLocalMode(), GV->getAddressSpace()); + NewGV->setSection(GV->getSection()); + NewGV->takeName(GV); + } + + GV->eraseFromParent(); +} + +void llvm::removeFromUsedLists(Module &M, + function_ref<bool(Constant *)> ShouldRemove) { + removeFromUsedList(M, "llvm.used", ShouldRemove); + removeFromUsedList(M, "llvm.compiler.used", ShouldRemove); +} + +void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) { + if (!M.getModuleFlag("kcfi")) + return; + // Matches CodeGenModule::CreateKCFITypeId in Clang. + LLVMContext &Ctx = M.getContext(); + MDBuilder MDB(Ctx); + F.setMetadata( + LLVMContext::MD_kcfi_type, + MDNode::get(Ctx, MDB.createConstant(ConstantInt::get( + Type::getInt32Ty(Ctx), + static_cast<uint32_t>(xxHash64(MangledType)))))); + // If the module was compiled with -fpatchable-function-entry, ensure + // we use the same patchable-function-prefix. + if (auto *MD = mdconst::extract_or_null<ConstantInt>( + M.getModuleFlag("kcfi-offset"))) { + if (unsigned Offset = MD->getZExtValue()) + F.addFnAttr("patchable-function-prefix", std::to_string(Offset)); + } +} + +FunctionCallee llvm::declareSanitizerInitFunction(Module &M, StringRef InitName, + ArrayRef<Type *> InitArgTypes, + bool Weak) { + assert(!InitName.empty() && "Expected init function name"); + auto *VoidTy = Type::getVoidTy(M.getContext()); + auto *FnTy = FunctionType::get(VoidTy, InitArgTypes, false); + auto FnCallee = M.getOrInsertFunction(InitName, FnTy); + auto *Fn = cast<Function>(FnCallee.getCallee()); + if (Weak && Fn->isDeclaration()) + Fn->setLinkage(Function::ExternalWeakLinkage); + return FnCallee; +} + +Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) { + Function *Ctor = Function::createWithDefaultAttr( + FunctionType::get(Type::getVoidTy(M.getContext()), false), + GlobalValue::InternalLinkage, M.getDataLayout().getProgramAddressSpace(), + CtorName, &M); + Ctor->addFnAttr(Attribute::NoUnwind); + setKCFIType(M, *Ctor, "_ZTSFvvE"); // void (*)(void) + BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor); + ReturnInst::Create(M.getContext(), CtorBB); + // Ensure Ctor cannot be discarded, even if in a comdat. 
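+  // For example (illustrative, typed-pointer syntax), this leaves the module
+  // with an entry roughly like
+  //   @llvm.used = appending global [1 x i8*]
+  //       [i8* bitcast (void ()* @ctor to i8*)], section "llvm.metadata"
+  // so the otherwise-unreferenced constructor is not optimized away.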
+ appendToUsed(M, {Ctor}); + return Ctor; +} + +std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions( + Module &M, StringRef CtorName, StringRef InitName, + ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs, + StringRef VersionCheckName, bool Weak) { + assert(!InitName.empty() && "Expected init function name"); + assert(InitArgs.size() == InitArgTypes.size() && + "Sanitizer's init function expects different number of arguments"); + FunctionCallee InitFunction = + declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak); + Function *Ctor = createSanitizerCtor(M, CtorName); + IRBuilder<> IRB(M.getContext()); + + BasicBlock *RetBB = &Ctor->getEntryBlock(); + if (Weak) { + RetBB->setName("ret"); + auto *EntryBB = BasicBlock::Create(M.getContext(), "entry", Ctor, RetBB); + auto *CallInitBB = + BasicBlock::Create(M.getContext(), "callfunc", Ctor, RetBB); + auto *InitFn = cast<Function>(InitFunction.getCallee()); + auto *InitFnPtr = + PointerType::get(InitFn->getType(), InitFn->getAddressSpace()); + IRB.SetInsertPoint(EntryBB); + Value *InitNotNull = + IRB.CreateICmpNE(InitFn, ConstantPointerNull::get(InitFnPtr)); + IRB.CreateCondBr(InitNotNull, CallInitBB, RetBB); + IRB.SetInsertPoint(CallInitBB); + } else { + IRB.SetInsertPoint(RetBB->getTerminator()); + } + + IRB.CreateCall(InitFunction, InitArgs); + if (!VersionCheckName.empty()) { + FunctionCallee VersionCheckFunction = M.getOrInsertFunction( + VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false), + AttributeList()); + IRB.CreateCall(VersionCheckFunction, {}); + } + + if (Weak) + IRB.CreateBr(RetBB); + + return std::make_pair(Ctor, InitFunction); +} + +std::pair<Function *, FunctionCallee> +llvm::getOrCreateSanitizerCtorAndInitFunctions( + Module &M, StringRef CtorName, StringRef InitName, + ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs, + function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback, + StringRef VersionCheckName, bool Weak) { + assert(!CtorName.empty() && "Expected ctor function name"); + + if (Function *Ctor = M.getFunction(CtorName)) + // FIXME: Sink this logic into the module, similar to the handling of + // globals. This will make moving to a concurrent model much easier. + if (Ctor->arg_empty() || + Ctor->getReturnType() == Type::getVoidTy(M.getContext())) + return {Ctor, + declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak)}; + + Function *Ctor; + FunctionCallee InitFunction; + std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions( + M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName, Weak); + FunctionsCreatedCallback(Ctor, InitFunction); + return std::make_pair(Ctor, InitFunction); +} + +void llvm::filterDeadComdatFunctions( + SmallVectorImpl<Function *> &DeadComdatFunctions) { + SmallPtrSet<Function *, 32> MaybeDeadFunctions; + SmallPtrSet<Comdat *, 32> MaybeDeadComdats; + for (Function *F : DeadComdatFunctions) { + MaybeDeadFunctions.insert(F); + if (Comdat *C = F->getComdat()) + MaybeDeadComdats.insert(C); + } + + // Find comdats for which all users are dead now. + SmallPtrSet<Comdat *, 32> DeadComdats; + for (Comdat *C : MaybeDeadComdats) { + auto IsUserDead = [&](GlobalObject *GO) { + auto *F = dyn_cast<Function>(GO); + return F && MaybeDeadFunctions.contains(F); + }; + if (all_of(C->getUsers(), IsUserDead)) + DeadComdats.insert(C); + } + + // Only keep functions which have no comdat or a dead comdat. 
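+  // For example (illustrative): if f and g share a comdat but only f was
+  // reported dead, f is dropped from the list here because its comdat is kept
+  // alive by g; if both were reported dead, both stay in the list.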
+ erase_if(DeadComdatFunctions, [&](Function *F) { + Comdat *C = F->getComdat(); + return C && !DeadComdats.contains(C); + }); +} + +std::string llvm::getUniqueModuleId(Module *M) { + MD5 Md5; + bool ExportsSymbols = false; + auto AddGlobal = [&](GlobalValue &GV) { + if (GV.isDeclaration() || GV.getName().startswith("llvm.") || + !GV.hasExternalLinkage() || GV.hasComdat()) + return; + ExportsSymbols = true; + Md5.update(GV.getName()); + Md5.update(ArrayRef<uint8_t>{0}); + }; + + for (auto &F : *M) + AddGlobal(F); + for (auto &GV : M->globals()) + AddGlobal(GV); + for (auto &GA : M->aliases()) + AddGlobal(GA); + for (auto &IF : M->ifuncs()) + AddGlobal(IF); + + if (!ExportsSymbols) + return ""; + + MD5::MD5Result R; + Md5.final(R); + + SmallString<32> Str; + MD5::stringifyResult(R, Str); + return ("." + Str).str(); +} + +void VFABI::setVectorVariantNames(CallInst *CI, + ArrayRef<std::string> VariantMappings) { + if (VariantMappings.empty()) + return; + + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + for (const std::string &VariantMapping : VariantMappings) + Out << VariantMapping << ","; + // Get rid of the trailing ','. + assert(!Buffer.str().empty() && "Must have at least one char."); + Buffer.pop_back(); + + Module *M = CI->getModule(); +#ifndef NDEBUG + for (const std::string &VariantMapping : VariantMappings) { + LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << VariantMapping << "'\n"); + std::optional<VFInfo> VI = VFABI::tryDemangleForVFABI(VariantMapping, *M); + assert(VI && "Cannot add an invalid VFABI name."); + assert(M->getNamedValue(VI->VectorName) && + "Cannot add variant to attribute: " + "vector function declaration is missing."); + } +#endif + CI->addFnAttr( + Attribute::get(M->getContext(), MappingsAttrName, Buffer.str())); +} + +void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf, + StringRef SectionName, Align Alignment) { + // Embed the memory buffer into the module. + Constant *ModuleConstant = ConstantDataArray::get( + M.getContext(), ArrayRef(Buf.getBufferStart(), Buf.getBufferSize())); + GlobalVariable *GV = new GlobalVariable( + M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage, + ModuleConstant, "llvm.embedded.object"); + GV->setSection(SectionName); + GV->setAlignment(Alignment); + + LLVMContext &Ctx = M.getContext(); + NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.embedded.objects"); + Metadata *MDVals[] = {ConstantAsMetadata::get(GV), + MDString::get(Ctx, SectionName)}; + + MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); + GV->setMetadata(LLVMContext::MD_exclude, llvm::MDNode::get(Ctx, {})); + + appendToCompilerUsed(M, GV); +} + +bool llvm::lowerGlobalIFuncUsersAsGlobalCtor( + Module &M, ArrayRef<GlobalIFunc *> FilteredIFuncsToLower) { + SmallVector<GlobalIFunc *, 32> AllIFuncs; + ArrayRef<GlobalIFunc *> IFuncsToLower = FilteredIFuncsToLower; + if (FilteredIFuncsToLower.empty()) { // Default to lowering all ifuncs + for (GlobalIFunc &GI : M.ifuncs()) + AllIFuncs.push_back(&GI); + IFuncsToLower = AllIFuncs; + } + + bool UnhandledUsers = false; + LLVMContext &Ctx = M.getContext(); + const DataLayout &DL = M.getDataLayout(); + + PointerType *TableEntryTy = + Ctx.supportsTypedPointers() + ? 
PointerType::get(Type::getInt8Ty(Ctx), DL.getProgramAddressSpace()) + : PointerType::get(Ctx, DL.getProgramAddressSpace()); + + ArrayType *FuncPtrTableTy = + ArrayType::get(TableEntryTy, IFuncsToLower.size()); + + Align PtrAlign = DL.getABITypeAlign(TableEntryTy); + + // Create a global table of function pointers we'll initialize in a global + // constructor. + auto *FuncPtrTable = new GlobalVariable( + M, FuncPtrTableTy, false, GlobalValue::InternalLinkage, + PoisonValue::get(FuncPtrTableTy), "", nullptr, + GlobalVariable::NotThreadLocal, DL.getDefaultGlobalsAddressSpace()); + FuncPtrTable->setAlignment(PtrAlign); + + // Create a function to initialize the function pointer table. + Function *NewCtor = Function::Create( + FunctionType::get(Type::getVoidTy(Ctx), false), Function::InternalLinkage, + DL.getProgramAddressSpace(), "", &M); + + BasicBlock *BB = BasicBlock::Create(Ctx, "", NewCtor); + IRBuilder<> InitBuilder(BB); + + size_t TableIndex = 0; + for (GlobalIFunc *GI : IFuncsToLower) { + Function *ResolvedFunction = GI->getResolverFunction(); + + // We don't know what to pass to a resolver function taking arguments + // + // FIXME: Is this even valid? clang and gcc don't complain but this + // probably should be invalid IR. We could just pass through undef. + if (!std::empty(ResolvedFunction->getFunctionType()->params())) { + LLVM_DEBUG(dbgs() << "Not lowering ifunc resolver function " + << ResolvedFunction->getName() << " with parameters\n"); + UnhandledUsers = true; + continue; + } + + // Initialize the function pointer table. + CallInst *ResolvedFunc = InitBuilder.CreateCall(ResolvedFunction); + Value *Casted = InitBuilder.CreatePointerCast(ResolvedFunc, TableEntryTy); + Constant *GEP = cast<Constant>(InitBuilder.CreateConstInBoundsGEP2_32( + FuncPtrTableTy, FuncPtrTable, 0, TableIndex++)); + InitBuilder.CreateAlignedStore(Casted, GEP, PtrAlign); + + // Update all users to load a pointer from the global table. + for (User *User : make_early_inc_range(GI->users())) { + Instruction *UserInst = dyn_cast<Instruction>(User); + if (!UserInst) { + // TODO: Should handle constantexpr casts in user instructions. Probably + // can't do much about constant initializers. + UnhandledUsers = true; + continue; + } + + IRBuilder<> UseBuilder(UserInst); + LoadInst *ResolvedTarget = + UseBuilder.CreateAlignedLoad(TableEntryTy, GEP, PtrAlign); + Value *ResolvedCast = + UseBuilder.CreatePointerCast(ResolvedTarget, GI->getType()); + UserInst->replaceUsesOfWith(GI, ResolvedCast); + } + + // If we handled all users, erase the ifunc. + if (GI->use_empty()) + GI->eraseFromParent(); + } + + InitBuilder.CreateRetVoid(); + + PointerType *ConstantDataTy = Ctx.supportsTypedPointers() + ? PointerType::get(Type::getInt8Ty(Ctx), 0) + : PointerType::get(Ctx, 0); + + // TODO: Is this the right priority? Probably should be before any other + // constructors? + const int Priority = 10; + appendToGlobalCtors(M, NewCtor, Priority, + ConstantPointerNull::get(ConstantDataTy)); + return UnhandledUsers; +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/NameAnonGlobals.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/NameAnonGlobals.cpp new file mode 100644 index 0000000000..d4ab450406 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/NameAnonGlobals.cpp @@ -0,0 +1,90 @@ +//===- NameAnonGlobals.cpp - ThinLTO Support: Name Unnamed Globals --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements naming anonymous globals to make sure they can be +// referred to by ThinLTO. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/NameAnonGlobals.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/MD5.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +using namespace llvm; + +namespace { +// Compute a "unique" hash for the module based on the name of the public +// globals. +class ModuleHasher { + Module &TheModule; + std::string TheHash; + +public: + ModuleHasher(Module &M) : TheModule(M) {} + + /// Return the lazily computed hash. + std::string &get() { + if (!TheHash.empty()) + // Cache hit :) + return TheHash; + + MD5 Hasher; + for (auto &F : TheModule) { + if (F.isDeclaration() || F.hasLocalLinkage() || !F.hasName()) + continue; + auto Name = F.getName(); + Hasher.update(Name); + } + for (auto &GV : TheModule.globals()) { + if (GV.isDeclaration() || GV.hasLocalLinkage() || !GV.hasName()) + continue; + auto Name = GV.getName(); + Hasher.update(Name); + } + + // Now return the result. + MD5::MD5Result Hash; + Hasher.final(Hash); + SmallString<32> Result; + MD5::stringifyResult(Hash, Result); + TheHash = std::string(Result.str()); + return TheHash; + } +}; +} // end anonymous namespace + +// Rename all the anon globals in the module +bool llvm::nameUnamedGlobals(Module &M) { + bool Changed = false; + ModuleHasher ModuleHash(M); + int count = 0; + auto RenameIfNeed = [&](GlobalValue &GV) { + if (GV.hasName()) + return; + GV.setName(Twine("anon.") + ModuleHash.get() + "." + Twine(count++)); + Changed = true; + }; + for (auto &GO : M.global_objects()) + RenameIfNeed(GO); + for (auto &GA : M.aliases()) + RenameIfNeed(GA); + + return Changed; +} + +PreservedAnalyses NameAnonGlobalPass::run(Module &M, + ModuleAnalysisManager &AM) { + if (!nameUnamedGlobals(M)) + return PreservedAnalyses::all(); + + return PreservedAnalyses::none(); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/PredicateInfo.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/PredicateInfo.cpp new file mode 100644 index 0000000000..1f16ba78bd --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/PredicateInfo.cpp @@ -0,0 +1,948 @@ +//===-- PredicateInfo.cpp - PredicateInfo Builder--------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------===// +// +// This file implements the PredicateInfo class. 
+// +//===----------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/PredicateInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/IR/AssemblyAnnotationWriter.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/DebugCounter.h" +#include "llvm/Support/FormattedStream.h" +#include <algorithm> +#define DEBUG_TYPE "predicateinfo" +using namespace llvm; +using namespace PatternMatch; + +INITIALIZE_PASS_BEGIN(PredicateInfoPrinterLegacyPass, "print-predicateinfo", + "PredicateInfo Printer", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_END(PredicateInfoPrinterLegacyPass, "print-predicateinfo", + "PredicateInfo Printer", false, false) +static cl::opt<bool> VerifyPredicateInfo( + "verify-predicateinfo", cl::init(false), cl::Hidden, + cl::desc("Verify PredicateInfo in legacy printer pass.")); +DEBUG_COUNTER(RenameCounter, "predicateinfo-rename", + "Controls which variables are renamed with predicateinfo"); + +// Maximum number of conditions considered for renaming for each branch/assume. +// This limits renaming of deep and/or chains. +static const unsigned MaxCondsPerBranch = 8; + +namespace { +// Given a predicate info that is a type of branching terminator, get the +// branching block. +const BasicBlock *getBranchBlock(const PredicateBase *PB) { + assert(isa<PredicateWithEdge>(PB) && + "Only branches and switches should have PHIOnly defs that " + "require branch blocks."); + return cast<PredicateWithEdge>(PB)->From; +} + +// Given a predicate info that is a type of branching terminator, get the +// branching terminator. +static Instruction *getBranchTerminator(const PredicateBase *PB) { + assert(isa<PredicateWithEdge>(PB) && + "Not a predicate info type we know how to get a terminator from."); + return cast<PredicateWithEdge>(PB)->From->getTerminator(); +} + +// Given a predicate info that is a type of branching terminator, get the +// edge this predicate info represents +std::pair<BasicBlock *, BasicBlock *> getBlockEdge(const PredicateBase *PB) { + assert(isa<PredicateWithEdge>(PB) && + "Not a predicate info type we know how to get an edge from."); + const auto *PEdge = cast<PredicateWithEdge>(PB); + return std::make_pair(PEdge->From, PEdge->To); +} +} + +namespace llvm { +enum LocalNum { + // Operations that must appear first in the block. + LN_First, + // Operations that are somewhere in the middle of the block, and are sorted on + // demand. + LN_Middle, + // Operations that must appear last in a block, like successor phi node uses. + LN_Last +}; + +// Associate global and local DFS info with defs and uses, so we can sort them +// into a global domination ordering. +struct ValueDFS { + int DFSIn = 0; + int DFSOut = 0; + unsigned int LocalNum = LN_Middle; + // Only one of Def or Use will be set. + Value *Def = nullptr; + Use *U = nullptr; + // Neither PInfo nor EdgeOnly participate in the ordering + PredicateBase *PInfo = nullptr; + bool EdgeOnly = false; +}; + +// Perform a strict weak ordering on instructions and arguments. 
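+// For example (illustrative): any Argument orders before any Instruction, two
+// Arguments order by argument number, and two Instructions (necessarily from
+// the same block here) order by Instruction::comesBefore().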
+static bool valueComesBefore(const Value *A, const Value *B) { + auto *ArgA = dyn_cast_or_null<Argument>(A); + auto *ArgB = dyn_cast_or_null<Argument>(B); + if (ArgA && !ArgB) + return true; + if (ArgB && !ArgA) + return false; + if (ArgA && ArgB) + return ArgA->getArgNo() < ArgB->getArgNo(); + return cast<Instruction>(A)->comesBefore(cast<Instruction>(B)); +} + +// This compares ValueDFS structures. Doing so allows us to walk the minimum +// number of instructions necessary to compute our def/use ordering. +struct ValueDFS_Compare { + DominatorTree &DT; + ValueDFS_Compare(DominatorTree &DT) : DT(DT) {} + + bool operator()(const ValueDFS &A, const ValueDFS &B) const { + if (&A == &B) + return false; + // The only case we can't directly compare them is when they in the same + // block, and both have localnum == middle. In that case, we have to use + // comesbefore to see what the real ordering is, because they are in the + // same basic block. + + assert((A.DFSIn != B.DFSIn || A.DFSOut == B.DFSOut) && + "Equal DFS-in numbers imply equal out numbers"); + bool SameBlock = A.DFSIn == B.DFSIn; + + // We want to put the def that will get used for a given set of phi uses, + // before those phi uses. + // So we sort by edge, then by def. + // Note that only phi nodes uses and defs can come last. + if (SameBlock && A.LocalNum == LN_Last && B.LocalNum == LN_Last) + return comparePHIRelated(A, B); + + bool isADef = A.Def; + bool isBDef = B.Def; + if (!SameBlock || A.LocalNum != LN_Middle || B.LocalNum != LN_Middle) + return std::tie(A.DFSIn, A.LocalNum, isADef) < + std::tie(B.DFSIn, B.LocalNum, isBDef); + return localComesBefore(A, B); + } + + // For a phi use, or a non-materialized def, return the edge it represents. + std::pair<BasicBlock *, BasicBlock *> getBlockEdge(const ValueDFS &VD) const { + if (!VD.Def && VD.U) { + auto *PHI = cast<PHINode>(VD.U->getUser()); + return std::make_pair(PHI->getIncomingBlock(*VD.U), PHI->getParent()); + } + // This is really a non-materialized def. + return ::getBlockEdge(VD.PInfo); + } + + // For two phi related values, return the ordering. + bool comparePHIRelated(const ValueDFS &A, const ValueDFS &B) const { + BasicBlock *ASrc, *ADest, *BSrc, *BDest; + std::tie(ASrc, ADest) = getBlockEdge(A); + std::tie(BSrc, BDest) = getBlockEdge(B); + +#ifndef NDEBUG + // This function should only be used for values in the same BB, check that. + DomTreeNode *DomASrc = DT.getNode(ASrc); + DomTreeNode *DomBSrc = DT.getNode(BSrc); + assert(DomASrc->getDFSNumIn() == (unsigned)A.DFSIn && + "DFS numbers for A should match the ones of the source block"); + assert(DomBSrc->getDFSNumIn() == (unsigned)B.DFSIn && + "DFS numbers for B should match the ones of the source block"); + assert(A.DFSIn == B.DFSIn && "Values must be in the same block"); +#endif + (void)ASrc; + (void)BSrc; + + // Use DFS numbers to compare destination blocks, to guarantee a + // deterministic order. + DomTreeNode *DomADest = DT.getNode(ADest); + DomTreeNode *DomBDest = DT.getNode(BDest); + unsigned AIn = DomADest->getDFSNumIn(); + unsigned BIn = DomBDest->getDFSNumIn(); + bool isADef = A.Def; + bool isBDef = B.Def; + assert((!A.Def || !A.U) && (!B.Def || !B.U) && + "Def and U cannot be set at the same time"); + // Now sort by edge destination and then defs before uses. + return std::tie(AIn, isADef) < std::tie(BIn, isBDef); + } + + // Get the definition of an instruction that occurs in the middle of a block. 
+  Value *getMiddleDef(const ValueDFS &VD) const {
+    if (VD.Def)
+      return VD.Def;
+    // It's possible for the defs and uses to be null. For branches, the local
+    // numbering will say the placed predicateinfos should go first (i.e.
+    // LN_beginning), so we won't be in this function. For assumes, we will end
+    // up here, because we need to order the def we will place relative to the
+    // assume. So for the purpose of ordering, we pretend the def is right
+    // after the assume, because that is where we will insert the info.
+    if (!VD.U) {
+      assert(VD.PInfo &&
+             "No def, no use, and no predicateinfo should not occur");
+      assert(isa<PredicateAssume>(VD.PInfo) &&
+             "Middle of block should only occur for assumes");
+      return cast<PredicateAssume>(VD.PInfo)->AssumeInst->getNextNode();
+    }
+    return nullptr;
+  }
+
+  // Return either the Def, if it's not null, or the user of the Use, if the def
+  // is null.
+  const Instruction *getDefOrUser(const Value *Def, const Use *U) const {
+    if (Def)
+      return cast<Instruction>(Def);
+    return cast<Instruction>(U->getUser());
+  }
+
+  // This performs the necessary local basic block ordering checks to tell
+  // whether A comes before B, where both are in the same basic block.
+  bool localComesBefore(const ValueDFS &A, const ValueDFS &B) const {
+    auto *ADef = getMiddleDef(A);
+    auto *BDef = getMiddleDef(B);
+
+    // See if we have real values or uses. If we have real values, we are
+    // guaranteed they are instructions or arguments. No matter what, we are
+    // guaranteed they are in the same block if they are instructions.
+    auto *ArgA = dyn_cast_or_null<Argument>(ADef);
+    auto *ArgB = dyn_cast_or_null<Argument>(BDef);
+
+    if (ArgA || ArgB)
+      return valueComesBefore(ArgA, ArgB);
+
+    auto *AInst = getDefOrUser(ADef, A.U);
+    auto *BInst = getDefOrUser(BDef, B.U);
+    return valueComesBefore(AInst, BInst);
+  }
+};
+
+class PredicateInfoBuilder {
+  // Used to store information about each value we might rename.
+  struct ValueInfo {
+    SmallVector<PredicateBase *, 4> Infos;
+  };
+
+  PredicateInfo &PI;
+  Function &F;
+  DominatorTree &DT;
+  AssumptionCache &AC;
+
+  // This stores info about each operand or comparison result we make copies
+  // of. The real ValueInfos start at index 1, index 0 is unused so that we
+  // can more easily detect invalid indexing.
+  SmallVector<ValueInfo, 32> ValueInfos;
+
+  // This gives the index into the ValueInfos array for a given Value. Because
+  // 0 is not a valid Value Info index, you can use DenseMap::lookup and tell
+  // whether it returned a valid result.
+  DenseMap<Value *, unsigned int> ValueInfoNums;
+
+  // The set of edges along which we can only handle phi uses, due to critical
+  // edges.
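+  // For example (illustrative): given "br i1 %c, label %then, label %merge"
+  // where %merge has a second predecessor, the edge to %merge is critical, so
+  // the fact implied by %c is attached only to phi operands arriving along
+  // that edge instead of to every use inside %merge.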
+ DenseSet<std::pair<BasicBlock *, BasicBlock *>> EdgeUsesOnly; + + ValueInfo &getOrCreateValueInfo(Value *); + const ValueInfo &getValueInfo(Value *) const; + + void processAssume(IntrinsicInst *, BasicBlock *, + SmallVectorImpl<Value *> &OpsToRename); + void processBranch(BranchInst *, BasicBlock *, + SmallVectorImpl<Value *> &OpsToRename); + void processSwitch(SwitchInst *, BasicBlock *, + SmallVectorImpl<Value *> &OpsToRename); + void renameUses(SmallVectorImpl<Value *> &OpsToRename); + void addInfoFor(SmallVectorImpl<Value *> &OpsToRename, Value *Op, + PredicateBase *PB); + + typedef SmallVectorImpl<ValueDFS> ValueDFSStack; + void convertUsesToDFSOrdered(Value *, SmallVectorImpl<ValueDFS> &); + Value *materializeStack(unsigned int &, ValueDFSStack &, Value *); + bool stackIsInScope(const ValueDFSStack &, const ValueDFS &) const; + void popStackUntilDFSScope(ValueDFSStack &, const ValueDFS &); + +public: + PredicateInfoBuilder(PredicateInfo &PI, Function &F, DominatorTree &DT, + AssumptionCache &AC) + : PI(PI), F(F), DT(DT), AC(AC) { + // Push an empty operand info so that we can detect 0 as not finding one + ValueInfos.resize(1); + } + + void buildPredicateInfo(); +}; + +bool PredicateInfoBuilder::stackIsInScope(const ValueDFSStack &Stack, + const ValueDFS &VDUse) const { + if (Stack.empty()) + return false; + // If it's a phi only use, make sure it's for this phi node edge, and that the + // use is in a phi node. If it's anything else, and the top of the stack is + // EdgeOnly, we need to pop the stack. We deliberately sort phi uses next to + // the defs they must go with so that we can know it's time to pop the stack + // when we hit the end of the phi uses for a given def. + if (Stack.back().EdgeOnly) { + if (!VDUse.U) + return false; + auto *PHI = dyn_cast<PHINode>(VDUse.U->getUser()); + if (!PHI) + return false; + // Check edge + BasicBlock *EdgePred = PHI->getIncomingBlock(*VDUse.U); + if (EdgePred != getBranchBlock(Stack.back().PInfo)) + return false; + + // Use dominates, which knows how to handle edge dominance. + return DT.dominates(getBlockEdge(Stack.back().PInfo), *VDUse.U); + } + + return (VDUse.DFSIn >= Stack.back().DFSIn && + VDUse.DFSOut <= Stack.back().DFSOut); +} + +void PredicateInfoBuilder::popStackUntilDFSScope(ValueDFSStack &Stack, + const ValueDFS &VD) { + while (!Stack.empty() && !stackIsInScope(Stack, VD)) + Stack.pop_back(); +} + +// Convert the uses of Op into a vector of uses, associating global and local +// DFS info with each one. +void PredicateInfoBuilder::convertUsesToDFSOrdered( + Value *Op, SmallVectorImpl<ValueDFS> &DFSOrderedSet) { + for (auto &U : Op->uses()) { + if (auto *I = dyn_cast<Instruction>(U.getUser())) { + ValueDFS VD; + // Put the phi node uses in the incoming block. + BasicBlock *IBlock; + if (auto *PN = dyn_cast<PHINode>(I)) { + IBlock = PN->getIncomingBlock(U); + // Make phi node users appear last in the incoming block + // they are from. + VD.LocalNum = LN_Last; + } else { + // If it's not a phi node use, it is somewhere in the middle of the + // block. + IBlock = I->getParent(); + VD.LocalNum = LN_Middle; + } + DomTreeNode *DomNode = DT.getNode(IBlock); + // It's possible our use is in an unreachable block. Skip it if so. + if (!DomNode) + continue; + VD.DFSIn = DomNode->getDFSNumIn(); + VD.DFSOut = DomNode->getDFSNumOut(); + VD.U = &U; + DFSOrderedSet.push_back(VD); + } + } +} + +bool shouldRename(Value *V) { + // Only want real values, not constants. 
Additionally, operands with one use + // are only being used in the comparison, which means they will not be useful + // for us to consider for predicateinfo. + return (isa<Instruction>(V) || isa<Argument>(V)) && !V->hasOneUse(); +} + +// Collect relevant operations from Comparison that we may want to insert copies +// for. +void collectCmpOps(CmpInst *Comparison, SmallVectorImpl<Value *> &CmpOperands) { + auto *Op0 = Comparison->getOperand(0); + auto *Op1 = Comparison->getOperand(1); + if (Op0 == Op1) + return; + + CmpOperands.push_back(Op0); + CmpOperands.push_back(Op1); +} + +// Add Op, PB to the list of value infos for Op, and mark Op to be renamed. +void PredicateInfoBuilder::addInfoFor(SmallVectorImpl<Value *> &OpsToRename, + Value *Op, PredicateBase *PB) { + auto &OperandInfo = getOrCreateValueInfo(Op); + if (OperandInfo.Infos.empty()) + OpsToRename.push_back(Op); + PI.AllInfos.push_back(PB); + OperandInfo.Infos.push_back(PB); +} + +// Process an assume instruction and place relevant operations we want to rename +// into OpsToRename. +void PredicateInfoBuilder::processAssume( + IntrinsicInst *II, BasicBlock *AssumeBB, + SmallVectorImpl<Value *> &OpsToRename) { + SmallVector<Value *, 4> Worklist; + SmallPtrSet<Value *, 4> Visited; + Worklist.push_back(II->getOperand(0)); + while (!Worklist.empty()) { + Value *Cond = Worklist.pop_back_val(); + if (!Visited.insert(Cond).second) + continue; + if (Visited.size() > MaxCondsPerBranch) + break; + + Value *Op0, *Op1; + if (match(Cond, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) { + Worklist.push_back(Op1); + Worklist.push_back(Op0); + } + + SmallVector<Value *, 4> Values; + Values.push_back(Cond); + if (auto *Cmp = dyn_cast<CmpInst>(Cond)) + collectCmpOps(Cmp, Values); + + for (Value *V : Values) { + if (shouldRename(V)) { + auto *PA = new PredicateAssume(V, II, Cond); + addInfoFor(OpsToRename, V, PA); + } + } + } +} + +// Process a block terminating branch, and place relevant operations to be +// renamed into OpsToRename. +void PredicateInfoBuilder::processBranch( + BranchInst *BI, BasicBlock *BranchBB, + SmallVectorImpl<Value *> &OpsToRename) { + BasicBlock *FirstBB = BI->getSuccessor(0); + BasicBlock *SecondBB = BI->getSuccessor(1); + + for (BasicBlock *Succ : {FirstBB, SecondBB}) { + bool TakenEdge = Succ == FirstBB; + // Don't try to insert on a self-edge. This is mainly because we will + // eliminate during renaming anyway. + if (Succ == BranchBB) + continue; + + SmallVector<Value *, 4> Worklist; + SmallPtrSet<Value *, 4> Visited; + Worklist.push_back(BI->getCondition()); + while (!Worklist.empty()) { + Value *Cond = Worklist.pop_back_val(); + if (!Visited.insert(Cond).second) + continue; + if (Visited.size() > MaxCondsPerBranch) + break; + + Value *Op0, *Op1; + if (TakenEdge ? match(Cond, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) + : match(Cond, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) { + Worklist.push_back(Op1); + Worklist.push_back(Op0); + } + + SmallVector<Value *, 4> Values; + Values.push_back(Cond); + if (auto *Cmp = dyn_cast<CmpInst>(Cond)) + collectCmpOps(Cmp, Values); + + for (Value *V : Values) { + if (shouldRename(V)) { + PredicateBase *PB = + new PredicateBranch(V, BranchBB, Succ, Cond, TakenEdge); + addInfoFor(OpsToRename, V, PB); + if (!Succ->getSinglePredecessor()) + EdgeUsesOnly.insert({BranchBB, Succ}); + } + } + } + } +} +// Process a block terminating switch, and place relevant operations to be +// renamed into OpsToRename. 
+void PredicateInfoBuilder::processSwitch( + SwitchInst *SI, BasicBlock *BranchBB, + SmallVectorImpl<Value *> &OpsToRename) { + Value *Op = SI->getCondition(); + if ((!isa<Instruction>(Op) && !isa<Argument>(Op)) || Op->hasOneUse()) + return; + + // Remember how many outgoing edges there are to every successor. + SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges; + for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) { + BasicBlock *TargetBlock = SI->getSuccessor(i); + ++SwitchEdges[TargetBlock]; + } + + // Now propagate info for each case value + for (auto C : SI->cases()) { + BasicBlock *TargetBlock = C.getCaseSuccessor(); + if (SwitchEdges.lookup(TargetBlock) == 1) { + PredicateSwitch *PS = new PredicateSwitch( + Op, SI->getParent(), TargetBlock, C.getCaseValue(), SI); + addInfoFor(OpsToRename, Op, PS); + if (!TargetBlock->getSinglePredecessor()) + EdgeUsesOnly.insert({BranchBB, TargetBlock}); + } + } +} + +// Build predicate info for our function +void PredicateInfoBuilder::buildPredicateInfo() { + DT.updateDFSNumbers(); + // Collect operands to rename from all conditional branch terminators, as well + // as assume statements. + SmallVector<Value *, 8> OpsToRename; + for (auto *DTN : depth_first(DT.getRootNode())) { + BasicBlock *BranchBB = DTN->getBlock(); + if (auto *BI = dyn_cast<BranchInst>(BranchBB->getTerminator())) { + if (!BI->isConditional()) + continue; + // Can't insert conditional information if they all go to the same place. + if (BI->getSuccessor(0) == BI->getSuccessor(1)) + continue; + processBranch(BI, BranchBB, OpsToRename); + } else if (auto *SI = dyn_cast<SwitchInst>(BranchBB->getTerminator())) { + processSwitch(SI, BranchBB, OpsToRename); + } + } + for (auto &Assume : AC.assumptions()) { + if (auto *II = dyn_cast_or_null<IntrinsicInst>(Assume)) + if (DT.isReachableFromEntry(II->getParent())) + processAssume(II, II->getParent(), OpsToRename); + } + // Now rename all our operations. + renameUses(OpsToRename); +} + +// Given the renaming stack, make all the operands currently on the stack real +// by inserting them into the IR. Return the last operation's value. +Value *PredicateInfoBuilder::materializeStack(unsigned int &Counter, + ValueDFSStack &RenameStack, + Value *OrigOp) { + // Find the first thing we have to materialize + auto RevIter = RenameStack.rbegin(); + for (; RevIter != RenameStack.rend(); ++RevIter) + if (RevIter->Def) + break; + + size_t Start = RevIter - RenameStack.rbegin(); + // The maximum number of things we should be trying to materialize at once + // right now is 4, depending on if we had an assume, a branch, and both used + // and of conditions. + for (auto RenameIter = RenameStack.end() - Start; + RenameIter != RenameStack.end(); ++RenameIter) { + auto *Op = + RenameIter == RenameStack.begin() ? OrigOp : (RenameIter - 1)->Def; + ValueDFS &Result = *RenameIter; + auto *ValInfo = Result.PInfo; + ValInfo->RenamedOp = (RenameStack.end() - Start) == RenameStack.begin() + ? OrigOp + : (RenameStack.end() - Start - 1)->Def; + // For edge predicates, we can just place the operand in the block before + // the terminator. For assume, we have to place it right before the assume + // to ensure we dominate all of our uses. Always insert right before the + // relevant instruction (terminator, assume), so that we insert in proper + // order in the case of multiple predicateinfo in the same block. + // The number of named values is used to detect if a new declaration was + // added. 
If so, that declaration is tracked so that it can be removed when + // the analysis is done. The corner case were a new declaration results in + // a name clash and the old name being renamed is not considered as that + // represents an invalid module. + if (isa<PredicateWithEdge>(ValInfo)) { + IRBuilder<> B(getBranchTerminator(ValInfo)); + auto NumDecls = F.getParent()->getNumNamedValues(); + Function *IF = Intrinsic::getDeclaration( + F.getParent(), Intrinsic::ssa_copy, Op->getType()); + if (NumDecls != F.getParent()->getNumNamedValues()) + PI.CreatedDeclarations.insert(IF); + CallInst *PIC = + B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++)); + PI.PredicateMap.insert({PIC, ValInfo}); + Result.Def = PIC; + } else { + auto *PAssume = dyn_cast<PredicateAssume>(ValInfo); + assert(PAssume && + "Should not have gotten here without it being an assume"); + // Insert the predicate directly after the assume. While it also holds + // directly before it, assume(i1 true) is not a useful fact. + IRBuilder<> B(PAssume->AssumeInst->getNextNode()); + auto NumDecls = F.getParent()->getNumNamedValues(); + Function *IF = Intrinsic::getDeclaration( + F.getParent(), Intrinsic::ssa_copy, Op->getType()); + if (NumDecls != F.getParent()->getNumNamedValues()) + PI.CreatedDeclarations.insert(IF); + CallInst *PIC = B.CreateCall(IF, Op); + PI.PredicateMap.insert({PIC, ValInfo}); + Result.Def = PIC; + } + } + return RenameStack.back().Def; +} + +// Instead of the standard SSA renaming algorithm, which is O(Number of +// instructions), and walks the entire dominator tree, we walk only the defs + +// uses. The standard SSA renaming algorithm does not really rely on the +// dominator tree except to order the stack push/pops of the renaming stacks, so +// that defs end up getting pushed before hitting the correct uses. This does +// not require the dominator tree, only the *order* of the dominator tree. The +// complete and correct ordering of the defs and uses, in dominator tree is +// contained in the DFS numbering of the dominator tree. So we sort the defs and +// uses into the DFS ordering, and then just use the renaming stack as per +// normal, pushing when we hit a def (which is a predicateinfo instruction), +// popping when we are out of the dfs scope for that def, and replacing any uses +// with top of stack if it exists. In order to handle liveness without +// propagating liveness info, we don't actually insert the predicateinfo +// instruction def until we see a use that it would dominate. Once we see such +// a use, we materialize the predicateinfo instruction in the right place and +// use it. +// +// TODO: Use this algorithm to perform fast single-variable renaming in +// promotememtoreg and memoryssa. +void PredicateInfoBuilder::renameUses(SmallVectorImpl<Value *> &OpsToRename) { + ValueDFS_Compare Compare(DT); + // Compute liveness, and rename in O(uses) per Op. + for (auto *Op : OpsToRename) { + LLVM_DEBUG(dbgs() << "Visiting " << *Op << "\n"); + unsigned Counter = 0; + SmallVector<ValueDFS, 16> OrderedUses; + const auto &ValueInfo = getValueInfo(Op); + // Insert the possible copies into the def/use list. + // They will become real copies if we find a real use for them, and never + // created otherwise. + for (const auto &PossibleCopy : ValueInfo.Infos) { + ValueDFS VD; + // Determine where we are going to place the copy by the copy type. + // The predicate info for branches always come first, they will get + // materialized in the split block at the top of the block. 
+ // The predicate info for assumes will be somewhere in the middle, + // it will get materialized in front of the assume. + if (const auto *PAssume = dyn_cast<PredicateAssume>(PossibleCopy)) { + VD.LocalNum = LN_Middle; + DomTreeNode *DomNode = DT.getNode(PAssume->AssumeInst->getParent()); + if (!DomNode) + continue; + VD.DFSIn = DomNode->getDFSNumIn(); + VD.DFSOut = DomNode->getDFSNumOut(); + VD.PInfo = PossibleCopy; + OrderedUses.push_back(VD); + } else if (isa<PredicateWithEdge>(PossibleCopy)) { + // If we can only do phi uses, we treat it like it's in the branch + // block, and handle it specially. We know that it goes last, and only + // dominate phi uses. + auto BlockEdge = getBlockEdge(PossibleCopy); + if (EdgeUsesOnly.count(BlockEdge)) { + VD.LocalNum = LN_Last; + auto *DomNode = DT.getNode(BlockEdge.first); + if (DomNode) { + VD.DFSIn = DomNode->getDFSNumIn(); + VD.DFSOut = DomNode->getDFSNumOut(); + VD.PInfo = PossibleCopy; + VD.EdgeOnly = true; + OrderedUses.push_back(VD); + } + } else { + // Otherwise, we are in the split block (even though we perform + // insertion in the branch block). + // Insert a possible copy at the split block and before the branch. + VD.LocalNum = LN_First; + auto *DomNode = DT.getNode(BlockEdge.second); + if (DomNode) { + VD.DFSIn = DomNode->getDFSNumIn(); + VD.DFSOut = DomNode->getDFSNumOut(); + VD.PInfo = PossibleCopy; + OrderedUses.push_back(VD); + } + } + } + } + + convertUsesToDFSOrdered(Op, OrderedUses); + // Here we require a stable sort because we do not bother to try to + // assign an order to the operands the uses represent. Thus, two + // uses in the same instruction do not have a strict sort order + // currently and will be considered equal. We could get rid of the + // stable sort by creating one if we wanted. + llvm::stable_sort(OrderedUses, Compare); + SmallVector<ValueDFS, 8> RenameStack; + // For each use, sorted into dfs order, push values and replaces uses with + // top of stack, which will represent the reaching def. + for (auto &VD : OrderedUses) { + // We currently do not materialize copy over copy, but we should decide if + // we want to. + bool PossibleCopy = VD.PInfo != nullptr; + if (RenameStack.empty()) { + LLVM_DEBUG(dbgs() << "Rename Stack is empty\n"); + } else { + LLVM_DEBUG(dbgs() << "Rename Stack Top DFS numbers are (" + << RenameStack.back().DFSIn << "," + << RenameStack.back().DFSOut << ")\n"); + } + + LLVM_DEBUG(dbgs() << "Current DFS numbers are (" << VD.DFSIn << "," + << VD.DFSOut << ")\n"); + + bool ShouldPush = (VD.Def || PossibleCopy); + bool OutOfScope = !stackIsInScope(RenameStack, VD); + if (OutOfScope || ShouldPush) { + // Sync to our current scope. + popStackUntilDFSScope(RenameStack, VD); + if (ShouldPush) { + RenameStack.push_back(VD); + } + } + // If we get to this point, and the stack is empty we must have a use + // with no renaming needed, just skip it. + if (RenameStack.empty()) + continue; + // Skip values, only want to rename the uses + if (VD.Def || PossibleCopy) + continue; + if (!DebugCounter::shouldExecute(RenameCounter)) { + LLVM_DEBUG(dbgs() << "Skipping execution due to debug counter\n"); + continue; + } + ValueDFS &Result = RenameStack.back(); + + // If the possible copy dominates something, materialize our stack up to + // this point. This ensures every comparison that affects our operation + // ends up with predicateinfo. 
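+ // For example (hypothetical names): given
+ //   %cmp = icmp eq i32 %x, 0
+ //   br i1 %cmp, label %then, label %else
+ // the first use of %x encountered within the scope of %then is what triggers
+ // insertion of the ssa_copy for %x; predicates that never dominate a use
+ // insert nothing.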
+ if (!Result.Def) + Result.Def = materializeStack(Counter, RenameStack, Op); + + LLVM_DEBUG(dbgs() << "Found replacement " << *Result.Def << " for " + << *VD.U->get() << " in " << *(VD.U->getUser()) + << "\n"); + assert(DT.dominates(cast<Instruction>(Result.Def), *VD.U) && + "Predicateinfo def should have dominated this use"); + VD.U->set(Result.Def); + } + } +} + +PredicateInfoBuilder::ValueInfo & +PredicateInfoBuilder::getOrCreateValueInfo(Value *Operand) { + auto OIN = ValueInfoNums.find(Operand); + if (OIN == ValueInfoNums.end()) { + // This will grow it + ValueInfos.resize(ValueInfos.size() + 1); + // This will use the new size and give us a 0 based number of the info + auto InsertResult = ValueInfoNums.insert({Operand, ValueInfos.size() - 1}); + assert(InsertResult.second && "Value info number already existed?"); + return ValueInfos[InsertResult.first->second]; + } + return ValueInfos[OIN->second]; +} + +const PredicateInfoBuilder::ValueInfo & +PredicateInfoBuilder::getValueInfo(Value *Operand) const { + auto OINI = ValueInfoNums.lookup(Operand); + assert(OINI != 0 && "Operand was not really in the Value Info Numbers"); + assert(OINI < ValueInfos.size() && + "Value Info Number greater than size of Value Info Table"); + return ValueInfos[OINI]; +} + +PredicateInfo::PredicateInfo(Function &F, DominatorTree &DT, + AssumptionCache &AC) + : F(F) { + PredicateInfoBuilder Builder(*this, F, DT, AC); + Builder.buildPredicateInfo(); +} + +// Remove all declarations we created . The PredicateInfo consumers are +// responsible for remove the ssa_copy calls created. +PredicateInfo::~PredicateInfo() { + // Collect function pointers in set first, as SmallSet uses a SmallVector + // internally and we have to remove the asserting value handles first. + SmallPtrSet<Function *, 20> FunctionPtrs; + for (const auto &F : CreatedDeclarations) + FunctionPtrs.insert(&*F); + CreatedDeclarations.clear(); + + for (Function *F : FunctionPtrs) { + assert(F->user_begin() == F->user_end() && + "PredicateInfo consumer did not remove all SSA copies."); + F->eraseFromParent(); + } +} + +std::optional<PredicateConstraint> PredicateBase::getConstraint() const { + switch (Type) { + case PT_Assume: + case PT_Branch: { + bool TrueEdge = true; + if (auto *PBranch = dyn_cast<PredicateBranch>(this)) + TrueEdge = PBranch->TrueEdge; + + if (Condition == RenamedOp) { + return {{CmpInst::ICMP_EQ, + TrueEdge ? ConstantInt::getTrue(Condition->getType()) + : ConstantInt::getFalse(Condition->getType())}}; + } + + CmpInst *Cmp = dyn_cast<CmpInst>(Condition); + if (!Cmp) { + // TODO: Make this an assertion once RenamedOp is fully accurate. + return std::nullopt; + } + + CmpInst::Predicate Pred; + Value *OtherOp; + if (Cmp->getOperand(0) == RenamedOp) { + Pred = Cmp->getPredicate(); + OtherOp = Cmp->getOperand(1); + } else if (Cmp->getOperand(1) == RenamedOp) { + Pred = Cmp->getSwappedPredicate(); + OtherOp = Cmp->getOperand(0); + } else { + // TODO: Make this an assertion once RenamedOp is fully accurate. + return std::nullopt; + } + + // Invert predicate along false edge. + if (!TrueEdge) + Pred = CmpInst::getInversePredicate(Pred); + + return {{Pred, OtherOp}}; + } + case PT_Switch: + if (Condition != RenamedOp) { + // TODO: Make this an assertion once RenamedOp is fully accurate. 
+ return std::nullopt; + } + + return {{CmpInst::ICMP_EQ, cast<PredicateSwitch>(this)->CaseValue}}; + } + llvm_unreachable("Unknown predicate type"); +} + +void PredicateInfo::verifyPredicateInfo() const {} + +char PredicateInfoPrinterLegacyPass::ID = 0; + +PredicateInfoPrinterLegacyPass::PredicateInfoPrinterLegacyPass() + : FunctionPass(ID) { + initializePredicateInfoPrinterLegacyPassPass( + *PassRegistry::getPassRegistry()); +} + +void PredicateInfoPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<DominatorTreeWrapperPass>(); + AU.addRequired<AssumptionCacheTracker>(); +} + +// Replace ssa_copy calls created by PredicateInfo with their operand. +static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) { + for (Instruction &Inst : llvm::make_early_inc_range(instructions(F))) { + const auto *PI = PredInfo.getPredicateInfoFor(&Inst); + auto *II = dyn_cast<IntrinsicInst>(&Inst); + if (!PI || !II || II->getIntrinsicID() != Intrinsic::ssa_copy) + continue; + + Inst.replaceAllUsesWith(II->getOperand(0)); + Inst.eraseFromParent(); + } +} + +bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) { + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + auto PredInfo = std::make_unique<PredicateInfo>(F, DT, AC); + PredInfo->print(dbgs()); + if (VerifyPredicateInfo) + PredInfo->verifyPredicateInfo(); + + replaceCreatedSSACopys(*PredInfo, F); + return false; +} + +PreservedAnalyses PredicateInfoPrinterPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + auto &AC = AM.getResult<AssumptionAnalysis>(F); + OS << "PredicateInfo for function: " << F.getName() << "\n"; + auto PredInfo = std::make_unique<PredicateInfo>(F, DT, AC); + PredInfo->print(OS); + + replaceCreatedSSACopys(*PredInfo, F); + return PreservedAnalyses::all(); +} + +/// An assembly annotator class to print PredicateInfo information in +/// comments. 
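+/// For example (illustrative output only; value and block names are made up),
+/// a branch-derived entry may be printed roughly as:
+///   ; Has predicate info
+///   ; branch predicate info { TrueEdge: 1 Comparison:  %cmp = icmp eq i32 %x, 0 Edge: [label %entry,label %then], RenamedOp: %x }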
+class PredicateInfoAnnotatedWriter : public AssemblyAnnotationWriter { + friend class PredicateInfo; + const PredicateInfo *PredInfo; + +public: + PredicateInfoAnnotatedWriter(const PredicateInfo *M) : PredInfo(M) {} + + void emitBasicBlockStartAnnot(const BasicBlock *BB, + formatted_raw_ostream &OS) override {} + + void emitInstructionAnnot(const Instruction *I, + formatted_raw_ostream &OS) override { + if (const auto *PI = PredInfo->getPredicateInfoFor(I)) { + OS << "; Has predicate info\n"; + if (const auto *PB = dyn_cast<PredicateBranch>(PI)) { + OS << "; branch predicate info { TrueEdge: " << PB->TrueEdge + << " Comparison:" << *PB->Condition << " Edge: ["; + PB->From->printAsOperand(OS); + OS << ","; + PB->To->printAsOperand(OS); + OS << "]"; + } else if (const auto *PS = dyn_cast<PredicateSwitch>(PI)) { + OS << "; switch predicate info { CaseValue: " << *PS->CaseValue + << " Switch:" << *PS->Switch << " Edge: ["; + PS->From->printAsOperand(OS); + OS << ","; + PS->To->printAsOperand(OS); + OS << "]"; + } else if (const auto *PA = dyn_cast<PredicateAssume>(PI)) { + OS << "; assume predicate info {" + << " Comparison:" << *PA->Condition; + } + OS << ", RenamedOp: "; + PI->RenamedOp->printAsOperand(OS, false); + OS << " }\n"; + } + } +}; + +void PredicateInfo::print(raw_ostream &OS) const { + PredicateInfoAnnotatedWriter Writer(this); + F.print(OS, &Writer); +} + +void PredicateInfo::dump() const { + PredicateInfoAnnotatedWriter Writer(this); + F.print(dbgs(), &Writer); +} + +PreservedAnalyses PredicateInfoVerifierPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + auto &AC = AM.getResult<AssumptionAnalysis>(F); + std::make_unique<PredicateInfo>(F, DT, AC)->verifyPredicateInfo(); + + return PreservedAnalyses::all(); +} +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/PromoteMemoryToRegister.cpp new file mode 100644 index 0000000000..75ea9dc5df --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -0,0 +1,1111 @@ +//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file promotes memory references to be register references. It promotes +// alloca instructions which only have loads and stores as uses. An alloca is +// transformed by using iterated dominator frontiers to place PHI nodes, then +// traversing the function in depth-first order to rewrite loads and stores as +// appropriate. 
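+//
+// For example (an illustrative sketch, hypothetical IR): a promotable alloca
+//
+//   %x = alloca i32
+//   store i32 1, ptr %x
+//   %v = load i32, ptr %x
+//
+// is removed entirely; the load is rewritten to use the stored value 1, and
+// where several stores reach a common block a PHI node merges their values.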
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/IteratedDominanceFrontier.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/Support/Casting.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include <algorithm> +#include <cassert> +#include <iterator> +#include <utility> +#include <vector> + +using namespace llvm; + +#define DEBUG_TYPE "mem2reg" + +STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block"); +STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store"); +STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); +STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); + +bool llvm::isAllocaPromotable(const AllocaInst *AI) { + // Only allow direct and non-volatile loads and stores... + for (const User *U : AI->users()) { + if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { + // Note that atomic loads can be transformed; atomic semantics do + // not have any meaning for a local alloca. + if (LI->isVolatile() || LI->getType() != AI->getAllocatedType()) + return false; + } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { + if (SI->getValueOperand() == AI || + SI->getValueOperand()->getType() != AI->getAllocatedType()) + return false; // Don't allow a store OF the AI, only INTO the AI. + // Note that atomic stores can be transformed; atomic semantics do + // not have any meaning for a local alloca. + if (SI->isVolatile()) + return false; + } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { + if (!II->isLifetimeStartOrEnd() && !II->isDroppable()) + return false; + } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { + if (!onlyUsedByLifetimeMarkersOrDroppableInsts(BCI)) + return false; + } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { + if (!GEPI->hasAllZeroIndices()) + return false; + if (!onlyUsedByLifetimeMarkersOrDroppableInsts(GEPI)) + return false; + } else if (const AddrSpaceCastInst *ASCI = dyn_cast<AddrSpaceCastInst>(U)) { + if (!onlyUsedByLifetimeMarkers(ASCI)) + return false; + } else { + return false; + } + } + + return true; +} + +namespace { + +/// Helper for updating assignment tracking debug info when promoting allocas. +class AssignmentTrackingInfo { + /// DbgAssignIntrinsics linked to the alloca with at most one per variable + /// fragment. (i.e. not be a comprehensive set if there are multiple + /// dbg.assigns for one variable fragment). 
+ SmallVector<DbgVariableIntrinsic *> DbgAssigns; + +public: + void init(AllocaInst *AI) { + SmallSet<DebugVariable, 2> Vars; + for (DbgAssignIntrinsic *DAI : at::getAssignmentMarkers(AI)) { + if (Vars.insert(DebugVariable(DAI)).second) + DbgAssigns.push_back(DAI); + } + } + + /// Update assignment tracking debug info given for the to-be-deleted store + /// \p ToDelete that stores to this alloca. + void updateForDeletedStore(StoreInst *ToDelete, DIBuilder &DIB) const { + // There's nothing to do if the alloca doesn't have any variables using + // assignment tracking. + if (DbgAssigns.empty()) { + assert(at::getAssignmentMarkers(ToDelete).empty()); + return; + } + + // Just leave dbg.assign intrinsics in place and remember that we've seen + // one for each variable fragment. + SmallSet<DebugVariable, 2> VarHasDbgAssignForStore; + for (DbgAssignIntrinsic *DAI : at::getAssignmentMarkers(ToDelete)) + VarHasDbgAssignForStore.insert(DebugVariable(DAI)); + + // It's possible for variables using assignment tracking to have no + // dbg.assign linked to this store. These are variables in DbgAssigns that + // are missing from VarHasDbgAssignForStore. Since there isn't a dbg.assign + // to mark the assignment - and the store is going to be deleted - insert a + // dbg.value to do that now. An untracked store may be either one that + // cannot be represented using assignment tracking (non-const offset or + // size) or one that is trackable but has had its DIAssignID attachment + // dropped accidentally. + for (auto *DAI : DbgAssigns) { + if (VarHasDbgAssignForStore.contains(DebugVariable(DAI))) + continue; + ConvertDebugDeclareToDebugValue(DAI, ToDelete, DIB); + } + } + + /// Update assignment tracking debug info given for the newly inserted PHI \p + /// NewPhi. + void updateForNewPhi(PHINode *NewPhi, DIBuilder &DIB) const { + // Regardless of the position of dbg.assigns relative to stores, the + // incoming values into a new PHI should be the same for the (imaginary) + // debug-phi. + for (auto *DAI : DbgAssigns) + ConvertDebugDeclareToDebugValue(DAI, NewPhi, DIB); + } + + void clear() { DbgAssigns.clear(); } + bool empty() { return DbgAssigns.empty(); } +}; + +struct AllocaInfo { + using DbgUserVec = SmallVector<DbgVariableIntrinsic *, 1>; + + SmallVector<BasicBlock *, 32> DefiningBlocks; + SmallVector<BasicBlock *, 32> UsingBlocks; + + StoreInst *OnlyStore; + BasicBlock *OnlyBlock; + bool OnlyUsedInOneBlock; + + /// Debug users of the alloca - does not include dbg.assign intrinsics. + DbgUserVec DbgUsers; + /// Helper to update assignment tracking debug info. + AssignmentTrackingInfo AssignmentTracking; + + void clear() { + DefiningBlocks.clear(); + UsingBlocks.clear(); + OnlyStore = nullptr; + OnlyBlock = nullptr; + OnlyUsedInOneBlock = true; + DbgUsers.clear(); + AssignmentTracking.clear(); + } + + /// Scan the uses of the specified alloca, filling in the AllocaInfo used + /// by the rest of the pass to reason about the uses of this alloca. + void AnalyzeAlloca(AllocaInst *AI) { + clear(); + + // As we scan the uses of the alloca instruction, keep track of stores, + // and decide whether all of the loads and stores to the alloca are within + // the same basic block. 
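+ // For instance (hypothetical blocks): a store in %entry plus a load in
+ // %exit records %entry as a defining block, %exit as a using block, and
+ // clears OnlyUsedInOneBlock; if every access sits in %entry, OnlyBlock
+ // remains %entry and the cheaper single-block promotion path applies.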
+ for (User *U : AI->users()) { + Instruction *User = cast<Instruction>(U); + + if (StoreInst *SI = dyn_cast<StoreInst>(User)) { + // Remember the basic blocks which define new values for the alloca + DefiningBlocks.push_back(SI->getParent()); + OnlyStore = SI; + } else { + LoadInst *LI = cast<LoadInst>(User); + // Otherwise it must be a load instruction, keep track of variable + // reads. + UsingBlocks.push_back(LI->getParent()); + } + + if (OnlyUsedInOneBlock) { + if (!OnlyBlock) + OnlyBlock = User->getParent(); + else if (OnlyBlock != User->getParent()) + OnlyUsedInOneBlock = false; + } + } + DbgUserVec AllDbgUsers; + findDbgUsers(AllDbgUsers, AI); + std::copy_if(AllDbgUsers.begin(), AllDbgUsers.end(), + std::back_inserter(DbgUsers), [](DbgVariableIntrinsic *DII) { + return !isa<DbgAssignIntrinsic>(DII); + }); + AssignmentTracking.init(AI); + } +}; + +/// Data package used by RenamePass(). +struct RenamePassData { + using ValVector = std::vector<Value *>; + using LocationVector = std::vector<DebugLoc>; + + RenamePassData(BasicBlock *B, BasicBlock *P, ValVector V, LocationVector L) + : BB(B), Pred(P), Values(std::move(V)), Locations(std::move(L)) {} + + BasicBlock *BB; + BasicBlock *Pred; + ValVector Values; + LocationVector Locations; +}; + +/// This assigns and keeps a per-bb relative ordering of load/store +/// instructions in the block that directly load or store an alloca. +/// +/// This functionality is important because it avoids scanning large basic +/// blocks multiple times when promoting many allocas in the same block. +class LargeBlockInfo { + /// For each instruction that we track, keep the index of the + /// instruction. + /// + /// The index starts out as the number of the instruction from the start of + /// the block. + DenseMap<const Instruction *, unsigned> InstNumbers; + +public: + + /// This code only looks at accesses to allocas. + static bool isInterestingInstruction(const Instruction *I) { + return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) || + (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1))); + } + + /// Get or calculate the index of the specified instruction. + unsigned getInstructionIndex(const Instruction *I) { + assert(isInterestingInstruction(I) && + "Not a load/store to/from an alloca?"); + + // If we already have this instruction number, return it. + DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I); + if (It != InstNumbers.end()) + return It->second; + + // Scan the whole block to get the instruction. This accumulates + // information for every interesting instruction in the block, in order to + // avoid gratuitus rescans. + const BasicBlock *BB = I->getParent(); + unsigned InstNo = 0; + for (const Instruction &BBI : *BB) + if (isInterestingInstruction(&BBI)) + InstNumbers[&BBI] = InstNo++; + It = InstNumbers.find(I); + + assert(It != InstNumbers.end() && "Didn't insert instruction?"); + return It->second; + } + + void deleteValue(const Instruction *I) { InstNumbers.erase(I); } + + void clear() { InstNumbers.clear(); } +}; + +struct PromoteMem2Reg { + /// The alloca instructions being promoted. + std::vector<AllocaInst *> Allocas; + + DominatorTree &DT; + DIBuilder DIB; + + /// A cache of @llvm.assume intrinsics used by SimplifyInstruction. + AssumptionCache *AC; + + const SimplifyQuery SQ; + + /// Reverse mapping of Allocas. + DenseMap<AllocaInst *, unsigned> AllocaLookup; + + /// The PhiNodes we're adding. 
+ /// + /// That map is used to simplify some Phi nodes as we iterate over it, so + /// it should have deterministic iterators. We could use a MapVector, but + /// since we already maintain a map from BasicBlock* to a stable numbering + /// (BBNumbers), the DenseMap is more efficient (also supports removal). + DenseMap<std::pair<unsigned, unsigned>, PHINode *> NewPhiNodes; + + /// For each PHI node, keep track of which entry in Allocas it corresponds + /// to. + DenseMap<PHINode *, unsigned> PhiToAllocaMap; + + /// For each alloca, we keep track of the dbg.declare intrinsic that + /// describes it, if any, so that we can convert it to a dbg.value + /// intrinsic if the alloca gets promoted. + SmallVector<AllocaInfo::DbgUserVec, 8> AllocaDbgUsers; + + /// For each alloca, keep an instance of a helper class that gives us an easy + /// way to update assignment tracking debug info if the alloca is promoted. + SmallVector<AssignmentTrackingInfo, 8> AllocaATInfo; + + /// The set of basic blocks the renamer has already visited. + SmallPtrSet<BasicBlock *, 16> Visited; + + /// Contains a stable numbering of basic blocks to avoid non-determinstic + /// behavior. + DenseMap<BasicBlock *, unsigned> BBNumbers; + + /// Lazily compute the number of predecessors a block has. + DenseMap<const BasicBlock *, unsigned> BBNumPreds; + +public: + PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT, + AssumptionCache *AC) + : Allocas(Allocas.begin(), Allocas.end()), DT(DT), + DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false), + AC(AC), SQ(DT.getRoot()->getParent()->getParent()->getDataLayout(), + nullptr, &DT, AC) {} + + void run(); + +private: + void RemoveFromAllocasList(unsigned &AllocaIdx) { + Allocas[AllocaIdx] = Allocas.back(); + Allocas.pop_back(); + --AllocaIdx; + } + + unsigned getNumPreds(const BasicBlock *BB) { + unsigned &NP = BBNumPreds[BB]; + if (NP == 0) + NP = pred_size(BB) + 1; + return NP - 1; + } + + void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, + const SmallPtrSetImpl<BasicBlock *> &DefBlocks, + SmallPtrSetImpl<BasicBlock *> &LiveInBlocks); + void RenamePass(BasicBlock *BB, BasicBlock *Pred, + RenamePassData::ValVector &IncVals, + RenamePassData::LocationVector &IncLocs, + std::vector<RenamePassData> &Worklist); + bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version); +}; + +} // end anonymous namespace + +/// Given a LoadInst LI this adds assume(LI != null) after it. +static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) { + Function *AssumeIntrinsic = + Intrinsic::getDeclaration(LI->getModule(), Intrinsic::assume); + ICmpInst *LoadNotNull = new ICmpInst(ICmpInst::ICMP_NE, LI, + Constant::getNullValue(LI->getType())); + LoadNotNull->insertAfter(LI); + CallInst *CI = CallInst::Create(AssumeIntrinsic, {LoadNotNull}); + CI->insertAfter(LoadNotNull); + AC->registerAssumption(cast<AssumeInst>(CI)); +} + +static void convertMetadataToAssumes(LoadInst *LI, Value *Val, + const DataLayout &DL, AssumptionCache *AC, + const DominatorTree *DT) { + // If the load was marked as nonnull we don't want to lose that information + // when we erase this Load. So we preserve it with an assume. As !nonnull + // returns poison while assume violations are immediate undefined behavior, + // we can only do this if the value is known non-poison. 
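+ // For example (an illustrative sketch, names made up): for
+ //   %p2 = load ptr, ptr %slot, !nonnull !0, !noundef !1
+ // the fact is kept alive, before the load is erased, by emitting roughly
+ //   %nn = icmp ne ptr %p2, null
+ //   call void @llvm.assume(i1 %nn)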
+ if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && + LI->getMetadata(LLVMContext::MD_noundef) && + !isKnownNonZero(Val, DL, 0, AC, LI, DT)) + addAssumeNonNull(AC, LI); +} + +static void removeIntrinsicUsers(AllocaInst *AI) { + // Knowing that this alloca is promotable, we know that it's safe to kill all + // instructions except for load and store. + + for (Use &U : llvm::make_early_inc_range(AI->uses())) { + Instruction *I = cast<Instruction>(U.getUser()); + if (isa<LoadInst>(I) || isa<StoreInst>(I)) + continue; + + // Drop the use of AI in droppable instructions. + if (I->isDroppable()) { + I->dropDroppableUse(U); + continue; + } + + if (!I->getType()->isVoidTy()) { + // The only users of this bitcast/GEP instruction are lifetime intrinsics. + // Follow the use/def chain to erase them now instead of leaving it for + // dead code elimination later. + for (Use &UU : llvm::make_early_inc_range(I->uses())) { + Instruction *Inst = cast<Instruction>(UU.getUser()); + + // Drop the use of I in droppable instructions. + if (Inst->isDroppable()) { + Inst->dropDroppableUse(UU); + continue; + } + Inst->eraseFromParent(); + } + } + I->eraseFromParent(); + } +} + +/// Rewrite as many loads as possible given a single store. +/// +/// When there is only a single store, we can use the domtree to trivially +/// replace all of the dominated loads with the stored value. Do so, and return +/// true if this has successfully promoted the alloca entirely. If this returns +/// false there were some loads which were not dominated by the single store +/// and thus must be phi-ed with undef. We fall back to the standard alloca +/// promotion algorithm in that case. +static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, + LargeBlockInfo &LBI, const DataLayout &DL, + DominatorTree &DT, AssumptionCache *AC) { + StoreInst *OnlyStore = Info.OnlyStore; + bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0)); + BasicBlock *StoreBB = OnlyStore->getParent(); + int StoreIndex = -1; + + // Clear out UsingBlocks. We will reconstruct it here if needed. + Info.UsingBlocks.clear(); + + for (User *U : make_early_inc_range(AI->users())) { + Instruction *UserInst = cast<Instruction>(U); + if (UserInst == OnlyStore) + continue; + LoadInst *LI = cast<LoadInst>(UserInst); + + // Okay, if we have a load from the alloca, we want to replace it with the + // only value stored to the alloca. We can do this if the value is + // dominated by the store. If not, we use the rest of the mem2reg machinery + // to insert the phi nodes as needed. + if (!StoringGlobalVal) { // Non-instructions are always dominated. + if (LI->getParent() == StoreBB) { + // If we have a use that is in the same block as the store, compare the + // indices of the two instructions to see which one came first. If the + // load came before the store, we can't handle it. + if (StoreIndex == -1) + StoreIndex = LBI.getInstructionIndex(OnlyStore); + + if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) { + // Can't handle this load, bail out. + Info.UsingBlocks.push_back(StoreBB); + continue; + } + } else if (!DT.dominates(StoreBB, LI->getParent())) { + // If the load and store are in different blocks, use BB dominance to + // check their relationships. If the store doesn't dom the use, bail + // out. + Info.UsingBlocks.push_back(LI->getParent()); + continue; + } + } + + // Otherwise, we *can* safely rewrite this load. 
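+ // For instance (hypothetical IR): with a single `store i32 42, ptr %x`,
+ // every load of %x dominated by that store is replaced by 42 right here.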
+ Value *ReplVal = OnlyStore->getOperand(0); + // If the replacement value is the load, this must occur in unreachable + // code. + if (ReplVal == LI) + ReplVal = PoisonValue::get(LI->getType()); + + convertMetadataToAssumes(LI, ReplVal, DL, AC, &DT); + LI->replaceAllUsesWith(ReplVal); + LI->eraseFromParent(); + LBI.deleteValue(LI); + } + + // Finally, after the scan, check to see if the store is all that is left. + if (!Info.UsingBlocks.empty()) + return false; // If not, we'll have to fall back for the remainder. + + DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); + // Update assignment tracking info for the store we're going to delete. + Info.AssignmentTracking.updateForDeletedStore(Info.OnlyStore, DIB); + + // Record debuginfo for the store and remove the declaration's + // debuginfo. + for (DbgVariableIntrinsic *DII : Info.DbgUsers) { + if (DII->isAddressOfVariable()) { + ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB); + DII->eraseFromParent(); + } else if (DII->getExpression()->startsWithDeref()) { + DII->eraseFromParent(); + } + } + + // Remove dbg.assigns linked to the alloca as these are now redundant. + at::deleteAssignmentMarkers(AI); + + // Remove the (now dead) store and alloca. + Info.OnlyStore->eraseFromParent(); + LBI.deleteValue(Info.OnlyStore); + + AI->eraseFromParent(); + return true; +} + +/// Many allocas are only used within a single basic block. If this is the +/// case, avoid traversing the CFG and inserting a lot of potentially useless +/// PHI nodes by just performing a single linear pass over the basic block +/// using the Alloca. +/// +/// If we cannot promote this alloca (because it is read before it is written), +/// return false. This is necessary in cases where, due to control flow, the +/// alloca is undefined only on some control flow paths. e.g. code like +/// this is correct in LLVM IR: +/// // A is an alloca with no stores so far +/// for (...) { +/// int t = *A; +/// if (!first_iteration) +/// use(t); +/// *A = 42; +/// } +static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, + LargeBlockInfo &LBI, + const DataLayout &DL, + DominatorTree &DT, + AssumptionCache *AC) { + // The trickiest case to handle is when we have large blocks. Because of this, + // this code is optimized assuming that large blocks happen. This does not + // significantly pessimize the small block case. This uses LargeBlockInfo to + // make it efficient to get the index of various operations in the block. + + // Walk the use-def list of the alloca, getting the locations of all stores. + using StoresByIndexTy = SmallVector<std::pair<unsigned, StoreInst *>, 64>; + StoresByIndexTy StoresByIndex; + + for (User *U : AI->users()) + if (StoreInst *SI = dyn_cast<StoreInst>(U)) + StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI)); + + // Sort the stores by their index, making it efficient to do a lookup with a + // binary search. + llvm::sort(StoresByIndex, less_first()); + + // Walk all of the loads from this alloca, replacing them with the nearest + // store above them, if any. + for (User *U : make_early_inc_range(AI->users())) { + LoadInst *LI = dyn_cast<LoadInst>(U); + if (!LI) + continue; + + unsigned LoadIdx = LBI.getInstructionIndex(LI); + + // Find the nearest store that has a lower index than this load. 
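+ // For instance (hypothetical indices): with stores recorded at indices
+ // {3, 10}, a load at index 7 picks up the value of the store at index 3,
+ // while a load at index 1 has no earlier store and takes the bail-out
+ // path below.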
+ StoresByIndexTy::iterator I = llvm::lower_bound( + StoresByIndex, + std::make_pair(LoadIdx, static_cast<StoreInst *>(nullptr)), + less_first()); + Value *ReplVal; + if (I == StoresByIndex.begin()) { + if (StoresByIndex.empty()) + // If there are no stores, the load takes the undef value. + ReplVal = UndefValue::get(LI->getType()); + else + // There is no store before this load, bail out (load may be affected + // by the following stores - see main comment). + return false; + } else { + // Otherwise, there was a store before this load, the load takes its + // value. + ReplVal = std::prev(I)->second->getOperand(0); + } + + convertMetadataToAssumes(LI, ReplVal, DL, AC, &DT); + + // If the replacement value is the load, this must occur in unreachable + // code. + if (ReplVal == LI) + ReplVal = PoisonValue::get(LI->getType()); + + LI->replaceAllUsesWith(ReplVal); + LI->eraseFromParent(); + LBI.deleteValue(LI); + } + + // Remove the (now dead) stores and alloca. + DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); + while (!AI->use_empty()) { + StoreInst *SI = cast<StoreInst>(AI->user_back()); + // Update assignment tracking info for the store we're going to delete. + Info.AssignmentTracking.updateForDeletedStore(SI, DIB); + // Record debuginfo for the store before removing it. + for (DbgVariableIntrinsic *DII : Info.DbgUsers) { + if (DII->isAddressOfVariable()) { + ConvertDebugDeclareToDebugValue(DII, SI, DIB); + } + } + SI->eraseFromParent(); + LBI.deleteValue(SI); + } + + // Remove dbg.assigns linked to the alloca as these are now redundant. + at::deleteAssignmentMarkers(AI); + AI->eraseFromParent(); + + // The alloca's debuginfo can be removed as well. + for (DbgVariableIntrinsic *DII : Info.DbgUsers) + if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref()) + DII->eraseFromParent(); + + ++NumLocalPromoted; + return true; +} + +void PromoteMem2Reg::run() { + Function &F = *DT.getRoot()->getParent(); + + AllocaDbgUsers.resize(Allocas.size()); + AllocaATInfo.resize(Allocas.size()); + + AllocaInfo Info; + LargeBlockInfo LBI; + ForwardIDFCalculator IDF(DT); + + for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { + AllocaInst *AI = Allocas[AllocaNum]; + + assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!"); + assert(AI->getParent()->getParent() == &F && + "All allocas should be in the same function, which is same as DF!"); + + removeIntrinsicUsers(AI); + + if (AI->use_empty()) { + // If there are no uses of the alloca, just delete it now. + AI->eraseFromParent(); + + // Remove the alloca from the Allocas list, since it has been processed + RemoveFromAllocasList(AllocaNum); + ++NumDeadAlloca; + continue; + } + + // Calculate the set of read and write-locations for each alloca. This is + // analogous to finding the 'uses' and 'definitions' of each variable. + Info.AnalyzeAlloca(AI); + + // If there is only a single store to this value, replace any loads of + // it that are directly dominated by the definition with the value stored. + if (Info.DefiningBlocks.size() == 1) { + if (rewriteSingleStoreAlloca(AI, Info, LBI, SQ.DL, DT, AC)) { + // The alloca has been processed, move on. + RemoveFromAllocasList(AllocaNum); + ++NumSingleStore; + continue; + } + } + + // If the alloca is only read and written in one basic block, just perform a + // linear sweep over the block to eliminate it. + if (Info.OnlyUsedInOneBlock && + promoteSingleBlockAlloca(AI, Info, LBI, SQ.DL, DT, AC)) { + // The alloca has been processed, move on. 
+ RemoveFromAllocasList(AllocaNum); + continue; + } + + // If we haven't computed a numbering for the BB's in the function, do so + // now. + if (BBNumbers.empty()) { + unsigned ID = 0; + for (auto &BB : F) + BBNumbers[&BB] = ID++; + } + + // Remember the dbg.declare intrinsic describing this alloca, if any. + if (!Info.DbgUsers.empty()) + AllocaDbgUsers[AllocaNum] = Info.DbgUsers; + if (!Info.AssignmentTracking.empty()) + AllocaATInfo[AllocaNum] = Info.AssignmentTracking; + + // Keep the reverse mapping of the 'Allocas' array for the rename pass. + AllocaLookup[Allocas[AllocaNum]] = AllocaNum; + + // Unique the set of defining blocks for efficient lookup. + SmallPtrSet<BasicBlock *, 32> DefBlocks(Info.DefiningBlocks.begin(), + Info.DefiningBlocks.end()); + + // Determine which blocks the value is live in. These are blocks which lead + // to uses. + SmallPtrSet<BasicBlock *, 32> LiveInBlocks; + ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); + + // At this point, we're committed to promoting the alloca using IDF's, and + // the standard SSA construction algorithm. Determine which blocks need phi + // nodes and see if we can optimize out some work by avoiding insertion of + // dead phi nodes. + IDF.setLiveInBlocks(LiveInBlocks); + IDF.setDefiningBlocks(DefBlocks); + SmallVector<BasicBlock *, 32> PHIBlocks; + IDF.calculate(PHIBlocks); + llvm::sort(PHIBlocks, [this](BasicBlock *A, BasicBlock *B) { + return BBNumbers.find(A)->second < BBNumbers.find(B)->second; + }); + + unsigned CurrentVersion = 0; + for (BasicBlock *BB : PHIBlocks) + QueuePhiNode(BB, AllocaNum, CurrentVersion); + } + + if (Allocas.empty()) + return; // All of the allocas must have been trivial! + + LBI.clear(); + + // Set the incoming values for the basic block to be null values for all of + // the alloca's. We do this in case there is a load of a value that has not + // been stored yet. In this case, it will get this null value. + RenamePassData::ValVector Values(Allocas.size()); + for (unsigned i = 0, e = Allocas.size(); i != e; ++i) + Values[i] = UndefValue::get(Allocas[i]->getAllocatedType()); + + // When handling debug info, treat all incoming values as if they have unknown + // locations until proven otherwise. + RenamePassData::LocationVector Locations(Allocas.size()); + + // Walks all basic blocks in the function performing the SSA rename algorithm + // and inserting the phi nodes we marked as necessary + std::vector<RenamePassData> RenamePassWorkList; + RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values), + std::move(Locations)); + do { + RenamePassData RPD = std::move(RenamePassWorkList.back()); + RenamePassWorkList.pop_back(); + // RenamePass may add new worklist entries. + RenamePass(RPD.BB, RPD.Pred, RPD.Values, RPD.Locations, RenamePassWorkList); + } while (!RenamePassWorkList.empty()); + + // The renamer uses the Visited set to avoid infinite loops. Clear it now. + Visited.clear(); + + // Remove the allocas themselves from the function. + for (Instruction *A : Allocas) { + // Remove dbg.assigns linked to the alloca as these are now redundant. + at::deleteAssignmentMarkers(A); + // If there are any uses of the alloca instructions left, they must be in + // unreachable basic blocks that were not processed by walking the dominator + // tree. Just delete the users now. + if (!A->use_empty()) + A->replaceAllUsesWith(PoisonValue::get(A->getType())); + A->eraseFromParent(); + } + + // Remove alloca's dbg.declare intrinsics from the function. 
+ for (auto &DbgUsers : AllocaDbgUsers) { + for (auto *DII : DbgUsers) + if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref()) + DII->eraseFromParent(); + } + + // Loop over all of the PHI nodes and see if there are any that we can get + // rid of because they merge all of the same incoming values. This can + // happen due to undef values coming into the PHI nodes. This process is + // iterative, because eliminating one PHI node can cause others to be removed. + bool EliminatedAPHI = true; + while (EliminatedAPHI) { + EliminatedAPHI = false; + + // Iterating over NewPhiNodes is deterministic, so it is safe to try to + // simplify and RAUW them as we go. If it was not, we could add uses to + // the values we replace with in a non-deterministic order, thus creating + // non-deterministic def->use chains. + for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator + I = NewPhiNodes.begin(), + E = NewPhiNodes.end(); + I != E;) { + PHINode *PN = I->second; + + // If this PHI node merges one value and/or undefs, get the value. + if (Value *V = simplifyInstruction(PN, SQ)) { + PN->replaceAllUsesWith(V); + PN->eraseFromParent(); + NewPhiNodes.erase(I++); + EliminatedAPHI = true; + continue; + } + ++I; + } + } + + // At this point, the renamer has added entries to PHI nodes for all reachable + // code. Unfortunately, there may be unreachable blocks which the renamer + // hasn't traversed. If this is the case, the PHI nodes may not + // have incoming values for all predecessors. Loop over all PHI nodes we have + // created, inserting undef values if they are missing any incoming values. + for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator + I = NewPhiNodes.begin(), + E = NewPhiNodes.end(); + I != E; ++I) { + // We want to do this once per basic block. As such, only process a block + // when we find the PHI that is the first entry in the block. + PHINode *SomePHI = I->second; + BasicBlock *BB = SomePHI->getParent(); + if (&BB->front() != SomePHI) + continue; + + // Only do work here if there the PHI nodes are missing incoming values. We + // know that all PHI nodes that were inserted in a block will have the same + // number of incoming values, so we can just check any of them. + if (SomePHI->getNumIncomingValues() == getNumPreds(BB)) + continue; + + // Get the preds for BB. + SmallVector<BasicBlock *, 16> Preds(predecessors(BB)); + + // Ok, now we know that all of the PHI nodes are missing entries for some + // basic blocks. Start by sorting the incoming predecessors for efficient + // access. + auto CompareBBNumbers = [this](BasicBlock *A, BasicBlock *B) { + return BBNumbers.find(A)->second < BBNumbers.find(B)->second; + }; + llvm::sort(Preds, CompareBBNumbers); + + // Now we loop through all BB's which have entries in SomePHI and remove + // them from the Preds list. + for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) { + // Do a log(n) search of the Preds list for the entry we want. + SmallVectorImpl<BasicBlock *>::iterator EntIt = llvm::lower_bound( + Preds, SomePHI->getIncomingBlock(i), CompareBBNumbers); + assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) && + "PHI node has entry for a block which is not a predecessor!"); + + // Remove the entry + Preds.erase(EntIt); + } + + // At this point, the blocks left in the preds list must have dummy + // entries inserted into every PHI nodes for the block. Update all the phi + // nodes in this block that we are inserting (there could be phis before + // mem2reg runs). 
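+ // For instance (hypothetical blocks): if %merge is also reachable from a
+ // block %dead that the renamer never visited, every PHI inserted into
+ // %merge receives an extra undef incoming value for %dead here.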
+ unsigned NumBadPreds = SomePHI->getNumIncomingValues(); + BasicBlock::iterator BBI = BB->begin(); + while ((SomePHI = dyn_cast<PHINode>(BBI++)) && + SomePHI->getNumIncomingValues() == NumBadPreds) { + Value *UndefVal = UndefValue::get(SomePHI->getType()); + for (BasicBlock *Pred : Preds) + SomePHI->addIncoming(UndefVal, Pred); + } + } + + NewPhiNodes.clear(); +} + +/// Determine which blocks the value is live in. +/// +/// These are blocks which lead to uses. Knowing this allows us to avoid +/// inserting PHI nodes into blocks which don't lead to uses (thus, the +/// inserted phi nodes would be dead). +void PromoteMem2Reg::ComputeLiveInBlocks( + AllocaInst *AI, AllocaInfo &Info, + const SmallPtrSetImpl<BasicBlock *> &DefBlocks, + SmallPtrSetImpl<BasicBlock *> &LiveInBlocks) { + // To determine liveness, we must iterate through the predecessors of blocks + // where the def is live. Blocks are added to the worklist if we need to + // check their predecessors. Start with all the using blocks. + SmallVector<BasicBlock *, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(), + Info.UsingBlocks.end()); + + // If any of the using blocks is also a definition block, check to see if the + // definition occurs before or after the use. If it happens before the use, + // the value isn't really live-in. + for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) { + BasicBlock *BB = LiveInBlockWorklist[i]; + if (!DefBlocks.count(BB)) + continue; + + // Okay, this is a block that both uses and defines the value. If the first + // reference to the alloca is a def (store), then we know it isn't live-in. + for (BasicBlock::iterator I = BB->begin();; ++I) { + if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + if (SI->getOperand(1) != AI) + continue; + + // We found a store to the alloca before a load. The alloca is not + // actually live-in here. + LiveInBlockWorklist[i] = LiveInBlockWorklist.back(); + LiveInBlockWorklist.pop_back(); + --i; + --e; + break; + } + + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + // Okay, we found a load before a store to the alloca. It is actually + // live into this block. + if (LI->getOperand(0) == AI) + break; + } + } + + // Now that we have a set of blocks where the phi is live-in, recursively add + // their predecessors until we find the full region the value is live. + while (!LiveInBlockWorklist.empty()) { + BasicBlock *BB = LiveInBlockWorklist.pop_back_val(); + + // The block really is live in here, insert it into the set. If already in + // the set, then it has already been processed. + if (!LiveInBlocks.insert(BB).second) + continue; + + // Since the value is live into BB, it is either defined in a predecessor or + // live into it to. Add the preds to the worklist unless they are a + // defining block. + for (BasicBlock *P : predecessors(BB)) { + // The value is not live into a predecessor if it defines the value. + if (DefBlocks.count(P)) + continue; + + // Otherwise it is, add to the worklist. + LiveInBlockWorklist.push_back(P); + } + } +} + +/// Queue a phi-node to be added to a basic-block for a specific Alloca. +/// +/// Returns true if there wasn't already a phi-node for that variable +bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, + unsigned &Version) { + // Look up the basic-block in question. + PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)]; + + // If the BB already has a phi node added for the i'th alloca then we're done! + if (PN) + return false; + + // Create a PhiNode using the dereferenced type... 
and add the phi-node to the + // BasicBlock. + PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB), + Allocas[AllocaNo]->getName() + "." + Twine(Version++), + &BB->front()); + ++NumPHIInsert; + PhiToAllocaMap[PN] = AllocaNo; + return true; +} + +/// Update the debug location of a phi. \p ApplyMergedLoc indicates whether to +/// create a merged location incorporating \p DL, or to set \p DL directly. +static void updateForIncomingValueLocation(PHINode *PN, DebugLoc DL, + bool ApplyMergedLoc) { + if (ApplyMergedLoc) + PN->applyMergedLocation(PN->getDebugLoc(), DL); + else + PN->setDebugLoc(DL); +} + +/// Recursively traverse the CFG of the function, renaming loads and +/// stores to the allocas which we are promoting. +/// +/// IncomingVals indicates what value each Alloca contains on exit from the +/// predecessor block Pred. +void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, + RenamePassData::ValVector &IncomingVals, + RenamePassData::LocationVector &IncomingLocs, + std::vector<RenamePassData> &Worklist) { +NextIteration: + // If we are inserting any phi nodes into this BB, they will already be in the + // block. + if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) { + // If we have PHI nodes to update, compute the number of edges from Pred to + // BB. + if (PhiToAllocaMap.count(APN)) { + // We want to be able to distinguish between PHI nodes being inserted by + // this invocation of mem2reg from those phi nodes that already existed in + // the IR before mem2reg was run. We determine that APN is being inserted + // because it is missing incoming edges. All other PHI nodes being + // inserted by this pass of mem2reg will have the same number of incoming + // operands so far. Remember this count. + unsigned NewPHINumOperands = APN->getNumOperands(); + + unsigned NumEdges = llvm::count(successors(Pred), BB); + assert(NumEdges && "Must be at least one edge from Pred to BB!"); + + // Add entries for all the phis. + BasicBlock::iterator PNI = BB->begin(); + do { + unsigned AllocaNo = PhiToAllocaMap[APN]; + + // Update the location of the phi node. + updateForIncomingValueLocation(APN, IncomingLocs[AllocaNo], + APN->getNumIncomingValues() > 0); + + // Add N incoming values to the PHI node. + for (unsigned i = 0; i != NumEdges; ++i) + APN->addIncoming(IncomingVals[AllocaNo], Pred); + + // The currently active variable for this block is now the PHI. + IncomingVals[AllocaNo] = APN; + AllocaATInfo[AllocaNo].updateForNewPhi(APN, DIB); + for (DbgVariableIntrinsic *DII : AllocaDbgUsers[AllocaNo]) + if (DII->isAddressOfVariable()) + ConvertDebugDeclareToDebugValue(DII, APN, DIB); + + // Get the next phi node. + ++PNI; + APN = dyn_cast<PHINode>(PNI); + if (!APN) + break; + + // Verify that it is missing entries. If not, it is not being inserted + // by this mem2reg invocation so we want to ignore it. + } while (APN->getNumOperands() == NewPHINumOperands); + } + } + + // Don't revisit blocks. 
+ if (!Visited.insert(BB).second) + return; + + for (BasicBlock::iterator II = BB->begin(); !II->isTerminator();) { + Instruction *I = &*II++; // get the instruction, increment iterator + + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand()); + if (!Src) + continue; + + DenseMap<AllocaInst *, unsigned>::iterator AI = AllocaLookup.find(Src); + if (AI == AllocaLookup.end()) + continue; + + Value *V = IncomingVals[AI->second]; + convertMetadataToAssumes(LI, V, SQ.DL, AC, &DT); + + // Anything using the load now uses the current value. + LI->replaceAllUsesWith(V); + LI->eraseFromParent(); + } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + // Delete this instruction and mark the name as the current holder of the + // value + AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand()); + if (!Dest) + continue; + + DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest); + if (ai == AllocaLookup.end()) + continue; + + // what value were we writing? + unsigned AllocaNo = ai->second; + IncomingVals[AllocaNo] = SI->getOperand(0); + + // Record debuginfo for the store before removing it. + IncomingLocs[AllocaNo] = SI->getDebugLoc(); + AllocaATInfo[AllocaNo].updateForDeletedStore(SI, DIB); + for (DbgVariableIntrinsic *DII : AllocaDbgUsers[ai->second]) + if (DII->isAddressOfVariable()) + ConvertDebugDeclareToDebugValue(DII, SI, DIB); + SI->eraseFromParent(); + } + } + + // 'Recurse' to our successors. + succ_iterator I = succ_begin(BB), E = succ_end(BB); + if (I == E) + return; + + // Keep track of the successors so we don't visit the same successor twice + SmallPtrSet<BasicBlock *, 8> VisitedSuccs; + + // Handle the first successor without using the worklist. + VisitedSuccs.insert(*I); + Pred = BB; + BB = *I; + ++I; + + for (; I != E; ++I) + if (VisitedSuccs.insert(*I).second) + Worklist.emplace_back(*I, Pred, IncomingVals, IncomingLocs); + + goto NextIteration; +} + +void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT, + AssumptionCache *AC) { + // If there is nothing to do, bail out... + if (Allocas.empty()) + return; + + PromoteMem2Reg(Allocas, DT, AC).run(); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/RelLookupTableConverter.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/RelLookupTableConverter.cpp new file mode 100644 index 0000000000..c9ff94dc97 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/RelLookupTableConverter.cpp @@ -0,0 +1,221 @@ +//===- RelLookupTableConverterPass - Rel Table Conv -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements relative lookup table converter that converts +// lookup tables to relative lookup tables to make them PIC-friendly. 
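+//
+// For example (an illustrative sketch, names made up): a table of pointers
+//
+//   @switch.table.foo = private unnamed_addr constant [2 x ptr] [ptr @a, ptr @b]
+//
+// becomes a table of 32-bit offsets relative to the new global,
+//
+//   @reltable.foo = private unnamed_addr constant [2 x i32] [...], align 4
+//
+// and the original gep + load pair is replaced by roughly
+//
+//   %off = shl i32 %idx, 2
+//   %elt = call ptr @llvm.load.relative.i32(ptr @reltable.foo, i32 %off)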
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/RelLookupTableConverter.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" + +using namespace llvm; + +static bool shouldConvertToRelLookupTable(Module &M, GlobalVariable &GV) { + // If lookup table has more than one user, + // do not generate a relative lookup table. + // This is to simplify the analysis that needs to be done for this pass. + // TODO: Add support for lookup tables with multiple uses. + // For ex, this can happen when a function that uses a lookup table gets + // inlined into multiple call sites. + if (!GV.hasInitializer() || + !GV.isConstant() || + !GV.hasOneUse()) + return false; + + GetElementPtrInst *GEP = + dyn_cast<GetElementPtrInst>(GV.use_begin()->getUser()); + if (!GEP || !GEP->hasOneUse() || + GV.getValueType() != GEP->getSourceElementType()) + return false; + + LoadInst *Load = dyn_cast<LoadInst>(GEP->use_begin()->getUser()); + if (!Load || !Load->hasOneUse() || + Load->getType() != GEP->getResultElementType()) + return false; + + // If the original lookup table does not have local linkage and is + // not dso_local, do not generate a relative lookup table. + // This optimization creates a relative lookup table that consists of + // offsets between the start of the lookup table and its elements. + // To be able to generate these offsets, relative lookup table and + // its elements should have internal linkage and be dso_local, which means + // that they should resolve to symbols within the same linkage unit. + if (!GV.hasLocalLinkage() || + !GV.isDSOLocal() || + !GV.isImplicitDSOLocal()) + return false; + + ConstantArray *Array = dyn_cast<ConstantArray>(GV.getInitializer()); + if (!Array) + return false; + + // If values are not 64-bit pointers, do not generate a relative lookup table. + const DataLayout &DL = M.getDataLayout(); + Type *ElemType = Array->getType()->getElementType(); + if (!ElemType->isPointerTy() || DL.getPointerTypeSizeInBits(ElemType) != 64) + return false; + + for (const Use &Op : Array->operands()) { + Constant *ConstOp = cast<Constant>(&Op); + GlobalValue *GVOp; + APInt Offset; + + // If an operand is not a constant offset from a lookup table, + // do not generate a relative lookup table. + if (!IsConstantOffsetFromGlobal(ConstOp, GVOp, Offset, DL)) + return false; + + // If operand is mutable, do not generate a relative lookup table. + auto *GlovalVarOp = dyn_cast<GlobalVariable>(GVOp); + if (!GlovalVarOp || !GlovalVarOp->isConstant()) + return false; + + if (!GlovalVarOp->hasLocalLinkage() || + !GlovalVarOp->isDSOLocal() || + !GlovalVarOp->isImplicitDSOLocal()) + return false; + } + + return true; +} + +static GlobalVariable *createRelLookupTable(Function &Func, + GlobalVariable &LookupTable) { + Module &M = *Func.getParent(); + ConstantArray *LookupTableArr = + cast<ConstantArray>(LookupTable.getInitializer()); + unsigned NumElts = LookupTableArr->getType()->getNumElements(); + ArrayType *IntArrayTy = + ArrayType::get(Type::getInt32Ty(M.getContext()), NumElts); + + GlobalVariable *RelLookupTable = new GlobalVariable( + M, IntArrayTy, LookupTable.isConstant(), LookupTable.getLinkage(), + nullptr, "reltable." 
+ Func.getName(), &LookupTable, + LookupTable.getThreadLocalMode(), LookupTable.getAddressSpace(), + LookupTable.isExternallyInitialized()); + + uint64_t Idx = 0; + SmallVector<Constant *, 64> RelLookupTableContents(NumElts); + + for (Use &Operand : LookupTableArr->operands()) { + Constant *Element = cast<Constant>(Operand); + Type *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext()); + Constant *Base = llvm::ConstantExpr::getPtrToInt(RelLookupTable, IntPtrTy); + Constant *Target = llvm::ConstantExpr::getPtrToInt(Element, IntPtrTy); + Constant *Sub = llvm::ConstantExpr::getSub(Target, Base); + Constant *RelOffset = + llvm::ConstantExpr::getTrunc(Sub, Type::getInt32Ty(M.getContext())); + RelLookupTableContents[Idx++] = RelOffset; + } + + Constant *Initializer = + ConstantArray::get(IntArrayTy, RelLookupTableContents); + RelLookupTable->setInitializer(Initializer); + RelLookupTable->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + RelLookupTable->setAlignment(llvm::Align(4)); + return RelLookupTable; +} + +static void convertToRelLookupTable(GlobalVariable &LookupTable) { + GetElementPtrInst *GEP = + cast<GetElementPtrInst>(LookupTable.use_begin()->getUser()); + LoadInst *Load = cast<LoadInst>(GEP->use_begin()->getUser()); + + Module &M = *LookupTable.getParent(); + BasicBlock *BB = GEP->getParent(); + IRBuilder<> Builder(BB); + Function &Func = *BB->getParent(); + + // Generate an array that consists of relative offsets. + GlobalVariable *RelLookupTable = createRelLookupTable(Func, LookupTable); + + // Place new instruction sequence before GEP. + Builder.SetInsertPoint(GEP); + Value *Index = GEP->getOperand(2); + IntegerType *IntTy = cast<IntegerType>(Index->getType()); + Value *Offset = + Builder.CreateShl(Index, ConstantInt::get(IntTy, 2), "reltable.shift"); + + // Insert the call to load.relative intrinsic before LOAD. + // GEP might not be immediately followed by a LOAD, like it can be hoisted + // outside the loop or another instruction might be inserted them in between. + Builder.SetInsertPoint(Load); + Function *LoadRelIntrinsic = llvm::Intrinsic::getDeclaration( + &M, Intrinsic::load_relative, {Index->getType()}); + Value *Base = Builder.CreateBitCast(RelLookupTable, Builder.getInt8PtrTy()); + + // Create a call to load.relative intrinsic that computes the target address + // by adding base address (lookup table address) and relative offset. + Value *Result = Builder.CreateCall(LoadRelIntrinsic, {Base, Offset}, + "reltable.intrinsic"); + + // Create a bitcast instruction if necessary. + if (Load->getType() != Builder.getInt8PtrTy()) + Result = Builder.CreateBitCast(Result, Load->getType(), "reltable.bitcast"); + + // Replace load instruction with the new generated instruction sequence. + Load->replaceAllUsesWith(Result); + // Remove Load and GEP instructions. + Load->eraseFromParent(); + GEP->eraseFromParent(); +} + +// Convert lookup tables to relative lookup tables in the module. +static bool convertToRelativeLookupTables( + Module &M, function_ref<TargetTransformInfo &(Function &)> GetTTI) { + for (Function &F : M) { + if (F.isDeclaration()) + continue; + + // Check if we have a target that supports relative lookup tables. + if (!GetTTI(F).shouldBuildRelLookupTables()) + return false; + + // We assume that the result is independent of the checked function. 
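+    // shouldBuildRelLookupTables() depends only on the target/subtarget, not
+    // on the particular function, so querying it for the first function with
+    // a body is enough; if it is false the whole pass returns above without
+    // changing anything.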
+ break; + } + + bool Changed = false; + + for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) { + if (!shouldConvertToRelLookupTable(M, GV)) + continue; + + convertToRelLookupTable(GV); + + // Remove the original lookup table. + GV.eraseFromParent(); + + Changed = true; + } + + return Changed; +} + +PreservedAnalyses RelLookupTableConverterPass::run(Module &M, + ModuleAnalysisManager &AM) { + FunctionAnalysisManager &FAM = + AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + + auto GetTTI = [&](Function &F) -> TargetTransformInfo & { + return FAM.getResult<TargetIRAnalysis>(F); + }; + + if (!convertToRelativeLookupTables(M, GetTTI)) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); + return PA; +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/SCCPSolver.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/SCCPSolver.cpp new file mode 100644 index 0000000000..8d03a0d8a2 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/SCCPSolver.cpp @@ -0,0 +1,1922 @@ +//===- SCCPSolver.cpp - SCCP Utility --------------------------- *- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// \file +// This file implements the Sparse Conditional Constant Propagation (SCCP) +// utility. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SCCPSolver.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ValueLattice.h" +#include "llvm/Analysis/ValueLatticeUtils.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/Local.h" +#include <cassert> +#include <utility> +#include <vector> + +using namespace llvm; + +#define DEBUG_TYPE "sccp" + +// The maximum number of range extensions allowed for operations requiring +// widening. +static const unsigned MaxNumRangeExtensions = 10; + +/// Returns MergeOptions with MaxWidenSteps set to MaxNumRangeExtensions. +static ValueLatticeElement::MergeOptions getMaxWidenStepsOpts() { + return ValueLatticeElement::MergeOptions().setMaxWidenSteps( + MaxNumRangeExtensions); +} + +namespace llvm { + +bool SCCPSolver::isConstant(const ValueLatticeElement &LV) { + return LV.isConstant() || + (LV.isConstantRange() && LV.getConstantRange().isSingleElement()); +} + +bool SCCPSolver::isOverdefined(const ValueLatticeElement &LV) { + return !LV.isUnknownOrUndef() && !SCCPSolver::isConstant(LV); +} + +static bool canRemoveInstruction(Instruction *I) { + if (wouldInstructionBeTriviallyDead(I)) + return true; + + // Some instructions can be handled but are rejected above. Catch + // those cases by falling through to here. + // TODO: Mark globals as being constant earlier, so + // TODO: wouldInstructionBeTriviallyDead() knows that atomic loads + // TODO: are safe to remove. 
+ return isa<LoadInst>(I); +} + +bool SCCPSolver::tryToReplaceWithConstant(Value *V) { + Constant *Const = nullptr; + if (V->getType()->isStructTy()) { + std::vector<ValueLatticeElement> IVs = getStructLatticeValueFor(V); + if (llvm::any_of(IVs, isOverdefined)) + return false; + std::vector<Constant *> ConstVals; + auto *ST = cast<StructType>(V->getType()); + for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { + ValueLatticeElement V = IVs[i]; + ConstVals.push_back(SCCPSolver::isConstant(V) + ? getConstant(V) + : UndefValue::get(ST->getElementType(i))); + } + Const = ConstantStruct::get(ST, ConstVals); + } else { + const ValueLatticeElement &IV = getLatticeValueFor(V); + if (isOverdefined(IV)) + return false; + + Const = SCCPSolver::isConstant(IV) ? getConstant(IV) + : UndefValue::get(V->getType()); + } + assert(Const && "Constant is nullptr here!"); + + // Replacing `musttail` instructions with constant breaks `musttail` invariant + // unless the call itself can be removed. + // Calls with "clang.arc.attachedcall" implicitly use the return value and + // those uses cannot be updated with a constant. + CallBase *CB = dyn_cast<CallBase>(V); + if (CB && ((CB->isMustTailCall() && + !canRemoveInstruction(CB)) || + CB->getOperandBundle(LLVMContext::OB_clang_arc_attachedcall))) { + Function *F = CB->getCalledFunction(); + + // Don't zap returns of the callee + if (F) + addToMustPreserveReturnsInFunctions(F); + + LLVM_DEBUG(dbgs() << " Can\'t treat the result of call " << *CB + << " as a constant\n"); + return false; + } + + LLVM_DEBUG(dbgs() << " Constant: " << *Const << " = " << *V << '\n'); + + // Replaces all of the uses of a variable with uses of the constant. + V->replaceAllUsesWith(Const); + return true; +} + +/// Try to replace signed instructions with their unsigned equivalent. +static bool replaceSignedInst(SCCPSolver &Solver, + SmallPtrSetImpl<Value *> &InsertedValues, + Instruction &Inst) { + // Determine if a signed value is known to be >= 0. + auto isNonNegative = [&Solver](Value *V) { + // If this value was constant-folded, it may not have a solver entry. + // Handle integers. Otherwise, return false. + if (auto *C = dyn_cast<Constant>(V)) { + auto *CInt = dyn_cast<ConstantInt>(C); + return CInt && !CInt->isNegative(); + } + const ValueLatticeElement &IV = Solver.getLatticeValueFor(V); + return IV.isConstantRange(/*UndefAllowed=*/false) && + IV.getConstantRange().isAllNonNegative(); + }; + + Instruction *NewInst = nullptr; + switch (Inst.getOpcode()) { + // Note: We do not fold sitofp -> uitofp here because that could be more + // expensive in codegen and may not be reversible in the backend. + case Instruction::SExt: { + // If the source value is not negative, this is a zext. + Value *Op0 = Inst.getOperand(0); + if (InsertedValues.count(Op0) || !isNonNegative(Op0)) + return false; + NewInst = new ZExtInst(Op0, Inst.getType(), "", &Inst); + break; + } + case Instruction::AShr: { + // If the shifted value is not negative, this is a logical shift right. + Value *Op0 = Inst.getOperand(0); + if (InsertedValues.count(Op0) || !isNonNegative(Op0)) + return false; + NewInst = BinaryOperator::CreateLShr(Op0, Inst.getOperand(1), "", &Inst); + break; + } + case Instruction::SDiv: + case Instruction::SRem: { + // If both operands are not negative, this is the same as udiv/urem. 
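+    // For example, "%q = sdiv i32 %a, %b" becomes "%q = udiv i32 %a, %b" once
+    // the solver has proven both %a and %b non-negative.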
+ Value *Op0 = Inst.getOperand(0), *Op1 = Inst.getOperand(1); + if (InsertedValues.count(Op0) || InsertedValues.count(Op1) || + !isNonNegative(Op0) || !isNonNegative(Op1)) + return false; + auto NewOpcode = Inst.getOpcode() == Instruction::SDiv ? Instruction::UDiv + : Instruction::URem; + NewInst = BinaryOperator::Create(NewOpcode, Op0, Op1, "", &Inst); + break; + } + default: + return false; + } + + // Wire up the new instruction and update state. + assert(NewInst && "Expected replacement instruction"); + NewInst->takeName(&Inst); + InsertedValues.insert(NewInst); + Inst.replaceAllUsesWith(NewInst); + Solver.removeLatticeValueFor(&Inst); + Inst.eraseFromParent(); + return true; +} + +bool SCCPSolver::simplifyInstsInBlock(BasicBlock &BB, + SmallPtrSetImpl<Value *> &InsertedValues, + Statistic &InstRemovedStat, + Statistic &InstReplacedStat) { + bool MadeChanges = false; + for (Instruction &Inst : make_early_inc_range(BB)) { + if (Inst.getType()->isVoidTy()) + continue; + if (tryToReplaceWithConstant(&Inst)) { + if (canRemoveInstruction(&Inst)) + Inst.eraseFromParent(); + + MadeChanges = true; + ++InstRemovedStat; + } else if (replaceSignedInst(*this, InsertedValues, Inst)) { + MadeChanges = true; + ++InstReplacedStat; + } + } + return MadeChanges; +} + +bool SCCPSolver::removeNonFeasibleEdges(BasicBlock *BB, DomTreeUpdater &DTU, + BasicBlock *&NewUnreachableBB) const { + SmallPtrSet<BasicBlock *, 8> FeasibleSuccessors; + bool HasNonFeasibleEdges = false; + for (BasicBlock *Succ : successors(BB)) { + if (isEdgeFeasible(BB, Succ)) + FeasibleSuccessors.insert(Succ); + else + HasNonFeasibleEdges = true; + } + + // All edges feasible, nothing to do. + if (!HasNonFeasibleEdges) + return false; + + // SCCP can only determine non-feasible edges for br, switch and indirectbr. + Instruction *TI = BB->getTerminator(); + assert((isa<BranchInst>(TI) || isa<SwitchInst>(TI) || + isa<IndirectBrInst>(TI)) && + "Terminator must be a br, switch or indirectbr"); + + if (FeasibleSuccessors.size() == 0) { + // Branch on undef/poison, replace with unreachable. + SmallPtrSet<BasicBlock *, 8> SeenSuccs; + SmallVector<DominatorTree::UpdateType, 8> Updates; + for (BasicBlock *Succ : successors(BB)) { + Succ->removePredecessor(BB); + if (SeenSuccs.insert(Succ).second) + Updates.push_back({DominatorTree::Delete, BB, Succ}); + } + TI->eraseFromParent(); + new UnreachableInst(BB->getContext(), BB); + DTU.applyUpdatesPermissive(Updates); + } else if (FeasibleSuccessors.size() == 1) { + // Replace with an unconditional branch to the only feasible successor. + BasicBlock *OnlyFeasibleSuccessor = *FeasibleSuccessors.begin(); + SmallVector<DominatorTree::UpdateType, 8> Updates; + bool HaveSeenOnlyFeasibleSuccessor = false; + for (BasicBlock *Succ : successors(BB)) { + if (Succ == OnlyFeasibleSuccessor && !HaveSeenOnlyFeasibleSuccessor) { + // Don't remove the edge to the only feasible successor the first time + // we see it. We still do need to remove any multi-edges to it though. + HaveSeenOnlyFeasibleSuccessor = true; + continue; + } + + Succ->removePredecessor(BB); + Updates.push_back({DominatorTree::Delete, BB, Succ}); + } + + BranchInst::Create(OnlyFeasibleSuccessor, BB); + TI->eraseFromParent(); + DTU.applyUpdatesPermissive(Updates); + } else if (FeasibleSuccessors.size() > 1) { + SwitchInstProfUpdateWrapper SI(*cast<SwitchInst>(TI)); + SmallVector<DominatorTree::UpdateType, 8> Updates; + + // If the default destination is unfeasible it will never be taken. 
Replace + // it with a new block with a single Unreachable instruction. + BasicBlock *DefaultDest = SI->getDefaultDest(); + if (!FeasibleSuccessors.contains(DefaultDest)) { + if (!NewUnreachableBB) { + NewUnreachableBB = + BasicBlock::Create(DefaultDest->getContext(), "default.unreachable", + DefaultDest->getParent(), DefaultDest); + new UnreachableInst(DefaultDest->getContext(), NewUnreachableBB); + } + + SI->setDefaultDest(NewUnreachableBB); + Updates.push_back({DominatorTree::Delete, BB, DefaultDest}); + Updates.push_back({DominatorTree::Insert, BB, NewUnreachableBB}); + } + + for (auto CI = SI->case_begin(); CI != SI->case_end();) { + if (FeasibleSuccessors.contains(CI->getCaseSuccessor())) { + ++CI; + continue; + } + + BasicBlock *Succ = CI->getCaseSuccessor(); + Succ->removePredecessor(BB); + Updates.push_back({DominatorTree::Delete, BB, Succ}); + SI.removeCase(CI); + // Don't increment CI, as we removed a case. + } + + DTU.applyUpdatesPermissive(Updates); + } else { + llvm_unreachable("Must have at least one feasible successor"); + } + return true; +} + +/// Helper class for SCCPSolver. This implements the instruction visitor and +/// holds all the state. +class SCCPInstVisitor : public InstVisitor<SCCPInstVisitor> { + const DataLayout &DL; + std::function<const TargetLibraryInfo &(Function &)> GetTLI; + SmallPtrSet<BasicBlock *, 8> BBExecutable; // The BBs that are executable. + DenseMap<Value *, ValueLatticeElement> + ValueState; // The state each value is in. + + /// StructValueState - This maintains ValueState for values that have + /// StructType, for example for formal arguments, calls, insertelement, etc. + DenseMap<std::pair<Value *, unsigned>, ValueLatticeElement> StructValueState; + + /// GlobalValue - If we are tracking any values for the contents of a global + /// variable, we keep a mapping from the constant accessor to the element of + /// the global, to the currently known value. If the value becomes + /// overdefined, it's entry is simply removed from this map. + DenseMap<GlobalVariable *, ValueLatticeElement> TrackedGlobals; + + /// TrackedRetVals - If we are tracking arguments into and the return + /// value out of a function, it will have an entry in this map, indicating + /// what the known return value for the function is. + MapVector<Function *, ValueLatticeElement> TrackedRetVals; + + /// TrackedMultipleRetVals - Same as TrackedRetVals, but used for functions + /// that return multiple values. + MapVector<std::pair<Function *, unsigned>, ValueLatticeElement> + TrackedMultipleRetVals; + + /// MRVFunctionsTracked - Each function in TrackedMultipleRetVals is + /// represented here for efficient lookup. + SmallPtrSet<Function *, 16> MRVFunctionsTracked; + + /// A list of functions whose return cannot be modified. + SmallPtrSet<Function *, 16> MustPreserveReturnsInFunctions; + + /// TrackingIncomingArguments - This is the set of functions for whose + /// arguments we make optimistic assumptions about and try to prove as + /// constants. + SmallPtrSet<Function *, 16> TrackingIncomingArguments; + + /// The reason for two worklists is that overdefined is the lowest state + /// on the lattice, and moving things to overdefined as fast as possible + /// makes SCCP converge much faster. + /// + /// By having a separate worklist, we accomplish this because everything + /// possibly overdefined will become overdefined at the soonest possible + /// point. 
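+  /// Lattice values only move in one direction: unknown/undef -> constant (or
+  /// constant range, with a bounded number of widenings) -> overdefined, so
+  /// each value is re-pushed onto these worklists only a small number of
+  /// times.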
+ SmallVector<Value *, 64> OverdefinedInstWorkList; + SmallVector<Value *, 64> InstWorkList; + + // The BasicBlock work list + SmallVector<BasicBlock *, 64> BBWorkList; + + /// KnownFeasibleEdges - Entries in this set are edges which have already had + /// PHI nodes retriggered. + using Edge = std::pair<BasicBlock *, BasicBlock *>; + DenseSet<Edge> KnownFeasibleEdges; + + DenseMap<Function *, AnalysisResultsForFn> AnalysisResults; + DenseMap<Value *, SmallPtrSet<User *, 2>> AdditionalUsers; + + LLVMContext &Ctx; + +private: + ConstantInt *getConstantInt(const ValueLatticeElement &IV) const { + return dyn_cast_or_null<ConstantInt>(getConstant(IV)); + } + + // pushToWorkList - Helper for markConstant/markOverdefined + void pushToWorkList(ValueLatticeElement &IV, Value *V); + + // Helper to push \p V to the worklist, after updating it to \p IV. Also + // prints a debug message with the updated value. + void pushToWorkListMsg(ValueLatticeElement &IV, Value *V); + + // markConstant - Make a value be marked as "constant". If the value + // is not already a constant, add it to the instruction work list so that + // the users of the instruction are updated later. + bool markConstant(ValueLatticeElement &IV, Value *V, Constant *C, + bool MayIncludeUndef = false); + + bool markConstant(Value *V, Constant *C) { + assert(!V->getType()->isStructTy() && "structs should use mergeInValue"); + return markConstant(ValueState[V], V, C); + } + + // markOverdefined - Make a value be marked as "overdefined". If the + // value is not already overdefined, add it to the overdefined instruction + // work list so that the users of the instruction are updated later. + bool markOverdefined(ValueLatticeElement &IV, Value *V); + + /// Merge \p MergeWithV into \p IV and push \p V to the worklist, if \p IV + /// changes. + bool mergeInValue(ValueLatticeElement &IV, Value *V, + ValueLatticeElement MergeWithV, + ValueLatticeElement::MergeOptions Opts = { + /*MayIncludeUndef=*/false, /*CheckWiden=*/false}); + + bool mergeInValue(Value *V, ValueLatticeElement MergeWithV, + ValueLatticeElement::MergeOptions Opts = { + /*MayIncludeUndef=*/false, /*CheckWiden=*/false}) { + assert(!V->getType()->isStructTy() && + "non-structs should use markConstant"); + return mergeInValue(ValueState[V], V, MergeWithV, Opts); + } + + /// getValueState - Return the ValueLatticeElement object that corresponds to + /// the value. This function handles the case when the value hasn't been seen + /// yet by properly seeding constants etc. + ValueLatticeElement &getValueState(Value *V) { + assert(!V->getType()->isStructTy() && "Should use getStructValueState"); + + auto I = ValueState.insert(std::make_pair(V, ValueLatticeElement())); + ValueLatticeElement &LV = I.first->second; + + if (!I.second) + return LV; // Common case, already in the map. + + if (auto *C = dyn_cast<Constant>(V)) + LV.markConstant(C); // Constants are constant + + // All others are unknown by default. + return LV; + } + + /// getStructValueState - Return the ValueLatticeElement object that + /// corresponds to the value/field pair. This function handles the case when + /// the value hasn't been seen yet by properly seeding constants etc. 
+ ValueLatticeElement &getStructValueState(Value *V, unsigned i) { + assert(V->getType()->isStructTy() && "Should use getValueState"); + assert(i < cast<StructType>(V->getType())->getNumElements() && + "Invalid element #"); + + auto I = StructValueState.insert( + std::make_pair(std::make_pair(V, i), ValueLatticeElement())); + ValueLatticeElement &LV = I.first->second; + + if (!I.second) + return LV; // Common case, already in the map. + + if (auto *C = dyn_cast<Constant>(V)) { + Constant *Elt = C->getAggregateElement(i); + + if (!Elt) + LV.markOverdefined(); // Unknown sort of constant. + else + LV.markConstant(Elt); // Constants are constant. + } + + // All others are underdefined by default. + return LV; + } + + /// markEdgeExecutable - Mark a basic block as executable, adding it to the BB + /// work list if it is not already executable. + bool markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest); + + // getFeasibleSuccessors - Return a vector of booleans to indicate which + // successors are reachable from a given terminator instruction. + void getFeasibleSuccessors(Instruction &TI, SmallVectorImpl<bool> &Succs); + + // OperandChangedState - This method is invoked on all of the users of an + // instruction that was just changed state somehow. Based on this + // information, we need to update the specified user of this instruction. + void operandChangedState(Instruction *I) { + if (BBExecutable.count(I->getParent())) // Inst is executable? + visit(*I); + } + + // Add U as additional user of V. + void addAdditionalUser(Value *V, User *U) { + auto Iter = AdditionalUsers.insert({V, {}}); + Iter.first->second.insert(U); + } + + // Mark I's users as changed, including AdditionalUsers. + void markUsersAsChanged(Value *I) { + // Functions include their arguments in the use-list. Changed function + // values mean that the result of the function changed. We only need to + // update the call sites with the new function result and do not have to + // propagate the call arguments. + if (isa<Function>(I)) { + for (User *U : I->users()) { + if (auto *CB = dyn_cast<CallBase>(U)) + handleCallResult(*CB); + } + } else { + for (User *U : I->users()) + if (auto *UI = dyn_cast<Instruction>(U)) + operandChangedState(UI); + } + + auto Iter = AdditionalUsers.find(I); + if (Iter != AdditionalUsers.end()) { + // Copy additional users before notifying them of changes, because new + // users may be added, potentially invalidating the iterator. + SmallVector<Instruction *, 2> ToNotify; + for (User *U : Iter->second) + if (auto *UI = dyn_cast<Instruction>(U)) + ToNotify.push_back(UI); + for (Instruction *UI : ToNotify) + operandChangedState(UI); + } + } + void handleCallOverdefined(CallBase &CB); + void handleCallResult(CallBase &CB); + void handleCallArguments(CallBase &CB); + void handleExtractOfWithOverflow(ExtractValueInst &EVI, + const WithOverflowInst *WO, unsigned Idx); + +private: + friend class InstVisitor<SCCPInstVisitor>; + + // visit implementations - Something changed in this instruction. Either an + // operand made a transition, or the instruction is newly executable. Change + // the value type of I to reflect these changes if appropriate. 
+ void visitPHINode(PHINode &I); + + // Terminators + + void visitReturnInst(ReturnInst &I); + void visitTerminator(Instruction &TI); + + void visitCastInst(CastInst &I); + void visitSelectInst(SelectInst &I); + void visitUnaryOperator(Instruction &I); + void visitBinaryOperator(Instruction &I); + void visitCmpInst(CmpInst &I); + void visitExtractValueInst(ExtractValueInst &EVI); + void visitInsertValueInst(InsertValueInst &IVI); + + void visitCatchSwitchInst(CatchSwitchInst &CPI) { + markOverdefined(&CPI); + visitTerminator(CPI); + } + + // Instructions that cannot be folded away. + + void visitStoreInst(StoreInst &I); + void visitLoadInst(LoadInst &I); + void visitGetElementPtrInst(GetElementPtrInst &I); + + void visitInvokeInst(InvokeInst &II) { + visitCallBase(II); + visitTerminator(II); + } + + void visitCallBrInst(CallBrInst &CBI) { + visitCallBase(CBI); + visitTerminator(CBI); + } + + void visitCallBase(CallBase &CB); + void visitResumeInst(ResumeInst &I) { /*returns void*/ + } + void visitUnreachableInst(UnreachableInst &I) { /*returns void*/ + } + void visitFenceInst(FenceInst &I) { /*returns void*/ + } + + void visitInstruction(Instruction &I); + +public: + void addAnalysis(Function &F, AnalysisResultsForFn A) { + AnalysisResults.insert({&F, std::move(A)}); + } + + void visitCallInst(CallInst &I) { visitCallBase(I); } + + bool markBlockExecutable(BasicBlock *BB); + + const PredicateBase *getPredicateInfoFor(Instruction *I) { + auto A = AnalysisResults.find(I->getParent()->getParent()); + if (A == AnalysisResults.end()) + return nullptr; + return A->second.PredInfo->getPredicateInfoFor(I); + } + + const LoopInfo &getLoopInfo(Function &F) { + auto A = AnalysisResults.find(&F); + assert(A != AnalysisResults.end() && A->second.LI && + "Need LoopInfo analysis results for function."); + return *A->second.LI; + } + + DomTreeUpdater getDTU(Function &F) { + auto A = AnalysisResults.find(&F); + assert(A != AnalysisResults.end() && "Need analysis results for function."); + return {A->second.DT, A->second.PDT, DomTreeUpdater::UpdateStrategy::Lazy}; + } + + SCCPInstVisitor(const DataLayout &DL, + std::function<const TargetLibraryInfo &(Function &)> GetTLI, + LLVMContext &Ctx) + : DL(DL), GetTLI(GetTLI), Ctx(Ctx) {} + + void trackValueOfGlobalVariable(GlobalVariable *GV) { + // We only track the contents of scalar globals. + if (GV->getValueType()->isSingleValueType()) { + ValueLatticeElement &IV = TrackedGlobals[GV]; + IV.markConstant(GV->getInitializer()); + } + } + + void addTrackedFunction(Function *F) { + // Add an entry, F -> undef. 
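+    // Struct returns are tracked per element in TrackedMultipleRetVals; other
+    // non-void returns get a single TrackedRetVals entry; void functions need
+    // no tracking.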
+ if (auto *STy = dyn_cast<StructType>(F->getReturnType())) { + MRVFunctionsTracked.insert(F); + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) + TrackedMultipleRetVals.insert( + std::make_pair(std::make_pair(F, i), ValueLatticeElement())); + } else if (!F->getReturnType()->isVoidTy()) + TrackedRetVals.insert(std::make_pair(F, ValueLatticeElement())); + } + + void addToMustPreserveReturnsInFunctions(Function *F) { + MustPreserveReturnsInFunctions.insert(F); + } + + bool mustPreserveReturn(Function *F) { + return MustPreserveReturnsInFunctions.count(F); + } + + void addArgumentTrackedFunction(Function *F) { + TrackingIncomingArguments.insert(F); + } + + bool isArgumentTrackedFunction(Function *F) { + return TrackingIncomingArguments.count(F); + } + + void solve(); + + bool resolvedUndefsIn(Function &F); + + bool isBlockExecutable(BasicBlock *BB) const { + return BBExecutable.count(BB); + } + + bool isEdgeFeasible(BasicBlock *From, BasicBlock *To) const; + + std::vector<ValueLatticeElement> getStructLatticeValueFor(Value *V) const { + std::vector<ValueLatticeElement> StructValues; + auto *STy = dyn_cast<StructType>(V->getType()); + assert(STy && "getStructLatticeValueFor() can be called only on structs"); + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + auto I = StructValueState.find(std::make_pair(V, i)); + assert(I != StructValueState.end() && "Value not in valuemap!"); + StructValues.push_back(I->second); + } + return StructValues; + } + + void removeLatticeValueFor(Value *V) { ValueState.erase(V); } + + const ValueLatticeElement &getLatticeValueFor(Value *V) const { + assert(!V->getType()->isStructTy() && + "Should use getStructLatticeValueFor"); + DenseMap<Value *, ValueLatticeElement>::const_iterator I = + ValueState.find(V); + assert(I != ValueState.end() && + "V not found in ValueState nor Paramstate map!"); + return I->second; + } + + const MapVector<Function *, ValueLatticeElement> &getTrackedRetVals() { + return TrackedRetVals; + } + + const DenseMap<GlobalVariable *, ValueLatticeElement> &getTrackedGlobals() { + return TrackedGlobals; + } + + const SmallPtrSet<Function *, 16> getMRVFunctionsTracked() { + return MRVFunctionsTracked; + } + + void markOverdefined(Value *V) { + if (auto *STy = dyn_cast<StructType>(V->getType())) + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) + markOverdefined(getStructValueState(V, i), V); + else + markOverdefined(ValueState[V], V); + } + + bool isStructLatticeConstant(Function *F, StructType *STy); + + Constant *getConstant(const ValueLatticeElement &LV) const; + ConstantRange getConstantRange(const ValueLatticeElement &LV, Type *Ty) const; + + SmallPtrSetImpl<Function *> &getArgumentTrackedFunctions() { + return TrackingIncomingArguments; + } + + void markArgInFuncSpecialization(Function *F, + const SmallVectorImpl<ArgInfo> &Args); + + void markFunctionUnreachable(Function *F) { + for (auto &BB : *F) + BBExecutable.erase(&BB); + } + + void solveWhileResolvedUndefsIn(Module &M) { + bool ResolvedUndefs = true; + while (ResolvedUndefs) { + solve(); + ResolvedUndefs = false; + for (Function &F : M) + ResolvedUndefs |= resolvedUndefsIn(F); + } + } + + void solveWhileResolvedUndefsIn(SmallVectorImpl<Function *> &WorkList) { + bool ResolvedUndefs = true; + while (ResolvedUndefs) { + solve(); + ResolvedUndefs = false; + for (Function *F : WorkList) + ResolvedUndefs |= resolvedUndefsIn(*F); + } + } +}; + +} // namespace llvm + +bool SCCPInstVisitor::markBlockExecutable(BasicBlock *BB) { + if 
(!BBExecutable.insert(BB).second) + return false; + LLVM_DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << '\n'); + BBWorkList.push_back(BB); // Add the block to the work list! + return true; +} + +void SCCPInstVisitor::pushToWorkList(ValueLatticeElement &IV, Value *V) { + if (IV.isOverdefined()) + return OverdefinedInstWorkList.push_back(V); + InstWorkList.push_back(V); +} + +void SCCPInstVisitor::pushToWorkListMsg(ValueLatticeElement &IV, Value *V) { + LLVM_DEBUG(dbgs() << "updated " << IV << ": " << *V << '\n'); + pushToWorkList(IV, V); +} + +bool SCCPInstVisitor::markConstant(ValueLatticeElement &IV, Value *V, + Constant *C, bool MayIncludeUndef) { + if (!IV.markConstant(C, MayIncludeUndef)) + return false; + LLVM_DEBUG(dbgs() << "markConstant: " << *C << ": " << *V << '\n'); + pushToWorkList(IV, V); + return true; +} + +bool SCCPInstVisitor::markOverdefined(ValueLatticeElement &IV, Value *V) { + if (!IV.markOverdefined()) + return false; + + LLVM_DEBUG(dbgs() << "markOverdefined: "; + if (auto *F = dyn_cast<Function>(V)) dbgs() + << "Function '" << F->getName() << "'\n"; + else dbgs() << *V << '\n'); + // Only instructions go on the work list + pushToWorkList(IV, V); + return true; +} + +bool SCCPInstVisitor::isStructLatticeConstant(Function *F, StructType *STy) { + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + const auto &It = TrackedMultipleRetVals.find(std::make_pair(F, i)); + assert(It != TrackedMultipleRetVals.end()); + ValueLatticeElement LV = It->second; + if (!SCCPSolver::isConstant(LV)) + return false; + } + return true; +} + +Constant *SCCPInstVisitor::getConstant(const ValueLatticeElement &LV) const { + if (LV.isConstant()) + return LV.getConstant(); + + if (LV.isConstantRange()) { + const auto &CR = LV.getConstantRange(); + if (CR.getSingleElement()) + return ConstantInt::get(Ctx, *CR.getSingleElement()); + } + return nullptr; +} + +ConstantRange +SCCPInstVisitor::getConstantRange(const ValueLatticeElement &LV, + Type *Ty) const { + assert(Ty->isIntOrIntVectorTy() && "Should be int or int vector"); + if (LV.isConstantRange()) + return LV.getConstantRange(); + return ConstantRange::getFull(Ty->getScalarSizeInBits()); +} + +void SCCPInstVisitor::markArgInFuncSpecialization( + Function *F, const SmallVectorImpl<ArgInfo> &Args) { + assert(!Args.empty() && "Specialization without arguments"); + assert(F->arg_size() == Args[0].Formal->getParent()->arg_size() && + "Functions should have the same number of arguments"); + + auto Iter = Args.begin(); + Argument *NewArg = F->arg_begin(); + Argument *OldArg = Args[0].Formal->getParent()->arg_begin(); + for (auto End = F->arg_end(); NewArg != End; ++NewArg, ++OldArg) { + + LLVM_DEBUG(dbgs() << "SCCP: Marking argument " + << NewArg->getNameOrAsOperand() << "\n"); + + if (Iter != Args.end() && OldArg == Iter->Formal) { + // Mark the argument constants in the new function. + markConstant(NewArg, Iter->Actual); + ++Iter; + } else if (ValueState.count(OldArg)) { + // For the remaining arguments in the new function, copy the lattice state + // over from the old function. + // + // Note: This previously looked like this: + // ValueState[NewArg] = ValueState[OldArg]; + // This is incorrect because the DenseMap class may resize the underlying + // memory when inserting `NewArg`, which will invalidate the reference to + // `OldArg`. Instead, we make sure `NewArg` exists before setting it. 
+ auto &NewValue = ValueState[NewArg]; + NewValue = ValueState[OldArg]; + pushToWorkList(NewValue, NewArg); + } + } +} + +void SCCPInstVisitor::visitInstruction(Instruction &I) { + // All the instructions we don't do any special handling for just + // go to overdefined. + LLVM_DEBUG(dbgs() << "SCCP: Don't know how to handle: " << I << '\n'); + markOverdefined(&I); +} + +bool SCCPInstVisitor::mergeInValue(ValueLatticeElement &IV, Value *V, + ValueLatticeElement MergeWithV, + ValueLatticeElement::MergeOptions Opts) { + if (IV.mergeIn(MergeWithV, Opts)) { + pushToWorkList(IV, V); + LLVM_DEBUG(dbgs() << "Merged " << MergeWithV << " into " << *V << " : " + << IV << "\n"); + return true; + } + return false; +} + +bool SCCPInstVisitor::markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) { + if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second) + return false; // This edge is already known to be executable! + + if (!markBlockExecutable(Dest)) { + // If the destination is already executable, we just made an *edge* + // feasible that wasn't before. Revisit the PHI nodes in the block + // because they have potentially new operands. + LLVM_DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName() + << " -> " << Dest->getName() << '\n'); + + for (PHINode &PN : Dest->phis()) + visitPHINode(PN); + } + return true; +} + +// getFeasibleSuccessors - Return a vector of booleans to indicate which +// successors are reachable from a given terminator instruction. +void SCCPInstVisitor::getFeasibleSuccessors(Instruction &TI, + SmallVectorImpl<bool> &Succs) { + Succs.resize(TI.getNumSuccessors()); + if (auto *BI = dyn_cast<BranchInst>(&TI)) { + if (BI->isUnconditional()) { + Succs[0] = true; + return; + } + + ValueLatticeElement BCValue = getValueState(BI->getCondition()); + ConstantInt *CI = getConstantInt(BCValue); + if (!CI) { + // Overdefined condition variables, and branches on unfoldable constant + // conditions, mean the branch could go either way. + if (!BCValue.isUnknownOrUndef()) + Succs[0] = Succs[1] = true; + return; + } + + // Constant condition variables mean the branch can only go a single way. + Succs[CI->isZero()] = true; + return; + } + + // Unwinding instructions successors are always executable. + if (TI.isExceptionalTerminator()) { + Succs.assign(TI.getNumSuccessors(), true); + return; + } + + if (auto *SI = dyn_cast<SwitchInst>(&TI)) { + if (!SI->getNumCases()) { + Succs[0] = true; + return; + } + const ValueLatticeElement &SCValue = getValueState(SI->getCondition()); + if (ConstantInt *CI = getConstantInt(SCValue)) { + Succs[SI->findCaseValue(CI)->getSuccessorIndex()] = true; + return; + } + + // TODO: Switch on undef is UB. Stop passing false once the rest of LLVM + // is ready. + if (SCValue.isConstantRange(/*UndefAllowed=*/false)) { + const ConstantRange &Range = SCValue.getConstantRange(); + for (const auto &Case : SI->cases()) { + const APInt &CaseValue = Case.getCaseValue()->getValue(); + if (Range.contains(CaseValue)) + Succs[Case.getSuccessorIndex()] = true; + } + + // TODO: Determine whether default case is reachable. + Succs[SI->case_default()->getSuccessorIndex()] = true; + return; + } + + // Overdefined or unknown condition? All destinations are executable! + if (!SCValue.isUnknownOrUndef()) + Succs.assign(TI.getNumSuccessors(), true); + return; + } + + // In case of indirect branch and its address is a blockaddress, we mark + // the target as executable. + if (auto *IBR = dyn_cast<IndirectBrInst>(&TI)) { + // Casts are folded by visitCastInst. 
+ ValueLatticeElement IBRValue = getValueState(IBR->getAddress()); + BlockAddress *Addr = dyn_cast_or_null<BlockAddress>(getConstant(IBRValue)); + if (!Addr) { // Overdefined or unknown condition? + // All destinations are executable! + if (!IBRValue.isUnknownOrUndef()) + Succs.assign(TI.getNumSuccessors(), true); + return; + } + + BasicBlock *T = Addr->getBasicBlock(); + assert(Addr->getFunction() == T->getParent() && + "Block address of a different function ?"); + for (unsigned i = 0; i < IBR->getNumSuccessors(); ++i) { + // This is the target. + if (IBR->getDestination(i) == T) { + Succs[i] = true; + return; + } + } + + // If we didn't find our destination in the IBR successor list, then we + // have undefined behavior. Its ok to assume no successor is executable. + return; + } + + // In case of callbr, we pessimistically assume that all successors are + // feasible. + if (isa<CallBrInst>(&TI)) { + Succs.assign(TI.getNumSuccessors(), true); + return; + } + + LLVM_DEBUG(dbgs() << "Unknown terminator instruction: " << TI << '\n'); + llvm_unreachable("SCCP: Don't know how to handle this terminator!"); +} + +// isEdgeFeasible - Return true if the control flow edge from the 'From' basic +// block to the 'To' basic block is currently feasible. +bool SCCPInstVisitor::isEdgeFeasible(BasicBlock *From, BasicBlock *To) const { + // Check if we've called markEdgeExecutable on the edge yet. (We could + // be more aggressive and try to consider edges which haven't been marked + // yet, but there isn't any need.) + return KnownFeasibleEdges.count(Edge(From, To)); +} + +// visit Implementations - Something changed in this instruction, either an +// operand made a transition, or the instruction is newly executable. Change +// the value type of I to reflect these changes if appropriate. This method +// makes sure to do the following actions: +// +// 1. If a phi node merges two constants in, and has conflicting value coming +// from different branches, or if the PHI node merges in an overdefined +// value, then the PHI node becomes overdefined. +// 2. If a phi node merges only constants in, and they all agree on value, the +// PHI node becomes a constant value equal to that. +// 3. If V <- x (op) y && isConstant(x) && isConstant(y) V = Constant +// 4. If V <- x (op) y && (isOverdefined(x) || isOverdefined(y)) V = Overdefined +// 5. If V <- MEM or V <- CALL or V <- (unknown) then V = Overdefined +// 6. If a conditional branch has a value that is constant, make the selected +// destination executable +// 7. If a conditional branch has a value that is overdefined, make all +// successors executable. +void SCCPInstVisitor::visitPHINode(PHINode &PN) { + // If this PN returns a struct, just mark the result overdefined. + // TODO: We could do a lot better than this if code actually uses this. + if (PN.getType()->isStructTy()) + return (void)markOverdefined(&PN); + + if (getValueState(&PN).isOverdefined()) + return; // Quick exit + + // Super-extra-high-degree PHI nodes are unlikely to ever be marked constant, + // and slow us down a lot. Just mark them overdefined. + if (PN.getNumIncomingValues() > 64) + return (void)markOverdefined(&PN); + + unsigned NumActiveIncoming = 0; + + // Look at all of the executable operands of the PHI node. If any of them + // are overdefined, the PHI becomes overdefined as well. If they are all + // constant, and they agree with each other, the PHI becomes the identical + // constant. If they are constant and don't agree, the PHI is a constant + // range. 
If there are no executable operands, the PHI remains unknown. + ValueLatticeElement PhiState = getValueState(&PN); + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { + if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent())) + continue; + + ValueLatticeElement IV = getValueState(PN.getIncomingValue(i)); + PhiState.mergeIn(IV); + NumActiveIncoming++; + if (PhiState.isOverdefined()) + break; + } + + // We allow up to 1 range extension per active incoming value and one + // additional extension. Note that we manually adjust the number of range + // extensions to match the number of active incoming values. This helps to + // limit multiple extensions caused by the same incoming value, if other + // incoming values are equal. + mergeInValue(&PN, PhiState, + ValueLatticeElement::MergeOptions().setMaxWidenSteps( + NumActiveIncoming + 1)); + ValueLatticeElement &PhiStateRef = getValueState(&PN); + PhiStateRef.setNumRangeExtensions( + std::max(NumActiveIncoming, PhiStateRef.getNumRangeExtensions())); +} + +void SCCPInstVisitor::visitReturnInst(ReturnInst &I) { + if (I.getNumOperands() == 0) + return; // ret void + + Function *F = I.getParent()->getParent(); + Value *ResultOp = I.getOperand(0); + + // If we are tracking the return value of this function, merge it in. + if (!TrackedRetVals.empty() && !ResultOp->getType()->isStructTy()) { + auto TFRVI = TrackedRetVals.find(F); + if (TFRVI != TrackedRetVals.end()) { + mergeInValue(TFRVI->second, F, getValueState(ResultOp)); + return; + } + } + + // Handle functions that return multiple values. + if (!TrackedMultipleRetVals.empty()) { + if (auto *STy = dyn_cast<StructType>(ResultOp->getType())) + if (MRVFunctionsTracked.count(F)) + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) + mergeInValue(TrackedMultipleRetVals[std::make_pair(F, i)], F, + getStructValueState(ResultOp, i)); + } +} + +void SCCPInstVisitor::visitTerminator(Instruction &TI) { + SmallVector<bool, 16> SuccFeasible; + getFeasibleSuccessors(TI, SuccFeasible); + + BasicBlock *BB = TI.getParent(); + + // Mark all feasible successors executable. + for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i) + if (SuccFeasible[i]) + markEdgeExecutable(BB, TI.getSuccessor(i)); +} + +void SCCPInstVisitor::visitCastInst(CastInst &I) { + // ResolvedUndefsIn might mark I as overdefined. Bail out, even if we would + // discover a concrete value later. + if (ValueState[&I].isOverdefined()) + return; + + ValueLatticeElement OpSt = getValueState(I.getOperand(0)); + if (OpSt.isUnknownOrUndef()) + return; + + if (Constant *OpC = getConstant(OpSt)) { + // Fold the constant as we build. + Constant *C = ConstantFoldCastOperand(I.getOpcode(), OpC, I.getType(), DL); + markConstant(&I, C); + } else if (I.getDestTy()->isIntegerTy() && + I.getSrcTy()->isIntOrIntVectorTy()) { + auto &LV = getValueState(&I); + ConstantRange OpRange = getConstantRange(OpSt, I.getSrcTy()); + + Type *DestTy = I.getDestTy(); + // Vectors where all elements have the same known constant range are treated + // as a single constant range in the lattice. When bitcasting such vectors, + // there is a mis-match between the width of the lattice value (single + // constant range) and the original operands (vector). Go to overdefined in + // that case. 
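+    // For example, bitcasting <2 x i32> (tracked as a single 32-bit range) to
+    // i64 cannot be described by the 32-bit lattice range, so it is sent to
+    // overdefined below.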
+ if (I.getOpcode() == Instruction::BitCast && + I.getOperand(0)->getType()->isVectorTy() && + OpRange.getBitWidth() < DL.getTypeSizeInBits(DestTy)) + return (void)markOverdefined(&I); + + ConstantRange Res = + OpRange.castOp(I.getOpcode(), DL.getTypeSizeInBits(DestTy)); + mergeInValue(LV, &I, ValueLatticeElement::getRange(Res)); + } else + markOverdefined(&I); +} + +void SCCPInstVisitor::handleExtractOfWithOverflow(ExtractValueInst &EVI, + const WithOverflowInst *WO, + unsigned Idx) { + Value *LHS = WO->getLHS(), *RHS = WO->getRHS(); + ValueLatticeElement L = getValueState(LHS); + ValueLatticeElement R = getValueState(RHS); + addAdditionalUser(LHS, &EVI); + addAdditionalUser(RHS, &EVI); + if (L.isUnknownOrUndef() || R.isUnknownOrUndef()) + return; // Wait to resolve. + + Type *Ty = LHS->getType(); + ConstantRange LR = getConstantRange(L, Ty); + ConstantRange RR = getConstantRange(R, Ty); + if (Idx == 0) { + ConstantRange Res = LR.binaryOp(WO->getBinaryOp(), RR); + mergeInValue(&EVI, ValueLatticeElement::getRange(Res)); + } else { + assert(Idx == 1 && "Index can only be 0 or 1"); + ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion( + WO->getBinaryOp(), RR, WO->getNoWrapKind()); + if (NWRegion.contains(LR)) + return (void)markConstant(&EVI, ConstantInt::getFalse(EVI.getType())); + markOverdefined(&EVI); + } +} + +void SCCPInstVisitor::visitExtractValueInst(ExtractValueInst &EVI) { + // If this returns a struct, mark all elements over defined, we don't track + // structs in structs. + if (EVI.getType()->isStructTy()) + return (void)markOverdefined(&EVI); + + // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would + // discover a concrete value later. + if (ValueState[&EVI].isOverdefined()) + return (void)markOverdefined(&EVI); + + // If this is extracting from more than one level of struct, we don't know. + if (EVI.getNumIndices() != 1) + return (void)markOverdefined(&EVI); + + Value *AggVal = EVI.getAggregateOperand(); + if (AggVal->getType()->isStructTy()) { + unsigned i = *EVI.idx_begin(); + if (auto *WO = dyn_cast<WithOverflowInst>(AggVal)) + return handleExtractOfWithOverflow(EVI, WO, i); + ValueLatticeElement EltVal = getStructValueState(AggVal, i); + mergeInValue(getValueState(&EVI), &EVI, EltVal); + } else { + // Otherwise, must be extracting from an array. + return (void)markOverdefined(&EVI); + } +} + +void SCCPInstVisitor::visitInsertValueInst(InsertValueInst &IVI) { + auto *STy = dyn_cast<StructType>(IVI.getType()); + if (!STy) + return (void)markOverdefined(&IVI); + + // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would + // discover a concrete value later. + if (SCCPSolver::isOverdefined(ValueState[&IVI])) + return (void)markOverdefined(&IVI); + + // If this has more than one index, we can't handle it, drive all results to + // undef. + if (IVI.getNumIndices() != 1) + return (void)markOverdefined(&IVI); + + Value *Aggr = IVI.getAggregateOperand(); + unsigned Idx = *IVI.idx_begin(); + + // Compute the result based on what we're inserting. + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + // This passes through all values that aren't the inserted element. + if (i != Idx) { + ValueLatticeElement EltVal = getStructValueState(Aggr, i); + mergeInValue(getStructValueState(&IVI, i), &IVI, EltVal); + continue; + } + + Value *Val = IVI.getInsertedValueOperand(); + if (Val->getType()->isStructTy()) + // We don't track structs in structs. 
+ markOverdefined(getStructValueState(&IVI, i), &IVI); + else { + ValueLatticeElement InVal = getValueState(Val); + mergeInValue(getStructValueState(&IVI, i), &IVI, InVal); + } + } +} + +void SCCPInstVisitor::visitSelectInst(SelectInst &I) { + // If this select returns a struct, just mark the result overdefined. + // TODO: We could do a lot better than this if code actually uses this. + if (I.getType()->isStructTy()) + return (void)markOverdefined(&I); + + // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would + // discover a concrete value later. + if (ValueState[&I].isOverdefined()) + return (void)markOverdefined(&I); + + ValueLatticeElement CondValue = getValueState(I.getCondition()); + if (CondValue.isUnknownOrUndef()) + return; + + if (ConstantInt *CondCB = getConstantInt(CondValue)) { + Value *OpVal = CondCB->isZero() ? I.getFalseValue() : I.getTrueValue(); + mergeInValue(&I, getValueState(OpVal)); + return; + } + + // Otherwise, the condition is overdefined or a constant we can't evaluate. + // See if we can produce something better than overdefined based on the T/F + // value. + ValueLatticeElement TVal = getValueState(I.getTrueValue()); + ValueLatticeElement FVal = getValueState(I.getFalseValue()); + + bool Changed = ValueState[&I].mergeIn(TVal); + Changed |= ValueState[&I].mergeIn(FVal); + if (Changed) + pushToWorkListMsg(ValueState[&I], &I); +} + +// Handle Unary Operators. +void SCCPInstVisitor::visitUnaryOperator(Instruction &I) { + ValueLatticeElement V0State = getValueState(I.getOperand(0)); + + ValueLatticeElement &IV = ValueState[&I]; + // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would + // discover a concrete value later. + if (SCCPSolver::isOverdefined(IV)) + return (void)markOverdefined(&I); + + // If something is unknown/undef, wait for it to resolve. + if (V0State.isUnknownOrUndef()) + return; + + if (SCCPSolver::isConstant(V0State)) + if (Constant *C = ConstantFoldUnaryOpOperand(I.getOpcode(), + getConstant(V0State), DL)) + return (void)markConstant(IV, &I, C); + + markOverdefined(&I); +} + +// Handle Binary Operators. +void SCCPInstVisitor::visitBinaryOperator(Instruction &I) { + ValueLatticeElement V1State = getValueState(I.getOperand(0)); + ValueLatticeElement V2State = getValueState(I.getOperand(1)); + + ValueLatticeElement &IV = ValueState[&I]; + if (IV.isOverdefined()) + return; + + // If something is undef, wait for it to resolve. + if (V1State.isUnknownOrUndef() || V2State.isUnknownOrUndef()) + return; + + if (V1State.isOverdefined() && V2State.isOverdefined()) + return (void)markOverdefined(&I); + + // If either of the operands is a constant, try to fold it to a constant. + // TODO: Use information from notconstant better. + if ((V1State.isConstant() || V2State.isConstant())) { + Value *V1 = SCCPSolver::isConstant(V1State) ? getConstant(V1State) + : I.getOperand(0); + Value *V2 = SCCPSolver::isConstant(V2State) ? getConstant(V2State) + : I.getOperand(1); + Value *R = simplifyBinOp(I.getOpcode(), V1, V2, SimplifyQuery(DL)); + auto *C = dyn_cast_or_null<Constant>(R); + if (C) { + // Conservatively assume that the result may be based on operands that may + // be undef. Note that we use mergeInValue to combine the constant with + // the existing lattice value for I, as different constants might be found + // after one of the operands go to overdefined, e.g. due to one operand + // being a special floating value. 
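+      // For example, "and i32 %x, 0" simplifies to 0 even when %x is
+      // overdefined; the folded constant is merged into the existing lattice
+      // value for the instruction instead of overwriting it.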
+ ValueLatticeElement NewV; + NewV.markConstant(C, /*MayIncludeUndef=*/true); + return (void)mergeInValue(&I, NewV); + } + } + + // Only use ranges for binary operators on integers. + if (!I.getType()->isIntegerTy()) + return markOverdefined(&I); + + // Try to simplify to a constant range. + ConstantRange A = getConstantRange(V1State, I.getType()); + ConstantRange B = getConstantRange(V2State, I.getType()); + ConstantRange R = A.binaryOp(cast<BinaryOperator>(&I)->getOpcode(), B); + mergeInValue(&I, ValueLatticeElement::getRange(R)); + + // TODO: Currently we do not exploit special values that produce something + // better than overdefined with an overdefined operand for vector or floating + // point types, like and <4 x i32> overdefined, zeroinitializer. +} + +// Handle ICmpInst instruction. +void SCCPInstVisitor::visitCmpInst(CmpInst &I) { + // Do not cache this lookup, getValueState calls later in the function might + // invalidate the reference. + if (SCCPSolver::isOverdefined(ValueState[&I])) + return (void)markOverdefined(&I); + + Value *Op1 = I.getOperand(0); + Value *Op2 = I.getOperand(1); + + // For parameters, use ParamState which includes constant range info if + // available. + auto V1State = getValueState(Op1); + auto V2State = getValueState(Op2); + + Constant *C = V1State.getCompare(I.getPredicate(), I.getType(), V2State, DL); + if (C) { + ValueLatticeElement CV; + CV.markConstant(C); + mergeInValue(&I, CV); + return; + } + + // If operands are still unknown, wait for it to resolve. + if ((V1State.isUnknownOrUndef() || V2State.isUnknownOrUndef()) && + !SCCPSolver::isConstant(ValueState[&I])) + return; + + markOverdefined(&I); +} + +// Handle getelementptr instructions. If all operands are constants then we +// can turn this into a getelementptr ConstantExpr. +void SCCPInstVisitor::visitGetElementPtrInst(GetElementPtrInst &I) { + if (SCCPSolver::isOverdefined(ValueState[&I])) + return (void)markOverdefined(&I); + + SmallVector<Constant *, 8> Operands; + Operands.reserve(I.getNumOperands()); + + for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { + ValueLatticeElement State = getValueState(I.getOperand(i)); + if (State.isUnknownOrUndef()) + return; // Operands are not resolved yet. + + if (SCCPSolver::isOverdefined(State)) + return (void)markOverdefined(&I); + + if (Constant *C = getConstant(State)) { + Operands.push_back(C); + continue; + } + + return (void)markOverdefined(&I); + } + + Constant *Ptr = Operands[0]; + auto Indices = ArrayRef(Operands.begin() + 1, Operands.end()); + Constant *C = + ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr, Indices); + markConstant(&I, C); +} + +void SCCPInstVisitor::visitStoreInst(StoreInst &SI) { + // If this store is of a struct, ignore it. + if (SI.getOperand(0)->getType()->isStructTy()) + return; + + if (TrackedGlobals.empty() || !isa<GlobalVariable>(SI.getOperand(1))) + return; + + GlobalVariable *GV = cast<GlobalVariable>(SI.getOperand(1)); + auto I = TrackedGlobals.find(GV); + if (I == TrackedGlobals.end()) + return; + + // Get the value we are storing into the global, then merge it. + mergeInValue(I->second, GV, getValueState(SI.getOperand(0)), + ValueLatticeElement::MergeOptions().setCheckWiden(false)); + if (I->second.isOverdefined()) + TrackedGlobals.erase(I); // No need to keep tracking this! 
+} + +static ValueLatticeElement getValueFromMetadata(const Instruction *I) { + if (MDNode *Ranges = I->getMetadata(LLVMContext::MD_range)) + if (I->getType()->isIntegerTy()) + return ValueLatticeElement::getRange( + getConstantRangeFromMetadata(*Ranges)); + if (I->hasMetadata(LLVMContext::MD_nonnull)) + return ValueLatticeElement::getNot( + ConstantPointerNull::get(cast<PointerType>(I->getType()))); + return ValueLatticeElement::getOverdefined(); +} + +// Handle load instructions. If the operand is a constant pointer to a constant +// global, we can replace the load with the loaded constant value! +void SCCPInstVisitor::visitLoadInst(LoadInst &I) { + // If this load is of a struct or the load is volatile, just mark the result + // as overdefined. + if (I.getType()->isStructTy() || I.isVolatile()) + return (void)markOverdefined(&I); + + // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would + // discover a concrete value later. + if (ValueState[&I].isOverdefined()) + return (void)markOverdefined(&I); + + ValueLatticeElement PtrVal = getValueState(I.getOperand(0)); + if (PtrVal.isUnknownOrUndef()) + return; // The pointer is not resolved yet! + + ValueLatticeElement &IV = ValueState[&I]; + + if (SCCPSolver::isConstant(PtrVal)) { + Constant *Ptr = getConstant(PtrVal); + + // load null is undefined. + if (isa<ConstantPointerNull>(Ptr)) { + if (NullPointerIsDefined(I.getFunction(), I.getPointerAddressSpace())) + return (void)markOverdefined(IV, &I); + else + return; + } + + // Transform load (constant global) into the value loaded. + if (auto *GV = dyn_cast<GlobalVariable>(Ptr)) { + if (!TrackedGlobals.empty()) { + // If we are tracking this global, merge in the known value for it. + auto It = TrackedGlobals.find(GV); + if (It != TrackedGlobals.end()) { + mergeInValue(IV, &I, It->second, getMaxWidenStepsOpts()); + return; + } + } + } + + // Transform load from a constant into a constant if possible. + if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, I.getType(), DL)) + return (void)markConstant(IV, &I, C); + } + + // Fall back to metadata. + mergeInValue(&I, getValueFromMetadata(&I)); +} + +void SCCPInstVisitor::visitCallBase(CallBase &CB) { + handleCallResult(CB); + handleCallArguments(CB); +} + +void SCCPInstVisitor::handleCallOverdefined(CallBase &CB) { + Function *F = CB.getCalledFunction(); + + // Void return and not tracking callee, just bail. + if (CB.getType()->isVoidTy()) + return; + + // Always mark struct return as overdefined. + if (CB.getType()->isStructTy()) + return (void)markOverdefined(&CB); + + // Otherwise, if we have a single return value case, and if the function is + // a declaration, maybe we can constant fold it. + if (F && F->isDeclaration() && canConstantFoldCallTo(&CB, F)) { + SmallVector<Constant *, 8> Operands; + for (const Use &A : CB.args()) { + if (A.get()->getType()->isStructTy()) + return markOverdefined(&CB); // Can't handle struct args. + if (A.get()->getType()->isMetadataTy()) + continue; // Carried in CB, not allowed in Operands. + ValueLatticeElement State = getValueState(A); + + if (State.isUnknownOrUndef()) + return; // Operands are not resolved yet. + if (SCCPSolver::isOverdefined(State)) + return (void)markOverdefined(&CB); + assert(SCCPSolver::isConstant(State) && "Unknown state!"); + Operands.push_back(getConstant(State)); + } + + if (SCCPSolver::isOverdefined(getValueState(&CB))) + return (void)markOverdefined(&CB); + + // If we can constant fold this, mark the result of the call as a + // constant. 
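+    // For example, a declared libm call such as "call double @floor(double 2.5)"
+    // with all-constant operands can be folded to a constant by the TLI-aware
+    // folder below.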
+ if (Constant *C = ConstantFoldCall(&CB, F, Operands, &GetTLI(*F))) + return (void)markConstant(&CB, C); + } + + // Fall back to metadata. + mergeInValue(&CB, getValueFromMetadata(&CB)); +} + +void SCCPInstVisitor::handleCallArguments(CallBase &CB) { + Function *F = CB.getCalledFunction(); + // If this is a local function that doesn't have its address taken, mark its + // entry block executable and merge in the actual arguments to the call into + // the formal arguments of the function. + if (TrackingIncomingArguments.count(F)) { + markBlockExecutable(&F->front()); + + // Propagate information from this call site into the callee. + auto CAI = CB.arg_begin(); + for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; + ++AI, ++CAI) { + // If this argument is byval, and if the function is not readonly, there + // will be an implicit copy formed of the input aggregate. + if (AI->hasByValAttr() && !F->onlyReadsMemory()) { + markOverdefined(&*AI); + continue; + } + + if (auto *STy = dyn_cast<StructType>(AI->getType())) { + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + ValueLatticeElement CallArg = getStructValueState(*CAI, i); + mergeInValue(getStructValueState(&*AI, i), &*AI, CallArg, + getMaxWidenStepsOpts()); + } + } else + mergeInValue(&*AI, getValueState(*CAI), getMaxWidenStepsOpts()); + } + } +} + +void SCCPInstVisitor::handleCallResult(CallBase &CB) { + Function *F = CB.getCalledFunction(); + + if (auto *II = dyn_cast<IntrinsicInst>(&CB)) { + if (II->getIntrinsicID() == Intrinsic::ssa_copy) { + if (ValueState[&CB].isOverdefined()) + return; + + Value *CopyOf = CB.getOperand(0); + ValueLatticeElement CopyOfVal = getValueState(CopyOf); + const auto *PI = getPredicateInfoFor(&CB); + assert(PI && "Missing predicate info for ssa.copy"); + + const std::optional<PredicateConstraint> &Constraint = + PI->getConstraint(); + if (!Constraint) { + mergeInValue(ValueState[&CB], &CB, CopyOfVal); + return; + } + + CmpInst::Predicate Pred = Constraint->Predicate; + Value *OtherOp = Constraint->OtherOp; + + // Wait until OtherOp is resolved. + if (getValueState(OtherOp).isUnknown()) { + addAdditionalUser(OtherOp, &CB); + return; + } + + ValueLatticeElement CondVal = getValueState(OtherOp); + ValueLatticeElement &IV = ValueState[&CB]; + if (CondVal.isConstantRange() || CopyOfVal.isConstantRange()) { + auto ImposedCR = + ConstantRange::getFull(DL.getTypeSizeInBits(CopyOf->getType())); + + // Get the range imposed by the condition. + if (CondVal.isConstantRange()) + ImposedCR = ConstantRange::makeAllowedICmpRegion( + Pred, CondVal.getConstantRange()); + + // Combine range info for the original value with the new range from the + // condition. + auto CopyOfCR = getConstantRange(CopyOfVal, CopyOf->getType()); + auto NewCR = ImposedCR.intersectWith(CopyOfCR); + // If the existing information is != x, do not use the information from + // a chained predicate, as the != x information is more likely to be + // helpful in practice. + if (!CopyOfCR.contains(NewCR) && CopyOfCR.getSingleMissingElement()) + NewCR = CopyOfCR; + + // The new range is based on a branch condition. That guarantees that + // neither of the compare operands can be undef in the branch targets, + // unless we have conditions that are always true/false (e.g. icmp ule + // i32, %a, i32_max). For the latter overdefined/empty range will be + // inferred, but the branch will get folded accordingly anyways. 
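// Editorial sketch, not part of this patch: the range imposed on a value %x by
// taking the true edge of "icmp ult %x, 10", as computed by the
// makeAllowedICmpRegion call above. The values are hypothetical.
ConstantRange Ten(APInt(32, 10));  // the single value 10
ConstantRange Imposed =
    ConstantRange::makeAllowedICmpRegion(CmpInst::ICMP_ULT, Ten);
// Imposed is [0, 10); intersecting it with the range already known for the
// copied value is exactly the NewCR refinement performed above.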
+ addAdditionalUser(OtherOp, &CB); + mergeInValue( + IV, &CB, + ValueLatticeElement::getRange(NewCR, /*MayIncludeUndef*/ false)); + return; + } else if (Pred == CmpInst::ICMP_EQ && + (CondVal.isConstant() || CondVal.isNotConstant())) { + // For non-integer values or integer constant expressions, only + // propagate equal constants or not-constants. + addAdditionalUser(OtherOp, &CB); + mergeInValue(IV, &CB, CondVal); + return; + } else if (Pred == CmpInst::ICMP_NE && CondVal.isConstant()) { + // Propagate inequalities. + addAdditionalUser(OtherOp, &CB); + mergeInValue(IV, &CB, + ValueLatticeElement::getNot(CondVal.getConstant())); + return; + } + + return (void)mergeInValue(IV, &CB, CopyOfVal); + } + + if (ConstantRange::isIntrinsicSupported(II->getIntrinsicID())) { + // Compute result range for intrinsics supported by ConstantRange. + // Do this even if we don't know a range for all operands, as we may + // still know something about the result range, e.g. of abs(x). + SmallVector<ConstantRange, 2> OpRanges; + for (Value *Op : II->args()) { + const ValueLatticeElement &State = getValueState(Op); + OpRanges.push_back(getConstantRange(State, Op->getType())); + } + + ConstantRange Result = + ConstantRange::intrinsic(II->getIntrinsicID(), OpRanges); + return (void)mergeInValue(II, ValueLatticeElement::getRange(Result)); + } + } + + // The common case is that we aren't tracking the callee, either because we + // are not doing interprocedural analysis or the callee is indirect, or is + // external. Handle these cases first. + if (!F || F->isDeclaration()) + return handleCallOverdefined(CB); + + // If this is a single/zero retval case, see if we're tracking the function. + if (auto *STy = dyn_cast<StructType>(F->getReturnType())) { + if (!MRVFunctionsTracked.count(F)) + return handleCallOverdefined(CB); // Not tracking this callee. + + // If we are tracking this callee, propagate the result of the function + // into this call site. + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) + mergeInValue(getStructValueState(&CB, i), &CB, + TrackedMultipleRetVals[std::make_pair(F, i)], + getMaxWidenStepsOpts()); + } else { + auto TFRVI = TrackedRetVals.find(F); + if (TFRVI == TrackedRetVals.end()) + return handleCallOverdefined(CB); // Not tracking this callee. + + // If so, propagate the return value of the callee into this call result. + mergeInValue(&CB, TFRVI->second, getMaxWidenStepsOpts()); + } +} + +void SCCPInstVisitor::solve() { + // Process the work lists until they are empty! + while (!BBWorkList.empty() || !InstWorkList.empty() || + !OverdefinedInstWorkList.empty()) { + // Process the overdefined instruction's work list first, which drives other + // things to overdefined more quickly. + while (!OverdefinedInstWorkList.empty()) { + Value *I = OverdefinedInstWorkList.pop_back_val(); + + LLVM_DEBUG(dbgs() << "\nPopped off OI-WL: " << *I << '\n'); + + // "I" got into the work list because it either made the transition from + // bottom to constant, or to overdefined. + // + // Anything on this worklist that is overdefined need not be visited + // since all of its users will have already been marked as overdefined + // Update all of the users of this instruction's value. + // + markUsersAsChanged(I); + } + + // Process the instruction work list. + while (!InstWorkList.empty()) { + Value *I = InstWorkList.pop_back_val(); + + LLVM_DEBUG(dbgs() << "\nPopped off I-WL: " << *I << '\n'); + + // "I" got into the work list because it made the transition from undef to + // constant. 
+ // + // Anything on this worklist that is overdefined need not be visited + // since all of its users will have already been marked as overdefined. + // Update all of the users of this instruction's value. + // + if (I->getType()->isStructTy() || !getValueState(I).isOverdefined()) + markUsersAsChanged(I); + } + + // Process the basic block work list. + while (!BBWorkList.empty()) { + BasicBlock *BB = BBWorkList.pop_back_val(); + + LLVM_DEBUG(dbgs() << "\nPopped off BBWL: " << *BB << '\n'); + + // Notify all instructions in this basic block that they are newly + // executable. + visit(BB); + } + } +} + +/// While solving the dataflow for a function, we don't compute a result for +/// operations with an undef operand, to allow undef to be lowered to a +/// constant later. For example, constant folding of "zext i8 undef to i16" +/// would result in "i16 0", and if undef is later lowered to "i8 1", then the +/// zext result would become "i16 1" and would result into an overdefined +/// lattice value once merged with the previous result. Not computing the +/// result of the zext (treating undef the same as unknown) allows us to handle +/// a later undef->constant lowering more optimally. +/// +/// However, if the operand remains undef when the solver returns, we do need +/// to assign some result to the instruction (otherwise we would treat it as +/// unreachable). For simplicity, we mark any instructions that are still +/// unknown as overdefined. +bool SCCPInstVisitor::resolvedUndefsIn(Function &F) { + bool MadeChange = false; + for (BasicBlock &BB : F) { + if (!BBExecutable.count(&BB)) + continue; + + for (Instruction &I : BB) { + // Look for instructions which produce undef values. + if (I.getType()->isVoidTy()) + continue; + + if (auto *STy = dyn_cast<StructType>(I.getType())) { + // Only a few things that can be structs matter for undef. + + // Tracked calls must never be marked overdefined in resolvedUndefsIn. + if (auto *CB = dyn_cast<CallBase>(&I)) + if (Function *F = CB->getCalledFunction()) + if (MRVFunctionsTracked.count(F)) + continue; + + // extractvalue and insertvalue don't need to be marked; they are + // tracked as precisely as their operands. + if (isa<ExtractValueInst>(I) || isa<InsertValueInst>(I)) + continue; + // Send the results of everything else to overdefined. We could be + // more precise than this but it isn't worth bothering. + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + ValueLatticeElement &LV = getStructValueState(&I, i); + if (LV.isUnknown()) { + markOverdefined(LV, &I); + MadeChange = true; + } + } + continue; + } + + ValueLatticeElement &LV = getValueState(&I); + if (!LV.isUnknown()) + continue; + + // There are two reasons a call can have an undef result + // 1. It could be tracked. + // 2. It could be constant-foldable. + // Because of the way we solve return values, tracked calls must + // never be marked overdefined in resolvedUndefsIn. + if (auto *CB = dyn_cast<CallBase>(&I)) + if (Function *F = CB->getCalledFunction()) + if (TrackedRetVals.count(F)) + continue; + + if (isa<LoadInst>(I)) { + // A load here means one of two things: a load of undef from a global, + // a load from an unknown pointer. Either way, having it return undef + // is okay. 
+ continue; + } + + markOverdefined(&I); + MadeChange = true; + } + } + + LLVM_DEBUG(if (MadeChange) dbgs() + << "\nResolved undefs in " << F.getName() << '\n'); + + return MadeChange; +} + +//===----------------------------------------------------------------------===// +// +// SCCPSolver implementations +// +SCCPSolver::SCCPSolver( + const DataLayout &DL, + std::function<const TargetLibraryInfo &(Function &)> GetTLI, + LLVMContext &Ctx) + : Visitor(new SCCPInstVisitor(DL, std::move(GetTLI), Ctx)) {} + +SCCPSolver::~SCCPSolver() = default; + +void SCCPSolver::addAnalysis(Function &F, AnalysisResultsForFn A) { + return Visitor->addAnalysis(F, std::move(A)); +} + +bool SCCPSolver::markBlockExecutable(BasicBlock *BB) { + return Visitor->markBlockExecutable(BB); +} + +const PredicateBase *SCCPSolver::getPredicateInfoFor(Instruction *I) { + return Visitor->getPredicateInfoFor(I); +} + +const LoopInfo &SCCPSolver::getLoopInfo(Function &F) { + return Visitor->getLoopInfo(F); +} + +DomTreeUpdater SCCPSolver::getDTU(Function &F) { return Visitor->getDTU(F); } + +void SCCPSolver::trackValueOfGlobalVariable(GlobalVariable *GV) { + Visitor->trackValueOfGlobalVariable(GV); +} + +void SCCPSolver::addTrackedFunction(Function *F) { + Visitor->addTrackedFunction(F); +} + +void SCCPSolver::addToMustPreserveReturnsInFunctions(Function *F) { + Visitor->addToMustPreserveReturnsInFunctions(F); +} + +bool SCCPSolver::mustPreserveReturn(Function *F) { + return Visitor->mustPreserveReturn(F); +} + +void SCCPSolver::addArgumentTrackedFunction(Function *F) { + Visitor->addArgumentTrackedFunction(F); +} + +bool SCCPSolver::isArgumentTrackedFunction(Function *F) { + return Visitor->isArgumentTrackedFunction(F); +} + +void SCCPSolver::solve() { Visitor->solve(); } + +bool SCCPSolver::resolvedUndefsIn(Function &F) { + return Visitor->resolvedUndefsIn(F); +} + +void SCCPSolver::solveWhileResolvedUndefsIn(Module &M) { + Visitor->solveWhileResolvedUndefsIn(M); +} + +void +SCCPSolver::solveWhileResolvedUndefsIn(SmallVectorImpl<Function *> &WorkList) { + Visitor->solveWhileResolvedUndefsIn(WorkList); +} + +bool SCCPSolver::isBlockExecutable(BasicBlock *BB) const { + return Visitor->isBlockExecutable(BB); +} + +bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) const { + return Visitor->isEdgeFeasible(From, To); +} + +std::vector<ValueLatticeElement> +SCCPSolver::getStructLatticeValueFor(Value *V) const { + return Visitor->getStructLatticeValueFor(V); +} + +void SCCPSolver::removeLatticeValueFor(Value *V) { + return Visitor->removeLatticeValueFor(V); +} + +const ValueLatticeElement &SCCPSolver::getLatticeValueFor(Value *V) const { + return Visitor->getLatticeValueFor(V); +} + +const MapVector<Function *, ValueLatticeElement> & +SCCPSolver::getTrackedRetVals() { + return Visitor->getTrackedRetVals(); +} + +const DenseMap<GlobalVariable *, ValueLatticeElement> & +SCCPSolver::getTrackedGlobals() { + return Visitor->getTrackedGlobals(); +} + +const SmallPtrSet<Function *, 16> SCCPSolver::getMRVFunctionsTracked() { + return Visitor->getMRVFunctionsTracked(); +} + +void SCCPSolver::markOverdefined(Value *V) { Visitor->markOverdefined(V); } + +bool SCCPSolver::isStructLatticeConstant(Function *F, StructType *STy) { + return Visitor->isStructLatticeConstant(F, STy); +} + +Constant *SCCPSolver::getConstant(const ValueLatticeElement &LV) const { + return Visitor->getConstant(LV); +} + +SmallPtrSetImpl<Function *> &SCCPSolver::getArgumentTrackedFunctions() { + return Visitor->getArgumentTrackedFunctions(); +} + +void 
SCCPSolver::markArgInFuncSpecialization( + Function *F, const SmallVectorImpl<ArgInfo> &Args) { + Visitor->markArgInFuncSpecialization(F, Args); +} + +void SCCPSolver::markFunctionUnreachable(Function *F) { + Visitor->markFunctionUnreachable(F); +} + +void SCCPSolver::visit(Instruction *I) { Visitor->visit(I); } + +void SCCPSolver::visitCall(CallInst &I) { Visitor->visitCall(I); } diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/SSAUpdater.cpp new file mode 100644 index 0000000000..2520aa5d9d --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/SSAUpdater.cpp @@ -0,0 +1,482 @@ +//===- SSAUpdater.cpp - Unstructured SSA Update Tool ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the SSAUpdater class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/SSAUpdaterImpl.h" +#include <cassert> +#include <utility> + +using namespace llvm; + +#define DEBUG_TYPE "ssaupdater" + +using AvailableValsTy = DenseMap<BasicBlock *, Value *>; + +static AvailableValsTy &getAvailableVals(void *AV) { + return *static_cast<AvailableValsTy*>(AV); +} + +SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode *> *NewPHI) + : InsertedPHIs(NewPHI) {} + +SSAUpdater::~SSAUpdater() { + delete static_cast<AvailableValsTy*>(AV); +} + +void SSAUpdater::Initialize(Type *Ty, StringRef Name) { + if (!AV) + AV = new AvailableValsTy(); + else + getAvailableVals(AV).clear(); + ProtoType = Ty; + ProtoName = std::string(Name); +} + +bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const { + return getAvailableVals(AV).count(BB); +} + +Value *SSAUpdater::FindValueForBlock(BasicBlock *BB) const { + return getAvailableVals(AV).lookup(BB); +} + +void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { + assert(ProtoType && "Need to initialize SSAUpdater"); + assert(ProtoType == V->getType() && + "All rewritten values must have the same type"); + getAvailableVals(AV)[BB] = V; +} + +static bool IsEquivalentPHI(PHINode *PHI, + SmallDenseMap<BasicBlock *, Value *, 8> &ValueMapping) { + unsigned PHINumValues = PHI->getNumIncomingValues(); + if (PHINumValues != ValueMapping.size()) + return false; + + // Scan the phi to see if it matches. 
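// Editorial note, not part of this patch: the existing PHI is reusable only if
// every one of its incoming (block, value) pairs matches the value we would
// otherwise feed into a freshly inserted PHI from that predecessor, which is what
// the loop below checks.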
+ for (unsigned i = 0, e = PHINumValues; i != e; ++i) + if (ValueMapping[PHI->getIncomingBlock(i)] != + PHI->getIncomingValue(i)) { + return false; + } + + return true; +} + +Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) { + Value *Res = GetValueAtEndOfBlockInternal(BB); + return Res; +} + +Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { + // If there is no definition of the renamed variable in this block, just use + // GetValueAtEndOfBlock to do our work. + if (!HasValueForBlock(BB)) + return GetValueAtEndOfBlock(BB); + + // Otherwise, we have the hard case. Get the live-in values for each + // predecessor. + SmallVector<std::pair<BasicBlock *, Value *>, 8> PredValues; + Value *SingularValue = nullptr; + + // We can get our predecessor info by walking the pred_iterator list, but it + // is relatively slow. If we already have PHI nodes in this block, walk one + // of them to get the predecessor list instead. + if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { + for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) { + BasicBlock *PredBB = SomePhi->getIncomingBlock(i); + Value *PredVal = GetValueAtEndOfBlock(PredBB); + PredValues.push_back(std::make_pair(PredBB, PredVal)); + + // Compute SingularValue. + if (i == 0) + SingularValue = PredVal; + else if (PredVal != SingularValue) + SingularValue = nullptr; + } + } else { + bool isFirstPred = true; + for (BasicBlock *PredBB : predecessors(BB)) { + Value *PredVal = GetValueAtEndOfBlock(PredBB); + PredValues.push_back(std::make_pair(PredBB, PredVal)); + + // Compute SingularValue. + if (isFirstPred) { + SingularValue = PredVal; + isFirstPred = false; + } else if (PredVal != SingularValue) + SingularValue = nullptr; + } + } + + // If there are no predecessors, just return undef. + if (PredValues.empty()) + return UndefValue::get(ProtoType); + + // Otherwise, if all the merged values are the same, just use it. + if (SingularValue) + return SingularValue; + + // Otherwise, we do need a PHI: check to see if we already have one available + // in this block that produces the right value. + if (isa<PHINode>(BB->begin())) { + SmallDenseMap<BasicBlock *, Value *, 8> ValueMapping(PredValues.begin(), + PredValues.end()); + for (PHINode &SomePHI : BB->phis()) { + if (IsEquivalentPHI(&SomePHI, ValueMapping)) + return &SomePHI; + } + } + + // Ok, we have no way out, insert a new one now. + PHINode *InsertedPHI = PHINode::Create(ProtoType, PredValues.size(), + ProtoName, &BB->front()); + + // Fill in all the predecessors of the PHI. + for (const auto &PredValue : PredValues) + InsertedPHI->addIncoming(PredValue.second, PredValue.first); + + // See if the PHI node can be merged to a single value. This can happen in + // loop cases when we get a PHI of itself and one other value. + if (Value *V = + simplifyInstruction(InsertedPHI, BB->getModule()->getDataLayout())) { + InsertedPHI->eraseFromParent(); + return V; + } + + // Set the DebugLoc of the inserted PHI, if available. + DebugLoc DL; + if (const Instruction *I = BB->getFirstNonPHI()) + DL = I->getDebugLoc(); + InsertedPHI->setDebugLoc(DL); + + // If the client wants to know about all new instructions, tell it. 
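// Editorial sketch, not part of this patch: typical client-side use of the
// SSAUpdater API in this file. Ty, BB1, BB2, V1, V2 and U are hypothetical values
// owned by the caller; LoadAndStorePromoter further below is the in-tree client.
SmallVector<PHINode *, 4> NewPHIs;
SSAUpdater Updater(&NewPHIs);
Updater.Initialize(Ty, "promoted");
Updater.AddAvailableValue(BB1, V1);  // definition reaching the end of BB1
Updater.AddAvailableValue(BB2, V2);  // definition reaching the end of BB2
Updater.RewriteUse(U);               // U is a Use; PHIs are inserted on demand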
+ if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); + + LLVM_DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); + return InsertedPHI; +} + +void SSAUpdater::RewriteUse(Use &U) { + Instruction *User = cast<Instruction>(U.getUser()); + + Value *V; + if (PHINode *UserPN = dyn_cast<PHINode>(User)) + V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U)); + else + V = GetValueInMiddleOfBlock(User->getParent()); + + U.set(V); +} + +void SSAUpdater::RewriteUseAfterInsertions(Use &U) { + Instruction *User = cast<Instruction>(U.getUser()); + + Value *V; + if (PHINode *UserPN = dyn_cast<PHINode>(User)) + V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U)); + else + V = GetValueAtEndOfBlock(User->getParent()); + + U.set(V); +} + +namespace llvm { + +template<> +class SSAUpdaterTraits<SSAUpdater> { +public: + using BlkT = BasicBlock; + using ValT = Value *; + using PhiT = PHINode; + using BlkSucc_iterator = succ_iterator; + + static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return succ_begin(BB); } + static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return succ_end(BB); } + + class PHI_iterator { + private: + PHINode *PHI; + unsigned idx; + + public: + explicit PHI_iterator(PHINode *P) // begin iterator + : PHI(P), idx(0) {} + PHI_iterator(PHINode *P, bool) // end iterator + : PHI(P), idx(PHI->getNumIncomingValues()) {} + + PHI_iterator &operator++() { ++idx; return *this; } + bool operator==(const PHI_iterator& x) const { return idx == x.idx; } + bool operator!=(const PHI_iterator& x) const { return !operator==(x); } + + Value *getIncomingValue() { return PHI->getIncomingValue(idx); } + BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); } + }; + + static PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); } + static PHI_iterator PHI_end(PhiT *PHI) { + return PHI_iterator(PHI, true); + } + + /// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds + /// vector, set Info->NumPreds, and allocate space in Info->Preds. + static void FindPredecessorBlocks(BasicBlock *BB, + SmallVectorImpl<BasicBlock *> *Preds) { + // We can get our predecessor info by walking the pred_iterator list, + // but it is relatively slow. If we already have PHI nodes in this + // block, walk one of them to get the predecessor list instead. + if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) + append_range(*Preds, SomePhi->blocks()); + else + append_range(*Preds, predecessors(BB)); + } + + /// GetUndefVal - Get an undefined value of the same type as the value + /// being handled. + static Value *GetUndefVal(BasicBlock *BB, SSAUpdater *Updater) { + return UndefValue::get(Updater->ProtoType); + } + + /// CreateEmptyPHI - Create a new PHI instruction in the specified block. + /// Reserve space for the operands but do not fill them in yet. + static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds, + SSAUpdater *Updater) { + PHINode *PHI = PHINode::Create(Updater->ProtoType, NumPreds, + Updater->ProtoName, &BB->front()); + return PHI; + } + + /// AddPHIOperand - Add the specified value as an operand of the PHI for + /// the specified predecessor block. + static void AddPHIOperand(PHINode *PHI, Value *Val, BasicBlock *Pred) { + PHI->addIncoming(Val, Pred); + } + + /// ValueIsPHI - Check if a value is a PHI. + static PHINode *ValueIsPHI(Value *Val, SSAUpdater *Updater) { + return dyn_cast<PHINode>(Val); + } + + /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source + /// operands, i.e., it was just added. 
+ static PHINode *ValueIsNewPHI(Value *Val, SSAUpdater *Updater) { + PHINode *PHI = ValueIsPHI(Val, Updater); + if (PHI && PHI->getNumIncomingValues() == 0) + return PHI; + return nullptr; + } + + /// GetPHIValue - For the specified PHI instruction, return the value + /// that it defines. + static Value *GetPHIValue(PHINode *PHI) { + return PHI; + } +}; + +} // end namespace llvm + +/// Check to see if AvailableVals has an entry for the specified BB and if so, +/// return it. If not, construct SSA form by first calculating the required +/// placement of PHIs and then inserting new PHIs where needed. +Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { + AvailableValsTy &AvailableVals = getAvailableVals(AV); + if (Value *V = AvailableVals[BB]) + return V; + + SSAUpdaterImpl<SSAUpdater> Impl(this, &AvailableVals, InsertedPHIs); + return Impl.GetValue(BB); +} + +//===----------------------------------------------------------------------===// +// LoadAndStorePromoter Implementation +//===----------------------------------------------------------------------===// + +LoadAndStorePromoter:: +LoadAndStorePromoter(ArrayRef<const Instruction *> Insts, + SSAUpdater &S, StringRef BaseName) : SSA(S) { + if (Insts.empty()) return; + + const Value *SomeVal; + if (const LoadInst *LI = dyn_cast<LoadInst>(Insts[0])) + SomeVal = LI; + else + SomeVal = cast<StoreInst>(Insts[0])->getOperand(0); + + if (BaseName.empty()) + BaseName = SomeVal->getName(); + SSA.Initialize(SomeVal->getType(), BaseName); +} + +void LoadAndStorePromoter::run(const SmallVectorImpl<Instruction *> &Insts) { + // First step: bucket up uses of the alloca by the block they occur in. + // This is important because we have to handle multiple defs/uses in a block + // ourselves: SSAUpdater is purely for cross-block references. + DenseMap<BasicBlock *, TinyPtrVector<Instruction *>> UsesByBlock; + + for (Instruction *User : Insts) + UsesByBlock[User->getParent()].push_back(User); + + // Okay, now we can iterate over all the blocks in the function with uses, + // processing them. Keep track of which loads are loading a live-in value. + // Walk the uses in the use-list order to be determinstic. + SmallVector<LoadInst *, 32> LiveInLoads; + DenseMap<Value *, Value *> ReplacedLoads; + + for (Instruction *User : Insts) { + BasicBlock *BB = User->getParent(); + TinyPtrVector<Instruction *> &BlockUses = UsesByBlock[BB]; + + // If this block has already been processed, ignore this repeat use. + if (BlockUses.empty()) continue; + + // Okay, this is the first use in the block. If this block just has a + // single user in it, we can rewrite it trivially. + if (BlockUses.size() == 1) { + // If it is a store, it is a trivial def of the value in the block. + if (StoreInst *SI = dyn_cast<StoreInst>(User)) { + updateDebugInfo(SI); + SSA.AddAvailableValue(BB, SI->getOperand(0)); + } else + // Otherwise it is a load, queue it to rewrite as a live-in load. + LiveInLoads.push_back(cast<LoadInst>(User)); + BlockUses.clear(); + continue; + } + + // Otherwise, check to see if this block is all loads. + bool HasStore = false; + for (Instruction *I : BlockUses) { + if (isa<StoreInst>(I)) { + HasStore = true; + break; + } + } + + // If so, we can queue them all as live in loads. We don't have an + // efficient way to tell which on is first in the block and don't want to + // scan large blocks, so just add all loads as live ins. 
+ if (!HasStore) { + for (Instruction *I : BlockUses) + LiveInLoads.push_back(cast<LoadInst>(I)); + BlockUses.clear(); + continue; + } + + // Otherwise, we have mixed loads and stores (or just a bunch of stores). + // Since SSAUpdater is purely for cross-block values, we need to determine + // the order of these instructions in the block. If the first use in the + // block is a load, then it uses the live in value. The last store defines + // the live out value. We handle this by doing a linear scan of the block. + Value *StoredValue = nullptr; + for (Instruction &I : *BB) { + if (LoadInst *L = dyn_cast<LoadInst>(&I)) { + // If this is a load from an unrelated pointer, ignore it. + if (!isInstInList(L, Insts)) continue; + + // If we haven't seen a store yet, this is a live in use, otherwise + // use the stored value. + if (StoredValue) { + replaceLoadWithValue(L, StoredValue); + L->replaceAllUsesWith(StoredValue); + ReplacedLoads[L] = StoredValue; + } else { + LiveInLoads.push_back(L); + } + continue; + } + + if (StoreInst *SI = dyn_cast<StoreInst>(&I)) { + // If this is a store to an unrelated pointer, ignore it. + if (!isInstInList(SI, Insts)) continue; + updateDebugInfo(SI); + + // Remember that this is the active value in the block. + StoredValue = SI->getOperand(0); + } + } + + // The last stored value that happened is the live-out for the block. + assert(StoredValue && "Already checked that there is a store in block"); + SSA.AddAvailableValue(BB, StoredValue); + BlockUses.clear(); + } + + // Okay, now we rewrite all loads that use live-in values in the loop, + // inserting PHI nodes as necessary. + for (LoadInst *ALoad : LiveInLoads) { + Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent()); + replaceLoadWithValue(ALoad, NewVal); + + // Avoid assertions in unreachable code. + if (NewVal == ALoad) NewVal = PoisonValue::get(NewVal->getType()); + ALoad->replaceAllUsesWith(NewVal); + ReplacedLoads[ALoad] = NewVal; + } + + // Allow the client to do stuff before we start nuking things. + doExtraRewritesBeforeFinalDeletion(); + + // Now that everything is rewritten, delete the old instructions from the + // function. They should all be dead now. + for (Instruction *User : Insts) { + if (!shouldDelete(User)) + continue; + + // If this is a load that still has uses, then the load must have been added + // as a live value in the SSAUpdate data structure for a block (e.g. because + // the loaded value was stored later). In this case, we need to recursively + // propagate the updates until we get to the real value. + if (!User->use_empty()) { + Value *NewVal = ReplacedLoads[User]; + assert(NewVal && "not a replaced load?"); + + // Propagate down to the ultimate replacee. The intermediately loads + // could theoretically already have been deleted, so we don't want to + // dereference the Value*'s. 
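// Editorial note, not part of this patch: for example, if load L1 was replaced by
// load L2 earlier in this pass and L2 was in turn replaced by value %v, the walk
// below resolves L1 all the way to %v while using the intermediate (possibly
// already deleted) loads only as map keys.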
+ DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal); + while (RLI != ReplacedLoads.end()) { + NewVal = RLI->second; + RLI = ReplacedLoads.find(NewVal); + } + + replaceLoadWithValue(cast<LoadInst>(User), NewVal); + User->replaceAllUsesWith(NewVal); + } + + instructionDeleted(User); + User->eraseFromParent(); + } +} + +bool +LoadAndStorePromoter::isInstInList(Instruction *I, + const SmallVectorImpl<Instruction *> &Insts) + const { + return is_contained(Insts, I); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/SSAUpdaterBulk.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/SSAUpdaterBulk.cpp new file mode 100644 index 0000000000..cad7ff64c0 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/SSAUpdaterBulk.cpp @@ -0,0 +1,184 @@ +//===- SSAUpdaterBulk.cpp - Unstructured SSA Update Tool ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the SSAUpdaterBulk class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SSAUpdaterBulk.h" +#include "llvm/Analysis/IteratedDominanceFrontier.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/Value.h" + +using namespace llvm; + +#define DEBUG_TYPE "ssaupdaterbulk" + +/// Helper function for finding a block which should have a value for the given +/// user. For PHI-nodes this block is the corresponding predecessor, for other +/// instructions it's their parent block. +static BasicBlock *getUserBB(Use *U) { + auto *User = cast<Instruction>(U->getUser()); + + if (auto *UserPN = dyn_cast<PHINode>(User)) + return UserPN->getIncomingBlock(*U); + else + return User->getParent(); +} + +/// Add a new variable to the SSA rewriter. This needs to be called before +/// AddAvailableValue or AddUse calls. +unsigned SSAUpdaterBulk::AddVariable(StringRef Name, Type *Ty) { + unsigned Var = Rewrites.size(); + LLVM_DEBUG(dbgs() << "SSAUpdater: Var=" << Var << ": initialized with Ty = " + << *Ty << ", Name = " << Name << "\n"); + RewriteInfo RI(Name, Ty); + Rewrites.push_back(RI); + return Var; +} + +/// Indicate that a rewritten value is available in the specified block with the +/// specified value. +void SSAUpdaterBulk::AddAvailableValue(unsigned Var, BasicBlock *BB, Value *V) { + assert(Var < Rewrites.size() && "Variable not found!"); + LLVM_DEBUG(dbgs() << "SSAUpdater: Var=" << Var + << ": added new available value " << *V << " in " + << BB->getName() << "\n"); + Rewrites[Var].Defines[BB] = V; +} + +/// Record a use of the symbolic value. This use will be updated with a +/// rewritten value when RewriteAllUses is called. +void SSAUpdaterBulk::AddUse(unsigned Var, Use *U) { + assert(Var < Rewrites.size() && "Variable not found!"); + LLVM_DEBUG(dbgs() << "SSAUpdater: Var=" << Var << ": added a use" << *U->get() + << " in " << getUserBB(U)->getName() << "\n"); + Rewrites[Var].Uses.push_back(U); +} + +// Compute value at the given block BB. We either should already know it, or we +// should be able to recursively reach it going up dominator tree. 
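// Editorial sketch, not part of this patch: how a client drives the public
// SSAUpdaterBulk API declared above. Ty, BB, DefV, U and DT are hypothetical
// values owned by the caller; all PHI placement happens in one batch.
SSAUpdaterBulk BulkUpdater;
unsigned Var = BulkUpdater.AddVariable("x", Ty);
BulkUpdater.AddAvailableValue(Var, BB, DefV);  // a def of "x" at the end of BB
BulkUpdater.AddUse(Var, U);                    // U is a Use * to be rewritten
BulkUpdater.RewriteAllUses(DT, /*InsertedPHIs=*/nullptr);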
+Value *SSAUpdaterBulk::computeValueAt(BasicBlock *BB, RewriteInfo &R, + DominatorTree *DT) { + if (!R.Defines.count(BB)) { + if (DT->isReachableFromEntry(BB) && PredCache.get(BB).size()) { + BasicBlock *IDom = DT->getNode(BB)->getIDom()->getBlock(); + Value *V = computeValueAt(IDom, R, DT); + R.Defines[BB] = V; + } else + R.Defines[BB] = UndefValue::get(R.Ty); + } + return R.Defines[BB]; +} + +/// Given sets of UsingBlocks and DefBlocks, compute the set of LiveInBlocks. +/// This is basically a subgraph limited by DefBlocks and UsingBlocks. +static void +ComputeLiveInBlocks(const SmallPtrSetImpl<BasicBlock *> &UsingBlocks, + const SmallPtrSetImpl<BasicBlock *> &DefBlocks, + SmallPtrSetImpl<BasicBlock *> &LiveInBlocks, + PredIteratorCache &PredCache) { + // To determine liveness, we must iterate through the predecessors of blocks + // where the def is live. Blocks are added to the worklist if we need to + // check their predecessors. Start with all the using blocks. + SmallVector<BasicBlock *, 64> LiveInBlockWorklist(UsingBlocks.begin(), + UsingBlocks.end()); + + // Now that we have a set of blocks where the phi is live-in, recursively add + // their predecessors until we find the full region the value is live. + while (!LiveInBlockWorklist.empty()) { + BasicBlock *BB = LiveInBlockWorklist.pop_back_val(); + + // The block really is live in here, insert it into the set. If already in + // the set, then it has already been processed. + if (!LiveInBlocks.insert(BB).second) + continue; + + // Since the value is live into BB, it is either defined in a predecessor or + // live into it to. Add the preds to the worklist unless they are a + // defining block. + for (BasicBlock *P : PredCache.get(BB)) { + // The value is not live into a predecessor if it defines the value. + if (DefBlocks.count(P)) + continue; + + // Otherwise it is, add to the worklist. + LiveInBlockWorklist.push_back(P); + } + } +} + +/// Perform all the necessary updates, including new PHI-nodes insertion and the +/// requested uses update. +void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT, + SmallVectorImpl<PHINode *> *InsertedPHIs) { + for (auto &R : Rewrites) { + // Compute locations for new phi-nodes. + // For that we need to initialize DefBlocks from definitions in R.Defines, + // UsingBlocks from uses in R.Uses, then compute LiveInBlocks, and then use + // this set for computing iterated dominance frontier (IDF). + // The IDF blocks are the blocks where we need to insert new phi-nodes. + ForwardIDFCalculator IDF(*DT); + LLVM_DEBUG(dbgs() << "SSAUpdater: rewriting " << R.Uses.size() + << " use(s)\n"); + + SmallPtrSet<BasicBlock *, 2> DefBlocks; + for (auto &Def : R.Defines) + DefBlocks.insert(Def.first); + IDF.setDefiningBlocks(DefBlocks); + + SmallPtrSet<BasicBlock *, 2> UsingBlocks; + for (Use *U : R.Uses) + UsingBlocks.insert(getUserBB(U)); + + SmallVector<BasicBlock *, 32> IDFBlocks; + SmallPtrSet<BasicBlock *, 32> LiveInBlocks; + ComputeLiveInBlocks(UsingBlocks, DefBlocks, LiveInBlocks, PredCache); + IDF.resetLiveInBlocks(); + IDF.setLiveInBlocks(LiveInBlocks); + IDF.calculate(IDFBlocks); + + // We've computed IDF, now insert new phi-nodes there. + SmallVector<PHINode *, 4> InsertedPHIsForVar; + for (auto *FrontierBB : IDFBlocks) { + IRBuilder<> B(FrontierBB, FrontierBB->begin()); + PHINode *PN = B.CreatePHI(R.Ty, 0, R.Name); + R.Defines[FrontierBB] = PN; + InsertedPHIsForVar.push_back(PN); + if (InsertedPHIs) + InsertedPHIs->push_back(PN); + } + + // Fill in arguments of the inserted PHIs. 
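// Editorial note, not part of this patch: each incoming value of a new PHI is the
// reaching definition at the end of the corresponding predecessor, which
// computeValueAt above resolves lazily by walking up the dominator tree and
// caching the result in R.Defines.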
+ for (auto *PN : InsertedPHIsForVar) { + BasicBlock *PBB = PN->getParent(); + for (BasicBlock *Pred : PredCache.get(PBB)) + PN->addIncoming(computeValueAt(Pred, R, DT), Pred); + } + + // Rewrite actual uses with the inserted definitions. + SmallPtrSet<Use *, 4> ProcessedUses; + for (Use *U : R.Uses) { + if (!ProcessedUses.insert(U).second) + continue; + Value *V = computeValueAt(getUserBB(U), R, DT); + Value *OldVal = U->get(); + assert(OldVal && "Invalid use!"); + // Notify that users of the existing value that it is being replaced. + if (OldVal != V && OldVal->hasValueHandle()) + ValueHandleBase::ValueIsRAUWd(OldVal, V); + LLVM_DEBUG(dbgs() << "SSAUpdater: replacing " << *OldVal << " with " << *V + << "\n"); + U->set(V); + } + } +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/SampleProfileInference.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/SampleProfileInference.cpp new file mode 100644 index 0000000000..691ee00bd8 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/SampleProfileInference.cpp @@ -0,0 +1,1347 @@ +//===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a profile inference algorithm. Given an incomplete and +// possibly imprecise block counts, the algorithm reconstructs realistic block +// and edge counts that satisfy flow conservation rules, while minimally modify +// input block counts. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SampleProfileInference.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include <queue> +#include <set> +#include <stack> + +using namespace llvm; +#define DEBUG_TYPE "sample-profile-inference" + +namespace { + +static cl::opt<bool> SampleProfileEvenFlowDistribution( + "sample-profile-even-flow-distribution", cl::init(true), cl::Hidden, + cl::desc("Try to evenly distribute flow when there are multiple equally " + "likely options.")); + +static cl::opt<bool> SampleProfileRebalanceUnknown( + "sample-profile-rebalance-unknown", cl::init(true), cl::Hidden, + cl::desc("Evenly re-distribute flow among unknown subgraphs.")); + +static cl::opt<bool> SampleProfileJoinIslands( + "sample-profile-join-islands", cl::init(true), cl::Hidden, + cl::desc("Join isolated components having positive flow.")); + +static cl::opt<unsigned> SampleProfileProfiCostBlockInc( + "sample-profile-profi-cost-block-inc", cl::init(10), cl::Hidden, + cl::desc("The cost of increasing a block's count by one.")); + +static cl::opt<unsigned> SampleProfileProfiCostBlockDec( + "sample-profile-profi-cost-block-dec", cl::init(20), cl::Hidden, + cl::desc("The cost of decreasing a block's count by one.")); + +static cl::opt<unsigned> SampleProfileProfiCostBlockEntryInc( + "sample-profile-profi-cost-block-entry-inc", cl::init(40), cl::Hidden, + cl::desc("The cost of increasing the entry block's count by one.")); + +static cl::opt<unsigned> SampleProfileProfiCostBlockEntryDec( + "sample-profile-profi-cost-block-entry-dec", cl::init(10), cl::Hidden, + cl::desc("The cost of decreasing the entry block's count by one.")); + +static cl::opt<unsigned> SampleProfileProfiCostBlockZeroInc( + 
"sample-profile-profi-cost-block-zero-inc", cl::init(11), cl::Hidden, + cl::desc("The cost of increasing a count of zero-weight block by one.")); + +static cl::opt<unsigned> SampleProfileProfiCostBlockUnknownInc( + "sample-profile-profi-cost-block-unknown-inc", cl::init(0), cl::Hidden, + cl::desc("The cost of increasing an unknown block's count by one.")); + +/// A value indicating an infinite flow/capacity/weight of a block/edge. +/// Not using numeric_limits<int64_t>::max(), as the values can be summed up +/// during the execution. +static constexpr int64_t INF = ((int64_t)1) << 50; + +/// The minimum-cost maximum flow algorithm. +/// +/// The algorithm finds the maximum flow of minimum cost on a given (directed) +/// network using a modified version of the classical Moore-Bellman-Ford +/// approach. The algorithm applies a number of augmentation iterations in which +/// flow is sent along paths of positive capacity from the source to the sink. +/// The worst-case time complexity of the implementation is O(v(f)*m*n), where +/// where m is the number of edges, n is the number of vertices, and v(f) is the +/// value of the maximum flow. However, the observed running time on typical +/// instances is sub-quadratic, that is, o(n^2). +/// +/// The input is a set of edges with specified costs and capacities, and a pair +/// of nodes (source and sink). The output is the flow along each edge of the +/// minimum total cost respecting the given edge capacities. +class MinCostMaxFlow { +public: + MinCostMaxFlow(const ProfiParams &Params) : Params(Params) {} + + // Initialize algorithm's data structures for a network of a given size. + void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) { + Source = SourceNode; + Target = SinkNode; + + Nodes = std::vector<Node>(NodeCount); + Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>()); + if (Params.EvenFlowDistribution) + AugmentingEdges = + std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>()); + } + + // Run the algorithm. + int64_t run() { + LLVM_DEBUG(dbgs() << "Starting profi for " << Nodes.size() << " nodes\n"); + + // Iteratively find an augmentation path/dag in the network and send the + // flow along its edges + size_t AugmentationIters = applyFlowAugmentation(); + + // Compute the total flow and its cost + int64_t TotalCost = 0; + int64_t TotalFlow = 0; + for (uint64_t Src = 0; Src < Nodes.size(); Src++) { + for (auto &Edge : Edges[Src]) { + if (Edge.Flow > 0) { + TotalCost += Edge.Cost * Edge.Flow; + if (Src == Source) + TotalFlow += Edge.Flow; + } + } + } + LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters + << " iterations with " << TotalFlow << " total flow" + << " of " << TotalCost << " cost\n"); + (void)TotalFlow; + (void)AugmentationIters; + return TotalCost; + } + + /// Adding an edge to the network with a specified capacity and a cost. + /// Multiple edges between a pair of nodes are allowed but self-edges + /// are not supported. 
+ void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) { + assert(Capacity > 0 && "adding an edge of zero capacity"); + assert(Src != Dst && "loop edge are not supported"); + + Edge SrcEdge; + SrcEdge.Dst = Dst; + SrcEdge.Cost = Cost; + SrcEdge.Capacity = Capacity; + SrcEdge.Flow = 0; + SrcEdge.RevEdgeIndex = Edges[Dst].size(); + + Edge DstEdge; + DstEdge.Dst = Src; + DstEdge.Cost = -Cost; + DstEdge.Capacity = 0; + DstEdge.Flow = 0; + DstEdge.RevEdgeIndex = Edges[Src].size(); + + Edges[Src].push_back(SrcEdge); + Edges[Dst].push_back(DstEdge); + } + + /// Adding an edge to the network of infinite capacity and a given cost. + void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) { + addEdge(Src, Dst, INF, Cost); + } + + /// Get the total flow from a given source node. + /// Returns a list of pairs (target node, amount of flow to the target). + const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const { + std::vector<std::pair<uint64_t, int64_t>> Flow; + for (const auto &Edge : Edges[Src]) { + if (Edge.Flow > 0) + Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow)); + } + return Flow; + } + + /// Get the total flow between a pair of nodes. + int64_t getFlow(uint64_t Src, uint64_t Dst) const { + int64_t Flow = 0; + for (const auto &Edge : Edges[Src]) { + if (Edge.Dst == Dst) { + Flow += Edge.Flow; + } + } + return Flow; + } + +private: + /// Iteratively find an augmentation path/dag in the network and send the + /// flow along its edges. The method returns the number of applied iterations. + size_t applyFlowAugmentation() { + size_t AugmentationIters = 0; + while (findAugmentingPath()) { + uint64_t PathCapacity = computeAugmentingPathCapacity(); + while (PathCapacity > 0) { + bool Progress = false; + if (Params.EvenFlowDistribution) { + // Identify node/edge candidates for augmentation + identifyShortestEdges(PathCapacity); + + // Find an augmenting DAG + auto AugmentingOrder = findAugmentingDAG(); + + // Apply the DAG augmentation + Progress = augmentFlowAlongDAG(AugmentingOrder); + PathCapacity = computeAugmentingPathCapacity(); + } + + if (!Progress) { + augmentFlowAlongPath(PathCapacity); + PathCapacity = 0; + } + + AugmentationIters++; + } + } + return AugmentationIters; + } + + /// Compute the capacity of the cannonical augmenting path. If the path is + /// saturated (that is, no flow can be sent along the path), then return 0. + uint64_t computeAugmentingPathCapacity() { + uint64_t PathCapacity = INF; + uint64_t Now = Target; + while (Now != Source) { + uint64_t Pred = Nodes[Now].ParentNode; + auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; + + assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow"); + uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow); + PathCapacity = std::min(PathCapacity, EdgeCapacity); + + Now = Pred; + } + return PathCapacity; + } + + /// Check for existence of an augmenting path with a positive capacity. 
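// Editorial note, not part of this patch: this is a queue-based Bellman-Ford
// (SPFA-style) shortest-path search over the residual network; because every
// addEdge call above also registered a zero-capacity reverse edge with negated
// cost, later augmentations can undo flow sent by earlier ones.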
+ bool findAugmentingPath() { + // Initialize data structures + for (auto &Node : Nodes) { + Node.Distance = INF; + Node.ParentNode = uint64_t(-1); + Node.ParentEdgeIndex = uint64_t(-1); + Node.Taken = false; + } + + std::queue<uint64_t> Queue; + Queue.push(Source); + Nodes[Source].Distance = 0; + Nodes[Source].Taken = true; + while (!Queue.empty()) { + uint64_t Src = Queue.front(); + Queue.pop(); + Nodes[Src].Taken = false; + // Although the residual network contains edges with negative costs + // (in particular, backward edges), it can be shown that there are no + // negative-weight cycles and the following two invariants are maintained: + // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V, + // where Dist is the length of the shortest path between two nodes. This + // allows to prune the search-space of the path-finding algorithm using + // the following early-stop criteria: + // -- If we find a path with zero-distance from Source to Target, stop the + // search, as the path is the shortest since Dist[Source, Target] >= 0; + // -- If we have Dist[Source, V] > Dist[Source, Target], then do not + // process node V, as it is guaranteed _not_ to be on a shortest path + // from Source to Target; it follows from inequalities + // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target] + // >= Dist[Source, V] + if (!Params.EvenFlowDistribution && Nodes[Target].Distance == 0) + break; + if (Nodes[Src].Distance > Nodes[Target].Distance) + continue; + + // Process adjacent edges + for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) { + auto &Edge = Edges[Src][EdgeIdx]; + if (Edge.Flow < Edge.Capacity) { + uint64_t Dst = Edge.Dst; + int64_t NewDistance = Nodes[Src].Distance + Edge.Cost; + if (Nodes[Dst].Distance > NewDistance) { + // Update the distance and the parent node/edge + Nodes[Dst].Distance = NewDistance; + Nodes[Dst].ParentNode = Src; + Nodes[Dst].ParentEdgeIndex = EdgeIdx; + // Add the node to the queue, if it is not there yet + if (!Nodes[Dst].Taken) { + Queue.push(Dst); + Nodes[Dst].Taken = true; + } + } + } + } + } + + return Nodes[Target].Distance != INF; + } + + /// Update the current flow along the augmenting path. + void augmentFlowAlongPath(uint64_t PathCapacity) { + assert(PathCapacity > 0 && "found an incorrect augmenting path"); + uint64_t Now = Target; + while (Now != Source) { + uint64_t Pred = Nodes[Now].ParentNode; + auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; + auto &RevEdge = Edges[Now][Edge.RevEdgeIndex]; + + Edge.Flow += PathCapacity; + RevEdge.Flow -= PathCapacity; + + Now = Pred; + } + } + + /// Find an Augmenting DAG order using a modified version of DFS in which we + /// can visit a node multiple times. In the DFS search, when scanning each + /// edge out of a node, continue search at Edge.Dst endpoint if it has not + /// been discovered yet and its NumCalls < MaxDfsCalls. The algorithm + /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time. + /// It returns an Augmenting Order (Taken nodes in decreasing Finish time) + /// that starts with Source and ends with Target. + std::vector<uint64_t> findAugmentingDAG() { + // We use a stack based implemenation of DFS to avoid recursion. 
+ // Defining DFS data structures: + // A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that + // - we are currently visiting Nodes[NodeIdx] and + // - the next edge to scan is Edges[NodeIdx][EdgeIdx] + typedef std::pair<uint64_t, uint64_t> StackItemType; + std::stack<StackItemType> Stack; + std::vector<uint64_t> AugmentingOrder; + + // Phase 0: Initialize Node attributes and Time for DFS run + for (auto &Node : Nodes) { + Node.Discovery = 0; + Node.Finish = 0; + Node.NumCalls = 0; + Node.Taken = false; + } + uint64_t Time = 0; + // Mark Target as Taken + // Taken attribute will be propagated backwards from Target towards Source + Nodes[Target].Taken = true; + + // Phase 1: Start DFS traversal from Source + Stack.emplace(Source, 0); + Nodes[Source].Discovery = ++Time; + while (!Stack.empty()) { + auto NodeIdx = Stack.top().first; + auto EdgeIdx = Stack.top().second; + + // If we haven't scanned all edges out of NodeIdx, continue scanning + if (EdgeIdx < Edges[NodeIdx].size()) { + auto &Edge = Edges[NodeIdx][EdgeIdx]; + auto &Dst = Nodes[Edge.Dst]; + Stack.top().second++; + + if (Edge.OnShortestPath) { + // If we haven't seen Edge.Dst so far, continue DFS search there + if (Dst.Discovery == 0 && Dst.NumCalls < MaxDfsCalls) { + Dst.Discovery = ++Time; + Stack.emplace(Edge.Dst, 0); + Dst.NumCalls++; + } else if (Dst.Taken && Dst.Finish != 0) { + // Else, if Edge.Dst already have a path to Target, so that NodeIdx + Nodes[NodeIdx].Taken = true; + } + } + } else { + // If we are done scanning all edge out of NodeIdx + Stack.pop(); + // If we haven't found a path from NodeIdx to Target, forget about it + if (!Nodes[NodeIdx].Taken) { + Nodes[NodeIdx].Discovery = 0; + } else { + // If we have found a path from NodeIdx to Target, then finish NodeIdx + // and propagate Taken flag to DFS parent unless at the Source + Nodes[NodeIdx].Finish = ++Time; + // NodeIdx == Source if and only if the stack is empty + if (NodeIdx != Source) { + assert(!Stack.empty() && "empty stack while running dfs"); + Nodes[Stack.top().first].Taken = true; + } + AugmentingOrder.push_back(NodeIdx); + } + } + } + // Nodes are collected decreasing Finish time, so the order is reversed + std::reverse(AugmentingOrder.begin(), AugmentingOrder.end()); + + // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges + for (size_t Src : AugmentingOrder) { + AugmentingEdges[Src].clear(); + for (auto &Edge : Edges[Src]) { + uint64_t Dst = Edge.Dst; + if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken && + Nodes[Dst].Finish < Nodes[Src].Finish) { + AugmentingEdges[Src].push_back(&Edge); + } + } + assert((Src == Target || !AugmentingEdges[Src].empty()) && + "incorrectly constructed augmenting edges"); + } + + return AugmentingOrder; + } + + /// Update the current flow along the given (acyclic) subgraph specified by + /// the vertex order, AugmentingOrder. The objective is to send as much flow + /// as possible while evenly distributing flow among successors of each node. + /// After the update at least one edge is saturated. 
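// Editorial note, not part of this patch: a small worked example of the Phase 2
// rounding below. If IntFlow = 5 arrives at a node with two outgoing DAG edges of
// ample capacity, SuccFlow = (5 + 2 - 1) / 2 = 3 and the successors receive 3 and
// 2 units; Phase 3 then walks the order backwards and hands back any units a node
// could not forward, so flow conservation is preserved.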
+ bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) { + // Phase 0: Initialization + for (uint64_t Src : AugmentingOrder) { + Nodes[Src].FracFlow = 0; + Nodes[Src].IntFlow = 0; + for (auto &Edge : AugmentingEdges[Src]) { + Edge->AugmentedFlow = 0; + } + } + + // Phase 1: Send a unit of fractional flow along the DAG + uint64_t MaxFlowAmount = INF; + Nodes[Source].FracFlow = 1.0; + for (uint64_t Src : AugmentingOrder) { + assert((Src == Target || Nodes[Src].FracFlow > 0.0) && + "incorrectly computed fractional flow"); + // Distribute flow evenly among successors of Src + uint64_t Degree = AugmentingEdges[Src].size(); + for (auto &Edge : AugmentingEdges[Src]) { + double EdgeFlow = Nodes[Src].FracFlow / Degree; + Nodes[Edge->Dst].FracFlow += EdgeFlow; + if (Edge->Capacity == INF) + continue; + uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow; + MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow); + } + } + // Stop early if we cannot send any (integral) flow from Source to Target + if (MaxFlowAmount == 0) + return false; + + // Phase 2: Send an integral flow of MaxFlowAmount + Nodes[Source].IntFlow = MaxFlowAmount; + for (uint64_t Src : AugmentingOrder) { + if (Src == Target) + break; + // Distribute flow evenly among successors of Src, rounding up to make + // sure all flow is sent + uint64_t Degree = AugmentingEdges[Src].size(); + // We are guaranteeed that Node[Src].IntFlow <= SuccFlow * Degree + uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree; + for (auto &Edge : AugmentingEdges[Src]) { + uint64_t Dst = Edge->Dst; + uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow); + EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow)); + Nodes[Dst].IntFlow += EdgeFlow; + Nodes[Src].IntFlow -= EdgeFlow; + Edge->AugmentedFlow += EdgeFlow; + } + } + assert(Nodes[Target].IntFlow <= MaxFlowAmount); + Nodes[Target].IntFlow = 0; + + // Phase 3: Send excess flow back traversing the nodes backwards. + // Because of rounding, not all flow can be sent along the edges of Src. + // Hence, sending the remaining flow back to maintain flow conservation + for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) { + uint64_t Src = AugmentingOrder[Idx - 1]; + // Try to send excess flow back along each edge. + // Make sure we only send back flow we just augmented (AugmentedFlow). + for (auto &Edge : AugmentingEdges[Src]) { + uint64_t Dst = Edge->Dst; + if (Nodes[Dst].IntFlow == 0) + continue; + uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow); + Nodes[Dst].IntFlow -= EdgeFlow; + Nodes[Src].IntFlow += EdgeFlow; + Edge->AugmentedFlow -= EdgeFlow; + } + } + + // Phase 4: Update flow values along all edges + bool HasSaturatedEdges = false; + for (uint64_t Src : AugmentingOrder) { + // Verify that we have sent all the excess flow from the node + assert(Src == Source || Nodes[Src].IntFlow == 0); + for (auto &Edge : AugmentingEdges[Src]) { + assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow); + // Update flow values along the edge and its reverse copy + auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex]; + Edge->Flow += Edge->AugmentedFlow; + RevEdge.Flow -= Edge->AugmentedFlow; + if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0) + HasSaturatedEdges = true; + } + } + + // The augmentation is successful iff at least one edge becomes saturated + return HasSaturatedEdges; + } + + /// Identify candidate (shortest) edges for augmentation. 
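// Editorial note, not part of this patch: "shortest" means the edge lies on some
// cheapest Source-to-Target path in the residual network (checked via the
// distance equality below) and still has residual capacity of at least
// max(PathCapacity / 2, 1), so the augmenting DAG keeps only edges that can carry
// a meaningful share of the flow.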
+ void identifyShortestEdges(uint64_t PathCapacity) { + assert(PathCapacity > 0 && "found an incorrect augmenting DAG"); + // To make sure the augmentation DAG contains only edges with large residual + // capacity, we prune all edges whose capacity is below a fraction of + // the capacity of the augmented path. + // (All edges of the path itself are always in the DAG) + uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1)); + + // Decide which edges are on a shortest path from Source to Target + for (size_t Src = 0; Src < Nodes.size(); Src++) { + // An edge cannot be augmenting if the endpoint has large distance + if (Nodes[Src].Distance > Nodes[Target].Distance) + continue; + + for (auto &Edge : Edges[Src]) { + uint64_t Dst = Edge.Dst; + Edge.OnShortestPath = + Src != Target && Dst != Source && + Nodes[Dst].Distance <= Nodes[Target].Distance && + Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost && + Edge.Capacity > Edge.Flow && + uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity; + } + } + } + + /// Maximum number of DFS iterations for DAG finding. + static constexpr uint64_t MaxDfsCalls = 10; + + /// A node in a flow network. + struct Node { + /// The cost of the cheapest path from the source to the current node. + int64_t Distance; + /// The node preceding the current one in the path. + uint64_t ParentNode; + /// The index of the edge between ParentNode and the current node. + uint64_t ParentEdgeIndex; + /// An indicator of whether the current node is in a queue. + bool Taken; + + /// Data fields utilized in DAG-augmentation: + /// Fractional flow. + double FracFlow; + /// Integral flow. + uint64_t IntFlow; + /// Discovery time. + uint64_t Discovery; + /// Finish time. + uint64_t Finish; + /// NumCalls. + uint64_t NumCalls; + }; + + /// An edge in a flow network. + struct Edge { + /// The cost of the edge. + int64_t Cost; + /// The capacity of the edge. + int64_t Capacity; + /// The current flow on the edge. + int64_t Flow; + /// The destination node of the edge. + uint64_t Dst; + /// The index of the reverse edge between Dst and the current node. + uint64_t RevEdgeIndex; + + /// Data fields utilized in DAG-augmentation: + /// Whether the edge is currently on a shortest path from Source to Target. + bool OnShortestPath; + /// Extra flow along the edge. + uint64_t AugmentedFlow; + }; + + /// The set of network nodes. + std::vector<Node> Nodes; + /// The set of network edges. + std::vector<std::vector<Edge>> Edges; + /// Source node of the flow. + uint64_t Source; + /// Target (sink) node of the flow. + uint64_t Target; + /// Augmenting edges. + std::vector<std::vector<Edge *>> AugmentingEdges; + /// Params for flow computation. + const ProfiParams &Params; +}; + +/// A post-processing adjustment of the control flow. It applies two steps by +/// rerouting some flow and making it more realistic: +/// +/// - First, it removes all isolated components ("islands") with a positive flow +/// that are unreachable from the entry block. For every such component, we +/// find the shortest from the entry to an exit passing through the component, +/// and increase the flow by one unit along the path. +/// +/// - Second, it identifies all "unknown subgraphs" consisting of basic blocks +/// with no sampled counts. Then it rebalnces the flow that goes through such +/// a subgraph so that each branch is taken with probability 50%. 
+/// An unknown subgraph is such that for every two nodes u and v: +/// - u dominates v and u is not unknown; +/// - v post-dominates u; and +/// - all inner-nodes of all (u,v)-paths are unknown. +/// +class FlowAdjuster { +public: + FlowAdjuster(const ProfiParams &Params, FlowFunction &Func) + : Params(Params), Func(Func) {} + + /// Apply the post-processing. + void run() { + if (Params.JoinIslands) { + // Adjust the flow to get rid of isolated components + joinIsolatedComponents(); + } + + if (Params.RebalanceUnknown) { + // Rebalance the flow inside unknown subgraphs + rebalanceUnknownSubgraphs(); + } + } + +private: + void joinIsolatedComponents() { + // Find blocks that are reachable from the source + auto Visited = BitVector(NumBlocks(), false); + findReachable(Func.Entry, Visited); + + // Iterate over all non-reachable blocks and adjust their weights + for (uint64_t I = 0; I < NumBlocks(); I++) { + auto &Block = Func.Blocks[I]; + if (Block.Flow > 0 && !Visited[I]) { + // Find a path from the entry to an exit passing through the block I + auto Path = findShortestPath(I); + // Increase the flow along the path + assert(Path.size() > 0 && Path[0]->Source == Func.Entry && + "incorrectly computed path adjusting control flow"); + Func.Blocks[Func.Entry].Flow += 1; + for (auto &Jump : Path) { + Jump->Flow += 1; + Func.Blocks[Jump->Target].Flow += 1; + // Update reachability + findReachable(Jump->Target, Visited); + } + } + } + } + + /// Run BFS from a given block along the jumps with a positive flow and mark + /// all reachable blocks. + void findReachable(uint64_t Src, BitVector &Visited) { + if (Visited[Src]) + return; + std::queue<uint64_t> Queue; + Queue.push(Src); + Visited[Src] = true; + while (!Queue.empty()) { + Src = Queue.front(); + Queue.pop(); + for (auto *Jump : Func.Blocks[Src].SuccJumps) { + uint64_t Dst = Jump->Target; + if (Jump->Flow > 0 && !Visited[Dst]) { + Queue.push(Dst); + Visited[Dst] = true; + } + } + } + } + + /// Find the shortest path from the entry block to an exit block passing + /// through a given block. + std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) { + // A path from the entry block to BlockIdx + auto ForwardPath = findShortestPath(Func.Entry, BlockIdx); + // A path from BlockIdx to an exit block + auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock); + + // Concatenate the two paths + std::vector<FlowJump *> Result; + Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end()); + Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end()); + return Result; + } + + /// Apply the Dijkstra algorithm to find the shortest path from a given + /// Source to a given Target block. + /// If Target == -1, then the path ends at an exit block. 
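Editor's note: findReachable above is the piece that decides what counts as an isolated component. Here is the same BFS reduced to plain adjacency lists (an illustrative sketch, not part of the vendored file): only jumps that already carry positive flow are followed, so any block with positive flow that stays unvisited is an "island" that joinIsolatedComponents stitches back in.

#include <cstdint>
#include <queue>
#include <utility>
#include <vector>

// Succs[B] holds (Target, Flow) pairs for the jumps leaving block B.
static std::vector<bool> reachableByPositiveFlow(
    uint64_t Entry,
    const std::vector<std::vector<std::pair<uint64_t, uint64_t>>> &Succs) {
  std::vector<bool> Visited(Succs.size(), false);
  std::queue<uint64_t> Queue;
  Queue.push(Entry);
  Visited[Entry] = true;
  while (!Queue.empty()) {
    uint64_t Src = Queue.front();
    Queue.pop();
    for (auto [Dst, Flow] : Succs[Src])
      if (Flow > 0 && !Visited[Dst]) { // ignore zero-flow jumps entirely
        Visited[Dst] = true;
        Queue.push(Dst);
      }
  }
  return Visited;
}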
+ std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) { + // Quit early, if possible + if (Source == Target) + return std::vector<FlowJump *>(); + if (Func.Blocks[Source].isExit() && Target == AnyExitBlock) + return std::vector<FlowJump *>(); + + // Initialize data structures + auto Distance = std::vector<int64_t>(NumBlocks(), INF); + auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr); + Distance[Source] = 0; + std::set<std::pair<uint64_t, uint64_t>> Queue; + Queue.insert(std::make_pair(Distance[Source], Source)); + + // Run the Dijkstra algorithm + while (!Queue.empty()) { + uint64_t Src = Queue.begin()->second; + Queue.erase(Queue.begin()); + // If we found a solution, quit early + if (Src == Target || + (Func.Blocks[Src].isExit() && Target == AnyExitBlock)) + break; + + for (auto *Jump : Func.Blocks[Src].SuccJumps) { + uint64_t Dst = Jump->Target; + int64_t JumpDist = jumpDistance(Jump); + if (Distance[Dst] > Distance[Src] + JumpDist) { + Queue.erase(std::make_pair(Distance[Dst], Dst)); + + Distance[Dst] = Distance[Src] + JumpDist; + Parent[Dst] = Jump; + + Queue.insert(std::make_pair(Distance[Dst], Dst)); + } + } + } + // If Target is not provided, find the closest exit block + if (Target == AnyExitBlock) { + for (uint64_t I = 0; I < NumBlocks(); I++) { + if (Func.Blocks[I].isExit() && Parent[I] != nullptr) { + if (Target == AnyExitBlock || Distance[Target] > Distance[I]) { + Target = I; + } + } + } + } + assert(Parent[Target] != nullptr && "a path does not exist"); + + // Extract the constructed path + std::vector<FlowJump *> Result; + uint64_t Now = Target; + while (Now != Source) { + assert(Now == Parent[Now]->Target && "incorrect parent jump"); + Result.push_back(Parent[Now]); + Now = Parent[Now]->Source; + } + // Reverse the path, since it is extracted from Target to Source + std::reverse(Result.begin(), Result.end()); + return Result; + } + + /// A distance of a path for a given jump. + /// In order to incite the path to use blocks/jumps with large positive flow, + /// and avoid changing branch probability of outgoing edges drastically, + /// set the jump distance so as: + /// - to minimize the number of unlikely jumps used and subject to that, + /// - to minimize the number of Flow == 0 jumps used and subject to that, + /// - minimizes total multiplicative Flow increase for the remaining edges. + /// To capture this objective with integer distances, we round off fractional + /// parts to a multiple of 1 / BaseDistance. + int64_t jumpDistance(FlowJump *Jump) const { + if (Jump->IsUnlikely) + return Params.CostUnlikely; + uint64_t BaseDistance = + std::max(FlowAdjuster::MinBaseDistance, + std::min(Func.Blocks[Func.Entry].Flow, + Params.CostUnlikely / (2 * (NumBlocks() + 1)))); + if (Jump->Flow > 0) + return BaseDistance + BaseDistance / Jump->Flow; + return 2 * BaseDistance * (NumBlocks() + 1); + }; + + uint64_t NumBlocks() const { return Func.Blocks.size(); } + + /// Rebalance unknown subgraphs so that the flow is split evenly across the + /// outgoing branches of every block of the subgraph. The method iterates over + /// blocks with known weight and identifies unknown subgraphs rooted at the + /// blocks. Then it verifies if flow rebalancing is feasible and applies it. 
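Editor's note: findShortestPath above emulates a priority queue with decrease-key by erasing and re-inserting (distance, node) pairs in a std::set. A minimal standalone sketch of that pattern (illustrative only, not the vendored code); it assumes non-negative edge costs, which jumpDistance guarantees in the real code, and leaves out the jump-distance weighting and path reconstruction.

#include <cstdint>
#include <limits>
#include <set>
#include <utility>
#include <vector>

// Adj[Src] holds (Dst, Cost) pairs. Returns shortest distances from Source.
static std::vector<int64_t> dijkstra(
    uint64_t Source,
    const std::vector<std::vector<std::pair<uint64_t, int64_t>>> &Adj) {
  const int64_t Inf = std::numeric_limits<int64_t>::max();
  std::vector<int64_t> Dist(Adj.size(), Inf);
  Dist[Source] = 0;
  // Ordered by (distance, node): begin() is always the closest open node.
  std::set<std::pair<int64_t, uint64_t>> Queue;
  Queue.insert({0, Source});
  while (!Queue.empty()) {
    uint64_t Src = Queue.begin()->second;
    Queue.erase(Queue.begin());
    for (auto [Dst, Cost] : Adj[Src])
      if (Dist[Dst] > Dist[Src] + Cost) {
        Queue.erase({Dist[Dst], Dst}); // drop the stale entry, if any
        Dist[Dst] = Dist[Src] + Cost;
        Queue.insert({Dist[Dst], Dst});
      }
  }
  return Dist;
}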
+ void rebalanceUnknownSubgraphs() { + // Try to find unknown subgraphs from each block + for (const FlowBlock &SrcBlock : Func.Blocks) { + // Verify if rebalancing rooted at SrcBlock is feasible + if (!canRebalanceAtRoot(&SrcBlock)) + continue; + + // Find an unknown subgraphs starting at SrcBlock. Along the way, + // fill in known destinations and intermediate unknown blocks. + std::vector<FlowBlock *> UnknownBlocks; + std::vector<FlowBlock *> KnownDstBlocks; + findUnknownSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks); + + // Verify if rebalancing of the subgraph is feasible. If the search is + // successful, find the unique destination block (which can be null) + FlowBlock *DstBlock = nullptr; + if (!canRebalanceSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks, + DstBlock)) + continue; + + // We cannot rebalance subgraphs containing cycles among unknown blocks + if (!isAcyclicSubgraph(&SrcBlock, DstBlock, UnknownBlocks)) + continue; + + // Rebalance the flow + rebalanceUnknownSubgraph(&SrcBlock, DstBlock, UnknownBlocks); + } + } + + /// Verify if rebalancing rooted at a given block is possible. + bool canRebalanceAtRoot(const FlowBlock *SrcBlock) { + // Do not attempt to find unknown subgraphs from an unknown or a + // zero-flow block + if (SrcBlock->HasUnknownWeight || SrcBlock->Flow == 0) + return false; + + // Do not attempt to process subgraphs from a block w/o unknown sucessors + bool HasUnknownSuccs = false; + for (auto *Jump : SrcBlock->SuccJumps) { + if (Func.Blocks[Jump->Target].HasUnknownWeight) { + HasUnknownSuccs = true; + break; + } + } + if (!HasUnknownSuccs) + return false; + + return true; + } + + /// Find an unknown subgraph starting at block SrcBlock. The method sets + /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks. + void findUnknownSubgraph(const FlowBlock *SrcBlock, + std::vector<FlowBlock *> &KnownDstBlocks, + std::vector<FlowBlock *> &UnknownBlocks) { + // Run BFS from SrcBlock and make sure all paths are going through unknown + // blocks and end at a known DstBlock + auto Visited = BitVector(NumBlocks(), false); + std::queue<uint64_t> Queue; + + Queue.push(SrcBlock->Index); + Visited[SrcBlock->Index] = true; + while (!Queue.empty()) { + auto &Block = Func.Blocks[Queue.front()]; + Queue.pop(); + // Process blocks reachable from Block + for (auto *Jump : Block.SuccJumps) { + // If Jump can be ignored, skip it + if (ignoreJump(SrcBlock, nullptr, Jump)) + continue; + + uint64_t Dst = Jump->Target; + // If Dst has been visited, skip Jump + if (Visited[Dst]) + continue; + // Process block Dst + Visited[Dst] = true; + if (!Func.Blocks[Dst].HasUnknownWeight) { + KnownDstBlocks.push_back(&Func.Blocks[Dst]); + } else { + Queue.push(Dst); + UnknownBlocks.push_back(&Func.Blocks[Dst]); + } + } + } + } + + /// Verify if rebalancing of the subgraph is feasible. If the checks are + /// successful, set the unique destination block, DstBlock (can be null). + bool canRebalanceSubgraph(const FlowBlock *SrcBlock, + const std::vector<FlowBlock *> &KnownDstBlocks, + const std::vector<FlowBlock *> &UnknownBlocks, + FlowBlock *&DstBlock) { + // If the list of unknown blocks is empty, we don't need rebalancing + if (UnknownBlocks.empty()) + return false; + + // If there are multiple known sinks, we can't rebalance + if (KnownDstBlocks.size() > 1) + return false; + DstBlock = KnownDstBlocks.empty() ? 
nullptr : KnownDstBlocks.front(); + + // Verify sinks of the subgraph + for (auto *Block : UnknownBlocks) { + if (Block->SuccJumps.empty()) { + // If there are multiple (known and unknown) sinks, we can't rebalance + if (DstBlock != nullptr) + return false; + continue; + } + size_t NumIgnoredJumps = 0; + for (auto *Jump : Block->SuccJumps) { + if (ignoreJump(SrcBlock, DstBlock, Jump)) + NumIgnoredJumps++; + } + // If there is a non-sink block in UnknownBlocks with all jumps ignored, + // then we can't rebalance + if (NumIgnoredJumps == Block->SuccJumps.size()) + return false; + } + + return true; + } + + /// Decide whether the Jump is ignored while processing an unknown subgraphs + /// rooted at basic block SrcBlock with the destination block, DstBlock. + bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, + const FlowJump *Jump) { + // Ignore unlikely jumps with zero flow + if (Jump->IsUnlikely && Jump->Flow == 0) + return true; + + auto JumpSource = &Func.Blocks[Jump->Source]; + auto JumpTarget = &Func.Blocks[Jump->Target]; + + // Do not ignore jumps coming into DstBlock + if (DstBlock != nullptr && JumpTarget == DstBlock) + return false; + + // Ignore jumps out of SrcBlock to known blocks + if (!JumpTarget->HasUnknownWeight && JumpSource == SrcBlock) + return true; + + // Ignore jumps to known blocks with zero flow + if (!JumpTarget->HasUnknownWeight && JumpTarget->Flow == 0) + return true; + + return false; + } + + /// Verify if the given unknown subgraph is acyclic, and if yes, reorder + /// UnknownBlocks in the topological order (so that all jumps are "forward"). + bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, + std::vector<FlowBlock *> &UnknownBlocks) { + // Extract local in-degrees in the considered subgraph + auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0); + auto fillInDegree = [&](const FlowBlock *Block) { + for (auto *Jump : Block->SuccJumps) { + if (ignoreJump(SrcBlock, DstBlock, Jump)) + continue; + LocalInDegree[Jump->Target]++; + } + }; + fillInDegree(SrcBlock); + for (auto *Block : UnknownBlocks) { + fillInDegree(Block); + } + // A loop containing SrcBlock + if (LocalInDegree[SrcBlock->Index] > 0) + return false; + + std::vector<FlowBlock *> AcyclicOrder; + std::queue<uint64_t> Queue; + Queue.push(SrcBlock->Index); + while (!Queue.empty()) { + FlowBlock *Block = &Func.Blocks[Queue.front()]; + Queue.pop(); + // Stop propagation once we reach DstBlock, if any + if (DstBlock != nullptr && Block == DstBlock) + break; + + // Keep an acyclic order of unknown blocks + if (Block->HasUnknownWeight && Block != SrcBlock) + AcyclicOrder.push_back(Block); + + // Add to the queue all successors with zero local in-degree + for (auto *Jump : Block->SuccJumps) { + if (ignoreJump(SrcBlock, DstBlock, Jump)) + continue; + uint64_t Dst = Jump->Target; + LocalInDegree[Dst]--; + if (LocalInDegree[Dst] == 0) { + Queue.push(Dst); + } + } + } + + // If there is a cycle in the subgraph, AcyclicOrder contains only a subset + // of all blocks + if (UnknownBlocks.size() != AcyclicOrder.size()) + return false; + UnknownBlocks = AcyclicOrder; + return true; + } + + /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and + /// having UnknownBlocks intermediate blocks. 
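Editor's note: isAcyclicSubgraph above is a bounded form of Kahn's algorithm. A standalone sketch of the underlying check (illustrative only, not part of the vendored file), with ignoreJump filtering and the DstBlock early-exit stripped away:

#include <cstdint>
#include <queue>
#include <vector>

// Emit nodes in topological order by repeatedly taking a node whose remaining
// in-degree is zero. If some nodes are never emitted, the graph has a cycle.
static bool topoOrder(const std::vector<std::vector<uint64_t>> &Succs,
                      std::vector<uint64_t> &Order) {
  std::vector<uint64_t> InDegree(Succs.size(), 0);
  for (const auto &S : Succs)
    for (uint64_t Dst : S)
      InDegree[Dst]++;
  std::queue<uint64_t> Queue;
  for (uint64_t I = 0; I < Succs.size(); I++)
    if (InDegree[I] == 0)
      Queue.push(I);
  while (!Queue.empty()) {
    uint64_t Src = Queue.front();
    Queue.pop();
    Order.push_back(Src);
    for (uint64_t Dst : Succs[Src])
      if (--InDegree[Dst] == 0)
        Queue.push(Dst);
  }
  return Order.size() == Succs.size(); // false iff a cycle exists
}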
+ void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
+ const FlowBlock *DstBlock,
+ const std::vector<FlowBlock *> &UnknownBlocks) {
+ assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
+
+ // Distribute flow from the source block
+ uint64_t BlockFlow = 0;
+ // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
+ for (auto *Jump : SrcBlock->SuccJumps) {
+ if (ignoreJump(SrcBlock, DstBlock, Jump))
+ continue;
+ BlockFlow += Jump->Flow;
+ }
+ rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);
+
+ // Distribute flow from the remaining blocks
+ for (auto *Block : UnknownBlocks) {
+ assert(Block->HasUnknownWeight && "incorrect unknown subgraph");
+ uint64_t BlockFlow = 0;
+ // Block's flow is the sum of incoming flows
+ for (auto *Jump : Block->PredJumps) {
+ BlockFlow += Jump->Flow;
+ }
+ Block->Flow = BlockFlow;
+ rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
+ }
+ }
+
+ /// Redistribute flow for a block in a subgraph rooted at SrcBlock,
+ /// and ending at DstBlock.
+ void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
+ const FlowBlock *Block, uint64_t BlockFlow) {
+ // Process all successor jumps and update corresponding flow values
+ size_t BlockDegree = 0;
+ for (auto *Jump : Block->SuccJumps) {
+ if (ignoreJump(SrcBlock, DstBlock, Jump))
+ continue;
+ BlockDegree++;
+ }
+ // If all successor jumps of the block are ignored, skip it
+ if (DstBlock == nullptr && BlockDegree == 0)
+ return;
+ assert(BlockDegree > 0 && "all outgoing jumps are ignored");
+
+ // Each of the Block's successors gets the following amount of flow.
+ // Rounding the value up so that all flow is propagated
+ uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
+ for (auto *Jump : Block->SuccJumps) {
+ if (ignoreJump(SrcBlock, DstBlock, Jump))
+ continue;
+ uint64_t Flow = std::min(SuccFlow, BlockFlow);
+ Jump->Flow = Flow;
+ BlockFlow -= Flow;
+ }
+ assert(BlockFlow == 0 && "not all flow is propagated");
+ }
+
+ /// A constant indicating an arbitrary exit block of a function.
+ static constexpr uint64_t AnyExitBlock = uint64_t(-1);
+ /// Minimum BaseDistance for the jump distance values in island joining.
+ static constexpr uint64_t MinBaseDistance = 10000;
+
+ /// Params for flow computation.
+ const ProfiParams &Params;
+ /// The function.
+ FlowFunction &Func;
+};
+
+std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
+ const FlowBlock &Block);
+std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
+ const FlowJump &Jump);
+
+/// Initializing flow network for a given function.
+///
+/// Every block is split into two nodes that are responsible for (i) an
+/// incoming flow, (ii) an outgoing flow; they penalize an increase or a
+/// reduction of the block weight.
+void initializeNetwork(const ProfiParams &Params, MinCostMaxFlow &Network,
+ FlowFunction &Func) {
+ uint64_t NumBlocks = Func.Blocks.size();
+ assert(NumBlocks > 1 && "Too few blocks in a function");
+ uint64_t NumJumps = Func.Jumps.size();
+ assert(NumJumps > 0 && "Too few jumps in a function");
+
+ // Introducing dummy source/sink pairs to allow flow circulation.
+ // The nodes corresponding to blocks of the function have indices in
+ // the range [0 .. 2 * NumBlocks); the dummy sources/sinks are indexed by the
+ // next four values.
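Editor's note: the node numbering described in the comment above fits in a few one-liners. A hypothetical helper (illustrative only, not part of the vendored file; initializeNetwork below inlines this arithmetic directly):

#include <cstdint>
#include <utility>

struct NetworkIndex {
  uint64_t NumBlocks;
  uint64_t blockIn(uint64_t B) const { return 2 * B; }      // Bin
  uint64_t blockOut(uint64_t B) const { return 2 * B + 1; } // Bout
  uint64_t source() const { return 2 * NumBlocks; }         // S
  uint64_t target() const { return 2 * NumBlocks + 1; }     // T
  uint64_t auxSource() const { return 2 * NumBlocks + 2; }  // S1
  uint64_t auxTarget() const { return 2 * NumBlocks + 3; }  // T1
  // A CFG jump Src -> Dst becomes the network edge Bout(Src) -> Bin(Dst).
  std::pair<uint64_t, uint64_t> jumpEdge(uint64_t Src, uint64_t Dst) const {
    return {blockOut(Src), blockIn(Dst)};
  }
};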
+ uint64_t S = 2 * NumBlocks; + uint64_t T = S + 1; + uint64_t S1 = S + 2; + uint64_t T1 = S + 3; + + Network.initialize(2 * NumBlocks + 4, S1, T1); + + // Initialize nodes of the flow network + for (uint64_t B = 0; B < NumBlocks; B++) { + auto &Block = Func.Blocks[B]; + + // Split every block into two auxiliary nodes to allow + // increase/reduction of the block count. + uint64_t Bin = 2 * B; + uint64_t Bout = 2 * B + 1; + + // Edges from S and to T + if (Block.isEntry()) { + Network.addEdge(S, Bin, 0); + } else if (Block.isExit()) { + Network.addEdge(Bout, T, 0); + } + + // Assign costs for increasing/decreasing the block counts + auto [AuxCostInc, AuxCostDec] = assignBlockCosts(Params, Block); + + // Add the corresponding edges to the network + Network.addEdge(Bin, Bout, AuxCostInc); + if (Block.Weight > 0) { + Network.addEdge(Bout, Bin, Block.Weight, AuxCostDec); + Network.addEdge(S1, Bout, Block.Weight, 0); + Network.addEdge(Bin, T1, Block.Weight, 0); + } + } + + // Initialize edges of the flow network + for (uint64_t J = 0; J < NumJumps; J++) { + auto &Jump = Func.Jumps[J]; + + // Get the endpoints corresponding to the jump + uint64_t Jin = 2 * Jump.Source + 1; + uint64_t Jout = 2 * Jump.Target; + + // Assign costs for increasing/decreasing the jump counts + auto [AuxCostInc, AuxCostDec] = assignJumpCosts(Params, Jump); + + // Add the corresponding edges to the network + Network.addEdge(Jin, Jout, AuxCostInc); + if (Jump.Weight > 0) { + Network.addEdge(Jout, Jin, Jump.Weight, AuxCostDec); + Network.addEdge(S1, Jout, Jump.Weight, 0); + Network.addEdge(Jin, T1, Jump.Weight, 0); + } + } + + // Make sure we have a valid flow circulation + Network.addEdge(T, S, 0); +} + +/// Assign costs for increasing/decreasing the block counts. +std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params, + const FlowBlock &Block) { + // Modifying the weight of an unlikely block is expensive + if (Block.IsUnlikely) + return std::make_pair(Params.CostUnlikely, Params.CostUnlikely); + + // Assign default values for the costs + int64_t CostInc = Params.CostBlockInc; + int64_t CostDec = Params.CostBlockDec; + // Update the costs depending on the block metadata + if (Block.HasUnknownWeight) { + CostInc = Params.CostBlockUnknownInc; + CostDec = 0; + } else { + // Increasing the count for "cold" blocks with zero initial count is more + // expensive than for "hot" ones + if (Block.Weight == 0) + CostInc = Params.CostBlockZeroInc; + // Modifying the count of the entry block is expensive + if (Block.isEntry()) { + CostInc = Params.CostBlockEntryInc; + CostDec = Params.CostBlockEntryDec; + } + } + return std::make_pair(CostInc, CostDec); +} + +/// Assign costs for increasing/decreasing the jump counts. 
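Editor's note: for a regular (non-entry, non-exit) block with a known positive weight W, the block loop in initializeNetwork above boils down to four edges. The sketch below (illustrative only, not part of the vendored file) just collects them in one place; capacity -1 stands in for the unlimited capacity implied by the addEdge overload without a capacity argument, and the S/T entry and exit edges are left out.

#include <cstdint>
#include <vector>

struct SketchEdge {
  uint64_t Src, Dst;
  int64_t Capacity; // -1 == unlimited
  int64_t Cost;
};

static std::vector<SketchEdge> edgesForWeightedBlock(uint64_t Bin, uint64_t Bout,
                                                     uint64_t S1, uint64_t T1,
                                                     int64_t W, int64_t CostInc,
                                                     int64_t CostDec) {
  return {
      {Bin, Bout, -1, CostInc}, // extra flow through the block costs CostInc/unit
      {Bout, Bin, W, CostDec},  // removing up to W units costs CostDec/unit
      {S1, Bout, W, 0},         // these two, together with the T->S circulation
      {Bin, T1, W, 0},          //   edge, encode W as a soft lower bound on flow
  };
}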
+std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params, + const FlowJump &Jump) { + // Modifying the weight of an unlikely jump is expensive + if (Jump.IsUnlikely) + return std::make_pair(Params.CostUnlikely, Params.CostUnlikely); + + // Assign default values for the costs + int64_t CostInc = Params.CostJumpInc; + int64_t CostDec = Params.CostJumpDec; + // Update the costs depending on the block metadata + if (Jump.Source + 1 == Jump.Target) { + // Adjusting the fall-through branch + CostInc = Params.CostJumpFTInc; + CostDec = Params.CostJumpFTDec; + } + if (Jump.HasUnknownWeight) { + // The cost is different for fall-through and non-fall-through branches + if (Jump.Source + 1 == Jump.Target) + CostInc = Params.CostJumpUnknownFTInc; + else + CostInc = Params.CostJumpUnknownInc; + CostDec = 0; + } else { + assert(Jump.Weight > 0 && "found zero-weight jump with a positive weight"); + } + return std::make_pair(CostInc, CostDec); +} + +/// Extract resulting block and edge counts from the flow network. +void extractWeights(const ProfiParams &Params, MinCostMaxFlow &Network, + FlowFunction &Func) { + uint64_t NumBlocks = Func.Blocks.size(); + uint64_t NumJumps = Func.Jumps.size(); + + // Extract resulting jump counts + for (uint64_t J = 0; J < NumJumps; J++) { + auto &Jump = Func.Jumps[J]; + uint64_t SrcOut = 2 * Jump.Source + 1; + uint64_t DstIn = 2 * Jump.Target; + + int64_t Flow = 0; + int64_t AuxFlow = Network.getFlow(SrcOut, DstIn); + if (Jump.Source != Jump.Target) + Flow = int64_t(Jump.Weight) + AuxFlow; + else + Flow = int64_t(Jump.Weight) + (AuxFlow > 0 ? AuxFlow : 0); + + Jump.Flow = Flow; + assert(Flow >= 0 && "negative jump flow"); + } + + // Extract resulting block counts + auto InFlow = std::vector<uint64_t>(NumBlocks, 0); + auto OutFlow = std::vector<uint64_t>(NumBlocks, 0); + for (auto &Jump : Func.Jumps) { + InFlow[Jump.Target] += Jump.Flow; + OutFlow[Jump.Source] += Jump.Flow; + } + for (uint64_t B = 0; B < NumBlocks; B++) { + auto &Block = Func.Blocks[B]; + Block.Flow = std::max(OutFlow[B], InFlow[B]); + } +} + +#ifndef NDEBUG +/// Verify that the provided block/jump weights are as expected. +void verifyInput(const FlowFunction &Func) { + // Verify the entry block + assert(Func.Entry == 0 && Func.Blocks[0].isEntry()); + for (size_t I = 1; I < Func.Blocks.size(); I++) { + assert(!Func.Blocks[I].isEntry() && "multiple entry blocks"); + } + // Verify CFG jumps + for (auto &Block : Func.Blocks) { + assert((!Block.isEntry() || !Block.isExit()) && + "a block cannot be an entry and an exit"); + } + // Verify input block weights + for (auto &Block : Func.Blocks) { + assert((!Block.HasUnknownWeight || Block.Weight == 0 || Block.isEntry()) && + "non-zero weight of a block w/o weight except for an entry"); + } + // Verify input jump weights + for (auto &Jump : Func.Jumps) { + assert((!Jump.HasUnknownWeight || Jump.Weight == 0) && + "non-zero weight of a jump w/o weight"); + } +} + +/// Verify that the computed flow values satisfy flow conservation rules. 
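Editor's note: extractWeights above recovers block counts from jump flows, and verifyOutput below re-checks the same bookkeeping. A reduced standalone version of that conservation check (illustrative only, not part of the vendored file):

#include <cstdint>
#include <vector>

struct SketchJump {
  uint64_t Source, Target, Flow;
};

// Each jump adds to the in-flow of its target and the out-flow of its source;
// an inner block's count must match both sums, the entry only its out-flow,
// and an exit only its in-flow.
static bool conservesFlow(const std::vector<SketchJump> &Jumps,
                          const std::vector<uint64_t> &BlockFlow,
                          uint64_t Entry, const std::vector<bool> &IsExit) {
  std::vector<uint64_t> In(BlockFlow.size(), 0), Out(BlockFlow.size(), 0);
  for (const auto &J : Jumps) {
    In[J.Target] += J.Flow;
    Out[J.Source] += J.Flow;
  }
  for (uint64_t B = 0; B < BlockFlow.size(); B++) {
    if (B != Entry && BlockFlow[B] != In[B])
      return false; // everything entering the block...
    if (!IsExit[B] && BlockFlow[B] != Out[B])
      return false; // ...must also leave it
  }
  return true;
}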
+void verifyOutput(const FlowFunction &Func) { + const uint64_t NumBlocks = Func.Blocks.size(); + auto InFlow = std::vector<uint64_t>(NumBlocks, 0); + auto OutFlow = std::vector<uint64_t>(NumBlocks, 0); + for (const auto &Jump : Func.Jumps) { + InFlow[Jump.Target] += Jump.Flow; + OutFlow[Jump.Source] += Jump.Flow; + } + + uint64_t TotalInFlow = 0; + uint64_t TotalOutFlow = 0; + for (uint64_t I = 0; I < NumBlocks; I++) { + auto &Block = Func.Blocks[I]; + if (Block.isEntry()) { + TotalInFlow += Block.Flow; + assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); + } else if (Block.isExit()) { + TotalOutFlow += Block.Flow; + assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); + } else { + assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); + assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); + } + } + assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow"); + + // Verify that there are no isolated flow components + // One could modify FlowFunction to hold edges indexed by the sources, which + // will avoid a creation of the object + auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks); + for (const auto &Jump : Func.Jumps) { + if (Jump.Flow > 0) { + PositiveFlowEdges[Jump.Source].push_back(Jump.Target); + } + } + + // Run BFS from the source along edges with positive flow + std::queue<uint64_t> Queue; + auto Visited = BitVector(NumBlocks, false); + Queue.push(Func.Entry); + Visited[Func.Entry] = true; + while (!Queue.empty()) { + uint64_t Src = Queue.front(); + Queue.pop(); + for (uint64_t Dst : PositiveFlowEdges[Src]) { + if (!Visited[Dst]) { + Queue.push(Dst); + Visited[Dst] = true; + } + } + } + + // Verify that every block that has a positive flow is reached from the source + // along edges with a positive flow + for (uint64_t I = 0; I < NumBlocks; I++) { + auto &Block = Func.Blocks[I]; + assert((Visited[I] || Block.Flow == 0) && "an isolated flow component"); + } +} +#endif + +} // end of anonymous namespace + +/// Apply the profile inference algorithm for a given function +void llvm::applyFlowInference(const ProfiParams &Params, FlowFunction &Func) { +#ifndef NDEBUG + // Verify the input data + verifyInput(Func); +#endif + + // Create and apply an inference network model + auto InferenceNetwork = MinCostMaxFlow(Params); + initializeNetwork(Params, InferenceNetwork, Func); + InferenceNetwork.run(); + + // Extract flow values for every block and every edge + extractWeights(Params, InferenceNetwork, Func); + + // Post-processing adjustments to the flow + auto Adjuster = FlowAdjuster(Params, Func); + Adjuster.run(); + +#ifndef NDEBUG + // Verify the result + verifyOutput(Func); +#endif +} + +/// Apply the profile inference algorithm for a given flow function +void llvm::applyFlowInference(FlowFunction &Func) { + ProfiParams Params; + // Set the params from the command-line flags. 
+ Params.EvenFlowDistribution = SampleProfileEvenFlowDistribution; + Params.RebalanceUnknown = SampleProfileRebalanceUnknown; + Params.JoinIslands = SampleProfileJoinIslands; + Params.CostBlockInc = SampleProfileProfiCostBlockInc; + Params.CostBlockDec = SampleProfileProfiCostBlockDec; + Params.CostBlockEntryInc = SampleProfileProfiCostBlockEntryInc; + Params.CostBlockEntryDec = SampleProfileProfiCostBlockEntryDec; + Params.CostBlockZeroInc = SampleProfileProfiCostBlockZeroInc; + Params.CostBlockUnknownInc = SampleProfileProfiCostBlockUnknownInc; + + applyFlowInference(Params, Func); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp new file mode 100644 index 0000000000..f7ae6ad844 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp @@ -0,0 +1,185 @@ +//===- SampleProfileLoaderBaseUtil.cpp - Profile loader Util func ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the SampleProfileLoader base utility functions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +namespace llvm { + +cl::opt<unsigned> SampleProfileMaxPropagateIterations( + "sample-profile-max-propagate-iterations", cl::init(100), + cl::desc("Maximum number of iterations to go through when propagating " + "sample block/edge weights through the CFG.")); + +cl::opt<unsigned> SampleProfileRecordCoverage( + "sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"), + cl::desc("Emit a warning if less than N% of records in the input profile " + "are matched to the IR.")); + +cl::opt<unsigned> SampleProfileSampleCoverage( + "sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"), + cl::desc("Emit a warning if less than N% of samples in the input profile " + "are matched to the IR.")); + +cl::opt<bool> NoWarnSampleUnused( + "no-warn-sample-unused", cl::init(false), cl::Hidden, + cl::desc("Use this option to turn off/on warnings about function with " + "samples but without debug information to use those samples. ")); + +cl::opt<bool> SampleProfileUseProfi( + "sample-profile-use-profi", cl::Hidden, + cl::desc("Use profi to infer block and edge counts.")); + +namespace sampleprofutil { + +/// Return true if the given callsite is hot wrt to hot cutoff threshold. +/// +/// Functions that were inlined in the original binary will be represented +/// in the inline stack in the sample profile. If the profile shows that +/// the original inline decision was "good" (i.e., the callsite is executed +/// frequently), then we will recreate the inline decision and apply the +/// profile from the inlined callsite. +/// +/// To decide whether an inlined callsite is hot, we compare the callsite +/// sample count with the hot cutoff computed by ProfileSummaryInfo, it is +/// regarded as hot if the count is above the cutoff value. 
+/// +/// When ProfileAccurateForSymsInList is enabled and profile symbol list +/// is present, functions in the profile symbol list but without profile will +/// be regarded as cold and much less inlining will happen in CGSCC inlining +/// pass, so we tend to lower the hot criteria here to allow more early +/// inlining to happen for warm callsites and it is helpful for performance. +bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, + bool ProfAccForSymsInList) { + if (!CallsiteFS) + return false; // The callsite was not inlined in the original binary. + + assert(PSI && "PSI is expected to be non null"); + uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples(); + if (ProfAccForSymsInList) + return !PSI->isColdCount(CallsiteTotalSamples); + else + return PSI->isHotCount(CallsiteTotalSamples); +} + +/// Mark as used the sample record for the given function samples at +/// (LineOffset, Discriminator). +/// +/// \returns true if this is the first time we mark the given record. +bool SampleCoverageTracker::markSamplesUsed(const FunctionSamples *FS, + uint32_t LineOffset, + uint32_t Discriminator, + uint64_t Samples) { + LineLocation Loc(LineOffset, Discriminator); + unsigned &Count = SampleCoverage[FS][Loc]; + bool FirstTime = (++Count == 1); + if (FirstTime) + TotalUsedSamples += Samples; + return FirstTime; +} + +/// Return the number of sample records that were applied from this profile. +/// +/// This count does not include records from cold inlined callsites. +unsigned +SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS, + ProfileSummaryInfo *PSI) const { + auto I = SampleCoverage.find(FS); + + // The size of the coverage map for FS represents the number of records + // that were marked used at least once. + unsigned Count = (I != SampleCoverage.end()) ? I->second.size() : 0; + + // If there are inlined callsites in this function, count the samples found + // in the respective bodies. However, do not bother counting callees with 0 + // total samples, these are callees that were never invoked at runtime. + for (const auto &I : FS->getCallsiteSamples()) + for (const auto &J : I.second) { + const FunctionSamples *CalleeSamples = &J.second; + if (callsiteIsHot(CalleeSamples, PSI, ProfAccForSymsInList)) + Count += countUsedRecords(CalleeSamples, PSI); + } + + return Count; +} + +/// Return the number of sample records in the body of this profile. +/// +/// This count does not include records from cold inlined callsites. +unsigned +SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS, + ProfileSummaryInfo *PSI) const { + unsigned Count = FS->getBodySamples().size(); + + // Only count records in hot callsites. + for (const auto &I : FS->getCallsiteSamples()) + for (const auto &J : I.second) { + const FunctionSamples *CalleeSamples = &J.second; + if (callsiteIsHot(CalleeSamples, PSI, ProfAccForSymsInList)) + Count += countBodyRecords(CalleeSamples, PSI); + } + + return Count; +} + +/// Return the number of samples collected in the body of this profile. +/// +/// This count does not include samples from cold inlined callsites. +uint64_t +SampleCoverageTracker::countBodySamples(const FunctionSamples *FS, + ProfileSummaryInfo *PSI) const { + uint64_t Total = 0; + for (const auto &I : FS->getBodySamples()) + Total += I.second.getSamples(); + + // Only count samples in hot callsites. 
+ for (const auto &I : FS->getCallsiteSamples()) + for (const auto &J : I.second) { + const FunctionSamples *CalleeSamples = &J.second; + if (callsiteIsHot(CalleeSamples, PSI, ProfAccForSymsInList)) + Total += countBodySamples(CalleeSamples, PSI); + } + + return Total; +} + +/// Return the fraction of sample records used in this profile. +/// +/// The returned value is an unsigned integer in the range 0-100 indicating +/// the percentage of sample records that were used while applying this +/// profile to the associated function. +unsigned SampleCoverageTracker::computeCoverage(unsigned Used, + unsigned Total) const { + assert(Used <= Total && + "number of used records cannot exceed the total number of records"); + return Total > 0 ? Used * 100 / Total : 100; +} + +/// Create a global variable to flag FSDiscriminators are used. +void createFSDiscriminatorVariable(Module *M) { + const char *FSDiscriminatorVar = "__llvm_fs_discriminator__"; + if (M->getGlobalVariable(FSDiscriminatorVar)) + return; + + auto &Context = M->getContext(); + // Place this variable to llvm.used so it won't be GC'ed. + appendToUsed(*M, {new GlobalVariable(*M, Type::getInt1Ty(Context), true, + GlobalValue::WeakODRLinkage, + ConstantInt::getTrue(Context), + FSDiscriminatorVar)}); +} + +} // end of namespace sampleprofutil +} // end of namespace llvm diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/SanitizerStats.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/SanitizerStats.cpp new file mode 100644 index 0000000000..fd21ee4cc4 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/SanitizerStats.cpp @@ -0,0 +1,106 @@ +//===- SanitizerStats.cpp - Sanitizer statistics gathering ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implements code generation for sanitizer statistics gathering. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SanitizerStats.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +using namespace llvm; + +SanitizerStatReport::SanitizerStatReport(Module *M) : M(M) { + StatTy = ArrayType::get(Type::getInt8PtrTy(M->getContext()), 2); + EmptyModuleStatsTy = makeModuleStatsTy(); + + ModuleStatsGV = new GlobalVariable(*M, EmptyModuleStatsTy, false, + GlobalValue::InternalLinkage, nullptr); +} + +ArrayType *SanitizerStatReport::makeModuleStatsArrayTy() { + return ArrayType::get(StatTy, Inits.size()); +} + +StructType *SanitizerStatReport::makeModuleStatsTy() { + return StructType::get(M->getContext(), {Type::getInt8PtrTy(M->getContext()), + Type::getInt32Ty(M->getContext()), + makeModuleStatsArrayTy()}); +} + +void SanitizerStatReport::create(IRBuilder<> &B, SanitizerStatKind SK) { + Function *F = B.GetInsertBlock()->getParent(); + Module *M = F->getParent(); + PointerType *Int8PtrTy = B.getInt8PtrTy(); + IntegerType *IntPtrTy = B.getIntPtrTy(M->getDataLayout()); + ArrayType *StatTy = ArrayType::get(Int8PtrTy, 2); + + Inits.push_back(ConstantArray::get( + StatTy, + {Constant::getNullValue(Int8PtrTy), + ConstantExpr::getIntToPtr( + ConstantInt::get(IntPtrTy, uint64_t(SK) << (IntPtrTy->getBitWidth() - + kSanitizerStatKindBits)), + Int8PtrTy)})); + + FunctionType *StatReportTy = + FunctionType::get(B.getVoidTy(), Int8PtrTy, false); + FunctionCallee StatReport = + M->getOrInsertFunction("__sanitizer_stat_report", StatReportTy); + + auto InitAddr = ConstantExpr::getGetElementPtr( + EmptyModuleStatsTy, ModuleStatsGV, + ArrayRef<Constant *>{ + ConstantInt::get(IntPtrTy, 0), ConstantInt::get(B.getInt32Ty(), 2), + ConstantInt::get(IntPtrTy, Inits.size() - 1), + }); + B.CreateCall(StatReport, ConstantExpr::getBitCast(InitAddr, Int8PtrTy)); +} + +void SanitizerStatReport::finish() { + if (Inits.empty()) { + ModuleStatsGV->eraseFromParent(); + return; + } + + PointerType *Int8PtrTy = Type::getInt8PtrTy(M->getContext()); + IntegerType *Int32Ty = Type::getInt32Ty(M->getContext()); + Type *VoidTy = Type::getVoidTy(M->getContext()); + + // Create a new ModuleStatsGV to replace the old one. We can't just set the + // old one's initializer because its type is different. + auto NewModuleStatsGV = new GlobalVariable( + *M, makeModuleStatsTy(), false, GlobalValue::InternalLinkage, + ConstantStruct::getAnon( + {Constant::getNullValue(Int8PtrTy), + ConstantInt::get(Int32Ty, Inits.size()), + ConstantArray::get(makeModuleStatsArrayTy(), Inits)})); + ModuleStatsGV->replaceAllUsesWith( + ConstantExpr::getBitCast(NewModuleStatsGV, ModuleStatsGV->getType())); + ModuleStatsGV->eraseFromParent(); + + // Create a global constructor to register NewModuleStatsGV. 
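Editor's note: a hypothetical usage sketch for the interface shown above (not part of the vendored file). An instrumentation pass holds one SanitizerStatReport per module, calls create() wherever it emits a check, and calls finish() once at the end so the registration constructor built below gets generated; the statistic kind here is only an example value.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/SanitizerStats.h"

using namespace llvm;

// Emit one __sanitizer_stat_report call at the builder's insertion point for
// an instrumented check.
static void reportCheckAt(SanitizerStatReport &Stats, IRBuilder<> &B) {
  Stats.create(B, SanStat_CFI_VCall); // kind chosen only for illustration
}

// After all functions are processed, build the stats table and its registering
// module constructor.
static void finalizeStats(SanitizerStatReport &Stats) {
  Stats.finish();
}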
+ auto F = Function::Create(FunctionType::get(VoidTy, false), + GlobalValue::InternalLinkage, "", M); + auto BB = BasicBlock::Create(M->getContext(), "", F); + IRBuilder<> B(BB); + + FunctionType *StatInitTy = FunctionType::get(VoidTy, Int8PtrTy, false); + FunctionCallee StatInit = + M->getOrInsertFunction("__sanitizer_stat_init", StatInitTy); + + B.CreateCall(StatInit, ConstantExpr::getBitCast(NewModuleStatsGV, Int8PtrTy)); + B.CreateRetVoid(); + + appendToGlobalCtors(*M, F, 0); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/ScalarEvolutionExpander.cpp new file mode 100644 index 0000000000..24f1966edd --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -0,0 +1,2678 @@ +//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of the scalar evolution expander, +// which is used to generate the code corresponding to a given scalar evolution +// expression. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/LoopUtils.h" + +#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#define SCEV_DEBUG_WITH_TYPE(TYPE, X) DEBUG_WITH_TYPE(TYPE, X) +#else +#define SCEV_DEBUG_WITH_TYPE(TYPE, X) +#endif + +using namespace llvm; + +cl::opt<unsigned> llvm::SCEVCheapExpansionBudget( + "scev-cheap-expansion-budget", cl::Hidden, cl::init(4), + cl::desc("When performing SCEV expansion only if it is cheap to do, this " + "controls the budget that is considered cheap (default = 4)")); + +using namespace PatternMatch; + +/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP, +/// reusing an existing cast if a suitable one (= dominating IP) exists, or +/// creating a new one. +Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, + Instruction::CastOps Op, + BasicBlock::iterator IP) { + // This function must be called with the builder having a valid insertion + // point. It doesn't need to be the actual IP where the uses of the returned + // cast will be added, but it must dominate such IP. + // We use this precondition to produce a cast that will dominate all its + // uses. In particular, this is crucial for the case where the builder's + // insertion point *is* the point where we were asked to put the cast. + // Since we don't know the builder's insertion point is actually + // where the uses will be added (only that it dominates it), we are + // not allowed to move it. + BasicBlock::iterator BIP = Builder.GetInsertPoint(); + + Value *Ret = nullptr; + + // Check to see if there is already a cast! 
+ for (User *U : V->users()) { + if (U->getType() != Ty) + continue; + CastInst *CI = dyn_cast<CastInst>(U); + if (!CI || CI->getOpcode() != Op) + continue; + + // Found a suitable cast that is at IP or comes before IP. Use it. Note that + // the cast must also properly dominate the Builder's insertion point. + if (IP->getParent() == CI->getParent() && &*BIP != CI && + (&*IP == CI || CI->comesBefore(&*IP))) { + Ret = CI; + break; + } + } + + // Create a new cast. + if (!Ret) { + SCEVInsertPointGuard Guard(Builder, this); + Builder.SetInsertPoint(&*IP); + Ret = Builder.CreateCast(Op, V, Ty, V->getName()); + } + + // We assert at the end of the function since IP might point to an + // instruction with different dominance properties than a cast + // (an invoke for example) and not dominate BIP (but the cast does). + assert(!isa<Instruction>(Ret) || + SE.DT.dominates(cast<Instruction>(Ret), &*BIP)); + + return Ret; +} + +BasicBlock::iterator +SCEVExpander::findInsertPointAfter(Instruction *I, + Instruction *MustDominate) const { + BasicBlock::iterator IP = ++I->getIterator(); + if (auto *II = dyn_cast<InvokeInst>(I)) + IP = II->getNormalDest()->begin(); + + while (isa<PHINode>(IP)) + ++IP; + + if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) { + ++IP; + } else if (isa<CatchSwitchInst>(IP)) { + IP = MustDominate->getParent()->getFirstInsertionPt(); + } else { + assert(!IP->isEHPad() && "unexpected eh pad!"); + } + + // Adjust insert point to be after instructions inserted by the expander, so + // we can re-use already inserted instructions. Avoid skipping past the + // original \p MustDominate, in case it is an inserted instruction. + while (isInsertedInstruction(&*IP) && &*IP != MustDominate) + ++IP; + + return IP; +} + +BasicBlock::iterator +SCEVExpander::GetOptimalInsertionPointForCastOf(Value *V) const { + // Cast the argument at the beginning of the entry block, after + // any bitcasts of other arguments. + if (Argument *A = dyn_cast<Argument>(V)) { + BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin(); + while ((isa<BitCastInst>(IP) && + isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) && + cast<BitCastInst>(IP)->getOperand(0) != A) || + isa<DbgInfoIntrinsic>(IP)) + ++IP; + return IP; + } + + // Cast the instruction immediately after the instruction. + if (Instruction *I = dyn_cast<Instruction>(V)) + return findInsertPointAfter(I, &*Builder.GetInsertPoint()); + + // Otherwise, this must be some kind of a constant, + // so let's plop this cast into the function's entry block. + assert(isa<Constant>(V) && + "Expected the cast argument to be a global/constant"); + return Builder.GetInsertBlock() + ->getParent() + ->getEntryBlock() + .getFirstInsertionPt(); +} + +/// InsertNoopCastOfTo - Insert a cast of V to the specified type, +/// which must be possible with a noop cast, doing what we can to share +/// the casts. +Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) { + Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false); + assert((Op == Instruction::BitCast || + Op == Instruction::PtrToInt || + Op == Instruction::IntToPtr) && + "InsertNoopCastOfTo cannot perform non-noop casts!"); + assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) && + "InsertNoopCastOfTo cannot change sizes!"); + + // inttoptr only works for integral pointers. For non-integral pointers, we + // can create a GEP on i8* null with the integral value as index. 
Note that + // it is safe to use GEP of null instead of inttoptr here, because only + // expressions already based on a GEP of null should be converted to pointers + // during expansion. + if (Op == Instruction::IntToPtr) { + auto *PtrTy = cast<PointerType>(Ty); + if (DL.isNonIntegralPointerType(PtrTy)) { + auto *Int8PtrTy = Builder.getInt8PtrTy(PtrTy->getAddressSpace()); + assert(DL.getTypeAllocSize(Builder.getInt8Ty()) == 1 && + "alloc size of i8 must by 1 byte for the GEP to be correct"); + auto *GEP = Builder.CreateGEP( + Builder.getInt8Ty(), Constant::getNullValue(Int8PtrTy), V, "uglygep"); + return Builder.CreateBitCast(GEP, Ty); + } + } + // Short-circuit unnecessary bitcasts. + if (Op == Instruction::BitCast) { + if (V->getType() == Ty) + return V; + if (CastInst *CI = dyn_cast<CastInst>(V)) { + if (CI->getOperand(0)->getType() == Ty) + return CI->getOperand(0); + } + } + // Short-circuit unnecessary inttoptr<->ptrtoint casts. + if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) && + SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) { + if (CastInst *CI = dyn_cast<CastInst>(V)) + if ((CI->getOpcode() == Instruction::PtrToInt || + CI->getOpcode() == Instruction::IntToPtr) && + SE.getTypeSizeInBits(CI->getType()) == + SE.getTypeSizeInBits(CI->getOperand(0)->getType())) + return CI->getOperand(0); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if ((CE->getOpcode() == Instruction::PtrToInt || + CE->getOpcode() == Instruction::IntToPtr) && + SE.getTypeSizeInBits(CE->getType()) == + SE.getTypeSizeInBits(CE->getOperand(0)->getType())) + return CE->getOperand(0); + } + + // Fold a cast of a constant. + if (Constant *C = dyn_cast<Constant>(V)) + return ConstantExpr::getCast(Op, C, Ty); + + // Try to reuse existing cast, or insert one. + return ReuseOrCreateCast(V, Ty, Op, GetOptimalInsertionPointForCastOf(V)); +} + +/// InsertBinop - Insert the specified binary operator, doing a small amount +/// of work to avoid inserting an obviously redundant operation, and hoisting +/// to an outer loop when the opportunity is there and it is safe. +Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, + Value *LHS, Value *RHS, + SCEV::NoWrapFlags Flags, bool IsSafeToHoist) { + // Fold a binop with constant operands. + if (Constant *CLHS = dyn_cast<Constant>(LHS)) + if (Constant *CRHS = dyn_cast<Constant>(RHS)) + if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, DL)) + return Res; + + // Do a quick scan to see if we have this binop nearby. If so, reuse it. + unsigned ScanLimit = 6; + BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin(); + // Scanning starts from the last instruction before the insertion point. + BasicBlock::iterator IP = Builder.GetInsertPoint(); + if (IP != BlockBegin) { + --IP; + for (; ScanLimit; --IP, --ScanLimit) { + // Don't count dbg.value against the ScanLimit, to avoid perturbing the + // generated code. + if (isa<DbgInfoIntrinsic>(IP)) + ScanLimit++; + + auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) { + // Ensure that no-wrap flags match. + if (isa<OverflowingBinaryOperator>(I)) { + if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW)) + return true; + if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW)) + return true; + } + // Conservatively, do not use any instruction which has any of exact + // flags installed. 
+ if (isa<PossiblyExactOperator>(I) && I->isExact()) + return true; + return false; + }; + if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS && + IP->getOperand(1) == RHS && !canGenerateIncompatiblePoison(&*IP)) + return &*IP; + if (IP == BlockBegin) break; + } + } + + // Save the original insertion point so we can restore it when we're done. + DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc(); + SCEVInsertPointGuard Guard(Builder, this); + + if (IsSafeToHoist) { + // Move the insertion point out of as many loops as we can. + while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) { + if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break; + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) break; + + // Ok, move up a level. + Builder.SetInsertPoint(Preheader->getTerminator()); + } + } + + // If we haven't found this binop, insert it. + // TODO: Use the Builder, which will make CreateBinOp below fold with + // InstSimplifyFolder. + Instruction *BO = Builder.Insert(BinaryOperator::Create(Opcode, LHS, RHS)); + BO->setDebugLoc(Loc); + if (Flags & SCEV::FlagNUW) + BO->setHasNoUnsignedWrap(); + if (Flags & SCEV::FlagNSW) + BO->setHasNoSignedWrap(); + + return BO; +} + +/// FactorOutConstant - Test if S is divisible by Factor, using signed +/// division. If so, update S with Factor divided out and return true. +/// S need not be evenly divisible if a reasonable remainder can be +/// computed. +static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder, + const SCEV *Factor, ScalarEvolution &SE, + const DataLayout &DL) { + // Everything is divisible by one. + if (Factor->isOne()) + return true; + + // x/x == 1. + if (S == Factor) { + S = SE.getConstant(S->getType(), 1); + return true; + } + + // For a Constant, check for a multiple of the given factor. + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) { + // 0/x == 0. + if (C->isZero()) + return true; + // Check for divisibility. + if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) { + ConstantInt *CI = + ConstantInt::get(SE.getContext(), C->getAPInt().sdiv(FC->getAPInt())); + // If the quotient is zero and the remainder is non-zero, reject + // the value at this scale. It will be considered for subsequent + // smaller scales. + if (!CI->isZero()) { + const SCEV *Div = SE.getConstant(CI); + S = Div; + Remainder = SE.getAddExpr( + Remainder, SE.getConstant(C->getAPInt().srem(FC->getAPInt()))); + return true; + } + } + } + + // In a Mul, check if there is a constant operand which is a multiple + // of the given factor. + if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) { + // Size is known, check if there is a constant operand which is a multiple + // of the given factor. If so, we can factor it. + if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0))) + if (!C->getAPInt().srem(FC->getAPInt())) { + SmallVector<const SCEV *, 4> NewMulOps(M->operands()); + NewMulOps[0] = SE.getConstant(C->getAPInt().sdiv(FC->getAPInt())); + S = SE.getMulExpr(NewMulOps); + return true; + } + } + + // In an AddRec, check if both start and step are divisible. 
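Editor's note: before the AddRec case that the comment above introduces, the constant case earlier in FactorOutConstant has a simple arithmetic contract. A standalone sketch with ordinary integers instead of SCEVs (illustrative only, not part of the vendored file): a value S is rewritten as Factor * Quotient + Remainder using signed division, and the rewrite is only accepted when the quotient is non-zero, otherwise smaller scales are tried later.

#include <cstdint>
#include <optional>

struct FactoredConst {
  int64_t Quotient;
  int64_t Remainder;
};

static std::optional<FactoredConst> factorOutConst(int64_t S, int64_t Factor) {
  if (Factor == 1)
    return FactoredConst{S, 0}; // everything is divisible by one
  int64_t Quot = S / Factor;    // signed division, like APInt::sdiv
  int64_t Rem = S % Factor;     // like APInt::srem
  if (Quot == 0 && S != 0)
    return std::nullopt; // too small for this scale; reject for now
  return FactoredConst{Quot, Rem};
}
// factorOutConst(8, 4) -> {2, 0};  factorOutConst(10, 4) -> {2, 2}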
+ if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) { + const SCEV *Step = A->getStepRecurrence(SE); + const SCEV *StepRem = SE.getConstant(Step->getType(), 0); + if (!FactorOutConstant(Step, StepRem, Factor, SE, DL)) + return false; + if (!StepRem->isZero()) + return false; + const SCEV *Start = A->getStart(); + if (!FactorOutConstant(Start, Remainder, Factor, SE, DL)) + return false; + S = SE.getAddRecExpr(Start, Step, A->getLoop(), + A->getNoWrapFlags(SCEV::FlagNW)); + return true; + } + + return false; +} + +/// SimplifyAddOperands - Sort and simplify a list of add operands. NumAddRecs +/// is the number of SCEVAddRecExprs present, which are kept at the end of +/// the list. +/// +static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops, + Type *Ty, + ScalarEvolution &SE) { + unsigned NumAddRecs = 0; + for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i) + ++NumAddRecs; + // Group Ops into non-addrecs and addrecs. + SmallVector<const SCEV *, 8> NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs); + SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end()); + // Let ScalarEvolution sort and simplify the non-addrecs list. + const SCEV *Sum = NoAddRecs.empty() ? + SE.getConstant(Ty, 0) : + SE.getAddExpr(NoAddRecs); + // If it returned an add, use the operands. Otherwise it simplified + // the sum into a single value, so just use that. + Ops.clear(); + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum)) + append_range(Ops, Add->operands()); + else if (!Sum->isZero()) + Ops.push_back(Sum); + // Then append the addrecs. + Ops.append(AddRecs.begin(), AddRecs.end()); +} + +/// SplitAddRecs - Flatten a list of add operands, moving addrec start values +/// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,d}. +/// This helps expose more opportunities for folding parts of the expressions +/// into GEP indices. +/// +static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops, + Type *Ty, + ScalarEvolution &SE) { + // Find the addrecs. + SmallVector<const SCEV *, 8> AddRecs; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) { + const SCEV *Start = A->getStart(); + if (Start->isZero()) break; + const SCEV *Zero = SE.getConstant(Ty, 0); + AddRecs.push_back(SE.getAddRecExpr(Zero, + A->getStepRecurrence(SE), + A->getLoop(), + A->getNoWrapFlags(SCEV::FlagNW))); + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) { + Ops[i] = Zero; + append_range(Ops, Add->operands()); + e += Add->getNumOperands(); + } else { + Ops[i] = Start; + } + } + if (!AddRecs.empty()) { + // Add the addrecs onto the end of the list. + Ops.append(AddRecs.begin(), AddRecs.end()); + // Resort the operand list, moving any constants to the front. + SimplifyAddOperands(Ops, Ty, SE); + } +} + +/// expandAddToGEP - Expand an addition expression with a pointer type into +/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps +/// BasicAliasAnalysis and other passes analyze the result. See the rules +/// for getelementptr vs. inttoptr in +/// http://llvm.org/docs/LangRef.html#pointeraliasing +/// for details. +/// +/// Design note: The correctness of using getelementptr here depends on +/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as +/// they may introduce pointer arithmetic which may not be safely converted +/// into getelementptr. +/// +/// Design note: It might seem desirable for this function to be more +/// loop-aware. 
If some of the indices are loop-invariant while others +/// aren't, it might seem desirable to emit multiple GEPs, keeping the +/// loop-invariant portions of the overall computation outside the loop. +/// However, there are a few reasons this is not done here. Hoisting simple +/// arithmetic is a low-level optimization that often isn't very +/// important until late in the optimization process. In fact, passes +/// like InstructionCombining will combine GEPs, even if it means +/// pushing loop-invariant computation down into loops, so even if the +/// GEPs were split here, the work would quickly be undone. The +/// LoopStrengthReduction pass, which is usually run quite late (and +/// after the last InstructionCombining pass), takes care of hoisting +/// loop-invariant portions of expressions, after considering what +/// can be folded using target addressing modes. +/// +Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, + const SCEV *const *op_end, + PointerType *PTy, + Type *Ty, + Value *V) { + SmallVector<Value *, 4> GepIndices; + SmallVector<const SCEV *, 8> Ops(op_begin, op_end); + bool AnyNonZeroIndices = false; + + // Split AddRecs up into parts as either of the parts may be usable + // without the other. + SplitAddRecs(Ops, Ty, SE); + + Type *IntIdxTy = DL.getIndexType(PTy); + + // For opaque pointers, always generate i8 GEP. + if (!PTy->isOpaque()) { + // Descend down the pointer's type and attempt to convert the other + // operands into GEP indices, at each level. The first index in a GEP + // indexes into the array implied by the pointer operand; the rest of + // the indices index into the element or field type selected by the + // preceding index. + Type *ElTy = PTy->getNonOpaquePointerElementType(); + for (;;) { + // If the scale size is not 0, attempt to factor out a scale for + // array indexing. + SmallVector<const SCEV *, 8> ScaledOps; + if (ElTy->isSized()) { + const SCEV *ElSize = SE.getSizeOfExpr(IntIdxTy, ElTy); + if (!ElSize->isZero()) { + SmallVector<const SCEV *, 8> NewOps; + for (const SCEV *Op : Ops) { + const SCEV *Remainder = SE.getConstant(Ty, 0); + if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) { + // Op now has ElSize factored out. + ScaledOps.push_back(Op); + if (!Remainder->isZero()) + NewOps.push_back(Remainder); + AnyNonZeroIndices = true; + } else { + // The operand was not divisible, so add it to the list of + // operands we'll scan next iteration. + NewOps.push_back(Op); + } + } + // If we made any changes, update Ops. + if (!ScaledOps.empty()) { + Ops = NewOps; + SimplifyAddOperands(Ops, Ty, SE); + } + } + } + + // Record the scaled array index for this level of the type. If + // we didn't find any operands that could be factored, tentatively + // assume that element zero was selected (since the zero offset + // would obviously be folded away). + Value *Scaled = + ScaledOps.empty() + ? Constant::getNullValue(Ty) + : expandCodeForImpl(SE.getAddExpr(ScaledOps), Ty); + GepIndices.push_back(Scaled); + + // Collect struct field index operands. + while (StructType *STy = dyn_cast<StructType>(ElTy)) { + bool FoundFieldNo = false; + // An empty struct has no fields. + if (STy->getNumElements() == 0) break; + // Field offsets are known. See if a constant offset falls within any of + // the struct fields. 
+ if (Ops.empty()) + break; + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0])) + if (SE.getTypeSizeInBits(C->getType()) <= 64) { + const StructLayout &SL = *DL.getStructLayout(STy); + uint64_t FullOffset = C->getValue()->getZExtValue(); + if (FullOffset < SL.getSizeInBytes()) { + unsigned ElIdx = SL.getElementContainingOffset(FullOffset); + GepIndices.push_back( + ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx)); + ElTy = STy->getTypeAtIndex(ElIdx); + Ops[0] = + SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx)); + AnyNonZeroIndices = true; + FoundFieldNo = true; + } + } + // If no struct field offsets were found, tentatively assume that + // field zero was selected (since the zero offset would obviously + // be folded away). + if (!FoundFieldNo) { + ElTy = STy->getTypeAtIndex(0u); + GepIndices.push_back( + Constant::getNullValue(Type::getInt32Ty(Ty->getContext()))); + } + } + + if (ArrayType *ATy = dyn_cast<ArrayType>(ElTy)) + ElTy = ATy->getElementType(); + else + // FIXME: Handle VectorType. + // E.g., If ElTy is scalable vector, then ElSize is not a compile-time + // constant, therefore can not be factored out. The generated IR is less + // ideal with base 'V' cast to i8* and do ugly getelementptr over that. + break; + } + } + + // If none of the operands were convertible to proper GEP indices, cast + // the base to i8* and do an ugly getelementptr with that. It's still + // better than ptrtoint+arithmetic+inttoptr at least. + if (!AnyNonZeroIndices) { + // Cast the base to i8*. + if (!PTy->isOpaque()) + V = InsertNoopCastOfTo(V, + Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace())); + + assert(!isa<Instruction>(V) || + SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint())); + + // Expand the operands for a plain byte offset. + Value *Idx = expandCodeForImpl(SE.getAddExpr(Ops), Ty); + + // Fold a GEP with constant operands. + if (Constant *CLHS = dyn_cast<Constant>(V)) + if (Constant *CRHS = dyn_cast<Constant>(Idx)) + return Builder.CreateGEP(Builder.getInt8Ty(), CLHS, CRHS); + + // Do a quick scan to see if we have this GEP nearby. If so, reuse it. + unsigned ScanLimit = 6; + BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin(); + // Scanning starts from the last instruction before the insertion point. + BasicBlock::iterator IP = Builder.GetInsertPoint(); + if (IP != BlockBegin) { + --IP; + for (; ScanLimit; --IP, --ScanLimit) { + // Don't count dbg.value against the ScanLimit, to avoid perturbing the + // generated code. + if (isa<DbgInfoIntrinsic>(IP)) + ScanLimit++; + if (IP->getOpcode() == Instruction::GetElementPtr && + IP->getOperand(0) == V && IP->getOperand(1) == Idx && + cast<GEPOperator>(&*IP)->getSourceElementType() == + Type::getInt8Ty(Ty->getContext())) + return &*IP; + if (IP == BlockBegin) break; + } + } + + // Save the original insertion point so we can restore it when we're done. + SCEVInsertPointGuard Guard(Builder, this); + + // Move the insertion point out of as many loops as we can. + while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) { + if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break; + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) break; + + // Ok, move up a level. + Builder.SetInsertPoint(Preheader->getTerminator()); + } + + // Emit a GEP. + return Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep"); + } + + { + SCEVInsertPointGuard Guard(Builder, this); + + // Move the insertion point out of as many loops as we can. 
+ while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) { + if (!L->isLoopInvariant(V)) break; + + bool AnyIndexNotLoopInvariant = any_of( + GepIndices, [L](Value *Op) { return !L->isLoopInvariant(Op); }); + + if (AnyIndexNotLoopInvariant) + break; + + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) break; + + // Ok, move up a level. + Builder.SetInsertPoint(Preheader->getTerminator()); + } + + // Insert a pretty getelementptr. Note that this GEP is not marked inbounds, + // because ScalarEvolution may have changed the address arithmetic to + // compute a value which is beyond the end of the allocated object. + Value *Casted = V; + if (V->getType() != PTy) + Casted = InsertNoopCastOfTo(Casted, PTy); + Value *GEP = Builder.CreateGEP(PTy->getNonOpaquePointerElementType(), + Casted, GepIndices, "scevgep"); + Ops.push_back(SE.getUnknown(GEP)); + } + + return expand(SE.getAddExpr(Ops)); +} + +Value *SCEVExpander::expandAddToGEP(const SCEV *Op, PointerType *PTy, Type *Ty, + Value *V) { + const SCEV *const Ops[1] = {Op}; + return expandAddToGEP(Ops, Ops + 1, PTy, Ty, V); +} + +/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for +/// SCEV expansion. If they are nested, this is the most nested. If they are +/// neighboring, pick the later. +static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B, + DominatorTree &DT) { + if (!A) return B; + if (!B) return A; + if (A->contains(B)) return B; + if (B->contains(A)) return A; + if (DT.dominates(A->getHeader(), B->getHeader())) return B; + if (DT.dominates(B->getHeader(), A->getHeader())) return A; + return A; // Arbitrarily break the tie. +} + +/// getRelevantLoop - Get the most relevant loop associated with the given +/// expression, according to PickMostRelevantLoop. +const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) { + // Test whether we've already computed the most relevant loop for this SCEV. + auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr)); + if (!Pair.second) + return Pair.first->second; + + switch (S->getSCEVType()) { + case scConstant: + return nullptr; // A constant has no relevant loops. + case scTruncate: + case scZeroExtend: + case scSignExtend: + case scPtrToInt: + case scAddExpr: + case scMulExpr: + case scUDivExpr: + case scAddRecExpr: + case scUMaxExpr: + case scSMaxExpr: + case scUMinExpr: + case scSMinExpr: + case scSequentialUMinExpr: { + const Loop *L = nullptr; + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) + L = AR->getLoop(); + for (const SCEV *Op : S->operands()) + L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT); + return RelevantLoops[S] = L; + } + case scUnknown: { + const SCEVUnknown *U = cast<SCEVUnknown>(S); + if (const Instruction *I = dyn_cast<Instruction>(U->getValue())) + return Pair.first->second = SE.LI.getLoopFor(I->getParent()); + // A non-instruction has no relevant loops. + return nullptr; + } + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + } + llvm_unreachable("Unexpected SCEV type!"); +} + +namespace { + +/// LoopCompare - Compare loops by PickMostRelevantLoop. +class LoopCompare { + DominatorTree &DT; +public: + explicit LoopCompare(DominatorTree &dt) : DT(dt) {} + + bool operator()(std::pair<const Loop *, const SCEV *> LHS, + std::pair<const Loop *, const SCEV *> RHS) const { + // Keep pointer operands sorted at the end. 
+ if (LHS.second->getType()->isPointerTy() != + RHS.second->getType()->isPointerTy()) + return LHS.second->getType()->isPointerTy(); + + // Compare loops with PickMostRelevantLoop. + if (LHS.first != RHS.first) + return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first; + + // If one operand is a non-constant negative and the other is not, + // put the non-constant negative on the right so that a sub can + // be used instead of a negate and add. + if (LHS.second->isNonConstantNegative()) { + if (!RHS.second->isNonConstantNegative()) + return false; + } else if (RHS.second->isNonConstantNegative()) + return true; + + // Otherwise they are equivalent according to this comparison. + return false; + } +}; + +} + +Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { + Type *Ty = SE.getEffectiveSCEVType(S->getType()); + + // Collect all the add operands in a loop, along with their associated loops. + // Iterate in reverse so that constants are emitted last, all else equal, and + // so that pointer operands are inserted first, which the code below relies on + // to form more involved GEPs. + SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops; + for (const SCEV *Op : reverse(S->operands())) + OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op)); + + // Sort by loop. Use a stable sort so that constants follow non-constants and + // pointer operands precede non-pointer operands. + llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT)); + + // Emit instructions to add all the operands. Hoist as much as possible + // out of loops, and form meaningful getelementptrs where possible. + Value *Sum = nullptr; + for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) { + const Loop *CurLoop = I->first; + const SCEV *Op = I->second; + if (!Sum) { + // This is the first operand. Just expand it. + Sum = expand(Op); + ++I; + continue; + } + + assert(!Op->getType()->isPointerTy() && "Only first op can be pointer"); + if (PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) { + // The running sum expression is a pointer. Try to form a getelementptr + // at this level with that as the base. + SmallVector<const SCEV *, 4> NewOps; + for (; I != E && I->first == CurLoop; ++I) { + // If the operand is SCEVUnknown and not instructions, peek through + // it, to enable more of it to be folded into the GEP. + const SCEV *X = I->second; + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X)) + if (!isa<Instruction>(U->getValue())) + X = SE.getSCEV(U->getValue()); + NewOps.push_back(X); + } + Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum); + } else if (Op->isNonConstantNegative()) { + // Instead of doing a negate and add, just do a subtract. + Value *W = expandCodeForImpl(SE.getNegativeSCEV(Op), Ty); + Sum = InsertNoopCastOfTo(Sum, Ty); + Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap, + /*IsSafeToHoist*/ true); + ++I; + } else { + // A simple add. + Value *W = expandCodeForImpl(Op, Ty); + Sum = InsertNoopCastOfTo(Sum, Ty); + // Canonicalize a constant to the RHS. + if (isa<Constant>(Sum)) std::swap(Sum, W); + Sum = InsertBinop(Instruction::Add, Sum, W, S->getNoWrapFlags(), + /*IsSafeToHoist*/ true); + ++I; + } + } + + return Sum; +} + +Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { + Type *Ty = SE.getEffectiveSCEVType(S->getType()); + + // Collect all the mul operands in a loop, along with their associated loops. + // Iterate in reverse so that constants are emitted last, all else equal. 
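+  // For example (hypothetical values %n, %m): the SCEV (3 * %n * %m) keeps its
+  // constant operand first, so the reverse walk visits %m, %n, 3 and the final
+  // multiply by 3 is emitted last, with the constant canonicalized to the RHS.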
+ SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops; + for (const SCEV *Op : reverse(S->operands())) + OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op)); + + // Sort by loop. Use a stable sort so that constants follow non-constants. + llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT)); + + // Emit instructions to mul all the operands. Hoist as much as possible + // out of loops. + Value *Prod = nullptr; + auto I = OpsAndLoops.begin(); + + // Expand the calculation of X pow N in the following manner: + // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then: + // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK). + const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops, &Ty]() { + auto E = I; + // Calculate how many times the same operand from the same loop is included + // into this power. + uint64_t Exponent = 0; + const uint64_t MaxExponent = UINT64_MAX >> 1; + // No one sane will ever try to calculate such huge exponents, but if we + // need this, we stop on UINT64_MAX / 2 because we need to exit the loop + // below when the power of 2 exceeds our Exponent, and we want it to be + // 1u << 31 at most to not deal with unsigned overflow. + while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) { + ++Exponent; + ++E; + } + assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?"); + + // Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them + // that are needed into the result. + Value *P = expandCodeForImpl(I->second, Ty); + Value *Result = nullptr; + if (Exponent & 1) + Result = P; + for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) { + P = InsertBinop(Instruction::Mul, P, P, SCEV::FlagAnyWrap, + /*IsSafeToHoist*/ true); + if (Exponent & BinExp) + Result = Result ? InsertBinop(Instruction::Mul, Result, P, + SCEV::FlagAnyWrap, + /*IsSafeToHoist*/ true) + : P; + } + + I = E; + assert(Result && "Nothing was expanded?"); + return Result; + }; + + while (I != OpsAndLoops.end()) { + if (!Prod) { + // This is the first operand. Just expand it. + Prod = ExpandOpBinPowN(); + } else if (I->second->isAllOnesValue()) { + // Instead of doing a multiply by negative one, just do a negate. + Prod = InsertNoopCastOfTo(Prod, Ty); + Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod, + SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true); + ++I; + } else { + // A simple mul. + Value *W = ExpandOpBinPowN(); + Prod = InsertNoopCastOfTo(Prod, Ty); + // Canonicalize a constant to the RHS. + if (isa<Constant>(Prod)) std::swap(Prod, W); + const APInt *RHS; + if (match(W, m_Power2(RHS))) { + // Canonicalize Prod*(1<<C) to Prod<<C. + assert(!Ty->isVectorTy() && "vector types are not SCEVable"); + auto NWFlags = S->getNoWrapFlags(); + // clear nsw flag if shl will produce poison value. 
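+        // For example, in i8 the constant 1 << 7 is -128: (mul nsw i8 1, -128)
+        // does not overflow, but the equivalent (shl nsw i8 1, 7) is poison
+        // because the sign bit changes, so nsw is dropped whenever the shift
+        // amount is BitWidth - 1.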
+ if (RHS->logBase2() == RHS->getBitWidth() - 1) + NWFlags = ScalarEvolution::clearFlags(NWFlags, SCEV::FlagNSW); + Prod = InsertBinop(Instruction::Shl, Prod, + ConstantInt::get(Ty, RHS->logBase2()), NWFlags, + /*IsSafeToHoist*/ true); + } else { + Prod = InsertBinop(Instruction::Mul, Prod, W, S->getNoWrapFlags(), + /*IsSafeToHoist*/ true); + } + } + } + + return Prod; +} + +Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { + Type *Ty = SE.getEffectiveSCEVType(S->getType()); + + Value *LHS = expandCodeForImpl(S->getLHS(), Ty); + if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) { + const APInt &RHS = SC->getAPInt(); + if (RHS.isPowerOf2()) + return InsertBinop(Instruction::LShr, LHS, + ConstantInt::get(Ty, RHS.logBase2()), + SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true); + } + + Value *RHS = expandCodeForImpl(S->getRHS(), Ty); + return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap, + /*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS())); +} + +/// Determine if this is a well-behaved chain of instructions leading back to +/// the PHI. If so, it may be reused by expanded expressions. +bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, + const Loop *L) { + if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) || + (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV))) + return false; + // If any of the operands don't dominate the insert position, bail. + // Addrec operands are always loop-invariant, so this can only happen + // if there are instructions which haven't been hoisted. + if (L == IVIncInsertLoop) { + for (Use &Op : llvm::drop_begin(IncV->operands())) + if (Instruction *OInst = dyn_cast<Instruction>(Op)) + if (!SE.DT.dominates(OInst, IVIncInsertPos)) + return false; + } + // Advance to the next instruction. + IncV = dyn_cast<Instruction>(IncV->getOperand(0)); + if (!IncV) + return false; + + if (IncV->mayHaveSideEffects()) + return false; + + if (IncV == PN) + return true; + + return isNormalAddRecExprPHI(PN, IncV, L); +} + +/// getIVIncOperand returns an induction variable increment's induction +/// variable operand. +/// +/// If allowScale is set, any type of GEP is allowed as long as the nonIV +/// operands dominate InsertPos. +/// +/// If allowScale is not set, ensure that a GEP increment conforms to one of the +/// simple patterns generated by getAddRecExprPHILiterally and +/// expandAddtoGEP. If the pattern isn't recognized, return NULL. +Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV, + Instruction *InsertPos, + bool allowScale) { + if (IncV == InsertPos) + return nullptr; + + switch (IncV->getOpcode()) { + default: + return nullptr; + // Check for a simple Add/Sub or GEP of a loop invariant step. + case Instruction::Add: + case Instruction::Sub: { + Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1)); + if (!OInst || SE.DT.dominates(OInst, InsertPos)) + return dyn_cast<Instruction>(IncV->getOperand(0)); + return nullptr; + } + case Instruction::BitCast: + return dyn_cast<Instruction>(IncV->getOperand(0)); + case Instruction::GetElementPtr: + for (Use &U : llvm::drop_begin(IncV->operands())) { + if (isa<Constant>(U)) + continue; + if (Instruction *OInst = dyn_cast<Instruction>(U)) { + if (!SE.DT.dominates(OInst, InsertPos)) + return nullptr; + } + if (allowScale) { + // allow any kind of GEP as long as it can be hoisted. + continue; + } + // This must be a pointer addition of constants (pretty), which is already + // handled, or some number of address-size elements (ugly). 
Ugly geps + // have 2 operands. i1* is used by the expander to represent an + // address-size element. + if (IncV->getNumOperands() != 2) + return nullptr; + unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace(); + if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS) + && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS)) + return nullptr; + break; + } + return dyn_cast<Instruction>(IncV->getOperand(0)); + } +} + +/// If the insert point of the current builder or any of the builders on the +/// stack of saved builders has 'I' as its insert point, update it to point to +/// the instruction after 'I'. This is intended to be used when the instruction +/// 'I' is being moved. If this fixup is not done and 'I' is moved to a +/// different block, the inconsistent insert point (with a mismatched +/// Instruction and Block) can lead to an instruction being inserted in a block +/// other than its parent. +void SCEVExpander::fixupInsertPoints(Instruction *I) { + BasicBlock::iterator It(*I); + BasicBlock::iterator NewInsertPt = std::next(It); + if (Builder.GetInsertPoint() == It) + Builder.SetInsertPoint(&*NewInsertPt); + for (auto *InsertPtGuard : InsertPointGuards) + if (InsertPtGuard->GetInsertPoint() == It) + InsertPtGuard->SetInsertPoint(NewInsertPt); +} + +/// hoistStep - Attempt to hoist a simple IV increment above InsertPos to make +/// it available to other uses in this loop. Recursively hoist any operands, +/// until we reach a value that dominates InsertPos. +bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos, + bool RecomputePoisonFlags) { + auto FixupPoisonFlags = [this](Instruction *I) { + // Drop flags that are potentially inferred from old context and infer flags + // in new context. + I->dropPoisonGeneratingFlags(); + if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I)) + if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) { + auto *BO = cast<BinaryOperator>(I); + BO->setHasNoUnsignedWrap( + ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == SCEV::FlagNUW); + BO->setHasNoSignedWrap( + ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == SCEV::FlagNSW); + } + }; + + if (SE.DT.dominates(IncV, InsertPos)) { + if (RecomputePoisonFlags) + FixupPoisonFlags(IncV); + return true; + } + + // InsertPos must itself dominate IncV so that IncV's new position satisfies + // its existing users. + if (isa<PHINode>(InsertPos) || + !SE.DT.dominates(InsertPos->getParent(), IncV->getParent())) + return false; + + if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos)) + return false; + + // Check that the chain of IV operands leading back to Phi can be hoisted. + SmallVector<Instruction*, 4> IVIncs; + for(;;) { + Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true); + if (!Oper) + return false; + // IncV is safe to hoist. + IVIncs.push_back(IncV); + IncV = Oper; + if (SE.DT.dominates(IncV, InsertPos)) + break; + } + for (Instruction *I : llvm::reverse(IVIncs)) { + fixupInsertPoints(I); + I->moveBefore(InsertPos); + if (RecomputePoisonFlags) + FixupPoisonFlags(I); + } + return true; +} + +/// Determine if this cyclic phi is in a form that would have been generated by +/// LSR. We don't care if the phi was actually expanded in this pass, as long +/// as it is in a low-cost form, for example, no implied multiplication. This +/// should match any patterns generated by getAddRecExprPHILiterally and +/// expandAddtoGEP. 
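+/// A typical accepted shape (illustrative, with made-up value names) is
+///   %iv      = phi i8* [ %start, %preheader ], [ %iv.next, %latch ]
+///   %iv.next = getelementptr i8, i8* %iv, i64 4
+/// i.e. a pointer phi whose increment is a two-operand, constant-index GEP.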
+bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, + const Loop *L) { + for(Instruction *IVOper = IncV; + (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(), + /*allowScale=*/false));) { + if (IVOper == PN) + return true; + } + return false; +} + +/// expandIVInc - Expand an IV increment at Builder's current InsertPos. +/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may +/// need to materialize IV increments elsewhere to handle difficult situations. +Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L, + Type *ExpandTy, Type *IntTy, + bool useSubtract) { + Value *IncV; + // If the PHI is a pointer, use a GEP, otherwise use an add or sub. + if (ExpandTy->isPointerTy()) { + PointerType *GEPPtrTy = cast<PointerType>(ExpandTy); + // If the step isn't constant, don't use an implicitly scaled GEP, because + // that would require a multiply inside the loop. + if (!isa<ConstantInt>(StepV)) + GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()), + GEPPtrTy->getAddressSpace()); + IncV = expandAddToGEP(SE.getSCEV(StepV), GEPPtrTy, IntTy, PN); + if (IncV->getType() != PN->getType()) + IncV = Builder.CreateBitCast(IncV, PN->getType()); + } else { + IncV = useSubtract ? + Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") : + Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next"); + } + return IncV; +} + +/// Check whether we can cheaply express the requested SCEV in terms of +/// the available PHI SCEV by truncation and/or inversion of the step. +static bool canBeCheaplyTransformed(ScalarEvolution &SE, + const SCEVAddRecExpr *Phi, + const SCEVAddRecExpr *Requested, + bool &InvertStep) { + // We can't transform to match a pointer PHI. + if (Phi->getType()->isPointerTy()) + return false; + + Type *PhiTy = SE.getEffectiveSCEVType(Phi->getType()); + Type *RequestedTy = SE.getEffectiveSCEVType(Requested->getType()); + + if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth()) + return false; + + // Try truncate it if necessary. + Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy)); + if (!Phi) + return false; + + // Check whether truncation will help. + if (Phi == Requested) { + InvertStep = false; + return true; + } + + // Check whether inverting will help: {R,+,-1} == R - {0,+,1}. 
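+  // For example, an existing phi with SCEV {0,+,1} can serve a request for
+  // {R,+,-1}, since R - {0,+,1} == {R,+,-1}; the caller then emits
+  // "Start - Phi" instead of a fresh induction variable (see the InvertStep
+  // handling in expandAddRecExprLiterally).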
+ if (SE.getMinusSCEV(Requested->getStart(), Requested) == Phi) { + InvertStep = true; + return true; + } + + return false; +} + +static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) { + if (!isa<IntegerType>(AR->getType())) + return false; + + unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth(); + Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2); + const SCEV *Step = AR->getStepRecurrence(SE); + const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy), + SE.getSignExtendExpr(AR, WideTy)); + const SCEV *ExtendAfterOp = + SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy); + return ExtendAfterOp == OpAfterExtend; +} + +static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) { + if (!isa<IntegerType>(AR->getType())) + return false; + + unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth(); + Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2); + const SCEV *Step = AR->getStepRecurrence(SE); + const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy), + SE.getZeroExtendExpr(AR, WideTy)); + const SCEV *ExtendAfterOp = + SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy); + return ExtendAfterOp == OpAfterExtend; +} + +/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand +/// the base addrec, which is the addrec without any non-loop-dominating +/// values, and return the PHI. +PHINode * +SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, + const Loop *L, + Type *ExpandTy, + Type *IntTy, + Type *&TruncTy, + bool &InvertStep) { + assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position"); + + // Reuse a previously-inserted PHI, if present. + BasicBlock *LatchBlock = L->getLoopLatch(); + if (LatchBlock) { + PHINode *AddRecPhiMatch = nullptr; + Instruction *IncV = nullptr; + TruncTy = nullptr; + InvertStep = false; + + // Only try partially matching scevs that need truncation and/or + // step-inversion if we know this loop is outside the current loop. + bool TryNonMatchingSCEV = + IVIncInsertLoop && + SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader()); + + for (PHINode &PN : L->getHeader()->phis()) { + if (!SE.isSCEVable(PN.getType())) + continue; + + // We should not look for a incomplete PHI. Getting SCEV for a incomplete + // PHI has no meaning at all. + if (!PN.isComplete()) { + SCEV_DEBUG_WITH_TYPE( + DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n"); + continue; + } + + const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN)); + if (!PhiSCEV) + continue; + + bool IsMatchingSCEV = PhiSCEV == Normalized; + // We only handle truncation and inversion of phi recurrences for the + // expanded expression if the expanded expression's loop dominates the + // loop we insert to. Check now, so we can bail out early. + if (!IsMatchingSCEV && !TryNonMatchingSCEV) + continue; + + // TODO: this possibly can be reworked to avoid this cast at all. + Instruction *TempIncV = + dyn_cast<Instruction>(PN.getIncomingValueForBlock(LatchBlock)); + if (!TempIncV) + continue; + + // Check whether we can reuse this PHI node. + if (LSRMode) { + if (!isExpandedAddRecExprPHI(&PN, TempIncV, L)) + continue; + } else { + if (!isNormalAddRecExprPHI(&PN, TempIncV, L)) + continue; + } + + // Stop if we have found an exact match SCEV. 
+ if (IsMatchingSCEV) { + IncV = TempIncV; + TruncTy = nullptr; + InvertStep = false; + AddRecPhiMatch = &PN; + break; + } + + // Try whether the phi can be translated into the requested form + // (truncated and/or offset by a constant). + if ((!TruncTy || InvertStep) && + canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) { + // Record the phi node. But don't stop we might find an exact match + // later. + AddRecPhiMatch = &PN; + IncV = TempIncV; + TruncTy = SE.getEffectiveSCEVType(Normalized->getType()); + } + } + + if (AddRecPhiMatch) { + // Ok, the add recurrence looks usable. + // Remember this PHI, even in post-inc mode. + InsertedValues.insert(AddRecPhiMatch); + // Remember the increment. + rememberInstruction(IncV); + // Those values were not actually inserted but re-used. + ReusedValues.insert(AddRecPhiMatch); + ReusedValues.insert(IncV); + return AddRecPhiMatch; + } + } + + // Save the original insertion point so we can restore it when we're done. + SCEVInsertPointGuard Guard(Builder, this); + + // Another AddRec may need to be recursively expanded below. For example, if + // this AddRec is quadratic, the StepV may itself be an AddRec in this + // loop. Remove this loop from the PostIncLoops set before expanding such + // AddRecs. Otherwise, we cannot find a valid position for the step + // (i.e. StepV can never dominate its loop header). Ideally, we could do + // SavedIncLoops.swap(PostIncLoops), but we generally have a single element, + // so it's not worth implementing SmallPtrSet::swap. + PostIncLoopSet SavedPostIncLoops = PostIncLoops; + PostIncLoops.clear(); + + // Expand code for the start value into the loop preheader. + assert(L->getLoopPreheader() && + "Can't expand add recurrences without a loop preheader!"); + Value *StartV = + expandCodeForImpl(Normalized->getStart(), ExpandTy, + L->getLoopPreheader()->getTerminator()); + + // StartV must have been be inserted into L's preheader to dominate the new + // phi. + assert(!isa<Instruction>(StartV) || + SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(), + L->getHeader())); + + // Expand code for the step value. Do this before creating the PHI so that PHI + // reuse code doesn't see an incomplete PHI. + const SCEV *Step = Normalized->getStepRecurrence(SE); + // If the stride is negative, insert a sub instead of an add for the increment + // (unless it's a constant, because subtracts of constants are canonicalized + // to adds). + bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative(); + if (useSubtract) + Step = SE.getNegativeSCEV(Step); + // Expand the step somewhere that dominates the loop header. + Value *StepV = expandCodeForImpl( + Step, IntTy, &*L->getHeader()->getFirstInsertionPt()); + + // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if + // we actually do emit an addition. It does not apply if we emit a + // subtraction. + bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized); + bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized); + + // Create the PHI. + BasicBlock *Header = L->getHeader(); + Builder.SetInsertPoint(Header, Header->begin()); + pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header); + PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE), + Twine(IVName) + ".iv"); + + // Create the step instructions and populate the PHI. + for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) { + BasicBlock *Pred = *HPI; + + // Add a start value. 
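+    // (i.e. the out-of-loop predecessor, normally the preheader where StartV
+    //  was expanded above, feeds the start value into the phi; in-loop
+    //  predecessors get the increment created below.)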
+ if (!L->contains(Pred)) { + PN->addIncoming(StartV, Pred); + continue; + } + + // Create a step value and add it to the PHI. + // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the + // instructions at IVIncInsertPos. + Instruction *InsertPos = L == IVIncInsertLoop ? + IVIncInsertPos : Pred->getTerminator(); + Builder.SetInsertPoint(InsertPos); + Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); + + if (isa<OverflowingBinaryOperator>(IncV)) { + if (IncrementIsNUW) + cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap(); + if (IncrementIsNSW) + cast<BinaryOperator>(IncV)->setHasNoSignedWrap(); + } + PN->addIncoming(IncV, Pred); + } + + // After expanding subexpressions, restore the PostIncLoops set so the caller + // can ensure that IVIncrement dominates the current uses. + PostIncLoops = SavedPostIncLoops; + + // Remember this PHI, even in post-inc mode. LSR SCEV-based salvaging is most + // effective when we are able to use an IV inserted here, so record it. + InsertedValues.insert(PN); + InsertedIVs.push_back(PN); + return PN; +} + +Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { + Type *STy = S->getType(); + Type *IntTy = SE.getEffectiveSCEVType(STy); + const Loop *L = S->getLoop(); + + // Determine a normalized form of this expression, which is the expression + // before any post-inc adjustment is made. + const SCEVAddRecExpr *Normalized = S; + if (PostIncLoops.count(L)) { + PostIncLoopSet Loops; + Loops.insert(L); + Normalized = cast<SCEVAddRecExpr>(normalizeForPostIncUse(S, Loops, SE)); + } + + // Strip off any non-loop-dominating component from the addrec start. + const SCEV *Start = Normalized->getStart(); + const SCEV *PostLoopOffset = nullptr; + if (!SE.properlyDominates(Start, L->getHeader())) { + PostLoopOffset = Start; + Start = SE.getConstant(Normalized->getType(), 0); + Normalized = cast<SCEVAddRecExpr>( + SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE), + Normalized->getLoop(), + Normalized->getNoWrapFlags(SCEV::FlagNW))); + } + + // Strip off any non-loop-dominating component from the addrec step. + const SCEV *Step = Normalized->getStepRecurrence(SE); + const SCEV *PostLoopScale = nullptr; + if (!SE.dominates(Step, L->getHeader())) { + PostLoopScale = Step; + Step = SE.getConstant(Normalized->getType(), 1); + if (!Start->isZero()) { + // The normalization below assumes that Start is constant zero, so if + // it isn't re-associate Start to PostLoopOffset. + assert(!PostLoopOffset && "Start not-null but PostLoopOffset set?"); + PostLoopOffset = Start; + Start = SE.getConstant(Normalized->getType(), 0); + } + Normalized = + cast<SCEVAddRecExpr>(SE.getAddRecExpr( + Start, Step, Normalized->getLoop(), + Normalized->getNoWrapFlags(SCEV::FlagNW))); + } + + // Expand the core addrec. If we need post-loop scaling, force it to + // expand to an integer type to avoid the need for additional casting. + Type *ExpandTy = PostLoopScale ? IntTy : STy; + // We can't use a pointer type for the addrec if the pointer type is + // non-integral. + Type *AddRecPHIExpandTy = + DL.isNonIntegralPointerType(STy) ? Normalized->getType() : ExpandTy; + + // In some cases, we decide to reuse an existing phi node but need to truncate + // it and/or invert the step. + Type *TruncTy = nullptr; + bool InvertStep = false; + PHINode *PN = getAddRecExprPHILiterally(Normalized, L, AddRecPHIExpandTy, + IntTy, TruncTy, InvertStep); + + // Accommodate post-inc mode, if necessary. 
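+  // Illustrative overview (hypothetical values %a, %b): if neither the start
+  // %a nor the step %b of {%a,+,%b} dominates the loop header, the phi built
+  // above carries only {0,+,1}; after the pre- or post-increment value is
+  // chosen below, PostLoopScale re-applies the "* %b" and PostLoopOffset the
+  // "+ %a".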
+ Value *Result; + if (!PostIncLoops.count(L)) + Result = PN; + else { + // In PostInc mode, use the post-incremented value. + BasicBlock *LatchBlock = L->getLoopLatch(); + assert(LatchBlock && "PostInc mode requires a unique loop latch!"); + Result = PN->getIncomingValueForBlock(LatchBlock); + + // We might be introducing a new use of the post-inc IV that is not poison + // safe, in which case we should drop poison generating flags. Only keep + // those flags for which SCEV has proven that they always hold. + if (isa<OverflowingBinaryOperator>(Result)) { + auto *I = cast<Instruction>(Result); + if (!S->hasNoUnsignedWrap()) + I->setHasNoUnsignedWrap(false); + if (!S->hasNoSignedWrap()) + I->setHasNoSignedWrap(false); + } + + // For an expansion to use the postinc form, the client must call + // expandCodeFor with an InsertPoint that is either outside the PostIncLoop + // or dominated by IVIncInsertPos. + if (isa<Instruction>(Result) && + !SE.DT.dominates(cast<Instruction>(Result), + &*Builder.GetInsertPoint())) { + // The induction variable's postinc expansion does not dominate this use. + // IVUsers tries to prevent this case, so it is rare. However, it can + // happen when an IVUser outside the loop is not dominated by the latch + // block. Adjusting IVIncInsertPos before expansion begins cannot handle + // all cases. Consider a phi outside whose operand is replaced during + // expansion with the value of the postinc user. Without fundamentally + // changing the way postinc users are tracked, the only remedy is + // inserting an extra IV increment. StepV might fold into PostLoopOffset, + // but hopefully expandCodeFor handles that. + bool useSubtract = + !ExpandTy->isPointerTy() && Step->isNonConstantNegative(); + if (useSubtract) + Step = SE.getNegativeSCEV(Step); + Value *StepV; + { + // Expand the step somewhere that dominates the loop header. + SCEVInsertPointGuard Guard(Builder, this); + StepV = expandCodeForImpl( + Step, IntTy, &*L->getHeader()->getFirstInsertionPt()); + } + Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); + } + } + + // We have decided to reuse an induction variable of a dominating loop. Apply + // truncation and/or inversion of the step. + if (TruncTy) { + Type *ResTy = Result->getType(); + // Normalize the result type. + if (ResTy != SE.getEffectiveSCEVType(ResTy)) + Result = InsertNoopCastOfTo(Result, SE.getEffectiveSCEVType(ResTy)); + // Truncate the result. + if (TruncTy != Result->getType()) + Result = Builder.CreateTrunc(Result, TruncTy); + + // Invert the result. + if (InvertStep) + Result = Builder.CreateSub( + expandCodeForImpl(Normalized->getStart(), TruncTy), Result); + } + + // Re-apply any non-loop-dominating scale. + if (PostLoopScale) { + assert(S->isAffine() && "Can't linearly scale non-affine recurrences."); + Result = InsertNoopCastOfTo(Result, IntTy); + Result = Builder.CreateMul(Result, + expandCodeForImpl(PostLoopScale, IntTy)); + } + + // Re-apply any non-loop-dominating offset. 
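+  // (If the addrec has pointer type this is folded in with a GEP: whichever of
+  //  the expanded offset and the current result carries the pointer type
+  //  becomes the GEP base and the other the index. Otherwise a plain integer
+  //  add suffices.)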
+ if (PostLoopOffset) { + if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) { + if (Result->getType()->isIntegerTy()) { + Value *Base = expandCodeForImpl(PostLoopOffset, ExpandTy); + Result = expandAddToGEP(SE.getUnknown(Result), PTy, IntTy, Base); + } else { + Result = expandAddToGEP(PostLoopOffset, PTy, IntTy, Result); + } + } else { + Result = InsertNoopCastOfTo(Result, IntTy); + Result = Builder.CreateAdd( + Result, expandCodeForImpl(PostLoopOffset, IntTy)); + } + } + + return Result; +} + +Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { + // In canonical mode we compute the addrec as an expression of a canonical IV + // using evaluateAtIteration and expand the resulting SCEV expression. This + // way we avoid introducing new IVs to carry on the computation of the addrec + // throughout the loop. + // + // For nested addrecs evaluateAtIteration might need a canonical IV of a + // type wider than the addrec itself. Emitting a canonical IV of the + // proper type might produce non-legal types, for example expanding an i64 + // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall + // back to non-canonical mode for nested addrecs. + if (!CanonicalMode || (S->getNumOperands() > 2)) + return expandAddRecExprLiterally(S); + + Type *Ty = SE.getEffectiveSCEVType(S->getType()); + const Loop *L = S->getLoop(); + + // First check for an existing canonical IV in a suitable type. + PHINode *CanonicalIV = nullptr; + if (PHINode *PN = L->getCanonicalInductionVariable()) + if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty)) + CanonicalIV = PN; + + // Rewrite an AddRec in terms of the canonical induction variable, if + // its type is more narrow. + if (CanonicalIV && + SE.getTypeSizeInBits(CanonicalIV->getType()) > SE.getTypeSizeInBits(Ty) && + !S->getType()->isPointerTy()) { + SmallVector<const SCEV *, 4> NewOps(S->getNumOperands()); + for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i) + NewOps[i] = SE.getAnyExtendExpr(S->getOperand(i), CanonicalIV->getType()); + Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(), + S->getNoWrapFlags(SCEV::FlagNW))); + BasicBlock::iterator NewInsertPt = + findInsertPointAfter(cast<Instruction>(V), &*Builder.GetInsertPoint()); + V = expandCodeForImpl(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr, + &*NewInsertPt); + return V; + } + + // {X,+,F} --> X + {0,+,F} + if (!S->getStart()->isZero()) { + if (PointerType *PTy = dyn_cast<PointerType>(S->getType())) { + Value *StartV = expand(SE.getPointerBase(S)); + assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!"); + return expandAddToGEP(SE.removePointerBase(S), PTy, Ty, StartV); + } + + SmallVector<const SCEV *, 4> NewOps(S->operands()); + NewOps[0] = SE.getConstant(Ty, 0); + const SCEV *Rest = SE.getAddRecExpr(NewOps, L, + S->getNoWrapFlags(SCEV::FlagNW)); + + // Just do a normal add. Pre-expand the operands to suppress folding. + // + // The LHS and RHS values are factored out of the expand call to make the + // output independent of the argument evaluation order. + const SCEV *AddExprLHS = SE.getUnknown(expand(S->getStart())); + const SCEV *AddExprRHS = SE.getUnknown(expand(Rest)); + return expand(SE.getAddExpr(AddExprLHS, AddExprRHS)); + } + + // If we don't yet have a canonical IV, create one. + if (!CanonicalIV) { + // Create and insert the PHI node for the induction variable in the + // specified loop. 
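+    // The canonical IV is simply {0,+,1} of type Ty, i.e. roughly:
+    //   %indvar      = phi Ty [ 0, %preheader ], [ %indvar.next, %latch ]
+    //   %indvar.next = add Ty %indvar, 1
+    // where the add is inserted before each in-loop predecessor's terminator
+    // (block names are illustrative).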
+ BasicBlock *Header = L->getHeader(); + pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header); + CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar", + &Header->front()); + rememberInstruction(CanonicalIV); + + SmallSet<BasicBlock *, 4> PredSeen; + Constant *One = ConstantInt::get(Ty, 1); + for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) { + BasicBlock *HP = *HPI; + if (!PredSeen.insert(HP).second) { + // There must be an incoming value for each predecessor, even the + // duplicates! + CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP); + continue; + } + + if (L->contains(HP)) { + // Insert a unit add instruction right before the terminator + // corresponding to the back-edge. + Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One, + "indvar.next", + HP->getTerminator()); + Add->setDebugLoc(HP->getTerminator()->getDebugLoc()); + rememberInstruction(Add); + CanonicalIV->addIncoming(Add, HP); + } else { + CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP); + } + } + } + + // {0,+,1} --> Insert a canonical induction variable into the loop! + if (S->isAffine() && S->getOperand(1)->isOne()) { + assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) && + "IVs with types different from the canonical IV should " + "already have been handled!"); + return CanonicalIV; + } + + // {0,+,F} --> {0,+,1} * F + + // If this is a simple linear addrec, emit it now as a special case. + if (S->isAffine()) // {0,+,F} --> i*F + return + expand(SE.getTruncateOrNoop( + SE.getMulExpr(SE.getUnknown(CanonicalIV), + SE.getNoopOrAnyExtend(S->getOperand(1), + CanonicalIV->getType())), + Ty)); + + // If this is a chain of recurrences, turn it into a closed form, using the + // folders, then expandCodeFor the closed form. This allows the folders to + // simplify the expression without having to build a bunch of special code + // into this folder. + const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV. + + // Promote S up to the canonical IV type, if the cast is foldable. + const SCEV *NewS = S; + const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType()); + if (isa<SCEVAddRecExpr>(Ext)) + NewS = Ext; + + const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE); + + // Truncate the result down to the original type, if needed. 
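+  // (For example, a quadratic {0,+,a,+,b} evaluates at iteration i to
+  //  a*i + b*i*(i-1)/2 in the canonical IV's type, which may be wider than Ty
+  //  and then needs the truncation below; a, b and i are illustrative names.)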
+ const SCEV *T = SE.getTruncateOrNoop(V, Ty); + return expand(T); +} + +Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) { + Value *V = + expandCodeForImpl(S->getOperand(), S->getOperand()->getType()); + return ReuseOrCreateCast(V, S->getType(), CastInst::PtrToInt, + GetOptimalInsertionPointForCastOf(V)); +} + +Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) { + Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Value *V = expandCodeForImpl( + S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()) + ); + return Builder.CreateTrunc(V, Ty); +} + +Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) { + Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Value *V = expandCodeForImpl( + S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()) + ); + return Builder.CreateZExt(V, Ty); +} + +Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) { + Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Value *V = expandCodeForImpl( + S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()) + ); + return Builder.CreateSExt(V, Ty); +} + +Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S, + Intrinsic::ID IntrinID, Twine Name, + bool IsSequential) { + Value *LHS = expand(S->getOperand(S->getNumOperands() - 1)); + Type *Ty = LHS->getType(); + if (IsSequential) + LHS = Builder.CreateFreeze(LHS); + for (int i = S->getNumOperands() - 2; i >= 0; --i) { + Value *RHS = expandCodeForImpl(S->getOperand(i), Ty); + if (IsSequential && i != 0) + RHS = Builder.CreateFreeze(RHS); + Value *Sel; + if (Ty->isIntegerTy()) + Sel = Builder.CreateIntrinsic(IntrinID, {Ty}, {LHS, RHS}, + /*FMFSource=*/nullptr, Name); + else { + Value *ICmp = + Builder.CreateICmp(MinMaxIntrinsic::getPredicate(IntrinID), LHS, RHS); + Sel = Builder.CreateSelect(ICmp, LHS, RHS, Name); + } + LHS = Sel; + } + return LHS; +} + +Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { + return expandMinMaxExpr(S, Intrinsic::smax, "smax"); +} + +Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { + return expandMinMaxExpr(S, Intrinsic::umax, "umax"); +} + +Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) { + return expandMinMaxExpr(S, Intrinsic::smin, "smin"); +} + +Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) { + return expandMinMaxExpr(S, Intrinsic::umin, "umin"); +} + +Value *SCEVExpander::visitSequentialUMinExpr(const SCEVSequentialUMinExpr *S) { + return expandMinMaxExpr(S, Intrinsic::umin, "umin", /*IsSequential*/true); +} + +Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, + Instruction *IP) { + setInsertPoint(IP); + Value *V = expandCodeForImpl(SH, Ty); + return V; +} + +Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty) { + // Expand the code for this SCEV. + Value *V = expand(SH); + + if (Ty) { + assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) && + "non-trivial casts should be done with the SCEVs directly!"); + V = InsertNoopCastOfTo(V, Ty); + } + return V; +} + +Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S, + const Instruction *InsertPt) { + // If the expansion is not in CanonicalMode, and the SCEV contains any + // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally. + if (!CanonicalMode && SE.containsAddRecurrence(S)) + return nullptr; + + // If S is a constant, it may be worse to reuse an existing Value. 
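+  // (A constant can be re-materialised for free wherever it is needed, while
+  //  reusing some existing Value defined elsewhere may only extend live
+  //  ranges, so bail out early.)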
+ if (isa<SCEVConstant>(S)) + return nullptr; + + // Choose a Value from the set which dominates the InsertPt. + // InsertPt should be inside the Value's parent loop so as not to break + // the LCSSA form. + for (Value *V : SE.getSCEVValues(S)) { + Instruction *EntInst = dyn_cast<Instruction>(V); + if (!EntInst) + continue; + + assert(EntInst->getFunction() == InsertPt->getFunction()); + if (S->getType() == V->getType() && + SE.DT.dominates(EntInst, InsertPt) && + (SE.LI.getLoopFor(EntInst->getParent()) == nullptr || + SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt))) + return V; + } + return nullptr; +} + +// The expansion of SCEV will either reuse a previous Value in ExprValueMap, +// or expand the SCEV literally. Specifically, if the expansion is in LSRMode, +// and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded +// literally, to prevent LSR's transformed SCEV from being reverted. Otherwise, +// the expansion will try to reuse Value from ExprValueMap, and only when it +// fails, expand the SCEV literally. +Value *SCEVExpander::expand(const SCEV *S) { + // Compute an insertion point for this SCEV object. Hoist the instructions + // as far out in the loop nest as possible. + Instruction *InsertPt = &*Builder.GetInsertPoint(); + + // We can move insertion point only if there is no div or rem operations + // otherwise we are risky to move it over the check for zero denominator. + auto SafeToHoist = [](const SCEV *S) { + return !SCEVExprContains(S, [](const SCEV *S) { + if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) { + if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS())) + // Division by non-zero constants can be hoisted. + return SC->getValue()->isZero(); + // All other divisions should not be moved as they may be + // divisions by zero and should be kept within the + // conditions of the surrounding loops that guard their + // execution (see PR35406). + return true; + } + return false; + }); + }; + if (SafeToHoist(S)) { + for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());; + L = L->getParentLoop()) { + if (SE.isLoopInvariant(S, L)) { + if (!L) break; + if (BasicBlock *Preheader = L->getLoopPreheader()) + InsertPt = Preheader->getTerminator(); + else + // LSR sets the insertion point for AddRec start/step values to the + // block start to simplify value reuse, even though it's an invalid + // position. SCEVExpander must correct for this in all cases. + InsertPt = &*L->getHeader()->getFirstInsertionPt(); + } else { + // If the SCEV is computable at this level, insert it into the header + // after the PHIs (and after any other instructions that we've inserted + // there) so that it is guaranteed to dominate any user inside the loop. + if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) + InsertPt = &*L->getHeader()->getFirstInsertionPt(); + + while (InsertPt->getIterator() != Builder.GetInsertPoint() && + (isInsertedInstruction(InsertPt) || + isa<DbgInfoIntrinsic>(InsertPt))) { + InsertPt = &*std::next(InsertPt->getIterator()); + } + break; + } + } + } + + // Check to see if we already expanded this here. + auto I = InsertedExpressions.find(std::make_pair(S, InsertPt)); + if (I != InsertedExpressions.end()) + return I->second; + + SCEVInsertPointGuard Guard(Builder, this); + Builder.SetInsertPoint(InsertPt); + + // Expand the expression into instructions. 
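+  // First try to reuse a Value that ScalarEvolution already recorded for S
+  // (FindValueInExprValueMap above); only fall back to a literal visit()
+  // expansion when no suitable dominating value exists.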
+ Value *V = FindValueInExprValueMap(S, InsertPt); + if (!V) { + V = visit(S); + V = fixupLCSSAFormFor(V); + } else { + // If we're reusing an existing instruction, we are effectively CSEing two + // copies of the instruction (with potentially different flags). As such, + // we need to drop any poison generating flags unless we can prove that + // said flags must be valid for all new users. + if (auto *I = dyn_cast<Instruction>(V)) + if (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)) + I->dropPoisonGeneratingFlags(); + } + // Remember the expanded value for this SCEV at this location. + // + // This is independent of PostIncLoops. The mapped value simply materializes + // the expression at this insertion point. If the mapped value happened to be + // a postinc expansion, it could be reused by a non-postinc user, but only if + // its insertion point was already at the head of the loop. + InsertedExpressions[std::make_pair(S, InsertPt)] = V; + return V; +} + +void SCEVExpander::rememberInstruction(Value *I) { + auto DoInsert = [this](Value *V) { + if (!PostIncLoops.empty()) + InsertedPostIncValues.insert(V); + else + InsertedValues.insert(V); + }; + DoInsert(I); +} + +/// replaceCongruentIVs - Check for congruent phis in this loop header and +/// replace them with their most canonical representative. Return the number of +/// phis eliminated. +/// +/// This does not depend on any SCEVExpander state but should be used in +/// the same context that SCEVExpander is used. +unsigned +SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, + SmallVectorImpl<WeakTrackingVH> &DeadInsts, + const TargetTransformInfo *TTI) { + // Find integer phis in order of increasing width. + SmallVector<PHINode*, 8> Phis; + for (PHINode &PN : L->getHeader()->phis()) + Phis.push_back(&PN); + + if (TTI) + // Use stable_sort to preserve order of equivalent PHIs, so the order + // of the sorted Phis is the same from run to run on the same loop. + llvm::stable_sort(Phis, [](Value *LHS, Value *RHS) { + // Put pointers at the back and make sure pointer < pointer = false. + if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy()) + return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy(); + return RHS->getType()->getPrimitiveSizeInBits().getFixedValue() < + LHS->getType()->getPrimitiveSizeInBits().getFixedValue(); + }); + + unsigned NumElim = 0; + DenseMap<const SCEV *, PHINode *> ExprToIVMap; + // Process phis from wide to narrow. Map wide phis to their truncation + // so narrow phis can reuse them. + for (PHINode *Phi : Phis) { + auto SimplifyPHINode = [&](PHINode *PN) -> Value * { + if (Value *V = simplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC})) + return V; + if (!SE.isSCEVable(PN->getType())) + return nullptr; + auto *Const = dyn_cast<SCEVConstant>(SE.getSCEV(PN)); + if (!Const) + return nullptr; + return Const->getValue(); + }; + + // Fold constant phis. They may be congruent to other constant phis and + // would confuse the logic below that expects proper IVs. 
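+    // (For instance, two header phis that both simplify to the same
+    //  loop-invariant value are each replaced by that value here instead of
+    //  being compared as induction variables further down.)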
+ if (Value *V = SimplifyPHINode(Phi)) { + if (V->getType() != Phi->getType()) + continue; + SE.forgetValue(Phi); + Phi->replaceAllUsesWith(V); + DeadInsts.emplace_back(Phi); + ++NumElim; + SCEV_DEBUG_WITH_TYPE(DebugType, + dbgs() << "INDVARS: Eliminated constant iv: " << *Phi + << '\n'); + continue; + } + + if (!SE.isSCEVable(Phi->getType())) + continue; + + PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)]; + if (!OrigPhiRef) { + OrigPhiRef = Phi; + if (Phi->getType()->isIntegerTy() && TTI && + TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) { + // This phi can be freely truncated to the narrowest phi type. Map the + // truncated expression to it so it will be reused for narrow types. + const SCEV *TruncExpr = + SE.getTruncateExpr(SE.getSCEV(Phi), Phis.back()->getType()); + ExprToIVMap[TruncExpr] = Phi; + } + continue; + } + + // Replacing a pointer phi with an integer phi or vice-versa doesn't make + // sense. + if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy()) + continue; + + if (BasicBlock *LatchBlock = L->getLoopLatch()) { + Instruction *OrigInc = dyn_cast<Instruction>( + OrigPhiRef->getIncomingValueForBlock(LatchBlock)); + Instruction *IsomorphicInc = + dyn_cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock)); + + if (OrigInc && IsomorphicInc) { + // If this phi has the same width but is more canonical, replace the + // original with it. As part of the "more canonical" determination, + // respect a prior decision to use an IV chain. + if (OrigPhiRef->getType() == Phi->getType() && + !(ChainedPhis.count(Phi) || + isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L)) && + (ChainedPhis.count(Phi) || + isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) { + std::swap(OrigPhiRef, Phi); + std::swap(OrigInc, IsomorphicInc); + } + // Replacing the congruent phi is sufficient because acyclic + // redundancy elimination, CSE/GVN, should handle the + // rest. However, once SCEV proves that a phi is congruent, + // it's often the head of an IV user cycle that is isomorphic + // with the original phi. It's worth eagerly cleaning up the + // common case of a single IV increment so that DeleteDeadPHIs + // can remove cycles that had postinc uses. + // Because we may potentially introduce a new use of OrigIV that didn't + // exist before at this point, its poison flags need readjustment. 
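+        // (That is what the RecomputePoisonFlags=true argument to hoistIVInc
+        //  below requests: the flags are dropped and nuw/nsw re-derived from
+        //  what SCEV can prove about the binary operator.)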
+ const SCEV *TruncExpr = + SE.getTruncateOrNoop(SE.getSCEV(OrigInc), IsomorphicInc->getType()); + if (OrigInc != IsomorphicInc && + TruncExpr == SE.getSCEV(IsomorphicInc) && + SE.LI.replacementPreservesLCSSAForm(IsomorphicInc, OrigInc) && + hoistIVInc(OrigInc, IsomorphicInc, /*RecomputePoisonFlags*/ true)) { + SCEV_DEBUG_WITH_TYPE( + DebugType, dbgs() << "INDVARS: Eliminated congruent iv.inc: " + << *IsomorphicInc << '\n'); + Value *NewInc = OrigInc; + if (OrigInc->getType() != IsomorphicInc->getType()) { + Instruction *IP = nullptr; + if (PHINode *PN = dyn_cast<PHINode>(OrigInc)) + IP = &*PN->getParent()->getFirstInsertionPt(); + else + IP = OrigInc->getNextNode(); + + IRBuilder<> Builder(IP); + Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc()); + NewInc = Builder.CreateTruncOrBitCast( + OrigInc, IsomorphicInc->getType(), IVName); + } + IsomorphicInc->replaceAllUsesWith(NewInc); + DeadInsts.emplace_back(IsomorphicInc); + } + } + } + SCEV_DEBUG_WITH_TYPE(DebugType, + dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi + << '\n'); + SCEV_DEBUG_WITH_TYPE( + DebugType, dbgs() << "INDVARS: Original iv: " << *OrigPhiRef << '\n'); + ++NumElim; + Value *NewIV = OrigPhiRef; + if (OrigPhiRef->getType() != Phi->getType()) { + IRBuilder<> Builder(&*L->getHeader()->getFirstInsertionPt()); + Builder.SetCurrentDebugLocation(Phi->getDebugLoc()); + NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName); + } + Phi->replaceAllUsesWith(NewIV); + DeadInsts.emplace_back(Phi); + } + return NumElim; +} + +Value *SCEVExpander::getRelatedExistingExpansion(const SCEV *S, + const Instruction *At, + Loop *L) { + using namespace llvm::PatternMatch; + + SmallVector<BasicBlock *, 4> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + // Look for suitable value in simple conditions at the loop exits. + for (BasicBlock *BB : ExitingBlocks) { + ICmpInst::Predicate Pred; + Instruction *LHS, *RHS; + + if (!match(BB->getTerminator(), + m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)), + m_BasicBlock(), m_BasicBlock()))) + continue; + + if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At)) + return LHS; + + if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At)) + return RHS; + } + + // Use expand's logic which is used for reusing a previous Value in + // ExprValueMap. Note that we don't currently model the cost of + // needing to drop poison generating flags on the instruction if we + // want to reuse it. We effectively assume that has zero cost. + return FindValueInExprValueMap(S, At); +} + +template<typename T> static InstructionCost costAndCollectOperands( + const SCEVOperand &WorkItem, const TargetTransformInfo &TTI, + TargetTransformInfo::TargetCostKind CostKind, + SmallVectorImpl<SCEVOperand> &Worklist) { + + const T *S = cast<T>(WorkItem.S); + InstructionCost Cost = 0; + // Object to help map SCEV operands to expanded IR instructions. + struct OperationIndices { + OperationIndices(unsigned Opc, size_t min, size_t max) : + Opcode(Opc), MinIdx(min), MaxIdx(max) { } + unsigned Opcode; + size_t MinIdx; + size_t MaxIdx; + }; + + // Collect the operations of all the instructions that will be needed to + // expand the SCEVExpr. This is so that when we come to cost the operands, + // we know what the generated user(s) will be. 
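+  // For example, costing a SCEVAddExpr with N operands records a single Add
+  // opcode covering operand indices [0, 1]; the loop at the end of this
+  // function then pushes every SCEV operand onto the worklist tagged with that
+  // prospective Add user, so e.g. a constant operand's immediate cost can be
+  // charged against the right instruction.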
+ SmallVector<OperationIndices, 2> Operations; + + auto CastCost = [&](unsigned Opcode) -> InstructionCost { + Operations.emplace_back(Opcode, 0, 0); + return TTI.getCastInstrCost(Opcode, S->getType(), + S->getOperand(0)->getType(), + TTI::CastContextHint::None, CostKind); + }; + + auto ArithCost = [&](unsigned Opcode, unsigned NumRequired, + unsigned MinIdx = 0, + unsigned MaxIdx = 1) -> InstructionCost { + Operations.emplace_back(Opcode, MinIdx, MaxIdx); + return NumRequired * + TTI.getArithmeticInstrCost(Opcode, S->getType(), CostKind); + }; + + auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, unsigned MinIdx, + unsigned MaxIdx) -> InstructionCost { + Operations.emplace_back(Opcode, MinIdx, MaxIdx); + Type *OpType = S->getType(); + return NumRequired * TTI.getCmpSelInstrCost( + Opcode, OpType, CmpInst::makeCmpResultType(OpType), + CmpInst::BAD_ICMP_PREDICATE, CostKind); + }; + + switch (S->getSCEVType()) { + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + case scUnknown: + case scConstant: + return 0; + case scPtrToInt: + Cost = CastCost(Instruction::PtrToInt); + break; + case scTruncate: + Cost = CastCost(Instruction::Trunc); + break; + case scZeroExtend: + Cost = CastCost(Instruction::ZExt); + break; + case scSignExtend: + Cost = CastCost(Instruction::SExt); + break; + case scUDivExpr: { + unsigned Opcode = Instruction::UDiv; + if (auto *SC = dyn_cast<SCEVConstant>(S->getOperand(1))) + if (SC->getAPInt().isPowerOf2()) + Opcode = Instruction::LShr; + Cost = ArithCost(Opcode, 1); + break; + } + case scAddExpr: + Cost = ArithCost(Instruction::Add, S->getNumOperands() - 1); + break; + case scMulExpr: + // TODO: this is a very pessimistic cost modelling for Mul, + // because of Bin Pow algorithm actually used by the expander, + // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN(). + Cost = ArithCost(Instruction::Mul, S->getNumOperands() - 1); + break; + case scSMaxExpr: + case scUMaxExpr: + case scSMinExpr: + case scUMinExpr: + case scSequentialUMinExpr: { + // FIXME: should this ask the cost for Intrinsic's? + // The reduction tree. + Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1); + Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2); + switch (S->getSCEVType()) { + case scSequentialUMinExpr: { + // The safety net against poison. + // FIXME: this is broken. + Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 0); + Cost += ArithCost(Instruction::Or, + S->getNumOperands() > 2 ? S->getNumOperands() - 2 : 0); + Cost += CmpSelCost(Instruction::Select, 1, 0, 1); + break; + } + default: + assert(!isa<SCEVSequentialMinMaxExpr>(S) && + "Unhandled SCEV expression type?"); + break; + } + break; + } + case scAddRecExpr: { + // In this polynominal, we may have some zero operands, and we shouldn't + // really charge for those. So how many non-zero coefficients are there? + int NumTerms = llvm::count_if(S->operands(), [](const SCEV *Op) { + return !Op->isZero(); + }); + + assert(NumTerms >= 1 && "Polynominal should have at least one term."); + assert(!(*std::prev(S->operands().end()))->isZero() && + "Last operand should not be zero"); + + // Ignoring constant term (operand 0), how many of the coefficients are u> 1? 
+ int NumNonZeroDegreeNonOneTerms = + llvm::count_if(S->operands(), [](const SCEV *Op) { + auto *SConst = dyn_cast<SCEVConstant>(Op); + return !SConst || SConst->getAPInt().ugt(1); + }); + + // Much like with normal add expr, the polynominal will require + // one less addition than the number of it's terms. + InstructionCost AddCost = ArithCost(Instruction::Add, NumTerms - 1, + /*MinIdx*/ 1, /*MaxIdx*/ 1); + // Here, *each* one of those will require a multiplication. + InstructionCost MulCost = + ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms); + Cost = AddCost + MulCost; + + // What is the degree of this polynominal? + int PolyDegree = S->getNumOperands() - 1; + assert(PolyDegree >= 1 && "Should be at least affine."); + + // The final term will be: + // Op_{PolyDegree} * x ^ {PolyDegree} + // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations. + // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for + // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free. + // FIXME: this is conservatively correct, but might be overly pessimistic. + Cost += MulCost * (PolyDegree - 1); + break; + } + } + + for (auto &CostOp : Operations) { + for (auto SCEVOp : enumerate(S->operands())) { + // Clamp the index to account for multiple IR operations being chained. + size_t MinIdx = std::max(SCEVOp.index(), CostOp.MinIdx); + size_t OpIdx = std::min(MinIdx, CostOp.MaxIdx); + Worklist.emplace_back(CostOp.Opcode, OpIdx, SCEVOp.value()); + } + } + return Cost; +} + +bool SCEVExpander::isHighCostExpansionHelper( + const SCEVOperand &WorkItem, Loop *L, const Instruction &At, + InstructionCost &Cost, unsigned Budget, const TargetTransformInfo &TTI, + SmallPtrSetImpl<const SCEV *> &Processed, + SmallVectorImpl<SCEVOperand> &Worklist) { + if (Cost > Budget) + return true; // Already run out of budget, give up. + + const SCEV *S = WorkItem.S; + // Was the cost of expansion of this expression already accounted for? + if (!isa<SCEVConstant>(S) && !Processed.insert(S).second) + return false; // We have already accounted for this expression. + + // If we can find an existing value for this scev available at the point "At" + // then consider the expression cheap. + if (getRelatedExistingExpansion(S, &At, L)) + return false; // Consider the expression to be free. + + TargetTransformInfo::TargetCostKind CostKind = + L->getHeader()->getParent()->hasMinSize() + ? TargetTransformInfo::TCK_CodeSize + : TargetTransformInfo::TCK_RecipThroughput; + + switch (S->getSCEVType()) { + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + case scUnknown: + // Assume to be zero-cost. + return false; + case scConstant: { + // Only evalulate the costs of constants when optimizing for size. + if (CostKind != TargetTransformInfo::TCK_CodeSize) + return false; + const APInt &Imm = cast<SCEVConstant>(S)->getAPInt(); + Type *Ty = S->getType(); + Cost += TTI.getIntImmCostInst( + WorkItem.ParentOpcode, WorkItem.OperandIdx, Imm, Ty, CostKind); + return Cost > Budget; + } + case scTruncate: + case scPtrToInt: + case scZeroExtend: + case scSignExtend: { + Cost += + costAndCollectOperands<SCEVCastExpr>(WorkItem, TTI, CostKind, Worklist); + return false; // Will answer upon next entry into this function. + } + case scUDivExpr: { + // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or + // HowManyLessThans produced to compute a precise expression, rather than a + // UDiv from the user's code. 
If we can't find a UDiv in the code with some + // simple searching, we need to account for it's cost. + + // At the beginning of this function we already tried to find existing + // value for plain 'S'. Now try to lookup 'S + 1' since it is common + // pattern involving division. This is just a simple search heuristic. + if (getRelatedExistingExpansion( + SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L)) + return false; // Consider it to be free. + + Cost += + costAndCollectOperands<SCEVUDivExpr>(WorkItem, TTI, CostKind, Worklist); + return false; // Will answer upon next entry into this function. + } + case scAddExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: + case scUMinExpr: + case scSMinExpr: + case scSequentialUMinExpr: { + assert(cast<SCEVNAryExpr>(S)->getNumOperands() > 1 && + "Nary expr should have more than 1 operand."); + // The simple nary expr will require one less op (or pair of ops) + // than the number of it's terms. + Cost += + costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind, Worklist); + return Cost > Budget; + } + case scAddRecExpr: { + assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= 2 && + "Polynomial should be at least linear"); + Cost += costAndCollectOperands<SCEVAddRecExpr>( + WorkItem, TTI, CostKind, Worklist); + return Cost > Budget; + } + } + llvm_unreachable("Unknown SCEV kind!"); +} + +Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred, + Instruction *IP) { + assert(IP); + switch (Pred->getKind()) { + case SCEVPredicate::P_Union: + return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP); + case SCEVPredicate::P_Compare: + return expandComparePredicate(cast<SCEVComparePredicate>(Pred), IP); + case SCEVPredicate::P_Wrap: { + auto *AddRecPred = cast<SCEVWrapPredicate>(Pred); + return expandWrapPredicate(AddRecPred, IP); + } + } + llvm_unreachable("Unknown SCEV predicate type"); +} + +Value *SCEVExpander::expandComparePredicate(const SCEVComparePredicate *Pred, + Instruction *IP) { + Value *Expr0 = + expandCodeForImpl(Pred->getLHS(), Pred->getLHS()->getType(), IP); + Value *Expr1 = + expandCodeForImpl(Pred->getRHS(), Pred->getRHS()->getType(), IP); + + Builder.SetInsertPoint(IP); + auto InvPred = ICmpInst::getInversePredicate(Pred->getPredicate()); + auto *I = Builder.CreateICmp(InvPred, Expr0, Expr1, "ident.check"); + return I; +} + +Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR, + Instruction *Loc, bool Signed) { + assert(AR->isAffine() && "Cannot generate RT check for " + "non-affine expression"); + + // FIXME: It is highly suspicious that we're ignoring the predicates here. + SmallVector<const SCEVPredicate *, 4> Pred; + const SCEV *ExitCount = + SE.getPredicatedBackedgeTakenCount(AR->getLoop(), Pred); + + assert(!isa<SCEVCouldNotCompute>(ExitCount) && "Invalid loop count"); + + const SCEV *Step = AR->getStepRecurrence(SE); + const SCEV *Start = AR->getStart(); + + Type *ARTy = AR->getType(); + unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType()); + unsigned DstBits = SE.getTypeSizeInBits(ARTy); + + // The expression {Start,+,Step} has nusw/nssw if + // Step < 0, Start - |Step| * Backedge <= Start + // Step >= 0, Start + |Step| * Backedge > Start + // and |Step| * Backedge doesn't unsigned overflow. 
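+  //
+  // As a concrete illustration: for an i8 recurrence {250,+,10} and a
+  // backedge-taken count of 1, Start + |Step| * Backedge = 260, which wraps
+  // to 4 in i8; since 4 u< 250, the unsigned check built below reports
+  // overflow.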
+ + IntegerType *CountTy = IntegerType::get(Loc->getContext(), SrcBits); + Builder.SetInsertPoint(Loc); + Value *TripCountVal = expandCodeForImpl(ExitCount, CountTy, Loc); + + IntegerType *Ty = + IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy)); + + Value *StepValue = expandCodeForImpl(Step, Ty, Loc); + Value *NegStepValue = + expandCodeForImpl(SE.getNegativeSCEV(Step), Ty, Loc); + Value *StartValue = expandCodeForImpl(Start, ARTy, Loc); + + ConstantInt *Zero = + ConstantInt::get(Loc->getContext(), APInt::getZero(DstBits)); + + Builder.SetInsertPoint(Loc); + // Compute |Step| + Value *StepCompare = Builder.CreateICmp(ICmpInst::ICMP_SLT, StepValue, Zero); + Value *AbsStep = Builder.CreateSelect(StepCompare, NegStepValue, StepValue); + + // Compute |Step| * Backedge + // Compute: + // 1. Start + |Step| * Backedge < Start + // 2. Start - |Step| * Backedge > Start + // + // And select either 1. or 2. depending on whether step is positive or + // negative. If Step is known to be positive or negative, only create + // either 1. or 2. + auto ComputeEndCheck = [&]() -> Value * { + // Checking <u 0 is always false. + if (!Signed && Start->isZero() && SE.isKnownPositive(Step)) + return ConstantInt::getFalse(Loc->getContext()); + + // Get the backedge taken count and truncate or extended to the AR type. + Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty); + + Value *MulV, *OfMul; + if (Step->isOne()) { + // Special-case Step of one. Potentially-costly `umul_with_overflow` isn't + // needed, there is never an overflow, so to avoid artificially inflating + // the cost of the check, directly emit the optimized IR. + MulV = TruncTripCount; + OfMul = ConstantInt::getFalse(MulV->getContext()); + } else { + auto *MulF = Intrinsic::getDeclaration(Loc->getModule(), + Intrinsic::umul_with_overflow, Ty); + CallInst *Mul = + Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul"); + MulV = Builder.CreateExtractValue(Mul, 0, "mul.result"); + OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow"); + } + + Value *Add = nullptr, *Sub = nullptr; + bool NeedPosCheck = !SE.isKnownNegative(Step); + bool NeedNegCheck = !SE.isKnownPositive(Step); + + if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) { + StartValue = InsertNoopCastOfTo( + StartValue, Builder.getInt8PtrTy(ARPtrTy->getAddressSpace())); + Value *NegMulV = Builder.CreateNeg(MulV); + if (NeedPosCheck) + Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV); + if (NeedNegCheck) + Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV); + } else { + if (NeedPosCheck) + Add = Builder.CreateAdd(StartValue, MulV); + if (NeedNegCheck) + Sub = Builder.CreateSub(StartValue, MulV); + } + + Value *EndCompareLT = nullptr; + Value *EndCompareGT = nullptr; + Value *EndCheck = nullptr; + if (NeedPosCheck) + EndCheck = EndCompareLT = Builder.CreateICmp( + Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue); + if (NeedNegCheck) + EndCheck = EndCompareGT = Builder.CreateICmp( + Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue); + if (NeedPosCheck && NeedNegCheck) { + // Select the answer based on the sign of Step. + EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT); + } + return Builder.CreateOr(EndCheck, OfMul); + }; + Value *EndCheck = ComputeEndCheck(); + + // If the backedge taken count type is larger than the AR type, + // check that we don't drop any bits by truncating it. If we are + // dropping bits, then we have overflow (unless the step is zero). 
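+  //
+  // For example, an i64 backedge-taken count of 2^40 cannot be represented in
+  // an i32 AR type, so the comparison below against the zero-extended maximum
+  // i32 value catches the dropped bits (unless the step is zero, in which
+  // case no overflow is possible no matter how large the count is).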
+ if (SE.getTypeSizeInBits(CountTy) > SE.getTypeSizeInBits(Ty)) { + auto MaxVal = APInt::getMaxValue(DstBits).zext(SrcBits); + auto *BackedgeCheck = + Builder.CreateICmp(ICmpInst::ICMP_UGT, TripCountVal, + ConstantInt::get(Loc->getContext(), MaxVal)); + BackedgeCheck = Builder.CreateAnd( + BackedgeCheck, Builder.CreateICmp(ICmpInst::ICMP_NE, StepValue, Zero)); + + EndCheck = Builder.CreateOr(EndCheck, BackedgeCheck); + } + + return EndCheck; +} + +Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred, + Instruction *IP) { + const auto *A = cast<SCEVAddRecExpr>(Pred->getExpr()); + Value *NSSWCheck = nullptr, *NUSWCheck = nullptr; + + // Add a check for NUSW + if (Pred->getFlags() & SCEVWrapPredicate::IncrementNUSW) + NUSWCheck = generateOverflowCheck(A, IP, false); + + // Add a check for NSSW + if (Pred->getFlags() & SCEVWrapPredicate::IncrementNSSW) + NSSWCheck = generateOverflowCheck(A, IP, true); + + if (NUSWCheck && NSSWCheck) + return Builder.CreateOr(NUSWCheck, NSSWCheck); + + if (NUSWCheck) + return NUSWCheck; + + if (NSSWCheck) + return NSSWCheck; + + return ConstantInt::getFalse(IP->getContext()); +} + +Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union, + Instruction *IP) { + // Loop over all checks in this set. + SmallVector<Value *> Checks; + for (const auto *Pred : Union->getPredicates()) { + Checks.push_back(expandCodeForPredicate(Pred, IP)); + Builder.SetInsertPoint(IP); + } + + if (Checks.empty()) + return ConstantInt::getFalse(IP->getContext()); + return Builder.CreateOr(Checks); +} + +Value *SCEVExpander::fixupLCSSAFormFor(Value *V) { + auto *DefI = dyn_cast<Instruction>(V); + if (!PreserveLCSSA || !DefI) + return V; + + Instruction *InsertPt = &*Builder.GetInsertPoint(); + Loop *DefLoop = SE.LI.getLoopFor(DefI->getParent()); + Loop *UseLoop = SE.LI.getLoopFor(InsertPt->getParent()); + if (!DefLoop || UseLoop == DefLoop || DefLoop->contains(UseLoop)) + return V; + + // Create a temporary instruction to at the current insertion point, so we + // can hand it off to the helper to create LCSSA PHIs if required for the + // new use. + // FIXME: Ideally formLCSSAForInstructions (used in fixupLCSSAFormFor) + // would accept a insertion point and return an LCSSA phi for that + // insertion point, so there is no need to insert & remove the temporary + // instruction. + Type *ToTy; + if (DefI->getType()->isIntegerTy()) + ToTy = DefI->getType()->getPointerTo(); + else + ToTy = Type::getInt32Ty(DefI->getContext()); + Instruction *User = + CastInst::CreateBitOrPointerCast(DefI, ToTy, "tmp.lcssa.user", InsertPt); + auto RemoveUserOnExit = + make_scope_exit([User]() { User->eraseFromParent(); }); + + SmallVector<Instruction *, 1> ToUpdate; + ToUpdate.push_back(DefI); + SmallVector<PHINode *, 16> PHIsToRemove; + formLCSSAForInstructions(ToUpdate, SE.DT, SE.LI, &SE, Builder, &PHIsToRemove); + for (PHINode *PN : PHIsToRemove) { + if (!PN->use_empty()) + continue; + InsertedValues.erase(PN); + InsertedPostIncValues.erase(PN); + PN->eraseFromParent(); + } + + return User->getOperand(0); +} + +namespace { +// Search for a SCEV subexpression that is not safe to expand. Any expression +// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely +// UDiv expressions. We don't know if the UDiv is derived from an IR divide +// instruction, but the important thing is that we prove the denominator is +// nonzero before expansion. +// +// IVUsers already checks that IV-derived expressions are safe. 
So this check is +// only needed when the expression includes some subexpression that is not IV +// derived. +// +// Currently, we only allow division by a value provably non-zero here. +// +// We cannot generally expand recurrences unless the step dominates the loop +// header. The expander handles the special case of affine recurrences by +// scaling the recurrence outside the loop, but this technique isn't generally +// applicable. Expanding a nested recurrence outside a loop requires computing +// binomial coefficients. This could be done, but the recurrence has to be in a +// perfectly reduced form, which can't be guaranteed. +struct SCEVFindUnsafe { + ScalarEvolution &SE; + bool CanonicalMode; + bool IsUnsafe = false; + + SCEVFindUnsafe(ScalarEvolution &SE, bool CanonicalMode) + : SE(SE), CanonicalMode(CanonicalMode) {} + + bool follow(const SCEV *S) { + if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) { + if (!SE.isKnownNonZero(D->getRHS())) { + IsUnsafe = true; + return false; + } + } + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + const SCEV *Step = AR->getStepRecurrence(SE); + if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) { + IsUnsafe = true; + return false; + } + + // For non-affine addrecs or in non-canonical mode we need a preheader + // to insert into. + if (!AR->getLoop()->getLoopPreheader() && + (!CanonicalMode || !AR->isAffine())) { + IsUnsafe = true; + return false; + } + } + return true; + } + bool isDone() const { return IsUnsafe; } +}; +} // namespace + +bool SCEVExpander::isSafeToExpand(const SCEV *S) const { + SCEVFindUnsafe Search(SE, CanonicalMode); + visitAll(S, Search); + return !Search.IsUnsafe; +} + +bool SCEVExpander::isSafeToExpandAt(const SCEV *S, + const Instruction *InsertionPoint) const { + if (!isSafeToExpand(S)) + return false; + // We have to prove that the expanded site of S dominates InsertionPoint. + // This is easy when not in the same block, but hard when S is an instruction + // to be expanded somewhere inside the same block as our insertion point. + // What we really need here is something analogous to an OrderedBasicBlock, + // but for the moment, we paper over the problem by handling two common and + // cheap to check cases. + if (SE.properlyDominates(S, InsertionPoint->getParent())) + return true; + if (SE.dominates(S, InsertionPoint->getParent())) { + if (InsertionPoint->getParent()->getTerminator() == InsertionPoint) + return true; + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) + if (llvm::is_contained(InsertionPoint->operand_values(), U->getValue())) + return true; + } + return false; +} + +void SCEVExpanderCleaner::cleanup() { + // Result is used, nothing to remove. + if (ResultUsed) + return; + + auto InsertedInstructions = Expander.getAllInsertedInstructions(); +#ifndef NDEBUG + SmallPtrSet<Instruction *, 8> InsertedSet(InsertedInstructions.begin(), + InsertedInstructions.end()); + (void)InsertedSet; +#endif + // Remove sets with value handles. + Expander.clear(); + + // Remove all inserted instructions. 
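+  // Walk them in reverse insertion order so that an instruction is only
+  // erased after the instructions that were built on top of it have been
+  // dropped; any use that does remain is redirected to poison first.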
+ for (Instruction *I : reverse(InsertedInstructions)) { +#ifndef NDEBUG + assert(all_of(I->users(), + [&InsertedSet](Value *U) { + return InsertedSet.contains(cast<Instruction>(U)); + }) && + "removed instruction should only be used by instructions inserted " + "during expansion"); +#endif + assert(!I->getType()->isVoidTy() && + "inserted instruction should have non-void types"); + I->replaceAllUsesWith(PoisonValue::get(I->getType())); + I->eraseFromParent(); + } +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/SimplifyCFG.cpp new file mode 100644 index 0000000000..9e0483966d --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/SimplifyCFG.cpp @@ -0,0 +1,7341 @@ +//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Peephole optimize the CFG. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/Sequence.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/DomTreeUpdater.h" +#include "llvm/Analysis/GuardUtils.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/ConstantRange.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/NoFolder.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/ProfDataUtils.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include <algorithm> 
+#include <cassert>
+#include <climits>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <map>
+#include <optional>
+#include <set>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "simplifycfg"
+
+cl::opt<bool> llvm::RequireAndPreserveDomTree(
+    "simplifycfg-require-and-preserve-domtree", cl::Hidden,
+    cl::desc("Temporary development switch used to gradually uplift SimplifyCFG "
+             "into preserving DomTree."));
+
+// Chosen as 2 so as to be cheap, but still to have enough power to fold
+// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
+// To catch this, we need to fold a compare and a select, hence '2' being the
+// minimum reasonable default.
+static cl::opt<unsigned> PHINodeFoldingThreshold(
+    "phi-node-folding-threshold", cl::Hidden, cl::init(2),
+    cl::desc(
+        "Control the amount of phi node folding to perform (default = 2)"));
+
+static cl::opt<unsigned> TwoEntryPHINodeFoldingThreshold(
+    "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
+    cl::desc("Control the maximal total instruction cost that we are willing "
+             "to speculatively execute to fold a 2-entry PHI node into a "
+             "select (default = 4)"));
+
+static cl::opt<bool>
+    HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
+                cl::desc("Hoist common instructions up to the parent block"));
+
+static cl::opt<unsigned>
+    HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
+                         cl::init(20),
+                         cl::desc("Allow reordering across at most this many "
+                                  "instructions when hoisting"));
+
+static cl::opt<bool>
+    SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
+               cl::desc("Sink common instructions down to the end block"));
+
+static cl::opt<bool> HoistCondStores(
+    "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
+    cl::desc("Hoist conditional stores if an unconditional store precedes"));
+
+static cl::opt<bool> MergeCondStores(
+    "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
+    cl::desc("Hoist conditional stores even if an unconditional store does not "
+             "precede - hoist multiple conditional stores into a single "
+             "predicated store"));
+
+static cl::opt<bool> MergeCondStoresAggressively(
+    "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
+    cl::desc("When merging conditional stores, do so even if the resultant "
+             "basic blocks are unlikely to be if-converted as a result"));
+
+static cl::opt<bool> SpeculateOneExpensiveInst(
+    "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
+    cl::desc("Allow exactly one expensive instruction to be speculatively "
+             "executed"));
+
+static cl::opt<unsigned> MaxSpeculationDepth(
+    "max-speculation-depth", cl::Hidden, cl::init(10),
+    cl::desc("Limit maximum recursion depth when calculating costs of "
+             "speculatively executed instructions"));
+
+static cl::opt<int>
+    MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
+                      cl::init(10),
+                      cl::desc("Max size of a block which is still considered "
+                               "small enough to thread through"));
+
+// Two is chosen to allow one negation and a logical combine.
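+// For example, folding 'if (a) { if (b) ... }' into a single branch on
+// 'a & b' costs one 'and'; if the second condition must be inverted first,
+// an extra 'xor' is needed as well, hence a default budget of two.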
+static cl::opt<unsigned> + BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, + cl::init(2), + cl::desc("Maximum cost of combining conditions when " + "folding branches")); + +static cl::opt<unsigned> BranchFoldToCommonDestVectorMultiplier( + "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, + cl::init(2), + cl::desc("Multiplier to apply to threshold when determining whether or not " + "to fold branch to common destination when vector operations are " + "present")); + +static cl::opt<bool> EnableMergeCompatibleInvokes( + "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), + cl::desc("Allow SimplifyCFG to merge invokes together when appropriate")); + +static cl::opt<unsigned> MaxSwitchCasesPerResult( + "max-switch-cases-per-result", cl::Hidden, cl::init(16), + cl::desc("Limit cases to analyze when converting a switch to select")); + +STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); +STATISTIC(NumLinearMaps, + "Number of switch instructions turned into linear mapping"); +STATISTIC(NumLookupTables, + "Number of switch instructions turned into lookup tables"); +STATISTIC( + NumLookupTablesHoles, + "Number of switch instructions turned into lookup tables (holes checked)"); +STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares"); +STATISTIC(NumFoldValueComparisonIntoPredecessors, + "Number of value comparisons folded into predecessor basic blocks"); +STATISTIC(NumFoldBranchToCommonDest, + "Number of branches folded into predecessor basic block"); +STATISTIC( + NumHoistCommonCode, + "Number of common instruction 'blocks' hoisted up to the begin block"); +STATISTIC(NumHoistCommonInstrs, + "Number of common instructions hoisted up to the begin block"); +STATISTIC(NumSinkCommonCode, + "Number of common instruction 'blocks' sunk down to the end block"); +STATISTIC(NumSinkCommonInstrs, + "Number of common instructions sunk down to the end block"); +STATISTIC(NumSpeculations, "Number of speculative executed instructions"); +STATISTIC(NumInvokes, + "Number of invokes with empty resume blocks simplified into calls"); +STATISTIC(NumInvokesMerged, "Number of invokes that were merged together"); +STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed"); + +namespace { + +// The first field contains the value that the switch produces when a certain +// case group is selected, and the second field is a vector containing the +// cases composing the case group. +using SwitchCaseResultVectorTy = + SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>; + +// The first field contains the phi node that generates a result of the switch +// and the second field contains the value generated for a certain case in the +// switch for that PHI. +using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>; + +/// ValueEqualityComparisonCase - Represents a case of a switch. +struct ValueEqualityComparisonCase { + ConstantInt *Value; + BasicBlock *Dest; + + ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest) + : Value(Value), Dest(Dest) {} + + bool operator<(ValueEqualityComparisonCase RHS) const { + // Comparing pointers is ok as we only rely on the order for uniquing. 
+ return Value < RHS.Value; + } + + bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; } +}; + +class SimplifyCFGOpt { + const TargetTransformInfo &TTI; + DomTreeUpdater *DTU; + const DataLayout &DL; + ArrayRef<WeakVH> LoopHeaders; + const SimplifyCFGOptions &Options; + bool Resimplify; + + Value *isValueEqualityComparison(Instruction *TI); + BasicBlock *GetValueEqualityComparisonCases( + Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases); + bool SimplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI, + BasicBlock *Pred, + IRBuilder<> &Builder); + bool PerformValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV, + Instruction *PTI, + IRBuilder<> &Builder); + bool FoldValueComparisonIntoPredecessors(Instruction *TI, + IRBuilder<> &Builder); + + bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder); + bool simplifySingleResume(ResumeInst *RI); + bool simplifyCommonResume(ResumeInst *RI); + bool simplifyCleanupReturn(CleanupReturnInst *RI); + bool simplifyUnreachable(UnreachableInst *UI); + bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder); + bool simplifyIndirectBr(IndirectBrInst *IBI); + bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder); + bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder); + bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder); + + bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, + IRBuilder<> &Builder); + + bool HoistThenElseCodeToIf(BranchInst *BI, const TargetTransformInfo &TTI, + bool EqTermsOnly); + bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, + const TargetTransformInfo &TTI); + bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond, + BasicBlock *TrueBB, BasicBlock *FalseBB, + uint32_t TrueWeight, uint32_t FalseWeight); + bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, + const DataLayout &DL); + bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select); + bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI); + bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder); + +public: + SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU, + const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders, + const SimplifyCFGOptions &Opts) + : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) { + assert((!DTU || !DTU->hasPostDomTree()) && + "SimplifyCFG is not yet capable of maintaining validity of a " + "PostDomTree, so don't ask for it."); + } + + bool simplifyOnce(BasicBlock *BB); + bool run(BasicBlock *BB); + + // Helper to set Resimplify and return change indication. + bool requestResimplify() { + Resimplify = true; + return true; + } +}; + +} // end anonymous namespace + +/// Return true if all the PHI nodes in the basic block \p BB +/// receive compatible (identical) incoming values when coming from +/// all of the predecessor blocks that are specified in \p IncomingBlocks. +/// +/// Note that if the values aren't exactly identical, but \p EquivalenceSet +/// is provided, and *both* of the values are present in the set, +/// then they are considered equal. +static bool IncomingValuesAreCompatible( + BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks, + SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) { + assert(IncomingBlocks.size() == 2 && + "Only for a pair of incoming blocks at the time!"); + + // FIXME: it is okay if one of the incoming values is an `undef` value, + // iff the other incoming value is guaranteed to be a non-poison value. 
+ // FIXME: it is okay if one of the incoming values is a `poison` value. + return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) { + Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]); + Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]); + if (IV0 == IV1) + return true; + if (EquivalenceSet && EquivalenceSet->contains(IV0) && + EquivalenceSet->contains(IV1)) + return true; + return false; + }); +} + +/// Return true if it is safe to merge these two +/// terminator instructions together. +static bool +SafeToMergeTerminators(Instruction *SI1, Instruction *SI2, + SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) { + if (SI1 == SI2) + return false; // Can't merge with self! + + // It is not safe to merge these two switch instructions if they have a common + // successor, and if that successor has a PHI node, and if *that* PHI node has + // conflicting incoming values from the two switch blocks. + BasicBlock *SI1BB = SI1->getParent(); + BasicBlock *SI2BB = SI2->getParent(); + + SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB)); + bool Fail = false; + for (BasicBlock *Succ : successors(SI2BB)) { + if (!SI1Succs.count(Succ)) + continue; + if (IncomingValuesAreCompatible(Succ, {SI1BB, SI2BB})) + continue; + Fail = true; + if (FailBlocks) + FailBlocks->insert(Succ); + else + break; + } + + return !Fail; +} + +/// Update PHI nodes in Succ to indicate that there will now be entries in it +/// from the 'NewPred' block. The values that will be flowing into the PHI nodes +/// will be the same as those coming in from ExistPred, an existing predecessor +/// of Succ. +static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, + BasicBlock *ExistPred, + MemorySSAUpdater *MSSAU = nullptr) { + for (PHINode &PN : Succ->phis()) + PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred); + if (MSSAU) + if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ)) + MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred); +} + +/// Compute an abstract "cost" of speculating the given instruction, +/// which is assumed to be safe to speculate. TCC_Free means cheap, +/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively +/// expensive. +static InstructionCost computeSpeculationCost(const User *I, + const TargetTransformInfo &TTI) { + assert((!isa<Instruction>(I) || + isSafeToSpeculativelyExecute(cast<Instruction>(I))) && + "Instruction is not safe to speculatively execute!"); + return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency); +} + +/// If we have a merge point of an "if condition" as accepted above, +/// return true if the specified value dominates the block. We +/// don't handle the true generality of domination here, just a special case +/// which works well enough for us. +/// +/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to +/// see if V (which must be an instruction) and its recursive operands +/// that do not dominate BB have a combined cost lower than Budget and +/// are non-trapping. If both are true, the instruction is inserted into the +/// set and true is returned. +/// +/// The cost for most non-trapping instructions is defined as 1 except for +/// Select whose cost is 2. +/// +/// After this function returns, Cost is increased by the cost of +/// V plus its non-dominating operands. If that cost is greater than +/// Budget, false is returned and Cost is undefined. 
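+///
+/// For example, in a diamond whose 'then' block only computes
+/// '%t = add i32 %x, 1' before branching to the merge block, %t does not
+/// dominate the merge point, but it is safe to speculate and cheap, so it is
+/// added to AggressiveInsts and the caller may fold the two-entry PHI into a
+/// select.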
+static bool dominatesMergePoint(Value *V, BasicBlock *BB, + SmallPtrSetImpl<Instruction *> &AggressiveInsts, + InstructionCost &Cost, + InstructionCost Budget, + const TargetTransformInfo &TTI, + unsigned Depth = 0) { + // It is possible to hit a zero-cost cycle (phi/gep instructions for example), + // so limit the recursion depth. + // TODO: While this recursion limit does prevent pathological behavior, it + // would be better to track visited instructions to avoid cycles. + if (Depth == MaxSpeculationDepth) + return false; + + Instruction *I = dyn_cast<Instruction>(V); + if (!I) { + // Non-instructions dominate all instructions and can be executed + // unconditionally. + return true; + } + BasicBlock *PBB = I->getParent(); + + // We don't want to allow weird loops that might have the "if condition" in + // the bottom of this block. + if (PBB == BB) + return false; + + // If this instruction is defined in a block that contains an unconditional + // branch to BB, then it must be in the 'conditional' part of the "if + // statement". If not, it definitely dominates the region. + BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator()); + if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB) + return true; + + // If we have seen this instruction before, don't count it again. + if (AggressiveInsts.count(I)) + return true; + + // Okay, it looks like the instruction IS in the "condition". Check to + // see if it's a cheap instruction to unconditionally compute, and if it + // only uses stuff defined outside of the condition. If so, hoist it out. + if (!isSafeToSpeculativelyExecute(I)) + return false; + + Cost += computeSpeculationCost(I, TTI); + + // Allow exactly one instruction to be speculated regardless of its cost + // (as long as it is safe to do so). + // This is intended to flatten the CFG even if the instruction is a division + // or other expensive operation. The speculation of an expensive instruction + // is expected to be undone in CodeGenPrepare if the speculation has not + // enabled further IR optimizations. + if (Cost > Budget && + (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 || + !Cost.isValid())) + return false; + + // Okay, we can only really hoist these out if their operands do + // not take us over the cost threshold. + for (Use &Op : I->operands()) + if (!dominatesMergePoint(Op, BB, AggressiveInsts, Cost, Budget, TTI, + Depth + 1)) + return false; + // Okay, it's safe to do this! Remember this instruction. + AggressiveInsts.insert(I); + return true; +} + +/// Extract ConstantInt from value, looking through IntToPtr +/// and PointerNullValue. Return NULL if value is not a constant int. +static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) { + // Normal constant int. + ConstantInt *CI = dyn_cast<ConstantInt>(V); + if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy() || + DL.isNonIntegralPointerType(V->getType())) + return CI; + + // This is some kind of pointer constant. Turn it into a pointer-sized + // ConstantInt if possible. + IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType())); + + // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*). + if (isa<ConstantPointerNull>(V)) + return ConstantInt::get(PtrTy, 0); + + // IntToPtr const int. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if (CE->getOpcode() == Instruction::IntToPtr) + if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) { + // The constant is very likely to have the right type already. 
+ if (CI->getType() == PtrTy) + return CI; + else + return cast<ConstantInt>( + ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false)); + } + return nullptr; +} + +namespace { + +/// Given a chain of or (||) or and (&&) comparison of a value against a +/// constant, this will try to recover the information required for a switch +/// structure. +/// It will depth-first traverse the chain of comparison, seeking for patterns +/// like %a == 12 or %a < 4 and combine them to produce a set of integer +/// representing the different cases for the switch. +/// Note that if the chain is composed of '||' it will build the set of elements +/// that matches the comparisons (i.e. any of this value validate the chain) +/// while for a chain of '&&' it will build the set elements that make the test +/// fail. +struct ConstantComparesGatherer { + const DataLayout &DL; + + /// Value found for the switch comparison + Value *CompValue = nullptr; + + /// Extra clause to be checked before the switch + Value *Extra = nullptr; + + /// Set of integers to match in switch + SmallVector<ConstantInt *, 8> Vals; + + /// Number of comparisons matched in the and/or chain + unsigned UsedICmps = 0; + + /// Construct and compute the result for the comparison instruction Cond + ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) { + gather(Cond); + } + + ConstantComparesGatherer(const ConstantComparesGatherer &) = delete; + ConstantComparesGatherer & + operator=(const ConstantComparesGatherer &) = delete; + +private: + /// Try to set the current value used for the comparison, it succeeds only if + /// it wasn't set before or if the new value is the same as the old one + bool setValueOnce(Value *NewVal) { + if (CompValue && CompValue != NewVal) + return false; + CompValue = NewVal; + return (CompValue != nullptr); + } + + /// Try to match Instruction "I" as a comparison against a constant and + /// populates the array Vals with the set of values that match (or do not + /// match depending on isEQ). + /// Return false on failure. On success, the Value the comparison matched + /// against is placed in CompValue. + /// If CompValue is already set, the function is expected to fail if a match + /// is found but the value compared to is different. + bool matchInstruction(Instruction *I, bool isEQ) { + // If this is an icmp against a constant, handle this as one of the cases. + ICmpInst *ICI; + ConstantInt *C; + if (!((ICI = dyn_cast<ICmpInst>(I)) && + (C = GetConstantInt(I->getOperand(1), DL)))) { + return false; + } + + Value *RHSVal; + const APInt *RHSC; + + // Pattern match a special case + // (x & ~2^z) == y --> x == y || x == y|2^z + // This undoes a transformation done by instcombine to fuse 2 compares. + if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) { + // It's a little bit hard to see why the following transformations are + // correct. Here is a CVC3 program to verify them for 64-bit values: + + /* + ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63); + x : BITVECTOR(64); + y : BITVECTOR(64); + z : BITVECTOR(64); + mask : BITVECTOR(64) = BVSHL(ONE, z); + QUERY( (y & ~mask = y) => + ((x & ~mask = y) <=> (x = y OR x = (y | mask))) + ); + QUERY( (y | mask = y) => + ((x | mask = y) <=> (x = y OR x = (y & ~mask))) + ); + */ + + // Please note that each pattern must be a dual implication (<--> or + // iff). One directional implication can create spurious matches. 
If the + // implication is only one-way, an unsatisfiable condition on the left + // side can imply a satisfiable condition on the right side. Dual + // implication ensures that satisfiable conditions are transformed to + // other satisfiable conditions and unsatisfiable conditions are + // transformed to other unsatisfiable conditions. + + // Here is a concrete example of a unsatisfiable condition on the left + // implying a satisfiable condition on the right: + // + // mask = (1 << z) + // (x & ~mask) == y --> (x == y || x == (y | mask)) + // + // Substituting y = 3, z = 0 yields: + // (x & -2) == 3 --> (x == 3 || x == 2) + + // Pattern match a special case: + /* + QUERY( (y & ~mask = y) => + ((x & ~mask = y) <=> (x = y OR x = (y | mask))) + ); + */ + if (match(ICI->getOperand(0), + m_And(m_Value(RHSVal), m_APInt(RHSC)))) { + APInt Mask = ~*RHSC; + if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) { + // If we already have a value for the switch, it has to match! + if (!setValueOnce(RHSVal)) + return false; + + Vals.push_back(C); + Vals.push_back( + ConstantInt::get(C->getContext(), + C->getValue() | Mask)); + UsedICmps++; + return true; + } + } + + // Pattern match a special case: + /* + QUERY( (y | mask = y) => + ((x | mask = y) <=> (x = y OR x = (y & ~mask))) + ); + */ + if (match(ICI->getOperand(0), + m_Or(m_Value(RHSVal), m_APInt(RHSC)))) { + APInt Mask = *RHSC; + if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) { + // If we already have a value for the switch, it has to match! + if (!setValueOnce(RHSVal)) + return false; + + Vals.push_back(C); + Vals.push_back(ConstantInt::get(C->getContext(), + C->getValue() & ~Mask)); + UsedICmps++; + return true; + } + } + + // If we already have a value for the switch, it has to match! + if (!setValueOnce(ICI->getOperand(0))) + return false; + + UsedICmps++; + Vals.push_back(C); + return ICI->getOperand(0); + } + + // If we have "x ult 3", for example, then we can add 0,1,2 to the set. + ConstantRange Span = + ConstantRange::makeExactICmpRegion(ICI->getPredicate(), C->getValue()); + + // Shift the range if the compare is fed by an add. This is the range + // compare idiom as emitted by instcombine. + Value *CandidateVal = I->getOperand(0); + if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) { + Span = Span.subtract(*RHSC); + CandidateVal = RHSVal; + } + + // If this is an and/!= check, then we are looking to build the set of + // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into + // x != 0 && x != 1. + if (!isEQ) + Span = Span.inverse(); + + // If there are a ton of values, we don't want to make a ginormous switch. + if (Span.isSizeLargerThan(8) || Span.isEmptySet()) { + return false; + } + + // If we already have a value for the switch, it has to match! + if (!setValueOnce(CandidateVal)) + return false; + + // Add all values from the range to the set + for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp) + Vals.push_back(ConstantInt::get(I->getContext(), Tmp)); + + UsedICmps++; + return true; + } + + /// Given a potentially 'or'd or 'and'd together collection of icmp + /// eq/ne/lt/gt instructions that compare a value against a constant, extract + /// the value being compared, and stick the list constants into the Vals + /// vector. + /// One "Extra" case is allowed to differ from the other. 
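+  ///
+  /// For example, gathering the condition (%a == 12 || %a u< 4) leaves
+  /// CompValue == %a and Vals == {12, 0, 1, 2, 3}, which the caller can turn
+  /// into a switch over %a.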
+ void gather(Value *V) { + bool isEQ = match(V, m_LogicalOr(m_Value(), m_Value())); + + // Keep a stack (SmallVector for efficiency) for depth-first traversal + SmallVector<Value *, 8> DFT; + SmallPtrSet<Value *, 8> Visited; + + // Initialize + Visited.insert(V); + DFT.push_back(V); + + while (!DFT.empty()) { + V = DFT.pop_back_val(); + + if (Instruction *I = dyn_cast<Instruction>(V)) { + // If it is a || (or && depending on isEQ), process the operands. + Value *Op0, *Op1; + if (isEQ ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1))) + : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) { + if (Visited.insert(Op1).second) + DFT.push_back(Op1); + if (Visited.insert(Op0).second) + DFT.push_back(Op0); + + continue; + } + + // Try to match the current instruction + if (matchInstruction(I, isEQ)) + // Match succeed, continue the loop + continue; + } + + // One element of the sequence of || (or &&) could not be match as a + // comparison against the same value as the others. + // We allow only one "Extra" case to be checked before the switch + if (!Extra) { + Extra = V; + continue; + } + // Failed to parse a proper sequence, abort now + CompValue = nullptr; + break; + } + } +}; + +} // end anonymous namespace + +static void EraseTerminatorAndDCECond(Instruction *TI, + MemorySSAUpdater *MSSAU = nullptr) { + Instruction *Cond = nullptr; + if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { + Cond = dyn_cast<Instruction>(SI->getCondition()); + } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { + if (BI->isConditional()) + Cond = dyn_cast<Instruction>(BI->getCondition()); + } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) { + Cond = dyn_cast<Instruction>(IBI->getAddress()); + } + + TI->eraseFromParent(); + if (Cond) + RecursivelyDeleteTriviallyDeadInstructions(Cond, nullptr, MSSAU); +} + +/// Return true if the specified terminator checks +/// to see if a value is equal to constant integer value. +Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) { + Value *CV = nullptr; + if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { + // Do not permit merging of large switch instructions into their + // predecessors unless there is only one predecessor. + if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors())) + CV = SI->getCondition(); + } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) + if (BI->isConditional() && BI->getCondition()->hasOneUse()) + if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) { + if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL)) + CV = ICI->getOperand(0); + } + + // Unwrap any lossless ptrtoint cast. + if (CV) { + if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) { + Value *Ptr = PTII->getPointerOperand(); + if (PTII->getType() == DL.getIntPtrType(Ptr->getType())) + CV = Ptr; + } + } + return CV; +} + +/// Given a value comparison instruction, +/// decode all of the 'cases' that it represents and return the 'default' block. 
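+///
+/// For a conditional branch on (icmp eq i32 %x, 5) this records the single
+/// case {5 -> true successor} and returns the false successor as the default;
+/// for a switch it simply copies the existing cases and default destination.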
+BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases( + Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) { + if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { + Cases.reserve(SI->getNumCases()); + for (auto Case : SI->cases()) + Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(), + Case.getCaseSuccessor())); + return SI->getDefaultDest(); + } + + BranchInst *BI = cast<BranchInst>(TI); + ICmpInst *ICI = cast<ICmpInst>(BI->getCondition()); + BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE); + Cases.push_back(ValueEqualityComparisonCase( + GetConstantInt(ICI->getOperand(1), DL), Succ)); + return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ); +} + +/// Given a vector of bb/value pairs, remove any entries +/// in the list that match the specified block. +static void +EliminateBlockCases(BasicBlock *BB, + std::vector<ValueEqualityComparisonCase> &Cases) { + llvm::erase_value(Cases, BB); +} + +/// Return true if there are any keys in C1 that exist in C2 as well. +static bool ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1, + std::vector<ValueEqualityComparisonCase> &C2) { + std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2; + + // Make V1 be smaller than V2. + if (V1->size() > V2->size()) + std::swap(V1, V2); + + if (V1->empty()) + return false; + if (V1->size() == 1) { + // Just scan V2. + ConstantInt *TheVal = (*V1)[0].Value; + for (const ValueEqualityComparisonCase &VECC : *V2) + if (TheVal == VECC.Value) + return true; + } + + // Otherwise, just sort both lists and compare element by element. + array_pod_sort(V1->begin(), V1->end()); + array_pod_sort(V2->begin(), V2->end()); + unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size(); + while (i1 != e1 && i2 != e2) { + if ((*V1)[i1].Value == (*V2)[i2].Value) + return true; + if ((*V1)[i1].Value < (*V2)[i2].Value) + ++i1; + else + ++i2; + } + return false; +} + +// Set branch weights on SwitchInst. This sets the metadata if there is at +// least one non-zero weight. +static void setBranchWeights(SwitchInst *SI, ArrayRef<uint32_t> Weights) { + // Check that there is at least one non-zero weight. Otherwise, pass + // nullptr to setMetadata which will erase the existing metadata. + MDNode *N = nullptr; + if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; })) + N = MDBuilder(SI->getParent()->getContext()).createBranchWeights(Weights); + SI->setMetadata(LLVMContext::MD_prof, N); +} + +// Similar to the above, but for branch and select instructions that take +// exactly 2 weights. +static void setBranchWeights(Instruction *I, uint32_t TrueWeight, + uint32_t FalseWeight) { + assert(isa<BranchInst>(I) || isa<SelectInst>(I)); + // Check that there is at least one non-zero weight. Otherwise, pass + // nullptr to setMetadata which will erase the existing metadata. + MDNode *N = nullptr; + if (TrueWeight || FalseWeight) + N = MDBuilder(I->getParent()->getContext()) + .createBranchWeights(TrueWeight, FalseWeight); + I->setMetadata(LLVMContext::MD_prof, N); +} + +/// If TI is known to be a terminator instruction and its block is known to +/// only have a single predecessor block, check to see if that predecessor is +/// also a value comparison with the same value, and if that comparison +/// determines the outcome of this comparison. If so, simplify TI. This does a +/// very limited form of jump threading. 
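+///
+/// For instance, if the sole predecessor ends in 'switch i32 %x' and this
+/// block is its case for %x == 7, then %x is known to be 7 here, so TI can be
+/// replaced with an unconditional branch to whichever successor handles 7;
+/// conversely, if this block is the predecessor's default destination, cases
+/// here whose values the predecessor already dispatched are dead and are
+/// pruned.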
+bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( + Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) { + Value *PredVal = isValueEqualityComparison(Pred->getTerminator()); + if (!PredVal) + return false; // Not a value comparison in predecessor. + + Value *ThisVal = isValueEqualityComparison(TI); + assert(ThisVal && "This isn't a value comparison!!"); + if (ThisVal != PredVal) + return false; // Different predicates. + + // TODO: Preserve branch weight metadata, similarly to how + // FoldValueComparisonIntoPredecessors preserves it. + + // Find out information about when control will move from Pred to TI's block. + std::vector<ValueEqualityComparisonCase> PredCases; + BasicBlock *PredDef = + GetValueEqualityComparisonCases(Pred->getTerminator(), PredCases); + EliminateBlockCases(PredDef, PredCases); // Remove default from cases. + + // Find information about how control leaves this block. + std::vector<ValueEqualityComparisonCase> ThisCases; + BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases); + EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases. + + // If TI's block is the default block from Pred's comparison, potentially + // simplify TI based on this knowledge. + if (PredDef == TI->getParent()) { + // If we are here, we know that the value is none of those cases listed in + // PredCases. If there are any cases in ThisCases that are in PredCases, we + // can simplify TI. + if (!ValuesOverlap(PredCases, ThisCases)) + return false; + + if (isa<BranchInst>(TI)) { + // Okay, one of the successors of this condbr is dead. Convert it to a + // uncond br. + assert(ThisCases.size() == 1 && "Branch can only have one case!"); + // Insert the new branch. + Instruction *NI = Builder.CreateBr(ThisDef); + (void)NI; + + // Remove PHI node entries for the dead edge. + ThisCases[0].Dest->removePredecessor(PredDef); + + LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI << "Leaving: " << *NI + << "\n"); + + EraseTerminatorAndDCECond(TI); + + if (DTU) + DTU->applyUpdates( + {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}}); + + return true; + } + + SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI); + // Okay, TI has cases that are statically dead, prune them away. + SmallPtrSet<Constant *, 16> DeadCases; + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + DeadCases.insert(PredCases[i].Value); + + LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI); + + SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases; + for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) { + --i; + auto *Successor = i->getCaseSuccessor(); + if (DTU) + ++NumPerSuccessorCases[Successor]; + if (DeadCases.count(i->getCaseValue())) { + Successor->removePredecessor(PredDef); + SI.removeCase(i); + if (DTU) + --NumPerSuccessorCases[Successor]; + } + } + + if (DTU) { + std::vector<DominatorTree::UpdateType> Updates; + for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases) + if (I.second == 0) + Updates.push_back({DominatorTree::Delete, PredDef, I.first}); + DTU->applyUpdates(Updates); + } + + LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n"); + return true; + } + + // Otherwise, TI's block must correspond to some matched value. Find out + // which value (or set of values) this is. 
+ ConstantInt *TIV = nullptr; + BasicBlock *TIBB = TI->getParent(); + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + if (PredCases[i].Dest == TIBB) { + if (TIV) + return false; // Cannot handle multiple values coming to this block. + TIV = PredCases[i].Value; + } + assert(TIV && "No edge from pred to succ?"); + + // Okay, we found the one constant that our value can be if we get into TI's + // BB. Find out which successor will unconditionally be branched to. + BasicBlock *TheRealDest = nullptr; + for (unsigned i = 0, e = ThisCases.size(); i != e; ++i) + if (ThisCases[i].Value == TIV) { + TheRealDest = ThisCases[i].Dest; + break; + } + + // If not handled by any explicit cases, it is handled by the default case. + if (!TheRealDest) + TheRealDest = ThisDef; + + SmallPtrSet<BasicBlock *, 2> RemovedSuccs; + + // Remove PHI node entries for dead edges. + BasicBlock *CheckEdge = TheRealDest; + for (BasicBlock *Succ : successors(TIBB)) + if (Succ != CheckEdge) { + if (Succ != TheRealDest) + RemovedSuccs.insert(Succ); + Succ->removePredecessor(TIBB); + } else + CheckEdge = nullptr; + + // Insert the new branch. + Instruction *NI = Builder.CreateBr(TheRealDest); + (void)NI; + + LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI << "Leaving: " << *NI + << "\n"); + + EraseTerminatorAndDCECond(TI); + if (DTU) { + SmallVector<DominatorTree::UpdateType, 2> Updates; + Updates.reserve(RemovedSuccs.size()); + for (auto *RemovedSucc : RemovedSuccs) + Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc}); + DTU->applyUpdates(Updates); + } + return true; +} + +namespace { + +/// This class implements a stable ordering of constant +/// integers that does not depend on their address. This is important for +/// applications that sort ConstantInt's to ensure uniqueness. +struct ConstantIntOrdering { + bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const { + return LHS->getValue().ult(RHS->getValue()); + } +}; + +} // end anonymous namespace + +static int ConstantIntSortPredicate(ConstantInt *const *P1, + ConstantInt *const *P2) { + const ConstantInt *LHS = *P1; + const ConstantInt *RHS = *P2; + if (LHS == RHS) + return 0; + return LHS->getValue().ult(RHS->getValue()) ? 1 : -1; +} + +/// Get Weights of a given terminator, the default weight is at the front +/// of the vector. If TI is a conditional eq, we need to swap the branch-weight +/// metadata. +static void GetBranchWeights(Instruction *TI, + SmallVectorImpl<uint64_t> &Weights) { + MDNode *MD = TI->getMetadata(LLVMContext::MD_prof); + assert(MD); + for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) { + ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(i)); + Weights.push_back(CI->getValue().getZExtValue()); + } + + // If TI is a conditional eq, the default case is the false case, + // and the corresponding branch-weight data is at index 2. We swap the + // default weight to be the first entry. + if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { + assert(Weights.size() == 2); + ICmpInst *ICI = cast<ICmpInst>(BI->getCondition()); + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + std::swap(Weights.front(), Weights.back()); + } +} + +/// Keep halving the weights until all can fit in uint32_t. 
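+/// (In practice all weights are shifted right by the same amount in a single
+/// step; e.g., with hypothetical weights whose maximum is 2^40, every weight
+/// is shifted right by 9 bits so the maximum becomes 2^31, which fits in
+/// uint32_t while roughly preserving the relative proportions.)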
+static void FitWeights(MutableArrayRef<uint64_t> Weights) { + uint64_t Max = *std::max_element(Weights.begin(), Weights.end()); + if (Max > UINT_MAX) { + unsigned Offset = 32 - countLeadingZeros(Max); + for (uint64_t &I : Weights) + I >>= Offset; + } +} + +static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses( + BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) { + Instruction *PTI = PredBlock->getTerminator(); + + // If we have bonus instructions, clone them into the predecessor block. + // Note that there may be multiple predecessor blocks, so we cannot move + // bonus instructions to a predecessor block. + for (Instruction &BonusInst : *BB) { + if (isa<DbgInfoIntrinsic>(BonusInst) || BonusInst.isTerminator()) + continue; + + Instruction *NewBonusInst = BonusInst.clone(); + + if (PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) { + // Unless the instruction has the same !dbg location as the original + // branch, drop it. When we fold the bonus instructions we want to make + // sure we reset their debug locations in order to avoid stepping on + // dead code caused by folding dead branches. + NewBonusInst->setDebugLoc(DebugLoc()); + } + + RemapInstruction(NewBonusInst, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + VMap[&BonusInst] = NewBonusInst; + + // If we moved a load, we cannot any longer claim any knowledge about + // its potential value. The previous information might have been valid + // only given the branch precondition. + // For an analogous reason, we must also drop all the metadata whose + // semantics we don't understand. We *can* preserve !annotation, because + // it is tied to the instruction itself, not the value or position. + // Similarly strip attributes on call parameters that may cause UB in + // location the call is moved to. + NewBonusInst->dropUndefImplyingAttrsAndUnknownMetadata( + LLVMContext::MD_annotation); + + NewBonusInst->insertInto(PredBlock, PTI->getIterator()); + NewBonusInst->takeName(&BonusInst); + BonusInst.setName(NewBonusInst->getName() + ".old"); + + // Update (liveout) uses of bonus instructions, + // now that the bonus instruction has been cloned into predecessor. + // Note that we expect to be in a block-closed SSA form for this to work! + for (Use &U : make_early_inc_range(BonusInst.uses())) { + auto *UI = cast<Instruction>(U.getUser()); + auto *PN = dyn_cast<PHINode>(UI); + if (!PN) { + assert(UI->getParent() == BB && BonusInst.comesBefore(UI) && + "If the user is not a PHI node, then it should be in the same " + "block as, and come after, the original bonus instruction."); + continue; // Keep using the original bonus instruction. + } + // Is this the block-closed SSA form PHI node? + if (PN->getIncomingBlock(U) == BB) + continue; // Great, keep using the original bonus instruction. + // The only other alternative is an "use" when coming from + // the predecessor block - here we should refer to the cloned bonus instr. + assert(PN->getIncomingBlock(U) == PredBlock && + "Not in block-closed SSA form?"); + U.set(NewBonusInst); + } + } +} + +bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding( + Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) { + BasicBlock *BB = TI->getParent(); + BasicBlock *Pred = PTI->getParent(); + + SmallVector<DominatorTree::UpdateType, 32> Updates; + + // Figure out which 'cases' to copy from SI to PSI. 
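+  // For illustration (hypothetical IR): given
+  //   pred: switch i32 %x, label %bb [ i32 1, label %a ]
+  //   bb:   switch i32 %x, label %d  [ i32 2, label %b
+  //                                    i32 3, label %c ]
+  // the fold leaves a single switch in %pred:
+  //   switch i32 %x, label %d [ i32 1, label %a
+  //                             i32 2, label %b
+  //                             i32 3, label %c ]
+  // because %bb was only reachable when %x != 1, so its cases and its default
+  // apply directly from %pred.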
+ std::vector<ValueEqualityComparisonCase> BBCases; + BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases); + + std::vector<ValueEqualityComparisonCase> PredCases; + BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases); + + // Based on whether the default edge from PTI goes to BB or not, fill in + // PredCases and PredDefault with the new switch cases we would like to + // build. + SmallMapVector<BasicBlock *, int, 8> NewSuccessors; + + // Update the branch weight metadata along the way + SmallVector<uint64_t, 8> Weights; + bool PredHasWeights = hasBranchWeightMD(*PTI); + bool SuccHasWeights = hasBranchWeightMD(*TI); + + if (PredHasWeights) { + GetBranchWeights(PTI, Weights); + // branch-weight metadata is inconsistent here. + if (Weights.size() != 1 + PredCases.size()) + PredHasWeights = SuccHasWeights = false; + } else if (SuccHasWeights) + // If there are no predecessor weights but there are successor weights, + // populate Weights with 1, which will later be scaled to the sum of + // successor's weights + Weights.assign(1 + PredCases.size(), 1); + + SmallVector<uint64_t, 8> SuccWeights; + if (SuccHasWeights) { + GetBranchWeights(TI, SuccWeights); + // branch-weight metadata is inconsistent here. + if (SuccWeights.size() != 1 + BBCases.size()) + PredHasWeights = SuccHasWeights = false; + } else if (PredHasWeights) + SuccWeights.assign(1 + BBCases.size(), 1); + + if (PredDefault == BB) { + // If this is the default destination from PTI, only the edges in TI + // that don't occur in PTI, or that branch to BB will be activated. + std::set<ConstantInt *, ConstantIntOrdering> PTIHandled; + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + if (PredCases[i].Dest != BB) + PTIHandled.insert(PredCases[i].Value); + else { + // The default destination is BB, we don't need explicit targets. + std::swap(PredCases[i], PredCases.back()); + + if (PredHasWeights || SuccHasWeights) { + // Increase weight for the default case. + Weights[0] += Weights[i + 1]; + std::swap(Weights[i + 1], Weights.back()); + Weights.pop_back(); + } + + PredCases.pop_back(); + --i; + --e; + } + + // Reconstruct the new switch statement we will be building. + if (PredDefault != BBDefault) { + PredDefault->removePredecessor(Pred); + if (DTU && PredDefault != BB) + Updates.push_back({DominatorTree::Delete, Pred, PredDefault}); + PredDefault = BBDefault; + ++NewSuccessors[BBDefault]; + } + + unsigned CasesFromPred = Weights.size(); + uint64_t ValidTotalSuccWeight = 0; + for (unsigned i = 0, e = BBCases.size(); i != e; ++i) + if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) { + PredCases.push_back(BBCases[i]); + ++NewSuccessors[BBCases[i].Dest]; + if (SuccHasWeights || PredHasWeights) { + // The default weight is at index 0, so weight for the ith case + // should be at index i+1. Scale the cases from successor by + // PredDefaultWeight (Weights[0]). + Weights.push_back(Weights[0] * SuccWeights[i + 1]); + ValidTotalSuccWeight += SuccWeights[i + 1]; + } + } + + if (SuccHasWeights || PredHasWeights) { + ValidTotalSuccWeight += SuccWeights[0]; + // Scale the cases from predecessor by ValidTotalSuccWeight. + for (unsigned i = 1; i < CasesFromPred; ++i) + Weights[i] *= ValidTotalSuccWeight; + // Scale the default weight by SuccDefaultWeight (SuccWeights[0]). + Weights[0] *= SuccWeights[0]; + } + } else { + // If this is not the default destination from PSI, only the edges + // in SI that occur in PSI with a destination of BB will be + // activated. 
+ std::set<ConstantInt *, ConstantIntOrdering> PTIHandled; + std::map<ConstantInt *, uint64_t> WeightsForHandled; + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + if (PredCases[i].Dest == BB) { + PTIHandled.insert(PredCases[i].Value); + + if (PredHasWeights || SuccHasWeights) { + WeightsForHandled[PredCases[i].Value] = Weights[i + 1]; + std::swap(Weights[i + 1], Weights.back()); + Weights.pop_back(); + } + + std::swap(PredCases[i], PredCases.back()); + PredCases.pop_back(); + --i; + --e; + } + + // Okay, now we know which constants were sent to BB from the + // predecessor. Figure out where they will all go now. + for (unsigned i = 0, e = BBCases.size(); i != e; ++i) + if (PTIHandled.count(BBCases[i].Value)) { + // If this is one we are capable of getting... + if (PredHasWeights || SuccHasWeights) + Weights.push_back(WeightsForHandled[BBCases[i].Value]); + PredCases.push_back(BBCases[i]); + ++NewSuccessors[BBCases[i].Dest]; + PTIHandled.erase(BBCases[i].Value); // This constant is taken care of + } + + // If there are any constants vectored to BB that TI doesn't handle, + // they must go to the default destination of TI. + for (ConstantInt *I : PTIHandled) { + if (PredHasWeights || SuccHasWeights) + Weights.push_back(WeightsForHandled[I]); + PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault)); + ++NewSuccessors[BBDefault]; + } + } + + // Okay, at this point, we know which new successor Pred will get. Make + // sure we update the number of entries in the PHI nodes for these + // successors. + SmallPtrSet<BasicBlock *, 2> SuccsOfPred; + if (DTU) { + SuccsOfPred = {succ_begin(Pred), succ_end(Pred)}; + Updates.reserve(Updates.size() + NewSuccessors.size()); + } + for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor : + NewSuccessors) { + for (auto I : seq(0, NewSuccessor.second)) { + (void)I; + AddPredecessorToBlock(NewSuccessor.first, Pred, BB); + } + if (DTU && !SuccsOfPred.contains(NewSuccessor.first)) + Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first}); + } + + Builder.SetInsertPoint(PTI); + // Convert pointer to int before we switch. + if (CV->getType()->isPointerTy()) { + CV = + Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr"); + } + + // Now that the successors are updated, create the new Switch instruction. + SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size()); + NewSI->setDebugLoc(PTI->getDebugLoc()); + for (ValueEqualityComparisonCase &V : PredCases) + NewSI->addCase(V.Value, V.Dest); + + if (PredHasWeights || SuccHasWeights) { + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(Weights); + + SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); + + setBranchWeights(NewSI, MDWeights); + } + + EraseTerminatorAndDCECond(PTI); + + // Okay, last check. If BB is still a successor of PSI, then we must + // have an infinite loop case. If so, add an infinitely looping block + // to handle the case to preserve the behavior of the code. + BasicBlock *InfLoopBlock = nullptr; + for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i) + if (NewSI->getSuccessor(i) == BB) { + if (!InfLoopBlock) { + // Insert it at the end of the function, because it's either code, + // or it won't matter if it's hot. 
:) + InfLoopBlock = + BasicBlock::Create(BB->getContext(), "infloop", BB->getParent()); + BranchInst::Create(InfLoopBlock, InfLoopBlock); + if (DTU) + Updates.push_back( + {DominatorTree::Insert, InfLoopBlock, InfLoopBlock}); + } + NewSI->setSuccessor(i, InfLoopBlock); + } + + if (DTU) { + if (InfLoopBlock) + Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock}); + + Updates.push_back({DominatorTree::Delete, Pred, BB}); + + DTU->applyUpdates(Updates); + } + + ++NumFoldValueComparisonIntoPredecessors; + return true; +} + +/// The specified terminator is a value equality comparison instruction +/// (either a switch or a branch on "X == c"). +/// See if any of the predecessors of the terminator block are value comparisons +/// on the same value. If so, and if safe to do so, fold them together. +bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI, + IRBuilder<> &Builder) { + BasicBlock *BB = TI->getParent(); + Value *CV = isValueEqualityComparison(TI); // CondVal + assert(CV && "Not a comparison?"); + + bool Changed = false; + + SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); + while (!Preds.empty()) { + BasicBlock *Pred = Preds.pop_back_val(); + Instruction *PTI = Pred->getTerminator(); + + // Don't try to fold into itself. + if (Pred == BB) + continue; + + // See if the predecessor is a comparison with the same value. + Value *PCV = isValueEqualityComparison(PTI); // PredCondVal + if (PCV != CV) + continue; + + SmallSetVector<BasicBlock *, 4> FailBlocks; + if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) { + for (auto *Succ : FailBlocks) { + if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU)) + return false; + } + } + + PerformValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder); + Changed = true; + } + return Changed; +} + +// If we would need to insert a select that uses the value of this invoke +// (comments in HoistThenElseCodeToIf explain why we would need to do this), we +// can't hoist the invoke, as there is nowhere to put the select in this case. +static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, + Instruction *I1, Instruction *I2) { + for (BasicBlock *Succ : successors(BB1)) { + for (const PHINode &PN : Succ->phis()) { + Value *BB1V = PN.getIncomingValueForBlock(BB1); + Value *BB2V = PN.getIncomingValueForBlock(BB2); + if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) { + return false; + } + } + } + return true; +} + +// Get interesting characteristics of instructions that `HoistThenElseCodeToIf` +// didn't hoist. They restrict what kind of instructions can be reordered +// across. +enum SkipFlags { + SkipReadMem = 1, + SkipSideEffect = 2, + SkipImplicitControlFlow = 4 +}; + +static unsigned skippedInstrFlags(Instruction *I) { + unsigned Flags = 0; + if (I->mayReadFromMemory()) + Flags |= SkipReadMem; + // We can't arbitrarily move around allocas, e.g. moving allocas (especially + // inalloca) across stacksave/stackrestore boundaries. + if (I->mayHaveSideEffects() || isa<AllocaInst>(I)) + Flags |= SkipSideEffect; + if (!isGuaranteedToTransferExecutionToSuccessor(I)) + Flags |= SkipImplicitControlFlow; + return Flags; +} + +// Returns true if it is safe to reorder an instruction across preceding +// instructions in a basic block. +static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) { + // Don't reorder a store over a load. 
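+  // (If an already-skipped instruction may have read memory, hoisting a store
+  // past it could change the value that earlier read observes.)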
+ if ((Flags & SkipReadMem) && I->mayWriteToMemory()) + return false; + + // If we have seen an instruction with side effects, it's unsafe to reorder an + // instruction which reads memory or itself has side effects. + if ((Flags & SkipSideEffect) && + (I->mayReadFromMemory() || I->mayHaveSideEffects())) + return false; + + // Reordering across an instruction which does not necessarily transfer + // control to the next instruction is speculation. + if ((Flags & SkipImplicitControlFlow) && !isSafeToSpeculativelyExecute(I)) + return false; + + // Hoisting of llvm.deoptimize is only legal together with the next return + // instruction, which this pass is not always able to do. + if (auto *CB = dyn_cast<CallBase>(I)) + if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize) + return false; + + // It's also unsafe/illegal to hoist an instruction above its instruction + // operands + BasicBlock *BB = I->getParent(); + for (Value *Op : I->operands()) { + if (auto *J = dyn_cast<Instruction>(Op)) + if (J->getParent() == BB) + return false; + } + + return true; +} + +static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false); + +/// Given a conditional branch that goes to BB1 and BB2, hoist any common code +/// in the two blocks up into the branch block. The caller of this function +/// guarantees that BI's block dominates BB1 and BB2. If EqTermsOnly is given, +/// only perform hoisting in case both blocks only contain a terminator. In that +/// case, only the original BI will be replaced and selects for PHIs are added. +bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, + const TargetTransformInfo &TTI, + bool EqTermsOnly) { + // This does very trivial matching, with limited scanning, to find identical + // instructions in the two blocks. In particular, we don't want to get into + // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As + // such, we currently just scan for obviously identical instructions in an + // identical order, possibly separated by the same number of non-identical + // instructions. + BasicBlock *BB1 = BI->getSuccessor(0); // The true destination. + BasicBlock *BB2 = BI->getSuccessor(1); // The false destination + + // If either of the blocks has it's address taken, then we can't do this fold, + // because the code we'd hoist would no longer run when we jump into the block + // by it's address. + if (BB1->hasAddressTaken() || BB2->hasAddressTaken()) + return false; + + BasicBlock::iterator BB1_Itr = BB1->begin(); + BasicBlock::iterator BB2_Itr = BB2->begin(); + + Instruction *I1 = &*BB1_Itr++, *I2 = &*BB2_Itr++; + // Skip debug info if it is not identical. + DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1); + DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2); + if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { + while (isa<DbgInfoIntrinsic>(I1)) + I1 = &*BB1_Itr++; + while (isa<DbgInfoIntrinsic>(I2)) + I2 = &*BB2_Itr++; + } + if (isa<PHINode>(I1)) + return false; + + BasicBlock *BIParent = BI->getParent(); + + bool Changed = false; + + auto _ = make_scope_exit([&]() { + if (Changed) + ++NumHoistCommonCode; + }); + + // Check if only hoisting terminators is allowed. This does not add new + // instructions to the hoist location. + if (EqTermsOnly) { + // Skip any debug intrinsics, as they are free to hoist. 
+    auto *I1NonDbg = &*skipDebugIntrinsics(I1->getIterator());
+    auto *I2NonDbg = &*skipDebugIntrinsics(I2->getIterator());
+    if (!I1NonDbg->isIdenticalToWhenDefined(I2NonDbg))
+      return false;
+    if (!I1NonDbg->isTerminator())
+      return false;
+    // Now we know that we only need to hoist debug intrinsics and the
+    // terminator. Let the loop below handle those 2 cases.
+  }
+
+  // Count how many instructions were not hoisted so far. There's a limit on how
+  // many instructions we skip, serving as a compilation time control as well as
+  // preventing excessive increase of live ranges.
+  unsigned NumSkipped = 0;
+
+  // Record any skipped instructions that may read memory, write memory or have
+  // side effects, or have implicit control flow.
+  unsigned SkipFlagsBB1 = 0;
+  unsigned SkipFlagsBB2 = 0;
+
+  for (;;) {
+    // If we are hoisting the terminator instruction, don't move one (making a
+    // broken BB), instead clone it, and remove BI.
+    if (I1->isTerminator() || I2->isTerminator()) {
+      // If any instructions remain in the block, we cannot hoist terminators.
+      if (NumSkipped || !I1->isIdenticalToWhenDefined(I2))
+        return Changed;
+      goto HoistTerminator;
+    }
+
+    if (I1->isIdenticalToWhenDefined(I2)) {
+      // Even if the instructions are identical, it may not be safe to hoist
+      // them if we have skipped over instructions with side effects or their
+      // operands weren't hoisted.
+      if (!isSafeToHoistInstr(I1, SkipFlagsBB1) ||
+          !isSafeToHoistInstr(I2, SkipFlagsBB2))
+        return Changed;
+
+      // If we're going to hoist a call, make sure that the two instructions
+      // we're commoning/hoisting are both marked with musttail, or neither of
+      // them is marked as such. Otherwise, we might end up in a situation where
+      // we hoist from a block where the terminator is a `ret` to a block where
+      // the terminator is a `br`, and `musttail` calls expect to be followed by
+      // a return.
+      auto *C1 = dyn_cast<CallInst>(I1);
+      auto *C2 = dyn_cast<CallInst>(I2);
+      if (C1 && C2)
+        if (C1->isMustTailCall() != C2->isMustTailCall())
+          return Changed;
+
+      if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
+        return Changed;
+
+      // If either of the two call sites has the nomerge attribute, stop hoisting.
+      if (const auto *CB1 = dyn_cast<CallBase>(I1))
+        if (CB1->cannotMerge())
+          return Changed;
+      if (const auto *CB2 = dyn_cast<CallBase>(I2))
+        if (CB2->cannotMerge())
+          return Changed;
+
+      if (isa<DbgInfoIntrinsic>(I1) || isa<DbgInfoIntrinsic>(I2)) {
+        assert(isa<DbgInfoIntrinsic>(I1) && isa<DbgInfoIntrinsic>(I2));
+        // The debug location is an integral part of a debug info intrinsic
+        // and can't be separated from it or replaced. Instead of attempting
+        // to merge locations, simply hoist both copies of the intrinsic.
+        BIParent->splice(BI->getIterator(), BB1, I1->getIterator());
+        BIParent->splice(BI->getIterator(), BB2, I2->getIterator());
+      } else {
+        // For a normal instruction, we just move one to right before the
+        // branch, then replace all uses of the other with the first. Finally,
+        // we remove the now redundant second instruction.
+ BIParent->splice(BI->getIterator(), BB1, I1->getIterator()); + if (!I2->use_empty()) + I2->replaceAllUsesWith(I1); + I1->andIRFlags(I2); + unsigned KnownIDs[] = {LLVMContext::MD_tbaa, + LLVMContext::MD_range, + LLVMContext::MD_fpmath, + LLVMContext::MD_invariant_load, + LLVMContext::MD_nonnull, + LLVMContext::MD_invariant_group, + LLVMContext::MD_align, + LLVMContext::MD_dereferenceable, + LLVMContext::MD_dereferenceable_or_null, + LLVMContext::MD_mem_parallel_loop_access, + LLVMContext::MD_access_group, + LLVMContext::MD_preserve_access_index}; + combineMetadata(I1, I2, KnownIDs, true); + + // I1 and I2 are being combined into a single instruction. Its debug + // location is the merged locations of the original instructions. + I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); + + I2->eraseFromParent(); + } + Changed = true; + ++NumHoistCommonInstrs; + } else { + if (NumSkipped >= HoistCommonSkipLimit) + return Changed; + // We are about to skip over a pair of non-identical instructions. Record + // if any have characteristics that would prevent reordering instructions + // across them. + SkipFlagsBB1 |= skippedInstrFlags(I1); + SkipFlagsBB2 |= skippedInstrFlags(I2); + ++NumSkipped; + } + + I1 = &*BB1_Itr++; + I2 = &*BB2_Itr++; + // Skip debug info if it is not identical. + DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1); + DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2); + if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { + while (isa<DbgInfoIntrinsic>(I1)) + I1 = &*BB1_Itr++; + while (isa<DbgInfoIntrinsic>(I2)) + I2 = &*BB2_Itr++; + } + } + + return Changed; + +HoistTerminator: + // It may not be possible to hoist an invoke. + // FIXME: Can we define a safety predicate for CallBr? + if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) + return Changed; + + // TODO: callbr hoisting currently disabled pending further study. + if (isa<CallBrInst>(I1)) + return Changed; + + for (BasicBlock *Succ : successors(BB1)) { + for (PHINode &PN : Succ->phis()) { + Value *BB1V = PN.getIncomingValueForBlock(BB1); + Value *BB2V = PN.getIncomingValueForBlock(BB2); + if (BB1V == BB2V) + continue; + + // Check for passingValueIsAlwaysUndefined here because we would rather + // eliminate undefined control flow then converting it to a select. + if (passingValueIsAlwaysUndefined(BB1V, &PN) || + passingValueIsAlwaysUndefined(BB2V, &PN)) + return Changed; + } + } + + // Okay, it is safe to hoist the terminator. + Instruction *NT = I1->clone(); + NT->insertInto(BIParent, BI->getIterator()); + if (!NT->getType()->isVoidTy()) { + I1->replaceAllUsesWith(NT); + I2->replaceAllUsesWith(NT); + NT->takeName(I1); + } + Changed = true; + ++NumHoistCommonInstrs; + + // Ensure terminator gets a debug location, even an unknown one, in case + // it involves inlinable calls. + NT->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); + + // PHIs created below will adopt NT's merged DebugLoc. + IRBuilder<NoFolder> Builder(NT); + + // Hoisting one of the terminators from our successor is a great thing. + // Unfortunately, the successors of the if/else blocks may have PHI nodes in + // them. If they do, all PHI entries for BB1/BB2 must agree for all PHI + // nodes, so we insert select instruction to compute the final result. 
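+  // For illustration (hypothetical IR, names made up): a successor PHI such as
+  //   %p = phi i32 [ %a, %bb1 ], [ %b, %bb2 ]
+  // is handled by materializing
+  //   %a.b = select i1 %cond, i32 %a, i32 %b
+  // in front of the hoisted terminator and pointing both incoming entries of
+  // the PHI at the select.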
+ std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects; + for (BasicBlock *Succ : successors(BB1)) { + for (PHINode &PN : Succ->phis()) { + Value *BB1V = PN.getIncomingValueForBlock(BB1); + Value *BB2V = PN.getIncomingValueForBlock(BB2); + if (BB1V == BB2V) + continue; + + // These values do not agree. Insert a select instruction before NT + // that determines the right value. + SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; + if (!SI) { + // Propagate fast-math-flags from phi node to its replacement select. + IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); + if (isa<FPMathOperator>(PN)) + Builder.setFastMathFlags(PN.getFastMathFlags()); + + SI = cast<SelectInst>( + Builder.CreateSelect(BI->getCondition(), BB1V, BB2V, + BB1V->getName() + "." + BB2V->getName(), BI)); + } + + // Make the PHI node use the select for all incoming values for BB1/BB2 + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) + if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2) + PN.setIncomingValue(i, SI); + } + } + + SmallVector<DominatorTree::UpdateType, 4> Updates; + + // Update any PHI nodes in our new successors. + for (BasicBlock *Succ : successors(BB1)) { + AddPredecessorToBlock(Succ, BIParent, BB1); + if (DTU) + Updates.push_back({DominatorTree::Insert, BIParent, Succ}); + } + + if (DTU) + for (BasicBlock *Succ : successors(BI)) + Updates.push_back({DominatorTree::Delete, BIParent, Succ}); + + EraseTerminatorAndDCECond(BI); + if (DTU) + DTU->applyUpdates(Updates); + return Changed; +} + +// Check lifetime markers. +static bool isLifeTimeMarker(const Instruction *I) { + if (auto II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: + break; + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return true; + } + } + return false; +} + +// TODO: Refine this. This should avoid cases like turning constant memcpy sizes +// into variables. +static bool replacingOperandWithVariableIsCheap(const Instruction *I, + int OpIdx) { + return !isa<IntrinsicInst>(I); +} + +// All instructions in Insts belong to different blocks that all unconditionally +// branch to a common successor. Analyze each instruction and return true if it +// would be possible to sink them into their successor, creating one common +// instruction instead. For every value that would be required to be provided by +// PHI node (because an operand varies in each input block), add to PHIOperands. +static bool canSinkInstructions( + ArrayRef<Instruction *> Insts, + DenseMap<Instruction *, SmallVector<Value *, 4>> &PHIOperands) { + // Prune out obviously bad instructions to move. Each instruction must have + // exactly zero or one use, and we check later that use is by a single, common + // PHI instruction in the successor. + bool HasUse = !Insts.front()->user_empty(); + for (auto *I : Insts) { + // These instructions may change or break semantics if moved. + if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) || + I->getType()->isTokenTy()) + return false; + + // Do not try to sink an instruction in an infinite loop - it can cause + // this algorithm to infinite loop. + if (I->getParent()->getSingleSuccessor() == I->getParent()) + return false; + + // Conservatively return false if I is an inline-asm instruction. Sinking + // and merging inline-asm instructions can potentially create arguments + // that cannot satisfy the inline-asm constraints. + // If the instruction has nomerge attribute, return false. 
+ if (const auto *C = dyn_cast<CallBase>(I)) + if (C->isInlineAsm() || C->cannotMerge()) + return false; + + // Each instruction must have zero or one use. + if (HasUse && !I->hasOneUse()) + return false; + if (!HasUse && !I->user_empty()) + return false; + } + + const Instruction *I0 = Insts.front(); + for (auto *I : Insts) + if (!I->isSameOperationAs(I0)) + return false; + + // All instructions in Insts are known to be the same opcode. If they have a + // use, check that the only user is a PHI or in the same block as the + // instruction, because if a user is in the same block as an instruction we're + // contemplating sinking, it must already be determined to be sinkable. + if (HasUse) { + auto *PNUse = dyn_cast<PHINode>(*I0->user_begin()); + auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0); + if (!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool { + auto *U = cast<Instruction>(*I->user_begin()); + return (PNUse && + PNUse->getParent() == Succ && + PNUse->getIncomingValueForBlock(I->getParent()) == I) || + U->getParent() == I->getParent(); + })) + return false; + } + + // Because SROA can't handle speculating stores of selects, try not to sink + // loads, stores or lifetime markers of allocas when we'd have to create a + // PHI for the address operand. Also, because it is likely that loads or + // stores of allocas will disappear when Mem2Reg/SROA is run, don't sink + // them. + // This can cause code churn which can have unintended consequences down + // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244. + // FIXME: This is a workaround for a deficiency in SROA - see + // https://llvm.org/bugs/show_bug.cgi?id=30188 + if (isa<StoreInst>(I0) && any_of(Insts, [](const Instruction *I) { + return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts()); + })) + return false; + if (isa<LoadInst>(I0) && any_of(Insts, [](const Instruction *I) { + return isa<AllocaInst>(I->getOperand(0)->stripPointerCasts()); + })) + return false; + if (isLifeTimeMarker(I0) && any_of(Insts, [](const Instruction *I) { + return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts()); + })) + return false; + + // For calls to be sinkable, they must all be indirect, or have same callee. + // I.e. if we have two direct calls to different callees, we don't want to + // turn that into an indirect call. Likewise, if we have an indirect call, + // and a direct call, we don't actually want to have a single indirect call. + if (isa<CallBase>(I0)) { + auto IsIndirectCall = [](const Instruction *I) { + return cast<CallBase>(I)->isIndirectCall(); + }; + bool HaveIndirectCalls = any_of(Insts, IsIndirectCall); + bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall); + if (HaveIndirectCalls) { + if (!AllCallsAreIndirect) + return false; + } else { + // All callees must be identical. + Value *Callee = nullptr; + for (const Instruction *I : Insts) { + Value *CurrCallee = cast<CallBase>(I)->getCalledOperand(); + if (!Callee) + Callee = CurrCallee; + else if (Callee != CurrCallee) + return false; + } + } + } + + for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) { + Value *Op = I0->getOperand(OI); + if (Op->getType()->isTokenTy()) + // Don't touch any operand of token type. 
+ return false; + + auto SameAsI0 = [&I0, OI](const Instruction *I) { + assert(I->getNumOperands() == I0->getNumOperands()); + return I->getOperand(OI) == I0->getOperand(OI); + }; + if (!all_of(Insts, SameAsI0)) { + if ((isa<Constant>(Op) && !replacingOperandWithVariableIsCheap(I0, OI)) || + !canReplaceOperandWithVariable(I0, OI)) + // We can't create a PHI from this GEP. + return false; + for (auto *I : Insts) + PHIOperands[I].push_back(I->getOperand(OI)); + } + } + return true; +} + +// Assuming canSinkInstructions(Blocks) has returned true, sink the last +// instruction of every block in Blocks to their common successor, commoning +// into one instruction. +static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) { + auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0); + + // canSinkInstructions returning true guarantees that every block has at + // least one non-terminator instruction. + SmallVector<Instruction*,4> Insts; + for (auto *BB : Blocks) { + Instruction *I = BB->getTerminator(); + do { + I = I->getPrevNode(); + } while (isa<DbgInfoIntrinsic>(I) && I != &BB->front()); + if (!isa<DbgInfoIntrinsic>(I)) + Insts.push_back(I); + } + + // The only checking we need to do now is that all users of all instructions + // are the same PHI node. canSinkInstructions should have checked this but + // it is slightly over-aggressive - it gets confused by commutative + // instructions so double-check it here. + Instruction *I0 = Insts.front(); + if (!I0->user_empty()) { + auto *PNUse = dyn_cast<PHINode>(*I0->user_begin()); + if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool { + auto *U = cast<Instruction>(*I->user_begin()); + return U == PNUse; + })) + return false; + } + + // We don't need to do any more checking here; canSinkInstructions should + // have done it all for us. + SmallVector<Value*, 4> NewOperands; + for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) { + // This check is different to that in canSinkInstructions. There, we + // cared about the global view once simplifycfg (and instcombine) have + // completed - it takes into account PHIs that become trivially + // simplifiable. However here we need a more local view; if an operand + // differs we create a PHI and rely on instcombine to clean up the very + // small mess we may make. + bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) { + return I->getOperand(O) != I0->getOperand(O); + }); + if (!NeedPHI) { + NewOperands.push_back(I0->getOperand(O)); + continue; + } + + // Create a new PHI in the successor block and populate it. + auto *Op = I0->getOperand(O); + assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!"); + auto *PN = PHINode::Create(Op->getType(), Insts.size(), + Op->getName() + ".sink", &BBEnd->front()); + for (auto *I : Insts) + PN->addIncoming(I->getOperand(O), I->getParent()); + NewOperands.push_back(PN); + } + + // Arbitrarily use I0 as the new "common" instruction; remap its operands + // and move it to the start of the successor block. + for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) + I0->getOperandUse(O).set(NewOperands[O]); + I0->moveBefore(&*BBEnd->getFirstInsertionPt()); + + // Update metadata and IR flags, and merge debug locations. + for (auto *I : Insts) + if (I != I0) { + // The debug location for the "common" instruction is the merged locations + // of all the commoned instructions. We start with the original location + // of the "common" instruction and iteratively merge each location in the + // loop below. 
+ // This is an N-way merge, which will be inefficient if I0 is a CallInst. + // However, as N-way merge for CallInst is rare, so we use simplified API + // instead of using complex API for N-way merge. + I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc()); + combineMetadataForCSE(I0, I, true); + I0->andIRFlags(I); + } + + if (!I0->user_empty()) { + // canSinkLastInstruction checked that all instructions were used by + // one and only one PHI node. Find that now, RAUW it to our common + // instruction and nuke it. + auto *PN = cast<PHINode>(*I0->user_begin()); + PN->replaceAllUsesWith(I0); + PN->eraseFromParent(); + } + + // Finally nuke all instructions apart from the common instruction. + for (auto *I : Insts) { + if (I == I0) + continue; + // The remaining uses are debug users, replace those with the common inst. + // In most (all?) cases this just introduces a use-before-def. + assert(I->user_empty() && "Inst unexpectedly still has non-dbg users"); + I->replaceAllUsesWith(I0); + I->eraseFromParent(); + } + + return true; +} + +namespace { + + // LockstepReverseIterator - Iterates through instructions + // in a set of blocks in reverse order from the first non-terminator. + // For example (assume all blocks have size n): + // LockstepReverseIterator I([B1, B2, B3]); + // *I-- = [B1[n], B2[n], B3[n]]; + // *I-- = [B1[n-1], B2[n-1], B3[n-1]]; + // *I-- = [B1[n-2], B2[n-2], B3[n-2]]; + // ... + class LockstepReverseIterator { + ArrayRef<BasicBlock*> Blocks; + SmallVector<Instruction*,4> Insts; + bool Fail; + + public: + LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : Blocks(Blocks) { + reset(); + } + + void reset() { + Fail = false; + Insts.clear(); + for (auto *BB : Blocks) { + Instruction *Inst = BB->getTerminator(); + for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);) + Inst = Inst->getPrevNode(); + if (!Inst) { + // Block wasn't big enough. + Fail = true; + return; + } + Insts.push_back(Inst); + } + } + + bool isValid() const { + return !Fail; + } + + void operator--() { + if (Fail) + return; + for (auto *&Inst : Insts) { + for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);) + Inst = Inst->getPrevNode(); + // Already at beginning of block. + if (!Inst) { + Fail = true; + return; + } + } + } + + void operator++() { + if (Fail) + return; + for (auto *&Inst : Insts) { + for (Inst = Inst->getNextNode(); Inst && isa<DbgInfoIntrinsic>(Inst);) + Inst = Inst->getNextNode(); + // Already at end of block. + if (!Inst) { + Fail = true; + return; + } + } + } + + ArrayRef<Instruction*> operator * () const { + return Insts; + } + }; + +} // end anonymous namespace + +/// Check whether BB's predecessors end with unconditional branches. If it is +/// true, sink any common code from the predecessors to BB. +static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, + DomTreeUpdater *DTU) { + // We support two situations: + // (1) all incoming arcs are unconditional + // (2) there are non-unconditional incoming arcs + // + // (2) is very common in switch defaults and + // else-if patterns; + // + // if (a) f(1); + // else if (b) f(2); + // + // produces: + // + // [if] + // / \ + // [f(1)] [if] + // | | \ + // | | | + // | [f(2)]| + // \ | / + // [ end ] + // + // [end] has two unconditional predecessor arcs and one conditional. The + // conditional refers to the implicit empty 'else' arc. This conditional + // arc can also be caused by an empty default block in a switch. 
+ // + // In this case, we attempt to sink code from all *unconditional* arcs. + // If we can sink instructions from these arcs (determined during the scan + // phase below) we insert a common successor for all unconditional arcs and + // connect that to [end], to enable sinking: + // + // [if] + // / \ + // [x(1)] [if] + // | | \ + // | | \ + // | [x(2)] | + // \ / | + // [sink.split] | + // \ / + // [ end ] + // + SmallVector<BasicBlock*,4> UnconditionalPreds; + bool HaveNonUnconditionalPredecessors = false; + for (auto *PredBB : predecessors(BB)) { + auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()); + if (PredBr && PredBr->isUnconditional()) + UnconditionalPreds.push_back(PredBB); + else + HaveNonUnconditionalPredecessors = true; + } + if (UnconditionalPreds.size() < 2) + return false; + + // We take a two-step approach to tail sinking. First we scan from the end of + // each block upwards in lockstep. If the n'th instruction from the end of each + // block can be sunk, those instructions are added to ValuesToSink and we + // carry on. If we can sink an instruction but need to PHI-merge some operands + // (because they're not identical in each instruction) we add these to + // PHIOperands. + int ScanIdx = 0; + SmallPtrSet<Value*,4> InstructionsToSink; + DenseMap<Instruction*, SmallVector<Value*,4>> PHIOperands; + LockstepReverseIterator LRI(UnconditionalPreds); + while (LRI.isValid() && + canSinkInstructions(*LRI, PHIOperands)) { + LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0] + << "\n"); + InstructionsToSink.insert((*LRI).begin(), (*LRI).end()); + ++ScanIdx; + --LRI; + } + + // If no instructions can be sunk, early-return. + if (ScanIdx == 0) + return false; + + bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB); + + if (!followedByDeoptOrUnreachable) { + // Okay, we *could* sink last ScanIdx instructions. But how many can we + // actually sink before encountering instruction that is unprofitable to + // sink? + auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) { + unsigned NumPHIdValues = 0; + for (auto *I : *LRI) + for (auto *V : PHIOperands[I]) { + if (!InstructionsToSink.contains(V)) + ++NumPHIdValues; + // FIXME: this check is overly optimistic. We may end up not sinking + // said instruction, due to the very same profitability check. + // See @creating_too_many_phis in sink-common-code.ll. + } + LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n"); + unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size(); + if ((NumPHIdValues % UnconditionalPreds.size()) != 0) + NumPHIInsts++; + + return NumPHIInsts <= 1; + }; + + // We've determined that we are going to sink last ScanIdx instructions, + // and recorded them in InstructionsToSink. Now, some instructions may be + // unprofitable to sink. But that determination depends on the instructions + // that we are going to sink. + + // First, forward scan: find the first instruction unprofitable to sink, + // recording all the ones that are profitable to sink. + // FIXME: would it be better, after we detect that not all are profitable. + // to either record the profitable ones, or erase the unprofitable ones? + // Maybe we need to choose (at runtime) the one that will touch least + // instrs? + LRI.reset(); + int Idx = 0; + SmallPtrSet<Value *, 4> InstructionsProfitableToSink; + while (Idx < ScanIdx) { + if (!ProfitableToSinkInstruction(LRI)) { + // Too many PHIs would be created. 
+ LLVM_DEBUG( + dbgs() << "SINK: stopping here, too many PHIs would be created!\n"); + break; + } + InstructionsProfitableToSink.insert((*LRI).begin(), (*LRI).end()); + --LRI; + ++Idx; + } + + // If no instructions can be sunk, early-return. + if (Idx == 0) + return false; + + // Did we determine that (only) some instructions are unprofitable to sink? + if (Idx < ScanIdx) { + // Okay, some instructions are unprofitable. + ScanIdx = Idx; + InstructionsToSink = InstructionsProfitableToSink; + + // But, that may make other instructions unprofitable, too. + // So, do a backward scan, do any earlier instructions become + // unprofitable? + assert( + !ProfitableToSinkInstruction(LRI) && + "We already know that the last instruction is unprofitable to sink"); + ++LRI; + --Idx; + while (Idx >= 0) { + // If we detect that an instruction becomes unprofitable to sink, + // all earlier instructions won't be sunk either, + // so preemptively keep InstructionsProfitableToSink in sync. + // FIXME: is this the most performant approach? + for (auto *I : *LRI) + InstructionsProfitableToSink.erase(I); + if (!ProfitableToSinkInstruction(LRI)) { + // Everything starting with this instruction won't be sunk. + ScanIdx = Idx; + InstructionsToSink = InstructionsProfitableToSink; + } + ++LRI; + --Idx; + } + } + + // If no instructions can be sunk, early-return. + if (ScanIdx == 0) + return false; + } + + bool Changed = false; + + if (HaveNonUnconditionalPredecessors) { + if (!followedByDeoptOrUnreachable) { + // It is always legal to sink common instructions from unconditional + // predecessors. However, if not all predecessors are unconditional, + // this transformation might be pessimizing. So as a rule of thumb, + // don't do it unless we'd sink at least one non-speculatable instruction. + // See https://bugs.llvm.org/show_bug.cgi?id=30244 + LRI.reset(); + int Idx = 0; + bool Profitable = false; + while (Idx < ScanIdx) { + if (!isSafeToSpeculativelyExecute((*LRI)[0])) { + Profitable = true; + break; + } + --LRI; + ++Idx; + } + if (!Profitable) + return false; + } + + LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n"); + // We have a conditional edge and we're going to sink some instructions. + // Insert a new block postdominating all blocks we're going to sink from. + if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU)) + // Edges couldn't be split. + return false; + Changed = true; + } + + // Now that we've analyzed all potential sinking candidates, perform the + // actual sink. We iteratively sink the last non-terminator of the source + // blocks into their common successor unless doing so would require too + // many PHI instructions to be generated (currently only one PHI is allowed + // per sunk instruction). + // + // We can use InstructionsToSink to discount values needing PHI-merging that will + // actually be sunk in a later iteration. This allows us to be more + // aggressive in what we sink. This does allow a false positive where we + // sink presuming a later value will also be sunk, but stop half way through + // and never actually sink it which means we produce more PHIs than intended. + // This is unlikely in practice though. + int SinkIdx = 0; + for (; SinkIdx != ScanIdx; ++SinkIdx) { + LLVM_DEBUG(dbgs() << "SINK: Sink: " + << *UnconditionalPreds[0]->getTerminator()->getPrevNode() + << "\n"); + + // Because we've sunk every instruction in turn, the current instruction to + // sink is always at index 0. 
+ LRI.reset(); + + if (!sinkLastInstruction(UnconditionalPreds)) { + LLVM_DEBUG( + dbgs() + << "SINK: stopping here, failed to actually sink instruction!\n"); + break; + } + + NumSinkCommonInstrs++; + Changed = true; + } + if (SinkIdx != 0) + ++NumSinkCommonCode; + return Changed; +} + +namespace { + +struct CompatibleSets { + using SetTy = SmallVector<InvokeInst *, 2>; + + SmallVector<SetTy, 1> Sets; + + static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes); + + SetTy &getCompatibleSet(InvokeInst *II); + + void insert(InvokeInst *II); +}; + +CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) { + // Perform a linear scan over all the existing sets, see if the new `invoke` + // is compatible with any particular set. Since we know that all the `invokes` + // within a set are compatible, only check the first `invoke` in each set. + // WARNING: at worst, this has quadratic complexity. + for (CompatibleSets::SetTy &Set : Sets) { + if (CompatibleSets::shouldBelongToSameSet({Set.front(), II})) + return Set; + } + + // Otherwise, we either had no sets yet, or this invoke forms a new set. + return Sets.emplace_back(); +} + +void CompatibleSets::insert(InvokeInst *II) { + getCompatibleSet(II).emplace_back(II); +} + +bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) { + assert(Invokes.size() == 2 && "Always called with exactly two candidates."); + + // Can we theoretically merge these `invoke`s? + auto IsIllegalToMerge = [](InvokeInst *II) { + return II->cannotMerge() || II->isInlineAsm(); + }; + if (any_of(Invokes, IsIllegalToMerge)) + return false; + + // Either both `invoke`s must be direct, + // or both `invoke`s must be indirect. + auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); }; + bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall); + bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall); + if (HaveIndirectCalls) { + if (!AllCallsAreIndirect) + return false; + } else { + // All callees must be identical. + Value *Callee = nullptr; + for (InvokeInst *II : Invokes) { + Value *CurrCallee = II->getCalledOperand(); + assert(CurrCallee && "There is always a called operand."); + if (!Callee) + Callee = CurrCallee; + else if (Callee != CurrCallee) + return false; + } + } + + // Either both `invoke`s must not have a normal destination, + // or both `invoke`s must have a normal destination, + auto HasNormalDest = [](InvokeInst *II) { + return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg()); + }; + if (any_of(Invokes, HasNormalDest)) { + // Do not merge `invoke` that does not have a normal destination with one + // that does have a normal destination, even though doing so would be legal. + if (!all_of(Invokes, HasNormalDest)) + return false; + + // All normal destinations must be identical. + BasicBlock *NormalBB = nullptr; + for (InvokeInst *II : Invokes) { + BasicBlock *CurrNormalBB = II->getNormalDest(); + assert(CurrNormalBB && "There is always a 'continue to' basic block."); + if (!NormalBB) + NormalBB = CurrNormalBB; + else if (NormalBB != CurrNormalBB) + return false; + } + + // In the normal destination, the incoming values for these two `invoke`s + // must be compatible. + SmallPtrSet<Value *, 16> EquivalenceSet(Invokes.begin(), Invokes.end()); + if (!IncomingValuesAreCompatible( + NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()}, + &EquivalenceSet)) + return false; + } + +#ifndef NDEBUG + // All unwind destinations must be identical. 
+  // We know that because we have started from said unwind destination.
+  BasicBlock *UnwindBB = nullptr;
+  for (InvokeInst *II : Invokes) {
+    BasicBlock *CurrUnwindBB = II->getUnwindDest();
+    assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
+    if (!UnwindBB)
+      UnwindBB = CurrUnwindBB;
+    else
+      assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
+  }
+#endif
+
+  // In the unwind destination, the incoming values for these two `invoke`s
+  // must be compatible.
+  if (!IncomingValuesAreCompatible(
+          Invokes.front()->getUnwindDest(),
+          {Invokes[0]->getParent(), Invokes[1]->getParent()}))
+    return false;
+
+  // Ignoring arguments, these `invoke`s must be identical,
+  // including operand bundles.
+  const InvokeInst *II0 = Invokes.front();
+  for (auto *II : Invokes.drop_front())
+    if (!II->isSameOperationAs(II0))
+      return false;
+
+  // Can we theoretically form the data operands for the merged `invoke`?
+  auto IsIllegalToMergeArguments = [](auto Ops) {
+    Type *Ty = std::get<0>(Ops)->getType();
+    assert(Ty == std::get<1>(Ops)->getType() && "Incompatible types?");
+    return Ty->isTokenTy() && std::get<0>(Ops) != std::get<1>(Ops);
+  };
+  assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
+  if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
+             IsIllegalToMergeArguments))
+    return false;
+
+  return true;
+}
+
+} // namespace
+
+// Merge all invokes in the provided set, all of which are compatible
+// as per the `CompatibleSets::shouldBelongToSameSet()`.
+static void MergeCompatibleInvokesImpl(ArrayRef<InvokeInst *> Invokes,
+                                       DomTreeUpdater *DTU) {
+  assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
+
+  SmallVector<DominatorTree::UpdateType, 8> Updates;
+  if (DTU)
+    Updates.reserve(2 + 3 * Invokes.size());
+
+  bool HasNormalDest =
+      !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
+
+  // Clone one of the invokes into a new basic block.
+  // Since they are all compatible, it doesn't matter which invoke is cloned.
+  InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
+    InvokeInst *II0 = Invokes.front();
+    BasicBlock *II0BB = II0->getParent();
+    BasicBlock *InsertBeforeBlock =
+        II0->getParent()->getIterator()->getNextNode();
+    Function *Func = II0BB->getParent();
+    LLVMContext &Ctx = II0->getContext();
+
+    BasicBlock *MergedInvokeBB = BasicBlock::Create(
+        Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
+
+    auto *MergedInvoke = cast<InvokeInst>(II0->clone());
+    // NOTE: all invokes have the same attributes, so no handling needed.
+    MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
+
+    if (!HasNormalDest) {
+      // This set does not have a normal destination,
+      // so just form a new block with unreachable terminator.
+      BasicBlock *MergedNormalDest = BasicBlock::Create(
+          Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
+      new UnreachableInst(Ctx, MergedNormalDest);
+      MergedInvoke->setNormalDest(MergedNormalDest);
+    }
+
+    // The unwind destination, however, remains identical for all invokes here.
+
+    return MergedInvoke;
+  }();
+
+  if (DTU) {
+    // Predecessor blocks that contained these invokes will now branch to
+    // the new block that contains the merged invoke, ...
+    for (InvokeInst *II : Invokes)
+      Updates.push_back(
+          {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
+
+    // ...
which has the new `unreachable` block as normal destination, + // or unwinds to the (same for all `invoke`s in this set) `landingpad`, + for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke)) + Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(), + SuccBBOfMergedInvoke}); + + // Since predecessor blocks now unconditionally branch to a new block, + // they no longer branch to their original successors. + for (InvokeInst *II : Invokes) + for (BasicBlock *SuccOfPredBB : successors(II->getParent())) + Updates.push_back( + {DominatorTree::Delete, II->getParent(), SuccOfPredBB}); + } + + bool IsIndirectCall = Invokes[0]->isIndirectCall(); + + // Form the merged operands for the merged invoke. + for (Use &U : MergedInvoke->operands()) { + // Only PHI together the indirect callees and data operands. + if (MergedInvoke->isCallee(&U)) { + if (!IsIndirectCall) + continue; + } else if (!MergedInvoke->isDataOperand(&U)) + continue; + + // Don't create trivial PHI's with all-identical incoming values. + bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) { + return II->getOperand(U.getOperandNo()) != U.get(); + }); + if (!NeedPHI) + continue; + + // Form a PHI out of all the data ops under this index. + PHINode *PN = PHINode::Create( + U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke); + for (InvokeInst *II : Invokes) + PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent()); + + U.set(PN); + } + + // We've ensured that each PHI node has compatible (identical) incoming values + // when coming from each of the `invoke`s in the current merge set, + // so update the PHI nodes accordingly. + for (BasicBlock *Succ : successors(MergedInvoke)) + AddPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(), + /*ExistPred=*/Invokes.front()->getParent()); + + // And finally, replace the original `invoke`s with an unconditional branch + // to the block with the merged `invoke`. Also, give that merged `invoke` + // the merged debugloc of all the original `invoke`s. + const DILocation *MergedDebugLoc = nullptr; + for (InvokeInst *II : Invokes) { + // Compute the debug location common to all the original `invoke`s. + if (!MergedDebugLoc) + MergedDebugLoc = II->getDebugLoc(); + else + MergedDebugLoc = + DILocation::getMergedLocation(MergedDebugLoc, II->getDebugLoc()); + + // And replace the old `invoke` with an unconditionally branch + // to the block with the merged `invoke`. + for (BasicBlock *OrigSuccBB : successors(II->getParent())) + OrigSuccBB->removePredecessor(II->getParent()); + BranchInst::Create(MergedInvoke->getParent(), II->getParent()); + II->replaceAllUsesWith(MergedInvoke); + II->eraseFromParent(); + ++NumInvokesMerged; + } + MergedInvoke->setDebugLoc(MergedDebugLoc); + ++NumInvokeSetsFormed; + + if (DTU) + DTU->applyUpdates(Updates); +} + +/// If this block is a `landingpad` exception handling block, categorize all +/// the predecessor `invoke`s into sets, with all `invoke`s in each set +/// being "mergeable" together, and then merge invokes in each set together. +/// +/// This is a weird mix of hoisting and sinking. Visually, it goes from: +/// [...] [...] +/// | | +/// [invoke0] [invoke1] +/// / \ / \ +/// [cont0] [landingpad] [cont1] +/// to: +/// [...] [...] +/// \ / +/// [invoke] +/// / \ +/// [cont] [landingpad] +/// +/// But of course we can only do that if the invokes share the `landingpad`, +/// edges invoke0->cont0 and invoke1->cont1 are "compatible", +/// and the invoked functions are "compatible". 
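+///
+/// Here "compatible" is decided by CompatibleSets::shouldBelongToSameSet()
+/// above: neither invoke may be inline asm or marked nomerge, both must be
+/// direct calls to the same callee or both indirect, they must agree on the
+/// normal destination (or both lack a meaningful one), the incoming PHI values
+/// in the shared destinations must match, no token-typed argument may differ,
+/// and apart from their arguments the invokes must be the same operation
+/// (including operand bundles).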
+static bool MergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU) { + if (!EnableMergeCompatibleInvokes) + return false; + + bool Changed = false; + + // FIXME: generalize to all exception handling blocks? + if (!BB->isLandingPad()) + return Changed; + + CompatibleSets Grouper; + + // Record all the predecessors of this `landingpad`. As per verifier, + // the only allowed predecessor is the unwind edge of an `invoke`. + // We want to group "compatible" `invokes` into the same set to be merged. + for (BasicBlock *PredBB : predecessors(BB)) + Grouper.insert(cast<InvokeInst>(PredBB->getTerminator())); + + // And now, merge `invoke`s that were grouped togeter. + for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) { + if (Invokes.size() < 2) + continue; + Changed = true; + MergeCompatibleInvokesImpl(Invokes, DTU); + } + + return Changed; +} + +namespace { +/// Track ephemeral values, which should be ignored for cost-modelling +/// purposes. Requires walking instructions in reverse order. +class EphemeralValueTracker { + SmallPtrSet<const Instruction *, 32> EphValues; + + bool isEphemeral(const Instruction *I) { + if (isa<AssumeInst>(I)) + return true; + return !I->mayHaveSideEffects() && !I->isTerminator() && + all_of(I->users(), [&](const User *U) { + return EphValues.count(cast<Instruction>(U)); + }); + } + +public: + bool track(const Instruction *I) { + if (isEphemeral(I)) { + EphValues.insert(I); + return true; + } + return false; + } + + bool contains(const Instruction *I) const { return EphValues.contains(I); } +}; +} // namespace + +/// Determine if we can hoist sink a sole store instruction out of a +/// conditional block. +/// +/// We are looking for code like the following: +/// BrBB: +/// store i32 %add, i32* %arrayidx2 +/// ... // No other stores or function calls (we could be calling a memory +/// ... // function). +/// %cmp = icmp ult %x, %y +/// br i1 %cmp, label %EndBB, label %ThenBB +/// ThenBB: +/// store i32 %add5, i32* %arrayidx2 +/// br label EndBB +/// EndBB: +/// ... +/// We are going to transform this into: +/// BrBB: +/// store i32 %add, i32* %arrayidx2 +/// ... // +/// %cmp = icmp ult %x, %y +/// %add.add5 = select i1 %cmp, i32 %add, %add5 +/// store i32 %add.add5, i32* %arrayidx2 +/// ... +/// +/// \return The pointer to the value of the previous store if the store can be +/// hoisted into the predecessor block. 0 otherwise. +static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, + BasicBlock *StoreBB, BasicBlock *EndBB) { + StoreInst *StoreToHoist = dyn_cast<StoreInst>(I); + if (!StoreToHoist) + return nullptr; + + // Volatile or atomic. + if (!StoreToHoist->isSimple()) + return nullptr; + + Value *StorePtr = StoreToHoist->getPointerOperand(); + Type *StoreTy = StoreToHoist->getValueOperand()->getType(); + + // Look for a store to the same pointer in BrBB. + unsigned MaxNumInstToLookAt = 9; + // Skip pseudo probe intrinsic calls which are not really killing any memory + // accesses. + for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) { + if (!MaxNumInstToLookAt) + break; + --MaxNumInstToLookAt; + + // Could be calling an instruction that affects memory like free(). + if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI)) + return nullptr; + + if (auto *SI = dyn_cast<StoreInst>(&CurI)) { + // Found the previous store to same location and type. Make sure it is + // simple, to avoid introducing a spurious non-atomic write after an + // atomic write. 
+ if (SI->getPointerOperand() == StorePtr && + SI->getValueOperand()->getType() == StoreTy && SI->isSimple()) + // Found the previous store, return its value operand. + return SI->getValueOperand(); + return nullptr; // Unknown store. + } + + if (auto *LI = dyn_cast<LoadInst>(&CurI)) { + if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy && + LI->isSimple()) { + // Local objects (created by an `alloca` instruction) are always + // writable, so once we are past a read from a location it is valid to + // also write to that same location. + // If the address of the local object never escapes the function, that + // means it's never concurrently read or written, hence moving the store + // from under the condition will not introduce a data race. + auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(StorePtr)); + if (AI && !PointerMayBeCaptured(AI, false, true)) + // Found a previous load, return it. + return LI; + } + // The load didn't work out, but we may still find a store. + } + } + + return nullptr; +} + +/// Estimate the cost of the insertion(s) and check that the PHI nodes can be +/// converted to selects. +static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, + BasicBlock *EndBB, + unsigned &SpeculatedInstructions, + InstructionCost &Cost, + const TargetTransformInfo &TTI) { + TargetTransformInfo::TargetCostKind CostKind = + BB->getParent()->hasMinSize() + ? TargetTransformInfo::TCK_CodeSize + : TargetTransformInfo::TCK_SizeAndLatency; + + bool HaveRewritablePHIs = false; + for (PHINode &PN : EndBB->phis()) { + Value *OrigV = PN.getIncomingValueForBlock(BB); + Value *ThenV = PN.getIncomingValueForBlock(ThenBB); + + // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf. + // Skip PHIs which are trivial. + if (ThenV == OrigV) + continue; + + Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr, + CmpInst::BAD_ICMP_PREDICATE, CostKind); + + // Don't convert to selects if we could remove undefined behavior instead. + if (passingValueIsAlwaysUndefined(OrigV, &PN) || + passingValueIsAlwaysUndefined(ThenV, &PN)) + return false; + + HaveRewritablePHIs = true; + ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV); + ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV); + if (!OrigCE && !ThenCE) + continue; // Known cheap (FIXME: Maybe not true for aggregates). + + InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0; + InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0; + InstructionCost MaxCost = + 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; + if (OrigCost + ThenCost > MaxCost) + return false; + + // Account for the cost of an unfolded ConstantExpr which could end up + // getting expanded into Instructions. + // FIXME: This doesn't account for how many operations are combined in the + // constant expression. + ++SpeculatedInstructions; + if (SpeculatedInstructions > 1) + return false; + } + + return HaveRewritablePHIs; +} + +/// Speculate a conditional basic block flattening the CFG. +/// +/// Note that this is a very risky transform currently. Speculating +/// instructions like this is most often not desirable. Instead, there is an MI +/// pass which can do it with full awareness of the resource constraints. +/// However, some cases are "obvious" and we should do directly. An example of +/// this is speculating a single, reasonably cheap instruction. 
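// A source-level C++ sketch of the store-speculation case sketched in the
// comment on isSafeToSpeculateStore above. Names are illustrative only, and
// the pass performs this rewrite on IR, not on C++ source.

static void condStoreBefore(int *P, int Add, int Add5, unsigned X, unsigned Y) {
  *P = Add;                    // unconditional store in the predecessor block
  if (!(X < Y))
    *P = Add5;                 // conditional store to the same location
}

static void condStoreAfter(int *P, int Add, int Add5, unsigned X, unsigned Y) {
  *P = Add;
  // The conditional store becomes a select of the two stored values followed
  // by one unconditional store, so the conditional block can be removed.
  *P = (X < Y) ? Add : Add5;
}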
+/// +/// There is only one distinct advantage to flattening the CFG at the IR level: +/// it makes very common but simplistic optimizations such as are common in +/// instcombine and the DAG combiner more powerful by removing CFG edges and +/// modeling their effects with easier to reason about SSA value graphs. +/// +/// +/// An illustration of this transform is turning this IR: +/// \code +/// BB: +/// %cmp = icmp ult %x, %y +/// br i1 %cmp, label %EndBB, label %ThenBB +/// ThenBB: +/// %sub = sub %x, %y +/// br label BB2 +/// EndBB: +/// %phi = phi [ %sub, %ThenBB ], [ 0, %EndBB ] +/// ... +/// \endcode +/// +/// Into this IR: +/// \code +/// BB: +/// %cmp = icmp ult %x, %y +/// %sub = sub %x, %y +/// %cond = select i1 %cmp, 0, %sub +/// ... +/// \endcode +/// +/// \returns true if the conditional block is removed. +bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, + const TargetTransformInfo &TTI) { + // Be conservative for now. FP select instruction can often be expensive. + Value *BrCond = BI->getCondition(); + if (isa<FCmpInst>(BrCond)) + return false; + + BasicBlock *BB = BI->getParent(); + BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0); + InstructionCost Budget = + PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; + + // If ThenBB is actually on the false edge of the conditional branch, remember + // to swap the select operands later. + bool Invert = false; + if (ThenBB != BI->getSuccessor(0)) { + assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?"); + Invert = true; + } + assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block"); + + // If the branch is non-unpredictable, and is predicted to *not* branch to + // the `then` block, then avoid speculating it. + if (!BI->getMetadata(LLVMContext::MD_unpredictable)) { + uint64_t TWeight, FWeight; + if (extractBranchWeights(*BI, TWeight, FWeight) && + (TWeight + FWeight) != 0) { + uint64_t EndWeight = Invert ? TWeight : FWeight; + BranchProbability BIEndProb = + BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight); + BranchProbability Likely = TTI.getPredictableBranchThreshold(); + if (BIEndProb >= Likely) + return false; + } + } + + // Keep a count of how many times instructions are used within ThenBB when + // they are candidates for sinking into ThenBB. Specifically: + // - They are defined in BB, and + // - They have no side effects, and + // - All of their uses are in ThenBB. + SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts; + + SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics; + + unsigned SpeculatedInstructions = 0; + Value *SpeculatedStoreValue = nullptr; + StoreInst *SpeculatedStore = nullptr; + EphemeralValueTracker EphTracker; + for (Instruction &I : reverse(drop_end(*ThenBB))) { + // Skip debug info. + if (isa<DbgInfoIntrinsic>(I)) { + SpeculatedDbgIntrinsics.push_back(&I); + continue; + } + + // Skip pseudo probes. The consequence is we lose track of the branch + // probability for ThenBB, which is fine since the optimization here takes + // place regardless of the branch probability. + if (isa<PseudoProbeInst>(I)) { + // The probe should be deleted so that it will not be over-counted when + // the samples collected on the non-conditional path are counted towards + // the conditional path. We leave it for the counts inference algorithm to + // figure out a proper count for an unknown probe. 
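// A plain-integer sketch of the profitability gate above: speculation is
// skipped when branch weights say the edge that bypasses the 'then' block is
// overwhelmingly likely. BranchProbability and getPredictableBranchThreshold()
// are modelled by simple fractions; the 99/100 threshold is an illustrative
// assumption, not necessarily the target's actual value.

#include <cstdint>

static bool worthSpeculatingSketch(uint64_t TWeight, uint64_t FWeight,
                                   bool Invert) {
  if (TWeight + FWeight == 0)
    return true;                       // no profile data, no gate
  // Weight of the edge that goes straight to EndBB (bypassing ThenBB).
  uint64_t EndWeight = Invert ? TWeight : FWeight;
  const uint64_t LikelyNum = 99, LikelyDen = 100;   // assumed threshold
  // EndWeight / (TWeight + FWeight) >= LikelyNum / LikelyDen, cross-multiplied
  // to stay in integer arithmetic.
  bool EndEdgeIsLikely =
      EndWeight * LikelyDen >= LikelyNum * (TWeight + FWeight);
  return !EndEdgeIsLikely;             // only speculate if ThenBB is not cold
}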
+ SpeculatedDbgIntrinsics.push_back(&I); + continue; + } + + // Ignore ephemeral values, they will be dropped by the transform. + if (EphTracker.track(&I)) + continue; + + // Only speculatively execute a single instruction (not counting the + // terminator) for now. + ++SpeculatedInstructions; + if (SpeculatedInstructions > 1) + return false; + + // Don't hoist the instruction if it's unsafe or expensive. + if (!isSafeToSpeculativelyExecute(&I) && + !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore( + &I, BB, ThenBB, EndBB)))) + return false; + if (!SpeculatedStoreValue && + computeSpeculationCost(&I, TTI) > + PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic) + return false; + + // Store the store speculation candidate. + if (SpeculatedStoreValue) + SpeculatedStore = cast<StoreInst>(&I); + + // Do not hoist the instruction if any of its operands are defined but not + // used in BB. The transformation will prevent the operand from + // being sunk into the use block. + for (Use &Op : I.operands()) { + Instruction *OpI = dyn_cast<Instruction>(Op); + if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects()) + continue; // Not a candidate for sinking. + + ++SinkCandidateUseCounts[OpI]; + } + } + + // Consider any sink candidates which are only used in ThenBB as costs for + // speculation. Note, while we iterate over a DenseMap here, we are summing + // and so iteration order isn't significant. + for (const auto &[Inst, Count] : SinkCandidateUseCounts) + if (Inst->hasNUses(Count)) { + ++SpeculatedInstructions; + if (SpeculatedInstructions > 1) + return false; + } + + // Check that we can insert the selects and that it's not too expensive to do + // so. + bool Convert = SpeculatedStore != nullptr; + InstructionCost Cost = 0; + Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB, + SpeculatedInstructions, + Cost, TTI); + if (!Convert || Cost > Budget) + return false; + + // If we get here, we can hoist the instruction and if-convert. + LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";); + + // Insert a select of the value of the speculated store. + if (SpeculatedStoreValue) { + IRBuilder<NoFolder> Builder(BI); + Value *OrigV = SpeculatedStore->getValueOperand(); + Value *TrueV = SpeculatedStore->getValueOperand(); + Value *FalseV = SpeculatedStoreValue; + if (Invert) + std::swap(TrueV, FalseV); + Value *S = Builder.CreateSelect( + BrCond, TrueV, FalseV, "spec.store.select", BI); + SpeculatedStore->setOperand(0, S); + SpeculatedStore->applyMergedLocation(BI->getDebugLoc(), + SpeculatedStore->getDebugLoc()); + // The value stored is still conditional, but the store itself is now + // unconditonally executed, so we must be sure that any linked dbg.assign + // intrinsics are tracking the new stored value (the result of the + // select). If we don't, and the store were to be removed by another pass + // (e.g. DSE), then we'd eventually end up emitting a location describing + // the conditional value, unconditionally. + // + // === Before this transformation === + // pred: + // store %one, %x.dest, !DIAssignID !1 + // dbg.assign %one, "x", ..., !1, ... + // br %cond if.then + // + // if.then: + // store %two, %x.dest, !DIAssignID !2 + // dbg.assign %two, "x", ..., !2, ... + // + // === After this transformation === + // pred: + // store %one, %x.dest, !DIAssignID !1 + // dbg.assign %one, "x", ..., !1 + /// ... 
+ // %merge = select %cond, %two, %one + // store %merge, %x.dest, !DIAssignID !2 + // dbg.assign %merge, "x", ..., !2 + for (auto *DAI : at::getAssignmentMarkers(SpeculatedStore)) { + if (any_of(DAI->location_ops(), [&](Value *V) { return V == OrigV; })) + DAI->replaceVariableLocationOp(OrigV, S); + } + } + + // Metadata can be dependent on the condition we are hoisting above. + // Conservatively strip all metadata on the instruction. Drop the debug loc + // to avoid making it appear as if the condition is a constant, which would + // be misleading while debugging. + // Similarly strip attributes that maybe dependent on condition we are + // hoisting above. + for (auto &I : make_early_inc_range(*ThenBB)) { + if (!SpeculatedStoreValue || &I != SpeculatedStore) { + // Don't update the DILocation of dbg.assign intrinsics. + if (!isa<DbgAssignIntrinsic>(&I)) + I.setDebugLoc(DebugLoc()); + } + I.dropUndefImplyingAttrsAndUnknownMetadata(); + + // Drop ephemeral values. + if (EphTracker.contains(&I)) { + I.replaceAllUsesWith(PoisonValue::get(I.getType())); + I.eraseFromParent(); + } + } + + // Hoist the instructions. + BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(), + std::prev(ThenBB->end())); + + // Insert selects and rewrite the PHI operands. + IRBuilder<NoFolder> Builder(BI); + for (PHINode &PN : EndBB->phis()) { + unsigned OrigI = PN.getBasicBlockIndex(BB); + unsigned ThenI = PN.getBasicBlockIndex(ThenBB); + Value *OrigV = PN.getIncomingValue(OrigI); + Value *ThenV = PN.getIncomingValue(ThenI); + + // Skip PHIs which are trivial. + if (OrigV == ThenV) + continue; + + // Create a select whose true value is the speculatively executed value and + // false value is the pre-existing value. Swap them if the branch + // destinations were inverted. + Value *TrueV = ThenV, *FalseV = OrigV; + if (Invert) + std::swap(TrueV, FalseV); + Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI); + PN.setIncomingValue(OrigI, V); + PN.setIncomingValue(ThenI, V); + } + + // Remove speculated dbg intrinsics. + // FIXME: Is it possible to do this in a more elegant way? Moving/merging the + // dbg value for the different flows and inserting it after the select. + for (Instruction *I : SpeculatedDbgIntrinsics) { + // We still want to know that an assignment took place so don't remove + // dbg.assign intrinsics. + if (!isa<DbgAssignIntrinsic>(I)) + I->eraseFromParent(); + } + + ++NumSpeculations; + return true; +} + +/// Return true if we can thread a branch across this block. +static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { + int Size = 0; + EphemeralValueTracker EphTracker; + + // Walk the loop in reverse so that we can identify ephemeral values properly + // (values only feeding assumes). + for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) { + // Can't fold blocks that contain noduplicate or convergent calls. + if (CallInst *CI = dyn_cast<CallInst>(&I)) + if (CI->cannotDuplicate() || CI->isConvergent()) + return false; + + // Ignore ephemeral values which are deleted during codegen. + // We will delete Phis while threading, so Phis should not be accounted in + // block's size. + if (!EphTracker.track(&I) && !isa<PHINode>(I)) { + if (Size++ > MaxSmallBlockSize) + return false; // Don't clone large BB's. + } + + // We can only support instructions that do not define values that are + // live outside of the current basic block. 
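// A toy mimic of the EphemeralValueTracker pattern used above (and in the
// threading size check below): instructions are visited in reverse so that a
// value is already known to be ephemeral (it feeds only assumes, directly or
// transitively) by the time its definition is reached. ToyInst and its fields
// are illustrative; the real tracker also excludes terminators.

#include <unordered_set>
#include <vector>

struct ToyInst {
  bool IsAssume = false;
  bool HasSideEffects = false;
  std::vector<const ToyInst *> Users;  // instructions that use this value
};

static unsigned countNonEphemeral(const std::vector<ToyInst> &Block) {
  std::unordered_set<const ToyInst *> Eph;
  unsigned N = 0;
  for (auto It = Block.rbegin(); It != Block.rend(); ++It) {  // reverse walk
    const ToyInst &I = *It;
    bool AllUsersEphemeral = true;
    for (const ToyInst *U : I.Users)
      AllUsersEphemeral &= Eph.count(U) != 0;
    if (I.IsAssume || (!I.HasSideEffects && AllUsersEphemeral))
      Eph.insert(&I);                  // ignored for cost modelling
    else
      ++N;                             // counts toward the size budget
  }
  return N;
}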
+ for (User *U : I.users()) { + Instruction *UI = cast<Instruction>(U); + if (UI->getParent() != BB || isa<PHINode>(UI)) + return false; + } + + // Looks ok, continue checking. + } + + return true; +} + +static ConstantInt *getKnownValueOnEdge(Value *V, BasicBlock *From, + BasicBlock *To) { + // Don't look past the block defining the value, we might get the value from + // a previous loop iteration. + auto *I = dyn_cast<Instruction>(V); + if (I && I->getParent() == To) + return nullptr; + + // We know the value if the From block branches on it. + auto *BI = dyn_cast<BranchInst>(From->getTerminator()); + if (BI && BI->isConditional() && BI->getCondition() == V && + BI->getSuccessor(0) != BI->getSuccessor(1)) + return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext()) + : ConstantInt::getFalse(BI->getContext()); + + return nullptr; +} + +/// If we have a conditional branch on something for which we know the constant +/// value in predecessors (e.g. a phi node in the current block), thread edges +/// from the predecessor to their ultimate destination. +static std::optional<bool> +FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, + const DataLayout &DL, + AssumptionCache *AC) { + SmallMapVector<ConstantInt *, SmallSetVector<BasicBlock *, 2>, 2> KnownValues; + BasicBlock *BB = BI->getParent(); + Value *Cond = BI->getCondition(); + PHINode *PN = dyn_cast<PHINode>(Cond); + if (PN && PN->getParent() == BB) { + // Degenerate case of a single entry PHI. + if (PN->getNumIncomingValues() == 1) { + FoldSingleEntryPHINodes(PN->getParent()); + return true; + } + + for (Use &U : PN->incoming_values()) + if (auto *CB = dyn_cast<ConstantInt>(U)) + KnownValues[CB].insert(PN->getIncomingBlock(U)); + } else { + for (BasicBlock *Pred : predecessors(BB)) { + if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB)) + KnownValues[CB].insert(Pred); + } + } + + if (KnownValues.empty()) + return false; + + // Now we know that this block has multiple preds and two succs. + // Check that the block is small enough and values defined in the block are + // not used outside of it. + if (!BlockIsSimpleEnoughToThreadThrough(BB)) + return false; + + for (const auto &Pair : KnownValues) { + // Okay, we now know that all edges from PredBB should be revectored to + // branch to RealDest. + ConstantInt *CB = Pair.first; + ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef(); + BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue()); + + if (RealDest == BB) + continue; // Skip self loops. + + // Skip if the predecessor's terminator is an indirect branch. + if (any_of(PredBBs, [](BasicBlock *PredBB) { + return isa<IndirectBrInst>(PredBB->getTerminator()); + })) + continue; + + LLVM_DEBUG({ + dbgs() << "Condition " << *Cond << " in " << BB->getName() + << " has value " << *Pair.first << " in predecessors:\n"; + for (const BasicBlock *PredBB : Pair.second) + dbgs() << " " << PredBB->getName() << "\n"; + dbgs() << "Threading to destination " << RealDest->getName() << ".\n"; + }); + + // Split the predecessors we are threading into a new edge block. We'll + // clone the instructions into this block, and then redirect it to RealDest. + BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU); + + // TODO: These just exist to reduce test diff, we can drop them if we like. + EdgeBB->setName(RealDest->getName() + ".critedge"); + EdgeBB->moveBefore(RealDest); + + // Update PHI nodes. 
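// A source-level sketch of what the surrounding threading transform achieves:
// when a branch condition is a PHI whose value is a known constant on a given
// incoming edge, that predecessor is redirected straight to the destination
// the constant selects. Function and variable names here are illustrative.

static int threadBefore(bool Sel, int A, int B) {
  bool Flag;
  if (Sel)
    Flag = true;      // one predecessor contributes a constant to the PHI
  else
    Flag = false;     // the other predecessor contributes the other constant
  if (Flag)           // conditional branch on the PHI
    return A;
  return B;
}

static int threadAfter(bool Sel, int A, int B) {
  // Each predecessor now jumps directly to its known destination; the PHI and
  // the intermediate conditional branch disappear.
  if (Sel)
    return A;
  return B;
}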
+ AddPredecessorToBlock(RealDest, EdgeBB, BB); + + // BB may have instructions that are being threaded over. Clone these + // instructions into EdgeBB. We know that there will be no uses of the + // cloned instructions outside of EdgeBB. + BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt(); + DenseMap<Value *, Value *> TranslateMap; // Track translated values. + TranslateMap[Cond] = CB; + for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { + if (PHINode *PN = dyn_cast<PHINode>(BBI)) { + TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB); + continue; + } + // Clone the instruction. + Instruction *N = BBI->clone(); + if (BBI->hasName()) + N->setName(BBI->getName() + ".c"); + + // Update operands due to translation. + for (Use &Op : N->operands()) { + DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(Op); + if (PI != TranslateMap.end()) + Op = PI->second; + } + + // Check for trivial simplification. + if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) { + if (!BBI->use_empty()) + TranslateMap[&*BBI] = V; + if (!N->mayHaveSideEffects()) { + N->deleteValue(); // Instruction folded away, don't need actual inst + N = nullptr; + } + } else { + if (!BBI->use_empty()) + TranslateMap[&*BBI] = N; + } + if (N) { + // Insert the new instruction into its new home. + N->insertInto(EdgeBB, InsertPt); + + // Register the new instruction with the assumption cache if necessary. + if (auto *Assume = dyn_cast<AssumeInst>(N)) + if (AC) + AC->registerAssumption(Assume); + } + } + + BB->removePredecessor(EdgeBB); + BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator()); + EdgeBI->setSuccessor(0, RealDest); + EdgeBI->setDebugLoc(BI->getDebugLoc()); + + if (DTU) { + SmallVector<DominatorTree::UpdateType, 2> Updates; + Updates.push_back({DominatorTree::Delete, EdgeBB, BB}); + Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest}); + DTU->applyUpdates(Updates); + } + + // For simplicity, we created a separate basic block for the edge. Merge + // it back into the predecessor if possible. This not only avoids + // unnecessary SimplifyCFG iterations, but also makes sure that we don't + // bypass the check for trivial cycles above. + MergeBlockIntoPredecessor(EdgeBB, DTU); + + // Signal repeat, simplifying any other constants. + return std::nullopt; + } + + return false; +} + +static bool FoldCondBranchOnValueKnownInPredecessor(BranchInst *BI, + DomTreeUpdater *DTU, + const DataLayout &DL, + AssumptionCache *AC) { + std::optional<bool> Result; + bool EverChanged = false; + do { + // Note that None means "we changed things, but recurse further." + Result = FoldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC); + EverChanged |= Result == std::nullopt || *Result; + } while (Result == std::nullopt); + return EverChanged; +} + +/// Given a BB that starts with the specified two-entry PHI node, +/// see if we can eliminate it. +static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, + DomTreeUpdater *DTU, const DataLayout &DL) { + // Ok, this is a two entry PHI node. Check to see if this is a simple "if + // statement", which has a very simple dominance structure. Basically, we + // are trying to find the condition that is being branched on, which + // subsequently causes this merge to happen. We really want control + // dependence information for this check, but simplifycfg can't keep it up + // to date, and this catches most of the cases we care about anyway. 
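// A source-level sketch of the two-entry PHI elimination performed below: a
// diamond (or triangle) whose merge point only merges two values becomes a
// single select on the dominating condition. Names are illustrative only.

static int twoEntryPhiBefore(bool C, int X, int Y) {
  int R;
  if (C)
    R = X + 1;        // "then" side value
  else
    R = Y * 2;        // "else" side value
  return R;           // two-entry PHI at the merge block
}

static int twoEntryPhiAfter(bool C, int X, int Y) {
  // Both sides are cheap and safe to execute unconditionally, so they are
  // hoisted above the branch and the PHI becomes a select.
  int T = X + 1;
  int E = Y * 2;
  return C ? T : E;
}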
+ BasicBlock *BB = PN->getParent(); + + BasicBlock *IfTrue, *IfFalse; + BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse); + if (!DomBI) + return false; + Value *IfCond = DomBI->getCondition(); + // Don't bother if the branch will be constant folded trivially. + if (isa<ConstantInt>(IfCond)) + return false; + + BasicBlock *DomBlock = DomBI->getParent(); + SmallVector<BasicBlock *, 2> IfBlocks; + llvm::copy_if( + PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) { + return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional(); + }); + assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) && + "Will have either one or two blocks to speculate."); + + // If the branch is non-unpredictable, see if we either predictably jump to + // the merge bb (if we have only a single 'then' block), or if we predictably + // jump to one specific 'then' block (if we have two of them). + // It isn't beneficial to speculatively execute the code + // from the block that we know is predictably not entered. + if (!DomBI->getMetadata(LLVMContext::MD_unpredictable)) { + uint64_t TWeight, FWeight; + if (extractBranchWeights(*DomBI, TWeight, FWeight) && + (TWeight + FWeight) != 0) { + BranchProbability BITrueProb = + BranchProbability::getBranchProbability(TWeight, TWeight + FWeight); + BranchProbability Likely = TTI.getPredictableBranchThreshold(); + BranchProbability BIFalseProb = BITrueProb.getCompl(); + if (IfBlocks.size() == 1) { + BranchProbability BIBBProb = + DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb; + if (BIBBProb >= Likely) + return false; + } else { + if (BITrueProb >= Likely || BIFalseProb >= Likely) + return false; + } + } + } + + // Don't try to fold an unreachable block. For example, the phi node itself + // can't be the candidate if-condition for a select that we want to form. + if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond)) + if (IfCondPhiInst->getParent() == BB) + return false; + + // Okay, we found that we can merge this two-entry phi node into a select. + // Doing so would require us to fold *all* two entry phi nodes in this block. + // At some point this becomes non-profitable (particularly if the target + // doesn't support cmov's). Only do this transformation if there are two or + // fewer PHI nodes in this block. + unsigned NumPhis = 0; + for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I) + if (NumPhis > 2) + return false; + + // Loop over the PHI's seeing if we can promote them all to select + // instructions. While we are at it, keep track of the instructions + // that need to be moved to the dominating block. + SmallPtrSet<Instruction *, 4> AggressiveInsts; + InstructionCost Cost = 0; + InstructionCost Budget = + TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; + + bool Changed = false; + for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) { + PHINode *PN = cast<PHINode>(II++); + if (Value *V = simplifyInstruction(PN, {DL, PN})) { + PN->replaceAllUsesWith(V); + PN->eraseFromParent(); + Changed = true; + continue; + } + + if (!dominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts, + Cost, Budget, TTI) || + !dominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts, + Cost, Budget, TTI)) + return Changed; + } + + // If we folded the first phi, PN dangles at this point. Refresh it. If + // we ran out of PHIs then we simplified them all. 
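// The CanHoistNotFromBothValues() check just below rests on the identity
// demonstrated here: a select between ~X and a constant K can be rewritten as
// the inversion of a select between X and ~K, i.e. the 'not' hoists out of
// the select. This is a self-contained illustration, not code the pass uses.

#include <cassert>
#include <cstdint>

static void notHoistIdentity(bool C, uint32_t X, uint32_t K) {
  uint32_t Direct = C ? ~X : K;       // select(C, ~X, K)
  uint32_t Hoisted = ~(C ? X : ~K);   // ~select(C, X, ~K)
  assert(Direct == Hoisted);
  (void)Direct;
  (void)Hoisted;
}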
+ PN = dyn_cast<PHINode>(BB->begin()); + if (!PN) + return true; + + // Return true if at least one of these is a 'not', and another is either + // a 'not' too, or a constant. + auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) { + if (!match(V0, m_Not(m_Value()))) + std::swap(V0, V1); + auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant()); + return match(V0, m_Not(m_Value())) && match(V1, Invertible); + }; + + // Don't fold i1 branches on PHIs which contain binary operators or + // (possibly inverted) select form of or/ands, unless one of + // the incoming values is an 'not' and another one is freely invertible. + // These can often be turned into switches and other things. + auto IsBinOpOrAnd = [](Value *V) { + return match( + V, m_CombineOr( + m_BinOp(), + m_CombineOr(m_Select(m_Value(), m_ImmConstant(), m_Value()), + m_Select(m_Value(), m_Value(), m_ImmConstant())))); + }; + if (PN->getType()->isIntegerTy(1) && + (IsBinOpOrAnd(PN->getIncomingValue(0)) || + IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) && + !CanHoistNotFromBothValues(PN->getIncomingValue(0), + PN->getIncomingValue(1))) + return Changed; + + // If all PHI nodes are promotable, check to make sure that all instructions + // in the predecessor blocks can be promoted as well. If not, we won't be able + // to get rid of the control flow, so it's not worth promoting to select + // instructions. + for (BasicBlock *IfBlock : IfBlocks) + for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I) + if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) { + // This is not an aggressive instruction that we can promote. + // Because of this, we won't be able to get rid of the control flow, so + // the xform is not worth it. + return Changed; + } + + // If either of the blocks has it's address taken, we can't do this fold. + if (any_of(IfBlocks, + [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); })) + return Changed; + + LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond + << " T: " << IfTrue->getName() + << " F: " << IfFalse->getName() << "\n"); + + // If we can still promote the PHI nodes after this gauntlet of tests, + // do all of the PHI's now. + + // Move all 'aggressive' instructions, which are defined in the + // conditional parts of the if's up to the dominating block. + for (BasicBlock *IfBlock : IfBlocks) + hoistAllInstructionsInto(DomBlock, DomBI, IfBlock); + + IRBuilder<NoFolder> Builder(DomBI); + // Propagate fast-math-flags from phi nodes to replacement selects. + IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); + while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { + if (isa<FPMathOperator>(PN)) + Builder.setFastMathFlags(PN->getFastMathFlags()); + + // Change the PHI node into a select instruction. + Value *TrueVal = PN->getIncomingValueForBlock(IfTrue); + Value *FalseVal = PN->getIncomingValueForBlock(IfFalse); + + Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", DomBI); + PN->replaceAllUsesWith(Sel); + Sel->takeName(PN); + PN->eraseFromParent(); + } + + // At this point, all IfBlocks are empty, so our if statement + // has been flattened. Change DomBlock to jump directly to our new block to + // avoid other simplifycfg's kicking in on the diamond. 
+ Builder.CreateBr(BB); + + SmallVector<DominatorTree::UpdateType, 3> Updates; + if (DTU) { + Updates.push_back({DominatorTree::Insert, DomBlock, BB}); + for (auto *Successor : successors(DomBlock)) + Updates.push_back({DominatorTree::Delete, DomBlock, Successor}); + } + + DomBI->eraseFromParent(); + if (DTU) + DTU->applyUpdates(Updates); + + return true; +} + +static Value *createLogicalOp(IRBuilderBase &Builder, + Instruction::BinaryOps Opc, Value *LHS, + Value *RHS, const Twine &Name = "") { + // Try to relax logical op to binary op. + if (impliesPoison(RHS, LHS)) + return Builder.CreateBinOp(Opc, LHS, RHS, Name); + if (Opc == Instruction::And) + return Builder.CreateLogicalAnd(LHS, RHS, Name); + if (Opc == Instruction::Or) + return Builder.CreateLogicalOr(LHS, RHS, Name); + llvm_unreachable("Invalid logical opcode"); +} + +/// Return true if either PBI or BI has branch weight available, and store +/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does +/// not have branch weight, use 1:1 as its weight. +static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, + uint64_t &PredTrueWeight, + uint64_t &PredFalseWeight, + uint64_t &SuccTrueWeight, + uint64_t &SuccFalseWeight) { + bool PredHasWeights = + extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight); + bool SuccHasWeights = + extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight); + if (PredHasWeights || SuccHasWeights) { + if (!PredHasWeights) + PredTrueWeight = PredFalseWeight = 1; + if (!SuccHasWeights) + SuccTrueWeight = SuccFalseWeight = 1; + return true; + } else { + return false; + } +} + +/// Determine if the two branches share a common destination and deduce a glue +/// that joins the branches' conditions to arrive at the common destination if +/// that would be profitable. +static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>> +shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, + const TargetTransformInfo *TTI) { + assert(BI && PBI && BI->isConditional() && PBI->isConditional() && + "Both blocks must end with a conditional branches."); + assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) && + "PredBB must be a predecessor of BB."); + + // We have the potential to fold the conditions together, but if the + // predecessor branch is predictable, we may not want to merge them. + uint64_t PTWeight, PFWeight; + BranchProbability PBITrueProb, Likely; + if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) && + extractBranchWeights(*PBI, PTWeight, PFWeight) && + (PTWeight + PFWeight) != 0) { + PBITrueProb = + BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight); + Likely = TTI->getPredictableBranchThreshold(); + } + + if (PBI->getSuccessor(0) == BI->getSuccessor(0)) { + // Speculate the 2nd condition unless the 1st is probably true. + if (PBITrueProb.isUnknown() || PBITrueProb < Likely) + return {{BI->getSuccessor(0), Instruction::Or, false}}; + } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) { + // Speculate the 2nd condition unless the 1st is probably false. + if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely) + return {{BI->getSuccessor(1), Instruction::And, false}}; + } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) { + // Speculate the 2nd condition unless the 1st is probably true. 
+ if (PBITrueProb.isUnknown() || PBITrueProb < Likely) + return {{BI->getSuccessor(1), Instruction::And, true}}; + } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) { + // Speculate the 2nd condition unless the 1st is probably false. + if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely) + return {{BI->getSuccessor(0), Instruction::Or, true}}; + } + return std::nullopt; +} + +static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, + DomTreeUpdater *DTU, + MemorySSAUpdater *MSSAU, + const TargetTransformInfo *TTI) { + BasicBlock *BB = BI->getParent(); + BasicBlock *PredBlock = PBI->getParent(); + + // Determine if the two branches share a common destination. + BasicBlock *CommonSucc; + Instruction::BinaryOps Opc; + bool InvertPredCond; + std::tie(CommonSucc, Opc, InvertPredCond) = + *shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI); + + LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); + + IRBuilder<> Builder(PBI); + // The builder is used to create instructions to eliminate the branch in BB. + // If BB's terminator has !annotation metadata, add it to the new + // instructions. + Builder.CollectMetadataToCopy(BB->getTerminator(), + {LLVMContext::MD_annotation}); + + // If we need to invert the condition in the pred block to match, do so now. + if (InvertPredCond) { + Value *NewCond = PBI->getCondition(); + if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) { + CmpInst *CI = cast<CmpInst>(NewCond); + CI->setPredicate(CI->getInversePredicate()); + } else { + NewCond = + Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not"); + } + + PBI->setCondition(NewCond); + PBI->swapSuccessors(); + } + + BasicBlock *UniqueSucc = + PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1); + + // Before cloning instructions, notify the successor basic block that it + // is about to have a new predecessor. This will update PHI nodes, + // which will allow us to update live-out uses of bonus instructions. + AddPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU); + + // Try to update branch weights. + uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; + if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight, + SuccTrueWeight, SuccFalseWeight)) { + SmallVector<uint64_t, 8> NewWeights; + + if (PBI->getSuccessor(0) == BB) { + // PBI: br i1 %x, BB, FalseDest + // BI: br i1 %y, UniqueSucc, FalseDest + // TrueWeight is TrueWeight for PBI * TrueWeight for BI. + NewWeights.push_back(PredTrueWeight * SuccTrueWeight); + // FalseWeight is FalseWeight for PBI * TotalWeight for BI + + // TrueWeight for PBI * FalseWeight for BI. + // We assume that total weights of a BranchInst can fit into 32 bits. + // Therefore, we will not have overflow using 64-bit arithmetic. + NewWeights.push_back(PredFalseWeight * + (SuccFalseWeight + SuccTrueWeight) + + PredTrueWeight * SuccFalseWeight); + } else { + // PBI: br i1 %x, TrueDest, BB + // BI: br i1 %y, TrueDest, UniqueSucc + // TrueWeight is TrueWeight for PBI * TotalWeight for BI + + // FalseWeight for PBI * TrueWeight for BI. + NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) + + PredFalseWeight * SuccTrueWeight); + // FalseWeight is FalseWeight for PBI * FalseWeight for BI. 
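// A standalone numeric sketch of the weight merging in the surrounding code
// for the "PBI: br %x, BB, FalseDest / BI: br %y, UniqueSucc, FalseDest"
// shape, including the halving that keeps the results within uint32_t. The
// halving loop stands in for FitWeights() and is a simplification.

#include <cstdint>
#include <utility>

static std::pair<uint32_t, uint32_t>
mergeWeightsSketch(uint64_t PredTrue, uint64_t PredFalse, uint64_t SuccTrue,
                   uint64_t SuccFalse) {
  // Reaching UniqueSucc requires taking both true edges.
  uint64_t NewTrue = PredTrue * SuccTrue;
  // FalseDest is reached either on PBI's false edge (whatever BI then does)
  // or on PBI's true edge followed by BI's false edge.
  uint64_t NewFalse =
      PredFalse * (SuccTrue + SuccFalse) + PredTrue * SuccFalse;
  while (NewTrue > UINT32_MAX || NewFalse > UINT32_MAX) {
    NewTrue /= 2;                      // keep the ratio, shrink into 32 bits
    NewFalse /= 2;
  }
  return {static_cast<uint32_t>(NewTrue), static_cast<uint32_t>(NewFalse)};
}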
+ NewWeights.push_back(PredFalseWeight * SuccFalseWeight); + } + + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(NewWeights); + + SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end()); + setBranchWeights(PBI, MDWeights[0], MDWeights[1]); + + // TODO: If BB is reachable from all paths through PredBlock, then we + // could replace PBI's branch probabilities with BI's. + } else + PBI->setMetadata(LLVMContext::MD_prof, nullptr); + + // Now, update the CFG. + PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc); + + if (DTU) + DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc}, + {DominatorTree::Delete, PredBlock, BB}}); + + // If BI was a loop latch, it may have had associated loop metadata. + // We need to copy it to the new latch, that is, PBI. + if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop)) + PBI->setMetadata(LLVMContext::MD_loop, LoopMD); + + ValueToValueMapTy VMap; // maps original values to cloned values + CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap); + + // Now that the Cond was cloned into the predecessor basic block, + // or/and the two conditions together. + Value *BICond = VMap[BI->getCondition()]; + PBI->setCondition( + createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond")); + + // Copy any debug value intrinsics into the end of PredBlock. + for (Instruction &I : *BB) { + if (isa<DbgInfoIntrinsic>(I)) { + Instruction *NewI = I.clone(); + RemapInstruction(NewI, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + NewI->insertBefore(PBI); + } + } + + ++NumFoldBranchToCommonDest; + return true; +} + +/// Return if an instruction's type or any of its operands' types are a vector +/// type. +static bool isVectorOp(Instruction &I) { + return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) { + return U->getType()->isVectorTy(); + }); +} + +/// If this basic block is simple enough, and if a predecessor branches to us +/// and one of our successors, fold the block into the predecessor and use +/// logical operations to pick the right destination. +bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU, + MemorySSAUpdater *MSSAU, + const TargetTransformInfo *TTI, + unsigned BonusInstThreshold) { + // If this block ends with an unconditional branch, + // let SpeculativelyExecuteBB() deal with it. + if (!BI->isConditional()) + return false; + + BasicBlock *BB = BI->getParent(); + TargetTransformInfo::TargetCostKind CostKind = + BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize + : TargetTransformInfo::TCK_SizeAndLatency; + + Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); + + if (!Cond || + (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond) && + !isa<SelectInst>(Cond)) || + Cond->getParent() != BB || !Cond->hasOneUse()) + return false; + + // Finally, don't infinitely unroll conditional loops. + if (is_contained(successors(BB), BB)) + return false; + + // With which predecessors will we want to deal with? + SmallVector<BasicBlock *, 8> Preds; + for (BasicBlock *PredBlock : predecessors(BB)) { + BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator()); + + // Check that we have two conditional branches. If there is a PHI node in + // the common successor, verify that the same value flows in from both + // blocks. + if (!PBI || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI)) + continue; + + // Determine if the two branches share a common destination. 
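// A source-level sketch of the folding that FoldBranchToCommonDest performs:
// a predecessor branch and the branch in BB that share a destination are
// combined into one branch on a logical op of the two conditions, and BB's
// "bonus" instruction (here the add) is cloned into the predecessor. Names
// are illustrative only.

static int foldToCommonDestBefore(bool A, int X) {
  if (A) {                  // PBI in the predecessor block
    int Bonus = X + 1;      // bonus instruction in BB
    if (Bonus > 10)         // BI; its false edge joins PBI's false edge
      return 1;
  }
  return 0;                 // common destination
}

static int foldToCommonDestAfter(bool A, int X) {
  int Bonus = X + 1;        // cloned into the predecessor; must be safe to
                            // execute unconditionally
  if (A && Bonus > 10)      // conditions glued with a logical op
    return 1;
  return 0;
}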
+ BasicBlock *CommonSucc; + Instruction::BinaryOps Opc; + bool InvertPredCond; + if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI)) + std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe; + else + continue; + + // Check the cost of inserting the necessary logic before performing the + // transformation. + if (TTI) { + Type *Ty = BI->getCondition()->getType(); + InstructionCost Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind); + if (InvertPredCond && (!PBI->getCondition()->hasOneUse() || + !isa<CmpInst>(PBI->getCondition()))) + Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind); + + if (Cost > BranchFoldThreshold) + continue; + } + + // Ok, we do want to deal with this predecessor. Record it. + Preds.emplace_back(PredBlock); + } + + // If there aren't any predecessors into which we can fold, + // don't bother checking the cost. + if (Preds.empty()) + return false; + + // Only allow this transformation if computing the condition doesn't involve + // too many instructions and these involved instructions can be executed + // unconditionally. We denote all involved instructions except the condition + // as "bonus instructions", and only allow this transformation when the + // number of the bonus instructions we'll need to create when cloning into + // each predecessor does not exceed a certain threshold. + unsigned NumBonusInsts = 0; + bool SawVectorOp = false; + const unsigned PredCount = Preds.size(); + for (Instruction &I : *BB) { + // Don't check the branch condition comparison itself. + if (&I == Cond) + continue; + // Ignore dbg intrinsics, and the terminator. + if (isa<DbgInfoIntrinsic>(I) || isa<BranchInst>(I)) + continue; + // I must be safe to execute unconditionally. + if (!isSafeToSpeculativelyExecute(&I)) + return false; + SawVectorOp |= isVectorOp(I); + + // Account for the cost of duplicating this instruction into each + // predecessor. Ignore free instructions. + if (!TTI || TTI->getInstructionCost(&I, CostKind) != + TargetTransformInfo::TCC_Free) { + NumBonusInsts += PredCount; + + // Early exits once we reach the limit. + if (NumBonusInsts > + BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier) + return false; + } + + auto IsBCSSAUse = [BB, &I](Use &U) { + auto *UI = cast<Instruction>(U.getUser()); + if (auto *PN = dyn_cast<PHINode>(UI)) + return PN->getIncomingBlock(U) == BB; + return UI->getParent() == BB && I.comesBefore(UI); + }; + + // Does this instruction require rewriting of uses? + if (!all_of(I.uses(), IsBCSSAUse)) + return false; + } + if (NumBonusInsts > + BonusInstThreshold * + (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1)) + return false; + + // Ok, we have the budget. Perform the transformation. + for (BasicBlock *PredBlock : Preds) { + auto *PBI = cast<BranchInst>(PredBlock->getTerminator()); + return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI); + } + return false; +} + +// If there is only one store in BB1 and BB2, return it, otherwise return +// nullptr. +static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) { + StoreInst *S = nullptr; + for (auto *BB : {BB1, BB2}) { + if (!BB) + continue; + for (auto &I : *BB) + if (auto *SI = dyn_cast<StoreInst>(&I)) { + if (S) + // Multiple stores seen. 
+ return nullptr; + else + S = SI; + } + } + return S; +} + +static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, + Value *AlternativeV = nullptr) { + // PHI is going to be a PHI node that allows the value V that is defined in + // BB to be referenced in BB's only successor. + // + // If AlternativeV is nullptr, the only value we care about in PHI is V. It + // doesn't matter to us what the other operand is (it'll never get used). We + // could just create a new PHI with an undef incoming value, but that could + // increase register pressure if EarlyCSE/InstCombine can't fold it with some + // other PHI. So here we directly look for some PHI in BB's successor with V + // as an incoming operand. If we find one, we use it, else we create a new + // one. + // + // If AlternativeV is not nullptr, we care about both incoming values in PHI. + // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV] + // where OtherBB is the single other predecessor of BB's only successor. + PHINode *PHI = nullptr; + BasicBlock *Succ = BB->getSingleSuccessor(); + + for (auto I = Succ->begin(); isa<PHINode>(I); ++I) + if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) { + PHI = cast<PHINode>(I); + if (!AlternativeV) + break; + + assert(Succ->hasNPredecessors(2)); + auto PredI = pred_begin(Succ); + BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI; + if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV) + break; + PHI = nullptr; + } + if (PHI) + return PHI; + + // If V is not an instruction defined in BB, just return it. + if (!AlternativeV && + (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB)) + return V; + + PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge", &Succ->front()); + PHI->addIncoming(V, BB); + for (BasicBlock *PredBB : predecessors(Succ)) + if (PredBB != BB) + PHI->addIncoming( + AlternativeV ? AlternativeV : UndefValue::get(V->getType()), PredBB); + return PHI; +} + +static bool mergeConditionalStoreToAddress( + BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, + BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, + DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { + // For every pointer, there must be exactly two stores, one coming from + // PTB or PFB, and the other from QTB or QFB. We don't support more than one + // store (to any address) in PTB,PFB or QTB,QFB. + // FIXME: We could relax this restriction with a bit more work and performance + // testing. + StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB); + StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB); + if (!PStore || !QStore) + return false; + + // Now check the stores are compatible. + if (!QStore->isUnordered() || !PStore->isUnordered() || + PStore->getValueOperand()->getType() != + QStore->getValueOperand()->getType()) + return false; + + // Check that sinking the store won't cause program behavior changes. Sinking + // the store out of the Q blocks won't change any behavior as we're sinking + // from a block to its unconditional successor. But we're moving a store from + // the P blocks down through the middle block (QBI) and past both QFB and QTB. + // So we need to check that there are no aliasing loads or stores in + // QBI, QTB and QFB. We also need to check there are no conflicting memory + // operations between PStore and the end of its parent block. + // + // The ideal way to do this is to query AliasAnalysis, but we don't + // preserve AA currently so that is dangerous. 
Be super safe and just + // check there are no other memory operations at all. + for (auto &I : *QFB->getSinglePredecessor()) + if (I.mayReadOrWriteMemory()) + return false; + for (auto &I : *QFB) + if (&I != QStore && I.mayReadOrWriteMemory()) + return false; + if (QTB) + for (auto &I : *QTB) + if (&I != QStore && I.mayReadOrWriteMemory()) + return false; + for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end(); + I != E; ++I) + if (&*I != PStore && I->mayReadOrWriteMemory()) + return false; + + // If we're not in aggressive mode, we only optimize if we have some + // confidence that by optimizing we'll allow P and/or Q to be if-converted. + auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) { + if (!BB) + return true; + // Heuristic: if the block can be if-converted/phi-folded and the + // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to + // thread this store. + InstructionCost Cost = 0; + InstructionCost Budget = + PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; + for (auto &I : BB->instructionsWithoutDebug(false)) { + // Consider terminator instruction to be free. + if (I.isTerminator()) + continue; + // If this is one the stores that we want to speculate out of this BB, + // then don't count it's cost, consider it to be free. + if (auto *S = dyn_cast<StoreInst>(&I)) + if (llvm::find(FreeStores, S)) + continue; + // Else, we have a white-list of instructions that we are ak speculating. + if (!isa<BinaryOperator>(I) && !isa<GetElementPtrInst>(I)) + return false; // Not in white-list - not worthwhile folding. + // And finally, if this is a non-free instruction that we are okay + // speculating, ensure that we consider the speculation budget. + Cost += + TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency); + if (Cost > Budget) + return false; // Eagerly refuse to fold as soon as we're out of budget. + } + assert(Cost <= Budget && + "When we run out of budget we will eagerly return from within the " + "per-instruction loop."); + return true; + }; + + const std::array<StoreInst *, 2> FreeStores = {PStore, QStore}; + if (!MergeCondStoresAggressively && + (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) || + !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores))) + return false; + + // If PostBB has more than two predecessors, we need to split it so we can + // sink the store. + if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) { + // We know that QFB's only successor is PostBB. And QFB has a single + // predecessor. If QTB exists, then its only successor is also PostBB. + // If QTB does not exist, then QFB's only predecessor has a conditional + // branch to QFB and PostBB. + BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor(); + BasicBlock *NewBB = + SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU); + if (!NewBB) + return false; + PostBB = NewBB; + } + + // OK, we're going to sink the stores to PostBB. The store has to be + // conditional though, so first create the predicate. 
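// A source-level sketch of the conditional-store merging performed here: two
// predicated stores to the same address are replaced by a single store that
// is predicated on the union of the conditions and stores the value the
// original program would have left in memory. Names are illustrative; the
// real code additionally proves there are no intervening memory operations.

static void mergeCondStoresBefore(bool P, bool Q, int *A, int X, int Y) {
  if (P)
    *A = X;
  // ... no other loads or stores here ...
  if (Q)
    *A = Y;
}

static void mergeCondStoresAfter(bool P, bool Q, int *A, int X, int Y) {
  int V = Q ? Y : X;        // corresponds to the PHIs built for the two values
  if (P | Q)                // combined predicate guards the single store
    *A = V;
}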
+ Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator()) + ->getCondition(); + Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator()) + ->getCondition(); + + Value *PPHI = ensureValueAvailableInSuccessor(PStore->getValueOperand(), + PStore->getParent()); + Value *QPHI = ensureValueAvailableInSuccessor(QStore->getValueOperand(), + QStore->getParent(), PPHI); + + IRBuilder<> QB(&*PostBB->getFirstInsertionPt()); + + Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond); + Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond); + + if (InvertPCond) + PPred = QB.CreateNot(PPred); + if (InvertQCond) + QPred = QB.CreateNot(QPred); + Value *CombinedPred = QB.CreateOr(PPred, QPred); + + auto *T = SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(), + /*Unreachable=*/false, + /*BranchWeights=*/nullptr, DTU); + QB.SetInsertPoint(T); + StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address)); + SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata())); + // Choose the minimum alignment. If we could prove both stores execute, we + // could use biggest one. In this case, though, we only know that one of the + // stores executes. And we don't know it's safe to take the alignment from a + // store that doesn't execute. + SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign())); + + QStore->eraseFromParent(); + PStore->eraseFromParent(); + + return true; +} + +static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, + DomTreeUpdater *DTU, const DataLayout &DL, + const TargetTransformInfo &TTI) { + // The intention here is to find diamonds or triangles (see below) where each + // conditional block contains a store to the same address. Both of these + // stores are conditional, so they can't be unconditionally sunk. But it may + // be profitable to speculatively sink the stores into one merged store at the + // end, and predicate the merged store on the union of the two conditions of + // PBI and QBI. + // + // This can reduce the number of stores executed if both of the conditions are + // true, and can allow the blocks to become small enough to be if-converted. + // This optimization will also chain, so that ladders of test-and-set + // sequences can be if-converted away. + // + // We only deal with simple diamonds or triangles: + // + // PBI or PBI or a combination of the two + // / \ | \ + // PTB PFB | PFB + // \ / | / + // QBI QBI + // / \ | \ + // QTB QFB | QFB + // \ / | / + // PostBB PostBB + // + // We model triangles as a type of diamond with a nullptr "true" block. + // Triangles are canonicalized so that the fallthrough edge is represented by + // a true condition, as in the diagram above. + BasicBlock *PTB = PBI->getSuccessor(0); + BasicBlock *PFB = PBI->getSuccessor(1); + BasicBlock *QTB = QBI->getSuccessor(0); + BasicBlock *QFB = QBI->getSuccessor(1); + BasicBlock *PostBB = QFB->getSingleSuccessor(); + + // Make sure we have a good guess for PostBB. If QTB's only successor is + // QFB, then QFB is a better PostBB. + if (QTB->getSingleSuccessor() == QFB) + PostBB = QFB; + + // If we couldn't find a good PostBB, stop. + if (!PostBB) + return false; + + bool InvertPCond = false, InvertQCond = false; + // Canonicalize fallthroughs to the true branches. 
+ if (PFB == QBI->getParent()) { + std::swap(PFB, PTB); + InvertPCond = true; + } + if (QFB == PostBB) { + std::swap(QFB, QTB); + InvertQCond = true; + } + + // From this point on we can assume PTB or QTB may be fallthroughs but PFB + // and QFB may not. Model fallthroughs as a nullptr block. + if (PTB == QBI->getParent()) + PTB = nullptr; + if (QTB == PostBB) + QTB = nullptr; + + // Legality bailouts. We must have at least the non-fallthrough blocks and + // the post-dominating block, and the non-fallthroughs must only have one + // predecessor. + auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) { + return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S; + }; + if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) || + !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB)) + return false; + if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) || + (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB))) + return false; + if (!QBI->getParent()->hasNUses(2)) + return false; + + // OK, this is a sequence of two diamonds or triangles. + // Check if there are stores in PTB or PFB that are repeated in QTB or QFB. + SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses; + for (auto *BB : {PTB, PFB}) { + if (!BB) + continue; + for (auto &I : *BB) + if (StoreInst *SI = dyn_cast<StoreInst>(&I)) + PStoreAddresses.insert(SI->getPointerOperand()); + } + for (auto *BB : {QTB, QFB}) { + if (!BB) + continue; + for (auto &I : *BB) + if (StoreInst *SI = dyn_cast<StoreInst>(&I)) + QStoreAddresses.insert(SI->getPointerOperand()); + } + + set_intersect(PStoreAddresses, QStoreAddresses); + // set_intersect mutates PStoreAddresses in place. Rename it here to make it + // clear what it contains. + auto &CommonAddresses = PStoreAddresses; + + bool Changed = false; + for (auto *Address : CommonAddresses) + Changed |= + mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address, + InvertPCond, InvertQCond, DTU, DL, TTI); + return Changed; +} + +/// If the previous block ended with a widenable branch, determine if reusing +/// the target block is profitable and legal. This will have the effect of +/// "widening" PBI, but doesn't require us to reason about hosting safety. +static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, + DomTreeUpdater *DTU) { + // TODO: This can be generalized in two important ways: + // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input + // values from the PBI edge. + // 2) We can sink side effecting instructions into BI's fallthrough + // successor provided they doesn't contribute to computation of + // BI's condition. + Value *CondWB, *WC; + BasicBlock *IfTrueBB, *IfFalseBB; + if (!parseWidenableBranch(PBI, CondWB, WC, IfTrueBB, IfFalseBB) || + IfTrueBB != BI->getParent() || !BI->getParent()->getSinglePredecessor()) + return false; + if (!IfFalseBB->phis().empty()) + return false; // TODO + // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which + // may undo the transform done here. + // TODO: There might be a more fine-grained solution to this. + if (!llvm::succ_empty(IfFalseBB)) + return false; + // Use lambda to lazily compute expensive condition after cheap ones. 
+ auto NoSideEffects = [](BasicBlock &BB) { + return llvm::none_of(BB, [](const Instruction &I) { + return I.mayWriteToMemory() || I.mayHaveSideEffects(); + }); + }; + if (BI->getSuccessor(1) != IfFalseBB && // no inf looping + BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability + NoSideEffects(*BI->getParent())) { + auto *OldSuccessor = BI->getSuccessor(1); + OldSuccessor->removePredecessor(BI->getParent()); + BI->setSuccessor(1, IfFalseBB); + if (DTU) + DTU->applyUpdates( + {{DominatorTree::Insert, BI->getParent(), IfFalseBB}, + {DominatorTree::Delete, BI->getParent(), OldSuccessor}}); + return true; + } + if (BI->getSuccessor(0) != IfFalseBB && // no inf looping + BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability + NoSideEffects(*BI->getParent())) { + auto *OldSuccessor = BI->getSuccessor(0); + OldSuccessor->removePredecessor(BI->getParent()); + BI->setSuccessor(0, IfFalseBB); + if (DTU) + DTU->applyUpdates( + {{DominatorTree::Insert, BI->getParent(), IfFalseBB}, + {DominatorTree::Delete, BI->getParent(), OldSuccessor}}); + return true; + } + return false; +} + +/// If we have a conditional branch as a predecessor of another block, +/// this function tries to simplify it. We know +/// that PBI and BI are both conditional branches, and BI is in one of the +/// successor blocks of PBI - PBI branches to BI. +static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, + DomTreeUpdater *DTU, + const DataLayout &DL, + const TargetTransformInfo &TTI) { + assert(PBI->isConditional() && BI->isConditional()); + BasicBlock *BB = BI->getParent(); + + // If this block ends with a branch instruction, and if there is a + // predecessor that ends on a branch of the same condition, make + // this conditional branch redundant. + if (PBI->getCondition() == BI->getCondition() && + PBI->getSuccessor(0) != PBI->getSuccessor(1)) { + // Okay, the outcome of this conditional branch is statically + // knowable. If this block had a single pred, handle specially, otherwise + // FoldCondBranchOnValueKnownInPredecessor() will handle it. + if (BB->getSinglePredecessor()) { + // Turn this into a branch on constant. + bool CondIsTrue = PBI->getSuccessor(0) == BB; + BI->setCondition( + ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue)); + return true; // Nuke the branch on constant. + } + } + + // If the previous block ended with a widenable branch, determine if reusing + // the target block is profitable and legal. This will have the effect of + // "widening" PBI, but doesn't require us to reason about hosting safety. + if (tryWidenCondBranchToCondBranch(PBI, BI, DTU)) + return true; + + // If both branches are conditional and both contain stores to the same + // address, remove the stores from the conditionals and create a conditional + // merged store at the end. + if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI)) + return true; + + // If this is a conditional branch in an empty block, and if any + // predecessors are a conditional branch to one of our destinations, + // fold the conditions into logical ops and one cond br. + + // Ignore dbg intrinsics. 
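+  // For illustration only (hypothetical IR, names invented), in the simplest
+  // shape where both branches agree on the successor they share:
+  //
+  //   pbb: br i1 %p, label %common, label %bb
+  //   bb:  br i1 %q, label %common, label %other
+  //
+  // PBI is rewritten to branch on "or i1 %p, %q" directly to %common and
+  // %other, while %bb is left in place (it may have other predecessors).
+  // The check below requires BB to contain nothing but BI itself, ignoring
+  // debug intrinsics.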
+ if (&*BB->instructionsWithoutDebug(false).begin() != BI) + return false; + + int PBIOp, BIOp; + if (PBI->getSuccessor(0) == BI->getSuccessor(0)) { + PBIOp = 0; + BIOp = 0; + } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) { + PBIOp = 0; + BIOp = 1; + } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) { + PBIOp = 1; + BIOp = 0; + } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) { + PBIOp = 1; + BIOp = 1; + } else { + return false; + } + + // Check to make sure that the other destination of this branch + // isn't BB itself. If so, this is an infinite loop that will + // keep getting unwound. + if (PBI->getSuccessor(PBIOp) == BB) + return false; + + // Do not perform this transformation if it would require + // insertion of a large number of select instructions. For targets + // without predication/cmovs, this is a big pessimization. + + BasicBlock *CommonDest = PBI->getSuccessor(PBIOp); + BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1); + unsigned NumPhis = 0; + for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II); + ++II, ++NumPhis) { + if (NumPhis > 2) // Disable this xform. + return false; + } + + // Finally, if everything is ok, fold the branches to logical ops. + BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1); + + LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent() + << "AND: " << *BI->getParent()); + + SmallVector<DominatorTree::UpdateType, 5> Updates; + + // If OtherDest *is* BB, then BB is a basic block with a single conditional + // branch in it, where one edge (OtherDest) goes back to itself but the other + // exits. We don't *know* that the program avoids the infinite loop + // (even though that seems likely). If we do this xform naively, we'll end up + // recursively unpeeling the loop. Since we know that (after the xform is + // done) that the block *is* infinite if reached, we just make it an obviously + // infinite loop with no cond branch. + if (OtherDest == BB) { + // Insert it at the end of the function, because it's either code, + // or it won't matter if it's hot. :) + BasicBlock *InfLoopBlock = + BasicBlock::Create(BB->getContext(), "infloop", BB->getParent()); + BranchInst::Create(InfLoopBlock, InfLoopBlock); + if (DTU) + Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock}); + OtherDest = InfLoopBlock; + } + + LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent()); + + // BI may have other predecessors. Because of this, we leave + // it alone, but modify PBI. + + // Make sure we get to CommonDest on True&True directions. + Value *PBICond = PBI->getCondition(); + IRBuilder<NoFolder> Builder(PBI); + if (PBIOp) + PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not"); + + Value *BICond = BI->getCondition(); + if (BIOp) + BICond = Builder.CreateNot(BICond, BICond->getName() + ".not"); + + // Merge the conditions. + Value *Cond = + createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge"); + + // Modify PBI to branch on the new condition to the new dests. + PBI->setCondition(Cond); + PBI->setSuccessor(0, CommonDest); + PBI->setSuccessor(1, OtherDest); + + if (DTU) { + Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest}); + Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest}); + + DTU->applyUpdates(Updates); + } + + // Update branch weight for PBI. 
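+  // Worked example (numbers invented for illustration): with PredCommon = 3,
+  // PredOther = 1, SuccCommon = 5 and SuccOther = 2, the new weight to
+  // CommonDest is 3 * (5 + 2) + 1 * 5 = 26 and the weight to OtherDest is
+  // 1 * 2 = 2, matching the formulas below.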
+ uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; + uint64_t PredCommon, PredOther, SuccCommon, SuccOther; + bool HasWeights = + extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight, + SuccTrueWeight, SuccFalseWeight); + if (HasWeights) { + PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight; + PredOther = PBIOp ? PredTrueWeight : PredFalseWeight; + SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight; + SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight; + // The weight to CommonDest should be PredCommon * SuccTotal + + // PredOther * SuccCommon. + // The weight to OtherDest should be PredOther * SuccOther. + uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) + + PredOther * SuccCommon, + PredOther * SuccOther}; + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(NewWeights); + + setBranchWeights(PBI, NewWeights[0], NewWeights[1]); + } + + // OtherDest may have phi nodes. If so, add an entry from PBI's + // block that are identical to the entries for BI's block. + AddPredecessorToBlock(OtherDest, PBI->getParent(), BB); + + // We know that the CommonDest already had an edge from PBI to + // it. If it has PHIs though, the PHIs may have different + // entries for BB and PBI's BB. If so, insert a select to make + // them agree. + for (PHINode &PN : CommonDest->phis()) { + Value *BIV = PN.getIncomingValueForBlock(BB); + unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent()); + Value *PBIV = PN.getIncomingValue(PBBIdx); + if (BIV != PBIV) { + // Insert a select in PBI to pick the right value. + SelectInst *NV = cast<SelectInst>( + Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux")); + PN.setIncomingValue(PBBIdx, NV); + // Although the select has the same condition as PBI, the original branch + // weights for PBI do not apply to the new select because the select's + // 'logical' edges are incoming edges of the phi that is eliminated, not + // the outgoing edges of PBI. + if (HasWeights) { + uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight; + uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight; + uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight; + uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight; + // The weight to PredCommonDest should be PredCommon * SuccTotal. + // The weight to PredOtherDest should be PredOther * SuccCommon. + uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther), + PredOther * SuccCommon}; + + FitWeights(NewWeights); + + setBranchWeights(NV, NewWeights[0], NewWeights[1]); + } + } + } + + LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent()); + LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent()); + + // This basic block is probably dead. We know it has at least + // one fewer predecessor. + return true; +} + +// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is +// true or to FalseBB if Cond is false. +// Takes care of updating the successors and removing the old terminator. +// Also makes sure not to introduce new successors by assuming that edges to +// non-successor TrueBBs and FalseBBs aren't reachable. +bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm, + Value *Cond, BasicBlock *TrueBB, + BasicBlock *FalseBB, + uint32_t TrueWeight, + uint32_t FalseWeight) { + auto *BB = OldTerm->getParent(); + // Remove any superfluous successor edges from the CFG. + // First, figure out which successors to preserve. 
+ // If TrueBB and FalseBB are equal, only try to preserve one copy of that + // successor. + BasicBlock *KeepEdge1 = TrueBB; + BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr; + + SmallSetVector<BasicBlock *, 2> RemovedSuccessors; + + // Then remove the rest. + for (BasicBlock *Succ : successors(OldTerm)) { + // Make sure only to keep exactly one copy of each edge. + if (Succ == KeepEdge1) + KeepEdge1 = nullptr; + else if (Succ == KeepEdge2) + KeepEdge2 = nullptr; + else { + Succ->removePredecessor(BB, + /*KeepOneInputPHIs=*/true); + + if (Succ != TrueBB && Succ != FalseBB) + RemovedSuccessors.insert(Succ); + } + } + + IRBuilder<> Builder(OldTerm); + Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc()); + + // Insert an appropriate new terminator. + if (!KeepEdge1 && !KeepEdge2) { + if (TrueBB == FalseBB) { + // We were only looking for one successor, and it was present. + // Create an unconditional branch to it. + Builder.CreateBr(TrueBB); + } else { + // We found both of the successors we were looking for. + // Create a conditional branch sharing the condition of the select. + BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB); + if (TrueWeight != FalseWeight) + setBranchWeights(NewBI, TrueWeight, FalseWeight); + } + } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) { + // Neither of the selected blocks were successors, so this + // terminator must be unreachable. + new UnreachableInst(OldTerm->getContext(), OldTerm); + } else { + // One of the selected values was a successor, but the other wasn't. + // Insert an unconditional branch to the one that was found; + // the edge to the one that wasn't must be unreachable. + if (!KeepEdge1) { + // Only TrueBB was found. + Builder.CreateBr(TrueBB); + } else { + // Only FalseBB was found. + Builder.CreateBr(FalseBB); + } + } + + EraseTerminatorAndDCECond(OldTerm); + + if (DTU) { + SmallVector<DominatorTree::UpdateType, 2> Updates; + Updates.reserve(RemovedSuccessors.size()); + for (auto *RemovedSuccessor : RemovedSuccessors) + Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor}); + DTU->applyUpdates(Updates); + } + + return true; +} + +// Replaces +// (switch (select cond, X, Y)) on constant X, Y +// with a branch - conditional if X and Y lead to distinct BBs, +// unconditional otherwise. +bool SimplifyCFGOpt::SimplifySwitchOnSelect(SwitchInst *SI, + SelectInst *Select) { + // Check for constant integer values in the select. + ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue()); + ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue()); + if (!TrueVal || !FalseVal) + return false; + + // Find the relevant condition and destinations. + Value *Condition = Select->getCondition(); + BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor(); + BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor(); + + // Get weight for TrueBB and FalseBB. + uint32_t TrueWeight = 0, FalseWeight = 0; + SmallVector<uint64_t, 8> Weights; + bool HasWeights = hasBranchWeightMD(*SI); + if (HasWeights) { + GetBranchWeights(SI, Weights); + if (Weights.size() == 1 + SI->getNumCases()) { + TrueWeight = + (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()]; + FalseWeight = + (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()]; + } + } + + // Perform the actual simplification. 
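+  // For illustration only (hypothetical IR, names invented):
+  //
+  //   %v = select i1 %c, i32 1, i32 2
+  //   switch i32 %v, label %def [ i32 1, label %a
+  //                               i32 2, label %b ]
+  //
+  // becomes "br i1 %c, label %a, label %b"; if both constants map to the
+  // same case successor, SimplifyTerminatorOnSelect emits an unconditional
+  // branch instead.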
+ return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight, + FalseWeight); +} + +// Replaces +// (indirectbr (select cond, blockaddress(@fn, BlockA), +// blockaddress(@fn, BlockB))) +// with +// (br cond, BlockA, BlockB). +bool SimplifyCFGOpt::SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, + SelectInst *SI) { + // Check that both operands of the select are block addresses. + BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue()); + BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue()); + if (!TBA || !FBA) + return false; + + // Extract the actual blocks. + BasicBlock *TrueBB = TBA->getBasicBlock(); + BasicBlock *FalseBB = FBA->getBasicBlock(); + + // Perform the actual simplification. + return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0, + 0); +} + +/// This is called when we find an icmp instruction +/// (a seteq/setne with a constant) as the only instruction in a +/// block that ends with an uncond branch. We are looking for a very specific +/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In +/// this case, we merge the first two "or's of icmp" into a switch, but then the +/// default value goes to an uncond block with a seteq in it, we get something +/// like: +/// +/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ] +/// DEFAULT: +/// %tmp = icmp eq i8 %A, 92 +/// br label %end +/// end: +/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ] +/// +/// We prefer to split the edge to 'end' so that there is a true/false entry to +/// the PHI, merging the third icmp into the switch. +bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt( + ICmpInst *ICI, IRBuilder<> &Builder) { + BasicBlock *BB = ICI->getParent(); + + // If the block has any PHIs in it or the icmp has multiple uses, it is too + // complex. + if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse()) + return false; + + Value *V = ICI->getOperand(0); + ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1)); + + // The pattern we're looking for is where our only predecessor is a switch on + // 'V' and this block is the default case for the switch. In this case we can + // fold the compared value into the switch to simplify things. + BasicBlock *Pred = BB->getSinglePredecessor(); + if (!Pred || !isa<SwitchInst>(Pred->getTerminator())) + return false; + + SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator()); + if (SI->getCondition() != V) + return false; + + // If BB is reachable on a non-default case, then we simply know the value of + // V in this block. Substitute it and constant fold the icmp instruction + // away. + if (SI->getDefaultDest() != BB) { + ConstantInt *VVal = SI->findCaseDest(BB); + assert(VVal && "Should have a unique destination value"); + ICI->setOperand(0, VVal); + + if (Value *V = simplifyInstruction(ICI, {DL, ICI})) { + ICI->replaceAllUsesWith(V); + ICI->eraseFromParent(); + } + // BB is now empty, so it is likely to simplify away. + return requestResimplify(); + } + + // Ok, the block is reachable from the default dest. If the constant we're + // comparing exists in one of the other edges, then we can constant fold ICI + // and zap it. + if (SI->findCaseValue(Cst) != SI->case_default()) { + Value *V; + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + V = ConstantInt::getFalse(BB->getContext()); + else + V = ConstantInt::getTrue(BB->getContext()); + + ICI->replaceAllUsesWith(V); + ICI->eraseFromParent(); + // BB is now empty, so it is likely to simplify away. 
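+    // requestResimplify() only records that another SimplifyCFG iteration
+    // over the function is wanted (and returns true); the now-trivial block
+    // is cleaned up on that later iteration rather than recursively here.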
+ return requestResimplify(); + } + + // The use of the icmp has to be in the 'end' block, by the only PHI node in + // the block. + BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0); + PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back()); + if (PHIUse == nullptr || PHIUse != &SuccBlock->front() || + isa<PHINode>(++BasicBlock::iterator(PHIUse))) + return false; + + // If the icmp is a SETEQ, then the default dest gets false, the new edge gets + // true in the PHI. + Constant *DefaultCst = ConstantInt::getTrue(BB->getContext()); + Constant *NewCst = ConstantInt::getFalse(BB->getContext()); + + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + std::swap(DefaultCst, NewCst); + + // Replace ICI (which is used by the PHI for the default value) with true or + // false depending on if it is EQ or NE. + ICI->replaceAllUsesWith(DefaultCst); + ICI->eraseFromParent(); + + SmallVector<DominatorTree::UpdateType, 2> Updates; + + // Okay, the switch goes to this block on a default value. Add an edge from + // the switch to the merge point on the compared value. + BasicBlock *NewBB = + BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB); + { + SwitchInstProfUpdateWrapper SIW(*SI); + auto W0 = SIW.getSuccessorWeight(0); + SwitchInstProfUpdateWrapper::CaseWeightOpt NewW; + if (W0) { + NewW = ((uint64_t(*W0) + 1) >> 1); + SIW.setSuccessorWeight(0, *NewW); + } + SIW.addCase(Cst, NewBB, NewW); + if (DTU) + Updates.push_back({DominatorTree::Insert, Pred, NewBB}); + } + + // NewBB branches to the phi block, add the uncond branch and the phi entry. + Builder.SetInsertPoint(NewBB); + Builder.SetCurrentDebugLocation(SI->getDebugLoc()); + Builder.CreateBr(SuccBlock); + PHIUse->addIncoming(NewCst, NewBB); + if (DTU) { + Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock}); + DTU->applyUpdates(Updates); + } + return true; +} + +/// The specified branch is a conditional branch. +/// Check to see if it is branching on an or/and chain of icmp instructions, and +/// fold it into a switch instruction if so. +bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI, + IRBuilder<> &Builder, + const DataLayout &DL) { + Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); + if (!Cond) + return false; + + // Change br (X == 0 | X == 1), T, F into a switch instruction. + // If this is a bunch of seteq's or'd together, or if it's a bunch of + // 'setne's and'ed together, collect them. + + // Try to gather values from a chain of and/or to be turned into a switch + ConstantComparesGatherer ConstantCompare(Cond, DL); + // Unpack the result + SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals; + Value *CompVal = ConstantCompare.CompValue; + unsigned UsedICmps = ConstantCompare.UsedICmps; + Value *ExtraCase = ConstantCompare.Extra; + + // If we didn't have a multiply compared value, fail. + if (!CompVal) + return false; + + // Avoid turning single icmps into a switch. + if (UsedICmps <= 1) + return false; + + bool TrueWhenEqual = match(Cond, m_LogicalOr(m_Value(), m_Value())); + + // There might be duplicate constants in the list, which the switch + // instruction can't handle, remove them now. + array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate); + Values.erase(std::unique(Values.begin(), Values.end()), Values.end()); + + // If Extra was used, we require at least two switch values to do the + // transformation. A switch with one value is just a conditional branch. 
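+  // For illustration only (hypothetical IR, names invented), the basic case
+  // with no extra values:
+  //
+  //   %a  = icmp eq i32 %x, 0
+  //   %b  = icmp eq i32 %x, 1
+  //   %or = or i1 %a, %b
+  //   br i1 %or, label %T, label %F
+  //
+  // becomes
+  //
+  //   switch i32 %x, label %F [ i32 0, label %T
+  //                             i32 1, label %T ]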
+ if (ExtraCase && Values.size() < 2) + return false; + + // TODO: Preserve branch weight metadata, similarly to how + // FoldValueComparisonIntoPredecessors preserves it. + + // Figure out which block is which destination. + BasicBlock *DefaultBB = BI->getSuccessor(1); + BasicBlock *EdgeBB = BI->getSuccessor(0); + if (!TrueWhenEqual) + std::swap(DefaultBB, EdgeBB); + + BasicBlock *BB = BI->getParent(); + + LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size() + << " cases into SWITCH. BB is:\n" + << *BB); + + SmallVector<DominatorTree::UpdateType, 2> Updates; + + // If there are any extra values that couldn't be folded into the switch + // then we evaluate them with an explicit branch first. Split the block + // right before the condbr to handle it. + if (ExtraCase) { + BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr, + /*MSSAU=*/nullptr, "switch.early.test"); + + // Remove the uncond branch added to the old block. + Instruction *OldTI = BB->getTerminator(); + Builder.SetInsertPoint(OldTI); + + // There can be an unintended UB if extra values are Poison. Before the + // transformation, extra values may not be evaluated according to the + // condition, and it will not raise UB. But after transformation, we are + // evaluating extra values before checking the condition, and it will raise + // UB. It can be solved by adding freeze instruction to extra values. + AssumptionCache *AC = Options.AC; + + if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr)) + ExtraCase = Builder.CreateFreeze(ExtraCase); + + if (TrueWhenEqual) + Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB); + else + Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB); + + OldTI->eraseFromParent(); + + if (DTU) + Updates.push_back({DominatorTree::Insert, BB, EdgeBB}); + + // If there are PHI nodes in EdgeBB, then we need to add a new entry to them + // for the edge we just added. + AddPredecessorToBlock(EdgeBB, BB, NewBB); + + LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase + << "\nEXTRABB = " << *BB); + BB = NewBB; + } + + Builder.SetInsertPoint(BI); + // Convert pointer to int before we switch. + if (CompVal->getType()->isPointerTy()) { + CompVal = Builder.CreatePtrToInt( + CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr"); + } + + // Create the new switch instruction now. + SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size()); + + // Add all of the 'cases' to the switch instruction. + for (unsigned i = 0, e = Values.size(); i != e; ++i) + New->addCase(Values[i], EdgeBB); + + // We added edges from PI to the EdgeBB. As such, if there were any + // PHI nodes in EdgeBB, they need entries to be added corresponding to + // the number of edges added. + for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) { + PHINode *PN = cast<PHINode>(BBI); + Value *InVal = PN->getIncomingValueForBlock(BB); + for (unsigned i = 0, e = Values.size() - 1; i != e; ++i) + PN->addIncoming(InVal, BB); + } + + // Erase the old branch instruction. 
+ EraseTerminatorAndDCECond(BI); + if (DTU) + DTU->applyUpdates(Updates); + + LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n'); + return true; +} + +bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { + if (isa<PHINode>(RI->getValue())) + return simplifyCommonResume(RI); + else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) && + RI->getValue() == RI->getParent()->getFirstNonPHI()) + // The resume must unwind the exception that caused control to branch here. + return simplifySingleResume(RI); + + return false; +} + +// Check if cleanup block is empty +static bool isCleanupBlockEmpty(iterator_range<BasicBlock::iterator> R) { + for (Instruction &I : R) { + auto *II = dyn_cast<IntrinsicInst>(&I); + if (!II) + return false; + + Intrinsic::ID IntrinsicID = II->getIntrinsicID(); + switch (IntrinsicID) { + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::dbg_label: + case Intrinsic::lifetime_end: + break; + default: + return false; + } + } + return true; +} + +// Simplify resume that is shared by several landing pads (phi of landing pad). +bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) { + BasicBlock *BB = RI->getParent(); + + // Check that there are no other instructions except for debug and lifetime + // intrinsics between the phi's and resume instruction. + if (!isCleanupBlockEmpty( + make_range(RI->getParent()->getFirstNonPHI(), BB->getTerminator()))) + return false; + + SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks; + auto *PhiLPInst = cast<PHINode>(RI->getValue()); + + // Check incoming blocks to see if any of them are trivial. + for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End; + Idx++) { + auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx); + auto *IncomingValue = PhiLPInst->getIncomingValue(Idx); + + // If the block has other successors, we can not delete it because + // it has other dependents. + if (IncomingBB->getUniqueSuccessor() != BB) + continue; + + auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI()); + // Not the landing pad that caused the control to branch here. + if (IncomingValue != LandingPad) + continue; + + if (isCleanupBlockEmpty( + make_range(LandingPad->getNextNode(), IncomingBB->getTerminator()))) + TrivialUnwindBlocks.insert(IncomingBB); + } + + // If no trivial unwind blocks, don't do any simplifications. + if (TrivialUnwindBlocks.empty()) + return false; + + // Turn all invokes that unwind here into calls. + for (auto *TrivialBB : TrivialUnwindBlocks) { + // Blocks that will be simplified should be removed from the phi node. + // Note there could be multiple edges to the resume block, and we need + // to remove them all. + while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1) + BB->removePredecessor(TrivialBB, true); + + for (BasicBlock *Pred : + llvm::make_early_inc_range(predecessors(TrivialBB))) { + removeUnwindEdge(Pred, DTU); + ++NumInvokes; + } + + // In each SimplifyCFG run, only the current processed block can be erased. + // Otherwise, it will break the iteration of SimplifyCFG pass. So instead + // of erasing TrivialBB, we only remove the branch to the common resume + // block so that we can later erase the resume block since it has no + // predecessors. + TrivialBB->getTerminator()->eraseFromParent(); + new UnreachableInst(RI->getContext(), TrivialBB); + if (DTU) + DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}}); + } + + // Delete the resume block if all its predecessors have been removed. 
+ if (pred_empty(BB)) + DeleteDeadBlock(BB, DTU); + + return !TrivialUnwindBlocks.empty(); +} + +// Simplify resume that is only used by a single (non-phi) landing pad. +bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) { + BasicBlock *BB = RI->getParent(); + auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHI()); + assert(RI->getValue() == LPInst && + "Resume must unwind the exception that caused control to here"); + + // Check that there are no other instructions except for debug intrinsics. + if (!isCleanupBlockEmpty( + make_range<Instruction *>(LPInst->getNextNode(), RI))) + return false; + + // Turn all invokes that unwind here into calls and delete the basic block. + for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) { + removeUnwindEdge(Pred, DTU); + ++NumInvokes; + } + + // The landingpad is now unreachable. Zap it. + DeleteDeadBlock(BB, DTU); + return true; +} + +static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) { + // If this is a trivial cleanup pad that executes no instructions, it can be + // eliminated. If the cleanup pad continues to the caller, any predecessor + // that is an EH pad will be updated to continue to the caller and any + // predecessor that terminates with an invoke instruction will have its invoke + // instruction converted to a call instruction. If the cleanup pad being + // simplified does not continue to the caller, each predecessor will be + // updated to continue to the unwind destination of the cleanup pad being + // simplified. + BasicBlock *BB = RI->getParent(); + CleanupPadInst *CPInst = RI->getCleanupPad(); + if (CPInst->getParent() != BB) + // This isn't an empty cleanup. + return false; + + // We cannot kill the pad if it has multiple uses. This typically arises + // from unreachable basic blocks. + if (!CPInst->hasOneUse()) + return false; + + // Check that there are no other instructions except for benign intrinsics. + if (!isCleanupBlockEmpty( + make_range<Instruction *>(CPInst->getNextNode(), RI))) + return false; + + // If the cleanup return we are simplifying unwinds to the caller, this will + // set UnwindDest to nullptr. + BasicBlock *UnwindDest = RI->getUnwindDest(); + Instruction *DestEHPad = UnwindDest ? UnwindDest->getFirstNonPHI() : nullptr; + + // We're about to remove BB from the control flow. Before we do, sink any + // PHINodes into the unwind destination. Doing this before changing the + // control flow avoids some potentially slow checks, since we can currently + // be certain that UnwindDest and BB have no common predecessors (since they + // are both EH pads). + if (UnwindDest) { + // First, go through the PHI nodes in UnwindDest and update any nodes that + // reference the block we are removing + for (PHINode &DestPN : UnwindDest->phis()) { + int Idx = DestPN.getBasicBlockIndex(BB); + // Since BB unwinds to UnwindDest, it has to be in the PHI node. + assert(Idx != -1); + // This PHI node has an incoming value that corresponds to a control + // path through the cleanup pad we are removing. If the incoming + // value is in the cleanup pad, it must be a PHINode (because we + // verified above that the block is otherwise empty). Otherwise, the + // value is either a constant or a value that dominates the cleanup + // pad being removed. 
+ // + // Because BB and UnwindDest are both EH pads, all of their + // predecessors must unwind to these blocks, and since no instruction + // can have multiple unwind destinations, there will be no overlap in + // incoming blocks between SrcPN and DestPN. + Value *SrcVal = DestPN.getIncomingValue(Idx); + PHINode *SrcPN = dyn_cast<PHINode>(SrcVal); + + bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB; + for (auto *Pred : predecessors(BB)) { + Value *Incoming = + NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal; + DestPN.addIncoming(Incoming, Pred); + } + } + + // Sink any remaining PHI nodes directly into UnwindDest. + Instruction *InsertPt = DestEHPad; + for (PHINode &PN : make_early_inc_range(BB->phis())) { + if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB)) + // If the PHI node has no uses or all of its uses are in this basic + // block (meaning they are debug or lifetime intrinsics), just leave + // it. It will be erased when we erase BB below. + continue; + + // Otherwise, sink this PHI node into UnwindDest. + // Any predecessors to UnwindDest which are not already represented + // must be back edges which inherit the value from the path through + // BB. In this case, the PHI value must reference itself. + for (auto *pred : predecessors(UnwindDest)) + if (pred != BB) + PN.addIncoming(&PN, pred); + PN.moveBefore(InsertPt); + // Also, add a dummy incoming value for the original BB itself, + // so that the PHI is well-formed until we drop said predecessor. + PN.addIncoming(PoisonValue::get(PN.getType()), BB); + } + } + + std::vector<DominatorTree::UpdateType> Updates; + + // We use make_early_inc_range here because we will remove all predecessors. + for (BasicBlock *PredBB : llvm::make_early_inc_range(predecessors(BB))) { + if (UnwindDest == nullptr) { + if (DTU) { + DTU->applyUpdates(Updates); + Updates.clear(); + } + removeUnwindEdge(PredBB, DTU); + ++NumInvokes; + } else { + BB->removePredecessor(PredBB); + Instruction *TI = PredBB->getTerminator(); + TI->replaceUsesOfWith(BB, UnwindDest); + if (DTU) { + Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest}); + Updates.push_back({DominatorTree::Delete, PredBB, BB}); + } + } + } + + if (DTU) + DTU->applyUpdates(Updates); + + DeleteDeadBlock(BB, DTU); + + return true; +} + +// Try to merge two cleanuppads together. +static bool mergeCleanupPad(CleanupReturnInst *RI) { + // Skip any cleanuprets which unwind to caller, there is nothing to merge + // with. + BasicBlock *UnwindDest = RI->getUnwindDest(); + if (!UnwindDest) + return false; + + // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't + // be safe to merge without code duplication. + if (UnwindDest->getSinglePredecessor() != RI->getParent()) + return false; + + // Verify that our cleanuppad's unwind destination is another cleanuppad. + auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front()); + if (!SuccessorCleanupPad) + return false; + + CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad(); + // Replace any uses of the successor cleanupad with the predecessor pad + // The only cleanuppad uses should be this cleanupret, it's cleanupret and + // funclet bundle operands. + SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad); + // Remove the old cleanuppad. + SuccessorCleanupPad->eraseFromParent(); + // Now, we simply replace the cleanupret with a branch to the unwind + // destination. 
+ BranchInst::Create(UnwindDest, RI->getParent()); + RI->eraseFromParent(); + + return true; +} + +bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) { + // It is possible to transiantly have an undef cleanuppad operand because we + // have deleted some, but not all, dead blocks. + // Eventually, this block will be deleted. + if (isa<UndefValue>(RI->getOperand(0))) + return false; + + if (mergeCleanupPad(RI)) + return true; + + if (removeEmptyCleanup(RI, DTU)) + return true; + + return false; +} + +// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()! +bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { + BasicBlock *BB = UI->getParent(); + + bool Changed = false; + + // If there are any instructions immediately before the unreachable that can + // be removed, do so. + while (UI->getIterator() != BB->begin()) { + BasicBlock::iterator BBI = UI->getIterator(); + --BBI; + + if (!isGuaranteedToTransferExecutionToSuccessor(&*BBI)) + break; // Can not drop any more instructions. We're done here. + // Otherwise, this instruction can be freely erased, + // even if it is not side-effect free. + + // Note that deleting EH's here is in fact okay, although it involves a bit + // of subtle reasoning. If this inst is an EH, all the predecessors of this + // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn, + // and we can therefore guarantee this block will be erased. + + // Delete this instruction (any uses are guaranteed to be dead) + BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType())); + BBI->eraseFromParent(); + Changed = true; + } + + // If the unreachable instruction is the first in the block, take a gander + // at all of the predecessors of this instruction, and simplify them. + if (&BB->front() != UI) + return Changed; + + std::vector<DominatorTree::UpdateType> Updates; + + SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB)); + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { + auto *Predecessor = Preds[i]; + Instruction *TI = Predecessor->getTerminator(); + IRBuilder<> Builder(TI); + if (auto *BI = dyn_cast<BranchInst>(TI)) { + // We could either have a proper unconditional branch, + // or a degenerate conditional branch with matching destinations. + if (all_of(BI->successors(), + [BB](auto *Successor) { return Successor == BB; })) { + new UnreachableInst(TI->getContext(), TI); + TI->eraseFromParent(); + Changed = true; + } else { + assert(BI->isConditional() && "Can't get here with an uncond branch."); + Value* Cond = BI->getCondition(); + assert(BI->getSuccessor(0) != BI->getSuccessor(1) && + "The destinations are guaranteed to be different here."); + if (BI->getSuccessor(0) == BB) { + Builder.CreateAssumption(Builder.CreateNot(Cond)); + Builder.CreateBr(BI->getSuccessor(1)); + } else { + assert(BI->getSuccessor(1) == BB && "Incorrect CFG"); + Builder.CreateAssumption(Cond); + Builder.CreateBr(BI->getSuccessor(0)); + } + EraseTerminatorAndDCECond(BI); + Changed = true; + } + if (DTU) + Updates.push_back({DominatorTree::Delete, Predecessor, BB}); + } else if (auto *SI = dyn_cast<SwitchInst>(TI)) { + SwitchInstProfUpdateWrapper SU(*SI); + for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) { + if (i->getCaseSuccessor() != BB) { + ++i; + continue; + } + BB->removePredecessor(SU->getParent()); + i = SU.removeCase(i); + e = SU->case_end(); + Changed = true; + } + // Note that the default destination can't be removed! 
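+      // (A SwitchInst always has a default successor, so when the default is
+      // BB itself that edge necessarily stays; the DomTree edge deletion is
+      // therefore only recorded below when the default is some other block.)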
+ if (DTU && SI->getDefaultDest() != BB) + Updates.push_back({DominatorTree::Delete, Predecessor, BB}); + } else if (auto *II = dyn_cast<InvokeInst>(TI)) { + if (II->getUnwindDest() == BB) { + if (DTU) { + DTU->applyUpdates(Updates); + Updates.clear(); + } + auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU)); + if (!CI->doesNotThrow()) + CI->setDoesNotThrow(); + Changed = true; + } + } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) { + if (CSI->getUnwindDest() == BB) { + if (DTU) { + DTU->applyUpdates(Updates); + Updates.clear(); + } + removeUnwindEdge(TI->getParent(), DTU); + Changed = true; + continue; + } + + for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(), + E = CSI->handler_end(); + I != E; ++I) { + if (*I == BB) { + CSI->removeHandler(I); + --I; + --E; + Changed = true; + } + } + if (DTU) + Updates.push_back({DominatorTree::Delete, Predecessor, BB}); + if (CSI->getNumHandlers() == 0) { + if (CSI->hasUnwindDest()) { + // Redirect all predecessors of the block containing CatchSwitchInst + // to instead branch to the CatchSwitchInst's unwind destination. + if (DTU) { + for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) { + Updates.push_back({DominatorTree::Insert, + PredecessorOfPredecessor, + CSI->getUnwindDest()}); + Updates.push_back({DominatorTree::Delete, + PredecessorOfPredecessor, Predecessor}); + } + } + Predecessor->replaceAllUsesWith(CSI->getUnwindDest()); + } else { + // Rewrite all preds to unwind to caller (or from invoke to call). + if (DTU) { + DTU->applyUpdates(Updates); + Updates.clear(); + } + SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor)); + for (BasicBlock *EHPred : EHPreds) + removeUnwindEdge(EHPred, DTU); + } + // The catchswitch is no longer reachable. + new UnreachableInst(CSI->getContext(), CSI); + CSI->eraseFromParent(); + Changed = true; + } + } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) { + (void)CRI; + assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB && + "Expected to always have an unwind to BB."); + if (DTU) + Updates.push_back({DominatorTree::Delete, Predecessor, BB}); + new UnreachableInst(TI->getContext(), TI); + TI->eraseFromParent(); + Changed = true; + } + } + + if (DTU) + DTU->applyUpdates(Updates); + + // If this block is now dead, remove it. 
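+  // (The entry block is never deleted here even when pred_empty() holds,
+  // since it is implicitly reachable from the function itself.)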
+ if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) { + DeleteDeadBlock(BB, DTU); + return true; + } + + return Changed; +} + +static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) { + assert(Cases.size() >= 1); + + array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate); + for (size_t I = 1, E = Cases.size(); I != E; ++I) { + if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1) + return false; + } + return true; +} + +static void createUnreachableSwitchDefault(SwitchInst *Switch, + DomTreeUpdater *DTU) { + LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); + auto *BB = Switch->getParent(); + auto *OrigDefaultBlock = Switch->getDefaultDest(); + OrigDefaultBlock->removePredecessor(BB); + BasicBlock *NewDefaultBlock = BasicBlock::Create( + BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(), + OrigDefaultBlock); + new UnreachableInst(Switch->getContext(), NewDefaultBlock); + Switch->setDefaultDest(&*NewDefaultBlock); + if (DTU) { + SmallVector<DominatorTree::UpdateType, 2> Updates; + Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock}); + if (!is_contained(successors(BB), OrigDefaultBlock)) + Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock}); + DTU->applyUpdates(Updates); + } +} + +/// Turn a switch into an integer range comparison and branch. +/// Switches with more than 2 destinations are ignored. +/// Switches with 1 destination are also ignored. +bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI, + IRBuilder<> &Builder) { + assert(SI->getNumCases() > 1 && "Degenerate switch?"); + + bool HasDefault = + !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()); + + auto *BB = SI->getParent(); + + // Partition the cases into two sets with different destinations. + BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr; + BasicBlock *DestB = nullptr; + SmallVector<ConstantInt *, 16> CasesA; + SmallVector<ConstantInt *, 16> CasesB; + + for (auto Case : SI->cases()) { + BasicBlock *Dest = Case.getCaseSuccessor(); + if (!DestA) + DestA = Dest; + if (Dest == DestA) { + CasesA.push_back(Case.getCaseValue()); + continue; + } + if (!DestB) + DestB = Dest; + if (Dest == DestB) { + CasesB.push_back(Case.getCaseValue()); + continue; + } + return false; // More than two destinations. + } + if (!DestB) + return false; // All destinations are the same and the default is unreachable + + assert(DestA && DestB && + "Single-destination switch should have been folded."); + assert(DestA != DestB); + assert(DestB != SI->getDefaultDest()); + assert(!CasesB.empty() && "There must be non-default cases."); + assert(!CasesA.empty() || HasDefault); + + // Figure out if one of the sets of cases form a contiguous range. + SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr; + BasicBlock *ContiguousDest = nullptr; + BasicBlock *OtherDest = nullptr; + if (!CasesA.empty() && CasesAreContiguous(CasesA)) { + ContiguousCases = &CasesA; + ContiguousDest = DestA; + OtherDest = DestB; + } else if (CasesAreContiguous(CasesB)) { + ContiguousCases = &CasesB; + ContiguousDest = DestB; + OtherDest = DestA; + } else + return false; + + // Start building the compare and branch. 
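+  // Worked example (values invented for illustration): for contiguous cases
+  // {10, 11, 12} branching to %contiguous, with everything else going to
+  // %other, the switch becomes roughly
+  //
+  //   %off    = add i32 %x, -10
+  //   %switch = icmp ult i32 %off, 3
+  //   br i1 %switch, label %contiguous, label %other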
+ + Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back()); + Constant *NumCases = + ConstantInt::get(Offset->getType(), ContiguousCases->size()); + + Value *Sub = SI->getCondition(); + if (!Offset->isNullValue()) + Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off"); + + Value *Cmp; + // If NumCases overflowed, then all possible values jump to the successor. + if (NumCases->isNullValue() && !ContiguousCases->empty()) + Cmp = ConstantInt::getTrue(SI->getContext()); + else + Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch"); + BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest); + + // Update weight for the newly-created conditional branch. + if (hasBranchWeightMD(*SI)) { + SmallVector<uint64_t, 8> Weights; + GetBranchWeights(SI, Weights); + if (Weights.size() == 1 + SI->getNumCases()) { + uint64_t TrueWeight = 0; + uint64_t FalseWeight = 0; + for (size_t I = 0, E = Weights.size(); I != E; ++I) { + if (SI->getSuccessor(I) == ContiguousDest) + TrueWeight += Weights[I]; + else + FalseWeight += Weights[I]; + } + while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) { + TrueWeight /= 2; + FalseWeight /= 2; + } + setBranchWeights(NewBI, TrueWeight, FalseWeight); + } + } + + // Prune obsolete incoming values off the successors' PHI nodes. + for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) { + unsigned PreviousEdges = ContiguousCases->size(); + if (ContiguousDest == SI->getDefaultDest()) + ++PreviousEdges; + for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I) + cast<PHINode>(BBI)->removeIncomingValue(SI->getParent()); + } + for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) { + unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size(); + if (OtherDest == SI->getDefaultDest()) + ++PreviousEdges; + for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I) + cast<PHINode>(BBI)->removeIncomingValue(SI->getParent()); + } + + // Clean up the default block - it may have phis or other instructions before + // the unreachable terminator. + if (!HasDefault) + createUnreachableSwitchDefault(SI, DTU); + + auto *UnreachableDefault = SI->getDefaultDest(); + + // Drop the switch. + SI->eraseFromParent(); + + if (!HasDefault && DTU) + DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}}); + + return true; +} + +/// Compute masked bits for the condition of a switch +/// and use it to remove dead cases. +static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, + AssumptionCache *AC, + const DataLayout &DL) { + Value *Cond = SI->getCondition(); + KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI); + + // We can also eliminate cases by determining that their values are outside of + // the limited range of the condition based on how many significant (non-sign) + // bits are in the condition value. + unsigned MaxSignificantBitsInCond = + ComputeMaxSignificantBits(Cond, DL, 0, AC, SI); + + // Gather dead cases. 
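+  // For illustration (hypothetical example): if computeKnownBits proves that
+  // the low bit of the condition is always zero, every odd case value
+  // intersects Known.Zero and is collected below as dead; likewise, a case
+  // value that needs more significant bits than MaxSignificantBitsInCond can
+  // never equal the condition and is dead as well.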
+ SmallVector<ConstantInt *, 8> DeadCases; + SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases; + SmallVector<BasicBlock *, 8> UniqueSuccessors; + for (const auto &Case : SI->cases()) { + auto *Successor = Case.getCaseSuccessor(); + if (DTU) { + if (!NumPerSuccessorCases.count(Successor)) + UniqueSuccessors.push_back(Successor); + ++NumPerSuccessorCases[Successor]; + } + const APInt &CaseVal = Case.getCaseValue()->getValue(); + if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) || + (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) { + DeadCases.push_back(Case.getCaseValue()); + if (DTU) + --NumPerSuccessorCases[Successor]; + LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal + << " is dead.\n"); + } + } + + // If we can prove that the cases must cover all possible values, the + // default destination becomes dead and we can remove it. If we know some + // of the bits in the value, we can use that to more precisely compute the + // number of possible unique case values. + bool HasDefault = + !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()); + const unsigned NumUnknownBits = + Known.getBitWidth() - (Known.Zero | Known.One).countPopulation(); + assert(NumUnknownBits <= Known.getBitWidth()); + if (HasDefault && DeadCases.empty() && + NumUnknownBits < 64 /* avoid overflow */ && + SI->getNumCases() == (1ULL << NumUnknownBits)) { + createUnreachableSwitchDefault(SI, DTU); + return true; + } + + if (DeadCases.empty()) + return false; + + SwitchInstProfUpdateWrapper SIW(*SI); + for (ConstantInt *DeadCase : DeadCases) { + SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase); + assert(CaseI != SI->case_default() && + "Case was not found. Probably mistake in DeadCases forming."); + // Prune unused values from PHI nodes. + CaseI->getCaseSuccessor()->removePredecessor(SI->getParent()); + SIW.removeCase(CaseI); + } + + if (DTU) { + std::vector<DominatorTree::UpdateType> Updates; + for (auto *Successor : UniqueSuccessors) + if (NumPerSuccessorCases[Successor] == 0) + Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor}); + DTU->applyUpdates(Updates); + } + + return true; +} + +/// If BB would be eligible for simplification by +/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated +/// by an unconditional branch), look at the phi node for BB in the successor +/// block and see if the incoming value is equal to CaseValue. If so, return +/// the phi node, and set PhiIndex to BB's index in the phi node. +static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue, + BasicBlock *BB, int *PhiIndex) { + if (BB->getFirstNonPHIOrDbg() != BB->getTerminator()) + return nullptr; // BB must be empty to be a candidate for simplification. + if (!BB->getSinglePredecessor()) + return nullptr; // BB must be dominated by the switch. + + BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator()); + if (!Branch || !Branch->isUnconditional()) + return nullptr; // Terminator must be unconditional branch. 
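+
+  // At this point BB is a trivial forwarding block: it has a single
+  // predecessor and contains nothing but an unconditional branch.  For
+  // illustration (hypothetical IR, names invented), with "case 17" going to
+  // such a block,
+  //
+  //   case17:                       ; pred: %switchbb
+  //     br label %merge
+  //   merge:
+  //     %r = phi i32 ... [ 17, %case17 ] ...
+  //
+  // the phi below is returned with PhiIndex pointing at the %case17 entry,
+  // so ForwardSwitchConditionToPHI may rewrite that incoming value to the
+  // switch condition.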
+ + BasicBlock *Succ = Branch->getSuccessor(0); + + for (PHINode &PHI : Succ->phis()) { + int Idx = PHI.getBasicBlockIndex(BB); + assert(Idx >= 0 && "PHI has no entry for predecessor?"); + + Value *InValue = PHI.getIncomingValue(Idx); + if (InValue != CaseValue) + continue; + + *PhiIndex = Idx; + return &PHI; + } + + return nullptr; +} + +/// Try to forward the condition of a switch instruction to a phi node +/// dominated by the switch, if that would mean that some of the destination +/// blocks of the switch can be folded away. Return true if a change is made. +static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { + using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>; + + ForwardingNodesMap ForwardingNodes; + BasicBlock *SwitchBlock = SI->getParent(); + bool Changed = false; + for (const auto &Case : SI->cases()) { + ConstantInt *CaseValue = Case.getCaseValue(); + BasicBlock *CaseDest = Case.getCaseSuccessor(); + + // Replace phi operands in successor blocks that are using the constant case + // value rather than the switch condition variable: + // switchbb: + // switch i32 %x, label %default [ + // i32 17, label %succ + // ... + // succ: + // %r = phi i32 ... [ 17, %switchbb ] ... + // --> + // %r = phi i32 ... [ %x, %switchbb ] ... + + for (PHINode &Phi : CaseDest->phis()) { + // This only works if there is exactly 1 incoming edge from the switch to + // a phi. If there is >1, that means multiple cases of the switch map to 1 + // value in the phi, and that phi value is not the switch condition. Thus, + // this transform would not make sense (the phi would be invalid because + // a phi can't have different incoming values from the same block). + int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock); + if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue && + count(Phi.blocks(), SwitchBlock) == 1) { + Phi.setIncomingValue(SwitchBBIdx, SI->getCondition()); + Changed = true; + } + } + + // Collect phi nodes that are indirectly using this switch's case constants. + int PhiIdx; + if (auto *Phi = FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx)) + ForwardingNodes[Phi].push_back(PhiIdx); + } + + for (auto &ForwardingNode : ForwardingNodes) { + PHINode *Phi = ForwardingNode.first; + SmallVectorImpl<int> &Indexes = ForwardingNode.second; + if (Indexes.size() < 2) + continue; + + for (int Index : Indexes) + Phi->setIncomingValue(Index, SI->getCondition()); + Changed = true; + } + + return Changed; +} + +/// Return true if the backend will be able to handle +/// initializing an array of constants like C. +static bool ValidLookupTableConstant(Constant *C, const TargetTransformInfo &TTI) { + if (C->isThreadDependent()) + return false; + if (C->isDLLImportDependent()) + return false; + + if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) && + !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) && + !isa<UndefValue>(C) && !isa<ConstantExpr>(C)) + return false; + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { + // Pointer casts and in-bounds GEPs will not prohibit the backend from + // materializing the array of constants. + Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets()); + if (StrippedC == C || !ValidLookupTableConstant(StrippedC, TTI)) + return false; + } + + if (!TTI.shouldBuildLookupTablesForConstant(C)) + return false; + + return true; +} + +/// If V is a Constant, return it. Otherwise, try to look up +/// its constant value in ConstantPool, returning 0 if it's not there. 
+static Constant * +LookupConstant(Value *V, + const SmallDenseMap<Value *, Constant *> &ConstantPool) { + if (Constant *C = dyn_cast<Constant>(V)) + return C; + return ConstantPool.lookup(V); +} + +/// Try to fold instruction I into a constant. This works for +/// simple instructions such as binary operations where both operands are +/// constant or can be replaced by constants from the ConstantPool. Returns the +/// resulting constant on success, 0 otherwise. +static Constant * +ConstantFold(Instruction *I, const DataLayout &DL, + const SmallDenseMap<Value *, Constant *> &ConstantPool) { + if (SelectInst *Select = dyn_cast<SelectInst>(I)) { + Constant *A = LookupConstant(Select->getCondition(), ConstantPool); + if (!A) + return nullptr; + if (A->isAllOnesValue()) + return LookupConstant(Select->getTrueValue(), ConstantPool); + if (A->isNullValue()) + return LookupConstant(Select->getFalseValue(), ConstantPool); + return nullptr; + } + + SmallVector<Constant *, 4> COps; + for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) { + if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool)) + COps.push_back(A); + else + return nullptr; + } + + return ConstantFoldInstOperands(I, COps, DL); +} + +/// Try to determine the resulting constant values in phi nodes +/// at the common destination basic block, *CommonDest, for one of the case +/// destionations CaseDest corresponding to value CaseVal (0 for the default +/// case), of a switch instruction SI. +static bool +getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, + BasicBlock **CommonDest, + SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res, + const DataLayout &DL, const TargetTransformInfo &TTI) { + // The block from which we enter the common destination. + BasicBlock *Pred = SI->getParent(); + + // If CaseDest is empty except for some side-effect free instructions through + // which we can constant-propagate the CaseVal, continue to its successor. + SmallDenseMap<Value *, Constant *> ConstantPool; + ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal)); + for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) { + if (I.isTerminator()) { + // If the terminator is a simple branch, continue to the next block. + if (I.getNumSuccessors() != 1 || I.isExceptionalTerminator()) + return false; + Pred = CaseDest; + CaseDest = I.getSuccessor(0); + } else if (Constant *C = ConstantFold(&I, DL, ConstantPool)) { + // Instruction is side-effect free and constant. + + // If the instruction has uses outside this block or a phi node slot for + // the block, it is not safe to bypass the instruction since it would then + // no longer dominate all its uses. + for (auto &Use : I.uses()) { + User *User = Use.getUser(); + if (Instruction *I = dyn_cast<Instruction>(User)) + if (I->getParent() == CaseDest) + continue; + if (PHINode *Phi = dyn_cast<PHINode>(User)) + if (Phi->getIncomingBlock(Use) == CaseDest) + continue; + return false; + } + + ConstantPool.insert(std::make_pair(&I, C)); + } else { + break; + } + } + + // If we did not have a CommonDest before, use the current one. + if (!*CommonDest) + *CommonDest = CaseDest; + // If the destination isn't the common one, abort. + if (CaseDest != *CommonDest) + return false; + + // Get the values for this case from phi nodes in the destination block. 
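+  // For illustration only (hypothetical IR, names invented): for "case 3"
+  // whose destination is
+  //
+  //   case3:
+  //     %t = mul i32 %x, 2      ; the walk above folded this to i32 6
+  //     br label %common
+  //   common:
+  //     %r = phi i32 ... [ %t, %case3 ] ...
+  //
+  // the loop below records the pair (%r, i32 6) in Res.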
+ for (PHINode &PHI : (*CommonDest)->phis()) { + int Idx = PHI.getBasicBlockIndex(Pred); + if (Idx == -1) + continue; + + Constant *ConstVal = + LookupConstant(PHI.getIncomingValue(Idx), ConstantPool); + if (!ConstVal) + return false; + + // Be conservative about which kinds of constants we support. + if (!ValidLookupTableConstant(ConstVal, TTI)) + return false; + + Res.push_back(std::make_pair(&PHI, ConstVal)); + } + + return Res.size() > 0; +} + +// Helper function used to add CaseVal to the list of cases that generate +// Result. Returns the updated number of cases that generate this result. +static size_t mapCaseToResult(ConstantInt *CaseVal, + SwitchCaseResultVectorTy &UniqueResults, + Constant *Result) { + for (auto &I : UniqueResults) { + if (I.first == Result) { + I.second.push_back(CaseVal); + return I.second.size(); + } + } + UniqueResults.push_back( + std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal))); + return 1; +} + +// Helper function that initializes a map containing +// results for the PHI node of the common destination block for a switch +// instruction. Returns false if multiple PHI nodes have been found or if +// there is not a common destination block for the switch. +static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, + BasicBlock *&CommonDest, + SwitchCaseResultVectorTy &UniqueResults, + Constant *&DefaultResult, + const DataLayout &DL, + const TargetTransformInfo &TTI, + uintptr_t MaxUniqueResults) { + for (const auto &I : SI->cases()) { + ConstantInt *CaseVal = I.getCaseValue(); + + // Resulting value at phi nodes for this case value. + SwitchCaseResultsTy Results; + if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results, + DL, TTI)) + return false; + + // Only one value per case is permitted. + if (Results.size() > 1) + return false; + + // Add the case->result mapping to UniqueResults. + const size_t NumCasesForResult = + mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second); + + // Early out if there are too many cases for this result. + if (NumCasesForResult > MaxSwitchCasesPerResult) + return false; + + // Early out if there are too many unique results. + if (UniqueResults.size() > MaxUniqueResults) + return false; + + // Check the PHI consistency. + if (!PHI) + PHI = Results[0].first; + else if (PHI != Results[0].first) + return false; + } + // Find the default result value. + SmallVector<std::pair<PHINode *, Constant *>, 1> DefaultResults; + BasicBlock *DefaultDest = SI->getDefaultDest(); + getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults, + DL, TTI); + // If the default value is not found abort unless the default destination + // is unreachable. + DefaultResult = + DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr; + if ((!DefaultResult && + !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg()))) + return false; + + return true; +} + +// Helper function that checks if it is possible to transform a switch with only +// two cases (or two cases + default) that produces a result into a select. +// TODO: Handle switches with more than 2 cases that map to the same result. +static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, + Constant *DefaultResult, Value *Condition, + IRBuilder<> &Builder) { + // If we are selecting between only two cases transform into a simple + // select or a two-way select if default is possible. 
+  // Example:
+  // switch (a) {                  %0 = icmp eq i32 %a, 10
+  // case 10: return 42;           %1 = select i1 %0, i32 42, i32 4
+  // case 20: return 2;   ---->    %2 = icmp eq i32 %a, 20
+  // default: return 4;            %3 = select i1 %2, i32 2, i32 %1
+  // }
+  if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
+      ResultVector[1].second.size() == 1) {
+    ConstantInt *FirstCase = ResultVector[0].second[0];
+    ConstantInt *SecondCase = ResultVector[1].second[0];
+    Value *SelectValue = ResultVector[1].first;
+    if (DefaultResult) {
+      Value *ValueCompare =
+          Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
+      SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
+                                         DefaultResult, "switch.select");
+    }
+    Value *ValueCompare =
+        Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
+    return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
+                                SelectValue, "switch.select");
+  }
+
+  // Handle the degenerate case where two cases have the same result value.
+  if (ResultVector.size() == 1 && DefaultResult) {
+    ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
+    unsigned CaseCount = CaseValues.size();
+    // When 2^n cases map to the same result and differ only in n bit
+    // positions, a single masked compare suffices:
+    // case 0,4      -> Cond & 0b1..1011 == 0 ? result : default
+    // case 0,2,4,6  -> Cond & 0b1..1001 == 0 ? result : default
+    // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
+    if (isPowerOf2_32(CaseCount)) {
+      ConstantInt *MinCaseVal = CaseValues[0];
+      // Find the minimal value.
+      for (auto *Case : CaseValues)
+        if (Case->getValue().slt(MinCaseVal->getValue()))
+          MinCaseVal = Case;
+
+      // Mark the bits touched by the case values.
+      APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
+      for (auto *Case : CaseValues)
+        BitMask |= (Case->getValue() - MinCaseVal->getValue());
+
+      // Check if the cases with the same result cover all combinations of the
+      // touched bits.
+      if (BitMask.countPopulation() == Log2_32(CaseCount)) {
+        if (!MinCaseVal->isNullValue())
+          Condition = Builder.CreateSub(Condition, MinCaseVal);
+        Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
+        Value *Cmp = Builder.CreateICmpEQ(
+            And, Constant::getNullValue(And->getType()), "switch.selectcmp");
+        return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
+      }
+    }
+
+    // Handle the case where exactly two case values map to the same result.
+    if (CaseValues.size() == 2) {
+      Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
+                                         "switch.selectcmp.case1");
+      Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
+                                         "switch.selectcmp.case2");
+      Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
+      return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
+    }
+  }
+
+  return nullptr;
+}
+
+// Helper function to clean up a switch instruction that has been converted
+// into a select, fixing up PHI nodes and basic blocks.
+static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI,
+                                        Value *SelectValue,
+                                        IRBuilder<> &Builder,
+                                        DomTreeUpdater *DTU) {
+  std::vector<DominatorTree::UpdateType> Updates;
+
+  BasicBlock *SelectBB = SI->getParent();
+  BasicBlock *DestBB = PHI->getParent();
+
+  if (DTU && !is_contained(predecessors(DestBB), SelectBB))
+    Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
+  Builder.CreateBr(DestBB);
+
+  // Remove the switch.
+ + while (PHI->getBasicBlockIndex(SelectBB) >= 0) + PHI->removeIncomingValue(SelectBB); + PHI->addIncoming(SelectValue, SelectBB); + + SmallPtrSet<BasicBlock *, 4> RemovedSuccessors; + for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) { + BasicBlock *Succ = SI->getSuccessor(i); + + if (Succ == DestBB) + continue; + Succ->removePredecessor(SelectBB); + if (DTU && RemovedSuccessors.insert(Succ).second) + Updates.push_back({DominatorTree::Delete, SelectBB, Succ}); + } + SI->eraseFromParent(); + if (DTU) + DTU->applyUpdates(Updates); +} + +/// If a switch is only used to initialize one or more phi nodes in a common +/// successor block with only two different constant values, try to replace the +/// switch with a select. Returns true if the fold was made. +static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, + DomTreeUpdater *DTU, const DataLayout &DL, + const TargetTransformInfo &TTI) { + Value *const Cond = SI->getCondition(); + PHINode *PHI = nullptr; + BasicBlock *CommonDest = nullptr; + Constant *DefaultResult; + SwitchCaseResultVectorTy UniqueResults; + // Collect all the cases that will deliver the same value from the switch. + if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult, + DL, TTI, /*MaxUniqueResults*/ 2)) + return false; + + assert(PHI != nullptr && "PHI for value select not found"); + Builder.SetInsertPoint(SI); + Value *SelectValue = + foldSwitchToSelect(UniqueResults, DefaultResult, Cond, Builder); + if (!SelectValue) + return false; + + removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU); + return true; +} + +namespace { + +/// This class represents a lookup table that can be used to replace a switch. +class SwitchLookupTable { +public: + /// Create a lookup table to use as a switch replacement with the contents + /// of Values, using DefaultValue to fill any holes in the table. + SwitchLookupTable( + Module &M, uint64_t TableSize, ConstantInt *Offset, + const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values, + Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName); + + /// Build instructions with Builder to retrieve the value at + /// the position given by Index in the lookup table. + Value *BuildLookup(Value *Index, IRBuilder<> &Builder); + + /// Return true if a table with TableSize elements of + /// type ElementType would fit in a target-legal register. + static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize, + Type *ElementType); + +private: + // Depending on the contents of the table, it can be represented in + // different ways. + enum { + // For tables where each element contains the same value, we just have to + // store that single value and return it for each lookup. + SingleValueKind, + + // For tables where there is a linear relationship between table index + // and values. We calculate the result with a simple multiplication + // and addition instead of a table lookup. + LinearMapKind, + + // For small tables with integer elements, we can pack them into a bitmap + // that fits into a target-legal register. Values are retrieved by + // shift and mask operations. + BitMapKind, + + // The table is stored as an array of values. Values are retrieved by load + // instructions from the table. + ArrayKind + } Kind; + + // For SingleValueKind, this is the single value. + Constant *SingleValue = nullptr; + + // For BitMapKind, this is the bitmap. 
+ ConstantInt *BitMap = nullptr; + IntegerType *BitMapElementTy = nullptr; + + // For LinearMapKind, these are the constants used to derive the value. + ConstantInt *LinearOffset = nullptr; + ConstantInt *LinearMultiplier = nullptr; + + // For ArrayKind, this is the array. + GlobalVariable *Array = nullptr; +}; + +} // end anonymous namespace + +SwitchLookupTable::SwitchLookupTable( + Module &M, uint64_t TableSize, ConstantInt *Offset, + const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values, + Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) { + assert(Values.size() && "Can't build lookup table without values!"); + assert(TableSize >= Values.size() && "Can't fit values in table!"); + + // If all values in the table are equal, this is that value. + SingleValue = Values.begin()->second; + + Type *ValueType = Values.begin()->second->getType(); + + // Build up the table contents. + SmallVector<Constant *, 64> TableContents(TableSize); + for (size_t I = 0, E = Values.size(); I != E; ++I) { + ConstantInt *CaseVal = Values[I].first; + Constant *CaseRes = Values[I].second; + assert(CaseRes->getType() == ValueType); + + uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue(); + TableContents[Idx] = CaseRes; + + if (CaseRes != SingleValue) + SingleValue = nullptr; + } + + // Fill in any holes in the table with the default result. + if (Values.size() < TableSize) { + assert(DefaultValue && + "Need a default value to fill the lookup table holes."); + assert(DefaultValue->getType() == ValueType); + for (uint64_t I = 0; I < TableSize; ++I) { + if (!TableContents[I]) + TableContents[I] = DefaultValue; + } + + if (DefaultValue != SingleValue) + SingleValue = nullptr; + } + + // If each element in the table contains the same value, we only need to store + // that single value. + if (SingleValue) { + Kind = SingleValueKind; + return; + } + + // Check if we can derive the value with a linear transformation from the + // table index. + if (isa<IntegerType>(ValueType)) { + bool LinearMappingPossible = true; + APInt PrevVal; + APInt DistToPrev; + assert(TableSize >= 2 && "Should be a SingleValue table."); + // Check if there is the same distance between two consecutive values. + for (uint64_t I = 0; I < TableSize; ++I) { + ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]); + if (!ConstVal) { + // This is an undef. We could deal with it, but undefs in lookup tables + // are very seldom. It's probably not worth the additional complexity. + LinearMappingPossible = false; + break; + } + const APInt &Val = ConstVal->getValue(); + if (I != 0) { + APInt Dist = Val - PrevVal; + if (I == 1) { + DistToPrev = Dist; + } else if (Dist != DistToPrev) { + LinearMappingPossible = false; + break; + } + } + PrevVal = Val; + } + if (LinearMappingPossible) { + LinearOffset = cast<ConstantInt>(TableContents[0]); + LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev); + Kind = LinearMapKind; + ++NumLinearMaps; + return; + } + } + + // If the type is integer and the table fits in a register, build a bitmap. + if (WouldFitInRegister(DL, TableSize, ValueType)) { + IntegerType *IT = cast<IntegerType>(ValueType); + APInt TableInt(TableSize * IT->getBitWidth(), 0); + for (uint64_t I = TableSize; I > 0; --I) { + TableInt <<= IT->getBitWidth(); + // Insert values into the bitmap. Undef values are set to zero. 
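+      // For illustration (hypothetical i8 table, values chosen arbitrarily):
+      // packing the three elements {1, 2, 3} walks the table from the back,
+      // shifting by 8 bits each step, so TableInt ends up as 0x030201 with
+      // element 0 in the least significant byte.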
+ if (!isa<UndefValue>(TableContents[I - 1])) { + ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]); + TableInt |= Val->getValue().zext(TableInt.getBitWidth()); + } + } + BitMap = ConstantInt::get(M.getContext(), TableInt); + BitMapElementTy = IT; + Kind = BitMapKind; + ++NumBitMaps; + return; + } + + // Store the table in an array. + ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize); + Constant *Initializer = ConstantArray::get(ArrayTy, TableContents); + + Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true, + GlobalVariable::PrivateLinkage, Initializer, + "switch.table." + FuncName); + Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + // Set the alignment to that of an array items. We will be only loading one + // value out of it. + Array->setAlignment(DL.getPrefTypeAlign(ValueType)); + Kind = ArrayKind; +} + +Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { + switch (Kind) { + case SingleValueKind: + return SingleValue; + case LinearMapKind: { + // Derive the result value from the input value. + Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(), + false, "switch.idx.cast"); + if (!LinearMultiplier->isOne()) + Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult"); + if (!LinearOffset->isZero()) + Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset"); + return Result; + } + case BitMapKind: { + // Type of the bitmap (e.g. i59). + IntegerType *MapTy = BitMap->getType(); + + // Cast Index to the same type as the bitmap. + // Note: The Index is <= the number of elements in the table, so + // truncating it to the width of the bitmask is safe. + Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast"); + + // Multiply the shift amount by the element width. + ShiftAmt = Builder.CreateMul( + ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()), + "switch.shiftamt"); + + // Shift down. + Value *DownShifted = + Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift"); + // Mask off. + return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked"); + } + case ArrayKind: { + // Make sure the table index will not overflow when treated as signed. + IntegerType *IT = cast<IntegerType>(Index->getType()); + uint64_t TableSize = + Array->getInitializer()->getType()->getArrayNumElements(); + if (TableSize > (1ULL << std::min(IT->getBitWidth() - 1, 63u))) + Index = Builder.CreateZExt( + Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1), + "switch.tableidx.zext"); + + Value *GEPIndices[] = {Builder.getInt32(0), Index}; + Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array, + GEPIndices, "switch.gep"); + return Builder.CreateLoad( + cast<ArrayType>(Array->getValueType())->getElementType(), GEP, + "switch.load"); + } + } + llvm_unreachable("Unknown lookup table kind!"); +} + +bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL, + uint64_t TableSize, + Type *ElementType) { + auto *IT = dyn_cast<IntegerType>(ElementType); + if (!IT) + return false; + // FIXME: If the type is wider than it needs to be, e.g. i8 but all values + // are <= 15, we could try to narrow the type. + + // Avoid overflow, fitsInLegalInteger uses unsigned int for the width. + if (TableSize >= UINT_MAX / IT->getBitWidth()) + return false; + return DL.fitsInLegalInteger(TableSize * IT->getBitWidth()); +} + +static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, + const DataLayout &DL) { + // Allow any legal type. 
+ if (TTI.isTypeLegal(Ty)) + return true; + + auto *IT = dyn_cast<IntegerType>(Ty); + if (!IT) + return false; + + // Also allow power of 2 integer types that have at least 8 bits and fit in + // a register. These types are common in frontend languages and targets + // usually support loads of these types. + // TODO: We could relax this to any integer that fits in a register and rely + // on ABI alignment and padding in the table to allow the load to be widened. + // Or we could widen the constants and truncate the load. + unsigned BitWidth = IT->getBitWidth(); + return BitWidth >= 8 && isPowerOf2_32(BitWidth) && + DL.fitsInLegalInteger(IT->getBitWidth()); +} + +static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) { + // 40% is the default density for building a jump table in optsize/minsize + // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this + // function was based on. + const uint64_t MinDensity = 40; + + if (CaseRange >= UINT64_MAX / 100) + return false; // Avoid multiplication overflows below. + + return NumCases * 100 >= CaseRange * MinDensity; +} + +static bool isSwitchDense(ArrayRef<int64_t> Values) { + uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front(); + uint64_t Range = Diff + 1; + if (Range < Diff) + return false; // Overflow. + + return isSwitchDense(Values.size(), Range); +} + +/// Determine whether a lookup table should be built for this switch, based on +/// the number of cases, size of the table, and the types of the results. +// TODO: We could support larger than legal types by limiting based on the +// number of loads required and/or table size. If the constants are small we +// could use smaller table entries and extend after the load. +static bool +ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, + const TargetTransformInfo &TTI, const DataLayout &DL, + const SmallDenseMap<PHINode *, Type *> &ResultTypes) { + if (SI->getNumCases() > TableSize) + return false; // TableSize overflowed. + + bool AllTablesFitInRegister = true; + bool HasIllegalType = false; + for (const auto &I : ResultTypes) { + Type *Ty = I.second; + + // Saturate this flag to true. + HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL); + + // Saturate this flag to false. + AllTablesFitInRegister = + AllTablesFitInRegister && + SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty); + + // If both flags saturate, we're done. NOTE: This *only* works with + // saturating flags, and all flags have to saturate first due to the + // non-deterministic behavior of iterating over a dense map. + if (HasIllegalType && !AllTablesFitInRegister) + break; + } + + // If each table would fit in a register, we should build it anyway. + if (AllTablesFitInRegister) + return true; + + // Don't build a table that doesn't fit in-register if it has illegal types. 
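+  // If the element types are legal, the density check below makes the final
+  // call. Illustrative numbers (invented, not from a benchmark): with the 40%
+  // threshold, 50 cases spread over a range of 100 still get a table
+  // (50 * 100 >= 100 * 40), while 30 cases over the same range do not
+  // (30 * 100 < 100 * 40).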
+ if (HasIllegalType) + return false; + + return isSwitchDense(SI->getNumCases(), TableSize); +} + +static bool ShouldUseSwitchConditionAsTableIndex( + ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, + bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes, + const DataLayout &DL, const TargetTransformInfo &TTI) { + if (MinCaseVal.isNullValue()) + return true; + if (MinCaseVal.isNegative() || + MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() || + !HasDefaultResults) + return false; + return all_of(ResultTypes, [&](const auto &KV) { + return SwitchLookupTable::WouldFitInRegister( + DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */, + KV.second /* ResultType */); + }); +} + +/// Try to reuse the switch table index compare. Following pattern: +/// \code +/// if (idx < tablesize) +/// r = table[idx]; // table does not contain default_value +/// else +/// r = default_value; +/// if (r != default_value) +/// ... +/// \endcode +/// Is optimized to: +/// \code +/// cond = idx < tablesize; +/// if (cond) +/// r = table[idx]; +/// else +/// r = default_value; +/// if (cond) +/// ... +/// \endcode +/// Jump threading will then eliminate the second if(cond). +static void reuseTableCompare( + User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, + Constant *DefaultValue, + const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) { + ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser); + if (!CmpInst) + return; + + // We require that the compare is in the same block as the phi so that jump + // threading can do its work afterwards. + if (CmpInst->getParent() != PhiBlock) + return; + + Constant *CmpOp1 = dyn_cast<Constant>(CmpInst->getOperand(1)); + if (!CmpOp1) + return; + + Value *RangeCmp = RangeCheckBranch->getCondition(); + Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType()); + Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType()); + + // Check if the compare with the default value is constant true or false. + Constant *DefaultConst = ConstantExpr::getICmp(CmpInst->getPredicate(), + DefaultValue, CmpOp1, true); + if (DefaultConst != TrueConst && DefaultConst != FalseConst) + return; + + // Check if the compare with the case values is distinct from the default + // compare result. + for (auto ValuePair : Values) { + Constant *CaseConst = ConstantExpr::getICmp(CmpInst->getPredicate(), + ValuePair.second, CmpOp1, true); + if (!CaseConst || CaseConst == DefaultConst || + (CaseConst != TrueConst && CaseConst != FalseConst)) + return; + } + + // Check if the branch instruction dominates the phi node. It's a simple + // dominance check, but sufficient for our needs. + // Although this check is invariant in the calling loops, it's better to do it + // at this late stage. Practically we do it at most once for a switch. + BasicBlock *BranchBlock = RangeCheckBranch->getParent(); + for (BasicBlock *Pred : predecessors(PhiBlock)) { + if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock) + return; + } + + if (DefaultConst == FalseConst) { + // The compare yields the same result. We can replace it. + CmpInst->replaceAllUsesWith(RangeCmp); + ++NumTableCmpReuses; + } else { + // The compare yields the same result, just inverted. We can replace it. 
+ Value *InvertedTableCmp = BinaryOperator::CreateXor( + RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp", + RangeCheckBranch); + CmpInst->replaceAllUsesWith(InvertedTableCmp); + ++NumTableCmpReuses; + } +} + +/// If the switch is only used to initialize one or more phi nodes in a common +/// successor block with different constant values, replace the switch with +/// lookup tables. +static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, + DomTreeUpdater *DTU, const DataLayout &DL, + const TargetTransformInfo &TTI) { + assert(SI->getNumCases() > 1 && "Degenerate switch?"); + + BasicBlock *BB = SI->getParent(); + Function *Fn = BB->getParent(); + // Only build lookup table when we have a target that supports it or the + // attribute is not set. + if (!TTI.shouldBuildLookupTables() || + (Fn->getFnAttribute("no-jump-tables").getValueAsBool())) + return false; + + // FIXME: If the switch is too sparse for a lookup table, perhaps we could + // split off a dense part and build a lookup table for that. + + // FIXME: This creates arrays of GEPs to constant strings, which means each + // GEP needs a runtime relocation in PIC code. We should just build one big + // string and lookup indices into that. + + // Ignore switches with less than three cases. Lookup tables will not make + // them faster, so we don't analyze them. + if (SI->getNumCases() < 3) + return false; + + // Figure out the corresponding result for each case value and phi node in the + // common destination, as well as the min and max case values. + assert(!SI->cases().empty()); + SwitchInst::CaseIt CI = SI->case_begin(); + ConstantInt *MinCaseVal = CI->getCaseValue(); + ConstantInt *MaxCaseVal = CI->getCaseValue(); + + BasicBlock *CommonDest = nullptr; + + using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>; + SmallDenseMap<PHINode *, ResultListTy> ResultLists; + + SmallDenseMap<PHINode *, Constant *> DefaultResults; + SmallDenseMap<PHINode *, Type *> ResultTypes; + SmallVector<PHINode *, 4> PHIs; + + for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) { + ConstantInt *CaseVal = CI->getCaseValue(); + if (CaseVal->getValue().slt(MinCaseVal->getValue())) + MinCaseVal = CaseVal; + if (CaseVal->getValue().sgt(MaxCaseVal->getValue())) + MaxCaseVal = CaseVal; + + // Resulting value at phi nodes for this case value. + using ResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>; + ResultsTy Results; + if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest, + Results, DL, TTI)) + return false; + + // Append the result from this case to the list for each phi. + for (const auto &I : Results) { + PHINode *PHI = I.first; + Constant *Value = I.second; + if (!ResultLists.count(PHI)) + PHIs.push_back(PHI); + ResultLists[PHI].push_back(std::make_pair(CaseVal, Value)); + } + } + + // Keep track of the result types. + for (PHINode *PHI : PHIs) { + ResultTypes[PHI] = ResultLists[PHI][0].second->getType(); + } + + uint64_t NumResults = ResultLists[PHIs[0]].size(); + + // If the table has holes, we need a constant result for the default case + // or a bitmask that fits in a register. 
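+  // For example (hypothetical case set): cases {0, 1, 3} produce a table of
+  // size 4 whose entry at index 2 is a hole; it is either filled with the
+  // default phi value or guarded by the hole-check bitmask built further down.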
+  SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
+  bool HasDefaultResults =
+      getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
+                     DefaultResultsList, DL, TTI);
+
+  for (const auto &I : DefaultResultsList) {
+    PHINode *PHI = I.first;
+    Constant *Result = I.second;
+    DefaultResults[PHI] = Result;
+  }
+
+  bool UseSwitchConditionAsTableIndex = ShouldUseSwitchConditionAsTableIndex(
+      *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
+  uint64_t TableSize;
+  if (UseSwitchConditionAsTableIndex)
+    TableSize = MaxCaseVal->getLimitedValue() + 1;
+  else
+    TableSize =
+        (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
+
+  bool TableHasHoles = (NumResults < TableSize);
+  bool NeedMask = (TableHasHoles && !HasDefaultResults);
+  if (NeedMask) {
+    // As an extra penalty for the validity test we require more cases.
+    if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
+      return false;
+    if (!DL.fitsInLegalInteger(TableSize))
+      return false;
+  }
+
+  if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
+    return false;
+
+  std::vector<DominatorTree::UpdateType> Updates;
+
+  // Create the BB that does the lookups.
+  Module &Mod = *CommonDest->getParent()->getParent();
+  BasicBlock *LookupBB = BasicBlock::Create(
+      Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
+
+  // Compute the table index value.
+  Builder.SetInsertPoint(SI);
+  Value *TableIndex;
+  ConstantInt *TableIndexOffset;
+  if (UseSwitchConditionAsTableIndex) {
+    TableIndexOffset = ConstantInt::get(MaxCaseVal->getType(), 0);
+    TableIndex = SI->getCondition();
+  } else {
+    TableIndexOffset = MinCaseVal;
+    TableIndex =
+        Builder.CreateSub(SI->getCondition(), TableIndexOffset, "switch.tableidx");
+  }
+
+  // Compute the maximum table size representable by the integer type we are
+  // switching upon.
+  unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
+  uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
+  assert(MaxTableSize >= TableSize &&
+         "It is impossible for a switch to have more entries than the max "
+         "representable value of its input integer type's size.");
+
+  // If the default destination is unreachable, or if the lookup table covers
+  // all values of the conditional variable, branch directly to the lookup table
+  // BB. Otherwise, check that the condition is within the case range.
+  const bool DefaultIsReachable =
+      !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
+  const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
+  BranchInst *RangeCheckBranch = nullptr;
+
+  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
+    Builder.CreateBr(LookupBB);
+    if (DTU)
+      Updates.push_back({DominatorTree::Insert, BB, LookupBB});
+    // Note: We call removePredecessor later since we need to be able to get the
+    // PHI value for the default case in case we're using a bit mask.
+  } else {
+    Value *Cmp = Builder.CreateICmpULT(
+        TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
+    RangeCheckBranch =
+        Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
+    if (DTU)
+      Updates.push_back({DominatorTree::Insert, BB, LookupBB});
+  }
+
+  // Populate the BB that does the lookups.
+  Builder.SetInsertPoint(LookupBB);
+
+  if (NeedMask) {
+    // Before doing the lookup, we do the hole check. The LookupBB is therefore
+    // re-purposed to do the hole check, and we create a new LookupBB.
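+    // Sketch of the mask (hypothetical indices): if the table is populated at
+    // indices {0, 2, 3, 5}, the bitmask is 0b101101. The code below shifts it
+    // right by the table index and branches on the low bit, so a cleared bit
+    // (a hole) falls through to the default destination.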
+ BasicBlock *MaskBB = LookupBB; + MaskBB->setName("switch.hole_check"); + LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup", + CommonDest->getParent(), CommonDest); + + // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid + // unnecessary illegal types. + uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL)); + APInt MaskInt(TableSizePowOf2, 0); + APInt One(TableSizePowOf2, 1); + // Build bitmask; fill in a 1 bit for every case. + const ResultListTy &ResultList = ResultLists[PHIs[0]]; + for (size_t I = 0, E = ResultList.size(); I != E; ++I) { + uint64_t Idx = (ResultList[I].first->getValue() - TableIndexOffset->getValue()) + .getLimitedValue(); + MaskInt |= One << Idx; + } + ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt); + + // Get the TableIndex'th bit of the bitmask. + // If this bit is 0 (meaning hole) jump to the default destination, + // else continue with table lookup. + IntegerType *MapTy = TableMask->getType(); + Value *MaskIndex = + Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex"); + Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted"); + Value *LoBit = Builder.CreateTrunc( + Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit"); + Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest()); + if (DTU) { + Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB}); + Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()}); + } + Builder.SetInsertPoint(LookupBB); + AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB); + } + + if (!DefaultIsReachable || GeneratingCoveredLookupTable) { + // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later, + // do not delete PHINodes here. + SI->getDefaultDest()->removePredecessor(BB, + /*KeepOneInputPHIs=*/true); + if (DTU) + Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()}); + } + + for (PHINode *PHI : PHIs) { + const ResultListTy &ResultList = ResultLists[PHI]; + + // If using a bitmask, use any value to fill the lookup table holes. + Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI]; + StringRef FuncName = Fn->getName(); + SwitchLookupTable Table(Mod, TableSize, TableIndexOffset, ResultList, DV, + DL, FuncName); + + Value *Result = Table.BuildLookup(TableIndex, Builder); + + // Do a small peephole optimization: re-use the switch table compare if + // possible. + if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) { + BasicBlock *PhiBlock = PHI->getParent(); + // Search for compare instructions which use the phi. + for (auto *User : PHI->users()) { + reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList); + } + } + + PHI->addIncoming(Result, LookupBB); + } + + Builder.CreateBr(CommonDest); + if (DTU) + Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest}); + + // Remove the switch. + SmallPtrSet<BasicBlock *, 8> RemovedSuccessors; + for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) { + BasicBlock *Succ = SI->getSuccessor(i); + + if (Succ == SI->getDefaultDest()) + continue; + Succ->removePredecessor(BB); + if (DTU && RemovedSuccessors.insert(Succ).second) + Updates.push_back({DominatorTree::Delete, BB, Succ}); + } + SI->eraseFromParent(); + + if (DTU) + DTU->applyUpdates(Updates); + + ++NumLookupTables; + if (NeedMask) + ++NumLookupTablesHoles; + return true; +} + +/// Try to transform a switch that has "holes" in it to a contiguous sequence +/// of cases. 
+/// +/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be +/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}. +/// +/// This converts a sparse switch into a dense switch which allows better +/// lowering and could also allow transforming into a lookup table. +static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, + const DataLayout &DL, + const TargetTransformInfo &TTI) { + auto *CondTy = cast<IntegerType>(SI->getCondition()->getType()); + if (CondTy->getIntegerBitWidth() > 64 || + !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth())) + return false; + // Only bother with this optimization if there are more than 3 switch cases; + // SDAG will only bother creating jump tables for 4 or more cases. + if (SI->getNumCases() < 4) + return false; + + // This transform is agnostic to the signedness of the input or case values. We + // can treat the case values as signed or unsigned. We can optimize more common + // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values + // as signed. + SmallVector<int64_t,4> Values; + for (const auto &C : SI->cases()) + Values.push_back(C.getCaseValue()->getValue().getSExtValue()); + llvm::sort(Values); + + // If the switch is already dense, there's nothing useful to do here. + if (isSwitchDense(Values)) + return false; + + // First, transform the values such that they start at zero and ascend. + int64_t Base = Values[0]; + for (auto &V : Values) + V -= (uint64_t)(Base); + + // Now we have signed numbers that have been shifted so that, given enough + // precision, there are no negative values. Since the rest of the transform + // is bitwise only, we switch now to an unsigned representation. + + // This transform can be done speculatively because it is so cheap - it + // results in a single rotate operation being inserted. + // FIXME: It's possible that optimizing a switch on powers of two might also + // be beneficial - flag values are often powers of two and we could use a CLZ + // as the key function. + + // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than + // one element and LLVM disallows duplicate cases, Shift is guaranteed to be + // less than 64. + unsigned Shift = 64; + for (auto &V : Values) + Shift = std::min(Shift, countTrailingZeros((uint64_t)V)); + assert(Shift < 64); + if (Shift > 0) + for (auto &V : Values) + V = (int64_t)((uint64_t)V >> Shift); + + if (!isSwitchDense(Values)) + // Transform didn't create a dense switch. + return false; + + // The obvious transform is to shift the switch condition right and emit a + // check that the condition actually cleanly divided by GCD, i.e. + // C & (1 << Shift - 1) == 0 + // inserting a new CFG edge to handle the case where it didn't divide cleanly. + // + // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the + // shift and puts the shifted-off bits in the uppermost bits. If any of these + // are nonzero then the switch condition will be very large and will hit the + // default case. 
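+  //
+  // Worked example (using the hypothetical cases from the comment above): for
+  // cases {5, 9, 13, 17} we get Base = 5 and Shift = 2, so the condition is
+  // rewritten to ror(C - 5, 2) and the cases become {0, 1, 2, 3}. Any C that
+  // is not 5 plus a multiple of 4 rotates nonzero low bits into the high bits
+  // and therefore lands in the default case.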
+ + auto *Ty = cast<IntegerType>(SI->getCondition()->getType()); + Builder.SetInsertPoint(SI); + auto *ShiftC = ConstantInt::get(Ty, Shift); + auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base)); + auto *LShr = Builder.CreateLShr(Sub, ShiftC); + auto *Shl = Builder.CreateShl(Sub, Ty->getBitWidth() - Shift); + auto *Rot = Builder.CreateOr(LShr, Shl); + SI->replaceUsesOfWith(SI->getCondition(), Rot); + + for (auto Case : SI->cases()) { + auto *Orig = Case.getCaseValue(); + auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base); + Case.setValue( + cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue())))); + } + return true; +} + +bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { + BasicBlock *BB = SI->getParent(); + + if (isValueEqualityComparison(SI)) { + // If we only have one predecessor, and if it is a branch on this value, + // see if that predecessor totally determines the outcome of this switch. + if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) + if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) + return requestResimplify(); + + Value *Cond = SI->getCondition(); + if (SelectInst *Select = dyn_cast<SelectInst>(Cond)) + if (SimplifySwitchOnSelect(SI, Select)) + return requestResimplify(); + + // If the block only contains the switch, see if we can fold the block + // away into any preds. + if (SI == &*BB->instructionsWithoutDebug(false).begin()) + if (FoldValueComparisonIntoPredecessors(SI, Builder)) + return requestResimplify(); + } + + // Try to transform the switch into an icmp and a branch. + // The conversion from switch to comparison may lose information on + // impossible switch values, so disable it early in the pipeline. + if (Options.ConvertSwitchRangeToICmp && TurnSwitchRangeIntoICmp(SI, Builder)) + return requestResimplify(); + + // Remove unreachable cases. + if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL)) + return requestResimplify(); + + if (trySwitchToSelect(SI, Builder, DTU, DL, TTI)) + return requestResimplify(); + + if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI)) + return requestResimplify(); + + // The conversion from switch to lookup tables results in difficult-to-analyze + // code and makes pruning branches much harder. This is a problem if the + // switch expression itself can still be restricted as a result of inlining or + // CVP. Therefore, only apply this transformation during late stages of the + // optimisation pipeline. + if (Options.ConvertSwitchToLookupTable && + SwitchToLookupTable(SI, Builder, DTU, DL, TTI)) + return requestResimplify(); + + if (ReduceSwitchRange(SI, Builder, DL, TTI)) + return requestResimplify(); + + return false; +} + +bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) { + BasicBlock *BB = IBI->getParent(); + bool Changed = false; + + // Eliminate redundant destinations. 
+  SmallPtrSet<Value *, 8> Succs;
+  SmallSetVector<BasicBlock *, 8> RemovedSuccs;
+  for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
+    BasicBlock *Dest = IBI->getDestination(i);
+    if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
+      if (!Dest->hasAddressTaken())
+        RemovedSuccs.insert(Dest);
+      Dest->removePredecessor(BB);
+      IBI->removeDestination(i);
+      --i;
+      --e;
+      Changed = true;
+    }
+  }
+
+  if (DTU) {
+    std::vector<DominatorTree::UpdateType> Updates;
+    Updates.reserve(RemovedSuccs.size());
+    for (auto *RemovedSucc : RemovedSuccs)
+      Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
+    DTU->applyUpdates(Updates);
+  }
+
+  if (IBI->getNumDestinations() == 0) {
+    // If the indirectbr has no successors, change it to unreachable.
+    new UnreachableInst(IBI->getContext(), IBI);
+    EraseTerminatorAndDCECond(IBI);
+    return true;
+  }
+
+  if (IBI->getNumDestinations() == 1) {
+    // If the indirectbr has one successor, change it to a direct branch.
+    BranchInst::Create(IBI->getDestination(0), IBI);
+    EraseTerminatorAndDCECond(IBI);
+    return true;
+  }
+
+  if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
+    if (SimplifyIndirectBrOnSelect(IBI, SI))
+      return requestResimplify();
+  }
+  return Changed;
+}
+
+/// Given a block with only a single landing pad and an unconditional branch
+/// try to find another basic block which this one can be merged with. This
+/// handles cases where we have multiple invokes with unique landing pads, but
+/// a shared handler.
+///
+/// We specifically choose to not worry about merging non-empty blocks
+/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
+/// practice, the optimizer produces empty landing pad blocks quite frequently
+/// when dealing with exception dense code. (see: instcombine, gvn, if-else
+/// sinking in this file)
+///
+/// This is primarily a code size optimization. We need to avoid performing
+/// any transform which might inhibit optimization (such as our ability to
+/// specialize a particular handler via tail commoning). We do this by not
+/// merging any blocks which require us to introduce a phi. Since the same
+/// values are flowing through both blocks, we don't lose any ability to
+/// specialize. If anything, we make such specialization more likely.
+///
+/// TODO - This transformation could remove entries from a phi in the target
+/// block when the inputs in the phi are the same for the two blocks being
+/// merged. In some cases, this could result in removal of the PHI entirely.
+static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
+                                 BasicBlock *BB, DomTreeUpdater *DTU) {
+  auto Succ = BB->getUniqueSuccessor();
+  assert(Succ);
+  // If there's a phi in the successor block, we'd likely have to introduce
+  // a phi into the merged landing pad block.
+  if (isa<PHINode>(*Succ->begin()))
+    return false;
+
+  for (BasicBlock *OtherPred : predecessors(Succ)) {
+    if (BB == OtherPred)
+      continue;
+    BasicBlock::iterator I = OtherPred->begin();
+    LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
+    if (!LPad2 || !LPad2->isIdenticalTo(LPad))
+      continue;
+    for (++I; isa<DbgInfoIntrinsic>(I); ++I)
+      ;
+    BranchInst *BI2 = dyn_cast<BranchInst>(I);
+    if (!BI2 || !BI2->isIdenticalTo(BI))
+      continue;
+
+    std::vector<DominatorTree::UpdateType> Updates;
+
+    // We've found an identical block. Update our predecessors to take that
+    // path instead and make ourselves dead.
+ SmallSetVector<BasicBlock *, 16> UniquePreds(pred_begin(BB), pred_end(BB)); + for (BasicBlock *Pred : UniquePreds) { + InvokeInst *II = cast<InvokeInst>(Pred->getTerminator()); + assert(II->getNormalDest() != BB && II->getUnwindDest() == BB && + "unexpected successor"); + II->setUnwindDest(OtherPred); + if (DTU) { + Updates.push_back({DominatorTree::Insert, Pred, OtherPred}); + Updates.push_back({DominatorTree::Delete, Pred, BB}); + } + } + + // The debug info in OtherPred doesn't cover the merged control flow that + // used to go through BB. We need to delete it or update it. + for (Instruction &Inst : llvm::make_early_inc_range(*OtherPred)) + if (isa<DbgInfoIntrinsic>(Inst)) + Inst.eraseFromParent(); + + SmallSetVector<BasicBlock *, 16> UniqueSuccs(succ_begin(BB), succ_end(BB)); + for (BasicBlock *Succ : UniqueSuccs) { + Succ->removePredecessor(BB); + if (DTU) + Updates.push_back({DominatorTree::Delete, BB, Succ}); + } + + IRBuilder<> Builder(BI); + Builder.CreateUnreachable(); + BI->eraseFromParent(); + if (DTU) + DTU->applyUpdates(Updates); + return true; + } + return false; +} + +bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) { + return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder) + : simplifyCondBranch(Branch, Builder); +} + +bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI, + IRBuilder<> &Builder) { + BasicBlock *BB = BI->getParent(); + BasicBlock *Succ = BI->getSuccessor(0); + + // If the Terminator is the only non-phi instruction, simplify the block. + // If LoopHeader is provided, check if the block or its successor is a loop + // header. (This is for early invocations before loop simplify and + // vectorization to keep canonical loop forms for nested loops. These blocks + // can be eliminated when the pass is invoked later in the back-end.) + // Note that if BB has only one predecessor then we do not introduce new + // backedge, so we can eliminate BB. + bool NeedCanonicalLoop = + Options.NeedCanonicalLoop && + (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) && + (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ))); + BasicBlock::iterator I = BB->getFirstNonPHIOrDbg(true)->getIterator(); + if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && + !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU)) + return true; + + // If the only instruction in the block is a seteq/setne comparison against a + // constant, try to simplify the block. + if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) + if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) { + for (++I; isa<DbgInfoIntrinsic>(I); ++I) + ; + if (I->isTerminator() && + tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder)) + return true; + } + + // See if we can merge an empty landing pad block with another which is + // equivalent. + if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) { + for (++I; isa<DbgInfoIntrinsic>(I); ++I) + ; + if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB, DTU)) + return true; + } + + // If this basic block is ONLY a compare and a branch, and if a predecessor + // branches to us and our successor, fold the comparison into the + // predecessor and use logical operations to update the incoming value + // for PHI nodes in common successor. 
+ if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, + Options.BonusInstThreshold)) + return requestResimplify(); + return false; +} + +static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) { + BasicBlock *PredPred = nullptr; + for (auto *P : predecessors(BB)) { + BasicBlock *PPred = P->getSinglePredecessor(); + if (!PPred || (PredPred && PredPred != PPred)) + return nullptr; + PredPred = PPred; + } + return PredPred; +} + +bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { + assert( + !isa<ConstantInt>(BI->getCondition()) && + BI->getSuccessor(0) != BI->getSuccessor(1) && + "Tautological conditional branch should have been eliminated already."); + + BasicBlock *BB = BI->getParent(); + if (!Options.SimplifyCondBranch) + return false; + + // Conditional branch + if (isValueEqualityComparison(BI)) { + // If we only have one predecessor, and if it is a branch on this value, + // see if that predecessor totally determines the outcome of this + // switch. + if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) + if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) + return requestResimplify(); + + // This block must be empty, except for the setcond inst, if it exists. + // Ignore dbg and pseudo intrinsics. + auto I = BB->instructionsWithoutDebug(true).begin(); + if (&*I == BI) { + if (FoldValueComparisonIntoPredecessors(BI, Builder)) + return requestResimplify(); + } else if (&*I == cast<Instruction>(BI->getCondition())) { + ++I; + if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder)) + return requestResimplify(); + } + } + + // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction. + if (SimplifyBranchOnICmpChain(BI, Builder, DL)) + return true; + + // If this basic block has dominating predecessor blocks and the dominating + // blocks' conditions imply BI's condition, we know the direction of BI. + std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL); + if (Imp) { + // Turn this into a branch on constant. + auto *OldCond = BI->getCondition(); + ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext()) + : ConstantInt::getFalse(BB->getContext()); + BI->setCondition(TorF); + RecursivelyDeleteTriviallyDeadInstructions(OldCond); + return requestResimplify(); + } + + // If this basic block is ONLY a compare and a branch, and if a predecessor + // branches to us and one of our successors, fold the comparison into the + // predecessor and use logical operations to pick the right destination. + if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, + Options.BonusInstThreshold)) + return requestResimplify(); + + // We have a conditional branch to two blocks that are only reachable + // from BI. We know that the condbr dominates the two blocks, so see if + // there is any identical code in the "then" and "else" blocks. If so, we + // can hoist it up to the branching block. + if (BI->getSuccessor(0)->getSinglePredecessor()) { + if (BI->getSuccessor(1)->getSinglePredecessor()) { + if (HoistCommon && + HoistThenElseCodeToIf(BI, TTI, !Options.HoistCommonInsts)) + return requestResimplify(); + } else { + // If Successor #1 has multiple preds, we may be able to conditionally + // execute Successor #0 if it branches to Successor #1. 
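+      // Illustrative shape (hypothetical IR) of what this handles:
+      //   entry: br i1 %c, label %then, label %end
+      //   then:  %v = add i32 %x, 1            ; single predecessor
+      //          br label %end
+      //   end:   %r = phi i32 [ %v, %then ], [ %x, %entry ]
+      // SpeculativelyExecuteBB may hoist the add into the predecessor and turn
+      // the phi into a select on %c, when the speculated instructions are
+      // cheap and safe to execute unconditionally.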
+ Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator(); + if (Succ0TI->getNumSuccessors() == 1 && + Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI)) + return requestResimplify(); + } + } else if (BI->getSuccessor(1)->getSinglePredecessor()) { + // If Successor #0 has multiple preds, we may be able to conditionally + // execute Successor #1 if it branches to Successor #0. + Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator(); + if (Succ1TI->getNumSuccessors() == 1 && + Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI)) + return requestResimplify(); + } + + // If this is a branch on something for which we know the constant value in + // predecessors (e.g. a phi node in the current block), thread control + // through this block. + if (FoldCondBranchOnValueKnownInPredecessor(BI, DTU, DL, Options.AC)) + return requestResimplify(); + + // Scan predecessor blocks for conditional branches. + for (BasicBlock *Pred : predecessors(BB)) + if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator())) + if (PBI != BI && PBI->isConditional()) + if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI)) + return requestResimplify(); + + // Look for diamond patterns. + if (MergeCondStores) + if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB)) + if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator())) + if (PBI != BI && PBI->isConditional()) + if (mergeConditionalStores(PBI, BI, DTU, DL, TTI)) + return requestResimplify(); + + return false; +} + +/// Check if passing a value to an instruction will cause undefined behavior. +static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) { + Constant *C = dyn_cast<Constant>(V); + if (!C) + return false; + + if (I->use_empty()) + return false; + + if (C->isNullValue() || isa<UndefValue>(C)) { + // Only look at the first use, avoid hurting compile time with long uselists + auto *Use = cast<Instruction>(*I->user_begin()); + // Bail out if Use is not in the same BB as I or Use == I or Use comes + // before I in the block. The latter two can be the case if Use is a PHI + // node. + if (Use->getParent() != I->getParent() || Use == I || Use->comesBefore(I)) + return false; + + // Now make sure that there are no instructions in between that can alter + // control flow (eg. calls) + auto InstrRange = + make_range(std::next(I->getIterator()), Use->getIterator()); + if (any_of(InstrRange, [](Instruction &I) { + return !isGuaranteedToTransferExecutionToSuccessor(&I); + })) + return false; + + // Look through GEPs. A load from a GEP derived from NULL is still undefined + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use)) + if (GEP->getPointerOperand() == I) { + if (!GEP->isInBounds() || !GEP->hasAllZeroIndices()) + PtrValueMayBeModified = true; + return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified); + } + + // Look through bitcasts. + if (BitCastInst *BC = dyn_cast<BitCastInst>(Use)) + return passingValueIsAlwaysUndefined(V, BC, PtrValueMayBeModified); + + // Load from null is undefined. + if (LoadInst *LI = dyn_cast<LoadInst>(Use)) + if (!LI->isVolatile()) + return !NullPointerIsDefined(LI->getFunction(), + LI->getPointerAddressSpace()); + + // Store to null is undefined. 
+    if (StoreInst *SI = dyn_cast<StoreInst>(Use))
+      if (!SI->isVolatile())
+        return (!NullPointerIsDefined(SI->getFunction(),
+                                      SI->getPointerAddressSpace())) &&
+               SI->getPointerOperand() == I;
+
+    if (auto *CB = dyn_cast<CallBase>(Use)) {
+      if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
+        return false;
+      // A call to null is undefined.
+      if (CB->getCalledOperand() == I)
+        return true;
+
+      if (C->isNullValue()) {
+        for (const llvm::Use &Arg : CB->args())
+          if (Arg == I) {
+            unsigned ArgIdx = CB->getArgOperandNo(&Arg);
+            if (CB->isPassingUndefUB(ArgIdx) &&
+                CB->paramHasAttr(ArgIdx, Attribute::NonNull)) {
+              // Passing null to a nonnull+noundef argument is undefined.
+              return !PtrValueMayBeModified;
+            }
+          }
+      } else if (isa<UndefValue>(C)) {
+        // Passing undef to a noundef argument is undefined.
+        for (const llvm::Use &Arg : CB->args())
+          if (Arg == I) {
+            unsigned ArgIdx = CB->getArgOperandNo(&Arg);
+            if (CB->isPassingUndefUB(ArgIdx)) {
+              // Passing undef to a noundef argument is undefined.
+              return true;
+            }
+          }
+      }
+    }
+  }
+  return false;
+}
+
+/// If BB has an incoming value that will always trigger undefined behavior
+/// (e.g. null pointer dereference), remove the branch leading here.
+static bool removeUndefIntroducingPredecessor(BasicBlock *BB,
+                                              DomTreeUpdater *DTU) {
+  for (PHINode &PHI : BB->phis())
+    for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
+      if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
+        BasicBlock *Predecessor = PHI.getIncomingBlock(i);
+        Instruction *T = Predecessor->getTerminator();
+        IRBuilder<> Builder(T);
+        if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
+          BB->removePredecessor(Predecessor);
+          // Turn unconditional branches into unreachables and remove the dead
+          // destination from conditional branches.
+          if (BI->isUnconditional())
+            Builder.CreateUnreachable();
+          else {
+            // Preserve guarding condition in assume, because it might not be
+            // inferrable from any dominating condition.
+            Value *Cond = BI->getCondition();
+            if (BI->getSuccessor(0) == BB)
+              Builder.CreateAssumption(Builder.CreateNot(Cond));
+            else
+              Builder.CreateAssumption(Cond);
+            Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
+                                                       : BI->getSuccessor(0));
+          }
+          BI->eraseFromParent();
+          if (DTU)
+            DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
+          return true;
+        } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
+          // Redirect all branches leading to UB into
+          // a newly created unreachable block.
+          BasicBlock *Unreachable = BasicBlock::Create(
+              Predecessor->getContext(), "unreachable", BB->getParent(), BB);
+          Builder.SetInsertPoint(Unreachable);
+          // The new block contains only one instruction: Unreachable
+          Builder.CreateUnreachable();
+          for (const auto &Case : SI->cases())
+            if (Case.getCaseSuccessor() == BB) {
+              BB->removePredecessor(Predecessor);
+              Case.setSuccessor(Unreachable);
+            }
+          if (SI->getDefaultDest() == BB) {
+            BB->removePredecessor(Predecessor);
+            SI->setDefaultDest(Unreachable);
+          }
+
+          if (DTU)
+            DTU->applyUpdates(
+                { { DominatorTree::Insert, Predecessor, Unreachable },
+                  { DominatorTree::Delete, Predecessor, BB } });
+          return true;
+        }
+      }
+
+  return false;
+}
+
+bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
+  bool Changed = false;
+
+  assert(BB && BB->getParent() && "Block not embedded in function!");
+  assert(BB->getTerminator() && "Degenerate basic block encountered!");
+
+  // Remove basic blocks that have no predecessors (except the entry block)...
+  // or that just have themselves as a predecessor. These are unreachable.
+  if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
+      BB->getSinglePredecessor() == BB) {
+    LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
+    DeleteDeadBlock(BB, DTU);
+    return true;
+  }
+
+  // Check to see if we can constant propagate this terminator instruction
+  // away...
+  Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
+                                    /*TLI=*/nullptr, DTU);
+
+  // Check for and eliminate duplicate PHI nodes in this block.
+  Changed |= EliminateDuplicatePHINodes(BB);
+
+  // Check for and remove branches that will always cause undefined behavior.
+  if (removeUndefIntroducingPredecessor(BB, DTU))
+    return requestResimplify();
+
+  // Merge basic blocks into their predecessor if there is only one distinct
+  // pred, and if there is only one distinct successor of the predecessor, and
+  // if there are no PHI nodes.
+  if (MergeBlockIntoPredecessor(BB, DTU))
+    return true;
+
+  if (SinkCommon && Options.SinkCommonInsts)
+    if (SinkCommonCodeFromPredecessors(BB, DTU) ||
+        MergeCompatibleInvokes(BB, DTU)) {
+      // SinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
+      // so we may now have duplicate PHI's.
+      // Let's rerun EliminateDuplicatePHINodes() first,
+      // before FoldTwoEntryPHINode() potentially converts them into selects,
+      // after which we'd need a whole EarlyCSE pass run to clean them up.
+      return true;
+    }
+
+  IRBuilder<> Builder(BB);
+
+  if (Options.FoldTwoEntryPHINode) {
+    // If there is a trivial two-entry PHI node in this basic block, and we can
+    // eliminate it, do so now.
+    if (auto *PN = dyn_cast<PHINode>(BB->begin()))
+      if (PN->getNumIncomingValues() == 2)
+        if (FoldTwoEntryPHINode(PN, TTI, DTU, DL))
+          return true;
+  }
+
+  Instruction *Terminator = BB->getTerminator();
+  Builder.SetInsertPoint(Terminator);
+  switch (Terminator->getOpcode()) {
+  case Instruction::Br:
+    Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
+    break;
+  case Instruction::Resume:
+    Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
+    break;
+  case Instruction::CleanupRet:
+    Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
+    break;
+  case Instruction::Switch:
+    Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
+    break;
+  case Instruction::Unreachable:
+    Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
+    break;
+  case Instruction::IndirectBr:
+    Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
+    break;
+  }
+
+  return Changed;
+}
+
+bool SimplifyCFGOpt::run(BasicBlock *BB) {
+  bool Changed = false;
+
+  // Repeatedly simplify BB as long as resimplification is requested.
+  do {
+    Resimplify = false;
+
+    // Perform one round of simplification. The Resimplify flag will be set if
+    // another iteration is requested.
+ Changed |= simplifyOnce(BB); + } while (Resimplify); + + return Changed; +} + +bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, + DomTreeUpdater *DTU, const SimplifyCFGOptions &Options, + ArrayRef<WeakVH> LoopHeaders) { + return SimplifyCFGOpt(TTI, DTU, BB->getModule()->getDataLayout(), LoopHeaders, + Options) + .run(BB); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/SimplifyIndVar.cpp new file mode 100644 index 0000000000..4e83d2f6e3 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -0,0 +1,2089 @@ +//===-- SimplifyIndVar.cpp - Induction variable simplification ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements induction variable simplification. It does +// not define any actual pass or policy, but provides a single function to +// simplify a loop's induction variables based on ScalarEvolution. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SimplifyIndVar.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" + +using namespace llvm; + +#define DEBUG_TYPE "indvars" + +STATISTIC(NumElimIdentity, "Number of IV identities eliminated"); +STATISTIC(NumElimOperand, "Number of IV operands folded into a use"); +STATISTIC(NumFoldedUser, "Number of IV users folded into a constant"); +STATISTIC(NumElimRem , "Number of IV remainder operations eliminated"); +STATISTIC( + NumSimplifiedSDiv, + "Number of IV signed division operations converted to unsigned division"); +STATISTIC( + NumSimplifiedSRem, + "Number of IV signed remainder operations converted to unsigned remainder"); +STATISTIC(NumElimCmp , "Number of IV comparisons eliminated"); + +namespace { + /// This is a utility for simplifying induction variables + /// based on ScalarEvolution. It is the primary instrument of the + /// IndvarSimplify pass, but it may also be directly invoked to cleanup after + /// other loop passes that preserve SCEV. + class SimplifyIndvar { + Loop *L; + LoopInfo *LI; + ScalarEvolution *SE; + DominatorTree *DT; + const TargetTransformInfo *TTI; + SCEVExpander &Rewriter; + SmallVectorImpl<WeakTrackingVH> &DeadInsts; + + bool Changed = false; + + public: + SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT, + LoopInfo *LI, const TargetTransformInfo *TTI, + SCEVExpander &Rewriter, + SmallVectorImpl<WeakTrackingVH> &Dead) + : L(Loop), LI(LI), SE(SE), DT(DT), TTI(TTI), Rewriter(Rewriter), + DeadInsts(Dead) { + assert(LI && "IV simplification requires LoopInfo"); + } + + bool hasChanged() const { return Changed; } + + /// Iteratively perform simplification on a worklist of users of the + /// specified induction variable. This is the top-level driver that applies + /// all simplifications to users of an IV. 
+ void simplifyUsers(PHINode *CurrIV, IVVisitor *V = nullptr); + + Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand); + + bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand); + bool replaceIVUserWithLoopInvariant(Instruction *UseInst); + bool replaceFloatIVWithIntegerIV(Instruction *UseInst); + + bool eliminateOverflowIntrinsic(WithOverflowInst *WO); + bool eliminateSaturatingIntrinsic(SaturatingInst *SI); + bool eliminateTrunc(TruncInst *TI); + bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand); + bool makeIVComparisonInvariant(ICmpInst *ICmp, Instruction *IVOperand); + void eliminateIVComparison(ICmpInst *ICmp, Instruction *IVOperand); + void simplifyIVRemainder(BinaryOperator *Rem, Instruction *IVOperand, + bool IsSigned); + void replaceRemWithNumerator(BinaryOperator *Rem); + void replaceRemWithNumeratorOrZero(BinaryOperator *Rem); + void replaceSRemWithURem(BinaryOperator *Rem); + bool eliminateSDiv(BinaryOperator *SDiv); + bool strengthenOverflowingOperation(BinaryOperator *OBO, + Instruction *IVOperand); + bool strengthenRightShift(BinaryOperator *BO, Instruction *IVOperand); + }; +} + +/// Find a point in code which dominates all given instructions. We can safely +/// assume that, whatever fact we can prove at the found point, this fact is +/// also true for each of the given instructions. +static Instruction *findCommonDominator(ArrayRef<Instruction *> Instructions, + DominatorTree &DT) { + Instruction *CommonDom = nullptr; + for (auto *Insn : Instructions) + CommonDom = + CommonDom ? DT.findNearestCommonDominator(CommonDom, Insn) : Insn; + assert(CommonDom && "Common dominator not found?"); + return CommonDom; +} + +/// Fold an IV operand into its use. This removes increments of an +/// aligned IV when used by a instruction that ignores the low bits. +/// +/// IVOperand is guaranteed SCEVable, but UseInst may not be. +/// +/// Return the operand of IVOperand for this induction variable if IVOperand can +/// be folded (in case more folding opportunities have been exposed). +/// Otherwise return null. +Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) { + Value *IVSrc = nullptr; + const unsigned OperIdx = 0; + const SCEV *FoldedExpr = nullptr; + bool MustDropExactFlag = false; + switch (UseInst->getOpcode()) { + default: + return nullptr; + case Instruction::UDiv: + case Instruction::LShr: + // We're only interested in the case where we know something about + // the numerator and have a constant denominator. + if (IVOperand != UseInst->getOperand(OperIdx) || + !isa<ConstantInt>(UseInst->getOperand(1))) + return nullptr; + + // Attempt to fold a binary operator with constant operand. + // e.g. ((I + 1) >> 2) => I >> 2 + if (!isa<BinaryOperator>(IVOperand) + || !isa<ConstantInt>(IVOperand->getOperand(1))) + return nullptr; + + IVSrc = IVOperand->getOperand(0); + // IVSrc must be the (SCEVable) IV, since the other operand is const. + assert(SE->isSCEVable(IVSrc->getType()) && "Expect SCEVable IV operand"); + + ConstantInt *D = cast<ConstantInt>(UseInst->getOperand(1)); + if (UseInst->getOpcode() == Instruction::LShr) { + // Get a constant for the divisor. See createSCEV. 
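+ // e.g. (illustrative) for `lshr i32 %x, 2` the shift amount 2 becomes the
+ // divisor 4 (1 << 2), so the SCEV check below treats the shift as an
+ // unsigned division.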
+ uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth(); + if (D->getValue().uge(BitWidth)) + return nullptr; + + D = ConstantInt::get(UseInst->getContext(), + APInt::getOneBitSet(BitWidth, D->getZExtValue())); + } + const auto *LHS = SE->getSCEV(IVSrc); + const auto *RHS = SE->getSCEV(D); + FoldedExpr = SE->getUDivExpr(LHS, RHS); + // We might have 'exact' flag set at this point which will no longer be + // correct after we make the replacement. + if (UseInst->isExact() && LHS != SE->getMulExpr(FoldedExpr, RHS)) + MustDropExactFlag = true; + } + // We have something that might fold it's operand. Compare SCEVs. + if (!SE->isSCEVable(UseInst->getType())) + return nullptr; + + // Bypass the operand if SCEV can prove it has no effect. + if (SE->getSCEV(UseInst) != FoldedExpr) + return nullptr; + + LLVM_DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand + << " -> " << *UseInst << '\n'); + + UseInst->setOperand(OperIdx, IVSrc); + assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper"); + + if (MustDropExactFlag) + UseInst->dropPoisonGeneratingFlags(); + + ++NumElimOperand; + Changed = true; + if (IVOperand->use_empty()) + DeadInsts.emplace_back(IVOperand); + return IVSrc; +} + +bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp, + Instruction *IVOperand) { + auto *Preheader = L->getLoopPreheader(); + if (!Preheader) + return false; + unsigned IVOperIdx = 0; + ICmpInst::Predicate Pred = ICmp->getPredicate(); + if (IVOperand != ICmp->getOperand(0)) { + // Swapped + assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand"); + IVOperIdx = 1; + Pred = ICmpInst::getSwappedPredicate(Pred); + } + + // Get the SCEVs for the ICmp operands (in the specific context of the + // current loop) + const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent()); + const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop); + const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop); + auto LIP = SE->getLoopInvariantPredicate(Pred, S, X, L, ICmp); + if (!LIP) + return false; + ICmpInst::Predicate InvariantPredicate = LIP->Pred; + const SCEV *InvariantLHS = LIP->LHS; + const SCEV *InvariantRHS = LIP->RHS; + + // Do not generate something ridiculous. + auto *PHTerm = Preheader->getTerminator(); + if (Rewriter.isHighCostExpansion({ InvariantLHS, InvariantRHS }, L, + 2 * SCEVCheapExpansionBudget, TTI, PHTerm)) + return false; + auto *NewLHS = + Rewriter.expandCodeFor(InvariantLHS, IVOperand->getType(), PHTerm); + auto *NewRHS = + Rewriter.expandCodeFor(InvariantRHS, IVOperand->getType(), PHTerm); + LLVM_DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n'); + ICmp->setPredicate(InvariantPredicate); + ICmp->setOperand(0, NewLHS); + ICmp->setOperand(1, NewRHS); + return true; +} + +/// SimplifyIVUsers helper for eliminating useless +/// comparisons against an induction variable. 
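+/// For example (illustrative), `icmp ult i32 %iv, %n` is folded to a constant
+/// when SCEV can evaluate the predicate at the users' common dominator;
+/// otherwise the helpers below try to make it loop-invariant or turn it into
+/// an unsigned comparison.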
+void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, + Instruction *IVOperand) { + unsigned IVOperIdx = 0; + ICmpInst::Predicate Pred = ICmp->getPredicate(); + ICmpInst::Predicate OriginalPred = Pred; + if (IVOperand != ICmp->getOperand(0)) { + // Swapped + assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand"); + IVOperIdx = 1; + Pred = ICmpInst::getSwappedPredicate(Pred); + } + + // Get the SCEVs for the ICmp operands (in the specific context of the + // current loop) + const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent()); + const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop); + const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop); + + // If the condition is always true or always false in the given context, + // replace it with a constant value. + SmallVector<Instruction *, 4> Users; + for (auto *U : ICmp->users()) + Users.push_back(cast<Instruction>(U)); + const Instruction *CtxI = findCommonDominator(Users, *DT); + if (auto Ev = SE->evaluatePredicateAt(Pred, S, X, CtxI)) { + SE->forgetValue(ICmp); + ICmp->replaceAllUsesWith(ConstantInt::getBool(ICmp->getContext(), *Ev)); + DeadInsts.emplace_back(ICmp); + LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); + } else if (makeIVComparisonInvariant(ICmp, IVOperand)) { + // fallthrough to end of function + } else if (ICmpInst::isSigned(OriginalPred) && + SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) { + // If we were unable to make anything above, all we can is to canonicalize + // the comparison hoping that it will open the doors for other + // optimizations. If we find out that we compare two non-negative values, + // we turn the instruction's predicate to its unsigned version. Note that + // we cannot rely on Pred here unless we check if we have swapped it. + assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?"); + LLVM_DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp + << '\n'); + ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred)); + } else + return; + + ++NumElimCmp; + Changed = true; +} + +bool SimplifyIndvar::eliminateSDiv(BinaryOperator *SDiv) { + // Get the SCEVs for the ICmp operands. + auto *N = SE->getSCEV(SDiv->getOperand(0)); + auto *D = SE->getSCEV(SDiv->getOperand(1)); + + // Simplify unnecessary loops away. + const Loop *L = LI->getLoopFor(SDiv->getParent()); + N = SE->getSCEVAtScope(N, L); + D = SE->getSCEVAtScope(D, L); + + // Replace sdiv by udiv if both of the operands are non-negative + if (SE->isKnownNonNegative(N) && SE->isKnownNonNegative(D)) { + auto *UDiv = BinaryOperator::Create( + BinaryOperator::UDiv, SDiv->getOperand(0), SDiv->getOperand(1), + SDiv->getName() + ".udiv", SDiv); + UDiv->setIsExact(SDiv->isExact()); + SDiv->replaceAllUsesWith(UDiv); + LLVM_DEBUG(dbgs() << "INDVARS: Simplified sdiv: " << *SDiv << '\n'); + ++NumSimplifiedSDiv; + Changed = true; + DeadInsts.push_back(SDiv); + return true; + } + + return false; +} + +// i %s n -> i %u n if i >= 0 and n >= 0 +void SimplifyIndvar::replaceSRemWithURem(BinaryOperator *Rem) { + auto *N = Rem->getOperand(0), *D = Rem->getOperand(1); + auto *URem = BinaryOperator::Create(BinaryOperator::URem, N, D, + Rem->getName() + ".urem", Rem); + Rem->replaceAllUsesWith(URem); + LLVM_DEBUG(dbgs() << "INDVARS: Simplified srem: " << *Rem << '\n'); + ++NumSimplifiedSRem; + Changed = true; + DeadInsts.emplace_back(Rem); +} + +// i % n --> i if i is in [0,n). 
+void SimplifyIndvar::replaceRemWithNumerator(BinaryOperator *Rem) { + Rem->replaceAllUsesWith(Rem->getOperand(0)); + LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); + ++NumElimRem; + Changed = true; + DeadInsts.emplace_back(Rem); +} + +// (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). +void SimplifyIndvar::replaceRemWithNumeratorOrZero(BinaryOperator *Rem) { + auto *T = Rem->getType(); + auto *N = Rem->getOperand(0), *D = Rem->getOperand(1); + ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, N, D); + SelectInst *Sel = + SelectInst::Create(ICmp, ConstantInt::get(T, 0), N, "iv.rem", Rem); + Rem->replaceAllUsesWith(Sel); + LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); + ++NumElimRem; + Changed = true; + DeadInsts.emplace_back(Rem); +} + +/// SimplifyIVUsers helper for eliminating useless remainder operations +/// operating on an induction variable or replacing srem by urem. +void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem, + Instruction *IVOperand, + bool IsSigned) { + auto *NValue = Rem->getOperand(0); + auto *DValue = Rem->getOperand(1); + // We're only interested in the case where we know something about + // the numerator, unless it is a srem, because we want to replace srem by urem + // in general. + bool UsedAsNumerator = IVOperand == NValue; + if (!UsedAsNumerator && !IsSigned) + return; + + const SCEV *N = SE->getSCEV(NValue); + + // Simplify unnecessary loops away. + const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent()); + N = SE->getSCEVAtScope(N, ICmpLoop); + + bool IsNumeratorNonNegative = !IsSigned || SE->isKnownNonNegative(N); + + // Do not proceed if the Numerator may be negative + if (!IsNumeratorNonNegative) + return; + + const SCEV *D = SE->getSCEV(DValue); + D = SE->getSCEVAtScope(D, ICmpLoop); + + if (UsedAsNumerator) { + auto LT = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; + if (SE->isKnownPredicate(LT, N, D)) { + replaceRemWithNumerator(Rem); + return; + } + + auto *T = Rem->getType(); + const auto *NLessOne = SE->getMinusSCEV(N, SE->getOne(T)); + if (SE->isKnownPredicate(LT, NLessOne, D)) { + replaceRemWithNumeratorOrZero(Rem); + return; + } + } + + // Try to replace SRem with URem, if both N and D are known non-negative. + // Since we had already check N, we only need to check D now + if (!IsSigned || !SE->isKnownNonNegative(D)) + return; + + replaceSRemWithURem(Rem); +} + +bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst *WO) { + const SCEV *LHS = SE->getSCEV(WO->getLHS()); + const SCEV *RHS = SE->getSCEV(WO->getRHS()); + if (!SE->willNotOverflow(WO->getBinaryOp(), WO->isSigned(), LHS, RHS)) + return false; + + // Proved no overflow, nuke the overflow check and, if possible, the overflow + // intrinsic as well. 
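+ // e.g. (illustrative) `llvm.sadd.with.overflow.i32(%a, %b)` becomes a plain
+ // `add nsw i32 %a, %b`, and extracted overflow bits are replaced with `false`.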
+ + BinaryOperator *NewResult = BinaryOperator::Create( + WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), "", WO); + + if (WO->isSigned()) + NewResult->setHasNoSignedWrap(true); + else + NewResult->setHasNoUnsignedWrap(true); + + SmallVector<ExtractValueInst *, 4> ToDelete; + + for (auto *U : WO->users()) { + if (auto *EVI = dyn_cast<ExtractValueInst>(U)) { + if (EVI->getIndices()[0] == 1) + EVI->replaceAllUsesWith(ConstantInt::getFalse(WO->getContext())); + else { + assert(EVI->getIndices()[0] == 0 && "Only two possibilities!"); + EVI->replaceAllUsesWith(NewResult); + } + ToDelete.push_back(EVI); + } + } + + for (auto *EVI : ToDelete) + EVI->eraseFromParent(); + + if (WO->use_empty()) + WO->eraseFromParent(); + + Changed = true; + return true; +} + +bool SimplifyIndvar::eliminateSaturatingIntrinsic(SaturatingInst *SI) { + const SCEV *LHS = SE->getSCEV(SI->getLHS()); + const SCEV *RHS = SE->getSCEV(SI->getRHS()); + if (!SE->willNotOverflow(SI->getBinaryOp(), SI->isSigned(), LHS, RHS)) + return false; + + BinaryOperator *BO = BinaryOperator::Create( + SI->getBinaryOp(), SI->getLHS(), SI->getRHS(), SI->getName(), SI); + if (SI->isSigned()) + BO->setHasNoSignedWrap(); + else + BO->setHasNoUnsignedWrap(); + + SI->replaceAllUsesWith(BO); + DeadInsts.emplace_back(SI); + Changed = true; + return true; +} + +bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) { + // It is always legal to replace + // icmp <pred> i32 trunc(iv), n + // with + // icmp <pred> i64 sext(trunc(iv)), sext(n), if pred is signed predicate. + // Or with + // icmp <pred> i64 zext(trunc(iv)), zext(n), if pred is unsigned predicate. + // Or with either of these if pred is an equality predicate. + // + // If we can prove that iv == sext(trunc(iv)) or iv == zext(trunc(iv)) for + // every comparison which uses trunc, it means that we can replace each of + // them with comparison of iv against sext/zext(n). We no longer need trunc + // after that. + // + // TODO: Should we do this if we can widen *some* comparisons, but not all + // of them? Sometimes it is enough to enable other optimizations, but the + // trunc instruction will stay in the loop. + Value *IV = TI->getOperand(0); + Type *IVTy = IV->getType(); + const SCEV *IVSCEV = SE->getSCEV(IV); + const SCEV *TISCEV = SE->getSCEV(TI); + + // Check if iv == zext(trunc(iv)) and if iv == sext(trunc(iv)). If so, we can + // get rid of trunc + bool DoesSExtCollapse = false; + bool DoesZExtCollapse = false; + if (IVSCEV == SE->getSignExtendExpr(TISCEV, IVTy)) + DoesSExtCollapse = true; + if (IVSCEV == SE->getZeroExtendExpr(TISCEV, IVTy)) + DoesZExtCollapse = true; + + // If neither sext nor zext does collapse, it is not profitable to do any + // transform. Bail. + if (!DoesSExtCollapse && !DoesZExtCollapse) + return false; + + // Collect users of the trunc that look like comparisons against invariants. + // Bail if we find something different. + SmallVector<ICmpInst *, 4> ICmpUsers; + for (auto *U : TI->users()) { + // We don't care about users in unreachable blocks. + if (isa<Instruction>(U) && + !DT->isReachableFromEntry(cast<Instruction>(U)->getParent())) + continue; + ICmpInst *ICI = dyn_cast<ICmpInst>(U); + if (!ICI) return false; + assert(L->contains(ICI->getParent()) && "LCSSA form broken?"); + if (!(ICI->getOperand(0) == TI && L->isLoopInvariant(ICI->getOperand(1))) && + !(ICI->getOperand(1) == TI && L->isLoopInvariant(ICI->getOperand(0)))) + return false; + // If we cannot get rid of trunc, bail. 
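+ // (a signed predicate needs the sext to collapse, an unsigned one the zext)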
+ if (ICI->isSigned() && !DoesSExtCollapse) + return false; + if (ICI->isUnsigned() && !DoesZExtCollapse) + return false; + // For equality, either signed or unsigned works. + ICmpUsers.push_back(ICI); + } + + auto CanUseZExt = [&](ICmpInst *ICI) { + // Unsigned comparison can be widened as unsigned. + if (ICI->isUnsigned()) + return true; + // Is it profitable to do zext? + if (!DoesZExtCollapse) + return false; + // For equality, we can safely zext both parts. + if (ICI->isEquality()) + return true; + // Otherwise we can only use zext when comparing two non-negative or two + // negative values. But in practice, we will never pass DoesZExtCollapse + // check for a negative value, because zext(trunc(x)) is non-negative. So + // it only make sense to check for non-negativity here. + const SCEV *SCEVOP1 = SE->getSCEV(ICI->getOperand(0)); + const SCEV *SCEVOP2 = SE->getSCEV(ICI->getOperand(1)); + return SE->isKnownNonNegative(SCEVOP1) && SE->isKnownNonNegative(SCEVOP2); + }; + // Replace all comparisons against trunc with comparisons against IV. + for (auto *ICI : ICmpUsers) { + bool IsSwapped = L->isLoopInvariant(ICI->getOperand(0)); + auto *Op1 = IsSwapped ? ICI->getOperand(0) : ICI->getOperand(1); + Instruction *Ext = nullptr; + // For signed/unsigned predicate, replace the old comparison with comparison + // of immediate IV against sext/zext of the invariant argument. If we can + // use either sext or zext (i.e. we are dealing with equality predicate), + // then prefer zext as a more canonical form. + // TODO: If we see a signed comparison which can be turned into unsigned, + // we can do it here for canonicalization purposes. + ICmpInst::Predicate Pred = ICI->getPredicate(); + if (IsSwapped) Pred = ICmpInst::getSwappedPredicate(Pred); + if (CanUseZExt(ICI)) { + assert(DoesZExtCollapse && "Unprofitable zext?"); + Ext = new ZExtInst(Op1, IVTy, "zext", ICI); + Pred = ICmpInst::getUnsignedPredicate(Pred); + } else { + assert(DoesSExtCollapse && "Unprofitable sext?"); + Ext = new SExtInst(Op1, IVTy, "sext", ICI); + assert(Pred == ICmpInst::getSignedPredicate(Pred) && "Must be signed!"); + } + bool Changed; + L->makeLoopInvariant(Ext, Changed); + (void)Changed; + ICmpInst *NewICI = new ICmpInst(ICI, Pred, IV, Ext); + ICI->replaceAllUsesWith(NewICI); + DeadInsts.emplace_back(ICI); + } + + // Trunc no longer needed. + TI->replaceAllUsesWith(PoisonValue::get(TI->getType())); + DeadInsts.emplace_back(TI); + return true; +} + +/// Eliminate an operation that consumes a simple IV and has no observable +/// side-effect given the range of IV values. IVOperand is guaranteed SCEVable, +/// but UseInst may not be. 
+bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, + Instruction *IVOperand) { + if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { + eliminateIVComparison(ICmp, IVOperand); + return true; + } + if (BinaryOperator *Bin = dyn_cast<BinaryOperator>(UseInst)) { + bool IsSRem = Bin->getOpcode() == Instruction::SRem; + if (IsSRem || Bin->getOpcode() == Instruction::URem) { + simplifyIVRemainder(Bin, IVOperand, IsSRem); + return true; + } + + if (Bin->getOpcode() == Instruction::SDiv) + return eliminateSDiv(Bin); + } + + if (auto *WO = dyn_cast<WithOverflowInst>(UseInst)) + if (eliminateOverflowIntrinsic(WO)) + return true; + + if (auto *SI = dyn_cast<SaturatingInst>(UseInst)) + if (eliminateSaturatingIntrinsic(SI)) + return true; + + if (auto *TI = dyn_cast<TruncInst>(UseInst)) + if (eliminateTrunc(TI)) + return true; + + if (eliminateIdentitySCEV(UseInst, IVOperand)) + return true; + + return false; +} + +static Instruction *GetLoopInvariantInsertPosition(Loop *L, Instruction *Hint) { + if (auto *BB = L->getLoopPreheader()) + return BB->getTerminator(); + + return Hint; +} + +/// Replace the UseInst with a loop invariant expression if it is safe. +bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) { + if (!SE->isSCEVable(I->getType())) + return false; + + // Get the symbolic expression for this instruction. + const SCEV *S = SE->getSCEV(I); + + if (!SE->isLoopInvariant(S, L)) + return false; + + // Do not generate something ridiculous even if S is loop invariant. + if (Rewriter.isHighCostExpansion(S, L, SCEVCheapExpansionBudget, TTI, I)) + return false; + + auto *IP = GetLoopInvariantInsertPosition(L, I); + + if (!Rewriter.isSafeToExpandAt(S, IP)) { + LLVM_DEBUG(dbgs() << "INDVARS: Can not replace IV user: " << *I + << " with non-speculable loop invariant: " << *S << '\n'); + return false; + } + + auto *Invariant = Rewriter.expandCodeFor(S, I->getType(), IP); + + I->replaceAllUsesWith(Invariant); + LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *I + << " with loop invariant: " << *S << '\n'); + ++NumFoldedUser; + Changed = true; + DeadInsts.emplace_back(I); + return true; +} + +/// Eliminate redundant type cast between integer and float. +bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) { + if (UseInst->getOpcode() != CastInst::SIToFP && + UseInst->getOpcode() != CastInst::UIToFP) + return false; + + Instruction *IVOperand = cast<Instruction>(UseInst->getOperand(0)); + // Get the symbolic expression for this instruction. + const SCEV *IV = SE->getSCEV(IVOperand); + unsigned MaskBits; + if (UseInst->getOpcode() == CastInst::SIToFP) + MaskBits = SE->getSignedRange(IV).getMinSignedBits(); + else + MaskBits = SE->getUnsignedRange(IV).getActiveBits(); + unsigned DestNumSigBits = UseInst->getType()->getFPMantissaWidth(); + if (MaskBits <= DestNumSigBits) { + for (User *U : UseInst->users()) { + // Match for fptosi/fptoui of sitofp and with same type. + auto *CI = dyn_cast<CastInst>(U); + if (!CI) + continue; + + CastInst::CastOps Opcode = CI->getOpcode(); + if (Opcode != CastInst::FPToSI && Opcode != CastInst::FPToUI) + continue; + + Value *Conv = nullptr; + if (IVOperand->getType() != CI->getType()) { + IRBuilder<> Builder(CI); + StringRef Name = IVOperand->getName(); + // To match InstCombine logic, we only need sext if both fptosi and + // sitofp are used. If one of them is unsigned, then we can use zext. 
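+ // e.g. (illustrative) `fptosi (sitofp i32 %iv to double) to i64` becomes
+ // `sext i32 %iv to i64` once the mantissa-width check above shows that no
+ // significant bits are lost.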
+ if (SE->getTypeSizeInBits(IVOperand->getType()) > + SE->getTypeSizeInBits(CI->getType())) { + Conv = Builder.CreateTrunc(IVOperand, CI->getType(), Name + ".trunc"); + } else if (Opcode == CastInst::FPToUI || + UseInst->getOpcode() == CastInst::UIToFP) { + Conv = Builder.CreateZExt(IVOperand, CI->getType(), Name + ".zext"); + } else { + Conv = Builder.CreateSExt(IVOperand, CI->getType(), Name + ".sext"); + } + } else + Conv = IVOperand; + + CI->replaceAllUsesWith(Conv); + DeadInsts.push_back(CI); + LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *CI + << " with: " << *Conv << '\n'); + + ++NumFoldedUser; + Changed = true; + } + } + + return Changed; +} + +/// Eliminate any operation that SCEV can prove is an identity function. +bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst, + Instruction *IVOperand) { + if (!SE->isSCEVable(UseInst->getType()) || + (UseInst->getType() != IVOperand->getType()) || + (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand))) + return false; + + // getSCEV(X) == getSCEV(Y) does not guarantee that X and Y are related in the + // dominator tree, even if X is an operand to Y. For instance, in + // + // %iv = phi i32 {0,+,1} + // br %cond, label %left, label %merge + // + // left: + // %X = add i32 %iv, 0 + // br label %merge + // + // merge: + // %M = phi (%X, %iv) + // + // getSCEV(%M) == getSCEV(%X) == {0,+,1}, but %X does not dominate %M, and + // %M.replaceAllUsesWith(%X) would be incorrect. + + if (isa<PHINode>(UseInst)) + // If UseInst is not a PHI node then we know that IVOperand dominates + // UseInst directly from the legality of SSA. + if (!DT || !DT->dominates(IVOperand, UseInst)) + return false; + + if (!LI->replacementPreservesLCSSAForm(UseInst, IVOperand)) + return false; + + LLVM_DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n'); + + SE->forgetValue(UseInst); + UseInst->replaceAllUsesWith(IVOperand); + ++NumElimIdentity; + Changed = true; + DeadInsts.emplace_back(UseInst); + return true; +} + +/// Annotate BO with nsw / nuw if it provably does not signed-overflow / +/// unsigned-overflow. Returns true if anything changed, false otherwise. +bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, + Instruction *IVOperand) { + auto Flags = SE->getStrengthenedNoWrapFlagsFromBinOp( + cast<OverflowingBinaryOperator>(BO)); + + if (!Flags) + return false; + + BO->setHasNoUnsignedWrap(ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == + SCEV::FlagNUW); + BO->setHasNoSignedWrap(ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == + SCEV::FlagNSW); + + // The getStrengthenedNoWrapFlagsFromBinOp() check inferred additional nowrap + // flags on addrecs while performing zero/sign extensions. We could call + // forgetValue() here to make sure those flags also propagate to any other + // SCEV expressions based on the addrec. However, this can have pathological + // compile-time impact, see https://bugs.llvm.org/show_bug.cgi?id=50384. + return true; +} + +/// Annotate the Shr in (X << IVOperand) >> C as exact using the +/// information from the IV's range. Returns true if anything changed, false +/// otherwise. 
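+/// e.g. (illustrative) in (X << %iv) >> 3 the right shift can be marked exact
+/// when %iv's unsigned minimum is at least 3.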
+bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO, + Instruction *IVOperand) { + using namespace llvm::PatternMatch; + + if (BO->getOpcode() == Instruction::Shl) { + bool Changed = false; + ConstantRange IVRange = SE->getUnsignedRange(SE->getSCEV(IVOperand)); + for (auto *U : BO->users()) { + const APInt *C; + if (match(U, + m_AShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C))) || + match(U, + m_LShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C)))) { + BinaryOperator *Shr = cast<BinaryOperator>(U); + if (!Shr->isExact() && IVRange.getUnsignedMin().uge(*C)) { + Shr->setIsExact(true); + Changed = true; + } + } + } + return Changed; + } + + return false; +} + +/// Add all uses of Def to the current IV's worklist. +static void pushIVUsers( + Instruction *Def, Loop *L, + SmallPtrSet<Instruction*,16> &Simplified, + SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) { + + for (User *U : Def->users()) { + Instruction *UI = cast<Instruction>(U); + + // Avoid infinite or exponential worklist processing. + // Also ensure unique worklist users. + // If Def is a LoopPhi, it may not be in the Simplified set, so check for + // self edges first. + if (UI == Def) + continue; + + // Only change the current Loop, do not change the other parts (e.g. other + // Loops). + if (!L->contains(UI)) + continue; + + // Do not push the same instruction more than once. + if (!Simplified.insert(UI).second) + continue; + + SimpleIVUsers.push_back(std::make_pair(UI, Def)); + } +} + +/// Return true if this instruction generates a simple SCEV +/// expression in terms of that IV. +/// +/// This is similar to IVUsers' isInteresting() but processes each instruction +/// non-recursively when the operand is already known to be a simpleIVUser. +/// +static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) { + if (!SE->isSCEVable(I->getType())) + return false; + + // Get the symbolic expression for this instruction. + const SCEV *S = SE->getSCEV(I); + + // Only consider affine recurrences. + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S); + if (AR && AR->getLoop() == L) + return true; + + return false; +} + +/// Iteratively perform simplification on a worklist of users +/// of the specified induction variable. Each successive simplification may push +/// more users which may themselves be candidates for simplification. +/// +/// This algorithm does not require IVUsers analysis. Instead, it simplifies +/// instructions in-place during analysis. Rather than rewriting induction +/// variables bottom-up from their users, it transforms a chain of IVUsers +/// top-down, updating the IR only when it encounters a clear optimization +/// opportunity. +/// +/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers. +/// +void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { + if (!SE->isSCEVable(CurrIV->getType())) + return; + + // Instructions processed by SimplifyIndvar for CurrIV. + SmallPtrSet<Instruction*,16> Simplified; + + // Use-def pairs if IV users waiting to be processed for CurrIV. + SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers; + + // Push users of the current LoopPhi. In rare cases, pushIVUsers may be + // called multiple times for the same LoopPhi. This is the proper thing to + // do for loop header phis that use each other. 
+ pushIVUsers(CurrIV, L, Simplified, SimpleIVUsers); + + while (!SimpleIVUsers.empty()) { + std::pair<Instruction*, Instruction*> UseOper = + SimpleIVUsers.pop_back_val(); + Instruction *UseInst = UseOper.first; + + // If a user of the IndVar is trivially dead, we prefer just to mark it dead + // rather than try to do some complex analysis or transformation (such as + // widening) basing on it. + // TODO: Propagate TLI and pass it here to handle more cases. + if (isInstructionTriviallyDead(UseInst, /* TLI */ nullptr)) { + DeadInsts.emplace_back(UseInst); + continue; + } + + // Bypass back edges to avoid extra work. + if (UseInst == CurrIV) continue; + + // Try to replace UseInst with a loop invariant before any other + // simplifications. + if (replaceIVUserWithLoopInvariant(UseInst)) + continue; + + Instruction *IVOperand = UseOper.second; + for (unsigned N = 0; IVOperand; ++N) { + assert(N <= Simplified.size() && "runaway iteration"); + (void) N; + + Value *NewOper = foldIVUser(UseInst, IVOperand); + if (!NewOper) + break; // done folding + IVOperand = dyn_cast<Instruction>(NewOper); + } + if (!IVOperand) + continue; + + if (eliminateIVUser(UseInst, IVOperand)) { + pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers); + continue; + } + + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseInst)) { + if ((isa<OverflowingBinaryOperator>(BO) && + strengthenOverflowingOperation(BO, IVOperand)) || + (isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand))) { + // re-queue uses of the now modified binary operator and fall + // through to the checks that remain. + pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers); + } + } + + // Try to use integer induction for FPToSI of float induction directly. + if (replaceFloatIVWithIntegerIV(UseInst)) { + // Re-queue the potentially new direct uses of IVOperand. + pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers); + continue; + } + + CastInst *Cast = dyn_cast<CastInst>(UseInst); + if (V && Cast) { + V->visitCast(Cast); + continue; + } + if (isSimpleIVUser(UseInst, L, SE)) { + pushIVUsers(UseInst, L, Simplified, SimpleIVUsers); + } + } +} + +namespace llvm { + +void IVVisitor::anchor() { } + +/// Simplify instructions that use this induction variable +/// by using ScalarEvolution to analyze the IV's recurrence. +bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, + LoopInfo *LI, const TargetTransformInfo *TTI, + SmallVectorImpl<WeakTrackingVH> &Dead, + SCEVExpander &Rewriter, IVVisitor *V) { + SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, TTI, + Rewriter, Dead); + SIV.simplifyUsers(CurrIV, V); + return SIV.hasChanged(); +} + +/// Simplify users of induction variables within this +/// loop. This does not actually change or add IVs. +bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, + LoopInfo *LI, const TargetTransformInfo *TTI, + SmallVectorImpl<WeakTrackingVH> &Dead) { + SCEVExpander Rewriter(*SE, SE->getDataLayout(), "indvars"); +#ifndef NDEBUG + Rewriter.setDebugType(DEBUG_TYPE); +#endif + bool Changed = false; + for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { + Changed |= + simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, TTI, Dead, Rewriter); + } + return Changed; +} + +} // namespace llvm + +namespace { +//===----------------------------------------------------------------------===// +// Widen Induction Variables - Extend the width of an IV to cover its +// widest uses. 
+//===----------------------------------------------------------------------===// + +class WidenIV { + // Parameters + PHINode *OrigPhi; + Type *WideType; + + // Context + LoopInfo *LI; + Loop *L; + ScalarEvolution *SE; + DominatorTree *DT; + + // Does the module have any calls to the llvm.experimental.guard intrinsic + // at all? If not we can avoid scanning instructions looking for guards. + bool HasGuards; + + bool UsePostIncrementRanges; + + // Statistics + unsigned NumElimExt = 0; + unsigned NumWidened = 0; + + // Result + PHINode *WidePhi = nullptr; + Instruction *WideInc = nullptr; + const SCEV *WideIncExpr = nullptr; + SmallVectorImpl<WeakTrackingVH> &DeadInsts; + + SmallPtrSet<Instruction *,16> Widened; + + enum class ExtendKind { Zero, Sign, Unknown }; + + // A map tracking the kind of extension used to widen each narrow IV + // and narrow IV user. + // Key: pointer to a narrow IV or IV user. + // Value: the kind of extension used to widen this Instruction. + DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap; + + using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>; + + // A map with control-dependent ranges for post increment IV uses. The key is + // a pair of IV def and a use of this def denoting the context. The value is + // a ConstantRange representing possible values of the def at the given + // context. + DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos; + + std::optional<ConstantRange> getPostIncRangeInfo(Value *Def, + Instruction *UseI) { + DefUserPair Key(Def, UseI); + auto It = PostIncRangeInfos.find(Key); + return It == PostIncRangeInfos.end() + ? std::optional<ConstantRange>(std::nullopt) + : std::optional<ConstantRange>(It->second); + } + + void calculatePostIncRanges(PHINode *OrigPhi); + void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser); + + void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) { + DefUserPair Key(Def, UseI); + auto It = PostIncRangeInfos.find(Key); + if (It == PostIncRangeInfos.end()) + PostIncRangeInfos.insert({Key, R}); + else + It->second = R.intersectWith(It->second); + } + +public: + /// Record a link in the Narrow IV def-use chain along with the WideIV that + /// computes the same value as the Narrow IV def. This avoids caching Use* + /// pointers. + struct NarrowIVDefUse { + Instruction *NarrowDef = nullptr; + Instruction *NarrowUse = nullptr; + Instruction *WideDef = nullptr; + + // True if the narrow def is never negative. Tracking this information lets + // us use a sign extension instead of a zero extension or vice versa, when + // profitable and legal. 
+ bool NeverNegative = false; + + NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD, + bool NeverNegative) + : NarrowDef(ND), NarrowUse(NU), WideDef(WD), + NeverNegative(NeverNegative) {} + }; + + WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, + DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI, + bool HasGuards, bool UsePostIncrementRanges = true); + + PHINode *createWideIV(SCEVExpander &Rewriter); + + unsigned getNumElimExt() { return NumElimExt; }; + unsigned getNumWidened() { return NumWidened; }; + +protected: + Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned, + Instruction *Use); + + Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR); + Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU, + const SCEVAddRecExpr *WideAR); + Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU); + + ExtendKind getExtendKind(Instruction *I); + + using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>; + + WidenedRecTy getWideRecurrence(NarrowIVDefUse DU); + + WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU); + + const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, + unsigned OpCode) const; + + Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter); + + bool widenLoopCompare(NarrowIVDefUse DU); + bool widenWithVariantUse(NarrowIVDefUse DU); + + void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef); + +private: + SmallVector<NarrowIVDefUse, 8> NarrowIVUsers; +}; +} // namespace + +/// Determine the insertion point for this user. By default, insert immediately +/// before the user. SCEVExpander or LICM will hoist loop invariants out of the +/// loop. For PHI nodes, there may be multiple uses, so compute the nearest +/// common dominator for the incoming blocks. A nullptr can be returned if no +/// viable location is found: it may happen if User is a PHI and Def only comes +/// to this PHI from unreachable blocks. +static Instruction *getInsertPointForUses(Instruction *User, Value *Def, + DominatorTree *DT, LoopInfo *LI) { + PHINode *PHI = dyn_cast<PHINode>(User); + if (!PHI) + return User; + + Instruction *InsertPt = nullptr; + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { + if (PHI->getIncomingValue(i) != Def) + continue; + + BasicBlock *InsertBB = PHI->getIncomingBlock(i); + + if (!DT->isReachableFromEntry(InsertBB)) + continue; + + if (!InsertPt) { + InsertPt = InsertBB->getTerminator(); + continue; + } + InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB); + InsertPt = InsertBB->getTerminator(); + } + + // If we have skipped all inputs, it means that Def only comes to Phi from + // unreachable blocks. 
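+ // Returning nullptr here lets callers such as truncateIVUse() and
+ // widenLoopCompare() simply skip the use.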
+ if (!InsertPt) + return nullptr; + + auto *DefI = dyn_cast<Instruction>(Def); + if (!DefI) + return InsertPt; + + assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses"); + + auto *L = LI->getLoopFor(DefI->getParent()); + assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent()))); + + for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom()) + if (LI->getLoopFor(DTN->getBlock()) == L) + return DTN->getBlock()->getTerminator(); + + llvm_unreachable("DefI dominates InsertPt!"); +} + +WidenIV::WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, + DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI, + bool HasGuards, bool UsePostIncrementRanges) + : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo), + L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), + HasGuards(HasGuards), UsePostIncrementRanges(UsePostIncrementRanges), + DeadInsts(DI) { + assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); + ExtendKindMap[OrigPhi] = WI.IsSigned ? ExtendKind::Sign : ExtendKind::Zero; +} + +Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType, + bool IsSigned, Instruction *Use) { + // Set the debug location and conservative insertion point. + IRBuilder<> Builder(Use); + // Hoist the insertion point into loop preheaders as far as possible. + for (const Loop *L = LI->getLoopFor(Use->getParent()); + L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper); + L = L->getParentLoop()) + Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator()); + + return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) : + Builder.CreateZExt(NarrowOper, WideType); +} + +/// Instantiate a wide operation to replace a narrow operation. This only needs +/// to handle operations that can evaluation to SCEVAddRec. It can safely return +/// 0 for any operation we decide not to clone. +Instruction *WidenIV::cloneIVUser(WidenIV::NarrowIVDefUse DU, + const SCEVAddRecExpr *WideAR) { + unsigned Opcode = DU.NarrowUse->getOpcode(); + switch (Opcode) { + default: + return nullptr; + case Instruction::Add: + case Instruction::Mul: + case Instruction::UDiv: + case Instruction::Sub: + return cloneArithmeticIVUser(DU, WideAR); + + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + return cloneBitwiseIVUser(DU); + } +} + +Instruction *WidenIV::cloneBitwiseIVUser(WidenIV::NarrowIVDefUse DU) { + Instruction *NarrowUse = DU.NarrowUse; + Instruction *NarrowDef = DU.NarrowDef; + Instruction *WideDef = DU.WideDef; + + LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n"); + + // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything + // about the narrow operand yet so must insert a [sz]ext. It is probably loop + // invariant and will be folded or hoisted. If it actually comes from a + // widened IV, it should be removed during a future call to widenIVUse. + bool IsSigned = getExtendKind(NarrowDef) == ExtendKind::Sign; + Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(0), WideType, + IsSigned, NarrowUse); + Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) + ? 
WideDef + : createExtendInst(NarrowUse->getOperand(1), WideType, + IsSigned, NarrowUse); + + auto *NarrowBO = cast<BinaryOperator>(NarrowUse); + auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, + NarrowBO->getName()); + IRBuilder<> Builder(NarrowUse); + Builder.Insert(WideBO); + WideBO->copyIRFlags(NarrowBO); + return WideBO; +} + +Instruction *WidenIV::cloneArithmeticIVUser(WidenIV::NarrowIVDefUse DU, + const SCEVAddRecExpr *WideAR) { + Instruction *NarrowUse = DU.NarrowUse; + Instruction *NarrowDef = DU.NarrowDef; + Instruction *WideDef = DU.WideDef; + + LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); + + unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1; + + // We're trying to find X such that + // + // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X + // + // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef), + // and check using SCEV if any of them are correct. + + // Returns true if extending NonIVNarrowDef according to `SignExt` is a + // correct solution to X. + auto GuessNonIVOperand = [&](bool SignExt) { + const SCEV *WideLHS; + const SCEV *WideRHS; + + auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) { + if (SignExt) + return SE->getSignExtendExpr(S, Ty); + return SE->getZeroExtendExpr(S, Ty); + }; + + if (IVOpIdx == 0) { + WideLHS = SE->getSCEV(WideDef); + const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1)); + WideRHS = GetExtend(NarrowRHS, WideType); + } else { + const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0)); + WideLHS = GetExtend(NarrowLHS, WideType); + WideRHS = SE->getSCEV(WideDef); + } + + // WideUse is "WideDef `op.wide` X" as described in the comment. + const SCEV *WideUse = + getSCEVByOpCode(WideLHS, WideRHS, NarrowUse->getOpcode()); + + return WideUse == WideAR; + }; + + bool SignExtend = getExtendKind(NarrowDef) == ExtendKind::Sign; + if (!GuessNonIVOperand(SignExtend)) { + SignExtend = !SignExtend; + if (!GuessNonIVOperand(SignExtend)) + return nullptr; + } + + Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(0), WideType, + SignExtend, NarrowUse); + Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(1), WideType, + SignExtend, NarrowUse); + + auto *NarrowBO = cast<BinaryOperator>(NarrowUse); + auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, + NarrowBO->getName()); + + IRBuilder<> Builder(NarrowUse); + Builder.Insert(WideBO); + WideBO->copyIRFlags(NarrowBO); + return WideBO; +} + +WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) { + auto It = ExtendKindMap.find(I); + assert(It != ExtendKindMap.end() && "Instruction not yet extended!"); + return It->second; +} + +const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, + unsigned OpCode) const { + switch (OpCode) { + case Instruction::Add: + return SE->getAddExpr(LHS, RHS); + case Instruction::Sub: + return SE->getMinusSCEV(LHS, RHS); + case Instruction::Mul: + return SE->getMulExpr(LHS, RHS); + case Instruction::UDiv: + return SE->getUDivExpr(LHS, RHS); + default: + llvm_unreachable("Unsupported opcode."); + }; +} + +/// No-wrap operations can transfer sign extension of their result to their +/// operands. Generate the SCEV value for the widened operation without +/// actually modifying the IR yet. 
If the expression after extending the
+/// operands is an AddRec for this loop, return the AddRec and the kind of
+/// extension used.
+WidenIV::WidenedRecTy
+WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) {
+ // Handle the common case of add<nsw/nuw>
+ const unsigned OpCode = DU.NarrowUse->getOpcode();
+ // Only Add/Sub/Mul instructions are supported yet.
+ if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
+ OpCode != Instruction::Mul)
+ return {nullptr, ExtendKind::Unknown};
+
+ // One operand (NarrowDef) has already been extended to WideDef. Now determine
+ // if extending the other will lead to a recurrence.
+ const unsigned ExtendOperIdx =
+ DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
+ assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
+
+ const SCEV *ExtendOperExpr = nullptr;
+ const OverflowingBinaryOperator *OBO =
+ cast<OverflowingBinaryOperator>(DU.NarrowUse);
+ ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
+ if (ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap())
+ ExtendOperExpr = SE->getSignExtendExpr(
+ SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
+ else if (ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap())
+ ExtendOperExpr = SE->getZeroExtendExpr(
+ SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
+ else
+ return {nullptr, ExtendKind::Unknown};
+
+ // When creating this SCEV expr, don't apply the current operation's NSW or NUW
+ // flags. This instruction may be guarded by control flow that the no-wrap
+ // behavior depends on. Non-control-equivalent instructions can be mapped to
+ // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
+ // semantics to those operations.
+ const SCEV *lhs = SE->getSCEV(DU.WideDef);
+ const SCEV *rhs = ExtendOperExpr;
+
+ // Let's swap operands to the initial order for the case of non-commutative
+ // operations, like SUB. See PR21014.
+ if (ExtendOperIdx == 0)
+ std::swap(lhs, rhs);
+ const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
+
+ if (!AddRec || AddRec->getLoop() != L)
+ return {nullptr, ExtendKind::Unknown};
+
+ return {AddRec, ExtKind};
+}
+
+/// Is this instruction potentially interesting for further simplification after
+/// widening its type? In other words, can the extend be safely hoisted out of
+/// the loop with SCEV reducing the value to a recurrence on the same loop? If
+/// so, return the extended recurrence and the kind of extension used. Otherwise
+/// return {nullptr, ExtendKind::Unknown}.
+WidenIV::WidenedRecTy WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU) {
+ if (!DU.NarrowUse->getType()->isIntegerTy())
+ return {nullptr, ExtendKind::Unknown};
+
+ const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
+ if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
+ SE->getTypeSizeInBits(WideType)) {
+ // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
+ // index. So don't follow this use.
+ return {nullptr, ExtendKind::Unknown}; + } + + const SCEV *WideExpr; + ExtendKind ExtKind; + if (DU.NeverNegative) { + WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); + if (isa<SCEVAddRecExpr>(WideExpr)) + ExtKind = ExtendKind::Sign; + else { + WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); + ExtKind = ExtendKind::Zero; + } + } else if (getExtendKind(DU.NarrowDef) == ExtendKind::Sign) { + WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); + ExtKind = ExtendKind::Sign; + } else { + WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); + ExtKind = ExtendKind::Zero; + } + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr); + if (!AddRec || AddRec->getLoop() != L) + return {nullptr, ExtendKind::Unknown}; + return {AddRec, ExtKind}; +} + +/// This IV user cannot be widened. Replace this use of the original narrow IV +/// with a truncation of the new wide IV to isolate and eliminate the narrow IV. +static void truncateIVUse(WidenIV::NarrowIVDefUse DU, DominatorTree *DT, + LoopInfo *LI) { + auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI); + if (!InsertPt) + return; + LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user " + << *DU.NarrowUse << "\n"); + IRBuilder<> Builder(InsertPt); + Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType()); + DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc); +} + +/// If the narrow use is a compare instruction, then widen the compare +// (and possibly the other operand). The extend operation is hoisted into the +// loop preheader as far as possible. +bool WidenIV::widenLoopCompare(WidenIV::NarrowIVDefUse DU) { + ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse); + if (!Cmp) + return false; + + // We can legally widen the comparison in the following two cases: + // + // - The signedness of the IV extension and comparison match + // + // - The narrow IV is always positive (and thus its sign extension is equal + // to its zero extension). For instance, let's say we're zero extending + // %narrow for the following use + // + // icmp slt i32 %narrow, %val ... (A) + // + // and %narrow is always positive. Then + // + // (A) == icmp slt i32 sext(%narrow), sext(%val) + // == icmp slt i32 zext(%narrow), sext(%val) + bool IsSigned = getExtendKind(DU.NarrowDef) == ExtendKind::Sign; + if (!(DU.NeverNegative || IsSigned == Cmp->isSigned())) + return false; + + Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0); + unsigned CastWidth = SE->getTypeSizeInBits(Op->getType()); + unsigned IVWidth = SE->getTypeSizeInBits(WideType); + assert(CastWidth <= IVWidth && "Unexpected width while widening compare."); + + // Widen the compare instruction. + auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI); + if (!InsertPt) + return false; + IRBuilder<> Builder(InsertPt); + DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); + + // Widen the other operand of the compare, if necessary. + if (CastWidth < IVWidth) { + Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp); + DU.NarrowUse->replaceUsesOfWith(Op, ExtOp); + } + return true; +} + +// The widenIVUse avoids generating trunc by evaluating the use as AddRec, this +// will not work when: +// 1) SCEV traces back to an instruction inside the loop that SCEV can not +// expand, eg. add %indvar, (load %addr) +// 2) SCEV finds a loop variant, eg. 
add %indvar, %loopvariant
+// While SCEV fails to avoid trunc, we can still try to use an instruction
+// combining approach to prove trunc is not required. This can be further
+// extended with other instruction combining checks, but for now we handle the
+// following case (sub can be "add" and "mul", "nsw + sext" can be "nuw + zext").
+//
+// Src:
+// %c = sub nsw %b, %indvar
+// %d = sext %c to i64
+// Dst:
+// %indvar.ext1 = sext %indvar to i64
+// %m = sext %b to i64
+// %d = sub nsw i64 %m, %indvar.ext1
+// Therefore, as long as the result of add/sub/mul is extended to wide type, no
+// trunc is required regardless of how %b is generated. This pattern is common
+// when calculating addresses on 64-bit architectures.
+bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
+ Instruction *NarrowUse = DU.NarrowUse;
+ Instruction *NarrowDef = DU.NarrowDef;
+ Instruction *WideDef = DU.WideDef;
+
+ // Handle the common case of add<nsw/nuw>
+ const unsigned OpCode = NarrowUse->getOpcode();
+ // Only Add/Sub/Mul instructions are supported.
+ if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
+ OpCode != Instruction::Mul)
+ return false;
+
+ // The operand that is not defined by NarrowDef of DU. Let's call it the
+ // other operand.
+ assert((NarrowUse->getOperand(0) == NarrowDef ||
+ NarrowUse->getOperand(1) == NarrowDef) &&
+ "bad DU");
+
+ const OverflowingBinaryOperator *OBO =
+ cast<OverflowingBinaryOperator>(NarrowUse);
+ ExtendKind ExtKind = getExtendKind(NarrowDef);
+ bool CanSignExtend = ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap();
+ bool CanZeroExtend = ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap();
+ auto AnotherOpExtKind = ExtKind;
+
+ // Check that all uses are either:
+ // - narrow def (in case we are widening the IV increment);
+ // - single-input LCSSA Phis;
+ // - comparison of the chosen type;
+ // - extend of the chosen type (raison d'etre).
+ SmallVector<Instruction *, 4> ExtUsers;
+ SmallVector<PHINode *, 4> LCSSAPhiUsers;
+ SmallVector<ICmpInst *, 4> ICmpUsers;
+ for (Use &U : NarrowUse->uses()) {
+ Instruction *User = cast<Instruction>(U.getUser());
+ if (User == NarrowDef)
+ continue;
+ if (!L->contains(User)) {
+ auto *LCSSAPhi = cast<PHINode>(User);
+ // Make sure there is only 1 input, so that we don't have to split
+ // critical edges.
+ if (LCSSAPhi->getNumOperands() != 1)
+ return false;
+ LCSSAPhiUsers.push_back(LCSSAPhi);
+ continue;
+ }
+ if (auto *ICmp = dyn_cast<ICmpInst>(User)) {
+ auto Pred = ICmp->getPredicate();
+ // We have 3 types of predicates: signed, unsigned and equality
+ // predicates. For equality, it's legal to widen icmp for either sign or
+ // zero extend. For sign extend, we can also do so for signed predicates,
+ // likewise for zero extend we can widen icmp for unsigned predicates.
+ if (ExtKind == ExtendKind::Zero && ICmpInst::isSigned(Pred))
+ return false;
+ if (ExtKind == ExtendKind::Sign && ICmpInst::isUnsigned(Pred))
+ return false;
+ ICmpUsers.push_back(ICmp);
+ continue;
+ }
+ if (ExtKind == ExtendKind::Sign)
+ User = dyn_cast<SExtInst>(User);
+ else
+ User = dyn_cast<ZExtInst>(User);
+ if (!User || User->getType() != WideType)
+ return false;
+ ExtUsers.push_back(User);
+ }
+ if (ExtUsers.empty()) {
+ DeadInsts.emplace_back(NarrowUse);
+ return true;
+ }
+
+ // We'll prove some facts that should be true in the context of ext users. If
+ // there are no users, we are done now. If there are some, pick their common
+ // dominator as context.
+ const Instruction *CtxI = findCommonDominator(ExtUsers, *DT); + + if (!CanSignExtend && !CanZeroExtend) { + // Because InstCombine turns 'sub nuw' to 'add' losing the no-wrap flag, we + // will most likely not see it. Let's try to prove it. + if (OpCode != Instruction::Add) + return false; + if (ExtKind != ExtendKind::Zero) + return false; + const SCEV *LHS = SE->getSCEV(OBO->getOperand(0)); + const SCEV *RHS = SE->getSCEV(OBO->getOperand(1)); + // TODO: Support case for NarrowDef = NarrowUse->getOperand(1). + if (NarrowUse->getOperand(0) != NarrowDef) + return false; + if (!SE->isKnownNegative(RHS)) + return false; + bool ProvedSubNUW = SE->isKnownPredicateAt(ICmpInst::ICMP_UGE, LHS, + SE->getNegativeSCEV(RHS), CtxI); + if (!ProvedSubNUW) + return false; + // In fact, our 'add' is 'sub nuw'. We will need to widen the 2nd operand as + // neg(zext(neg(op))), which is basically sext(op). + AnotherOpExtKind = ExtendKind::Sign; + } + + // Verifying that Defining operand is an AddRec + const SCEV *Op1 = SE->getSCEV(WideDef); + const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1); + if (!AddRecOp1 || AddRecOp1->getLoop() != L) + return false; + + LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); + + // Generating a widening use instruction. + Value *LHS = + (NarrowUse->getOperand(0) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(0), WideType, + AnotherOpExtKind == ExtendKind::Sign, NarrowUse); + Value *RHS = + (NarrowUse->getOperand(1) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(1), WideType, + AnotherOpExtKind == ExtendKind::Sign, NarrowUse); + + auto *NarrowBO = cast<BinaryOperator>(NarrowUse); + auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, + NarrowBO->getName()); + IRBuilder<> Builder(NarrowUse); + Builder.Insert(WideBO); + WideBO->copyIRFlags(NarrowBO); + ExtendKindMap[NarrowUse] = ExtKind; + + for (Instruction *User : ExtUsers) { + assert(User->getType() == WideType && "Checked before!"); + LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " + << *WideBO << "\n"); + ++NumElimExt; + User->replaceAllUsesWith(WideBO); + DeadInsts.emplace_back(User); + } + + for (PHINode *User : LCSSAPhiUsers) { + assert(User->getNumOperands() == 1 && "Checked before!"); + Builder.SetInsertPoint(User); + auto *WidePN = + Builder.CreatePHI(WideBO->getType(), 1, User->getName() + ".wide"); + BasicBlock *LoopExitingBlock = User->getParent()->getSinglePredecessor(); + assert(LoopExitingBlock && L->contains(LoopExitingBlock) && + "Not a LCSSA Phi?"); + WidePN->addIncoming(WideBO, LoopExitingBlock); + Builder.SetInsertPoint(&*User->getParent()->getFirstInsertionPt()); + auto *TruncPN = Builder.CreateTrunc(WidePN, User->getType()); + User->replaceAllUsesWith(TruncPN); + DeadInsts.emplace_back(User); + } + + for (ICmpInst *User : ICmpUsers) { + Builder.SetInsertPoint(User); + auto ExtendedOp = [&](Value * V)->Value * { + if (V == NarrowUse) + return WideBO; + if (ExtKind == ExtendKind::Zero) + return Builder.CreateZExt(V, WideBO->getType()); + else + return Builder.CreateSExt(V, WideBO->getType()); + }; + auto Pred = User->getPredicate(); + auto *LHS = ExtendedOp(User->getOperand(0)); + auto *RHS = ExtendedOp(User->getOperand(1)); + auto *WideCmp = + Builder.CreateICmp(Pred, LHS, RHS, User->getName() + ".wide"); + User->replaceAllUsesWith(WideCmp); + DeadInsts.emplace_back(User); + } + + return true; +} + +/// Determine whether an individual user of the narrow IV can be widened. 
If so, +/// return the wide clone of the user. +Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewriter) { + assert(ExtendKindMap.count(DU.NarrowDef) && + "Should already know the kind of extension used to widen NarrowDef"); + + // Stop traversing the def-use chain at inner-loop phis or post-loop phis. + if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) { + if (LI->getLoopFor(UsePhi->getParent()) != L) { + // For LCSSA phis, sink the truncate outside the loop. + // After SimplifyCFG most loop exit targets have a single predecessor. + // Otherwise fall back to a truncate within the loop. + if (UsePhi->getNumOperands() != 1) + truncateIVUse(DU, DT, LI); + else { + // Widening the PHI requires us to insert a trunc. The logical place + // for this trunc is in the same BB as the PHI. This is not possible if + // the BB is terminated by a catchswitch. + if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator())) + return nullptr; + + PHINode *WidePhi = + PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide", + UsePhi); + WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0)); + IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt()); + Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType()); + UsePhi->replaceAllUsesWith(Trunc); + DeadInsts.emplace_back(UsePhi); + LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to " + << *WidePhi << "\n"); + } + return nullptr; + } + } + + // This narrow use can be widened by a sext if it's non-negative or its narrow + // def was widended by a sext. Same for zext. + auto canWidenBySExt = [&]() { + return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Sign; + }; + auto canWidenByZExt = [&]() { + return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Zero; + }; + + // Our raison d'etre! Eliminate sign and zero extension. + if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) || + (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) { + Value *NewDef = DU.WideDef; + if (DU.NarrowUse->getType() != WideType) { + unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType()); + unsigned IVWidth = SE->getTypeSizeInBits(WideType); + if (CastWidth < IVWidth) { + // The cast isn't as wide as the IV, so insert a Trunc. + IRBuilder<> Builder(DU.NarrowUse); + NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType()); + } + else { + // A wider extend was hidden behind a narrower one. This may induce + // another round of IV widening in which the intermediate IV becomes + // dead. It should be very rare. + LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi + << " not wide enough to subsume " << *DU.NarrowUse + << "\n"); + DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); + NewDef = DU.NarrowUse; + } + } + if (NewDef != DU.NarrowUse) { + LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse + << " replaced by " << *DU.WideDef << "\n"); + ++NumElimExt; + DU.NarrowUse->replaceAllUsesWith(NewDef); + DeadInsts.emplace_back(DU.NarrowUse); + } + // Now that the extend is gone, we want to expose it's uses for potential + // further simplification. We don't need to directly inform SimplifyIVUsers + // of the new users, because their parent IV will be processed later as a + // new loop phi. If we preserved IVUsers analysis, we would also want to + // push the uses of WideDef here. + + // No further widening is needed. The deceased [sz]ext had done it for us. 
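    // A minimal illustration of this case (hypothetical IR, not a test):
    //   %iv      = phi i32 ...            ; original narrow IV
    //   %iv.wide = phi i64 ...            ; wide IV created for it
    //   %ext     = sext i32 %iv to i64    ; DU.NarrowUse
    // Here %ext already has the wide type, so it is simply replaced by the
    // wide def and queued for deletion.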
+ return nullptr; + } + + // Does this user itself evaluate to a recurrence after widening? + WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU); + if (!WideAddRec.first) + WideAddRec = getWideRecurrence(DU); + + assert((WideAddRec.first == nullptr) == + (WideAddRec.second == ExtendKind::Unknown)); + if (!WideAddRec.first) { + // If use is a loop condition, try to promote the condition instead of + // truncating the IV first. + if (widenLoopCompare(DU)) + return nullptr; + + // We are here about to generate a truncate instruction that may hurt + // performance because the scalar evolution expression computed earlier + // in WideAddRec.first does not indicate a polynomial induction expression. + // In that case, look at the operands of the use instruction to determine + // if we can still widen the use instead of truncating its operand. + if (widenWithVariantUse(DU)) + return nullptr; + + // This user does not evaluate to a recurrence after widening, so don't + // follow it. Instead insert a Trunc to kill off the original use, + // eventually isolating the original narrow IV so it can be removed. + truncateIVUse(DU, DT, LI); + return nullptr; + } + + // Reuse the IV increment that SCEVExpander created as long as it dominates + // NarrowUse. + Instruction *WideUse = nullptr; + if (WideAddRec.first == WideIncExpr && + Rewriter.hoistIVInc(WideInc, DU.NarrowUse)) + WideUse = WideInc; + else { + WideUse = cloneIVUser(DU, WideAddRec.first); + if (!WideUse) + return nullptr; + } + // Evaluation of WideAddRec ensured that the narrow expression could be + // extended outside the loop without overflow. This suggests that the wide use + // evaluates to the same expression as the extended narrow use, but doesn't + // absolutely guarantee it. Hence the following failsafe check. In rare cases + // where it fails, we simply throw away the newly created wide use. + if (WideAddRec.first != SE->getSCEV(WideUse)) { + LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " + << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first + << "\n"); + DeadInsts.emplace_back(WideUse); + return nullptr; + } + + // if we reached this point then we are going to replace + // DU.NarrowUse with WideUse. Reattach DbgValue then. + replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT); + + ExtendKindMap[DU.NarrowUse] = WideAddRec.second; + // Returning WideUse pushes it on the worklist. + return WideUse; +} + +/// Add eligible users of NarrowDef to NarrowIVUsers. +void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { + const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef); + bool NonNegativeDef = + SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV, + SE->getZero(NarrowSCEV->getType())); + for (User *U : NarrowDef->users()) { + Instruction *NarrowUser = cast<Instruction>(U); + + // Handle data flow merges and bizarre phi cycles. + if (!Widened.insert(NarrowUser).second) + continue; + + bool NonNegativeUse = false; + if (!NonNegativeDef) { + // We might have a control-dependent range information for this context. + if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser)) + NonNegativeUse = RangeInfo->getSignedMin().isNonNegative(); + } + + NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, + NonNegativeDef || NonNegativeUse); + } +} + +/// Process a single induction variable. First use the SCEVExpander to create a +/// wide induction variable that evaluates to the same recurrence as the +/// original narrow IV. 
Then use a worklist to forward traverse the narrow IV's +/// def-use chain. After widenIVUse has processed all interesting IV users, the +/// narrow IV will be isolated for removal by DeleteDeadPHIs. +/// +/// It would be simpler to delete uses as they are processed, but we must avoid +/// invalidating SCEV expressions. +PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { + // Is this phi an induction variable? + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi)); + if (!AddRec) + return nullptr; + + // Widen the induction variable expression. + const SCEV *WideIVExpr = getExtendKind(OrigPhi) == ExtendKind::Sign + ? SE->getSignExtendExpr(AddRec, WideType) + : SE->getZeroExtendExpr(AddRec, WideType); + + assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType && + "Expect the new IV expression to preserve its type"); + + // Can the IV be extended outside the loop without overflow? + AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr); + if (!AddRec || AddRec->getLoop() != L) + return nullptr; + + // An AddRec must have loop-invariant operands. Since this AddRec is + // materialized by a loop header phi, the expression cannot have any post-loop + // operands, so they must dominate the loop header. + assert( + SE->properlyDominates(AddRec->getStart(), L->getHeader()) && + SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) && + "Loop header phi recurrence inputs do not dominate the loop"); + + // Iterate over IV uses (including transitive ones) looking for IV increments + // of the form 'add nsw %iv, <const>'. For each increment and each use of + // the increment calculate control-dependent range information basing on + // dominating conditions inside of the loop (e.g. a range check inside of the + // loop). Calculated ranges are stored in PostIncRangeInfos map. + // + // Control-dependent range information is later used to prove that a narrow + // definition is not negative (see pushNarrowIVUsers). It's difficult to do + // this on demand because when pushNarrowIVUsers needs this information some + // of the dominating conditions might be already widened. + if (UsePostIncrementRanges) + calculatePostIncRanges(OrigPhi); + + // The rewriter provides a value for the desired IV expression. This may + // either find an existing phi or materialize a new one. Either way, we + // expect a well-formed cyclic phi-with-increments. i.e. any operand not part + // of the phi-SCC dominates the loop entry. + Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt(); + Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt); + // If the wide phi is not a phi node, for example a cast node, like bitcast, + // inttoptr, ptrtoint, just skip for now. + if (!(WidePhi = dyn_cast<PHINode>(ExpandInst))) { + // if the cast node is an inserted instruction without any user, we should + // remove it to make sure the pass don't touch the function as we can not + // wide the phi. + if (ExpandInst->hasNUses(0) && + Rewriter.isInsertedInstruction(cast<Instruction>(ExpandInst))) + DeadInsts.emplace_back(ExpandInst); + return nullptr; + } + + // Remembering the WideIV increment generated by SCEVExpander allows + // widenIVUse to reuse it when widening the narrow IV's increment. We don't + // employ a general reuse mechanism because the call above is the only call to + // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. 
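  // Sketch of the shape this code expects (illustrative only): for a narrow IV
  //   %iv      = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
  //   %iv.next = add nsw i32 %iv, 1
  // the expander materializes
  //   %iv.wide      = phi i64 [ 0, %preheader ], [ %iv.wide.next, %latch ]
  //   %iv.wide.next = add nsw i64 %iv.wide, 1
  // and the latch incoming value %iv.wide.next is the increment remembered in
  // WideInc below, so widenIVUse can reuse it when it reaches %iv.next.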
+ if (BasicBlock *LatchBlock = L->getLoopLatch()) { + WideInc = + cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock)); + WideIncExpr = SE->getSCEV(WideInc); + // Propagate the debug location associated with the original loop increment + // to the new (widened) increment. + auto *OrigInc = + cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); + WideInc->setDebugLoc(OrigInc->getDebugLoc()); + } + + LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); + ++NumWidened; + + // Traverse the def-use chain using a worklist starting at the original IV. + assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); + + Widened.insert(OrigPhi); + pushNarrowIVUsers(OrigPhi, WidePhi); + + while (!NarrowIVUsers.empty()) { + WidenIV::NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); + + // Process a def-use edge. This may replace the use, so don't hold a + // use_iterator across it. + Instruction *WideUse = widenIVUse(DU, Rewriter); + + // Follow all def-use edges from the previous narrow use. + if (WideUse) + pushNarrowIVUsers(DU.NarrowUse, WideUse); + + // widenIVUse may have removed the def-use edge. + if (DU.NarrowDef->use_empty()) + DeadInsts.emplace_back(DU.NarrowDef); + } + + // Attach any debug information to the new PHI. + replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT); + + return WidePhi; +} + +/// Calculates control-dependent range for the given def at the given context +/// by looking at dominating conditions inside of the loop +void WidenIV::calculatePostIncRange(Instruction *NarrowDef, + Instruction *NarrowUser) { + using namespace llvm::PatternMatch; + + Value *NarrowDefLHS; + const APInt *NarrowDefRHS; + if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), + m_APInt(NarrowDefRHS))) || + !NarrowDefRHS->isNonNegative()) + return; + + auto UpdateRangeFromCondition = [&] (Value *Condition, + bool TrueDest) { + CmpInst::Predicate Pred; + Value *CmpRHS; + if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), + m_Value(CmpRHS)))) + return; + + CmpInst::Predicate P = + TrueDest ? Pred : CmpInst::getInversePredicate(Pred); + + auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); + auto CmpConstrainedLHSRange = + ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); + auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap( + *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap); + + updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); + }; + + auto UpdateRangeFromGuards = [&](Instruction *Ctx) { + if (!HasGuards) + return; + + for (Instruction &I : make_range(Ctx->getIterator().getReverse(), + Ctx->getParent()->rend())) { + Value *C = nullptr; + if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C)))) + UpdateRangeFromCondition(C, /*TrueDest=*/true); + } + }; + + UpdateRangeFromGuards(NarrowUser); + + BasicBlock *NarrowUserBB = NarrowUser->getParent(); + // If NarrowUserBB is statically unreachable asking dominator queries may + // yield surprising results. (e.g. 
the block may not have a dom tree node) + if (!DT->isReachableFromEntry(NarrowUserBB)) + return; + + for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); + L->contains(DTB->getBlock()); + DTB = DTB->getIDom()) { + auto *BB = DTB->getBlock(); + auto *TI = BB->getTerminator(); + UpdateRangeFromGuards(TI); + + auto *BI = dyn_cast<BranchInst>(TI); + if (!BI || !BI->isConditional()) + continue; + + auto *TrueSuccessor = BI->getSuccessor(0); + auto *FalseSuccessor = BI->getSuccessor(1); + + auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { + return BBE.isSingleEdge() && + DT->dominates(BBE, NarrowUser->getParent()); + }; + + if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) + UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); + + if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) + UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); + } +} + +/// Calculates PostIncRangeInfos map for the given IV +void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { + SmallPtrSet<Instruction *, 16> Visited; + SmallVector<Instruction *, 6> Worklist; + Worklist.push_back(OrigPhi); + Visited.insert(OrigPhi); + + while (!Worklist.empty()) { + Instruction *NarrowDef = Worklist.pop_back_val(); + + for (Use &U : NarrowDef->uses()) { + auto *NarrowUser = cast<Instruction>(U.getUser()); + + // Don't go looking outside the current loop. + auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; + if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) + continue; + + if (!Visited.insert(NarrowUser).second) + continue; + + Worklist.push_back(NarrowUser); + + calculatePostIncRange(NarrowDef, NarrowUser); + } + } +} + +PHINode *llvm::createWideIV(const WideIVInfo &WI, + LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, + DominatorTree *DT, SmallVectorImpl<WeakTrackingVH> &DeadInsts, + unsigned &NumElimExt, unsigned &NumWidened, + bool HasGuards, bool UsePostIncrementRanges) { + WidenIV Widener(WI, LI, SE, DT, DeadInsts, HasGuards, UsePostIncrementRanges); + PHINode *WidePHI = Widener.createWideIV(Rewriter); + NumElimExt = Widener.getNumElimExt(); + NumWidened = Widener.getNumWidened(); + return WidePHI; +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/SimplifyLibCalls.cpp new file mode 100644 index 0000000000..20f18322d4 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -0,0 +1,4081 @@ +//===------ SimplifyLibCalls.cpp - Library calls simplifier ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the library calls simplifier. It does not implement +// any pass, but can't be used by other passes to do simplifications. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SimplifyLibCalls.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/KnownBits.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SizeOpts.h" + +#include <cmath> + +using namespace llvm; +using namespace PatternMatch; + +static cl::opt<bool> + EnableUnsafeFPShrink("enable-double-float-shrink", cl::Hidden, + cl::init(false), + cl::desc("Enable unsafe double to float " + "shrinking for math lib calls")); + +//===----------------------------------------------------------------------===// +// Helper Functions +//===----------------------------------------------------------------------===// + +static bool ignoreCallingConv(LibFunc Func) { + return Func == LibFunc_abs || Func == LibFunc_labs || + Func == LibFunc_llabs || Func == LibFunc_strlen; +} + +/// Return true if it is only used in equality comparisons with With. +static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) { + for (User *U : V->users()) { + if (ICmpInst *IC = dyn_cast<ICmpInst>(U)) + if (IC->isEquality() && IC->getOperand(1) == With) + continue; + // Unknown instruction. + return false; + } + return true; +} + +static bool callHasFloatingPointArgument(const CallInst *CI) { + return any_of(CI->operands(), [](const Use &OI) { + return OI->getType()->isFloatingPointTy(); + }); +} + +static bool callHasFP128Argument(const CallInst *CI) { + return any_of(CI->operands(), [](const Use &OI) { + return OI->getType()->isFP128Ty(); + }); +} + +// Convert the entire string Str representing an integer in Base, up to +// the terminating nul if present, to a constant according to the rules +// of strtoul[l] or, when AsSigned is set, of strtol[l]. On success +// return the result, otherwise null. +// The function assumes the string is encoded in ASCII and carefully +// avoids converting sequences (including "") that the corresponding +// library call might fail and set errno for. +static Value *convertStrToInt(CallInst *CI, StringRef &Str, Value *EndPtr, + uint64_t Base, bool AsSigned, IRBuilderBase &B) { + if (Base < 2 || Base > 36) + if (Base != 0) + // Fail for an invalid base (required by POSIX). + return nullptr; + + // Current offset into the original string to reflect in EndPtr. + size_t Offset = 0; + // Strip leading whitespace. + for ( ; Offset != Str.size(); ++Offset) + if (!isSpace((unsigned char)Str[Offset])) { + Str = Str.substr(Offset); + break; + } + + if (Str.empty()) + // Fail for empty subject sequences (POSIX allows but doesn't require + // strtol[l]/strtoul[l] to fail with EINVAL). + return nullptr; + + // Strip but remember the sign. + bool Negate = Str[0] == '-'; + if (Str[0] == '-' || Str[0] == '+') { + Str = Str.drop_front(); + if (Str.empty()) + // Fail for a sign with nothing after it. 
+ return nullptr; + ++Offset; + } + + // Set Max to the absolute value of the minimum (for signed), or + // to the maximum (for unsigned) value representable in the type. + Type *RetTy = CI->getType(); + unsigned NBits = RetTy->getPrimitiveSizeInBits(); + uint64_t Max = AsSigned && Negate ? 1 : 0; + Max += AsSigned ? maxIntN(NBits) : maxUIntN(NBits); + + // Autodetect Base if it's zero and consume the "0x" prefix. + if (Str.size() > 1) { + if (Str[0] == '0') { + if (toUpper((unsigned char)Str[1]) == 'X') { + if (Str.size() == 2 || (Base && Base != 16)) + // Fail if Base doesn't allow the "0x" prefix or for the prefix + // alone that implementations like BSD set errno to EINVAL for. + return nullptr; + + Str = Str.drop_front(2); + Offset += 2; + Base = 16; + } + else if (Base == 0) + Base = 8; + } else if (Base == 0) + Base = 10; + } + else if (Base == 0) + Base = 10; + + // Convert the rest of the subject sequence, not including the sign, + // to its uint64_t representation (this assumes the source character + // set is ASCII). + uint64_t Result = 0; + for (unsigned i = 0; i != Str.size(); ++i) { + unsigned char DigVal = Str[i]; + if (isDigit(DigVal)) + DigVal = DigVal - '0'; + else { + DigVal = toUpper(DigVal); + if (isAlpha(DigVal)) + DigVal = DigVal - 'A' + 10; + else + return nullptr; + } + + if (DigVal >= Base) + // Fail if the digit is not valid in the Base. + return nullptr; + + // Add the digit and fail if the result is not representable in + // the (unsigned form of the) destination type. + bool VFlow; + Result = SaturatingMultiplyAdd(Result, Base, (uint64_t)DigVal, &VFlow); + if (VFlow || Result > Max) + return nullptr; + } + + if (EndPtr) { + // Store the pointer to the end. + Value *Off = B.getInt64(Offset + Str.size()); + Value *StrBeg = CI->getArgOperand(0); + Value *StrEnd = B.CreateInBoundsGEP(B.getInt8Ty(), StrBeg, Off, "endptr"); + B.CreateStore(StrEnd, EndPtr); + } + + if (Negate) + // Unsigned negation doesn't overflow. + Result = -Result; + + return ConstantInt::get(RetTy, Result); +} + +static bool isOnlyUsedInComparisonWithZero(Value *V) { + for (User *U : V->users()) { + if (ICmpInst *IC = dyn_cast<ICmpInst>(U)) + if (Constant *C = dyn_cast<Constant>(IC->getOperand(1))) + if (C->isNullValue()) + continue; + // Unknown instruction. 
+ return false; + } + return true; +} + +static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len, + const DataLayout &DL) { + if (!isOnlyUsedInComparisonWithZero(CI)) + return false; + + if (!isDereferenceableAndAlignedPointer(Str, Align(1), APInt(64, Len), DL)) + return false; + + if (CI->getFunction()->hasFnAttribute(Attribute::SanitizeMemory)) + return false; + + return true; +} + +static void annotateDereferenceableBytes(CallInst *CI, + ArrayRef<unsigned> ArgNos, + uint64_t DereferenceableBytes) { + const Function *F = CI->getCaller(); + if (!F) + return; + for (unsigned ArgNo : ArgNos) { + uint64_t DerefBytes = DereferenceableBytes; + unsigned AS = CI->getArgOperand(ArgNo)->getType()->getPointerAddressSpace(); + if (!llvm::NullPointerIsDefined(F, AS) || + CI->paramHasAttr(ArgNo, Attribute::NonNull)) + DerefBytes = std::max(CI->getParamDereferenceableOrNullBytes(ArgNo), + DereferenceableBytes); + + if (CI->getParamDereferenceableBytes(ArgNo) < DerefBytes) { + CI->removeParamAttr(ArgNo, Attribute::Dereferenceable); + if (!llvm::NullPointerIsDefined(F, AS) || + CI->paramHasAttr(ArgNo, Attribute::NonNull)) + CI->removeParamAttr(ArgNo, Attribute::DereferenceableOrNull); + CI->addParamAttr(ArgNo, Attribute::getWithDereferenceableBytes( + CI->getContext(), DerefBytes)); + } + } +} + +static void annotateNonNullNoUndefBasedOnAccess(CallInst *CI, + ArrayRef<unsigned> ArgNos) { + Function *F = CI->getCaller(); + if (!F) + return; + + for (unsigned ArgNo : ArgNos) { + if (!CI->paramHasAttr(ArgNo, Attribute::NoUndef)) + CI->addParamAttr(ArgNo, Attribute::NoUndef); + + if (!CI->paramHasAttr(ArgNo, Attribute::NonNull)) { + unsigned AS = + CI->getArgOperand(ArgNo)->getType()->getPointerAddressSpace(); + if (llvm::NullPointerIsDefined(F, AS)) + continue; + CI->addParamAttr(ArgNo, Attribute::NonNull); + } + + annotateDereferenceableBytes(CI, ArgNo, 1); + } +} + +static void annotateNonNullAndDereferenceable(CallInst *CI, ArrayRef<unsigned> ArgNos, + Value *Size, const DataLayout &DL) { + if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size)) { + annotateNonNullNoUndefBasedOnAccess(CI, ArgNos); + annotateDereferenceableBytes(CI, ArgNos, LenC->getZExtValue()); + } else if (isKnownNonZero(Size, DL)) { + annotateNonNullNoUndefBasedOnAccess(CI, ArgNos); + const APInt *X, *Y; + uint64_t DerefMin = 1; + if (match(Size, m_Select(m_Value(), m_APInt(X), m_APInt(Y)))) { + DerefMin = std::min(X->getZExtValue(), Y->getZExtValue()); + annotateDereferenceableBytes(CI, ArgNos, DerefMin); + } + } +} + +// Copy CallInst "flags" like musttail, notail, and tail. Return New param for +// easier chaining. Calls to emit* and B.createCall should probably be wrapped +// in this function when New is created to replace Old. Callers should take +// care to check Old.isMustTailCall() if they aren't replacing Old directly +// with New. +static Value *copyFlags(const CallInst &Old, Value *New) { + assert(!Old.isMustTailCall() && "do not copy musttail call flags"); + assert(!Old.isNoTailCall() && "do not copy notail call flags"); + if (auto *NewCI = dyn_cast_or_null<CallInst>(New)) + NewCI->setTailCallKind(Old.getTailCallKind()); + return New; +} + +static Value *mergeAttributesAndFlags(CallInst *NewCI, const CallInst &Old) { + NewCI->setAttributes(AttributeList::get( + NewCI->getContext(), {NewCI->getAttributes(), Old.getAttributes()})); + NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType())); + return copyFlags(Old, NewCI); +} + +// Helper to avoid truncating the length if size_t is 32-bits. 
+static StringRef substr(StringRef Str, uint64_t Len) { + return Len >= Str.size() ? Str : Str.substr(0, Len); +} + +//===----------------------------------------------------------------------===// +// String and Memory Library Call Optimizations +//===----------------------------------------------------------------------===// + +Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilderBase &B) { + // Extract some information from the instruction + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + annotateNonNullNoUndefBasedOnAccess(CI, {0, 1}); + + // See if we can get the length of the input string. + uint64_t Len = GetStringLength(Src); + if (Len) + annotateDereferenceableBytes(CI, 1, Len); + else + return nullptr; + --Len; // Unbias length. + + // Handle the simple, do-nothing case: strcat(x, "") -> x + if (Len == 0) + return Dst; + + return copyFlags(*CI, emitStrLenMemCpy(Src, Dst, Len, B)); +} + +Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, + IRBuilderBase &B) { + // We need to find the end of the destination string. That's where the + // memory is to be moved to. We just generate a call to strlen. + Value *DstLen = emitStrLen(Dst, B, DL, TLI); + if (!DstLen) + return nullptr; + + // Now that we have the destination's length, we must index into the + // destination's pointer to get the actual memcpy destination (end of + // the string .. we're concatenating). + Value *CpyDst = B.CreateInBoundsGEP(B.getInt8Ty(), Dst, DstLen, "endptr"); + + // We have enough information to now generate the memcpy call to do the + // concatenation for us. Make a memcpy to copy the nul byte with align = 1. + B.CreateMemCpy( + CpyDst, Align(1), Src, Align(1), + ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1)); + return Dst; +} + +Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilderBase &B) { + // Extract some information from the instruction. + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + Value *Size = CI->getArgOperand(2); + uint64_t Len; + annotateNonNullNoUndefBasedOnAccess(CI, 0); + if (isKnownNonZero(Size, DL)) + annotateNonNullNoUndefBasedOnAccess(CI, 1); + + // We don't do anything if length is not constant. + ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size); + if (LengthArg) { + Len = LengthArg->getZExtValue(); + // strncat(x, c, 0) -> x + if (!Len) + return Dst; + } else { + return nullptr; + } + + // See if we can get the length of the input string. + uint64_t SrcLen = GetStringLength(Src); + if (SrcLen) { + annotateDereferenceableBytes(CI, 1, SrcLen); + --SrcLen; // Unbias length. + } else { + return nullptr; + } + + // strncat(x, "", c) -> x + if (SrcLen == 0) + return Dst; + + // We don't optimize this case. + if (Len < SrcLen) + return nullptr; + + // strncat(x, s, c) -> strcat(x, s) + // s is constant so the strcat can be optimized further. + return copyFlags(*CI, emitStrLenMemCpy(Src, Dst, SrcLen, B)); +} + +// Helper to transform memchr(S, C, N) == S to N && *S == C and, when +// NBytes is null, strchr(S, C) to *S == C. A precondition of the function +// is that either S is dereferenceable or the value of N is nonzero. +static Value* memChrToCharCompare(CallInst *CI, Value *NBytes, + IRBuilderBase &B, const DataLayout &DL) +{ + Value *Src = CI->getArgOperand(0); + Value *CharVal = CI->getArgOperand(1); + + // Fold memchr(A, C, N) == A to N && *A == C. 
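  // At the source level this corresponds to rewriting (illustrative sketch,
  // valid because either S is dereferenceable or N is known to be nonzero):
  //
  //   if (memchr(s, c, n) == s) { ... }
  // as
  //   if (n != 0 && *s == (char)c) { ... }
  //
  // hence the load of the first character and the logical-and built below.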
+ Type *CharTy = B.getInt8Ty(); + Value *Char0 = B.CreateLoad(CharTy, Src); + CharVal = B.CreateTrunc(CharVal, CharTy); + Value *Cmp = B.CreateICmpEQ(Char0, CharVal, "char0cmp"); + + if (NBytes) { + Value *Zero = ConstantInt::get(NBytes->getType(), 0); + Value *And = B.CreateICmpNE(NBytes, Zero); + Cmp = B.CreateLogicalAnd(And, Cmp); + } + + Value *NullPtr = Constant::getNullValue(CI->getType()); + return B.CreateSelect(Cmp, Src, NullPtr); +} + +Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) { + Value *SrcStr = CI->getArgOperand(0); + Value *CharVal = CI->getArgOperand(1); + annotateNonNullNoUndefBasedOnAccess(CI, 0); + + if (isOnlyUsedInEqualityComparison(CI, SrcStr)) + return memChrToCharCompare(CI, nullptr, B, DL); + + // If the second operand is non-constant, see if we can compute the length + // of the input string and turn this into memchr. + ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal); + if (!CharC) { + uint64_t Len = GetStringLength(SrcStr); + if (Len) + annotateDereferenceableBytes(CI, 0, Len); + else + return nullptr; + + Function *Callee = CI->getCalledFunction(); + FunctionType *FT = Callee->getFunctionType(); + unsigned IntBits = TLI->getIntSize(); + if (!FT->getParamType(1)->isIntegerTy(IntBits)) // memchr needs 'int'. + return nullptr; + + unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule()); + Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits); + return copyFlags(*CI, + emitMemChr(SrcStr, CharVal, // include nul. + ConstantInt::get(SizeTTy, Len), B, + DL, TLI)); + } + + if (CharC->isZero()) { + Value *NullPtr = Constant::getNullValue(CI->getType()); + if (isOnlyUsedInEqualityComparison(CI, NullPtr)) + // Pre-empt the transformation to strlen below and fold + // strchr(A, '\0') == null to false. + return B.CreateIntToPtr(B.getTrue(), CI->getType()); + } + + // Otherwise, the character is a constant, see if the first argument is + // a string literal. If so, we can constant fold. + StringRef Str; + if (!getConstantStringInfo(SrcStr, Str)) { + if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p) + if (Value *StrLen = emitStrLen(SrcStr, B, DL, TLI)) + return B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, StrLen, "strchr"); + return nullptr; + } + + // Compute the offset, make sure to handle the case when we're searching for + // zero (a weird way to spell strlen). + size_t I = (0xFF & CharC->getSExtValue()) == 0 + ? Str.size() + : Str.find(CharC->getSExtValue()); + if (I == StringRef::npos) // Didn't find the char. strchr returns null. + return Constant::getNullValue(CI->getType()); + + // strchr(s+n,c) -> gep(s+n+i,c) + return B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strchr"); +} + +Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilderBase &B) { + Value *SrcStr = CI->getArgOperand(0); + Value *CharVal = CI->getArgOperand(1); + ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal); + annotateNonNullNoUndefBasedOnAccess(CI, 0); + + StringRef Str; + if (!getConstantStringInfo(SrcStr, Str)) { + // strrchr(s, 0) -> strchr(s, 0) + if (CharC && CharC->isZero()) + return copyFlags(*CI, emitStrChr(SrcStr, '\0', B, TLI)); + return nullptr; + } + + unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule()); + Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits); + + // Try to expand strrchr to the memrchr nonstandard extension if it's + // available, or simply fail otherwise. + uint64_t NBytes = Str.size() + 1; // Include the terminating nul. 
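  // E.g. (illustrative): strrchr("abca", c) becomes memrchr("abca", c, 5);
  // counting the terminating nul keeps the c == '\0' case correct, since
  // strrchr(s, '\0') must return a pointer to the terminator of s.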
+ Value *Size = ConstantInt::get(SizeTTy, NBytes); + return copyFlags(*CI, emitMemRChr(SrcStr, CharVal, Size, B, DL, TLI)); +} + +Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilderBase &B) { + Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); + if (Str1P == Str2P) // strcmp(x,x) -> 0 + return ConstantInt::get(CI->getType(), 0); + + StringRef Str1, Str2; + bool HasStr1 = getConstantStringInfo(Str1P, Str1); + bool HasStr2 = getConstantStringInfo(Str2P, Str2); + + // strcmp(x, y) -> cnst (if both x and y are constant strings) + if (HasStr1 && HasStr2) + return ConstantInt::get(CI->getType(), + std::clamp(Str1.compare(Str2), -1, 1)); + + if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x + return B.CreateNeg(B.CreateZExt( + B.CreateLoad(B.getInt8Ty(), Str2P, "strcmpload"), CI->getType())); + + if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x + return B.CreateZExt(B.CreateLoad(B.getInt8Ty(), Str1P, "strcmpload"), + CI->getType()); + + // strcmp(P, "x") -> memcmp(P, "x", 2) + uint64_t Len1 = GetStringLength(Str1P); + if (Len1) + annotateDereferenceableBytes(CI, 0, Len1); + uint64_t Len2 = GetStringLength(Str2P); + if (Len2) + annotateDereferenceableBytes(CI, 1, Len2); + + if (Len1 && Len2) { + return copyFlags( + *CI, emitMemCmp(Str1P, Str2P, + ConstantInt::get(DL.getIntPtrType(CI->getContext()), + std::min(Len1, Len2)), + B, DL, TLI)); + } + + // strcmp to memcmp + if (!HasStr1 && HasStr2) { + if (canTransformToMemCmp(CI, Str1P, Len2, DL)) + return copyFlags( + *CI, + emitMemCmp(Str1P, Str2P, + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2), + B, DL, TLI)); + } else if (HasStr1 && !HasStr2) { + if (canTransformToMemCmp(CI, Str2P, Len1, DL)) + return copyFlags( + *CI, + emitMemCmp(Str1P, Str2P, + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1), + B, DL, TLI)); + } + + annotateNonNullNoUndefBasedOnAccess(CI, {0, 1}); + return nullptr; +} + +// Optimize a memcmp or, when StrNCmp is true, strncmp call CI with constant +// arrays LHS and RHS and nonconstant Size. +static Value *optimizeMemCmpVarSize(CallInst *CI, Value *LHS, Value *RHS, + Value *Size, bool StrNCmp, + IRBuilderBase &B, const DataLayout &DL); + +Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilderBase &B) { + Value *Str1P = CI->getArgOperand(0); + Value *Str2P = CI->getArgOperand(1); + Value *Size = CI->getArgOperand(2); + if (Str1P == Str2P) // strncmp(x,x,n) -> 0 + return ConstantInt::get(CI->getType(), 0); + + if (isKnownNonZero(Size, DL)) + annotateNonNullNoUndefBasedOnAccess(CI, {0, 1}); + // Get the length argument if it is constant. + uint64_t Length; + if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size)) + Length = LengthArg->getZExtValue(); + else + return optimizeMemCmpVarSize(CI, Str1P, Str2P, Size, true, B, DL); + + if (Length == 0) // strncmp(x,y,0) -> 0 + return ConstantInt::get(CI->getType(), 0); + + if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) + return copyFlags(*CI, emitMemCmp(Str1P, Str2P, Size, B, DL, TLI)); + + StringRef Str1, Str2; + bool HasStr1 = getConstantStringInfo(Str1P, Str1); + bool HasStr2 = getConstantStringInfo(Str2P, Str2); + + // strncmp(x, y) -> cnst (if both x and y are constant strings) + if (HasStr1 && HasStr2) { + // Avoid truncating the 64-bit Length to 32 bits in ILP32. 
+ StringRef SubStr1 = substr(Str1, Length); + StringRef SubStr2 = substr(Str2, Length); + return ConstantInt::get(CI->getType(), + std::clamp(SubStr1.compare(SubStr2), -1, 1)); + } + + if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x + return B.CreateNeg(B.CreateZExt( + B.CreateLoad(B.getInt8Ty(), Str2P, "strcmpload"), CI->getType())); + + if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x + return B.CreateZExt(B.CreateLoad(B.getInt8Ty(), Str1P, "strcmpload"), + CI->getType()); + + uint64_t Len1 = GetStringLength(Str1P); + if (Len1) + annotateDereferenceableBytes(CI, 0, Len1); + uint64_t Len2 = GetStringLength(Str2P); + if (Len2) + annotateDereferenceableBytes(CI, 1, Len2); + + // strncmp to memcmp + if (!HasStr1 && HasStr2) { + Len2 = std::min(Len2, Length); + if (canTransformToMemCmp(CI, Str1P, Len2, DL)) + return copyFlags( + *CI, + emitMemCmp(Str1P, Str2P, + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2), + B, DL, TLI)); + } else if (HasStr1 && !HasStr2) { + Len1 = std::min(Len1, Length); + if (canTransformToMemCmp(CI, Str2P, Len1, DL)) + return copyFlags( + *CI, + emitMemCmp(Str1P, Str2P, + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1), + B, DL, TLI)); + } + + return nullptr; +} + +Value *LibCallSimplifier::optimizeStrNDup(CallInst *CI, IRBuilderBase &B) { + Value *Src = CI->getArgOperand(0); + ConstantInt *Size = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + uint64_t SrcLen = GetStringLength(Src); + if (SrcLen && Size) { + annotateDereferenceableBytes(CI, 0, SrcLen); + if (SrcLen <= Size->getZExtValue() + 1) + return copyFlags(*CI, emitStrDup(Src, B, TLI)); + } + + return nullptr; +} + +Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilderBase &B) { + Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); + if (Dst == Src) // strcpy(x,x) -> x + return Src; + + annotateNonNullNoUndefBasedOnAccess(CI, {0, 1}); + // See if we can get the length of the input string. + uint64_t Len = GetStringLength(Src); + if (Len) + annotateDereferenceableBytes(CI, 1, Len); + else + return nullptr; + + // We have enough information to now generate the memcpy call to do the + // copy for us. Make a memcpy to copy the nul byte with align = 1. + CallInst *NewCI = + B.CreateMemCpy(Dst, Align(1), Src, Align(1), + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len)); + mergeAttributesAndFlags(NewCI, *CI); + return Dst; +} + +Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilderBase &B) { + Function *Callee = CI->getCalledFunction(); + Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); + + // stpcpy(d,s) -> strcpy(d,s) if the result is not used. + if (CI->use_empty()) + return copyFlags(*CI, emitStrCpy(Dst, Src, B, TLI)); + + if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) + Value *StrLen = emitStrLen(Src, B, DL, TLI); + return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr; + } + + // See if we can get the length of the input string. + uint64_t Len = GetStringLength(Src); + if (Len) + annotateDereferenceableBytes(CI, 1, Len); + else + return nullptr; + + Type *PT = Callee->getFunctionType()->getParamType(0); + Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len); + Value *DstEnd = B.CreateInBoundsGEP( + B.getInt8Ty(), Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1)); + + // We have enough information to now generate the memcpy call to do the + // copy for us. Make a memcpy to copy the nul byte with align = 1. 
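  // E.g. (illustrative): stpcpy(d, "abc") becomes
  //   memcpy(d, "abc", 4)       // Len == 4, counting the terminating nul
  // and the call's value is folded to d + 3, i.e. DstEnd computed above.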
+ CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1), LenV); + mergeAttributesAndFlags(NewCI, *CI); + return DstEnd; +} + +// Optimize a call to size_t strlcpy(char*, const char*, size_t). + +Value *LibCallSimplifier::optimizeStrLCpy(CallInst *CI, IRBuilderBase &B) { + Value *Size = CI->getArgOperand(2); + if (isKnownNonZero(Size, DL)) + // Like snprintf, the function stores into the destination only when + // the size argument is nonzero. + annotateNonNullNoUndefBasedOnAccess(CI, 0); + // The function reads the source argument regardless of Size (it returns + // its length). + annotateNonNullNoUndefBasedOnAccess(CI, 1); + + uint64_t NBytes; + if (ConstantInt *SizeC = dyn_cast<ConstantInt>(Size)) + NBytes = SizeC->getZExtValue(); + else + return nullptr; + + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + if (NBytes <= 1) { + if (NBytes == 1) + // For a call to strlcpy(D, S, 1) first store a nul in *D. + B.CreateStore(B.getInt8(0), Dst); + + // Transform strlcpy(D, S, 0) to a call to strlen(S). + return copyFlags(*CI, emitStrLen(Src, B, DL, TLI)); + } + + // Try to determine the length of the source, substituting its size + // when it's not nul-terminated (as it's required to be) to avoid + // reading past its end. + StringRef Str; + if (!getConstantStringInfo(Src, Str, /*TrimAtNul=*/false)) + return nullptr; + + uint64_t SrcLen = Str.find('\0'); + // Set if the terminating nul should be copied by the call to memcpy + // below. + bool NulTerm = SrcLen < NBytes; + + if (NulTerm) + // Overwrite NBytes with the number of bytes to copy, including + // the terminating nul. + NBytes = SrcLen + 1; + else { + // Set the length of the source for the function to return to its + // size, and cap NBytes at the same. + SrcLen = std::min(SrcLen, uint64_t(Str.size())); + NBytes = std::min(NBytes - 1, SrcLen); + } + + if (SrcLen == 0) { + // Transform strlcpy(D, "", N) to (*D = '\0, 0). + B.CreateStore(B.getInt8(0), Dst); + return ConstantInt::get(CI->getType(), 0); + } + + Function *Callee = CI->getCalledFunction(); + Type *PT = Callee->getFunctionType()->getParamType(0); + // Transform strlcpy(D, S, N) to memcpy(D, S, N') where N' is the lower + // bound on strlen(S) + 1 and N, optionally followed by a nul store to + // D[N' - 1] if necessary. + CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1), + ConstantInt::get(DL.getIntPtrType(PT), NBytes)); + mergeAttributesAndFlags(NewCI, *CI); + + if (!NulTerm) { + Value *EndOff = ConstantInt::get(CI->getType(), NBytes); + Value *EndPtr = B.CreateInBoundsGEP(B.getInt8Ty(), Dst, EndOff); + B.CreateStore(B.getInt8(0), EndPtr); + } + + // Like snprintf, strlcpy returns the number of nonzero bytes that would + // have been copied if the bound had been sufficiently big (which in this + // case is strlen(Src)). + return ConstantInt::get(CI->getType(), SrcLen); +} + +// Optimize a call CI to either stpncpy when RetEnd is true, or to strncpy +// otherwise. +Value *LibCallSimplifier::optimizeStringNCpy(CallInst *CI, bool RetEnd, + IRBuilderBase &B) { + Function *Callee = CI->getCalledFunction(); + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + Value *Size = CI->getArgOperand(2); + + if (isKnownNonZero(Size, DL)) { + // Both st{p,r}ncpy(D, S, N) access the source and destination arrays + // only when N is nonzero. + annotateNonNullNoUndefBasedOnAccess(CI, 0); + annotateNonNullNoUndefBasedOnAccess(CI, 1); + } + + // If the "bound" argument is known set N to it. 
Otherwise set it to + // UINT64_MAX and handle it later. + uint64_t N = UINT64_MAX; + if (ConstantInt *SizeC = dyn_cast<ConstantInt>(Size)) + N = SizeC->getZExtValue(); + + if (N == 0) + // Fold st{p,r}ncpy(D, S, 0) to D. + return Dst; + + if (N == 1) { + Type *CharTy = B.getInt8Ty(); + Value *CharVal = B.CreateLoad(CharTy, Src, "stxncpy.char0"); + B.CreateStore(CharVal, Dst); + if (!RetEnd) + // Transform strncpy(D, S, 1) to return (*D = *S), D. + return Dst; + + // Transform stpncpy(D, S, 1) to return (*D = *S) ? D + 1 : D. + Value *ZeroChar = ConstantInt::get(CharTy, 0); + Value *Cmp = B.CreateICmpEQ(CharVal, ZeroChar, "stpncpy.char0cmp"); + + Value *Off1 = B.getInt32(1); + Value *EndPtr = B.CreateInBoundsGEP(CharTy, Dst, Off1, "stpncpy.end"); + return B.CreateSelect(Cmp, Dst, EndPtr, "stpncpy.sel"); + } + + // If the length of the input string is known set SrcLen to it. + uint64_t SrcLen = GetStringLength(Src); + if (SrcLen) + annotateDereferenceableBytes(CI, 1, SrcLen); + else + return nullptr; + + --SrcLen; // Unbias length. + + if (SrcLen == 0) { + // Transform st{p,r}ncpy(D, "", N) to memset(D, '\0', N) for any N. + Align MemSetAlign = + CI->getAttributes().getParamAttrs(0).getAlignment().valueOrOne(); + CallInst *NewCI = B.CreateMemSet(Dst, B.getInt8('\0'), Size, MemSetAlign); + AttrBuilder ArgAttrs(CI->getContext(), CI->getAttributes().getParamAttrs(0)); + NewCI->setAttributes(NewCI->getAttributes().addParamAttributes( + CI->getContext(), 0, ArgAttrs)); + copyFlags(*CI, NewCI); + return Dst; + } + + if (N > SrcLen + 1) { + if (N > 128) + // Bail if N is large or unknown. + return nullptr; + + // st{p,r}ncpy(D, "a", N) -> memcpy(D, "a\0\0\0", N) for N <= 128. + StringRef Str; + if (!getConstantStringInfo(Src, Str)) + return nullptr; + std::string SrcStr = Str.str(); + // Create a bigger, nul-padded array with the same length, SrcLen, + // as the original string. + SrcStr.resize(N, '\0'); + Src = B.CreateGlobalString(SrcStr, "str"); + } + + Type *PT = Callee->getFunctionType()->getParamType(0); + // st{p,r}ncpy(D, S, N) -> memcpy(align 1 D, align 1 S, N) when both + // S and N are constant. + CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1), + ConstantInt::get(DL.getIntPtrType(PT), N)); + mergeAttributesAndFlags(NewCI, *CI); + if (!RetEnd) + return Dst; + + // stpncpy(D, S, N) returns the address of the first null in D if it writes + // one, otherwise D + N. + Value *Off = B.getInt64(std::min(SrcLen, N)); + return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, Off, "endptr"); +} + +Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B, + unsigned CharSize, + Value *Bound) { + Value *Src = CI->getArgOperand(0); + Type *CharTy = B.getIntNTy(CharSize); + + if (isOnlyUsedInZeroEqualityComparison(CI) && + (!Bound || isKnownNonZero(Bound, DL))) { + // Fold strlen: + // strlen(x) != 0 --> *x != 0 + // strlen(x) == 0 --> *x == 0 + // and likewise strnlen with constant N > 0: + // strnlen(x, N) != 0 --> *x != 0 + // strnlen(x, N) == 0 --> *x == 0 + return B.CreateZExt(B.CreateLoad(CharTy, Src, "char0"), + CI->getType()); + } + + if (Bound) { + if (ConstantInt *BoundCst = dyn_cast<ConstantInt>(Bound)) { + if (BoundCst->isZero()) + // Fold strnlen(s, 0) -> 0 for any s, constant or otherwise. + return ConstantInt::get(CI->getType(), 0); + + if (BoundCst->isOne()) { + // Fold strnlen(s, 1) -> *s ? 1 : 0 for any s. 
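        // Source-level equivalent (illustrative): strnlen(p, 1) is rewritten
        // to (size_t)(*p != 0) -- a single load and compare in place of the
        // library call.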
+ Value *CharVal = B.CreateLoad(CharTy, Src, "strnlen.char0"); + Value *ZeroChar = ConstantInt::get(CharTy, 0); + Value *Cmp = B.CreateICmpNE(CharVal, ZeroChar, "strnlen.char0cmp"); + return B.CreateZExt(Cmp, CI->getType()); + } + } + } + + if (uint64_t Len = GetStringLength(Src, CharSize)) { + Value *LenC = ConstantInt::get(CI->getType(), Len - 1); + // Fold strlen("xyz") -> 3 and strnlen("xyz", 2) -> 2 + // and strnlen("xyz", Bound) -> min(3, Bound) for nonconstant Bound. + if (Bound) + return B.CreateBinaryIntrinsic(Intrinsic::umin, LenC, Bound); + return LenC; + } + + if (Bound) + // Punt for strnlen for now. + return nullptr; + + // If s is a constant pointer pointing to a string literal, we can fold + // strlen(s + x) to strlen(s) - x, when x is known to be in the range + // [0, strlen(s)] or the string has a single null terminator '\0' at the end. + // We only try to simplify strlen when the pointer s points to an array + // of CharSize elements. Otherwise, we would need to scale the offset x before + // doing the subtraction. This will make the optimization more complex, and + // it's not very useful because calling strlen for a pointer of other types is + // very uncommon. + if (GEPOperator *GEP = dyn_cast<GEPOperator>(Src)) { + // TODO: Handle subobjects. + if (!isGEPBasedOnPointerToString(GEP, CharSize)) + return nullptr; + + ConstantDataArraySlice Slice; + if (getConstantDataArrayInfo(GEP->getOperand(0), Slice, CharSize)) { + uint64_t NullTermIdx; + if (Slice.Array == nullptr) { + NullTermIdx = 0; + } else { + NullTermIdx = ~((uint64_t)0); + for (uint64_t I = 0, E = Slice.Length; I < E; ++I) { + if (Slice.Array->getElementAsInteger(I + Slice.Offset) == 0) { + NullTermIdx = I; + break; + } + } + // If the string does not have '\0', leave it to strlen to compute + // its length. + if (NullTermIdx == ~((uint64_t)0)) + return nullptr; + } + + Value *Offset = GEP->getOperand(2); + KnownBits Known = computeKnownBits(Offset, DL, 0, nullptr, CI, nullptr); + uint64_t ArrSize = + cast<ArrayType>(GEP->getSourceElementType())->getNumElements(); + + // If Offset is not provably in the range [0, NullTermIdx], we can still + // optimize if we can prove that the program has undefined behavior when + // Offset is outside that range. That is the case when GEP->getOperand(0) + // is a pointer to an object whose memory extent is NullTermIdx+1. + if ((Known.isNonNegative() && Known.getMaxValue().ule(NullTermIdx)) || + (isa<GlobalVariable>(GEP->getOperand(0)) && + NullTermIdx == ArrSize - 1)) { + Offset = B.CreateSExtOrTrunc(Offset, CI->getType()); + return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx), + Offset); + } + } + } + + // strlen(x?"foo":"bars") --> x ? 
3 : 4 + if (SelectInst *SI = dyn_cast<SelectInst>(Src)) { + uint64_t LenTrue = GetStringLength(SI->getTrueValue(), CharSize); + uint64_t LenFalse = GetStringLength(SI->getFalseValue(), CharSize); + if (LenTrue && LenFalse) { + ORE.emit([&]() { + return OptimizationRemark("instcombine", "simplify-libcalls", CI) + << "folded strlen(select) to select of constants"; + }); + return B.CreateSelect(SI->getCondition(), + ConstantInt::get(CI->getType(), LenTrue - 1), + ConstantInt::get(CI->getType(), LenFalse - 1)); + } + } + + return nullptr; +} + +Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilderBase &B) { + if (Value *V = optimizeStringLength(CI, B, 8)) + return V; + annotateNonNullNoUndefBasedOnAccess(CI, 0); + return nullptr; +} + +Value *LibCallSimplifier::optimizeStrNLen(CallInst *CI, IRBuilderBase &B) { + Value *Bound = CI->getArgOperand(1); + if (Value *V = optimizeStringLength(CI, B, 8, Bound)) + return V; + + if (isKnownNonZero(Bound, DL)) + annotateNonNullNoUndefBasedOnAccess(CI, 0); + return nullptr; +} + +Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilderBase &B) { + Module &M = *CI->getModule(); + unsigned WCharSize = TLI->getWCharSize(M) * 8; + // We cannot perform this optimization without wchar_size metadata. + if (WCharSize == 0) + return nullptr; + + return optimizeStringLength(CI, B, WCharSize); +} + +Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilderBase &B) { + StringRef S1, S2; + bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); + bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); + + // strpbrk(s, "") -> nullptr + // strpbrk("", s) -> nullptr + if ((HasS1 && S1.empty()) || (HasS2 && S2.empty())) + return Constant::getNullValue(CI->getType()); + + // Constant folding. + if (HasS1 && HasS2) { + size_t I = S1.find_first_of(S2); + if (I == StringRef::npos) // No match. + return Constant::getNullValue(CI->getType()); + + return B.CreateInBoundsGEP(B.getInt8Ty(), CI->getArgOperand(0), + B.getInt64(I), "strpbrk"); + } + + // strpbrk(s, "a") -> strchr(s, 'a') + if (HasS2 && S2.size() == 1) + return copyFlags(*CI, emitStrChr(CI->getArgOperand(0), S2[0], B, TLI)); + + return nullptr; +} + +Value *LibCallSimplifier::optimizeStrTo(CallInst *CI, IRBuilderBase &B) { + Value *EndPtr = CI->getArgOperand(1); + if (isa<ConstantPointerNull>(EndPtr)) { + // With a null EndPtr, this function won't capture the main argument. + // It would be readonly too, except that it still may write to errno. + CI->addParamAttr(0, Attribute::NoCapture); + } + + return nullptr; +} + +Value *LibCallSimplifier::optimizeStrSpn(CallInst *CI, IRBuilderBase &B) { + StringRef S1, S2; + bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); + bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); + + // strspn(s, "") -> 0 + // strspn("", s) -> 0 + if ((HasS1 && S1.empty()) || (HasS2 && S2.empty())) + return Constant::getNullValue(CI->getType()); + + // Constant folding. + if (HasS1 && HasS2) { + size_t Pos = S1.find_first_not_of(S2); + if (Pos == StringRef::npos) + Pos = S1.size(); + return ConstantInt::get(CI->getType(), Pos); + } + + return nullptr; +} + +Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilderBase &B) { + StringRef S1, S2; + bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); + bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); + + // strcspn("", s) -> 0 + if (HasS1 && S1.empty()) + return Constant::getNullValue(CI->getType()); + + // Constant folding. 
+ if (HasS1 && HasS2) { + size_t Pos = S1.find_first_of(S2); + if (Pos == StringRef::npos) + Pos = S1.size(); + return ConstantInt::get(CI->getType(), Pos); + } + + // strcspn(s, "") -> strlen(s) + if (HasS2 && S2.empty()) + return copyFlags(*CI, emitStrLen(CI->getArgOperand(0), B, DL, TLI)); + + return nullptr; +} + +Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilderBase &B) { + // fold strstr(x, x) -> x. + if (CI->getArgOperand(0) == CI->getArgOperand(1)) + return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); + + // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 + if (isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { + Value *StrLen = emitStrLen(CI->getArgOperand(1), B, DL, TLI); + if (!StrLen) + return nullptr; + Value *StrNCmp = emitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1), + StrLen, B, DL, TLI); + if (!StrNCmp) + return nullptr; + for (User *U : llvm::make_early_inc_range(CI->users())) { + ICmpInst *Old = cast<ICmpInst>(U); + Value *Cmp = + B.CreateICmp(Old->getPredicate(), StrNCmp, + ConstantInt::getNullValue(StrNCmp->getType()), "cmp"); + replaceAllUsesWith(Old, Cmp); + } + return CI; + } + + // See if either input string is a constant string. + StringRef SearchStr, ToFindStr; + bool HasStr1 = getConstantStringInfo(CI->getArgOperand(0), SearchStr); + bool HasStr2 = getConstantStringInfo(CI->getArgOperand(1), ToFindStr); + + // fold strstr(x, "") -> x. + if (HasStr2 && ToFindStr.empty()) + return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); + + // If both strings are known, constant fold it. + if (HasStr1 && HasStr2) { + size_t Offset = SearchStr.find(ToFindStr); + + if (Offset == StringRef::npos) // strstr("foo", "bar") -> null + return Constant::getNullValue(CI->getType()); + + // strstr("abcd", "bc") -> gep((char*)"abcd", 1) + Value *Result = castToCStr(CI->getArgOperand(0), B); + Result = + B.CreateConstInBoundsGEP1_64(B.getInt8Ty(), Result, Offset, "strstr"); + return B.CreateBitCast(Result, CI->getType()); + } + + // fold strstr(x, "y") -> strchr(x, 'y'). + if (HasStr2 && ToFindStr.size() == 1) { + Value *StrChr = emitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI); + return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr; + } + + annotateNonNullNoUndefBasedOnAccess(CI, {0, 1}); + return nullptr; +} + +Value *LibCallSimplifier::optimizeMemRChr(CallInst *CI, IRBuilderBase &B) { + Value *SrcStr = CI->getArgOperand(0); + Value *Size = CI->getArgOperand(2); + annotateNonNullAndDereferenceable(CI, 0, Size, DL); + Value *CharVal = CI->getArgOperand(1); + ConstantInt *LenC = dyn_cast<ConstantInt>(Size); + Value *NullPtr = Constant::getNullValue(CI->getType()); + + if (LenC) { + if (LenC->isZero()) + // Fold memrchr(x, y, 0) --> null. + return NullPtr; + + if (LenC->isOne()) { + // Fold memrchr(x, y, 1) --> *x == y ? x : null for any x and y, + // constant or otherwise. + Value *Val = B.CreateLoad(B.getInt8Ty(), SrcStr, "memrchr.char0"); + // Slice off the character's high end bits. + CharVal = B.CreateTrunc(CharVal, B.getInt8Ty()); + Value *Cmp = B.CreateICmpEQ(Val, CharVal, "memrchr.char0cmp"); + return B.CreateSelect(Cmp, SrcStr, NullPtr, "memrchr.sel"); + } + } + + StringRef Str; + if (!getConstantStringInfo(SrcStr, Str, /*TrimAtNul=*/false)) + return nullptr; + + if (Str.size() == 0) + // If the array is empty fold memrchr(A, C, N) to null for any value + // of C and N on the basis that the only valid value of N is zero + // (otherwise the call is undefined). 
+ return NullPtr; + + uint64_t EndOff = UINT64_MAX; + if (LenC) { + EndOff = LenC->getZExtValue(); + if (Str.size() < EndOff) + // Punt out-of-bounds accesses to sanitizers and/or libc. + return nullptr; + } + + if (ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal)) { + // Fold memrchr(S, C, N) for a constant C. + size_t Pos = Str.rfind(CharC->getZExtValue(), EndOff); + if (Pos == StringRef::npos) + // When the character is not in the source array fold the result + // to null regardless of Size. + return NullPtr; + + if (LenC) + // Fold memrchr(s, c, N) --> s + Pos for constant N > Pos. + return B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, B.getInt64(Pos)); + + if (Str.find(Str[Pos]) == Pos) { + // When there is just a single occurrence of C in S, i.e., the one + // in Str[Pos], fold + // memrchr(s, c, N) --> N <= Pos ? null : s + Pos + // for nonconstant N. + Value *Cmp = B.CreateICmpULE(Size, ConstantInt::get(Size->getType(), Pos), + "memrchr.cmp"); + Value *SrcPlus = B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, + B.getInt64(Pos), "memrchr.ptr_plus"); + return B.CreateSelect(Cmp, NullPtr, SrcPlus, "memrchr.sel"); + } + } + + // Truncate the string to search at most EndOff characters. + Str = Str.substr(0, EndOff); + if (Str.find_first_not_of(Str[0]) != StringRef::npos) + return nullptr; + + // If the source array consists of all equal characters, then for any + // C and N (whether in bounds or not), fold memrchr(S, C, N) to + // N != 0 && *S == C ? S + N - 1 : null + Type *SizeTy = Size->getType(); + Type *Int8Ty = B.getInt8Ty(); + Value *NNeZ = B.CreateICmpNE(Size, ConstantInt::get(SizeTy, 0)); + // Slice off the sought character's high end bits. + CharVal = B.CreateTrunc(CharVal, Int8Ty); + Value *CEqS0 = B.CreateICmpEQ(ConstantInt::get(Int8Ty, Str[0]), CharVal); + Value *And = B.CreateLogicalAnd(NNeZ, CEqS0); + Value *SizeM1 = B.CreateSub(Size, ConstantInt::get(SizeTy, 1)); + Value *SrcPlus = + B.CreateInBoundsGEP(Int8Ty, SrcStr, SizeM1, "memrchr.ptr_plus"); + return B.CreateSelect(And, SrcPlus, NullPtr, "memrchr.sel"); +} + +Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) { + Value *SrcStr = CI->getArgOperand(0); + Value *Size = CI->getArgOperand(2); + + if (isKnownNonZero(Size, DL)) { + annotateNonNullNoUndefBasedOnAccess(CI, 0); + if (isOnlyUsedInEqualityComparison(CI, SrcStr)) + return memChrToCharCompare(CI, Size, B, DL); + } + + Value *CharVal = CI->getArgOperand(1); + ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal); + ConstantInt *LenC = dyn_cast<ConstantInt>(Size); + Value *NullPtr = Constant::getNullValue(CI->getType()); + + // memchr(x, y, 0) -> null + if (LenC) { + if (LenC->isZero()) + return NullPtr; + + if (LenC->isOne()) { + // Fold memchr(x, y, 1) --> *x == y ? x : null for any x and y, + // constant or otherwise. + Value *Val = B.CreateLoad(B.getInt8Ty(), SrcStr, "memchr.char0"); + // Slice off the character's high end bits. + CharVal = B.CreateTrunc(CharVal, B.getInt8Ty()); + Value *Cmp = B.CreateICmpEQ(Val, CharVal, "memchr.char0cmp"); + return B.CreateSelect(Cmp, SrcStr, NullPtr, "memchr.sel"); + } + } + + StringRef Str; + if (!getConstantStringInfo(SrcStr, Str, /*TrimAtNul=*/false)) + return nullptr; + + if (CharC) { + size_t Pos = Str.find(CharC->getZExtValue()); + if (Pos == StringRef::npos) + // When the character is not in the source array fold the result + // to null regardless of Size. + return NullPtr; + + // Fold memchr(s, c, n) -> n <= Pos ? 
null : s + Pos + // When the constant Size is less than or equal to the character + // position also fold the result to null. + Value *Cmp = B.CreateICmpULE(Size, ConstantInt::get(Size->getType(), Pos), + "memchr.cmp"); + Value *SrcPlus = B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, B.getInt64(Pos), + "memchr.ptr"); + return B.CreateSelect(Cmp, NullPtr, SrcPlus); + } + + if (Str.size() == 0) + // If the array is empty fold memchr(A, C, N) to null for any value + // of C and N on the basis that the only valid value of N is zero + // (otherwise the call is undefined). + return NullPtr; + + if (LenC) + Str = substr(Str, LenC->getZExtValue()); + + size_t Pos = Str.find_first_not_of(Str[0]); + if (Pos == StringRef::npos + || Str.find_first_not_of(Str[Pos], Pos) == StringRef::npos) { + // If the source array consists of at most two consecutive sequences + // of the same characters, then for any C and N (whether in bounds or + // not), fold memchr(S, C, N) to + // N != 0 && *S == C ? S : null + // or for the two sequences to: + // N != 0 && *S == C ? S : (N > Pos && S[Pos] == C ? S + Pos : null) + // ^Sel2 ^Sel1 are denoted above. + // The latter makes it also possible to fold strchr() calls with strings + // of the same characters. + Type *SizeTy = Size->getType(); + Type *Int8Ty = B.getInt8Ty(); + + // Slice off the sought character's high end bits. + CharVal = B.CreateTrunc(CharVal, Int8Ty); + + Value *Sel1 = NullPtr; + if (Pos != StringRef::npos) { + // Handle two consecutive sequences of the same characters. + Value *PosVal = ConstantInt::get(SizeTy, Pos); + Value *StrPos = ConstantInt::get(Int8Ty, Str[Pos]); + Value *CEqSPos = B.CreateICmpEQ(CharVal, StrPos); + Value *NGtPos = B.CreateICmp(ICmpInst::ICMP_UGT, Size, PosVal); + Value *And = B.CreateAnd(CEqSPos, NGtPos); + Value *SrcPlus = B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, PosVal); + Sel1 = B.CreateSelect(And, SrcPlus, NullPtr, "memchr.sel1"); + } + + Value *Str0 = ConstantInt::get(Int8Ty, Str[0]); + Value *CEqS0 = B.CreateICmpEQ(Str0, CharVal); + Value *NNeZ = B.CreateICmpNE(Size, ConstantInt::get(SizeTy, 0)); + Value *And = B.CreateAnd(NNeZ, CEqS0); + return B.CreateSelect(And, SrcStr, Sel1, "memchr.sel2"); + } + + if (!LenC) { + if (isOnlyUsedInEqualityComparison(CI, SrcStr)) + // S is dereferenceable so it's safe to load from it and fold + // memchr(S, C, N) == S to N && *S == C for any C and N. + // TODO: This is safe even even for nonconstant S. + return memChrToCharCompare(CI, Size, B, DL); + + // From now on we need a constant length and constant array. + return nullptr; + } + + // If the char is variable but the input str and length are not we can turn + // this memchr call into a simple bit field test. Of course this only works + // when the return value is only checked against null. + // + // It would be really nice to reuse switch lowering here but we can't change + // the CFG at this point. + // + // memchr("\r\n", C, 2) != nullptr -> (1 << C & ((1 << '\r') | (1 << '\n'))) + // != 0 + // after bounds check. + if (Str.empty() || !isOnlyUsedInZeroEqualityComparison(CI)) + return nullptr; + + unsigned char Max = + *std::max_element(reinterpret_cast<const unsigned char *>(Str.begin()), + reinterpret_cast<const unsigned char *>(Str.end())); + + // Make sure the bit field we're about to create fits in a register on the + // target. + // FIXME: On a 64 bit architecture this prevents us from using the + // interesting range of alpha ascii chars. 
We could do better by emitting + // two bitfields or shifting the range by 64 if no lower chars are used. + if (!DL.fitsInLegalInteger(Max + 1)) + return nullptr; + + // For the bit field use a power-of-2 type with at least 8 bits to avoid + // creating unnecessary illegal types. + unsigned char Width = NextPowerOf2(std::max((unsigned char)7, Max)); + + // Now build the bit field. + APInt Bitfield(Width, 0); + for (char C : Str) + Bitfield.setBit((unsigned char)C); + Value *BitfieldC = B.getInt(Bitfield); + + // Adjust width of "C" to the bitfield width, then mask off the high bits. + Value *C = B.CreateZExtOrTrunc(CharVal, BitfieldC->getType()); + C = B.CreateAnd(C, B.getIntN(Width, 0xFF)); + + // First check that the bit field access is within bounds. + Value *Bounds = B.CreateICmp(ICmpInst::ICMP_ULT, C, B.getIntN(Width, Width), + "memchr.bounds"); + + // Create code that checks if the given bit is set in the field. + Value *Shl = B.CreateShl(B.getIntN(Width, 1ULL), C); + Value *Bits = B.CreateIsNotNull(B.CreateAnd(Shl, BitfieldC), "memchr.bits"); + + // Finally merge both checks and cast to pointer type. The inttoptr + // implicitly zexts the i1 to intptr type. + return B.CreateIntToPtr(B.CreateLogicalAnd(Bounds, Bits, "memchr"), + CI->getType()); +} + +// Optimize a memcmp or, when StrNCmp is true, strncmp call CI with constant +// arrays LHS and RHS and nonconstant Size. +static Value *optimizeMemCmpVarSize(CallInst *CI, Value *LHS, Value *RHS, + Value *Size, bool StrNCmp, + IRBuilderBase &B, const DataLayout &DL) { + if (LHS == RHS) // memcmp(s,s,x) -> 0 + return Constant::getNullValue(CI->getType()); + + StringRef LStr, RStr; + if (!getConstantStringInfo(LHS, LStr, /*TrimAtNul=*/false) || + !getConstantStringInfo(RHS, RStr, /*TrimAtNul=*/false)) + return nullptr; + + // If the contents of both constant arrays are known, fold a call to + // memcmp(A, B, N) to + // N <= Pos ? 0 : (A < B ? -1 : B < A ? +1 : 0) + // where Pos is the first mismatch between A and B, determined below. + + uint64_t Pos = 0; + Value *Zero = ConstantInt::get(CI->getType(), 0); + for (uint64_t MinSize = std::min(LStr.size(), RStr.size()); ; ++Pos) { + if (Pos == MinSize || + (StrNCmp && (LStr[Pos] == '\0' && RStr[Pos] == '\0'))) { + // One array is a leading part of the other of equal or greater + // size, or for strncmp, the arrays are equal strings. + // Fold the result to zero. Size is assumed to be in bounds, since + // otherwise the call would be undefined. + return Zero; + } + + if (LStr[Pos] != RStr[Pos]) + break; + } + + // Normalize the result. + typedef unsigned char UChar; + int IRes = UChar(LStr[Pos]) < UChar(RStr[Pos]) ? -1 : 1; + Value *MaxSize = ConstantInt::get(Size->getType(), Pos); + Value *Cmp = B.CreateICmp(ICmpInst::ICMP_ULE, Size, MaxSize); + Value *Res = ConstantInt::get(CI->getType(), IRes); + return B.CreateSelect(Cmp, Zero, Res); +} + +// Optimize a memcmp call CI with constant size Len. 
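+// As an illustrative summary of the cases handled below: with Len == 0 the
+// call folds to the constant 0; with Len == 1 it becomes the difference of
+// the two zero-extended first bytes; and for a Len that is a legal integer
+// width, when the result is only compared against zero and alignment (or
+// constant data) permits, it becomes one wide load from each pointer and a
+// single icmp ne.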
+static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS, + uint64_t Len, IRBuilderBase &B, + const DataLayout &DL) { + if (Len == 0) // memcmp(s1,s2,0) -> 0 + return Constant::getNullValue(CI->getType()); + + // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS + if (Len == 1) { + Value *LHSV = + B.CreateZExt(B.CreateLoad(B.getInt8Ty(), castToCStr(LHS, B), "lhsc"), + CI->getType(), "lhsv"); + Value *RHSV = + B.CreateZExt(B.CreateLoad(B.getInt8Ty(), castToCStr(RHS, B), "rhsc"), + CI->getType(), "rhsv"); + return B.CreateSub(LHSV, RHSV, "chardiff"); + } + + // memcmp(S1,S2,N/8)==0 -> (*(intN_t*)S1 != *(intN_t*)S2)==0 + // TODO: The case where both inputs are constants does not need to be limited + // to legal integers or equality comparison. See block below this. + if (DL.isLegalInteger(Len * 8) && isOnlyUsedInZeroEqualityComparison(CI)) { + IntegerType *IntType = IntegerType::get(CI->getContext(), Len * 8); + Align PrefAlignment = DL.getPrefTypeAlign(IntType); + + // First, see if we can fold either argument to a constant. + Value *LHSV = nullptr; + if (auto *LHSC = dyn_cast<Constant>(LHS)) { + LHSC = ConstantExpr::getBitCast(LHSC, IntType->getPointerTo()); + LHSV = ConstantFoldLoadFromConstPtr(LHSC, IntType, DL); + } + Value *RHSV = nullptr; + if (auto *RHSC = dyn_cast<Constant>(RHS)) { + RHSC = ConstantExpr::getBitCast(RHSC, IntType->getPointerTo()); + RHSV = ConstantFoldLoadFromConstPtr(RHSC, IntType, DL); + } + + // Don't generate unaligned loads. If either source is constant data, + // alignment doesn't matter for that source because there is no load. + if ((LHSV || getKnownAlignment(LHS, DL, CI) >= PrefAlignment) && + (RHSV || getKnownAlignment(RHS, DL, CI) >= PrefAlignment)) { + if (!LHSV) { + Type *LHSPtrTy = + IntType->getPointerTo(LHS->getType()->getPointerAddressSpace()); + LHSV = B.CreateLoad(IntType, B.CreateBitCast(LHS, LHSPtrTy), "lhsv"); + } + if (!RHSV) { + Type *RHSPtrTy = + IntType->getPointerTo(RHS->getType()->getPointerAddressSpace()); + RHSV = B.CreateLoad(IntType, B.CreateBitCast(RHS, RHSPtrTy), "rhsv"); + } + return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp"); + } + } + + return nullptr; +} + +// Most simplifications for memcmp also apply to bcmp. +Value *LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst *CI, + IRBuilderBase &B) { + Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1); + Value *Size = CI->getArgOperand(2); + + annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL); + + if (Value *Res = optimizeMemCmpVarSize(CI, LHS, RHS, Size, false, B, DL)) + return Res; + + // Handle constant Size. + ConstantInt *LenC = dyn_cast<ConstantInt>(Size); + if (!LenC) + return nullptr; + + return optimizeMemCmpConstantSize(CI, LHS, RHS, LenC->getZExtValue(), B, DL); +} + +Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilderBase &B) { + Module *M = CI->getModule(); + if (Value *V = optimizeMemCmpBCmpCommon(CI, B)) + return V; + + // memcmp(x, y, Len) == 0 -> bcmp(x, y, Len) == 0 + // bcmp can be more efficient than memcmp because it only has to know that + // there is a difference, not how different one is to the other. 
+ if (isLibFuncEmittable(M, TLI, LibFunc_bcmp) && + isOnlyUsedInZeroEqualityComparison(CI)) { + Value *LHS = CI->getArgOperand(0); + Value *RHS = CI->getArgOperand(1); + Value *Size = CI->getArgOperand(2); + return copyFlags(*CI, emitBCmp(LHS, RHS, Size, B, DL, TLI)); + } + + return nullptr; +} + +Value *LibCallSimplifier::optimizeBCmp(CallInst *CI, IRBuilderBase &B) { + return optimizeMemCmpBCmpCommon(CI, B); +} + +Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilderBase &B) { + Value *Size = CI->getArgOperand(2); + annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL); + if (isa<IntrinsicInst>(CI)) + return nullptr; + + // memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n) + CallInst *NewCI = B.CreateMemCpy(CI->getArgOperand(0), Align(1), + CI->getArgOperand(1), Align(1), Size); + mergeAttributesAndFlags(NewCI, *CI); + return CI->getArgOperand(0); +} + +Value *LibCallSimplifier::optimizeMemCCpy(CallInst *CI, IRBuilderBase &B) { + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + ConstantInt *StopChar = dyn_cast<ConstantInt>(CI->getArgOperand(2)); + ConstantInt *N = dyn_cast<ConstantInt>(CI->getArgOperand(3)); + StringRef SrcStr; + if (CI->use_empty() && Dst == Src) + return Dst; + // memccpy(d, s, c, 0) -> nullptr + if (N) { + if (N->isNullValue()) + return Constant::getNullValue(CI->getType()); + if (!getConstantStringInfo(Src, SrcStr, /*TrimAtNul=*/false) || + // TODO: Handle zeroinitializer. + !StopChar) + return nullptr; + } else { + return nullptr; + } + + // Wrap arg 'c' of type int to char + size_t Pos = SrcStr.find(StopChar->getSExtValue() & 0xFF); + if (Pos == StringRef::npos) { + if (N->getZExtValue() <= SrcStr.size()) { + copyFlags(*CI, B.CreateMemCpy(Dst, Align(1), Src, Align(1), + CI->getArgOperand(3))); + return Constant::getNullValue(CI->getType()); + } + return nullptr; + } + + Value *NewN = + ConstantInt::get(N->getType(), std::min(uint64_t(Pos + 1), N->getZExtValue())); + // memccpy -> llvm.memcpy + copyFlags(*CI, B.CreateMemCpy(Dst, Align(1), Src, Align(1), NewN)); + return Pos + 1 <= N->getZExtValue() + ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, NewN) + : Constant::getNullValue(CI->getType()); +} + +Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilderBase &B) { + Value *Dst = CI->getArgOperand(0); + Value *N = CI->getArgOperand(2); + // mempcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n), x + n + CallInst *NewCI = + B.CreateMemCpy(Dst, Align(1), CI->getArgOperand(1), Align(1), N); + // Propagate attributes, but memcpy has no return value, so make sure that + // any return attributes are compliant. + // TODO: Attach return value attributes to the 1st operand to preserve them? 
+ mergeAttributesAndFlags(NewCI, *CI); + return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N); +} + +Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilderBase &B) { + Value *Size = CI->getArgOperand(2); + annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL); + if (isa<IntrinsicInst>(CI)) + return nullptr; + + // memmove(x, y, n) -> llvm.memmove(align 1 x, align 1 y, n) + CallInst *NewCI = B.CreateMemMove(CI->getArgOperand(0), Align(1), + CI->getArgOperand(1), Align(1), Size); + mergeAttributesAndFlags(NewCI, *CI); + return CI->getArgOperand(0); +} + +Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilderBase &B) { + Value *Size = CI->getArgOperand(2); + annotateNonNullAndDereferenceable(CI, 0, Size, DL); + if (isa<IntrinsicInst>(CI)) + return nullptr; + + // memset(p, v, n) -> llvm.memset(align 1 p, v, n) + Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); + CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val, Size, Align(1)); + mergeAttributesAndFlags(NewCI, *CI); + return CI->getArgOperand(0); +} + +Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilderBase &B) { + if (isa<ConstantPointerNull>(CI->getArgOperand(0))) + return copyFlags(*CI, emitMalloc(CI->getArgOperand(1), B, DL, TLI)); + + return nullptr; +} + +//===----------------------------------------------------------------------===// +// Math Library Optimizations +//===----------------------------------------------------------------------===// + +// Replace a libcall \p CI with a call to intrinsic \p IID +static Value *replaceUnaryCall(CallInst *CI, IRBuilderBase &B, + Intrinsic::ID IID) { + // Propagate fast-math flags from the existing call to the new call. + IRBuilderBase::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); + + Module *M = CI->getModule(); + Value *V = CI->getArgOperand(0); + Function *F = Intrinsic::getDeclaration(M, IID, CI->getType()); + CallInst *NewCall = B.CreateCall(F, V); + NewCall->takeName(CI); + return copyFlags(*CI, NewCall); +} + +/// Return a variant of Val with float type. +/// Currently this works in two cases: If Val is an FPExtension of a float +/// value to something bigger, simply return the operand. +/// If Val is a ConstantFP but can be converted to a float ConstantFP without +/// loss of precision do so. +static Value *valueHasFloatPrecision(Value *Val) { + if (FPExtInst *Cast = dyn_cast<FPExtInst>(Val)) { + Value *Op = Cast->getOperand(0); + if (Op->getType()->isFloatTy()) + return Op; + } + if (ConstantFP *Const = dyn_cast<ConstantFP>(Val)) { + APFloat F = Const->getValueAPF(); + bool losesInfo; + (void)F.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, + &losesInfo); + if (!losesInfo) + return ConstantFP::get(Const->getContext(), F); + } + return nullptr; +} + +/// Shrink double -> float functions. +static Value *optimizeDoubleFP(CallInst *CI, IRBuilderBase &B, + bool isBinary, const TargetLibraryInfo *TLI, + bool isPrecise = false) { + Function *CalleeFn = CI->getCalledFunction(); + if (!CI->getType()->isDoubleTy() || !CalleeFn) + return nullptr; + + // If not all the uses of the function are converted to float, then bail out. + // This matters if the precision of the result is more important than the + // precision of the arguments. + if (isPrecise) + for (User *U : CI->users()) { + FPTruncInst *Cast = dyn_cast<FPTruncInst>(U); + if (!Cast || !Cast->getType()->isFloatTy()) + return nullptr; + } + + // If this is something like 'g((double) float)', convert to 'gf(float)'. 
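+  // As a concrete instance (assuming the float variant exists and the
+  // surrounding checks allow the shrink): sin((double)f) for a float f is
+  // rewritten to (double)sinf(f).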
+ Value *V[2]; + V[0] = valueHasFloatPrecision(CI->getArgOperand(0)); + V[1] = isBinary ? valueHasFloatPrecision(CI->getArgOperand(1)) : nullptr; + if (!V[0] || (isBinary && !V[1])) + return nullptr; + + // If call isn't an intrinsic, check that it isn't within a function with the + // same name as the float version of this call, otherwise the result is an + // infinite loop. For example, from MinGW-w64: + // + // float expf(float val) { return (float) exp((double) val); } + StringRef CalleeName = CalleeFn->getName(); + bool IsIntrinsic = CalleeFn->isIntrinsic(); + if (!IsIntrinsic) { + StringRef CallerName = CI->getFunction()->getName(); + if (!CallerName.empty() && CallerName.back() == 'f' && + CallerName.size() == (CalleeName.size() + 1) && + CallerName.startswith(CalleeName)) + return nullptr; + } + + // Propagate the math semantics from the current function to the new function. + IRBuilderBase::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); + + // g((double) float) -> (double) gf(float) + Value *R; + if (IsIntrinsic) { + Module *M = CI->getModule(); + Intrinsic::ID IID = CalleeFn->getIntrinsicID(); + Function *Fn = Intrinsic::getDeclaration(M, IID, B.getFloatTy()); + R = isBinary ? B.CreateCall(Fn, V) : B.CreateCall(Fn, V[0]); + } else { + AttributeList CalleeAttrs = CalleeFn->getAttributes(); + R = isBinary ? emitBinaryFloatFnCall(V[0], V[1], TLI, CalleeName, B, + CalleeAttrs) + : emitUnaryFloatFnCall(V[0], TLI, CalleeName, B, CalleeAttrs); + } + return B.CreateFPExt(R, B.getDoubleTy()); +} + +/// Shrink double -> float for unary functions. +static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilderBase &B, + const TargetLibraryInfo *TLI, + bool isPrecise = false) { + return optimizeDoubleFP(CI, B, false, TLI, isPrecise); +} + +/// Shrink double -> float for binary functions. +static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilderBase &B, + const TargetLibraryInfo *TLI, + bool isPrecise = false) { + return optimizeDoubleFP(CI, B, true, TLI, isPrecise); +} + +// cabs(z) -> sqrt((creal(z)*creal(z)) + (cimag(z)*cimag(z))) +Value *LibCallSimplifier::optimizeCAbs(CallInst *CI, IRBuilderBase &B) { + if (!CI->isFast()) + return nullptr; + + // Propagate fast-math flags from the existing call to new instructions. + IRBuilderBase::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); + + Value *Real, *Imag; + if (CI->arg_size() == 1) { + Value *Op = CI->getArgOperand(0); + assert(Op->getType()->isArrayTy() && "Unexpected signature for cabs!"); + Real = B.CreateExtractValue(Op, 0, "real"); + Imag = B.CreateExtractValue(Op, 1, "imag"); + } else { + assert(CI->arg_size() == 2 && "Unexpected signature for cabs!"); + Real = CI->getArgOperand(0); + Imag = CI->getArgOperand(1); + } + + Value *RealReal = B.CreateFMul(Real, Real); + Value *ImagImag = B.CreateFMul(Imag, Imag); + + Function *FSqrt = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::sqrt, + CI->getType()); + return copyFlags( + *CI, B.CreateCall(FSqrt, B.CreateFAdd(RealReal, ImagImag), "cabs")); +} + +static Value *optimizeTrigReflections(CallInst *Call, LibFunc Func, + IRBuilderBase &B) { + if (!isa<FPMathOperator>(Call)) + return nullptr; + + IRBuilderBase::FastMathFlagGuard Guard(B); + B.setFastMathFlags(Call->getFastMathFlags()); + + // TODO: Can this be shared to also handle LLVM intrinsics? 
+ Value *X; + switch (Func) { + case LibFunc_sin: + case LibFunc_sinf: + case LibFunc_sinl: + case LibFunc_tan: + case LibFunc_tanf: + case LibFunc_tanl: + // sin(-X) --> -sin(X) + // tan(-X) --> -tan(X) + if (match(Call->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) + return B.CreateFNeg( + copyFlags(*Call, B.CreateCall(Call->getCalledFunction(), X))); + break; + case LibFunc_cos: + case LibFunc_cosf: + case LibFunc_cosl: + // cos(-X) --> cos(X) + if (match(Call->getArgOperand(0), m_FNeg(m_Value(X)))) + return copyFlags(*Call, + B.CreateCall(Call->getCalledFunction(), X, "cos")); + break; + default: + break; + } + return nullptr; +} + +// Return a properly extended integer (DstWidth bits wide) if the operation is +// an itofp. +static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B, unsigned DstWidth) { + if (isa<SIToFPInst>(I2F) || isa<UIToFPInst>(I2F)) { + Value *Op = cast<Instruction>(I2F)->getOperand(0); + // Make sure that the exponent fits inside an "int" of size DstWidth, + // thus avoiding any range issues that FP has not. + unsigned BitWidth = Op->getType()->getPrimitiveSizeInBits(); + if (BitWidth < DstWidth || + (BitWidth == DstWidth && isa<SIToFPInst>(I2F))) + return isa<SIToFPInst>(I2F) ? B.CreateSExt(Op, B.getIntNTy(DstWidth)) + : B.CreateZExt(Op, B.getIntNTy(DstWidth)); + } + + return nullptr; +} + +/// Use exp{,2}(x * y) for pow(exp{,2}(x), y); +/// ldexp(1.0, x) for pow(2.0, itofp(x)); exp2(n * x) for pow(2.0 ** n, x); +/// exp10(x) for pow(10.0, x); exp2(log2(n) * x) for pow(n, x). +Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { + Module *M = Pow->getModule(); + Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); + Module *Mod = Pow->getModule(); + Type *Ty = Pow->getType(); + bool Ignored; + + // Evaluate special cases related to a nested function as the base. + + // pow(exp(x), y) -> exp(x * y) + // pow(exp2(x), y) -> exp2(x * y) + // If exp{,2}() is used only once, it is better to fold two transcendental + // math functions into one. If used again, exp{,2}() would still have to be + // called with the original argument, then keep both original transcendental + // functions. However, this transformation is only safe with fully relaxed + // math semantics, since, besides rounding differences, it changes overflow + // and underflow behavior quite dramatically. For example: + // pow(exp(1000), 0.001) = pow(inf, 0.001) = inf + // Whereas: + // exp(1000 * 0.001) = exp(1) + // TODO: Loosen the requirement for fully relaxed math semantics. + // TODO: Handle exp10() when more targets have it available. 
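+  // Concrete instances of the constant-base cases handled further down,
+  // assuming the corresponding libcalls or intrinsics are available:
+  // pow(2.0, (double)i) becomes ldexp(1.0, i), pow(4.0, x) becomes
+  // exp2(2.0 * x), and pow(0.25, x) becomes exp2(-2.0 * x).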
+ CallInst *BaseFn = dyn_cast<CallInst>(Base); + if (BaseFn && BaseFn->hasOneUse() && BaseFn->isFast() && Pow->isFast()) { + LibFunc LibFn; + + Function *CalleeFn = BaseFn->getCalledFunction(); + if (CalleeFn && TLI->getLibFunc(CalleeFn->getName(), LibFn) && + isLibFuncEmittable(M, TLI, LibFn)) { + StringRef ExpName; + Intrinsic::ID ID; + Value *ExpFn; + LibFunc LibFnFloat, LibFnDouble, LibFnLongDouble; + + switch (LibFn) { + default: + return nullptr; + case LibFunc_expf: + case LibFunc_exp: + case LibFunc_expl: + ExpName = TLI->getName(LibFunc_exp); + ID = Intrinsic::exp; + LibFnFloat = LibFunc_expf; + LibFnDouble = LibFunc_exp; + LibFnLongDouble = LibFunc_expl; + break; + case LibFunc_exp2f: + case LibFunc_exp2: + case LibFunc_exp2l: + ExpName = TLI->getName(LibFunc_exp2); + ID = Intrinsic::exp2; + LibFnFloat = LibFunc_exp2f; + LibFnDouble = LibFunc_exp2; + LibFnLongDouble = LibFunc_exp2l; + break; + } + + // Create new exp{,2}() with the product as its argument. + Value *FMul = B.CreateFMul(BaseFn->getArgOperand(0), Expo, "mul"); + ExpFn = BaseFn->doesNotAccessMemory() + ? B.CreateCall(Intrinsic::getDeclaration(Mod, ID, Ty), + FMul, ExpName) + : emitUnaryFloatFnCall(FMul, TLI, LibFnDouble, LibFnFloat, + LibFnLongDouble, B, + BaseFn->getAttributes()); + + // Since the new exp{,2}() is different from the original one, dead code + // elimination cannot be trusted to remove it, since it may have side + // effects (e.g., errno). When the only consumer for the original + // exp{,2}() is pow(), then it has to be explicitly erased. + substituteInParent(BaseFn, ExpFn); + return ExpFn; + } + } + + // Evaluate special cases related to a constant base. + + const APFloat *BaseF; + if (!match(Pow->getArgOperand(0), m_APFloat(BaseF))) + return nullptr; + + AttributeList NoAttrs; // Attributes are only meaningful on the original call + + // pow(2.0, itofp(x)) -> ldexp(1.0, x) + if (match(Base, m_SpecificFP(2.0)) && + (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo)) && + hasFloatFn(M, TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) { + if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize())) + return copyFlags(*Pow, + emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), ExpoI, + TLI, LibFunc_ldexp, LibFunc_ldexpf, + LibFunc_ldexpl, B, NoAttrs)); + } + + // pow(2.0 ** n, x) -> exp2(n * x) + if (hasFloatFn(M, TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) { + APFloat BaseR = APFloat(1.0); + BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored); + BaseR = BaseR / *BaseF; + bool IsInteger = BaseF->isInteger(), IsReciprocal = BaseR.isInteger(); + const APFloat *NF = IsReciprocal ? &BaseR : BaseF; + APSInt NI(64, false); + if ((IsInteger || IsReciprocal) && + NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) == + APFloat::opOK && + NI > 1 && NI.isPowerOf2()) { + double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0); + Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul"); + if (Pow->doesNotAccessMemory()) + return copyFlags(*Pow, B.CreateCall(Intrinsic::getDeclaration( + Mod, Intrinsic::exp2, Ty), + FMul, "exp2")); + else + return copyFlags(*Pow, emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, + LibFunc_exp2f, + LibFunc_exp2l, B, NoAttrs)); + } + } + + // pow(10.0, x) -> exp10(x) + // TODO: There is no exp10() intrinsic yet, but some day there shall be one. 
+ if (match(Base, m_SpecificFP(10.0)) && + hasFloatFn(M, TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l)) + return copyFlags(*Pow, emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10, + LibFunc_exp10f, LibFunc_exp10l, + B, NoAttrs)); + + // pow(x, y) -> exp2(log2(x) * y) + if (Pow->hasApproxFunc() && Pow->hasNoNaNs() && BaseF->isFiniteNonZero() && + !BaseF->isNegative()) { + // pow(1, inf) is defined to be 1 but exp2(log2(1) * inf) evaluates to NaN. + // Luckily optimizePow has already handled the x == 1 case. + assert(!match(Base, m_FPOne()) && + "pow(1.0, y) should have been simplified earlier!"); + + Value *Log = nullptr; + if (Ty->isFloatTy()) + Log = ConstantFP::get(Ty, std::log2(BaseF->convertToFloat())); + else if (Ty->isDoubleTy()) + Log = ConstantFP::get(Ty, std::log2(BaseF->convertToDouble())); + + if (Log) { + Value *FMul = B.CreateFMul(Log, Expo, "mul"); + if (Pow->doesNotAccessMemory()) + return copyFlags(*Pow, B.CreateCall(Intrinsic::getDeclaration( + Mod, Intrinsic::exp2, Ty), + FMul, "exp2")); + else if (hasFloatFn(M, TLI, Ty, LibFunc_exp2, LibFunc_exp2f, + LibFunc_exp2l)) + return copyFlags(*Pow, emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, + LibFunc_exp2f, + LibFunc_exp2l, B, NoAttrs)); + } + } + + return nullptr; +} + +static Value *getSqrtCall(Value *V, AttributeList Attrs, bool NoErrno, + Module *M, IRBuilderBase &B, + const TargetLibraryInfo *TLI) { + // If errno is never set, then use the intrinsic for sqrt(). + if (NoErrno) { + Function *SqrtFn = + Intrinsic::getDeclaration(M, Intrinsic::sqrt, V->getType()); + return B.CreateCall(SqrtFn, V, "sqrt"); + } + + // Otherwise, use the libcall for sqrt(). + if (hasFloatFn(M, TLI, V->getType(), LibFunc_sqrt, LibFunc_sqrtf, + LibFunc_sqrtl)) + // TODO: We also should check that the target can in fact lower the sqrt() + // libcall. We currently have no way to ask this question, so we ask if + // the target has a sqrt() libcall, which is not exactly the same. + return emitUnaryFloatFnCall(V, TLI, LibFunc_sqrt, LibFunc_sqrtf, + LibFunc_sqrtl, B, Attrs); + + return nullptr; +} + +/// Use square root in place of pow(x, +/-0.5). +Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) { + Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); + Module *Mod = Pow->getModule(); + Type *Ty = Pow->getType(); + + const APFloat *ExpoF; + if (!match(Expo, m_APFloat(ExpoF)) || + (!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5))) + return nullptr; + + // Converting pow(X, -0.5) to 1/sqrt(X) may introduce an extra rounding step, + // so that requires fast-math-flags (afn or reassoc). + if (ExpoF->isNegative() && (!Pow->hasApproxFunc() && !Pow->hasAllowReassoc())) + return nullptr; + + // If we have a pow() library call (accesses memory) and we can't guarantee + // that the base is not an infinity, give up: + // pow(-Inf, 0.5) is optionally required to have a result of +Inf (not setting + // errno), but sqrt(-Inf) is required by various standards to set errno. + if (!Pow->doesNotAccessMemory() && !Pow->hasNoInfs() && + !isKnownNeverInfinity(Base, TLI)) + return nullptr; + + Sqrt = getSqrtCall(Base, AttributeList(), Pow->doesNotAccessMemory(), Mod, B, + TLI); + if (!Sqrt) + return nullptr; + + // Handle signed zero base by expanding to fabs(sqrt(x)). 
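+  // pow(-0.0, 0.5) is required to return +0.0, whereas a bare sqrt(-0.0)
+  // returns -0.0, so the fabs is needed to produce the correctly signed zero.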
+ if (!Pow->hasNoSignedZeros()) { + Function *FAbsFn = Intrinsic::getDeclaration(Mod, Intrinsic::fabs, Ty); + Sqrt = B.CreateCall(FAbsFn, Sqrt, "abs"); + } + + Sqrt = copyFlags(*Pow, Sqrt); + + // Handle non finite base by expanding to + // (x == -infinity ? +infinity : sqrt(x)). + if (!Pow->hasNoInfs()) { + Value *PosInf = ConstantFP::getInfinity(Ty), + *NegInf = ConstantFP::getInfinity(Ty, true); + Value *FCmp = B.CreateFCmpOEQ(Base, NegInf, "isinf"); + Sqrt = B.CreateSelect(FCmp, PosInf, Sqrt); + } + + // If the exponent is negative, then get the reciprocal. + if (ExpoF->isNegative()) + Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt, "reciprocal"); + + return Sqrt; +} + +static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M, + IRBuilderBase &B) { + Value *Args[] = {Base, Expo}; + Type *Types[] = {Base->getType(), Expo->getType()}; + Function *F = Intrinsic::getDeclaration(M, Intrinsic::powi, Types); + return B.CreateCall(F, Args); +} + +Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) { + Value *Base = Pow->getArgOperand(0); + Value *Expo = Pow->getArgOperand(1); + Function *Callee = Pow->getCalledFunction(); + StringRef Name = Callee->getName(); + Type *Ty = Pow->getType(); + Module *M = Pow->getModule(); + bool AllowApprox = Pow->hasApproxFunc(); + bool Ignored; + + // Propagate the math semantics from the call to any created instructions. + IRBuilderBase::FastMathFlagGuard Guard(B); + B.setFastMathFlags(Pow->getFastMathFlags()); + // Evaluate special cases related to the base. + + // pow(1.0, x) -> 1.0 + if (match(Base, m_FPOne())) + return Base; + + if (Value *Exp = replacePowWithExp(Pow, B)) + return Exp; + + // Evaluate special cases related to the exponent. + + // pow(x, -1.0) -> 1.0 / x + if (match(Expo, m_SpecificFP(-1.0))) + return B.CreateFDiv(ConstantFP::get(Ty, 1.0), Base, "reciprocal"); + + // pow(x, +/-0.0) -> 1.0 + if (match(Expo, m_AnyZeroFP())) + return ConstantFP::get(Ty, 1.0); + + // pow(x, 1.0) -> x + if (match(Expo, m_FPOne())) + return Base; + + // pow(x, 2.0) -> x * x + if (match(Expo, m_SpecificFP(2.0))) + return B.CreateFMul(Base, Base, "square"); + + if (Value *Sqrt = replacePowWithSqrt(Pow, B)) + return Sqrt; + + // If we can approximate pow: + // pow(x, n) -> powi(x, n) * sqrt(x) if n has exactly a 0.5 fraction + // pow(x, n) -> powi(x, n) if n is a constant signed integer value + const APFloat *ExpoF; + if (AllowApprox && match(Expo, m_APFloat(ExpoF)) && + !ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)) { + APFloat ExpoA(abs(*ExpoF)); + APFloat ExpoI(*ExpoF); + Value *Sqrt = nullptr; + if (!ExpoA.isInteger()) { + APFloat Expo2 = ExpoA; + // To check if ExpoA is an integer + 0.5, we add it to itself. If there + // is no floating point exception and the result is an integer, then + // ExpoA == integer + 0.5 + if (Expo2.add(ExpoA, APFloat::rmNearestTiesToEven) != APFloat::opOK) + return nullptr; + + if (!Expo2.isInteger()) + return nullptr; + + if (ExpoI.roundToIntegral(APFloat::rmTowardNegative) != + APFloat::opInexact) + return nullptr; + if (!ExpoI.isInteger()) + return nullptr; + ExpoF = &ExpoI; + + Sqrt = getSqrtCall(Base, AttributeList(), Pow->doesNotAccessMemory(), M, + B, TLI); + if (!Sqrt) + return nullptr; + } + + // 0.5 fraction is now optionally handled. 
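+    // For example, pow(x, 3.5) becomes powi(x, 3) * sqrt(x), and
+    // pow(x, -3.5) becomes powi(x, -4) * sqrt(x).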
+ // Do pow -> powi for remaining integer exponent + APSInt IntExpo(TLI->getIntSize(), /*isUnsigned=*/false); + if (ExpoF->isInteger() && + ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) == + APFloat::opOK) { + Value *PowI = copyFlags( + *Pow, + createPowWithIntegerExponent( + Base, ConstantInt::get(B.getIntNTy(TLI->getIntSize()), IntExpo), + M, B)); + + if (PowI && Sqrt) + return B.CreateFMul(PowI, Sqrt); + + return PowI; + } + } + + // powf(x, itofp(y)) -> powi(x, y) + if (AllowApprox && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) { + if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize())) + return copyFlags(*Pow, createPowWithIntegerExponent(Base, ExpoI, M, B)); + } + + // Shrink pow() to powf() if the arguments are single precision, + // unless the result is expected to be double precision. + if (UnsafeFPShrink && Name == TLI->getName(LibFunc_pow) && + hasFloatVersion(M, Name)) { + if (Value *Shrunk = optimizeBinaryDoubleFP(Pow, B, TLI, true)) + return Shrunk; + } + + return nullptr; +} + +Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) { + Module *M = CI->getModule(); + Function *Callee = CI->getCalledFunction(); + StringRef Name = Callee->getName(); + Value *Ret = nullptr; + if (UnsafeFPShrink && Name == TLI->getName(LibFunc_exp2) && + hasFloatVersion(M, Name)) + Ret = optimizeUnaryDoubleFP(CI, B, TLI, true); + + Type *Ty = CI->getType(); + Value *Op = CI->getArgOperand(0); + + // exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= IntSize + // exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < IntSize + if ((isa<SIToFPInst>(Op) || isa<UIToFPInst>(Op)) && + hasFloatFn(M, TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) { + if (Value *Exp = getIntToFPVal(Op, B, TLI->getIntSize())) + return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), Exp, TLI, + LibFunc_ldexp, LibFunc_ldexpf, + LibFunc_ldexpl, B, AttributeList()); + } + + return Ret; +} + +Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilderBase &B) { + Module *M = CI->getModule(); + + // If we can shrink the call to a float function rather than a double + // function, do that first. + Function *Callee = CI->getCalledFunction(); + StringRef Name = Callee->getName(); + if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(M, Name)) + if (Value *Ret = optimizeBinaryDoubleFP(CI, B, TLI)) + return Ret; + + // The LLVM intrinsics minnum/maxnum correspond to fmin/fmax. Canonicalize to + // the intrinsics for improved optimization (for example, vectorization). + // No-signed-zeros is implied by the definitions of fmax/fmin themselves. + // From the C standard draft WG14/N1256: + // "Ideally, fmax would be sensitive to the sign of zero, for example + // fmax(-0.0, +0.0) would return +0; however, implementation in software + // might be impractical." + IRBuilderBase::FastMathFlagGuard Guard(B); + FastMathFlags FMF = CI->getFastMathFlags(); + FMF.setNoSignedZeros(); + B.setFastMathFlags(FMF); + + Intrinsic::ID IID = Callee->getName().startswith("fmin") ? 
Intrinsic::minnum + : Intrinsic::maxnum; + Function *F = Intrinsic::getDeclaration(CI->getModule(), IID, CI->getType()); + return copyFlags( + *CI, B.CreateCall(F, {CI->getArgOperand(0), CI->getArgOperand(1)})); +} + +Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) { + Function *LogFn = Log->getCalledFunction(); + StringRef LogNm = LogFn->getName(); + Intrinsic::ID LogID = LogFn->getIntrinsicID(); + Module *Mod = Log->getModule(); + Type *Ty = Log->getType(); + Value *Ret = nullptr; + + if (UnsafeFPShrink && hasFloatVersion(Mod, LogNm)) + Ret = optimizeUnaryDoubleFP(Log, B, TLI, true); + + // The earlier call must also be 'fast' in order to do these transforms. + CallInst *Arg = dyn_cast<CallInst>(Log->getArgOperand(0)); + if (!Log->isFast() || !Arg || !Arg->isFast() || !Arg->hasOneUse()) + return Ret; + + LibFunc LogLb, ExpLb, Exp2Lb, Exp10Lb, PowLb; + + // This is only applicable to log(), log2(), log10(). + if (TLI->getLibFunc(LogNm, LogLb)) + switch (LogLb) { + case LibFunc_logf: + LogID = Intrinsic::log; + ExpLb = LibFunc_expf; + Exp2Lb = LibFunc_exp2f; + Exp10Lb = LibFunc_exp10f; + PowLb = LibFunc_powf; + break; + case LibFunc_log: + LogID = Intrinsic::log; + ExpLb = LibFunc_exp; + Exp2Lb = LibFunc_exp2; + Exp10Lb = LibFunc_exp10; + PowLb = LibFunc_pow; + break; + case LibFunc_logl: + LogID = Intrinsic::log; + ExpLb = LibFunc_expl; + Exp2Lb = LibFunc_exp2l; + Exp10Lb = LibFunc_exp10l; + PowLb = LibFunc_powl; + break; + case LibFunc_log2f: + LogID = Intrinsic::log2; + ExpLb = LibFunc_expf; + Exp2Lb = LibFunc_exp2f; + Exp10Lb = LibFunc_exp10f; + PowLb = LibFunc_powf; + break; + case LibFunc_log2: + LogID = Intrinsic::log2; + ExpLb = LibFunc_exp; + Exp2Lb = LibFunc_exp2; + Exp10Lb = LibFunc_exp10; + PowLb = LibFunc_pow; + break; + case LibFunc_log2l: + LogID = Intrinsic::log2; + ExpLb = LibFunc_expl; + Exp2Lb = LibFunc_exp2l; + Exp10Lb = LibFunc_exp10l; + PowLb = LibFunc_powl; + break; + case LibFunc_log10f: + LogID = Intrinsic::log10; + ExpLb = LibFunc_expf; + Exp2Lb = LibFunc_exp2f; + Exp10Lb = LibFunc_exp10f; + PowLb = LibFunc_powf; + break; + case LibFunc_log10: + LogID = Intrinsic::log10; + ExpLb = LibFunc_exp; + Exp2Lb = LibFunc_exp2; + Exp10Lb = LibFunc_exp10; + PowLb = LibFunc_pow; + break; + case LibFunc_log10l: + LogID = Intrinsic::log10; + ExpLb = LibFunc_expl; + Exp2Lb = LibFunc_exp2l; + Exp10Lb = LibFunc_exp10l; + PowLb = LibFunc_powl; + break; + default: + return Ret; + } + else if (LogID == Intrinsic::log || LogID == Intrinsic::log2 || + LogID == Intrinsic::log10) { + if (Ty->getScalarType()->isFloatTy()) { + ExpLb = LibFunc_expf; + Exp2Lb = LibFunc_exp2f; + Exp10Lb = LibFunc_exp10f; + PowLb = LibFunc_powf; + } else if (Ty->getScalarType()->isDoubleTy()) { + ExpLb = LibFunc_exp; + Exp2Lb = LibFunc_exp2; + Exp10Lb = LibFunc_exp10; + PowLb = LibFunc_pow; + } else + return Ret; + } else + return Ret; + + IRBuilderBase::FastMathFlagGuard Guard(B); + B.setFastMathFlags(FastMathFlags::getFast()); + + Intrinsic::ID ArgID = Arg->getIntrinsicID(); + LibFunc ArgLb = NotLibFunc; + TLI->getLibFunc(*Arg, ArgLb); + + // log(pow(x,y)) -> y*log(x) + AttributeList NoAttrs; + if (ArgLb == PowLb || ArgID == Intrinsic::pow) { + Value *LogX = + Log->doesNotAccessMemory() + ? B.CreateCall(Intrinsic::getDeclaration(Mod, LogID, Ty), + Arg->getOperand(0), "log") + : emitUnaryFloatFnCall(Arg->getOperand(0), TLI, LogNm, B, NoAttrs); + Value *MulY = B.CreateFMul(Arg->getArgOperand(1), LogX, "mul"); + // Since pow() may have side effects, e.g. 
errno, + // dead code elimination may not be trusted to remove it. + substituteInParent(Arg, MulY); + return MulY; + } + + // log(exp{,2,10}(y)) -> y*log({e,2,10}) + // TODO: There is no exp10() intrinsic yet. + if (ArgLb == ExpLb || ArgLb == Exp2Lb || ArgLb == Exp10Lb || + ArgID == Intrinsic::exp || ArgID == Intrinsic::exp2) { + Constant *Eul; + if (ArgLb == ExpLb || ArgID == Intrinsic::exp) + // FIXME: Add more precise value of e for long double. + Eul = ConstantFP::get(Log->getType(), numbers::e); + else if (ArgLb == Exp2Lb || ArgID == Intrinsic::exp2) + Eul = ConstantFP::get(Log->getType(), 2.0); + else + Eul = ConstantFP::get(Log->getType(), 10.0); + Value *LogE = Log->doesNotAccessMemory() + ? B.CreateCall(Intrinsic::getDeclaration(Mod, LogID, Ty), + Eul, "log") + : emitUnaryFloatFnCall(Eul, TLI, LogNm, B, NoAttrs); + Value *MulY = B.CreateFMul(Arg->getArgOperand(0), LogE, "mul"); + // Since exp() may have side effects, e.g. errno, + // dead code elimination may not be trusted to remove it. + substituteInParent(Arg, MulY); + return MulY; + } + + return Ret; +} + +Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) { + Module *M = CI->getModule(); + Function *Callee = CI->getCalledFunction(); + Value *Ret = nullptr; + // TODO: Once we have a way (other than checking for the existince of the + // libcall) to tell whether our target can lower @llvm.sqrt, relax the + // condition below. + if (isLibFuncEmittable(M, TLI, LibFunc_sqrtf) && + (Callee->getName() == "sqrt" || + Callee->getIntrinsicID() == Intrinsic::sqrt)) + Ret = optimizeUnaryDoubleFP(CI, B, TLI, true); + + if (!CI->isFast()) + return Ret; + + Instruction *I = dyn_cast<Instruction>(CI->getArgOperand(0)); + if (!I || I->getOpcode() != Instruction::FMul || !I->isFast()) + return Ret; + + // We're looking for a repeated factor in a multiplication tree, + // so we can do this fold: sqrt(x * x) -> fabs(x); + // or this fold: sqrt((x * x) * y) -> fabs(x) * sqrt(y). + Value *Op0 = I->getOperand(0); + Value *Op1 = I->getOperand(1); + Value *RepeatOp = nullptr; + Value *OtherOp = nullptr; + if (Op0 == Op1) { + // Simple match: the operands of the multiply are identical. + RepeatOp = Op0; + } else { + // Look for a more complicated pattern: one of the operands is itself + // a multiply, so search for a common factor in that multiply. + // Note: We don't bother looking any deeper than this first level or for + // variations of this pattern because instcombine's visitFMUL and/or the + // reassociation pass should give us this form. + Value *OtherMul0, *OtherMul1; + if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) { + // Pattern: sqrt((x * y) * z) + if (OtherMul0 == OtherMul1 && cast<Instruction>(Op0)->isFast()) { + // Matched: sqrt((x * x) * z) + RepeatOp = OtherMul0; + OtherOp = Op1; + } + } + } + if (!RepeatOp) + return Ret; + + // Fast math flags for any created instructions should match the sqrt + // and multiply. + IRBuilderBase::FastMathFlagGuard Guard(B); + B.setFastMathFlags(I->getFastMathFlags()); + + // If we found a repeated factor, hoist it out of the square root and + // replace it with the fabs of that factor. + Type *ArgType = I->getType(); + Function *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType); + Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs"); + if (OtherOp) { + // If we found a non-repeated factor, we still need to get its square + // root. We then multiply that by the value that was simplified out + // of the square root calculation. 
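+    // (The fabs above is what keeps the fold correct for negative values of
+    // the repeated factor: with x = -3.0, sqrt(x * x) is 3.0, not -3.0.)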
+ Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType); + Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt"); + return copyFlags(*CI, B.CreateFMul(FabsCall, SqrtCall)); + } + return copyFlags(*CI, FabsCall); +} + +// TODO: Generalize to handle any trig function and its inverse. +Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilderBase &B) { + Module *M = CI->getModule(); + Function *Callee = CI->getCalledFunction(); + Value *Ret = nullptr; + StringRef Name = Callee->getName(); + if (UnsafeFPShrink && Name == "tan" && hasFloatVersion(M, Name)) + Ret = optimizeUnaryDoubleFP(CI, B, TLI, true); + + Value *Op1 = CI->getArgOperand(0); + auto *OpC = dyn_cast<CallInst>(Op1); + if (!OpC) + return Ret; + + // Both calls must be 'fast' in order to remove them. + if (!CI->isFast() || !OpC->isFast()) + return Ret; + + // tan(atan(x)) -> x + // tanf(atanf(x)) -> x + // tanl(atanl(x)) -> x + LibFunc Func; + Function *F = OpC->getCalledFunction(); + if (F && TLI->getLibFunc(F->getName(), Func) && + isLibFuncEmittable(M, TLI, Func) && + ((Func == LibFunc_atan && Callee->getName() == "tan") || + (Func == LibFunc_atanf && Callee->getName() == "tanf") || + (Func == LibFunc_atanl && Callee->getName() == "tanl"))) + Ret = OpC->getArgOperand(0); + return Ret; +} + +static bool isTrigLibCall(CallInst *CI) { + // We can only hope to do anything useful if we can ignore things like errno + // and floating-point exceptions. + // We already checked the prototype. + return CI->doesNotThrow() && CI->doesNotAccessMemory(); +} + +static bool insertSinCosCall(IRBuilderBase &B, Function *OrigCallee, Value *Arg, + bool UseFloat, Value *&Sin, Value *&Cos, + Value *&SinCos, const TargetLibraryInfo *TLI) { + Module *M = OrigCallee->getParent(); + Type *ArgTy = Arg->getType(); + Type *ResTy; + StringRef Name; + + Triple T(OrigCallee->getParent()->getTargetTriple()); + if (UseFloat) { + Name = "__sincospif_stret"; + + assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now"); + // x86_64 can't use {float, float} since that would be returned in both + // xmm0 and xmm1, which isn't what a real struct would do. + ResTy = T.getArch() == Triple::x86_64 + ? static_cast<Type *>(FixedVectorType::get(ArgTy, 2)) + : static_cast<Type *>(StructType::get(ArgTy, ArgTy)); + } else { + Name = "__sincospi_stret"; + ResTy = StructType::get(ArgTy, ArgTy); + } + + if (!isLibFuncEmittable(M, TLI, Name)) + return false; + LibFunc TheLibFunc; + TLI->getLibFunc(Name, TheLibFunc); + FunctionCallee Callee = getOrInsertLibFunc( + M, *TLI, TheLibFunc, OrigCallee->getAttributes(), ResTy, ArgTy); + + if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) { + // If the argument is an instruction, it must dominate all uses so put our + // sincos call there. + B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator()); + } else { + // Otherwise (e.g. for a constant) the beginning of the function is as + // good a place as any. 
+ BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock(); + B.SetInsertPoint(&EntryBB, EntryBB.begin()); + } + + SinCos = B.CreateCall(Callee, Arg, "sincospi"); + + if (SinCos->getType()->isStructTy()) { + Sin = B.CreateExtractValue(SinCos, 0, "sinpi"); + Cos = B.CreateExtractValue(SinCos, 1, "cospi"); + } else { + Sin = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 0), + "sinpi"); + Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1), + "cospi"); + } + + return true; +} + +Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilderBase &B) { + // Make sure the prototype is as expected, otherwise the rest of the + // function is probably invalid and likely to abort. + if (!isTrigLibCall(CI)) + return nullptr; + + Value *Arg = CI->getArgOperand(0); + SmallVector<CallInst *, 1> SinCalls; + SmallVector<CallInst *, 1> CosCalls; + SmallVector<CallInst *, 1> SinCosCalls; + + bool IsFloat = Arg->getType()->isFloatTy(); + + // Look for all compatible sinpi, cospi and sincospi calls with the same + // argument. If there are enough (in some sense) we can make the + // substitution. + Function *F = CI->getFunction(); + for (User *U : Arg->users()) + classifyArgUse(U, F, IsFloat, SinCalls, CosCalls, SinCosCalls); + + // It's only worthwhile if both sinpi and cospi are actually used. + if (SinCalls.empty() || CosCalls.empty()) + return nullptr; + + Value *Sin, *Cos, *SinCos; + if (!insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos, + SinCos, TLI)) + return nullptr; + + auto replaceTrigInsts = [this](SmallVectorImpl<CallInst *> &Calls, + Value *Res) { + for (CallInst *C : Calls) + replaceAllUsesWith(C, Res); + }; + + replaceTrigInsts(SinCalls, Sin); + replaceTrigInsts(CosCalls, Cos); + replaceTrigInsts(SinCosCalls, SinCos); + + return nullptr; +} + +void LibCallSimplifier::classifyArgUse( + Value *Val, Function *F, bool IsFloat, + SmallVectorImpl<CallInst *> &SinCalls, + SmallVectorImpl<CallInst *> &CosCalls, + SmallVectorImpl<CallInst *> &SinCosCalls) { + auto *CI = dyn_cast<CallInst>(Val); + if (!CI || CI->use_empty()) + return; + + // Don't consider calls in other functions. + if (CI->getFunction() != F) + return; + + Module *M = CI->getModule(); + Function *Callee = CI->getCalledFunction(); + LibFunc Func; + if (!Callee || !TLI->getLibFunc(*Callee, Func) || + !isLibFuncEmittable(M, TLI, Func) || + !isTrigLibCall(CI)) + return; + + if (IsFloat) { + if (Func == LibFunc_sinpif) + SinCalls.push_back(CI); + else if (Func == LibFunc_cospif) + CosCalls.push_back(CI); + else if (Func == LibFunc_sincospif_stret) + SinCosCalls.push_back(CI); + } else { + if (Func == LibFunc_sinpi) + SinCalls.push_back(CI); + else if (Func == LibFunc_cospi) + CosCalls.push_back(CI); + else if (Func == LibFunc_sincospi_stret) + SinCosCalls.push_back(CI); + } +} + +//===----------------------------------------------------------------------===// +// Integer Library Call Optimizations +//===----------------------------------------------------------------------===// + +Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilderBase &B) { + // All variants of ffs return int which need not be 32 bits wide. + // ffs{,l,ll}(x) -> x != 0 ? 
(int)llvm.cttz(x)+1 : 0 + Type *RetType = CI->getType(); + Value *Op = CI->getArgOperand(0); + Type *ArgType = Op->getType(); + Function *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(), + Intrinsic::cttz, ArgType); + Value *V = B.CreateCall(F, {Op, B.getTrue()}, "cttz"); + V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1)); + V = B.CreateIntCast(V, RetType, false); + + Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType)); + return B.CreateSelect(Cond, V, ConstantInt::get(RetType, 0)); +} + +Value *LibCallSimplifier::optimizeFls(CallInst *CI, IRBuilderBase &B) { + // All variants of fls return int which need not be 32 bits wide. + // fls{,l,ll}(x) -> (int)(sizeInBits(x) - llvm.ctlz(x, false)) + Value *Op = CI->getArgOperand(0); + Type *ArgType = Op->getType(); + Function *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(), + Intrinsic::ctlz, ArgType); + Value *V = B.CreateCall(F, {Op, B.getFalse()}, "ctlz"); + V = B.CreateSub(ConstantInt::get(V->getType(), ArgType->getIntegerBitWidth()), + V); + return B.CreateIntCast(V, CI->getType(), false); +} + +Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilderBase &B) { + // abs(x) -> x <s 0 ? -x : x + // The negation has 'nsw' because abs of INT_MIN is undefined. + Value *X = CI->getArgOperand(0); + Value *IsNeg = B.CreateIsNeg(X); + Value *NegX = B.CreateNSWNeg(X, "neg"); + return B.CreateSelect(IsNeg, NegX, X); +} + +Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilderBase &B) { + // isdigit(c) -> (c-'0') <u 10 + Value *Op = CI->getArgOperand(0); + Type *ArgType = Op->getType(); + Op = B.CreateSub(Op, ConstantInt::get(ArgType, '0'), "isdigittmp"); + Op = B.CreateICmpULT(Op, ConstantInt::get(ArgType, 10), "isdigit"); + return B.CreateZExt(Op, CI->getType()); +} + +Value *LibCallSimplifier::optimizeIsAscii(CallInst *CI, IRBuilderBase &B) { + // isascii(c) -> c <u 128 + Value *Op = CI->getArgOperand(0); + Type *ArgType = Op->getType(); + Op = B.CreateICmpULT(Op, ConstantInt::get(ArgType, 128), "isascii"); + return B.CreateZExt(Op, CI->getType()); +} + +Value *LibCallSimplifier::optimizeToAscii(CallInst *CI, IRBuilderBase &B) { + // toascii(c) -> c & 0x7f + return B.CreateAnd(CI->getArgOperand(0), + ConstantInt::get(CI->getType(), 0x7F)); +} + +// Fold calls to atoi, atol, and atoll. +Value *LibCallSimplifier::optimizeAtoi(CallInst *CI, IRBuilderBase &B) { + CI->addParamAttr(0, Attribute::NoCapture); + + StringRef Str; + if (!getConstantStringInfo(CI->getArgOperand(0), Str)) + return nullptr; + + return convertStrToInt(CI, Str, nullptr, 10, /*AsSigned=*/true, B); +} + +// Fold calls to strtol, strtoll, strtoul, and strtoull. +Value *LibCallSimplifier::optimizeStrToInt(CallInst *CI, IRBuilderBase &B, + bool AsSigned) { + Value *EndPtr = CI->getArgOperand(1); + if (isa<ConstantPointerNull>(EndPtr)) { + // With a null EndPtr, this function won't capture the main argument. + // It would be readonly too, except that it still may write to errno. 
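+    // With a constant source string the whole call can typically be folded:
+    // e.g. strtol("42", nullptr, 10) becomes the constant 42, assuming the
+    // digits are valid for the base and the value fits in the return type.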
+ CI->addParamAttr(0, Attribute::NoCapture); + EndPtr = nullptr; + } else if (!isKnownNonZero(EndPtr, DL)) + return nullptr; + + StringRef Str; + if (!getConstantStringInfo(CI->getArgOperand(0), Str)) + return nullptr; + + if (ConstantInt *CInt = dyn_cast<ConstantInt>(CI->getArgOperand(2))) { + return convertStrToInt(CI, Str, EndPtr, CInt->getSExtValue(), AsSigned, B); + } + + return nullptr; +} + +//===----------------------------------------------------------------------===// +// Formatting and IO Library Call Optimizations +//===----------------------------------------------------------------------===// + +static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg); + +Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilderBase &B, + int StreamArg) { + Function *Callee = CI->getCalledFunction(); + // Error reporting calls should be cold, mark them as such. + // This applies even to non-builtin calls: it is only a hint and applies to + // functions that the frontend might not understand as builtins. + + // This heuristic was suggested in: + // Improving Static Branch Prediction in a Compiler + // Brian L. Deitrich, Ben-Chung Cheng, Wen-mei W. Hwu + // Proceedings of PACT'98, Oct. 1998, IEEE + if (!CI->hasFnAttr(Attribute::Cold) && + isReportingError(Callee, CI, StreamArg)) { + CI->addFnAttr(Attribute::Cold); + } + + return nullptr; +} + +static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) { + if (!Callee || !Callee->isDeclaration()) + return false; + + if (StreamArg < 0) + return true; + + // These functions might be considered cold, but only if their stream + // argument is stderr. + + if (StreamArg >= (int)CI->arg_size()) + return false; + LoadInst *LI = dyn_cast<LoadInst>(CI->getArgOperand(StreamArg)); + if (!LI) + return false; + GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand()); + if (!GV || !GV->isDeclaration()) + return false; + return GV->getName() == "stderr"; +} + +Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilderBase &B) { + // Check for a fixed format string. + StringRef FormatStr; + if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr)) + return nullptr; + + // Empty format string -> noop. + if (FormatStr.empty()) // Tolerate printf's declared void. + return CI->use_empty() ? (Value *)CI : ConstantInt::get(CI->getType(), 0); + + // Do not do any of the following transformations if the printf return value + // is used, in general the printf return value is not compatible with either + // putchar() or puts(). + if (!CI->use_empty()) + return nullptr; + + Type *IntTy = CI->getType(); + // printf("x") -> putchar('x'), even for "%" and "%%". + if (FormatStr.size() == 1 || FormatStr == "%%") { + // Convert the character to unsigned char before passing it to putchar + // to avoid host-specific sign extension in the IR. Putchar converts + // it to unsigned char regardless. + Value *IntChar = ConstantInt::get(IntTy, (unsigned char)FormatStr[0]); + return copyFlags(*CI, emitPutChar(IntChar, B, TLI)); + } + + // Try to remove call or emit putchar/puts. + if (FormatStr == "%s" && CI->arg_size() > 1) { + StringRef OperandStr; + if (!getConstantStringInfo(CI->getOperand(1), OperandStr)) + return nullptr; + // printf("%s", "") --> NOP + if (OperandStr.empty()) + return (Value *)CI; + // printf("%s", "a") --> putchar('a') + if (OperandStr.size() == 1) { + // Convert the character to unsigned char before passing it to putchar + // to avoid host-specific sign extension in the IR. 
Putchar converts + // it to unsigned char regardless. + Value *IntChar = ConstantInt::get(IntTy, (unsigned char)OperandStr[0]); + return copyFlags(*CI, emitPutChar(IntChar, B, TLI)); + } + // printf("%s", str"\n") --> puts(str) + if (OperandStr.back() == '\n') { + OperandStr = OperandStr.drop_back(); + Value *GV = B.CreateGlobalString(OperandStr, "str"); + return copyFlags(*CI, emitPutS(GV, B, TLI)); + } + return nullptr; + } + + // printf("foo\n") --> puts("foo") + if (FormatStr.back() == '\n' && + !FormatStr.contains('%')) { // No format characters. + // Create a string literal with no \n on it. We expect the constant merge + // pass to be run after this pass, to merge duplicate strings. + FormatStr = FormatStr.drop_back(); + Value *GV = B.CreateGlobalString(FormatStr, "str"); + return copyFlags(*CI, emitPutS(GV, B, TLI)); + } + + // Optimize specific format strings. + // printf("%c", chr) --> putchar(chr) + if (FormatStr == "%c" && CI->arg_size() > 1 && + CI->getArgOperand(1)->getType()->isIntegerTy()) { + // Convert the argument to the type expected by putchar, i.e., int, which + // need not be 32 bits wide but which is the same as printf's return type. + Value *IntChar = B.CreateIntCast(CI->getArgOperand(1), IntTy, false); + return copyFlags(*CI, emitPutChar(IntChar, B, TLI)); + } + + // printf("%s\n", str) --> puts(str) + if (FormatStr == "%s\n" && CI->arg_size() > 1 && + CI->getArgOperand(1)->getType()->isPointerTy()) + return copyFlags(*CI, emitPutS(CI->getArgOperand(1), B, TLI)); + return nullptr; +} + +Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilderBase &B) { + + Module *M = CI->getModule(); + Function *Callee = CI->getCalledFunction(); + FunctionType *FT = Callee->getFunctionType(); + if (Value *V = optimizePrintFString(CI, B)) { + return V; + } + + annotateNonNullNoUndefBasedOnAccess(CI, 0); + + // printf(format, ...) -> iprintf(format, ...) if no floating point + // arguments. + if (isLibFuncEmittable(M, TLI, LibFunc_iprintf) && + !callHasFloatingPointArgument(CI)) { + FunctionCallee IPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_iprintf, FT, + Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(IPrintFFn); + B.Insert(New); + return New; + } + + // printf(format, ...) -> __small_printf(format, ...) if no 128-bit floating point + // arguments. + if (isLibFuncEmittable(M, TLI, LibFunc_small_printf) && + !callHasFP128Argument(CI)) { + auto SmallPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_small_printf, FT, + Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(SmallPrintFFn); + B.Insert(New); + return New; + } + + return nullptr; +} + +Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, + IRBuilderBase &B) { + // Check for a fixed format string. + StringRef FormatStr; + if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) + return nullptr; + + // If we just have a format string (nothing else crazy) transform it. + Value *Dest = CI->getArgOperand(0); + if (CI->arg_size() == 2) { + // Make sure there's no % in the constant array. We could try to handle + // %% -> % in the future if we cared. + if (FormatStr.contains('%')) + return nullptr; // we found a format specifier, bail out. + + // sprintf(str, fmt) -> llvm.memcpy(align 1 str, align 1 fmt, strlen(fmt)+1) + B.CreateMemCpy( + Dest, Align(1), CI->getArgOperand(1), Align(1), + ConstantInt::get(DL.getIntPtrType(CI->getContext()), + FormatStr.size() + 1)); // Copy the null byte. 
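+    // sprintf returns the number of characters written, not counting the
+    // terminating nul, hence FormatStr.size() rather than size() + 1.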
+ return ConstantInt::get(CI->getType(), FormatStr.size()); + } + + // The remaining optimizations require the format string to be "%s" or "%c" + // and have an extra operand. + if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->arg_size() < 3) + return nullptr; + + // Decode the second character of the format string. + if (FormatStr[1] == 'c') { + // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) + return nullptr; + Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char"); + Value *Ptr = castToCStr(Dest, B); + B.CreateStore(V, Ptr); + Ptr = B.CreateInBoundsGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul"); + B.CreateStore(B.getInt8(0), Ptr); + + return ConstantInt::get(CI->getType(), 1); + } + + if (FormatStr[1] == 's') { + // sprintf(dest, "%s", str) -> llvm.memcpy(align 1 dest, align 1 str, + // strlen(str)+1) + if (!CI->getArgOperand(2)->getType()->isPointerTy()) + return nullptr; + + if (CI->use_empty()) + // sprintf(dest, "%s", str) -> strcpy(dest, str) + return copyFlags(*CI, emitStrCpy(Dest, CI->getArgOperand(2), B, TLI)); + + uint64_t SrcLen = GetStringLength(CI->getArgOperand(2)); + if (SrcLen) { + B.CreateMemCpy( + Dest, Align(1), CI->getArgOperand(2), Align(1), + ConstantInt::get(DL.getIntPtrType(CI->getContext()), SrcLen)); + // Returns total number of characters written without null-character. + return ConstantInt::get(CI->getType(), SrcLen - 1); + } else if (Value *V = emitStpCpy(Dest, CI->getArgOperand(2), B, TLI)) { + // sprintf(dest, "%s", str) -> stpcpy(dest, str) - dest + // Handle mismatched pointer types (goes away with typeless pointers?). + V = B.CreatePointerCast(V, B.getInt8PtrTy()); + Dest = B.CreatePointerCast(Dest, B.getInt8PtrTy()); + Value *PtrDiff = B.CreatePtrDiff(B.getInt8Ty(), V, Dest); + return B.CreateIntCast(PtrDiff, CI->getType(), false); + } + + bool OptForSize = CI->getFunction()->hasOptSize() || + llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI, + PGSOQueryType::IRPass); + if (OptForSize) + return nullptr; + + Value *Len = emitStrLen(CI->getArgOperand(2), B, DL, TLI); + if (!Len) + return nullptr; + Value *IncLen = + B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); + B.CreateMemCpy(Dest, Align(1), CI->getArgOperand(2), Align(1), IncLen); + + // The sprintf result is the unincremented number of bytes in the string. + return B.CreateIntCast(Len, CI->getType(), false); + } + return nullptr; +} + +Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilderBase &B) { + Module *M = CI->getModule(); + Function *Callee = CI->getCalledFunction(); + FunctionType *FT = Callee->getFunctionType(); + if (Value *V = optimizeSPrintFString(CI, B)) { + return V; + } + + annotateNonNullNoUndefBasedOnAccess(CI, {0, 1}); + + // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating + // point arguments. + if (isLibFuncEmittable(M, TLI, LibFunc_siprintf) && + !callHasFloatingPointArgument(CI)) { + FunctionCallee SIPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_siprintf, + FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(SIPrintFFn); + B.Insert(New); + return New; + } + + // sprintf(str, format, ...) -> __small_sprintf(str, format, ...) if no 128-bit + // floating point arguments. 
+  if (isLibFuncEmittable(M, TLI, LibFunc_small_sprintf) &&
+      !callHasFP128Argument(CI)) {
+    auto SmallSPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_small_sprintf, FT,
+                                             Callee->getAttributes());
+    CallInst *New = cast<CallInst>(CI->clone());
+    New->setCalledFunction(SmallSPrintFFn);
+    B.Insert(New);
+    return New;
+  }
+
+  return nullptr;
+}
+
+// Transform an snprintf call CI with the bound N to format the string Str
+// either to a call to memcpy, or to a single character store, or to nothing,
+// and fold the result to a constant. A nonnull StrArg refers to the string
+// argument being formatted. Otherwise the call is one with N < 2 and
+// the "%c" directive to format a single character.
+Value *LibCallSimplifier::emitSnPrintfMemCpy(CallInst *CI, Value *StrArg,
+                                             StringRef Str, uint64_t N,
+                                             IRBuilderBase &B) {
+  assert(StrArg || (N < 2 && Str.size() == 1));
+
+  unsigned IntBits = TLI->getIntSize();
+  uint64_t IntMax = maxIntN(IntBits);
+  if (Str.size() > IntMax)
+    // Bail if the string is longer than INT_MAX. POSIX requires
+    // implementations to set errno to EOVERFLOW in this case, in
+    // addition to when N is larger than that (checked by the caller).
+    return nullptr;
+
+  Value *StrLen = ConstantInt::get(CI->getType(), Str.size());
+  if (N == 0)
+    return StrLen;
+
+  // Set to the number of bytes to copy from StrArg which is also
+  // the offset of the terminating nul.
+  uint64_t NCopy;
+  if (N > Str.size())
+    // Copy the full string, including the terminating nul (which must
+    // be present regardless of the bound).
+    NCopy = Str.size() + 1;
+  else
+    NCopy = N - 1;
+
+  Value *DstArg = CI->getArgOperand(0);
+  if (NCopy && StrArg)
+    // Transform the call to llvm.memcpy(dst, fmt, N).
+    copyFlags(
+        *CI,
+        B.CreateMemCpy(
+            DstArg, Align(1), StrArg, Align(1),
+            ConstantInt::get(DL.getIntPtrType(CI->getContext()), NCopy)));
+
+  if (N > Str.size())
+    // Return early when the whole format string, including the final nul,
+    // has been copied.
+    return StrLen;
+
+  // Otherwise, when truncating the string, append a terminating nul.
+  Type *Int8Ty = B.getInt8Ty();
+  Value *NulOff = B.getIntN(IntBits, NCopy);
+  Value *DstEnd = B.CreateInBoundsGEP(Int8Ty, DstArg, NulOff, "endptr");
+  B.CreateStore(ConstantInt::get(Int8Ty, 0), DstEnd);
+  return StrLen;
+}
+
+Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI,
+                                                 IRBuilderBase &B) {
+  // Check for size
+  ConstantInt *Size = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+  if (!Size)
+    return nullptr;
+
+  uint64_t N = Size->getZExtValue();
+  uint64_t IntMax = maxIntN(TLI->getIntSize());
+  if (N > IntMax)
+    // Bail if the bound exceeds INT_MAX. POSIX requires implementations
+    // to set errno to EOVERFLOW in this case.
+    return nullptr;
+
+  Value *DstArg = CI->getArgOperand(0);
+  Value *FmtArg = CI->getArgOperand(2);
+
+  // Check for a fixed format string.
+  StringRef FormatStr;
+  if (!getConstantStringInfo(FmtArg, FormatStr))
+    return nullptr;
+
+  // If we just have a format string (nothing else crazy) transform it.
+  if (CI->arg_size() == 3) {
+    if (FormatStr.contains('%'))
+      // Bail if the format string contains a directive and there are
+      // no arguments. We could handle "%%" in the future.
+      return nullptr;
+
+    return emitSnPrintfMemCpy(CI, FmtArg, FormatStr, N, B);
+  }
+
+  // The remaining optimizations require the format string to be "%s" or "%c"
+  // and have an extra operand.
+  if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->arg_size() != 4)
+    return nullptr;
+
+  // Decode the second character of the format string.
+ if (FormatStr[1] == 'c') { + if (N <= 1) { + // Use an arbitary string of length 1 to transform the call into + // either a nul store (N == 1) or a no-op (N == 0) and fold it + // to one. + StringRef CharStr("*"); + return emitSnPrintfMemCpy(CI, nullptr, CharStr, N, B); + } + + // snprintf(dst, size, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 + if (!CI->getArgOperand(3)->getType()->isIntegerTy()) + return nullptr; + Value *V = B.CreateTrunc(CI->getArgOperand(3), B.getInt8Ty(), "char"); + Value *Ptr = castToCStr(DstArg, B); + B.CreateStore(V, Ptr); + Ptr = B.CreateInBoundsGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul"); + B.CreateStore(B.getInt8(0), Ptr); + return ConstantInt::get(CI->getType(), 1); + } + + if (FormatStr[1] != 's') + return nullptr; + + Value *StrArg = CI->getArgOperand(3); + // snprintf(dest, size, "%s", str) to llvm.memcpy(dest, str, len+1, 1) + StringRef Str; + if (!getConstantStringInfo(StrArg, Str)) + return nullptr; + + return emitSnPrintfMemCpy(CI, StrArg, Str, N, B); +} + +Value *LibCallSimplifier::optimizeSnPrintF(CallInst *CI, IRBuilderBase &B) { + if (Value *V = optimizeSnPrintFString(CI, B)) { + return V; + } + + if (isKnownNonZero(CI->getOperand(1), DL)) + annotateNonNullNoUndefBasedOnAccess(CI, 0); + return nullptr; +} + +Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, + IRBuilderBase &B) { + optimizeErrorReporting(CI, B, 0); + + // All the optimizations depend on the format string. + StringRef FormatStr; + if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) + return nullptr; + + // Do not do any of the following transformations if the fprintf return + // value is used, in general the fprintf return value is not compatible + // with fwrite(), fputc() or fputs(). + if (!CI->use_empty()) + return nullptr; + + // fprintf(F, "foo") --> fwrite("foo", 3, 1, F) + if (CI->arg_size() == 2) { + // Could handle %% -> % if we cared. + if (FormatStr.contains('%')) + return nullptr; // We found a format specifier. + + unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule()); + Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits); + return copyFlags( + *CI, emitFWrite(CI->getArgOperand(1), + ConstantInt::get(SizeTTy, FormatStr.size()), + CI->getArgOperand(0), B, DL, TLI)); + } + + // The remaining optimizations require the format string to be "%s" or "%c" + // and have an extra operand. + if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->arg_size() < 3) + return nullptr; + + // Decode the second character of the format string. + if (FormatStr[1] == 'c') { + // fprintf(F, "%c", chr) --> fputc((int)chr, F) + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) + return nullptr; + Type *IntTy = B.getIntNTy(TLI->getIntSize()); + Value *V = B.CreateIntCast(CI->getArgOperand(2), IntTy, /*isSigned*/ true, + "chari"); + return copyFlags(*CI, emitFPutC(V, CI->getArgOperand(0), B, TLI)); + } + + if (FormatStr[1] == 's') { + // fprintf(F, "%s", str) --> fputs(str, F) + if (!CI->getArgOperand(2)->getType()->isPointerTy()) + return nullptr; + return copyFlags( + *CI, emitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI)); + } + return nullptr; +} + +Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilderBase &B) { + Module *M = CI->getModule(); + Function *Callee = CI->getCalledFunction(); + FunctionType *FT = Callee->getFunctionType(); + if (Value *V = optimizeFPrintFString(CI, B)) { + return V; + } + + // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no + // floating point arguments. 
+ if (isLibFuncEmittable(M, TLI, LibFunc_fiprintf) && + !callHasFloatingPointArgument(CI)) { + FunctionCallee FIPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_fiprintf, + FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(FIPrintFFn); + B.Insert(New); + return New; + } + + // fprintf(stream, format, ...) -> __small_fprintf(stream, format, ...) if no + // 128-bit floating point arguments. + if (isLibFuncEmittable(M, TLI, LibFunc_small_fprintf) && + !callHasFP128Argument(CI)) { + auto SmallFPrintFFn = + getOrInsertLibFunc(M, *TLI, LibFunc_small_fprintf, FT, + Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(SmallFPrintFFn); + B.Insert(New); + return New; + } + + return nullptr; +} + +Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilderBase &B) { + optimizeErrorReporting(CI, B, 3); + + // Get the element size and count. + ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); + if (SizeC && CountC) { + uint64_t Bytes = SizeC->getZExtValue() * CountC->getZExtValue(); + + // If this is writing zero records, remove the call (it's a noop). + if (Bytes == 0) + return ConstantInt::get(CI->getType(), 0); + + // If this is writing one byte, turn it into fputc. + // This optimisation is only valid, if the return value is unused. + if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) + Value *Char = B.CreateLoad(B.getInt8Ty(), + castToCStr(CI->getArgOperand(0), B), "char"); + Type *IntTy = B.getIntNTy(TLI->getIntSize()); + Value *Cast = B.CreateIntCast(Char, IntTy, /*isSigned*/ true, "chari"); + Value *NewCI = emitFPutC(Cast, CI->getArgOperand(3), B, TLI); + return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr; + } + } + + return nullptr; +} + +Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilderBase &B) { + optimizeErrorReporting(CI, B, 1); + + // Don't rewrite fputs to fwrite when optimising for size because fwrite + // requires more arguments and thus extra MOVs are required. + bool OptForSize = CI->getFunction()->hasOptSize() || + llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI, + PGSOQueryType::IRPass); + if (OptForSize) + return nullptr; + + // We can't optimize if return value is used. + if (!CI->use_empty()) + return nullptr; + + // fputs(s,F) --> fwrite(s,strlen(s),1,F) + uint64_t Len = GetStringLength(CI->getArgOperand(0)); + if (!Len) + return nullptr; + + // Known to have no uses (see above). + unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule()); + Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits); + return copyFlags( + *CI, + emitFWrite(CI->getArgOperand(0), + ConstantInt::get(SizeTTy, Len - 1), + CI->getArgOperand(1), B, DL, TLI)); +} + +Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilderBase &B) { + annotateNonNullNoUndefBasedOnAccess(CI, 0); + if (!CI->use_empty()) + return nullptr; + + // Check for a constant string. + // puts("") -> putchar('\n') + StringRef Str; + if (getConstantStringInfo(CI->getArgOperand(0), Str) && Str.empty()) { + // putchar takes an argument of the same type as puts returns, i.e., + // int, which need not be 32 bits wide. 
+ Type *IntTy = CI->getType(); + return copyFlags(*CI, emitPutChar(ConstantInt::get(IntTy, '\n'), B, TLI)); + } + + return nullptr; +} + +Value *LibCallSimplifier::optimizeBCopy(CallInst *CI, IRBuilderBase &B) { + // bcopy(src, dst, n) -> llvm.memmove(dst, src, n) + return copyFlags(*CI, B.CreateMemMove(CI->getArgOperand(1), Align(1), + CI->getArgOperand(0), Align(1), + CI->getArgOperand(2))); +} + +bool LibCallSimplifier::hasFloatVersion(const Module *M, StringRef FuncName) { + SmallString<20> FloatFuncName = FuncName; + FloatFuncName += 'f'; + return isLibFuncEmittable(M, TLI, FloatFuncName); +} + +Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, + IRBuilderBase &Builder) { + Module *M = CI->getModule(); + LibFunc Func; + Function *Callee = CI->getCalledFunction(); + // Check for string/memory library functions. + if (TLI->getLibFunc(*Callee, Func) && isLibFuncEmittable(M, TLI, Func)) { + // Make sure we never change the calling convention. + assert( + (ignoreCallingConv(Func) || + TargetLibraryInfoImpl::isCallingConvCCompatible(CI)) && + "Optimizing string/memory libcall would change the calling convention"); + switch (Func) { + case LibFunc_strcat: + return optimizeStrCat(CI, Builder); + case LibFunc_strncat: + return optimizeStrNCat(CI, Builder); + case LibFunc_strchr: + return optimizeStrChr(CI, Builder); + case LibFunc_strrchr: + return optimizeStrRChr(CI, Builder); + case LibFunc_strcmp: + return optimizeStrCmp(CI, Builder); + case LibFunc_strncmp: + return optimizeStrNCmp(CI, Builder); + case LibFunc_strcpy: + return optimizeStrCpy(CI, Builder); + case LibFunc_stpcpy: + return optimizeStpCpy(CI, Builder); + case LibFunc_strlcpy: + return optimizeStrLCpy(CI, Builder); + case LibFunc_stpncpy: + return optimizeStringNCpy(CI, /*RetEnd=*/true, Builder); + case LibFunc_strncpy: + return optimizeStringNCpy(CI, /*RetEnd=*/false, Builder); + case LibFunc_strlen: + return optimizeStrLen(CI, Builder); + case LibFunc_strnlen: + return optimizeStrNLen(CI, Builder); + case LibFunc_strpbrk: + return optimizeStrPBrk(CI, Builder); + case LibFunc_strndup: + return optimizeStrNDup(CI, Builder); + case LibFunc_strtol: + case LibFunc_strtod: + case LibFunc_strtof: + case LibFunc_strtoul: + case LibFunc_strtoll: + case LibFunc_strtold: + case LibFunc_strtoull: + return optimizeStrTo(CI, Builder); + case LibFunc_strspn: + return optimizeStrSpn(CI, Builder); + case LibFunc_strcspn: + return optimizeStrCSpn(CI, Builder); + case LibFunc_strstr: + return optimizeStrStr(CI, Builder); + case LibFunc_memchr: + return optimizeMemChr(CI, Builder); + case LibFunc_memrchr: + return optimizeMemRChr(CI, Builder); + case LibFunc_bcmp: + return optimizeBCmp(CI, Builder); + case LibFunc_memcmp: + return optimizeMemCmp(CI, Builder); + case LibFunc_memcpy: + return optimizeMemCpy(CI, Builder); + case LibFunc_memccpy: + return optimizeMemCCpy(CI, Builder); + case LibFunc_mempcpy: + return optimizeMemPCpy(CI, Builder); + case LibFunc_memmove: + return optimizeMemMove(CI, Builder); + case LibFunc_memset: + return optimizeMemSet(CI, Builder); + case LibFunc_realloc: + return optimizeRealloc(CI, Builder); + case LibFunc_wcslen: + return optimizeWcslen(CI, Builder); + case LibFunc_bcopy: + return optimizeBCopy(CI, Builder); + default: + break; + } + } + return nullptr; +} + +Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI, + LibFunc Func, + IRBuilderBase &Builder) { + const Module *M = CI->getModule(); + + // Don't optimize calls that require strict floating point semantics. 
+ if (CI->isStrictFP()) + return nullptr; + + if (Value *V = optimizeTrigReflections(CI, Func, Builder)) + return V; + + switch (Func) { + case LibFunc_sinpif: + case LibFunc_sinpi: + case LibFunc_cospif: + case LibFunc_cospi: + return optimizeSinCosPi(CI, Builder); + case LibFunc_powf: + case LibFunc_pow: + case LibFunc_powl: + return optimizePow(CI, Builder); + case LibFunc_exp2l: + case LibFunc_exp2: + case LibFunc_exp2f: + return optimizeExp2(CI, Builder); + case LibFunc_fabsf: + case LibFunc_fabs: + case LibFunc_fabsl: + return replaceUnaryCall(CI, Builder, Intrinsic::fabs); + case LibFunc_sqrtf: + case LibFunc_sqrt: + case LibFunc_sqrtl: + return optimizeSqrt(CI, Builder); + case LibFunc_logf: + case LibFunc_log: + case LibFunc_logl: + case LibFunc_log10f: + case LibFunc_log10: + case LibFunc_log10l: + case LibFunc_log1pf: + case LibFunc_log1p: + case LibFunc_log1pl: + case LibFunc_log2f: + case LibFunc_log2: + case LibFunc_log2l: + case LibFunc_logbf: + case LibFunc_logb: + case LibFunc_logbl: + return optimizeLog(CI, Builder); + case LibFunc_tan: + case LibFunc_tanf: + case LibFunc_tanl: + return optimizeTan(CI, Builder); + case LibFunc_ceil: + return replaceUnaryCall(CI, Builder, Intrinsic::ceil); + case LibFunc_floor: + return replaceUnaryCall(CI, Builder, Intrinsic::floor); + case LibFunc_round: + return replaceUnaryCall(CI, Builder, Intrinsic::round); + case LibFunc_roundeven: + return replaceUnaryCall(CI, Builder, Intrinsic::roundeven); + case LibFunc_nearbyint: + return replaceUnaryCall(CI, Builder, Intrinsic::nearbyint); + case LibFunc_rint: + return replaceUnaryCall(CI, Builder, Intrinsic::rint); + case LibFunc_trunc: + return replaceUnaryCall(CI, Builder, Intrinsic::trunc); + case LibFunc_acos: + case LibFunc_acosh: + case LibFunc_asin: + case LibFunc_asinh: + case LibFunc_atan: + case LibFunc_atanh: + case LibFunc_cbrt: + case LibFunc_cosh: + case LibFunc_exp: + case LibFunc_exp10: + case LibFunc_expm1: + case LibFunc_cos: + case LibFunc_sin: + case LibFunc_sinh: + case LibFunc_tanh: + if (UnsafeFPShrink && hasFloatVersion(M, CI->getCalledFunction()->getName())) + return optimizeUnaryDoubleFP(CI, Builder, TLI, true); + return nullptr; + case LibFunc_copysign: + if (hasFloatVersion(M, CI->getCalledFunction()->getName())) + return optimizeBinaryDoubleFP(CI, Builder, TLI); + return nullptr; + case LibFunc_fminf: + case LibFunc_fmin: + case LibFunc_fminl: + case LibFunc_fmaxf: + case LibFunc_fmax: + case LibFunc_fmaxl: + return optimizeFMinFMax(CI, Builder); + case LibFunc_cabs: + case LibFunc_cabsf: + case LibFunc_cabsl: + return optimizeCAbs(CI, Builder); + default: + return nullptr; + } +} + +Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) { + Module *M = CI->getModule(); + assert(!CI->isMustTailCall() && "These transforms aren't musttail safe."); + + // TODO: Split out the code below that operates on FP calls so that + // we can all non-FP calls with the StrictFP attribute to be + // optimized. + if (CI->isNoBuiltin()) + return nullptr; + + LibFunc Func; + Function *Callee = CI->getCalledFunction(); + bool IsCallingConvC = TargetLibraryInfoImpl::isCallingConvCCompatible(CI); + + SmallVector<OperandBundleDef, 2> OpBundles; + CI->getOperandBundlesAsDefs(OpBundles); + + IRBuilderBase::OperandBundlesGuard Guard(Builder); + Builder.setDefaultOperandBundles(OpBundles); + + // Command-line parameter overrides instruction attribute. + // This can't be moved to optimizeFloatingPointLibCall() because it may be + // used by the intrinsic optimizations. 
+ if (EnableUnsafeFPShrink.getNumOccurrences() > 0) + UnsafeFPShrink = EnableUnsafeFPShrink; + else if (isa<FPMathOperator>(CI) && CI->isFast()) + UnsafeFPShrink = true; + + // First, check for intrinsics. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) { + if (!IsCallingConvC) + return nullptr; + // The FP intrinsics have corresponding constrained versions so we don't + // need to check for the StrictFP attribute here. + switch (II->getIntrinsicID()) { + case Intrinsic::pow: + return optimizePow(CI, Builder); + case Intrinsic::exp2: + return optimizeExp2(CI, Builder); + case Intrinsic::log: + case Intrinsic::log2: + case Intrinsic::log10: + return optimizeLog(CI, Builder); + case Intrinsic::sqrt: + return optimizeSqrt(CI, Builder); + case Intrinsic::memset: + return optimizeMemSet(CI, Builder); + case Intrinsic::memcpy: + return optimizeMemCpy(CI, Builder); + case Intrinsic::memmove: + return optimizeMemMove(CI, Builder); + default: + return nullptr; + } + } + + // Also try to simplify calls to fortified library functions. + if (Value *SimplifiedFortifiedCI = + FortifiedSimplifier.optimizeCall(CI, Builder)) { + // Try to further simplify the result. + CallInst *SimplifiedCI = dyn_cast<CallInst>(SimplifiedFortifiedCI); + if (SimplifiedCI && SimplifiedCI->getCalledFunction()) { + // Ensure that SimplifiedCI's uses are complete, since some calls have + // their uses analyzed. + replaceAllUsesWith(CI, SimplifiedCI); + + // Set insertion point to SimplifiedCI to guarantee we reach all uses + // we might replace later on. + IRBuilderBase::InsertPointGuard Guard(Builder); + Builder.SetInsertPoint(SimplifiedCI); + if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, Builder)) { + // If we were able to further simplify, remove the now redundant call. + substituteInParent(SimplifiedCI, V); + return V; + } + } + return SimplifiedFortifiedCI; + } + + // Then check for known library functions. + if (TLI->getLibFunc(*Callee, Func) && isLibFuncEmittable(M, TLI, Func)) { + // We never change the calling convention. 
+ if (!ignoreCallingConv(Func) && !IsCallingConvC) + return nullptr; + if (Value *V = optimizeStringMemoryLibCall(CI, Builder)) + return V; + if (Value *V = optimizeFloatingPointLibCall(CI, Func, Builder)) + return V; + switch (Func) { + case LibFunc_ffs: + case LibFunc_ffsl: + case LibFunc_ffsll: + return optimizeFFS(CI, Builder); + case LibFunc_fls: + case LibFunc_flsl: + case LibFunc_flsll: + return optimizeFls(CI, Builder); + case LibFunc_abs: + case LibFunc_labs: + case LibFunc_llabs: + return optimizeAbs(CI, Builder); + case LibFunc_isdigit: + return optimizeIsDigit(CI, Builder); + case LibFunc_isascii: + return optimizeIsAscii(CI, Builder); + case LibFunc_toascii: + return optimizeToAscii(CI, Builder); + case LibFunc_atoi: + case LibFunc_atol: + case LibFunc_atoll: + return optimizeAtoi(CI, Builder); + case LibFunc_strtol: + case LibFunc_strtoll: + return optimizeStrToInt(CI, Builder, /*AsSigned=*/true); + case LibFunc_strtoul: + case LibFunc_strtoull: + return optimizeStrToInt(CI, Builder, /*AsSigned=*/false); + case LibFunc_printf: + return optimizePrintF(CI, Builder); + case LibFunc_sprintf: + return optimizeSPrintF(CI, Builder); + case LibFunc_snprintf: + return optimizeSnPrintF(CI, Builder); + case LibFunc_fprintf: + return optimizeFPrintF(CI, Builder); + case LibFunc_fwrite: + return optimizeFWrite(CI, Builder); + case LibFunc_fputs: + return optimizeFPuts(CI, Builder); + case LibFunc_puts: + return optimizePuts(CI, Builder); + case LibFunc_perror: + return optimizeErrorReporting(CI, Builder); + case LibFunc_vfprintf: + case LibFunc_fiprintf: + return optimizeErrorReporting(CI, Builder, 0); + default: + return nullptr; + } + } + return nullptr; +} + +LibCallSimplifier::LibCallSimplifier( + const DataLayout &DL, const TargetLibraryInfo *TLI, + OptimizationRemarkEmitter &ORE, + BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, + function_ref<void(Instruction *, Value *)> Replacer, + function_ref<void(Instruction *)> Eraser) + : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE), BFI(BFI), PSI(PSI), + Replacer(Replacer), Eraser(Eraser) {} + +void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) { + // Indirect through the replacer used in this instance. 
+ Replacer(I, With); +} + +void LibCallSimplifier::eraseFromParent(Instruction *I) { + Eraser(I); +} + +// TODO: +// Additional cases that we need to add to this file: +// +// cbrt: +// * cbrt(expN(X)) -> expN(x/3) +// * cbrt(sqrt(x)) -> pow(x,1/6) +// * cbrt(cbrt(x)) -> pow(x,1/9) +// +// exp, expf, expl: +// * exp(log(x)) -> x +// +// log, logf, logl: +// * log(exp(x)) -> x +// * log(exp(y)) -> y*log(e) +// * log(exp10(y)) -> y*log(10) +// * log(sqrt(x)) -> 0.5*log(x) +// +// pow, powf, powl: +// * pow(sqrt(x),y) -> pow(x,y*0.5) +// * pow(pow(x,y),z)-> pow(x,y*z) +// +// signbit: +// * signbit(cnst) -> cnst' +// * signbit(nncst) -> 0 (if pstv is a non-negative constant) +// +// sqrt, sqrtf, sqrtl: +// * sqrt(expN(x)) -> expN(x*0.5) +// * sqrt(Nroot(x)) -> pow(x,1/(2*N)) +// * sqrt(pow(x,y)) -> pow(|x|,y*0.5) +// + +//===----------------------------------------------------------------------===// +// Fortified Library Call Optimizations +//===----------------------------------------------------------------------===// + +bool FortifiedLibCallSimplifier::isFortifiedCallFoldable( + CallInst *CI, unsigned ObjSizeOp, std::optional<unsigned> SizeOp, + std::optional<unsigned> StrOp, std::optional<unsigned> FlagOp) { + // If this function takes a flag argument, the implementation may use it to + // perform extra checks. Don't fold into the non-checking variant. + if (FlagOp) { + ConstantInt *Flag = dyn_cast<ConstantInt>(CI->getArgOperand(*FlagOp)); + if (!Flag || !Flag->isZero()) + return false; + } + + if (SizeOp && CI->getArgOperand(ObjSizeOp) == CI->getArgOperand(*SizeOp)) + return true; + + if (ConstantInt *ObjSizeCI = + dyn_cast<ConstantInt>(CI->getArgOperand(ObjSizeOp))) { + if (ObjSizeCI->isMinusOne()) + return true; + // If the object size wasn't -1 (unknown), bail out if we were asked to. + if (OnlyLowerUnknownSize) + return false; + if (StrOp) { + uint64_t Len = GetStringLength(CI->getArgOperand(*StrOp)); + // If the length is 0 we don't know how long it is and so we can't + // remove the check. 
+ if (Len) + annotateDereferenceableBytes(CI, *StrOp, Len); + else + return false; + return ObjSizeCI->getZExtValue() >= Len; + } + + if (SizeOp) { + if (ConstantInt *SizeCI = + dyn_cast<ConstantInt>(CI->getArgOperand(*SizeOp))) + return ObjSizeCI->getZExtValue() >= SizeCI->getZExtValue(); + } + } + return false; +} + +Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, + IRBuilderBase &B) { + if (isFortifiedCallFoldable(CI, 3, 2)) { + CallInst *NewCI = + B.CreateMemCpy(CI->getArgOperand(0), Align(1), CI->getArgOperand(1), + Align(1), CI->getArgOperand(2)); + mergeAttributesAndFlags(NewCI, *CI); + return CI->getArgOperand(0); + } + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, + IRBuilderBase &B) { + if (isFortifiedCallFoldable(CI, 3, 2)) { + CallInst *NewCI = + B.CreateMemMove(CI->getArgOperand(0), Align(1), CI->getArgOperand(1), + Align(1), CI->getArgOperand(2)); + mergeAttributesAndFlags(NewCI, *CI); + return CI->getArgOperand(0); + } + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, + IRBuilderBase &B) { + if (isFortifiedCallFoldable(CI, 3, 2)) { + Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); + CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val, + CI->getArgOperand(2), Align(1)); + mergeAttributesAndFlags(NewCI, *CI); + return CI->getArgOperand(0); + } + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeMemPCpyChk(CallInst *CI, + IRBuilderBase &B) { + const DataLayout &DL = CI->getModule()->getDataLayout(); + if (isFortifiedCallFoldable(CI, 3, 2)) + if (Value *Call = emitMemPCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, DL, TLI)) { + return mergeAttributesAndFlags(cast<CallInst>(Call), *CI); + } + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, + IRBuilderBase &B, + LibFunc Func) { + const DataLayout &DL = CI->getModule()->getDataLayout(); + Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1), + *ObjSize = CI->getArgOperand(2); + + // __stpcpy_chk(x,x,...) -> x+strlen(x) + if (Func == LibFunc_stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) { + Value *StrLen = emitStrLen(Src, B, DL, TLI); + return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr; + } + + // If a) we don't have any length information, or b) we know this will + // fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our + // st[rp]cpy_chk call which may fail at runtime if the size is too long. + // TODO: It might be nice to get a maximum length out of the possible + // string lengths for varying. + if (isFortifiedCallFoldable(CI, 2, std::nullopt, 1)) { + if (Func == LibFunc_strcpy_chk) + return copyFlags(*CI, emitStrCpy(Dst, Src, B, TLI)); + else + return copyFlags(*CI, emitStpCpy(Dst, Src, B, TLI)); + } + + if (OnlyLowerUnknownSize) + return nullptr; + + // Maybe we can stil fold __st[rp]cpy_chk to __memcpy_chk. + uint64_t Len = GetStringLength(Src); + if (Len) + annotateDereferenceableBytes(CI, 1, Len); + else + return nullptr; + + unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule()); + Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits); + Value *LenV = ConstantInt::get(SizeTTy, Len); + Value *Ret = emitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI); + // If the function was an __stpcpy_chk, and we were able to fold it into + // a __memcpy_chk, we still need to return the correct end pointer. 
+ if (Ret && Func == LibFunc_stpcpy_chk) + return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, + ConstantInt::get(SizeTTy, Len - 1)); + return copyFlags(*CI, cast<CallInst>(Ret)); +} + +Value *FortifiedLibCallSimplifier::optimizeStrLenChk(CallInst *CI, + IRBuilderBase &B) { + if (isFortifiedCallFoldable(CI, 1, std::nullopt, 0)) + return copyFlags(*CI, emitStrLen(CI->getArgOperand(0), B, + CI->getModule()->getDataLayout(), TLI)); + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI, + IRBuilderBase &B, + LibFunc Func) { + if (isFortifiedCallFoldable(CI, 3, 2)) { + if (Func == LibFunc_strncpy_chk) + return copyFlags(*CI, + emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI)); + else + return copyFlags(*CI, + emitStpNCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI)); + } + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeMemCCpyChk(CallInst *CI, + IRBuilderBase &B) { + if (isFortifiedCallFoldable(CI, 4, 3)) + return copyFlags( + *CI, emitMemCCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), CI->getArgOperand(3), B, TLI)); + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeSNPrintfChk(CallInst *CI, + IRBuilderBase &B) { + if (isFortifiedCallFoldable(CI, 3, 1, std::nullopt, 2)) { + SmallVector<Value *, 8> VariadicArgs(drop_begin(CI->args(), 5)); + return copyFlags(*CI, + emitSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(4), VariadicArgs, B, TLI)); + } + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeSPrintfChk(CallInst *CI, + IRBuilderBase &B) { + if (isFortifiedCallFoldable(CI, 2, std::nullopt, std::nullopt, 1)) { + SmallVector<Value *, 8> VariadicArgs(drop_begin(CI->args(), 4)); + return copyFlags(*CI, + emitSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), + VariadicArgs, B, TLI)); + } + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeStrCatChk(CallInst *CI, + IRBuilderBase &B) { + if (isFortifiedCallFoldable(CI, 2)) + return copyFlags( + *CI, emitStrCat(CI->getArgOperand(0), CI->getArgOperand(1), B, TLI)); + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeStrLCat(CallInst *CI, + IRBuilderBase &B) { + if (isFortifiedCallFoldable(CI, 3)) + return copyFlags(*CI, + emitStrLCat(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI)); + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeStrNCatChk(CallInst *CI, + IRBuilderBase &B) { + if (isFortifiedCallFoldable(CI, 3)) + return copyFlags(*CI, + emitStrNCat(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI)); + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeStrLCpyChk(CallInst *CI, + IRBuilderBase &B) { + if (isFortifiedCallFoldable(CI, 3)) + return copyFlags(*CI, + emitStrLCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI)); + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeVSNPrintfChk(CallInst *CI, + IRBuilderBase &B) { + if (isFortifiedCallFoldable(CI, 3, 1, std::nullopt, 2)) + return copyFlags( + *CI, emitVSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(4), CI->getArgOperand(5), B, TLI)); + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeVSPrintfChk(CallInst *CI, + IRBuilderBase &B) { + if (isFortifiedCallFoldable(CI, 2, std::nullopt, std::nullopt, 1)) + return copyFlags(*CI, + emitVSPrintf(CI->getArgOperand(0), 
CI->getArgOperand(3), + CI->getArgOperand(4), B, TLI)); + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI, + IRBuilderBase &Builder) { + // FIXME: We shouldn't be changing "nobuiltin" or TLI unavailable calls here. + // Some clang users checked for _chk libcall availability using: + // __has_builtin(__builtin___memcpy_chk) + // When compiling with -fno-builtin, this is always true. + // When passing -ffreestanding/-mkernel, which both imply -fno-builtin, we + // end up with fortified libcalls, which isn't acceptable in a freestanding + // environment which only provides their non-fortified counterparts. + // + // Until we change clang and/or teach external users to check for availability + // differently, disregard the "nobuiltin" attribute and TLI::has. + // + // PR23093. + + LibFunc Func; + Function *Callee = CI->getCalledFunction(); + bool IsCallingConvC = TargetLibraryInfoImpl::isCallingConvCCompatible(CI); + + SmallVector<OperandBundleDef, 2> OpBundles; + CI->getOperandBundlesAsDefs(OpBundles); + + IRBuilderBase::OperandBundlesGuard Guard(Builder); + Builder.setDefaultOperandBundles(OpBundles); + + // First, check that this is a known library functions and that the prototype + // is correct. + if (!TLI->getLibFunc(*Callee, Func)) + return nullptr; + + // We never change the calling convention. + if (!ignoreCallingConv(Func) && !IsCallingConvC) + return nullptr; + + switch (Func) { + case LibFunc_memcpy_chk: + return optimizeMemCpyChk(CI, Builder); + case LibFunc_mempcpy_chk: + return optimizeMemPCpyChk(CI, Builder); + case LibFunc_memmove_chk: + return optimizeMemMoveChk(CI, Builder); + case LibFunc_memset_chk: + return optimizeMemSetChk(CI, Builder); + case LibFunc_stpcpy_chk: + case LibFunc_strcpy_chk: + return optimizeStrpCpyChk(CI, Builder, Func); + case LibFunc_strlen_chk: + return optimizeStrLenChk(CI, Builder); + case LibFunc_stpncpy_chk: + case LibFunc_strncpy_chk: + return optimizeStrpNCpyChk(CI, Builder, Func); + case LibFunc_memccpy_chk: + return optimizeMemCCpyChk(CI, Builder); + case LibFunc_snprintf_chk: + return optimizeSNPrintfChk(CI, Builder); + case LibFunc_sprintf_chk: + return optimizeSPrintfChk(CI, Builder); + case LibFunc_strcat_chk: + return optimizeStrCatChk(CI, Builder); + case LibFunc_strlcat_chk: + return optimizeStrLCat(CI, Builder); + case LibFunc_strncat_chk: + return optimizeStrNCatChk(CI, Builder); + case LibFunc_strlcpy_chk: + return optimizeStrLCpyChk(CI, Builder); + case LibFunc_vsnprintf_chk: + return optimizeVSNPrintfChk(CI, Builder); + case LibFunc_vsprintf_chk: + return optimizeVSPrintfChk(CI, Builder); + default: + break; + } + return nullptr; +} + +FortifiedLibCallSimplifier::FortifiedLibCallSimplifier( + const TargetLibraryInfo *TLI, bool OnlyLowerUnknownSize) + : TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/SizeOpts.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/SizeOpts.cpp new file mode 100644 index 0000000000..1242380f73 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/SizeOpts.cpp @@ -0,0 +1,111 @@ +//===-- SizeOpts.cpp - code size optimization related code ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains some shared code size optimization related code. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SizeOpts.h" + +using namespace llvm; + +cl::opt<bool> llvm::EnablePGSO( + "pgso", cl::Hidden, cl::init(true), + cl::desc("Enable the profile guided size optimizations. ")); + +cl::opt<bool> llvm::PGSOLargeWorkingSetSizeOnly( + "pgso-lwss-only", cl::Hidden, cl::init(true), + cl::desc("Apply the profile guided size optimizations only " + "if the working set size is large (except for cold code.)")); + +cl::opt<bool> llvm::PGSOColdCodeOnly( + "pgso-cold-code-only", cl::Hidden, cl::init(false), + cl::desc("Apply the profile guided size optimizations only " + "to cold code.")); + +cl::opt<bool> llvm::PGSOColdCodeOnlyForInstrPGO( + "pgso-cold-code-only-for-instr-pgo", cl::Hidden, cl::init(false), + cl::desc("Apply the profile guided size optimizations only " + "to cold code under instrumentation PGO.")); + +cl::opt<bool> llvm::PGSOColdCodeOnlyForSamplePGO( + "pgso-cold-code-only-for-sample-pgo", cl::Hidden, cl::init(false), + cl::desc("Apply the profile guided size optimizations only " + "to cold code under sample PGO.")); + +cl::opt<bool> llvm::PGSOColdCodeOnlyForPartialSamplePGO( + "pgso-cold-code-only-for-partial-sample-pgo", cl::Hidden, cl::init(false), + cl::desc("Apply the profile guided size optimizations only " + "to cold code under partial-profile sample PGO.")); + +cl::opt<bool> llvm::ForcePGSO( + "force-pgso", cl::Hidden, cl::init(false), + cl::desc("Force the (profiled-guided) size optimizations. ")); + +cl::opt<int> llvm::PgsoCutoffInstrProf( + "pgso-cutoff-instr-prof", cl::Hidden, cl::init(950000), + cl::desc("The profile guided size optimization profile summary cutoff " + "for instrumentation profile.")); + +cl::opt<int> llvm::PgsoCutoffSampleProf( + "pgso-cutoff-sample-prof", cl::Hidden, cl::init(990000), + cl::desc("The profile guided size optimization profile summary cutoff " + "for sample profile.")); + +namespace { +struct BasicBlockBFIAdapter { + static bool isFunctionColdInCallGraph(const Function *F, + ProfileSummaryInfo *PSI, + BlockFrequencyInfo &BFI) { + return PSI->isFunctionColdInCallGraph(F, BFI); + } + static bool isFunctionHotInCallGraphNthPercentile(int CutOff, + const Function *F, + ProfileSummaryInfo *PSI, + BlockFrequencyInfo &BFI) { + return PSI->isFunctionHotInCallGraphNthPercentile(CutOff, F, BFI); + } + static bool isFunctionColdInCallGraphNthPercentile(int CutOff, + const Function *F, + ProfileSummaryInfo *PSI, + BlockFrequencyInfo &BFI) { + return PSI->isFunctionColdInCallGraphNthPercentile(CutOff, F, BFI); + } + static bool isColdBlock(const BasicBlock *BB, + ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI) { + return PSI->isColdBlock(BB, BFI); + } + static bool isHotBlockNthPercentile(int CutOff, + const BasicBlock *BB, + ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI) { + return PSI->isHotBlockNthPercentile(CutOff, BB, BFI); + } + static bool isColdBlockNthPercentile(int CutOff, const BasicBlock *BB, + ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI) { + return PSI->isColdBlockNthPercentile(CutOff, BB, BFI); + } +}; +} // end anonymous namespace + +bool llvm::shouldOptimizeForSize(const Function *F, ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI, + PGSOQueryType QueryType) { + return 
shouldFuncOptimizeForSizeImpl<BasicBlockBFIAdapter>(F, PSI, BFI, + QueryType); +} + +bool llvm::shouldOptimizeForSize(const BasicBlock *BB, ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI, + PGSOQueryType QueryType) { + assert(BB); + return shouldOptimizeForSizeImpl<BasicBlockBFIAdapter>(BB, PSI, BFI, + QueryType); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/SplitModule.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/SplitModule.cpp new file mode 100644 index 0000000000..9c39c26d8b --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/SplitModule.cpp @@ -0,0 +1,287 @@ +//===- SplitModule.cpp - Split a module into partitions -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the function llvm::SplitModule, which splits a module +// into multiple linkable partitions. It can be used to implement parallel code +// generation for link-time optimization. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SplitModule.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/Comdat.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MD5.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include <algorithm> +#include <cassert> +#include <iterator> +#include <memory> +#include <queue> +#include <utility> +#include <vector> + +using namespace llvm; + +#define DEBUG_TYPE "split-module" + +namespace { + +using ClusterMapType = EquivalenceClasses<const GlobalValue *>; +using ComdatMembersType = DenseMap<const Comdat *, const GlobalValue *>; +using ClusterIDMapType = DenseMap<const GlobalValue *, unsigned>; + +} // end anonymous namespace + +static void addNonConstUser(ClusterMapType &GVtoClusterMap, + const GlobalValue *GV, const User *U) { + assert((!isa<Constant>(U) || isa<GlobalValue>(U)) && "Bad user"); + + if (const Instruction *I = dyn_cast<Instruction>(U)) { + const GlobalValue *F = I->getParent()->getParent(); + GVtoClusterMap.unionSets(GV, F); + } else if (const GlobalValue *GVU = dyn_cast<GlobalValue>(U)) { + GVtoClusterMap.unionSets(GV, GVU); + } else { + llvm_unreachable("Underimplemented use case"); + } +} + +// Adds all GlobalValue users of V to the same cluster as GV. +static void addAllGlobalValueUsers(ClusterMapType &GVtoClusterMap, + const GlobalValue *GV, const Value *V) { + for (const auto *U : V->users()) { + SmallVector<const User *, 4> Worklist; + Worklist.push_back(U); + while (!Worklist.empty()) { + const User *UU = Worklist.pop_back_val(); + // For each constant that is not a GV (a pure const) recurse. 
+ if (isa<Constant>(UU) && !isa<GlobalValue>(UU)) { + Worklist.append(UU->user_begin(), UU->user_end()); + continue; + } + addNonConstUser(GVtoClusterMap, GV, UU); + } + } +} + +static const GlobalObject *getGVPartitioningRoot(const GlobalValue *GV) { + const GlobalObject *GO = GV->getAliaseeObject(); + if (const auto *GI = dyn_cast_or_null<GlobalIFunc>(GO)) + GO = GI->getResolverFunction(); + return GO; +} + +// Find partitions for module in the way that no locals need to be +// globalized. +// Try to balance pack those partitions into N files since this roughly equals +// thread balancing for the backend codegen step. +static void findPartitions(Module &M, ClusterIDMapType &ClusterIDMap, + unsigned N) { + // At this point module should have the proper mix of globals and locals. + // As we attempt to partition this module, we must not change any + // locals to globals. + LLVM_DEBUG(dbgs() << "Partition module with (" << M.size() << ")functions\n"); + ClusterMapType GVtoClusterMap; + ComdatMembersType ComdatMembers; + + auto recordGVSet = [&GVtoClusterMap, &ComdatMembers](GlobalValue &GV) { + if (GV.isDeclaration()) + return; + + if (!GV.hasName()) + GV.setName("__llvmsplit_unnamed"); + + // Comdat groups must not be partitioned. For comdat groups that contain + // locals, record all their members here so we can keep them together. + // Comdat groups that only contain external globals are already handled by + // the MD5-based partitioning. + if (const Comdat *C = GV.getComdat()) { + auto &Member = ComdatMembers[C]; + if (Member) + GVtoClusterMap.unionSets(Member, &GV); + else + Member = &GV; + } + + // Aliases should not be separated from their aliasees and ifuncs should + // not be separated from their resolvers regardless of linkage. + if (const GlobalObject *Root = getGVPartitioningRoot(&GV)) + if (&GV != Root) + GVtoClusterMap.unionSets(&GV, Root); + + if (const Function *F = dyn_cast<Function>(&GV)) { + for (const BasicBlock &BB : *F) { + BlockAddress *BA = BlockAddress::lookup(&BB); + if (!BA || !BA->isConstantUsed()) + continue; + addAllGlobalValueUsers(GVtoClusterMap, F, BA); + } + } + + if (GV.hasLocalLinkage()) + addAllGlobalValueUsers(GVtoClusterMap, &GV, &GV); + }; + + llvm::for_each(M.functions(), recordGVSet); + llvm::for_each(M.globals(), recordGVSet); + llvm::for_each(M.aliases(), recordGVSet); + + // Assigned all GVs to merged clusters while balancing number of objects in + // each. + auto CompareClusters = [](const std::pair<unsigned, unsigned> &a, + const std::pair<unsigned, unsigned> &b) { + if (a.second || b.second) + return a.second > b.second; + else + return a.first > b.first; + }; + + std::priority_queue<std::pair<unsigned, unsigned>, + std::vector<std::pair<unsigned, unsigned>>, + decltype(CompareClusters)> + BalancinQueue(CompareClusters); + // Pre-populate priority queue with N slot blanks. + for (unsigned i = 0; i < N; ++i) + BalancinQueue.push(std::make_pair(i, 0)); + + using SortType = std::pair<unsigned, ClusterMapType::iterator>; + + SmallVector<SortType, 64> Sets; + SmallPtrSet<const GlobalValue *, 32> Visited; + + // To guarantee determinism, we have to sort SCC according to size. + // When size is the same, use leader's name. 
+ for (ClusterMapType::iterator I = GVtoClusterMap.begin(), + E = GVtoClusterMap.end(); I != E; ++I) + if (I->isLeader()) + Sets.push_back( + std::make_pair(std::distance(GVtoClusterMap.member_begin(I), + GVtoClusterMap.member_end()), I)); + + llvm::sort(Sets, [](const SortType &a, const SortType &b) { + if (a.first == b.first) + return a.second->getData()->getName() > b.second->getData()->getName(); + else + return a.first > b.first; + }); + + for (auto &I : Sets) { + unsigned CurrentClusterID = BalancinQueue.top().first; + unsigned CurrentClusterSize = BalancinQueue.top().second; + BalancinQueue.pop(); + + LLVM_DEBUG(dbgs() << "Root[" << CurrentClusterID << "] cluster_size(" + << I.first << ") ----> " << I.second->getData()->getName() + << "\n"); + + for (ClusterMapType::member_iterator MI = + GVtoClusterMap.findLeader(I.second); + MI != GVtoClusterMap.member_end(); ++MI) { + if (!Visited.insert(*MI).second) + continue; + LLVM_DEBUG(dbgs() << "----> " << (*MI)->getName() + << ((*MI)->hasLocalLinkage() ? " l " : " e ") << "\n"); + Visited.insert(*MI); + ClusterIDMap[*MI] = CurrentClusterID; + CurrentClusterSize++; + } + // Add this set size to the number of entries in this cluster. + BalancinQueue.push(std::make_pair(CurrentClusterID, CurrentClusterSize)); + } +} + +static void externalize(GlobalValue *GV) { + if (GV->hasLocalLinkage()) { + GV->setLinkage(GlobalValue::ExternalLinkage); + GV->setVisibility(GlobalValue::HiddenVisibility); + } + + // Unnamed entities must be named consistently between modules. setName will + // give a distinct name to each such entity. + if (!GV->hasName()) + GV->setName("__llvmsplit_unnamed"); +} + +// Returns whether GV should be in partition (0-based) I of N. +static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) { + if (const GlobalObject *Root = getGVPartitioningRoot(GV)) + GV = Root; + + StringRef Name; + if (const Comdat *C = GV->getComdat()) + Name = C->getName(); + else + Name = GV->getName(); + + // Partition by MD5 hash. We only need a few bits for evenness as the number + // of partitions will generally be in the 1-2 figure range; the low 16 bits + // are enough. + MD5 H; + MD5::MD5Result R; + H.update(Name); + H.final(R); + return (R[0] | (R[1] << 8)) % N == I; +} + +void llvm::SplitModule( + Module &M, unsigned N, + function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback, + bool PreserveLocals) { + if (!PreserveLocals) { + for (Function &F : M) + externalize(&F); + for (GlobalVariable &GV : M.globals()) + externalize(&GV); + for (GlobalAlias &GA : M.aliases()) + externalize(&GA); + for (GlobalIFunc &GIF : M.ifuncs()) + externalize(&GIF); + } + + // This performs splitting without a need for externalization, which might not + // always be possible. + ClusterIDMapType ClusterIDMap; + findPartitions(M, ClusterIDMap, N); + + // FIXME: We should be able to reuse M as the last partition instead of + // cloning it. Note that the callers at the moment expect the module to + // be preserved, so will need some adjustments as well. 
+ for (unsigned I = 0; I < N; ++I) { + ValueToValueMapTy VMap; + std::unique_ptr<Module> MPart( + CloneModule(M, VMap, [&](const GlobalValue *GV) { + if (ClusterIDMap.count(GV)) + return (ClusterIDMap[GV] == I); + else + return isInPartition(GV, I, N); + })); + if (I != 0) + MPart->setModuleInlineAsm(""); + ModuleCallback(std::move(MPart)); + } +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/StripGCRelocates.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/StripGCRelocates.cpp new file mode 100644 index 0000000000..0ff88e8b46 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/StripGCRelocates.cpp @@ -0,0 +1,86 @@ +//===- StripGCRelocates.cpp - Remove gc.relocates inserted by RewriteStatePoints===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is a little utility pass that removes the gc.relocates inserted by +// RewriteStatepointsForGC. Note that the generated IR is incorrect, +// but this is useful as a single pass in itself, for analysis of IR, without +// the GC.relocates. The statepoint and gc.result intrinsics would still be +// present. +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/StripGCRelocates.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Statepoint.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" + +using namespace llvm; + +static bool stripGCRelocates(Function &F) { + // Nothing to do for declarations. + if (F.isDeclaration()) + return false; + SmallVector<GCRelocateInst *, 20> GCRelocates; + // TODO: We currently do not handle gc.relocates that are in landing pads, + // i.e. not bound to a single statepoint token. + for (Instruction &I : instructions(F)) { + if (auto *GCR = dyn_cast<GCRelocateInst>(&I)) + if (isa<GCStatepointInst>(GCR->getOperand(0))) + GCRelocates.push_back(GCR); + } + // All gc.relocates are bound to a single statepoint token. The order of + // visiting gc.relocates for deletion does not matter. + for (GCRelocateInst *GCRel : GCRelocates) { + Value *OrigPtr = GCRel->getDerivedPtr(); + Value *ReplaceGCRel = OrigPtr; + + // All gc_relocates are i8 addrspace(1)* typed, we need a bitcast from i8 + // addrspace(1)* to the type of the OrigPtr, if the are not the same. + if (GCRel->getType() != OrigPtr->getType()) + ReplaceGCRel = new BitCastInst(OrigPtr, GCRel->getType(), "cast", GCRel); + + // Replace all uses of gc.relocate and delete the gc.relocate + // There maybe unncessary bitcasts back to the OrigPtr type, an instcombine + // pass would clear this up. + GCRel->replaceAllUsesWith(ReplaceGCRel); + GCRel->eraseFromParent(); + } + return !GCRelocates.empty(); +} + +PreservedAnalyses StripGCRelocates::run(Function &F, + FunctionAnalysisManager &AM) { + if (!stripGCRelocates(F)) + return PreservedAnalyses::all(); + + // Removing gc.relocate preserves the CFG, but most other analysis probably + // need to re-run. 
+ PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); + return PA; +} + +namespace { +struct StripGCRelocatesLegacy : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + StripGCRelocatesLegacy() : FunctionPass(ID) { + initializeStripGCRelocatesLegacyPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &Info) const override {} + + bool runOnFunction(Function &F) override { return ::stripGCRelocates(F); } +}; +char StripGCRelocatesLegacy::ID = 0; +} // namespace + +INITIALIZE_PASS(StripGCRelocatesLegacy, "strip-gc-relocates", + "Strip gc.relocates inserted through RewriteStatepointsForGC", + true, false) diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp new file mode 100644 index 0000000000..10fda4df51 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp @@ -0,0 +1,51 @@ +//===- StripNonLineTableDebugInfo.cpp -- Strip parts of Debug Info --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/StripNonLineTableDebugInfo.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils.h" +using namespace llvm; + +namespace { + +/// This pass strips all debug info that is not related line tables. +/// The result will be the same as if the program where compiled with +/// -gline-tables-only. +struct StripNonLineTableDebugLegacyPass : public ModulePass { + static char ID; // Pass identification, replacement for typeid + StripNonLineTableDebugLegacyPass() : ModulePass(ID) { + initializeStripNonLineTableDebugLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + + bool runOnModule(Module &M) override { + return llvm::stripNonLineTableDebugInfo(M); + } +}; +} + +char StripNonLineTableDebugLegacyPass::ID = 0; +INITIALIZE_PASS(StripNonLineTableDebugLegacyPass, + "strip-nonlinetable-debuginfo", + "Strip all debug info except linetables", false, false) + +ModulePass *llvm::createStripNonLineTableDebugLegacyPass() { + return new StripNonLineTableDebugLegacyPass(); +} + +PreservedAnalyses +StripNonLineTableDebugInfoPass::run(Module &M, ModuleAnalysisManager &AM) { + llvm::stripNonLineTableDebugInfo(M); + return PreservedAnalyses::all(); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/SymbolRewriter.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/SymbolRewriter.cpp new file mode 100644 index 0000000000..4ad16d622e --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/SymbolRewriter.cpp @@ -0,0 +1,586 @@ +//===- SymbolRewriter.cpp - Symbol Rewriter -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// SymbolRewriter is a LLVM pass which can rewrite symbols transparently within +// existing code. 
It is implemented as a compiler pass and is configured via a +// YAML configuration file. +// +// The YAML configuration file format is as follows: +// +// RewriteMapFile := RewriteDescriptors +// RewriteDescriptors := RewriteDescriptor | RewriteDescriptors +// RewriteDescriptor := RewriteDescriptorType ':' '{' RewriteDescriptorFields '}' +// RewriteDescriptorFields := RewriteDescriptorField | RewriteDescriptorFields +// RewriteDescriptorField := FieldIdentifier ':' FieldValue ',' +// RewriteDescriptorType := Identifier +// FieldIdentifier := Identifier +// FieldValue := Identifier +// Identifier := [0-9a-zA-Z]+ +// +// Currently, the following descriptor types are supported: +// +// - function: (function rewriting) +// + Source (original name of the function) +// + Target (explicit transformation) +// + Transform (pattern transformation) +// + Naked (boolean, whether the function is undecorated) +// - global variable: (external linkage global variable rewriting) +// + Source (original name of externally visible variable) +// + Target (explicit transformation) +// + Transform (pattern transformation) +// - global alias: (global alias rewriting) +// + Source (original name of the aliased name) +// + Target (explicit transformation) +// + Transform (pattern transformation) +// +// Note that source and exactly one of [Target, Transform] must be provided +// +// New rewrite descriptors can be created. Addding a new rewrite descriptor +// involves: +// +// a) extended the rewrite descriptor kind enumeration +// (<anonymous>::RewriteDescriptor::RewriteDescriptorType) +// b) implementing the new descriptor +// (c.f. <anonymous>::ExplicitRewriteFunctionDescriptor) +// c) extending the rewrite map parser +// (<anonymous>::RewriteMapParser::parseEntry) +// +// Specify to rewrite the symbols using the `-rewrite-symbols` option, and +// specify the map file to use for the rewriting via the `-rewrite-map-file` +// option. 
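+//
+// As an illustrative example (not part of the upstream documentation), a map
+// file matching the grammar above could contain:
+//
+//   function: { source: foo, target: bar }
+//   function: { source: baz, transform: qux }
+//
+// (field keys are shown in the lower-case form accepted by the parser below).
+// The first entry renames the function "foo" to "bar" explicitly; the second
+// replaces the first match of the pattern "baz" in each function name with
+// "qux".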
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SymbolRewriter.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/IR/Comdat.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Regex.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/YAMLParser.h" +#include <memory> +#include <string> +#include <vector> + +using namespace llvm; +using namespace SymbolRewriter; + +#define DEBUG_TYPE "symbol-rewriter" + +static cl::list<std::string> RewriteMapFiles("rewrite-map-file", + cl::desc("Symbol Rewrite Map"), + cl::value_desc("filename"), + cl::Hidden); + +static void rewriteComdat(Module &M, GlobalObject *GO, + const std::string &Source, + const std::string &Target) { + if (Comdat *CD = GO->getComdat()) { + auto &Comdats = M.getComdatSymbolTable(); + + Comdat *C = M.getOrInsertComdat(Target); + C->setSelectionKind(CD->getSelectionKind()); + GO->setComdat(C); + + Comdats.erase(Comdats.find(Source)); + } +} + +namespace { + +template <RewriteDescriptor::Type DT, typename ValueType, + ValueType *(Module::*Get)(StringRef) const> +class ExplicitRewriteDescriptor : public RewriteDescriptor { +public: + const std::string Source; + const std::string Target; + + ExplicitRewriteDescriptor(StringRef S, StringRef T, const bool Naked) + : RewriteDescriptor(DT), + Source(std::string(Naked ? 
StringRef("\01" + S.str()) : S)), + Target(std::string(T)) {} + + bool performOnModule(Module &M) override; + + static bool classof(const RewriteDescriptor *RD) { + return RD->getType() == DT; + } +}; + +} // end anonymous namespace + +template <RewriteDescriptor::Type DT, typename ValueType, + ValueType *(Module::*Get)(StringRef) const> +bool ExplicitRewriteDescriptor<DT, ValueType, Get>::performOnModule(Module &M) { + bool Changed = false; + if (ValueType *S = (M.*Get)(Source)) { + if (GlobalObject *GO = dyn_cast<GlobalObject>(S)) + rewriteComdat(M, GO, Source, Target); + + if (Value *T = (M.*Get)(Target)) + S->setValueName(T->getValueName()); + else + S->setName(Target); + + Changed = true; + } + return Changed; +} + +namespace { + +template <RewriteDescriptor::Type DT, typename ValueType, + ValueType *(Module::*Get)(StringRef) const, + iterator_range<typename iplist<ValueType>::iterator> + (Module::*Iterator)()> +class PatternRewriteDescriptor : public RewriteDescriptor { +public: + const std::string Pattern; + const std::string Transform; + + PatternRewriteDescriptor(StringRef P, StringRef T) + : RewriteDescriptor(DT), Pattern(std::string(P)), + Transform(std::string(T)) {} + + bool performOnModule(Module &M) override; + + static bool classof(const RewriteDescriptor *RD) { + return RD->getType() == DT; + } +}; + +} // end anonymous namespace + +template <RewriteDescriptor::Type DT, typename ValueType, + ValueType *(Module::*Get)(StringRef) const, + iterator_range<typename iplist<ValueType>::iterator> + (Module::*Iterator)()> +bool PatternRewriteDescriptor<DT, ValueType, Get, Iterator>:: +performOnModule(Module &M) { + bool Changed = false; + for (auto &C : (M.*Iterator)()) { + std::string Error; + + std::string Name = Regex(Pattern).sub(Transform, C.getName(), &Error); + if (!Error.empty()) + report_fatal_error(Twine("unable to transforn ") + C.getName() + " in " + + M.getModuleIdentifier() + ": " + Error); + + if (C.getName() == Name) + continue; + + if (GlobalObject *GO = dyn_cast<GlobalObject>(&C)) + rewriteComdat(M, GO, std::string(C.getName()), Name); + + if (Value *V = (M.*Get)(Name)) + C.setValueName(V->getValueName()); + else + C.setName(Name); + + Changed = true; + } + return Changed; +} + +namespace { + +/// Represents a rewrite for an explicitly named (function) symbol. Both the +/// source function name and target function name of the transformation are +/// explicitly spelt out. +using ExplicitRewriteFunctionDescriptor = + ExplicitRewriteDescriptor<RewriteDescriptor::Type::Function, Function, + &Module::getFunction>; + +/// Represents a rewrite for an explicitly named (global variable) symbol. Both +/// the source variable name and target variable name are spelt out. This +/// applies only to module level variables. +using ExplicitRewriteGlobalVariableDescriptor = + ExplicitRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable, + GlobalVariable, &Module::getGlobalVariable>; + +/// Represents a rewrite for an explicitly named global alias. Both the source +/// and target name are explicitly spelt out. +using ExplicitRewriteNamedAliasDescriptor = + ExplicitRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, GlobalAlias, + &Module::getNamedAlias>; + +/// Represents a rewrite for a regular expression based pattern for functions. +/// A pattern for the function name is provided and a transformation for that +/// pattern to determine the target function name create the rewrite rule. 
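+/// For example (illustrative), a Pattern of "^foo" with a Transform of "bar"
+/// would rename a function named "foo_init" to "bar_init", since the first
+/// match of the pattern within the name is replaced by the transform string.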
+using PatternRewriteFunctionDescriptor = + PatternRewriteDescriptor<RewriteDescriptor::Type::Function, Function, + &Module::getFunction, &Module::functions>; + +/// Represents a rewrite for a global variable based upon a matching pattern. +/// Each global variable matching the provided pattern will be transformed as +/// described in the transformation pattern for the target. Applies only to +/// module level variables. +using PatternRewriteGlobalVariableDescriptor = + PatternRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable, + GlobalVariable, &Module::getGlobalVariable, + &Module::globals>; + +/// PatternRewriteNamedAliasDescriptor - represents a rewrite for global +/// aliases which match a given pattern. The provided transformation will be +/// applied to each of the matching names. +using PatternRewriteNamedAliasDescriptor = + PatternRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, GlobalAlias, + &Module::getNamedAlias, &Module::aliases>; + +} // end anonymous namespace + +bool RewriteMapParser::parse(const std::string &MapFile, + RewriteDescriptorList *DL) { + ErrorOr<std::unique_ptr<MemoryBuffer>> Mapping = + MemoryBuffer::getFile(MapFile); + + if (!Mapping) + report_fatal_error(Twine("unable to read rewrite map '") + MapFile + + "': " + Mapping.getError().message()); + + if (!parse(*Mapping, DL)) + report_fatal_error(Twine("unable to parse rewrite map '") + MapFile + "'"); + + return true; +} + +bool RewriteMapParser::parse(std::unique_ptr<MemoryBuffer> &MapFile, + RewriteDescriptorList *DL) { + SourceMgr SM; + yaml::Stream YS(MapFile->getBuffer(), SM); + + for (auto &Document : YS) { + yaml::MappingNode *DescriptorList; + + // ignore empty documents + if (isa<yaml::NullNode>(Document.getRoot())) + continue; + + DescriptorList = dyn_cast<yaml::MappingNode>(Document.getRoot()); + if (!DescriptorList) { + YS.printError(Document.getRoot(), "DescriptorList node must be a map"); + return false; + } + + for (auto &Descriptor : *DescriptorList) + if (!parseEntry(YS, Descriptor, DL)) + return false; + } + + return true; +} + +bool RewriteMapParser::parseEntry(yaml::Stream &YS, yaml::KeyValueNode &Entry, + RewriteDescriptorList *DL) { + yaml::ScalarNode *Key; + yaml::MappingNode *Value; + SmallString<32> KeyStorage; + StringRef RewriteType; + + Key = dyn_cast<yaml::ScalarNode>(Entry.getKey()); + if (!Key) { + YS.printError(Entry.getKey(), "rewrite type must be a scalar"); + return false; + } + + Value = dyn_cast<yaml::MappingNode>(Entry.getValue()); + if (!Value) { + YS.printError(Entry.getValue(), "rewrite descriptor must be a map"); + return false; + } + + RewriteType = Key->getValue(KeyStorage); + if (RewriteType.equals("function")) + return parseRewriteFunctionDescriptor(YS, Key, Value, DL); + else if (RewriteType.equals("global variable")) + return parseRewriteGlobalVariableDescriptor(YS, Key, Value, DL); + else if (RewriteType.equals("global alias")) + return parseRewriteGlobalAliasDescriptor(YS, Key, Value, DL); + + YS.printError(Entry.getKey(), "unknown rewrite type"); + return false; +} + +bool RewriteMapParser:: +parseRewriteFunctionDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, + yaml::MappingNode *Descriptor, + RewriteDescriptorList *DL) { + bool Naked = false; + std::string Source; + std::string Target; + std::string Transform; + + for (auto &Field : *Descriptor) { + yaml::ScalarNode *Key; + yaml::ScalarNode *Value; + SmallString<32> KeyStorage; + SmallString<32> ValueStorage; + StringRef KeyValue; + + Key = dyn_cast<yaml::ScalarNode>(Field.getKey()); + if 
(!Key) { + YS.printError(Field.getKey(), "descriptor key must be a scalar"); + return false; + } + + Value = dyn_cast<yaml::ScalarNode>(Field.getValue()); + if (!Value) { + YS.printError(Field.getValue(), "descriptor value must be a scalar"); + return false; + } + + KeyValue = Key->getValue(KeyStorage); + if (KeyValue.equals("source")) { + std::string Error; + + Source = std::string(Value->getValue(ValueStorage)); + if (!Regex(Source).isValid(Error)) { + YS.printError(Field.getKey(), "invalid regex: " + Error); + return false; + } + } else if (KeyValue.equals("target")) { + Target = std::string(Value->getValue(ValueStorage)); + } else if (KeyValue.equals("transform")) { + Transform = std::string(Value->getValue(ValueStorage)); + } else if (KeyValue.equals("naked")) { + std::string Undecorated; + + Undecorated = std::string(Value->getValue(ValueStorage)); + Naked = StringRef(Undecorated).lower() == "true" || Undecorated == "1"; + } else { + YS.printError(Field.getKey(), "unknown key for function"); + return false; + } + } + + if (Transform.empty() == Target.empty()) { + YS.printError(Descriptor, + "exactly one of transform or target must be specified"); + return false; + } + + // TODO see if there is a more elegant solution to selecting the rewrite + // descriptor type + if (!Target.empty()) + DL->push_back(std::make_unique<ExplicitRewriteFunctionDescriptor>( + Source, Target, Naked)); + else + DL->push_back( + std::make_unique<PatternRewriteFunctionDescriptor>(Source, Transform)); + + return true; +} + +bool RewriteMapParser:: +parseRewriteGlobalVariableDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, + yaml::MappingNode *Descriptor, + RewriteDescriptorList *DL) { + std::string Source; + std::string Target; + std::string Transform; + + for (auto &Field : *Descriptor) { + yaml::ScalarNode *Key; + yaml::ScalarNode *Value; + SmallString<32> KeyStorage; + SmallString<32> ValueStorage; + StringRef KeyValue; + + Key = dyn_cast<yaml::ScalarNode>(Field.getKey()); + if (!Key) { + YS.printError(Field.getKey(), "descriptor Key must be a scalar"); + return false; + } + + Value = dyn_cast<yaml::ScalarNode>(Field.getValue()); + if (!Value) { + YS.printError(Field.getValue(), "descriptor value must be a scalar"); + return false; + } + + KeyValue = Key->getValue(KeyStorage); + if (KeyValue.equals("source")) { + std::string Error; + + Source = std::string(Value->getValue(ValueStorage)); + if (!Regex(Source).isValid(Error)) { + YS.printError(Field.getKey(), "invalid regex: " + Error); + return false; + } + } else if (KeyValue.equals("target")) { + Target = std::string(Value->getValue(ValueStorage)); + } else if (KeyValue.equals("transform")) { + Transform = std::string(Value->getValue(ValueStorage)); + } else { + YS.printError(Field.getKey(), "unknown Key for Global Variable"); + return false; + } + } + + if (Transform.empty() == Target.empty()) { + YS.printError(Descriptor, + "exactly one of transform or target must be specified"); + return false; + } + + if (!Target.empty()) + DL->push_back(std::make_unique<ExplicitRewriteGlobalVariableDescriptor>( + Source, Target, + /*Naked*/ false)); + else + DL->push_back(std::make_unique<PatternRewriteGlobalVariableDescriptor>( + Source, Transform)); + + return true; +} + +bool RewriteMapParser:: +parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, + yaml::MappingNode *Descriptor, + RewriteDescriptorList *DL) { + std::string Source; + std::string Target; + std::string Transform; + + for (auto &Field : *Descriptor) { + yaml::ScalarNode *Key; + 
yaml::ScalarNode *Value; + SmallString<32> KeyStorage; + SmallString<32> ValueStorage; + StringRef KeyValue; + + Key = dyn_cast<yaml::ScalarNode>(Field.getKey()); + if (!Key) { + YS.printError(Field.getKey(), "descriptor key must be a scalar"); + return false; + } + + Value = dyn_cast<yaml::ScalarNode>(Field.getValue()); + if (!Value) { + YS.printError(Field.getValue(), "descriptor value must be a scalar"); + return false; + } + + KeyValue = Key->getValue(KeyStorage); + if (KeyValue.equals("source")) { + std::string Error; + + Source = std::string(Value->getValue(ValueStorage)); + if (!Regex(Source).isValid(Error)) { + YS.printError(Field.getKey(), "invalid regex: " + Error); + return false; + } + } else if (KeyValue.equals("target")) { + Target = std::string(Value->getValue(ValueStorage)); + } else if (KeyValue.equals("transform")) { + Transform = std::string(Value->getValue(ValueStorage)); + } else { + YS.printError(Field.getKey(), "unknown key for Global Alias"); + return false; + } + } + + if (Transform.empty() == Target.empty()) { + YS.printError(Descriptor, + "exactly one of transform or target must be specified"); + return false; + } + + if (!Target.empty()) + DL->push_back(std::make_unique<ExplicitRewriteNamedAliasDescriptor>( + Source, Target, + /*Naked*/ false)); + else + DL->push_back(std::make_unique<PatternRewriteNamedAliasDescriptor>( + Source, Transform)); + + return true; +} + +namespace { + +class RewriteSymbolsLegacyPass : public ModulePass { +public: + static char ID; // Pass identification, replacement for typeid + + RewriteSymbolsLegacyPass(); + RewriteSymbolsLegacyPass(SymbolRewriter::RewriteDescriptorList &DL); + + bool runOnModule(Module &M) override; + +private: + RewriteSymbolPass Impl; +}; + +} // end anonymous namespace + +char RewriteSymbolsLegacyPass::ID = 0; + +RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass() : ModulePass(ID) { + initializeRewriteSymbolsLegacyPassPass(*PassRegistry::getPassRegistry()); +} + +RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass( + SymbolRewriter::RewriteDescriptorList &DL) + : ModulePass(ID), Impl(DL) {} + +bool RewriteSymbolsLegacyPass::runOnModule(Module &M) { + return Impl.runImpl(M); +} + +PreservedAnalyses RewriteSymbolPass::run(Module &M, ModuleAnalysisManager &AM) { + if (!runImpl(M)) + return PreservedAnalyses::all(); + + return PreservedAnalyses::none(); +} + +bool RewriteSymbolPass::runImpl(Module &M) { + bool Changed; + + Changed = false; + for (auto &Descriptor : Descriptors) + Changed |= Descriptor->performOnModule(M); + + return Changed; +} + +void RewriteSymbolPass::loadAndParseMapFiles() { + const std::vector<std::string> MapFiles(RewriteMapFiles); + SymbolRewriter::RewriteMapParser Parser; + + for (const auto &MapFile : MapFiles) + Parser.parse(MapFile, &Descriptors); +} + +INITIALIZE_PASS(RewriteSymbolsLegacyPass, "rewrite-symbols", "Rewrite Symbols", + false, false) + +ModulePass *llvm::createRewriteSymbolsPass() { + return new RewriteSymbolsLegacyPass(); +} + +ModulePass * +llvm::createRewriteSymbolsPass(SymbolRewriter::RewriteDescriptorList &DL) { + return new RewriteSymbolsLegacyPass(DL); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp new file mode 100644 index 0000000000..2b706858cb --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -0,0 +1,129 @@ +//===- UnifyFunctionExitNodes.cpp - Make all functions have a single exit -===// +// +// Part of the LLVM 
Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass is used to ensure that functions have at most one return and one +// unreachable instruction in them. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/InitializePasses.h" +#include "llvm/Transforms/Utils.h" +using namespace llvm; + +char UnifyFunctionExitNodesLegacyPass::ID = 0; + +UnifyFunctionExitNodesLegacyPass::UnifyFunctionExitNodesLegacyPass() + : FunctionPass(ID) { + initializeUnifyFunctionExitNodesLegacyPassPass( + *PassRegistry::getPassRegistry()); +} + +INITIALIZE_PASS(UnifyFunctionExitNodesLegacyPass, "mergereturn", + "Unify function exit nodes", false, false) + +Pass *llvm::createUnifyFunctionExitNodesPass() { + return new UnifyFunctionExitNodesLegacyPass(); +} + +void UnifyFunctionExitNodesLegacyPass::getAnalysisUsage( + AnalysisUsage &AU) const { + // We preserve the non-critical-edgeness property + AU.addPreservedID(BreakCriticalEdgesID); + // This is a cluster of orthogonal Transforms + AU.addPreservedID(LowerSwitchID); +} + +namespace { + +bool unifyUnreachableBlocks(Function &F) { + std::vector<BasicBlock *> UnreachableBlocks; + + for (BasicBlock &I : F) + if (isa<UnreachableInst>(I.getTerminator())) + UnreachableBlocks.push_back(&I); + + if (UnreachableBlocks.size() <= 1) + return false; + + BasicBlock *UnreachableBlock = + BasicBlock::Create(F.getContext(), "UnifiedUnreachableBlock", &F); + new UnreachableInst(F.getContext(), UnreachableBlock); + + for (BasicBlock *BB : UnreachableBlocks) { + BB->back().eraseFromParent(); // Remove the unreachable inst. + BranchInst::Create(UnreachableBlock, BB); + } + + return true; +} + +bool unifyReturnBlocks(Function &F) { + std::vector<BasicBlock *> ReturningBlocks; + + for (BasicBlock &I : F) + if (isa<ReturnInst>(I.getTerminator())) + ReturningBlocks.push_back(&I); + + if (ReturningBlocks.size() <= 1) + return false; + + // Insert a new basic block into the function, add PHI nodes (if the function + // returns values), and convert all of the return instructions into + // unconditional branches. + BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), + "UnifiedReturnBlock", &F); + + PHINode *PN = nullptr; + if (F.getReturnType()->isVoidTy()) { + ReturnInst::Create(F.getContext(), nullptr, NewRetBlock); + } else { + // If the function doesn't return void... add a PHI node to the block... + PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(), + "UnifiedRetVal"); + PN->insertInto(NewRetBlock, NewRetBlock->end()); + ReturnInst::Create(F.getContext(), PN, NewRetBlock); + } + + // Loop over all of the blocks, replacing the return instruction with an + // unconditional branch. + for (BasicBlock *BB : ReturningBlocks) { + // Add an incoming element to the PHI node for every return instruction that + // is merging into this new block... 
+ if (PN) + PN->addIncoming(BB->getTerminator()->getOperand(0), BB); + + BB->back().eraseFromParent(); // Remove the return insn + BranchInst::Create(NewRetBlock, BB); + } + + return true; +} +} // namespace + +// Unify all exit nodes of the CFG by creating a new BasicBlock, and converting +// all returns to unconditional branches to this new basic block. Also, unify +// all unreachable blocks. +bool UnifyFunctionExitNodesLegacyPass::runOnFunction(Function &F) { + bool Changed = false; + Changed |= unifyUnreachableBlocks(F); + Changed |= unifyReturnBlocks(F); + return Changed; +} + +PreservedAnalyses UnifyFunctionExitNodesPass::run(Function &F, + FunctionAnalysisManager &AM) { + bool Changed = false; + Changed |= unifyUnreachableBlocks(F); + Changed |= unifyReturnBlocks(F); + return Changed ? PreservedAnalyses() : PreservedAnalyses::all(); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/UnifyLoopExits.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/UnifyLoopExits.cpp new file mode 100644 index 0000000000..3be96ebc93 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/UnifyLoopExits.cpp @@ -0,0 +1,254 @@ +//===- UnifyLoopExits.cpp - Redirect exiting edges to one block -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// For each natural loop with multiple exit blocks, this pass creates a new +// block N such that all exiting blocks now branch to N, and then control flow +// is redistributed to all the original exit blocks. +// +// Limitation: This assumes that all terminators in the CFG are direct branches +// (the "br" instruction). The presence of any other control flow +// such as indirectbr, switch or callbr will cause an assert. 
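+//
+// For example (illustrative): given a loop with exiting blocks A and B that
+// branch to distinct exit blocks X and Y, the pass makes A and B branch to a
+// single new block (named "loop.exit" below), which records which exiting
+// block was taken and then dispatches control to X or Y accordingly.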
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/UnifyLoopExits.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/Analysis/DomTreeUpdater.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +#define DEBUG_TYPE "unify-loop-exits" + +using namespace llvm; + +static cl::opt<unsigned> MaxBooleansInControlFlowHub( + "max-booleans-in-control-flow-hub", cl::init(32), cl::Hidden, + cl::desc("Set the maximum number of outgoing blocks for using a boolean " + "value to record the exiting block in CreateControlFlowHub.")); + +namespace { +struct UnifyLoopExitsLegacyPass : public FunctionPass { + static char ID; + UnifyLoopExitsLegacyPass() : FunctionPass(ID) { + initializeUnifyLoopExitsLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequiredID(LowerSwitchID); + AU.addRequired<LoopInfoWrapperPass>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreservedID(LowerSwitchID); + AU.addPreserved<LoopInfoWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + } + + bool runOnFunction(Function &F) override; +}; +} // namespace + +char UnifyLoopExitsLegacyPass::ID = 0; + +FunctionPass *llvm::createUnifyLoopExitsPass() { + return new UnifyLoopExitsLegacyPass(); +} + +INITIALIZE_PASS_BEGIN(UnifyLoopExitsLegacyPass, "unify-loop-exits", + "Fixup each natural loop to have a single exit block", + false /* Only looks at CFG */, false /* Analysis Pass */) +INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_END(UnifyLoopExitsLegacyPass, "unify-loop-exits", + "Fixup each natural loop to have a single exit block", + false /* Only looks at CFG */, false /* Analysis Pass */) + +// The current transform introduces new control flow paths which may break the +// SSA requirement that every def must dominate all its uses. For example, +// consider a value D defined inside the loop that is used by some instruction +// U outside the loop. It follows that D dominates U, since the original +// program has valid SSA form. After merging the exits, all paths from D to U +// now flow through the unified exit block. In addition, there may be other +// paths that do not pass through D, but now reach the unified exit +// block. Thus, D no longer dominates U. +// +// Restore the dominance by creating a phi for each such D at the new unified +// loop exit. But when doing this, ignore any uses U that are in the new unified +// loop exit, since those were introduced specially when the block was created. +// +// The use of SSAUpdater seems like overkill for this operation. The location +// for creating the new PHI is well-known, and also the set of incoming blocks +// to the new PHI. 
+static void restoreSSA(const DominatorTree &DT, const Loop *L, + const SetVector<BasicBlock *> &Incoming, + BasicBlock *LoopExitBlock) { + using InstVector = SmallVector<Instruction *, 8>; + using IIMap = MapVector<Instruction *, InstVector>; + IIMap ExternalUsers; + for (auto *BB : L->blocks()) { + for (auto &I : *BB) { + for (auto &U : I.uses()) { + auto UserInst = cast<Instruction>(U.getUser()); + auto UserBlock = UserInst->getParent(); + if (UserBlock == LoopExitBlock) + continue; + if (L->contains(UserBlock)) + continue; + LLVM_DEBUG(dbgs() << "added ext use for " << I.getName() << "(" + << BB->getName() << ")" + << ": " << UserInst->getName() << "(" + << UserBlock->getName() << ")" + << "\n"); + ExternalUsers[&I].push_back(UserInst); + } + } + } + + for (auto II : ExternalUsers) { + // For each Def used outside the loop, create NewPhi in + // LoopExitBlock. NewPhi receives Def only along exiting blocks that + // dominate it, while the remaining values are undefined since those paths + // didn't exist in the original CFG. + auto Def = II.first; + LLVM_DEBUG(dbgs() << "externally used: " << Def->getName() << "\n"); + auto NewPhi = + PHINode::Create(Def->getType(), Incoming.size(), + Def->getName() + ".moved", &LoopExitBlock->front()); + for (auto *In : Incoming) { + LLVM_DEBUG(dbgs() << "predecessor " << In->getName() << ": "); + if (Def->getParent() == In || DT.dominates(Def, In)) { + LLVM_DEBUG(dbgs() << "dominated\n"); + NewPhi->addIncoming(Def, In); + } else { + LLVM_DEBUG(dbgs() << "not dominated\n"); + NewPhi->addIncoming(UndefValue::get(Def->getType()), In); + } + } + + LLVM_DEBUG(dbgs() << "external users:"); + for (auto *U : II.second) { + LLVM_DEBUG(dbgs() << " " << U->getName()); + U->replaceUsesOfWith(Def, NewPhi); + } + LLVM_DEBUG(dbgs() << "\n"); + } +} + +static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) { + // To unify the loop exits, we need a list of the exiting blocks as + // well as exit blocks. The functions for locating these lists both + // traverse the entire loop body. It is more efficient to first + // locate the exiting blocks and then examine their successors to + // locate the exit blocks. + SetVector<BasicBlock *> ExitingBlocks; + SetVector<BasicBlock *> Exits; + + // We need SetVectors, but the Loop API takes a vector, so we use a temporary. + SmallVector<BasicBlock *, 8> Temp; + L->getExitingBlocks(Temp); + for (auto *BB : Temp) { + ExitingBlocks.insert(BB); + for (auto *S : successors(BB)) { + auto SL = LI.getLoopFor(S); + // A successor is not an exit if it is directly or indirectly in the + // current loop. 
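+ // (For instance, a successor that lies in an inner loop of L is still
+ // contained in L and is therefore not treated as an exit.)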
+ if (SL == L || L->contains(SL)) + continue; + Exits.insert(S); + } + } + + LLVM_DEBUG( + dbgs() << "Found exit blocks:"; + for (auto Exit : Exits) { + dbgs() << " " << Exit->getName(); + } + dbgs() << "\n"; + + dbgs() << "Found exiting blocks:"; + for (auto EB : ExitingBlocks) { + dbgs() << " " << EB->getName(); + } + dbgs() << "\n";); + + if (Exits.size() <= 1) { + LLVM_DEBUG(dbgs() << "loop does not have multiple exits; nothing to do\n"); + return false; + } + + SmallVector<BasicBlock *, 8> GuardBlocks; + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); + auto LoopExitBlock = + CreateControlFlowHub(&DTU, GuardBlocks, ExitingBlocks, Exits, "loop.exit", + MaxBooleansInControlFlowHub.getValue()); + + restoreSSA(DT, L, ExitingBlocks, LoopExitBlock); + +#if defined(EXPENSIVE_CHECKS) + assert(DT.verify(DominatorTree::VerificationLevel::Full)); +#else + assert(DT.verify(DominatorTree::VerificationLevel::Fast)); +#endif // EXPENSIVE_CHECKS + L->verifyLoop(); + + // The guard blocks were created outside the loop, so they need to become + // members of the parent loop. + if (auto ParentLoop = L->getParentLoop()) { + for (auto *G : GuardBlocks) { + ParentLoop->addBasicBlockToLoop(G, LI); + } + ParentLoop->verifyLoop(); + } + +#if defined(EXPENSIVE_CHECKS) + LI.verify(DT); +#endif // EXPENSIVE_CHECKS + + return true; +} + +static bool runImpl(LoopInfo &LI, DominatorTree &DT) { + + bool Changed = false; + auto Loops = LI.getLoopsInPreorder(); + for (auto *L : Loops) { + LLVM_DEBUG(dbgs() << "Loop: " << L->getHeader()->getName() << " (depth: " + << LI.getLoopDepth(L->getHeader()) << ")\n"); + Changed |= unifyLoopExits(DT, LI, L); + } + return Changed; +} + +bool UnifyLoopExitsLegacyPass::runOnFunction(Function &F) { + LLVM_DEBUG(dbgs() << "===== Unifying loop exits in function " << F.getName() + << "\n"); + auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + + return runImpl(LI, DT); +} + +namespace llvm { + +PreservedAnalyses UnifyLoopExitsPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &LI = AM.getResult<LoopAnalysis>(F); + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + + if (!runImpl(LI, DT)) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve<LoopAnalysis>(); + PA.preserve<DominatorTreeAnalysis>(); + return PA; +} +} // namespace llvm diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/Utils.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/Utils.cpp new file mode 100644 index 0000000000..d002922cfd --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/Utils.cpp @@ -0,0 +1,65 @@ +//===-- Utils.cpp - TransformUtils Infrastructure -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the common initialization infrastructure for the +// TransformUtils library. 
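+//
+// For example (illustrative), a tool linking against TransformUtils would
+// typically call initializeTransformUtils(*PassRegistry::getPassRegistry())
+// before constructing any of the legacy passes registered below.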
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils.h" +#include "llvm-c/Initialization.h" +#include "llvm-c/Transforms/Utils.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" + +using namespace llvm; + +/// initializeTransformUtils - Initialize all passes in the TransformUtils +/// library. +void llvm::initializeTransformUtils(PassRegistry &Registry) { + initializeAddDiscriminatorsLegacyPassPass(Registry); + initializeAssumeSimplifyPassLegacyPassPass(Registry); + initializeAssumeBuilderPassLegacyPassPass(Registry); + initializeBreakCriticalEdgesPass(Registry); + initializeCanonicalizeFreezeInLoopsPass(Registry); + initializeInstNamerPass(Registry); + initializeLCSSAWrapperPassPass(Registry); + initializeLibCallsShrinkWrapLegacyPassPass(Registry); + initializeLoopSimplifyPass(Registry); + initializeLowerGlobalDtorsLegacyPassPass(Registry); + initializeLowerInvokeLegacyPassPass(Registry); + initializeLowerSwitchLegacyPassPass(Registry); + initializePromoteLegacyPassPass(Registry); + initializeStripNonLineTableDebugLegacyPassPass(Registry); + initializeUnifyFunctionExitNodesLegacyPassPass(Registry); + initializeMetaRenamerPass(Registry); + initializeStripGCRelocatesLegacyPass(Registry); + initializePredicateInfoPrinterLegacyPassPass(Registry); + initializeInjectTLIMappingsLegacyPass(Registry); + initializeFixIrreduciblePass(Registry); + initializeUnifyLoopExitsLegacyPassPass(Registry); +} + +/// LLVMInitializeTransformUtils - C binding for initializeTransformUtilsPasses. +void LLVMInitializeTransformUtils(LLVMPassRegistryRef R) { + initializeTransformUtils(*unwrap(R)); +} + +void LLVMAddLowerSwitchPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLowerSwitchPass()); +} + +void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createPromoteMemoryToRegisterPass()); +} + +void LLVMAddAddDiscriminatorsPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createAddDiscriminatorsPass()); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/VNCoercion.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/VNCoercion.cpp new file mode 100644 index 0000000000..f295a7e312 --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/VNCoercion.cpp @@ -0,0 +1,593 @@ +#include "llvm/Transforms/Utils/VNCoercion.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "vncoerce" + +namespace llvm { +namespace VNCoercion { + +static bool isFirstClassAggregateOrScalableType(Type *Ty) { + return Ty->isStructTy() || Ty->isArrayTy() || isa<ScalableVectorType>(Ty); +} + +/// Return true if coerceAvailableValueToLoadType will succeed. +bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, + const DataLayout &DL) { + Type *StoredTy = StoredVal->getType(); + + if (StoredTy == LoadTy) + return true; + + // If the loaded/stored value is a first class array/struct, or scalable type, + // don't try to transform them. We need to be able to bitcast to integer. + if (isFirstClassAggregateOrScalableType(LoadTy) || + isFirstClassAggregateOrScalableType(StoredTy)) + return false; + + uint64_t StoreSize = DL.getTypeSizeInBits(StoredTy).getFixedValue(); + + // The store size must be byte-aligned to support future type casts. 
+ if (llvm::alignTo(StoreSize, 8) != StoreSize) + return false; + + // The store has to be at least as big as the load. + if (StoreSize < DL.getTypeSizeInBits(LoadTy).getFixedValue()) + return false; + + bool StoredNI = DL.isNonIntegralPointerType(StoredTy->getScalarType()); + bool LoadNI = DL.isNonIntegralPointerType(LoadTy->getScalarType()); + // Don't coerce non-integral pointers to integers or vice versa. + if (StoredNI != LoadNI) { + // As a special case, allow coercion of memset used to initialize + // an array w/null. Despite non-integral pointers not generally having a + // specific bit pattern, we do assume null is zero. + if (auto *CI = dyn_cast<Constant>(StoredVal)) + return CI->isNullValue(); + return false; + } else if (StoredNI && LoadNI && + StoredTy->getPointerAddressSpace() != + LoadTy->getPointerAddressSpace()) { + return false; + } + + + // The implementation below uses inttoptr for vectors of unequal size; we + // can't allow this for non integral pointers. We could teach it to extract + // exact subvectors if desired. + if (StoredNI && StoreSize != DL.getTypeSizeInBits(LoadTy).getFixedValue()) + return false; + + if (StoredTy->isTargetExtTy() || LoadTy->isTargetExtTy()) + return false; + + return true; +} + +/// If we saw a store of a value to memory, and +/// then a load from a must-aliased pointer of a different type, try to coerce +/// the stored value. LoadedTy is the type of the load we want to replace. +/// IRB is IRBuilder used to insert new instructions. +/// +/// If we can't do it, return null. +Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy, + IRBuilderBase &Helper, + const DataLayout &DL) { + assert(canCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) && + "precondition violation - materialization can't fail"); + if (auto *C = dyn_cast<Constant>(StoredVal)) + StoredVal = ConstantFoldConstant(C, DL); + + // If this is already the right type, just return it. + Type *StoredValTy = StoredVal->getType(); + + uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy).getFixedValue(); + uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy).getFixedValue(); + + // If the store and reload are the same size, we can always reuse it. + if (StoredValSize == LoadedValSize) { + // Pointer to Pointer -> use bitcast. + if (StoredValTy->isPtrOrPtrVectorTy() && LoadedTy->isPtrOrPtrVectorTy()) { + StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy); + } else { + // Convert source pointers to integers, which can be bitcast. + if (StoredValTy->isPtrOrPtrVectorTy()) { + StoredValTy = DL.getIntPtrType(StoredValTy); + StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy); + } + + Type *TypeToCastTo = LoadedTy; + if (TypeToCastTo->isPtrOrPtrVectorTy()) + TypeToCastTo = DL.getIntPtrType(TypeToCastTo); + + if (StoredValTy != TypeToCastTo) + StoredVal = Helper.CreateBitCast(StoredVal, TypeToCastTo); + + // Cast to pointer if the load needs a pointer type. + if (LoadedTy->isPtrOrPtrVectorTy()) + StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy); + } + + if (auto *C = dyn_cast<ConstantExpr>(StoredVal)) + StoredVal = ConstantFoldConstant(C, DL); + + return StoredVal; + } + // If the loaded value is smaller than the available value, then we can + // extract out a piece from it. If the available value is too small, then we + // can't do anything. + assert(StoredValSize >= LoadedValSize && + "canCoerceMustAliasedValueToLoad fail"); + + // Convert source pointers to integers, which can be manipulated. 
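+ // As an illustrative note on the narrowing path below: extracting an i32 load
+ // from an i64 store keeps the low 32 bits on little-endian targets, while on
+ // big-endian targets the stored value is first shifted right by the 32-bit
+ // size difference and then truncated.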
+ if (StoredValTy->isPtrOrPtrVectorTy()) { + StoredValTy = DL.getIntPtrType(StoredValTy); + StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy); + } + + // Convert vectors and fp to integer, which can be manipulated. + if (!StoredValTy->isIntegerTy()) { + StoredValTy = IntegerType::get(StoredValTy->getContext(), StoredValSize); + StoredVal = Helper.CreateBitCast(StoredVal, StoredValTy); + } + + // If this is a big-endian system, we need to shift the value down to the low + // bits so that a truncate will work. + if (DL.isBigEndian()) { + uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy).getFixedValue() - + DL.getTypeStoreSizeInBits(LoadedTy).getFixedValue(); + StoredVal = Helper.CreateLShr( + StoredVal, ConstantInt::get(StoredVal->getType(), ShiftAmt)); + } + + // Truncate the integer to the right size now. + Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadedValSize); + StoredVal = Helper.CreateTruncOrBitCast(StoredVal, NewIntTy); + + if (LoadedTy != NewIntTy) { + // If the result is a pointer, inttoptr. + if (LoadedTy->isPtrOrPtrVectorTy()) + StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy); + else + // Otherwise, bitcast. + StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy); + } + + if (auto *C = dyn_cast<Constant>(StoredVal)) + StoredVal = ConstantFoldConstant(C, DL); + + return StoredVal; +} + +/// This function is called when we have a memdep query of a load that ends up +/// being a clobbering memory write (store, memset, memcpy, memmove). This +/// means that the write *may* provide bits used by the load but we can't be +/// sure because the pointers don't must-alias. +/// +/// Check this case to see if there is anything more we can do before we give +/// up. This returns -1 if we have to give up, or a byte number in the stored +/// value of the piece that feeds the load. +static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, + Value *WritePtr, + uint64_t WriteSizeInBits, + const DataLayout &DL) { + // If the loaded/stored value is a first class array/struct, or scalable type, + // don't try to transform them. We need to be able to bitcast to integer. + if (isFirstClassAggregateOrScalableType(LoadTy)) + return -1; + + int64_t StoreOffset = 0, LoadOffset = 0; + Value *StoreBase = + GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL); + Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL); + if (StoreBase != LoadBase) + return -1; + + uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedValue(); + + if ((WriteSizeInBits & 7) | (LoadSize & 7)) + return -1; + uint64_t StoreSize = WriteSizeInBits / 8; // Convert to bytes. + LoadSize /= 8; + + // If the Load isn't completely contained within the stored bits, we don't + // have all the bits to feed it. We could do something crazy in the future + // (issue a smaller load then merge the bits in) but this seems unlikely to be + // valuable. + if (StoreOffset > LoadOffset || + StoreOffset + int64_t(StoreSize) < LoadOffset + int64_t(LoadSize)) + return -1; + + // Okay, we can do this transformation. Return the number of bytes into the + // store that the load is. + return LoadOffset - StoreOffset; +} + +/// This function is called when we have a +/// memdep query of a load that ends up being a clobbering store. +int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr, + StoreInst *DepSI, const DataLayout &DL) { + auto *StoredVal = DepSI->getValueOperand(); + + // Cannot handle reading from store of first-class aggregate or scalable type. 
+ if (isFirstClassAggregateOrScalableType(StoredVal->getType())) + return -1; + + if (!canCoerceMustAliasedValueToLoad(StoredVal, LoadTy, DL)) + return -1; + + Value *StorePtr = DepSI->getPointerOperand(); + uint64_t StoreSize = + DL.getTypeSizeInBits(DepSI->getValueOperand()->getType()).getFixedValue(); + return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, StorePtr, StoreSize, + DL); +} + +/// Looks at a memory location for a load (specified by MemLocBase, Offs, and +/// Size) and compares it against a load. +/// +/// If the specified load could be safely widened to a larger integer load +/// that is 1) still efficient, 2) safe for the target, and 3) would provide +/// the specified memory location value, then this function returns the size +/// in bytes of the load width to use. If not, this returns zero. +static unsigned getLoadLoadClobberFullWidthSize(const Value *MemLocBase, + int64_t MemLocOffs, + unsigned MemLocSize, + const LoadInst *LI) { + // We can only extend simple integer loads. + if (!isa<IntegerType>(LI->getType()) || !LI->isSimple()) + return 0; + + // Load widening is hostile to ThreadSanitizer: it may cause false positives + // or make the reports more cryptic (access sizes are wrong). + if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread)) + return 0; + + const DataLayout &DL = LI->getModule()->getDataLayout(); + + // Get the base of this load. + int64_t LIOffs = 0; + const Value *LIBase = + GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, DL); + + // If the two pointers are not based on the same pointer, we can't tell that + // they are related. + if (LIBase != MemLocBase) + return 0; + + // Okay, the two values are based on the same pointer, but returned as + // no-alias. This happens when we have things like two byte loads at "P+1" + // and "P+3". Check to see if increasing the size of the "LI" load up to its + // alignment (or the largest native integer type) will allow us to load all + // the bits required by MemLoc. + + // If MemLoc is before LI, then no widening of LI will help us out. + if (MemLocOffs < LIOffs) + return 0; + + // Get the alignment of the load in bytes. We assume that it is safe to load + // any legal integer up to this size without a problem. For example, if we're + // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can + // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it + // to i16. + unsigned LoadAlign = LI->getAlign().value(); + + int64_t MemLocEnd = MemLocOffs + MemLocSize; + + // If no amount of rounding up will let MemLoc fit into LI, then bail out. + if (LIOffs + LoadAlign < MemLocEnd) + return 0; + + // This is the size of the load to try. Start with the next larger power of + // two. + unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits() / 8U; + NewLoadByteSize = NextPowerOf2(NewLoadByteSize); + + while (true) { + // If this load size is bigger than our known alignment or would not fit + // into a native integer register, then we fail. + if (NewLoadByteSize > LoadAlign || + !DL.fitsInLegalInteger(NewLoadByteSize * 8)) + return 0; + + if (LIOffs + NewLoadByteSize > MemLocEnd && + (LI->getParent()->getParent()->hasFnAttribute( + Attribute::SanitizeAddress) || + LI->getParent()->getParent()->hasFnAttribute( + Attribute::SanitizeHWAddress))) + // We will be reading past the location accessed by the original program. + // While this is safe in a regular build, Address Safety analysis tools + // may start reporting false warnings. 
So, don't do widening. + return 0; + + // If a load of this width would include all of MemLoc, then we succeed. + if (LIOffs + NewLoadByteSize >= MemLocEnd) + return NewLoadByteSize; + + NewLoadByteSize <<= 1; + } +} + +/// This function is called when we have a +/// memdep query of a load that ends up being clobbered by another load. See if +/// the other load can feed into the second load. +int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI, + const DataLayout &DL) { + // Cannot handle reading from store of first-class aggregate yet. + if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy()) + return -1; + + if (!canCoerceMustAliasedValueToLoad(DepLI, LoadTy, DL)) + return -1; + + Value *DepPtr = DepLI->getPointerOperand(); + uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType()).getFixedValue(); + int R = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL); + if (R != -1) + return R; + + // If we have a load/load clobber an DepLI can be widened to cover this load, + // then we should widen it! + int64_t LoadOffs = 0; + const Value *LoadBase = + GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL); + unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedValue(); + + unsigned Size = + getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI); + if (Size == 0) + return -1; + + // Check non-obvious conditions enforced by MDA which we rely on for being + // able to materialize this potentially available value + assert(DepLI->isSimple() && "Cannot widen volatile/atomic load!"); + assert(DepLI->getType()->isIntegerTy() && "Can't widen non-integer load"); + + return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size * 8, DL); +} + +int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, + MemIntrinsic *MI, const DataLayout &DL) { + // If the mem operation is a non-constant size, we can't handle it. + ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength()); + if (!SizeCst) + return -1; + uint64_t MemSizeInBits = SizeCst->getZExtValue() * 8; + + // If this is memset, we just need to see if the offset is valid in the size + // of the memset.. + if (const auto *memset_inst = dyn_cast<MemSetInst>(MI)) { + if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) { + auto *CI = dyn_cast<ConstantInt>(memset_inst->getValue()); + if (!CI || !CI->isZero()) + return -1; + } + return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(), + MemSizeInBits, DL); + } + + // If we have a memcpy/memmove, the only case we can handle is if this is a + // copy from constant memory. In that case, we can read directly from the + // constant memory. + MemTransferInst *MTI = cast<MemTransferInst>(MI); + + Constant *Src = dyn_cast<Constant>(MTI->getSource()); + if (!Src) + return -1; + + GlobalVariable *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(Src)); + if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) + return -1; + + // See if the access is within the bounds of the transfer. + int Offset = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(), + MemSizeInBits, DL); + if (Offset == -1) + return Offset; + + // Otherwise, see if we can constant fold a load from the constant with the + // offset applied as appropriate. 
+ unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType()); + if (ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset), DL)) + return Offset; + return -1; +} + +static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset, + Type *LoadTy, IRBuilderBase &Builder, + const DataLayout &DL) { + LLVMContext &Ctx = SrcVal->getType()->getContext(); + + // If two pointers are in the same address space, they have the same size, + // so we don't need to do any truncation, etc. This avoids introducing + // ptrtoint instructions for pointers that may be non-integral. + if (SrcVal->getType()->isPointerTy() && LoadTy->isPointerTy() && + cast<PointerType>(SrcVal->getType())->getAddressSpace() == + cast<PointerType>(LoadTy)->getAddressSpace()) { + return SrcVal; + } + + uint64_t StoreSize = + (DL.getTypeSizeInBits(SrcVal->getType()).getFixedValue() + 7) / 8; + uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy).getFixedValue() + 7) / 8; + // Compute which bits of the stored value are being used by the load. Convert + // to an integer type to start with. + if (SrcVal->getType()->isPtrOrPtrVectorTy()) + SrcVal = + Builder.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType())); + if (!SrcVal->getType()->isIntegerTy()) + SrcVal = + Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8)); + + // Shift the bits to the least significant depending on endianness. + unsigned ShiftAmt; + if (DL.isLittleEndian()) + ShiftAmt = Offset * 8; + else + ShiftAmt = (StoreSize - LoadSize - Offset) * 8; + if (ShiftAmt) + SrcVal = Builder.CreateLShr(SrcVal, + ConstantInt::get(SrcVal->getType(), ShiftAmt)); + + if (LoadSize != StoreSize) + SrcVal = Builder.CreateTruncOrBitCast(SrcVal, + IntegerType::get(Ctx, LoadSize * 8)); + return SrcVal; +} + +/// This function is called when we have a memdep query of a load that ends up +/// being a clobbering store. This means that the store provides bits used by +/// the load but the pointers don't must-alias. Check this case to see if +/// there is anything more we can do before we give up. +Value *getStoreValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy, + Instruction *InsertPt, const DataLayout &DL) { + + IRBuilder<> Builder(InsertPt); + SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, Builder, DL); + return coerceAvailableValueToLoadType(SrcVal, LoadTy, Builder, DL); +} + +Constant *getConstantStoreValueForLoad(Constant *SrcVal, unsigned Offset, + Type *LoadTy, const DataLayout &DL) { + return ConstantFoldLoadFromConst(SrcVal, LoadTy, APInt(32, Offset), DL); +} + +/// This function is called when we have a memdep query of a load that ends up +/// being a clobbering load. This means that the load *may* provide bits used +/// by the load but we can't be sure because the pointers don't must-alias. +/// Check this case to see if there is anything more we can do before we give +/// up. +Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy, + Instruction *InsertPt, const DataLayout &DL) { + // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to + // widen SrcVal out to a larger load. 
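+ // For example (illustrative): an i32 load at Offset 2 over an existing i16
+ // load (SrcValStoreSize == 2) needs bytes [2, 6), so the old load is widened
+ // below to the next power-of-two size that covers them, here an i64 load.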
+ unsigned SrcValStoreSize = + DL.getTypeStoreSize(SrcVal->getType()).getFixedValue(); + unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedValue(); + if (Offset + LoadSize > SrcValStoreSize) { + assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!"); + assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load"); + // If we have a load/load clobber an DepLI can be widened to cover this + // load, then we should widen it to the next power of 2 size big enough! + unsigned NewLoadSize = Offset + LoadSize; + if (!isPowerOf2_32(NewLoadSize)) + NewLoadSize = NextPowerOf2(NewLoadSize); + + Value *PtrVal = SrcVal->getPointerOperand(); + // Insert the new load after the old load. This ensures that subsequent + // memdep queries will find the new load. We can't easily remove the old + // load completely because it is already in the value numbering table. + IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal)); + Type *DestTy = IntegerType::get(LoadTy->getContext(), NewLoadSize * 8); + Type *DestPTy = + PointerType::get(DestTy, PtrVal->getType()->getPointerAddressSpace()); + Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc()); + PtrVal = Builder.CreateBitCast(PtrVal, DestPTy); + LoadInst *NewLoad = Builder.CreateLoad(DestTy, PtrVal); + NewLoad->takeName(SrcVal); + NewLoad->setAlignment(SrcVal->getAlign()); + + LLVM_DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n"); + LLVM_DEBUG(dbgs() << "TO: " << *NewLoad << "\n"); + + // Replace uses of the original load with the wider load. On a big endian + // system, we need to shift down to get the relevant bits. + Value *RV = NewLoad; + if (DL.isBigEndian()) + RV = Builder.CreateLShr(RV, (NewLoadSize - SrcValStoreSize) * 8); + RV = Builder.CreateTrunc(RV, SrcVal->getType()); + SrcVal->replaceAllUsesWith(RV); + + SrcVal = NewLoad; + } + + return getStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, DL); +} + +Constant *getConstantLoadValueForLoad(Constant *SrcVal, unsigned Offset, + Type *LoadTy, const DataLayout &DL) { + unsigned SrcValStoreSize = + DL.getTypeStoreSize(SrcVal->getType()).getFixedValue(); + unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedValue(); + if (Offset + LoadSize > SrcValStoreSize) + return nullptr; + return getConstantStoreValueForLoad(SrcVal, Offset, LoadTy, DL); +} + +/// This function is called when we have a +/// memdep query of a load that ends up being a clobbering mem intrinsic. +Value *getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, + Type *LoadTy, Instruction *InsertPt, + const DataLayout &DL) { + LLVMContext &Ctx = LoadTy->getContext(); + uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedValue() / 8; + IRBuilder<> Builder(InsertPt); + + // We know that this method is only called when the mem transfer fully + // provides the bits for the load. + if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) { + // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and + // independently of what the offset is. + Value *Val = MSI->getValue(); + if (LoadSize != 1) + Val = + Builder.CreateZExtOrBitCast(Val, IntegerType::get(Ctx, LoadSize * 8)); + Value *OneElt = Val; + + // Splat the value out to the right number of bits. + for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize;) { + // If we can double the number of bytes set, do it. 
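+ // For example (illustrative), with LoadSize == 4 and a memset value of 0xAB,
+ // Val grows 0xAB -> 0xABAB -> 0xABABABAB over two doubling steps.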
+ if (NumBytesSet * 2 <= LoadSize) { + Value *ShVal = Builder.CreateShl( + Val, ConstantInt::get(Val->getType(), NumBytesSet * 8)); + Val = Builder.CreateOr(Val, ShVal); + NumBytesSet <<= 1; + continue; + } + + // Otherwise insert one byte at a time. + Value *ShVal = + Builder.CreateShl(Val, ConstantInt::get(Val->getType(), 1 * 8)); + Val = Builder.CreateOr(OneElt, ShVal); + ++NumBytesSet; + } + + return coerceAvailableValueToLoadType(Val, LoadTy, Builder, DL); + } + + // Otherwise, this is a memcpy/memmove from a constant global. + MemTransferInst *MTI = cast<MemTransferInst>(SrcInst); + Constant *Src = cast<Constant>(MTI->getSource()); + unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType()); + return ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset), + DL); +} + +Constant *getConstantMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, + Type *LoadTy, const DataLayout &DL) { + LLVMContext &Ctx = LoadTy->getContext(); + uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedValue() / 8; + + // We know that this method is only called when the mem transfer fully + // provides the bits for the load. + if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) { + auto *Val = dyn_cast<ConstantInt>(MSI->getValue()); + if (!Val) + return nullptr; + + Val = ConstantInt::get(Ctx, APInt::getSplat(LoadSize * 8, Val->getValue())); + return ConstantFoldLoadFromConst(Val, LoadTy, DL); + } + + // Otherwise, this is a memcpy/memmove from a constant global. + MemTransferInst *MTI = cast<MemTransferInst>(SrcInst); + Constant *Src = cast<Constant>(MTI->getSource()); + unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType()); + return ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset), + DL); +} +} // namespace VNCoercion +} // namespace llvm diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/ValueMapper.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/ValueMapper.cpp new file mode 100644 index 0000000000..a5edbb2acc --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/ValueMapper.cpp @@ -0,0 +1,1209 @@ +//===- ValueMapper.cpp - Interface shared by lib/Transforms/Utils ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the MapValue function, which is shared by various parts of +// the lib/Transforms/Utils library. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalIFunc.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include <cassert> +#include <limits> +#include <memory> +#include <utility> + +using namespace llvm; + +#define DEBUG_TYPE "value-mapper" + +// Out of line method to get vtable etc for class. +void ValueMapTypeRemapper::anchor() {} +void ValueMaterializer::anchor() {} + +namespace { + +/// A basic block used in a BlockAddress whose function body is not yet +/// materialized. +struct DelayedBasicBlock { + BasicBlock *OldBB; + std::unique_ptr<BasicBlock> TempBB; + + DelayedBasicBlock(const BlockAddress &Old) + : OldBB(Old.getBasicBlock()), + TempBB(BasicBlock::Create(Old.getContext())) {} +}; + +struct WorklistEntry { + enum EntryKind { + MapGlobalInit, + MapAppendingVar, + MapAliasOrIFunc, + RemapFunction + }; + struct GVInitTy { + GlobalVariable *GV; + Constant *Init; + }; + struct AppendingGVTy { + GlobalVariable *GV; + Constant *InitPrefix; + }; + struct AliasOrIFuncTy { + GlobalValue *GV; + Constant *Target; + }; + + unsigned Kind : 2; + unsigned MCID : 29; + unsigned AppendingGVIsOldCtorDtor : 1; + unsigned AppendingGVNumNewMembers; + union { + GVInitTy GVInit; + AppendingGVTy AppendingGV; + AliasOrIFuncTy AliasOrIFunc; + Function *RemapF; + } Data; +}; + +struct MappingContext { + ValueToValueMapTy *VM; + ValueMaterializer *Materializer = nullptr; + + /// Construct a MappingContext with a value map and materializer. + explicit MappingContext(ValueToValueMapTy &VM, + ValueMaterializer *Materializer = nullptr) + : VM(&VM), Materializer(Materializer) {} +}; + +class Mapper { + friend class MDNodeMapper; + +#ifndef NDEBUG + DenseSet<GlobalValue *> AlreadyScheduled; +#endif + + RemapFlags Flags; + ValueMapTypeRemapper *TypeMapper; + unsigned CurrentMCID = 0; + SmallVector<MappingContext, 2> MCs; + SmallVector<WorklistEntry, 4> Worklist; + SmallVector<DelayedBasicBlock, 1> DelayedBBs; + SmallVector<Constant *, 16> AppendingInits; + +public: + Mapper(ValueToValueMapTy &VM, RemapFlags Flags, + ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer) + : Flags(Flags), TypeMapper(TypeMapper), + MCs(1, MappingContext(VM, Materializer)) {} + + /// ValueMapper should explicitly call \a flush() before destruction. 
+ ~Mapper() { assert(!hasWorkToDo() && "Expected to be flushed"); } + + bool hasWorkToDo() const { return !Worklist.empty(); } + + unsigned + registerAlternateMappingContext(ValueToValueMapTy &VM, + ValueMaterializer *Materializer = nullptr) { + MCs.push_back(MappingContext(VM, Materializer)); + return MCs.size() - 1; + } + + void addFlags(RemapFlags Flags); + + void remapGlobalObjectMetadata(GlobalObject &GO); + + Value *mapValue(const Value *V); + void remapInstruction(Instruction *I); + void remapFunction(Function &F); + + Constant *mapConstant(const Constant *C) { + return cast_or_null<Constant>(mapValue(C)); + } + + /// Map metadata. + /// + /// Find the mapping for MD. Guarantees that the return will be resolved + /// (not an MDNode, or MDNode::isResolved() returns true). + Metadata *mapMetadata(const Metadata *MD); + + void scheduleMapGlobalInitializer(GlobalVariable &GV, Constant &Init, + unsigned MCID); + void scheduleMapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix, + bool IsOldCtorDtor, + ArrayRef<Constant *> NewMembers, + unsigned MCID); + void scheduleMapAliasOrIFunc(GlobalValue &GV, Constant &Target, + unsigned MCID); + void scheduleRemapFunction(Function &F, unsigned MCID); + + void flush(); + +private: + void mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix, + bool IsOldCtorDtor, + ArrayRef<Constant *> NewMembers); + + ValueToValueMapTy &getVM() { return *MCs[CurrentMCID].VM; } + ValueMaterializer *getMaterializer() { return MCs[CurrentMCID].Materializer; } + + Value *mapBlockAddress(const BlockAddress &BA); + + /// Map metadata that doesn't require visiting operands. + std::optional<Metadata *> mapSimpleMetadata(const Metadata *MD); + + Metadata *mapToMetadata(const Metadata *Key, Metadata *Val); + Metadata *mapToSelf(const Metadata *MD); +}; + +class MDNodeMapper { + Mapper &M; + + /// Data about a node in \a UniquedGraph. + struct Data { + bool HasChanged = false; + unsigned ID = std::numeric_limits<unsigned>::max(); + TempMDNode Placeholder; + }; + + /// A graph of uniqued nodes. + struct UniquedGraph { + SmallDenseMap<const Metadata *, Data, 32> Info; // Node properties. + SmallVector<MDNode *, 16> POT; // Post-order traversal. + + /// Propagate changed operands through the post-order traversal. + /// + /// Iteratively update \a Data::HasChanged for each node based on \a + /// Data::HasChanged of its operands, until fixed point. + void propagateChanges(); + + /// Get a forward reference to a node to use as an operand. + Metadata &getFwdReference(MDNode &Op); + }; + + /// Worklist of distinct nodes whose operands need to be remapped. + SmallVector<MDNode *, 16> DistinctWorklist; + + // Storage for a UniquedGraph. + SmallDenseMap<const Metadata *, Data, 32> InfoStorage; + SmallVector<MDNode *, 16> POTStorage; + +public: + MDNodeMapper(Mapper &M) : M(M) {} + + /// Map a metadata node (and its transitive operands). + /// + /// Map all the (unmapped) nodes in the subgraph under \c N. The iterative + /// algorithm handles distinct nodes and uniqued node subgraphs using + /// different strategies. + /// + /// Distinct nodes are immediately mapped and added to \a DistinctWorklist + /// using \a mapDistinctNode(). Their mapping can always be computed + /// immediately without visiting operands, even if their operands change. + /// + /// The mapping for uniqued nodes depends on whether their operands change. + /// \a mapTopLevelUniquedNode() traverses the transitive uniqued subgraph of + /// a node to calculate uniqued node mappings in bulk. 
Distinct leafs are + /// added to \a DistinctWorklist with \a mapDistinctNode(). + /// + /// After mapping \c N itself, this function remaps the operands of the + /// distinct nodes in \a DistinctWorklist until the entire subgraph under \c + /// N has been mapped. + Metadata *map(const MDNode &N); + +private: + /// Map a top-level uniqued node and the uniqued subgraph underneath it. + /// + /// This builds up a post-order traversal of the (unmapped) uniqued subgraph + /// underneath \c FirstN and calculates the nodes' mapping. Each node uses + /// the identity mapping (\a Mapper::mapToSelf()) as long as all of its + /// operands uses the identity mapping. + /// + /// The algorithm works as follows: + /// + /// 1. \a createPOT(): traverse the uniqued subgraph under \c FirstN and + /// save the post-order traversal in the given \a UniquedGraph, tracking + /// nodes' operands change. + /// + /// 2. \a UniquedGraph::propagateChanges(): propagate changed operands + /// through the \a UniquedGraph until fixed point, following the rule + /// that if a node changes, any node that references must also change. + /// + /// 3. \a mapNodesInPOT(): map the uniqued nodes, creating new uniqued nodes + /// (referencing new operands) where necessary. + Metadata *mapTopLevelUniquedNode(const MDNode &FirstN); + + /// Try to map the operand of an \a MDNode. + /// + /// If \c Op is already mapped, return the mapping. If it's not an \a + /// MDNode, compute and return the mapping. If it's a distinct \a MDNode, + /// return the result of \a mapDistinctNode(). + /// + /// \return std::nullopt if \c Op is an unmapped uniqued \a MDNode. + /// \post getMappedOp(Op) only returns std::nullopt if this returns + /// std::nullopt. + std::optional<Metadata *> tryToMapOperand(const Metadata *Op); + + /// Map a distinct node. + /// + /// Return the mapping for the distinct node \c N, saving the result in \a + /// DistinctWorklist for later remapping. + /// + /// \pre \c N is not yet mapped. + /// \pre \c N.isDistinct(). + MDNode *mapDistinctNode(const MDNode &N); + + /// Get a previously mapped node. + std::optional<Metadata *> getMappedOp(const Metadata *Op) const; + + /// Create a post-order traversal of an unmapped uniqued node subgraph. + /// + /// This traverses the metadata graph deeply enough to map \c FirstN. It + /// uses \a tryToMapOperand() (via \a Mapper::mapSimplifiedNode()), so any + /// metadata that has already been mapped will not be part of the POT. + /// + /// Each node that has a changed operand from outside the graph (e.g., a + /// distinct node, an already-mapped uniqued node, or \a ConstantAsMetadata) + /// is marked with \a Data::HasChanged. + /// + /// \return \c true if any nodes in \c G have \a Data::HasChanged. + /// \post \c G.POT is a post-order traversal ending with \c FirstN. + /// \post \a Data::hasChanged in \c G.Info indicates whether any node needs + /// to change because of operands outside the graph. + bool createPOT(UniquedGraph &G, const MDNode &FirstN); + + /// Visit the operands of a uniqued node in the POT. + /// + /// Visit the operands in the range from \c I to \c E, returning the first + /// uniqued node we find that isn't yet in \c G. \c I is always advanced to + /// where to continue the loop through the operands. + /// + /// This sets \c HasChanged if any of the visited operands change. + MDNode *visitOperands(UniquedGraph &G, MDNode::op_iterator &I, + MDNode::op_iterator E, bool &HasChanged); + + /// Map all the nodes in the given uniqued graph. 
+ /// + /// This visits all the nodes in \c G in post-order, using the identity + /// mapping or creating a new node depending on \a Data::HasChanged. + /// + /// \pre \a getMappedOp() returns std::nullopt for nodes in \c G, but not for + /// any of their operands outside of \c G. \pre \a Data::HasChanged is true + /// for a node in \c G iff any of its operands have changed. \post \a + /// getMappedOp() returns the mapped node for every node in \c G. + void mapNodesInPOT(UniquedGraph &G); + + /// Remap a node's operands using the given functor. + /// + /// Iterate through the operands of \c N and update them in place using \c + /// mapOperand. + /// + /// \pre N.isDistinct() or N.isTemporary(). + template <class OperandMapper> + void remapOperands(MDNode &N, OperandMapper mapOperand); +}; + +} // end anonymous namespace + +Value *Mapper::mapValue(const Value *V) { + ValueToValueMapTy::iterator I = getVM().find(V); + + // If the value already exists in the map, use it. + if (I != getVM().end()) { + assert(I->second && "Unexpected null mapping"); + return I->second; + } + + // If we have a materializer and it can materialize a value, use that. + if (auto *Materializer = getMaterializer()) { + if (Value *NewV = Materializer->materialize(const_cast<Value *>(V))) { + getVM()[V] = NewV; + return NewV; + } + } + + // Global values do not need to be seeded into the VM if they + // are using the identity mapping. + if (isa<GlobalValue>(V)) { + if (Flags & RF_NullMapMissingGlobalValues) + return nullptr; + return getVM()[V] = const_cast<Value *>(V); + } + + if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) { + // Inline asm may need *type* remapping. + FunctionType *NewTy = IA->getFunctionType(); + if (TypeMapper) { + NewTy = cast<FunctionType>(TypeMapper->remapType(NewTy)); + + if (NewTy != IA->getFunctionType()) + V = InlineAsm::get(NewTy, IA->getAsmString(), IA->getConstraintString(), + IA->hasSideEffects(), IA->isAlignStack(), + IA->getDialect(), IA->canThrow()); + } + + return getVM()[V] = const_cast<Value *>(V); + } + + if (const auto *MDV = dyn_cast<MetadataAsValue>(V)) { + const Metadata *MD = MDV->getMetadata(); + + if (auto *LAM = dyn_cast<LocalAsMetadata>(MD)) { + // Look through to grab the local value. + if (Value *LV = mapValue(LAM->getValue())) { + if (V == LAM->getValue()) + return const_cast<Value *>(V); + return MetadataAsValue::get(V->getContext(), ValueAsMetadata::get(LV)); + } + + // FIXME: always return nullptr once Verifier::verifyDominatesUse() + // ensures metadata operands only reference defined SSA values. + return (Flags & RF_IgnoreMissingLocals) + ? nullptr + : MetadataAsValue::get( + V->getContext(), + MDTuple::get(V->getContext(), std::nullopt)); + } + if (auto *AL = dyn_cast<DIArgList>(MD)) { + SmallVector<ValueAsMetadata *, 4> MappedArgs; + for (auto *VAM : AL->getArgs()) { + // Map both Local and Constant VAMs here; they will both ultimately + // be mapped via mapValue. The exceptions are constants when we have no + // module level changes and locals when they have no existing mapped + // value and RF_IgnoreMissingLocals is set; these have identity + // mappings. + if ((Flags & RF_NoModuleLevelChanges) && isa<ConstantAsMetadata>(VAM)) { + MappedArgs.push_back(VAM); + } else if (Value *LV = mapValue(VAM->getValue())) { + MappedArgs.push_back( + LV == VAM->getValue() ? 
VAM : ValueAsMetadata::get(LV)); + } else if ((Flags & RF_IgnoreMissingLocals) && isa<LocalAsMetadata>(VAM)) { + MappedArgs.push_back(VAM); + } else { + // If we cannot map the value, set the argument as undef. + MappedArgs.push_back(ValueAsMetadata::get( + UndefValue::get(VAM->getValue()->getType()))); + } + } + return MetadataAsValue::get(V->getContext(), + DIArgList::get(V->getContext(), MappedArgs)); + } + + // If this is a module-level metadata and we know that nothing at the module + // level is changing, then use an identity mapping. + if (Flags & RF_NoModuleLevelChanges) + return getVM()[V] = const_cast<Value *>(V); + + // Map the metadata and turn it into a value. + auto *MappedMD = mapMetadata(MD); + if (MD == MappedMD) + return getVM()[V] = const_cast<Value *>(V); + return getVM()[V] = MetadataAsValue::get(V->getContext(), MappedMD); + } + + // Okay, this either must be a constant (which may or may not be mappable) or + // is something that is not in the mapping table. + Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V)); + if (!C) + return nullptr; + + if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) + return mapBlockAddress(*BA); + + if (const auto *E = dyn_cast<DSOLocalEquivalent>(C)) { + auto *Val = mapValue(E->getGlobalValue()); + GlobalValue *GV = dyn_cast<GlobalValue>(Val); + if (GV) + return getVM()[E] = DSOLocalEquivalent::get(GV); + + auto *Func = cast<Function>(Val->stripPointerCastsAndAliases()); + Type *NewTy = E->getType(); + if (TypeMapper) + NewTy = TypeMapper->remapType(NewTy); + return getVM()[E] = llvm::ConstantExpr::getBitCast( + DSOLocalEquivalent::get(Func), NewTy); + } + + if (const auto *NC = dyn_cast<NoCFIValue>(C)) { + auto *Val = mapValue(NC->getGlobalValue()); + GlobalValue *GV = cast<GlobalValue>(Val); + return getVM()[NC] = NoCFIValue::get(GV); + } + + auto mapValueOrNull = [this](Value *V) { + auto Mapped = mapValue(V); + assert((Mapped || (Flags & RF_NullMapMissingGlobalValues)) && + "Unexpected null mapping for constant operand without " + "NullMapMissingGlobalValues flag"); + return Mapped; + }; + + // Otherwise, we have some other constant to remap. Start by checking to see + // if all operands have an identity remapping. + unsigned OpNo = 0, NumOperands = C->getNumOperands(); + Value *Mapped = nullptr; + for (; OpNo != NumOperands; ++OpNo) { + Value *Op = C->getOperand(OpNo); + Mapped = mapValueOrNull(Op); + if (!Mapped) + return nullptr; + if (Mapped != Op) + break; + } + + // See if the type mapper wants to remap the type as well. + Type *NewTy = C->getType(); + if (TypeMapper) + NewTy = TypeMapper->remapType(NewTy); + + // If the result type and all operands match up, then just insert an identity + // mapping. + if (OpNo == NumOperands && NewTy == C->getType()) + return getVM()[V] = C; + + // Okay, we need to create a new constant. We've already processed some or + // all of the operands, set them all up now. + SmallVector<Constant*, 8> Ops; + Ops.reserve(NumOperands); + for (unsigned j = 0; j != OpNo; ++j) + Ops.push_back(cast<Constant>(C->getOperand(j))); + + // If one of the operands mismatch, push it and the other mapped operands. + if (OpNo != NumOperands) { + Ops.push_back(cast<Constant>(Mapped)); + + // Map the rest of the operands that aren't processed yet. 
+ for (++OpNo; OpNo != NumOperands; ++OpNo) { + Mapped = mapValueOrNull(C->getOperand(OpNo)); + if (!Mapped) + return nullptr; + Ops.push_back(cast<Constant>(Mapped)); + } + } + Type *NewSrcTy = nullptr; + if (TypeMapper) + if (auto *GEPO = dyn_cast<GEPOperator>(C)) + NewSrcTy = TypeMapper->remapType(GEPO->getSourceElementType()); + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + return getVM()[V] = CE->getWithOperands(Ops, NewTy, false, NewSrcTy); + if (isa<ConstantArray>(C)) + return getVM()[V] = ConstantArray::get(cast<ArrayType>(NewTy), Ops); + if (isa<ConstantStruct>(C)) + return getVM()[V] = ConstantStruct::get(cast<StructType>(NewTy), Ops); + if (isa<ConstantVector>(C)) + return getVM()[V] = ConstantVector::get(Ops); + // If this is a no-operand constant, it must be because the type was remapped. + if (isa<UndefValue>(C)) + return getVM()[V] = UndefValue::get(NewTy); + if (isa<ConstantAggregateZero>(C)) + return getVM()[V] = ConstantAggregateZero::get(NewTy); + assert(isa<ConstantPointerNull>(C)); + return getVM()[V] = ConstantPointerNull::get(cast<PointerType>(NewTy)); +} + +Value *Mapper::mapBlockAddress(const BlockAddress &BA) { + Function *F = cast<Function>(mapValue(BA.getFunction())); + + // F may not have materialized its initializer. In that case, create a + // dummy basic block for now, and replace it once we've materialized all + // the initializers. + BasicBlock *BB; + if (F->empty()) { + DelayedBBs.push_back(DelayedBasicBlock(BA)); + BB = DelayedBBs.back().TempBB.get(); + } else { + BB = cast_or_null<BasicBlock>(mapValue(BA.getBasicBlock())); + } + + return getVM()[&BA] = BlockAddress::get(F, BB ? BB : BA.getBasicBlock()); +} + +Metadata *Mapper::mapToMetadata(const Metadata *Key, Metadata *Val) { + getVM().MD()[Key].reset(Val); + return Val; +} + +Metadata *Mapper::mapToSelf(const Metadata *MD) { + return mapToMetadata(MD, const_cast<Metadata *>(MD)); +} + +std::optional<Metadata *> MDNodeMapper::tryToMapOperand(const Metadata *Op) { + if (!Op) + return nullptr; + + if (std::optional<Metadata *> MappedOp = M.mapSimpleMetadata(Op)) { +#ifndef NDEBUG + if (auto *CMD = dyn_cast<ConstantAsMetadata>(Op)) + assert((!*MappedOp || M.getVM().count(CMD->getValue()) || + M.getVM().getMappedMD(Op)) && + "Expected Value to be memoized"); + else + assert((isa<MDString>(Op) || M.getVM().getMappedMD(Op)) && + "Expected result to be memoized"); +#endif + return *MappedOp; + } + + const MDNode &N = *cast<MDNode>(Op); + if (N.isDistinct()) + return mapDistinctNode(N); + return std::nullopt; +} + +MDNode *MDNodeMapper::mapDistinctNode(const MDNode &N) { + assert(N.isDistinct() && "Expected a distinct node"); + assert(!M.getVM().getMappedMD(&N) && "Expected an unmapped node"); + Metadata *NewM = nullptr; + + if (M.Flags & RF_ReuseAndMutateDistinctMDs) { + NewM = M.mapToSelf(&N); + } else { + NewM = MDNode::replaceWithDistinct(N.clone()); + LLVM_DEBUG(dbgs() << "\nMap " << N << "\n" + << "To " << *NewM << "\n\n"); + M.mapToMetadata(&N, NewM); + } + DistinctWorklist.push_back(cast<MDNode>(NewM)); + + return DistinctWorklist.back(); +} + +static ConstantAsMetadata *wrapConstantAsMetadata(const ConstantAsMetadata &CMD, + Value *MappedV) { + if (CMD.getValue() == MappedV) + return const_cast<ConstantAsMetadata *>(&CMD); + return MappedV ? 
ConstantAsMetadata::getConstant(MappedV) : nullptr; +} + +std::optional<Metadata *> MDNodeMapper::getMappedOp(const Metadata *Op) const { + if (!Op) + return nullptr; + + if (std::optional<Metadata *> MappedOp = M.getVM().getMappedMD(Op)) + return *MappedOp; + + if (isa<MDString>(Op)) + return const_cast<Metadata *>(Op); + + if (auto *CMD = dyn_cast<ConstantAsMetadata>(Op)) + return wrapConstantAsMetadata(*CMD, M.getVM().lookup(CMD->getValue())); + + return std::nullopt; +} + +Metadata &MDNodeMapper::UniquedGraph::getFwdReference(MDNode &Op) { + auto Where = Info.find(&Op); + assert(Where != Info.end() && "Expected a valid reference"); + + auto &OpD = Where->second; + if (!OpD.HasChanged) + return Op; + + // Lazily construct a temporary node. + if (!OpD.Placeholder) + OpD.Placeholder = Op.clone(); + + return *OpD.Placeholder; +} + +template <class OperandMapper> +void MDNodeMapper::remapOperands(MDNode &N, OperandMapper mapOperand) { + assert(!N.isUniqued() && "Expected distinct or temporary nodes"); + for (unsigned I = 0, E = N.getNumOperands(); I != E; ++I) { + Metadata *Old = N.getOperand(I); + Metadata *New = mapOperand(Old); + if (Old != New) + LLVM_DEBUG(dbgs() << "Replacing Op " << Old << " with " << New << " in " + << N << "\n"); + + if (Old != New) + N.replaceOperandWith(I, New); + } +} + +namespace { + +/// An entry in the worklist for the post-order traversal. +struct POTWorklistEntry { + MDNode *N; ///< Current node. + MDNode::op_iterator Op; ///< Current operand of \c N. + + /// Keep a flag of whether operands have changed in the worklist to avoid + /// hitting the map in \a UniquedGraph. + bool HasChanged = false; + + POTWorklistEntry(MDNode &N) : N(&N), Op(N.op_begin()) {} +}; + +} // end anonymous namespace + +bool MDNodeMapper::createPOT(UniquedGraph &G, const MDNode &FirstN) { + assert(G.Info.empty() && "Expected a fresh traversal"); + assert(FirstN.isUniqued() && "Expected uniqued node in POT"); + + // Construct a post-order traversal of the uniqued subgraph under FirstN. + bool AnyChanges = false; + SmallVector<POTWorklistEntry, 16> Worklist; + Worklist.push_back(POTWorklistEntry(const_cast<MDNode &>(FirstN))); + (void)G.Info[&FirstN]; + while (!Worklist.empty()) { + // Start or continue the traversal through the this node's operands. + auto &WE = Worklist.back(); + if (MDNode *N = visitOperands(G, WE.Op, WE.N->op_end(), WE.HasChanged)) { + // Push a new node to traverse first. + Worklist.push_back(POTWorklistEntry(*N)); + continue; + } + + // Push the node onto the POT. + assert(WE.N->isUniqued() && "Expected only uniqued nodes"); + assert(WE.Op == WE.N->op_end() && "Expected to visit all operands"); + auto &D = G.Info[WE.N]; + AnyChanges |= D.HasChanged = WE.HasChanged; + D.ID = G.POT.size(); + G.POT.push_back(WE.N); + + // Pop the node off the worklist. + Worklist.pop_back(); + } + return AnyChanges; +} + +MDNode *MDNodeMapper::visitOperands(UniquedGraph &G, MDNode::op_iterator &I, + MDNode::op_iterator E, bool &HasChanged) { + while (I != E) { + Metadata *Op = *I++; // Increment even on early return. + if (std::optional<Metadata *> MappedOp = tryToMapOperand(Op)) { + // Check if the operand changes. + HasChanged |= Op != *MappedOp; + continue; + } + + // A uniqued metadata node. + MDNode &OpN = *cast<MDNode>(Op); + assert(OpN.isUniqued() && + "Only uniqued operands cannot be mapped immediately"); + if (G.Info.insert(std::make_pair(&OpN, Data())).second) + return &OpN; // This is a new one. Return it. 
+ } + return nullptr; +} + +void MDNodeMapper::UniquedGraph::propagateChanges() { + bool AnyChanges; + do { + AnyChanges = false; + for (MDNode *N : POT) { + auto &D = Info[N]; + if (D.HasChanged) + continue; + + if (llvm::none_of(N->operands(), [&](const Metadata *Op) { + auto Where = Info.find(Op); + return Where != Info.end() && Where->second.HasChanged; + })) + continue; + + AnyChanges = D.HasChanged = true; + } + } while (AnyChanges); +} + +void MDNodeMapper::mapNodesInPOT(UniquedGraph &G) { + // Construct uniqued nodes, building forward references as necessary. + SmallVector<MDNode *, 16> CyclicNodes; + for (auto *N : G.POT) { + auto &D = G.Info[N]; + if (!D.HasChanged) { + // The node hasn't changed. + M.mapToSelf(N); + continue; + } + + // Remember whether this node had a placeholder. + bool HadPlaceholder(D.Placeholder); + + // Clone the uniqued node and remap the operands. + TempMDNode ClonedN = D.Placeholder ? std::move(D.Placeholder) : N->clone(); + remapOperands(*ClonedN, [this, &D, &G](Metadata *Old) { + if (std::optional<Metadata *> MappedOp = getMappedOp(Old)) + return *MappedOp; + (void)D; + assert(G.Info[Old].ID > D.ID && "Expected a forward reference"); + return &G.getFwdReference(*cast<MDNode>(Old)); + }); + + auto *NewN = MDNode::replaceWithUniqued(std::move(ClonedN)); + if (N && NewN && N != NewN) { + LLVM_DEBUG(dbgs() << "\nMap " << *N << "\n" + << "To " << *NewN << "\n\n"); + } + + M.mapToMetadata(N, NewN); + + // Nodes that were referenced out of order in the POT are involved in a + // uniquing cycle. + if (HadPlaceholder) + CyclicNodes.push_back(NewN); + } + + // Resolve cycles. + for (auto *N : CyclicNodes) + if (!N->isResolved()) + N->resolveCycles(); +} + +Metadata *MDNodeMapper::map(const MDNode &N) { + assert(DistinctWorklist.empty() && "MDNodeMapper::map is not recursive"); + assert(!(M.Flags & RF_NoModuleLevelChanges) && + "MDNodeMapper::map assumes module-level changes"); + + // Require resolved nodes whenever metadata might be remapped. + assert(N.isResolved() && "Unexpected unresolved node"); + + Metadata *MappedN = + N.isUniqued() ? mapTopLevelUniquedNode(N) : mapDistinctNode(N); + while (!DistinctWorklist.empty()) + remapOperands(*DistinctWorklist.pop_back_val(), [this](Metadata *Old) { + if (std::optional<Metadata *> MappedOp = tryToMapOperand(Old)) + return *MappedOp; + return mapTopLevelUniquedNode(*cast<MDNode>(Old)); + }); + return MappedN; +} + +Metadata *MDNodeMapper::mapTopLevelUniquedNode(const MDNode &FirstN) { + assert(FirstN.isUniqued() && "Expected uniqued node"); + + // Create a post-order traversal of uniqued nodes under FirstN. + UniquedGraph G; + if (!createPOT(G, FirstN)) { + // Return early if no nodes have changed. + for (const MDNode *N : G.POT) + M.mapToSelf(N); + return &const_cast<MDNode &>(FirstN); + } + + // Update graph with all nodes that have changed. + G.propagateChanges(); + + // Map all the nodes in the graph. + mapNodesInPOT(G); + + // Return the original node, remapped. + return *getMappedOp(&FirstN); +} + +std::optional<Metadata *> Mapper::mapSimpleMetadata(const Metadata *MD) { + // If the value already exists in the map, use it. + if (std::optional<Metadata *> NewMD = getVM().getMappedMD(MD)) + return *NewMD; + + if (isa<MDString>(MD)) + return const_cast<Metadata *>(MD); + + // This is a module-level metadata. If nothing at the module level is + // changing, use an identity mapping. 
+ if ((Flags & RF_NoModuleLevelChanges)) + return const_cast<Metadata *>(MD); + + if (auto *CMD = dyn_cast<ConstantAsMetadata>(MD)) { + // Don't memoize ConstantAsMetadata. Instead of lasting until the + // LLVMContext is destroyed, they can be deleted when the GlobalValue they + // reference is destructed. These aren't super common, so the extra + // indirection isn't that expensive. + return wrapConstantAsMetadata(*CMD, mapValue(CMD->getValue())); + } + + assert(isa<MDNode>(MD) && "Expected a metadata node"); + + return std::nullopt; +} + +Metadata *Mapper::mapMetadata(const Metadata *MD) { + assert(MD && "Expected valid metadata"); + assert(!isa<LocalAsMetadata>(MD) && "Unexpected local metadata"); + + if (std::optional<Metadata *> NewMD = mapSimpleMetadata(MD)) + return *NewMD; + + return MDNodeMapper(*this).map(*cast<MDNode>(MD)); +} + +void Mapper::flush() { + // Flush out the worklist of global values. + while (!Worklist.empty()) { + WorklistEntry E = Worklist.pop_back_val(); + CurrentMCID = E.MCID; + switch (E.Kind) { + case WorklistEntry::MapGlobalInit: + E.Data.GVInit.GV->setInitializer(mapConstant(E.Data.GVInit.Init)); + remapGlobalObjectMetadata(*E.Data.GVInit.GV); + break; + case WorklistEntry::MapAppendingVar: { + unsigned PrefixSize = AppendingInits.size() - E.AppendingGVNumNewMembers; + // mapAppendingVariable call can change AppendingInits if initalizer for + // the variable depends on another appending global, because of that inits + // need to be extracted and updated before the call. + SmallVector<Constant *, 8> NewInits( + drop_begin(AppendingInits, PrefixSize)); + AppendingInits.resize(PrefixSize); + mapAppendingVariable(*E.Data.AppendingGV.GV, + E.Data.AppendingGV.InitPrefix, + E.AppendingGVIsOldCtorDtor, ArrayRef(NewInits)); + break; + } + case WorklistEntry::MapAliasOrIFunc: { + GlobalValue *GV = E.Data.AliasOrIFunc.GV; + Constant *Target = mapConstant(E.Data.AliasOrIFunc.Target); + if (auto *GA = dyn_cast<GlobalAlias>(GV)) + GA->setAliasee(Target); + else if (auto *GI = dyn_cast<GlobalIFunc>(GV)) + GI->setResolver(Target); + else + llvm_unreachable("Not alias or ifunc"); + break; + } + case WorklistEntry::RemapFunction: + remapFunction(*E.Data.RemapF); + break; + } + } + CurrentMCID = 0; + + // Finish logic for block addresses now that all global values have been + // handled. + while (!DelayedBBs.empty()) { + DelayedBasicBlock DBB = DelayedBBs.pop_back_val(); + BasicBlock *BB = cast_or_null<BasicBlock>(mapValue(DBB.OldBB)); + DBB.TempBB->replaceAllUsesWith(BB ? BB : DBB.OldBB); + } +} + +void Mapper::remapInstruction(Instruction *I) { + // Remap operands. + for (Use &Op : I->operands()) { + Value *V = mapValue(Op); + // If we aren't ignoring missing entries, assert that something happened. + if (V) + Op = V; + else + assert((Flags & RF_IgnoreMissingLocals) && + "Referenced value not in value map!"); + } + + // Remap phi nodes' incoming blocks. + if (PHINode *PN = dyn_cast<PHINode>(I)) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *V = mapValue(PN->getIncomingBlock(i)); + // If we aren't ignoring missing entries, assert that something happened. + if (V) + PN->setIncomingBlock(i, cast<BasicBlock>(V)); + else + assert((Flags & RF_IgnoreMissingLocals) && + "Referenced block not in value map!"); + } + } + + // Remap attached metadata. 
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; + I->getAllMetadata(MDs); + for (const auto &MI : MDs) { + MDNode *Old = MI.second; + MDNode *New = cast_or_null<MDNode>(mapMetadata(Old)); + if (New != Old) + I->setMetadata(MI.first, New); + } + + if (!TypeMapper) + return; + + // If the instruction's type is being remapped, do so now. + if (auto *CB = dyn_cast<CallBase>(I)) { + SmallVector<Type *, 3> Tys; + FunctionType *FTy = CB->getFunctionType(); + Tys.reserve(FTy->getNumParams()); + for (Type *Ty : FTy->params()) + Tys.push_back(TypeMapper->remapType(Ty)); + CB->mutateFunctionType(FunctionType::get( + TypeMapper->remapType(I->getType()), Tys, FTy->isVarArg())); + + LLVMContext &C = CB->getContext(); + AttributeList Attrs = CB->getAttributes(); + for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) { + for (int AttrIdx = Attribute::FirstTypeAttr; + AttrIdx <= Attribute::LastTypeAttr; AttrIdx++) { + Attribute::AttrKind TypedAttr = (Attribute::AttrKind)AttrIdx; + if (Type *Ty = + Attrs.getAttributeAtIndex(i, TypedAttr).getValueAsType()) { + Attrs = Attrs.replaceAttributeTypeAtIndex(C, i, TypedAttr, + TypeMapper->remapType(Ty)); + break; + } + } + } + CB->setAttributes(Attrs); + return; + } + if (auto *AI = dyn_cast<AllocaInst>(I)) + AI->setAllocatedType(TypeMapper->remapType(AI->getAllocatedType())); + if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) { + GEP->setSourceElementType( + TypeMapper->remapType(GEP->getSourceElementType())); + GEP->setResultElementType( + TypeMapper->remapType(GEP->getResultElementType())); + } + I->mutateType(TypeMapper->remapType(I->getType())); +} + +void Mapper::remapGlobalObjectMetadata(GlobalObject &GO) { + SmallVector<std::pair<unsigned, MDNode *>, 8> MDs; + GO.getAllMetadata(MDs); + GO.clearMetadata(); + for (const auto &I : MDs) + GO.addMetadata(I.first, *cast<MDNode>(mapMetadata(I.second))); +} + +void Mapper::remapFunction(Function &F) { + // Remap the operands. + for (Use &Op : F.operands()) + if (Op) + Op = mapValue(Op); + + // Remap the metadata attachments. + remapGlobalObjectMetadata(F); + + // Remap the argument types. + if (TypeMapper) + for (Argument &A : F.args()) + A.mutateType(TypeMapper->remapType(A.getType())); + + // Remap the instructions. + for (BasicBlock &BB : F) + for (Instruction &I : BB) + remapInstruction(&I); +} + +void Mapper::mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix, + bool IsOldCtorDtor, + ArrayRef<Constant *> NewMembers) { + SmallVector<Constant *, 16> Elements; + if (InitPrefix) { + unsigned NumElements = + cast<ArrayType>(InitPrefix->getType())->getNumElements(); + for (unsigned I = 0; I != NumElements; ++I) + Elements.push_back(InitPrefix->getAggregateElement(I)); + } + + PointerType *VoidPtrTy; + Type *EltTy; + if (IsOldCtorDtor) { + // FIXME: This upgrade is done during linking to support the C API. See + // also IRLinker::linkAppendingVarProto() in IRMover.cpp. 
+ VoidPtrTy = Type::getInt8Ty(GV.getContext())->getPointerTo(); + auto &ST = *cast<StructType>(NewMembers.front()->getType()); + Type *Tys[3] = {ST.getElementType(0), ST.getElementType(1), VoidPtrTy}; + EltTy = StructType::get(GV.getContext(), Tys, false); + } + + for (auto *V : NewMembers) { + Constant *NewV; + if (IsOldCtorDtor) { + auto *S = cast<ConstantStruct>(V); + auto *E1 = cast<Constant>(mapValue(S->getOperand(0))); + auto *E2 = cast<Constant>(mapValue(S->getOperand(1))); + Constant *Null = Constant::getNullValue(VoidPtrTy); + NewV = ConstantStruct::get(cast<StructType>(EltTy), E1, E2, Null); + } else { + NewV = cast_or_null<Constant>(mapValue(V)); + } + Elements.push_back(NewV); + } + + GV.setInitializer( + ConstantArray::get(cast<ArrayType>(GV.getValueType()), Elements)); +} + +void Mapper::scheduleMapGlobalInitializer(GlobalVariable &GV, Constant &Init, + unsigned MCID) { + assert(AlreadyScheduled.insert(&GV).second && "Should not reschedule"); + assert(MCID < MCs.size() && "Invalid mapping context"); + + WorklistEntry WE; + WE.Kind = WorklistEntry::MapGlobalInit; + WE.MCID = MCID; + WE.Data.GVInit.GV = &GV; + WE.Data.GVInit.Init = &Init; + Worklist.push_back(WE); +} + +void Mapper::scheduleMapAppendingVariable(GlobalVariable &GV, + Constant *InitPrefix, + bool IsOldCtorDtor, + ArrayRef<Constant *> NewMembers, + unsigned MCID) { + assert(AlreadyScheduled.insert(&GV).second && "Should not reschedule"); + assert(MCID < MCs.size() && "Invalid mapping context"); + + WorklistEntry WE; + WE.Kind = WorklistEntry::MapAppendingVar; + WE.MCID = MCID; + WE.Data.AppendingGV.GV = &GV; + WE.Data.AppendingGV.InitPrefix = InitPrefix; + WE.AppendingGVIsOldCtorDtor = IsOldCtorDtor; + WE.AppendingGVNumNewMembers = NewMembers.size(); + Worklist.push_back(WE); + AppendingInits.append(NewMembers.begin(), NewMembers.end()); +} + +void Mapper::scheduleMapAliasOrIFunc(GlobalValue &GV, Constant &Target, + unsigned MCID) { + assert(AlreadyScheduled.insert(&GV).second && "Should not reschedule"); + assert((isa<GlobalAlias>(GV) || isa<GlobalIFunc>(GV)) && + "Should be alias or ifunc"); + assert(MCID < MCs.size() && "Invalid mapping context"); + + WorklistEntry WE; + WE.Kind = WorklistEntry::MapAliasOrIFunc; + WE.MCID = MCID; + WE.Data.AliasOrIFunc.GV = &GV; + WE.Data.AliasOrIFunc.Target = &Target; + Worklist.push_back(WE); +} + +void Mapper::scheduleRemapFunction(Function &F, unsigned MCID) { + assert(AlreadyScheduled.insert(&F).second && "Should not reschedule"); + assert(MCID < MCs.size() && "Invalid mapping context"); + + WorklistEntry WE; + WE.Kind = WorklistEntry::RemapFunction; + WE.MCID = MCID; + WE.Data.RemapF = &F; + Worklist.push_back(WE); +} + +void Mapper::addFlags(RemapFlags Flags) { + assert(!hasWorkToDo() && "Expected to have flushed the worklist"); + this->Flags = this->Flags | Flags; +} + +static Mapper *getAsMapper(void *pImpl) { + return reinterpret_cast<Mapper *>(pImpl); +} + +namespace { + +class FlushingMapper { + Mapper &M; + +public: + explicit FlushingMapper(void *pImpl) : M(*getAsMapper(pImpl)) { + assert(!M.hasWorkToDo() && "Expected to be flushed"); + } + + ~FlushingMapper() { M.flush(); } + + Mapper *operator->() const { return &M; } +}; + +} // end anonymous namespace + +ValueMapper::ValueMapper(ValueToValueMapTy &VM, RemapFlags Flags, + ValueMapTypeRemapper *TypeMapper, + ValueMaterializer *Materializer) + : pImpl(new Mapper(VM, Flags, TypeMapper, Materializer)) {} + +ValueMapper::~ValueMapper() { delete getAsMapper(pImpl); } + +unsigned 
+ValueMapper::registerAlternateMappingContext(ValueToValueMapTy &VM, + ValueMaterializer *Materializer) { + return getAsMapper(pImpl)->registerAlternateMappingContext(VM, Materializer); +} + +void ValueMapper::addFlags(RemapFlags Flags) { + FlushingMapper(pImpl)->addFlags(Flags); +} + +Value *ValueMapper::mapValue(const Value &V) { + return FlushingMapper(pImpl)->mapValue(&V); +} + +Constant *ValueMapper::mapConstant(const Constant &C) { + return cast_or_null<Constant>(mapValue(C)); +} + +Metadata *ValueMapper::mapMetadata(const Metadata &MD) { + return FlushingMapper(pImpl)->mapMetadata(&MD); +} + +MDNode *ValueMapper::mapMDNode(const MDNode &N) { + return cast_or_null<MDNode>(mapMetadata(N)); +} + +void ValueMapper::remapInstruction(Instruction &I) { + FlushingMapper(pImpl)->remapInstruction(&I); +} + +void ValueMapper::remapFunction(Function &F) { + FlushingMapper(pImpl)->remapFunction(F); +} + +void ValueMapper::scheduleMapGlobalInitializer(GlobalVariable &GV, + Constant &Init, + unsigned MCID) { + getAsMapper(pImpl)->scheduleMapGlobalInitializer(GV, Init, MCID); +} + +void ValueMapper::scheduleMapAppendingVariable(GlobalVariable &GV, + Constant *InitPrefix, + bool IsOldCtorDtor, + ArrayRef<Constant *> NewMembers, + unsigned MCID) { + getAsMapper(pImpl)->scheduleMapAppendingVariable( + GV, InitPrefix, IsOldCtorDtor, NewMembers, MCID); +} + +void ValueMapper::scheduleMapGlobalAlias(GlobalAlias &GA, Constant &Aliasee, + unsigned MCID) { + getAsMapper(pImpl)->scheduleMapAliasOrIFunc(GA, Aliasee, MCID); +} + +void ValueMapper::scheduleMapGlobalIFunc(GlobalIFunc &GI, Constant &Resolver, + unsigned MCID) { + getAsMapper(pImpl)->scheduleMapAliasOrIFunc(GI, Resolver, MCID); +} + +void ValueMapper::scheduleRemapFunction(Function &F, unsigned MCID) { + getAsMapper(pImpl)->scheduleRemapFunction(F, MCID); +} diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/ya.make b/contrib/libs/llvm16/lib/Transforms/Utils/ya.make new file mode 100644 index 0000000000..42083a238b --- /dev/null +++ b/contrib/libs/llvm16/lib/Transforms/Utils/ya.make @@ -0,0 +1,111 @@ +# Generated by devtools/yamaker. 
+ +LIBRARY() + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + +PEERDIR( + contrib/libs/llvm16 + contrib/libs/llvm16/include + contrib/libs/llvm16/lib/Analysis + contrib/libs/llvm16/lib/IR + contrib/libs/llvm16/lib/Support + contrib/libs/llvm16/lib/TargetParser +) + +ADDINCL( + contrib/libs/llvm16/lib/Transforms/Utils +) + +NO_COMPILER_WARNINGS() + +NO_UTIL() + +SRCS( + AMDGPUEmitPrintf.cpp + ASanStackFrameLayout.cpp + AddDiscriminators.cpp + AssumeBundleBuilder.cpp + BasicBlockUtils.cpp + BreakCriticalEdges.cpp + BuildLibCalls.cpp + BypassSlowDivision.cpp + CallGraphUpdater.cpp + CallPromotionUtils.cpp + CanonicalizeAliases.cpp + CanonicalizeFreezeInLoops.cpp + CloneFunction.cpp + CloneModule.cpp + CodeExtractor.cpp + CodeLayout.cpp + CodeMoverUtils.cpp + CtorUtils.cpp + Debugify.cpp + DemoteRegToStack.cpp + EntryExitInstrumenter.cpp + EscapeEnumerator.cpp + Evaluator.cpp + FixIrreducible.cpp + FlattenCFG.cpp + FunctionComparator.cpp + FunctionImportUtils.cpp + GlobalStatus.cpp + GuardUtils.cpp + HelloWorld.cpp + InjectTLIMappings.cpp + InlineFunction.cpp + InstructionNamer.cpp + IntegerDivision.cpp + LCSSA.cpp + LibCallsShrinkWrap.cpp + Local.cpp + LoopPeel.cpp + LoopRotationUtils.cpp + LoopSimplify.cpp + LoopUnroll.cpp + LoopUnrollAndJam.cpp + LoopUnrollRuntime.cpp + LoopUtils.cpp + LoopVersioning.cpp + LowerAtomic.cpp + LowerGlobalDtors.cpp + LowerIFunc.cpp + LowerInvoke.cpp + LowerMemIntrinsics.cpp + LowerSwitch.cpp + MatrixUtils.cpp + Mem2Reg.cpp + MemoryOpRemark.cpp + MemoryTaggingSupport.cpp + MetaRenamer.cpp + MisExpect.cpp + ModuleUtils.cpp + NameAnonGlobals.cpp + PredicateInfo.cpp + PromoteMemoryToRegister.cpp + RelLookupTableConverter.cpp + SCCPSolver.cpp + SSAUpdater.cpp + SSAUpdaterBulk.cpp + SampleProfileInference.cpp + SampleProfileLoaderBaseUtil.cpp + SanitizerStats.cpp + ScalarEvolutionExpander.cpp + SimplifyCFG.cpp + SimplifyIndVar.cpp + SimplifyLibCalls.cpp + SizeOpts.cpp + SplitModule.cpp + StripGCRelocates.cpp + StripNonLineTableDebugInfo.cpp + SymbolRewriter.cpp + UnifyFunctionExitNodes.cpp + UnifyLoopExits.cpp + Utils.cpp + VNCoercion.cpp + ValueMapper.cpp +) + +END() |
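For orientation, a minimal usage sketch of the ValueMapper API imported above. This snippet is illustrative only and not part of the commit; the function name remapInitializer and the two globals are hypothetical. The pattern is: seed a ValueToValueMapTy with the values to be replaced, then let ValueMapper rebuild any constant that transitively references them.

#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

using namespace llvm;

// Rewrite every reference to OldGV inside Init so it points at NewGV instead.
// mapConstant() walks the constant-expression tree and rebuilds only the
// pieces whose operands actually changed; untouched constants map to
// themselves.
static Constant *remapInitializer(Constant *Init, GlobalVariable *OldGV,
                                  GlobalVariable *NewGV) {
  ValueToValueMapTy VM;
  VM[OldGV] = NewGV;                 // seed the mapping table
  ValueMapper Mapper(VM, RF_None);   // no type remapper, no materializer
  return Mapper.mapConstant(*Init);  // pending worklist is flushed internally
}

Function-level remapping follows the same pattern: seed VM with the argument and block mappings, then call ValueMapper::remapFunction, or use the scheduleMap* entry points when global initializers, aliases, and ifunc resolvers must be deferred until all prototypes have been created.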