path: root/contrib/libs/llvm16/lib/Transforms/Utils
author    vitalyisaev <vitalyisaev@yandex-team.com>  2023-06-29 10:00:50 +0300
committer vitalyisaev <vitalyisaev@yandex-team.com>  2023-06-29 10:00:50 +0300
commit    6ffe9e53658409f212834330e13564e4952558f6 (patch)
tree      85b1e00183517648b228aafa7c8fb07f5276f419 /contrib/libs/llvm16/lib/Transforms/Utils
parent    726057070f9c5a91fc10fde0d5024913d10f1ab9 (diff)
download  ydb-6ffe9e53658409f212834330e13564e4952558f6.tar.gz
YQ Connector: support managed ClickHouse
From dqrun it is now possible to reach a connector instance running on the streaming test stand and pull data from a cloud (managed) ClickHouse instance.
Diffstat (limited to 'contrib/libs/llvm16/lib/Transforms/Utils')
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp | 232
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/ASanStackFrameLayout.cpp | 151
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/AddDiscriminators.cpp | 283
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/AssumeBundleBuilder.cpp | 651
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/BasicBlockUtils.cpp | 1999
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/BreakCriticalEdges.cpp | 465
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/BuildLibCalls.cpp | 1939
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/BypassSlowDivision.cpp | 480
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/CallGraphUpdater.cpp | 170
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/CallPromotionUtils.cpp | 620
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/CanonicalizeAliases.cpp | 76
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp | 248
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/CloneFunction.cpp | 1194
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/CloneModule.cpp | 218
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/CodeExtractor.cpp | 1894
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/CodeLayout.cpp | 1014
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/CodeMoverUtils.cpp | 478
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/CtorUtils.cpp | 154
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/Debugify.cpp | 1085
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/DemoteRegToStack.cpp | 172
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/EntryExitInstrumenter.cpp | 152
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/EscapeEnumerator.cpp | 98
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/Evaluator.cpp | 688
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/FixIrreducible.cpp | 359
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/FlattenCFG.cpp | 548
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/FunctionComparator.cpp | 991
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/FunctionImportUtils.cpp | 361
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/GlobalStatus.cpp | 195
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/GuardUtils.cpp | 126
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/HelloWorld.cpp | 17
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/InjectTLIMappings.cpp | 176
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/InlineFunction.cpp | 2915
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/InstructionNamer.cpp | 78
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/IntegerDivision.cpp | 639
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/LCSSA.cpp | 519
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/LibCallsShrinkWrap.cpp | 562
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/Local.cpp | 3518
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/LoopPeel.cpp | 1040
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/LoopRotationUtils.cpp | 845
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/LoopSimplify.cpp | 921
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/LoopUnroll.cpp | 908
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/LoopUnrollAndJam.cpp | 999
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/LoopUnrollRuntime.cpp | 1008
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/LoopUtils.cpp | 1877
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/LoopVersioning.cpp | 356
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/LowerAtomic.cpp | 114
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/LowerGlobalDtors.cpp | 221
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/LowerIFunc.cpp | 27
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/LowerInvoke.cpp | 95
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/LowerMemIntrinsics.cpp | 605
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/LowerSwitch.cpp | 611
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/MatrixUtils.cpp | 104
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/Mem2Reg.cpp | 116
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/MemoryOpRemark.cpp | 410
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/MemoryTaggingSupport.cpp | 219
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/MetaRenamer.cpp | 251
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/MisExpect.cpp | 214
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/ModuleUtils.cpp | 475
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/NameAnonGlobals.cpp | 90
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/PredicateInfo.cpp | 948
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 1111
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/RelLookupTableConverter.cpp | 221
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/SCCPSolver.cpp | 1922
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/SSAUpdater.cpp | 482
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/SSAUpdaterBulk.cpp | 184
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/SampleProfileInference.cpp | 1347
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp | 185
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/SanitizerStats.cpp | 106
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/ScalarEvolutionExpander.cpp | 2678
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/SimplifyCFG.cpp | 7341
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/SimplifyIndVar.cpp | 2089
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/SimplifyLibCalls.cpp | 4081
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/SizeOpts.cpp | 111
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/SplitModule.cpp | 287
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/StripGCRelocates.cpp | 86
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp | 51
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/SymbolRewriter.cpp | 586
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp | 129
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/UnifyLoopExits.cpp | 254
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/Utils.cpp | 65
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/VNCoercion.cpp | 593
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/ValueMapper.cpp | 1209
-rw-r--r-- contrib/libs/llvm16/lib/Transforms/Utils/ya.make | 111
83 files changed, 63148 insertions, 0 deletions
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
new file mode 100644
index 0000000000..24972db404
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
@@ -0,0 +1,232 @@
+//===- AMDGPUEmitPrintf.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Utility function to lower a printf call into a series of device
+// library calls on the AMDGPU target.
+//
+// WARNING: This file knows about certain library functions. It recognizes them
+// by name, and hardwires knowledge of their semantics.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/Analysis/ValueTracking.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-emit-printf"
+
+static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg) {
+ auto Int64Ty = Builder.getInt64Ty();
+ auto Ty = Arg->getType();
+
+ if (auto IntTy = dyn_cast<IntegerType>(Ty)) {
+ switch (IntTy->getBitWidth()) {
+ case 32:
+ return Builder.CreateZExt(Arg, Int64Ty);
+ case 64:
+ return Arg;
+ }
+ }
+
+ if (Ty->getTypeID() == Type::DoubleTyID) {
+ return Builder.CreateBitCast(Arg, Int64Ty);
+ }
+
+ if (isa<PointerType>(Ty)) {
+ return Builder.CreatePtrToInt(Arg, Int64Ty);
+ }
+
+ llvm_unreachable("unexpected type");
+}
+
+static Value *callPrintfBegin(IRBuilder<> &Builder, Value *Version) {
+ auto Int64Ty = Builder.getInt64Ty();
+ auto M = Builder.GetInsertBlock()->getModule();
+ auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty);
+ return Builder.CreateCall(Fn, Version);
+}
+
+static Value *callAppendArgs(IRBuilder<> &Builder, Value *Desc, int NumArgs,
+ Value *Arg0, Value *Arg1, Value *Arg2, Value *Arg3,
+ Value *Arg4, Value *Arg5, Value *Arg6,
+ bool IsLast) {
+ auto Int64Ty = Builder.getInt64Ty();
+ auto Int32Ty = Builder.getInt32Ty();
+ auto M = Builder.GetInsertBlock()->getModule();
+ auto Fn = M->getOrInsertFunction("__ockl_printf_append_args", Int64Ty,
+ Int64Ty, Int32Ty, Int64Ty, Int64Ty, Int64Ty,
+ Int64Ty, Int64Ty, Int64Ty, Int64Ty, Int32Ty);
+ auto IsLastValue = Builder.getInt32(IsLast);
+ auto NumArgsValue = Builder.getInt32(NumArgs);
+ return Builder.CreateCall(Fn, {Desc, NumArgsValue, Arg0, Arg1, Arg2, Arg3,
+ Arg4, Arg5, Arg6, IsLastValue});
+}
+
+static Value *appendArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
+ bool IsLast) {
+ auto Arg0 = fitArgInto64Bits(Builder, Arg);
+ auto Zero = Builder.getInt64(0);
+ return callAppendArgs(Builder, Desc, 1, Arg0, Zero, Zero, Zero, Zero, Zero,
+ Zero, IsLast);
+}
+
+// The device library does not provide strlen, so we build our own loop
+// here. While we are at it, we also include the terminating null in the length.
+static Value *getStrlenWithNull(IRBuilder<> &Builder, Value *Str) {
+ auto *Prev = Builder.GetInsertBlock();
+ Module *M = Prev->getModule();
+
+ auto CharZero = Builder.getInt8(0);
+ auto One = Builder.getInt64(1);
+ auto Zero = Builder.getInt64(0);
+ auto Int64Ty = Builder.getInt64Ty();
+
+ // The length is either zero for a null pointer, or the computed value for an
+ // actual string. We need a join block for a phi that represents the final
+ // value.
+ //
+ // Strictly speaking, the zero does not matter since
+ // __ockl_printf_append_string_n ignores the length if the pointer is null.
+ BasicBlock *Join = nullptr;
+ if (Prev->getTerminator()) {
+ Join = Prev->splitBasicBlock(Builder.GetInsertPoint(),
+ "strlen.join");
+ Prev->getTerminator()->eraseFromParent();
+ } else {
+ Join = BasicBlock::Create(M->getContext(), "strlen.join",
+ Prev->getParent());
+ }
+ BasicBlock *While =
+ BasicBlock::Create(M->getContext(), "strlen.while",
+ Prev->getParent(), Join);
+ BasicBlock *WhileDone = BasicBlock::Create(
+ M->getContext(), "strlen.while.done",
+ Prev->getParent(), Join);
+
+ // Emit an early return for when the pointer is null.
+ Builder.SetInsertPoint(Prev);
+ auto CmpNull =
+ Builder.CreateICmpEQ(Str, Constant::getNullValue(Str->getType()));
+ BranchInst::Create(Join, While, CmpNull, Prev);
+
+ // Entry to the while loop.
+ Builder.SetInsertPoint(While);
+
+ auto PtrPhi = Builder.CreatePHI(Str->getType(), 2);
+ PtrPhi->addIncoming(Str, Prev);
+ auto PtrNext = Builder.CreateGEP(Builder.getInt8Ty(), PtrPhi, One);
+ PtrPhi->addIncoming(PtrNext, While);
+
+ // Condition for the while loop.
+ auto Data = Builder.CreateLoad(Builder.getInt8Ty(), PtrPhi);
+ auto Cmp = Builder.CreateICmpEQ(Data, CharZero);
+ Builder.CreateCondBr(Cmp, WhileDone, While);
+
+ // Add one to the computed length.
+ Builder.SetInsertPoint(WhileDone, WhileDone->begin());
+ auto Begin = Builder.CreatePtrToInt(Str, Int64Ty);
+ auto End = Builder.CreatePtrToInt(PtrPhi, Int64Ty);
+ auto Len = Builder.CreateSub(End, Begin);
+ Len = Builder.CreateAdd(Len, One);
+
+ // Final join.
+ BranchInst::Create(Join, WhileDone);
+ Builder.SetInsertPoint(Join, Join->begin());
+ auto LenPhi = Builder.CreatePHI(Len->getType(), 2);
+ LenPhi->addIncoming(Len, WhileDone);
+ LenPhi->addIncoming(Zero, Prev);
+
+ return LenPhi;
+}
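
The IRBuilder code above is compact, but the control flow it emits is simple; as plain C++ it has roughly this shape (a sketch of the emitted IR, not code that ships with the pass):

#include <cstdint>

// Shape of the code getStrlenWithNull emits: zero for a null pointer,
// otherwise the string length including the terminating null byte.
static uint64_t strlenWithNull(const char *Str) {
  if (Str == nullptr)               // early branch straight to strlen.join
    return 0;
  const char *P = Str;              // strlen.while: walk forward to the null
  while (*P != 0)
    ++P;
  return (uint64_t)(P - Str) + 1;   // strlen.while.done: add one for the null
}
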
+
+static Value *callAppendStringN(IRBuilder<> &Builder, Value *Desc, Value *Str,
+ Value *Length, bool isLast) {
+ auto Int64Ty = Builder.getInt64Ty();
+ auto CharPtrTy = Builder.getInt8PtrTy();
+ auto Int32Ty = Builder.getInt32Ty();
+ auto M = Builder.GetInsertBlock()->getModule();
+ auto Fn = M->getOrInsertFunction("__ockl_printf_append_string_n", Int64Ty,
+ Int64Ty, CharPtrTy, Int64Ty, Int32Ty);
+ auto IsLastInt32 = Builder.getInt32(isLast);
+ return Builder.CreateCall(Fn, {Desc, Str, Length, IsLastInt32});
+}
+
+static Value *appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg,
+ bool IsLast) {
+ Arg = Builder.CreateBitCast(
+ Arg, Builder.getInt8PtrTy(Arg->getType()->getPointerAddressSpace()));
+ auto Length = getStrlenWithNull(Builder, Arg);
+ return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
+}
+
+static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
+ bool SpecIsCString, bool IsLast) {
+ if (SpecIsCString && isa<PointerType>(Arg->getType())) {
+ return appendString(Builder, Desc, Arg, IsLast);
+ }
+ // If the format specifies a string but the argument is not, the frontend will
+ // have printed a warning. We just rely on undefined behaviour and send the
+ // argument anyway.
+ return appendArg(Builder, Desc, Arg, IsLast);
+}
+
+// Scan the format string to locate all specifiers, and mark the ones that
+// specify a string, i.e, the "%s" specifier with optional '*' characters.
+static void locateCStrings(SparseBitVector<8> &BV, Value *Fmt) {
+ StringRef Str;
+ if (!getConstantStringInfo(Fmt, Str) || Str.empty())
+ return;
+
+ static const char ConvSpecifiers[] = "diouxXfFeEgGaAcspn";
+ size_t SpecPos = 0;
+ // Skip the first argument, the format string.
+ unsigned ArgIdx = 1;
+
+ while ((SpecPos = Str.find_first_of('%', SpecPos)) != StringRef::npos) {
+ if (Str[SpecPos + 1] == '%') {
+ SpecPos += 2;
+ continue;
+ }
+ auto SpecEnd = Str.find_first_of(ConvSpecifiers, SpecPos);
+ if (SpecEnd == StringRef::npos)
+ return;
+ auto Spec = Str.slice(SpecPos, SpecEnd + 1);
+ ArgIdx += Spec.count('*');
+ if (Str[SpecEnd] == 's') {
+ BV.set(ArgIdx);
+ }
+ SpecPos = SpecEnd + 1;
+ ++ArgIdx;
+ }
+}
+
+Value *llvm::emitAMDGPUPrintfCall(IRBuilder<> &Builder,
+ ArrayRef<Value *> Args) {
+ auto NumOps = Args.size();
+ assert(NumOps >= 1);
+
+ auto Fmt = Args[0];
+ SparseBitVector<8> SpecIsCString;
+ locateCStrings(SpecIsCString, Fmt);
+
+ auto Desc = callPrintfBegin(Builder, Builder.getIntN(64, 0));
+ Desc = appendString(Builder, Desc, Fmt, NumOps == 1);
+
+ // FIXME: This invokes hostcall once for each argument. We can pack up to
+ // seven scalar printf arguments in a single hostcall. See the signature of
+ // callAppendArgs().
+ for (unsigned int i = 1; i != NumOps; ++i) {
+ bool IsLast = i == NumOps - 1;
+ bool IsCString = SpecIsCString.test(i);
+ Desc = processArg(Builder, Desc, Args[i], IsCString, IsLast);
+ }
+
+ return Builder.CreateTrunc(Desc, Builder.getInt32Ty());
+}
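
Taken together, the helpers above lower one printf call into a chain of device-library hostcalls: one __ockl_printf_begin, the format string via __ockl_printf_append_string_n, then one append per argument, with string arguments passed as pointer plus length. The following standalone sketch traces that sequence for printf("%d: %s\n", I, S); the ockl_* functions are host-side stand-ins for the real __ockl_printf_* entry points from the ROCm device library, so the example can run and print the call trace.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Host-side stand-ins for __ockl_printf_begin / __ockl_printf_append_args /
// __ockl_printf_append_string_n; they only trace their arguments.
static uint64_t ockl_printf_begin(uint64_t Version) {
  std::printf("begin(version=%llu)\n", (unsigned long long)Version);
  return 1; // opaque descriptor handle
}

static uint64_t ockl_printf_append_args(uint64_t Desc, uint32_t NumArgs,
                                        uint64_t A0, uint64_t, uint64_t,
                                        uint64_t, uint64_t, uint64_t,
                                        uint64_t, uint32_t IsLast) {
  std::printf("append_args(desc=%llu, n=%u, a0=%llu, last=%u)\n",
              (unsigned long long)Desc, NumArgs, (unsigned long long)A0,
              IsLast);
  return Desc;
}

static uint64_t ockl_printf_append_string_n(uint64_t Desc, const char *Str,
                                            uint64_t Length, uint32_t IsLast) {
  std::printf("append_string_n(desc=%llu, str=\"%s\", len=%llu, last=%u)\n",
              (unsigned long long)Desc, Str, (unsigned long long)Length,
              IsLast);
  return Desc;
}

int main() {
  // Sequence emitted for: printf("%d: %s\n", I, S)
  const char *Fmt = "%d: %s\n";
  int I = 42;
  const char *S = "hello";

  uint64_t Desc = ockl_printf_begin(0);
  // The format string always goes first, with its terminating null included.
  Desc = ockl_printf_append_string_n(Desc, Fmt, std::strlen(Fmt) + 1, 0);
  // Scalar arguments are widened to 64 bits (here: zero-extending a 32-bit
  // int) and appended one per hostcall; the FIXME above notes that up to
  // seven could be packed into a single call.
  Desc = ockl_printf_append_args(Desc, 1, (uint64_t)(uint32_t)I, 0, 0, 0, 0, 0,
                                 0, 0);
  // A "%s" argument is appended as pointer plus length including the null.
  Desc = ockl_printf_append_string_n(Desc, S, std::strlen(S) + 1, 1);
  return 0;
}
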
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/ASanStackFrameLayout.cpp
new file mode 100644
index 0000000000..0318429a76
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/ASanStackFrameLayout.cpp
@@ -0,0 +1,151 @@
+//===-- ASanStackFrameLayout.cpp - helper for AddressSanitizer ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Definition of ComputeASanStackFrameLayout (see ASanStackFrameLayout.h).
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/ASanStackFrameLayout.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+namespace llvm {
+
+// We sort the stack variables by alignment (largest first) to minimize
+// unnecessary large gaps due to alignment.
+// It is tempting to also sort variables by size so that larger variables
+// have larger redzones at both ends. But reordering will make report analysis
+// harder, especially when temporary unnamed variables are present.
+// So, until we can provide more information (type, line number, etc)
+// for the stack variables we avoid reordering them too much.
+static inline bool CompareVars(const ASanStackVariableDescription &a,
+ const ASanStackVariableDescription &b) {
+ return a.Alignment > b.Alignment;
+}
+
+// We also force minimal alignment for all vars to kMinAlignment so that vars
+// with e.g. alignment 1 and alignment 16 do not get reordered by CompareVars.
+static const uint64_t kMinAlignment = 16;
+
+// We want to add a full redzone after every variable.
+// The larger the variable Size the larger is the redzone.
+// The resulting frame size is a multiple of Alignment.
+static uint64_t VarAndRedzoneSize(uint64_t Size, uint64_t Granularity,
+ uint64_t Alignment) {
+ uint64_t Res = 0;
+ if (Size <= 4) Res = 16;
+ else if (Size <= 16) Res = 32;
+ else if (Size <= 128) Res = Size + 32;
+ else if (Size <= 512) Res = Size + 64;
+ else if (Size <= 4096) Res = Size + 128;
+ else Res = Size + 256;
+ return alignTo(std::max(Res, 2 * Granularity), Alignment);
+}
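
To make the sizing rule concrete, here is a standalone sketch (not linked against LLVM; the alignment helper is re-implemented locally) that replays the logic of VarAndRedzoneSize for a few variable sizes at the default 8-byte shadow granularity and a 16-byte next-variable alignment:

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Round Value up to a multiple of Align (what llvm::alignTo does for the
// power-of-two alignments used here).
static uint64_t alignUp(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

// Same rule as VarAndRedzoneSize above: small variables get a fixed-size
// redzone, larger variables get one proportional to their size, and the sum
// is padded out to the alignment of the next variable in the frame.
static uint64_t varAndRedzoneSize(uint64_t Size, uint64_t Granularity,
                                  uint64_t Alignment) {
  uint64_t Res = 0;
  if (Size <= 4) Res = 16;
  else if (Size <= 16) Res = 32;
  else if (Size <= 128) Res = Size + 32;
  else if (Size <= 512) Res = Size + 64;
  else if (Size <= 4096) Res = Size + 128;
  else Res = Size + 256;
  return alignUp(std::max(Res, 2 * Granularity), Alignment);
}

int main() {
  const uint64_t Granularity = 8; // default ASan shadow granularity
  const uint64_t Alignment = 16;  // kMinAlignment of the following variable
  for (uint64_t Size : {4, 20, 100, 1000, 5000})
    std::printf("%4llu-byte variable occupies %4llu bytes with its redzone\n",
                (unsigned long long)Size,
                (unsigned long long)varAndRedzoneSize(Size, Granularity,
                                                      Alignment));
  return 0;
}
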
+
+ASanStackFrameLayout
+ComputeASanStackFrameLayout(SmallVectorImpl<ASanStackVariableDescription> &Vars,
+ uint64_t Granularity, uint64_t MinHeaderSize) {
+ assert(Granularity >= 8 && Granularity <= 64 &&
+ (Granularity & (Granularity - 1)) == 0);
+ assert(MinHeaderSize >= 16 && (MinHeaderSize & (MinHeaderSize - 1)) == 0 &&
+ MinHeaderSize >= Granularity);
+ const size_t NumVars = Vars.size();
+ assert(NumVars > 0);
+ for (size_t i = 0; i < NumVars; i++)
+ Vars[i].Alignment = std::max(Vars[i].Alignment, kMinAlignment);
+
+ llvm::stable_sort(Vars, CompareVars);
+
+ ASanStackFrameLayout Layout;
+ Layout.Granularity = Granularity;
+ Layout.FrameAlignment = std::max(Granularity, Vars[0].Alignment);
+ uint64_t Offset =
+ std::max(std::max(MinHeaderSize, Granularity), Vars[0].Alignment);
+ assert((Offset % Granularity) == 0);
+ for (size_t i = 0; i < NumVars; i++) {
+ bool IsLast = i == NumVars - 1;
+ uint64_t Alignment = std::max(Granularity, Vars[i].Alignment);
+ (void)Alignment; // Used only in asserts.
+ uint64_t Size = Vars[i].Size;
+ assert((Alignment & (Alignment - 1)) == 0);
+ assert(Layout.FrameAlignment >= Alignment);
+ assert((Offset % Alignment) == 0);
+ assert(Size > 0);
+ uint64_t NextAlignment =
+ IsLast ? Granularity : std::max(Granularity, Vars[i + 1].Alignment);
+ uint64_t SizeWithRedzone =
+ VarAndRedzoneSize(Size, Granularity, NextAlignment);
+ Vars[i].Offset = Offset;
+ Offset += SizeWithRedzone;
+ }
+ if (Offset % MinHeaderSize) {
+ Offset += MinHeaderSize - (Offset % MinHeaderSize);
+ }
+ Layout.FrameSize = Offset;
+ assert((Layout.FrameSize % MinHeaderSize) == 0);
+ return Layout;
+}
+
+SmallString<64> ComputeASanStackFrameDescription(
+ const SmallVectorImpl<ASanStackVariableDescription> &Vars) {
+ SmallString<2048> StackDescriptionStorage;
+ raw_svector_ostream StackDescription(StackDescriptionStorage);
+ StackDescription << Vars.size();
+
+ for (const auto &Var : Vars) {
+ std::string Name = Var.Name;
+ if (Var.Line) {
+ Name += ":";
+ Name += to_string(Var.Line);
+ }
+ StackDescription << " " << Var.Offset << " " << Var.Size << " "
+ << Name.size() << " " << Name;
+ }
+ return StackDescription.str();
+}
+
+SmallVector<uint8_t, 64>
+GetShadowBytes(const SmallVectorImpl<ASanStackVariableDescription> &Vars,
+ const ASanStackFrameLayout &Layout) {
+ assert(Vars.size() > 0);
+ SmallVector<uint8_t, 64> SB;
+ SB.clear();
+ const uint64_t Granularity = Layout.Granularity;
+ SB.resize(Vars[0].Offset / Granularity, kAsanStackLeftRedzoneMagic);
+ for (const auto &Var : Vars) {
+ SB.resize(Var.Offset / Granularity, kAsanStackMidRedzoneMagic);
+
+ SB.resize(SB.size() + Var.Size / Granularity, 0);
+ if (Var.Size % Granularity)
+ SB.push_back(Var.Size % Granularity);
+ }
+ SB.resize(Layout.FrameSize / Granularity, kAsanStackRightRedzoneMagic);
+ return SB;
+}
+
+SmallVector<uint8_t, 64> GetShadowBytesAfterScope(
+ const SmallVectorImpl<ASanStackVariableDescription> &Vars,
+ const ASanStackFrameLayout &Layout) {
+ SmallVector<uint8_t, 64> SB = GetShadowBytes(Vars, Layout);
+ const uint64_t Granularity = Layout.Granularity;
+
+ for (const auto &Var : Vars) {
+ assert(Var.LifetimeSize <= Var.Size);
+ const uint64_t LifetimeShadowSize =
+ (Var.LifetimeSize + Granularity - 1) / Granularity;
+ const uint64_t Offset = Var.Offset / Granularity;
+ std::fill(SB.begin() + Offset, SB.begin() + Offset + LifetimeShadowSize,
+ kAsanStackUseAfterScopeMagic);
+ }
+
+ return SB;
+}
+
+} // llvm namespace
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/AddDiscriminators.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/AddDiscriminators.cpp
new file mode 100644
index 0000000000..56acdcc0bc
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -0,0 +1,283 @@
+//===- AddDiscriminators.cpp - Insert DWARF path discriminators -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file adds DWARF discriminators to the IR. Path discriminators are
+// used to decide what CFG path was taken inside sub-graphs whose instructions
+// share the same line and column number information.
+//
+// The main user of this is the sample profiler. Instruction samples are
+// mapped to line number information. Since a single line may be spread
+// out over several basic blocks, discriminators add more precise location
+// for the samples.
+//
+// For example,
+//
+// 1 #define ASSERT(P)
+// 2 if (!(P))
+// 3 abort()
+// ...
+// 100 while (true) {
+// 101 ASSERT (sum < 0);
+// 102 ...
+// 130 }
+//
+// when converted to IR, this snippet looks something like:
+//
+// while.body: ; preds = %entry, %if.end
+// %0 = load i32* %sum, align 4, !dbg !15
+// %cmp = icmp slt i32 %0, 0, !dbg !15
+// br i1 %cmp, label %if.end, label %if.then, !dbg !15
+//
+// if.then: ; preds = %while.body
+// call void @abort(), !dbg !15
+// br label %if.end, !dbg !15
+//
+// Notice that all the instructions in blocks 'while.body' and 'if.then'
+// have exactly the same debug information. When this program is sampled
+// at runtime, the profiler will assume that all these instructions are
+// equally frequent. This, in turn, will consider the edge while.body->if.then
+// to be frequently taken (which is incorrect).
+//
+// By adding a discriminator value to the instructions in block 'if.then',
+// we can distinguish instructions at line 101 with discriminator 0 from
+// the instructions at line 101 with discriminator 1.
+//
+// For more details about DWARF discriminators, please visit
+// http://wiki.dwarfstd.org/index.php?title=Path_Discriminators
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/AddDiscriminators.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
+#include <utility>
+
+using namespace llvm;
+using namespace sampleprofutil;
+
+#define DEBUG_TYPE "add-discriminators"
+
+// Command line option to disable discriminator generation even in the
+// presence of debug information. This is only needed when debugging
+// debug info generation issues.
+static cl::opt<bool> NoDiscriminators(
+ "no-discriminators", cl::init(false),
+ cl::desc("Disable generation of discriminator information."));
+
+namespace {
+
+// The legacy pass of AddDiscriminators.
+struct AddDiscriminatorsLegacyPass : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+
+ AddDiscriminatorsLegacyPass() : FunctionPass(ID) {
+ initializeAddDiscriminatorsLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+};
+
+} // end anonymous namespace
+
+char AddDiscriminatorsLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(AddDiscriminatorsLegacyPass, "add-discriminators",
+ "Add DWARF path discriminators", false, false)
+INITIALIZE_PASS_END(AddDiscriminatorsLegacyPass, "add-discriminators",
+ "Add DWARF path discriminators", false, false)
+
+// Create the legacy AddDiscriminatorsPass.
+FunctionPass *llvm::createAddDiscriminatorsPass() {
+ return new AddDiscriminatorsLegacyPass();
+}
+
+static bool shouldHaveDiscriminator(const Instruction *I) {
+ return !isa<IntrinsicInst>(I) || isa<MemIntrinsic>(I);
+}
+
+/// Assign DWARF discriminators.
+///
+/// To assign discriminators, we examine the boundaries of every
+/// basic block and its successors. Suppose there is a basic block B1
+/// with successor B2. The last instruction I1 in B1 and the first
+/// instruction I2 in B2 are located at the same file and line number.
+/// This situation is illustrated in the following code snippet:
+///
+/// if (i < 10) x = i;
+///
+/// entry:
+/// br i1 %cmp, label %if.then, label %if.end, !dbg !10
+/// if.then:
+/// %1 = load i32* %i.addr, align 4, !dbg !10
+/// store i32 %1, i32* %x, align 4, !dbg !10
+/// br label %if.end, !dbg !10
+/// if.end:
+/// ret void, !dbg !12
+///
+/// Notice how the branch instruction in block 'entry' and all the
+/// instructions in block 'if.then' have the exact same debug location
+/// information (!dbg !10).
+///
+/// To distinguish instructions in block 'entry' from instructions in
+/// block 'if.then', we generate a new lexical block for all the
+/// instruction in block 'if.then' that share the same file and line
+/// location with the last instruction of block 'entry'.
+///
+/// This new lexical block will have the same location information as
+/// the previous one, but with a new DWARF discriminator value.
+///
+/// One of the main uses of this discriminator value is in runtime
+/// sample profilers. It allows the profiler to distinguish instructions
+/// at location !dbg !10 that execute on different basic blocks. This is
+/// important because while the predicate 'if (x < 10)' may have been
+/// executed millions of times, the assignment 'x = i' may have only
+/// executed a handful of times (meaning that the entry->if.then edge is
+/// seldom taken).
+///
+/// If we did not have discriminator information, the profiler would
+/// assign the same weight to both blocks 'entry' and 'if.then', which
+/// in turn will make it conclude that the entry->if.then edge is very
+/// hot.
+///
+/// To decide where to create new discriminator values, this function
+/// traverses the CFG and examines instruction at basic block boundaries.
+/// If the last instruction I1 of a block B1 is at the same file and line
+/// location as instruction I2 of successor B2, then it creates a new
+/// lexical block for I2 and all the instruction in B2 that share the same
+/// file and line location as I2. This new lexical block will have a
+/// different discriminator number than I1.
+static bool addDiscriminators(Function &F) {
+ // If the function has debug information, but the user has disabled
+ // discriminators, do nothing.
+  // Similarly, if the function has no debug info, do nothing.
+ if (NoDiscriminators || !F.getSubprogram())
+ return false;
+
+ // Create FSDiscriminatorVariable if flow sensitive discriminators are used.
+ if (EnableFSDiscriminator)
+ createFSDiscriminatorVariable(F.getParent());
+
+ bool Changed = false;
+
+ using Location = std::pair<StringRef, unsigned>;
+ using BBSet = DenseSet<const BasicBlock *>;
+ using LocationBBMap = DenseMap<Location, BBSet>;
+ using LocationDiscriminatorMap = DenseMap<Location, unsigned>;
+ using LocationSet = DenseSet<Location>;
+
+ LocationBBMap LBM;
+ LocationDiscriminatorMap LDM;
+
+  // Traverse all instructions in the function. If the source line location
+  // of an instruction appears in more than one basic block, assign a new
+  // discriminator to that instruction.
+ for (BasicBlock &B : F) {
+ for (auto &I : B) {
+ // Not all intrinsic calls should have a discriminator.
+ // We want to avoid a non-deterministic assignment of discriminators at
+ // different debug levels. We still allow discriminators on memory
+ // intrinsic calls because those can be early expanded by SROA into
+ // pairs of loads and stores, and the expanded load/store instructions
+ // should have a valid discriminator.
+ if (!shouldHaveDiscriminator(&I))
+ continue;
+ const DILocation *DIL = I.getDebugLoc();
+ if (!DIL)
+ continue;
+ Location L = std::make_pair(DIL->getFilename(), DIL->getLine());
+ auto &BBMap = LBM[L];
+ auto R = BBMap.insert(&B);
+ if (BBMap.size() == 1)
+ continue;
+ // If we could insert more than one block with the same line+file, a
+ // discriminator is needed to distinguish both instructions.
+ // Only the lowest 7 bits are used to represent a discriminator to fit
+ // it in 1 byte ULEB128 representation.
+ unsigned Discriminator = R.second ? ++LDM[L] : LDM[L];
+ auto NewDIL = DIL->cloneWithBaseDiscriminator(Discriminator);
+ if (!NewDIL) {
+ LLVM_DEBUG(dbgs() << "Could not encode discriminator: "
+ << DIL->getFilename() << ":" << DIL->getLine() << ":"
+ << DIL->getColumn() << ":" << Discriminator << " "
+ << I << "\n");
+ } else {
+ I.setDebugLoc(*NewDIL);
+ LLVM_DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
+ << DIL->getColumn() << ":" << Discriminator << " " << I
+ << "\n");
+ }
+ Changed = true;
+ }
+ }
+
+ // Traverse all instructions and assign new discriminators to call
+ // instructions with the same lineno that are in the same basic block.
+  // Sample-based profiling needs to distinguish different function calls
+  // within the same source line for correct profile annotation.
+ for (BasicBlock &B : F) {
+ LocationSet CallLocations;
+ for (auto &I : B) {
+ // We bypass intrinsic calls for the following two reasons:
+ // 1) We want to avoid a non-deterministic assignment of
+ // discriminators.
+ // 2) We want to minimize the number of base discriminators used.
+ if (!isa<InvokeInst>(I) && (!isa<CallInst>(I) || isa<IntrinsicInst>(I)))
+ continue;
+
+ DILocation *CurrentDIL = I.getDebugLoc();
+ if (!CurrentDIL)
+ continue;
+ Location L =
+ std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine());
+ if (!CallLocations.insert(L).second) {
+ unsigned Discriminator = ++LDM[L];
+ auto NewDIL = CurrentDIL->cloneWithBaseDiscriminator(Discriminator);
+ if (!NewDIL) {
+ LLVM_DEBUG(dbgs()
+ << "Could not encode discriminator: "
+ << CurrentDIL->getFilename() << ":"
+ << CurrentDIL->getLine() << ":" << CurrentDIL->getColumn()
+ << ":" << Discriminator << " " << I << "\n");
+ } else {
+ I.setDebugLoc(*NewDIL);
+ Changed = true;
+ }
+ }
+ }
+ }
+ return Changed;
+}
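
The first loop above keys its bookkeeping on a (file, line) pair: the first basic block seen for a location keeps the implicit discriminator 0, and each additional block sharing that location bumps a per-location counter. A small standalone replay of that bookkeeping (plain std::map/std::set standing in for LLVM's DenseMap/DenseSet; the block and file names are made up for illustration and mirror the ASSERT example from the file header):

#include <cstdio>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

int main() {
  using Location = std::pair<std::string, unsigned>;           // (file, line)
  std::map<Location, std::set<std::string>> LocationToBlocks;  // LBM above
  std::map<Location, unsigned> LocationToDiscriminator;        // LDM above

  // One entry per instruction that carries a debug location, in function
  // order: line 101 is spread over while.body and if.then.
  struct Inst { std::string Block; std::string File; unsigned Line; };
  std::vector<Inst> Insts = {
      {"while.body", "a.cc", 101}, {"while.body", "a.cc", 101},
      {"if.then", "a.cc", 101},    {"if.then", "a.cc", 101},
      {"if.end", "a.cc", 102},
  };

  for (const Inst &I : Insts) {
    Location L{I.File, I.Line};
    std::set<std::string> &Blocks = LocationToBlocks[L];
    bool NewBlock = Blocks.insert(I.Block).second;
    if (Blocks.size() == 1) { // only one block at this location so far
      std::printf("%-10s %s:%u keeps discriminator 0\n", I.Block.c_str(),
                  I.File.c_str(), I.Line);
      continue;
    }
    // A new block at a known location gets the next discriminator; further
    // instructions in that block reuse the current value.
    unsigned Discriminator = NewBlock ? ++LocationToDiscriminator[L]
                                      : LocationToDiscriminator[L];
    std::printf("%-10s %s:%u gets discriminator %u\n", I.Block.c_str(),
                I.File.c_str(), I.Line, Discriminator);
  }
  return 0;
}

Both if.then instructions end up on discriminator 1 while while.body keeps 0, which is exactly the separation the header comment asks for.
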
+
+bool AddDiscriminatorsLegacyPass::runOnFunction(Function &F) {
+ return addDiscriminators(F);
+}
+
+PreservedAnalyses AddDiscriminatorsPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ if (!addDiscriminators(F))
+ return PreservedAnalyses::all();
+
+ // FIXME: should be all()
+ return PreservedAnalyses::none();
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/AssumeBundleBuilder.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/AssumeBundleBuilder.cpp
new file mode 100644
index 0000000000..d17c399ba7
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/AssumeBundleBuilder.cpp
@@ -0,0 +1,651 @@
+//===- AssumeBundleBuilder.cpp - tools to preserve information  -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumeBundleQueries.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/DebugCounter.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+using namespace llvm;
+
+namespace llvm {
+cl::opt<bool> ShouldPreserveAllAttributes(
+ "assume-preserve-all", cl::init(false), cl::Hidden,
+    cl::desc("enable preservation of all attributes, even those that are "
+             "unlikely to be useful"));
+
+cl::opt<bool> EnableKnowledgeRetention(
+ "enable-knowledge-retention", cl::init(false), cl::Hidden,
+ cl::desc(
+ "enable preservation of attributes throughout code transformation"));
+} // namespace llvm
+
+#define DEBUG_TYPE "assume-builder"
+
+STATISTIC(NumAssumeBuilt, "Number of assume built by the assume builder");
+STATISTIC(NumBundlesInAssumes, "Total number of Bundles in the assume built");
+STATISTIC(NumAssumesMerged,
+ "Number of assume merged by the assume simplify pass");
+STATISTIC(NumAssumesRemoved,
+ "Number of assume removed by the assume simplify pass");
+
+DEBUG_COUNTER(BuildAssumeCounter, "assume-builder-counter",
+ "Controls which assumes gets created");
+
+namespace {
+
+bool isUsefullToPreserve(Attribute::AttrKind Kind) {
+ switch (Kind) {
+ case Attribute::NonNull:
+ case Attribute::NoUndef:
+ case Attribute::Alignment:
+ case Attribute::Dereferenceable:
+ case Attribute::DereferenceableOrNull:
+ case Attribute::Cold:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// This function will try to transform the given knowledge into a more
+/// canonical one. The canonical knowledge may be the given one.
+RetainedKnowledge canonicalizedKnowledge(RetainedKnowledge RK,
+ const DataLayout &DL) {
+ switch (RK.AttrKind) {
+ default:
+ return RK;
+ case Attribute::NonNull:
+ RK.WasOn = getUnderlyingObject(RK.WasOn);
+ return RK;
+ case Attribute::Alignment: {
+ Value *V = RK.WasOn->stripInBoundsOffsets([&](const Value *Strip) {
+ if (auto *GEP = dyn_cast<GEPOperator>(Strip))
+ RK.ArgValue =
+ MinAlign(RK.ArgValue, GEP->getMaxPreservedAlignment(DL).value());
+ });
+ RK.WasOn = V;
+ return RK;
+ }
+ case Attribute::Dereferenceable:
+ case Attribute::DereferenceableOrNull: {
+ int64_t Offset = 0;
+ Value *V = GetPointerBaseWithConstantOffset(RK.WasOn, Offset, DL,
+ /*AllowNonInBounds*/ false);
+ if (Offset < 0)
+ return RK;
+ RK.ArgValue = RK.ArgValue + Offset;
+ RK.WasOn = V;
+ }
+ }
+ return RK;
+}
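
For the dereferenceable cases, the effect is easiest to see on a worked example: knowledge attached to a pointer at a constant positive offset from some base is rewritten onto the base pointer, with the byte count grown by that offset. A toy mirror of that rule (no LLVM types; the base/offset decomposition that GetPointerBaseWithConstantOffset would compute is passed in directly):

#include <cstdint>
#include <cstdio>
#include <string>

// Toy stand-in for RetainedKnowledge, covering only the dereferenceable case.
struct ToyKnowledge {
  std::string AttrKind;
  uint64_t ArgValue;  // number of dereferenceable bytes
  std::string WasOn;  // name of the pointer the knowledge is attached to
};

// Mirrors the Dereferenceable/DereferenceableOrNull branch above: the
// base/offset split is passed in instead of being computed from IR.
static ToyKnowledge canonicalize(ToyKnowledge RK, const std::string &Base,
                                 int64_t Offset) {
  if (Offset < 0)
    return RK; // negative offsets are left untouched, as in the real code
  RK.ArgValue += static_cast<uint64_t>(Offset);
  RK.WasOn = Base;
  return RK;
}

int main() {
  // "16 dereferenceable bytes starting at p+8" becomes
  // "24 dereferenceable bytes starting at p".
  ToyKnowledge RK{"dereferenceable", 16, "p+8"};
  ToyKnowledge Canon = canonicalize(RK, "p", 8);
  std::printf("%s(%llu) on %s  ->  %s(%llu) on %s\n", RK.AttrKind.c_str(),
              (unsigned long long)RK.ArgValue, RK.WasOn.c_str(),
              Canon.AttrKind.c_str(), (unsigned long long)Canon.ArgValue,
              Canon.WasOn.c_str());
  return 0;
}
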
+
+/// This class contains all knowledge that has been gathered while building an
+/// llvm.assume, along with the functions to manipulate it.
+struct AssumeBuilderState {
+ Module *M;
+
+ using MapKey = std::pair<Value *, Attribute::AttrKind>;
+ SmallMapVector<MapKey, uint64_t, 8> AssumedKnowledgeMap;
+ Instruction *InstBeingModified = nullptr;
+ AssumptionCache* AC = nullptr;
+ DominatorTree* DT = nullptr;
+
+ AssumeBuilderState(Module *M, Instruction *I = nullptr,
+ AssumptionCache *AC = nullptr, DominatorTree *DT = nullptr)
+ : M(M), InstBeingModified(I), AC(AC), DT(DT) {}
+
+ bool tryToPreserveWithoutAddingAssume(RetainedKnowledge RK) {
+ if (!InstBeingModified || !RK.WasOn)
+ return false;
+ bool HasBeenPreserved = false;
+ Use* ToUpdate = nullptr;
+ getKnowledgeForValue(
+ RK.WasOn, {RK.AttrKind}, AC,
+ [&](RetainedKnowledge RKOther, Instruction *Assume,
+ const CallInst::BundleOpInfo *Bundle) {
+ if (!isValidAssumeForContext(Assume, InstBeingModified, DT))
+ return false;
+ if (RKOther.ArgValue >= RK.ArgValue) {
+ HasBeenPreserved = true;
+ return true;
+ } else if (isValidAssumeForContext(InstBeingModified, Assume, DT)) {
+ HasBeenPreserved = true;
+ IntrinsicInst *Intr = cast<IntrinsicInst>(Assume);
+ ToUpdate = &Intr->op_begin()[Bundle->Begin + ABA_Argument];
+ return true;
+ }
+ return false;
+ });
+ if (ToUpdate)
+ ToUpdate->set(
+ ConstantInt::get(Type::getInt64Ty(M->getContext()), RK.ArgValue));
+ return HasBeenPreserved;
+ }
+
+ bool isKnowledgeWorthPreserving(RetainedKnowledge RK) {
+ if (!RK)
+ return false;
+ if (!RK.WasOn)
+ return true;
+ if (RK.WasOn->getType()->isPointerTy()) {
+ Value *UnderlyingPtr = getUnderlyingObject(RK.WasOn);
+ if (isa<AllocaInst>(UnderlyingPtr) || isa<GlobalValue>(UnderlyingPtr))
+ return false;
+ }
+ if (auto *Arg = dyn_cast<Argument>(RK.WasOn)) {
+ if (Arg->hasAttribute(RK.AttrKind) &&
+ (!Attribute::isIntAttrKind(RK.AttrKind) ||
+ Arg->getAttribute(RK.AttrKind).getValueAsInt() >= RK.ArgValue))
+ return false;
+ return true;
+ }
+ if (auto *Inst = dyn_cast<Instruction>(RK.WasOn))
+ if (wouldInstructionBeTriviallyDead(Inst)) {
+ if (RK.WasOn->use_empty())
+ return false;
+ Use *SingleUse = RK.WasOn->getSingleUndroppableUse();
+ if (SingleUse && SingleUse->getUser() == InstBeingModified)
+ return false;
+ }
+ return true;
+ }
+
+ void addKnowledge(RetainedKnowledge RK) {
+ RK = canonicalizedKnowledge(RK, M->getDataLayout());
+
+ if (!isKnowledgeWorthPreserving(RK))
+ return;
+
+ if (tryToPreserveWithoutAddingAssume(RK))
+ return;
+ MapKey Key{RK.WasOn, RK.AttrKind};
+ auto Lookup = AssumedKnowledgeMap.find(Key);
+ if (Lookup == AssumedKnowledgeMap.end()) {
+ AssumedKnowledgeMap[Key] = RK.ArgValue;
+ return;
+ }
+ assert(((Lookup->second == 0 && RK.ArgValue == 0) ||
+ (Lookup->second != 0 && RK.ArgValue != 0)) &&
+ "inconsistent argument value");
+
+ /// This is only desirable because for all attributes taking an argument
+ /// higher is better.
+ Lookup->second = std::max(Lookup->second, RK.ArgValue);
+ }
+
+ void addAttribute(Attribute Attr, Value *WasOn) {
+ if (Attr.isTypeAttribute() || Attr.isStringAttribute() ||
+ (!ShouldPreserveAllAttributes &&
+ !isUsefullToPreserve(Attr.getKindAsEnum())))
+ return;
+ uint64_t AttrArg = 0;
+ if (Attr.isIntAttribute())
+ AttrArg = Attr.getValueAsInt();
+ addKnowledge({Attr.getKindAsEnum(), AttrArg, WasOn});
+ }
+
+ void addCall(const CallBase *Call) {
+ auto addAttrList = [&](AttributeList AttrList, unsigned NumArgs) {
+ for (unsigned Idx = 0; Idx < NumArgs; Idx++)
+ for (Attribute Attr : AttrList.getParamAttrs(Idx)) {
+ bool IsPoisonAttr = Attr.hasAttribute(Attribute::NonNull) ||
+ Attr.hasAttribute(Attribute::Alignment);
+ if (!IsPoisonAttr || Call->isPassingUndefUB(Idx))
+ addAttribute(Attr, Call->getArgOperand(Idx));
+ }
+ for (Attribute Attr : AttrList.getFnAttrs())
+ addAttribute(Attr, nullptr);
+ };
+ addAttrList(Call->getAttributes(), Call->arg_size());
+ if (Function *Fn = Call->getCalledFunction())
+ addAttrList(Fn->getAttributes(), Fn->arg_size());
+ }
+
+ AssumeInst *build() {
+ if (AssumedKnowledgeMap.empty())
+ return nullptr;
+ if (!DebugCounter::shouldExecute(BuildAssumeCounter))
+ return nullptr;
+ Function *FnAssume = Intrinsic::getDeclaration(M, Intrinsic::assume);
+ LLVMContext &C = M->getContext();
+ SmallVector<OperandBundleDef, 8> OpBundle;
+ for (auto &MapElem : AssumedKnowledgeMap) {
+ SmallVector<Value *, 2> Args;
+ if (MapElem.first.first)
+ Args.push_back(MapElem.first.first);
+
+      /// This is only valid because, for all attributes that currently exist,
+      /// a value of 0 is useless and should not be preserved.
+ if (MapElem.second)
+ Args.push_back(ConstantInt::get(Type::getInt64Ty(M->getContext()),
+ MapElem.second));
+ OpBundle.push_back(OperandBundleDefT<Value *>(
+ std::string(Attribute::getNameFromAttrKind(MapElem.first.second)),
+ Args));
+ NumBundlesInAssumes++;
+ }
+ NumAssumeBuilt++;
+ return cast<AssumeInst>(CallInst::Create(
+ FnAssume, ArrayRef<Value *>({ConstantInt::getTrue(C)}), OpBundle));
+ }
+
+ void addAccessedPtr(Instruction *MemInst, Value *Pointer, Type *AccType,
+ MaybeAlign MA) {
+ unsigned DerefSize = MemInst->getModule()
+ ->getDataLayout()
+ .getTypeStoreSize(AccType)
+ .getKnownMinValue();
+ if (DerefSize != 0) {
+ addKnowledge({Attribute::Dereferenceable, DerefSize, Pointer});
+ if (!NullPointerIsDefined(MemInst->getFunction(),
+ Pointer->getType()->getPointerAddressSpace()))
+ addKnowledge({Attribute::NonNull, 0u, Pointer});
+ }
+ if (MA.valueOrOne() > 1)
+ addKnowledge({Attribute::Alignment, MA.valueOrOne().value(), Pointer});
+ }
+
+ void addInstruction(Instruction *I) {
+ if (auto *Call = dyn_cast<CallBase>(I))
+ return addCall(Call);
+ if (auto *Load = dyn_cast<LoadInst>(I))
+ return addAccessedPtr(I, Load->getPointerOperand(), Load->getType(),
+ Load->getAlign());
+ if (auto *Store = dyn_cast<StoreInst>(I))
+ return addAccessedPtr(I, Store->getPointerOperand(),
+ Store->getValueOperand()->getType(),
+ Store->getAlign());
+ // TODO: Add support for the other Instructions.
+ // TODO: Maybe we should look around and merge with other llvm.assume.
+ }
+};
+
+} // namespace
+
+AssumeInst *llvm::buildAssumeFromInst(Instruction *I) {
+ if (!EnableKnowledgeRetention)
+ return nullptr;
+ AssumeBuilderState Builder(I->getModule());
+ Builder.addInstruction(I);
+ return Builder.build();
+}
+
+void llvm::salvageKnowledge(Instruction *I, AssumptionCache *AC,
+ DominatorTree *DT) {
+ if (!EnableKnowledgeRetention || I->isTerminator())
+ return;
+ AssumeBuilderState Builder(I->getModule(), I, AC, DT);
+ Builder.addInstruction(I);
+ if (auto *Intr = Builder.build()) {
+ Intr->insertBefore(I);
+ if (AC)
+ AC->registerAssumption(Intr);
+ }
+}
+
+AssumeInst *
+llvm::buildAssumeFromKnowledge(ArrayRef<RetainedKnowledge> Knowledge,
+ Instruction *CtxI, AssumptionCache *AC,
+ DominatorTree *DT) {
+ AssumeBuilderState Builder(CtxI->getModule(), CtxI, AC, DT);
+ for (const RetainedKnowledge &RK : Knowledge)
+ Builder.addKnowledge(RK);
+ return Builder.build();
+}
+
+RetainedKnowledge llvm::simplifyRetainedKnowledge(AssumeInst *Assume,
+ RetainedKnowledge RK,
+ AssumptionCache *AC,
+ DominatorTree *DT) {
+ AssumeBuilderState Builder(Assume->getModule(), Assume, AC, DT);
+ RK = canonicalizedKnowledge(RK, Assume->getModule()->getDataLayout());
+
+ if (!Builder.isKnowledgeWorthPreserving(RK))
+ return RetainedKnowledge::none();
+
+ if (Builder.tryToPreserveWithoutAddingAssume(RK))
+ return RetainedKnowledge::none();
+ return RK;
+}
+
+namespace {
+
+struct AssumeSimplify {
+ Function &F;
+ AssumptionCache &AC;
+ DominatorTree *DT;
+ LLVMContext &C;
+ SmallDenseSet<IntrinsicInst *> CleanupToDo;
+ StringMapEntry<uint32_t> *IgnoreTag;
+ SmallDenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 4>, 8> BBToAssume;
+ bool MadeChange = false;
+
+ AssumeSimplify(Function &F, AssumptionCache &AC, DominatorTree *DT,
+ LLVMContext &C)
+ : F(F), AC(AC), DT(DT), C(C),
+ IgnoreTag(C.getOrInsertBundleTag(IgnoreBundleTag)) {}
+
+ void buildMapping(bool FilterBooleanArgument) {
+ BBToAssume.clear();
+ for (Value *V : AC.assumptions()) {
+ if (!V)
+ continue;
+ IntrinsicInst *Assume = cast<IntrinsicInst>(V);
+ if (FilterBooleanArgument) {
+ auto *Arg = dyn_cast<ConstantInt>(Assume->getOperand(0));
+ if (!Arg || Arg->isZero())
+ continue;
+ }
+ BBToAssume[Assume->getParent()].push_back(Assume);
+ }
+
+ for (auto &Elem : BBToAssume) {
+ llvm::sort(Elem.second,
+ [](const IntrinsicInst *LHS, const IntrinsicInst *RHS) {
+ return LHS->comesBefore(RHS);
+ });
+ }
+ }
+
+  /// Remove all assumes in CleanupToDo if their boolean argument is true and
+  /// ForceCleanup is set, or if the assume doesn't hold valuable knowledge.
+ void RunCleanup(bool ForceCleanup) {
+ for (IntrinsicInst *Assume : CleanupToDo) {
+ auto *Arg = dyn_cast<ConstantInt>(Assume->getOperand(0));
+ if (!Arg || Arg->isZero() ||
+ (!ForceCleanup &&
+ !isAssumeWithEmptyBundle(cast<AssumeInst>(*Assume))))
+ continue;
+ MadeChange = true;
+ if (ForceCleanup)
+ NumAssumesMerged++;
+ else
+ NumAssumesRemoved++;
+ Assume->eraseFromParent();
+ }
+ CleanupToDo.clear();
+ }
+
+  /// Remove knowledge stored in an assume when it is already known from an
+  /// attribute or another assume. When valid, this can update existing
+  /// knowledge in an attribute or another assume.
+ void dropRedundantKnowledge() {
+ struct MapValue {
+ IntrinsicInst *Assume;
+ uint64_t ArgValue;
+ CallInst::BundleOpInfo *BOI;
+ };
+ buildMapping(false);
+ SmallDenseMap<std::pair<Value *, Attribute::AttrKind>,
+ SmallVector<MapValue, 2>, 16>
+ Knowledge;
+ for (BasicBlock *BB : depth_first(&F))
+ for (Value *V : BBToAssume[BB]) {
+ if (!V)
+ continue;
+ IntrinsicInst *Assume = cast<IntrinsicInst>(V);
+ for (CallInst::BundleOpInfo &BOI : Assume->bundle_op_infos()) {
+ auto RemoveFromAssume = [&]() {
+ CleanupToDo.insert(Assume);
+ if (BOI.Begin != BOI.End) {
+ Use *U = &Assume->op_begin()[BOI.Begin + ABA_WasOn];
+ U->set(UndefValue::get(U->get()->getType()));
+ }
+ BOI.Tag = IgnoreTag;
+ };
+ if (BOI.Tag == IgnoreTag) {
+ CleanupToDo.insert(Assume);
+ continue;
+ }
+ RetainedKnowledge RK =
+ getKnowledgeFromBundle(cast<AssumeInst>(*Assume), BOI);
+ if (auto *Arg = dyn_cast_or_null<Argument>(RK.WasOn)) {
+ bool HasSameKindAttr = Arg->hasAttribute(RK.AttrKind);
+ if (HasSameKindAttr)
+ if (!Attribute::isIntAttrKind(RK.AttrKind) ||
+ Arg->getAttribute(RK.AttrKind).getValueAsInt() >=
+ RK.ArgValue) {
+ RemoveFromAssume();
+ continue;
+ }
+ if (isValidAssumeForContext(
+ Assume, &*F.getEntryBlock().getFirstInsertionPt()) ||
+ Assume == &*F.getEntryBlock().getFirstInsertionPt()) {
+ if (HasSameKindAttr)
+ Arg->removeAttr(RK.AttrKind);
+ Arg->addAttr(Attribute::get(C, RK.AttrKind, RK.ArgValue));
+ MadeChange = true;
+ RemoveFromAssume();
+ continue;
+ }
+ }
+ auto &Lookup = Knowledge[{RK.WasOn, RK.AttrKind}];
+ for (MapValue &Elem : Lookup) {
+ if (!isValidAssumeForContext(Elem.Assume, Assume, DT))
+ continue;
+ if (Elem.ArgValue >= RK.ArgValue) {
+ RemoveFromAssume();
+ continue;
+ } else if (isValidAssumeForContext(Assume, Elem.Assume, DT)) {
+ Elem.Assume->op_begin()[Elem.BOI->Begin + ABA_Argument].set(
+ ConstantInt::get(Type::getInt64Ty(C), RK.ArgValue));
+ MadeChange = true;
+ RemoveFromAssume();
+ continue;
+ }
+ }
+ Lookup.push_back({Assume, RK.ArgValue, &BOI});
+ }
+ }
+ }
+
+ using MergeIterator = SmallVectorImpl<IntrinsicInst *>::iterator;
+
+  /// Merge all Assumes from Begin to End and insert the resulting assume as
+  /// high as possible in the basic block.
+ void mergeRange(BasicBlock *BB, MergeIterator Begin, MergeIterator End) {
+ if (Begin == End || std::next(Begin) == End)
+ return;
+ /// Provide no additional information so that AssumeBuilderState doesn't
+ /// try to do any punning since it already has been done better.
+ AssumeBuilderState Builder(F.getParent());
+
+ /// For now it is initialized to the best value it could have
+ Instruction *InsertPt = BB->getFirstNonPHI();
+ if (isa<LandingPadInst>(InsertPt))
+ InsertPt = InsertPt->getNextNode();
+ for (IntrinsicInst *I : make_range(Begin, End)) {
+ CleanupToDo.insert(I);
+ for (CallInst::BundleOpInfo &BOI : I->bundle_op_infos()) {
+ RetainedKnowledge RK =
+ getKnowledgeFromBundle(cast<AssumeInst>(*I), BOI);
+ if (!RK)
+ continue;
+ Builder.addKnowledge(RK);
+ if (auto *I = dyn_cast_or_null<Instruction>(RK.WasOn))
+ if (I->getParent() == InsertPt->getParent() &&
+ (InsertPt->comesBefore(I) || InsertPt == I))
+ InsertPt = I->getNextNode();
+ }
+ }
+
+ /// Adjust InsertPt if it is before Begin, since mergeAssumes only
+ /// guarantees we can place the resulting assume between Begin and End.
+ if (InsertPt->comesBefore(*Begin))
+ for (auto It = (*Begin)->getIterator(), E = InsertPt->getIterator();
+ It != E; --It)
+ if (!isGuaranteedToTransferExecutionToSuccessor(&*It)) {
+ InsertPt = It->getNextNode();
+ break;
+ }
+ auto *MergedAssume = Builder.build();
+ if (!MergedAssume)
+ return;
+ MadeChange = true;
+ MergedAssume->insertBefore(InsertPt);
+ AC.registerAssumption(MergedAssume);
+ }
+
+  /// Merge assumes when they are in the same BasicBlock and, for every
+  /// instruction between them, isGuaranteedToTransferExecutionToSuccessor
+  /// returns true.
+ void mergeAssumes() {
+ buildMapping(true);
+
+ SmallVector<MergeIterator, 4> SplitPoints;
+ for (auto &Elem : BBToAssume) {
+ SmallVectorImpl<IntrinsicInst *> &AssumesInBB = Elem.second;
+ if (AssumesInBB.size() < 2)
+ continue;
+ /// AssumesInBB is already sorted by order in the block.
+
+ BasicBlock::iterator It = AssumesInBB.front()->getIterator();
+ BasicBlock::iterator E = AssumesInBB.back()->getIterator();
+ SplitPoints.push_back(AssumesInBB.begin());
+ MergeIterator LastSplit = AssumesInBB.begin();
+ for (; It != E; ++It)
+ if (!isGuaranteedToTransferExecutionToSuccessor(&*It)) {
+ for (; (*LastSplit)->comesBefore(&*It); ++LastSplit)
+ ;
+ if (SplitPoints.back() != LastSplit)
+ SplitPoints.push_back(LastSplit);
+ }
+ SplitPoints.push_back(AssumesInBB.end());
+ for (auto SplitIt = SplitPoints.begin();
+ SplitIt != std::prev(SplitPoints.end()); SplitIt++) {
+ mergeRange(Elem.first, *SplitIt, *(SplitIt + 1));
+ }
+ SplitPoints.clear();
+ }
+ }
+};
+
+bool simplifyAssumes(Function &F, AssumptionCache *AC, DominatorTree *DT) {
+ AssumeSimplify AS(F, *AC, DT, F.getContext());
+
+ /// Remove knowledge that is already known by a dominating other assume or an
+ /// attribute.
+ AS.dropRedundantKnowledge();
+
+ /// Remove assume that are empty.
+ AS.RunCleanup(false);
+
+ /// Merge assume in the same basicblock when possible.
+ AS.mergeAssumes();
+
+ /// Remove assume that were merged.
+ AS.RunCleanup(true);
+ return AS.MadeChange;
+}
+
+} // namespace
+
+PreservedAnalyses AssumeSimplifyPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ if (!EnableKnowledgeRetention)
+ return PreservedAnalyses::all();
+ simplifyAssumes(F, &AM.getResult<AssumptionAnalysis>(F),
+ AM.getCachedResult<DominatorTreeAnalysis>(F));
+ return PreservedAnalyses::all();
+}
+
+namespace {
+class AssumeSimplifyPassLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ AssumeSimplifyPassLegacyPass() : FunctionPass(ID) {
+ initializeAssumeSimplifyPassLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F) || !EnableKnowledgeRetention)
+ return false;
+ AssumptionCache &AC =
+ getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ return simplifyAssumes(F, &AC, DTWP ? &DTWP->getDomTree() : nullptr);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+
+ AU.setPreservesAll();
+ }
+};
+} // namespace
+
+char AssumeSimplifyPassLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(AssumeSimplifyPassLegacyPass, "assume-simplify",
+ "Assume Simplify", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_END(AssumeSimplifyPassLegacyPass, "assume-simplify",
+ "Assume Simplify", false, false)
+
+FunctionPass *llvm::createAssumeSimplifyPass() {
+ return new AssumeSimplifyPassLegacyPass();
+}
+
+PreservedAnalyses AssumeBuilderPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
+ DominatorTree* DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
+ for (Instruction &I : instructions(F))
+ salvageKnowledge(&I, AC, DT);
+ return PreservedAnalyses::all();
+}
+
+namespace {
+class AssumeBuilderPassLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ AssumeBuilderPassLegacyPass() : FunctionPass(ID) {
+ initializeAssumeBuilderPassLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override {
+ AssumptionCache &AC =
+ getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ for (Instruction &I : instructions(F))
+ salvageKnowledge(&I, &AC, DTWP ? &DTWP->getDomTree() : nullptr);
+ return true;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+
+ AU.setPreservesAll();
+ }
+};
+} // namespace
+
+char AssumeBuilderPassLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(AssumeBuilderPassLegacyPass, "assume-builder",
+ "Assume Builder", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_END(AssumeBuilderPassLegacyPass, "assume-builder",
+ "Assume Builder", false, false)
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/BasicBlockUtils.cpp
new file mode 100644
index 0000000000..58a226fc60
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -0,0 +1,1999 @@
+//===- BasicBlockUtils.cpp - BasicBlock Utilities --------------------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions perform manipulations on basic blocks, and
+// instructions contained within basic blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <cassert>
+#include <cstdint>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "basicblock-utils"
+
+static cl::opt<unsigned> MaxDeoptOrUnreachableSuccessorCheckDepth(
+ "max-deopt-or-unreachable-succ-check-depth", cl::init(8), cl::Hidden,
+ cl::desc("Set the maximum path length when checking whether a basic block "
+ "is followed by a block that either has a terminating "
+ "deoptimizing call or is terminated with an unreachable"));
+
+void llvm::detachDeadBlocks(
+ ArrayRef<BasicBlock *> BBs,
+ SmallVectorImpl<DominatorTree::UpdateType> *Updates,
+ bool KeepOneInputPHIs) {
+ for (auto *BB : BBs) {
+ // Loop through all of our successors and make sure they know that one
+ // of their predecessors is going away.
+ SmallPtrSet<BasicBlock *, 4> UniqueSuccessors;
+ for (BasicBlock *Succ : successors(BB)) {
+ Succ->removePredecessor(BB, KeepOneInputPHIs);
+ if (Updates && UniqueSuccessors.insert(Succ).second)
+ Updates->push_back({DominatorTree::Delete, BB, Succ});
+ }
+
+ // Zap all the instructions in the block.
+ while (!BB->empty()) {
+ Instruction &I = BB->back();
+ // If this instruction is used, replace uses with an arbitrary value.
+ // Because control flow can't get here, we don't care what we replace the
+ // value with. Note that since this block is unreachable, and all values
+ // contained within it must dominate their uses, that all uses will
+ // eventually be removed (they are themselves dead).
+ if (!I.use_empty())
+ I.replaceAllUsesWith(PoisonValue::get(I.getType()));
+ BB->back().eraseFromParent();
+ }
+ new UnreachableInst(BB->getContext(), BB);
+ assert(BB->size() == 1 &&
+ isa<UnreachableInst>(BB->getTerminator()) &&
+ "The successor list of BB isn't empty before "
+ "applying corresponding DTU updates.");
+ }
+}
+
+void llvm::DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU,
+ bool KeepOneInputPHIs) {
+ DeleteDeadBlocks({BB}, DTU, KeepOneInputPHIs);
+}
+
+void llvm::DeleteDeadBlocks(ArrayRef <BasicBlock *> BBs, DomTreeUpdater *DTU,
+ bool KeepOneInputPHIs) {
+#ifndef NDEBUG
+  // Make sure that all predecessors of each dead block are also dead.
+ SmallPtrSet<BasicBlock *, 4> Dead(BBs.begin(), BBs.end());
+ assert(Dead.size() == BBs.size() && "Duplicating blocks?");
+ for (auto *BB : Dead)
+ for (BasicBlock *Pred : predecessors(BB))
+ assert(Dead.count(Pred) && "All predecessors must be dead!");
+#endif
+
+ SmallVector<DominatorTree::UpdateType, 4> Updates;
+ detachDeadBlocks(BBs, DTU ? &Updates : nullptr, KeepOneInputPHIs);
+
+ if (DTU)
+ DTU->applyUpdates(Updates);
+
+ for (BasicBlock *BB : BBs)
+ if (DTU)
+ DTU->deleteBB(BB);
+ else
+ BB->eraseFromParent();
+}
+
+bool llvm::EliminateUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
+ bool KeepOneInputPHIs) {
+ df_iterator_default_set<BasicBlock*> Reachable;
+
+ // Mark all reachable blocks.
+ for (BasicBlock *BB : depth_first_ext(&F, Reachable))
+ (void)BB/* Mark all reachable blocks */;
+
+ // Collect all dead blocks.
+ std::vector<BasicBlock*> DeadBlocks;
+ for (BasicBlock &BB : F)
+ if (!Reachable.count(&BB))
+ DeadBlocks.push_back(&BB);
+
+ // Delete the dead blocks.
+ DeleteDeadBlocks(DeadBlocks, DTU, KeepOneInputPHIs);
+
+ return !DeadBlocks.empty();
+}
+
+bool llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
+ MemoryDependenceResults *MemDep) {
+ if (!isa<PHINode>(BB->begin()))
+ return false;
+
+ while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+ if (PN->getIncomingValue(0) != PN)
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ else
+ PN->replaceAllUsesWith(PoisonValue::get(PN->getType()));
+
+ if (MemDep)
+ MemDep->removeInstruction(PN); // Memdep updates AA itself.
+
+ PN->eraseFromParent();
+ }
+ return true;
+}
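+
+// Illustrative IR sketch for the fold above (block and value names are
+// invented for this example): with "bb" having the unique predecessor "pred",
+//
+//   bb:
+//     %p = phi i32 [ %v, %pred ]
+//     %r = add i32 %p, 1
+//
+// becomes, after FoldSingleEntryPHINodes(bb),
+//
+//   bb:
+//     %r = add i32 %v, 1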
+
+bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI,
+ MemorySSAUpdater *MSSAU) {
+ // Recursively deleting a PHI may cause multiple PHIs to be deleted
+ // or RAUW'd undef, so use an array of WeakTrackingVH for the PHIs to delete.
+ SmallVector<WeakTrackingVH, 8> PHIs;
+ for (PHINode &PN : BB->phis())
+ PHIs.push_back(&PN);
+
+ bool Changed = false;
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
+ if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i].operator Value*()))
+ Changed |= RecursivelyDeleteDeadPHINode(PN, TLI, MSSAU);
+
+ return Changed;
+}
+
+bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
+ LoopInfo *LI, MemorySSAUpdater *MSSAU,
+ MemoryDependenceResults *MemDep,
+ bool PredecessorWithTwoSuccessors,
+ DominatorTree *DT) {
+ if (BB->hasAddressTaken())
+ return false;
+
+ // Can't merge if there are multiple predecessors, or no predecessors.
+ BasicBlock *PredBB = BB->getUniquePredecessor();
+ if (!PredBB) return false;
+
+ // Don't break self-loops.
+ if (PredBB == BB) return false;
+
+ // Don't break unwinding instructions or terminators with other side-effects.
+ Instruction *PTI = PredBB->getTerminator();
+ if (PTI->isExceptionalTerminator() || PTI->mayHaveSideEffects())
+ return false;
+
+ // Can't merge if there are multiple distinct successors.
+ if (!PredecessorWithTwoSuccessors && PredBB->getUniqueSuccessor() != BB)
+ return false;
+
+  // Currently only allow PredBB to have two successors, one of them being BB.
+  // Update its branch to target BB's only successor instead of BB.
+ BranchInst *PredBB_BI;
+ BasicBlock *NewSucc = nullptr;
+ unsigned FallThruPath;
+ if (PredecessorWithTwoSuccessors) {
+ if (!(PredBB_BI = dyn_cast<BranchInst>(PTI)))
+ return false;
+ BranchInst *BB_JmpI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BB_JmpI || !BB_JmpI->isUnconditional())
+ return false;
+ NewSucc = BB_JmpI->getSuccessor(0);
+ FallThruPath = PredBB_BI->getSuccessor(0) == BB ? 0 : 1;
+ }
+
+  // Can't merge if there is a PHI loop.
+ for (PHINode &PN : BB->phis())
+ if (llvm::is_contained(PN.incoming_values(), &PN))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Merging: " << BB->getName() << " into "
+ << PredBB->getName() << "\n");
+
+ // Begin by getting rid of unneeded PHIs.
+ SmallVector<AssertingVH<Value>, 4> IncomingValues;
+ if (isa<PHINode>(BB->front())) {
+ for (PHINode &PN : BB->phis())
+ if (!isa<PHINode>(PN.getIncomingValue(0)) ||
+ cast<PHINode>(PN.getIncomingValue(0))->getParent() != BB)
+ IncomingValues.push_back(PN.getIncomingValue(0));
+ FoldSingleEntryPHINodes(BB, MemDep);
+ }
+
+ if (DT) {
+ assert(!DTU && "cannot use both DT and DTU for updates");
+ DomTreeNode *PredNode = DT->getNode(PredBB);
+ DomTreeNode *BBNode = DT->getNode(BB);
+ if (PredNode) {
+ assert(BBNode && "PredNode unreachable but BBNode reachable?");
+ for (DomTreeNode *C : to_vector(BBNode->children()))
+ C->setIDom(PredNode);
+ }
+ }
+ // DTU update: Collect all the edges that exit BB.
+ // These dominator edges will be redirected from Pred.
+ std::vector<DominatorTree::UpdateType> Updates;
+ if (DTU) {
+ assert(!DT && "cannot use both DT and DTU for updates");
+    // To avoid processing the same successor more than once.
+ SmallPtrSet<BasicBlock *, 8> SeenSuccs;
+ SmallPtrSet<BasicBlock *, 2> SuccsOfPredBB(succ_begin(PredBB),
+ succ_end(PredBB));
+ Updates.reserve(Updates.size() + 2 * succ_size(BB) + 1);
+ // Add insert edges first. Experimentally, for the particular case of two
+ // blocks that can be merged, with a single successor and single predecessor
+ // respectively, it is beneficial to have all insert updates first. Deleting
+ // edges first may lead to unreachable blocks, followed by inserting edges
+ // making the blocks reachable again. Such DT updates lead to high compile
+ // times. We add inserts before deletes here to reduce compile time.
+ for (BasicBlock *SuccOfBB : successors(BB))
+ // This successor of BB may already be a PredBB's successor.
+ if (!SuccsOfPredBB.contains(SuccOfBB))
+ if (SeenSuccs.insert(SuccOfBB).second)
+ Updates.push_back({DominatorTree::Insert, PredBB, SuccOfBB});
+ SeenSuccs.clear();
+ for (BasicBlock *SuccOfBB : successors(BB))
+ if (SeenSuccs.insert(SuccOfBB).second)
+ Updates.push_back({DominatorTree::Delete, BB, SuccOfBB});
+ Updates.push_back({DominatorTree::Delete, PredBB, BB});
+ }
+
+ Instruction *STI = BB->getTerminator();
+ Instruction *Start = &*BB->begin();
+ // If there's nothing to move, mark the starting instruction as the last
+ // instruction in the block. Terminator instruction is handled separately.
+ if (Start == STI)
+ Start = PTI;
+
+ // Move all definitions in the successor to the predecessor...
+ PredBB->splice(PTI->getIterator(), BB, BB->begin(), STI->getIterator());
+
+ if (MSSAU)
+ MSSAU->moveAllAfterMergeBlocks(BB, PredBB, Start);
+
+ // Make all PHI nodes that referred to BB now refer to Pred as their
+ // source...
+ BB->replaceAllUsesWith(PredBB);
+
+ if (PredecessorWithTwoSuccessors) {
+ // Delete the unconditional branch from BB.
+ BB->back().eraseFromParent();
+
+ // Update branch in the predecessor.
+ PredBB_BI->setSuccessor(FallThruPath, NewSucc);
+ } else {
+ // Delete the unconditional branch from the predecessor.
+ PredBB->back().eraseFromParent();
+
+ // Move terminator instruction.
+ PredBB->splice(PredBB->end(), BB);
+
+ // Terminator may be a memory accessing instruction too.
+ if (MSSAU)
+ if (MemoryUseOrDef *MUD = cast_or_null<MemoryUseOrDef>(
+ MSSAU->getMemorySSA()->getMemoryAccess(PredBB->getTerminator())))
+ MSSAU->moveToPlace(MUD, PredBB, MemorySSA::End);
+ }
+ // Add unreachable to now empty BB.
+ new UnreachableInst(BB->getContext(), BB);
+
+  // If the predecessor has no name, inherit BB's name.
+ if (!PredBB->hasName())
+ PredBB->takeName(BB);
+
+ if (LI)
+ LI->removeBlock(BB);
+
+ if (MemDep)
+ MemDep->invalidateCachedPredecessors();
+
+ if (DTU)
+ DTU->applyUpdates(Updates);
+
+ if (DT) {
+ assert(succ_empty(BB) &&
+ "successors should have been transferred to PredBB");
+ DT->eraseNode(BB);
+ }
+
+ // Finally, erase the old block and update dominator info.
+ DeleteDeadBlock(BB, DTU);
+
+ return true;
+}
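+
+// Usage sketch for MergeBlockIntoPredecessor (caller-side code is assumed; DT
+// and LI are placeholders for analyses the caller already owns):
+//
+//   DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+//   if (MergeBlockIntoPredecessor(BB, &DTU, LI))
+//     ; // BB has been erased; its instructions now live in its predecessor.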
+
+bool llvm::MergeBlockSuccessorsIntoGivenBlocks(
+ SmallPtrSetImpl<BasicBlock *> &MergeBlocks, Loop *L, DomTreeUpdater *DTU,
+ LoopInfo *LI) {
+ assert(!MergeBlocks.empty() && "MergeBlocks should not be empty");
+
+ bool BlocksHaveBeenMerged = false;
+ while (!MergeBlocks.empty()) {
+ BasicBlock *BB = *MergeBlocks.begin();
+ BasicBlock *Dest = BB->getSingleSuccessor();
+ if (Dest && (!L || L->contains(Dest))) {
+ BasicBlock *Fold = Dest->getUniquePredecessor();
+ (void)Fold;
+ if (MergeBlockIntoPredecessor(Dest, DTU, LI)) {
+ assert(Fold == BB &&
+ "Expecting BB to be unique predecessor of the Dest block");
+ MergeBlocks.erase(Dest);
+ BlocksHaveBeenMerged = true;
+ } else
+ MergeBlocks.erase(BB);
+ } else
+ MergeBlocks.erase(BB);
+ }
+ return BlocksHaveBeenMerged;
+}
+
+/// Remove redundant instructions within sequences of consecutive dbg.value
+/// instructions. This is done using a backward scan to keep the last dbg.value
+/// describing a specific variable/fragment.
+///
+/// BackwardScan strategy:
+/// ----------------------
+/// Given a sequence of consecutive DbgValueInst like this
+///
+/// dbg.value ..., "x", FragmentX1 (*)
+/// dbg.value ..., "y", FragmentY1
+/// dbg.value ..., "x", FragmentX2
+/// dbg.value ..., "x", FragmentX1 (**)
+///
+/// then the instruction marked with (*) can be removed (it is guaranteed to be
+/// obsoleted by the instruction marked with (**) as the latter instruction is
+/// describing the same variable using the same fragment info).
+///
+/// Possible improvements:
+/// - Check fully overlapping fragments and not only identical fragments.
+/// - Support dbg.addr, dbg.declare. dbg.label, and possibly other meta
+/// instructions being part of the sequence of consecutive instructions.
+static bool removeRedundantDbgInstrsUsingBackwardScan(BasicBlock *BB) {
+ SmallVector<DbgValueInst *, 8> ToBeRemoved;
+ SmallDenseSet<DebugVariable> VariableSet;
+ for (auto &I : reverse(*BB)) {
+ if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(&I)) {
+ DebugVariable Key(DVI->getVariable(),
+ DVI->getExpression(),
+ DVI->getDebugLoc()->getInlinedAt());
+ auto R = VariableSet.insert(Key);
+ // If the variable fragment hasn't been seen before then we don't want
+ // to remove this dbg intrinsic.
+ if (R.second)
+ continue;
+
+ if (auto *DAI = dyn_cast<DbgAssignIntrinsic>(DVI)) {
+ // Don't delete dbg.assign intrinsics that are linked to instructions.
+ if (!at::getAssignmentInsts(DAI).empty())
+ continue;
+ // Unlinked dbg.assign intrinsics can be treated like dbg.values.
+ }
+
+      // If the same variable fragment is described more than once it is enough
+      // to keep the last one (i.e. the first one found, since we iterate in
+      // reverse).
+ ToBeRemoved.push_back(DVI);
+ continue;
+ }
+    // A sequence of consecutive dbg.value instrs ended. Clear the set to
+    // restart identifying redundant instructions in case we find another
+    // dbg.value sequence.
+ VariableSet.clear();
+ }
+
+ for (auto &Instr : ToBeRemoved)
+ Instr->eraseFromParent();
+
+ return !ToBeRemoved.empty();
+}
+
+/// Remove redundant dbg.value instructions using a forward scan. This can
+/// remove a dbg.value instruction that is redundant due to indicating that a
+/// variable has the same value as already being indicated by an earlier
+/// dbg.value.
+///
+/// ForwardScan strategy:
+/// ---------------------
+/// Given two identical dbg.value instructions, separated by a block of
+/// instructions none of which describes the same variable, like this
+///
+/// dbg.value X1, "x", FragmentX1 (**)
+/// <block of instructions, none being "dbg.value ..., "x", ...">
+/// dbg.value X1, "x", FragmentX1 (*)
+///
+/// then the instruction marked with (*) can be removed. Variable "x" is already
+/// described as being mapped to the SSA value X1.
+///
+/// Possible improvements:
+/// - Keep track of non-overlapping fragments.
+static bool removeRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) {
+ SmallVector<DbgValueInst *, 8> ToBeRemoved;
+ DenseMap<DebugVariable, std::pair<SmallVector<Value *, 4>, DIExpression *>>
+ VariableMap;
+ for (auto &I : *BB) {
+ if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(&I)) {
+ DebugVariable Key(DVI->getVariable(), std::nullopt,
+ DVI->getDebugLoc()->getInlinedAt());
+ auto VMI = VariableMap.find(Key);
+ auto *DAI = dyn_cast<DbgAssignIntrinsic>(DVI);
+ // A dbg.assign with no linked instructions can be treated like a
+ // dbg.value (i.e. can be deleted).
+ bool IsDbgValueKind = (!DAI || at::getAssignmentInsts(DAI).empty());
+
+ // Update the map if we found a new value/expression describing the
+ // variable, or if the variable wasn't mapped already.
+ SmallVector<Value *, 4> Values(DVI->getValues());
+ if (VMI == VariableMap.end() || VMI->second.first != Values ||
+ VMI->second.second != DVI->getExpression()) {
+        // Use a sentinel value (nullptr) for the DIExpression when we see a
+ // linked dbg.assign so that the next debug intrinsic will never match
+ // it (i.e. always treat linked dbg.assigns as if they're unique).
+ if (IsDbgValueKind)
+ VariableMap[Key] = {Values, DVI->getExpression()};
+ else
+ VariableMap[Key] = {Values, nullptr};
+ continue;
+ }
+
+ // Don't delete dbg.assign intrinsics that are linked to instructions.
+ if (!IsDbgValueKind)
+ continue;
+ ToBeRemoved.push_back(DVI);
+ }
+ }
+
+ for (auto &Instr : ToBeRemoved)
+ Instr->eraseFromParent();
+
+ return !ToBeRemoved.empty();
+}
+
+/// Remove redundant undef dbg.assign intrinsic from an entry block using a
+/// forward scan.
+/// Strategy:
+/// ---------------------
+/// Scanning forward, delete dbg.assign intrinsics iff they are undef, not
+/// linked to an intrinsic, and don't share an aggregate variable with a debug
+/// intrinsic that didn't meet the criteria. In other words, undef dbg.assigns
+/// that come before non-undef debug intrinsics for the variable are
+/// deleted. Given:
+///
+/// dbg.assign undef, "x", FragmentX1 (*)
+/// <block of instructions, none being "dbg.value ..., "x", ...">
+/// dbg.value %V, "x", FragmentX2
+/// <block of instructions, none being "dbg.value ..., "x", ...">
+/// dbg.assign undef, "x", FragmentX1
+///
+/// then (only) the instruction marked with (*) can be removed.
+/// Possible improvements:
+/// - Keep track of non-overlapping fragments.
+static bool removeUndefDbgAssignsFromEntryBlock(BasicBlock *BB) {
+ assert(BB->isEntryBlock() && "expected entry block");
+ SmallVector<DbgAssignIntrinsic *, 8> ToBeRemoved;
+ DenseSet<DebugVariable> SeenDefForAggregate;
+ // Returns the DebugVariable for DVI with no fragment info.
+ auto GetAggregateVariable = [](DbgValueInst *DVI) {
+ return DebugVariable(DVI->getVariable(), std::nullopt,
+ DVI->getDebugLoc()->getInlinedAt());
+ };
+
+ // Remove undef dbg.assign intrinsics that are encountered before
+ // any non-undef intrinsics from the entry block.
+ for (auto &I : *BB) {
+ DbgValueInst *DVI = dyn_cast<DbgValueInst>(&I);
+ if (!DVI)
+ continue;
+ auto *DAI = dyn_cast<DbgAssignIntrinsic>(DVI);
+ bool IsDbgValueKind = (!DAI || at::getAssignmentInsts(DAI).empty());
+ DebugVariable Aggregate = GetAggregateVariable(DVI);
+ if (!SeenDefForAggregate.contains(Aggregate)) {
+ bool IsKill = DVI->isKillLocation() && IsDbgValueKind;
+ if (!IsKill) {
+ SeenDefForAggregate.insert(Aggregate);
+ } else if (DAI) {
+ ToBeRemoved.push_back(DAI);
+ }
+ }
+ }
+
+ for (DbgAssignIntrinsic *DAI : ToBeRemoved)
+ DAI->eraseFromParent();
+
+ return !ToBeRemoved.empty();
+}
+
+bool llvm::RemoveRedundantDbgInstrs(BasicBlock *BB) {
+ bool MadeChanges = false;
+ // By using the "backward scan" strategy before the "forward scan" strategy we
+ // can remove both dbg.value (2) and (3) in a situation like this:
+ //
+ // (1) dbg.value V1, "x", DIExpression()
+ // ...
+ // (2) dbg.value V2, "x", DIExpression()
+ // (3) dbg.value V1, "x", DIExpression()
+ //
+  // The backward scan will remove (2), as it is made obsolete by (3). After
+  // getting (2) out of the way, the forward scan will remove (3) since "x"
+  // is already described as having the value V1 at (1).
+ MadeChanges |= removeRedundantDbgInstrsUsingBackwardScan(BB);
+ if (BB->isEntryBlock() &&
+ isAssignmentTrackingEnabled(*BB->getParent()->getParent()))
+    MadeChanges |= removeUndefDbgAssignsFromEntryBlock(BB);
+ MadeChanges |= removeRedundantDbgInstrsUsingForwardScan(BB);
+
+ if (MadeChanges)
+ LLVM_DEBUG(dbgs() << "Removed redundant dbg instrs from: "
+ << BB->getName() << "\n");
+ return MadeChanges;
+}
+
+void llvm::ReplaceInstWithValue(BasicBlock::iterator &BI, Value *V) {
+ Instruction &I = *BI;
+ // Replaces all of the uses of the instruction with uses of the value
+ I.replaceAllUsesWith(V);
+
+ // Make sure to propagate a name if there is one already.
+ if (I.hasName() && !V->hasName())
+ V->takeName(&I);
+
+ // Delete the unnecessary instruction now...
+ BI = BI->eraseFromParent();
+}
+
+void llvm::ReplaceInstWithInst(BasicBlock *BB, BasicBlock::iterator &BI,
+ Instruction *I) {
+ assert(I->getParent() == nullptr &&
+ "ReplaceInstWithInst: Instruction already inserted into basic block!");
+
+ // Copy debug location to newly added instruction, if it wasn't already set
+ // by the caller.
+ if (!I->getDebugLoc())
+ I->setDebugLoc(BI->getDebugLoc());
+
+ // Insert the new instruction into the basic block...
+ BasicBlock::iterator New = I->insertInto(BB, BI);
+
+ // Replace all uses of the old instruction, and delete it.
+ ReplaceInstWithValue(BI, I);
+
+ // Move BI back to point to the newly inserted instruction
+ BI = New;
+}
+
+bool llvm::IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB) {
+  // Remember visited blocks to avoid an infinite loop.
+ SmallPtrSet<const BasicBlock *, 8> VisitedBlocks;
+ unsigned Depth = 0;
+ while (BB && Depth++ < MaxDeoptOrUnreachableSuccessorCheckDepth &&
+ VisitedBlocks.insert(BB).second) {
+ if (BB->getTerminatingDeoptimizeCall() ||
+ isa<UnreachableInst>(BB->getTerminator()))
+ return true;
+ BB = BB->getUniqueSuccessor();
+ }
+ return false;
+}
+
+void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
+ BasicBlock::iterator BI(From);
+ ReplaceInstWithInst(From->getParent(), BI, To);
+}
+
+BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
+ LoopInfo *LI, MemorySSAUpdater *MSSAU,
+ const Twine &BBName) {
+ unsigned SuccNum = GetSuccessorNumber(BB, Succ);
+
+ Instruction *LatchTerm = BB->getTerminator();
+
+ CriticalEdgeSplittingOptions Options =
+ CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA();
+
+ if ((isCriticalEdge(LatchTerm, SuccNum, Options.MergeIdenticalEdges))) {
+    // If it is a critical edge and the successor is an exception block, handle
+    // the split-edge logic in the EH-aware helper.
+ if (Succ->isEHPad())
+ return ehAwareSplitEdge(BB, Succ, nullptr, nullptr, Options, BBName);
+
+ // If this is a critical edge, let SplitKnownCriticalEdge do it.
+ return SplitKnownCriticalEdge(LatchTerm, SuccNum, Options, BBName);
+ }
+
+ // If the edge isn't critical, then BB has a single successor or Succ has a
+ // single pred. Split the block.
+ if (BasicBlock *SP = Succ->getSinglePredecessor()) {
+ // If the successor only has a single pred, split the top of the successor
+ // block.
+ assert(SP == BB && "CFG broken");
+ SP = nullptr;
+ return SplitBlock(Succ, &Succ->front(), DT, LI, MSSAU, BBName,
+ /*Before=*/true);
+ }
+
+ // Otherwise, if BB has a single successor, split it at the bottom of the
+ // block.
+ assert(BB->getTerminator()->getNumSuccessors() == 1 &&
+ "Should have a single succ!");
+ return SplitBlock(BB, BB->getTerminator(), DT, LI, MSSAU, BBName);
+}
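+
+// For illustration (assumed CFG, names invented): splitting the edge from BB
+// to Succ when BB has other successors inserts a fresh block on that one edge
+// only; Succ's remaining predecessors (not shown) are untouched:
+//
+//   before:   BB --> Succ           after:   BB --> BB.split --> Succ
+//              \--> Other                     \--> Other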
+
+void llvm::setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) {
+ if (auto *II = dyn_cast<InvokeInst>(TI))
+ II->setUnwindDest(Succ);
+ else if (auto *CS = dyn_cast<CatchSwitchInst>(TI))
+ CS->setUnwindDest(Succ);
+ else if (auto *CR = dyn_cast<CleanupReturnInst>(TI))
+ CR->setUnwindDest(Succ);
+ else
+ llvm_unreachable("unexpected terminator instruction");
+}
+
+void llvm::updatePhiNodes(BasicBlock *DestBB, BasicBlock *OldPred,
+ BasicBlock *NewPred, PHINode *Until) {
+ int BBIdx = 0;
+ for (PHINode &PN : DestBB->phis()) {
+ // We manually update the LandingPadReplacement PHINode and it is the last
+ // PHI Node. So, if we find it, we are done.
+ if (Until == &PN)
+ break;
+
+ // Reuse the previous value of BBIdx if it lines up. In cases where we
+ // have multiple phi nodes with *lots* of predecessors, this is a speed
+ // win because we don't have to scan the PHI looking for TIBB. This
+ // happens because the BB list of PHI nodes are usually in the same
+ // order.
+ if (PN.getIncomingBlock(BBIdx) != OldPred)
+ BBIdx = PN.getBasicBlockIndex(OldPred);
+
+ assert(BBIdx != -1 && "Invalid PHI Index!");
+ PN.setIncomingBlock(BBIdx, NewPred);
+ }
+}
+
+BasicBlock *llvm::ehAwareSplitEdge(BasicBlock *BB, BasicBlock *Succ,
+ LandingPadInst *OriginalPad,
+ PHINode *LandingPadReplacement,
+ const CriticalEdgeSplittingOptions &Options,
+ const Twine &BBName) {
+
+ auto *PadInst = Succ->getFirstNonPHI();
+ if (!LandingPadReplacement && !PadInst->isEHPad())
+ return SplitEdge(BB, Succ, Options.DT, Options.LI, Options.MSSAU, BBName);
+
+ auto *LI = Options.LI;
+ SmallVector<BasicBlock *, 4> LoopPreds;
+ // Check if extra modifications will be required to preserve loop-simplify
+ // form after splitting. If it would require splitting blocks with IndirectBr
+ // terminators, bail out if preserving loop-simplify form is requested.
+ if (Options.PreserveLoopSimplify && LI) {
+ if (Loop *BBLoop = LI->getLoopFor(BB)) {
+
+ // The only way that we can break LoopSimplify form by splitting a
+ // critical edge is when there exists some edge from BBLoop to Succ *and*
+ // the only edge into Succ from outside of BBLoop is that of NewBB after
+ // the split. If the first isn't true, then LoopSimplify still holds,
+ // NewBB is the new exit block and it has no non-loop predecessors. If the
+ // second isn't true, then Succ was not in LoopSimplify form prior to
+ // the split as it had a non-loop predecessor. In both of these cases,
+ // the predecessor must be directly in BBLoop, not in a subloop, or again
+ // LoopSimplify doesn't hold.
+ for (BasicBlock *P : predecessors(Succ)) {
+ if (P == BB)
+ continue; // The new block is known.
+ if (LI->getLoopFor(P) != BBLoop) {
+          // The loop is not in LoopSimplify form, so there is no need to
+          // re-simplify after splitting the edge.
+ LoopPreds.clear();
+ break;
+ }
+ LoopPreds.push_back(P);
+ }
+ // Loop-simplify form can be preserved, if we can split all in-loop
+ // predecessors.
+ if (any_of(LoopPreds, [](BasicBlock *Pred) {
+ return isa<IndirectBrInst>(Pred->getTerminator());
+ })) {
+ return nullptr;
+ }
+ }
+ }
+
+ auto *NewBB =
+ BasicBlock::Create(BB->getContext(), BBName, BB->getParent(), Succ);
+ setUnwindEdgeTo(BB->getTerminator(), NewBB);
+ updatePhiNodes(Succ, BB, NewBB, LandingPadReplacement);
+
+ if (LandingPadReplacement) {
+ auto *NewLP = OriginalPad->clone();
+ auto *Terminator = BranchInst::Create(Succ, NewBB);
+ NewLP->insertBefore(Terminator);
+ LandingPadReplacement->addIncoming(NewLP, NewBB);
+ } else {
+ Value *ParentPad = nullptr;
+ if (auto *FuncletPad = dyn_cast<FuncletPadInst>(PadInst))
+ ParentPad = FuncletPad->getParentPad();
+ else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(PadInst))
+ ParentPad = CatchSwitch->getParentPad();
+ else if (auto *CleanupPad = dyn_cast<CleanupPadInst>(PadInst))
+ ParentPad = CleanupPad->getParentPad();
+ else if (auto *LandingPad = dyn_cast<LandingPadInst>(PadInst))
+ ParentPad = LandingPad->getParent();
+ else
+ llvm_unreachable("handling for other EHPads not implemented yet");
+
+ auto *NewCleanupPad = CleanupPadInst::Create(ParentPad, {}, BBName, NewBB);
+ CleanupReturnInst::Create(NewCleanupPad, Succ, NewBB);
+ }
+
+ auto *DT = Options.DT;
+ auto *MSSAU = Options.MSSAU;
+ if (!DT && !LI)
+ return NewBB;
+
+ if (DT) {
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+ SmallVector<DominatorTree::UpdateType, 3> Updates;
+
+ Updates.push_back({DominatorTree::Insert, BB, NewBB});
+ Updates.push_back({DominatorTree::Insert, NewBB, Succ});
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
+
+ DTU.applyUpdates(Updates);
+ DTU.flush();
+
+ if (MSSAU) {
+ MSSAU->applyUpdates(Updates, *DT);
+ if (VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+ }
+ }
+
+ if (LI) {
+ if (Loop *BBLoop = LI->getLoopFor(BB)) {
+      // If one of the two blocks is not in a loop, the new block is not in a
+      // loop either, and thus LI doesn't need to be updated.
+ if (Loop *SuccLoop = LI->getLoopFor(Succ)) {
+ if (BBLoop == SuccLoop) {
+ // Both in the same loop, the NewBB joins loop.
+ SuccLoop->addBasicBlockToLoop(NewBB, *LI);
+ } else if (BBLoop->contains(SuccLoop)) {
+ // Edge from an outer loop to an inner loop. Add to the outer loop.
+ BBLoop->addBasicBlockToLoop(NewBB, *LI);
+ } else if (SuccLoop->contains(BBLoop)) {
+ // Edge from an inner loop to an outer loop. Add to the outer loop.
+ SuccLoop->addBasicBlockToLoop(NewBB, *LI);
+ } else {
+ // Edge from two loops with no containment relation. Because these
+ // are natural loops, we know that the destination block must be the
+ // header of its loop (adding a branch into a loop elsewhere would
+ // create an irreducible loop).
+ assert(SuccLoop->getHeader() == Succ &&
+ "Should not create irreducible loops!");
+ if (Loop *P = SuccLoop->getParentLoop())
+ P->addBasicBlockToLoop(NewBB, *LI);
+ }
+ }
+
+ // If BB is in a loop and Succ is outside of that loop, we may need to
+ // update LoopSimplify form and LCSSA form.
+ if (!BBLoop->contains(Succ)) {
+ assert(!BBLoop->contains(NewBB) &&
+ "Split point for loop exit is contained in loop!");
+
+ // Update LCSSA form in the newly created exit block.
+ if (Options.PreserveLCSSA) {
+ createPHIsForSplitLoopExit(BB, NewBB, Succ);
+ }
+
+ if (!LoopPreds.empty()) {
+ BasicBlock *NewExitBB = SplitBlockPredecessors(
+ Succ, LoopPreds, "split", DT, LI, MSSAU, Options.PreserveLCSSA);
+ if (Options.PreserveLCSSA)
+ createPHIsForSplitLoopExit(LoopPreds, NewExitBB, Succ);
+ }
+ }
+ }
+ }
+
+ return NewBB;
+}
+
+void llvm::createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
+ BasicBlock *SplitBB, BasicBlock *DestBB) {
+ // SplitBB shouldn't have anything non-trivial in it yet.
+ assert((SplitBB->getFirstNonPHI() == SplitBB->getTerminator() ||
+ SplitBB->isLandingPad()) &&
+ "SplitBB has non-PHI nodes!");
+
+ // For each PHI in the destination block.
+ for (PHINode &PN : DestBB->phis()) {
+ int Idx = PN.getBasicBlockIndex(SplitBB);
+ assert(Idx >= 0 && "Invalid Block Index");
+ Value *V = PN.getIncomingValue(Idx);
+
+ // If the input is a PHI which already satisfies LCSSA, don't create
+ // a new one.
+ if (const PHINode *VP = dyn_cast<PHINode>(V))
+ if (VP->getParent() == SplitBB)
+ continue;
+
+ // Otherwise a new PHI is needed. Create one and populate it.
+ PHINode *NewPN = PHINode::Create(
+ PN.getType(), Preds.size(), "split",
+ SplitBB->isLandingPad() ? &SplitBB->front() : SplitBB->getTerminator());
+ for (BasicBlock *BB : Preds)
+ NewPN->addIncoming(V, BB);
+
+ // Update the original PHI.
+ PN.setIncomingValue(Idx, NewPN);
+ }
+}
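+
+// Illustrative LCSSA shape produced above (names are invented): for a loop
+// value %v used by a PHI in DestBB, the new exit block SplitBB receives a
+// single-input PHI so the use in DestBB is reached through it:
+//
+//   split:                                  ; SplitBB
+//     %v.split = phi i32 [ %v, %pred ]
+//     br label %dest
+//   dest:                                   ; DestBB
+//     %p = phi i32 [ %v.split, %split ], ...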
+
+unsigned
+llvm::SplitAllCriticalEdges(Function &F,
+ const CriticalEdgeSplittingOptions &Options) {
+ unsigned NumBroken = 0;
+ for (BasicBlock &BB : F) {
+ Instruction *TI = BB.getTerminator();
+ if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI))
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ if (SplitCriticalEdge(TI, i, Options))
+ ++NumBroken;
+ }
+ return NumBroken;
+}
+
+static BasicBlock *SplitBlockImpl(BasicBlock *Old, Instruction *SplitPt,
+ DomTreeUpdater *DTU, DominatorTree *DT,
+ LoopInfo *LI, MemorySSAUpdater *MSSAU,
+ const Twine &BBName, bool Before) {
+ if (Before) {
+ DomTreeUpdater LocalDTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+ return splitBlockBefore(Old, SplitPt,
+ DTU ? DTU : (DT ? &LocalDTU : nullptr), LI, MSSAU,
+ BBName);
+ }
+ BasicBlock::iterator SplitIt = SplitPt->getIterator();
+ while (isa<PHINode>(SplitIt) || SplitIt->isEHPad()) {
+ ++SplitIt;
+ assert(SplitIt != SplitPt->getParent()->end());
+ }
+ std::string Name = BBName.str();
+ BasicBlock *New = Old->splitBasicBlock(
+ SplitIt, Name.empty() ? Old->getName() + ".split" : Name);
+
+ // The new block lives in whichever loop the old one did. This preserves
+ // LCSSA as well, because we force the split point to be after any PHI nodes.
+ if (LI)
+ if (Loop *L = LI->getLoopFor(Old))
+ L->addBasicBlockToLoop(New, *LI);
+
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+ // Old dominates New. New node dominates all other nodes dominated by Old.
+ SmallPtrSet<BasicBlock *, 8> UniqueSuccessorsOfOld;
+ Updates.push_back({DominatorTree::Insert, Old, New});
+ Updates.reserve(Updates.size() + 2 * succ_size(New));
+ for (BasicBlock *SuccessorOfOld : successors(New))
+ if (UniqueSuccessorsOfOld.insert(SuccessorOfOld).second) {
+ Updates.push_back({DominatorTree::Insert, New, SuccessorOfOld});
+ Updates.push_back({DominatorTree::Delete, Old, SuccessorOfOld});
+ }
+
+ DTU->applyUpdates(Updates);
+ } else if (DT)
+ // Old dominates New. New node dominates all other nodes dominated by Old.
+ if (DomTreeNode *OldNode = DT->getNode(Old)) {
+ std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());
+
+ DomTreeNode *NewNode = DT->addNewBlock(New, Old);
+ for (DomTreeNode *I : Children)
+ DT->changeImmediateDominator(I, NewNode);
+ }
+
+ // Move MemoryAccesses still tracked in Old, but part of New now.
+ // Update accesses in successor blocks accordingly.
+ if (MSSAU)
+ MSSAU->moveAllAfterSpliceBlocks(Old, New, &*(New->begin()));
+
+ return New;
+}
+
+BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
+ DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU, const Twine &BBName,
+ bool Before) {
+ return SplitBlockImpl(Old, SplitPt, /*DTU=*/nullptr, DT, LI, MSSAU, BBName,
+ Before);
+}
+BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
+ DomTreeUpdater *DTU, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU, const Twine &BBName,
+ bool Before) {
+ return SplitBlockImpl(Old, SplitPt, DTU, /*DT=*/nullptr, LI, MSSAU, BBName,
+ Before);
+}
+
+BasicBlock *llvm::splitBlockBefore(BasicBlock *Old, Instruction *SplitPt,
+ DomTreeUpdater *DTU, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU,
+ const Twine &BBName) {
+
+ BasicBlock::iterator SplitIt = SplitPt->getIterator();
+ while (isa<PHINode>(SplitIt) || SplitIt->isEHPad())
+ ++SplitIt;
+ std::string Name = BBName.str();
+ BasicBlock *New = Old->splitBasicBlock(
+ SplitIt, Name.empty() ? Old->getName() + ".split" : Name,
+ /* Before=*/true);
+
+ // The new block lives in whichever loop the old one did. This preserves
+ // LCSSA as well, because we force the split point to be after any PHI nodes.
+ if (LI)
+ if (Loop *L = LI->getLoopFor(Old))
+ L->addBasicBlockToLoop(New, *LI);
+
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 8> DTUpdates;
+ // New dominates Old. The predecessor nodes of the Old node dominate
+ // New node.
+ SmallPtrSet<BasicBlock *, 8> UniquePredecessorsOfOld;
+ DTUpdates.push_back({DominatorTree::Insert, New, Old});
+ DTUpdates.reserve(DTUpdates.size() + 2 * pred_size(New));
+ for (BasicBlock *PredecessorOfOld : predecessors(New))
+ if (UniquePredecessorsOfOld.insert(PredecessorOfOld).second) {
+ DTUpdates.push_back({DominatorTree::Insert, PredecessorOfOld, New});
+ DTUpdates.push_back({DominatorTree::Delete, PredecessorOfOld, Old});
+ }
+
+ DTU->applyUpdates(DTUpdates);
+
+ // Move MemoryAccesses still tracked in Old, but part of New now.
+ // Update accesses in successor blocks accordingly.
+ if (MSSAU) {
+ MSSAU->applyUpdates(DTUpdates, DTU->getDomTree());
+ if (VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+ }
+ }
+ return New;
+}
+
+/// Update DominatorTree, LoopInfo, and LCSSA analysis information.
+static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
+ ArrayRef<BasicBlock *> Preds,
+ DomTreeUpdater *DTU, DominatorTree *DT,
+ LoopInfo *LI, MemorySSAUpdater *MSSAU,
+ bool PreserveLCSSA, bool &HasLoopExit) {
+ // Update dominator tree if available.
+ if (DTU) {
+ // Recalculation of DomTree is needed when updating a forward DomTree and
+ // the Entry BB is replaced.
+ if (NewBB->isEntryBlock() && DTU->hasDomTree()) {
+ // The entry block was removed and there is no external interface for
+ // the dominator tree to be notified of this change. In this corner-case
+ // we recalculate the entire tree.
+ DTU->recalculate(*NewBB->getParent());
+ } else {
+ // Split block expects NewBB to have a non-empty set of predecessors.
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+ SmallPtrSet<BasicBlock *, 8> UniquePreds;
+ Updates.push_back({DominatorTree::Insert, NewBB, OldBB});
+ Updates.reserve(Updates.size() + 2 * Preds.size());
+ for (auto *Pred : Preds)
+ if (UniquePreds.insert(Pred).second) {
+ Updates.push_back({DominatorTree::Insert, Pred, NewBB});
+ Updates.push_back({DominatorTree::Delete, Pred, OldBB});
+ }
+ DTU->applyUpdates(Updates);
+ }
+ } else if (DT) {
+ if (OldBB == DT->getRootNode()->getBlock()) {
+ assert(NewBB->isEntryBlock());
+ DT->setNewRoot(NewBB);
+ } else {
+ // Split block expects NewBB to have a non-empty set of predecessors.
+ DT->splitBlock(NewBB);
+ }
+ }
+
+ // Update MemoryPhis after split if MemorySSA is available
+ if (MSSAU)
+ MSSAU->wireOldPredecessorsToNewImmediatePredecessor(OldBB, NewBB, Preds);
+
+ // The rest of the logic is only relevant for updating the loop structures.
+ if (!LI)
+ return;
+
+ if (DTU && DTU->hasDomTree())
+ DT = &DTU->getDomTree();
+ assert(DT && "DT should be available to update LoopInfo!");
+ Loop *L = LI->getLoopFor(OldBB);
+
+ // If we need to preserve loop analyses, collect some information about how
+ // this split will affect loops.
+ bool IsLoopEntry = !!L;
+ bool SplitMakesNewLoopHeader = false;
+ for (BasicBlock *Pred : Preds) {
+    // Preds that are not reachable from entry should not be used to identify
+    // whether OldBB is a loop entry or whether SplitMakesNewLoopHeader holds.
+    // Unreachable blocks are not within any loops, so using them would
+    // incorrectly mark SplitMakesNewLoopHeader as true and make NewBB the
+    // header of some loop, which breaks LI.
+ if (!DT->isReachableFromEntry(Pred))
+ continue;
+ // If we need to preserve LCSSA, determine if any of the preds is a loop
+ // exit.
+ if (PreserveLCSSA)
+ if (Loop *PL = LI->getLoopFor(Pred))
+ if (!PL->contains(OldBB))
+ HasLoopExit = true;
+
+ // If we need to preserve LoopInfo, note whether any of the preds crosses
+ // an interesting loop boundary.
+ if (!L)
+ continue;
+ if (L->contains(Pred))
+ IsLoopEntry = false;
+ else
+ SplitMakesNewLoopHeader = true;
+ }
+
+ // Unless we have a loop for OldBB, nothing else to do here.
+ if (!L)
+ return;
+
+ if (IsLoopEntry) {
+ // Add the new block to the nearest enclosing loop (and not an adjacent
+ // loop). To find this, examine each of the predecessors and determine which
+ // loops enclose them, and select the most-nested loop which contains the
+ // loop containing the block being split.
+ Loop *InnermostPredLoop = nullptr;
+ for (BasicBlock *Pred : Preds) {
+ if (Loop *PredLoop = LI->getLoopFor(Pred)) {
+ // Seek a loop which actually contains the block being split (to avoid
+ // adjacent loops).
+ while (PredLoop && !PredLoop->contains(OldBB))
+ PredLoop = PredLoop->getParentLoop();
+
+ // Select the most-nested of these loops which contains the block.
+ if (PredLoop && PredLoop->contains(OldBB) &&
+ (!InnermostPredLoop ||
+ InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth()))
+ InnermostPredLoop = PredLoop;
+ }
+ }
+
+ if (InnermostPredLoop)
+ InnermostPredLoop->addBasicBlockToLoop(NewBB, *LI);
+ } else {
+ L->addBasicBlockToLoop(NewBB, *LI);
+ if (SplitMakesNewLoopHeader)
+ L->moveToHeader(NewBB);
+ }
+}
+
+/// Update the PHI nodes in OrigBB to include the values coming from NewBB.
+/// This also updates AliasAnalysis, if available.
+static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
+ ArrayRef<BasicBlock *> Preds, BranchInst *BI,
+ bool HasLoopExit) {
+ // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB.
+ SmallPtrSet<BasicBlock *, 16> PredSet(Preds.begin(), Preds.end());
+ for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) {
+ PHINode *PN = cast<PHINode>(I++);
+
+ // Check to see if all of the values coming in are the same. If so, we
+ // don't need to create a new PHI node, unless it's needed for LCSSA.
+ Value *InVal = nullptr;
+ if (!HasLoopExit) {
+ InVal = PN->getIncomingValueForBlock(Preds[0]);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ if (!PredSet.count(PN->getIncomingBlock(i)))
+ continue;
+ if (!InVal)
+ InVal = PN->getIncomingValue(i);
+ else if (InVal != PN->getIncomingValue(i)) {
+ InVal = nullptr;
+ break;
+ }
+ }
+ }
+
+ if (InVal) {
+ // If all incoming values for the new PHI would be the same, just don't
+ // make a new PHI. Instead, just remove the incoming values from the old
+ // PHI.
+
+ // NOTE! This loop walks backwards for a reason! First off, this minimizes
+ // the cost of removal if we end up removing a large number of values, and
+ // second off, this ensures that the indices for the incoming values
+ // aren't invalidated when we remove one.
+ for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i)
+ if (PredSet.count(PN->getIncomingBlock(i)))
+ PN->removeIncomingValue(i, false);
+
+ // Add an incoming value to the PHI node in the loop for the preheader
+ // edge.
+ PN->addIncoming(InVal, NewBB);
+ continue;
+ }
+
+ // If the values coming into the block are not the same, we need a new
+ // PHI.
+ // Create the new PHI node, insert it into NewBB at the end of the block
+ PHINode *NewPHI =
+ PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI);
+
+ // NOTE! This loop walks backwards for a reason! First off, this minimizes
+ // the cost of removal if we end up removing a large number of values, and
+ // second off, this ensures that the indices for the incoming values aren't
+ // invalidated when we remove one.
+ for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) {
+ BasicBlock *IncomingBB = PN->getIncomingBlock(i);
+ if (PredSet.count(IncomingBB)) {
+ Value *V = PN->removeIncomingValue(i, false);
+ NewPHI->addIncoming(V, IncomingBB);
+ }
+ }
+
+ PN->addIncoming(NewPHI, NewBB);
+ }
+}
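+
+// Worked example for the PHI update above (assumed IR; names invented): with
+// Preds = {p1, p2} now branching to NewBB instead of OrigBB, a PHI in OrigBB
+// such as
+//
+//   %x = phi i32 [ %a, %p1 ], [ %b, %p2 ], [ %c, %other ]
+//
+// is rewritten so the values from p1 and p2 merge in a new PHI in NewBB:
+//
+//   ; in NewBB:   %x.ph = phi i32 [ %a, %p1 ], [ %b, %p2 ]
+//   ; in OrigBB:  %x    = phi i32 [ %x.ph, %NewBB ], [ %c, %other ]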
+
+static void SplitLandingPadPredecessorsImpl(
+ BasicBlock *OrigBB, ArrayRef<BasicBlock *> Preds, const char *Suffix1,
+ const char *Suffix2, SmallVectorImpl<BasicBlock *> &NewBBs,
+ DomTreeUpdater *DTU, DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU, bool PreserveLCSSA);
+
+static BasicBlock *
+SplitBlockPredecessorsImpl(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
+ const char *Suffix, DomTreeUpdater *DTU,
+ DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
+ // Do not attempt to split that which cannot be split.
+ if (!BB->canSplitPredecessors())
+ return nullptr;
+
+  // For landing pads we need to act a bit differently.
+  // Delegate this work to SplitLandingPadPredecessors.
+ if (BB->isLandingPad()) {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ std::string NewName = std::string(Suffix) + ".split-lp";
+
+ SplitLandingPadPredecessorsImpl(BB, Preds, Suffix, NewName.c_str(), NewBBs,
+ DTU, DT, LI, MSSAU, PreserveLCSSA);
+ return NewBBs[0];
+ }
+
+ // Create new basic block, insert right before the original block.
+ BasicBlock *NewBB = BasicBlock::Create(
+ BB->getContext(), BB->getName() + Suffix, BB->getParent(), BB);
+
+ // The new block unconditionally branches to the old block.
+ BranchInst *BI = BranchInst::Create(BB, NewBB);
+
+ Loop *L = nullptr;
+ BasicBlock *OldLatch = nullptr;
+ // Splitting the predecessors of a loop header creates a preheader block.
+ if (LI && LI->isLoopHeader(BB)) {
+ L = LI->getLoopFor(BB);
+ // Using the loop start line number prevents debuggers stepping into the
+ // loop body for this instruction.
+ BI->setDebugLoc(L->getStartLoc());
+
+ // If BB is the header of the Loop, it is possible that the loop is
+ // modified, such that the current latch does not remain the latch of the
+ // loop. If that is the case, the loop metadata from the current latch needs
+ // to be applied to the new latch.
+ OldLatch = L->getLoopLatch();
+ } else
+ BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc());
+
+ // Move the edges from Preds to point to NewBB instead of BB.
+ for (BasicBlock *Pred : Preds) {
+ // This is slightly more strict than necessary; the minimum requirement
+ // is that there be no more than one indirectbr branching to BB. And
+ // all BlockAddress uses would need to be updated.
+ assert(!isa<IndirectBrInst>(Pred->getTerminator()) &&
+ "Cannot split an edge from an IndirectBrInst");
+ Pred->getTerminator()->replaceSuccessorWith(BB, NewBB);
+ }
+
+ // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
+ // node becomes an incoming value for BB's phi node. However, if the Preds
+ // list is empty, we need to insert dummy entries into the PHI nodes in BB to
+ // account for the newly created predecessor.
+ if (Preds.empty()) {
+ // Insert dummy values as the incoming value.
+ for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)
+ cast<PHINode>(I)->addIncoming(PoisonValue::get(I->getType()), NewBB);
+ }
+
+  // Update DominatorTree, LoopInfo, and LCSSA analysis information.
+ bool HasLoopExit = false;
+ UpdateAnalysisInformation(BB, NewBB, Preds, DTU, DT, LI, MSSAU, PreserveLCSSA,
+ HasLoopExit);
+
+ if (!Preds.empty()) {
+ // Update the PHI nodes in BB with the values coming from NewBB.
+ UpdatePHINodes(BB, NewBB, Preds, BI, HasLoopExit);
+ }
+
+ if (OldLatch) {
+ BasicBlock *NewLatch = L->getLoopLatch();
+ if (NewLatch != OldLatch) {
+ MDNode *MD = OldLatch->getTerminator()->getMetadata("llvm.loop");
+ NewLatch->getTerminator()->setMetadata("llvm.loop", MD);
+ // It's still possible that OldLatch is the latch of another inner loop,
+ // in which case we do not remove the metadata.
+ Loop *IL = LI->getLoopFor(OldLatch);
+ if (IL && IL->getLoopLatch() != OldLatch)
+ OldLatch->getTerminator()->setMetadata("llvm.loop", nullptr);
+ }
+ }
+
+ return NewBB;
+}
+
+BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
+ ArrayRef<BasicBlock *> Preds,
+ const char *Suffix, DominatorTree *DT,
+ LoopInfo *LI, MemorySSAUpdater *MSSAU,
+ bool PreserveLCSSA) {
+ return SplitBlockPredecessorsImpl(BB, Preds, Suffix, /*DTU=*/nullptr, DT, LI,
+ MSSAU, PreserveLCSSA);
+}
+BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
+ ArrayRef<BasicBlock *> Preds,
+ const char *Suffix,
+ DomTreeUpdater *DTU, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU,
+ bool PreserveLCSSA) {
+ return SplitBlockPredecessorsImpl(BB, Preds, Suffix, DTU,
+ /*DT=*/nullptr, LI, MSSAU, PreserveLCSSA);
+}
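+
+// Usage sketch (caller-side code is assumed; Header, OutsidePreds, DT and LI
+// are placeholders): creating a dedicated preheader by splitting off all
+// predecessors of a loop header that come from outside the loop:
+//
+//   SmallVector<BasicBlock *, 4> OutsidePreds;  // collected non-loop preds
+//   BasicBlock *Preheader =
+//       SplitBlockPredecessors(Header, OutsidePreds, ".preheader", DT, LI);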
+
+static void SplitLandingPadPredecessorsImpl(
+ BasicBlock *OrigBB, ArrayRef<BasicBlock *> Preds, const char *Suffix1,
+ const char *Suffix2, SmallVectorImpl<BasicBlock *> &NewBBs,
+ DomTreeUpdater *DTU, DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
+ assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!");
+
+ // Create a new basic block for OrigBB's predecessors listed in Preds. Insert
+ // it right before the original block.
+ BasicBlock *NewBB1 = BasicBlock::Create(OrigBB->getContext(),
+ OrigBB->getName() + Suffix1,
+ OrigBB->getParent(), OrigBB);
+ NewBBs.push_back(NewBB1);
+
+ // The new block unconditionally branches to the old block.
+ BranchInst *BI1 = BranchInst::Create(OrigBB, NewBB1);
+ BI1->setDebugLoc(OrigBB->getFirstNonPHI()->getDebugLoc());
+
+ // Move the edges from Preds to point to NewBB1 instead of OrigBB.
+ for (BasicBlock *Pred : Preds) {
+ // This is slightly more strict than necessary; the minimum requirement
+ // is that there be no more than one indirectbr branching to BB. And
+ // all BlockAddress uses would need to be updated.
+ assert(!isa<IndirectBrInst>(Pred->getTerminator()) &&
+ "Cannot split an edge from an IndirectBrInst");
+ Pred->getTerminator()->replaceUsesOfWith(OrigBB, NewBB1);
+ }
+
+ bool HasLoopExit = false;
+ UpdateAnalysisInformation(OrigBB, NewBB1, Preds, DTU, DT, LI, MSSAU,
+ PreserveLCSSA, HasLoopExit);
+
+ // Update the PHI nodes in OrigBB with the values coming from NewBB1.
+ UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, HasLoopExit);
+
+ // Move the remaining edges from OrigBB to point to NewBB2.
+ SmallVector<BasicBlock*, 8> NewBB2Preds;
+ for (pred_iterator i = pred_begin(OrigBB), e = pred_end(OrigBB);
+ i != e; ) {
+ BasicBlock *Pred = *i++;
+ if (Pred == NewBB1) continue;
+ assert(!isa<IndirectBrInst>(Pred->getTerminator()) &&
+ "Cannot split an edge from an IndirectBrInst");
+ NewBB2Preds.push_back(Pred);
+ e = pred_end(OrigBB);
+ }
+
+ BasicBlock *NewBB2 = nullptr;
+ if (!NewBB2Preds.empty()) {
+ // Create another basic block for the rest of OrigBB's predecessors.
+ NewBB2 = BasicBlock::Create(OrigBB->getContext(),
+ OrigBB->getName() + Suffix2,
+ OrigBB->getParent(), OrigBB);
+ NewBBs.push_back(NewBB2);
+
+ // The new block unconditionally branches to the old block.
+ BranchInst *BI2 = BranchInst::Create(OrigBB, NewBB2);
+ BI2->setDebugLoc(OrigBB->getFirstNonPHI()->getDebugLoc());
+
+ // Move the remaining edges from OrigBB to point to NewBB2.
+ for (BasicBlock *NewBB2Pred : NewBB2Preds)
+ NewBB2Pred->getTerminator()->replaceUsesOfWith(OrigBB, NewBB2);
+
+    // Update DominatorTree, LoopInfo, and LCSSA analysis information.
+ HasLoopExit = false;
+ UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, DTU, DT, LI, MSSAU,
+ PreserveLCSSA, HasLoopExit);
+
+ // Update the PHI nodes in OrigBB with the values coming from NewBB2.
+ UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, HasLoopExit);
+ }
+
+ LandingPadInst *LPad = OrigBB->getLandingPadInst();
+ Instruction *Clone1 = LPad->clone();
+ Clone1->setName(Twine("lpad") + Suffix1);
+ Clone1->insertInto(NewBB1, NewBB1->getFirstInsertionPt());
+
+ if (NewBB2) {
+ Instruction *Clone2 = LPad->clone();
+ Clone2->setName(Twine("lpad") + Suffix2);
+ Clone2->insertInto(NewBB2, NewBB2->getFirstInsertionPt());
+
+ // Create a PHI node for the two cloned landingpad instructions only
+ // if the original landingpad instruction has some uses.
+ if (!LPad->use_empty()) {
+ assert(!LPad->getType()->isTokenTy() &&
+ "Split cannot be applied if LPad is token type. Otherwise an "
+ "invalid PHINode of token type would be created.");
+ PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad);
+ PN->addIncoming(Clone1, NewBB1);
+ PN->addIncoming(Clone2, NewBB2);
+ LPad->replaceAllUsesWith(PN);
+ }
+ LPad->eraseFromParent();
+ } else {
+ // There is no second clone. Just replace the landing pad with the first
+ // clone.
+ LPad->replaceAllUsesWith(Clone1);
+ LPad->eraseFromParent();
+ }
+}
+
+void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
+ ArrayRef<BasicBlock *> Preds,
+ const char *Suffix1, const char *Suffix2,
+ SmallVectorImpl<BasicBlock *> &NewBBs,
+ DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU,
+ bool PreserveLCSSA) {
+ return SplitLandingPadPredecessorsImpl(
+ OrigBB, Preds, Suffix1, Suffix2, NewBBs,
+ /*DTU=*/nullptr, DT, LI, MSSAU, PreserveLCSSA);
+}
+void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
+ ArrayRef<BasicBlock *> Preds,
+ const char *Suffix1, const char *Suffix2,
+ SmallVectorImpl<BasicBlock *> &NewBBs,
+ DomTreeUpdater *DTU, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU,
+ bool PreserveLCSSA) {
+ return SplitLandingPadPredecessorsImpl(OrigBB, Preds, Suffix1, Suffix2,
+ NewBBs, DTU, /*DT=*/nullptr, LI, MSSAU,
+ PreserveLCSSA);
+}
+
+ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
+ BasicBlock *Pred,
+ DomTreeUpdater *DTU) {
+ Instruction *UncondBranch = Pred->getTerminator();
+ // Clone the return and add it to the end of the predecessor.
+ Instruction *NewRet = RI->clone();
+ NewRet->insertInto(Pred, Pred->end());
+
+ // If the return instruction returns a value, and if the value was a
+ // PHI node in "BB", propagate the right value into the return.
+ for (Use &Op : NewRet->operands()) {
+ Value *V = Op;
+ Instruction *NewBC = nullptr;
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) {
+ // Return value might be bitcasted. Clone and insert it before the
+ // return instruction.
+ V = BCI->getOperand(0);
+ NewBC = BCI->clone();
+ NewBC->insertInto(Pred, NewRet->getIterator());
+ Op = NewBC;
+ }
+
+ Instruction *NewEV = nullptr;
+ if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(V)) {
+ V = EVI->getOperand(0);
+ NewEV = EVI->clone();
+ if (NewBC) {
+ NewBC->setOperand(0, NewEV);
+ NewEV->insertInto(Pred, NewBC->getIterator());
+ } else {
+ NewEV->insertInto(Pred, NewRet->getIterator());
+ Op = NewEV;
+ }
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ if (PN->getParent() == BB) {
+ if (NewEV) {
+ NewEV->setOperand(0, PN->getIncomingValueForBlock(Pred));
+ } else if (NewBC)
+ NewBC->setOperand(0, PN->getIncomingValueForBlock(Pred));
+ else
+ Op = PN->getIncomingValueForBlock(Pred);
+ }
+ }
+ }
+
+ // Update any PHI nodes in the returning block to realize that we no
+ // longer branch to them.
+ BB->removePredecessor(Pred);
+ UncondBranch->eraseFromParent();
+
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Delete, Pred, BB}});
+
+ return cast<ReturnInst>(NewRet);
+}
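+
+// Illustration (assumed IR; names invented): folding the return of "bb" into
+// its unconditional predecessor "pred" clones the return and resolves the PHI
+// through that edge:
+//
+//   pred:                                pred:
+//     br label %bb                         ret i32 %a
+//   bb:                                  bb:
+//     %r = phi i32 [ %a, %pred ], ...      %r = phi i32 ...  ; %pred removed
+//     ret i32 %r                           ret i32 %r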
+
+static Instruction *
+SplitBlockAndInsertIfThenImpl(Value *Cond, Instruction *SplitBefore,
+ bool Unreachable, MDNode *BranchWeights,
+ DomTreeUpdater *DTU, DominatorTree *DT,
+ LoopInfo *LI, BasicBlock *ThenBlock) {
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+ BasicBlock *Head = SplitBefore->getParent();
+ BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
+ if (DTU) {
+ SmallPtrSet<BasicBlock *, 8> UniqueSuccessorsOfHead;
+ Updates.push_back({DominatorTree::Insert, Head, Tail});
+ Updates.reserve(Updates.size() + 2 * succ_size(Tail));
+ for (BasicBlock *SuccessorOfHead : successors(Tail))
+ if (UniqueSuccessorsOfHead.insert(SuccessorOfHead).second) {
+ Updates.push_back({DominatorTree::Insert, Tail, SuccessorOfHead});
+ Updates.push_back({DominatorTree::Delete, Head, SuccessorOfHead});
+ }
+ }
+ Instruction *HeadOldTerm = Head->getTerminator();
+ LLVMContext &C = Head->getContext();
+ Instruction *CheckTerm;
+ bool CreateThenBlock = (ThenBlock == nullptr);
+ if (CreateThenBlock) {
+ ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
+ if (Unreachable)
+ CheckTerm = new UnreachableInst(C, ThenBlock);
+ else {
+ CheckTerm = BranchInst::Create(Tail, ThenBlock);
+ if (DTU)
+ Updates.push_back({DominatorTree::Insert, ThenBlock, Tail});
+ }
+ CheckTerm->setDebugLoc(SplitBefore->getDebugLoc());
+ } else
+ CheckTerm = ThenBlock->getTerminator();
+ BranchInst *HeadNewTerm =
+ BranchInst::Create(/*ifTrue*/ ThenBlock, /*ifFalse*/ Tail, Cond);
+ if (DTU)
+ Updates.push_back({DominatorTree::Insert, Head, ThenBlock});
+ HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
+ ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
+
+ if (DTU)
+ DTU->applyUpdates(Updates);
+ else if (DT) {
+ if (DomTreeNode *OldNode = DT->getNode(Head)) {
+ std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());
+
+ DomTreeNode *NewNode = DT->addNewBlock(Tail, Head);
+ for (DomTreeNode *Child : Children)
+ DT->changeImmediateDominator(Child, NewNode);
+
+ // Head dominates ThenBlock.
+ if (CreateThenBlock)
+ DT->addNewBlock(ThenBlock, Head);
+ else
+ DT->changeImmediateDominator(ThenBlock, Head);
+ }
+ }
+
+ if (LI) {
+ if (Loop *L = LI->getLoopFor(Head)) {
+ L->addBasicBlockToLoop(ThenBlock, *LI);
+ L->addBasicBlockToLoop(Tail, *LI);
+ }
+ }
+
+ return CheckTerm;
+}
+
+Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
+ Instruction *SplitBefore,
+ bool Unreachable,
+ MDNode *BranchWeights,
+ DominatorTree *DT, LoopInfo *LI,
+ BasicBlock *ThenBlock) {
+ return SplitBlockAndInsertIfThenImpl(Cond, SplitBefore, Unreachable,
+ BranchWeights,
+ /*DTU=*/nullptr, DT, LI, ThenBlock);
+}
+Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
+ Instruction *SplitBefore,
+ bool Unreachable,
+ MDNode *BranchWeights,
+ DomTreeUpdater *DTU, LoopInfo *LI,
+ BasicBlock *ThenBlock) {
+ return SplitBlockAndInsertIfThenImpl(Cond, SplitBefore, Unreachable,
+ BranchWeights, DTU, /*DT=*/nullptr, LI,
+ ThenBlock);
+}
+
+void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
+ Instruction **ThenTerm,
+ Instruction **ElseTerm,
+ MDNode *BranchWeights,
+ DomTreeUpdater *DTU) {
+ BasicBlock *Head = SplitBefore->getParent();
+
+ SmallPtrSet<BasicBlock *, 8> UniqueOrigSuccessors;
+ if (DTU)
+ UniqueOrigSuccessors.insert(succ_begin(Head), succ_end(Head));
+
+ BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
+ Instruction *HeadOldTerm = Head->getTerminator();
+ LLVMContext &C = Head->getContext();
+ BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
+ BasicBlock *ElseBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
+ *ThenTerm = BranchInst::Create(Tail, ThenBlock);
+ (*ThenTerm)->setDebugLoc(SplitBefore->getDebugLoc());
+ *ElseTerm = BranchInst::Create(Tail, ElseBlock);
+ (*ElseTerm)->setDebugLoc(SplitBefore->getDebugLoc());
+ BranchInst *HeadNewTerm =
+ BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/ElseBlock, Cond);
+ HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
+ ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+ Updates.reserve(4 + 2 * UniqueOrigSuccessors.size());
+ for (BasicBlock *Succ : successors(Head)) {
+ Updates.push_back({DominatorTree::Insert, Head, Succ});
+ Updates.push_back({DominatorTree::Insert, Succ, Tail});
+ }
+ for (BasicBlock *UniqueOrigSuccessor : UniqueOrigSuccessors)
+ Updates.push_back({DominatorTree::Insert, Tail, UniqueOrigSuccessor});
+ for (BasicBlock *UniqueOrigSuccessor : UniqueOrigSuccessors)
+ Updates.push_back({DominatorTree::Delete, Head, UniqueOrigSuccessor});
+ DTU->applyUpdates(Updates);
+ }
+}
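+
+// CFG shape produced by the function above (illustrative; block names are
+// invented). SplitBefore becomes the first instruction of Tail, and *ThenTerm
+// and *ElseTerm are the unconditional branches of the two new blocks:
+//
+//   Head                      Head
+//    |           ==>         /    \
+//   Tail              ThenBlock  ElseBlock
+//                            \    /
+//                             Tail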
+
+BranchInst *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
+ BasicBlock *&IfFalse) {
+ PHINode *SomePHI = dyn_cast<PHINode>(BB->begin());
+ BasicBlock *Pred1 = nullptr;
+ BasicBlock *Pred2 = nullptr;
+
+ if (SomePHI) {
+ if (SomePHI->getNumIncomingValues() != 2)
+ return nullptr;
+ Pred1 = SomePHI->getIncomingBlock(0);
+ Pred2 = SomePHI->getIncomingBlock(1);
+ } else {
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ if (PI == PE) // No predecessor
+ return nullptr;
+ Pred1 = *PI++;
+ if (PI == PE) // Only one predecessor
+ return nullptr;
+ Pred2 = *PI++;
+ if (PI != PE) // More than two predecessors
+ return nullptr;
+ }
+
+ // We can only handle branches. Other control flow will be lowered to
+ // branches if possible anyway.
+ BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator());
+ BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator());
+ if (!Pred1Br || !Pred2Br)
+ return nullptr;
+
+ // Eliminate code duplication by ensuring that Pred1Br is conditional if
+ // either are.
+ if (Pred2Br->isConditional()) {
+ // If both branches are conditional, we don't have an "if statement". In
+ // reality, we could transform this case, but since the condition will be
+ // required anyway, we stand no chance of eliminating it, so the xform is
+ // probably not profitable.
+ if (Pred1Br->isConditional())
+ return nullptr;
+
+ std::swap(Pred1, Pred2);
+ std::swap(Pred1Br, Pred2Br);
+ }
+
+ if (Pred1Br->isConditional()) {
+ // The only thing we have to watch out for here is to make sure that Pred2
+ // doesn't have incoming edges from other blocks. If it does, the condition
+ // doesn't dominate BB.
+ if (!Pred2->getSinglePredecessor())
+ return nullptr;
+
+ // If we found a conditional branch predecessor, make sure that it branches
+ // to BB and Pred2Br. If it doesn't, this isn't an "if statement".
+ if (Pred1Br->getSuccessor(0) == BB &&
+ Pred1Br->getSuccessor(1) == Pred2) {
+ IfTrue = Pred1;
+ IfFalse = Pred2;
+ } else if (Pred1Br->getSuccessor(0) == Pred2 &&
+ Pred1Br->getSuccessor(1) == BB) {
+ IfTrue = Pred2;
+ IfFalse = Pred1;
+ } else {
+ // We know that one arm of the conditional goes to BB, so the other must
+ // go somewhere unrelated, and this must not be an "if statement".
+ return nullptr;
+ }
+
+ return Pred1Br;
+ }
+
+ // Ok, if we got here, both predecessors end with an unconditional branch to
+ // BB. Don't panic! If both blocks only have a single (identical)
+ // predecessor, and THAT is a conditional branch, then we're all ok!
+ BasicBlock *CommonPred = Pred1->getSinglePredecessor();
+ if (CommonPred == nullptr || CommonPred != Pred2->getSinglePredecessor())
+ return nullptr;
+
+ // Otherwise, if this is a conditional branch, then we can use it!
+ BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator());
+ if (!BI) return nullptr;
+
+ assert(BI->isConditional() && "Two successors but not conditional?");
+ if (BI->getSuccessor(0) == Pred1) {
+ IfTrue = Pred1;
+ IfFalse = Pred2;
+ } else {
+ IfTrue = Pred2;
+ IfFalse = Pred1;
+ }
+ return BI;
+}
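
For reference, a minimal sketch of how a simplification pass might consume GetIfCondition, assuming the same headers as this file; foldDiamondPHIs and MergeBB are hypothetical names:

    static void foldDiamondPHIs(BasicBlock *MergeBB) {
      BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
      if (BranchInst *DomBI = GetIfCondition(MergeBB, IfTrue, IfFalse)) {
        // DomBI is the conditional branch that controls the diamond; IfTrue
        // and IfFalse feed MergeBB's PHI nodes, so a pass could now try to
        // turn those PHIs into selects on DomBI->getCondition().
      }
    }
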
+
+// After creating a control flow hub, the operands of PHINodes in an outgoing
+// block Out no longer match the predecessors of that block. Predecessors of Out
+// that are incoming blocks to the hub are now replaced by just one edge from
+// the hub. To match this new control flow, the corresponding values from each
+// PHINode must now be moved to a new PHINode in the first guard block of the hub.
+//
+// This operation cannot be performed with SSAUpdater, because it involves one
+// new use: If the block Out is in the list of Incoming blocks, then the newly
+// created PHI in the Hub will use itself along that edge from Out to Hub.
+static void reconnectPhis(BasicBlock *Out, BasicBlock *GuardBlock,
+ const SetVector<BasicBlock *> &Incoming,
+ BasicBlock *FirstGuardBlock) {
+ auto I = Out->begin();
+ while (I != Out->end() && isa<PHINode>(I)) {
+ auto Phi = cast<PHINode>(I);
+ auto NewPhi =
+ PHINode::Create(Phi->getType(), Incoming.size(),
+ Phi->getName() + ".moved", &FirstGuardBlock->front());
+ for (auto *In : Incoming) {
+ Value *V = UndefValue::get(Phi->getType());
+ if (In == Out) {
+ V = NewPhi;
+ } else if (Phi->getBasicBlockIndex(In) != -1) {
+ V = Phi->removeIncomingValue(In, false);
+ }
+ NewPhi->addIncoming(V, In);
+ }
+ assert(NewPhi->getNumIncomingValues() == Incoming.size());
+ if (Phi->getNumOperands() == 0) {
+ Phi->replaceAllUsesWith(NewPhi);
+ I = Phi->eraseFromParent();
+ continue;
+ }
+ Phi->addIncoming(NewPhi, GuardBlock);
+ ++I;
+ }
+}
+
+using BBPredicates = DenseMap<BasicBlock *, Instruction *>;
+using BBSetVector = SetVector<BasicBlock *>;
+
+// Redirects the terminator of the incoming block to the first guard
+// block in the hub. The condition of the original terminator (if it
+// was conditional) and its original successors are returned as a
+// tuple <condition, succ0, succ1>. The function additionally filters
+// out successors that are not in the set of outgoing blocks.
+//
+// - condition is non-null iff the branch is conditional.
+// - Succ0 is non-null iff the sole/taken target is an outgoing block.
+// - Succ1 is non-null iff condition is non-null and the fallthrough
+//   target is an outgoing block.
+static std::tuple<Value *, BasicBlock *, BasicBlock *>
+redirectToHub(BasicBlock *BB, BasicBlock *FirstGuardBlock,
+ const BBSetVector &Outgoing) {
+ assert(isa<BranchInst>(BB->getTerminator()) &&
+ "Only support branch terminator.");
+ auto Branch = cast<BranchInst>(BB->getTerminator());
+ auto Condition = Branch->isConditional() ? Branch->getCondition() : nullptr;
+
+ BasicBlock *Succ0 = Branch->getSuccessor(0);
+ BasicBlock *Succ1 = nullptr;
+ Succ0 = Outgoing.count(Succ0) ? Succ0 : nullptr;
+
+ if (Branch->isUnconditional()) {
+ Branch->setSuccessor(0, FirstGuardBlock);
+ assert(Succ0);
+ } else {
+ Succ1 = Branch->getSuccessor(1);
+ Succ1 = Outgoing.count(Succ1) ? Succ1 : nullptr;
+ assert(Succ0 || Succ1);
+ if (Succ0 && !Succ1) {
+ Branch->setSuccessor(0, FirstGuardBlock);
+ } else if (Succ1 && !Succ0) {
+ Branch->setSuccessor(1, FirstGuardBlock);
+ } else {
+ Branch->eraseFromParent();
+ BranchInst::Create(FirstGuardBlock, BB);
+ }
+ }
+
+ assert(Succ0 || Succ1);
+ return std::make_tuple(Condition, Succ0, Succ1);
+}
+// Set up the branch instructions for guard blocks.
+//
+// Each guard block terminates in a conditional branch that transfers
+// control to the corresponding outgoing block or the next guard
+// block. The last guard block has two outgoing blocks as successors
+// since the condition for the final outgoing block is trivially
+// true. So we create one less block (including the first guard block)
+// than the number of outgoing blocks.
+static void setupBranchForGuard(SmallVectorImpl<BasicBlock *> &GuardBlocks,
+ const BBSetVector &Outgoing,
+ BBPredicates &GuardPredicates) {
+ // To help keep the loop simple, temporarily append the last
+ // outgoing block to the list of guard blocks.
+ GuardBlocks.push_back(Outgoing.back());
+
+ for (int i = 0, e = GuardBlocks.size() - 1; i != e; ++i) {
+ auto Out = Outgoing[i];
+ assert(GuardPredicates.count(Out));
+ BranchInst::Create(Out, GuardBlocks[i + 1], GuardPredicates[Out],
+ GuardBlocks[i]);
+ }
+
+ // Remove the last block from the guard list.
+ GuardBlocks.pop_back();
+}
+
+/// We use one integer to represent the block we are branching to. At each
+/// guard block, the predicate is then computed with a simple `icmp eq`.
+static void calcPredicateUsingInteger(
+ const BBSetVector &Incoming, const BBSetVector &Outgoing,
+ SmallVectorImpl<BasicBlock *> &GuardBlocks, BBPredicates &GuardPredicates) {
+ auto &Context = Incoming.front()->getContext();
+ auto FirstGuardBlock = GuardBlocks.front();
+
+ auto Phi = PHINode::Create(Type::getInt32Ty(Context), Incoming.size(),
+ "merged.bb.idx", FirstGuardBlock);
+
+ for (auto In : Incoming) {
+ Value *Condition;
+ BasicBlock *Succ0;
+ BasicBlock *Succ1;
+ std::tie(Condition, Succ0, Succ1) =
+ redirectToHub(In, FirstGuardBlock, Outgoing);
+ Value *IncomingId = nullptr;
+ if (Succ0 && Succ1) {
+ // target_bb_index = Condition ? index_of_succ0 : index_of_succ1.
+ auto Succ0Iter = find(Outgoing, Succ0);
+ auto Succ1Iter = find(Outgoing, Succ1);
+ Value *Id0 = ConstantInt::get(Type::getInt32Ty(Context),
+ std::distance(Outgoing.begin(), Succ0Iter));
+ Value *Id1 = ConstantInt::get(Type::getInt32Ty(Context),
+ std::distance(Outgoing.begin(), Succ1Iter));
+ IncomingId = SelectInst::Create(Condition, Id0, Id1, "target.bb.idx",
+ In->getTerminator());
+ } else {
+ // Get the index of the non-null successor.
+ auto SuccIter = Succ0 ? find(Outgoing, Succ0) : find(Outgoing, Succ1);
+ IncomingId = ConstantInt::get(Type::getInt32Ty(Context),
+ std::distance(Outgoing.begin(), SuccIter));
+ }
+ Phi->addIncoming(IncomingId, In);
+ }
+
+ for (int i = 0, e = Outgoing.size() - 1; i != e; ++i) {
+ auto Out = Outgoing[i];
+ auto Cmp = ICmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ, Phi,
+ ConstantInt::get(Type::getInt32Ty(Context), i),
+ Out->getName() + ".predicate", GuardBlocks[i]);
+ GuardPredicates[Out] = Cmp;
+ }
+}
+
+/// We record the predicate of each outgoing block using a phi of boolean values.
+static void calcPredicateUsingBooleans(
+ const BBSetVector &Incoming, const BBSetVector &Outgoing,
+ SmallVectorImpl<BasicBlock *> &GuardBlocks, BBPredicates &GuardPredicates,
+ SmallVectorImpl<WeakVH> &DeletionCandidates) {
+ auto &Context = Incoming.front()->getContext();
+ auto BoolTrue = ConstantInt::getTrue(Context);
+ auto BoolFalse = ConstantInt::getFalse(Context);
+ auto FirstGuardBlock = GuardBlocks.front();
+
+  // The predicate for the last outgoing block is trivially true, and so we
+  // process only the first N-1 successors.
+ for (int i = 0, e = Outgoing.size() - 1; i != e; ++i) {
+ auto Out = Outgoing[i];
+ LLVM_DEBUG(dbgs() << "Creating guard for " << Out->getName() << "\n");
+
+ auto Phi =
+ PHINode::Create(Type::getInt1Ty(Context), Incoming.size(),
+ StringRef("Guard.") + Out->getName(), FirstGuardBlock);
+ GuardPredicates[Out] = Phi;
+ }
+
+ for (auto *In : Incoming) {
+ Value *Condition;
+ BasicBlock *Succ0;
+ BasicBlock *Succ1;
+ std::tie(Condition, Succ0, Succ1) =
+ redirectToHub(In, FirstGuardBlock, Outgoing);
+
+ // Optimization: Consider an incoming block A with both successors
+ // Succ0 and Succ1 in the set of outgoing blocks. The predicates
+ // for Succ0 and Succ1 complement each other. If Succ0 is visited
+ // first in the loop below, control will branch to Succ0 using the
+ // corresponding predicate. But if that branch is not taken, then
+ // control must reach Succ1, which means that the incoming value of
+ // the predicate from `In` is true for Succ1.
+ bool OneSuccessorDone = false;
+ for (int i = 0, e = Outgoing.size() - 1; i != e; ++i) {
+ auto Out = Outgoing[i];
+ PHINode *Phi = cast<PHINode>(GuardPredicates[Out]);
+ if (Out != Succ0 && Out != Succ1) {
+ Phi->addIncoming(BoolFalse, In);
+ } else if (!Succ0 || !Succ1 || OneSuccessorDone) {
+ // Optimization: When only one successor is an outgoing block,
+ // the incoming predicate from `In` is always true.
+ Phi->addIncoming(BoolTrue, In);
+ } else {
+ assert(Succ0 && Succ1);
+ if (Out == Succ0) {
+ Phi->addIncoming(Condition, In);
+ } else {
+ auto Inverted = invertCondition(Condition);
+ DeletionCandidates.push_back(Condition);
+ Phi->addIncoming(Inverted, In);
+ }
+ OneSuccessorDone = true;
+ }
+ }
+ }
+}
+
+// Capture the existing control flow as guard predicates, and redirect
+// control flow from the \p Incoming blocks through the \p GuardBlocks to the
+// \p Outgoing blocks.
+//
+// There is one guard predicate for each outgoing block OutBB. The
+// predicate represents whether the hub should transfer control flow
+// to OutBB. These predicates are NOT ORTHOGONAL. The Hub evaluates
+// them in the same order as the Outgoing set-vector, and control
+// branches to the first outgoing block whose predicate evaluates to true.
+static void
+convertToGuardPredicates(SmallVectorImpl<BasicBlock *> &GuardBlocks,
+ SmallVectorImpl<WeakVH> &DeletionCandidates,
+ const BBSetVector &Incoming,
+ const BBSetVector &Outgoing, const StringRef Prefix,
+ std::optional<unsigned> MaxControlFlowBooleans) {
+ BBPredicates GuardPredicates;
+ auto F = Incoming.front()->getParent();
+
+ for (int i = 0, e = Outgoing.size() - 1; i != e; ++i)
+ GuardBlocks.push_back(
+ BasicBlock::Create(F->getContext(), Prefix + ".guard", F));
+
+  // When we use an integer to record which target block to jump to, we create
+  // fewer live values: a single integer stores the index of the target block.
+  // When we use booleans to store the branching information, we need (N-1)
+  // boolean values, where N is the number of outgoing blocks.
+ if (!MaxControlFlowBooleans || Outgoing.size() <= *MaxControlFlowBooleans)
+ calcPredicateUsingBooleans(Incoming, Outgoing, GuardBlocks, GuardPredicates,
+ DeletionCandidates);
+ else
+ calcPredicateUsingInteger(Incoming, Outgoing, GuardBlocks, GuardPredicates);
+
+ setupBranchForGuard(GuardBlocks, Outgoing, GuardPredicates);
+}
+
+BasicBlock *llvm::CreateControlFlowHub(
+ DomTreeUpdater *DTU, SmallVectorImpl<BasicBlock *> &GuardBlocks,
+ const BBSetVector &Incoming, const BBSetVector &Outgoing,
+ const StringRef Prefix, std::optional<unsigned> MaxControlFlowBooleans) {
+ if (Outgoing.size() < 2)
+ return Outgoing.front();
+
+ SmallVector<DominatorTree::UpdateType, 16> Updates;
+ if (DTU) {
+ for (auto *In : Incoming) {
+ for (auto Succ : successors(In))
+ if (Outgoing.count(Succ))
+ Updates.push_back({DominatorTree::Delete, In, Succ});
+ }
+ }
+
+ SmallVector<WeakVH, 8> DeletionCandidates;
+ convertToGuardPredicates(GuardBlocks, DeletionCandidates, Incoming, Outgoing,
+ Prefix, MaxControlFlowBooleans);
+ auto FirstGuardBlock = GuardBlocks.front();
+
+ // Update the PHINodes in each outgoing block to match the new control flow.
+ for (int i = 0, e = GuardBlocks.size(); i != e; ++i)
+ reconnectPhis(Outgoing[i], GuardBlocks[i], Incoming, FirstGuardBlock);
+
+ reconnectPhis(Outgoing.back(), GuardBlocks.back(), Incoming, FirstGuardBlock);
+
+ if (DTU) {
+ int NumGuards = GuardBlocks.size();
+ assert((int)Outgoing.size() == NumGuards + 1);
+
+ for (auto In : Incoming)
+ Updates.push_back({DominatorTree::Insert, In, FirstGuardBlock});
+
+ for (int i = 0; i != NumGuards - 1; ++i) {
+ Updates.push_back({DominatorTree::Insert, GuardBlocks[i], Outgoing[i]});
+ Updates.push_back(
+ {DominatorTree::Insert, GuardBlocks[i], GuardBlocks[i + 1]});
+ }
+ Updates.push_back({DominatorTree::Insert, GuardBlocks[NumGuards - 1],
+ Outgoing[NumGuards - 1]});
+ Updates.push_back({DominatorTree::Insert, GuardBlocks[NumGuards - 1],
+ Outgoing[NumGuards]});
+ DTU->applyUpdates(Updates);
+ }
+
+ for (auto I : DeletionCandidates) {
+ if (I->use_empty())
+ if (auto Inst = dyn_cast_or_null<Instruction>(I))
+ Inst->eraseFromParent();
+ }
+
+ return FirstGuardBlock;
+}
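
For reference, a minimal sketch of how a CFG-restructuring caller might drive CreateControlFlowHub, assuming the same headers as this file; buildHub is a hypothetical helper, and DT, Incoming and Outgoing are supplied by the caller:

    static BasicBlock *buildHub(DominatorTree &DT,
                                const SetVector<BasicBlock *> &Incoming,
                                const SetVector<BasicBlock *> &Outgoing) {
      SmallVector<BasicBlock *, 8> GuardBlocks;
      DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
      // After this call every Incoming block branches to the returned hub,
      // which dispatches to the Outgoing blocks through GuardBlocks.
      return CreateControlFlowHub(&DTU, GuardBlocks, Incoming, Outgoing,
                                  "hub", std::nullopt);
    }
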
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/BreakCriticalEdges.cpp
new file mode 100644
index 0000000000..ddb3575603
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -0,0 +1,465 @@
+//===- BreakCriticalEdges.cpp - Critical Edge Elimination Pass ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// BreakCriticalEdges pass - Break all of the critical edges in the CFG by
+// inserting a dummy basic block. This pass may be "required" by passes that
+// cannot deal with critical edges. For this usage, the structure type is
+// forward declared. This pass obviously invalidates the CFG, but can update
+// dominator trees.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BreakCriticalEdges.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "break-crit-edges"
+
+STATISTIC(NumBroken, "Number of blocks inserted");
+
+namespace {
+ struct BreakCriticalEdges : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ BreakCriticalEdges() : FunctionPass(ID) {
+ initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+
+ auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>();
+ auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr;
+
+ auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+ auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
+ unsigned N =
+ SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI, nullptr, PDT));
+ NumBroken += N;
+ return N > 0;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+
+ // No loop canonicalization guarantees are broken by this pass.
+ AU.addPreservedID(LoopSimplifyID);
+ }
+ };
+}
+
+char BreakCriticalEdges::ID = 0;
+INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges",
+ "Break critical edges in CFG", false, false)
+
+// Publicly exposed interface to pass...
+char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID;
+FunctionPass *llvm::createBreakCriticalEdgesPass() {
+ return new BreakCriticalEdges();
+}
+
+PreservedAnalyses BreakCriticalEdgesPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
+ auto *LI = AM.getCachedResult<LoopAnalysis>(F);
+ unsigned N = SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI));
+ NumBroken += N;
+ if (N == 0)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<LoopAnalysis>();
+ return PA;
+}
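
For reference, a minimal sketch of driving the new-pass-manager wrapper above; runBreakCriticalEdges is a hypothetical helper, and FAM is assumed to already have the standard function analyses registered (for example via PassBuilder):

    static void runBreakCriticalEdges(Function &F,
                                      FunctionAnalysisManager &FAM) {
      FunctionPassManager FPM;
      FPM.addPass(BreakCriticalEdgesPass());
      FPM.run(F, FAM);
    }
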
+
+//===----------------------------------------------------------------------===//
+// Implementation of the external critical edge manipulation functions
+//===----------------------------------------------------------------------===//
+
+BasicBlock *llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
+ const CriticalEdgeSplittingOptions &Options,
+ const Twine &BBName) {
+ if (!isCriticalEdge(TI, SuccNum, Options.MergeIdenticalEdges))
+ return nullptr;
+
+ return SplitKnownCriticalEdge(TI, SuccNum, Options, BBName);
+}
+
+BasicBlock *
+llvm::SplitKnownCriticalEdge(Instruction *TI, unsigned SuccNum,
+ const CriticalEdgeSplittingOptions &Options,
+ const Twine &BBName) {
+ assert(!isa<IndirectBrInst>(TI) &&
+ "Cannot split critical edge from IndirectBrInst");
+
+ BasicBlock *TIBB = TI->getParent();
+ BasicBlock *DestBB = TI->getSuccessor(SuccNum);
+
+ // Splitting the critical edge to a pad block is non-trivial. Don't do
+ // it in this generic function.
+ if (DestBB->isEHPad()) return nullptr;
+
+ if (Options.IgnoreUnreachableDests &&
+ isa<UnreachableInst>(DestBB->getFirstNonPHIOrDbgOrLifetime()))
+ return nullptr;
+
+ auto *LI = Options.LI;
+ SmallVector<BasicBlock *, 4> LoopPreds;
+ // Check if extra modifications will be required to preserve loop-simplify
+ // form after splitting. If it would require splitting blocks with IndirectBr
+ // terminators, bail out if preserving loop-simplify form is requested.
+ if (LI) {
+ if (Loop *TIL = LI->getLoopFor(TIBB)) {
+
+ // The only way that we can break LoopSimplify form by splitting a
+ // critical edge is if after the split there exists some edge from TIL to
+ // DestBB *and* the only edge into DestBB from outside of TIL is that of
+ // NewBB. If the first isn't true, then LoopSimplify still holds, NewBB
+ // is the new exit block and it has no non-loop predecessors. If the
+ // second isn't true, then DestBB was not in LoopSimplify form prior to
+ // the split as it had a non-loop predecessor. In both of these cases,
+ // the predecessor must be directly in TIL, not in a subloop, or again
+ // LoopSimplify doesn't hold.
+ for (BasicBlock *P : predecessors(DestBB)) {
+ if (P == TIBB)
+ continue; // The new block is known.
+ if (LI->getLoopFor(P) != TIL) {
+ // No need to re-simplify, it wasn't to start with.
+ LoopPreds.clear();
+ break;
+ }
+ LoopPreds.push_back(P);
+ }
+ // Loop-simplify form can be preserved, if we can split all in-loop
+ // predecessors.
+ if (any_of(LoopPreds, [](BasicBlock *Pred) {
+ return isa<IndirectBrInst>(Pred->getTerminator());
+ })) {
+ if (Options.PreserveLoopSimplify)
+ return nullptr;
+ LoopPreds.clear();
+ }
+ }
+ }
+
+ // Create a new basic block, linking it into the CFG.
+ BasicBlock *NewBB = nullptr;
+ if (BBName.str() != "")
+ NewBB = BasicBlock::Create(TI->getContext(), BBName);
+ else
+ NewBB = BasicBlock::Create(TI->getContext(), TIBB->getName() + "." +
+ DestBB->getName() +
+ "_crit_edge");
+ // Create our unconditional branch.
+ BranchInst *NewBI = BranchInst::Create(DestBB, NewBB);
+ NewBI->setDebugLoc(TI->getDebugLoc());
+
+ // Insert the block into the function... right after the block TI lives in.
+ Function &F = *TIBB->getParent();
+ Function::iterator FBBI = TIBB->getIterator();
+ F.insert(++FBBI, NewBB);
+
+ // Branch to the new block, breaking the edge.
+ TI->setSuccessor(SuccNum, NewBB);
+
+ // If there are any PHI nodes in DestBB, we need to update them so that they
+ // merge incoming values from NewBB instead of from TIBB.
+ {
+ unsigned BBIdx = 0;
+ for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
+      // We no longer enter through TIBB; now we come in through NewBB.
+ // Revector exactly one entry in the PHI node that used to come from
+ // TIBB to come from NewBB.
+ PHINode *PN = cast<PHINode>(I);
+
+ // Reuse the previous value of BBIdx if it lines up. In cases where we
+ // have multiple phi nodes with *lots* of predecessors, this is a speed
+ // win because we don't have to scan the PHI looking for TIBB. This
+      // happens because the incoming-block lists of PHI nodes are usually in
+      // the same order.
+ if (PN->getIncomingBlock(BBIdx) != TIBB)
+ BBIdx = PN->getBasicBlockIndex(TIBB);
+ PN->setIncomingBlock(BBIdx, NewBB);
+ }
+ }
+
+ // If there are any other edges from TIBB to DestBB, update those to go
+ // through the split block, making those edges non-critical as well (and
+ // reducing the number of phi entries in the DestBB if relevant).
+ if (Options.MergeIdenticalEdges) {
+ for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) {
+ if (TI->getSuccessor(i) != DestBB) continue;
+
+ // Remove an entry for TIBB from DestBB phi nodes.
+ DestBB->removePredecessor(TIBB, Options.KeepOneInputPHIs);
+
+ // We found another edge to DestBB, go to NewBB instead.
+ TI->setSuccessor(i, NewBB);
+ }
+ }
+
+ // If we have nothing to update, just return.
+ auto *DT = Options.DT;
+ auto *PDT = Options.PDT;
+ auto *MSSAU = Options.MSSAU;
+ if (MSSAU)
+ MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
+ DestBB, NewBB, {TIBB}, Options.MergeIdenticalEdges);
+
+ if (!DT && !PDT && !LI)
+ return NewBB;
+
+ if (DT || PDT) {
+ // Update the DominatorTree.
+ // ---> NewBB -----\
+ // / V
+ // TIBB -------\\------> DestBB
+ //
+ // First, inform the DT about the new path from TIBB to DestBB via NewBB,
+ // then delete the old edge from TIBB to DestBB. By doing this in that order
+ // DestBB stays reachable in the DT the whole time and its subtree doesn't
+ // get disconnected.
+ SmallVector<DominatorTree::UpdateType, 3> Updates;
+ Updates.push_back({DominatorTree::Insert, TIBB, NewBB});
+ Updates.push_back({DominatorTree::Insert, NewBB, DestBB});
+ if (!llvm::is_contained(successors(TIBB), DestBB))
+ Updates.push_back({DominatorTree::Delete, TIBB, DestBB});
+
+ if (DT)
+ DT->applyUpdates(Updates);
+ if (PDT)
+ PDT->applyUpdates(Updates);
+ }
+
+ // Update LoopInfo if it is around.
+ if (LI) {
+ if (Loop *TIL = LI->getLoopFor(TIBB)) {
+ // If one or the other blocks were not in a loop, the new block is not
+ // either, and thus LI doesn't need to be updated.
+ if (Loop *DestLoop = LI->getLoopFor(DestBB)) {
+ if (TIL == DestLoop) {
+          // Both are in the same loop; NewBB joins that loop.
+ DestLoop->addBasicBlockToLoop(NewBB, *LI);
+ } else if (TIL->contains(DestLoop)) {
+ // Edge from an outer loop to an inner loop. Add to the outer loop.
+ TIL->addBasicBlockToLoop(NewBB, *LI);
+ } else if (DestLoop->contains(TIL)) {
+ // Edge from an inner loop to an outer loop. Add to the outer loop.
+ DestLoop->addBasicBlockToLoop(NewBB, *LI);
+ } else {
+ // Edge from two loops with no containment relation. Because these
+ // are natural loops, we know that the destination block must be the
+ // header of its loop (adding a branch into a loop elsewhere would
+ // create an irreducible loop).
+ assert(DestLoop->getHeader() == DestBB &&
+ "Should not create irreducible loops!");
+ if (Loop *P = DestLoop->getParentLoop())
+ P->addBasicBlockToLoop(NewBB, *LI);
+ }
+ }
+
+ // If TIBB is in a loop and DestBB is outside of that loop, we may need
+ // to update LoopSimplify form and LCSSA form.
+ if (!TIL->contains(DestBB)) {
+ assert(!TIL->contains(NewBB) &&
+ "Split point for loop exit is contained in loop!");
+
+ // Update LCSSA form in the newly created exit block.
+ if (Options.PreserveLCSSA) {
+ createPHIsForSplitLoopExit(TIBB, NewBB, DestBB);
+ }
+
+ if (!LoopPreds.empty()) {
+ assert(!DestBB->isEHPad() && "We don't split edges to EH pads!");
+ BasicBlock *NewExitBB = SplitBlockPredecessors(
+ DestBB, LoopPreds, "split", DT, LI, MSSAU, Options.PreserveLCSSA);
+ if (Options.PreserveLCSSA)
+ createPHIsForSplitLoopExit(LoopPreds, NewExitBB, DestBB);
+ }
+ }
+ }
+ }
+
+ return NewBB;
+}
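
For reference, a minimal sketch of splitting one known-critical edge while keeping the dominator tree and loop info current; splitOneEdge is a hypothetical helper, and TI, SuccNum, DT and LI are supplied by the caller:

    static BasicBlock *splitOneEdge(Instruction *TI, unsigned SuccNum,
                                    DominatorTree *DT, LoopInfo *LI) {
      CriticalEdgeSplittingOptions Options(DT, LI);
      // Returns the new block (a single unconditional branch to the old
      // successor) or nullptr if the edge was not critical or not splittable.
      return SplitCriticalEdge(TI, SuccNum, Options);
    }
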
+
+// Return the unique indirectbr predecessor of a block. This may return null
+// even if such a predecessor exists, if it's not useful for splitting.
+// If a predecessor is found, OtherPreds will contain all other (non-indirectbr)
+// predecessors of BB.
+static BasicBlock *
+findIBRPredecessor(BasicBlock *BB, SmallVectorImpl<BasicBlock *> &OtherPreds) {
+ // Verify we have exactly one IBR predecessor.
+// Conservatively bail out if one of the other predecessors does not end in a
+// "regular" terminator (that is, not a switch or a br).
+ BasicBlock *IBB = nullptr;
+ for (BasicBlock *PredBB : predecessors(BB)) {
+ Instruction *PredTerm = PredBB->getTerminator();
+ switch (PredTerm->getOpcode()) {
+ case Instruction::IndirectBr:
+ if (IBB)
+ return nullptr;
+ IBB = PredBB;
+ break;
+ case Instruction::Br:
+ case Instruction::Switch:
+ OtherPreds.push_back(PredBB);
+ continue;
+ default:
+ return nullptr;
+ }
+ }
+
+ return IBB;
+}
+
+bool llvm::SplitIndirectBrCriticalEdges(Function &F,
+ bool IgnoreBlocksWithoutPHI,
+ BranchProbabilityInfo *BPI,
+ BlockFrequencyInfo *BFI) {
+ // Check whether the function has any indirectbrs, and collect which blocks
+ // they may jump to. Since most functions don't have indirect branches,
+ // this lowers the common case's overhead to O(Blocks) instead of O(Edges).
+ SmallSetVector<BasicBlock *, 16> Targets;
+ for (auto &BB : F) {
+ auto *IBI = dyn_cast<IndirectBrInst>(BB.getTerminator());
+ if (!IBI)
+ continue;
+
+ for (unsigned Succ = 0, E = IBI->getNumSuccessors(); Succ != E; ++Succ)
+ Targets.insert(IBI->getSuccessor(Succ));
+ }
+
+ if (Targets.empty())
+ return false;
+
+ bool ShouldUpdateAnalysis = BPI && BFI;
+ bool Changed = false;
+ for (BasicBlock *Target : Targets) {
+ if (IgnoreBlocksWithoutPHI && Target->phis().empty())
+ continue;
+
+ SmallVector<BasicBlock *, 16> OtherPreds;
+ BasicBlock *IBRPred = findIBRPredecessor(Target, OtherPreds);
+    // If we did not find an indirectbr, or the indirectbr is the only
+ // incoming edge, this isn't the kind of edge we're looking for.
+ if (!IBRPred || OtherPreds.empty())
+ continue;
+
+ // Don't even think about ehpads/landingpads.
+ Instruction *FirstNonPHI = Target->getFirstNonPHI();
+ if (FirstNonPHI->isEHPad() || Target->isLandingPad())
+ continue;
+
+ // Remember edge probabilities if needed.
+ SmallVector<BranchProbability, 4> EdgeProbabilities;
+ if (ShouldUpdateAnalysis) {
+ EdgeProbabilities.reserve(Target->getTerminator()->getNumSuccessors());
+ for (unsigned I = 0, E = Target->getTerminator()->getNumSuccessors();
+ I < E; ++I)
+ EdgeProbabilities.emplace_back(BPI->getEdgeProbability(Target, I));
+ BPI->eraseBlock(Target);
+ }
+
+ BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split");
+ if (ShouldUpdateAnalysis) {
+ // Copy the BFI/BPI from Target to BodyBlock.
+ BPI->setEdgeProbability(BodyBlock, EdgeProbabilities);
+ BFI->setBlockFreq(BodyBlock, BFI->getBlockFreq(Target).getFrequency());
+ }
+ // It's possible Target was its own successor through an indirectbr.
+ // In this case, the indirectbr now comes from BodyBlock.
+ if (IBRPred == Target)
+ IBRPred = BodyBlock;
+
+ // At this point Target only has PHIs, and BodyBlock has the rest of the
+ // block's body. Create a copy of Target that will be used by the "direct"
+ // preds.
+ ValueToValueMapTy VMap;
+ BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F);
+
+ BlockFrequency BlockFreqForDirectSucc;
+ for (BasicBlock *Pred : OtherPreds) {
+      // If the target loops back to itself, then the terminator of the split
+      // block (BodyBlock) needs to be updated.
+ BasicBlock *Src = Pred != Target ? Pred : BodyBlock;
+ Src->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
+ if (ShouldUpdateAnalysis)
+ BlockFreqForDirectSucc += BFI->getBlockFreq(Src) *
+ BPI->getEdgeProbability(Src, DirectSucc);
+ }
+ if (ShouldUpdateAnalysis) {
+ BFI->setBlockFreq(DirectSucc, BlockFreqForDirectSucc.getFrequency());
+ BlockFrequency NewBlockFreqForTarget =
+ BFI->getBlockFreq(Target) - BlockFreqForDirectSucc;
+ BFI->setBlockFreq(Target, NewBlockFreqForTarget.getFrequency());
+ }
+
+ // Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that
+    // they are clones, so the number of PHIs is the same.
+ // (a) Remove the edge coming from IBRPred from the "Direct" PHI
+ // (b) Leave that as the only edge in the "Indirect" PHI.
+ // (c) Merge the two in the body block.
+ BasicBlock::iterator Indirect = Target->begin(),
+ End = Target->getFirstNonPHI()->getIterator();
+ BasicBlock::iterator Direct = DirectSucc->begin();
+ BasicBlock::iterator MergeInsert = BodyBlock->getFirstInsertionPt();
+
+ assert(&*End == Target->getTerminator() &&
+ "Block was expected to only contain PHIs");
+
+ while (Indirect != End) {
+ PHINode *DirPHI = cast<PHINode>(Direct);
+ PHINode *IndPHI = cast<PHINode>(Indirect);
+
+ // Now, clean up - the direct block shouldn't get the indirect value,
+ // and vice versa.
+ DirPHI->removeIncomingValue(IBRPred);
+ Direct++;
+
+ // Advance the pointer here, to avoid invalidation issues when the old
+ // PHI is erased.
+ Indirect++;
+
+ PHINode *NewIndPHI = PHINode::Create(IndPHI->getType(), 1, "ind", IndPHI);
+ NewIndPHI->addIncoming(IndPHI->getIncomingValueForBlock(IBRPred),
+ IBRPred);
+
+ // Create a PHI in the body block, to merge the direct and indirect
+ // predecessors.
+ PHINode *MergePHI =
+ PHINode::Create(IndPHI->getType(), 2, "merge", &*MergeInsert);
+ MergePHI->addIncoming(NewIndPHI, Target);
+ MergePHI->addIncoming(DirPHI, DirectSucc);
+
+ IndPHI->replaceAllUsesWith(MergePHI);
+ IndPHI->eraseFromParent();
+ }
+
+ Changed = true;
+ }
+
+ return Changed;
+}
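
For reference, a minimal sketch of a profile-aware caller (CodeGenPrepare does something similar); splitIBREdgesWithProfile is a hypothetical helper:

    static bool splitIBREdgesWithProfile(Function &F,
                                         BranchProbabilityInfo *BPI,
                                         BlockFrequencyInfo *BFI) {
      // Only rewrite indirectbr targets that contain PHIs; passing BPI/BFI
      // keeps branch probabilities and block frequencies in sync (either may
      // be null, in which case the profile update is skipped).
      return SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true,
                                          BPI, BFI);
    }
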
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/BuildLibCalls.cpp
new file mode 100644
index 0000000000..1e21a2f854
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -0,0 +1,1939 @@
+//===- BuildLibCalls.cpp - Utility builder for libcalls -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some functions that will create standard C libcalls.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/TypeSize.h"
+#include <optional>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "build-libcalls"
+
+//- Infer Attributes ---------------------------------------------------------//
+
+STATISTIC(NumReadNone, "Number of functions inferred as readnone");
+STATISTIC(NumInaccessibleMemOnly,
+ "Number of functions inferred as inaccessiblememonly");
+STATISTIC(NumReadOnly, "Number of functions inferred as readonly");
+STATISTIC(NumWriteOnly, "Number of functions inferred as writeonly");
+STATISTIC(NumArgMemOnly, "Number of functions inferred as argmemonly");
+STATISTIC(NumInaccessibleMemOrArgMemOnly,
+ "Number of functions inferred as inaccessiblemem_or_argmemonly");
+STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind");
+STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture");
+STATISTIC(NumWriteOnlyArg, "Number of arguments inferred as writeonly");
+STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly");
+STATISTIC(NumNoAlias, "Number of function returns inferred as noalias");
+STATISTIC(NumNoUndef, "Number of function returns inferred as noundef returns");
+STATISTIC(NumReturnedArg, "Number of arguments inferred as returned");
+STATISTIC(NumWillReturn, "Number of functions inferred as willreturn");
+
+static bool setDoesNotAccessMemory(Function &F) {
+ if (F.doesNotAccessMemory())
+ return false;
+ F.setDoesNotAccessMemory();
+ ++NumReadNone;
+ return true;
+}
+
+static bool setOnlyAccessesInaccessibleMemory(Function &F) {
+ if (F.onlyAccessesInaccessibleMemory())
+ return false;
+ F.setOnlyAccessesInaccessibleMemory();
+ ++NumInaccessibleMemOnly;
+ return true;
+}
+
+static bool setOnlyReadsMemory(Function &F) {
+ if (F.onlyReadsMemory())
+ return false;
+ F.setOnlyReadsMemory();
+ ++NumReadOnly;
+ return true;
+}
+
+static bool setOnlyWritesMemory(Function &F) {
+ if (F.onlyWritesMemory()) // writeonly or readnone
+ return false;
+ ++NumWriteOnly;
+ F.setOnlyWritesMemory();
+ return true;
+}
+
+static bool setOnlyAccessesArgMemory(Function &F) {
+ if (F.onlyAccessesArgMemory())
+ return false;
+ F.setOnlyAccessesArgMemory();
+ ++NumArgMemOnly;
+ return true;
+}
+
+static bool setOnlyAccessesInaccessibleMemOrArgMem(Function &F) {
+ if (F.onlyAccessesInaccessibleMemOrArgMem())
+ return false;
+ F.setOnlyAccessesInaccessibleMemOrArgMem();
+ ++NumInaccessibleMemOrArgMemOnly;
+ return true;
+}
+
+static bool setDoesNotThrow(Function &F) {
+ if (F.doesNotThrow())
+ return false;
+ F.setDoesNotThrow();
+ ++NumNoUnwind;
+ return true;
+}
+
+static bool setRetDoesNotAlias(Function &F) {
+ if (F.hasRetAttribute(Attribute::NoAlias))
+ return false;
+ F.addRetAttr(Attribute::NoAlias);
+ ++NumNoAlias;
+ return true;
+}
+
+static bool setDoesNotCapture(Function &F, unsigned ArgNo) {
+ if (F.hasParamAttribute(ArgNo, Attribute::NoCapture))
+ return false;
+ F.addParamAttr(ArgNo, Attribute::NoCapture);
+ ++NumNoCapture;
+ return true;
+}
+
+static bool setDoesNotAlias(Function &F, unsigned ArgNo) {
+ if (F.hasParamAttribute(ArgNo, Attribute::NoAlias))
+ return false;
+ F.addParamAttr(ArgNo, Attribute::NoAlias);
+ ++NumNoAlias;
+ return true;
+}
+
+static bool setOnlyReadsMemory(Function &F, unsigned ArgNo) {
+ if (F.hasParamAttribute(ArgNo, Attribute::ReadOnly))
+ return false;
+ F.addParamAttr(ArgNo, Attribute::ReadOnly);
+ ++NumReadOnlyArg;
+ return true;
+}
+
+static bool setOnlyWritesMemory(Function &F, unsigned ArgNo) {
+ if (F.hasParamAttribute(ArgNo, Attribute::WriteOnly))
+ return false;
+ F.addParamAttr(ArgNo, Attribute::WriteOnly);
+ ++NumWriteOnlyArg;
+ return true;
+}
+
+static bool setRetNoUndef(Function &F) {
+ if (!F.getReturnType()->isVoidTy() &&
+ !F.hasRetAttribute(Attribute::NoUndef)) {
+ F.addRetAttr(Attribute::NoUndef);
+ ++NumNoUndef;
+ return true;
+ }
+ return false;
+}
+
+static bool setArgsNoUndef(Function &F) {
+ bool Changed = false;
+ for (unsigned ArgNo = 0; ArgNo < F.arg_size(); ++ArgNo) {
+ if (!F.hasParamAttribute(ArgNo, Attribute::NoUndef)) {
+ F.addParamAttr(ArgNo, Attribute::NoUndef);
+ ++NumNoUndef;
+ Changed = true;
+ }
+ }
+ return Changed;
+}
+
+static bool setArgNoUndef(Function &F, unsigned ArgNo) {
+ if (F.hasParamAttribute(ArgNo, Attribute::NoUndef))
+ return false;
+ F.addParamAttr(ArgNo, Attribute::NoUndef);
+ ++NumNoUndef;
+ return true;
+}
+
+static bool setRetAndArgsNoUndef(Function &F) {
+ bool UndefAdded = false;
+ UndefAdded |= setRetNoUndef(F);
+ UndefAdded |= setArgsNoUndef(F);
+ return UndefAdded;
+}
+
+static bool setReturnedArg(Function &F, unsigned ArgNo) {
+ if (F.hasParamAttribute(ArgNo, Attribute::Returned))
+ return false;
+ F.addParamAttr(ArgNo, Attribute::Returned);
+ ++NumReturnedArg;
+ return true;
+}
+
+static bool setNonLazyBind(Function &F) {
+ if (F.hasFnAttribute(Attribute::NonLazyBind))
+ return false;
+ F.addFnAttr(Attribute::NonLazyBind);
+ return true;
+}
+
+static bool setDoesNotFreeMemory(Function &F) {
+ if (F.hasFnAttribute(Attribute::NoFree))
+ return false;
+ F.addFnAttr(Attribute::NoFree);
+ return true;
+}
+
+static bool setWillReturn(Function &F) {
+ if (F.hasFnAttribute(Attribute::WillReturn))
+ return false;
+ F.addFnAttr(Attribute::WillReturn);
+ ++NumWillReturn;
+ return true;
+}
+
+static bool setAlignedAllocParam(Function &F, unsigned ArgNo) {
+ if (F.hasParamAttribute(ArgNo, Attribute::AllocAlign))
+ return false;
+ F.addParamAttr(ArgNo, Attribute::AllocAlign);
+ return true;
+}
+
+static bool setAllocatedPointerParam(Function &F, unsigned ArgNo) {
+ if (F.hasParamAttribute(ArgNo, Attribute::AllocatedPointer))
+ return false;
+ F.addParamAttr(ArgNo, Attribute::AllocatedPointer);
+ return true;
+}
+
+static bool setAllocSize(Function &F, unsigned ElemSizeArg,
+ std::optional<unsigned> NumElemsArg) {
+ if (F.hasFnAttribute(Attribute::AllocSize))
+ return false;
+ F.addFnAttr(Attribute::getWithAllocSizeArgs(F.getContext(), ElemSizeArg,
+ NumElemsArg));
+ return true;
+}
+
+static bool setAllocFamily(Function &F, StringRef Family) {
+ if (F.hasFnAttribute("alloc-family"))
+ return false;
+ F.addFnAttr("alloc-family", Family);
+ return true;
+}
+
+static bool setAllocKind(Function &F, AllocFnKind K) {
+ if (F.hasFnAttribute(Attribute::AllocKind))
+ return false;
+ F.addFnAttr(
+ Attribute::get(F.getContext(), Attribute::AllocKind, uint64_t(K)));
+ return true;
+}
+
+bool llvm::inferNonMandatoryLibFuncAttrs(Module *M, StringRef Name,
+ const TargetLibraryInfo &TLI) {
+ Function *F = M->getFunction(Name);
+ if (!F)
+ return false;
+ return inferNonMandatoryLibFuncAttrs(*F, TLI);
+}
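
For reference, a minimal sketch of annotating a libcall declaration from a transform; annotateLibcallDecl is a hypothetical helper and "memcpy" is only an example name:

    static void annotateLibcallDecl(Module &M, const TargetLibraryInfo &TLI) {
      if (Function *Callee = M.getFunction("memcpy"))
        inferNonMandatoryLibFuncAttrs(*Callee, TLI);
    }
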
+
+bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
+ const TargetLibraryInfo &TLI) {
+ LibFunc TheLibFunc;
+ if (!(TLI.getLibFunc(F, TheLibFunc) && TLI.has(TheLibFunc)))
+ return false;
+
+ bool Changed = false;
+
+ if (F.getParent() != nullptr && F.getParent()->getRtLibUseGOT())
+ Changed |= setNonLazyBind(F);
+
+ switch (TheLibFunc) {
+ case LibFunc_strlen:
+ case LibFunc_strnlen:
+ case LibFunc_wcslen:
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotCapture(F, 0);
+ break;
+ case LibFunc_strchr:
+ case LibFunc_strrchr:
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
+ break;
+ case LibFunc_strtol:
+ case LibFunc_strtod:
+ case LibFunc_strtof:
+ case LibFunc_strtoul:
+ case LibFunc_strtoll:
+ case LibFunc_strtold:
+ case LibFunc_strtoull:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ break;
+ case LibFunc_strcat:
+ case LibFunc_strncat:
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
+ Changed |= setReturnedArg(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotAlias(F, 1);
+ break;
+ case LibFunc_strcpy:
+ case LibFunc_strncpy:
+ Changed |= setReturnedArg(F, 0);
+ [[fallthrough]];
+ case LibFunc_stpcpy:
+ case LibFunc_stpncpy:
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyWritesMemory(F, 0);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotAlias(F, 1);
+ break;
+ case LibFunc_strxfrm:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ break;
+ case LibFunc_strcmp: // 0,1
+ case LibFunc_strspn: // 0,1
+ case LibFunc_strncmp: // 0,1
+ case LibFunc_strcspn: // 0,1
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setWillReturn(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ break;
+ case LibFunc_strcoll:
+ case LibFunc_strcasecmp: // 0,1
+  case LibFunc_strncasecmp:
+    // These functions may depend on the locale, which may be accessed through
+    // global memory.
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ break;
+ case LibFunc_strstr:
+ case LibFunc_strpbrk:
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotCapture(F, 1);
+ break;
+ case LibFunc_strtok:
+ case LibFunc_strtok_r:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ break;
+ case LibFunc_scanf:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ break;
+ case LibFunc_setbuf:
+ case LibFunc_setvbuf:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ break;
+ case LibFunc_strndup:
+ Changed |= setArgNoUndef(F, 1);
+ [[fallthrough]];
+ case LibFunc_strdup:
+ Changed |= setAllocFamily(F, "malloc");
+ Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setRetDoesNotAlias(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ break;
+ case LibFunc_stat:
+ case LibFunc_statvfs:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ break;
+ case LibFunc_sscanf:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ Changed |= setOnlyReadsMemory(F, 1);
+ break;
+ case LibFunc_sprintf:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setOnlyWritesMemory(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ break;
+ case LibFunc_snprintf:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setOnlyWritesMemory(F, 0);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ break;
+ case LibFunc_setitimer:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ break;
+ case LibFunc_system:
+ // May throw; "system" is a valid pthread cancellation point.
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ break;
+ case LibFunc_aligned_alloc:
+ Changed |= setAlignedAllocParam(F, 0);
+ Changed |= setAllocSize(F, 1, std::nullopt);
+ Changed |= setAllocKind(F, AllocFnKind::Alloc | AllocFnKind::Uninitialized | AllocFnKind::Aligned);
+ [[fallthrough]];
+ case LibFunc_valloc:
+ case LibFunc_malloc:
+ case LibFunc_vec_malloc:
+ Changed |= setAllocFamily(F, TheLibFunc == LibFunc_vec_malloc ? "vec_malloc"
+ : "malloc");
+ Changed |= setAllocKind(F, AllocFnKind::Alloc | AllocFnKind::Uninitialized);
+ Changed |= setAllocSize(F, 0, std::nullopt);
+ Changed |= setOnlyAccessesInaccessibleMemory(F);
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setRetDoesNotAlias(F);
+ Changed |= setWillReturn(F);
+ break;
+ case LibFunc_memcmp:
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ break;
+ case LibFunc_memchr:
+ case LibFunc_memrchr:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setWillReturn(F);
+ break;
+ case LibFunc_modf:
+ case LibFunc_modff:
+ case LibFunc_modfl:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotCapture(F, 1);
+ break;
+ case LibFunc_memcpy:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setReturnedArg(F, 0);
+ Changed |= setOnlyWritesMemory(F, 0);
+ Changed |= setDoesNotAlias(F, 1);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ break;
+ case LibFunc_memmove:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setWillReturn(F);
+ Changed |= setReturnedArg(F, 0);
+ Changed |= setOnlyWritesMemory(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ break;
+ case LibFunc_mempcpy:
+ case LibFunc_memccpy:
+ Changed |= setWillReturn(F);
+ [[fallthrough]];
+ case LibFunc_memcpy_chk:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setOnlyWritesMemory(F, 0);
+ Changed |= setDoesNotAlias(F, 1);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ break;
+ case LibFunc_memalign:
+ Changed |= setAllocFamily(F, "malloc");
+ Changed |= setAllocKind(F, AllocFnKind::Alloc | AllocFnKind::Aligned |
+ AllocFnKind::Uninitialized);
+ Changed |= setAllocSize(F, 1, std::nullopt);
+ Changed |= setAlignedAllocParam(F, 0);
+ Changed |= setOnlyAccessesInaccessibleMemory(F);
+ Changed |= setRetNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setRetDoesNotAlias(F);
+ Changed |= setWillReturn(F);
+ break;
+ case LibFunc_mkdir:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ break;
+ case LibFunc_mktime:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotCapture(F, 0);
+ break;
+ case LibFunc_realloc:
+ case LibFunc_reallocf:
+ case LibFunc_vec_realloc:
+ Changed |= setAllocFamily(
+ F, TheLibFunc == LibFunc_vec_realloc ? "vec_malloc" : "malloc");
+ Changed |= setAllocKind(F, AllocFnKind::Realloc);
+ Changed |= setAllocatedPointerParam(F, 0);
+ Changed |= setAllocSize(F, 1, std::nullopt);
+ Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
+ Changed |= setRetNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setRetDoesNotAlias(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setArgNoUndef(F, 1);
+ break;
+ case LibFunc_read:
+ // May throw; "read" is a valid pthread cancellation point.
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotCapture(F, 1);
+ break;
+ case LibFunc_rewind:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ break;
+ case LibFunc_rmdir:
+ case LibFunc_remove:
+ case LibFunc_realpath:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ break;
+ case LibFunc_rename:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ Changed |= setOnlyReadsMemory(F, 1);
+ break;
+ case LibFunc_readlink:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ break;
+ case LibFunc_write:
+ // May throw; "write" is a valid pthread cancellation point.
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ break;
+ case LibFunc_bcopy:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ Changed |= setOnlyWritesMemory(F, 1);
+ Changed |= setDoesNotCapture(F, 1);
+ break;
+ case LibFunc_bcmp:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ break;
+ case LibFunc_bzero:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyWritesMemory(F, 0);
+ break;
+ case LibFunc_calloc:
+ case LibFunc_vec_calloc:
+ Changed |= setAllocFamily(F, TheLibFunc == LibFunc_vec_calloc ? "vec_malloc"
+ : "malloc");
+ Changed |= setAllocKind(F, AllocFnKind::Alloc | AllocFnKind::Zeroed);
+ Changed |= setAllocSize(F, 0, 1);
+ Changed |= setOnlyAccessesInaccessibleMemory(F);
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setRetDoesNotAlias(F);
+ Changed |= setWillReturn(F);
+ break;
+ case LibFunc_chmod:
+ case LibFunc_chown:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ break;
+ case LibFunc_ctermid:
+ case LibFunc_clearerr:
+ case LibFunc_closedir:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ break;
+ case LibFunc_atoi:
+ case LibFunc_atol:
+ case LibFunc_atof:
+ case LibFunc_atoll:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotCapture(F, 0);
+ break;
+ case LibFunc_access:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ break;
+ case LibFunc_fopen:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setRetDoesNotAlias(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ Changed |= setOnlyReadsMemory(F, 1);
+ break;
+ case LibFunc_fdopen:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setRetDoesNotAlias(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ break;
+ case LibFunc_feof:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ break;
+ case LibFunc_free:
+ case LibFunc_vec_free:
+ Changed |= setAllocFamily(F, TheLibFunc == LibFunc_vec_free ? "vec_malloc"
+ : "malloc");
+ Changed |= setAllocKind(F, AllocFnKind::Free);
+ Changed |= setAllocatedPointerParam(F, 0);
+ Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
+ Changed |= setArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotCapture(F, 0);
+ break;
+ case LibFunc_fseek:
+ case LibFunc_ftell:
+ case LibFunc_fgetc:
+ case LibFunc_fgetc_unlocked:
+ case LibFunc_fseeko:
+ case LibFunc_ftello:
+ case LibFunc_fileno:
+ case LibFunc_fflush:
+ case LibFunc_fclose:
+ case LibFunc_fsetpos:
+ case LibFunc_flockfile:
+ case LibFunc_funlockfile:
+ case LibFunc_ftrylockfile:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ break;
+ case LibFunc_ferror:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F);
+ break;
+ case LibFunc_fputc:
+ case LibFunc_fputc_unlocked:
+ case LibFunc_fstat:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ break;
+ case LibFunc_frexp:
+ case LibFunc_frexpf:
+ case LibFunc_frexpl:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotCapture(F, 1);
+ break;
+ case LibFunc_fstatvfs:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ break;
+ case LibFunc_fgets:
+ case LibFunc_fgets_unlocked:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ break;
+ case LibFunc_fread:
+ case LibFunc_fread_unlocked:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 3);
+ break;
+ case LibFunc_fwrite:
+ case LibFunc_fwrite_unlocked:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 3);
+ // FIXME: readonly #1?
+ break;
+ case LibFunc_fputs:
+ case LibFunc_fputs_unlocked:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ break;
+ case LibFunc_fscanf:
+ case LibFunc_fprintf:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ break;
+ case LibFunc_fgetpos:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ break;
+ case LibFunc_getc:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ break;
+ case LibFunc_getlogin_r:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ break;
+ case LibFunc_getc_unlocked:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ break;
+ case LibFunc_getenv:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotCapture(F, 0);
+ break;
+ case LibFunc_gets:
+ case LibFunc_getchar:
+ case LibFunc_getchar_unlocked:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ break;
+ case LibFunc_getitimer:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ break;
+ case LibFunc_getpwnam:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ break;
+ case LibFunc_ungetc:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ break;
+ case LibFunc_uname:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ break;
+ case LibFunc_unlink:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ break;
+ case LibFunc_unsetenv:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ break;
+ case LibFunc_utime:
+ case LibFunc_utimes:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ Changed |= setOnlyReadsMemory(F, 1);
+ break;
+ case LibFunc_putc:
+ case LibFunc_putc_unlocked:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ break;
+ case LibFunc_puts:
+ case LibFunc_printf:
+ case LibFunc_perror:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ break;
+ case LibFunc_pread:
+ // May throw; "pread" is a valid pthread cancellation point.
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotCapture(F, 1);
+ break;
+ case LibFunc_pwrite:
+ // May throw; "pwrite" is a valid pthread cancellation point.
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ break;
+ case LibFunc_putchar:
+ case LibFunc_putchar_unlocked:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ break;
+ case LibFunc_popen:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setRetDoesNotAlias(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ Changed |= setOnlyReadsMemory(F, 1);
+ break;
+ case LibFunc_pclose:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ break;
+ case LibFunc_vscanf:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ break;
+ case LibFunc_vsscanf:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ Changed |= setOnlyReadsMemory(F, 1);
+ break;
+ case LibFunc_vfscanf:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ break;
+ case LibFunc_vprintf:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ break;
+ case LibFunc_vfprintf:
+ case LibFunc_vsprintf:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ break;
+ case LibFunc_vsnprintf:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ break;
+ case LibFunc_open:
+ // May throw; "open" is a valid pthread cancellation point.
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ break;
+ case LibFunc_opendir:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setRetDoesNotAlias(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ break;
+ case LibFunc_tmpfile:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setRetDoesNotAlias(F);
+ break;
+ case LibFunc_times:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ break;
+ case LibFunc_htonl:
+ case LibFunc_htons:
+ case LibFunc_ntohl:
+ case LibFunc_ntohs:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAccessMemory(F);
+ break;
+ case LibFunc_lstat:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ break;
+ case LibFunc_lchown:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ break;
+ case LibFunc_qsort:
+    // May throw; calls through a function pointer.
+    // Cannot pass an undef pointer/size.
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotCapture(F, 3);
+ break;
+ case LibFunc_dunder_strndup:
+ Changed |= setArgNoUndef(F, 1);
+ [[fallthrough]];
+ case LibFunc_dunder_strdup:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setRetDoesNotAlias(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ break;
+ case LibFunc_dunder_strtok_r:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ break;
+ case LibFunc_under_IO_getc:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ break;
+ case LibFunc_under_IO_putc:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ break;
+ case LibFunc_dunder_isoc99_scanf:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ break;
+ case LibFunc_stat64:
+ case LibFunc_lstat64:
+ case LibFunc_statvfs64:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ break;
+ case LibFunc_dunder_isoc99_sscanf:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ Changed |= setOnlyReadsMemory(F, 1);
+ break;
+ case LibFunc_fopen64:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setRetDoesNotAlias(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ Changed |= setOnlyReadsMemory(F, 1);
+ break;
+ case LibFunc_fseeko64:
+ case LibFunc_ftello64:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ break;
+ case LibFunc_tmpfile64:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setRetDoesNotAlias(F);
+ break;
+ case LibFunc_fstat64:
+ case LibFunc_fstatvfs64:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ break;
+ case LibFunc_open64:
+ // May throw; "open" is a valid pthread cancellation point.
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ break;
+ case LibFunc_gettimeofday:
+ // Currently some platforms have the restrict keyword on the arguments to
+ // gettimeofday. To be conservative, do not add noalias to gettimeofday's
+ // arguments.
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ break;
+ case LibFunc_memset_pattern4:
+ case LibFunc_memset_pattern8:
+ case LibFunc_memset_pattern16:
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ [[fallthrough]];
+ case LibFunc_memset:
+ Changed |= setWillReturn(F);
+ [[fallthrough]];
+ case LibFunc_memset_chk:
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setOnlyWritesMemory(F, 0);
+ Changed |= setDoesNotThrow(F);
+ break;
+ // int __nvvm_reflect(const char *)
+ case LibFunc_nvvm_reflect:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotAccessMemory(F);
+ Changed |= setDoesNotThrow(F);
+ break;
+ case LibFunc_ldexp:
+ case LibFunc_ldexpf:
+ case LibFunc_ldexpl:
+ Changed |= setWillReturn(F);
+ break;
+ case LibFunc_abs:
+ case LibFunc_acos:
+ case LibFunc_acosf:
+ case LibFunc_acosh:
+ case LibFunc_acoshf:
+ case LibFunc_acoshl:
+ case LibFunc_acosl:
+ case LibFunc_asin:
+ case LibFunc_asinf:
+ case LibFunc_asinh:
+ case LibFunc_asinhf:
+ case LibFunc_asinhl:
+ case LibFunc_asinl:
+ case LibFunc_atan:
+ case LibFunc_atan2:
+ case LibFunc_atan2f:
+ case LibFunc_atan2l:
+ case LibFunc_atanf:
+ case LibFunc_atanh:
+ case LibFunc_atanhf:
+ case LibFunc_atanhl:
+ case LibFunc_atanl:
+ case LibFunc_cbrt:
+ case LibFunc_cbrtf:
+ case LibFunc_cbrtl:
+ case LibFunc_ceil:
+ case LibFunc_ceilf:
+ case LibFunc_ceill:
+ case LibFunc_copysign:
+ case LibFunc_copysignf:
+ case LibFunc_copysignl:
+ case LibFunc_cos:
+ case LibFunc_cosh:
+ case LibFunc_coshf:
+ case LibFunc_coshl:
+ case LibFunc_cosf:
+ case LibFunc_cosl:
+ case LibFunc_cospi:
+ case LibFunc_cospif:
+ case LibFunc_exp:
+ case LibFunc_expf:
+ case LibFunc_expl:
+ case LibFunc_exp2:
+ case LibFunc_exp2f:
+ case LibFunc_exp2l:
+ case LibFunc_expm1:
+ case LibFunc_expm1f:
+ case LibFunc_expm1l:
+ case LibFunc_fabs:
+ case LibFunc_fabsf:
+ case LibFunc_fabsl:
+ case LibFunc_ffs:
+ case LibFunc_ffsl:
+ case LibFunc_ffsll:
+ case LibFunc_floor:
+ case LibFunc_floorf:
+ case LibFunc_floorl:
+ case LibFunc_fls:
+ case LibFunc_flsl:
+ case LibFunc_flsll:
+ case LibFunc_fmax:
+ case LibFunc_fmaxf:
+ case LibFunc_fmaxl:
+ case LibFunc_fmin:
+ case LibFunc_fminf:
+ case LibFunc_fminl:
+ case LibFunc_fmod:
+ case LibFunc_fmodf:
+ case LibFunc_fmodl:
+ case LibFunc_isascii:
+ case LibFunc_isdigit:
+ case LibFunc_labs:
+ case LibFunc_llabs:
+ case LibFunc_log:
+ case LibFunc_log10:
+ case LibFunc_log10f:
+ case LibFunc_log10l:
+ case LibFunc_log1p:
+ case LibFunc_log1pf:
+ case LibFunc_log1pl:
+ case LibFunc_log2:
+ case LibFunc_log2f:
+ case LibFunc_log2l:
+ case LibFunc_logb:
+ case LibFunc_logbf:
+ case LibFunc_logbl:
+ case LibFunc_logf:
+ case LibFunc_logl:
+ case LibFunc_nearbyint:
+ case LibFunc_nearbyintf:
+ case LibFunc_nearbyintl:
+ case LibFunc_pow:
+ case LibFunc_powf:
+ case LibFunc_powl:
+ case LibFunc_rint:
+ case LibFunc_rintf:
+ case LibFunc_rintl:
+ case LibFunc_round:
+ case LibFunc_roundf:
+ case LibFunc_roundl:
+ case LibFunc_sin:
+ case LibFunc_sincospif_stret:
+ case LibFunc_sinf:
+ case LibFunc_sinh:
+ case LibFunc_sinhf:
+ case LibFunc_sinhl:
+ case LibFunc_sinl:
+ case LibFunc_sinpi:
+ case LibFunc_sinpif:
+ case LibFunc_sqrt:
+ case LibFunc_sqrtf:
+ case LibFunc_sqrtl:
+ case LibFunc_tan:
+ case LibFunc_tanf:
+ case LibFunc_tanh:
+ case LibFunc_tanhf:
+ case LibFunc_tanhl:
+ case LibFunc_tanl:
+ case LibFunc_toascii:
+ case LibFunc_trunc:
+ case LibFunc_truncf:
+ case LibFunc_truncl:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotFreeMemory(F);
+ Changed |= setOnlyWritesMemory(F);
+ Changed |= setWillReturn(F);
+ break;
+ default:
+ // FIXME: It'd be really nice to cover all the library functions we're
+ // aware of here.
+ break;
+ }
+ // We have to do this step after AllocKind has been inferred on functions so
+ // we can reliably identify free-like and realloc-like functions.
+ if (!isLibFreeFunction(&F, TheLibFunc) && !isReallocLikeFn(&F))
+ Changed |= setDoesNotFreeMemory(F);
+ return Changed;
+}
+
+static void setArgExtAttr(Function &F, unsigned ArgNo,
+ const TargetLibraryInfo &TLI, bool Signed = true) {
+ Attribute::AttrKind ExtAttr = TLI.getExtAttrForI32Param(Signed);
+ if (ExtAttr != Attribute::None && !F.hasParamAttribute(ArgNo, ExtAttr))
+ F.addParamAttr(ArgNo, ExtAttr);
+}
+
+static void setRetExtAttr(Function &F,
+ const TargetLibraryInfo &TLI, bool Signed = true) {
+ Attribute::AttrKind ExtAttr = TLI.getExtAttrForI32Return(Signed);
+ if (ExtAttr != Attribute::None && !F.hasRetAttribute(ExtAttr))
+ F.addRetAttr(ExtAttr);
+}
+
+// Modeled after X86TargetLowering::markLibCallAttributes.
+static void markRegisterParameterAttributes(Function *F) {
+ if (!F->arg_size() || F->isVarArg())
+ return;
+
+ const CallingConv::ID CC = F->getCallingConv();
+ if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
+ return;
+
+ const Module *M = F->getParent();
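+  // The module-level register-parameter count (set e.g. by X86's -mregparm)
+  // caps how many integer parameters may be passed in registers.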
+ unsigned N = M->getNumberRegisterParameters();
+ if (!N)
+ return;
+
+ const DataLayout &DL = M->getDataLayout();
+
+ for (Argument &A : F->args()) {
+ Type *T = A.getType();
+ if (!T->isIntOrPtrTy())
+ continue;
+
+ const TypeSize &TS = DL.getTypeAllocSize(T);
+ if (TS > 8)
+ continue;
+
+ assert(TS <= 4 && "Need to account for parameters larger than word size");
+ const unsigned NumRegs = TS > 4 ? 2 : 1;
+ if (N < NumRegs)
+ return;
+
+ N -= NumRegs;
+ F->addParamAttr(A.getArgNo(), Attribute::InReg);
+ }
+}
+
+FunctionCallee llvm::getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI,
+ LibFunc TheLibFunc, FunctionType *T,
+ AttributeList AttributeList) {
+ assert(TLI.has(TheLibFunc) &&
+ "Creating call to non-existing library function.");
+ StringRef Name = TLI.getName(TheLibFunc);
+ FunctionCallee C = M->getOrInsertFunction(Name, T, AttributeList);
+
+ // Make sure any mandatory argument attributes are added.
+
+  // Any outgoing i32 argument should be handled with setArgExtAttr(), which
+  // adds an extension attribute if the target ABI requires it. Argument
+  // extensions are normally added by the front end, but when an optimizer
+  // builds a library call on its own it has to take care of this itself.
+  // Each such generated function must be handled here with sign or zero
+  // extensions as needed. F is retrieved with cast<> because the caller is
+  // required to have called isLibFuncEmittable() first.
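+  // For example, on a target whose ABI widens i32 arguments (where
+  // TLI.getExtAttrForI32Param() returns SExt or ZExt rather than None),
+  // emitting putchar(int) must mark the i32 argument signext/zeroext; the
+  // LibFunc_putchar case below does exactly that via setArgExtAttr().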
+ Function *F = cast<Function>(C.getCallee());
+ assert(F->getFunctionType() == T && "Function type does not match.");
+ switch (TheLibFunc) {
+ case LibFunc_fputc:
+ case LibFunc_putchar:
+ setArgExtAttr(*F, 0, TLI);
+ break;
+ case LibFunc_ldexp:
+ case LibFunc_ldexpf:
+ case LibFunc_ldexpl:
+ case LibFunc_memchr:
+ case LibFunc_memrchr:
+ case LibFunc_strchr:
+ setArgExtAttr(*F, 1, TLI);
+ break;
+ case LibFunc_memccpy:
+ setArgExtAttr(*F, 2, TLI);
+ break;
+
+ // These are functions that are known to not need any argument extension
+ // on any target: A size_t argument (which may be an i32 on some targets)
+ // should not trigger the assert below.
+ case LibFunc_bcmp:
+ setRetExtAttr(*F, TLI);
+ break;
+ case LibFunc_calloc:
+ case LibFunc_fwrite:
+ case LibFunc_malloc:
+ case LibFunc_memcmp:
+ case LibFunc_memcpy_chk:
+ case LibFunc_mempcpy:
+ case LibFunc_memset_pattern16:
+ case LibFunc_snprintf:
+ case LibFunc_stpncpy:
+ case LibFunc_strlcat:
+ case LibFunc_strlcpy:
+ case LibFunc_strncat:
+ case LibFunc_strncmp:
+ case LibFunc_strncpy:
+ case LibFunc_vsnprintf:
+ break;
+
+ default:
+#ifndef NDEBUG
+ for (unsigned i = 0; i < T->getNumParams(); i++)
+ assert(!isa<IntegerType>(T->getParamType(i)) &&
+ "Unhandled integer argument.");
+#endif
+ break;
+ }
+
+ markRegisterParameterAttributes(F);
+
+ return C;
+}
+
+FunctionCallee llvm::getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI,
+ LibFunc TheLibFunc, FunctionType *T) {
+ return getOrInsertLibFunc(M, TLI, TheLibFunc, T, AttributeList());
+}
+
+bool llvm::isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI,
+ LibFunc TheLibFunc) {
+ StringRef FuncName = TLI->getName(TheLibFunc);
+ if (!TLI->has(TheLibFunc))
+ return false;
+
+ // Check if the Module already has a GlobalValue with the same name, in
+ // which case it must be a Function with the expected type.
+ if (GlobalValue *GV = M->getNamedValue(FuncName)) {
+ if (auto *F = dyn_cast<Function>(GV))
+ return TLI->isValidProtoForLibFunc(*F->getFunctionType(), TheLibFunc, *M);
+ return false;
+ }
+
+ return true;
+}
+
+bool llvm::isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI,
+ StringRef Name) {
+ LibFunc TheLibFunc;
+ return TLI->getLibFunc(Name, TheLibFunc) &&
+ isLibFuncEmittable(M, TLI, TheLibFunc);
+}
+
+bool llvm::hasFloatFn(const Module *M, const TargetLibraryInfo *TLI, Type *Ty,
+ LibFunc DoubleFn, LibFunc FloatFn, LibFunc LongDoubleFn) {
+ switch (Ty->getTypeID()) {
+ case Type::HalfTyID:
+ return false;
+ case Type::FloatTyID:
+ return isLibFuncEmittable(M, TLI, FloatFn);
+ case Type::DoubleTyID:
+ return isLibFuncEmittable(M, TLI, DoubleFn);
+ default:
+ return isLibFuncEmittable(M, TLI, LongDoubleFn);
+ }
+}
+
+StringRef llvm::getFloatFn(const Module *M, const TargetLibraryInfo *TLI,
+ Type *Ty, LibFunc DoubleFn, LibFunc FloatFn,
+ LibFunc LongDoubleFn, LibFunc &TheLibFunc) {
+ assert(hasFloatFn(M, TLI, Ty, DoubleFn, FloatFn, LongDoubleFn) &&
+ "Cannot get name for unavailable function!");
+
+ switch (Ty->getTypeID()) {
+ case Type::HalfTyID:
+ llvm_unreachable("No name for HalfTy!");
+ case Type::FloatTyID:
+ TheLibFunc = FloatFn;
+ return TLI->getName(FloatFn);
+ case Type::DoubleTyID:
+ TheLibFunc = DoubleFn;
+ return TLI->getName(DoubleFn);
+ default:
+ TheLibFunc = LongDoubleFn;
+ return TLI->getName(LongDoubleFn);
+ }
+}
+
+//- Emit LibCalls ------------------------------------------------------------//
+
+Value *llvm::castToCStr(Value *V, IRBuilderBase &B) {
+ unsigned AS = V->getType()->getPointerAddressSpace();
+ return B.CreateBitCast(V, B.getInt8PtrTy(AS), "cstr");
+}
+
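+// The widths of the libcall "int" and "size_t" types are target-dependent, so
+// the helpers below query TargetLibraryInfo rather than hard-coding i32/i64.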
+static IntegerType *getIntTy(IRBuilderBase &B, const TargetLibraryInfo *TLI) {
+ return B.getIntNTy(TLI->getIntSize());
+}
+
+static IntegerType *getSizeTTy(IRBuilderBase &B, const TargetLibraryInfo *TLI) {
+ const Module *M = B.GetInsertBlock()->getModule();
+ return B.getIntNTy(TLI->getSizeTSize(*M));
+}
+
+static Value *emitLibCall(LibFunc TheLibFunc, Type *ReturnType,
+ ArrayRef<Type *> ParamTypes,
+ ArrayRef<Value *> Operands, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI,
+ bool IsVaArgs = false) {
+ Module *M = B.GetInsertBlock()->getModule();
+ if (!isLibFuncEmittable(M, TLI, TheLibFunc))
+ return nullptr;
+
+ StringRef FuncName = TLI->getName(TheLibFunc);
+ FunctionType *FuncType = FunctionType::get(ReturnType, ParamTypes, IsVaArgs);
+ FunctionCallee Callee = getOrInsertLibFunc(M, *TLI, TheLibFunc, FuncType);
+ inferNonMandatoryLibFuncAttrs(M, FuncName, *TLI);
+ CallInst *CI = B.CreateCall(Callee, Operands, FuncName);
+ if (const Function *F =
+ dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+Value *llvm::emitStrLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ Type *SizeTTy = getSizeTTy(B, TLI);
+ return emitLibCall(LibFunc_strlen, SizeTTy,
+ B.getInt8PtrTy(), castToCStr(Ptr, B), B, TLI);
+}
+
+Value *llvm::emitStrDup(Value *Ptr, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI) {
+ return emitLibCall(LibFunc_strdup, B.getInt8PtrTy(), B.getInt8PtrTy(),
+ castToCStr(Ptr, B), B, TLI);
+}
+
+Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI) {
+ Type *I8Ptr = B.getInt8PtrTy();
+ Type *IntTy = getIntTy(B, TLI);
+ return emitLibCall(LibFunc_strchr, I8Ptr, {I8Ptr, IntTy},
+ {castToCStr(Ptr, B), ConstantInt::get(IntTy, C)}, B, TLI);
+}
+
+Value *llvm::emitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
+ Type *IntTy = getIntTy(B, TLI);
+ Type *SizeTTy = getSizeTTy(B, TLI);
+ return emitLibCall(
+ LibFunc_strncmp, IntTy,
+ {B.getInt8PtrTy(), B.getInt8PtrTy(), SizeTTy},
+ {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
+}
+
+Value *llvm::emitStrCpy(Value *Dst, Value *Src, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI) {
+ Type *I8Ptr = Dst->getType();
+ return emitLibCall(LibFunc_strcpy, I8Ptr, {I8Ptr, I8Ptr},
+ {castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI);
+}
+
+Value *llvm::emitStpCpy(Value *Dst, Value *Src, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI) {
+ Type *I8Ptr = B.getInt8PtrTy();
+ return emitLibCall(LibFunc_stpcpy, I8Ptr, {I8Ptr, I8Ptr},
+ {castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI);
+}
+
+Value *llvm::emitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI) {
+ Type *I8Ptr = B.getInt8PtrTy();
+ Type *SizeTTy = getSizeTTy(B, TLI);
+ return emitLibCall(LibFunc_strncpy, I8Ptr, {I8Ptr, I8Ptr, SizeTTy},
+ {castToCStr(Dst, B), castToCStr(Src, B), Len}, B, TLI);
+}
+
+Value *llvm::emitStpNCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI) {
+ Type *I8Ptr = B.getInt8PtrTy();
+ Type *SizeTTy = getSizeTTy(B, TLI);
+ return emitLibCall(LibFunc_stpncpy, I8Ptr, {I8Ptr, I8Ptr, SizeTTy},
+ {castToCStr(Dst, B), castToCStr(Src, B), Len}, B, TLI);
+}
+
+Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
+ IRBuilderBase &B, const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ Module *M = B.GetInsertBlock()->getModule();
+ if (!isLibFuncEmittable(M, TLI, LibFunc_memcpy_chk))
+ return nullptr;
+
+ AttributeList AS;
+ AS = AttributeList::get(M->getContext(), AttributeList::FunctionIndex,
+ Attribute::NoUnwind);
+ Type *I8Ptr = B.getInt8PtrTy();
+ Type *SizeTTy = getSizeTTy(B, TLI);
+ FunctionCallee MemCpy = getOrInsertLibFunc(M, *TLI, LibFunc_memcpy_chk,
+ AttributeList::get(M->getContext(), AS), I8Ptr,
+ I8Ptr, I8Ptr, SizeTTy, SizeTTy);
+ Dst = castToCStr(Dst, B);
+ Src = castToCStr(Src, B);
+ CallInst *CI = B.CreateCall(MemCpy, {Dst, Src, Len, ObjSize});
+ if (const Function *F =
+ dyn_cast<Function>(MemCpy.getCallee()->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+Value *llvm::emitMemPCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
+ Type *I8Ptr = B.getInt8PtrTy();
+ Type *SizeTTy = getSizeTTy(B, TLI);
+ return emitLibCall(LibFunc_mempcpy, I8Ptr,
+ {I8Ptr, I8Ptr, SizeTTy},
+ {Dst, Src, Len}, B, TLI);
+}
+
+Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
+ Type *I8Ptr = B.getInt8PtrTy();
+ Type *IntTy = getIntTy(B, TLI);
+ Type *SizeTTy = getSizeTTy(B, TLI);
+ return emitLibCall(LibFunc_memchr, I8Ptr,
+ {I8Ptr, IntTy, SizeTTy},
+ {castToCStr(Ptr, B), Val, Len}, B, TLI);
+}
+
+Value *llvm::emitMemRChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
+ Type *I8Ptr = B.getInt8PtrTy();
+ Type *IntTy = getIntTy(B, TLI);
+ Type *SizeTTy = getSizeTTy(B, TLI);
+ return emitLibCall(LibFunc_memrchr, I8Ptr,
+ {I8Ptr, IntTy, SizeTTy},
+ {castToCStr(Ptr, B), Val, Len}, B, TLI);
+}
+
+Value *llvm::emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
+ Type *I8Ptr = B.getInt8PtrTy();
+ Type *IntTy = getIntTy(B, TLI);
+ Type *SizeTTy = getSizeTTy(B, TLI);
+ return emitLibCall(LibFunc_memcmp, IntTy,
+ {I8Ptr, I8Ptr, SizeTTy},
+ {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
+}
+
+Value *llvm::emitBCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
+ Type *I8Ptr = B.getInt8PtrTy();
+ Type *IntTy = getIntTy(B, TLI);
+ Type *SizeTTy = getSizeTTy(B, TLI);
+ return emitLibCall(LibFunc_bcmp, IntTy,
+ {I8Ptr, I8Ptr, SizeTTy},
+ {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
+}
+
+Value *llvm::emitMemCCpy(Value *Ptr1, Value *Ptr2, Value *Val, Value *Len,
+ IRBuilderBase &B, const TargetLibraryInfo *TLI) {
+ Type *I8Ptr = B.getInt8PtrTy();
+ Type *IntTy = getIntTy(B, TLI);
+ Type *SizeTTy = getSizeTTy(B, TLI);
+ return emitLibCall(LibFunc_memccpy, I8Ptr,
+ {I8Ptr, I8Ptr, IntTy, SizeTTy},
+ {Ptr1, Ptr2, Val, Len}, B, TLI);
+}
+
+Value *llvm::emitSNPrintf(Value *Dest, Value *Size, Value *Fmt,
+ ArrayRef<Value *> VariadicArgs, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI) {
+ Type *I8Ptr = B.getInt8PtrTy();
+ Type *IntTy = getIntTy(B, TLI);
+ Type *SizeTTy = getSizeTTy(B, TLI);
+ SmallVector<Value *, 8> Args{castToCStr(Dest, B), Size, castToCStr(Fmt, B)};
+ llvm::append_range(Args, VariadicArgs);
+ return emitLibCall(LibFunc_snprintf, IntTy,
+ {I8Ptr, SizeTTy, I8Ptr},
+ Args, B, TLI, /*IsVaArgs=*/true);
+}
+
+Value *llvm::emitSPrintf(Value *Dest, Value *Fmt,
+ ArrayRef<Value *> VariadicArgs, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI) {
+ Type *I8Ptr = B.getInt8PtrTy();
+ Type *IntTy = getIntTy(B, TLI);
+ SmallVector<Value *, 8> Args{castToCStr(Dest, B), castToCStr(Fmt, B)};
+ llvm::append_range(Args, VariadicArgs);
+ return emitLibCall(LibFunc_sprintf, IntTy,
+ {I8Ptr, I8Ptr}, Args, B, TLI,
+ /*IsVaArgs=*/true);
+}
+
+Value *llvm::emitStrCat(Value *Dest, Value *Src, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI) {
+ return emitLibCall(LibFunc_strcat, B.getInt8PtrTy(),
+ {B.getInt8PtrTy(), B.getInt8PtrTy()},
+ {castToCStr(Dest, B), castToCStr(Src, B)}, B, TLI);
+}
+
+Value *llvm::emitStrLCpy(Value *Dest, Value *Src, Value *Size, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI) {
+ Type *I8Ptr = B.getInt8PtrTy();
+ Type *SizeTTy = getSizeTTy(B, TLI);
+ return emitLibCall(LibFunc_strlcpy, SizeTTy,
+ {I8Ptr, I8Ptr, SizeTTy},
+ {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
+}
+
+Value *llvm::emitStrLCat(Value *Dest, Value *Src, Value *Size, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI) {
+ Type *I8Ptr = B.getInt8PtrTy();
+ Type *SizeTTy = getSizeTTy(B, TLI);
+ return emitLibCall(LibFunc_strlcat, SizeTTy,
+ {I8Ptr, I8Ptr, SizeTTy},
+ {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
+}
+
+Value *llvm::emitStrNCat(Value *Dest, Value *Src, Value *Size, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI) {
+ Type *I8Ptr = B.getInt8PtrTy();
+ Type *SizeTTy = getSizeTTy(B, TLI);
+ return emitLibCall(LibFunc_strncat, I8Ptr,
+ {I8Ptr, I8Ptr, SizeTTy},
+ {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
+}
+
+Value *llvm::emitVSNPrintf(Value *Dest, Value *Size, Value *Fmt, Value *VAList,
+ IRBuilderBase &B, const TargetLibraryInfo *TLI) {
+ Type *I8Ptr = B.getInt8PtrTy();
+ Type *IntTy = getIntTy(B, TLI);
+ Type *SizeTTy = getSizeTTy(B, TLI);
+ return emitLibCall(
+ LibFunc_vsnprintf, IntTy,
+ {I8Ptr, SizeTTy, I8Ptr, VAList->getType()},
+ {castToCStr(Dest, B), Size, castToCStr(Fmt, B), VAList}, B, TLI);
+}
+
+Value *llvm::emitVSPrintf(Value *Dest, Value *Fmt, Value *VAList,
+ IRBuilderBase &B, const TargetLibraryInfo *TLI) {
+ Type *I8Ptr = B.getInt8PtrTy();
+ Type *IntTy = getIntTy(B, TLI);
+ return emitLibCall(LibFunc_vsprintf, IntTy,
+ {I8Ptr, I8Ptr, VAList->getType()},
+ {castToCStr(Dest, B), castToCStr(Fmt, B), VAList}, B, TLI);
+}
+
+/// Append a suffix to the function name according to the type of 'Op'.
+static void appendTypeSuffix(Value *Op, StringRef &Name,
+ SmallString<20> &NameBuffer) {
+ if (!Op->getType()->isDoubleTy()) {
+ NameBuffer += Name;
+
+ if (Op->getType()->isFloatTy())
+ NameBuffer += 'f';
+ else
+ NameBuffer += 'l';
+
+ Name = NameBuffer;
+ }
+}
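+// For example, a float operand turns "cos" into "cosf", and any non-double,
+// non-float operand (treated as long double here) turns it into "cosl"; a
+// double operand leaves the name unchanged.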
+
+static Value *emitUnaryFloatFnCallHelper(Value *Op, LibFunc TheLibFunc,
+ StringRef Name, IRBuilderBase &B,
+ const AttributeList &Attrs,
+ const TargetLibraryInfo *TLI) {
+ assert((Name != "") && "Must specify Name to emitUnaryFloatFnCall");
+
+ Module *M = B.GetInsertBlock()->getModule();
+ FunctionCallee Callee = getOrInsertLibFunc(M, *TLI, TheLibFunc, Op->getType(),
+ Op->getType());
+ CallInst *CI = B.CreateCall(Callee, Op, Name);
+
+ // The incoming attribute set may have come from a speculatable intrinsic, but
+ // is being replaced with a library call which is not allowed to be
+ // speculatable.
+ CI->setAttributes(
+ Attrs.removeFnAttribute(B.getContext(), Attribute::Speculatable));
+ if (const Function *F =
+ dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI,
+ StringRef Name, IRBuilderBase &B,
+ const AttributeList &Attrs) {
+ SmallString<20> NameBuffer;
+ appendTypeSuffix(Op, Name, NameBuffer);
+
+ LibFunc TheLibFunc;
+ TLI->getLibFunc(Name, TheLibFunc);
+
+ return emitUnaryFloatFnCallHelper(Op, TheLibFunc, Name, B, Attrs, TLI);
+}
+
+Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI,
+ LibFunc DoubleFn, LibFunc FloatFn,
+ LibFunc LongDoubleFn, IRBuilderBase &B,
+ const AttributeList &Attrs) {
+ // Get the name of the function according to TLI.
+ Module *M = B.GetInsertBlock()->getModule();
+ LibFunc TheLibFunc;
+ StringRef Name = getFloatFn(M, TLI, Op->getType(), DoubleFn, FloatFn,
+ LongDoubleFn, TheLibFunc);
+
+ return emitUnaryFloatFnCallHelper(Op, TheLibFunc, Name, B, Attrs, TLI);
+}
+
+static Value *emitBinaryFloatFnCallHelper(Value *Op1, Value *Op2,
+ LibFunc TheLibFunc,
+ StringRef Name, IRBuilderBase &B,
+ const AttributeList &Attrs,
+ const TargetLibraryInfo *TLI) {
+ assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall");
+
+ Module *M = B.GetInsertBlock()->getModule();
+ FunctionCallee Callee = getOrInsertLibFunc(M, *TLI, TheLibFunc, Op1->getType(),
+ Op1->getType(), Op2->getType());
+ inferNonMandatoryLibFuncAttrs(M, Name, *TLI);
+ CallInst *CI = B.CreateCall(Callee, { Op1, Op2 }, Name);
+
+ // The incoming attribute set may have come from a speculatable intrinsic, but
+ // is being replaced with a library call which is not allowed to be
+ // speculatable.
+ CI->setAttributes(
+ Attrs.removeFnAttribute(B.getContext(), Attribute::Speculatable));
+ if (const Function *F =
+ dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2,
+ const TargetLibraryInfo *TLI,
+ StringRef Name, IRBuilderBase &B,
+ const AttributeList &Attrs) {
+ assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall");
+
+ SmallString<20> NameBuffer;
+ appendTypeSuffix(Op1, Name, NameBuffer);
+
+ LibFunc TheLibFunc;
+ TLI->getLibFunc(Name, TheLibFunc);
+
+ return emitBinaryFloatFnCallHelper(Op1, Op2, TheLibFunc, Name, B, Attrs, TLI);
+}
+
+Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2,
+ const TargetLibraryInfo *TLI,
+ LibFunc DoubleFn, LibFunc FloatFn,
+ LibFunc LongDoubleFn, IRBuilderBase &B,
+ const AttributeList &Attrs) {
+ // Get the name of the function according to TLI.
+ Module *M = B.GetInsertBlock()->getModule();
+ LibFunc TheLibFunc;
+ StringRef Name = getFloatFn(M, TLI, Op1->getType(), DoubleFn, FloatFn,
+ LongDoubleFn, TheLibFunc);
+
+ return emitBinaryFloatFnCallHelper(Op1, Op2, TheLibFunc, Name, B, Attrs, TLI);
+}
+
+// Emit a call to putchar(int) with Char as the argument. Char must have
+// the same precision as int, which need not be 32 bits.
+Value *llvm::emitPutChar(Value *Char, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI) {
+ Module *M = B.GetInsertBlock()->getModule();
+ if (!isLibFuncEmittable(M, TLI, LibFunc_putchar))
+ return nullptr;
+
+ Type *IntTy = getIntTy(B, TLI);
+ StringRef PutCharName = TLI->getName(LibFunc_putchar);
+ FunctionCallee PutChar = getOrInsertLibFunc(M, *TLI, LibFunc_putchar,
+ IntTy, IntTy);
+ inferNonMandatoryLibFuncAttrs(M, PutCharName, *TLI);
+ CallInst *CI = B.CreateCall(PutChar, Char, PutCharName);
+
+ if (const Function *F =
+ dyn_cast<Function>(PutChar.getCallee()->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+Value *llvm::emitPutS(Value *Str, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI) {
+ Module *M = B.GetInsertBlock()->getModule();
+ if (!isLibFuncEmittable(M, TLI, LibFunc_puts))
+ return nullptr;
+
+ Type *IntTy = getIntTy(B, TLI);
+ StringRef PutsName = TLI->getName(LibFunc_puts);
+ FunctionCallee PutS = getOrInsertLibFunc(M, *TLI, LibFunc_puts, IntTy,
+ B.getInt8PtrTy());
+ inferNonMandatoryLibFuncAttrs(M, PutsName, *TLI);
+ CallInst *CI = B.CreateCall(PutS, castToCStr(Str, B), PutsName);
+ if (const Function *F =
+ dyn_cast<Function>(PutS.getCallee()->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI) {
+ Module *M = B.GetInsertBlock()->getModule();
+ if (!isLibFuncEmittable(M, TLI, LibFunc_fputc))
+ return nullptr;
+
+ Type *IntTy = getIntTy(B, TLI);
+ StringRef FPutcName = TLI->getName(LibFunc_fputc);
+ FunctionCallee F = getOrInsertLibFunc(M, *TLI, LibFunc_fputc, IntTy,
+ IntTy, File->getType());
+ if (File->getType()->isPointerTy())
+ inferNonMandatoryLibFuncAttrs(M, FPutcName, *TLI);
+ CallInst *CI = B.CreateCall(F, {Char, File}, FPutcName);
+
+ if (const Function *Fn =
+ dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+ return CI;
+}
+
+Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI) {
+ Module *M = B.GetInsertBlock()->getModule();
+ if (!isLibFuncEmittable(M, TLI, LibFunc_fputs))
+ return nullptr;
+
+ Type *IntTy = getIntTy(B, TLI);
+ StringRef FPutsName = TLI->getName(LibFunc_fputs);
+ FunctionCallee F = getOrInsertLibFunc(M, *TLI, LibFunc_fputs, IntTy,
+ B.getInt8PtrTy(), File->getType());
+ if (File->getType()->isPointerTy())
+ inferNonMandatoryLibFuncAttrs(M, FPutsName, *TLI);
+ CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, FPutsName);
+
+ if (const Function *Fn =
+ dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+ return CI;
+}
+
+Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilderBase &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
+ Module *M = B.GetInsertBlock()->getModule();
+ if (!isLibFuncEmittable(M, TLI, LibFunc_fwrite))
+ return nullptr;
+
+ Type *SizeTTy = getSizeTTy(B, TLI);
+ StringRef FWriteName = TLI->getName(LibFunc_fwrite);
+ FunctionCallee F = getOrInsertLibFunc(M, *TLI, LibFunc_fwrite,
+ SizeTTy, B.getInt8PtrTy(), SizeTTy,
+ SizeTTy, File->getType());
+
+ if (File->getType()->isPointerTy())
+ inferNonMandatoryLibFuncAttrs(M, FWriteName, *TLI);
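+  // Emits fwrite(Ptr, Size, /*nitems=*/1, File): Size is the total byte count
+  // and the element count is fixed to one.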
+ CallInst *CI =
+ B.CreateCall(F, {castToCStr(Ptr, B), Size,
+ ConstantInt::get(SizeTTy, 1), File});
+
+ if (const Function *Fn =
+ dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+ return CI;
+}
+
+Value *llvm::emitMalloc(Value *Num, IRBuilderBase &B, const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ Module *M = B.GetInsertBlock()->getModule();
+ if (!isLibFuncEmittable(M, TLI, LibFunc_malloc))
+ return nullptr;
+
+ StringRef MallocName = TLI->getName(LibFunc_malloc);
+ Type *SizeTTy = getSizeTTy(B, TLI);
+ FunctionCallee Malloc = getOrInsertLibFunc(M, *TLI, LibFunc_malloc,
+ B.getInt8PtrTy(), SizeTTy);
+ inferNonMandatoryLibFuncAttrs(M, MallocName, *TLI);
+ CallInst *CI = B.CreateCall(Malloc, Num, MallocName);
+
+ if (const Function *F =
+ dyn_cast<Function>(Malloc.getCallee()->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+Value *llvm::emitCalloc(Value *Num, Value *Size, IRBuilderBase &B,
+ const TargetLibraryInfo &TLI) {
+ Module *M = B.GetInsertBlock()->getModule();
+ if (!isLibFuncEmittable(M, &TLI, LibFunc_calloc))
+ return nullptr;
+
+ StringRef CallocName = TLI.getName(LibFunc_calloc);
+ Type *SizeTTy = getSizeTTy(B, &TLI);
+ FunctionCallee Calloc = getOrInsertLibFunc(M, TLI, LibFunc_calloc,
+ B.getInt8PtrTy(), SizeTTy, SizeTTy);
+ inferNonMandatoryLibFuncAttrs(M, CallocName, TLI);
+ CallInst *CI = B.CreateCall(Calloc, {Num, Size}, CallocName);
+
+ if (const auto *F =
+ dyn_cast<Function>(Calloc.getCallee()->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/BypassSlowDivision.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/BypassSlowDivision.cpp
new file mode 100644
index 0000000000..930a0bcbfa
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -0,0 +1,480 @@
+//===- BypassSlowDivision.cpp - Bypass slow division ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an optimization for div and rem on architectures that
+// execute short instructions significantly faster than longer instructions.
+// For example, on Intel Atom, 32-bit divides are slow enough that it is
+// profitable to check the values of the operands at run time and, if both are
+// positive and less than 256, use an unsigned 8-bit divide instead.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BypassSlowDivision.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/KnownBits.h"
+#include <cassert>
+#include <cstdint>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "bypass-slow-division"
+
+namespace {
+
+ struct QuotRemPair {
+ Value *Quotient;
+ Value *Remainder;
+
+ QuotRemPair(Value *InQuotient, Value *InRemainder)
+ : Quotient(InQuotient), Remainder(InRemainder) {}
+ };
+
+ /// A quotient and remainder, plus a BB from which they logically "originate".
+ /// If you use Quotient or Remainder in a Phi node, you should use BB as its
+ /// corresponding predecessor.
+ struct QuotRemWithBB {
+ BasicBlock *BB = nullptr;
+ Value *Quotient = nullptr;
+ Value *Remainder = nullptr;
+ };
+
+using DivCacheTy = DenseMap<DivRemMapKey, QuotRemPair>;
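+// Maps a slow (wide) bit width to the narrower width used to bypass it; e.g.
+// an entry {64, 32} requests that 64-bit div/rem be performed with 32-bit
+// operations whenever both operands fit.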
+using BypassWidthsTy = DenseMap<unsigned, unsigned>;
+using VisitedSetTy = SmallPtrSet<Instruction *, 4>;
+
+enum ValueRange {
+ /// Operand definitely fits into BypassType. No runtime checks are needed.
+ VALRNG_KNOWN_SHORT,
+ /// A runtime check is required, as value range is unknown.
+ VALRNG_UNKNOWN,
+ /// Operand is unlikely to fit into BypassType. The bypassing should be
+ /// disabled.
+ VALRNG_LIKELY_LONG
+};
+
+class FastDivInsertionTask {
+ bool IsValidTask = false;
+ Instruction *SlowDivOrRem = nullptr;
+ IntegerType *BypassType = nullptr;
+ BasicBlock *MainBB = nullptr;
+
+ bool isHashLikeValue(Value *V, VisitedSetTy &Visited);
+ ValueRange getValueRange(Value *Op, VisitedSetTy &Visited);
+ QuotRemWithBB createSlowBB(BasicBlock *Successor);
+ QuotRemWithBB createFastBB(BasicBlock *Successor);
+ QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS,
+ BasicBlock *PhiBB);
+ Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2);
+ std::optional<QuotRemPair> insertFastDivAndRem();
+
+ bool isSignedOp() {
+ return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
+ SlowDivOrRem->getOpcode() == Instruction::SRem;
+ }
+
+ bool isDivisionOp() {
+ return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
+ SlowDivOrRem->getOpcode() == Instruction::UDiv;
+ }
+
+ Type *getSlowType() { return SlowDivOrRem->getType(); }
+
+public:
+ FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);
+
+ Value *getReplacement(DivCacheTy &Cache);
+};
+
+} // end anonymous namespace
+
+FastDivInsertionTask::FastDivInsertionTask(Instruction *I,
+ const BypassWidthsTy &BypassWidths) {
+ switch (I->getOpcode()) {
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ SlowDivOrRem = I;
+ break;
+ default:
+ // I is not a div/rem operation.
+ return;
+ }
+
+ // Skip division on vector types. Only optimize integer instructions.
+ IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType());
+ if (!SlowType)
+ return;
+
+ // Skip if this bitwidth is not bypassed.
+ auto BI = BypassWidths.find(SlowType->getBitWidth());
+ if (BI == BypassWidths.end())
+ return;
+
+ // Get type for div/rem instruction with bypass bitwidth.
+ IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
+ BypassType = BT;
+
+ // The original basic block.
+ MainBB = I->getParent();
+
+ // The instruction is indeed a slow div or rem operation.
+ IsValidTask = true;
+}
+
+/// Reuses a previously-computed quotient or remainder from the current BB if
+/// the operands and operation are identical. Otherwise calls
+/// insertFastDivAndRem to perform the optimization and caches the resulting
+/// quotient and remainder. If no replacement can be generated, nullptr is
+/// returned.
+Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
+ // First, make sure that the task is valid.
+ if (!IsValidTask)
+ return nullptr;
+
+ // Then, look for a value in Cache.
+ Value *Dividend = SlowDivOrRem->getOperand(0);
+ Value *Divisor = SlowDivOrRem->getOperand(1);
+ DivRemMapKey Key(isSignedOp(), Dividend, Divisor);
+ auto CacheI = Cache.find(Key);
+
+ if (CacheI == Cache.end()) {
+ // If previous instance does not exist, try to insert fast div.
+ std::optional<QuotRemPair> OptResult = insertFastDivAndRem();
+ // Bail out if insertFastDivAndRem has failed.
+ if (!OptResult)
+ return nullptr;
+ CacheI = Cache.insert({Key, *OptResult}).first;
+ }
+
+ QuotRemPair &Value = CacheI->second;
+ return isDivisionOp() ? Value.Quotient : Value.Remainder;
+}
+
+/// Check if a value looks like a hash.
+///
+/// The routine is expected to detect values computed using the most common hash
+/// algorithms. Typically, hash computations end with one of the following
+/// instructions:
+///
+/// 1) MUL with a constant wider than BypassType
+/// 2) XOR instruction
+///
+/// And even if we are wrong and the value is not a hash, it is still quite
+/// unlikely that such values will fit into BypassType.
+///
+/// To detect string hash algorithms like FNV we have to look through PHI-nodes.
+/// It is implemented as a depth-first search for values that look neither long
+/// nor hash-like.
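+///
+/// For example, each step of the 64-bit FNV-1a hash XORs in a byte and then
+/// multiplies by the prime 0x100000001b3, which is wider than a typical
+/// 32-bit BypassType, so the final instruction matches the MUL case below.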
+bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+
+ switch (I->getOpcode()) {
+ case Instruction::Xor:
+ return true;
+ case Instruction::Mul: {
+ // After Constant Hoisting pass, long constants may be represented as
+ // bitcast instructions. As a result, some constants may look like an
+ // instruction at first, and an additional check is necessary to find out if
+ // an operand is actually a constant.
+ Value *Op1 = I->getOperand(1);
+ ConstantInt *C = dyn_cast<ConstantInt>(Op1);
+ if (!C && isa<BitCastInst>(Op1))
+ C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
+ return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth();
+ }
+ case Instruction::PHI:
+    // Stop IR traversal on pathologically deep input code; this limits the
+    // recursion depth.
+ if (Visited.size() >= 16)
+ return false;
+ // Do not visit nodes that have been visited already. We return true because
+ // it means that we couldn't find any value that doesn't look hash-like.
+ if (!Visited.insert(I).second)
+ return true;
+ return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) {
+ // Ignore undef values as they probably don't affect the division
+ // operands.
+ return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
+ isa<UndefValue>(V);
+ });
+ default:
+ return false;
+ }
+}
+
+/// Check if an integer value fits into our bypass type.
+ValueRange FastDivInsertionTask::getValueRange(Value *V,
+ VisitedSetTy &Visited) {
+ unsigned ShortLen = BypassType->getBitWidth();
+ unsigned LongLen = V->getType()->getIntegerBitWidth();
+
+ assert(LongLen > ShortLen && "Value type must be wider than BypassType");
+ unsigned HiBits = LongLen - ShortLen;
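+  // E.g. bypassing a 64-bit division with a 32-bit BypassType gives
+  // HiBits == 32: the value fits iff its 32 high bits are known to be zero.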
+
+ const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout();
+ KnownBits Known(LongLen);
+
+ computeKnownBits(V, Known, DL);
+
+ if (Known.countMinLeadingZeros() >= HiBits)
+ return VALRNG_KNOWN_SHORT;
+
+ if (Known.countMaxLeadingZeros() < HiBits)
+ return VALRNG_LIKELY_LONG;
+
+ // Long integer divisions are often used in hashtable implementations. It's
+ // not worth bypassing such divisions because hash values are extremely
+ // unlikely to have enough leading zeros. The call below tries to detect
+ // values that are unlikely to fit BypassType (including hashes).
+ if (isHashLikeValue(V, Visited))
+ return VALRNG_LIKELY_LONG;
+
+ return VALRNG_UNKNOWN;
+}
+
+/// Add new basic block for slow div and rem operations and put it before
+/// SuccessorBB.
+QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) {
+ QuotRemWithBB DivRemPair;
+ DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
+ MainBB->getParent(), SuccessorBB);
+ IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
+ Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
+
+ Value *Dividend = SlowDivOrRem->getOperand(0);
+ Value *Divisor = SlowDivOrRem->getOperand(1);
+
+ if (isSignedOp()) {
+ DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);
+ DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);
+ } else {
+ DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);
+ DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);
+ }
+
+ Builder.CreateBr(SuccessorBB);
+ return DivRemPair;
+}
+
+/// Add new basic block for fast div and rem operations and put it before
+/// SuccessorBB.
+QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) {
+ QuotRemWithBB DivRemPair;
+ DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
+ MainBB->getParent(), SuccessorBB);
+ IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
+ Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
+
+ Value *Dividend = SlowDivOrRem->getOperand(0);
+ Value *Divisor = SlowDivOrRem->getOperand(1);
+ Value *ShortDivisorV =
+ Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);
+ Value *ShortDividendV =
+ Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);
+
+ // udiv/urem because this optimization only handles positive numbers.
+ Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);
+ Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);
+ DivRemPair.Quotient =
+ Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());
+ DivRemPair.Remainder =
+ Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());
+ Builder.CreateBr(SuccessorBB);
+
+ return DivRemPair;
+}
+
+/// Creates Phi nodes for result of Div and Rem.
+QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,
+ QuotRemWithBB &RHS,
+ BasicBlock *PhiBB) {
+ IRBuilder<> Builder(PhiBB, PhiBB->begin());
+ Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
+ PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);
+ QuoPhi->addIncoming(LHS.Quotient, LHS.BB);
+ QuoPhi->addIncoming(RHS.Quotient, RHS.BB);
+ PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);
+ RemPhi->addIncoming(LHS.Remainder, LHS.BB);
+ RemPhi->addIncoming(RHS.Remainder, RHS.BB);
+ return QuotRemPair(QuoPhi, RemPhi);
+}
+
+/// Creates a runtime check to test whether both the divisor and dividend fit
+/// into BypassType. The check is inserted at the end of MainBB. True return
+/// value means that the operands fit. Either of the operands may be NULL if it
+/// doesn't need a runtime check.
+Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) {
+ assert((Op1 || Op2) && "Nothing to check");
+ IRBuilder<> Builder(MainBB, MainBB->end());
+ Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
+
+ Value *OrV;
+ if (Op1 && Op2)
+ OrV = Builder.CreateOr(Op1, Op2);
+ else
+ OrV = Op1 ? Op1 : Op2;
+
+ // BitMask is inverted to check if the operands are
+ // larger than the bypass type
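+  // (e.g. with 64-bit operands and a 32-bit BypassType, BitMask becomes
+  // 0xFFFFFFFF00000000, so AndV is zero iff both upper halves are zero).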
+ uint64_t BitMask = ~BypassType->getBitMask();
+ Value *AndV = Builder.CreateAnd(OrV, BitMask);
+
+ // Compare operand values
+ Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0);
+ return Builder.CreateICmpEQ(AndV, ZeroV);
+}
+
+/// Substitutes the div/rem instruction with code that checks the value of the
+/// operands and uses a shorter, faster div/rem instruction when possible.
+std::optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
+ Value *Dividend = SlowDivOrRem->getOperand(0);
+ Value *Divisor = SlowDivOrRem->getOperand(1);
+
+ VisitedSetTy SetL;
+ ValueRange DividendRange = getValueRange(Dividend, SetL);
+ if (DividendRange == VALRNG_LIKELY_LONG)
+ return std::nullopt;
+
+ VisitedSetTy SetR;
+ ValueRange DivisorRange = getValueRange(Divisor, SetR);
+ if (DivisorRange == VALRNG_LIKELY_LONG)
+ return std::nullopt;
+
+ bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT);
+ bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT);
+
+ if (DividendShort && DivisorShort) {
+ // If both operands are known to be short then just replace the long
+ // division with a short one in-place. Since we're not introducing control
+ // flow in this case, narrowing the division is always a win, even if the
+ // divisor is a constant (and will later get replaced by a multiplication).
+
+ IRBuilder<> Builder(SlowDivOrRem);
+ Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);
+ Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType);
+ Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor);
+ Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor);
+ Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());
+ Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());
+ return QuotRemPair(ExtDiv, ExtRem);
+ }
+
+ if (isa<ConstantInt>(Divisor)) {
+    // If the divisor is a constant, DAGCombiner will convert the division
+    // into a multiplication by a magic constant. It isn't clear whether it is
+    // worth introducing control flow here just to get a narrower multiply.
+ return std::nullopt;
+ }
+
+ // After Constant Hoisting pass, long constants may be represented as
+ // bitcast instructions. As a result, some constants may look like an
+ // instruction at first, and an additional check is necessary to find out if
+ // an operand is actually a constant.
+ if (auto *BCI = dyn_cast<BitCastInst>(Divisor))
+ if (BCI->getParent() == SlowDivOrRem->getParent() &&
+ isa<ConstantInt>(BCI->getOperand(0)))
+ return std::nullopt;
+
+ IRBuilder<> Builder(MainBB, MainBB->end());
+ Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
+
+ if (DividendShort && !isSignedOp()) {
+ // If the division is unsigned and Dividend is known to be short, then
+ // either
+ // 1) Divisor is less or equal to Dividend, and the result can be computed
+ // with a short division.
+ // 2) Divisor is greater than Dividend. In this case, no division is needed
+ // at all: The quotient is 0 and the remainder is equal to Dividend.
+ //
+ // So instead of checking at runtime whether Divisor fits into BypassType,
+ // we emit a runtime check to differentiate between these two cases. This
+ // lets us entirely avoid a long div.
+
+ // Split the basic block before the div/rem.
+ BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
+ // Remove the unconditional branch from MainBB to SuccessorBB.
+ MainBB->back().eraseFromParent();
+ QuotRemWithBB Long;
+ Long.BB = MainBB;
+ Long.Quotient = ConstantInt::get(getSlowType(), 0);
+ Long.Remainder = Dividend;
+ QuotRemWithBB Fast = createFastBB(SuccessorBB);
+ QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB);
+ Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor);
+ Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB);
+ return Result;
+ } else {
+ // General case. Create both slow and fast div/rem pairs and choose one of
+ // them at runtime.
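+    //
+    // The resulting IR looks roughly like this (64-bit udiv, 32-bit
+    // BypassType):
+    //
+    //   MainBB:      %or  = or i64 %a, %b
+    //                %and = and i64 %or, 0xFFFFFFFF00000000
+    //                %cmp = icmp eq i64 %and, 0
+    //                br i1 %cmp, label %fast, label %slow
+    //   fast:        trunc to i32, udiv/urem, zext back to i64, br %succ
+    //   slow:        full-width udiv/urem, br %succ
+    //   SuccessorBB: phi nodes merge the fast and slow results.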
+
+ // Split the basic block before the div/rem.
+ BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
+ // Remove the unconditional branch from MainBB to SuccessorBB.
+ MainBB->back().eraseFromParent();
+ QuotRemWithBB Fast = createFastBB(SuccessorBB);
+ QuotRemWithBB Slow = createSlowBB(SuccessorBB);
+ QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB);
+ Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend,
+ DivisorShort ? nullptr : Divisor);
+ Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB);
+ return Result;
+ }
+}
+
+/// This optimization identifies DIV/REM instructions in a BB that can be
+/// profitably bypassed and carried out with a shorter, faster divide.
+bool llvm::bypassSlowDivision(BasicBlock *BB,
+ const BypassWidthsTy &BypassWidths) {
+ DivCacheTy PerBBDivCache;
+
+ bool MadeChange = false;
+ Instruction *Next = &*BB->begin();
+ while (Next != nullptr) {
+ // We may add instructions immediately after I, but we want to skip over
+ // them.
+ Instruction *I = Next;
+ Next = Next->getNextNode();
+
+ // Ignore dead code to save time and avoid bugs.
+ if (I->hasNUses(0))
+ continue;
+
+ FastDivInsertionTask Task(I, BypassWidths);
+ if (Value *Replacement = Task.getReplacement(PerBBDivCache)) {
+ I->replaceAllUsesWith(Replacement);
+ I->eraseFromParent();
+ MadeChange = true;
+ }
+ }
+
+ // Above we eagerly create divs and rems, as pairs, so that we can efficiently
+ // create divrem machine instructions. Now erase any unused divs / rems so we
+ // don't leave extra instructions sitting around.
+ for (auto &KV : PerBBDivCache)
+ for (Value *V : {KV.second.Quotient, KV.second.Remainder})
+ RecursivelyDeleteTriviallyDeadInstructions(V);
+
+ return MadeChange;
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/CallGraphUpdater.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/CallGraphUpdater.cpp
new file mode 100644
index 0000000000..d0b89ba260
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/CallGraphUpdater.cpp
@@ -0,0 +1,170 @@
+//===- CallGraphUpdater.cpp - A (lazy) call graph update helper -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file provides interfaces used to manipulate a call graph, regardless
+/// of whether it is an "old style" CallGraph or a "new style" LazyCallGraph.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CallGraphUpdater.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+bool CallGraphUpdater::finalize() {
+ if (!DeadFunctionsInComdats.empty()) {
+ filterDeadComdatFunctions(DeadFunctionsInComdats);
+ DeadFunctions.append(DeadFunctionsInComdats.begin(),
+ DeadFunctionsInComdats.end());
+ }
+
+ if (CG) {
+    // First remove all references, e.g., outgoing references via called
+    // functions. This is necessary so that we can delete functions that have
+    // circular references.
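+    // (e.g., two dead functions that still call each other).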
+ for (Function *DeadFn : DeadFunctions) {
+ DeadFn->removeDeadConstantUsers();
+ CallGraphNode *DeadCGN = (*CG)[DeadFn];
+ DeadCGN->removeAllCalledFunctions();
+ CG->getExternalCallingNode()->removeAnyCallEdgeTo(DeadCGN);
+ DeadFn->replaceAllUsesWith(PoisonValue::get(DeadFn->getType()));
+ }
+
+ // Then remove the node and function from the module.
+ for (Function *DeadFn : DeadFunctions) {
+ CallGraphNode *DeadCGN = CG->getOrInsertFunction(DeadFn);
+ assert(DeadCGN->getNumReferences() == 0 &&
+ "References should have been handled by now");
+ delete CG->removeFunctionFromModule(DeadCGN);
+ }
+ } else {
+    // This is the code path for the new lazy call graph and for the case where
+ // no call graph was provided.
+ for (Function *DeadFn : DeadFunctions) {
+ DeadFn->removeDeadConstantUsers();
+ DeadFn->replaceAllUsesWith(PoisonValue::get(DeadFn->getType()));
+
+ if (LCG && !ReplacedFunctions.count(DeadFn)) {
+ // Taken mostly from the inliner:
+ LazyCallGraph::Node &N = LCG->get(*DeadFn);
+ auto *DeadSCC = LCG->lookupSCC(N);
+ assert(DeadSCC && DeadSCC->size() == 1 &&
+ &DeadSCC->begin()->getFunction() == DeadFn);
+ auto &DeadRC = DeadSCC->getOuterRefSCC();
+
+ FunctionAnalysisManager &FAM =
+ AM->getResult<FunctionAnalysisManagerCGSCCProxy>(*DeadSCC, *LCG)
+ .getManager();
+
+ FAM.clear(*DeadFn, DeadFn->getName());
+ AM->clear(*DeadSCC, DeadSCC->getName());
+ LCG->removeDeadFunction(*DeadFn);
+
+ // Mark the relevant parts of the call graph as invalid so we don't
+ // visit them.
+ UR->InvalidatedSCCs.insert(DeadSCC);
+ UR->InvalidatedRefSCCs.insert(&DeadRC);
+ }
+
+      // The function is now really dead and detached from everything.
+ DeadFn->eraseFromParent();
+ }
+ }
+
+ bool Changed = !DeadFunctions.empty();
+ DeadFunctionsInComdats.clear();
+ DeadFunctions.clear();
+ return Changed;
+}
+
+void CallGraphUpdater::reanalyzeFunction(Function &Fn) {
+ if (CG) {
+ CallGraphNode *OldCGN = CG->getOrInsertFunction(&Fn);
+ OldCGN->removeAllCalledFunctions();
+ CG->populateCallGraphNode(OldCGN);
+ } else if (LCG) {
+ LazyCallGraph::Node &N = LCG->get(Fn);
+ LazyCallGraph::SCC *C = LCG->lookupSCC(N);
+ updateCGAndAnalysisManagerForCGSCCPass(*LCG, *C, N, *AM, *UR, *FAM);
+ }
+}
+
+void CallGraphUpdater::registerOutlinedFunction(Function &OriginalFn,
+ Function &NewFn) {
+ if (CG)
+ CG->addToCallGraph(&NewFn);
+ else if (LCG)
+ LCG->addSplitFunction(OriginalFn, NewFn);
+}
+
+void CallGraphUpdater::removeFunction(Function &DeadFn) {
+ DeadFn.deleteBody();
+ DeadFn.setLinkage(GlobalValue::ExternalLinkage);
+ if (DeadFn.hasComdat())
+ DeadFunctionsInComdats.push_back(&DeadFn);
+ else
+ DeadFunctions.push_back(&DeadFn);
+
+ // For the old call graph we remove the function from the SCC right away.
+ if (CG && !ReplacedFunctions.count(&DeadFn)) {
+ CallGraphNode *DeadCGN = (*CG)[&DeadFn];
+ DeadCGN->removeAllCalledFunctions();
+ CGSCC->DeleteNode(DeadCGN);
+ }
+}
+
+void CallGraphUpdater::replaceFunctionWith(Function &OldFn, Function &NewFn) {
+ OldFn.removeDeadConstantUsers();
+ ReplacedFunctions.insert(&OldFn);
+ if (CG) {
+ // Update the call graph for the newly promoted function.
+ CallGraphNode *OldCGN = (*CG)[&OldFn];
+ CallGraphNode *NewCGN = CG->getOrInsertFunction(&NewFn);
+ NewCGN->stealCalledFunctionsFrom(OldCGN);
+ CG->ReplaceExternalCallEdge(OldCGN, NewCGN);
+
+ // And update the SCC we're iterating as well.
+ CGSCC->ReplaceNode(OldCGN, NewCGN);
+ } else if (LCG) {
+ // Directly substitute the functions in the call graph.
+ LazyCallGraph::Node &OldLCGN = LCG->get(OldFn);
+ SCC->getOuterRefSCC().replaceNodeFunction(OldLCGN, NewFn);
+ }
+ removeFunction(OldFn);
+}
+
+bool CallGraphUpdater::replaceCallSite(CallBase &OldCS, CallBase &NewCS) {
+ // This is only necessary in the (old) CG.
+ if (!CG)
+ return true;
+
+ Function *Caller = OldCS.getCaller();
+ CallGraphNode *NewCalleeNode =
+ CG->getOrInsertFunction(NewCS.getCalledFunction());
+ CallGraphNode *CallerNode = (*CG)[Caller];
+ if (llvm::none_of(*CallerNode, [&OldCS](const CallGraphNode::CallRecord &CR) {
+ return CR.first && *CR.first == &OldCS;
+ }))
+ return false;
+ CallerNode->replaceCallEdge(OldCS, NewCS, NewCalleeNode);
+ return true;
+}
+
+void CallGraphUpdater::removeCallSite(CallBase &CS) {
+ // This is only necessary in the (old) CG.
+ if (!CG)
+ return;
+
+ Function *Caller = CS.getCaller();
+ CallGraphNode *CallerNode = (*CG)[Caller];
+ CallerNode->removeCallEdgeFor(CS);
+}
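+
+// Hedged usage sketch (not part of the upstream file): a transform that proves
+// a function dead is expected to route the deletion through this updater
+// rather than mutating either call graph representation directly. The helper
+// `findProvenDeadFunction` is hypothetical; `removeFunction` and the batched
+// cleanup whose tail appears at the top of this hunk are the entry points
+// defined here.
+//
+//   CallGraphUpdater CGU;                          // set up for old CG or LCG
+//   if (Function *F = findProvenDeadFunction(M))   // hypothetical analysis
+//     CGU.removeFunction(*F);                      // queued; erased in batch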
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/CallPromotionUtils.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/CallPromotionUtils.cpp
new file mode 100644
index 0000000000..4a82f9606d
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -0,0 +1,620 @@
+//===- CallPromotionUtils.cpp - Utilities for call promotion ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements utilities useful for promoting indirect call sites to
+// direct call sites.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CallPromotionUtils.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/TypeMetadataUtils.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "call-promotion-utils"
+
+/// Fix-up phi nodes in an invoke instruction's normal destination.
+///
+/// After versioning an invoke instruction, values coming from the original
+/// block will now be coming from the "merge" block. For example, in the code
+/// below:
+///
+/// then_bb:
+/// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
+///
+/// else_bb:
+/// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
+///
+/// merge_bb:
+/// %t2 = phi i32 [ %t0, %then_bb ], [ %t1, %else_bb ]
+/// br %normal_dst
+///
+/// normal_dst:
+/// %t3 = phi i32 [ %x, %orig_bb ], ...
+///
+/// "orig_bb" is no longer a predecessor of "normal_dst", so the phi nodes in
+/// "normal_dst" must be fixed to refer to "merge_bb":
+///
+/// normal_dst:
+/// %t3 = phi i32 [ %x, %merge_bb ], ...
+///
+static void fixupPHINodeForNormalDest(InvokeInst *Invoke, BasicBlock *OrigBlock,
+ BasicBlock *MergeBlock) {
+ for (PHINode &Phi : Invoke->getNormalDest()->phis()) {
+ int Idx = Phi.getBasicBlockIndex(OrigBlock);
+ if (Idx == -1)
+ continue;
+ Phi.setIncomingBlock(Idx, MergeBlock);
+ }
+}
+
+/// Fix-up phi nodes in an invoke instruction's unwind destination.
+///
+/// After versioning an invoke instruction, values coming from the original
+/// block will now be coming from either the "then" block or the "else" block.
+/// For example, in the code below:
+///
+/// then_bb:
+/// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
+///
+/// else_bb:
+/// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
+///
+/// unwind_dst:
+/// %t3 = phi i32 [ %x, %orig_bb ], ...
+///
+/// "orig_bb" is no longer a predecessor of "unwind_dst", so the phi nodes in
+/// "unwind_dst" must be fixed to refer to "then_bb" and "else_bb":
+///
+/// unwind_dst:
+/// %t3 = phi i32 [ %x, %then_bb ], [ %x, %else_bb ], ...
+///
+static void fixupPHINodeForUnwindDest(InvokeInst *Invoke, BasicBlock *OrigBlock,
+ BasicBlock *ThenBlock,
+ BasicBlock *ElseBlock) {
+ for (PHINode &Phi : Invoke->getUnwindDest()->phis()) {
+ int Idx = Phi.getBasicBlockIndex(OrigBlock);
+ if (Idx == -1)
+ continue;
+ auto *V = Phi.getIncomingValue(Idx);
+ Phi.setIncomingBlock(Idx, ThenBlock);
+ Phi.addIncoming(V, ElseBlock);
+ }
+}
+
+/// Create a phi node for the returned value of a call or invoke instruction.
+///
+/// After versioning a call or invoke instruction that returns a value, we have
+/// to merge the value of the original and new instructions. We do this by
+/// creating a phi node and replacing uses of the original instruction with this
+/// phi node.
+///
+/// For example, if \p OrigInst is defined in "else_bb" and \p NewInst is
+/// defined in "then_bb", we create the following phi node:
+///
+/// ; Uses of the original instruction are replaced by uses of the phi node.
+/// %t0 = phi i32 [ %orig_inst, %else_bb ], [ %new_inst, %then_bb ],
+///
+static void createRetPHINode(Instruction *OrigInst, Instruction *NewInst,
+ BasicBlock *MergeBlock, IRBuilder<> &Builder) {
+
+ if (OrigInst->getType()->isVoidTy() || OrigInst->use_empty())
+ return;
+
+ Builder.SetInsertPoint(&MergeBlock->front());
+ PHINode *Phi = Builder.CreatePHI(OrigInst->getType(), 0);
+ SmallVector<User *, 16> UsersToUpdate(OrigInst->users());
+ for (User *U : UsersToUpdate)
+ U->replaceUsesOfWith(OrigInst, Phi);
+ Phi->addIncoming(OrigInst, OrigInst->getParent());
+ Phi->addIncoming(NewInst, NewInst->getParent());
+}
+
+/// Cast a call or invoke instruction to the given type.
+///
+/// When promoting a call site, the return type of the call site might not match
+/// that of the callee. If this is the case, we have to cast the returned value
+/// to the correct type. The location of the cast depends on whether we have a
+/// call or invoke instruction.
+///
+/// For example, if the call instruction below requires a bitcast after
+/// promotion:
+///
+/// orig_bb:
+/// %t0 = call i32 @func()
+/// ...
+///
+/// The bitcast is placed after the call instruction:
+///
+/// orig_bb:
+/// ; Uses of the original return value are replaced by uses of the bitcast.
+/// %t0 = call i32 @func()
+/// %t1 = bitcast i32 %t0 to ...
+/// ...
+///
+/// A similar transformation is performed for invoke instructions. However,
+/// since invokes are terminating, a new block is created for the bitcast. For
+/// example, if the invoke instruction below requires a bitcast after promotion:
+///
+/// orig_bb:
+/// %t0 = invoke i32 @func() to label %normal_dst unwind label %unwind_dst
+///
+/// The edge between the original block and the invoke's normal destination is
+/// split, and the bitcast is placed there:
+///
+/// orig_bb:
+/// %t0 = invoke i32 @func() to label %split_bb unwind label %unwind_dst
+///
+/// split_bb:
+/// ; Uses of the original return value are replaced by uses of the bitcast.
+/// %t1 = bitcast i32 %t0 to ...
+/// br label %normal_dst
+///
+static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) {
+
+ // Save the users of the calling instruction. These uses will be changed to
+ // use the bitcast after we create it.
+ SmallVector<User *, 16> UsersToUpdate(CB.users());
+
+ // Determine an appropriate location to create the bitcast for the return
+ // value. The location depends on whether we have a call or invoke instruction.
+ Instruction *InsertBefore = nullptr;
+ if (auto *Invoke = dyn_cast<InvokeInst>(&CB))
+ InsertBefore =
+ &SplitEdge(Invoke->getParent(), Invoke->getNormalDest())->front();
+ else
+ InsertBefore = &*std::next(CB.getIterator());
+
+ // Bitcast the return value to the correct type.
+ auto *Cast = CastInst::CreateBitOrPointerCast(&CB, RetTy, "", InsertBefore);
+ if (RetBitCast)
+ *RetBitCast = Cast;
+
+ // Replace all the original uses of the calling instruction with the bitcast.
+ for (User *U : UsersToUpdate)
+ U->replaceUsesOfWith(&CB, Cast);
+}
+
+/// Predicate and clone the given call site.
+///
+/// This function creates an if-then-else structure at the location of the call
+/// site. The "if" condition compares the call site's called value to the given
+/// callee. The original call site is moved into the "else" block, and a clone
+/// of the call site is placed in the "then" block. The cloned instruction is
+/// returned.
+///
+/// For example, the call instruction below:
+///
+/// orig_bb:
+/// %t0 = call i32 %ptr()
+/// ...
+///
+/// Is replaced by the following:
+///
+/// orig_bb:
+/// %cond = icmp eq i32 ()* %ptr, @func
+/// br i1 %cond, %then_bb, %else_bb
+///
+/// then_bb:
+/// ; The clone of the original call instruction is placed in the "then"
+/// ; block. It is not yet promoted.
+/// %t1 = call i32 %ptr()
+/// br merge_bb
+///
+/// else_bb:
+/// ; The original call instruction is moved to the "else" block.
+/// %t0 = call i32 %ptr()
+/// br merge_bb
+///
+/// merge_bb:
+/// ; Uses of the original call instruction are replaced by uses of the phi
+/// ; node.
+/// %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ]
+/// ...
+///
+/// A similar transformation is performed for invoke instructions. However,
+/// since invokes are terminating, more work is required. For example, the
+/// invoke instruction below:
+///
+/// orig_bb:
+/// %t0 = invoke %ptr() to label %normal_dst unwind label %unwind_dst
+///
+/// Is replaced by the following:
+///
+/// orig_bb:
+/// %cond = icmp eq i32 ()* %ptr, @func
+/// br i1 %cond, %then_bb, %else_bb
+///
+/// then_bb:
+/// ; The clone of the original invoke instruction is placed in the "then"
+/// ; block, and its normal destination is set to the "merge" block. It is
+/// ; not yet promoted.
+/// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
+///
+/// else_bb:
+/// ; The original invoke instruction is moved into the "else" block, and
+/// ; its normal destination is set to the "merge" block.
+/// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
+///
+/// merge_bb:
+/// ; Uses of the original invoke instruction are replaced by uses of the
+/// ; phi node, and the merge block branches to the normal destination.
+/// %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ]
+/// br %normal_dst
+///
+/// An indirect musttail call is processed slightly differently in that:
+/// 1. No merge block is needed for the original and the cloned call site, since
+/// either one ends the flow. No phi node is needed either.
+/// 2. The return statement following the original call site is duplicated too
+/// and placed immediately after the cloned call site per the IR convention.
+///
+/// For example, the musttail call instruction below:
+///
+/// orig_bb:
+/// %t0 = musttail call i32 %ptr()
+/// ...
+///
+/// Is replaced by the following:
+///
+/// cond_bb:
+/// %cond = icmp eq i32 ()* %ptr, @func
+/// br i1 %cond, %then_bb, %orig_bb
+///
+/// then_bb:
+/// ; The clone of the original call instruction is placed in the "then"
+/// ; block. It is not yet promoted.
+/// %t1 = musttail call i32 %ptr()
+/// ret %t1
+///
+/// orig_bb:
+/// ; The original call instruction stays in its original block.
+/// %t0 = musttail call i32 %ptr()
+/// ret %t0
+CallBase &llvm::versionCallSite(CallBase &CB, Value *Callee,
+ MDNode *BranchWeights) {
+
+ IRBuilder<> Builder(&CB);
+ CallBase *OrigInst = &CB;
+ BasicBlock *OrigBlock = OrigInst->getParent();
+
+ // Create the compare. The called value and callee must have the same type to
+ // be compared.
+ if (CB.getCalledOperand()->getType() != Callee->getType())
+ Callee = Builder.CreateBitCast(Callee, CB.getCalledOperand()->getType());
+ auto *Cond = Builder.CreateICmpEQ(CB.getCalledOperand(), Callee);
+
+ if (OrigInst->isMustTailCall()) {
+ // Create an if-then structure. The original instruction stays in its block,
+ // and a clone of the original instruction is placed in the "then" block.
+ Instruction *ThenTerm =
+ SplitBlockAndInsertIfThen(Cond, &CB, false, BranchWeights);
+ BasicBlock *ThenBlock = ThenTerm->getParent();
+ ThenBlock->setName("if.true.direct_targ");
+ CallBase *NewInst = cast<CallBase>(OrigInst->clone());
+ NewInst->insertBefore(ThenTerm);
+
+ // Place a clone of the optional bitcast after the new call site.
+ Value *NewRetVal = NewInst;
+ auto Next = OrigInst->getNextNode();
+ if (auto *BitCast = dyn_cast_or_null<BitCastInst>(Next)) {
+ assert(BitCast->getOperand(0) == OrigInst &&
+ "bitcast following musttail call must use the call");
+ auto NewBitCast = BitCast->clone();
+ NewBitCast->replaceUsesOfWith(OrigInst, NewInst);
+ NewBitCast->insertBefore(ThenTerm);
+ NewRetVal = NewBitCast;
+ Next = BitCast->getNextNode();
+ }
+
+ // Place a clone of the return instruction after the new call site.
+ ReturnInst *Ret = dyn_cast_or_null<ReturnInst>(Next);
+ assert(Ret && "musttail call must precede a ret with an optional bitcast");
+ auto NewRet = Ret->clone();
+ if (Ret->getReturnValue())
+ NewRet->replaceUsesOfWith(Ret->getReturnValue(), NewRetVal);
+ NewRet->insertBefore(ThenTerm);
+
+ // A return instruction is terminating, so we don't need the terminator
+ // instruction just created.
+ ThenTerm->eraseFromParent();
+
+ return *NewInst;
+ }
+
+ // Create an if-then-else structure. The original instruction is moved into
+ // the "else" block, and a clone of the original instruction is placed in the
+ // "then" block.
+ Instruction *ThenTerm = nullptr;
+ Instruction *ElseTerm = nullptr;
+ SplitBlockAndInsertIfThenElse(Cond, &CB, &ThenTerm, &ElseTerm, BranchWeights);
+ BasicBlock *ThenBlock = ThenTerm->getParent();
+ BasicBlock *ElseBlock = ElseTerm->getParent();
+ BasicBlock *MergeBlock = OrigInst->getParent();
+
+ ThenBlock->setName("if.true.direct_targ");
+ ElseBlock->setName("if.false.orig_indirect");
+ MergeBlock->setName("if.end.icp");
+
+ CallBase *NewInst = cast<CallBase>(OrigInst->clone());
+ OrigInst->moveBefore(ElseTerm);
+ NewInst->insertBefore(ThenTerm);
+
+ // If the original call site is an invoke instruction, we have extra work to
+ // do since invoke instructions are terminating. We have to fix-up phi nodes
+ // in the invoke's normal and unwind destinations.
+ if (auto *OrigInvoke = dyn_cast<InvokeInst>(OrigInst)) {
+ auto *NewInvoke = cast<InvokeInst>(NewInst);
+
+ // Invoke instructions are terminating, so we don't need the terminator
+ // instructions that were just created.
+ ThenTerm->eraseFromParent();
+ ElseTerm->eraseFromParent();
+
+ // Branch from the "merge" block to the original normal destination.
+ Builder.SetInsertPoint(MergeBlock);
+ Builder.CreateBr(OrigInvoke->getNormalDest());
+
+ // Fix-up phi nodes in the original invoke's normal and unwind destinations.
+ fixupPHINodeForNormalDest(OrigInvoke, OrigBlock, MergeBlock);
+ fixupPHINodeForUnwindDest(OrigInvoke, MergeBlock, ThenBlock, ElseBlock);
+
+ // Now set the normal destinations of the invoke instructions to be the
+ // "merge" block.
+ OrigInvoke->setNormalDest(MergeBlock);
+ NewInvoke->setNormalDest(MergeBlock);
+ }
+
+ // Create a phi node for the returned value of the call site.
+ createRetPHINode(OrigInst, NewInst, MergeBlock, Builder);
+
+ return *NewInst;
+}
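+
+// Hedged usage sketch: a typical caller versions the indirect call against a
+// single hot target and then promotes only the clone living in the "then"
+// block. `HotTarget` stands in for a callee picked from profile data, and the
+// branch-weight metadata is optional.
+//
+//   CallBase &Clone = versionCallSite(CB, HotTarget, /*BranchWeights=*/nullptr);
+//   promoteCall(Clone, HotTarget); // promoteCall is defined later in this file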
+
+bool llvm::isLegalToPromote(const CallBase &CB, Function *Callee,
+ const char **FailureReason) {
+ assert(!CB.getCalledFunction() && "Only indirect call sites can be promoted");
+
+ auto &DL = Callee->getParent()->getDataLayout();
+
+ // Check the return type. The callee's return value type must be bitcast
+ // compatible with the call site's type.
+ Type *CallRetTy = CB.getType();
+ Type *FuncRetTy = Callee->getReturnType();
+ if (CallRetTy != FuncRetTy)
+ if (!CastInst::isBitOrNoopPointerCastable(FuncRetTy, CallRetTy, DL)) {
+ if (FailureReason)
+ *FailureReason = "Return type mismatch";
+ return false;
+ }
+
+ // The number of formal arguments of the callee.
+ unsigned NumParams = Callee->getFunctionType()->getNumParams();
+
+ // The number of actual arguments in the call.
+ unsigned NumArgs = CB.arg_size();
+
+ // Check the number of arguments. The callee and call site must agree on the
+ // number of arguments.
+ if (NumArgs != NumParams && !Callee->isVarArg()) {
+ if (FailureReason)
+ *FailureReason = "The number of arguments mismatch";
+ return false;
+ }
+
+ // Check the argument types. The callee's formal argument types must be
+ // bitcast compatible with the corresponding actual argument types of the call
+ // site.
+ unsigned I = 0;
+ for (; I < NumParams; ++I) {
+ // Make sure that the callee and call agree on byval/inalloca. The types do
+ // not have to match.
+ if (Callee->hasParamAttribute(I, Attribute::ByVal) !=
+ CB.getAttributes().hasParamAttr(I, Attribute::ByVal)) {
+ if (FailureReason)
+ *FailureReason = "byval mismatch";
+ return false;
+ }
+ if (Callee->hasParamAttribute(I, Attribute::InAlloca) !=
+ CB.getAttributes().hasParamAttr(I, Attribute::InAlloca)) {
+ if (FailureReason)
+ *FailureReason = "inalloca mismatch";
+ return false;
+ }
+
+ Type *FormalTy = Callee->getFunctionType()->getFunctionParamType(I);
+ Type *ActualTy = CB.getArgOperand(I)->getType();
+ if (FormalTy == ActualTy)
+ continue;
+ if (!CastInst::isBitOrNoopPointerCastable(ActualTy, FormalTy, DL)) {
+ if (FailureReason)
+ *FailureReason = "Argument type mismatch";
+ return false;
+ }
+
+ // MustTail call needs stricter type match. See
+ // Verifier::verifyMustTailCall().
+ if (CB.isMustTailCall()) {
+ PointerType *PF = dyn_cast<PointerType>(FormalTy);
+ PointerType *PA = dyn_cast<PointerType>(ActualTy);
+ if (!PF || !PA || PF->getAddressSpace() != PA->getAddressSpace()) {
+ if (FailureReason)
+ *FailureReason = "Musttail call Argument type mismatch";
+ return false;
+ }
+ }
+ }
+ for (; I < NumArgs; I++) {
+ // Vararg functions can have more arguments than parameters.
+ assert(Callee->isVarArg());
+ if (CB.paramHasAttr(I, Attribute::StructRet)) {
+ if (FailureReason)
+ *FailureReason = "SRet arg to vararg function";
+ return false;
+ }
+ }
+
+ return true;
+}
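+
+// Hedged usage sketch: the legality check is intended to gate promotion and to
+// surface a readable reason when it refuses. How `Reason` is reported (remark,
+// debug output, statistic) is up to the caller and is only assumed here.
+//
+//   const char *Reason = nullptr;
+//   if (!isLegalToPromote(CB, Target, &Reason)) {
+//     // e.g. emit an optimization remark mentioning Reason, then give up
+//     return false;
+//   }
+//   promoteCall(CB, Target);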
+
+CallBase &llvm::promoteCall(CallBase &CB, Function *Callee,
+ CastInst **RetBitCast) {
+ assert(!CB.getCalledFunction() && "Only indirect call sites can be promoted");
+
+ // Set the called function of the call site to be the given callee (but don't
+ // change the type).
+ CB.setCalledOperand(Callee);
+
+ // Since the call site will no longer be indirect, we must clear metadata that
+ // is only appropriate for indirect calls. This includes !prof and !callees
+ // metadata.
+ CB.setMetadata(LLVMContext::MD_prof, nullptr);
+ CB.setMetadata(LLVMContext::MD_callees, nullptr);
+
+ // If the function type of the call site matches that of the callee, no
+ // additional work is required.
+ if (CB.getFunctionType() == Callee->getFunctionType())
+ return CB;
+
+ // Save the return types of the call site and callee.
+ Type *CallSiteRetTy = CB.getType();
+ Type *CalleeRetTy = Callee->getReturnType();
+
+ // Change the function type of the call site to match that of the callee.
+ CB.mutateFunctionType(Callee->getFunctionType());
+
+ // Inspect the arguments of the call site. If an argument's type doesn't
+ // match the corresponding formal argument's type in the callee, bitcast it
+ // to the correct type.
+ auto CalleeType = Callee->getFunctionType();
+ auto CalleeParamNum = CalleeType->getNumParams();
+
+ LLVMContext &Ctx = Callee->getContext();
+ const AttributeList &CallerPAL = CB.getAttributes();
+ // The new list of argument attributes.
+ SmallVector<AttributeSet, 4> NewArgAttrs;
+ bool AttributeChanged = false;
+
+ for (unsigned ArgNo = 0; ArgNo < CalleeParamNum; ++ArgNo) {
+ auto *Arg = CB.getArgOperand(ArgNo);
+ Type *FormalTy = CalleeType->getParamType(ArgNo);
+ Type *ActualTy = Arg->getType();
+ if (FormalTy != ActualTy) {
+ auto *Cast = CastInst::CreateBitOrPointerCast(Arg, FormalTy, "", &CB);
+ CB.setArgOperand(ArgNo, Cast);
+
+ // Remove any incompatible attributes for the argument.
+ AttrBuilder ArgAttrs(Ctx, CallerPAL.getParamAttrs(ArgNo));
+ ArgAttrs.remove(AttributeFuncs::typeIncompatible(FormalTy));
+
+ // We may have a different byval/inalloca type.
+ if (ArgAttrs.getByValType())
+ ArgAttrs.addByValAttr(Callee->getParamByValType(ArgNo));
+ if (ArgAttrs.getInAllocaType())
+ ArgAttrs.addInAllocaAttr(Callee->getParamInAllocaType(ArgNo));
+
+ NewArgAttrs.push_back(AttributeSet::get(Ctx, ArgAttrs));
+ AttributeChanged = true;
+ } else
+ NewArgAttrs.push_back(CallerPAL.getParamAttrs(ArgNo));
+ }
+
+ // If the return type of the call site doesn't match that of the callee, cast
+ // the returned value to the appropriate type.
+ // Remove any incompatible return value attribute.
+ AttrBuilder RAttrs(Ctx, CallerPAL.getRetAttrs());
+ if (!CallSiteRetTy->isVoidTy() && CallSiteRetTy != CalleeRetTy) {
+ createRetBitCast(CB, CallSiteRetTy, RetBitCast);
+ RAttrs.remove(AttributeFuncs::typeIncompatible(CalleeRetTy));
+ AttributeChanged = true;
+ }
+
+ // Set the new call site attributes.
+ if (AttributeChanged)
+ CB.setAttributes(AttributeList::get(Ctx, CallerPAL.getFnAttrs(),
+ AttributeSet::get(Ctx, RAttrs),
+ NewArgAttrs));
+
+ return CB;
+}
+
+CallBase &llvm::promoteCallWithIfThenElse(CallBase &CB, Function *Callee,
+ MDNode *BranchWeights) {
+
+ // Version the indirect call site. If the called value is equal to the given
+ // callee, 'NewInst' will be executed, otherwise the original call site will
+ // be executed.
+ CallBase &NewInst = versionCallSite(CB, Callee, BranchWeights);
+
+ // Promote 'NewInst' so that it directly calls the desired function.
+ return promoteCall(NewInst, Callee);
+}
+
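+// Hedged sketch of the IR shape tryPromoteCall recognizes (names and offsets
+// are illustrative, not taken from a real test): it walks from the indirect
+// call back through the vtable-entry load and the vtable-pointer load to a
+// local object whose vtable pointer was stored by a constructor earlier in
+// the same block.
+//
+//   %obj   = alloca %class.A
+//   ...                                  ; ctor stores the vtable into %obj
+//   %vtptr = load ptr, ptr %obj          ; vtable pointer load
+//   %slot  = getelementptr i8, ptr %vtptr, i64 8
+//   %fn    = load ptr, ptr %slot         ; vtable entry load
+//   call void %fn(ptr %obj)              ; candidate for direct-call promotion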
+bool llvm::tryPromoteCall(CallBase &CB) {
+ assert(!CB.getCalledFunction());
+ Module *M = CB.getCaller()->getParent();
+ const DataLayout &DL = M->getDataLayout();
+ Value *Callee = CB.getCalledOperand();
+
+ LoadInst *VTableEntryLoad = dyn_cast<LoadInst>(Callee);
+ if (!VTableEntryLoad)
+ return false; // Not a vtable entry load.
+ Value *VTableEntryPtr = VTableEntryLoad->getPointerOperand();
+ APInt VTableOffset(DL.getTypeSizeInBits(VTableEntryPtr->getType()), 0);
+ Value *VTableBasePtr = VTableEntryPtr->stripAndAccumulateConstantOffsets(
+ DL, VTableOffset, /* AllowNonInbounds */ true);
+ LoadInst *VTablePtrLoad = dyn_cast<LoadInst>(VTableBasePtr);
+ if (!VTablePtrLoad)
+ return false; // Not a vtable load.
+ Value *Object = VTablePtrLoad->getPointerOperand();
+ APInt ObjectOffset(DL.getTypeSizeInBits(Object->getType()), 0);
+ Value *ObjectBase = Object->stripAndAccumulateConstantOffsets(
+ DL, ObjectOffset, /* AllowNonInbounds */ true);
+ if (!(isa<AllocaInst>(ObjectBase) && ObjectOffset == 0))
+ // Not an Alloca or the offset isn't zero.
+ return false;
+
+ // Look for the vtable pointer store into the object by the ctor.
+ BasicBlock::iterator BBI(VTablePtrLoad);
+ Value *VTablePtr = FindAvailableLoadedValue(
+ VTablePtrLoad, VTablePtrLoad->getParent(), BBI, 0, nullptr, nullptr);
+ if (!VTablePtr)
+ return false; // No vtable found.
+ APInt VTableOffsetGVBase(DL.getTypeSizeInBits(VTablePtr->getType()), 0);
+ Value *VTableGVBase = VTablePtr->stripAndAccumulateConstantOffsets(
+ DL, VTableOffsetGVBase, /* AllowNonInbounds */ true);
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(VTableGVBase);
+ if (!(GV && GV->isConstant() && GV->hasDefinitiveInitializer()))
+ // Not in the form of a global constant variable with an initializer.
+ return false;
+
+ Constant *VTableGVInitializer = GV->getInitializer();
+ APInt VTableGVOffset = VTableOffsetGVBase + VTableOffset;
+ if (!(VTableGVOffset.getActiveBits() <= 64))
+ return false; // Out of range.
+ Constant *Ptr = getPointerAtOffset(VTableGVInitializer,
+ VTableGVOffset.getZExtValue(),
+ *M);
+ if (!Ptr)
+ return false; // No constant (function) pointer found.
+ Function *DirectCallee = dyn_cast<Function>(Ptr->stripPointerCasts());
+ if (!DirectCallee)
+ return false; // No function pointer found.
+
+ if (!isLegalToPromote(CB, DirectCallee))
+ return false;
+
+ // Success.
+ promoteCall(CB, DirectCallee);
+ return true;
+}
+
+#undef DEBUG_TYPE
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/CanonicalizeAliases.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/CanonicalizeAliases.cpp
new file mode 100644
index 0000000000..4d622679db
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/CanonicalizeAliases.cpp
@@ -0,0 +1,76 @@
+//===- CanonicalizeAliases.cpp - ThinLTO Support: Canonicalize Aliases ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Currently this file implements partial alias canonicalization: it flattens
+// chains of aliases (GlobalOpt does this as well, but it is not run for O0
+// compiles). E.g.
+// @a = alias i8, i8 *@b
+// @b = alias i8, i8 *@g
+//
+// will be converted to:
+// @a = alias i8, i8 *@g <-- @a is now an alias to base object @g
+// @b = alias i8, i8 *@g
+//
+// Eventually this file will implement full alias canonicalization, so that
+// all aliasees are private anonymous values. E.g.
+// @a = alias i8, i8 *@g
+// @g = global i8 0
+//
+// will be converted to:
+// @0 = private global
+// @a = alias i8, i8* @0
+// @g = alias i8, i8* @0
+//
+// This simplifies optimization and ThinLTO linking of the original symbols.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+namespace {
+
+static Constant *canonicalizeAlias(Constant *C, bool &Changed) {
+ if (auto *GA = dyn_cast<GlobalAlias>(C)) {
+ auto *NewAliasee = canonicalizeAlias(GA->getAliasee(), Changed);
+ if (NewAliasee != GA->getAliasee()) {
+ GA->setAliasee(NewAliasee);
+ Changed = true;
+ }
+ return NewAliasee;
+ }
+
+ auto *CE = dyn_cast<ConstantExpr>(C);
+ if (!CE)
+ return C;
+
+ std::vector<Constant *> Ops;
+ for (Use &U : CE->operands())
+ Ops.push_back(canonicalizeAlias(cast<Constant>(U), Changed));
+ return CE->getWithOperands(Ops);
+}
+
+/// Convert aliases to canonical form.
+static bool canonicalizeAliases(Module &M) {
+ bool Changed = false;
+ for (auto &GA : M.aliases())
+ canonicalizeAlias(&GA, Changed);
+ return Changed;
+}
+} // anonymous namespace
+
+PreservedAnalyses CanonicalizeAliasesPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ if (!canonicalizeAliases(M))
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
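+
+// Hedged note: with the new pass manager this normally runs as part of a
+// pipeline, e.g. roughly `opt -passes=canonicalize-aliases in.ll -S`; the
+// pass-name spelling follows the usual registration convention and is an
+// assumption, since the registration itself is not in this file.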
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
new file mode 100644
index 0000000000..a1ee3df907
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
@@ -0,0 +1,248 @@
+//==- CanonicalizeFreezeInLoops - Canonicalize freezes in a loop-*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass canonicalizes freeze instructions in a loop by pushing them out to
+// the preheader.
+//
+// loop:
+// i = phi init, i.next
+// i.next = add nsw i, 1
+// i.next.fr = freeze i.next // push this out of this loop
+// use(i.next.fr)
+// br i1 (i.next <= N), loop, exit
+// =>
+// init.fr = freeze init
+// loop:
+// i = phi init.fr, i.next
+// i.next = add i, 1 // nsw is dropped here
+// use(i.next)
+// br i1 (i.next <= N), loop, exit
+//
+// Removing freezes from these chains helps scalar evolution analyze the
+// expressions successfully.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/IVDescriptors.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "canon-freeze"
+
+namespace {
+
+class CanonicalizeFreezeInLoops : public LoopPass {
+public:
+ static char ID;
+
+ CanonicalizeFreezeInLoops();
+
+private:
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
+class CanonicalizeFreezeInLoopsImpl {
+ Loop *L;
+ ScalarEvolution &SE;
+ DominatorTree &DT;
+
+ struct FrozenIndPHIInfo {
+ // A freeze instruction that uses an induction phi
+ FreezeInst *FI = nullptr;
+ // The induction phi, the step instruction, and the operand index of
+ // StepInst that holds the step value
+ PHINode *PHI;
+ BinaryOperator *StepInst;
+ unsigned StepValIdx = 0;
+
+ FrozenIndPHIInfo(PHINode *PHI, BinaryOperator *StepInst)
+ : PHI(PHI), StepInst(StepInst) {}
+ };
+
+ // Can a freeze instruction be pushed into the operands of I?
+ // For this to be valid, I must not be able to produce poison after its
+ // flags are stripped.
+ bool canHandleInst(const Instruction *I) {
+ auto Opc = I->getOpcode();
+ // If add/sub/mul, drop nsw/nuw flags.
+ return Opc == Instruction::Add || Opc == Instruction::Sub ||
+ Opc == Instruction::Mul;
+ }
+
+ void InsertFreezeAndForgetFromSCEV(Use &U);
+
+public:
+ CanonicalizeFreezeInLoopsImpl(Loop *L, ScalarEvolution &SE, DominatorTree &DT)
+ : L(L), SE(SE), DT(DT) {}
+ bool run();
+};
+
+} // anonymous namespace
+
+// Given U = (value, user), replace value with freeze(value), and let
+// SCEV forget user. The inserted freeze is placed in the preheader.
+void CanonicalizeFreezeInLoopsImpl::InsertFreezeAndForgetFromSCEV(Use &U) {
+ auto *PH = L->getLoopPreheader();
+
+ auto *UserI = cast<Instruction>(U.getUser());
+ auto *ValueToFr = U.get();
+ assert(L->contains(UserI->getParent()) &&
+ "Should not process an instruction that isn't inside the loop");
+ if (isGuaranteedNotToBeUndefOrPoison(ValueToFr, nullptr, UserI, &DT))
+ return;
+
+ LLVM_DEBUG(dbgs() << "canonfr: inserting freeze:\n");
+ LLVM_DEBUG(dbgs() << "\tUser: " << *U.getUser() << "\n");
+ LLVM_DEBUG(dbgs() << "\tOperand: " << *U.get() << "\n");
+
+ U.set(new FreezeInst(ValueToFr, ValueToFr->getName() + ".frozen",
+ PH->getTerminator()));
+
+ SE.forgetValue(UserI);
+}
+
+bool CanonicalizeFreezeInLoopsImpl::run() {
+ // The loop should be in LoopSimplify form.
+ if (!L->isLoopSimplifyForm())
+ return false;
+
+ SmallVector<FrozenIndPHIInfo, 4> Candidates;
+
+ for (auto &PHI : L->getHeader()->phis()) {
+ InductionDescriptor ID;
+ if (!InductionDescriptor::isInductionPHI(&PHI, L, &SE, ID))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "canonfr: PHI: " << PHI << "\n");
+ FrozenIndPHIInfo Info(&PHI, ID.getInductionBinOp());
+ if (!Info.StepInst || !canHandleInst(Info.StepInst)) {
+ // The stepping instruction has unknown form.
+ // Ignore this PHI.
+ continue;
+ }
+
+ Info.StepValIdx = Info.StepInst->getOperand(0) == &PHI;
+ Value *StepV = Info.StepInst->getOperand(Info.StepValIdx);
+ if (auto *StepI = dyn_cast<Instruction>(StepV)) {
+ if (L->contains(StepI->getParent())) {
+ // The step value is inside the loop. Freezing it would introduce
+ // another freeze into the loop, so skip this PHI.
+ continue;
+ }
+ }
+
+ auto Visit = [&](User *U) {
+ if (auto *FI = dyn_cast<FreezeInst>(U)) {
+ LLVM_DEBUG(dbgs() << "canonfr: found: " << *FI << "\n");
+ Info.FI = FI;
+ Candidates.push_back(Info);
+ }
+ };
+ for_each(PHI.users(), Visit);
+ for_each(Info.StepInst->users(), Visit);
+ }
+
+ if (Candidates.empty())
+ return false;
+
+ SmallSet<PHINode *, 8> ProcessedPHIs;
+ for (const auto &Info : Candidates) {
+ PHINode *PHI = Info.PHI;
+ if (!ProcessedPHIs.insert(Info.PHI).second)
+ continue;
+
+ BinaryOperator *StepI = Info.StepInst;
+ assert(StepI && "Step instruction should have been found");
+
+ // Drop flags from the step instruction.
+ if (!isGuaranteedNotToBeUndefOrPoison(StepI, nullptr, StepI, &DT)) {
+ LLVM_DEBUG(dbgs() << "canonfr: drop flags: " << *StepI << "\n");
+ StepI->dropPoisonGeneratingFlags();
+ SE.forgetValue(StepI);
+ }
+
+ InsertFreezeAndForgetFromSCEV(StepI->getOperandUse(Info.StepValIdx));
+
+ unsigned OperandIdx =
+ PHI->getOperandNumForIncomingValue(PHI->getIncomingValue(0) == StepI);
+ InsertFreezeAndForgetFromSCEV(PHI->getOperandUse(OperandIdx));
+ }
+
+ // Finally, remove the old freeze instructions.
+ for (const auto &Item : Candidates) {
+ auto *FI = Item.FI;
+ LLVM_DEBUG(dbgs() << "canonfr: removing " << *FI << "\n");
+ SE.forgetValue(FI);
+ FI->replaceAllUsesWith(FI->getOperand(0));
+ FI->eraseFromParent();
+ }
+
+ return true;
+}
+
+CanonicalizeFreezeInLoops::CanonicalizeFreezeInLoops() : LoopPass(ID) {
+ initializeCanonicalizeFreezeInLoopsPass(*PassRegistry::getPassRegistry());
+}
+
+void CanonicalizeFreezeInLoops::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+}
+
+bool CanonicalizeFreezeInLoops::runOnLoop(Loop *L, LPPassManager &) {
+ if (skipLoop(L))
+ return false;
+
+ auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ return CanonicalizeFreezeInLoopsImpl(L, SE, DT).run();
+}
+
+PreservedAnalyses
+CanonicalizeFreezeInLoopsPass::run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &U) {
+ if (!CanonicalizeFreezeInLoopsImpl(&L, AR.SE, AR.DT).run())
+ return PreservedAnalyses::all();
+
+ return getLoopPassPreservedAnalyses();
+}
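+
+// Hedged note: under the new pass manager this is a loop pass, invoked along
+// the lines of `opt -passes='loop(canon-freeze)' in.ll -S`. The "canon-freeze"
+// spelling matches the legacy registration below; the exact pipeline syntax is
+// an assumption rather than something this file defines.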
+
+INITIALIZE_PASS_BEGIN(CanonicalizeFreezeInLoops, "canon-freeze",
+ "Canonicalize Freeze Instructions in Loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_END(CanonicalizeFreezeInLoops, "canon-freeze",
+ "Canonicalize Freeze Instructions in Loops", false, false)
+
+Pass *llvm::createCanonicalizeFreezeInLoopsPass() {
+ return new CanonicalizeFreezeInLoops();
+}
+
+char CanonicalizeFreezeInLoops::ID = 0;
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/CloneFunction.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/CloneFunction.cpp
new file mode 100644
index 0000000000..87822ee85c
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/CloneFunction.cpp
@@ -0,0 +1,1194 @@
+//===- CloneFunction.cpp - Clone a function into another function ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CloneFunctionInto interface, which is used as the
+// low-level function cloner. This is used by the CloneFunction and function
+// inliner to do the dirty work of copying the body of a function around.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <map>
+#include <optional>
+using namespace llvm;
+
+#define DEBUG_TYPE "clone-function"
+
+/// See comments in Cloning.h.
+BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
+ const Twine &NameSuffix, Function *F,
+ ClonedCodeInfo *CodeInfo,
+ DebugInfoFinder *DIFinder) {
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F);
+ if (BB->hasName())
+ NewBB->setName(BB->getName() + NameSuffix);
+
+ bool hasCalls = false, hasDynamicAllocas = false, hasMemProfMetadata = false;
+ Module *TheModule = F ? F->getParent() : nullptr;
+
+ // Loop over all instructions, and copy them over.
+ for (const Instruction &I : *BB) {
+ if (DIFinder && TheModule)
+ DIFinder->processInstruction(*TheModule, I);
+
+ Instruction *NewInst = I.clone();
+ if (I.hasName())
+ NewInst->setName(I.getName() + NameSuffix);
+ NewInst->insertInto(NewBB, NewBB->end());
+ VMap[&I] = NewInst; // Add instruction map to value.
+
+ if (isa<CallInst>(I) && !I.isDebugOrPseudoInst()) {
+ hasCalls = true;
+ hasMemProfMetadata |= I.hasMetadata(LLVMContext::MD_memprof);
+ }
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
+ if (!AI->isStaticAlloca()) {
+ hasDynamicAllocas = true;
+ }
+ }
+ }
+
+ if (CodeInfo) {
+ CodeInfo->ContainsCalls |= hasCalls;
+ CodeInfo->ContainsMemProfMetadata |= hasMemProfMetadata;
+ CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
+ }
+ return NewBB;
+}
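+
+// Hedged usage sketch: the returned block still refers to values from the
+// original block, so callers are expected to remap it afterwards (as
+// CloneFunctionInto below does). `BB` is an existing block of function `F`.
+//
+//   ValueToValueMapTy VMap;
+//   BasicBlock *Copy = CloneBasicBlock(BB, VMap, ".clone", F);
+//   for (Instruction &I : *Copy)
+//     RemapInstruction(&I, VMap,
+//                      RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);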
+
+// Clone OldFunc into NewFunc, transforming the old arguments into references to
+// VMap values.
+//
+void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
+ ValueToValueMapTy &VMap,
+ CloneFunctionChangeType Changes,
+ SmallVectorImpl<ReturnInst *> &Returns,
+ const char *NameSuffix, ClonedCodeInfo *CodeInfo,
+ ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer) {
+ assert(NameSuffix && "NameSuffix cannot be null!");
+
+#ifndef NDEBUG
+ for (const Argument &I : OldFunc->args())
+ assert(VMap.count(&I) && "No mapping from source argument specified!");
+#endif
+
+ bool ModuleLevelChanges = Changes > CloneFunctionChangeType::LocalChangesOnly;
+
+ // Copy all attributes other than those stored in the AttributeList. We need
+ // to remap the parameter indices of the AttributeList.
+ AttributeList NewAttrs = NewFunc->getAttributes();
+ NewFunc->copyAttributesFrom(OldFunc);
+ NewFunc->setAttributes(NewAttrs);
+
+ const RemapFlags FuncGlobalRefFlags =
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges;
+
+ // Fix up the personality function that got copied over.
+ if (OldFunc->hasPersonalityFn())
+ NewFunc->setPersonalityFn(MapValue(OldFunc->getPersonalityFn(), VMap,
+ FuncGlobalRefFlags, TypeMapper,
+ Materializer));
+
+ if (OldFunc->hasPrefixData()) {
+ NewFunc->setPrefixData(MapValue(OldFunc->getPrefixData(), VMap,
+ FuncGlobalRefFlags, TypeMapper,
+ Materializer));
+ }
+
+ if (OldFunc->hasPrologueData()) {
+ NewFunc->setPrologueData(MapValue(OldFunc->getPrologueData(), VMap,
+ FuncGlobalRefFlags, TypeMapper,
+ Materializer));
+ }
+
+ SmallVector<AttributeSet, 4> NewArgAttrs(NewFunc->arg_size());
+ AttributeList OldAttrs = OldFunc->getAttributes();
+
+ // Clone any argument attributes that are present in the VMap.
+ for (const Argument &OldArg : OldFunc->args()) {
+ if (Argument *NewArg = dyn_cast<Argument>(VMap[&OldArg])) {
+ NewArgAttrs[NewArg->getArgNo()] =
+ OldAttrs.getParamAttrs(OldArg.getArgNo());
+ }
+ }
+
+ NewFunc->setAttributes(
+ AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttrs(),
+ OldAttrs.getRetAttrs(), NewArgAttrs));
+
+ // Everything else beyond this point deals with function instructions,
+ // so if we are dealing with a function declaration, we're done.
+ if (OldFunc->isDeclaration())
+ return;
+
+ // When we remap instructions within the same module, we want to avoid
+ // duplicating inlined DISubprograms, so record all subprograms we find as we
+ // duplicate instructions and then freeze them in the MD map. We also record
+ // information about dbg.value and dbg.declare to avoid duplicating the
+ // types.
+ std::optional<DebugInfoFinder> DIFinder;
+
+ // Track the subprogram attachment that needs to be cloned to fine-tune the
+ // mapping within the same module.
+ DISubprogram *SPClonedWithinModule = nullptr;
+ if (Changes < CloneFunctionChangeType::DifferentModule) {
+ assert((NewFunc->getParent() == nullptr ||
+ NewFunc->getParent() == OldFunc->getParent()) &&
+ "Expected NewFunc to have the same parent, or no parent");
+
+ // Need to find subprograms, types, and compile units.
+ DIFinder.emplace();
+
+ SPClonedWithinModule = OldFunc->getSubprogram();
+ if (SPClonedWithinModule)
+ DIFinder->processSubprogram(SPClonedWithinModule);
+ } else {
+ assert((NewFunc->getParent() == nullptr ||
+ NewFunc->getParent() != OldFunc->getParent()) &&
+ "Expected NewFunc to have different parents, or no parent");
+
+ if (Changes == CloneFunctionChangeType::DifferentModule) {
+ assert(NewFunc->getParent() &&
+ "Need parent of new function to maintain debug info invariants");
+
+ // Need to find all the compile units.
+ DIFinder.emplace();
+ }
+ }
+
+ // Loop over all of the basic blocks in the function, cloning them as
+ // appropriate. Note that we save BE this way in order to handle cloning of
+ // recursive functions into themselves.
+ for (const BasicBlock &BB : *OldFunc) {
+
+ // Create a new basic block and copy instructions into it!
+ BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo,
+ DIFinder ? &*DIFinder : nullptr);
+
+ // Add basic block mapping.
+ VMap[&BB] = CBB;
+
+ // It is only legal to clone a function if a block address within that
+ // function is never referenced outside of the function. Given that, we
+ // want to map block addresses from the old function to block addresses in
+ // the clone. (This is different from the generic ValueMapper
+ // implementation, which generates an invalid blockaddress when
+ // cloning a function.)
+ if (BB.hasAddressTaken()) {
+ Constant *OldBBAddr = BlockAddress::get(const_cast<Function *>(OldFunc),
+ const_cast<BasicBlock *>(&BB));
+ VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB);
+ }
+
+ // Note return instructions for the caller.
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
+ Returns.push_back(RI);
+ }
+
+ if (Changes < CloneFunctionChangeType::DifferentModule &&
+ DIFinder->subprogram_count() > 0) {
+ // Turn on module-level changes, since we need to clone (some of) the
+ // debug info metadata.
+ //
+ // FIXME: Metadata effectively owned by a function should be made
+ // local, and only that local metadata should be cloned.
+ ModuleLevelChanges = true;
+
+ auto mapToSelfIfNew = [&VMap](MDNode *N) {
+ // Avoid clobbering an existing mapping.
+ (void)VMap.MD().try_emplace(N, N);
+ };
+
+ // Avoid cloning types, compile units, and (other) subprograms.
+ SmallPtrSet<const DISubprogram *, 16> MappedToSelfSPs;
+ for (DISubprogram *ISP : DIFinder->subprograms()) {
+ if (ISP != SPClonedWithinModule) {
+ mapToSelfIfNew(ISP);
+ MappedToSelfSPs.insert(ISP);
+ }
+ }
+
+ // If a subprogram isn't going to be cloned, skip its lexical blocks as well.
+ for (DIScope *S : DIFinder->scopes()) {
+ auto *LScope = dyn_cast<DILocalScope>(S);
+ if (LScope && MappedToSelfSPs.count(LScope->getSubprogram()))
+ mapToSelfIfNew(S);
+ }
+
+ for (DICompileUnit *CU : DIFinder->compile_units())
+ mapToSelfIfNew(CU);
+
+ for (DIType *Type : DIFinder->types())
+ mapToSelfIfNew(Type);
+ } else {
+ assert(!SPClonedWithinModule &&
+ "Subprogram should be in DIFinder->subprogram_count()...");
+ }
+
+ const auto RemapFlag = ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges;
+ // Duplicate the metadata that is attached to the cloned function.
+ // Subprograms/CUs/types that were already mapped to themselves won't be
+ // duplicated.
+ SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+ OldFunc->getAllMetadata(MDs);
+ for (auto MD : MDs) {
+ NewFunc->addMetadata(MD.first, *MapMetadata(MD.second, VMap, RemapFlag,
+ TypeMapper, Materializer));
+ }
+
+ // Loop over all of the instructions in the new function, fixing up operand
+ // references as we go. This uses VMap to do all the hard work.
+ for (Function::iterator
+ BB = cast<BasicBlock>(VMap[&OldFunc->front()])->getIterator(),
+ BE = NewFunc->end();
+ BB != BE; ++BB)
+ // Loop over all instructions, fixing each one as we find it...
+ for (Instruction &II : *BB)
+ RemapInstruction(&II, VMap, RemapFlag, TypeMapper, Materializer);
+
+ // Only update !llvm.dbg.cu for DifferentModule (not CloneModule). In the
+ // same module, the compile unit will already be listed (or not). When
+ // cloning a module, CloneModule() will handle creating the named metadata.
+ if (Changes != CloneFunctionChangeType::DifferentModule)
+ return;
+
+ // Update !llvm.dbg.cu with compile units added to the new module if this
+ // function is being cloned in isolation.
+ //
+ // FIXME: This is making global / module-level changes, which doesn't seem
+ // like the right encapsulation. Consider dropping the requirement to update
+ // !llvm.dbg.cu (either obsoleting the node, or restricting it to
+ // non-discardable compile units) instead of discovering compile units by
+ // visiting the metadata attached to global values, which would allow this
+ // code to be deleted. Alternatively, perhaps give responsibility for this
+ // update to CloneFunctionInto's callers.
+ auto *NewModule = NewFunc->getParent();
+ auto *NMD = NewModule->getOrInsertNamedMetadata("llvm.dbg.cu");
+ // Avoid multiple insertions of the same DICompileUnit to NMD.
+ SmallPtrSet<const void *, 8> Visited;
+ for (auto *Operand : NMD->operands())
+ Visited.insert(Operand);
+ for (auto *Unit : DIFinder->compile_units()) {
+ MDNode *MappedUnit =
+ MapMetadata(Unit, VMap, RF_None, TypeMapper, Materializer);
+ if (Visited.insert(MappedUnit).second)
+ NMD->addOperand(MappedUnit);
+ }
+}
+
+/// Return a copy of the specified function and add it to that function's
+/// module. Also, any references specified in the VMap are changed to refer to
+/// their mapped value instead of the original one. If any of the arguments to
+/// the function are in the VMap, the arguments are deleted from the resultant
+/// function. The VMap is updated to include mappings from all of the
+/// instructions and basicblocks in the function from their old to new values.
+///
+Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap,
+ ClonedCodeInfo *CodeInfo) {
+ std::vector<Type *> ArgTypes;
+
+ // The user might be deleting arguments to the function by specifying them in
+ // the VMap. If so, we must not add those arguments to the argument type vector.
+ //
+ for (const Argument &I : F->args())
+ if (VMap.count(&I) == 0) // Haven't mapped the argument to anything yet?
+ ArgTypes.push_back(I.getType());
+
+ // Create a new function type...
+ FunctionType *FTy =
+ FunctionType::get(F->getFunctionType()->getReturnType(), ArgTypes,
+ F->getFunctionType()->isVarArg());
+
+ // Create the new function...
+ Function *NewF = Function::Create(FTy, F->getLinkage(), F->getAddressSpace(),
+ F->getName(), F->getParent());
+
+ // Loop over the arguments, copying the names of the mapped arguments over...
+ Function::arg_iterator DestI = NewF->arg_begin();
+ for (const Argument &I : F->args())
+ if (VMap.count(&I) == 0) { // Is this argument preserved?
+ DestI->setName(I.getName()); // Copy the name over...
+ VMap[&I] = &*DestI++; // Add mapping to VMap
+ }
+
+ SmallVector<ReturnInst *, 8> Returns; // Ignore returns cloned.
+ CloneFunctionInto(NewF, F, VMap, CloneFunctionChangeType::LocalChangesOnly,
+ Returns, "", CodeInfo);
+
+ return NewF;
+}
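+
+// Hedged usage sketch: cloning a function while substituting its first
+// argument with a constant, which (per the comment above) also drops that
+// argument from the clone's signature. The integer argument type is
+// illustrative.
+//
+//   ValueToValueMapTy VMap;
+//   Argument *A = F->getArg(0);
+//   VMap[A] = ConstantInt::get(A->getType(), 42);
+//   Function *Specialized = CloneFunction(F, VMap);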
+
+namespace {
+/// This is a private class used to implement CloneAndPruneFunctionInto.
+struct PruningFunctionCloner {
+ Function *NewFunc;
+ const Function *OldFunc;
+ ValueToValueMapTy &VMap;
+ bool ModuleLevelChanges;
+ const char *NameSuffix;
+ ClonedCodeInfo *CodeInfo;
+ bool HostFuncIsStrictFP;
+
+ Instruction *cloneInstruction(BasicBlock::const_iterator II);
+
+public:
+ PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
+ ValueToValueMapTy &valueMap, bool moduleLevelChanges,
+ const char *nameSuffix, ClonedCodeInfo *codeInfo)
+ : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap),
+ ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix),
+ CodeInfo(codeInfo) {
+ HostFuncIsStrictFP =
+ newFunc->getAttributes().hasFnAttr(Attribute::StrictFP);
+ }
+
+ /// The specified block is found to be reachable, clone it and
+ /// anything that it can reach.
+ void CloneBlock(const BasicBlock *BB, BasicBlock::const_iterator StartingInst,
+ std::vector<const BasicBlock *> &ToClone);
+};
+} // namespace
+
+static bool hasRoundingModeOperand(Intrinsic::ID CIID) {
+ switch (CIID) {
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
+ case Intrinsic::INTRINSIC: \
+ return ROUND_MODE == 1;
+#define FUNCTION INSTRUCTION
+#include "llvm/IR/ConstrainedOps.def"
+ default:
+ llvm_unreachable("Unexpected constrained intrinsic id");
+ }
+}
+
+Instruction *
+PruningFunctionCloner::cloneInstruction(BasicBlock::const_iterator II) {
+ const Instruction &OldInst = *II;
+ Instruction *NewInst = nullptr;
+ if (HostFuncIsStrictFP) {
+ Intrinsic::ID CIID = getConstrainedIntrinsicID(OldInst);
+ if (CIID != Intrinsic::not_intrinsic) {
+ // Instead of cloning the instruction, a call to the constrained intrinsic
+ // should be created.
+ // Assume the first arguments of constrained intrinsics are the same as
+ // the operands of the original instruction.
+
+ // Determine overloaded types of the intrinsic.
+ SmallVector<Type *, 2> TParams;
+ SmallVector<Intrinsic::IITDescriptor, 8> Descriptor;
+ getIntrinsicInfoTableEntries(CIID, Descriptor);
+ for (unsigned I = 0, E = Descriptor.size(); I != E; ++I) {
+ Intrinsic::IITDescriptor Operand = Descriptor[I];
+ switch (Operand.Kind) {
+ case Intrinsic::IITDescriptor::Argument:
+ if (Operand.getArgumentKind() !=
+ Intrinsic::IITDescriptor::AK_MatchType) {
+ if (I == 0)
+ TParams.push_back(OldInst.getType());
+ else
+ TParams.push_back(OldInst.getOperand(I - 1)->getType());
+ }
+ break;
+ case Intrinsic::IITDescriptor::SameVecWidthArgument:
+ ++I;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Create intrinsic call.
+ LLVMContext &Ctx = NewFunc->getContext();
+ Function *IFn =
+ Intrinsic::getDeclaration(NewFunc->getParent(), CIID, TParams);
+ SmallVector<Value *, 4> Args;
+ unsigned NumOperands = OldInst.getNumOperands();
+ if (isa<CallInst>(OldInst))
+ --NumOperands;
+ for (unsigned I = 0; I < NumOperands; ++I) {
+ Value *Op = OldInst.getOperand(I);
+ Args.push_back(Op);
+ }
+ if (const auto *CmpI = dyn_cast<FCmpInst>(&OldInst)) {
+ FCmpInst::Predicate Pred = CmpI->getPredicate();
+ StringRef PredName = FCmpInst::getPredicateName(Pred);
+ Args.push_back(MetadataAsValue::get(Ctx, MDString::get(Ctx, PredName)));
+ }
+
+ // The last arguments of a constrained intrinsic are metadata that
+ // represent the rounding mode (absent in some intrinsics) and the exception
+ // behavior. The inlined function uses default settings.
+ if (hasRoundingModeOperand(CIID))
+ Args.push_back(
+ MetadataAsValue::get(Ctx, MDString::get(Ctx, "round.tonearest")));
+ Args.push_back(
+ MetadataAsValue::get(Ctx, MDString::get(Ctx, "fpexcept.ignore")));
+
+ NewInst = CallInst::Create(IFn, Args, OldInst.getName() + ".strict");
+ }
+ }
+ if (!NewInst)
+ NewInst = II->clone();
+ return NewInst;
+}
+
+/// The specified block is found to be reachable, clone it and
+/// anything that it can reach.
+void PruningFunctionCloner::CloneBlock(
+ const BasicBlock *BB, BasicBlock::const_iterator StartingInst,
+ std::vector<const BasicBlock *> &ToClone) {
+ WeakTrackingVH &BBEntry = VMap[BB];
+
+ // Have we already cloned this block?
+ if (BBEntry)
+ return;
+
+ // Nope, clone it now.
+ BasicBlock *NewBB;
+ BBEntry = NewBB = BasicBlock::Create(BB->getContext());
+ if (BB->hasName())
+ NewBB->setName(BB->getName() + NameSuffix);
+
+ // It is only legal to clone a function if a block address within that
+ // function is never referenced outside of the function. Given that, we
+ // want to map block addresses from the old function to block addresses in
+ // the clone. (This is different from the generic ValueMapper
+ // implementation, which generates an invalid blockaddress when
+ // cloning a function.)
+ //
+ // Note that we don't need to fix the mapping for unreachable blocks;
+ // the default mapping there is safe.
+ if (BB->hasAddressTaken()) {
+ Constant *OldBBAddr = BlockAddress::get(const_cast<Function *>(OldFunc),
+ const_cast<BasicBlock *>(BB));
+ VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB);
+ }
+
+ bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
+ bool hasMemProfMetadata = false;
+
+ // Loop over all instructions, and copy them over, DCE'ing as we go. This
+ // loop doesn't include the terminator.
+ for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end(); II != IE;
+ ++II) {
+
+ Instruction *NewInst = cloneInstruction(II);
+
+ if (HostFuncIsStrictFP) {
+ // All function calls in the inlined function must get 'strictfp'
+ // attribute to prevent undesirable optimizations.
+ if (auto *Call = dyn_cast<CallInst>(NewInst))
+ Call->addFnAttr(Attribute::StrictFP);
+ }
+
+ // Eagerly remap operands to the newly cloned instruction, except for PHI
+ // nodes for which we defer processing until we update the CFG. Also defer
+ // debug intrinsic processing because they may contain use-before-defs.
+ if (!isa<PHINode>(NewInst) && !isa<DbgVariableIntrinsic>(NewInst)) {
+ RemapInstruction(NewInst, VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+
+ // If we can simplify this instruction to some other value, simply add
+ // a mapping to that value rather than inserting a new instruction into
+ // the basic block.
+ if (Value *V =
+ simplifyInstruction(NewInst, BB->getModule()->getDataLayout())) {
+ // On the off-chance that this simplifies to an instruction in the old
+ // function, map it back into the new function.
+ if (NewFunc != OldFunc)
+ if (Value *MappedV = VMap.lookup(V))
+ V = MappedV;
+
+ if (!NewInst->mayHaveSideEffects()) {
+ VMap[&*II] = V;
+ NewInst->deleteValue();
+ continue;
+ }
+ }
+ }
+
+ if (II->hasName())
+ NewInst->setName(II->getName() + NameSuffix);
+ VMap[&*II] = NewInst; // Add instruction map to value.
+ NewInst->insertInto(NewBB, NewBB->end());
+ if (isa<CallInst>(II) && !II->isDebugOrPseudoInst()) {
+ hasCalls = true;
+ hasMemProfMetadata |= II->hasMetadata(LLVMContext::MD_memprof);
+ }
+
+ if (CodeInfo) {
+ CodeInfo->OrigVMap[&*II] = NewInst;
+ if (auto *CB = dyn_cast<CallBase>(&*II))
+ if (CB->hasOperandBundles())
+ CodeInfo->OperandBundleCallSites.push_back(NewInst);
+ }
+
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (isa<ConstantInt>(AI->getArraySize()))
+ hasStaticAllocas = true;
+ else
+ hasDynamicAllocas = true;
+ }
+ }
+
+ // Finally, clone over the terminator.
+ const Instruction *OldTI = BB->getTerminator();
+ bool TerminatorDone = false;
+ if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) {
+ if (BI->isConditional()) {
+ // If the condition was a known constant in the callee...
+ ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
+ // Or is a known constant in the caller...
+ if (!Cond) {
+ Value *V = VMap.lookup(BI->getCondition());
+ Cond = dyn_cast_or_null<ConstantInt>(V);
+ }
+
+ // Constant fold to uncond branch!
+ if (Cond) {
+ BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue());
+ VMap[OldTI] = BranchInst::Create(Dest, NewBB);
+ ToClone.push_back(Dest);
+ TerminatorDone = true;
+ }
+ }
+ } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) {
+ // If switching on a value known constant in the caller.
+ ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
+ if (!Cond) { // Or known constant after constant prop in the callee...
+ Value *V = VMap.lookup(SI->getCondition());
+ Cond = dyn_cast_or_null<ConstantInt>(V);
+ }
+ if (Cond) { // Constant fold to uncond branch!
+ SwitchInst::ConstCaseHandle Case = *SI->findCaseValue(Cond);
+ BasicBlock *Dest = const_cast<BasicBlock *>(Case.getCaseSuccessor());
+ VMap[OldTI] = BranchInst::Create(Dest, NewBB);
+ ToClone.push_back(Dest);
+ TerminatorDone = true;
+ }
+ }
+
+ if (!TerminatorDone) {
+ Instruction *NewInst = OldTI->clone();
+ if (OldTI->hasName())
+ NewInst->setName(OldTI->getName() + NameSuffix);
+ NewInst->insertInto(NewBB, NewBB->end());
+ VMap[OldTI] = NewInst; // Add instruction map to value.
+
+ if (CodeInfo) {
+ CodeInfo->OrigVMap[OldTI] = NewInst;
+ if (auto *CB = dyn_cast<CallBase>(OldTI))
+ if (CB->hasOperandBundles())
+ CodeInfo->OperandBundleCallSites.push_back(NewInst);
+ }
+
+ // Recursively clone any reachable successor blocks.
+ append_range(ToClone, successors(BB->getTerminator()));
+ }
+
+ if (CodeInfo) {
+ CodeInfo->ContainsCalls |= hasCalls;
+ CodeInfo->ContainsMemProfMetadata |= hasMemProfMetadata;
+ CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
+ CodeInfo->ContainsDynamicAllocas |=
+ hasStaticAllocas && BB != &BB->getParent()->front();
+ }
+}
+
+/// This works like CloneAndPruneFunctionInto, except that it does not clone the
+/// entire function. Instead it starts at an instruction provided by the caller
+/// and copies (and prunes) only the code reachable from that instruction.
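+///
+/// A minimal usage sketch (illustrative only; NewF, OldF and Start are
+/// hypothetical caller-side names, and VMap must already map any of OldF's
+/// arguments used by the cloned code):
+/// \code
+///   ValueToValueMapTy VMap;
+///   // ... populate VMap for the values the cloned region refers to ...
+///   SmallVector<ReturnInst *, 4> Returns;
+///   CloneAndPruneIntoFromInst(NewF, OldF, Start, VMap,
+///                             /*ModuleLevelChanges=*/false, Returns,
+///                             ".clone", /*CodeInfo=*/nullptr);
+/// \endcode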
+void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
+ const Instruction *StartingInst,
+ ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
+ SmallVectorImpl<ReturnInst *> &Returns,
+ const char *NameSuffix,
+ ClonedCodeInfo *CodeInfo) {
+ assert(NameSuffix && "NameSuffix cannot be null!");
+
+ ValueMapTypeRemapper *TypeMapper = nullptr;
+ ValueMaterializer *Materializer = nullptr;
+
+#ifndef NDEBUG
+ // If the cloning starts at the beginning of the function, verify that
+ // the function arguments are mapped.
+ if (!StartingInst)
+ for (const Argument &II : OldFunc->args())
+ assert(VMap.count(&II) && "No mapping from source argument specified!");
+#endif
+
+ PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
+ NameSuffix, CodeInfo);
+ const BasicBlock *StartingBB;
+ if (StartingInst)
+ StartingBB = StartingInst->getParent();
+ else {
+ StartingBB = &OldFunc->getEntryBlock();
+ StartingInst = &StartingBB->front();
+ }
+
+ // Collect debug intrinsics for remapping later.
+ SmallVector<const DbgVariableIntrinsic *, 8> DbgIntrinsics;
+ for (const auto &BB : *OldFunc) {
+ for (const auto &I : BB) {
+ if (const auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
+ DbgIntrinsics.push_back(DVI);
+ }
+ }
+
+ // Clone the entry block, and anything recursively reachable from it.
+ std::vector<const BasicBlock *> CloneWorklist;
+ PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist);
+ while (!CloneWorklist.empty()) {
+ const BasicBlock *BB = CloneWorklist.back();
+ CloneWorklist.pop_back();
+ PFC.CloneBlock(BB, BB->begin(), CloneWorklist);
+ }
+
+ // Loop over all of the basic blocks in the old function. If the block was
+ // reachable, we have cloned it and the old block is now in the value map:
+ // insert it into the new function in the right order. If not, ignore it.
+ //
+ // Defer PHI resolution until rest of function is resolved.
+ SmallVector<const PHINode *, 16> PHIToResolve;
+ for (const BasicBlock &BI : *OldFunc) {
+ Value *V = VMap.lookup(&BI);
+ BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
+ if (!NewBB)
+ continue; // Dead block.
+
+ // Add the new block to the new function.
+ NewFunc->insert(NewFunc->end(), NewBB);
+
+ // Handle PHI nodes specially, as we have to remove references to dead
+ // blocks.
+ for (const PHINode &PN : BI.phis()) {
+ // PHI nodes may have been remapped to non-PHI nodes by the caller or
+ // during the cloning process.
+ if (isa<PHINode>(VMap[&PN]))
+ PHIToResolve.push_back(&PN);
+ else
+ break;
+ }
+
+ // Finally, remap the terminator instructions, as those can't be remapped
+ // until all BBs are mapped.
+ RemapInstruction(NewBB->getTerminator(), VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
+ TypeMapper, Materializer);
+ }
+
+ // Defer PHI resolution until rest of function is resolved, PHI resolution
+ // requires the CFG to be up-to-date.
+ for (unsigned phino = 0, e = PHIToResolve.size(); phino != e;) {
+ const PHINode *OPN = PHIToResolve[phino];
+ unsigned NumPreds = OPN->getNumIncomingValues();
+ const BasicBlock *OldBB = OPN->getParent();
+ BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]);
+
+ // Map operands for blocks that are live and remove operands for blocks
+ // that are dead.
+ for (; phino != PHIToResolve.size() &&
+ PHIToResolve[phino]->getParent() == OldBB;
+ ++phino) {
+ OPN = PHIToResolve[phino];
+ PHINode *PN = cast<PHINode>(VMap[OPN]);
+ for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
+ Value *V = VMap.lookup(PN->getIncomingBlock(pred));
+ if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) {
+ Value *InVal =
+ MapValue(PN->getIncomingValue(pred), VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+ assert(InVal && "Unknown input value?");
+ PN->setIncomingValue(pred, InVal);
+ PN->setIncomingBlock(pred, MappedBlock);
+ } else {
+ PN->removeIncomingValue(pred, false);
+ --pred; // Revisit the next entry.
+ --e;
+ }
+ }
+ }
+
+ // The loop above has removed PHI entries for those blocks that are dead
+ // and has updated others. However, if a block is live (i.e. copied over)
+ // but its terminator has been changed to not go to this block, then our
+ // phi nodes will have invalid entries. Update the PHI nodes in this
+ // case.
+ PHINode *PN = cast<PHINode>(NewBB->begin());
+ NumPreds = pred_size(NewBB);
+ if (NumPreds != PN->getNumIncomingValues()) {
+ assert(NumPreds < PN->getNumIncomingValues());
+ // Count how many times each predecessor comes to this block.
+ std::map<BasicBlock *, unsigned> PredCount;
+ for (BasicBlock *Pred : predecessors(NewBB))
+ --PredCount[Pred];
+
+ // Figure out how many entries to remove from each PHI.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ ++PredCount[PN->getIncomingBlock(i)];
+
+ // At this point, the excess predecessor entries are positive in the
+ // map. Loop over all of the PHIs and remove excess predecessor
+ // entries.
+ BasicBlock::iterator I = NewBB->begin();
+ for (; (PN = dyn_cast<PHINode>(I)); ++I) {
+ for (const auto &PCI : PredCount) {
+ BasicBlock *Pred = PCI.first;
+ for (unsigned NumToRemove = PCI.second; NumToRemove; --NumToRemove)
+ PN->removeIncomingValue(Pred, false);
+ }
+ }
+ }
+
+ // If the loops above have made these phi nodes have 0 or 1 operand,
+ // replace them with poison or the input value. We must do this for
+ // correctness, because 0-operand phis are not valid.
+ PN = cast<PHINode>(NewBB->begin());
+ if (PN->getNumIncomingValues() == 0) {
+ BasicBlock::iterator I = NewBB->begin();
+ BasicBlock::const_iterator OldI = OldBB->begin();
+ while ((PN = dyn_cast<PHINode>(I++))) {
+ Value *NV = PoisonValue::get(PN->getType());
+ PN->replaceAllUsesWith(NV);
+ assert(VMap[&*OldI] == PN && "VMap mismatch");
+ VMap[&*OldI] = NV;
+ PN->eraseFromParent();
+ ++OldI;
+ }
+ }
+ }
+
+ // Make a second pass over the PHINodes now that all of them have been
+ // remapped into the new function, simplifying the PHINode and performing any
+ // recursive simplifications exposed. This will transparently update the
+ // WeakTrackingVH in the VMap. Notably, we rely on that so that if we coalesce
+ // two PHINodes, the iteration over the old PHIs remains valid, and the
+ // mapping will just map us to the new node (which may not even be a PHI
+ // node).
+ const DataLayout &DL = NewFunc->getParent()->getDataLayout();
+ SmallSetVector<const Value *, 8> Worklist;
+ for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
+ if (isa<PHINode>(VMap[PHIToResolve[Idx]]))
+ Worklist.insert(PHIToResolve[Idx]);
+
+ // Note that we must test the size on each iteration, the worklist can grow.
+ for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
+ const Value *OrigV = Worklist[Idx];
+ auto *I = dyn_cast_or_null<Instruction>(VMap.lookup(OrigV));
+ if (!I)
+ continue;
+
+ // Skip over non-intrinsic callsites; we don't want to remove any nodes from
+ // the CGSCC.
+ CallBase *CB = dyn_cast<CallBase>(I);
+ if (CB && CB->getCalledFunction() &&
+ !CB->getCalledFunction()->isIntrinsic())
+ continue;
+
+ // See if this instruction simplifies.
+ Value *SimpleV = simplifyInstruction(I, DL);
+ if (!SimpleV)
+ continue;
+
+ // Stash away all the uses of the old instruction so we can check them for
+ // recursive simplifications after a RAUW. This is cheaper than checking all
+ // uses of the replacement value on the recursive step in most cases.
+ for (const User *U : OrigV->users())
+ Worklist.insert(cast<Instruction>(U));
+
+ // Replace the instruction with its simplified value.
+ I->replaceAllUsesWith(SimpleV);
+
+ // If the original instruction had no side effects, remove it.
+ if (isInstructionTriviallyDead(I))
+ I->eraseFromParent();
+ else
+ VMap[OrigV] = I;
+ }
+
+ // Remap debug intrinsic operands now that all values have been mapped.
+ // Doing this now (late) preserves use-before-defs in debug intrinsics. If
+ // we didn't do this, ValueAsMetadata(use-before-def) operands would be
+ // replaced by empty metadata. This would signal later cleanup passes to
+ // remove the debug intrinsics, potentially causing incorrect locations.
+ for (const auto *DVI : DbgIntrinsics) {
+ if (DbgVariableIntrinsic *NewDVI =
+ cast_or_null<DbgVariableIntrinsic>(VMap.lookup(DVI)))
+ RemapInstruction(NewDVI, VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
+ TypeMapper, Materializer);
+ }
+
+ // Simplify conditional branches and switches with a constant operand. We try
+ // to prune these out when cloning, but if the simplification required
+ // looking through PHI nodes, those are only available after forming the full
+ // basic block. That may leave some here, and we still want to prune the dead
+ // code as early as possible.
+ Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator();
+ for (BasicBlock &BB : make_range(Begin, NewFunc->end()))
+ ConstantFoldTerminator(&BB);
+
+ // Some blocks may have become unreachable as a result. Find and delete them.
+ {
+ SmallPtrSet<BasicBlock *, 16> ReachableBlocks;
+ SmallVector<BasicBlock *, 16> Worklist;
+ Worklist.push_back(&*Begin);
+ while (!Worklist.empty()) {
+ BasicBlock *BB = Worklist.pop_back_val();
+ if (ReachableBlocks.insert(BB).second)
+ append_range(Worklist, successors(BB));
+ }
+
+ SmallVector<BasicBlock *, 16> UnreachableBlocks;
+ for (BasicBlock &BB : make_range(Begin, NewFunc->end()))
+ if (!ReachableBlocks.contains(&BB))
+ UnreachableBlocks.push_back(&BB);
+ DeleteDeadBlocks(UnreachableBlocks);
+ }
+
+ // Now that the inlined function body has been fully constructed, go through
+ // and zap unconditional fall-through branches. This happens all the time when
+ // specializing code: code specialization turns conditional branches into
+ // uncond branches, and this code folds them.
+ Function::iterator I = Begin;
+ while (I != NewFunc->end()) {
+ BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
+ if (!BI || BI->isConditional()) {
+ ++I;
+ continue;
+ }
+
+ BasicBlock *Dest = BI->getSuccessor(0);
+ if (!Dest->getSinglePredecessor()) {
+ ++I;
+ continue;
+ }
+
+ // We shouldn't be able to get single-entry PHI nodes here, as instsimplify
+ // above should have zapped all of them.
+ assert(!isa<PHINode>(Dest->begin()));
+
+ // We know all single-entry PHI nodes in the inlined function have been
+ // removed, so we just need to splice the blocks.
+ BI->eraseFromParent();
+
+ // Make all PHI nodes that referred to Dest now refer to I as their source.
+ Dest->replaceAllUsesWith(&*I);
+
+ // Move all the instructions in the succ to the pred.
+ I->splice(I->end(), Dest);
+
+ // Remove the dest block.
+ Dest->eraseFromParent();
+
+ // Do not increment I, iteratively merge all things this block branches to.
+ }
+
+ // Make a final pass over the basic blocks from the old function to gather
+ // any return instructions which survived folding. We have to do this here
+ // because we can iteratively remove and merge returns above.
+ for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB])->getIterator(),
+ E = NewFunc->end();
+ I != E; ++I)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator()))
+ Returns.push_back(RI);
+}
+
+/// This works exactly like CloneFunctionInto,
+/// except that it does some simple constant prop and DCE on the fly. The
+/// effect of this is to copy significantly less code in cases where (for
+/// example) a function call with constant arguments is inlined, and those
+/// constant arguments cause a significant amount of code in the callee to be
+/// dead. Since this doesn't produce an exact copy of the input, it can't be
+/// used for things like CloneFunction or CloneModule.
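+///
+/// Typical use when inlining-style cloning is wanted (sketch; NewF and OldF
+/// are hypothetical, and VMap is assumed to already map OldF's arguments):
+/// \code
+///   ValueToValueMapTy VMap;
+///   // ... map each OldF argument to the value it takes in NewF ...
+///   SmallVector<ReturnInst *, 4> Returns;
+///   CloneAndPruneFunctionInto(NewF, OldF, VMap, /*ModuleLevelChanges=*/false,
+///                             Returns, ".i", /*CodeInfo=*/nullptr);
+/// \endcode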
+void llvm::CloneAndPruneFunctionInto(
+ Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges, SmallVectorImpl<ReturnInst *> &Returns,
+ const char *NameSuffix, ClonedCodeInfo *CodeInfo) {
+ CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap,
+ ModuleLevelChanges, Returns, NameSuffix, CodeInfo);
+}
+
+/// Remaps instructions in \p Blocks using the mapping in \p VMap.
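+///
+/// Typical use after cloning a set of blocks (sketch; Blocks and VMap are
+/// assumed to have been filled in by the cloning step):
+/// \code
+///   SmallVector<BasicBlock *, 8> Blocks; // e.g. results of CloneBasicBlock
+///   ValueToValueMapTy VMap;              // old-value -> new-value mapping
+///   remapInstructionsInBlocks(Blocks, VMap);
+/// \endcode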
+void llvm::remapInstructionsInBlocks(
+ const SmallVectorImpl<BasicBlock *> &Blocks, ValueToValueMapTy &VMap) {
+ // Rewrite the code to refer to itself.
+ for (auto *BB : Blocks)
+ for (auto &Inst : *BB)
+ RemapInstruction(&Inst, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+}
+
+/// Clones a loop \p OrigLoop. Returns the loop and the blocks in \p
+/// Blocks.
+///
+/// Updates LoopInfo and DominatorTree assuming the loop is dominated by block
+/// \p LoopDomBB. Insert the new blocks before block specified in \p Before.
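+///
+/// A minimal usage sketch (illustrative; L, LI, DT, InsertBefore and DomBB are
+/// assumed to come from the caller, and DomBB must dominate the cloned code):
+/// \code
+///   ValueToValueMapTy VMap;
+///   SmallVector<BasicBlock *, 8> NewBlocks;
+///   Loop *NewL = cloneLoopWithPreheader(InsertBefore, DomBB, L, VMap, ".copy",
+///                                       LI, DT, NewBlocks);
+///   remapInstructionsInBlocks(NewBlocks, VMap);
+/// \endcode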
+Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
+ Loop *OrigLoop, ValueToValueMapTy &VMap,
+ const Twine &NameSuffix, LoopInfo *LI,
+ DominatorTree *DT,
+ SmallVectorImpl<BasicBlock *> &Blocks) {
+ Function *F = OrigLoop->getHeader()->getParent();
+ Loop *ParentLoop = OrigLoop->getParentLoop();
+ DenseMap<Loop *, Loop *> LMap;
+
+ Loop *NewLoop = LI->AllocateLoop();
+ LMap[OrigLoop] = NewLoop;
+ if (ParentLoop)
+ ParentLoop->addChildLoop(NewLoop);
+ else
+ LI->addTopLevelLoop(NewLoop);
+
+ BasicBlock *OrigPH = OrigLoop->getLoopPreheader();
+ assert(OrigPH && "No preheader");
+ BasicBlock *NewPH = CloneBasicBlock(OrigPH, VMap, NameSuffix, F);
+ // To rename the loop PHIs.
+ VMap[OrigPH] = NewPH;
+ Blocks.push_back(NewPH);
+
+ // Update LoopInfo.
+ if (ParentLoop)
+ ParentLoop->addBasicBlockToLoop(NewPH, *LI);
+
+ // Update DominatorTree.
+ DT->addNewBlock(NewPH, LoopDomBB);
+
+ for (Loop *CurLoop : OrigLoop->getLoopsInPreorder()) {
+ Loop *&NewLoop = LMap[CurLoop];
+ if (!NewLoop) {
+ NewLoop = LI->AllocateLoop();
+
+ // Establish the parent/child relationship.
+ Loop *OrigParent = CurLoop->getParentLoop();
+ assert(OrigParent && "Could not find the original parent loop");
+ Loop *NewParentLoop = LMap[OrigParent];
+ assert(NewParentLoop && "Could not find the new parent loop");
+
+ NewParentLoop->addChildLoop(NewLoop);
+ }
+ }
+
+ for (BasicBlock *BB : OrigLoop->getBlocks()) {
+ Loop *CurLoop = LI->getLoopFor(BB);
+ Loop *&NewLoop = LMap[CurLoop];
+ assert(NewLoop && "Expecting new loop to be allocated");
+
+ BasicBlock *NewBB = CloneBasicBlock(BB, VMap, NameSuffix, F);
+ VMap[BB] = NewBB;
+
+ // Update LoopInfo.
+ NewLoop->addBasicBlockToLoop(NewBB, *LI);
+
+ // Add DominatorTree node. After seeing all blocks, update to correct
+ // IDom.
+ DT->addNewBlock(NewBB, NewPH);
+
+ Blocks.push_back(NewBB);
+ }
+
+ for (BasicBlock *BB : OrigLoop->getBlocks()) {
+ // Update loop headers.
+ Loop *CurLoop = LI->getLoopFor(BB);
+ if (BB == CurLoop->getHeader())
+ LMap[CurLoop]->moveToHeader(cast<BasicBlock>(VMap[BB]));
+
+ // Update DominatorTree.
+ BasicBlock *IDomBB = DT->getNode(BB)->getIDom()->getBlock();
+ DT->changeImmediateDominator(cast<BasicBlock>(VMap[BB]),
+ cast<BasicBlock>(VMap[IDomBB]));
+ }
+
+ // Move them physically from the end of the block list.
+ F->splice(Before->getIterator(), F, NewPH->getIterator());
+ F->splice(Before->getIterator(), F, NewLoop->getHeader()->getIterator(),
+ F->end());
+
+ return NewLoop;
+}
+
+/// Duplicate non-Phi instructions from the beginning of block up to
+/// StopAt instruction into a split block between BB and its predecessor.
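+///
+/// Sketch of a typical call (illustrative; PredBB must have a single edge to
+/// BB, and StopAt is an instruction inside BB):
+/// \code
+///   DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+///   ValueToValueMapTy Mapping;
+///   BasicBlock *Split =
+///       DuplicateInstructionsInSplitBetween(BB, PredBB, StopAt, Mapping, DTU);
+/// \endcode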
+BasicBlock *llvm::DuplicateInstructionsInSplitBetween(
+ BasicBlock *BB, BasicBlock *PredBB, Instruction *StopAt,
+ ValueToValueMapTy &ValueMapping, DomTreeUpdater &DTU) {
+
+ assert(count(successors(PredBB), BB) == 1 &&
+ "There must be a single edge between PredBB and BB!");
+ // We are going to have to map operands from the original BB block to the new
+ // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
+ // account for entry from PredBB.
+ BasicBlock::iterator BI = BB->begin();
+ for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+ ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
+
+ BasicBlock *NewBB = SplitEdge(PredBB, BB);
+ NewBB->setName(PredBB->getName() + ".split");
+ Instruction *NewTerm = NewBB->getTerminator();
+
+ // FIXME: SplitEdge does not yet take a DTU, so we include the split edge
+ // in the update set here.
+ DTU.applyUpdates({{DominatorTree::Delete, PredBB, BB},
+ {DominatorTree::Insert, PredBB, NewBB},
+ {DominatorTree::Insert, NewBB, BB}});
+
+ // Clone the non-phi instructions of BB into NewBB, keeping track of the
+ // mapping and using it to remap operands in the cloned instructions.
+ // Stop once we see the terminator too. This covers the case where BB's
+ // terminator gets replaced and StopAt == BB's terminator.
+ for (; StopAt != &*BI && BB->getTerminator() != &*BI; ++BI) {
+ Instruction *New = BI->clone();
+ New->setName(BI->getName());
+ New->insertBefore(NewTerm);
+ ValueMapping[&*BI] = New;
+
+ // Remap operands to patch up intra-block references.
+ for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
+ if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
+ auto I = ValueMapping.find(Inst);
+ if (I != ValueMapping.end())
+ New->setOperand(i, I->second);
+ }
+ }
+
+ return NewBB;
+}
+
+void llvm::cloneNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes,
+ DenseMap<MDNode *, MDNode *> &ClonedScopes,
+ StringRef Ext, LLVMContext &Context) {
+ MDBuilder MDB(Context);
+
+ for (auto *ScopeList : NoAliasDeclScopes) {
+ for (const auto &MDOperand : ScopeList->operands()) {
+ if (MDNode *MD = dyn_cast<MDNode>(MDOperand)) {
+ AliasScopeNode SNANode(MD);
+
+ std::string Name;
+ auto ScopeName = SNANode.getName();
+ if (!ScopeName.empty())
+ Name = (Twine(ScopeName) + ":" + Ext).str();
+ else
+ Name = std::string(Ext);
+
+ MDNode *NewScope = MDB.createAnonymousAliasScope(
+ const_cast<MDNode *>(SNANode.getDomain()), Name);
+ ClonedScopes.insert(std::make_pair(MD, NewScope));
+ }
+ }
+ }
+}
+
+void llvm::adaptNoAliasScopes(Instruction *I,
+ const DenseMap<MDNode *, MDNode *> &ClonedScopes,
+ LLVMContext &Context) {
+ auto CloneScopeList = [&](const MDNode *ScopeList) -> MDNode * {
+ bool NeedsReplacement = false;
+ SmallVector<Metadata *, 8> NewScopeList;
+ for (const auto &MDOp : ScopeList->operands()) {
+ if (MDNode *MD = dyn_cast<MDNode>(MDOp)) {
+ if (auto *NewMD = ClonedScopes.lookup(MD)) {
+ NewScopeList.push_back(NewMD);
+ NeedsReplacement = true;
+ continue;
+ }
+ NewScopeList.push_back(MD);
+ }
+ }
+ if (NeedsReplacement)
+ return MDNode::get(Context, NewScopeList);
+ return nullptr;
+ };
+
+ if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(I))
+ if (auto *NewScopeList = CloneScopeList(Decl->getScopeList()))
+ Decl->setScopeList(NewScopeList);
+
+ auto replaceWhenNeeded = [&](unsigned MD_ID) {
+ if (const MDNode *CSNoAlias = I->getMetadata(MD_ID))
+ if (auto *NewScopeList = CloneScopeList(CSNoAlias))
+ I->setMetadata(MD_ID, NewScopeList);
+ };
+ replaceWhenNeeded(LLVMContext::MD_noalias);
+ replaceWhenNeeded(LLVMContext::MD_alias_scope);
+}
+
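+// Typical pattern when duplicating code that contains noalias scope
+// declarations (sketch; OrigBlocks, NewBlocks and Ctx are hypothetical
+// caller-side names):
+//   SmallVector<MDNode *, 8> NoAliasDeclScopes;
+//   identifyNoAliasScopesToClone(OrigBlocks, NoAliasDeclScopes);
+//   // ... clone OrigBlocks into NewBlocks ...
+//   cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, NewBlocks, Ctx, ".clone");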
+void llvm::cloneAndAdaptNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes,
+ ArrayRef<BasicBlock *> NewBlocks,
+ LLVMContext &Context, StringRef Ext) {
+ if (NoAliasDeclScopes.empty())
+ return;
+
+ DenseMap<MDNode *, MDNode *> ClonedScopes;
+ LLVM_DEBUG(dbgs() << "cloneAndAdaptNoAliasScopes: cloning "
+ << NoAliasDeclScopes.size() << " node(s)\n");
+
+ cloneNoAliasScopes(NoAliasDeclScopes, ClonedScopes, Ext, Context);
+ // Identify instructions using metadata that needs adaptation
+ for (BasicBlock *NewBlock : NewBlocks)
+ for (Instruction &I : *NewBlock)
+ adaptNoAliasScopes(&I, ClonedScopes, Context);
+}
+
+void llvm::cloneAndAdaptNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes,
+ Instruction *IStart, Instruction *IEnd,
+ LLVMContext &Context, StringRef Ext) {
+ if (NoAliasDeclScopes.empty())
+ return;
+
+ DenseMap<MDNode *, MDNode *> ClonedScopes;
+ LLVM_DEBUG(dbgs() << "cloneAndAdaptNoAliasScopes: cloning "
+ << NoAliasDeclScopes.size() << " node(s)\n");
+
+ cloneNoAliasScopes(NoAliasDeclScopes, ClonedScopes, Ext, Context);
+ // Identify instructions using metadata that needs adaptation
+ assert(IStart->getParent() == IEnd->getParent() && "different basic block ?");
+ auto ItStart = IStart->getIterator();
+ auto ItEnd = IEnd->getIterator();
+ ++ItEnd; // IEnd is included, increment ItEnd to get the end of the range
+ for (auto &I : llvm::make_range(ItStart, ItEnd))
+ adaptNoAliasScopes(&I, ClonedScopes, Context);
+}
+
+void llvm::identifyNoAliasScopesToClone(
+ ArrayRef<BasicBlock *> BBs, SmallVectorImpl<MDNode *> &NoAliasDeclScopes) {
+ for (BasicBlock *BB : BBs)
+ for (Instruction &I : *BB)
+ if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
+ NoAliasDeclScopes.push_back(Decl->getScopeList());
+}
+
+void llvm::identifyNoAliasScopesToClone(
+ BasicBlock::iterator Start, BasicBlock::iterator End,
+ SmallVectorImpl<MDNode *> &NoAliasDeclScopes) {
+ for (Instruction &I : make_range(Start, End))
+ if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
+ NoAliasDeclScopes.push_back(Decl->getScopeList());
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/CloneModule.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/CloneModule.cpp
new file mode 100644
index 0000000000..55e051298a
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/CloneModule.cpp
@@ -0,0 +1,218 @@
+//===- CloneModule.cpp - Clone an entire module ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CloneModule interface which makes a copy of an
+// entire module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+using namespace llvm;
+
+namespace llvm {
+class Constant;
+}
+
+static void copyComdat(GlobalObject *Dst, const GlobalObject *Src) {
+ const Comdat *SC = Src->getComdat();
+ if (!SC)
+ return;
+ Comdat *DC = Dst->getParent()->getOrInsertComdat(SC->getName());
+ DC->setSelectionKind(SC->getSelectionKind());
+ Dst->setComdat(DC);
+}
+
+/// This is not as easy as it might seem because we have to worry about making
+/// copies of global variables and functions, and making their (initializers and
+/// references, respectively) refer to the right globals.
+///
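+/// A minimal usage sketch (illustrative):
+/// \code
+///   std::unique_ptr<Module> Copy = CloneModule(M);
+///   // Or keep the old->new mapping and turn some definitions into
+///   // external references:
+///   ValueToValueMapTy VMap;
+///   std::unique_ptr<Module> Partial = CloneModule(
+///       M, VMap, [](const GlobalValue *GV) { return !GV->hasLocalLinkage(); });
+/// \endcode
+///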
+std::unique_ptr<Module> llvm::CloneModule(const Module &M) {
+ // Create the value map that maps things from the old module over to the new
+ // module.
+ ValueToValueMapTy VMap;
+ return CloneModule(M, VMap);
+}
+
+std::unique_ptr<Module> llvm::CloneModule(const Module &M,
+ ValueToValueMapTy &VMap) {
+ return CloneModule(M, VMap, [](const GlobalValue *GV) { return true; });
+}
+
+std::unique_ptr<Module> llvm::CloneModule(
+ const Module &M, ValueToValueMapTy &VMap,
+ function_ref<bool(const GlobalValue *)> ShouldCloneDefinition) {
+ // First off, we need to create the new module.
+ std::unique_ptr<Module> New =
+ std::make_unique<Module>(M.getModuleIdentifier(), M.getContext());
+ New->setSourceFileName(M.getSourceFileName());
+ New->setDataLayout(M.getDataLayout());
+ New->setTargetTriple(M.getTargetTriple());
+ New->setModuleInlineAsm(M.getModuleInlineAsm());
+
+ // Loop over all of the global variables, making corresponding globals in the
+ // new module. Here we add them to the VMap and to the new Module. We
+ // don't worry about attributes or initializers, they will come later.
+ //
+ for (const GlobalVariable &I : M.globals()) {
+ GlobalVariable *NewGV = new GlobalVariable(
+ *New, I.getValueType(), I.isConstant(), I.getLinkage(),
+ (Constant *)nullptr, I.getName(), (GlobalVariable *)nullptr,
+ I.getThreadLocalMode(), I.getType()->getAddressSpace());
+ NewGV->copyAttributesFrom(&I);
+ VMap[&I] = NewGV;
+ }
+
+ // Loop over the functions in the module, making external functions as before
+ for (const Function &I : M) {
+ Function *NF =
+ Function::Create(cast<FunctionType>(I.getValueType()), I.getLinkage(),
+ I.getAddressSpace(), I.getName(), New.get());
+ NF->copyAttributesFrom(&I);
+ VMap[&I] = NF;
+ }
+
+ // Loop over the aliases in the module
+ for (const GlobalAlias &I : M.aliases()) {
+ if (!ShouldCloneDefinition(&I)) {
+ // An alias cannot act as an external reference, so we need to create
+ // either a function or a global variable depending on the value type.
+ // FIXME: Once pointee types are gone we can probably pick one or the
+ // other.
+ GlobalValue *GV;
+ if (I.getValueType()->isFunctionTy())
+ GV = Function::Create(cast<FunctionType>(I.getValueType()),
+ GlobalValue::ExternalLinkage, I.getAddressSpace(),
+ I.getName(), New.get());
+ else
+ GV = new GlobalVariable(*New, I.getValueType(), false,
+ GlobalValue::ExternalLinkage, nullptr,
+ I.getName(), nullptr, I.getThreadLocalMode(),
+ I.getType()->getAddressSpace());
+ VMap[&I] = GV;
+ // We do not copy attributes (mainly because copying between different
+ // kinds of globals is forbidden), but this is generally not required for
+ // correctness.
+ continue;
+ }
+ auto *GA = GlobalAlias::create(I.getValueType(),
+ I.getType()->getPointerAddressSpace(),
+ I.getLinkage(), I.getName(), New.get());
+ GA->copyAttributesFrom(&I);
+ VMap[&I] = GA;
+ }
+
+ for (const GlobalIFunc &I : M.ifuncs()) {
+ // Defer setting the resolver function until after functions are cloned.
+ auto *GI =
+ GlobalIFunc::create(I.getValueType(), I.getAddressSpace(),
+ I.getLinkage(), I.getName(), nullptr, New.get());
+ GI->copyAttributesFrom(&I);
+ VMap[&I] = GI;
+ }
+
+ // Now that all of the things that a global variable initializer can refer to
+ // have been created, loop through and copy the global variable initializers
+ // over... We also set the attributes on the globals now.
+ //
+ for (const GlobalVariable &G : M.globals()) {
+ GlobalVariable *GV = cast<GlobalVariable>(VMap[&G]);
+
+ SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+ G.getAllMetadata(MDs);
+ for (auto MD : MDs)
+ GV->addMetadata(MD.first, *MapMetadata(MD.second, VMap));
+
+ if (G.isDeclaration())
+ continue;
+
+ if (!ShouldCloneDefinition(&G)) {
+ // Skip after setting the correct linkage for an external reference.
+ GV->setLinkage(GlobalValue::ExternalLinkage);
+ continue;
+ }
+ if (G.hasInitializer())
+ GV->setInitializer(MapValue(G.getInitializer(), VMap));
+
+ copyComdat(GV, &G);
+ }
+
+ // Similarly, copy over function bodies now...
+ //
+ for (const Function &I : M) {
+ Function *F = cast<Function>(VMap[&I]);
+
+ if (I.isDeclaration()) {
+ // Copy over metadata for declarations since we're not doing it below in
+ // CloneFunctionInto().
+ SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+ I.getAllMetadata(MDs);
+ for (auto MD : MDs)
+ F->addMetadata(MD.first, *MapMetadata(MD.second, VMap));
+ continue;
+ }
+
+ if (!ShouldCloneDefinition(&I)) {
+ // Skip after setting the correct linkage for an external reference.
+ F->setLinkage(GlobalValue::ExternalLinkage);
+ // Personality function is not valid on a declaration.
+ F->setPersonalityFn(nullptr);
+ continue;
+ }
+
+ Function::arg_iterator DestI = F->arg_begin();
+ for (const Argument &J : I.args()) {
+ DestI->setName(J.getName());
+ VMap[&J] = &*DestI++;
+ }
+
+ SmallVector<ReturnInst *, 8> Returns; // Ignore returns cloned.
+ CloneFunctionInto(F, &I, VMap, CloneFunctionChangeType::ClonedModule,
+ Returns);
+
+ if (I.hasPersonalityFn())
+ F->setPersonalityFn(MapValue(I.getPersonalityFn(), VMap));
+
+ copyComdat(F, &I);
+ }
+
+ // And aliases
+ for (const GlobalAlias &I : M.aliases()) {
+ // We already dealt with undefined aliases above.
+ if (!ShouldCloneDefinition(&I))
+ continue;
+ GlobalAlias *GA = cast<GlobalAlias>(VMap[&I]);
+ if (const Constant *C = I.getAliasee())
+ GA->setAliasee(MapValue(C, VMap));
+ }
+
+ for (const GlobalIFunc &I : M.ifuncs()) {
+ GlobalIFunc *GI = cast<GlobalIFunc>(VMap[&I]);
+ if (const Constant *Resolver = I.getResolver())
+ GI->setResolver(MapValue(Resolver, VMap));
+ }
+
+ // And named metadata....
+ for (const NamedMDNode &NMD : M.named_metadata()) {
+ NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());
+ for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
+ NewNMD->addOperand(MapMetadata(NMD.getOperand(i), VMap));
+ }
+
+ return New;
+}
+
+extern "C" {
+
+LLVMModuleRef LLVMCloneModule(LLVMModuleRef M) {
+ return wrap(CloneModule(*unwrap(M)).release());
+}
+
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/CodeExtractor.cpp
new file mode 100644
index 0000000000..c1fe10504e
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/CodeExtractor.cpp
@@ -0,0 +1,1894 @@
+//===- CodeExtractor.cpp - Pull code region into a new function -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interface to tear out a code region, such as an
+// individual loop or a parallel section, into a new function, replacing it with
+// a call to the new function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CodeExtractor.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <map>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+using ProfileCount = Function::ProfileCount;
+
+#define DEBUG_TYPE "code-extractor"
+
+// Provide a command-line option to aggregate function arguments into a struct
+// for functions produced by the code extractor. This is useful when converting
+// extracted functions to pthread-based code, as only one argument (void*) can
+// be passed in to pthread_create().
+static cl::opt<bool>
+AggregateArgsOpt("aggregate-extracted-args", cl::Hidden,
+ cl::desc("Aggregate arguments to code-extracted functions"));
+
+/// Test whether a block is valid for extraction.
+static bool isBlockValidForExtraction(const BasicBlock &BB,
+ const SetVector<BasicBlock *> &Result,
+ bool AllowVarArgs, bool AllowAlloca) {
+ // taking the address of a basic block moved to another function is illegal
+ if (BB.hasAddressTaken())
+ return false;
+
+ // Don't hoist code that uses another basic block's address, as it's likely
+ // to lead to unexpected behavior, like cross-function jumps.
+ SmallPtrSet<User const *, 16> Visited;
+ SmallVector<User const *, 16> ToVisit;
+
+ for (Instruction const &Inst : BB)
+ ToVisit.push_back(&Inst);
+
+ while (!ToVisit.empty()) {
+ User const *Curr = ToVisit.pop_back_val();
+ if (!Visited.insert(Curr).second)
+ continue;
+ if (isa<BlockAddress const>(Curr))
+ return false; // Even a reference to self is likely to be incompatible.
+
+ if (isa<Instruction>(Curr) && cast<Instruction>(Curr)->getParent() != &BB)
+ continue;
+
+ for (auto const &U : Curr->operands()) {
+ if (auto *UU = dyn_cast<User>(U))
+ ToVisit.push_back(UU);
+ }
+ }
+
+ // If explicitly requested, allow vastart and alloca. For invoke instructions
+ // verify that extraction is valid.
+ for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
+ if (isa<AllocaInst>(I)) {
+ if (!AllowAlloca)
+ return false;
+ continue;
+ }
+
+ if (const auto *II = dyn_cast<InvokeInst>(I)) {
+ // Unwind destination (either a landingpad, catchswitch, or cleanuppad)
+ // must be a part of the subgraph which is being extracted.
+ if (auto *UBB = II->getUnwindDest())
+ if (!Result.count(UBB))
+ return false;
+ continue;
+ }
+
+ // All catch handlers of a catchswitch instruction as well as the unwind
+ // destination must be in the subgraph.
+ if (const auto *CSI = dyn_cast<CatchSwitchInst>(I)) {
+ if (auto *UBB = CSI->getUnwindDest())
+ if (!Result.count(UBB))
+ return false;
+ for (const auto *HBB : CSI->handlers())
+ if (!Result.count(const_cast<BasicBlock*>(HBB)))
+ return false;
+ continue;
+ }
+
+ // Make sure that entire catch handler is within subgraph. It is sufficient
+ // to check that catch return's block is in the list.
+ if (const auto *CPI = dyn_cast<CatchPadInst>(I)) {
+ for (const auto *U : CPI->users())
+ if (const auto *CRI = dyn_cast<CatchReturnInst>(U))
+ if (!Result.count(const_cast<BasicBlock*>(CRI->getParent())))
+ return false;
+ continue;
+ }
+
+ // Do similar checks for the cleanup handler: the entire handler must be in
+ // the subgraph being extracted. For a cleanup return, additionally check
+ // that the unwind destination is also in the subgraph.
+ if (const auto *CPI = dyn_cast<CleanupPadInst>(I)) {
+ for (const auto *U : CPI->users())
+ if (const auto *CRI = dyn_cast<CleanupReturnInst>(U))
+ if (!Result.count(const_cast<BasicBlock*>(CRI->getParent())))
+ return false;
+ continue;
+ }
+ if (const auto *CRI = dyn_cast<CleanupReturnInst>(I)) {
+ if (auto *UBB = CRI->getUnwindDest())
+ if (!Result.count(UBB))
+ return false;
+ continue;
+ }
+
+ if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (const Function *F = CI->getCalledFunction()) {
+ auto IID = F->getIntrinsicID();
+ if (IID == Intrinsic::vastart) {
+ if (AllowVarArgs)
+ continue;
+ else
+ return false;
+ }
+
+ // Currently, we miscompile outlined copies of eh_typeid_for. There are
+ // proposals for fixing this in llvm.org/PR39545.
+ if (IID == Intrinsic::eh_typeid_for)
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+/// Build a set of blocks to extract if the input blocks are viable.
+static SetVector<BasicBlock *>
+buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
+ bool AllowVarArgs, bool AllowAlloca) {
+ assert(!BBs.empty() && "The set of blocks to extract must be non-empty");
+ SetVector<BasicBlock *> Result;
+
+ // Loop over the blocks, adding them to our set-vector, and aborting with an
+ // empty set if we encounter invalid blocks.
+ for (BasicBlock *BB : BBs) {
+ // If this block is dead, don't process it.
+ if (DT && !DT->isReachableFromEntry(BB))
+ continue;
+
+ if (!Result.insert(BB))
+ llvm_unreachable("Repeated basic blocks in extraction input");
+ }
+
+ LLVM_DEBUG(dbgs() << "Region front block: " << Result.front()->getName()
+ << '\n');
+
+ for (auto *BB : Result) {
+ if (!isBlockValidForExtraction(*BB, Result, AllowVarArgs, AllowAlloca))
+ return {};
+
+ // Make sure that the first block is not a landing pad.
+ if (BB == Result.front()) {
+ if (BB->isEHPad()) {
+ LLVM_DEBUG(dbgs() << "The first block cannot be an unwind block\n");
+ return {};
+ }
+ continue;
+ }
+
+ // All blocks other than the first must not have predecessors outside of
+ // the subgraph which is being extracted.
+ for (auto *PBB : predecessors(BB))
+ if (!Result.count(PBB)) {
+ LLVM_DEBUG(dbgs() << "No blocks in this region may have entries from "
+ "outside the region except for the first block!\n"
+ << "Problematic source BB: " << BB->getName() << "\n"
+ << "Problematic destination BB: " << PBB->getName()
+ << "\n");
+ return {};
+ }
+ }
+
+ return Result;
+}
+
+CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
+ bool AggregateArgs, BlockFrequencyInfo *BFI,
+ BranchProbabilityInfo *BPI, AssumptionCache *AC,
+ bool AllowVarArgs, bool AllowAlloca,
+ BasicBlock *AllocationBlock, std::string Suffix)
+ : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
+ BPI(BPI), AC(AC), AllocationBlock(AllocationBlock),
+ AllowVarArgs(AllowVarArgs),
+ Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)),
+ Suffix(Suffix) {}
+
+CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,
+ BlockFrequencyInfo *BFI,
+ BranchProbabilityInfo *BPI, AssumptionCache *AC,
+ std::string Suffix)
+ : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
+ BPI(BPI), AC(AC), AllocationBlock(nullptr), AllowVarArgs(false),
+ Blocks(buildExtractionBlockSet(L.getBlocks(), &DT,
+ /* AllowVarArgs */ false,
+ /* AllowAlloca */ false)),
+ Suffix(Suffix) {}
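+
+// A minimal end-to-end sketch of driving the extractor (illustrative only; BBs
+// is a hypothetical region, the analysis pointers may be null, and the
+// header's default arguments are assumed):
+//   CodeExtractor CE(BBs, &DT, /*AggregateArgs=*/false);
+//   if (CE.isEligible()) {
+//     CodeExtractorAnalysisCache CEAC(*BBs.front()->getParent());
+//     Function *Outlined = CE.extractCodeRegion(CEAC);
+//   }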
+
+/// definedInRegion - Return true if the specified value is defined in the
+/// extracted region.
+static bool definedInRegion(const SetVector<BasicBlock *> &Blocks, Value *V) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (Blocks.count(I->getParent()))
+ return true;
+ return false;
+}
+
+/// definedInCaller - Return true if the specified value is defined in the
+/// function being code extracted, but not in the region being extracted.
+/// These values must be passed in as live-ins to the function.
+static bool definedInCaller(const SetVector<BasicBlock *> &Blocks, Value *V) {
+ if (isa<Argument>(V)) return true;
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (!Blocks.count(I->getParent()))
+ return true;
+ return false;
+}
+
+static BasicBlock *getCommonExitBlock(const SetVector<BasicBlock *> &Blocks) {
+ BasicBlock *CommonExitBlock = nullptr;
+ auto hasNonCommonExitSucc = [&](BasicBlock *Block) {
+ for (auto *Succ : successors(Block)) {
+ // Internal edges, ok.
+ if (Blocks.count(Succ))
+ continue;
+ if (!CommonExitBlock) {
+ CommonExitBlock = Succ;
+ continue;
+ }
+ if (CommonExitBlock != Succ)
+ return true;
+ }
+ return false;
+ };
+
+ if (any_of(Blocks, hasNonCommonExitSucc))
+ return nullptr;
+
+ return CommonExitBlock;
+}
+
+CodeExtractorAnalysisCache::CodeExtractorAnalysisCache(Function &F) {
+ for (BasicBlock &BB : F) {
+ for (Instruction &II : BB.instructionsWithoutDebug())
+ if (auto *AI = dyn_cast<AllocaInst>(&II))
+ Allocas.push_back(AI);
+
+ findSideEffectInfoForBlock(BB);
+ }
+}
+
+void CodeExtractorAnalysisCache::findSideEffectInfoForBlock(BasicBlock &BB) {
+ for (Instruction &II : BB.instructionsWithoutDebug()) {
+ unsigned Opcode = II.getOpcode();
+ Value *MemAddr = nullptr;
+ switch (Opcode) {
+ case Instruction::Store:
+ case Instruction::Load: {
+ if (Opcode == Instruction::Store) {
+ StoreInst *SI = cast<StoreInst>(&II);
+ MemAddr = SI->getPointerOperand();
+ } else {
+ LoadInst *LI = cast<LoadInst>(&II);
+ MemAddr = LI->getPointerOperand();
+ }
+ // Global variable can not be aliased with locals.
+ if (isa<Constant>(MemAddr))
+ break;
+ Value *Base = MemAddr->stripInBoundsConstantOffsets();
+ if (!isa<AllocaInst>(Base)) {
+ SideEffectingBlocks.insert(&BB);
+ return;
+ }
+ BaseMemAddrs[&BB].insert(Base);
+ break;
+ }
+ default: {
+ IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II);
+ if (IntrInst) {
+ if (IntrInst->isLifetimeStartOrEnd())
+ break;
+ SideEffectingBlocks.insert(&BB);
+ return;
+ }
+ // Treat all the other cases conservatively if it has side effects.
+ if (II.mayHaveSideEffects()) {
+ SideEffectingBlocks.insert(&BB);
+ return;
+ }
+ }
+ }
+ }
+}
+
+bool CodeExtractorAnalysisCache::doesBlockContainClobberOfAddr(
+ BasicBlock &BB, AllocaInst *Addr) const {
+ if (SideEffectingBlocks.count(&BB))
+ return true;
+ auto It = BaseMemAddrs.find(&BB);
+ if (It != BaseMemAddrs.end())
+ return It->second.count(Addr);
+ return false;
+}
+
+bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
+ const CodeExtractorAnalysisCache &CEAC, Instruction *Addr) const {
+ AllocaInst *AI = cast<AllocaInst>(Addr->stripInBoundsConstantOffsets());
+ Function *Func = (*Blocks.begin())->getParent();
+ for (BasicBlock &BB : *Func) {
+ if (Blocks.count(&BB))
+ continue;
+ if (CEAC.doesBlockContainClobberOfAddr(BB, AI))
+ return false;
+ }
+ return true;
+}
+
+BasicBlock *
+CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) {
+ BasicBlock *SinglePredFromOutlineRegion = nullptr;
+ assert(!Blocks.count(CommonExitBlock) &&
+ "Expect a block outside the region!");
+ for (auto *Pred : predecessors(CommonExitBlock)) {
+ if (!Blocks.count(Pred))
+ continue;
+ if (!SinglePredFromOutlineRegion) {
+ SinglePredFromOutlineRegion = Pred;
+ } else if (SinglePredFromOutlineRegion != Pred) {
+ SinglePredFromOutlineRegion = nullptr;
+ break;
+ }
+ }
+
+ if (SinglePredFromOutlineRegion)
+ return SinglePredFromOutlineRegion;
+
+#ifndef NDEBUG
+ auto getFirstPHI = [](BasicBlock *BB) {
+ BasicBlock::iterator I = BB->begin();
+ PHINode *FirstPhi = nullptr;
+ while (I != BB->end()) {
+ PHINode *Phi = dyn_cast<PHINode>(I);
+ if (!Phi)
+ break;
+ if (!FirstPhi) {
+ FirstPhi = Phi;
+ break;
+ }
+ }
+ return FirstPhi;
+ };
+ // If there are any phi nodes, the single pred either exists or has already
+ // been created before code extraction.
+ assert(!getFirstPHI(CommonExitBlock) && "Phi not expected");
+#endif
+
+ BasicBlock *NewExitBlock = CommonExitBlock->splitBasicBlock(
+ CommonExitBlock->getFirstNonPHI()->getIterator());
+
+ for (BasicBlock *Pred :
+ llvm::make_early_inc_range(predecessors(CommonExitBlock))) {
+ if (Blocks.count(Pred))
+ continue;
+ Pred->getTerminator()->replaceUsesOfWith(CommonExitBlock, NewExitBlock);
+ }
+ // Now add the old exit block to the outline region.
+ Blocks.insert(CommonExitBlock);
+ OldTargets.push_back(NewExitBlock);
+ return CommonExitBlock;
+}
+
+// Find the pair of lifetime markers for address 'Addr' that are either
+// defined inside the outline region or can legally be shrinkwrapped into the
+// outline region. If there are no other untracked uses of the address, return
+// the pair of markers if found; otherwise return a pair of nullptrs.
+CodeExtractor::LifetimeMarkerInfo
+CodeExtractor::getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC,
+ Instruction *Addr,
+ BasicBlock *ExitBlock) const {
+ LifetimeMarkerInfo Info;
+
+ for (User *U : Addr->users()) {
+ IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
+ if (IntrInst) {
+ // We don't model addresses with multiple start/end markers, but the
+ // markers do not need to be in the region.
+ if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) {
+ if (Info.LifeStart)
+ return {};
+ Info.LifeStart = IntrInst;
+ continue;
+ }
+ if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) {
+ if (Info.LifeEnd)
+ return {};
+ Info.LifeEnd = IntrInst;
+ continue;
+ }
+ // At this point, permit debug uses outside of the region.
+ // This is fixed in a later call to fixupDebugInfoPostExtraction().
+ if (isa<DbgInfoIntrinsic>(IntrInst))
+ continue;
+ }
+ // Find untracked uses of the address, bail.
+ if (!definedInRegion(Blocks, U))
+ return {};
+ }
+
+ if (!Info.LifeStart || !Info.LifeEnd)
+ return {};
+
+ Info.SinkLifeStart = !definedInRegion(Blocks, Info.LifeStart);
+ Info.HoistLifeEnd = !definedInRegion(Blocks, Info.LifeEnd);
+ // Do legality check.
+ if ((Info.SinkLifeStart || Info.HoistLifeEnd) &&
+ !isLegalToShrinkwrapLifetimeMarkers(CEAC, Addr))
+ return {};
+
+ // Check to see if we have a place to do hoisting, if not, bail.
+ if (Info.HoistLifeEnd && !ExitBlock)
+ return {};
+
+ return Info;
+}
+
+void CodeExtractor::findAllocas(const CodeExtractorAnalysisCache &CEAC,
+ ValueSet &SinkCands, ValueSet &HoistCands,
+ BasicBlock *&ExitBlock) const {
+ Function *Func = (*Blocks.begin())->getParent();
+ ExitBlock = getCommonExitBlock(Blocks);
+
+ auto moveOrIgnoreLifetimeMarkers =
+ [&](const LifetimeMarkerInfo &LMI) -> bool {
+ if (!LMI.LifeStart)
+ return false;
+ if (LMI.SinkLifeStart) {
+ LLVM_DEBUG(dbgs() << "Sinking lifetime.start: " << *LMI.LifeStart
+ << "\n");
+ SinkCands.insert(LMI.LifeStart);
+ }
+ if (LMI.HoistLifeEnd) {
+ LLVM_DEBUG(dbgs() << "Hoisting lifetime.end: " << *LMI.LifeEnd << "\n");
+ HoistCands.insert(LMI.LifeEnd);
+ }
+ return true;
+ };
+
+ // Look up allocas in the original function in CodeExtractorAnalysisCache, as
+ // this is much faster than walking all the instructions.
+ for (AllocaInst *AI : CEAC.getAllocas()) {
+ BasicBlock *BB = AI->getParent();
+ if (Blocks.count(BB))
+ continue;
+
+ // As a prior call to extractCodeRegion() may have shrinkwrapped the alloca,
+ // check whether it is actually still in the original function.
+ Function *AIFunc = BB->getParent();
+ if (AIFunc != Func)
+ continue;
+
+ LifetimeMarkerInfo MarkerInfo = getLifetimeMarkers(CEAC, AI, ExitBlock);
+ bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo);
+ if (Moved) {
+ LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n");
+ SinkCands.insert(AI);
+ continue;
+ }
+
+ // Find bitcasts in the outlined region that have lifetime marker users
+ // outside that region. Replace the lifetime marker use with an
+ // outside region bitcast to avoid unnecessary alloca/reload instructions
+ // and extra lifetime markers.
+ SmallVector<Instruction *, 2> LifetimeBitcastUsers;
+ for (User *U : AI->users()) {
+ if (!definedInRegion(Blocks, U))
+ continue;
+
+ if (U->stripInBoundsConstantOffsets() != AI)
+ continue;
+
+ Instruction *Bitcast = cast<Instruction>(U);
+ for (User *BU : Bitcast->users()) {
+ IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(BU);
+ if (!IntrInst)
+ continue;
+
+ if (!IntrInst->isLifetimeStartOrEnd())
+ continue;
+
+ if (definedInRegion(Blocks, IntrInst))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Replace use of extracted region bitcast"
+ << *Bitcast << " in out-of-region lifetime marker "
+ << *IntrInst << "\n");
+ LifetimeBitcastUsers.push_back(IntrInst);
+ }
+ }
+
+ for (Instruction *I : LifetimeBitcastUsers) {
+ Module *M = AIFunc->getParent();
+ LLVMContext &Ctx = M->getContext();
+ auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
+ CastInst *CastI =
+ CastInst::CreatePointerCast(AI, Int8PtrTy, "lt.cast", I);
+ I->replaceUsesOfWith(I->getOperand(1), CastI);
+ }
+
+ // Follow any bitcasts.
+ SmallVector<Instruction *, 2> Bitcasts;
+ SmallVector<LifetimeMarkerInfo, 2> BitcastLifetimeInfo;
+ for (User *U : AI->users()) {
+ if (U->stripInBoundsConstantOffsets() == AI) {
+ Instruction *Bitcast = cast<Instruction>(U);
+ LifetimeMarkerInfo LMI = getLifetimeMarkers(CEAC, Bitcast, ExitBlock);
+ if (LMI.LifeStart) {
+ Bitcasts.push_back(Bitcast);
+ BitcastLifetimeInfo.push_back(LMI);
+ continue;
+ }
+ }
+
+ // Found unknown use of AI.
+ if (!definedInRegion(Blocks, U)) {
+ Bitcasts.clear();
+ break;
+ }
+ }
+
+ // Either no bitcasts reference the alloca or there are unknown uses.
+ if (Bitcasts.empty())
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI << "\n");
+ SinkCands.insert(AI);
+ for (unsigned I = 0, E = Bitcasts.size(); I != E; ++I) {
+ Instruction *BitcastAddr = Bitcasts[I];
+ const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo[I];
+ assert(LMI.LifeStart &&
+ "Unsafe to sink bitcast without lifetime markers");
+ moveOrIgnoreLifetimeMarkers(LMI);
+ if (!definedInRegion(Blocks, BitcastAddr)) {
+ LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr
+ << "\n");
+ SinkCands.insert(BitcastAddr);
+ }
+ }
+ }
+}
+
+bool CodeExtractor::isEligible() const {
+ if (Blocks.empty())
+ return false;
+ BasicBlock *Header = *Blocks.begin();
+ Function *F = Header->getParent();
+
+ // For functions with varargs, check that varargs handling is only done in the
+ // outlined function, i.e., vastart and vaend are only used in outlined blocks.
+ if (AllowVarArgs && F->getFunctionType()->isVarArg()) {
+ auto containsVarArgIntrinsic = [](const Instruction &I) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&I))
+ if (const Function *Callee = CI->getCalledFunction())
+ return Callee->getIntrinsicID() == Intrinsic::vastart ||
+ Callee->getIntrinsicID() == Intrinsic::vaend;
+ return false;
+ };
+
+ for (auto &BB : *F) {
+ if (Blocks.count(&BB))
+ continue;
+ if (llvm::any_of(BB, containsVarArgIntrinsic))
+ return false;
+ }
+ }
+ return true;
+}
+
+void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
+ const ValueSet &SinkCands) const {
+ for (BasicBlock *BB : Blocks) {
+ // If a used value is defined outside the region, it's an input. If an
+ // instruction is used outside the region, it's an output.
+ for (Instruction &II : *BB) {
+ for (auto &OI : II.operands()) {
+ Value *V = OI;
+ if (!SinkCands.count(V) && definedInCaller(Blocks, V))
+ Inputs.insert(V);
+ }
+
+ for (User *U : II.users())
+ if (!definedInRegion(Blocks, U)) {
+ Outputs.insert(&II);
+ break;
+ }
+ }
+ }
+}
+
+/// severSplitPHINodesOfEntry - If a PHI node has multiple inputs from outside
+/// of the region, we need to split the entry block of the region so that the
+/// PHI node is easier to deal with.
+void CodeExtractor::severSplitPHINodesOfEntry(BasicBlock *&Header) {
+ unsigned NumPredsFromRegion = 0;
+ unsigned NumPredsOutsideRegion = 0;
+
+ if (Header != &Header->getParent()->getEntryBlock()) {
+ PHINode *PN = dyn_cast<PHINode>(Header->begin());
+ if (!PN) return; // No PHI nodes.
+
+ // If the header node contains any PHI nodes, check to see if there is more
+ // than one entry from outside the region. If so, we need to sever the
+ // header block into two.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (Blocks.count(PN->getIncomingBlock(i)))
+ ++NumPredsFromRegion;
+ else
+ ++NumPredsOutsideRegion;
+
+ // If there is one (or fewer) predecessor from outside the region, we don't
+ // need to do anything special.
+ if (NumPredsOutsideRegion <= 1) return;
+ }
+
+ // Otherwise, we need to split the header block into two pieces: one
+ // containing PHI nodes merging values from outside of the region, and a
+ // second that contains all of the code for the block and merges back any
+ // incoming values from inside of the region.
+ BasicBlock *NewBB = SplitBlock(Header, Header->getFirstNonPHI(), DT);
+
+ // We only want to code extract the second block now, and it becomes the new
+ // header of the region.
+ BasicBlock *OldPred = Header;
+ Blocks.remove(OldPred);
+ Blocks.insert(NewBB);
+ Header = NewBB;
+
+ // Okay, now we need to adjust the PHI nodes and any branches from within the
+ // region to go to the new header block instead of the old header block.
+ if (NumPredsFromRegion) {
+ PHINode *PN = cast<PHINode>(OldPred->begin());
+ // Loop over all of the predecessors of OldPred that are in the region,
+ // changing them to branch to NewBB instead.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (Blocks.count(PN->getIncomingBlock(i))) {
+ Instruction *TI = PN->getIncomingBlock(i)->getTerminator();
+ TI->replaceUsesOfWith(OldPred, NewBB);
+ }
+
+ // Okay, everything within the region is now branching to the right block; we
+ // just have to update the PHI nodes now, inserting PHI nodes into NewBB.
+ BasicBlock::iterator AfterPHIs;
+ for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) {
+ PHINode *PN = cast<PHINode>(AfterPHIs);
+ // Create a new PHI node in the new region, which has an incoming value
+ // from OldPred of PN.
+ PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion,
+ PN->getName() + ".ce", &NewBB->front());
+ PN->replaceAllUsesWith(NewPN);
+ NewPN->addIncoming(PN, OldPred);
+
+ // Loop over all of the incoming values in PN, moving them to NewPN if they
+ // are from the extracted region.
+ for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
+ if (Blocks.count(PN->getIncomingBlock(i))) {
+ NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i));
+ PN->removeIncomingValue(i);
+ --i;
+ }
+ }
+ }
+ }
+}
+
+/// severSplitPHINodesOfExits - If PHI nodes in exit blocks have inputs from the
+/// outlined region, we split each such PHI in two: one with the inputs from the
+/// region and one with the remaining incoming blocks; the former PHIs are then
+/// placed in the outlined region.
+void CodeExtractor::severSplitPHINodesOfExits(
+ const SmallPtrSetImpl<BasicBlock *> &Exits) {
+ for (BasicBlock *ExitBB : Exits) {
+ BasicBlock *NewBB = nullptr;
+
+ for (PHINode &PN : ExitBB->phis()) {
+ // Find all incoming values from the outlining region.
+ SmallVector<unsigned, 2> IncomingVals;
+ for (unsigned i = 0; i < PN.getNumIncomingValues(); ++i)
+ if (Blocks.count(PN.getIncomingBlock(i)))
+ IncomingVals.push_back(i);
+
+ // Do not process the PHI if it has one (or fewer) predecessors from the
+ // region. If the PHI has exactly one predecessor from the region, only that
+ // incoming value will be replaced in the codeRepl block, so it is safe to
+ // skip the PHI.
+ if (IncomingVals.size() <= 1)
+ continue;
+
+ // Create block for new PHIs and add it to the list of outlined if it
+ // wasn't done before.
+ if (!NewBB) {
+ NewBB = BasicBlock::Create(ExitBB->getContext(),
+ ExitBB->getName() + ".split",
+ ExitBB->getParent(), ExitBB);
+ SmallVector<BasicBlock *, 4> Preds(predecessors(ExitBB));
+ for (BasicBlock *PredBB : Preds)
+ if (Blocks.count(PredBB))
+ PredBB->getTerminator()->replaceUsesOfWith(ExitBB, NewBB);
+ BranchInst::Create(ExitBB, NewBB);
+ Blocks.insert(NewBB);
+ }
+
+ // Split this PHI.
+ PHINode *NewPN =
+ PHINode::Create(PN.getType(), IncomingVals.size(),
+ PN.getName() + ".ce", NewBB->getFirstNonPHI());
+ for (unsigned i : IncomingVals)
+ NewPN->addIncoming(PN.getIncomingValue(i), PN.getIncomingBlock(i));
+ for (unsigned i : reverse(IncomingVals))
+ PN.removeIncomingValue(i, false);
+ PN.addIncoming(NewPN, NewBB);
+ }
+ }
+}
+
+void CodeExtractor::splitReturnBlocks() {
+ for (BasicBlock *Block : Blocks)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(Block->getTerminator())) {
+ BasicBlock *New =
+ Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret");
+ if (DT) {
+ // Old dominates New. New node dominates all other nodes dominated
+ // by Old.
+ DomTreeNode *OldNode = DT->getNode(Block);
+ SmallVector<DomTreeNode *, 8> Children(OldNode->begin(),
+ OldNode->end());
+
+ DomTreeNode *NewNode = DT->addNewBlock(New, Block);
+
+ for (DomTreeNode *I : Children)
+ DT->changeImmediateDominator(I, NewNode);
+ }
+ }
+}
+
+/// constructFunction - make a function based on inputs and outputs, as follows:
+/// f(in0, ..., inN, out0, ..., outN)
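+///
+/// When AggregateArgs is in effect, inputs/outputs that are not listed in
+/// ExcludeArgsFromAggregate are instead packed into a single struct, and a
+/// pointer to that struct is appended as the last parameter (signature sketch
+/// derived from the code below):
+///   f(scalar_in..., scalar_out*..., { agg_in..., agg_out... }*)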
+Function *CodeExtractor::constructFunction(const ValueSet &inputs,
+ const ValueSet &outputs,
+ BasicBlock *header,
+ BasicBlock *newRootNode,
+ BasicBlock *newHeader,
+ Function *oldFunction,
+ Module *M) {
+ LLVM_DEBUG(dbgs() << "inputs: " << inputs.size() << "\n");
+ LLVM_DEBUG(dbgs() << "outputs: " << outputs.size() << "\n");
+
+ // This function returns unsigned, outputs will go back by reference.
+ switch (NumExitBlocks) {
+ case 0:
+ case 1: RetTy = Type::getVoidTy(header->getContext()); break;
+ case 2: RetTy = Type::getInt1Ty(header->getContext()); break;
+ default: RetTy = Type::getInt16Ty(header->getContext()); break;
+ }
+
+ std::vector<Type *> ParamTy;
+ std::vector<Type *> AggParamTy;
+ ValueSet StructValues;
+ const DataLayout &DL = M->getDataLayout();
+
+ // Add the types of the input values to the function's argument list
+ for (Value *value : inputs) {
+ LLVM_DEBUG(dbgs() << "value used in func: " << *value << "\n");
+ if (AggregateArgs && !ExcludeArgsFromAggregate.contains(value)) {
+ AggParamTy.push_back(value->getType());
+ StructValues.insert(value);
+ } else
+ ParamTy.push_back(value->getType());
+ }
+
+ // Add the types of the output values to the function's argument list.
+ for (Value *output : outputs) {
+ LLVM_DEBUG(dbgs() << "instr used in func: " << *output << "\n");
+ if (AggregateArgs && !ExcludeArgsFromAggregate.contains(output)) {
+ AggParamTy.push_back(output->getType());
+ StructValues.insert(output);
+ } else
+ ParamTy.push_back(
+ PointerType::get(output->getType(), DL.getAllocaAddrSpace()));
+ }
+
+ assert(
+ (ParamTy.size() + AggParamTy.size()) ==
+ (inputs.size() + outputs.size()) &&
+ "Number of scalar and aggregate params does not match inputs, outputs");
+ assert((StructValues.empty() || AggregateArgs) &&
+ "Expected StructValues only with AggregateArgs set");
+
+ // Concatenate scalar and aggregate params in ParamTy.
+ size_t NumScalarParams = ParamTy.size();
+ StructType *StructTy = nullptr;
+ if (AggregateArgs && !AggParamTy.empty()) {
+ StructTy = StructType::get(M->getContext(), AggParamTy);
+ ParamTy.push_back(PointerType::get(StructTy, DL.getAllocaAddrSpace()));
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "Function type: " << *RetTy << " f(";
+ for (Type *i : ParamTy)
+ dbgs() << *i << ", ";
+ dbgs() << ")\n";
+ });
+
+ FunctionType *funcType = FunctionType::get(
+ RetTy, ParamTy, AllowVarArgs && oldFunction->isVarArg());
+
+ std::string SuffixToUse =
+ Suffix.empty()
+ ? (header->getName().empty() ? "extracted" : header->getName().str())
+ : Suffix;
+ // Create the new function
+ Function *newFunction = Function::Create(
+ funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(),
+ oldFunction->getName() + "." + SuffixToUse, M);
+
+ // Inherit all of the target dependent attributes and white-listed
+ // target independent attributes. (For example, if the extracted region
+ // contains a call to an x86.sse instruction, we need to make sure that the
+ // extracted region has the "target-features" attribute allowing it to be
+ // lowered.)
+ // FIXME: This should be changed to check to see if a specific
+ // attribute can not be inherited.
+ for (const auto &Attr : oldFunction->getAttributes().getFnAttrs()) {
+ if (Attr.isStringAttribute()) {
+ if (Attr.getKindAsString() == "thunk")
+ continue;
+ } else
+ switch (Attr.getKindAsEnum()) {
+ // Those attributes cannot be propagated safely. Explicitly list them
+ // here so we get a warning if new attributes are added.
+ case Attribute::AllocSize:
+ case Attribute::Builtin:
+ case Attribute::Convergent:
+ case Attribute::JumpTable:
+ case Attribute::Naked:
+ case Attribute::NoBuiltin:
+ case Attribute::NoMerge:
+ case Attribute::NoReturn:
+ case Attribute::NoSync:
+ case Attribute::ReturnsTwice:
+ case Attribute::Speculatable:
+ case Attribute::StackAlignment:
+ case Attribute::WillReturn:
+ case Attribute::AllocKind:
+ case Attribute::PresplitCoroutine:
+ case Attribute::Memory:
+ continue;
+ // Those attributes should be safe to propagate to the extracted function.
+ case Attribute::AlwaysInline:
+ case Attribute::Cold:
+ case Attribute::DisableSanitizerInstrumentation:
+ case Attribute::FnRetThunkExtern:
+ case Attribute::Hot:
+ case Attribute::NoRecurse:
+ case Attribute::InlineHint:
+ case Attribute::MinSize:
+ case Attribute::NoCallback:
+ case Attribute::NoDuplicate:
+ case Attribute::NoFree:
+ case Attribute::NoImplicitFloat:
+ case Attribute::NoInline:
+ case Attribute::NonLazyBind:
+ case Attribute::NoRedZone:
+ case Attribute::NoUnwind:
+ case Attribute::NoSanitizeBounds:
+ case Attribute::NoSanitizeCoverage:
+ case Attribute::NullPointerIsValid:
+ case Attribute::OptForFuzzing:
+ case Attribute::OptimizeNone:
+ case Attribute::OptimizeForSize:
+ case Attribute::SafeStack:
+ case Attribute::ShadowCallStack:
+ case Attribute::SanitizeAddress:
+ case Attribute::SanitizeMemory:
+ case Attribute::SanitizeThread:
+ case Attribute::SanitizeHWAddress:
+ case Attribute::SanitizeMemTag:
+ case Attribute::SpeculativeLoadHardening:
+ case Attribute::StackProtect:
+ case Attribute::StackProtectReq:
+ case Attribute::StackProtectStrong:
+ case Attribute::StrictFP:
+ case Attribute::UWTable:
+ case Attribute::VScaleRange:
+ case Attribute::NoCfCheck:
+ case Attribute::MustProgress:
+ case Attribute::NoProfile:
+ case Attribute::SkipProfile:
+ break;
+ // These attributes cannot be applied to functions.
+ case Attribute::Alignment:
+ case Attribute::AllocatedPointer:
+ case Attribute::AllocAlign:
+ case Attribute::ByVal:
+ case Attribute::Dereferenceable:
+ case Attribute::DereferenceableOrNull:
+ case Attribute::ElementType:
+ case Attribute::InAlloca:
+ case Attribute::InReg:
+ case Attribute::Nest:
+ case Attribute::NoAlias:
+ case Attribute::NoCapture:
+ case Attribute::NoUndef:
+ case Attribute::NonNull:
+ case Attribute::Preallocated:
+ case Attribute::ReadNone:
+ case Attribute::ReadOnly:
+ case Attribute::Returned:
+ case Attribute::SExt:
+ case Attribute::StructRet:
+ case Attribute::SwiftError:
+ case Attribute::SwiftSelf:
+ case Attribute::SwiftAsync:
+ case Attribute::ZExt:
+ case Attribute::ImmArg:
+ case Attribute::ByRef:
+ case Attribute::WriteOnly:
+ // These are not really attributes.
+ case Attribute::None:
+ case Attribute::EndAttrKinds:
+ case Attribute::EmptyKey:
+ case Attribute::TombstoneKey:
+ llvm_unreachable("Not a function attribute");
+ }
+
+ newFunction->addFnAttr(Attr);
+ }
+ newFunction->insert(newFunction->end(), newRootNode);
+
+ // Create scalar and aggregate iterators to name all of the arguments we
+ // inserted.
+ Function::arg_iterator ScalarAI = newFunction->arg_begin();
+ Function::arg_iterator AggAI = std::next(ScalarAI, NumScalarParams);
+
+ // Rewrite all users of the inputs in the extracted region to use the
+ // arguments (or appropriate addressing into struct) instead.
+ for (unsigned i = 0, e = inputs.size(), aggIdx = 0; i != e; ++i) {
+ Value *RewriteVal;
+ if (AggregateArgs && StructValues.contains(inputs[i])) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext()));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), aggIdx);
+ Instruction *TI = newFunction->begin()->getTerminator();
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ StructTy, &*AggAI, Idx, "gep_" + inputs[i]->getName(), TI);
+ RewriteVal = new LoadInst(StructTy->getElementType(aggIdx), GEP,
+ "loadgep_" + inputs[i]->getName(), TI);
+ ++aggIdx;
+ } else
+ RewriteVal = &*ScalarAI++;
+
+ std::vector<User *> Users(inputs[i]->user_begin(), inputs[i]->user_end());
+ for (User *use : Users)
+ if (Instruction *inst = dyn_cast<Instruction>(use))
+ if (Blocks.count(inst->getParent()))
+ inst->replaceUsesOfWith(inputs[i], RewriteVal);
+ }
+
+ // Set names for input and output arguments.
+ if (NumScalarParams) {
+ ScalarAI = newFunction->arg_begin();
+ for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++ScalarAI)
+ if (!StructValues.contains(inputs[i]))
+ ScalarAI->setName(inputs[i]->getName());
+ for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++ScalarAI)
+ if (!StructValues.contains(outputs[i]))
+ ScalarAI->setName(outputs[i]->getName() + ".out");
+ }
+
+ // Rewrite branches into the header block from basic blocks outside of the
+ // code region so that they target the replacement block (newHeader) instead.
+ // This must be done before we lose track of which blocks were originally in
+ // the code region.
+ std::vector<User *> Users(header->user_begin(), header->user_end());
+ for (auto &U : Users)
+ // If the BasicBlock which contains the branch is not in the region,
+ // modify the branch target to point to the new block.
+ if (Instruction *I = dyn_cast<Instruction>(U))
+ if (I->isTerminator() && I->getFunction() == oldFunction &&
+ !Blocks.count(I->getParent()))
+ I->replaceUsesOfWith(header, newHeader);
+
+ return newFunction;
+}
+
+/// Erase lifetime.start markers which reference inputs to the extraction
+/// region, and insert the referenced memory into \p LifetimesStart.
+///
+/// The extraction region is defined by a set of blocks (\p Blocks), and a set
+/// of allocas which will be moved from the caller function into the extracted
+/// function (\p SunkAllocas).
+static void eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
+ const SetVector<Value *> &SunkAllocas,
+ SetVector<Value *> &LifetimesStart) {
+ for (BasicBlock *BB : Blocks) {
+ for (Instruction &I : llvm::make_early_inc_range(*BB)) {
+ auto *II = dyn_cast<IntrinsicInst>(&I);
+ if (!II || !II->isLifetimeStartOrEnd())
+ continue;
+
+ // Get the memory operand of the lifetime marker. If the underlying
+ // object is a sunk alloca, or is otherwise defined in the extraction
+ // region, the lifetime marker must not be erased.
+ Value *Mem = II->getOperand(1)->stripInBoundsOffsets();
+ if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem))
+ continue;
+
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+ LifetimesStart.insert(Mem);
+ II->eraseFromParent();
+ }
+ }
+}
+
+/// Insert lifetime start/end markers surrounding the call to the new function
+/// for objects defined in the caller.
+static void insertLifetimeMarkersSurroundingCall(
+ Module *M, ArrayRef<Value *> LifetimesStart, ArrayRef<Value *> LifetimesEnd,
+ CallInst *TheCall) {
+ LLVMContext &Ctx = M->getContext();
+ auto Int8PtrTy = Type::getInt8PtrTy(Ctx);
+ auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1);
+ Instruction *Term = TheCall->getParent()->getTerminator();
+
+ // The memory argument to a lifetime marker must be an i8*. Cache any bitcasts
+ // needed to satisfy this requirement so they may be reused.
+ DenseMap<Value *, Value *> Bitcasts;
+
+ // Emit lifetime markers for the pointers given in \p Objects. Insert the
+ // markers before the call if \p InsertBefore, and after the call otherwise.
+ auto insertMarkers = [&](Function *MarkerFunc, ArrayRef<Value *> Objects,
+ bool InsertBefore) {
+ for (Value *Mem : Objects) {
+ assert((!isa<Instruction>(Mem) || cast<Instruction>(Mem)->getFunction() ==
+ TheCall->getFunction()) &&
+ "Input memory not defined in original function");
+ Value *&MemAsI8Ptr = Bitcasts[Mem];
+ if (!MemAsI8Ptr) {
+ if (Mem->getType() == Int8PtrTy)
+ MemAsI8Ptr = Mem;
+ else
+ MemAsI8Ptr =
+ CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall);
+ }
+
+ auto Marker = CallInst::Create(MarkerFunc, {NegativeOne, MemAsI8Ptr});
+ if (InsertBefore)
+ Marker->insertBefore(TheCall);
+ else
+ Marker->insertBefore(Term);
+ }
+ };
+
+ if (!LifetimesStart.empty()) {
+ auto StartFn = llvm::Intrinsic::getDeclaration(
+ M, llvm::Intrinsic::lifetime_start, Int8PtrTy);
+ insertMarkers(StartFn, LifetimesStart, /*InsertBefore=*/true);
+ }
+
+ if (!LifetimesEnd.empty()) {
+ auto EndFn = llvm::Intrinsic::getDeclaration(
+ M, llvm::Intrinsic::lifetime_end, Int8PtrTy);
+ insertMarkers(EndFn, LifetimesEnd, /*InsertBefore=*/false);
+ }
+}
+
+/// emitCallAndSwitchStatement - This method sets up the caller side by adding
+/// the call instruction, splitting any PHI nodes in the header block as
+/// necessary.
+CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
+ BasicBlock *codeReplacer,
+ ValueSet &inputs,
+ ValueSet &outputs) {
+ // Emit a call to the new function, passing in: a pointer to the struct (if
+ // aggregating parameters), or plain inputs and allocated memory for outputs.
+ std::vector<Value *> params, ReloadOutputs, Reloads;
+ ValueSet StructValues;
+
+ Module *M = newFunction->getParent();
+ LLVMContext &Context = M->getContext();
+ const DataLayout &DL = M->getDataLayout();
+ CallInst *call = nullptr;
+
+ // Add inputs as params, or to be filled into the struct
+ unsigned ScalarInputArgNo = 0;
+ SmallVector<unsigned, 1> SwiftErrorArgs;
+ for (Value *input : inputs) {
+ if (AggregateArgs && !ExcludeArgsFromAggregate.contains(input))
+ StructValues.insert(input);
+ else {
+ params.push_back(input);
+ if (input->isSwiftError())
+ SwiftErrorArgs.push_back(ScalarInputArgNo);
+ }
+ ++ScalarInputArgNo;
+ }
+
+ // Create allocas for the outputs
+ unsigned ScalarOutputArgNo = 0;
+ for (Value *output : outputs) {
+ if (AggregateArgs && !ExcludeArgsFromAggregate.contains(output)) {
+ StructValues.insert(output);
+ } else {
+ AllocaInst *alloca =
+ new AllocaInst(output->getType(), DL.getAllocaAddrSpace(),
+ nullptr, output->getName() + ".loc",
+ &codeReplacer->getParent()->front().front());
+ ReloadOutputs.push_back(alloca);
+ params.push_back(alloca);
+ ++ScalarOutputArgNo;
+ }
+ }
+
+ StructType *StructArgTy = nullptr;
+ AllocaInst *Struct = nullptr;
+ unsigned NumAggregatedInputs = 0;
+ if (AggregateArgs && !StructValues.empty()) {
+ std::vector<Type *> ArgTypes;
+ for (Value *V : StructValues)
+ ArgTypes.push_back(V->getType());
+
+ // Allocate a struct at the beginning of this function
+ StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
+ Struct = new AllocaInst(
+ StructArgTy, DL.getAllocaAddrSpace(), nullptr, "structArg",
+ AllocationBlock ? &*AllocationBlock->getFirstInsertionPt()
+ : &codeReplacer->getParent()->front().front());
+ params.push_back(Struct);
+
+ // Store aggregated inputs in the struct.
+ for (unsigned i = 0, e = StructValues.size(); i != e; ++i) {
+ if (inputs.contains(StructValues[i])) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i);
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName());
+ GEP->insertInto(codeReplacer, codeReplacer->end());
+ new StoreInst(StructValues[i], GEP, codeReplacer);
+ NumAggregatedInputs++;
+ }
+ }
+ }
+
+ // Emit the call to the function
+ call = CallInst::Create(newFunction, params,
+ NumExitBlocks > 1 ? "targetBlock" : "");
+ // Add debug location to the new call, if the original function has debug
+ // info. In that case, the terminator of the entry block of the extracted
+ // function contains the first debug location of the extracted function,
+ // set in extractCodeRegion.
+ if (codeReplacer->getParent()->getSubprogram()) {
+ if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc())
+ call->setDebugLoc(DL);
+ }
+ call->insertInto(codeReplacer, codeReplacer->end());
+
+ // Set swifterror parameter attributes.
+ for (unsigned SwiftErrArgNo : SwiftErrorArgs) {
+ call->addParamAttr(SwiftErrArgNo, Attribute::SwiftError);
+ newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError);
+ }
+
+ // Reload the outputs passed in by reference: use the struct if the output is
+ // part of the aggregate, otherwise reload from the scalar argument.
+ for (unsigned i = 0, e = outputs.size(), scalarIdx = 0,
+ aggIdx = NumAggregatedInputs;
+ i != e; ++i) {
+ Value *Output = nullptr;
+ if (AggregateArgs && StructValues.contains(outputs[i])) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), aggIdx);
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName());
+ GEP->insertInto(codeReplacer, codeReplacer->end());
+ Output = GEP;
+ ++aggIdx;
+ } else {
+ Output = ReloadOutputs[scalarIdx];
+ ++scalarIdx;
+ }
+ LoadInst *load = new LoadInst(outputs[i]->getType(), Output,
+ outputs[i]->getName() + ".reload",
+ codeReplacer);
+ Reloads.push_back(load);
+ std::vector<User *> Users(outputs[i]->user_begin(), outputs[i]->user_end());
+ for (User *U : Users) {
+ Instruction *inst = cast<Instruction>(U);
+ if (!Blocks.count(inst->getParent()))
+ inst->replaceUsesOfWith(outputs[i], load);
+ }
+ }
+
+ // Now we can emit a switch statement using the call as a value.
+ SwitchInst *TheSwitch =
+ SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)),
+ codeReplacer, 0, codeReplacer);
+
+ // Since there may be multiple exits from the original region, make the new
+ // function return an unsigned value and switch on that number to reach the
+ // correct exit block. The loops below create the exit stubs and update any
+ // terminator instructions in the to-be-extracted region that branch to
+ // blocks that are not in the region.
+ std::map<BasicBlock *, BasicBlock *> ExitBlockMap;
+
+ // Iterate over the previously collected targets, and create new blocks inside
+ // the function to branch to.
+ unsigned switchVal = 0;
+ for (BasicBlock *OldTarget : OldTargets) {
+ if (Blocks.count(OldTarget))
+ continue;
+ BasicBlock *&NewTarget = ExitBlockMap[OldTarget];
+ if (NewTarget)
+ continue;
+
+ // If we don't already have an exit stub for this non-extracted
+ // destination, create one now!
+ NewTarget = BasicBlock::Create(Context,
+ OldTarget->getName() + ".exitStub",
+ newFunction);
+ unsigned SuccNum = switchVal++;
+
+ Value *brVal = nullptr;
+ assert(NumExitBlocks < 0xffff && "too many exit blocks for switch");
+ switch (NumExitBlocks) {
+ case 0:
+ case 1: break; // No value needed.
+ case 2: // Conditional branch, return a bool
+ brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum);
+ break;
+ default:
+ brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum);
+ break;
+ }
+
+ ReturnInst::Create(Context, brVal, NewTarget);
+
+ // Update the switch instruction.
+ TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context),
+ SuccNum),
+ OldTarget);
+ }
+
+ for (BasicBlock *Block : Blocks) {
+ Instruction *TI = Block->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ if (Blocks.count(TI->getSuccessor(i)))
+ continue;
+ BasicBlock *OldTarget = TI->getSuccessor(i);
+ // Look up the exit stub which returns the appropriate value for this target.
+ BasicBlock *NewTarget = ExitBlockMap[OldTarget];
+ assert(NewTarget && "Unknown target block!");
+
+ // rewrite the original branch instruction with this new target
+ TI->setSuccessor(i, NewTarget);
+ }
+ }
+
+ // Store the arguments right after the definition of the output value.
+ // This must happen after creating the exit stubs to ensure that the store of
+ // an invoke result is placed inside the outlined function.
+ Function::arg_iterator ScalarOutputArgBegin = newFunction->arg_begin();
+ std::advance(ScalarOutputArgBegin, ScalarInputArgNo);
+ Function::arg_iterator AggOutputArgBegin = newFunction->arg_begin();
+ std::advance(AggOutputArgBegin, ScalarInputArgNo + ScalarOutputArgNo);
+
+ for (unsigned i = 0, e = outputs.size(), aggIdx = NumAggregatedInputs; i != e;
+ ++i) {
+ auto *OutI = dyn_cast<Instruction>(outputs[i]);
+ if (!OutI)
+ continue;
+
+ // Find proper insertion point.
+ BasicBlock::iterator InsertPt;
+ // In case OutI is an invoke, we insert the store at the beginning in the
+ // 'normal destination' BB. Otherwise we insert the store right after OutI.
+ if (auto *InvokeI = dyn_cast<InvokeInst>(OutI))
+ InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt();
+ else if (auto *Phi = dyn_cast<PHINode>(OutI))
+ InsertPt = Phi->getParent()->getFirstInsertionPt();
+ else
+ InsertPt = std::next(OutI->getIterator());
+
+ Instruction *InsertBefore = &*InsertPt;
+ assert((InsertBefore->getFunction() == newFunction ||
+ Blocks.count(InsertBefore->getParent())) &&
+ "InsertPt should be in new function");
+ if (AggregateArgs && StructValues.contains(outputs[i])) {
+ assert(AggOutputArgBegin != newFunction->arg_end() &&
+ "Number of aggregate output arguments should match "
+ "the number of defined values");
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), aggIdx);
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ StructArgTy, &*AggOutputArgBegin, Idx, "gep_" + outputs[i]->getName(),
+ InsertBefore);
+ new StoreInst(outputs[i], GEP, InsertBefore);
+ ++aggIdx;
+ // Since there should be only one struct argument aggregating all the output
+ // values, we do not increment AggOutputArgBegin here; it always points to
+ // the struct argument.
+ } else {
+ assert(ScalarOutputArgBegin != newFunction->arg_end() &&
+ "Number of scalar output arguments should match "
+ "the number of defined values");
+ new StoreInst(outputs[i], &*ScalarOutputArgBegin, InsertBefore);
+ ++ScalarOutputArgBegin;
+ }
+ }
+
+ // Now that we've done the deed, simplify the switch instruction.
+ Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType();
+ switch (NumExitBlocks) {
+ case 0:
+ // There are no real successors (the switch's only successor is the block
+ // containing it), which means that previously this was the last part of the
+ // function; hence it should be rewritten as a `ret'.
+
+ // Check if the function should return a value
+ if (OldFnRetTy->isVoidTy()) {
+ ReturnInst::Create(Context, nullptr, TheSwitch); // Return void
+ } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) {
+ // return what we have
+ ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch);
+ } else {
+ // Otherwise we must have extracted code that ends in an unwind or similar;
+ // just return a null value of the caller's return type.
+ ReturnInst::Create(Context,
+ Constant::getNullValue(OldFnRetTy), TheSwitch);
+ }
+
+ TheSwitch->eraseFromParent();
+ break;
+ case 1:
+ // Only a single destination, change the switch into an unconditional
+ // branch.
+ BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch);
+ TheSwitch->eraseFromParent();
+ break;
+ case 2:
+ BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2),
+ call, TheSwitch);
+ TheSwitch->eraseFromParent();
+ break;
+ default:
+ // Otherwise, make the default destination of the switch instruction be one
+ // of the other successors.
+ TheSwitch->setCondition(call);
+ TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks));
+ // Remove redundant case
+ TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1));
+ break;
+ }
+
+ // Insert lifetime markers around the reloads of any output values. The
+ // allocas that the output values are stored in are only in use in the
+ // codeRepl block.
+ insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call);
+
+ return call;
+}
+
+void CodeExtractor::moveCodeToFunction(Function *newFunction) {
+ auto newFuncIt = newFunction->front().getIterator();
+ for (BasicBlock *Block : Blocks) {
+ // Remove the basic block from the old function's list of blocks; it is
+ // re-inserted into the new function below.
+ Block->removeFromParent();
+
+ // Insert this basic block into the new function, right after the entry block
+ // created for it. The entry block may be followed by a set of exit blocks at
+ // this point, but these exit blocks should be placed at the end of the new
+ // function.
+ newFuncIt = newFunction->insert(std::next(newFuncIt), Block);
+ }
+}
+
+void CodeExtractor::calculateNewCallTerminatorWeights(
+ BasicBlock *CodeReplacer,
+ DenseMap<BasicBlock *, BlockFrequency> &ExitWeights,
+ BranchProbabilityInfo *BPI) {
+ using Distribution = BlockFrequencyInfoImplBase::Distribution;
+ using BlockNode = BlockFrequencyInfoImplBase::BlockNode;
+
+ // Update the branch weights for the exit block.
+ Instruction *TI = CodeReplacer->getTerminator();
+ SmallVector<unsigned, 8> BranchWeights(TI->getNumSuccessors(), 0);
+
+ // Block Frequency distribution with dummy node.
+ Distribution BranchDist;
+
+ SmallVector<BranchProbability, 4> EdgeProbabilities(
+ TI->getNumSuccessors(), BranchProbability::getUnknown());
+
+ // Add each of the frequencies of the successors.
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) {
+ BlockNode ExitNode(i);
+ uint64_t ExitFreq = ExitWeights[TI->getSuccessor(i)].getFrequency();
+ if (ExitFreq != 0)
+ BranchDist.addExit(ExitNode, ExitFreq);
+ else
+ EdgeProbabilities[i] = BranchProbability::getZero();
+ }
+
+ // Check for no total weight.
+ if (BranchDist.Total == 0) {
+ BPI->setEdgeProbability(CodeReplacer, EdgeProbabilities);
+ return;
+ }
+
+ // Normalize the distribution so that the weights can fit in unsigned.
+ BranchDist.normalize();
+
+ // Create normalized branch weights and set the metadata.
+ for (unsigned I = 0, E = BranchDist.Weights.size(); I < E; ++I) {
+ const auto &Weight = BranchDist.Weights[I];
+
+ // Get the weight and update the current BFI.
+ BranchWeights[Weight.TargetNode.Index] = Weight.Amount;
+ BranchProbability BP(Weight.Amount, BranchDist.Total);
+ EdgeProbabilities[Weight.TargetNode.Index] = BP;
+ }
+ BPI->setEdgeProbability(CodeReplacer, EdgeProbabilities);
+ TI->setMetadata(
+ LLVMContext::MD_prof,
+ MDBuilder(TI->getContext()).createBranchWeights(BranchWeights));
+}
+
+/// Erase debug info intrinsics which refer to values in \p F but are not
+/// themselves located in \p F.
+static void eraseDebugIntrinsicsWithNonLocalRefs(Function &F) {
+ for (Instruction &I : instructions(F)) {
+ SmallVector<DbgVariableIntrinsic *, 4> DbgUsers;
+ findDbgUsers(DbgUsers, &I);
+ for (DbgVariableIntrinsic *DVI : DbgUsers)
+ if (DVI->getFunction() != &F)
+ DVI->eraseFromParent();
+ }
+}
+
+/// Fix up the debug info in the old and new functions by pointing line
+/// locations and debug intrinsics to the new subprogram scope, and by deleting
+/// intrinsics which point to values outside of the new function.
+static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
+ CallInst &TheCall) {
+ DISubprogram *OldSP = OldFunc.getSubprogram();
+ LLVMContext &Ctx = OldFunc.getContext();
+
+ if (!OldSP) {
+ // Erase any debug info the new function contains.
+ stripDebugInfo(NewFunc);
+ // Make sure the old function doesn't contain any non-local metadata refs.
+ eraseDebugIntrinsicsWithNonLocalRefs(NewFunc);
+ return;
+ }
+
+ // Create a subprogram for the new function. Leave out a description of the
+ // function arguments, as the parameters don't correspond to anything at the
+ // source level.
+ assert(OldSP->getUnit() && "Missing compile unit for subprogram");
+ DIBuilder DIB(*OldFunc.getParent(), /*AllowUnresolved=*/false,
+ OldSP->getUnit());
+ auto SPType =
+ DIB.createSubroutineType(DIB.getOrCreateTypeArray(std::nullopt));
+ DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagDefinition |
+ DISubprogram::SPFlagOptimized |
+ DISubprogram::SPFlagLocalToUnit;
+ auto NewSP = DIB.createFunction(
+ OldSP->getUnit(), NewFunc.getName(), NewFunc.getName(), OldSP->getFile(),
+ /*LineNo=*/0, SPType, /*ScopeLine=*/0, DINode::FlagZero, SPFlags);
+ NewFunc.setSubprogram(NewSP);
+
+ // Debug intrinsics in the new function need to be updated in one of two
+ // ways:
+ // 1) They need to be deleted, because they describe a value in the old
+ // function.
+ // 2) They need to point to fresh metadata, e.g. because they currently
+ // point to a variable in the wrong scope.
+ SmallDenseMap<DINode *, DINode *> RemappedMetadata;
+ SmallVector<Instruction *, 4> DebugIntrinsicsToDelete;
+ DenseMap<const MDNode *, MDNode *> Cache;
+ for (Instruction &I : instructions(NewFunc)) {
+ auto *DII = dyn_cast<DbgInfoIntrinsic>(&I);
+ if (!DII)
+ continue;
+
+ // Point the intrinsic to a fresh label within the new function if the
+ // intrinsic was not inlined from some other function.
+ if (auto *DLI = dyn_cast<DbgLabelInst>(&I)) {
+ if (DLI->getDebugLoc().getInlinedAt())
+ continue;
+ DILabel *OldLabel = DLI->getLabel();
+ DINode *&NewLabel = RemappedMetadata[OldLabel];
+ if (!NewLabel) {
+ DILocalScope *NewScope = DILocalScope::cloneScopeForSubprogram(
+ *OldLabel->getScope(), *NewSP, Ctx, Cache);
+ NewLabel = DILabel::get(Ctx, NewScope, OldLabel->getName(),
+ OldLabel->getFile(), OldLabel->getLine());
+ }
+ DLI->setArgOperand(0, MetadataAsValue::get(Ctx, NewLabel));
+ continue;
+ }
+
+ auto IsInvalidLocation = [&NewFunc](Value *Location) {
+ // Location is invalid if it isn't a constant or an instruction, or is an
+ // instruction but isn't in the new function.
+ if (!Location ||
+ (!isa<Constant>(Location) && !isa<Instruction>(Location)))
+ return true;
+ Instruction *LocationInst = dyn_cast<Instruction>(Location);
+ return LocationInst && LocationInst->getFunction() != &NewFunc;
+ };
+
+ auto *DVI = cast<DbgVariableIntrinsic>(DII);
+ // If any of the used locations are invalid, delete the intrinsic.
+ if (any_of(DVI->location_ops(), IsInvalidLocation)) {
+ DebugIntrinsicsToDelete.push_back(DVI);
+ continue;
+ }
+ // If the variable was in the scope of the old function, i.e. it was not
+ // inlined, point the intrinsic to a fresh variable within the new function.
+ if (!DVI->getDebugLoc().getInlinedAt()) {
+ DILocalVariable *OldVar = DVI->getVariable();
+ DINode *&NewVar = RemappedMetadata[OldVar];
+ if (!NewVar) {
+ DILocalScope *NewScope = DILocalScope::cloneScopeForSubprogram(
+ *OldVar->getScope(), *NewSP, Ctx, Cache);
+ NewVar = DIB.createAutoVariable(
+ NewScope, OldVar->getName(), OldVar->getFile(), OldVar->getLine(),
+ OldVar->getType(), /*AlwaysPreserve=*/false, DINode::FlagZero,
+ OldVar->getAlignInBits());
+ }
+ DVI->setVariable(cast<DILocalVariable>(NewVar));
+ }
+ }
+
+ for (auto *DII : DebugIntrinsicsToDelete)
+ DII->eraseFromParent();
+ DIB.finalizeSubprogram(NewSP);
+
+ // Fix up the scope information attached to the line locations in the new
+ // function.
+ for (Instruction &I : instructions(NewFunc)) {
+ if (const DebugLoc &DL = I.getDebugLoc())
+ I.setDebugLoc(
+ DebugLoc::replaceInlinedAtSubprogram(DL, *NewSP, Ctx, Cache));
+
+ // Loop info metadata may contain line locations. Fix them up.
+ auto updateLoopInfoLoc = [&Ctx, &Cache, NewSP](Metadata *MD) -> Metadata * {
+ if (auto *Loc = dyn_cast_or_null<DILocation>(MD))
+ return DebugLoc::replaceInlinedAtSubprogram(Loc, *NewSP, Ctx, Cache);
+ return MD;
+ };
+ updateLoopMetadataDebugLocations(I, updateLoopInfoLoc);
+ }
+ if (!TheCall.getDebugLoc())
+ TheCall.setDebugLoc(DILocation::get(Ctx, 0, 0, OldSP));
+
+ eraseDebugIntrinsicsWithNonLocalRefs(NewFunc);
+}
+
+Function *
+CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) {
+ ValueSet Inputs, Outputs;
+ return extractCodeRegion(CEAC, Inputs, Outputs);
+}
+
+Function *
+CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC,
+ ValueSet &inputs, ValueSet &outputs) {
+ if (!isEligible())
+ return nullptr;
+
+ // Assumption: this is a single-entry code region, and the header is the first
+ // block in the region.
+ BasicBlock *header = *Blocks.begin();
+ Function *oldFunction = header->getParent();
+
+ // Calculate the entry frequency of the new function before we change the root
+ // block.
+ BlockFrequency EntryFreq;
+ if (BFI) {
+ assert(BPI && "Both BPI and BFI are required to preserve profile info");
+ for (BasicBlock *Pred : predecessors(header)) {
+ if (Blocks.count(Pred))
+ continue;
+ EntryFreq +=
+ BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header);
+ }
+ }
+
+ // Remove CondGuardInsts that will be moved to the new function from the old
+ // function's assumption cache.
+ for (BasicBlock *Block : Blocks) {
+ for (Instruction &I : llvm::make_early_inc_range(*Block)) {
+ if (auto *CI = dyn_cast<CondGuardInst>(&I)) {
+ if (AC)
+ AC->unregisterAssumption(CI);
+ CI->eraseFromParent();
+ }
+ }
+ }
+
+ // If we have any return instructions in the region, split those blocks so
+ // that the return is not in the region.
+ splitReturnBlocks();
+
+ // Calculate the exit blocks for the extracted region and the total exit
+ // weights for each of those blocks.
+ DenseMap<BasicBlock *, BlockFrequency> ExitWeights;
+ SmallPtrSet<BasicBlock *, 1> ExitBlocks;
+ for (BasicBlock *Block : Blocks) {
+ for (BasicBlock *Succ : successors(Block)) {
+ if (!Blocks.count(Succ)) {
+ // Update the branch weight for this successor.
+ if (BFI) {
+ BlockFrequency &BF = ExitWeights[Succ];
+ BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, Succ);
+ }
+ ExitBlocks.insert(Succ);
+ }
+ }
+ }
+ NumExitBlocks = ExitBlocks.size();
+
+ for (BasicBlock *Block : Blocks) {
+ Instruction *TI = Block->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ if (Blocks.count(TI->getSuccessor(i)))
+ continue;
+ BasicBlock *OldTarget = TI->getSuccessor(i);
+ OldTargets.push_back(OldTarget);
+ }
+ }
+
+ // If we have to split PHI nodes of the entry or exit blocks, do so now.
+ severSplitPHINodesOfEntry(header);
+ severSplitPHINodesOfExits(ExitBlocks);
+
+ // This block takes the place of the original code region.
+ BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),
+ "codeRepl", oldFunction,
+ header);
+
+ // The new function needs a root node because other nodes can branch to the
+ // head of the region, but the entry node of a function cannot have preds.
+ BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(),
+ "newFuncRoot");
+ auto *BranchI = BranchInst::Create(header);
+ // If the original function has debug info, we have to add a debug location
+ // to the new branch instruction from the artificial entry block.
+ // We use the debug location of the first instruction in the extracted
+ // blocks, as there is no other equivalent line in the source code.
+ if (oldFunction->getSubprogram()) {
+ any_of(Blocks, [&BranchI](const BasicBlock *BB) {
+ return any_of(*BB, [&BranchI](const Instruction &I) {
+ if (!I.getDebugLoc())
+ return false;
+ BranchI->setDebugLoc(I.getDebugLoc());
+ return true;
+ });
+ });
+ }
+ BranchI->insertInto(newFuncRoot, newFuncRoot->end());
+
+ ValueSet SinkingCands, HoistingCands;
+ BasicBlock *CommonExit = nullptr;
+ findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
+ assert(HoistingCands.empty() || CommonExit);
+
+ // Find inputs to, outputs from the code region.
+ findInputsOutputs(inputs, outputs, SinkingCands);
+
+ // Now sink all instructions which only have non-phi uses inside the region.
+ // Group the allocas at the start of the block, so that any bitcast uses of
+ // the allocas are well-defined.
+ AllocaInst *FirstSunkAlloca = nullptr;
+ for (auto *II : SinkingCands) {
+ if (auto *AI = dyn_cast<AllocaInst>(II)) {
+ AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt());
+ if (!FirstSunkAlloca)
+ FirstSunkAlloca = AI;
+ }
+ }
+ assert((SinkingCands.empty() || FirstSunkAlloca) &&
+ "Did not expect a sink candidate without any allocas");
+ for (auto *II : SinkingCands) {
+ if (!isa<AllocaInst>(II)) {
+ cast<Instruction>(II)->moveAfter(FirstSunkAlloca);
+ }
+ }
+
+ if (!HoistingCands.empty()) {
+ auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit);
+ Instruction *TI = HoistToBlock->getTerminator();
+ for (auto *II : HoistingCands)
+ cast<Instruction>(II)->moveBefore(TI);
+ }
+
+ // Collect objects which are inputs to the extraction region and also
+ // referenced by lifetime start markers within it. The effects of these
+ // markers must be replicated in the calling function to prevent the stack
+ // coloring pass from merging slots which store input objects.
+ ValueSet LifetimesStart;
+ eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart);
+
+ // Construct new function based on inputs/outputs & add allocas for all defs.
+ Function *newFunction =
+ constructFunction(inputs, outputs, header, newFuncRoot, codeReplacer,
+ oldFunction, oldFunction->getParent());
+
+ // Update the entry count of the function.
+ if (BFI) {
+ auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency());
+ if (Count)
+ newFunction->setEntryCount(
+ ProfileCount(*Count, Function::PCT_Real)); // FIXME
+ BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency());
+ }
+
+ CallInst *TheCall =
+ emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs);
+
+ moveCodeToFunction(newFunction);
+
+ // Replicate the effects of any lifetime start/end markers which referenced
+ // input objects in the extraction region by placing markers around the call.
+ insertLifetimeMarkersSurroundingCall(
+ oldFunction->getParent(), LifetimesStart.getArrayRef(), {}, TheCall);
+
+ // Propagate personality info to the new function if there is one.
+ if (oldFunction->hasPersonalityFn())
+ newFunction->setPersonalityFn(oldFunction->getPersonalityFn());
+
+ // Update the branch weights for the exit block.
+ if (BFI && NumExitBlocks > 1)
+ calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI);
+
+ // Loop over all of the PHI nodes in the header and exit blocks, and change
+ // any references to the old incoming edge to be the new incoming edge.
+ for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!Blocks.count(PN->getIncomingBlock(i)))
+ PN->setIncomingBlock(i, newFuncRoot);
+ }
+
+ for (BasicBlock *ExitBB : ExitBlocks)
+ for (PHINode &PN : ExitBB->phis()) {
+ Value *IncomingCodeReplacerVal = nullptr;
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ // Ignore incoming values from outside of the extracted region.
+ if (!Blocks.count(PN.getIncomingBlock(i)))
+ continue;
+
+ // Ensure that there is only one incoming value from codeReplacer.
+ if (!IncomingCodeReplacerVal) {
+ PN.setIncomingBlock(i, codeReplacer);
+ IncomingCodeReplacerVal = PN.getIncomingValue(i);
+ } else
+ assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) &&
+ "PHI has two incompatible incoming values from codeRepl");
+ }
+ }
+
+ fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall);
+
+ // Mark the new function `noreturn` if applicable. Terminators which resume
+ // exception propagation are treated as returning instructions. This is to
+ // avoid inserting traps after calls to outlined functions which unwind.
+ bool doesNotReturn = none_of(*newFunction, [](const BasicBlock &BB) {
+ const Instruction *Term = BB.getTerminator();
+ return isa<ReturnInst>(Term) || isa<ResumeInst>(Term);
+ });
+ if (doesNotReturn)
+ newFunction->setDoesNotReturn();
+
+ LLVM_DEBUG(if (verifyFunction(*newFunction, &errs())) {
+ newFunction->dump();
+ report_fatal_error("verification of newFunction failed!");
+ });
+ LLVM_DEBUG(if (verifyFunction(*oldFunction))
+ report_fatal_error("verification of oldFunction failed!"));
+ LLVM_DEBUG(if (AC && verifyAssumptionCache(*oldFunction, *newFunction, AC))
+ report_fatal_error("Stale Assumption cache for old Function!"));
+ return newFunction;
+}
+
+bool CodeExtractor::verifyAssumptionCache(const Function &OldFunc,
+ const Function &NewFunc,
+ AssumptionCache *AC) {
+ for (auto AssumeVH : AC->assumptions()) {
+ auto *I = dyn_cast_or_null<CondGuardInst>(AssumeVH);
+ if (!I)
+ continue;
+
+ // There shouldn't be any llvm.assume intrinsics in the new function.
+ if (I->getFunction() != &OldFunc)
+ return true;
+
+ // There shouldn't be any stale affected values in the assumption cache
+ // that were previously in the old function, but that have now been moved
+ // to the new function.
+ for (auto AffectedValVH : AC->assumptionsFor(I->getOperand(0))) {
+ auto *AffectedCI = dyn_cast_or_null<CondGuardInst>(AffectedValVH);
+ if (!AffectedCI)
+ continue;
+ if (AffectedCI->getFunction() != &OldFunc)
+ return true;
+ auto *AssumedInst = cast<Instruction>(AffectedCI->getOperand(0));
+ if (AssumedInst->getFunction() != &OldFunc)
+ return true;
+ }
+ }
+ return false;
+}
+
+void CodeExtractor::excludeArgFromAggregate(Value *Arg) {
+ ExcludeArgsFromAggregate.insert(Arg);
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/CodeLayout.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/CodeLayout.cpp
new file mode 100644
index 0000000000..9eb3aff3ff
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/CodeLayout.cpp
@@ -0,0 +1,1014 @@
+//===- CodeLayout.cpp - Implementation of code layout algorithms ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// ExtTSP - layout of basic blocks with i-cache optimization.
+//
+// The algorithm tries to find a layout of nodes (basic blocks) of a given CFG
+// optimizing jump locality and thus processor I-cache utilization. This is
+// achieved via increasing the number of fall-through jumps and co-locating
+// frequently executed nodes together. The name follows the underlying
+// optimization problem, Extended-TSP, which is a generalization of the
+// classical (maximum) Traveling Salesman Problem.
+//
+// The algorithm is a greedy heuristic that works with chains (ordered lists)
+// of basic blocks. Initially all chains are isolated basic blocks. On every
+// iteration, we pick a pair of chains whose merging yields the biggest increase
+// in the ExtTSP score, which models how i-cache "friendly" a specific chain is.
+// A pair of chains giving the maximum gain is merged into a new chain. The
+// procedure stops when there is only one chain left, or when merging does not
+// increase ExtTSP. In the latter case, the remaining chains are sorted by
+// density in the decreasing order.
+//
+// An important aspect is the way two chains are merged. Unlike earlier
+// algorithms (e.g., those based on the approach of Pettis-Hansen), given two
+// chains, X and Y, chain X is first split into two, X1 and X2. Then we
+// consider all possible ways of gluing the three chains (e.g., X1YX2, X1X2Y,
+// X2X1Y, X2YX1, YX1X2, YX2X1) and choose the one producing the largest score.
+// This improves the quality of the final result (the search space is larger)
+// while keeping the implementation sufficiently fast.
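+//
+// For example, when merging X = (x1 x2 x3) with Y = (y1), splitting X after x1
+// also exposes layouts such as (x1 y1 x2 x3) and (x2 x3 x1 y1), in addition to
+// the plain concatenation (x1 x2 x3 y1).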
+//
+// Reference:
+// * A. Newell and S. Pupyrev, Improved Basic Block Reordering,
+// IEEE Transactions on Computers, 2020
+// https://arxiv.org/abs/1809.04676
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CodeLayout.h"
+#include "llvm/Support/CommandLine.h"
+
+#include <cmath>
+
+using namespace llvm;
+#define DEBUG_TYPE "code-layout"
+
+cl::opt<bool> EnableExtTspBlockPlacement(
+ "enable-ext-tsp-block-placement", cl::Hidden, cl::init(false),
+ cl::desc("Enable machine block placement based on the ext-tsp model, "
+ "optimizing I-cache utilization."));
+
+cl::opt<bool> ApplyExtTspWithoutProfile(
+ "ext-tsp-apply-without-profile",
+ cl::desc("Whether to apply ext-tsp placement for instances w/o profile"),
+ cl::init(true), cl::Hidden);
+
+// Algorithm-specific params. The values are tuned for the best performance
+// of large-scale front-end bound binaries.
+static cl::opt<double> ForwardWeightCond(
+ "ext-tsp-forward-weight-cond", cl::ReallyHidden, cl::init(0.1),
+ cl::desc("The weight of conditional forward jumps for ExtTSP value"));
+
+static cl::opt<double> ForwardWeightUncond(
+ "ext-tsp-forward-weight-uncond", cl::ReallyHidden, cl::init(0.1),
+ cl::desc("The weight of unconditional forward jumps for ExtTSP value"));
+
+static cl::opt<double> BackwardWeightCond(
+ "ext-tsp-backward-weight-cond", cl::ReallyHidden, cl::init(0.1),
+ cl::desc("The weight of conditional backward jumps for ExtTSP value"));
+
+static cl::opt<double> BackwardWeightUncond(
+ "ext-tsp-backward-weight-uncond", cl::ReallyHidden, cl::init(0.1),
+ cl::desc("The weight of unconditional backward jumps for ExtTSP value"));
+
+static cl::opt<double> FallthroughWeightCond(
+ "ext-tsp-fallthrough-weight-cond", cl::ReallyHidden, cl::init(1.0),
+ cl::desc("The weight of conditional fallthrough jumps for ExtTSP value"));
+
+static cl::opt<double> FallthroughWeightUncond(
+ "ext-tsp-fallthrough-weight-uncond", cl::ReallyHidden, cl::init(1.05),
+ cl::desc("The weight of unconditional fallthrough jumps for ExtTSP value"));
+
+static cl::opt<unsigned> ForwardDistance(
+ "ext-tsp-forward-distance", cl::ReallyHidden, cl::init(1024),
+ cl::desc("The maximum distance (in bytes) of a forward jump for ExtTSP"));
+
+static cl::opt<unsigned> BackwardDistance(
+ "ext-tsp-backward-distance", cl::ReallyHidden, cl::init(640),
+ cl::desc("The maximum distance (in bytes) of a backward jump for ExtTSP"));
+
+// The maximum size of a chain created by the algorithm. The size is bounded
+// so that the algorithm can efficiently process extremely large instances.
+static cl::opt<unsigned>
+ MaxChainSize("ext-tsp-max-chain-size", cl::ReallyHidden, cl::init(4096),
+ cl::desc("The maximum size of a chain to create."));
+
+// The maximum size of a chain for splitting. Larger values of the threshold
+// may yield better quality at the cost of worse run-time.
+static cl::opt<unsigned> ChainSplitThreshold(
+ "ext-tsp-chain-split-threshold", cl::ReallyHidden, cl::init(128),
+ cl::desc("The maximum size of a chain to apply splitting"));
+
+// The option enables splitting (large) chains along incoming and outgoing
+// jumps. This typically results in better quality.
+static cl::opt<bool> EnableChainSplitAlongJumps(
+ "ext-tsp-enable-chain-split-along-jumps", cl::ReallyHidden, cl::init(true),
+ cl::desc("Whether to split chains along incoming and outgoing jumps"));
+
+namespace {
+
+// Epsilon for comparison of doubles.
+constexpr double EPS = 1e-8;
+
+// Compute the Ext-TSP score for a given jump.
+double jumpExtTSPScore(uint64_t JumpDist, uint64_t JumpMaxDist, uint64_t Count,
+ double Weight) {
+ if (JumpDist > JumpMaxDist)
+ return 0;
+ double Prob = 1.0 - static_cast<double>(JumpDist) / JumpMaxDist;
+ return Weight * Prob * Count;
+}
+
+// Compute the Ext-TSP score for a jump between a given pair of blocks,
+// using their sizes, (estimated) addresses and the jump execution count.
+double extTSPScore(uint64_t SrcAddr, uint64_t SrcSize, uint64_t DstAddr,
+ uint64_t Count, bool IsConditional) {
+ // Fallthrough
+ if (SrcAddr + SrcSize == DstAddr) {
+ return jumpExtTSPScore(0, 1, Count,
+ IsConditional ? FallthroughWeightCond
+ : FallthroughWeightUncond);
+ }
+ // Forward
+ if (SrcAddr + SrcSize < DstAddr) {
+ const uint64_t Dist = DstAddr - (SrcAddr + SrcSize);
+ return jumpExtTSPScore(Dist, ForwardDistance, Count,
+ IsConditional ? ForwardWeightCond
+ : ForwardWeightUncond);
+ }
+ // Backward
+ const uint64_t Dist = SrcAddr + SrcSize - DstAddr;
+ return jumpExtTSPScore(Dist, BackwardDistance, Count,
+ IsConditional ? BackwardWeightCond
+ : BackwardWeightUncond);
+}
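+
+// A rough worked example, assuming the default parameter values above: a
+// conditional forward jump of 256 bytes executed Count times scores
+//   0.1 * (1 - 256/1024) * Count = 0.075 * Count,
+// whereas an unconditional fallthrough scores 1.05 * Count, so the model
+// strongly favors turning hot jumps into fallthroughs.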
+
+/// A type of merging two chains, X and Y. The former chain is split into
+/// X1 and X2 and then concatenated with Y in the order specified by the type.
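+/// For example, X1_Y_X2 places Y between the two parts of X, while X_Y is a
+/// plain concatenation of X followed by Y.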
+enum class MergeTypeTy : int { X_Y, X1_Y_X2, Y_X2_X1, X2_X1_Y };
+
+/// The gain of merging two chains, that is, the Ext-TSP score of the merge
+/// together with the corresponding merge 'type' and 'offset'.
+class MergeGainTy {
+public:
+ explicit MergeGainTy() = default;
+ explicit MergeGainTy(double Score, size_t MergeOffset, MergeTypeTy MergeType)
+ : Score(Score), MergeOffset(MergeOffset), MergeType(MergeType) {}
+
+ double score() const { return Score; }
+
+ size_t mergeOffset() const { return MergeOffset; }
+
+ MergeTypeTy mergeType() const { return MergeType; }
+
+ // Returns 'true' iff Other is preferred over this.
+ bool operator<(const MergeGainTy &Other) const {
+ return (Other.Score > EPS && Other.Score > Score + EPS);
+ }
+
+ // Update the current gain if Other is preferred over this.
+ void updateIfLessThan(const MergeGainTy &Other) {
+ if (*this < Other)
+ *this = Other;
+ }
+
+private:
+ double Score{-1.0};
+ size_t MergeOffset{0};
+ MergeTypeTy MergeType{MergeTypeTy::X_Y};
+};
+
+class Jump;
+class Chain;
+class ChainEdge;
+
+/// A node in the graph, typically corresponding to a basic block in CFG.
+class Block {
+public:
+ Block(const Block &) = delete;
+ Block(Block &&) = default;
+ Block &operator=(const Block &) = delete;
+ Block &operator=(Block &&) = default;
+
+ // The original index of the block in CFG.
+ size_t Index{0};
+ // The index of the block in the current chain.
+ size_t CurIndex{0};
+ // Size of the block in the binary.
+ uint64_t Size{0};
+ // Execution count of the block in the profile data.
+ uint64_t ExecutionCount{0};
+ // Current chain of the node.
+ Chain *CurChain{nullptr};
+ // An offset of the block in the current chain.
+ mutable uint64_t EstimatedAddr{0};
+ // Forced successor of the block in CFG.
+ Block *ForcedSucc{nullptr};
+ // Forced predecessor of the block in CFG.
+ Block *ForcedPred{nullptr};
+ // Outgoing jumps from the block.
+ std::vector<Jump *> OutJumps;
+ // Incoming jumps to the block.
+ std::vector<Jump *> InJumps;
+
+public:
+ explicit Block(size_t Index, uint64_t Size, uint64_t EC)
+ : Index(Index), Size(Size), ExecutionCount(EC) {}
+ bool isEntry() const { return Index == 0; }
+};
+
+/// An arc in the graph, typically corresponding to a jump between two blocks.
+class Jump {
+public:
+ Jump(const Jump &) = delete;
+ Jump(Jump &&) = default;
+ Jump &operator=(const Jump &) = delete;
+ Jump &operator=(Jump &&) = default;
+
+ // Source block of the jump.
+ Block *Source;
+ // Target block of the jump.
+ Block *Target;
+ // Execution count of the arc in the profile data.
+ uint64_t ExecutionCount{0};
+ // Whether the jump corresponds to a conditional branch.
+ bool IsConditional{false};
+
+public:
+ explicit Jump(Block *Source, Block *Target, uint64_t ExecutionCount)
+ : Source(Source), Target(Target), ExecutionCount(ExecutionCount) {}
+};
+
+/// A chain (ordered sequence) of blocks.
+class Chain {
+public:
+ Chain(const Chain &) = delete;
+ Chain(Chain &&) = default;
+ Chain &operator=(const Chain &) = delete;
+ Chain &operator=(Chain &&) = default;
+
+ explicit Chain(uint64_t Id, Block *Block)
+ : Id(Id), Score(0), Blocks(1, Block) {}
+
+ uint64_t id() const { return Id; }
+
+ bool isEntry() const { return Blocks[0]->Index == 0; }
+
+ bool isCold() const {
+ for (auto *Block : Blocks) {
+ if (Block->ExecutionCount > 0)
+ return false;
+ }
+ return true;
+ }
+
+ double score() const { return Score; }
+
+ void setScore(double NewScore) { Score = NewScore; }
+
+ const std::vector<Block *> &blocks() const { return Blocks; }
+
+ size_t numBlocks() const { return Blocks.size(); }
+
+ const std::vector<std::pair<Chain *, ChainEdge *>> &edges() const {
+ return Edges;
+ }
+
+ ChainEdge *getEdge(Chain *Other) const {
+ for (auto It : Edges) {
+ if (It.first == Other)
+ return It.second;
+ }
+ return nullptr;
+ }
+
+ void removeEdge(Chain *Other) {
+ auto It = Edges.begin();
+ while (It != Edges.end()) {
+ if (It->first == Other) {
+ Edges.erase(It);
+ return;
+ }
+ It++;
+ }
+ }
+
+ void addEdge(Chain *Other, ChainEdge *Edge) {
+ Edges.push_back(std::make_pair(Other, Edge));
+ }
+
+ void merge(Chain *Other, const std::vector<Block *> &MergedBlocks) {
+ Blocks = MergedBlocks;
+ // Update each block's current chain and index within the chain.
+ for (size_t Idx = 0; Idx < Blocks.size(); Idx++) {
+ Blocks[Idx]->CurChain = this;
+ Blocks[Idx]->CurIndex = Idx;
+ }
+ }
+
+ void mergeEdges(Chain *Other);
+
+ void clear() {
+ Blocks.clear();
+ Blocks.shrink_to_fit();
+ Edges.clear();
+ Edges.shrink_to_fit();
+ }
+
+private:
+ // Unique chain identifier.
+ uint64_t Id;
+ // Cached ext-tsp score for the chain.
+ double Score;
+ // Blocks of the chain.
+ std::vector<Block *> Blocks;
+ // Adjacent chains and corresponding edges (lists of jumps).
+ std::vector<std::pair<Chain *, ChainEdge *>> Edges;
+};
+
+/// An edge in CFG representing jumps between two chains.
+/// When blocks are merged into chains, the edges are combined too so that
+/// there is always at most one edge between a pair of chains.
+class ChainEdge {
+public:
+ ChainEdge(const ChainEdge &) = delete;
+ ChainEdge(ChainEdge &&) = default;
+ ChainEdge &operator=(const ChainEdge &) = delete;
+ ChainEdge &operator=(ChainEdge &&) = default;
+
+ explicit ChainEdge(Jump *Jump)
+ : SrcChain(Jump->Source->CurChain), DstChain(Jump->Target->CurChain),
+ Jumps(1, Jump) {}
+
+ const std::vector<Jump *> &jumps() const { return Jumps; }
+
+ void changeEndpoint(Chain *From, Chain *To) {
+ if (From == SrcChain)
+ SrcChain = To;
+ if (From == DstChain)
+ DstChain = To;
+ }
+
+ void appendJump(Jump *Jump) { Jumps.push_back(Jump); }
+
+ void moveJumps(ChainEdge *Other) {
+ Jumps.insert(Jumps.end(), Other->Jumps.begin(), Other->Jumps.end());
+ Other->Jumps.clear();
+ Other->Jumps.shrink_to_fit();
+ }
+
+ bool hasCachedMergeGain(Chain *Src, Chain *Dst) const {
+ return Src == SrcChain ? CacheValidForward : CacheValidBackward;
+ }
+
+ MergeGainTy getCachedMergeGain(Chain *Src, Chain *Dst) const {
+ return Src == SrcChain ? CachedGainForward : CachedGainBackward;
+ }
+
+ void setCachedMergeGain(Chain *Src, Chain *Dst, MergeGainTy MergeGain) {
+ if (Src == SrcChain) {
+ CachedGainForward = MergeGain;
+ CacheValidForward = true;
+ } else {
+ CachedGainBackward = MergeGain;
+ CacheValidBackward = true;
+ }
+ }
+
+ void invalidateCache() {
+ CacheValidForward = false;
+ CacheValidBackward = false;
+ }
+
+private:
+ // Source chain.
+ Chain *SrcChain{nullptr};
+ // Destination chain.
+ Chain *DstChain{nullptr};
+ // Original jumps in the binary with corresponding execution counts.
+ std::vector<Jump *> Jumps;
+ // Cached ext-tsp value for merging the pair of chains.
+ // Since the gain of merging (Src, Dst) and (Dst, Src) might be different,
+ // we store both values here.
+ MergeGainTy CachedGainForward;
+ MergeGainTy CachedGainBackward;
+ // Whether the cached value must be recomputed.
+ bool CacheValidForward{false};
+ bool CacheValidBackward{false};
+};
+
+void Chain::mergeEdges(Chain *Other) {
+ assert(this != Other && "cannot merge a chain with itself");
+
+ // Update edges adjacent to chain Other
+ for (auto EdgeIt : Other->Edges) {
+ Chain *DstChain = EdgeIt.first;
+ ChainEdge *DstEdge = EdgeIt.second;
+ Chain *TargetChain = DstChain == Other ? this : DstChain;
+ ChainEdge *CurEdge = getEdge(TargetChain);
+ if (CurEdge == nullptr) {
+ DstEdge->changeEndpoint(Other, this);
+ this->addEdge(TargetChain, DstEdge);
+ if (DstChain != this && DstChain != Other) {
+ DstChain->addEdge(this, DstEdge);
+ }
+ } else {
+ CurEdge->moveJumps(DstEdge);
+ }
+ // Cleanup leftover edge
+ if (DstChain != Other) {
+ DstChain->removeEdge(Other);
+ }
+ }
+}
+
+using BlockIter = std::vector<Block *>::const_iterator;
+
+/// A wrapper around three chains of blocks; it is used to avoid extra
+/// instantiation of the vectors.
+class MergedChain {
+public:
+ MergedChain(BlockIter Begin1, BlockIter End1, BlockIter Begin2 = BlockIter(),
+ BlockIter End2 = BlockIter(), BlockIter Begin3 = BlockIter(),
+ BlockIter End3 = BlockIter())
+ : Begin1(Begin1), End1(End1), Begin2(Begin2), End2(End2), Begin3(Begin3),
+ End3(End3) {}
+
+ template <typename F> void forEach(const F &Func) const {
+ for (auto It = Begin1; It != End1; It++)
+ Func(*It);
+ for (auto It = Begin2; It != End2; It++)
+ Func(*It);
+ for (auto It = Begin3; It != End3; It++)
+ Func(*It);
+ }
+
+ std::vector<Block *> getBlocks() const {
+ std::vector<Block *> Result;
+ Result.reserve(std::distance(Begin1, End1) + std::distance(Begin2, End2) +
+ std::distance(Begin3, End3));
+ Result.insert(Result.end(), Begin1, End1);
+ Result.insert(Result.end(), Begin2, End2);
+ Result.insert(Result.end(), Begin3, End3);
+ return Result;
+ }
+
+ const Block *getFirstBlock() const { return *Begin1; }
+
+private:
+ BlockIter Begin1;
+ BlockIter End1;
+ BlockIter Begin2;
+ BlockIter End2;
+ BlockIter Begin3;
+ BlockIter End3;
+};
+
+/// The implementation of the ExtTSP algorithm.
+class ExtTSPImpl {
+ using EdgeT = std::pair<uint64_t, uint64_t>;
+ using EdgeCountMap = std::vector<std::pair<EdgeT, uint64_t>>;
+
+public:
+ ExtTSPImpl(size_t NumNodes, const std::vector<uint64_t> &NodeSizes,
+ const std::vector<uint64_t> &NodeCounts,
+ const EdgeCountMap &EdgeCounts)
+ : NumNodes(NumNodes) {
+ initialize(NodeSizes, NodeCounts, EdgeCounts);
+ }
+
+ /// Run the algorithm and return an optimized ordering of blocks.
+ void run(std::vector<uint64_t> &Result) {
+ // Pass 1: Merge blocks with their mutually forced successors
+ mergeForcedPairs();
+
+ // Pass 2: Merge pairs of chains while improving the ExtTSP objective
+ mergeChainPairs();
+
+ // Pass 3: Merge cold blocks to reduce code size
+ mergeColdChains();
+
+ // Collect blocks from all chains
+ concatChains(Result);
+ }
+
+private:
+ /// Initialize the algorithm's data structures.
+ void initialize(const std::vector<uint64_t> &NodeSizes,
+ const std::vector<uint64_t> &NodeCounts,
+ const EdgeCountMap &EdgeCounts) {
+ // Initialize blocks
+ AllBlocks.reserve(NumNodes);
+ for (uint64_t Node = 0; Node < NumNodes; Node++) {
+ uint64_t Size = std::max<uint64_t>(NodeSizes[Node], 1ULL);
+ uint64_t ExecutionCount = NodeCounts[Node];
+ // The execution count of the entry block is set to at least 1
+ if (Node == 0 && ExecutionCount == 0)
+ ExecutionCount = 1;
+ AllBlocks.emplace_back(Node, Size, ExecutionCount);
+ }
+
+ // Initialize jumps between blocks
+ SuccNodes.resize(NumNodes);
+ PredNodes.resize(NumNodes);
+ std::vector<uint64_t> OutDegree(NumNodes, 0);
+ AllJumps.reserve(EdgeCounts.size());
+ for (auto It : EdgeCounts) {
+ auto Pred = It.first.first;
+ auto Succ = It.first.second;
+ OutDegree[Pred]++;
+ // Ignore self-edges
+ if (Pred == Succ)
+ continue;
+
+ SuccNodes[Pred].push_back(Succ);
+ PredNodes[Succ].push_back(Pred);
+ auto ExecutionCount = It.second;
+ if (ExecutionCount > 0) {
+ auto &Block = AllBlocks[Pred];
+ auto &SuccBlock = AllBlocks[Succ];
+ AllJumps.emplace_back(&Block, &SuccBlock, ExecutionCount);
+ SuccBlock.InJumps.push_back(&AllJumps.back());
+ Block.OutJumps.push_back(&AllJumps.back());
+ }
+ }
+ for (auto &Jump : AllJumps) {
+ assert(OutDegree[Jump.Source->Index] > 0);
+ Jump.IsConditional = OutDegree[Jump.Source->Index] > 1;
+ }
+
+ // Initialize chains
+ AllChains.reserve(NumNodes);
+ HotChains.reserve(NumNodes);
+ for (Block &Block : AllBlocks) {
+ AllChains.emplace_back(Block.Index, &Block);
+ Block.CurChain = &AllChains.back();
+ if (Block.ExecutionCount > 0) {
+ HotChains.push_back(&AllChains.back());
+ }
+ }
+
+ // Initialize chain edges
+ AllEdges.reserve(AllJumps.size());
+ for (Block &Block : AllBlocks) {
+ for (auto &Jump : Block.OutJumps) {
+ auto SuccBlock = Jump->Target;
+ ChainEdge *CurEdge = Block.CurChain->getEdge(SuccBlock->CurChain);
+ // this edge is already present in the graph
+ if (CurEdge != nullptr) {
+ assert(SuccBlock->CurChain->getEdge(Block.CurChain) != nullptr);
+ CurEdge->appendJump(Jump);
+ continue;
+ }
+ // this is a new edge
+ AllEdges.emplace_back(Jump);
+ Block.CurChain->addEdge(SuccBlock->CurChain, &AllEdges.back());
+ SuccBlock->CurChain->addEdge(Block.CurChain, &AllEdges.back());
+ }
+ }
+ }
+
+ /// For a pair of blocks, A and B, block B is the forced successor of A,
+  /// if (i) all jumps (based on profile) from A go to B and (ii) all jumps
+ /// to B are from A. Such blocks should be adjacent in the optimal ordering;
+ /// the method finds and merges such pairs of blocks.
+ void mergeForcedPairs() {
+ // Find fallthroughs based on edge weights
+ for (auto &Block : AllBlocks) {
+ if (SuccNodes[Block.Index].size() == 1 &&
+ PredNodes[SuccNodes[Block.Index][0]].size() == 1 &&
+ SuccNodes[Block.Index][0] != 0) {
+ size_t SuccIndex = SuccNodes[Block.Index][0];
+ Block.ForcedSucc = &AllBlocks[SuccIndex];
+ AllBlocks[SuccIndex].ForcedPred = &Block;
+ }
+ }
+
+ // There might be 'cycles' in the forced dependencies, since profile
+ // data isn't 100% accurate. Typically this is observed in loops, when the
+ // loop edges are the hottest successors for the basic blocks of the loop.
+ // Break the cycles by choosing the block with the smallest index as the
+ // head. This helps to keep the original order of the loops, which likely
+    // have already been rotated in an optimized manner.
+ for (auto &Block : AllBlocks) {
+ if (Block.ForcedSucc == nullptr || Block.ForcedPred == nullptr)
+ continue;
+
+ auto SuccBlock = Block.ForcedSucc;
+ while (SuccBlock != nullptr && SuccBlock != &Block) {
+ SuccBlock = SuccBlock->ForcedSucc;
+ }
+ if (SuccBlock == nullptr)
+ continue;
+ // Break the cycle
+ AllBlocks[Block.ForcedPred->Index].ForcedSucc = nullptr;
+ Block.ForcedPred = nullptr;
+ }
+
+ // Merge blocks with their fallthrough successors
+ for (auto &Block : AllBlocks) {
+ if (Block.ForcedPred == nullptr && Block.ForcedSucc != nullptr) {
+ auto CurBlock = &Block;
+ while (CurBlock->ForcedSucc != nullptr) {
+ const auto NextBlock = CurBlock->ForcedSucc;
+ mergeChains(Block.CurChain, NextBlock->CurChain, 0, MergeTypeTy::X_Y);
+ CurBlock = NextBlock;
+ }
+ }
+ }
+ }
+
+ /// Merge pairs of chains while improving the ExtTSP objective.
+ void mergeChainPairs() {
+ /// Deterministically compare pairs of chains
+ auto compareChainPairs = [](const Chain *A1, const Chain *B1,
+ const Chain *A2, const Chain *B2) {
+ if (A1 != A2)
+ return A1->id() < A2->id();
+ return B1->id() < B2->id();
+ };
+
+ while (HotChains.size() > 1) {
+ Chain *BestChainPred = nullptr;
+ Chain *BestChainSucc = nullptr;
+ auto BestGain = MergeGainTy();
+ // Iterate over all pairs of chains
+ for (Chain *ChainPred : HotChains) {
+ // Get candidates for merging with the current chain
+ for (auto EdgeIter : ChainPred->edges()) {
+ Chain *ChainSucc = EdgeIter.first;
+ class ChainEdge *ChainEdge = EdgeIter.second;
+ // Ignore loop edges
+ if (ChainPred == ChainSucc)
+ continue;
+
+ // Stop early if the combined chain violates the maximum allowed size
+ if (ChainPred->numBlocks() + ChainSucc->numBlocks() >= MaxChainSize)
+ continue;
+
+ // Compute the gain of merging the two chains
+ MergeGainTy CurGain =
+ getBestMergeGain(ChainPred, ChainSucc, ChainEdge);
+ if (CurGain.score() <= EPS)
+ continue;
+
+ if (BestGain < CurGain ||
+ (std::abs(CurGain.score() - BestGain.score()) < EPS &&
+ compareChainPairs(ChainPred, ChainSucc, BestChainPred,
+ BestChainSucc))) {
+ BestGain = CurGain;
+ BestChainPred = ChainPred;
+ BestChainSucc = ChainSucc;
+ }
+ }
+ }
+
+ // Stop merging when there is no improvement
+ if (BestGain.score() <= EPS)
+ break;
+
+ // Merge the best pair of chains
+ mergeChains(BestChainPred, BestChainSucc, BestGain.mergeOffset(),
+ BestGain.mergeType());
+ }
+ }
+
+ /// Merge remaining blocks into chains w/o taking jump counts into
+  /// consideration. This allows us to maintain the original block order in
+  /// the absence of profile data.
+ void mergeColdChains() {
+ for (size_t SrcBB = 0; SrcBB < NumNodes; SrcBB++) {
+ // Iterating in reverse order to make sure original fallthrough jumps are
+ // merged first; this might be beneficial for code size.
+ size_t NumSuccs = SuccNodes[SrcBB].size();
+ for (size_t Idx = 0; Idx < NumSuccs; Idx++) {
+ auto DstBB = SuccNodes[SrcBB][NumSuccs - Idx - 1];
+ auto SrcChain = AllBlocks[SrcBB].CurChain;
+ auto DstChain = AllBlocks[DstBB].CurChain;
+ if (SrcChain != DstChain && !DstChain->isEntry() &&
+ SrcChain->blocks().back()->Index == SrcBB &&
+ DstChain->blocks().front()->Index == DstBB &&
+ SrcChain->isCold() == DstChain->isCold()) {
+ mergeChains(SrcChain, DstChain, 0, MergeTypeTy::X_Y);
+ }
+ }
+ }
+ }
+
+ /// Compute the Ext-TSP score for a given block order and a list of jumps.
+ double extTSPScore(const MergedChain &MergedBlocks,
+ const std::vector<Jump *> &Jumps) const {
+ if (Jumps.empty())
+ return 0.0;
+ uint64_t CurAddr = 0;
+ MergedBlocks.forEach([&](const Block *BB) {
+ BB->EstimatedAddr = CurAddr;
+ CurAddr += BB->Size;
+ });
+
+ double Score = 0;
+ for (auto &Jump : Jumps) {
+ const Block *SrcBlock = Jump->Source;
+ const Block *DstBlock = Jump->Target;
+ Score += ::extTSPScore(SrcBlock->EstimatedAddr, SrcBlock->Size,
+ DstBlock->EstimatedAddr, Jump->ExecutionCount,
+ Jump->IsConditional);
+ }
+ return Score;
+ }
+
+ /// Compute the gain of merging two chains.
+ ///
+ /// The function considers all possible ways of merging two chains and
+ /// computes the one having the largest increase in ExtTSP objective. The
+ /// result is a pair with the first element being the gain and the second
+ /// element being the corresponding merging type.
+ MergeGainTy getBestMergeGain(Chain *ChainPred, Chain *ChainSucc,
+ ChainEdge *Edge) const {
+ if (Edge->hasCachedMergeGain(ChainPred, ChainSucc)) {
+ return Edge->getCachedMergeGain(ChainPred, ChainSucc);
+ }
+
+ // Precompute jumps between ChainPred and ChainSucc
+ auto Jumps = Edge->jumps();
+ ChainEdge *EdgePP = ChainPred->getEdge(ChainPred);
+ if (EdgePP != nullptr) {
+ Jumps.insert(Jumps.end(), EdgePP->jumps().begin(), EdgePP->jumps().end());
+ }
+ assert(!Jumps.empty() && "trying to merge chains w/o jumps");
+
+ // The object holds the best currently chosen gain of merging the two chains
+ MergeGainTy Gain = MergeGainTy();
+
+ /// Given a merge offset and a list of merge types, try to merge two chains
+ /// and update Gain with a better alternative
+ auto tryChainMerging = [&](size_t Offset,
+ const std::vector<MergeTypeTy> &MergeTypes) {
+ // Skip merging corresponding to concatenation w/o splitting
+ if (Offset == 0 || Offset == ChainPred->blocks().size())
+ return;
+ // Skip merging if it breaks Forced successors
+ auto BB = ChainPred->blocks()[Offset - 1];
+ if (BB->ForcedSucc != nullptr)
+ return;
+ // Apply the merge, compute the corresponding gain, and update the best
+ // value, if the merge is beneficial
+ for (const auto &MergeType : MergeTypes) {
+ Gain.updateIfLessThan(
+ computeMergeGain(ChainPred, ChainSucc, Jumps, Offset, MergeType));
+ }
+ };
+
+ // Try to concatenate two chains w/o splitting
+ Gain.updateIfLessThan(
+ computeMergeGain(ChainPred, ChainSucc, Jumps, 0, MergeTypeTy::X_Y));
+
+ if (EnableChainSplitAlongJumps) {
+ // Attach (a part of) ChainPred before the first block of ChainSucc
+ for (auto &Jump : ChainSucc->blocks().front()->InJumps) {
+ const auto SrcBlock = Jump->Source;
+ if (SrcBlock->CurChain != ChainPred)
+ continue;
+ size_t Offset = SrcBlock->CurIndex + 1;
+ tryChainMerging(Offset, {MergeTypeTy::X1_Y_X2, MergeTypeTy::X2_X1_Y});
+ }
+
+ // Attach (a part of) ChainPred after the last block of ChainSucc
+ for (auto &Jump : ChainSucc->blocks().back()->OutJumps) {
+      const auto DstBlock = Jump->Target;
+ if (DstBlock->CurChain != ChainPred)
+ continue;
+ size_t Offset = DstBlock->CurIndex;
+ tryChainMerging(Offset, {MergeTypeTy::X1_Y_X2, MergeTypeTy::Y_X2_X1});
+ }
+ }
+
+ // Try to break ChainPred in various ways and concatenate with ChainSucc
+ if (ChainPred->blocks().size() <= ChainSplitThreshold) {
+ for (size_t Offset = 1; Offset < ChainPred->blocks().size(); Offset++) {
+ // Try to split the chain in different ways. In practice, applying
+        // X2_Y_X1 merging almost never provides benefits; thus, we exclude
+        // it from consideration to reduce the search space.
+ tryChainMerging(Offset, {MergeTypeTy::X1_Y_X2, MergeTypeTy::Y_X2_X1,
+ MergeTypeTy::X2_X1_Y});
+ }
+ }
+ Edge->setCachedMergeGain(ChainPred, ChainSucc, Gain);
+ return Gain;
+ }
+
+ /// Compute the score gain of merging two chains, respecting a given
+ /// merge 'type' and 'offset'.
+ ///
+ /// The two chains are not modified in the method.
+ MergeGainTy computeMergeGain(const Chain *ChainPred, const Chain *ChainSucc,
+ const std::vector<Jump *> &Jumps,
+ size_t MergeOffset,
+ MergeTypeTy MergeType) const {
+ auto MergedBlocks = mergeBlocks(ChainPred->blocks(), ChainSucc->blocks(),
+ MergeOffset, MergeType);
+
+ // Do not allow a merge that does not preserve the original entry block
+ if ((ChainPred->isEntry() || ChainSucc->isEntry()) &&
+ !MergedBlocks.getFirstBlock()->isEntry())
+ return MergeGainTy();
+
+ // The gain for the new chain
+ auto NewGainScore = extTSPScore(MergedBlocks, Jumps) - ChainPred->score();
+ return MergeGainTy(NewGainScore, MergeOffset, MergeType);
+ }
+
+ /// Merge two chains of blocks respecting a given merge 'type' and 'offset'.
+ ///
+  /// If MergeType == X_Y, the result is a concatenation of the two chains.
+ /// Otherwise, the first chain is cut into two sub-chains at the offset,
+ /// and merged using all possible ways of concatenating three chains.
+ MergedChain mergeBlocks(const std::vector<Block *> &X,
+ const std::vector<Block *> &Y, size_t MergeOffset,
+ MergeTypeTy MergeType) const {
+ // Split the first chain, X, into X1 and X2
+ BlockIter BeginX1 = X.begin();
+ BlockIter EndX1 = X.begin() + MergeOffset;
+ BlockIter BeginX2 = X.begin() + MergeOffset;
+ BlockIter EndX2 = X.end();
+ BlockIter BeginY = Y.begin();
+ BlockIter EndY = Y.end();
+
+ // Construct a new chain from the three existing ones
+ switch (MergeType) {
+ case MergeTypeTy::X_Y:
+ return MergedChain(BeginX1, EndX2, BeginY, EndY);
+ case MergeTypeTy::X1_Y_X2:
+ return MergedChain(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2);
+ case MergeTypeTy::Y_X2_X1:
+ return MergedChain(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1);
+ case MergeTypeTy::X2_X1_Y:
+ return MergedChain(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY);
+ }
+ llvm_unreachable("unexpected chain merge type");
+ }
+
+ /// Merge chain From into chain Into, update the list of active chains,
+ /// adjacency information, and the corresponding cached values.
+ void mergeChains(Chain *Into, Chain *From, size_t MergeOffset,
+ MergeTypeTy MergeType) {
+ assert(Into != From && "a chain cannot be merged with itself");
+
+ // Merge the blocks
+ MergedChain MergedBlocks =
+ mergeBlocks(Into->blocks(), From->blocks(), MergeOffset, MergeType);
+ Into->merge(From, MergedBlocks.getBlocks());
+ Into->mergeEdges(From);
+ From->clear();
+
+ // Update cached ext-tsp score for the new chain
+ ChainEdge *SelfEdge = Into->getEdge(Into);
+ if (SelfEdge != nullptr) {
+ MergedBlocks = MergedChain(Into->blocks().begin(), Into->blocks().end());
+ Into->setScore(extTSPScore(MergedBlocks, SelfEdge->jumps()));
+ }
+
+ // Remove chain From from the list of active chains
+ llvm::erase_value(HotChains, From);
+
+ // Invalidate caches
+ for (auto EdgeIter : Into->edges()) {
+ EdgeIter.second->invalidateCache();
+ }
+ }
+
+ /// Concatenate all chains into a final order of blocks.
+ void concatChains(std::vector<uint64_t> &Order) {
+ // Collect chains and calculate some stats for their sorting
+ std::vector<Chain *> SortedChains;
+ DenseMap<const Chain *, double> ChainDensity;
+ for (auto &Chain : AllChains) {
+ if (!Chain.blocks().empty()) {
+ SortedChains.push_back(&Chain);
+ // Using doubles to avoid overflow of ExecutionCount
+ double Size = 0;
+ double ExecutionCount = 0;
+ for (auto *Block : Chain.blocks()) {
+ Size += static_cast<double>(Block->Size);
+ ExecutionCount += static_cast<double>(Block->ExecutionCount);
+ }
+ assert(Size > 0 && "a chain of zero size");
+ ChainDensity[&Chain] = ExecutionCount / Size;
+ }
+ }
+
+ // Sorting chains by density in the decreasing order
+ std::stable_sort(SortedChains.begin(), SortedChains.end(),
+ [&](const Chain *C1, const Chain *C2) {
+ // Make sure the original entry block is at the
+ // beginning of the order
+ if (C1->isEntry() != C2->isEntry()) {
+ return C1->isEntry();
+ }
+
+ const double D1 = ChainDensity[C1];
+ const double D2 = ChainDensity[C2];
+ // Compare by density and break ties by chain identifiers
+ return (D1 != D2) ? (D1 > D2) : (C1->id() < C2->id());
+ });
+
+ // Collect the blocks in the order specified by their chains
+ Order.reserve(NumNodes);
+ for (Chain *Chain : SortedChains) {
+ for (Block *Block : Chain->blocks()) {
+ Order.push_back(Block->Index);
+ }
+ }
+ }
+
+private:
+ /// The number of nodes in the graph.
+ const size_t NumNodes;
+
+ /// Successors of each node.
+ std::vector<std::vector<uint64_t>> SuccNodes;
+
+ /// Predecessors of each node.
+ std::vector<std::vector<uint64_t>> PredNodes;
+
+ /// All basic blocks.
+ std::vector<Block> AllBlocks;
+
+ /// All jumps between blocks.
+ std::vector<Jump> AllJumps;
+
+ /// All chains of basic blocks.
+ std::vector<Chain> AllChains;
+
+ /// All edges between chains.
+ std::vector<ChainEdge> AllEdges;
+
+ /// Active chains. The vector gets updated at runtime when chains are merged.
+ std::vector<Chain *> HotChains;
+};
+
+} // end of anonymous namespace
+
+std::vector<uint64_t> llvm::applyExtTspLayout(
+ const std::vector<uint64_t> &NodeSizes,
+ const std::vector<uint64_t> &NodeCounts,
+ const std::vector<std::pair<EdgeT, uint64_t>> &EdgeCounts) {
+ size_t NumNodes = NodeSizes.size();
+
+ // Verify correctness of the input data.
+ assert(NodeCounts.size() == NodeSizes.size() && "Incorrect input");
+ assert(NumNodes > 2 && "Incorrect input");
+
+ // Apply the reordering algorithm.
+ auto Alg = ExtTSPImpl(NumNodes, NodeSizes, NodeCounts, EdgeCounts);
+ std::vector<uint64_t> Result;
+ Alg.run(Result);
+
+ // Verify correctness of the output.
+ assert(Result.front() == 0 && "Original entry point is not preserved");
+ assert(Result.size() == NumNodes && "Incorrect size of reordered layout");
+ return Result;
+}
+
+double llvm::calcExtTspScore(
+ const std::vector<uint64_t> &Order, const std::vector<uint64_t> &NodeSizes,
+ const std::vector<uint64_t> &NodeCounts,
+ const std::vector<std::pair<EdgeT, uint64_t>> &EdgeCounts) {
+ // Estimate addresses of the blocks in memory
+ std::vector<uint64_t> Addr(NodeSizes.size(), 0);
+ for (size_t Idx = 1; Idx < Order.size(); Idx++) {
+ Addr[Order[Idx]] = Addr[Order[Idx - 1]] + NodeSizes[Order[Idx - 1]];
+ }
+ std::vector<uint64_t> OutDegree(NodeSizes.size(), 0);
+ for (auto It : EdgeCounts) {
+ auto Pred = It.first.first;
+ OutDegree[Pred]++;
+ }
+
+ // Increase the score for each jump
+ double Score = 0;
+ for (auto It : EdgeCounts) {
+ auto Pred = It.first.first;
+ auto Succ = It.first.second;
+ uint64_t Count = It.second;
+ bool IsConditional = OutDegree[Pred] > 1;
+ Score += ::extTSPScore(Addr[Pred], NodeSizes[Pred], Addr[Succ], Count,
+ IsConditional);
+ }
+ return Score;
+}
+
+double llvm::calcExtTspScore(
+ const std::vector<uint64_t> &NodeSizes,
+ const std::vector<uint64_t> &NodeCounts,
+ const std::vector<std::pair<EdgeT, uint64_t>> &EdgeCounts) {
+ std::vector<uint64_t> Order(NodeSizes.size());
+ for (size_t Idx = 0; Idx < NodeSizes.size(); Idx++) {
+ Order[Idx] = Idx;
+ }
+ return calcExtTspScore(Order, NodeSizes, NodeCounts, EdgeCounts);
+}
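[Editor's note, not part of the vendored patch] A minimal sketch of how the two exported entry points above, applyExtTspLayout and calcExtTspScore, might be driven. The four-block diamond CFG, its sizes and counts, and the main harness are illustrative assumptions; building it assumes the declarations from llvm/Transforms/Utils/CodeLayout.h and linking against LLVM's TransformUtils library.

#include "llvm/Transforms/Utils/CodeLayout.h"
#include <cstdint>
#include <utility>
#include <vector>

int main() {
  // Hypothetical diamond CFG: block 0 is the entry, edges 0->{1,2}->3.
  std::vector<uint64_t> NodeSizes = {16, 32, 32, 16};    // code sizes in bytes
  std::vector<uint64_t> NodeCounts = {100, 90, 10, 100}; // execution counts
  std::vector<std::pair<std::pair<uint64_t, uint64_t>, uint64_t>> EdgeCounts =
      {{{0, 1}, 90}, {{0, 2}, 10}, {{1, 3}, 90}, {{2, 3}, 10}};

  // Compute an Ext-TSP-optimized block order; the entry (node 0) stays first.
  std::vector<uint64_t> Order =
      llvm::applyExtTspLayout(NodeSizes, NodeCounts, EdgeCounts);

  // Compare the objective of the identity order against the new order.
  double Before = llvm::calcExtTspScore(NodeSizes, NodeCounts, EdgeCounts);
  double After =
      llvm::calcExtTspScore(Order, NodeSizes, NodeCounts, EdgeCounts);
  return After + 1e-9 >= Before ? 0 : 1;
}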
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/CodeMoverUtils.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/CodeMoverUtils.cpp
new file mode 100644
index 0000000000..4a67197417
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/CodeMoverUtils.cpp
@@ -0,0 +1,478 @@
+//===- CodeMoverUtils.cpp - CodeMover Utilities ----------------------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions perform movements on basic blocks, and instructions
+// contained within a function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CodeMoverUtils.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Dominators.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "codemover-utils"
+
+STATISTIC(HasDependences,
+ "Cannot move across instructions that has memory dependences");
+STATISTIC(MayThrowException, "Cannot move across instructions that may throw");
+STATISTIC(NotControlFlowEquivalent,
+ "Instructions are not control flow equivalent");
+STATISTIC(NotMovedPHINode, "Movement of PHINodes is not supported");
+STATISTIC(NotMovedTerminator, "Movement of Terminators is not supported");
+
+namespace {
+/// Represent a control condition. A control condition is a condition of a
+/// terminator that decides which successor to execute. The pointer field
+/// holds the condition value of the terminator. The integer field is a bool:
+/// it is true when the basic block is executed if the condition is true. For
+/// example, given `br %cond, bb0, bb1`, %cond is a control condition of bb0
+/// with the integer field equal to true, while %cond is a control condition
+/// of bb1 with the integer field equal to false.
+using ControlCondition = PointerIntPair<Value *, 1, bool>;
+#ifndef NDEBUG
+raw_ostream &operator<<(raw_ostream &OS, const ControlCondition &C) {
+ OS << "[" << *C.getPointer() << ", " << (C.getInt() ? "true" : "false")
+ << "]";
+ return OS;
+}
+#endif
+
+/// Represent a set of control conditions required to execute ToBB from FromBB.
+class ControlConditions {
+ using ConditionVectorTy = SmallVector<ControlCondition, 6>;
+
+ /// A SmallVector of control conditions.
+ ConditionVectorTy Conditions;
+
+public:
+ /// Return a ControlConditions which stores all conditions required to execute
+ /// \p BB from \p Dominator. If \p MaxLookup is non-zero, it limits the
+ /// number of conditions to collect. Return std::nullopt if not all conditions
+ /// are collected successfully, or we hit the limit.
+ static const std::optional<ControlConditions>
+ collectControlConditions(const BasicBlock &BB, const BasicBlock &Dominator,
+ const DominatorTree &DT,
+ const PostDominatorTree &PDT,
+ unsigned MaxLookup = 6);
+
+  /// Return true if no control conditions are required to execute ToBB
+  /// from FromBB.
+ bool isUnconditional() const { return Conditions.empty(); }
+
+ /// Return a constant reference of Conditions.
+ const ConditionVectorTy &getControlConditions() const { return Conditions; }
+
+  /// Add \p C to Conditions unless an equivalent condition is already
+  /// present. Return true if it was inserted.
+ bool addControlCondition(ControlCondition C);
+
+ /// Return true if for all control conditions in Conditions, there exists an
+ /// equivalent control condition in \p Other.Conditions.
+ bool isEquivalent(const ControlConditions &Other) const;
+
+ /// Return true if \p C1 and \p C2 are equivalent.
+ static bool isEquivalent(const ControlCondition &C1,
+ const ControlCondition &C2);
+
+private:
+ ControlConditions() = default;
+
+ static bool isEquivalent(const Value &V1, const Value &V2);
+ static bool isInverse(const Value &V1, const Value &V2);
+};
+} // namespace
+
+static bool domTreeLevelBefore(DominatorTree *DT, const Instruction *InstA,
+ const Instruction *InstB) {
+ // Use ordered basic block in case the 2 instructions are in the same
+ // block.
+ if (InstA->getParent() == InstB->getParent())
+ return InstA->comesBefore(InstB);
+
+ DomTreeNode *DA = DT->getNode(InstA->getParent());
+ DomTreeNode *DB = DT->getNode(InstB->getParent());
+ return DA->getLevel() < DB->getLevel();
+}
+
+const std::optional<ControlConditions>
+ControlConditions::collectControlConditions(const BasicBlock &BB,
+ const BasicBlock &Dominator,
+ const DominatorTree &DT,
+ const PostDominatorTree &PDT,
+ unsigned MaxLookup) {
+ assert(DT.dominates(&Dominator, &BB) && "Expecting Dominator to dominate BB");
+
+ ControlConditions Conditions;
+ unsigned NumConditions = 0;
+
+  // BB is executed unconditionally from itself.
+ if (&Dominator == &BB)
+ return Conditions;
+
+ const BasicBlock *CurBlock = &BB;
+ // Walk up the dominator tree from the associated DT node for BB to the
+ // associated DT node for Dominator.
+ do {
+ assert(DT.getNode(CurBlock) && "Expecting a valid DT node for CurBlock");
+ BasicBlock *IDom = DT.getNode(CurBlock)->getIDom()->getBlock();
+ assert(DT.dominates(&Dominator, IDom) &&
+ "Expecting Dominator to dominate IDom");
+
+ // Limitation: can only handle branch instruction currently.
+ const BranchInst *BI = dyn_cast<BranchInst>(IDom->getTerminator());
+ if (!BI)
+ return std::nullopt;
+
+ bool Inserted = false;
+ if (PDT.dominates(CurBlock, IDom)) {
+ LLVM_DEBUG(dbgs() << CurBlock->getName()
+ << " is executed unconditionally from "
+ << IDom->getName() << "\n");
+ } else if (PDT.dominates(CurBlock, BI->getSuccessor(0))) {
+ LLVM_DEBUG(dbgs() << CurBlock->getName() << " is executed when \""
+ << *BI->getCondition() << "\" is true from "
+ << IDom->getName() << "\n");
+ Inserted = Conditions.addControlCondition(
+ ControlCondition(BI->getCondition(), true));
+ } else if (PDT.dominates(CurBlock, BI->getSuccessor(1))) {
+ LLVM_DEBUG(dbgs() << CurBlock->getName() << " is executed when \""
+ << *BI->getCondition() << "\" is false from "
+ << IDom->getName() << "\n");
+ Inserted = Conditions.addControlCondition(
+ ControlCondition(BI->getCondition(), false));
+ } else
+ return std::nullopt;
+
+ if (Inserted)
+ ++NumConditions;
+
+ if (MaxLookup != 0 && NumConditions > MaxLookup)
+ return std::nullopt;
+
+ CurBlock = IDom;
+ } while (CurBlock != &Dominator);
+
+ return Conditions;
+}
+
+bool ControlConditions::addControlCondition(ControlCondition C) {
+ bool Inserted = false;
+ if (none_of(Conditions, [&](ControlCondition &Exists) {
+ return ControlConditions::isEquivalent(C, Exists);
+ })) {
+ Conditions.push_back(C);
+ Inserted = true;
+ }
+
+ LLVM_DEBUG(dbgs() << (Inserted ? "Inserted " : "Not inserted ") << C << "\n");
+ return Inserted;
+}
+
+bool ControlConditions::isEquivalent(const ControlConditions &Other) const {
+ if (Conditions.empty() && Other.Conditions.empty())
+ return true;
+
+ if (Conditions.size() != Other.Conditions.size())
+ return false;
+
+ return all_of(Conditions, [&](const ControlCondition &C) {
+ return any_of(Other.Conditions, [&](const ControlCondition &OtherC) {
+ return ControlConditions::isEquivalent(C, OtherC);
+ });
+ });
+}
+
+bool ControlConditions::isEquivalent(const ControlCondition &C1,
+ const ControlCondition &C2) {
+ if (C1.getInt() == C2.getInt()) {
+ if (isEquivalent(*C1.getPointer(), *C2.getPointer()))
+ return true;
+ } else if (isInverse(*C1.getPointer(), *C2.getPointer()))
+ return true;
+
+ return false;
+}
+
+// FIXME: Use SCEV and reuse GVN/CSE logic to check for equivalence between
+// Values.
+// Currently, isEquivalent relies on other passes to ensure equivalent conditions
+// have the same value, e.g. GVN.
+bool ControlConditions::isEquivalent(const Value &V1, const Value &V2) {
+ return &V1 == &V2;
+}
+
+bool ControlConditions::isInverse(const Value &V1, const Value &V2) {
+ if (const CmpInst *Cmp1 = dyn_cast<CmpInst>(&V1))
+ if (const CmpInst *Cmp2 = dyn_cast<CmpInst>(&V2)) {
+ if (Cmp1->getPredicate() == Cmp2->getInversePredicate() &&
+ Cmp1->getOperand(0) == Cmp2->getOperand(0) &&
+ Cmp1->getOperand(1) == Cmp2->getOperand(1))
+ return true;
+
+ if (Cmp1->getPredicate() ==
+ CmpInst::getSwappedPredicate(Cmp2->getInversePredicate()) &&
+ Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
+ Cmp1->getOperand(1) == Cmp2->getOperand(0))
+ return true;
+ }
+ return false;
+}
+
+bool llvm::isControlFlowEquivalent(const Instruction &I0, const Instruction &I1,
+ const DominatorTree &DT,
+ const PostDominatorTree &PDT) {
+ return isControlFlowEquivalent(*I0.getParent(), *I1.getParent(), DT, PDT);
+}
+
+bool llvm::isControlFlowEquivalent(const BasicBlock &BB0, const BasicBlock &BB1,
+ const DominatorTree &DT,
+ const PostDominatorTree &PDT) {
+ if (&BB0 == &BB1)
+ return true;
+
+ if ((DT.dominates(&BB0, &BB1) && PDT.dominates(&BB1, &BB0)) ||
+ (PDT.dominates(&BB0, &BB1) && DT.dominates(&BB1, &BB0)))
+ return true;
+
+ // If the set of conditions required to execute BB0 and BB1 from their common
+ // dominator are the same, then BB0 and BB1 are control flow equivalent.
+ const BasicBlock *CommonDominator = DT.findNearestCommonDominator(&BB0, &BB1);
+ LLVM_DEBUG(dbgs() << "The nearest common dominator of " << BB0.getName()
+ << " and " << BB1.getName() << " is "
+ << CommonDominator->getName() << "\n");
+
+ const std::optional<ControlConditions> BB0Conditions =
+ ControlConditions::collectControlConditions(BB0, *CommonDominator, DT,
+ PDT);
+ if (BB0Conditions == std::nullopt)
+ return false;
+
+ const std::optional<ControlConditions> BB1Conditions =
+ ControlConditions::collectControlConditions(BB1, *CommonDominator, DT,
+ PDT);
+ if (BB1Conditions == std::nullopt)
+ return false;
+
+ return BB0Conditions->isEquivalent(*BB1Conditions);
+}
+
+static bool reportInvalidCandidate(const Instruction &I,
+ llvm::Statistic &Stat) {
+ ++Stat;
+ LLVM_DEBUG(dbgs() << "Unable to move instruction: " << I << ". "
+ << Stat.getDesc());
+ return false;
+}
+
+/// Collect all instructions in between \p StartInst and \p EndInst, and store
+/// them in \p InBetweenInsts.
+static void
+collectInstructionsInBetween(Instruction &StartInst, const Instruction &EndInst,
+ SmallPtrSetImpl<Instruction *> &InBetweenInsts) {
+ assert(InBetweenInsts.empty() && "Expecting InBetweenInsts to be empty");
+
+ /// Get the next instructions of \p I, and push them to \p WorkList.
+ auto getNextInsts = [](Instruction &I,
+ SmallPtrSetImpl<Instruction *> &WorkList) {
+ if (Instruction *NextInst = I.getNextNode())
+ WorkList.insert(NextInst);
+ else {
+ assert(I.isTerminator() && "Expecting a terminator instruction");
+ for (BasicBlock *Succ : successors(&I))
+ WorkList.insert(&Succ->front());
+ }
+ };
+
+ SmallPtrSet<Instruction *, 10> WorkList;
+ getNextInsts(StartInst, WorkList);
+ while (!WorkList.empty()) {
+ Instruction *CurInst = *WorkList.begin();
+ WorkList.erase(CurInst);
+
+ if (CurInst == &EndInst)
+ continue;
+
+ if (!InBetweenInsts.insert(CurInst).second)
+ continue;
+
+ getNextInsts(*CurInst, WorkList);
+ }
+}
+
+bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
+ DominatorTree &DT, const PostDominatorTree *PDT,
+ DependenceInfo *DI, bool CheckForEntireBlock) {
+ // Skip tests when we don't have PDT or DI
+ if (!PDT || !DI)
+ return false;
+
+ // Cannot move itself before itself.
+ if (&I == &InsertPoint)
+ return false;
+
+ // Not moved.
+ if (I.getNextNode() == &InsertPoint)
+ return true;
+
+ if (isa<PHINode>(I) || isa<PHINode>(InsertPoint))
+ return reportInvalidCandidate(I, NotMovedPHINode);
+
+ if (I.isTerminator())
+ return reportInvalidCandidate(I, NotMovedTerminator);
+
+ // TODO remove this limitation.
+ if (!isControlFlowEquivalent(I, InsertPoint, DT, *PDT))
+ return reportInvalidCandidate(I, NotControlFlowEquivalent);
+
+ if (isReachedBefore(&I, &InsertPoint, &DT, PDT))
+ for (const Use &U : I.uses())
+ if (auto *UserInst = dyn_cast<Instruction>(U.getUser()))
+ if (UserInst != &InsertPoint && !DT.dominates(&InsertPoint, U))
+ return false;
+ if (isReachedBefore(&InsertPoint, &I, &DT, PDT))
+ for (const Value *Op : I.operands())
+ if (auto *OpInst = dyn_cast<Instruction>(Op)) {
+ if (&InsertPoint == OpInst)
+ return false;
+ // If OpInst is an instruction that appears earlier in the same BB as
+ // I, then it is okay to move since OpInst will still be available.
+ if (CheckForEntireBlock && I.getParent() == OpInst->getParent() &&
+ DT.dominates(OpInst, &I))
+ continue;
+ if (!DT.dominates(OpInst, &InsertPoint))
+ return false;
+ }
+
+ DT.updateDFSNumbers();
+ const bool MoveForward = domTreeLevelBefore(&DT, &I, &InsertPoint);
+ Instruction &StartInst = (MoveForward ? I : InsertPoint);
+ Instruction &EndInst = (MoveForward ? InsertPoint : I);
+ SmallPtrSet<Instruction *, 10> InstsToCheck;
+ collectInstructionsInBetween(StartInst, EndInst, InstsToCheck);
+ if (!MoveForward)
+ InstsToCheck.insert(&InsertPoint);
+
+  // Check if there exist instructions which may throw, may synchronize, or
+  // may never return, between I and InsertPoint.
+ if (!isSafeToSpeculativelyExecute(&I))
+ if (llvm::any_of(InstsToCheck, [](Instruction *I) {
+ if (I->mayThrow())
+ return true;
+
+ const CallBase *CB = dyn_cast<CallBase>(I);
+ if (!CB)
+ return false;
+ if (!CB->hasFnAttr(Attribute::WillReturn))
+ return true;
+ if (!CB->hasFnAttr(Attribute::NoSync))
+ return true;
+
+ return false;
+ })) {
+ return reportInvalidCandidate(I, MayThrowException);
+ }
+
+ // Check if I has any output/flow/anti dependences with instructions from \p
+ // StartInst to \p EndInst.
+ if (llvm::any_of(InstsToCheck, [&DI, &I](Instruction *CurInst) {
+ auto DepResult = DI->depends(&I, CurInst, true);
+ if (DepResult && (DepResult->isOutput() || DepResult->isFlow() ||
+ DepResult->isAnti()))
+ return true;
+ return false;
+ }))
+ return reportInvalidCandidate(I, HasDependences);
+
+ return true;
+}
+
+bool llvm::isSafeToMoveBefore(BasicBlock &BB, Instruction &InsertPoint,
+ DominatorTree &DT, const PostDominatorTree *PDT,
+ DependenceInfo *DI) {
+ return llvm::all_of(BB, [&](Instruction &I) {
+ if (BB.getTerminator() == &I)
+ return true;
+
+ return isSafeToMoveBefore(I, InsertPoint, DT, PDT, DI,
+ /*CheckForEntireBlock=*/true);
+ });
+}
+
+void llvm::moveInstructionsToTheBeginning(BasicBlock &FromBB, BasicBlock &ToBB,
+ DominatorTree &DT,
+ const PostDominatorTree &PDT,
+ DependenceInfo &DI) {
+ for (Instruction &I :
+ llvm::make_early_inc_range(llvm::drop_begin(llvm::reverse(FromBB)))) {
+ Instruction *MovePos = ToBB.getFirstNonPHIOrDbg();
+
+ if (isSafeToMoveBefore(I, *MovePos, DT, &PDT, &DI))
+ I.moveBefore(MovePos);
+ }
+}
+
+void llvm::moveInstructionsToTheEnd(BasicBlock &FromBB, BasicBlock &ToBB,
+ DominatorTree &DT,
+ const PostDominatorTree &PDT,
+ DependenceInfo &DI) {
+ Instruction *MovePos = ToBB.getTerminator();
+ while (FromBB.size() > 1) {
+ Instruction &I = FromBB.front();
+ if (isSafeToMoveBefore(I, *MovePos, DT, &PDT, &DI))
+ I.moveBefore(MovePos);
+ }
+}
+
+bool llvm::nonStrictlyPostDominate(const BasicBlock *ThisBlock,
+ const BasicBlock *OtherBlock,
+ const DominatorTree *DT,
+ const PostDominatorTree *PDT) {
+ assert(isControlFlowEquivalent(*ThisBlock, *OtherBlock, *DT, *PDT) &&
+ "ThisBlock and OtherBlock must be CFG equivalent!");
+ const BasicBlock *CommonDominator =
+ DT->findNearestCommonDominator(ThisBlock, OtherBlock);
+ if (CommonDominator == nullptr)
+ return false;
+
+ /// Recursively check the predecessors of \p ThisBlock up to
+ /// their common dominator, and see if any of them post-dominates
+ /// \p OtherBlock.
+ SmallVector<const BasicBlock *, 8> WorkList;
+ SmallPtrSet<const BasicBlock *, 8> Visited;
+ WorkList.push_back(ThisBlock);
+ while (!WorkList.empty()) {
+ const BasicBlock *CurBlock = WorkList.back();
+ WorkList.pop_back();
+ Visited.insert(CurBlock);
+ if (PDT->dominates(CurBlock, OtherBlock))
+ return true;
+
+ for (const auto *Pred : predecessors(CurBlock)) {
+ if (Pred == CommonDominator || Visited.count(Pred))
+ continue;
+ WorkList.push_back(Pred);
+ }
+ }
+ return false;
+}
+
+bool llvm::isReachedBefore(const Instruction *I0, const Instruction *I1,
+ const DominatorTree *DT,
+ const PostDominatorTree *PDT) {
+ const BasicBlock *BB0 = I0->getParent();
+ const BasicBlock *BB1 = I1->getParent();
+ if (BB0 == BB1)
+ return DT->dominates(I0, I1);
+
+ return nonStrictlyPostDominate(BB1, BB0, DT, PDT);
+}
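[Editor's note, not part of the vendored patch] A minimal sketch of how a client might combine the utilities above; the helper name hoistWhereLegal is hypothetical, and the analyses (DominatorTree, PostDominatorTree, DependenceInfo) are assumed to be supplied by the caller, e.g. from a pass's analysis manager. It mirrors moveInstructionsToTheEnd but simply skips instructions that cannot be moved.

#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/CodeMoverUtils.h"

using namespace llvm;

// Move every instruction of FromBB that is provably safe to move up to the
// point just before ToBB's terminator; leave everything else in place.
static void hoistWhereLegal(BasicBlock &FromBB, BasicBlock &ToBB,
                            DominatorTree &DT, PostDominatorTree &PDT,
                            DependenceInfo &DI) {
  // The utilities only reason about blocks executed under the same control
  // conditions, so bail out early otherwise.
  if (!isControlFlowEquivalent(FromBB, ToBB, DT, PDT))
    return;

  Instruction *InsertPoint = ToBB.getTerminator();
  for (Instruction &I : make_early_inc_range(FromBB)) {
    if (I.isTerminator())
      continue;
    if (isSafeToMoveBefore(I, *InsertPoint, DT, &PDT, &DI))
      I.moveBefore(InsertPoint);
  }
}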
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/CtorUtils.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/CtorUtils.cpp
new file mode 100644
index 0000000000..c997f39508
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/CtorUtils.cpp
@@ -0,0 +1,154 @@
+//===- CtorUtils.cpp - Helpers for working with global_ctors ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines functions that are used to process llvm.global_ctors.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CtorUtils.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <numeric>
+
+#define DEBUG_TYPE "ctor_utils"
+
+using namespace llvm;
+
+/// Given a specified llvm.global_ctors list, remove the listed elements.
+static void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemove) {
+ // Filter out the initializer elements to remove.
+ ConstantArray *OldCA = cast<ConstantArray>(GCL->getInitializer());
+ SmallVector<Constant *, 10> CAList;
+ for (unsigned I = 0, E = OldCA->getNumOperands(); I < E; ++I)
+ if (!CtorsToRemove.test(I))
+ CAList.push_back(OldCA->getOperand(I));
+
+ // Create the new array initializer.
+ ArrayType *ATy =
+ ArrayType::get(OldCA->getType()->getElementType(), CAList.size());
+ Constant *CA = ConstantArray::get(ATy, CAList);
+
+ // If we didn't change the number of elements, don't create a new GV.
+ if (CA->getType() == OldCA->getType()) {
+ GCL->setInitializer(CA);
+ return;
+ }
+
+ // Create the new global and insert it next to the existing list.
+ GlobalVariable *NGV =
+ new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(),
+ CA, "", GCL->getThreadLocalMode());
+ GCL->getParent()->getGlobalList().insert(GCL->getIterator(), NGV);
+ NGV->takeName(GCL);
+
+ // Nuke the old list, replacing any uses with the new one.
+ if (!GCL->use_empty()) {
+ Constant *V = NGV;
+ if (V->getType() != GCL->getType())
+ V = ConstantExpr::getBitCast(V, GCL->getType());
+ GCL->replaceAllUsesWith(V);
+ }
+ GCL->eraseFromParent();
+}
+
+/// Given a llvm.global_ctors list that we can understand,
+/// return a list of the functions and null terminator as a vector.
+static std::vector<std::pair<uint32_t, Function *>>
+parseGlobalCtors(GlobalVariable *GV) {
+ ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
+ std::vector<std::pair<uint32_t, Function *>> Result;
+ Result.reserve(CA->getNumOperands());
+ for (auto &V : CA->operands()) {
+ ConstantStruct *CS = cast<ConstantStruct>(V);
+ Result.emplace_back(cast<ConstantInt>(CS->getOperand(0))->getZExtValue(),
+ dyn_cast<Function>(CS->getOperand(1)));
+ }
+ return Result;
+}
+
+/// Find the llvm.global_ctors list.
+static GlobalVariable *findGlobalCtors(Module &M) {
+ GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
+ if (!GV)
+ return nullptr;
+
+ // Verify that the initializer is simple enough for us to handle. We are
+ // only allowed to optimize the initializer if it is unique.
+ if (!GV->hasUniqueInitializer())
+ return nullptr;
+
+ // If there are no ctors, then the initializer might be null/undef/poison.
+ // Ignore anything but an array.
+ ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (!CA)
+ return nullptr;
+
+ for (auto &V : CA->operands()) {
+ if (isa<ConstantAggregateZero>(V))
+ continue;
+ ConstantStruct *CS = cast<ConstantStruct>(V);
+ if (isa<ConstantPointerNull>(CS->getOperand(1)))
+ continue;
+
+ // Can only handle global constructors with no arguments.
+ Function *F = dyn_cast<Function>(CS->getOperand(1));
+ if (!F || F->arg_size() != 0)
+ return nullptr;
+ }
+ return GV;
+}
+
+/// Call "ShouldRemove" for every entry in M's global_ctor list and remove the
+/// entries for which it returns true. Return true if anything changed.
+bool llvm::optimizeGlobalCtorsList(
+ Module &M, function_ref<bool(uint32_t, Function *)> ShouldRemove) {
+ GlobalVariable *GlobalCtors = findGlobalCtors(M);
+ if (!GlobalCtors)
+ return false;
+
+ std::vector<std::pair<uint32_t, Function *>> Ctors =
+ parseGlobalCtors(GlobalCtors);
+ if (Ctors.empty())
+ return false;
+
+ bool MadeChange = false;
+ // Loop over global ctors, optimizing them when we can.
+ BitVector CtorsToRemove(Ctors.size());
+ std::vector<size_t> CtorsByPriority(Ctors.size());
+ std::iota(CtorsByPriority.begin(), CtorsByPriority.end(), 0);
+ stable_sort(CtorsByPriority, [&](size_t LHS, size_t RHS) {
+ return Ctors[LHS].first < Ctors[RHS].first;
+ });
+ for (unsigned CtorIndex : CtorsByPriority) {
+ const uint32_t Priority = Ctors[CtorIndex].first;
+ Function *F = Ctors[CtorIndex].second;
+ if (!F)
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n");
+
+ // If we can evaluate the ctor at compile time, do.
+ if (ShouldRemove(Priority, F)) {
+ Ctors[CtorIndex].second = nullptr;
+ CtorsToRemove.set(CtorIndex);
+ MadeChange = true;
+ continue;
+ }
+ }
+
+ if (!MadeChange)
+ return false;
+
+ removeGlobalCtors(GlobalCtors, CtorsToRemove);
+ return true;
+}
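[Editor's note, not part of the vendored patch] A minimal sketch of the kind of callback optimizeGlobalCtorsList expects; removeTrivialCtors is a hypothetical helper that drops constructors whose body is a lone return, similar in spirit to what GlobalOpt does once a ctor has been fully evaluated.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/CtorUtils.h"

using namespace llvm;

// Remove llvm.global_ctors entries whose constructor body is empty (a single
// `ret`), regardless of priority. Returns true if the list changed.
static bool removeTrivialCtors(Module &M) {
  return optimizeGlobalCtorsList(M, [](uint32_t /*Priority*/, Function *F) {
    if (!F || F->isDeclaration())
      return false;
    const BasicBlock &Entry = F->getEntryBlock();
    // A one-instruction entry block ending in a return has no observable
    // effect, so the entry can be dropped from the list.
    return Entry.size() == 1 && isa<ReturnInst>(Entry.getTerminator());
  });
}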
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/Debugify.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/Debugify.cpp
new file mode 100644
index 0000000000..989473693a
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/Debugify.cpp
@@ -0,0 +1,1085 @@
+//===- Debugify.cpp - Check debug info preservation in optimizations ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file In the `synthetic` mode, the `-debugify` pass attaches synthetic
+/// debug info to everything. It can be used to create targeted tests for
+/// debug info preservation. In addition, in the `original` mode, it can check
+/// whether the original debug info is preserved. The `synthetic` mode is the
+/// default one.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Debugify.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassInstrumentation.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/JSON.h"
+#include <optional>
+
+#define DEBUG_TYPE "debugify"
+
+using namespace llvm;
+
+namespace {
+
+cl::opt<bool> Quiet("debugify-quiet",
+ cl::desc("Suppress verbose debugify output"));
+
+cl::opt<uint64_t> DebugifyFunctionsLimit(
+ "debugify-func-limit",
+ cl::desc("Set max number of processed functions per pass."),
+ cl::init(UINT_MAX));
+
+enum class Level {
+ Locations,
+ LocationsAndVariables
+};
+
+cl::opt<Level> DebugifyLevel(
+ "debugify-level", cl::desc("Kind of debug info to add"),
+ cl::values(clEnumValN(Level::Locations, "locations", "Locations only"),
+ clEnumValN(Level::LocationsAndVariables, "location+variables",
+ "Locations and Variables")),
+ cl::init(Level::LocationsAndVariables));
+
+raw_ostream &dbg() { return Quiet ? nulls() : errs(); }
+
+uint64_t getAllocSizeInBits(Module &M, Type *Ty) {
+ return Ty->isSized() ? M.getDataLayout().getTypeAllocSizeInBits(Ty) : 0;
+}
+
+bool isFunctionSkipped(Function &F) {
+ return F.isDeclaration() || !F.hasExactDefinition();
+}
+
+/// Find the basic block's terminating instruction.
+///
+/// Special care is needed to handle musttail and deopt calls, as these behave
+/// like (but are in fact not) terminators.
+Instruction *findTerminatingInstruction(BasicBlock &BB) {
+ if (auto *I = BB.getTerminatingMustTailCall())
+ return I;
+ if (auto *I = BB.getTerminatingDeoptimizeCall())
+ return I;
+ return BB.getTerminator();
+}
+} // end anonymous namespace
+
+bool llvm::applyDebugifyMetadata(
+ Module &M, iterator_range<Module::iterator> Functions, StringRef Banner,
+ std::function<bool(DIBuilder &DIB, Function &F)> ApplyToMF) {
+ // Skip modules with debug info.
+ if (M.getNamedMetadata("llvm.dbg.cu")) {
+ dbg() << Banner << "Skipping module with debug info\n";
+ return false;
+ }
+
+ DIBuilder DIB(M);
+ LLVMContext &Ctx = M.getContext();
+ auto *Int32Ty = Type::getInt32Ty(Ctx);
+
+ // Get a DIType which corresponds to Ty.
+ DenseMap<uint64_t, DIType *> TypeCache;
+ auto getCachedDIType = [&](Type *Ty) -> DIType * {
+ uint64_t Size = getAllocSizeInBits(M, Ty);
+ DIType *&DTy = TypeCache[Size];
+ if (!DTy) {
+ std::string Name = "ty" + utostr(Size);
+ DTy = DIB.createBasicType(Name, Size, dwarf::DW_ATE_unsigned);
+ }
+ return DTy;
+ };
+
+ unsigned NextLine = 1;
+ unsigned NextVar = 1;
+ auto File = DIB.createFile(M.getName(), "/");
+ auto CU = DIB.createCompileUnit(dwarf::DW_LANG_C, File, "debugify",
+ /*isOptimized=*/true, "", 0);
+
+ // Visit each instruction.
+ for (Function &F : Functions) {
+ if (isFunctionSkipped(F))
+ continue;
+
+ bool InsertedDbgVal = false;
+ auto SPType =
+ DIB.createSubroutineType(DIB.getOrCreateTypeArray(std::nullopt));
+ DISubprogram::DISPFlags SPFlags =
+ DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized;
+ if (F.hasPrivateLinkage() || F.hasInternalLinkage())
+ SPFlags |= DISubprogram::SPFlagLocalToUnit;
+ auto SP = DIB.createFunction(CU, F.getName(), F.getName(), File, NextLine,
+ SPType, NextLine, DINode::FlagZero, SPFlags);
+ F.setSubprogram(SP);
+
+ // Helper that inserts a dbg.value before \p InsertBefore, copying the
+ // location (and possibly the type, if it's non-void) from \p TemplateInst.
+ auto insertDbgVal = [&](Instruction &TemplateInst,
+ Instruction *InsertBefore) {
+ std::string Name = utostr(NextVar++);
+ Value *V = &TemplateInst;
+ if (TemplateInst.getType()->isVoidTy())
+ V = ConstantInt::get(Int32Ty, 0);
+ const DILocation *Loc = TemplateInst.getDebugLoc().get();
+ auto LocalVar = DIB.createAutoVariable(SP, Name, File, Loc->getLine(),
+ getCachedDIType(V->getType()),
+ /*AlwaysPreserve=*/true);
+ DIB.insertDbgValueIntrinsic(V, LocalVar, DIB.createExpression(), Loc,
+ InsertBefore);
+ };
+
+ for (BasicBlock &BB : F) {
+ // Attach debug locations.
+ for (Instruction &I : BB)
+ I.setDebugLoc(DILocation::get(Ctx, NextLine++, 1, SP));
+
+ if (DebugifyLevel < Level::LocationsAndVariables)
+ continue;
+
+ // Inserting debug values into EH pads can break IR invariants.
+ if (BB.isEHPad())
+ continue;
+
+ // Find the terminating instruction, after which no debug values are
+ // attached.
+ Instruction *LastInst = findTerminatingInstruction(BB);
+ assert(LastInst && "Expected basic block with a terminator");
+
+ // Maintain an insertion point which can't be invalidated when updates
+ // are made.
+ BasicBlock::iterator InsertPt = BB.getFirstInsertionPt();
+ assert(InsertPt != BB.end() && "Expected to find an insertion point");
+ Instruction *InsertBefore = &*InsertPt;
+
+ // Attach debug values.
+ for (Instruction *I = &*BB.begin(); I != LastInst; I = I->getNextNode()) {
+ // Skip void-valued instructions.
+ if (I->getType()->isVoidTy())
+ continue;
+
+ // Phis and EH pads must be grouped at the beginning of the block.
+ // Only advance the insertion point when we finish visiting these.
+ if (!isa<PHINode>(I) && !I->isEHPad())
+ InsertBefore = I->getNextNode();
+
+ insertDbgVal(*I, InsertBefore);
+ InsertedDbgVal = true;
+ }
+ }
+ // Make sure we emit at least one dbg.value, otherwise MachineDebugify may
+ // not have anything to work with as it goes about inserting DBG_VALUEs.
+ // (It's common for MIR tests to be written containing skeletal IR with
+ // empty functions -- we're still interested in debugifying the MIR within
+ // those tests, and this helps with that.)
+ if (DebugifyLevel == Level::LocationsAndVariables && !InsertedDbgVal) {
+ auto *Term = findTerminatingInstruction(F.getEntryBlock());
+ insertDbgVal(*Term, Term);
+ }
+ if (ApplyToMF)
+ ApplyToMF(DIB, F);
+ DIB.finalizeSubprogram(SP);
+ }
+ DIB.finalize();
+
+ // Track the number of distinct lines and variables.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.debugify");
+ auto addDebugifyOperand = [&](unsigned N) {
+ NMD->addOperand(MDNode::get(
+ Ctx, ValueAsMetadata::getConstant(ConstantInt::get(Int32Ty, N))));
+ };
+ addDebugifyOperand(NextLine - 1); // Original number of lines.
+ addDebugifyOperand(NextVar - 1); // Original number of variables.
+ assert(NMD->getNumOperands() == 2 &&
+ "llvm.debugify should have exactly 2 operands!");
+
+ // Claim that this synthetic debug info is valid.
+ StringRef DIVersionKey = "Debug Info Version";
+ if (!M.getModuleFlag(DIVersionKey))
+ M.addModuleFlag(Module::Warning, DIVersionKey, DEBUG_METADATA_VERSION);
+
+ return true;
+}
+
+static bool
+applyDebugify(Function &F,
+ enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo,
+ DebugInfoPerPass *DebugInfoBeforePass = nullptr,
+ StringRef NameOfWrappedPass = "") {
+ Module &M = *F.getParent();
+ auto FuncIt = F.getIterator();
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ return applyDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)),
+ "FunctionDebugify: ", /*ApplyToMF*/ nullptr);
+ assert(DebugInfoBeforePass);
+ return collectDebugInfoMetadata(M, M.functions(), *DebugInfoBeforePass,
+ "FunctionDebugify (original debuginfo)",
+ NameOfWrappedPass);
+}
+
+static bool
+applyDebugify(Module &M,
+ enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo,
+ DebugInfoPerPass *DebugInfoBeforePass = nullptr,
+ StringRef NameOfWrappedPass = "") {
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ return applyDebugifyMetadata(M, M.functions(),
+ "ModuleDebugify: ", /*ApplyToMF*/ nullptr);
+ return collectDebugInfoMetadata(M, M.functions(), *DebugInfoBeforePass,
+ "ModuleDebugify (original debuginfo)",
+ NameOfWrappedPass);
+}
+
+bool llvm::stripDebugifyMetadata(Module &M) {
+ bool Changed = false;
+
+ // Remove the llvm.debugify and llvm.mir.debugify module-level named metadata.
+ NamedMDNode *DebugifyMD = M.getNamedMetadata("llvm.debugify");
+ if (DebugifyMD) {
+ M.eraseNamedMetadata(DebugifyMD);
+ Changed = true;
+ }
+
+ if (auto *MIRDebugifyMD = M.getNamedMetadata("llvm.mir.debugify")) {
+ M.eraseNamedMetadata(MIRDebugifyMD);
+ Changed = true;
+ }
+
+ // Strip out all debug intrinsics and supporting metadata (subprograms, types,
+ // variables, etc).
+ Changed |= StripDebugInfo(M);
+
+ // Strip out the dead dbg.value prototype.
+ Function *DbgValF = M.getFunction("llvm.dbg.value");
+ if (DbgValF) {
+ assert(DbgValF->isDeclaration() && DbgValF->use_empty() &&
+ "Not all debug info stripped?");
+ DbgValF->eraseFromParent();
+ Changed = true;
+ }
+
+ // Strip out the module-level Debug Info Version metadata.
+ // FIXME: There must be an easier way to remove an operand from a NamedMDNode.
+ NamedMDNode *NMD = M.getModuleFlagsMetadata();
+ if (!NMD)
+ return Changed;
+ SmallVector<MDNode *, 4> Flags(NMD->operands());
+ NMD->clearOperands();
+ for (MDNode *Flag : Flags) {
+ auto *Key = cast<MDString>(Flag->getOperand(1));
+ if (Key->getString() == "Debug Info Version") {
+ Changed = true;
+ continue;
+ }
+ NMD->addOperand(Flag);
+ }
+ // If we left it empty we might as well remove it.
+ if (NMD->getNumOperands() == 0)
+ NMD->eraseFromParent();
+
+ return Changed;
+}
+
+bool llvm::collectDebugInfoMetadata(Module &M,
+ iterator_range<Module::iterator> Functions,
+ DebugInfoPerPass &DebugInfoBeforePass,
+ StringRef Banner,
+ StringRef NameOfWrappedPass) {
+ LLVM_DEBUG(dbgs() << Banner << ": (before) " << NameOfWrappedPass << '\n');
+
+ if (!M.getNamedMetadata("llvm.dbg.cu")) {
+ dbg() << Banner << ": Skipping module without debug info\n";
+ return false;
+ }
+
+ uint64_t FunctionsCnt = DebugInfoBeforePass.DIFunctions.size();
+ // Visit each instruction.
+ for (Function &F : Functions) {
+ // Use DI collected after previous Pass (when -debugify-each is used).
+ if (DebugInfoBeforePass.DIFunctions.count(&F))
+ continue;
+
+ if (isFunctionSkipped(F))
+ continue;
+
+    // Stop collecting DI if the number of functions has reached the limit.
+ if (++FunctionsCnt >= DebugifyFunctionsLimit)
+ break;
+ // Collect the DISubprogram.
+ auto *SP = F.getSubprogram();
+ DebugInfoBeforePass.DIFunctions.insert({&F, SP});
+ if (SP) {
+ LLVM_DEBUG(dbgs() << " Collecting subprogram: " << *SP << '\n');
+ for (const DINode *DN : SP->getRetainedNodes()) {
+ if (const auto *DV = dyn_cast<DILocalVariable>(DN)) {
+ DebugInfoBeforePass.DIVariables[DV] = 0;
+ }
+ }
+ }
+
+ for (BasicBlock &BB : F) {
+ // Collect debug locations (!dbg) and debug variable intrinsics.
+ for (Instruction &I : BB) {
+ // Skip PHIs.
+ if (isa<PHINode>(I))
+ continue;
+
+        // Collect dbg.values and dbg.declares.
+ if (DebugifyLevel > Level::Locations) {
+ if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I)) {
+ if (!SP)
+ continue;
+ // Skip inlined variables.
+ if (I.getDebugLoc().getInlinedAt())
+ continue;
+ // Skip undef values.
+ if (DVI->isKillLocation())
+ continue;
+
+ auto *Var = DVI->getVariable();
+ DebugInfoBeforePass.DIVariables[Var]++;
+ continue;
+ }
+ }
+
+ // Skip debug instructions other than dbg.value and dbg.declare.
+ if (isa<DbgInfoIntrinsic>(&I))
+ continue;
+
+ LLVM_DEBUG(dbgs() << " Collecting info for inst: " << I << '\n');
+ DebugInfoBeforePass.InstToDelete.insert({&I, &I});
+
+ const DILocation *Loc = I.getDebugLoc().get();
+ bool HasLoc = Loc != nullptr;
+ DebugInfoBeforePass.DILocations.insert({&I, HasLoc});
+ }
+ }
+ }
+
+ return true;
+}
+
+// This checks the preservation of original debug info attached to functions.
+static bool checkFunctions(const DebugFnMap &DIFunctionsBefore,
+ const DebugFnMap &DIFunctionsAfter,
+ StringRef NameOfWrappedPass,
+ StringRef FileNameFromCU, bool ShouldWriteIntoJSON,
+ llvm::json::Array &Bugs) {
+ bool Preserved = true;
+ for (const auto &F : DIFunctionsAfter) {
+ if (F.second)
+ continue;
+ auto SPIt = DIFunctionsBefore.find(F.first);
+ if (SPIt == DIFunctionsBefore.end()) {
+ if (ShouldWriteIntoJSON)
+ Bugs.push_back(llvm::json::Object({{"metadata", "DISubprogram"},
+ {"name", F.first->getName()},
+ {"action", "not-generate"}}));
+ else
+ dbg() << "ERROR: " << NameOfWrappedPass
+ << " did not generate DISubprogram for " << F.first->getName()
+ << " from " << FileNameFromCU << '\n';
+ Preserved = false;
+ } else {
+ auto SP = SPIt->second;
+ if (!SP)
+ continue;
+ // If the function had the SP attached before the pass, consider it as
+ // a debug info bug.
+ if (ShouldWriteIntoJSON)
+ Bugs.push_back(llvm::json::Object({{"metadata", "DISubprogram"},
+ {"name", F.first->getName()},
+ {"action", "drop"}}));
+ else
+ dbg() << "ERROR: " << NameOfWrappedPass << " dropped DISubprogram of "
+ << F.first->getName() << " from " << FileNameFromCU << '\n';
+ Preserved = false;
+ }
+ }
+
+ return Preserved;
+}
+
+// This checks the preservation of the original debug info attached to
+// instructions.
+static bool checkInstructions(const DebugInstMap &DILocsBefore,
+ const DebugInstMap &DILocsAfter,
+ const WeakInstValueMap &InstToDelete,
+ StringRef NameOfWrappedPass,
+ StringRef FileNameFromCU,
+ bool ShouldWriteIntoJSON,
+ llvm::json::Array &Bugs) {
+ bool Preserved = true;
+ for (const auto &L : DILocsAfter) {
+ if (L.second)
+ continue;
+ auto Instr = L.first;
+
+ // In order to avoid pointer reuse/recycling, skip the values that might
+ // have been deleted during a pass.
+ auto WeakInstrPtr = InstToDelete.find(Instr);
+ if (WeakInstrPtr != InstToDelete.end() && !WeakInstrPtr->second)
+ continue;
+
+ auto FnName = Instr->getFunction()->getName();
+ auto BB = Instr->getParent();
+ auto BBName = BB->hasName() ? BB->getName() : "no-name";
+ auto InstName = Instruction::getOpcodeName(Instr->getOpcode());
+
+ auto InstrIt = DILocsBefore.find(Instr);
+ if (InstrIt == DILocsBefore.end()) {
+ if (ShouldWriteIntoJSON)
+ Bugs.push_back(llvm::json::Object({{"metadata", "DILocation"},
+ {"fn-name", FnName.str()},
+ {"bb-name", BBName.str()},
+ {"instr", InstName},
+ {"action", "not-generate"}}));
+ else
+ dbg() << "WARNING: " << NameOfWrappedPass
+ << " did not generate DILocation for " << *Instr
+ << " (BB: " << BBName << ", Fn: " << FnName
+ << ", File: " << FileNameFromCU << ")\n";
+ Preserved = false;
+ } else {
+ if (!InstrIt->second)
+ continue;
+ // If the instr had the !dbg attached before the pass, consider it as
+ // a debug info issue.
+ if (ShouldWriteIntoJSON)
+ Bugs.push_back(llvm::json::Object({{"metadata", "DILocation"},
+ {"fn-name", FnName.str()},
+ {"bb-name", BBName.str()},
+ {"instr", InstName},
+ {"action", "drop"}}));
+ else
+ dbg() << "WARNING: " << NameOfWrappedPass << " dropped DILocation of "
+ << *Instr << " (BB: " << BBName << ", Fn: " << FnName
+ << ", File: " << FileNameFromCU << ")\n";
+ Preserved = false;
+ }
+ }
+
+ return Preserved;
+}
+
+// This checks the preservation of original debug variable intrinsics.
+static bool checkVars(const DebugVarMap &DIVarsBefore,
+ const DebugVarMap &DIVarsAfter,
+ StringRef NameOfWrappedPass, StringRef FileNameFromCU,
+ bool ShouldWriteIntoJSON, llvm::json::Array &Bugs) {
+ bool Preserved = true;
+ for (const auto &V : DIVarsBefore) {
+ auto VarIt = DIVarsAfter.find(V.first);
+ if (VarIt == DIVarsAfter.end())
+ continue;
+
+ unsigned NumOfDbgValsAfter = VarIt->second;
+
+ if (V.second > NumOfDbgValsAfter) {
+ if (ShouldWriteIntoJSON)
+ Bugs.push_back(llvm::json::Object(
+ {{"metadata", "dbg-var-intrinsic"},
+ {"name", V.first->getName()},
+ {"fn-name", V.first->getScope()->getSubprogram()->getName()},
+ {"action", "drop"}}));
+ else
+ dbg() << "WARNING: " << NameOfWrappedPass
+ << " drops dbg.value()/dbg.declare() for " << V.first->getName()
+ << " from "
+ << "function " << V.first->getScope()->getSubprogram()->getName()
+ << " (file " << FileNameFromCU << ")\n";
+ Preserved = false;
+ }
+ }
+
+ return Preserved;
+}
+
+// Write the JSON data into the specified file.
+static void writeJSON(StringRef OrigDIVerifyBugsReportFilePath,
+ StringRef FileNameFromCU, StringRef NameOfWrappedPass,
+ llvm::json::Array &Bugs) {
+ std::error_code EC;
+ raw_fd_ostream OS_FILE{OrigDIVerifyBugsReportFilePath, EC,
+ sys::fs::OF_Append | sys::fs::OF_TextWithCRLF};
+ if (EC) {
+ errs() << "Could not open file: " << EC.message() << ", "
+ << OrigDIVerifyBugsReportFilePath << '\n';
+ return;
+ }
+
+ if (auto L = OS_FILE.lock()) {
+ OS_FILE << "{\"file\":\"" << FileNameFromCU << "\", ";
+
+ StringRef PassName =
+ NameOfWrappedPass != "" ? NameOfWrappedPass : "no-name";
+ OS_FILE << "\"pass\":\"" << PassName << "\", ";
+
+ llvm::json::Value BugsToPrint{std::move(Bugs)};
+ OS_FILE << "\"bugs\": " << BugsToPrint;
+
+ OS_FILE << "}\n";
+ }
+ OS_FILE.close();
+}
+
+bool llvm::checkDebugInfoMetadata(Module &M,
+ iterator_range<Module::iterator> Functions,
+ DebugInfoPerPass &DebugInfoBeforePass,
+ StringRef Banner, StringRef NameOfWrappedPass,
+ StringRef OrigDIVerifyBugsReportFilePath) {
+ LLVM_DEBUG(dbgs() << Banner << ": (after) " << NameOfWrappedPass << '\n');
+
+ if (!M.getNamedMetadata("llvm.dbg.cu")) {
+ dbg() << Banner << ": Skipping module without debug info\n";
+ return false;
+ }
+
+ // Map the debug info holding DIs after a pass.
+ DebugInfoPerPass DebugInfoAfterPass;
+
+ // Visit each instruction.
+ for (Function &F : Functions) {
+ if (isFunctionSkipped(F))
+ continue;
+
+ // Don't process functions without DI collected before the Pass.
+ if (!DebugInfoBeforePass.DIFunctions.count(&F))
+ continue;
+ // TODO: Collect metadata other than DISubprograms.
+ // Collect the DISubprogram.
+ auto *SP = F.getSubprogram();
+ DebugInfoAfterPass.DIFunctions.insert({&F, SP});
+
+ if (SP) {
+ LLVM_DEBUG(dbgs() << " Collecting subprogram: " << *SP << '\n');
+ for (const DINode *DN : SP->getRetainedNodes()) {
+ if (const auto *DV = dyn_cast<DILocalVariable>(DN)) {
+ DebugInfoAfterPass.DIVariables[DV] = 0;
+ }
+ }
+ }
+
+ for (BasicBlock &BB : F) {
+ // Collect debug locations (!dbg) and debug variable intrinsics.
+ for (Instruction &I : BB) {
+ // Skip PHIs.
+ if (isa<PHINode>(I))
+ continue;
+
+ // Collect dbg.values and dbg.declares.
+ if (DebugifyLevel > Level::Locations) {
+ if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I)) {
+ if (!SP)
+ continue;
+ // Skip inlined variables.
+ if (I.getDebugLoc().getInlinedAt())
+ continue;
+ // Skip undef values.
+ if (DVI->isKillLocation())
+ continue;
+
+ auto *Var = DVI->getVariable();
+ DebugInfoAfterPass.DIVariables[Var]++;
+ continue;
+ }
+ }
+
+ // Skip debug instructions other than dbg.value and dbg.declare.
+ if (isa<DbgInfoIntrinsic>(&I))
+ continue;
+
+ LLVM_DEBUG(dbgs() << " Collecting info for inst: " << I << '\n');
+
+ const DILocation *Loc = I.getDebugLoc().get();
+ bool HasLoc = Loc != nullptr;
+
+ DebugInfoAfterPass.DILocations.insert({&I, HasLoc});
+ }
+ }
+ }
+
+ // TODO: The name of the module could be read better?
+ StringRef FileNameFromCU =
+ (cast<DICompileUnit>(M.getNamedMetadata("llvm.dbg.cu")->getOperand(0)))
+ ->getFilename();
+
+ auto DIFunctionsBefore = DebugInfoBeforePass.DIFunctions;
+ auto DIFunctionsAfter = DebugInfoAfterPass.DIFunctions;
+
+ auto DILocsBefore = DebugInfoBeforePass.DILocations;
+ auto DILocsAfter = DebugInfoAfterPass.DILocations;
+
+ auto InstToDelete = DebugInfoBeforePass.InstToDelete;
+
+ auto DIVarsBefore = DebugInfoBeforePass.DIVariables;
+ auto DIVarsAfter = DebugInfoAfterPass.DIVariables;
+
+ bool ShouldWriteIntoJSON = !OrigDIVerifyBugsReportFilePath.empty();
+ llvm::json::Array Bugs;
+
+ bool ResultForFunc =
+ checkFunctions(DIFunctionsBefore, DIFunctionsAfter, NameOfWrappedPass,
+ FileNameFromCU, ShouldWriteIntoJSON, Bugs);
+ bool ResultForInsts = checkInstructions(
+ DILocsBefore, DILocsAfter, InstToDelete, NameOfWrappedPass,
+ FileNameFromCU, ShouldWriteIntoJSON, Bugs);
+
+ bool ResultForVars = checkVars(DIVarsBefore, DIVarsAfter, NameOfWrappedPass,
+ FileNameFromCU, ShouldWriteIntoJSON, Bugs);
+
+ bool Result = ResultForFunc && ResultForInsts && ResultForVars;
+
+ StringRef ResultBanner = NameOfWrappedPass != "" ? NameOfWrappedPass : Banner;
+ if (ShouldWriteIntoJSON && !Bugs.empty())
+ writeJSON(OrigDIVerifyBugsReportFilePath, FileNameFromCU, NameOfWrappedPass,
+ Bugs);
+
+ if (Result)
+ dbg() << ResultBanner << ": PASS\n";
+ else
+ dbg() << ResultBanner << ": FAIL\n";
+
+  // In the case of `debugify-each`, there is no need to go over all the
+  // instructions again in collectDebugInfoMetadata(), since the debug info
+  // collected after this pass can serve as the input for the next one.
+ DebugInfoBeforePass = DebugInfoAfterPass;
+
+ LLVM_DEBUG(dbgs() << "\n\n");
+ return Result;
+}
+
+namespace {
+/// Return true if a mis-sized diagnostic is issued for \p DVI.
+bool diagnoseMisSizedDbgValue(Module &M, DbgValueInst *DVI) {
+ // The size of a dbg.value's value operand should match the size of the
+ // variable it corresponds to.
+ //
+ // TODO: This, along with a check for non-null value operands, should be
+ // promoted to verifier failures.
+
+ // For now, don't try to interpret anything more complicated than an empty
+ // DIExpression. Eventually we should try to handle OP_deref and fragments.
+ if (DVI->getExpression()->getNumElements())
+ return false;
+
+ Value *V = DVI->getVariableLocationOp(0);
+ if (!V)
+ return false;
+
+ Type *Ty = V->getType();
+ uint64_t ValueOperandSize = getAllocSizeInBits(M, Ty);
+ std::optional<uint64_t> DbgVarSize = DVI->getFragmentSizeInBits();
+ if (!ValueOperandSize || !DbgVarSize)
+ return false;
+
+ bool HasBadSize = false;
+ if (Ty->isIntegerTy()) {
+ auto Signedness = DVI->getVariable()->getSignedness();
+ if (Signedness && *Signedness == DIBasicType::Signedness::Signed)
+ HasBadSize = ValueOperandSize < *DbgVarSize;
+ } else {
+ HasBadSize = ValueOperandSize != *DbgVarSize;
+ }
+
+ if (HasBadSize) {
+ dbg() << "ERROR: dbg.value operand has size " << ValueOperandSize
+ << ", but its variable has size " << *DbgVarSize << ": ";
+ DVI->print(dbg());
+ dbg() << "\n";
+ }
+ return HasBadSize;
+}
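+
+// Worked example (illustrative): for a signed 64-bit variable, a
+// `dbg.value(metadata i32 %x, ...)` has a 32-bit value operand, so the check
+// above reports 32 < 64 as a mis-sized value. For non-integer types any size
+// mismatch is reported; unsigned integer operands are not flagged here.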
+
+bool checkDebugifyMetadata(Module &M,
+ iterator_range<Module::iterator> Functions,
+ StringRef NameOfWrappedPass, StringRef Banner,
+ bool Strip, DebugifyStatsMap *StatsMap) {
+ // Skip modules without debugify metadata.
+ NamedMDNode *NMD = M.getNamedMetadata("llvm.debugify");
+ if (!NMD) {
+ dbg() << Banner << ": Skipping module without debugify metadata\n";
+ return false;
+ }
+
+ auto getDebugifyOperand = [&](unsigned Idx) -> unsigned {
+ return mdconst::extract<ConstantInt>(NMD->getOperand(Idx)->getOperand(0))
+ ->getZExtValue();
+ };
+ assert(NMD->getNumOperands() == 2 &&
+ "llvm.debugify should have exactly 2 operands!");
+ unsigned OriginalNumLines = getDebugifyOperand(0);
+ unsigned OriginalNumVars = getDebugifyOperand(1);
+ bool HasErrors = false;
+
+ // Track debug info loss statistics if able.
+ DebugifyStatistics *Stats = nullptr;
+ if (StatsMap && !NameOfWrappedPass.empty())
+ Stats = &StatsMap->operator[](NameOfWrappedPass);
+
+ BitVector MissingLines{OriginalNumLines, true};
+ BitVector MissingVars{OriginalNumVars, true};
+ for (Function &F : Functions) {
+ if (isFunctionSkipped(F))
+ continue;
+
+ // Find missing lines.
+ for (Instruction &I : instructions(F)) {
+ if (isa<DbgValueInst>(&I))
+ continue;
+
+ auto DL = I.getDebugLoc();
+ if (DL && DL.getLine() != 0) {
+ MissingLines.reset(DL.getLine() - 1);
+ continue;
+ }
+
+ if (!isa<PHINode>(&I) && !DL) {
+ dbg() << "WARNING: Instruction with empty DebugLoc in function ";
+ dbg() << F.getName() << " --";
+ I.print(dbg());
+ dbg() << "\n";
+ }
+ }
+
+ // Find missing variables and mis-sized debug values.
+ for (Instruction &I : instructions(F)) {
+ auto *DVI = dyn_cast<DbgValueInst>(&I);
+ if (!DVI)
+ continue;
+
+ unsigned Var = ~0U;
+ (void)to_integer(DVI->getVariable()->getName(), Var, 10);
+ assert(Var <= OriginalNumVars && "Unexpected name for DILocalVariable");
+ bool HasBadSize = diagnoseMisSizedDbgValue(M, DVI);
+ if (!HasBadSize)
+ MissingVars.reset(Var - 1);
+ HasErrors |= HasBadSize;
+ }
+ }
+
+ // Print the results.
+ for (unsigned Idx : MissingLines.set_bits())
+ dbg() << "WARNING: Missing line " << Idx + 1 << "\n";
+
+ for (unsigned Idx : MissingVars.set_bits())
+ dbg() << "WARNING: Missing variable " << Idx + 1 << "\n";
+
+ // Update DI loss statistics.
+ if (Stats) {
+ Stats->NumDbgLocsExpected += OriginalNumLines;
+ Stats->NumDbgLocsMissing += MissingLines.count();
+ Stats->NumDbgValuesExpected += OriginalNumVars;
+ Stats->NumDbgValuesMissing += MissingVars.count();
+ }
+
+ dbg() << Banner;
+ if (!NameOfWrappedPass.empty())
+ dbg() << " [" << NameOfWrappedPass << "]";
+ dbg() << ": " << (HasErrors ? "FAIL" : "PASS") << '\n';
+
+ // Strip debugify metadata if required.
+ if (Strip)
+ return stripDebugifyMetadata(M);
+
+ return false;
+}
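+
+// For reference, the module-level metadata consumed above is emitted by
+// applyDebugifyMetadata() and looks roughly like this (illustrative IDs):
+//
+//   !llvm.debugify = !{!10, !11}
+//   !10 = !{i32 23}   ; original number of instruction lines
+//   !11 = !{i32 4}    ; original number of variables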
+
+/// ModulePass for attaching synthetic debug info to everything, used with the
+/// legacy module pass manager.
+struct DebugifyModulePass : public ModulePass {
+ bool runOnModule(Module &M) override {
+ return applyDebugify(M, Mode, DebugInfoBeforePass, NameOfWrappedPass);
+ }
+
+ DebugifyModulePass(enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo,
+ StringRef NameOfWrappedPass = "",
+ DebugInfoPerPass *DebugInfoBeforePass = nullptr)
+ : ModulePass(ID), NameOfWrappedPass(NameOfWrappedPass),
+ DebugInfoBeforePass(DebugInfoBeforePass), Mode(Mode) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+
+ static char ID; // Pass identification.
+
+private:
+ StringRef NameOfWrappedPass;
+ DebugInfoPerPass *DebugInfoBeforePass;
+ enum DebugifyMode Mode;
+};
+
+/// FunctionPass for attaching synthetic debug info to instructions within a
+/// single function, used with the legacy module pass manager.
+struct DebugifyFunctionPass : public FunctionPass {
+ bool runOnFunction(Function &F) override {
+ return applyDebugify(F, Mode, DebugInfoBeforePass, NameOfWrappedPass);
+ }
+
+ DebugifyFunctionPass(
+ enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo,
+ StringRef NameOfWrappedPass = "",
+ DebugInfoPerPass *DebugInfoBeforePass = nullptr)
+ : FunctionPass(ID), NameOfWrappedPass(NameOfWrappedPass),
+ DebugInfoBeforePass(DebugInfoBeforePass), Mode(Mode) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+
+ static char ID; // Pass identification.
+
+private:
+ StringRef NameOfWrappedPass;
+ DebugInfoPerPass *DebugInfoBeforePass;
+ enum DebugifyMode Mode;
+};
+
+/// ModulePass for checking debug info inserted by -debugify, used with the
+/// legacy module pass manager.
+struct CheckDebugifyModulePass : public ModulePass {
+ bool runOnModule(Module &M) override {
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ return checkDebugifyMetadata(M, M.functions(), NameOfWrappedPass,
+ "CheckModuleDebugify", Strip, StatsMap);
+ return checkDebugInfoMetadata(
+ M, M.functions(), *DebugInfoBeforePass,
+ "CheckModuleDebugify (original debuginfo)", NameOfWrappedPass,
+ OrigDIVerifyBugsReportFilePath);
+ }
+
+ CheckDebugifyModulePass(
+ bool Strip = false, StringRef NameOfWrappedPass = "",
+ DebugifyStatsMap *StatsMap = nullptr,
+ enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo,
+ DebugInfoPerPass *DebugInfoBeforePass = nullptr,
+ StringRef OrigDIVerifyBugsReportFilePath = "")
+ : ModulePass(ID), NameOfWrappedPass(NameOfWrappedPass),
+ OrigDIVerifyBugsReportFilePath(OrigDIVerifyBugsReportFilePath),
+ StatsMap(StatsMap), DebugInfoBeforePass(DebugInfoBeforePass), Mode(Mode),
+ Strip(Strip) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+
+ static char ID; // Pass identification.
+
+private:
+ StringRef NameOfWrappedPass;
+ StringRef OrigDIVerifyBugsReportFilePath;
+ DebugifyStatsMap *StatsMap;
+ DebugInfoPerPass *DebugInfoBeforePass;
+ enum DebugifyMode Mode;
+ bool Strip;
+};
+
+/// FunctionPass for checking debug info inserted by -debugify-function, used
+/// with the legacy module pass manager.
+struct CheckDebugifyFunctionPass : public FunctionPass {
+ bool runOnFunction(Function &F) override {
+ Module &M = *F.getParent();
+ auto FuncIt = F.getIterator();
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ return checkDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)),
+ NameOfWrappedPass, "CheckFunctionDebugify",
+ Strip, StatsMap);
+ return checkDebugInfoMetadata(
+ M, make_range(FuncIt, std::next(FuncIt)), *DebugInfoBeforePass,
+ "CheckFunctionDebugify (original debuginfo)", NameOfWrappedPass,
+ OrigDIVerifyBugsReportFilePath);
+ }
+
+ CheckDebugifyFunctionPass(
+ bool Strip = false, StringRef NameOfWrappedPass = "",
+ DebugifyStatsMap *StatsMap = nullptr,
+ enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo,
+ DebugInfoPerPass *DebugInfoBeforePass = nullptr,
+ StringRef OrigDIVerifyBugsReportFilePath = "")
+ : FunctionPass(ID), NameOfWrappedPass(NameOfWrappedPass),
+ OrigDIVerifyBugsReportFilePath(OrigDIVerifyBugsReportFilePath),
+ StatsMap(StatsMap), DebugInfoBeforePass(DebugInfoBeforePass), Mode(Mode),
+ Strip(Strip) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+
+ static char ID; // Pass identification.
+
+private:
+ StringRef NameOfWrappedPass;
+ StringRef OrigDIVerifyBugsReportFilePath;
+ DebugifyStatsMap *StatsMap;
+ DebugInfoPerPass *DebugInfoBeforePass;
+ enum DebugifyMode Mode;
+ bool Strip;
+};
+
+} // end anonymous namespace
+
+void llvm::exportDebugifyStats(StringRef Path, const DebugifyStatsMap &Map) {
+ std::error_code EC;
+ raw_fd_ostream OS{Path, EC};
+ if (EC) {
+ errs() << "Could not open file: " << EC.message() << ", " << Path << '\n';
+ return;
+ }
+
+ OS << "Pass Name" << ',' << "# of missing debug values" << ','
+ << "# of missing locations" << ',' << "Missing/Expected value ratio" << ','
+ << "Missing/Expected location ratio" << '\n';
+ for (const auto &Entry : Map) {
+ StringRef Pass = Entry.first;
+ DebugifyStatistics Stats = Entry.second;
+
+ OS << Pass << ',' << Stats.NumDbgValuesMissing << ','
+ << Stats.NumDbgLocsMissing << ',' << Stats.getMissingValueRatio() << ','
+ << Stats.getEmptyLocationRatio() << '\n';
+ }
+}
+
+ModulePass *createDebugifyModulePass(enum DebugifyMode Mode,
+ llvm::StringRef NameOfWrappedPass,
+ DebugInfoPerPass *DebugInfoBeforePass) {
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ return new DebugifyModulePass();
+ assert(Mode == DebugifyMode::OriginalDebugInfo && "Must be original mode");
+ return new DebugifyModulePass(Mode, NameOfWrappedPass, DebugInfoBeforePass);
+}
+
+FunctionPass *
+createDebugifyFunctionPass(enum DebugifyMode Mode,
+ llvm::StringRef NameOfWrappedPass,
+ DebugInfoPerPass *DebugInfoBeforePass) {
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ return new DebugifyFunctionPass();
+ assert(Mode == DebugifyMode::OriginalDebugInfo && "Must be original mode");
+ return new DebugifyFunctionPass(Mode, NameOfWrappedPass, DebugInfoBeforePass);
+}
+
+PreservedAnalyses NewPMDebugifyPass::run(Module &M, ModuleAnalysisManager &) {
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ applyDebugifyMetadata(M, M.functions(),
+ "ModuleDebugify: ", /*ApplyToMF*/ nullptr);
+ else
+ collectDebugInfoMetadata(M, M.functions(), *DebugInfoBeforePass,
+ "ModuleDebugify (original debuginfo)",
+ NameOfWrappedPass);
+ return PreservedAnalyses::all();
+}
+
+ModulePass *createCheckDebugifyModulePass(
+ bool Strip, StringRef NameOfWrappedPass, DebugifyStatsMap *StatsMap,
+ enum DebugifyMode Mode, DebugInfoPerPass *DebugInfoBeforePass,
+ StringRef OrigDIVerifyBugsReportFilePath) {
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ return new CheckDebugifyModulePass(Strip, NameOfWrappedPass, StatsMap);
+ assert(Mode == DebugifyMode::OriginalDebugInfo && "Must be original mode");
+ return new CheckDebugifyModulePass(false, NameOfWrappedPass, nullptr, Mode,
+ DebugInfoBeforePass,
+ OrigDIVerifyBugsReportFilePath);
+}
+
+FunctionPass *createCheckDebugifyFunctionPass(
+ bool Strip, StringRef NameOfWrappedPass, DebugifyStatsMap *StatsMap,
+ enum DebugifyMode Mode, DebugInfoPerPass *DebugInfoBeforePass,
+ StringRef OrigDIVerifyBugsReportFilePath) {
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ return new CheckDebugifyFunctionPass(Strip, NameOfWrappedPass, StatsMap);
+ assert(Mode == DebugifyMode::OriginalDebugInfo && "Must be original mode");
+ return new CheckDebugifyFunctionPass(false, NameOfWrappedPass, nullptr, Mode,
+ DebugInfoBeforePass,
+ OrigDIVerifyBugsReportFilePath);
+}
+
+PreservedAnalyses NewPMCheckDebugifyPass::run(Module &M,
+ ModuleAnalysisManager &) {
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ checkDebugifyMetadata(M, M.functions(), NameOfWrappedPass,
+ "CheckModuleDebugify", Strip, StatsMap);
+ else
+ checkDebugInfoMetadata(
+ M, M.functions(), *DebugInfoBeforePass,
+ "CheckModuleDebugify (original debuginfo)", NameOfWrappedPass,
+ OrigDIVerifyBugsReportFilePath);
+ return PreservedAnalyses::all();
+}
+
+static bool isIgnoredPass(StringRef PassID) {
+ return isSpecialPass(PassID, {"PassManager", "PassAdaptor",
+ "AnalysisManagerProxy", "PrintFunctionPass",
+ "PrintModulePass", "BitcodeWriterPass",
+ "ThinLTOBitcodeWriterPass", "VerifierPass"});
+}
+
+void DebugifyEachInstrumentation::registerCallbacks(
+ PassInstrumentationCallbacks &PIC) {
+ PIC.registerBeforeNonSkippedPassCallback([this](StringRef P, Any IR) {
+ if (isIgnoredPass(P))
+ return;
+ if (const auto **F = any_cast<const Function *>(&IR))
+ applyDebugify(*const_cast<Function *>(*F),
+ Mode, DebugInfoBeforePass, P);
+ else if (const auto **M = any_cast<const Module *>(&IR))
+ applyDebugify(*const_cast<Module *>(*M),
+ Mode, DebugInfoBeforePass, P);
+ });
+ PIC.registerAfterPassCallback([this](StringRef P, Any IR,
+ const PreservedAnalyses &PassPA) {
+ if (isIgnoredPass(P))
+ return;
+ if (const auto **CF = any_cast<const Function *>(&IR)) {
+ auto &F = *const_cast<Function *>(*CF);
+ Module &M = *F.getParent();
+ auto It = F.getIterator();
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ checkDebugifyMetadata(M, make_range(It, std::next(It)), P,
+ "CheckFunctionDebugify", /*Strip=*/true, DIStatsMap);
+ else
+ checkDebugInfoMetadata(
+ M, make_range(It, std::next(It)), *DebugInfoBeforePass,
+ "CheckModuleDebugify (original debuginfo)",
+ P, OrigDIVerifyBugsReportFilePath);
+ } else if (const auto **CM = any_cast<const Module *>(&IR)) {
+ auto &M = *const_cast<Module *>(*CM);
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ checkDebugifyMetadata(M, M.functions(), P, "CheckModuleDebugify",
+ /*Strip=*/true, DIStatsMap);
+ else
+ checkDebugInfoMetadata(
+ M, M.functions(), *DebugInfoBeforePass,
+ "CheckModuleDebugify (original debuginfo)",
+ P, OrigDIVerifyBugsReportFilePath);
+ }
+ });
+}
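+
+// Typical use (illustrative): running `opt -debugify-each -O2 in.ll` installs
+// the callbacks registered above, so every non-ignored pass is bracketed by an
+// apply/check pair and reports a per-pass PASS/FAIL line.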
+
+char DebugifyModulePass::ID = 0;
+static RegisterPass<DebugifyModulePass> DM("debugify",
+ "Attach debug info to everything");
+
+char CheckDebugifyModulePass::ID = 0;
+static RegisterPass<CheckDebugifyModulePass>
+ CDM("check-debugify", "Check debug info from -debugify");
+
+char DebugifyFunctionPass::ID = 0;
+static RegisterPass<DebugifyFunctionPass> DF("debugify-function",
+ "Attach debug info to a function");
+
+char CheckDebugifyFunctionPass::ID = 0;
+static RegisterPass<CheckDebugifyFunctionPass>
+ CDF("check-debugify-function", "Check debug info from -debugify-function");
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/DemoteRegToStack.cpp
new file mode 100644
index 0000000000..086ea088dc
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -0,0 +1,172 @@
+//===- DemoteRegToStack.cpp - Move a virtual register to the stack --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+/// DemoteRegToStack - This function takes a virtual register computed by an
+/// Instruction and replaces it with a slot in the stack frame, allocated via
+/// alloca. This allows the CFG to be changed around without fear of
+/// invalidating the SSA information for the value. It returns the pointer to
+/// the alloca inserted to create a stack slot for I.
+AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
+ Instruction *AllocaPoint) {
+ if (I.use_empty()) {
+ I.eraseFromParent();
+ return nullptr;
+ }
+
+ Function *F = I.getParent()->getParent();
+ const DataLayout &DL = F->getParent()->getDataLayout();
+
+ // Create a stack slot to hold the value.
+ AllocaInst *Slot;
+ if (AllocaPoint) {
+ Slot = new AllocaInst(I.getType(), DL.getAllocaAddrSpace(), nullptr,
+ I.getName()+".reg2mem", AllocaPoint);
+ } else {
+ Slot = new AllocaInst(I.getType(), DL.getAllocaAddrSpace(), nullptr,
+ I.getName() + ".reg2mem", &F->getEntryBlock().front());
+ }
+
+ // We cannot demote invoke instructions to the stack if their normal edge
+ // is critical. Therefore, split the critical edge and create a basic block
+ // into which the store can be inserted.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
+ if (!II->getNormalDest()->getSinglePredecessor()) {
+ unsigned SuccNum = GetSuccessorNumber(II->getParent(), II->getNormalDest());
+ assert(isCriticalEdge(II, SuccNum) && "Expected a critical edge!");
+ BasicBlock *BB = SplitCriticalEdge(II, SuccNum);
+ assert(BB && "Unable to split critical edge.");
+ (void)BB;
+ }
+ }
+
+ // Change all of the users of the instruction to read from the stack slot.
+ while (!I.use_empty()) {
+ Instruction *U = cast<Instruction>(I.user_back());
+ if (PHINode *PN = dyn_cast<PHINode>(U)) {
+ // If this is a PHI node, we can't insert a load of the value before the
+ // use. Instead insert the load in the predecessor block corresponding
+ // to the incoming value.
+ //
+ // Note that if there are multiple edges from a basic block to this PHI
+      // node, we cannot have multiple loads. The problem is that the
+ // resulting PHI node will have multiple values (from each load) coming in
+ // from the same block, which is illegal SSA form. For this reason, we
+ // keep track of and reuse loads we insert.
+ DenseMap<BasicBlock*, Value*> Loads;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == &I) {
+ Value *&V = Loads[PN->getIncomingBlock(i)];
+ if (!V) {
+ // Insert the load into the predecessor block
+ V = new LoadInst(I.getType(), Slot, I.getName() + ".reload",
+ VolatileLoads,
+ PN->getIncomingBlock(i)->getTerminator());
+ }
+ PN->setIncomingValue(i, V);
+ }
+
+ } else {
+ // If this is a normal instruction, just insert a load.
+ Value *V = new LoadInst(I.getType(), Slot, I.getName() + ".reload",
+ VolatileLoads, U);
+ U->replaceUsesOfWith(&I, V);
+ }
+ }
+
+ // Insert stores of the computed value into the stack slot. We have to be
+ // careful if I is an invoke instruction, because we can't insert the store
+ // AFTER the terminator instruction.
+ BasicBlock::iterator InsertPt;
+ if (!I.isTerminator()) {
+ InsertPt = ++I.getIterator();
+ // Don't insert before PHI nodes or landingpad instrs.
+ for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt)
+ if (isa<CatchSwitchInst>(InsertPt))
+ break;
+ if (isa<CatchSwitchInst>(InsertPt)) {
+ for (BasicBlock *Handler : successors(&*InsertPt))
+ new StoreInst(&I, Slot, &*Handler->getFirstInsertionPt());
+ return Slot;
+ }
+ } else {
+ InvokeInst &II = cast<InvokeInst>(I);
+ InsertPt = II.getNormalDest()->getFirstInsertionPt();
+ }
+
+ new StoreInst(&I, Slot, &*InsertPt);
+ return Slot;
+}
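+
+// Illustrative usage sketch (not part of this file): a reg2mem-style driver
+// typically demotes every non-alloca value that is used outside its defining
+// block, and handles PHI nodes separately with DemotePHIToStack() below:
+//
+//   SmallVector<Instruction *, 16> WorkList;
+//   for (BasicBlock &BB : F)
+//     for (Instruction &I : BB)
+//       if (!isa<PHINode>(&I) && !isa<AllocaInst>(&I) &&
+//           I.isUsedOutsideOfBlock(&BB))
+//         WorkList.push_back(&I);
+//   for (Instruction *I : WorkList)
+//     DemoteRegToStack(*I, /*VolatileLoads=*/false, /*AllocaPoint=*/nullptr);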
+
+/// DemotePHIToStack - This function takes a virtual register computed by a PHI
+/// node and replaces it with a slot in the stack frame allocated via alloca.
+/// The PHI node is deleted. It returns the pointer to the alloca inserted.
+AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
+ if (P->use_empty()) {
+ P->eraseFromParent();
+ return nullptr;
+ }
+
+ const DataLayout &DL = P->getModule()->getDataLayout();
+
+ // Create a stack slot to hold the value.
+ AllocaInst *Slot;
+ if (AllocaPoint) {
+ Slot = new AllocaInst(P->getType(), DL.getAllocaAddrSpace(), nullptr,
+ P->getName()+".reg2mem", AllocaPoint);
+ } else {
+ Function *F = P->getParent()->getParent();
+ Slot = new AllocaInst(P->getType(), DL.getAllocaAddrSpace(), nullptr,
+ P->getName() + ".reg2mem",
+ &F->getEntryBlock().front());
+ }
+
+ // Iterate over each operand inserting a store in each predecessor.
+ for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) {
+ assert(II->getParent() != P->getIncomingBlock(i) &&
+ "Invoke edge not supported yet"); (void)II;
+ }
+ new StoreInst(P->getIncomingValue(i), Slot,
+ P->getIncomingBlock(i)->getTerminator());
+ }
+
+ // Insert a load in place of the PHI and replace all uses.
+ BasicBlock::iterator InsertPt = P->getIterator();
+ // Don't insert before PHI nodes or landingpad instrs.
+ for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt)
+ if (isa<CatchSwitchInst>(InsertPt))
+ break;
+ if (isa<CatchSwitchInst>(InsertPt)) {
+ // We need a separate load before each actual use of the PHI
+ SmallVector<Instruction *, 4> Users;
+ for (User *U : P->users()) {
+ Instruction *User = cast<Instruction>(U);
+ Users.push_back(User);
+ }
+ for (Instruction *User : Users) {
+ Value *V =
+ new LoadInst(P->getType(), Slot, P->getName() + ".reload", User);
+ User->replaceUsesOfWith(P, V);
+ }
+ } else {
+ Value *V =
+ new LoadInst(P->getType(), Slot, P->getName() + ".reload", &*InsertPt);
+ P->replaceAllUsesWith(V);
+ }
+ // Delete PHI.
+ P->eraseFromParent();
+ return Slot;
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/EntryExitInstrumenter.cpp
new file mode 100644
index 0000000000..53af1b1969
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -0,0 +1,152 @@
+//===- EntryExitInstrumenter.cpp - Function Entry/Exit Instrumentation ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils.h"
+
+using namespace llvm;
+
+static void insertCall(Function &CurFn, StringRef Func,
+ Instruction *InsertionPt, DebugLoc DL) {
+ Module &M = *InsertionPt->getParent()->getParent()->getParent();
+ LLVMContext &C = InsertionPt->getParent()->getContext();
+
+ if (Func == "mcount" ||
+ Func == ".mcount" ||
+ Func == "llvm.arm.gnu.eabi.mcount" ||
+ Func == "\01_mcount" ||
+ Func == "\01mcount" ||
+ Func == "__mcount" ||
+ Func == "_mcount" ||
+ Func == "__cyg_profile_func_enter_bare") {
+ Triple TargetTriple(M.getTargetTriple());
+ if (TargetTriple.isOSAIX() && Func == "__mcount") {
+ Type *SizeTy = M.getDataLayout().getIntPtrType(C);
+ Type *SizePtrTy = SizeTy->getPointerTo();
+ GlobalVariable *GV = new GlobalVariable(M, SizeTy, /*isConstant=*/false,
+ GlobalValue::InternalLinkage,
+ ConstantInt::get(SizeTy, 0));
+ CallInst *Call = CallInst::Create(
+ M.getOrInsertFunction(Func,
+ FunctionType::get(Type::getVoidTy(C), {SizePtrTy},
+ /*isVarArg=*/false)),
+ {GV}, "", InsertionPt);
+ Call->setDebugLoc(DL);
+ } else {
+ FunctionCallee Fn = M.getOrInsertFunction(Func, Type::getVoidTy(C));
+ CallInst *Call = CallInst::Create(Fn, "", InsertionPt);
+ Call->setDebugLoc(DL);
+ }
+ return;
+ }
+
+ if (Func == "__cyg_profile_func_enter" || Func == "__cyg_profile_func_exit") {
+ Type *ArgTypes[] = {Type::getInt8PtrTy(C), Type::getInt8PtrTy(C)};
+
+ FunctionCallee Fn = M.getOrInsertFunction(
+ Func, FunctionType::get(Type::getVoidTy(C), ArgTypes, false));
+
+ Instruction *RetAddr = CallInst::Create(
+ Intrinsic::getDeclaration(&M, Intrinsic::returnaddress),
+ ArrayRef<Value *>(ConstantInt::get(Type::getInt32Ty(C), 0)), "",
+ InsertionPt);
+ RetAddr->setDebugLoc(DL);
+
+ Value *Args[] = {ConstantExpr::getBitCast(&CurFn, Type::getInt8PtrTy(C)),
+ RetAddr};
+
+ CallInst *Call =
+ CallInst::Create(Fn, ArrayRef<Value *>(Args), "", InsertionPt);
+ Call->setDebugLoc(DL);
+ return;
+ }
+
+ // We only know how to call a fixed set of instrumentation functions, because
+ // they all expect different arguments, etc.
+ report_fatal_error(Twine("Unknown instrumentation function: '") + Func + "'");
+}
+
+static bool runOnFunction(Function &F, bool PostInlining) {
+ StringRef EntryAttr = PostInlining ? "instrument-function-entry-inlined"
+ : "instrument-function-entry";
+
+ StringRef ExitAttr = PostInlining ? "instrument-function-exit-inlined"
+ : "instrument-function-exit";
+
+ StringRef EntryFunc = F.getFnAttribute(EntryAttr).getValueAsString();
+ StringRef ExitFunc = F.getFnAttribute(ExitAttr).getValueAsString();
+
+ bool Changed = false;
+
+ // If the attribute is specified, insert instrumentation and then "consume"
+ // the attribute so that it's not inserted again if the pass should happen to
+ // run later for some reason.
+
+ if (!EntryFunc.empty()) {
+ DebugLoc DL;
+ if (auto SP = F.getSubprogram())
+ DL = DILocation::get(SP->getContext(), SP->getScopeLine(), 0, SP);
+
+ insertCall(F, EntryFunc, &*F.begin()->getFirstInsertionPt(), DL);
+ Changed = true;
+ F.removeFnAttr(EntryAttr);
+ }
+
+ if (!ExitFunc.empty()) {
+ for (BasicBlock &BB : F) {
+ Instruction *T = BB.getTerminator();
+ if (!isa<ReturnInst>(T))
+ continue;
+
+ // If T is preceded by a musttail call, that's the real terminator.
+ if (CallInst *CI = BB.getTerminatingMustTailCall())
+ T = CI;
+
+ DebugLoc DL;
+ if (DebugLoc TerminatorDL = T->getDebugLoc())
+ DL = TerminatorDL;
+ else if (auto SP = F.getSubprogram())
+ DL = DILocation::get(SP->getContext(), 0, 0, SP);
+
+ insertCall(F, ExitFunc, T, DL);
+ Changed = true;
+ }
+ F.removeFnAttr(ExitAttr);
+ }
+
+ return Changed;
+}
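+
+// For example (illustrative IR): a function carrying
+//
+//   attributes #0 = { "instrument-function-entry-inlined"="mcount" }
+//
+// gets a `call void @mcount()` inserted at its entry, after which the
+// attribute is removed so a later run of the pass does not instrument again.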
+
+PreservedAnalyses
+llvm::EntryExitInstrumenterPass::run(Function &F, FunctionAnalysisManager &AM) {
+ runOnFunction(F, PostInlining);
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
+
+void llvm::EntryExitInstrumenterPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<llvm::EntryExitInstrumenterPass> *>(this)
+ ->printPipeline(OS, MapClassName2PassName);
+ OS << "<";
+ if (PostInlining)
+ OS << "post-inline";
+ OS << ">";
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/EscapeEnumerator.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/EscapeEnumerator.cpp
new file mode 100644
index 0000000000..91053338df
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/EscapeEnumerator.cpp
@@ -0,0 +1,98 @@
+//===- EscapeEnumerator.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines a helper class that enumerates all possible exits from a function,
+// including exception handling.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/EscapeEnumerator.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+using namespace llvm;
+
+static FunctionCallee getDefaultPersonalityFn(Module *M) {
+ LLVMContext &C = M->getContext();
+ Triple T(M->getTargetTriple());
+ EHPersonality Pers = getDefaultEHPersonality(T);
+ return M->getOrInsertFunction(getEHPersonalityName(Pers),
+ FunctionType::get(Type::getInt32Ty(C), true));
+}
+
+IRBuilder<> *EscapeEnumerator::Next() {
+ if (Done)
+ return nullptr;
+
+ // Find all 'return', 'resume', and 'unwind' instructions.
+ while (StateBB != StateE) {
+ BasicBlock *CurBB = &*StateBB++;
+
+ // Branches and invokes do not escape, only unwind, resume, and return
+ // do.
+ Instruction *TI = CurBB->getTerminator();
+ if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI))
+ continue;
+
+ if (CallInst *CI = CurBB->getTerminatingMustTailCall())
+ TI = CI;
+ Builder.SetInsertPoint(TI);
+ return &Builder;
+ }
+
+ Done = true;
+
+ if (!HandleExceptions)
+ return nullptr;
+
+ if (F.doesNotThrow())
+ return nullptr;
+
+ // Find all 'call' instructions that may throw.
+  // We cannot transform calls with the musttail tag.
+ SmallVector<Instruction *, 16> Calls;
+ for (BasicBlock &BB : F)
+ for (Instruction &II : BB)
+ if (CallInst *CI = dyn_cast<CallInst>(&II))
+ if (!CI->doesNotThrow() && !CI->isMustTailCall())
+ Calls.push_back(CI);
+
+ if (Calls.empty())
+ return nullptr;
+
+ // Create a cleanup block.
+ LLVMContext &C = F.getContext();
+ BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F);
+ Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C));
+ if (!F.hasPersonalityFn()) {
+ FunctionCallee PersFn = getDefaultPersonalityFn(F.getParent());
+ F.setPersonalityFn(cast<Constant>(PersFn.getCallee()));
+ }
+
+ if (isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) {
+ report_fatal_error("Scoped EH not supported");
+ }
+
+ LandingPadInst *LPad =
+ LandingPadInst::Create(ExnTy, 1, "cleanup.lpad", CleanupBB);
+ LPad->setCleanup(true);
+ ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB);
+
+ // Transform the 'call' instructions into 'invoke's branching to the
+ // cleanup block. Go in reverse order to make prettier BB names.
+ SmallVector<Value *, 16> Args;
+ for (unsigned I = Calls.size(); I != 0;) {
+ CallInst *CI = cast<CallInst>(Calls[--I]);
+ changeToInvokeAndSplitBasicBlock(CI, CleanupBB, DTU);
+ }
+
+ Builder.SetInsertPoint(RI);
+ return &Builder;
+}
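+
+// Illustrative usage sketch (assuming the constructor declared in
+// llvm/Transforms/Utils/EscapeEnumerator.h): clients visit every function
+// exit, including the synthesized cleanup pad, like so:
+//
+//   EscapeEnumerator EE(F, "teardown", /*HandleExceptions=*/true);
+//   while (IRBuilder<> *AtExit = EE.Next())
+//     AtExit->CreateCall(TeardownFn);  // TeardownFn is a hypothetical callee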
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/Evaluator.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/Evaluator.cpp
new file mode 100644
index 0000000000..dc58bebd72
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/Evaluator.cpp
@@ -0,0 +1,688 @@
+//===- Evaluator.cpp - LLVM IR evaluator ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Function evaluator for LLVM IR.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Evaluator.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "evaluator"
+
+using namespace llvm;
+
+static inline bool
+isSimpleEnoughValueToCommit(Constant *C,
+ SmallPtrSetImpl<Constant *> &SimpleConstants,
+ const DataLayout &DL);
+
+/// Return true if the specified constant can be handled by the code generator.
+/// We don't want to generate something like:
+/// void *X = &X/42;
+/// because the code generator doesn't have a relocation that can handle that.
+///
+/// This function should be called if C was not found (but just got inserted)
+/// in SimpleConstants to avoid having to rescan the same constants all the
+/// time.
+static bool
+isSimpleEnoughValueToCommitHelper(Constant *C,
+ SmallPtrSetImpl<Constant *> &SimpleConstants,
+ const DataLayout &DL) {
+ // Simple global addresses are supported, do not allow dllimport or
+ // thread-local globals.
+ if (auto *GV = dyn_cast<GlobalValue>(C))
+ return !GV->hasDLLImportStorageClass() && !GV->isThreadLocal();
+
+ // Simple integer, undef, constant aggregate zero, etc are all supported.
+ if (C->getNumOperands() == 0 || isa<BlockAddress>(C))
+ return true;
+
+ // Aggregate values are safe if all their elements are.
+ if (isa<ConstantAggregate>(C)) {
+ for (Value *Op : C->operands())
+ if (!isSimpleEnoughValueToCommit(cast<Constant>(Op), SimpleConstants, DL))
+ return false;
+ return true;
+ }
+
+ // We don't know exactly what relocations are allowed in constant expressions,
+ // so we allow &global+constantoffset, which is safe and uniformly supported
+ // across targets.
+ ConstantExpr *CE = cast<ConstantExpr>(C);
+ switch (CE->getOpcode()) {
+ case Instruction::BitCast:
+ // Bitcast is fine if the casted value is fine.
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
+
+ case Instruction::IntToPtr:
+ case Instruction::PtrToInt:
+ // int <=> ptr is fine if the int type is the same size as the
+ // pointer type.
+ if (DL.getTypeSizeInBits(CE->getType()) !=
+ DL.getTypeSizeInBits(CE->getOperand(0)->getType()))
+ return false;
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
+
+ // GEP is fine if it is simple + constant offset.
+ case Instruction::GetElementPtr:
+ for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
+ if (!isa<ConstantInt>(CE->getOperand(i)))
+ return false;
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
+
+ case Instruction::Add:
+ // We allow simple+cst.
+ if (!isa<ConstantInt>(CE->getOperand(1)))
+ return false;
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
+ }
+ return false;
+}
+
+static inline bool
+isSimpleEnoughValueToCommit(Constant *C,
+ SmallPtrSetImpl<Constant *> &SimpleConstants,
+ const DataLayout &DL) {
+ // If we already checked this constant, we win.
+ if (!SimpleConstants.insert(C).second)
+ return true;
+ // Check the constant.
+ return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, DL);
+}
+
+void Evaluator::MutableValue::clear() {
+ if (auto *Agg = Val.dyn_cast<MutableAggregate *>())
+ delete Agg;
+ Val = nullptr;
+}
+
+Constant *Evaluator::MutableValue::read(Type *Ty, APInt Offset,
+ const DataLayout &DL) const {
+ TypeSize TySize = DL.getTypeStoreSize(Ty);
+ const MutableValue *V = this;
+ while (const auto *Agg = V->Val.dyn_cast<MutableAggregate *>()) {
+ Type *AggTy = Agg->Ty;
+ std::optional<APInt> Index = DL.getGEPIndexForOffset(AggTy, Offset);
+ if (!Index || Index->uge(Agg->Elements.size()) ||
+ !TypeSize::isKnownLE(TySize, DL.getTypeStoreSize(AggTy)))
+ return nullptr;
+
+ V = &Agg->Elements[Index->getZExtValue()];
+ }
+
+ return ConstantFoldLoadFromConst(V->Val.get<Constant *>(), Ty, Offset, DL);
+}
+
+bool Evaluator::MutableValue::makeMutable() {
+ Constant *C = Val.get<Constant *>();
+ Type *Ty = C->getType();
+ unsigned NumElements;
+ if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
+ NumElements = VT->getNumElements();
+ } else if (auto *AT = dyn_cast<ArrayType>(Ty))
+ NumElements = AT->getNumElements();
+ else if (auto *ST = dyn_cast<StructType>(Ty))
+ NumElements = ST->getNumElements();
+ else
+ return false;
+
+ MutableAggregate *MA = new MutableAggregate(Ty);
+ MA->Elements.reserve(NumElements);
+ for (unsigned I = 0; I < NumElements; ++I)
+ MA->Elements.push_back(C->getAggregateElement(I));
+ Val = MA;
+ return true;
+}
+
+bool Evaluator::MutableValue::write(Constant *V, APInt Offset,
+ const DataLayout &DL) {
+ Type *Ty = V->getType();
+ TypeSize TySize = DL.getTypeStoreSize(Ty);
+ MutableValue *MV = this;
+ while (Offset != 0 ||
+ !CastInst::isBitOrNoopPointerCastable(Ty, MV->getType(), DL)) {
+ if (MV->Val.is<Constant *>() && !MV->makeMutable())
+ return false;
+
+ MutableAggregate *Agg = MV->Val.get<MutableAggregate *>();
+ Type *AggTy = Agg->Ty;
+ std::optional<APInt> Index = DL.getGEPIndexForOffset(AggTy, Offset);
+ if (!Index || Index->uge(Agg->Elements.size()) ||
+ !TypeSize::isKnownLE(TySize, DL.getTypeStoreSize(AggTy)))
+ return false;
+
+ MV = &Agg->Elements[Index->getZExtValue()];
+ }
+
+ Type *MVType = MV->getType();
+ MV->clear();
+ if (Ty->isIntegerTy() && MVType->isPointerTy())
+ MV->Val = ConstantExpr::getIntToPtr(V, MVType);
+ else if (Ty->isPointerTy() && MVType->isIntegerTy())
+ MV->Val = ConstantExpr::getPtrToInt(V, MVType);
+ else if (Ty != MVType)
+ MV->Val = ConstantExpr::getBitCast(V, MVType);
+ else
+ MV->Val = V;
+ return true;
+}
+
+Constant *Evaluator::MutableAggregate::toConstant() const {
+ SmallVector<Constant *, 32> Consts;
+ for (const MutableValue &MV : Elements)
+ Consts.push_back(MV.toConstant());
+
+ if (auto *ST = dyn_cast<StructType>(Ty))
+ return ConstantStruct::get(ST, Consts);
+ if (auto *AT = dyn_cast<ArrayType>(Ty))
+ return ConstantArray::get(AT, Consts);
+ assert(isa<FixedVectorType>(Ty) && "Must be vector");
+ return ConstantVector::get(Consts);
+}
+
+/// Return the value that would be computed by a load from P after the stores
+/// reflected by 'memory' have been performed. If we can't decide, return null.
+Constant *Evaluator::ComputeLoadResult(Constant *P, Type *Ty) {
+ APInt Offset(DL.getIndexTypeSizeInBits(P->getType()), 0);
+ P = cast<Constant>(P->stripAndAccumulateConstantOffsets(
+ DL, Offset, /* AllowNonInbounds */ true));
+ Offset = Offset.sextOrTrunc(DL.getIndexTypeSizeInBits(P->getType()));
+ if (auto *GV = dyn_cast<GlobalVariable>(P))
+ return ComputeLoadResult(GV, Ty, Offset);
+ return nullptr;
+}
+
+Constant *Evaluator::ComputeLoadResult(GlobalVariable *GV, Type *Ty,
+ const APInt &Offset) {
+ auto It = MutatedMemory.find(GV);
+ if (It != MutatedMemory.end())
+ return It->second.read(Ty, Offset, DL);
+
+ if (!GV->hasDefinitiveInitializer())
+ return nullptr;
+ return ConstantFoldLoadFromConst(GV->getInitializer(), Ty, Offset, DL);
+}
+
+static Function *getFunction(Constant *C) {
+ if (auto *Fn = dyn_cast<Function>(C))
+ return Fn;
+
+ if (auto *Alias = dyn_cast<GlobalAlias>(C))
+ if (auto *Fn = dyn_cast<Function>(Alias->getAliasee()))
+ return Fn;
+ return nullptr;
+}
+
+Function *
+Evaluator::getCalleeWithFormalArgs(CallBase &CB,
+ SmallVectorImpl<Constant *> &Formals) {
+ auto *V = CB.getCalledOperand()->stripPointerCasts();
+ if (auto *Fn = getFunction(getVal(V)))
+ return getFormalParams(CB, Fn, Formals) ? Fn : nullptr;
+ return nullptr;
+}
+
+bool Evaluator::getFormalParams(CallBase &CB, Function *F,
+ SmallVectorImpl<Constant *> &Formals) {
+ if (!F)
+ return false;
+
+ auto *FTy = F->getFunctionType();
+ if (FTy->getNumParams() > CB.arg_size()) {
+ LLVM_DEBUG(dbgs() << "Too few arguments for function.\n");
+ return false;
+ }
+
+ auto ArgI = CB.arg_begin();
+ for (Type *PTy : FTy->params()) {
+ auto *ArgC = ConstantFoldLoadThroughBitcast(getVal(*ArgI), PTy, DL);
+ if (!ArgC) {
+ LLVM_DEBUG(dbgs() << "Can not convert function argument.\n");
+ return false;
+ }
+ Formals.push_back(ArgC);
+ ++ArgI;
+ }
+ return true;
+}
+
+/// If the call expression contains a bitcast, we may need to cast the
+/// evaluated return value to the type of the call expression.
+Constant *Evaluator::castCallResultIfNeeded(Type *ReturnType, Constant *RV) {
+ if (!RV || RV->getType() == ReturnType)
+ return RV;
+
+ RV = ConstantFoldLoadThroughBitcast(RV, ReturnType, DL);
+ if (!RV)
+ LLVM_DEBUG(dbgs() << "Failed to fold bitcast call expr\n");
+ return RV;
+}
+
+/// Evaluate all instructions in block BB, returning true if successful, false
+/// if we can't evaluate it. NextBB returns the next BB that control flows into,
+/// or null upon return. StrippedPointerCastsForAliasAnalysis is set to true if
+/// we looked through pointer casts to evaluate something.
+bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB,
+ bool &StrippedPointerCastsForAliasAnalysis) {
+ // This is the main evaluation loop.
+ while (true) {
+ Constant *InstResult = nullptr;
+
+ LLVM_DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n");
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
+ if (SI->isVolatile()) {
+ LLVM_DEBUG(dbgs() << "Store is volatile! Can not evaluate.\n");
+ return false; // no volatile accesses.
+ }
+ Constant *Ptr = getVal(SI->getOperand(1));
+ Constant *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI);
+ if (Ptr != FoldedPtr) {
+ LLVM_DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr);
+ Ptr = FoldedPtr;
+ LLVM_DEBUG(dbgs() << "; To: " << *Ptr << "\n");
+ }
+
+ APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
+ Ptr = cast<Constant>(Ptr->stripAndAccumulateConstantOffsets(
+ DL, Offset, /* AllowNonInbounds */ true));
+ Offset = Offset.sextOrTrunc(DL.getIndexTypeSizeInBits(Ptr->getType()));
+ auto *GV = dyn_cast<GlobalVariable>(Ptr);
+ if (!GV || !GV->hasUniqueInitializer()) {
+ LLVM_DEBUG(dbgs() << "Store is not to global with unique initializer: "
+ << *Ptr << "\n");
+ return false;
+ }
+
+ // If this might be too difficult for the backend to handle (e.g. the addr
+ // of one global variable divided by another) then we can't commit it.
+ Constant *Val = getVal(SI->getOperand(0));
+ if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, DL)) {
+ LLVM_DEBUG(dbgs() << "Store value is too complex to evaluate store. "
+ << *Val << "\n");
+ return false;
+ }
+
+ auto Res = MutatedMemory.try_emplace(GV, GV->getInitializer());
+ if (!Res.first->second.write(Val, Offset, DL))
+ return false;
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
+ if (LI->isVolatile()) {
+ LLVM_DEBUG(
+ dbgs() << "Found a Load! Volatile load, can not evaluate.\n");
+ return false; // no volatile accesses.
+ }
+
+ Constant *Ptr = getVal(LI->getOperand(0));
+ Constant *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI);
+ if (Ptr != FoldedPtr) {
+ Ptr = FoldedPtr;
+ LLVM_DEBUG(dbgs() << "Found a constant pointer expression, constant "
+ "folding: "
+ << *Ptr << "\n");
+ }
+ InstResult = ComputeLoadResult(Ptr, LI->getType());
+ if (!InstResult) {
+ LLVM_DEBUG(
+ dbgs() << "Failed to compute load result. Can not evaluate load."
+ "\n");
+ return false; // Could not evaluate load.
+ }
+
+ LLVM_DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n");
+ } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
+ if (AI->isArrayAllocation()) {
+ LLVM_DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n");
+ return false; // Cannot handle array allocs.
+ }
+ Type *Ty = AI->getAllocatedType();
+ AllocaTmps.push_back(std::make_unique<GlobalVariable>(
+ Ty, false, GlobalValue::InternalLinkage, UndefValue::get(Ty),
+ AI->getName(), /*TLMode=*/GlobalValue::NotThreadLocal,
+ AI->getType()->getPointerAddressSpace()));
+ InstResult = AllocaTmps.back().get();
+ LLVM_DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");
+ } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
+ CallBase &CB = *cast<CallBase>(&*CurInst);
+
+ // Debug info can safely be ignored here.
+ if (isa<DbgInfoIntrinsic>(CB)) {
+ LLVM_DEBUG(dbgs() << "Ignoring debug info.\n");
+ ++CurInst;
+ continue;
+ }
+
+ // Cannot handle inline asm.
+ if (CB.isInlineAsm()) {
+ LLVM_DEBUG(dbgs() << "Found inline asm, can not evaluate.\n");
+ return false;
+ }
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CB)) {
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) {
+ if (MSI->isVolatile()) {
+ LLVM_DEBUG(dbgs() << "Can not optimize a volatile memset "
+ << "intrinsic.\n");
+ return false;
+ }
+
+ auto *LenC = dyn_cast<ConstantInt>(getVal(MSI->getLength()));
+ if (!LenC) {
+ LLVM_DEBUG(dbgs() << "Memset with unknown length.\n");
+ return false;
+ }
+
+ Constant *Ptr = getVal(MSI->getDest());
+ APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
+ Ptr = cast<Constant>(Ptr->stripAndAccumulateConstantOffsets(
+ DL, Offset, /* AllowNonInbounds */ true));
+ auto *GV = dyn_cast<GlobalVariable>(Ptr);
+ if (!GV) {
+ LLVM_DEBUG(dbgs() << "Memset with unknown base.\n");
+ return false;
+ }
+
+ Constant *Val = getVal(MSI->getValue());
+ APInt Len = LenC->getValue();
+ while (Len != 0) {
+ Constant *DestVal = ComputeLoadResult(GV, Val->getType(), Offset);
+ if (DestVal != Val) {
+ LLVM_DEBUG(dbgs() << "Memset is not a no-op at offset "
+ << Offset << " of " << *GV << ".\n");
+ return false;
+ }
+ ++Offset;
+ --Len;
+ }
+
+ LLVM_DEBUG(dbgs() << "Ignoring no-op memset.\n");
+ ++CurInst;
+ continue;
+ }
+
+ if (II->isLifetimeStartOrEnd()) {
+ LLVM_DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n");
+ ++CurInst;
+ continue;
+ }
+
+ if (II->getIntrinsicID() == Intrinsic::invariant_start) {
+ // We don't insert an entry into Values, as it doesn't have a
+ // meaningful return value.
+ if (!II->use_empty()) {
+ LLVM_DEBUG(dbgs()
+ << "Found unused invariant_start. Can't evaluate.\n");
+ return false;
+ }
+ ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0));
+ Value *PtrArg = getVal(II->getArgOperand(1));
+ Value *Ptr = PtrArg->stripPointerCasts();
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
+ Type *ElemTy = GV->getValueType();
+ if (!Size->isMinusOne() &&
+ Size->getValue().getLimitedValue() >=
+ DL.getTypeStoreSize(ElemTy)) {
+ Invariants.insert(GV);
+ LLVM_DEBUG(dbgs() << "Found a global var that is an invariant: "
+ << *GV << "\n");
+ } else {
+ LLVM_DEBUG(dbgs()
+ << "Found a global var, but can not treat it as an "
+ "invariant.\n");
+ }
+ }
+ // Continue even if we do nothing.
+ ++CurInst;
+ continue;
+ } else if (II->getIntrinsicID() == Intrinsic::assume) {
+ LLVM_DEBUG(dbgs() << "Skipping assume intrinsic.\n");
+ ++CurInst;
+ continue;
+ } else if (II->getIntrinsicID() == Intrinsic::sideeffect) {
+ LLVM_DEBUG(dbgs() << "Skipping sideeffect intrinsic.\n");
+ ++CurInst;
+ continue;
+ } else if (II->getIntrinsicID() == Intrinsic::pseudoprobe) {
+ LLVM_DEBUG(dbgs() << "Skipping pseudoprobe intrinsic.\n");
+ ++CurInst;
+ continue;
+ } else {
+ Value *Stripped = CurInst->stripPointerCastsForAliasAnalysis();
+ // Only attempt to getVal() if we've actually managed to strip
+ // anything away, or else we'll call getVal() on the current
+ // instruction.
+ if (Stripped != &*CurInst) {
+ InstResult = getVal(Stripped);
+ }
+ if (InstResult) {
+ LLVM_DEBUG(dbgs()
+ << "Stripped pointer casts for alias analysis for "
+ "intrinsic call.\n");
+ StrippedPointerCastsForAliasAnalysis = true;
+ InstResult = ConstantExpr::getBitCast(InstResult, II->getType());
+ } else {
+ LLVM_DEBUG(dbgs() << "Unknown intrinsic. Cannot evaluate.\n");
+ return false;
+ }
+ }
+ }
+
+ if (!InstResult) {
+ // Resolve function pointers.
+ SmallVector<Constant *, 8> Formals;
+ Function *Callee = getCalleeWithFormalArgs(CB, Formals);
+ if (!Callee || Callee->isInterposable()) {
+ LLVM_DEBUG(dbgs() << "Can not resolve function pointer.\n");
+ return false; // Cannot resolve.
+ }
+
+ if (Callee->isDeclaration()) {
+ // If this is a function we can constant fold, do it.
+ if (Constant *C = ConstantFoldCall(&CB, Callee, Formals, TLI)) {
+ InstResult = castCallResultIfNeeded(CB.getType(), C);
+ if (!InstResult)
+ return false;
+ LLVM_DEBUG(dbgs() << "Constant folded function call. Result: "
+ << *InstResult << "\n");
+ } else {
+ LLVM_DEBUG(dbgs() << "Can not constant fold function call.\n");
+ return false;
+ }
+ } else {
+ if (Callee->getFunctionType()->isVarArg()) {
+ LLVM_DEBUG(dbgs()
+ << "Can not constant fold vararg function call.\n");
+ return false;
+ }
+
+ Constant *RetVal = nullptr;
+ // Execute the call, if successful, use the return value.
+ ValueStack.emplace_back();
+ if (!EvaluateFunction(Callee, RetVal, Formals)) {
+ LLVM_DEBUG(dbgs() << "Failed to evaluate function.\n");
+ return false;
+ }
+ ValueStack.pop_back();
+ InstResult = castCallResultIfNeeded(CB.getType(), RetVal);
+ if (RetVal && !InstResult)
+ return false;
+
+ if (InstResult) {
+ LLVM_DEBUG(dbgs() << "Successfully evaluated function. Result: "
+ << *InstResult << "\n\n");
+ } else {
+ LLVM_DEBUG(dbgs()
+ << "Successfully evaluated function. Result: 0\n\n");
+ }
+ }
+ }
+ } else if (CurInst->isTerminator()) {
+ LLVM_DEBUG(dbgs() << "Found a terminator instruction.\n");
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
+ if (BI->isUnconditional()) {
+ NextBB = BI->getSuccessor(0);
+ } else {
+ ConstantInt *Cond =
+ dyn_cast<ConstantInt>(getVal(BI->getCondition()));
+ if (!Cond) return false; // Cannot determine.
+
+ NextBB = BI->getSuccessor(!Cond->getZExtValue());
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
+ ConstantInt *Val =
+ dyn_cast<ConstantInt>(getVal(SI->getCondition()));
+ if (!Val) return false; // Cannot determine.
+ NextBB = SI->findCaseValue(Val)->getCaseSuccessor();
+ } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) {
+ Value *Val = getVal(IBI->getAddress())->stripPointerCasts();
+ if (BlockAddress *BA = dyn_cast<BlockAddress>(Val))
+ NextBB = BA->getBasicBlock();
+ else
+ return false; // Cannot determine.
+ } else if (isa<ReturnInst>(CurInst)) {
+ NextBB = nullptr;
+ } else {
+ // invoke, unwind, resume, unreachable.
+ LLVM_DEBUG(dbgs() << "Can not handle terminator.");
+ return false; // Cannot handle this terminator.
+ }
+
+ // We succeeded at evaluating this block!
+ LLVM_DEBUG(dbgs() << "Successfully evaluated block.\n");
+ return true;
+ } else {
+ SmallVector<Constant *> Ops;
+ for (Value *Op : CurInst->operands())
+ Ops.push_back(getVal(Op));
+ InstResult = ConstantFoldInstOperands(&*CurInst, Ops, DL, TLI);
+ if (!InstResult) {
+ LLVM_DEBUG(dbgs() << "Cannot fold instruction: " << *CurInst << "\n");
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << "Folded instruction " << *CurInst << " to "
+ << *InstResult << "\n");
+ }
+
+ if (!CurInst->use_empty()) {
+ InstResult = ConstantFoldConstant(InstResult, DL, TLI);
+ setVal(&*CurInst, InstResult);
+ }
+
+ // If we just processed an invoke, we finished evaluating the block.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {
+ NextBB = II->getNormalDest();
+ LLVM_DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n");
+ return true;
+ }
+
+ // Advance program counter.
+ ++CurInst;
+ }
+}
+
+/// Evaluate a call to function F, returning true if successful, false if we
+/// can't evaluate it. ActualArgs contains the constant values to bind to the
+/// function's formal arguments.
+bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
+ const SmallVectorImpl<Constant*> &ActualArgs) {
+ assert(ActualArgs.size() == F->arg_size() && "wrong number of arguments");
+
+ // Check to see if this function is already executing (recursion). If so,
+ // bail out. TODO: we might want to accept limited recursion.
+ if (is_contained(CallStack, F))
+ return false;
+
+ CallStack.push_back(F);
+
+ // Initialize arguments to the incoming values specified.
+ for (const auto &[ArgNo, Arg] : llvm::enumerate(F->args()))
+ setVal(&Arg, ActualArgs[ArgNo]);
+
+ // ExecutedBlocks - We only handle non-looping, non-recursive code. As such,
+ // we can only evaluate any one basic block at most once. This set keeps
+ // track of what we have executed so we can detect recursive cases etc.
+ SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
+
+ // CurBB - The current basic block we're evaluating.
+ BasicBlock *CurBB = &F->front();
+
+ BasicBlock::iterator CurInst = CurBB->begin();
+
+ while (true) {
+ BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings.
+ LLVM_DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");
+
+ bool StrippedPointerCastsForAliasAnalysis = false;
+
+ if (!EvaluateBlock(CurInst, NextBB, StrippedPointerCastsForAliasAnalysis))
+ return false;
+
+ if (!NextBB) {
+ // Successfully running until there's no next block means that we found
+      // the return. Fill in the return value and pop the call stack.
+ ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator());
+ if (RI->getNumOperands()) {
+ // The Evaluator can look through pointer casts as long as alias
+ // analysis holds because it's just a simple interpreter and doesn't
+ // skip memory accesses due to invariant group metadata, but we can't
+ // let users of Evaluator use a value that's been gleaned looking
+ // through stripping pointer casts.
+ if (StrippedPointerCastsForAliasAnalysis &&
+ !RI->getReturnValue()->getType()->isVoidTy()) {
+ return false;
+ }
+ RetVal = getVal(RI->getOperand(0));
+ }
+ CallStack.pop_back();
+ return true;
+ }
+
+ // Okay, we succeeded in evaluating this control flow. See if we have
+ // executed the new block before. If so, we have a looping function,
+ // which we cannot evaluate in reasonable time.
+ if (!ExecutedBlocks.insert(NextBB).second)
+ return false; // looped!
+
+ // Okay, we have never been in this block before. Check to see if there
+ // are any PHI nodes. If so, evaluate them with information about where
+ // we came from.
+ PHINode *PN = nullptr;
+ for (CurInst = NextBB->begin();
+ (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
+ setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB)));
+
+ // Advance to the next block.
+ CurBB = NextBB;
+ }
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/FixIrreducible.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/FixIrreducible.cpp
new file mode 100644
index 0000000000..dda2361673
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/FixIrreducible.cpp
@@ -0,0 +1,359 @@
+//===- FixIrreducible.cpp - Convert irreducible control-flow into loops ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// An irreducible SCC is one which has multiple "header" blocks, i.e., blocks
+// with control-flow edges incident from outside the SCC. This pass converts an
+// irreducible SCC into a natural loop by applying the following transformation:
+//
+// 1. Collect the set of headers H of the SCC.
+// 2. Collect the set of predecessors P of these headers. These may be inside as
+// well as outside the SCC.
+// 3. Create block N and redirect every edge from set P to set H through N.
+//
+// This converts the SCC into a natural loop with N as the header: N is the only
+// block with edges incident from outside the SCC, and all backedges in the SCC
+// are incident on N, i.e., for every backedge, the head now dominates the tail.
+//
+// INPUT CFG: The blocks A and B form an irreducible loop with two headers.
+//
+// Entry
+// / \
+// v v
+// A ----> B
+// ^ /|
+// `----' |
+// v
+// Exit
+//
+// OUTPUT CFG: Edges incident on A and B are now redirected through a
+// new block N, forming a natural loop consisting of N, A and B.
+//
+// Entry
+// |
+// v
+// .---> N <---.
+// / / \ \
+// | / \ |
+// \ v v /
+// `-- A B --'
+// |
+// v
+// Exit
+//
+// The transformation is applied to every maximal SCC that is not already
+// recognized as a loop. The pass operates on all maximal SCCs found in the
+// function body outside of any loop, as well as those found inside each loop,
+// including inside any newly created loops. This ensures that any SCC hidden
+// inside a maximal SCC is also transformed.
+//
+// The actual transformation is handled by function CreateControlFlowHub, which
+// takes a set of incoming blocks (the predecessors) and outgoing blocks (the
+// headers). The function also moves every PHINode in an outgoing block to the
+// hub. Since the hub dominates all the outgoing blocks, each such PHINode
+// continues to dominate its uses. Since every header in an SCC has at least two
+// predecessors, every value used in the header (or later) but defined in a
+// predecessor (or earlier) is represented by a PHINode in a header. Hence the
+// above handling of PHINodes is sufficient and no further processing is
+// required to restore SSA.
+//
+// Limitation: The pass cannot handle switch statements and indirect
+// branches. Both must be lowered to plain branches first.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/FixIrreducible.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+#define DEBUG_TYPE "fix-irreducible"
+
+using namespace llvm;
+
+namespace {
+struct FixIrreducible : public FunctionPass {
+ static char ID;
+ FixIrreducible() : FunctionPass(ID) {
+ initializeFixIrreduciblePass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequiredID(LowerSwitchID);
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreservedID(LowerSwitchID);
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override;
+};
+} // namespace
+
+char FixIrreducible::ID = 0;
+
+FunctionPass *llvm::createFixIrreduciblePass() { return new FixIrreducible(); }
+
+INITIALIZE_PASS_BEGIN(FixIrreducible, "fix-irreducible",
+ "Convert irreducible control-flow into natural loops",
+ false /* Only looks at CFG */, false /* Analysis Pass */)
+INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(FixIrreducible, "fix-irreducible",
+ "Convert irreducible control-flow into natural loops",
+ false /* Only looks at CFG */, false /* Analysis Pass */)
+
+// When a new loop is created, existing children of the parent loop may now be
+// fully inside the new loop. Reconnect these as children of the new loop.
+static void reconnectChildLoops(LoopInfo &LI, Loop *ParentLoop, Loop *NewLoop,
+ SetVector<BasicBlock *> &Blocks,
+ SetVector<BasicBlock *> &Headers) {
+ auto &CandidateLoops = ParentLoop ? ParentLoop->getSubLoopsVector()
+ : LI.getTopLevelLoopsVector();
+ // The new loop cannot be its own child, and any candidate is a
+ // child iff its header is owned by the new loop. Move all the
+ // children to a new vector.
+ auto FirstChild = std::partition(
+ CandidateLoops.begin(), CandidateLoops.end(), [&](Loop *L) {
+ return L == NewLoop || !Blocks.contains(L->getHeader());
+ });
+ SmallVector<Loop *, 8> ChildLoops(FirstChild, CandidateLoops.end());
+ CandidateLoops.erase(FirstChild, CandidateLoops.end());
+
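+ // Re-parent each former child: a child whose header is also an SCC header is
+ // absorbed into the new loop; every other child simply becomes a child of the
+ // new loop.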
+ for (Loop *Child : ChildLoops) {
+ LLVM_DEBUG(dbgs() << "child loop: " << Child->getHeader()->getName()
+ << "\n");
+ // TODO: A child loop whose header is also a header in the current
+ // SCC gets destroyed since its backedges are removed. That may
+ // not be necessary if we can retain such backedges.
+ if (Headers.count(Child->getHeader())) {
+ for (auto *BB : Child->blocks()) {
+ if (LI.getLoopFor(BB) != Child)
+ continue;
+ LI.changeLoopFor(BB, NewLoop);
+ LLVM_DEBUG(dbgs() << "moved block from child: " << BB->getName()
+ << "\n");
+ }
+ std::vector<Loop *> GrandChildLoops;
+ std::swap(GrandChildLoops, Child->getSubLoopsVector());
+ for (auto *GrandChildLoop : GrandChildLoops) {
+ GrandChildLoop->setParentLoop(nullptr);
+ NewLoop->addChildLoop(GrandChildLoop);
+ }
+ LI.destroy(Child);
+ LLVM_DEBUG(dbgs() << "subsumed child loop (common header)\n");
+ continue;
+ }
+
+ Child->setParentLoop(nullptr);
+ NewLoop->addChildLoop(Child);
+ LLVM_DEBUG(dbgs() << "added child loop to new loop\n");
+ }
+}
+
+// Given a set of blocks and headers in an irreducible SCC, convert it into a
+// natural loop. Also insert this new loop at its appropriate place in the
+// hierarchy of loops.
+static void createNaturalLoopInternal(LoopInfo &LI, DominatorTree &DT,
+ Loop *ParentLoop,
+ SetVector<BasicBlock *> &Blocks,
+ SetVector<BasicBlock *> &Headers) {
+#ifndef NDEBUG
+ // All headers are part of the SCC
+ for (auto *H : Headers) {
+ assert(Blocks.count(H));
+ }
+#endif
+
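+ // Collect every predecessor of the headers, whether inside or outside the
+ // SCC; these are the edges that get redirected through the new hub below.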
+ SetVector<BasicBlock *> Predecessors;
+ for (auto *H : Headers) {
+ for (auto *P : predecessors(H)) {
+ Predecessors.insert(P);
+ }
+ }
+
+ LLVM_DEBUG(
+ dbgs() << "Found predecessors:";
+ for (auto P : Predecessors) {
+ dbgs() << " " << P->getName();
+ }
+ dbgs() << "\n");
+
+ // Redirect all the backedges through a "hub" consisting of a series
+ // of guard blocks that manage the flow of control from the
+ // predecessors to the headers.
+ SmallVector<BasicBlock *, 8> GuardBlocks;
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ CreateControlFlowHub(&DTU, GuardBlocks, Predecessors, Headers, "irr");
+#if defined(EXPENSIVE_CHECKS)
+ assert(DT.verify(DominatorTree::VerificationLevel::Full));
+#else
+ assert(DT.verify(DominatorTree::VerificationLevel::Fast));
+#endif
+
+ // Create a new loop from the now-transformed cycle
+ auto NewLoop = LI.AllocateLoop();
+ if (ParentLoop) {
+ ParentLoop->addChildLoop(NewLoop);
+ } else {
+ LI.addTopLevelLoop(NewLoop);
+ }
+
+ // Add the guard blocks to the new loop. The first guard block is
+ // the head of all the backedges, and it is the first to be inserted
+ // in the loop. This ensures that it is recognized as the
+ // header. Since the new loop is already in LoopInfo, the new blocks
+ // are also propagated up the chain of parent loops.
+ for (auto *G : GuardBlocks) {
+ LLVM_DEBUG(dbgs() << "added guard block: " << G->getName() << "\n");
+ NewLoop->addBasicBlockToLoop(G, LI);
+ }
+
+ // Add the SCC blocks to the new loop.
+ for (auto *BB : Blocks) {
+ NewLoop->addBlockEntry(BB);
+ if (LI.getLoopFor(BB) == ParentLoop) {
+ LLVM_DEBUG(dbgs() << "moved block from parent: " << BB->getName()
+ << "\n");
+ LI.changeLoopFor(BB, NewLoop);
+ } else {
+ LLVM_DEBUG(dbgs() << "added block from child: " << BB->getName() << "\n");
+ }
+ }
+ LLVM_DEBUG(dbgs() << "header for new loop: "
+ << NewLoop->getHeader()->getName() << "\n");
+
+ reconnectChildLoops(LI, ParentLoop, NewLoop, Blocks, Headers);
+
+ NewLoop->verifyLoop();
+ if (ParentLoop) {
+ ParentLoop->verifyLoop();
+ }
+#if defined(EXPENSIVE_CHECKS)
+ LI.verify(DT);
+#endif // EXPENSIVE_CHECKS
+}
+
+namespace llvm {
+// Enable the graph traits required for traversing a Loop body.
+template <> struct GraphTraits<Loop> : LoopBodyTraits {};
+} // namespace llvm
+
+// Overloaded wrappers to go with the function template below.
+static BasicBlock *unwrapBlock(BasicBlock *B) { return B; }
+static BasicBlock *unwrapBlock(LoopBodyTraits::NodeRef &N) { return N.second; }
+
+static void createNaturalLoop(LoopInfo &LI, DominatorTree &DT, Function *F,
+ SetVector<BasicBlock *> &Blocks,
+ SetVector<BasicBlock *> &Headers) {
+ createNaturalLoopInternal(LI, DT, nullptr, Blocks, Headers);
+}
+
+static void createNaturalLoop(LoopInfo &LI, DominatorTree &DT, Loop &L,
+ SetVector<BasicBlock *> &Blocks,
+ SetVector<BasicBlock *> &Headers) {
+ createNaturalLoopInternal(LI, DT, &L, Blocks, Headers);
+}
+
+// Convert irreducible SCCs; Graph G may be a Function* or a Loop&.
+template <class Graph>
+static bool makeReducible(LoopInfo &LI, DominatorTree &DT, Graph &&G) {
+ bool Changed = false;
+ for (auto Scc = scc_begin(G); !Scc.isAtEnd(); ++Scc) {
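+ // A single-block SCC is either not a cycle at all or a self-loop, which is
+ // already a natural loop, so only multi-block SCCs need fixing.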
+ if (Scc->size() < 2)
+ continue;
+ SetVector<BasicBlock *> Blocks;
+ LLVM_DEBUG(dbgs() << "Found SCC:");
+ for (auto N : *Scc) {
+ auto BB = unwrapBlock(N);
+ LLVM_DEBUG(dbgs() << " " << BB->getName());
+ Blocks.insert(BB);
+ }
+ LLVM_DEBUG(dbgs() << "\n");
+
+ // Minor optimization: The SCC blocks are usually discovered in an order
+ // that is the opposite of the order in which these blocks appear as branch
+ // targets. This results in a lot of condition inversions in the control
+ // flow out of the new ControlFlowHub, which can be mitigated if the orders
+ // match. So we discover the headers using the reverse of the block order.
+ SetVector<BasicBlock *> Headers;
+ LLVM_DEBUG(dbgs() << "Found headers:");
+ for (auto *BB : reverse(Blocks)) {
+ for (const auto P : predecessors(BB)) {
+ // Skip unreachable predecessors.
+ if (!DT.isReachableFromEntry(P))
+ continue;
+ if (!Blocks.count(P)) {
+ LLVM_DEBUG(dbgs() << " " << BB->getName());
+ Headers.insert(BB);
+ break;
+ }
+ }
+ }
+ LLVM_DEBUG(dbgs() << "\n");
+
+ if (Headers.size() == 1) {
+ assert(LI.isLoopHeader(Headers.front()));
+ LLVM_DEBUG(dbgs() << "Natural loop with a single header: skipped\n");
+ continue;
+ }
+ createNaturalLoop(LI, DT, G, Blocks, Headers);
+ Changed = true;
+ }
+ return Changed;
+}
+
+static bool FixIrreducibleImpl(Function &F, LoopInfo &LI, DominatorTree &DT) {
+ LLVM_DEBUG(dbgs() << "===== Fix irreducible control-flow in function: "
+ << F.getName() << "\n");
+
+ bool Changed = false;
+ SmallVector<Loop *, 8> WorkList;
+
+ LLVM_DEBUG(dbgs() << "visiting top-level\n");
+ Changed |= makeReducible(LI, DT, &F);
+
+ // Any SCCs reduced are now already in the list of top-level loops, so simply
+ // add them all to the worklist.
+ append_range(WorkList, LI);
+
+ while (!WorkList.empty()) {
+ auto L = WorkList.pop_back_val();
+ LLVM_DEBUG(dbgs() << "visiting loop with header "
+ << L->getHeader()->getName() << "\n");
+ Changed |= makeReducible(LI, DT, *L);
+ // Any SCCs reduced are now already in the list of child loops, so simply
+ // add them all to the worklist.
+ WorkList.append(L->begin(), L->end());
+ }
+
+ return Changed;
+}
+
+bool FixIrreducible::runOnFunction(Function &F) {
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ return FixIrreducibleImpl(F, LI, DT);
+}
+
+PreservedAnalyses FixIrreduciblePass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ if (!FixIrreducibleImpl(F, LI, DT))
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<LoopAnalysis>();
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/FlattenCFG.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/FlattenCFG.cpp
new file mode 100644
index 0000000000..2fb2ab82e4
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/FlattenCFG.cpp
@@ -0,0 +1,548 @@
+//===- FlattenCFG.cpp - Code to perform CFG flattening --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Reduce conditional branches in CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "flattencfg"
+
+namespace {
+
+class FlattenCFGOpt {
+ AliasAnalysis *AA;
+
+ /// Use parallel-and or parallel-or to generate conditions for
+ /// conditional branches.
+ bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder);
+
+ /// If \param BB is the merge block of an if-region, attempt to merge
+ /// the if-region with an adjacent if-region upstream if the two if-regions
+ /// contain identical instructions.
+ bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder);
+
+ /// Compare a pair of blocks: \p Block1 and \p Block2, which
+ /// are from two if-regions, where \p Head2 is the entry block of the 2nd
+ /// if-region. \returns true if \p Block1 and \p Block2 contain identical
+ /// instructions, and have no memory reference alias with \p Head2.
+ /// This is used as a legality check for merging if-regions.
+ bool CompareIfRegionBlock(BasicBlock *Block1, BasicBlock *Block2,
+ BasicBlock *Head2);
+
+public:
+ FlattenCFGOpt(AliasAnalysis *AA) : AA(AA) {}
+
+ bool run(BasicBlock *BB);
+};
+
+} // end anonymous namespace
+
+/// If \param [in] BB has more than one predecessor that is a conditional
+/// branch, attempt to use parallel and/or for the branch condition. \returns
+/// true on success.
+///
+/// Before:
+/// ......
+/// %cmp10 = fcmp une float %tmp1, %tmp2
+/// br i1 %cmp10, label %if.then, label %lor.rhs
+///
+/// lor.rhs:
+/// ......
+/// %cmp11 = fcmp une float %tmp3, %tmp4
+/// br i1 %cmp11, label %if.then, label %ifend
+///
+/// if.end: // the merge block
+/// ......
+///
+/// if.then: // has two predecessors, both of which contain a conditional branch.
+/// ......
+/// br label %if.end;
+///
+/// After:
+/// ......
+/// %cmp10 = fcmp une float %tmp1, %tmp2
+/// ......
+/// %cmp11 = fcmp une float %tmp3, %tmp4
+/// %cmp12 = or i1 %cmp10, %cmp11 // parallel-or mode.
+/// br i1 %cmp12, label %if.then, label %ifend
+///
+/// if.end:
+/// ......
+///
+/// if.then:
+/// ......
+/// br label %if.end;
+///
+/// Current implementation handles two cases.
+/// Case 1: BB is on the else-path.
+///
+/// BB1
+/// / |
+/// BB2 |
+/// / \ |
+/// BB3 \ | where, BB1, BB2 contain conditional branches.
+/// \ | / BB3 contains unconditional branch.
+/// \ | / BB4 corresponds to BB which is also the merge.
+/// BB => BB4
+///
+///
+/// Corresponding source code:
+///
+/// if (a == b && c == d)
+/// statement; // BB3
+///
+/// Case 2: BB is on the then-path.
+///
+/// BB1
+/// / |
+/// | BB2
+/// \ / | where BB1, BB2 contain conditional branches.
+/// BB => BB3 | BB3 contains an unconditional branch and corresponds
+/// \ / to BB. BB4 is the merge.
+/// BB4
+///
+/// Corresponding source code:
+///
+/// if (a == b || c == d)
+/// statement; // BB3
+///
+/// In both cases, BB is the common successor of conditional branches.
+/// In Case 1, BB (BB4) has an unconditional branch (BB3) as
+/// its predecessor. In Case 2, BB (BB3) only has conditional branches
+/// as its predecessors.
+bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
+ PHINode *PHI = dyn_cast<PHINode>(BB->begin());
+ if (PHI)
+ return false; // For simplicity, avoid cases containing PHI nodes.
+
+ BasicBlock *LastCondBlock = nullptr;
+ BasicBlock *FirstCondBlock = nullptr;
+ BasicBlock *UnCondBlock = nullptr;
+ int Idx = -1;
+
+ // Check predecessors of \param BB.
+ SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
+ for (BasicBlock *Pred : Preds) {
+ BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator());
+
+ // All predecessors should terminate with a branch.
+ if (!PBI)
+ return false;
+
+ BasicBlock *PP = Pred->getSinglePredecessor();
+
+ if (PBI->isUnconditional()) {
+ // Case 1: Pred (BB3) is an unconditional block; it should
+ // have a single predecessor (BB2) that is also a predecessor
+ // of \param BB (BB4), and its address should not be taken.
+ // There should exist only one such unconditional
+ // branch among the predecessors.
+ if (UnCondBlock || !PP || !Preds.contains(PP) ||
+ Pred->hasAddressTaken())
+ return false;
+
+ UnCondBlock = Pred;
+ continue;
+ }
+
+ // Only conditional branches are allowed beyond this point.
+ assert(PBI->isConditional());
+
+ // Condition's unique use should be the branch instruction.
+ Value *PC = PBI->getCondition();
+ if (!PC || !PC->hasOneUse())
+ return false;
+
+ if (PP && Preds.count(PP)) {
+ // These are internal condition blocks to be merged from, e.g.,
+ // BB2 in both cases.
+ // Should not be address-taken.
+ if (Pred->hasAddressTaken())
+ return false;
+
+ // Instructions in the internal condition blocks should be safe
+ // to hoist up.
+ for (BasicBlock::iterator BI = Pred->begin(), BE = PBI->getIterator();
+ BI != BE;) {
+ Instruction *CI = &*BI++;
+ if (isa<PHINode>(CI) || !isSafeToSpeculativelyExecute(CI))
+ return false;
+ }
+ } else {
+ // This is the condition block to be merged into, e.g. BB1 in
+ // both cases.
+ if (FirstCondBlock)
+ return false;
+ FirstCondBlock = Pred;
+ }
+
+ // Find whether BB is uniformly on the true (or false) path
+ // for all of its predecessors.
+ BasicBlock *PS1 = PBI->getSuccessor(0);
+ BasicBlock *PS2 = PBI->getSuccessor(1);
+ BasicBlock *PS = (PS1 == BB) ? PS2 : PS1;
+ int CIdx = (PS1 == BB) ? 0 : 1;
+
+ if (Idx == -1)
+ Idx = CIdx;
+ else if (CIdx != Idx)
+ return false;
+
+ // PS is the successor which is not BB. Check successors to identify
+ // the last conditional branch.
+ if (!Preds.contains(PS)) {
+ // Case 2.
+ LastCondBlock = Pred;
+ } else {
+ // Case 1
+ BranchInst *BPS = dyn_cast<BranchInst>(PS->getTerminator());
+ if (BPS && BPS->isUnconditional()) {
+ // Case 1: PS(BB3) should be an unconditional branch.
+ LastCondBlock = Pred;
+ }
+ }
+ }
+
+ if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock))
+ return false;
+
+ Instruction *TBB = LastCondBlock->getTerminator();
+ BasicBlock *PS1 = TBB->getSuccessor(0);
+ BasicBlock *PS2 = TBB->getSuccessor(1);
+ BranchInst *PBI1 = dyn_cast<BranchInst>(PS1->getTerminator());
+ BranchInst *PBI2 = dyn_cast<BranchInst>(PS2->getTerminator());
+
+ // If PS1 does not jump into PS2, but PS2 jumps into PS1,
+ // attempt branch inversion.
+ if (!PBI1 || !PBI1->isUnconditional() ||
+ (PS1->getTerminator()->getSuccessor(0) != PS2)) {
+ // Check whether PS2 jumps into PS1.
+ if (!PBI2 || !PBI2->isUnconditional() ||
+ (PS2->getTerminator()->getSuccessor(0) != PS1))
+ return false;
+
+ // Do branch inversion.
+ BasicBlock *CurrBlock = LastCondBlock;
+ bool EverChanged = false;
+ for (; CurrBlock != FirstCondBlock;
+ CurrBlock = CurrBlock->getSinglePredecessor()) {
+ auto *BI = cast<BranchInst>(CurrBlock->getTerminator());
+ auto *CI = dyn_cast<CmpInst>(BI->getCondition());
+ if (!CI)
+ continue;
+
+ CmpInst::Predicate Predicate = CI->getPredicate();
+ // Canonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq
+ if ((Predicate == CmpInst::ICMP_NE) || (Predicate == CmpInst::FCMP_ONE)) {
+ CI->setPredicate(ICmpInst::getInversePredicate(Predicate));
+ BI->swapSuccessors();
+ EverChanged = true;
+ }
+ }
+ return EverChanged;
+ }
+
+ // PS1 must end in an unconditional branch.
+ if (!PBI1 || !PBI1->isUnconditional())
+ return false;
+
+ // PS2 should not contain PHI node.
+ PHI = dyn_cast<PHINode>(PS2->begin());
+ if (PHI)
+ return false;
+
+ // Do the transformation.
+ BasicBlock *CB;
+ BranchInst *PBI = cast<BranchInst>(FirstCondBlock->getTerminator());
+ bool Iteration = true;
+ IRBuilder<>::InsertPointGuard Guard(Builder);
+ Value *PC = PBI->getCondition();
+
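+ // Repeatedly splice the next condition block into FirstCondBlock and fold
+ // its condition into the combined condition until LastCondBlock has been
+ // merged.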
+ do {
+ CB = PBI->getSuccessor(1 - Idx);
+ // Delete the conditional branch.
+ FirstCondBlock->back().eraseFromParent();
+ FirstCondBlock->splice(FirstCondBlock->end(), CB);
+ PBI = cast<BranchInst>(FirstCondBlock->getTerminator());
+ Value *CC = PBI->getCondition();
+ // Merge conditions.
+ Builder.SetInsertPoint(PBI);
+ Value *NC;
+ if (Idx == 0)
+ // Case 2, use parallel or.
+ NC = Builder.CreateOr(PC, CC);
+ else
+ // Case 1, use parallel and.
+ NC = Builder.CreateAnd(PC, CC);
+
+ PBI->replaceUsesOfWith(CC, NC);
+ PC = NC;
+ if (CB == LastCondBlock)
+ Iteration = false;
+ // Remove internal conditional branches.
+ CB->dropAllReferences();
+ // Make CB unreachable and let a downstream pass delete the block.
+ new UnreachableInst(CB->getContext(), CB);
+ } while (Iteration);
+
+ LLVM_DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock);
+ return true;
+}
+
+/// Compare blocks from two if-regions, where \param Head2 is the entry of the
+/// 2nd if-region. \param Block1 is a block in the 1st if-region to compare.
+/// \param Block2 is a block in the 2nd if-region to compare. \returns true if
+/// Block1 and Block2 have identical instructions and do not have
+/// memory reference alias with Head2.
+bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Block1, BasicBlock *Block2,
+ BasicBlock *Head2) {
+ Instruction *PTI2 = Head2->getTerminator();
+ Instruction *PBI2 = &Head2->front();
+
+ // Check whether instructions in Block1 and Block2 are identical
+ // and do not alias with instructions in Head2.
+ BasicBlock::iterator iter1 = Block1->begin();
+ BasicBlock::iterator end1 = Block1->getTerminator()->getIterator();
+ BasicBlock::iterator iter2 = Block2->begin();
+ BasicBlock::iterator end2 = Block2->getTerminator()->getIterator();
+
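+ // Walk both blocks in lockstep (terminators excluded): each pair of
+ // instructions must be identical, must not read memory, and any writes must
+ // not alias memory accesses in Head2.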
+ while (true) {
+ if (iter1 == end1) {
+ if (iter2 != end2)
+ return false;
+ break;
+ }
+
+ if (!iter1->isIdenticalTo(&*iter2))
+ return false;
+
+ // Illegal to remove instructions with side effects except
+ // non-volatile stores.
+ if (iter1->mayHaveSideEffects()) {
+ Instruction *CurI = &*iter1;
+ StoreInst *SI = dyn_cast<StoreInst>(CurI);
+ if (!SI || SI->isVolatile())
+ return false;
+ }
+
+ // For simplicity and speed, data dependency check can be
+ // avoided if read from memory doesn't exist.
+ if (iter1->mayReadFromMemory())
+ return false;
+
+ if (iter1->mayWriteToMemory()) {
+ for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) {
+ if (BI->mayReadFromMemory() || BI->mayWriteToMemory()) {
+ // Check alias with Head2.
+ if (!AA || !AA->isNoAlias(&*iter1, &*BI))
+ return false;
+ }
+ }
+ }
+ ++iter1;
+ ++iter2;
+ }
+
+ return true;
+}
+
+/// Check whether \param BB is the merge block of an if-region. If yes, check
+/// whether there exists an adjacent if-region upstream, the two if-regions
+/// contain identical instructions and can be legally merged. \returns true if
+/// the two if-regions are merged.
+///
+/// From:
+/// if (a)
+/// statement;
+/// if (b)
+/// statement;
+///
+/// To:
+/// if (a || b)
+/// statement;
+///
+///
+/// And from:
+/// if (a)
+/// ;
+/// else
+/// statement;
+/// if (b)
+/// ;
+/// else
+/// statement;
+///
+/// To:
+/// if (a && b)
+/// ;
+/// else
+/// statement;
+///
+/// We always take the form of the first if-region. This means that if the
+/// statement in the first if-region is in the "then-path", while in the second
+/// if-region it is in the "else-path", then we convert the second to the first
+/// form by inverting the condition and the branch successors. The same
+/// approach goes for the opposite case.
+bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {
+ BasicBlock *IfTrue2, *IfFalse2;
+ BranchInst *DomBI2 = GetIfCondition(BB, IfTrue2, IfFalse2);
+ if (!DomBI2)
+ return false;
+ Instruction *CInst2 = dyn_cast<Instruction>(DomBI2->getCondition());
+ if (!CInst2)
+ return false;
+
+ BasicBlock *SecondEntryBlock = CInst2->getParent();
+ if (SecondEntryBlock->hasAddressTaken())
+ return false;
+
+ BasicBlock *IfTrue1, *IfFalse1;
+ BranchInst *DomBI1 = GetIfCondition(SecondEntryBlock, IfTrue1, IfFalse1);
+ if (!DomBI1)
+ return false;
+ Instruction *CInst1 = dyn_cast<Instruction>(DomBI1->getCondition());
+ if (!CInst1)
+ return false;
+
+ BasicBlock *FirstEntryBlock = CInst1->getParent();
+ // Don't die trying to process degenerate/unreachable code.
+ if (FirstEntryBlock == SecondEntryBlock)
+ return false;
+
+ // Either then-path or else-path should be empty.
+ bool InvertCond2 = false;
+ BinaryOperator::BinaryOps CombineOp;
+ if (IfFalse1 == FirstEntryBlock) {
+ // The else-path is empty, so we must use "or" operation to combine the
+ // conditions.
+ CombineOp = BinaryOperator::Or;
+ if (IfFalse2 != SecondEntryBlock) {
+ if (IfTrue2 != SecondEntryBlock)
+ return false;
+
+ InvertCond2 = true;
+ std::swap(IfTrue2, IfFalse2);
+ }
+
+ if (!CompareIfRegionBlock(IfTrue1, IfTrue2, SecondEntryBlock))
+ return false;
+ } else if (IfTrue1 == FirstEntryBlock) {
+ // The then-path is empty, so we must use "and" operation to combine the
+ // conditions.
+ CombineOp = BinaryOperator::And;
+ if (IfTrue2 != SecondEntryBlock) {
+ if (IfFalse2 != SecondEntryBlock)
+ return false;
+
+ InvertCond2 = true;
+ std::swap(IfTrue2, IfFalse2);
+ }
+
+ if (!CompareIfRegionBlock(IfFalse1, IfFalse2, SecondEntryBlock))
+ return false;
+ } else
+ return false;
+
+ Instruction *PTI2 = SecondEntryBlock->getTerminator();
+ Instruction *PBI2 = &SecondEntryBlock->front();
+
+ // Check that \param SecondEntryBlock has no side effects and is safe to
+ // speculate.
+ for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) {
+ Instruction *CI = &*BI;
+ if (isa<PHINode>(CI) || CI->mayHaveSideEffects() ||
+ !isSafeToSpeculativelyExecute(CI))
+ return false;
+ }
+
+ // Merge \param SecondEntryBlock into \param FirstEntryBlock.
+ FirstEntryBlock->back().eraseFromParent();
+ FirstEntryBlock->splice(FirstEntryBlock->end(), SecondEntryBlock);
+ BranchInst *PBI = cast<BranchInst>(FirstEntryBlock->getTerminator());
+ assert(PBI->getCondition() == CInst2);
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ Builder.SetInsertPoint(PBI);
+ if (InvertCond2) {
+ // If this is a "cmp" instruction, only used for branching (and nowhere
+ // else), then we can simply invert the predicate.
+ auto Cmp2 = dyn_cast<CmpInst>(CInst2);
+ if (Cmp2 && Cmp2->hasOneUse())
+ Cmp2->setPredicate(Cmp2->getInversePredicate());
+ else
+ CInst2 = cast<Instruction>(Builder.CreateNot(CInst2));
+ PBI->swapSuccessors();
+ }
+ Value *NC = Builder.CreateBinOp(CombineOp, CInst1, CInst2);
+ PBI->replaceUsesOfWith(CInst2, NC);
+ Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
+
+ // Update PHI nodes in the successors so that incoming edges from
+ // SecondEntryBlock now come from FirstEntryBlock.
+ for (BasicBlock *Succ : successors(PBI)) {
+ for (PHINode &Phi : Succ->phis()) {
+ for (unsigned i = 0, e = Phi.getNumIncomingValues(); i != e; ++i) {
+ if (Phi.getIncomingBlock(i) == SecondEntryBlock)
+ Phi.setIncomingBlock(i, FirstEntryBlock);
+ }
+ }
+ }
+
+ // Remove IfTrue1
+ if (IfTrue1 != FirstEntryBlock) {
+ IfTrue1->dropAllReferences();
+ IfTrue1->eraseFromParent();
+ }
+
+ // Remove IfFalse1
+ if (IfFalse1 != FirstEntryBlock) {
+ IfFalse1->dropAllReferences();
+ IfFalse1->eraseFromParent();
+ }
+
+ // Remove \param SecondEntryBlock
+ SecondEntryBlock->dropAllReferences();
+ SecondEntryBlock->eraseFromParent();
+ LLVM_DEBUG(dbgs() << "If conditions merged into:\n" << *FirstEntryBlock);
+ return true;
+}
+
+bool FlattenCFGOpt::run(BasicBlock *BB) {
+ assert(BB && BB->getParent() && "Block not embedded in function!");
+ assert(BB->getTerminator() && "Degenerate basic block encountered!");
+
+ IRBuilder<> Builder(BB);
+
+ if (FlattenParallelAndOr(BB, Builder) || MergeIfRegion(BB, Builder))
+ return true;
+ return false;
+}
+
+/// FlattenCFG - This function is used to flatten a CFG. For
+/// example, it uses parallel-and and parallel-or mode to collapse
+/// if-conditions and merge if-regions with identical statements.
+bool llvm::FlattenCFG(BasicBlock *BB, AAResults *AA) {
+ return FlattenCFGOpt(AA).run(BB);
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/FunctionComparator.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/FunctionComparator.cpp
new file mode 100644
index 0000000000..3fa61ec68c
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/FunctionComparator.cpp
@@ -0,0 +1,991 @@
+//===- FunctionComparator.cpp - Function Comparator -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the FunctionComparator and GlobalNumberState classes
+// which are used by the MergeFunctions pass for comparing functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/FunctionComparator.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "functioncomparator"
+
+int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
+ if (L < R)
+ return -1;
+ if (L > R)
+ return 1;
+ return 0;
+}
+
+int FunctionComparator::cmpAligns(Align L, Align R) const {
+ if (L.value() < R.value())
+ return -1;
+ if (L.value() > R.value())
+ return 1;
+ return 0;
+}
+
+int FunctionComparator::cmpOrderings(AtomicOrdering L, AtomicOrdering R) const {
+ if ((int)L < (int)R)
+ return -1;
+ if ((int)L > (int)R)
+ return 1;
+ return 0;
+}
+
+int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const {
+ if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth()))
+ return Res;
+ if (L.ugt(R))
+ return 1;
+ if (R.ugt(L))
+ return -1;
+ return 0;
+}
+
+int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const {
+ // Floats are ordered first by semantics (i.e. float, double, half, etc.),
+ // then by value interpreted as a bitstring (aka APInt).
+ const fltSemantics &SL = L.getSemantics(), &SR = R.getSemantics();
+ if (int Res = cmpNumbers(APFloat::semanticsPrecision(SL),
+ APFloat::semanticsPrecision(SR)))
+ return Res;
+ if (int Res = cmpNumbers(APFloat::semanticsMaxExponent(SL),
+ APFloat::semanticsMaxExponent(SR)))
+ return Res;
+ if (int Res = cmpNumbers(APFloat::semanticsMinExponent(SL),
+ APFloat::semanticsMinExponent(SR)))
+ return Res;
+ if (int Res = cmpNumbers(APFloat::semanticsSizeInBits(SL),
+ APFloat::semanticsSizeInBits(SR)))
+ return Res;
+ return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt());
+}
+
+int FunctionComparator::cmpMem(StringRef L, StringRef R) const {
+ // Avoid a heavy comparison by comparing sizes first.
+ if (int Res = cmpNumbers(L.size(), R.size()))
+ return Res;
+
+ // Compare strings lexicographically only when it is necessary: only when
+ // strings are equal in size.
+ return std::clamp(L.compare(R), -1, 1);
+}
+
+int FunctionComparator::cmpAttrs(const AttributeList L,
+ const AttributeList R) const {
+ if (int Res = cmpNumbers(L.getNumAttrSets(), R.getNumAttrSets()))
+ return Res;
+
+ for (unsigned i : L.indexes()) {
+ AttributeSet LAS = L.getAttributes(i);
+ AttributeSet RAS = R.getAttributes(i);
+ AttributeSet::iterator LI = LAS.begin(), LE = LAS.end();
+ AttributeSet::iterator RI = RAS.begin(), RE = RAS.end();
+ for (; LI != LE && RI != RE; ++LI, ++RI) {
+ Attribute LA = *LI;
+ Attribute RA = *RI;
+ if (LA.isTypeAttribute() && RA.isTypeAttribute()) {
+ if (LA.getKindAsEnum() != RA.getKindAsEnum())
+ return cmpNumbers(LA.getKindAsEnum(), RA.getKindAsEnum());
+
+ Type *TyL = LA.getValueAsType();
+ Type *TyR = RA.getValueAsType();
+ if (TyL && TyR) {
+ if (int Res = cmpTypes(TyL, TyR))
+ return Res;
+ continue;
+ }
+
+ // Two pointers, at least one null, so the comparison result is
+ // independent of the value of a real pointer.
+ if (int Res = cmpNumbers((uint64_t)TyL, (uint64_t)TyR))
+ return Res;
+ continue;
+ }
+ if (LA < RA)
+ return -1;
+ if (RA < LA)
+ return 1;
+ }
+ if (LI != LE)
+ return 1;
+ if (RI != RE)
+ return -1;
+ }
+ return 0;
+}
+
+int FunctionComparator::cmpRangeMetadata(const MDNode *L,
+ const MDNode *R) const {
+ if (L == R)
+ return 0;
+ if (!L)
+ return -1;
+ if (!R)
+ return 1;
+ // Range metadata is a sequence of numbers. Make sure they are the same
+ // sequence.
+ // TODO: Note that as this is metadata, it is possible to drop and/or merge
+ // this data when considering functions to merge. Thus this comparison would
+ // return 0 (i.e. equivalent), but merging would become more complicated
+ // because the ranges would need to be unioned. It is not likely that
+ // functions differ ONLY in this metadata if they are actually the same
+ // function semantically.
+ if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
+ return Res;
+ for (size_t I = 0; I < L->getNumOperands(); ++I) {
+ ConstantInt *LLow = mdconst::extract<ConstantInt>(L->getOperand(I));
+ ConstantInt *RLow = mdconst::extract<ConstantInt>(R->getOperand(I));
+ if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue()))
+ return Res;
+ }
+ return 0;
+}
+
+int FunctionComparator::cmpOperandBundlesSchema(const CallBase &LCS,
+ const CallBase &RCS) const {
+ assert(LCS.getOpcode() == RCS.getOpcode() && "Can't compare otherwise!");
+
+ if (int Res =
+ cmpNumbers(LCS.getNumOperandBundles(), RCS.getNumOperandBundles()))
+ return Res;
+
+ for (unsigned I = 0, E = LCS.getNumOperandBundles(); I != E; ++I) {
+ auto OBL = LCS.getOperandBundleAt(I);
+ auto OBR = RCS.getOperandBundleAt(I);
+
+ if (int Res = OBL.getTagName().compare(OBR.getTagName()))
+ return Res;
+
+ if (int Res = cmpNumbers(OBL.Inputs.size(), OBR.Inputs.size()))
+ return Res;
+ }
+
+ return 0;
+}
+
+/// Constants comparison:
+/// 1. Check whether the type of constant L can be losslessly bitcast to the
+/// type of constant R.
+/// 2. Compare constant contents.
+/// For more details see declaration comments.
+int FunctionComparator::cmpConstants(const Constant *L,
+ const Constant *R) const {
+ Type *TyL = L->getType();
+ Type *TyR = R->getType();
+
+ // Check whether the types are bitcastable. This is essentially a refactored
+ // Type::canLosslesslyBitCastTo check, but instead of returning true/false it
+ // also encodes in the result which type is "less" for ordering purposes.
+ int TypesRes = cmpTypes(TyL, TyR);
+ if (TypesRes != 0) {
+ // Types are different, but check whether we can bitcast them.
+ if (!TyL->isFirstClassType()) {
+ if (TyR->isFirstClassType())
+ return -1;
+ // Neither TyL nor TyR are values of first class type. Return the result
+ // of comparing the types
+ return TypesRes;
+ }
+ if (!TyR->isFirstClassType()) {
+ if (TyL->isFirstClassType())
+ return 1;
+ return TypesRes;
+ }
+
+ // Vector -> Vector conversions are always lossless if the two vector types
+ // have the same size, otherwise not.
+ unsigned TyLWidth = 0;
+ unsigned TyRWidth = 0;
+
+ if (auto *VecTyL = dyn_cast<VectorType>(TyL))
+ TyLWidth = VecTyL->getPrimitiveSizeInBits().getFixedValue();
+ if (auto *VecTyR = dyn_cast<VectorType>(TyR))
+ TyRWidth = VecTyR->getPrimitiveSizeInBits().getFixedValue();
+
+ if (TyLWidth != TyRWidth)
+ return cmpNumbers(TyLWidth, TyRWidth);
+
+ // Zero bit-width means neither TyL nor TyR are vectors.
+ if (!TyLWidth) {
+ PointerType *PTyL = dyn_cast<PointerType>(TyL);
+ PointerType *PTyR = dyn_cast<PointerType>(TyR);
+ if (PTyL && PTyR) {
+ unsigned AddrSpaceL = PTyL->getAddressSpace();
+ unsigned AddrSpaceR = PTyR->getAddressSpace();
+ if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR))
+ return Res;
+ }
+ if (PTyL)
+ return 1;
+ if (PTyR)
+ return -1;
+
+ // TyL and TyR are neither vectors nor pointers. We don't know how to
+ // bitcast them.
+ return TypesRes;
+ }
+ }
+
+ // OK, types are bitcastable, now check constant contents.
+
+ if (L->isNullValue() && R->isNullValue())
+ return TypesRes;
+ if (L->isNullValue() && !R->isNullValue())
+ return 1;
+ if (!L->isNullValue() && R->isNullValue())
+ return -1;
+
+ auto GlobalValueL = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(L));
+ auto GlobalValueR = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(R));
+ if (GlobalValueL && GlobalValueR) {
+ return cmpGlobalValues(GlobalValueL, GlobalValueR);
+ }
+
+ if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
+ return Res;
+
+ if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) {
+ const auto *SeqR = cast<ConstantDataSequential>(R);
+ // This handles ConstantDataArray and ConstantDataVector. Note that we
+ // compare the two raw data arrays, which might differ depending on the host
+ // endianness. This isn't a problem though, because the endianness of a module
+ // will affect the order of the constants, but this order is the same
+ // for a given input module and host platform.
+ return cmpMem(SeqL->getRawDataValues(), SeqR->getRawDataValues());
+ }
+
+ switch (L->getValueID()) {
+ case Value::UndefValueVal:
+ case Value::PoisonValueVal:
+ case Value::ConstantTokenNoneVal:
+ return TypesRes;
+ case Value::ConstantIntVal: {
+ const APInt &LInt = cast<ConstantInt>(L)->getValue();
+ const APInt &RInt = cast<ConstantInt>(R)->getValue();
+ return cmpAPInts(LInt, RInt);
+ }
+ case Value::ConstantFPVal: {
+ const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF();
+ const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF();
+ return cmpAPFloats(LAPF, RAPF);
+ }
+ case Value::ConstantArrayVal: {
+ const ConstantArray *LA = cast<ConstantArray>(L);
+ const ConstantArray *RA = cast<ConstantArray>(R);
+ uint64_t NumElementsL = cast<ArrayType>(TyL)->getNumElements();
+ uint64_t NumElementsR = cast<ArrayType>(TyR)->getNumElements();
+ if (int Res = cmpNumbers(NumElementsL, NumElementsR))
+ return Res;
+ for (uint64_t i = 0; i < NumElementsL; ++i) {
+ if (int Res = cmpConstants(cast<Constant>(LA->getOperand(i)),
+ cast<Constant>(RA->getOperand(i))))
+ return Res;
+ }
+ return 0;
+ }
+ case Value::ConstantStructVal: {
+ const ConstantStruct *LS = cast<ConstantStruct>(L);
+ const ConstantStruct *RS = cast<ConstantStruct>(R);
+ unsigned NumElementsL = cast<StructType>(TyL)->getNumElements();
+ unsigned NumElementsR = cast<StructType>(TyR)->getNumElements();
+ if (int Res = cmpNumbers(NumElementsL, NumElementsR))
+ return Res;
+ for (unsigned i = 0; i != NumElementsL; ++i) {
+ if (int Res = cmpConstants(cast<Constant>(LS->getOperand(i)),
+ cast<Constant>(RS->getOperand(i))))
+ return Res;
+ }
+ return 0;
+ }
+ case Value::ConstantVectorVal: {
+ const ConstantVector *LV = cast<ConstantVector>(L);
+ const ConstantVector *RV = cast<ConstantVector>(R);
+ unsigned NumElementsL = cast<FixedVectorType>(TyL)->getNumElements();
+ unsigned NumElementsR = cast<FixedVectorType>(TyR)->getNumElements();
+ if (int Res = cmpNumbers(NumElementsL, NumElementsR))
+ return Res;
+ for (uint64_t i = 0; i < NumElementsL; ++i) {
+ if (int Res = cmpConstants(cast<Constant>(LV->getOperand(i)),
+ cast<Constant>(RV->getOperand(i))))
+ return Res;
+ }
+ return 0;
+ }
+ case Value::ConstantExprVal: {
+ const ConstantExpr *LE = cast<ConstantExpr>(L);
+ const ConstantExpr *RE = cast<ConstantExpr>(R);
+ unsigned NumOperandsL = LE->getNumOperands();
+ unsigned NumOperandsR = RE->getNumOperands();
+ if (int Res = cmpNumbers(NumOperandsL, NumOperandsR))
+ return Res;
+ for (unsigned i = 0; i < NumOperandsL; ++i) {
+ if (int Res = cmpConstants(cast<Constant>(LE->getOperand(i)),
+ cast<Constant>(RE->getOperand(i))))
+ return Res;
+ }
+ return 0;
+ }
+ case Value::BlockAddressVal: {
+ const BlockAddress *LBA = cast<BlockAddress>(L);
+ const BlockAddress *RBA = cast<BlockAddress>(R);
+ if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction()))
+ return Res;
+ if (LBA->getFunction() == RBA->getFunction()) {
+ // They are BBs in the same function. Order by which comes first in the
+ // BB order of the function. This order is deterministic.
+ Function *F = LBA->getFunction();
+ BasicBlock *LBB = LBA->getBasicBlock();
+ BasicBlock *RBB = RBA->getBasicBlock();
+ if (LBB == RBB)
+ return 0;
+ for (BasicBlock &BB : *F) {
+ if (&BB == LBB) {
+ assert(&BB != RBB);
+ return -1;
+ }
+ if (&BB == RBB)
+ return 1;
+ }
+ llvm_unreachable("Basic Block Address does not point to a basic block in "
+ "its function.");
+ return -1;
+ } else {
+ // cmpValues said the functions are the same. So because they aren't
+ // literally the same pointer, they must respectively be the left and
+ // right functions.
+ assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR);
+ // cmpValues will tell us if these are equivalent BasicBlocks, in the
+ // context of their respective functions.
+ return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock());
+ }
+ }
+ case Value::DSOLocalEquivalentVal: {
+ // dso_local_equivalent is functionally equivalent to whatever it points to.
+ // This means the behavior of the IR should be the exact same as if the
+ // function was referenced directly rather than through a
+ // dso_local_equivalent.
+ const auto *LEquiv = cast<DSOLocalEquivalent>(L);
+ const auto *REquiv = cast<DSOLocalEquivalent>(R);
+ return cmpGlobalValues(LEquiv->getGlobalValue(), REquiv->getGlobalValue());
+ }
+ default: // Unknown constant, abort.
+ LLVM_DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n");
+ llvm_unreachable("Constant ValueID not recognized.");
+ return -1;
+ }
+}
+
+int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue *R) const {
+ uint64_t LNumber = GlobalNumbers->getNumber(L);
+ uint64_t RNumber = GlobalNumbers->getNumber(R);
+ return cmpNumbers(LNumber, RNumber);
+}
+
+/// cmpType - compares two types,
+/// defines total ordering among the types set.
+/// See method declaration comments for more details.
+int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
+ PointerType *PTyL = dyn_cast<PointerType>(TyL);
+ PointerType *PTyR = dyn_cast<PointerType>(TyR);
+
+ const DataLayout &DL = FnL->getParent()->getDataLayout();
+ if (PTyL && PTyL->getAddressSpace() == 0)
+ TyL = DL.getIntPtrType(TyL);
+ if (PTyR && PTyR->getAddressSpace() == 0)
+ TyR = DL.getIntPtrType(TyR);
+
+ if (TyL == TyR)
+ return 0;
+
+ if (int Res = cmpNumbers(TyL->getTypeID(), TyR->getTypeID()))
+ return Res;
+
+ switch (TyL->getTypeID()) {
+ default:
+ llvm_unreachable("Unknown type!");
+ case Type::IntegerTyID:
+ return cmpNumbers(cast<IntegerType>(TyL)->getBitWidth(),
+ cast<IntegerType>(TyR)->getBitWidth());
+ // TyL == TyR would have returned true earlier, because types are uniqued.
+ case Type::VoidTyID:
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ case Type::LabelTyID:
+ case Type::MetadataTyID:
+ case Type::TokenTyID:
+ return 0;
+
+ case Type::PointerTyID:
+ assert(PTyL && PTyR && "Both types must be pointers here.");
+ return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace());
+
+ case Type::StructTyID: {
+ StructType *STyL = cast<StructType>(TyL);
+ StructType *STyR = cast<StructType>(TyR);
+ if (STyL->getNumElements() != STyR->getNumElements())
+ return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());
+
+ if (STyL->isPacked() != STyR->isPacked())
+ return cmpNumbers(STyL->isPacked(), STyR->isPacked());
+
+ for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) {
+ if (int Res = cmpTypes(STyL->getElementType(i), STyR->getElementType(i)))
+ return Res;
+ }
+ return 0;
+ }
+
+ case Type::FunctionTyID: {
+ FunctionType *FTyL = cast<FunctionType>(TyL);
+ FunctionType *FTyR = cast<FunctionType>(TyR);
+ if (FTyL->getNumParams() != FTyR->getNumParams())
+ return cmpNumbers(FTyL->getNumParams(), FTyR->getNumParams());
+
+ if (FTyL->isVarArg() != FTyR->isVarArg())
+ return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg());
+
+ if (int Res = cmpTypes(FTyL->getReturnType(), FTyR->getReturnType()))
+ return Res;
+
+ for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) {
+ if (int Res = cmpTypes(FTyL->getParamType(i), FTyR->getParamType(i)))
+ return Res;
+ }
+ return 0;
+ }
+
+ case Type::ArrayTyID: {
+ auto *STyL = cast<ArrayType>(TyL);
+ auto *STyR = cast<ArrayType>(TyR);
+ if (STyL->getNumElements() != STyR->getNumElements())
+ return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());
+ return cmpTypes(STyL->getElementType(), STyR->getElementType());
+ }
+ case Type::FixedVectorTyID:
+ case Type::ScalableVectorTyID: {
+ auto *STyL = cast<VectorType>(TyL);
+ auto *STyR = cast<VectorType>(TyR);
+ if (STyL->getElementCount().isScalable() !=
+ STyR->getElementCount().isScalable())
+ return cmpNumbers(STyL->getElementCount().isScalable(),
+ STyR->getElementCount().isScalable());
+ if (STyL->getElementCount() != STyR->getElementCount())
+ return cmpNumbers(STyL->getElementCount().getKnownMinValue(),
+ STyR->getElementCount().getKnownMinValue());
+ return cmpTypes(STyL->getElementType(), STyR->getElementType());
+ }
+ }
+}
+
+// Determine whether the two operations are the same except that pointer-to-A
+// and pointer-to-B are equivalent. This should be kept in sync with
+// Instruction::isSameOperationAs.
+// Read method declaration comments for more details.
+int FunctionComparator::cmpOperations(const Instruction *L,
+ const Instruction *R,
+ bool &needToCmpOperands) const {
+ needToCmpOperands = true;
+ if (int Res = cmpValues(L, R))
+ return Res;
+
+ // Differences from Instruction::isSameOperationAs:
+ // * replace type comparison with calls to cmpTypes.
+ // * we test for I->getRawSubclassOptionalData (nuw/nsw/tail) at the top.
+ // * because of the above, we don't test for the tail bit on calls later on.
+ if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode()))
+ return Res;
+
+ if (const GetElementPtrInst *GEPL = dyn_cast<GetElementPtrInst>(L)) {
+ needToCmpOperands = false;
+ const GetElementPtrInst *GEPR = cast<GetElementPtrInst>(R);
+ if (int Res =
+ cmpValues(GEPL->getPointerOperand(), GEPR->getPointerOperand()))
+ return Res;
+ return cmpGEPs(GEPL, GEPR);
+ }
+
+ if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
+ return Res;
+
+ if (int Res = cmpTypes(L->getType(), R->getType()))
+ return Res;
+
+ if (int Res = cmpNumbers(L->getRawSubclassOptionalData(),
+ R->getRawSubclassOptionalData()))
+ return Res;
+
+ // We have two instructions of identical opcode and #operands. Check to see
+ // if all operands are the same type
+ for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) {
+ if (int Res =
+ cmpTypes(L->getOperand(i)->getType(), R->getOperand(i)->getType()))
+ return Res;
+ }
+
+ // Check special state that is a part of some instructions.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(L)) {
+ if (int Res = cmpTypes(AI->getAllocatedType(),
+ cast<AllocaInst>(R)->getAllocatedType()))
+ return Res;
+ return cmpAligns(AI->getAlign(), cast<AllocaInst>(R)->getAlign());
+ }
+ if (const LoadInst *LI = dyn_cast<LoadInst>(L)) {
+ if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile()))
+ return Res;
+ if (int Res = cmpAligns(LI->getAlign(), cast<LoadInst>(R)->getAlign()))
+ return Res;
+ if (int Res =
+ cmpOrderings(LI->getOrdering(), cast<LoadInst>(R)->getOrdering()))
+ return Res;
+ if (int Res = cmpNumbers(LI->getSyncScopeID(),
+ cast<LoadInst>(R)->getSyncScopeID()))
+ return Res;
+ return cmpRangeMetadata(
+ LI->getMetadata(LLVMContext::MD_range),
+ cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range));
+ }
+ if (const StoreInst *SI = dyn_cast<StoreInst>(L)) {
+ if (int Res =
+ cmpNumbers(SI->isVolatile(), cast<StoreInst>(R)->isVolatile()))
+ return Res;
+ if (int Res = cmpAligns(SI->getAlign(), cast<StoreInst>(R)->getAlign()))
+ return Res;
+ if (int Res =
+ cmpOrderings(SI->getOrdering(), cast<StoreInst>(R)->getOrdering()))
+ return Res;
+ return cmpNumbers(SI->getSyncScopeID(),
+ cast<StoreInst>(R)->getSyncScopeID());
+ }
+ if (const CmpInst *CI = dyn_cast<CmpInst>(L))
+ return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate());
+ if (auto *CBL = dyn_cast<CallBase>(L)) {
+ auto *CBR = cast<CallBase>(R);
+ if (int Res = cmpNumbers(CBL->getCallingConv(), CBR->getCallingConv()))
+ return Res;
+ if (int Res = cmpAttrs(CBL->getAttributes(), CBR->getAttributes()))
+ return Res;
+ if (int Res = cmpOperandBundlesSchema(*CBL, *CBR))
+ return Res;
+ if (const CallInst *CI = dyn_cast<CallInst>(L))
+ if (int Res = cmpNumbers(CI->getTailCallKind(),
+ cast<CallInst>(R)->getTailCallKind()))
+ return Res;
+ return cmpRangeMetadata(L->getMetadata(LLVMContext::MD_range),
+ R->getMetadata(LLVMContext::MD_range));
+ }
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
+ ArrayRef<unsigned> LIndices = IVI->getIndices();
+ ArrayRef<unsigned> RIndices = cast<InsertValueInst>(R)->getIndices();
+ if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
+ return Res;
+ for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
+ if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
+ return Res;
+ }
+ return 0;
+ }
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) {
+ ArrayRef<unsigned> LIndices = EVI->getIndices();
+ ArrayRef<unsigned> RIndices = cast<ExtractValueInst>(R)->getIndices();
+ if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
+ return Res;
+ for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
+ if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
+ return Res;
+ }
+ }
+ if (const FenceInst *FI = dyn_cast<FenceInst>(L)) {
+ if (int Res =
+ cmpOrderings(FI->getOrdering(), cast<FenceInst>(R)->getOrdering()))
+ return Res;
+ return cmpNumbers(FI->getSyncScopeID(),
+ cast<FenceInst>(R)->getSyncScopeID());
+ }
+ if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) {
+ if (int Res = cmpNumbers(CXI->isVolatile(),
+ cast<AtomicCmpXchgInst>(R)->isVolatile()))
+ return Res;
+ if (int Res =
+ cmpNumbers(CXI->isWeak(), cast<AtomicCmpXchgInst>(R)->isWeak()))
+ return Res;
+ if (int Res =
+ cmpOrderings(CXI->getSuccessOrdering(),
+ cast<AtomicCmpXchgInst>(R)->getSuccessOrdering()))
+ return Res;
+ if (int Res =
+ cmpOrderings(CXI->getFailureOrdering(),
+ cast<AtomicCmpXchgInst>(R)->getFailureOrdering()))
+ return Res;
+ return cmpNumbers(CXI->getSyncScopeID(),
+ cast<AtomicCmpXchgInst>(R)->getSyncScopeID());
+ }
+ if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) {
+ if (int Res = cmpNumbers(RMWI->getOperation(),
+ cast<AtomicRMWInst>(R)->getOperation()))
+ return Res;
+ if (int Res = cmpNumbers(RMWI->isVolatile(),
+ cast<AtomicRMWInst>(R)->isVolatile()))
+ return Res;
+ if (int Res = cmpOrderings(RMWI->getOrdering(),
+ cast<AtomicRMWInst>(R)->getOrdering()))
+ return Res;
+ return cmpNumbers(RMWI->getSyncScopeID(),
+ cast<AtomicRMWInst>(R)->getSyncScopeID());
+ }
+ if (const ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(L)) {
+ ArrayRef<int> LMask = SVI->getShuffleMask();
+ ArrayRef<int> RMask = cast<ShuffleVectorInst>(R)->getShuffleMask();
+ if (int Res = cmpNumbers(LMask.size(), RMask.size()))
+ return Res;
+ for (size_t i = 0, e = LMask.size(); i != e; ++i) {
+ if (int Res = cmpNumbers(LMask[i], RMask[i]))
+ return Res;
+ }
+ }
+ if (const PHINode *PNL = dyn_cast<PHINode>(L)) {
+ const PHINode *PNR = cast<PHINode>(R);
+ // Ensure that in addition to the incoming values being identical
+ // (checked by the caller of this function), the incoming blocks
+ // are also identical.
+ for (unsigned i = 0, e = PNL->getNumIncomingValues(); i != e; ++i) {
+ if (int Res =
+ cmpValues(PNL->getIncomingBlock(i), PNR->getIncomingBlock(i)))
+ return Res;
+ }
+ }
+ return 0;
+}
+
+// Determine whether two GEP operations perform the same underlying arithmetic.
+// Read method declaration comments for more details.
+int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
+ const GEPOperator *GEPR) const {
+ unsigned int ASL = GEPL->getPointerAddressSpace();
+ unsigned int ASR = GEPR->getPointerAddressSpace();
+
+ if (int Res = cmpNumbers(ASL, ASR))
+ return Res;
+
+ // When we have target data, we can reduce the GEP down to the value in bytes
+ // added to the address.
+ const DataLayout &DL = FnL->getParent()->getDataLayout();
+ unsigned BitWidth = DL.getPointerSizeInBits(ASL);
+ APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
+ if (GEPL->accumulateConstantOffset(DL, OffsetL) &&
+ GEPR->accumulateConstantOffset(DL, OffsetR))
+ return cmpAPInts(OffsetL, OffsetR);
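+ // The offsets are not both constant; fall back to a structural comparison of
+ // the GEPs: source element type first, then each operand in turn.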
+ if (int Res =
+ cmpTypes(GEPL->getSourceElementType(), GEPR->getSourceElementType()))
+ return Res;
+
+ if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands()))
+ return Res;
+
+ for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) {
+ if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i)))
+ return Res;
+ }
+
+ return 0;
+}
+
+int FunctionComparator::cmpInlineAsm(const InlineAsm *L,
+ const InlineAsm *R) const {
+ // InlineAsm's are uniqued. If they are the same pointer, obviously they are
+ // the same, otherwise compare the fields.
+ if (L == R)
+ return 0;
+ if (int Res = cmpTypes(L->getFunctionType(), R->getFunctionType()))
+ return Res;
+ if (int Res = cmpMem(L->getAsmString(), R->getAsmString()))
+ return Res;
+ if (int Res = cmpMem(L->getConstraintString(), R->getConstraintString()))
+ return Res;
+ if (int Res = cmpNumbers(L->hasSideEffects(), R->hasSideEffects()))
+ return Res;
+ if (int Res = cmpNumbers(L->isAlignStack(), R->isAlignStack()))
+ return Res;
+ if (int Res = cmpNumbers(L->getDialect(), R->getDialect()))
+ return Res;
+ assert(L->getFunctionType() != R->getFunctionType());
+ return 0;
+}
+
+/// Compare two values used by the two functions under pair-wise comparison. If
+/// this is the first time the values are seen, they're added to the mapping so
+/// that we will detect mismatches on next use.
+/// See comments in declaration for more details.
+int FunctionComparator::cmpValues(const Value *L, const Value *R) const {
+ // Catch self-reference case.
+ if (L == FnL) {
+ if (R == FnR)
+ return 0;
+ return -1;
+ }
+ if (R == FnR) {
+ if (L == FnL)
+ return 0;
+ return 1;
+ }
+
+ const Constant *ConstL = dyn_cast<Constant>(L);
+ const Constant *ConstR = dyn_cast<Constant>(R);
+ if (ConstL && ConstR) {
+ if (L == R)
+ return 0;
+ return cmpConstants(ConstL, ConstR);
+ }
+
+ if (ConstL)
+ return 1;
+ if (ConstR)
+ return -1;
+
+ const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L);
+ const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R);
+
+ if (InlineAsmL && InlineAsmR)
+ return cmpInlineAsm(InlineAsmL, InlineAsmR);
+ if (InlineAsmL)
+ return 1;
+ if (InlineAsmR)
+ return -1;
+
+ auto LeftSN = sn_mapL.insert(std::make_pair(L, sn_mapL.size())),
+ RightSN = sn_mapR.insert(std::make_pair(R, sn_mapR.size()));
+
+ return cmpNumbers(LeftSN.first->second, RightSN.first->second);
+}
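+
+// Sketch of the serial-number scheme above (hypothetical IR): the first time
+// a non-constant value is seen on either side it is assigned the next serial
+// number, and insertions happen in lockstep, so two values compare equal iff
+// they were first encountered at the same point of the walk. E.g. comparing
+//   %a = add i32 %x, 1     against     %b = add i32 %y, 1
+// assigns the same serial number to %x and %y, so cmpValues(%x, %y) == 0.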
+
+// Test whether two basic blocks have equivalent behaviour.
+int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL,
+ const BasicBlock *BBR) const {
+ BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end();
+ BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end();
+
+ do {
+ bool needToCmpOperands = true;
+ if (int Res = cmpOperations(&*InstL, &*InstR, needToCmpOperands))
+ return Res;
+ if (needToCmpOperands) {
+ assert(InstL->getNumOperands() == InstR->getNumOperands());
+
+ for (unsigned i = 0, e = InstL->getNumOperands(); i != e; ++i) {
+ Value *OpL = InstL->getOperand(i);
+ Value *OpR = InstR->getOperand(i);
+ if (int Res = cmpValues(OpL, OpR))
+ return Res;
+ // cmpValues should ensure this is true.
+ assert(cmpTypes(OpL->getType(), OpR->getType()) == 0);
+ }
+ }
+
+ ++InstL;
+ ++InstR;
+ } while (InstL != InstLE && InstR != InstRE);
+
+ if (InstL != InstLE && InstR == InstRE)
+ return 1;
+ if (InstL == InstLE && InstR != InstRE)
+ return -1;
+ return 0;
+}
+
+int FunctionComparator::compareSignature() const {
+ if (int Res = cmpAttrs(FnL->getAttributes(), FnR->getAttributes()))
+ return Res;
+
+ if (int Res = cmpNumbers(FnL->hasGC(), FnR->hasGC()))
+ return Res;
+
+ if (FnL->hasGC()) {
+ if (int Res = cmpMem(FnL->getGC(), FnR->getGC()))
+ return Res;
+ }
+
+ if (int Res = cmpNumbers(FnL->hasSection(), FnR->hasSection()))
+ return Res;
+
+ if (FnL->hasSection()) {
+ if (int Res = cmpMem(FnL->getSection(), FnR->getSection()))
+ return Res;
+ }
+
+ if (int Res = cmpNumbers(FnL->isVarArg(), FnR->isVarArg()))
+ return Res;
+
+ // TODO: if it's internal and only used in direct calls, we could handle this
+ // case too.
+ if (int Res = cmpNumbers(FnL->getCallingConv(), FnR->getCallingConv()))
+ return Res;
+
+ if (int Res = cmpTypes(FnL->getFunctionType(), FnR->getFunctionType()))
+ return Res;
+
+ assert(FnL->arg_size() == FnR->arg_size() &&
+ "Identically typed functions have different numbers of args!");
+
+ // Visit the arguments so that they get enumerated in the order they're
+ // passed in.
+ for (Function::const_arg_iterator ArgLI = FnL->arg_begin(),
+ ArgRI = FnR->arg_begin(),
+ ArgLE = FnL->arg_end();
+ ArgLI != ArgLE; ++ArgLI, ++ArgRI) {
+ if (cmpValues(&*ArgLI, &*ArgRI) != 0)
+ llvm_unreachable("Arguments repeat!");
+ }
+ return 0;
+}
+
+// Test whether the two functions have equivalent behaviour.
+int FunctionComparator::compare() {
+ beginCompare();
+
+ if (int Res = compareSignature())
+ return Res;
+
+ // We do a CFG-ordered walk since the actual ordering of the blocks in the
+ // linked list is immaterial. Our walk starts at the entry block for both
+ // functions, then takes each block from each terminator in order. As an
+ // artifact, this also means that unreachable blocks are ignored.
+ SmallVector<const BasicBlock *, 8> FnLBBs, FnRBBs;
+ SmallPtrSet<const BasicBlock *, 32> VisitedBBs; // in terms of F1.
+
+ FnLBBs.push_back(&FnL->getEntryBlock());
+ FnRBBs.push_back(&FnR->getEntryBlock());
+
+ VisitedBBs.insert(FnLBBs[0]);
+ while (!FnLBBs.empty()) {
+ const BasicBlock *BBL = FnLBBs.pop_back_val();
+ const BasicBlock *BBR = FnRBBs.pop_back_val();
+
+ if (int Res = cmpValues(BBL, BBR))
+ return Res;
+
+ if (int Res = cmpBasicBlocks(BBL, BBR))
+ return Res;
+
+ const Instruction *TermL = BBL->getTerminator();
+ const Instruction *TermR = BBR->getTerminator();
+
+ assert(TermL->getNumSuccessors() == TermR->getNumSuccessors());
+ for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) {
+ if (!VisitedBBs.insert(TermL->getSuccessor(i)).second)
+ continue;
+
+ FnLBBs.push_back(TermL->getSuccessor(i));
+ FnRBBs.push_back(TermR->getSuccessor(i));
+ }
+ }
+ return 0;
+}
+
+namespace {
+
+// Accumulate the hash of a sequence of 64-bit integers. This is similar to a
+// hash of a sequence of 64-bit ints, but the entire input does not need to be
+// available at once. This interface is necessary for functionHash because it
+// needs to accumulate the hash as the structure of the function is traversed
+// without saving these values to an intermediate buffer. This form of hashing
+// is not often needed, as usually the object to hash is just read from a
+// buffer.
+class HashAccumulator64 {
+ uint64_t Hash;
+
+public:
+ // Initialize to random constant, so the state isn't zero.
+ HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }
+
+ void add(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
+
+ // No finishing is required, because the entire hash value is used.
+ uint64_t getHash() { return Hash; }
+};
+
+} // end anonymous namespace
+
+// A function hash is calculated by considering only the number of arguments and
+// whether a function is varargs, the order of basic blocks (given by the
+// successors of each basic block in depth first order), and the order of
+// opcodes of each instruction within each of these basic blocks. This mirrors
+// the strategy compare() uses to compare functions by walking the BBs in depth
+// first order and comparing each instruction in sequence. Because this hash
+// does not look at the operands, it is insensitive to things such as the
+// target of calls and the constants used in the function, which makes it useful
+// when possibly merging functions which are the same modulo constants and call
+// targets.
+FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
+ HashAccumulator64 H;
+ H.add(F.isVarArg());
+ H.add(F.arg_size());
+
+ SmallVector<const BasicBlock *, 8> BBs;
+ SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
+
+ // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),
+ // accumulating the hash of the function "structure." (BB and opcode sequence)
+ BBs.push_back(&F.getEntryBlock());
+ VisitedBBs.insert(BBs[0]);
+ while (!BBs.empty()) {
+ const BasicBlock *BB = BBs.pop_back_val();
+ // This random value acts as a block header, as otherwise the partition of
+ // opcodes into BBs wouldn't affect the hash, only the order of the opcodes
+ H.add(45798);
+ for (const auto &Inst : *BB) {
+ H.add(Inst.getOpcode());
+ }
+ const Instruction *Term = BB->getTerminator();
+ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
+ if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
+ continue;
+ BBs.push_back(Term->getSuccessor(i));
+ }
+ }
+ return H.getHash();
+}
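+
+// Minimal sketch of the accumulation above for a hypothetical two-block,
+// one-argument function:
+//   HashAccumulator64 H;
+//   H.add(/*isVarArg=*/0); H.add(/*arg_size=*/1);
+//   H.add(45798); H.add(Instruction::Br);   // entry block, single 'br'
+//   H.add(45798); H.add(Instruction::Ret);  // successor block, single 'ret'
+//   uint64_t FnHash = H.getHash();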
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/FunctionImportUtils.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/FunctionImportUtils.cpp
new file mode 100644
index 0000000000..87be6be018
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -0,0 +1,361 @@
+//===- lib/Transforms/Utils/FunctionImportUtils.cpp - Importing utilities -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the FunctionImportGlobalProcessing class, used
+// to perform the necessary global value handling for function importing.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/FunctionImportUtils.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+/// Uses the "source_filename" instead of a Module hash ID for the suffix of
+/// promoted locals during LTO. NOTE: This requires that the source filename
+/// has a unique name / path to avoid name collisions.
+static cl::opt<bool> UseSourceFilenameForPromotedLocals(
+ "use-source-filename-for-promoted-locals", cl::Hidden,
+ cl::desc("Uses the source file name instead of the Module hash. "
+ "This requires that the source filename has a unique name / "
+ "path to avoid name collisions."));
+
+/// Checks if we should import SGV as a definition, otherwise import as a
+/// declaration.
+bool FunctionImportGlobalProcessing::doImportAsDefinition(
+ const GlobalValue *SGV) {
+ if (!isPerformingImport())
+ return false;
+
+ // Only import the globals requested for importing.
+ if (!GlobalsToImport->count(const_cast<GlobalValue *>(SGV)))
+ return false;
+
+ assert(!isa<GlobalAlias>(SGV) &&
+ "Unexpected global alias in the import list.");
+
+ // Otherwise yes.
+ return true;
+}
+
+bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal(
+ const GlobalValue *SGV, ValueInfo VI) {
+ assert(SGV->hasLocalLinkage());
+
+  // Ifuncs and ifunc aliases do not have summaries.
+ if (isa<GlobalIFunc>(SGV) ||
+ (isa<GlobalAlias>(SGV) &&
+ isa<GlobalIFunc>(cast<GlobalAlias>(SGV)->getAliaseeObject())))
+ return false;
+
+ // Both the imported references and the original local variable must
+ // be promoted.
+ if (!isPerformingImport() && !isModuleExporting())
+ return false;
+
+ if (isPerformingImport()) {
+ assert((!GlobalsToImport->count(const_cast<GlobalValue *>(SGV)) ||
+ !isNonRenamableLocal(*SGV)) &&
+ "Attempting to promote non-renamable local");
+ // We don't know for sure yet if we are importing this value (as either
+ // a reference or a def), since we are simply walking all values in the
+ // module. But by necessity if we end up importing it and it is local,
+ // it must be promoted, so unconditionally promote all values in the
+ // importing module.
+ return true;
+ }
+
+ // When exporting, consult the index. We can have more than one local
+ // with the same GUID, in the case of same-named locals in different but
+ // same-named source files that were compiled in their respective directories
+ // (so the source file name and resulting GUID is the same). Find the one
+ // in this module.
+ auto Summary = ImportIndex.findSummaryInModule(
+ VI, SGV->getParent()->getModuleIdentifier());
+ assert(Summary && "Missing summary for global value when exporting");
+ auto Linkage = Summary->linkage();
+ if (!GlobalValue::isLocalLinkage(Linkage)) {
+ assert(!isNonRenamableLocal(*SGV) &&
+ "Attempting to promote non-renamable local");
+ return true;
+ }
+
+ return false;
+}
+
+#ifndef NDEBUG
+bool FunctionImportGlobalProcessing::isNonRenamableLocal(
+ const GlobalValue &GV) const {
+ if (!GV.hasLocalLinkage())
+ return false;
+ // This needs to stay in sync with the logic in buildModuleSummaryIndex.
+ if (GV.hasSection())
+ return true;
+ if (Used.count(const_cast<GlobalValue *>(&GV)))
+ return true;
+ return false;
+}
+#endif
+
+std::string
+FunctionImportGlobalProcessing::getPromotedName(const GlobalValue *SGV) {
+ assert(SGV->hasLocalLinkage());
+
+ // For locals that must be promoted to global scope, ensure that
+ // the promoted name uniquely identifies the copy in the original module,
+ // using the ID assigned during combined index creation.
+ if (UseSourceFilenameForPromotedLocals &&
+ !SGV->getParent()->getSourceFileName().empty()) {
+ SmallString<256> Suffix(SGV->getParent()->getSourceFileName());
+ std::replace_if(std::begin(Suffix), std::end(Suffix),
+ [&](char ch) { return !isAlnum(ch); }, '_');
+ return ModuleSummaryIndex::getGlobalNameForLocal(
+ SGV->getName(), Suffix);
+ }
+
+ return ModuleSummaryIndex::getGlobalNameForLocal(
+ SGV->getName(),
+ ImportIndex.getModuleHash(SGV->getParent()->getModuleIdentifier()));
+}
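+
+// For illustration only (the exact suffix comes from
+// ModuleSummaryIndex::getGlobalNameForLocal): a local @foo would typically be
+// promoted to a name of the form "foo.llvm.<module hash>", or, with
+// -use-source-filename-for-promoted-locals, "foo.llvm.<sanitized file name>".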
+
+GlobalValue::LinkageTypes
+FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
+ bool DoPromote) {
+ // Any local variable that is referenced by an exported function needs
+ // to be promoted to global scope. Since we don't currently know which
+ // functions reference which local variables/functions, we must treat
+ // all as potentially exported if this module is exporting anything.
+ if (isModuleExporting()) {
+ if (SGV->hasLocalLinkage() && DoPromote)
+ return GlobalValue::ExternalLinkage;
+ return SGV->getLinkage();
+ }
+
+ // Otherwise, if we aren't importing, no linkage change is needed.
+ if (!isPerformingImport())
+ return SGV->getLinkage();
+
+ switch (SGV->getLinkage()) {
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::ExternalLinkage:
+ // External and linkonce definitions are converted to available_externally
+ // definitions upon import, so that they are available for inlining
+ // and/or optimization, but are turned into declarations later
+ // during the EliminateAvailableExternally pass.
+ if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV))
+ return GlobalValue::AvailableExternallyLinkage;
+ // An imported external declaration stays external.
+ return SGV->getLinkage();
+
+ case GlobalValue::AvailableExternallyLinkage:
+ // An imported available_externally definition converts
+ // to external if imported as a declaration.
+ if (!doImportAsDefinition(SGV))
+ return GlobalValue::ExternalLinkage;
+ // An imported available_externally declaration stays that way.
+ return SGV->getLinkage();
+
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ // Can't import linkonce_any/weak_any definitions correctly, or we might
+ // change the program semantics, since the linker will pick the first
+ // linkonce_any/weak_any definition and importing would change the order
+ // they are seen by the linker. The module linking caller needs to enforce
+ // this.
+ assert(!doImportAsDefinition(SGV));
+ // If imported as a declaration, it becomes external_weak.
+ return SGV->getLinkage();
+
+ case GlobalValue::WeakODRLinkage:
+ // For weak_odr linkage, there is a guarantee that all copies will be
+ // equivalent, so the issue described above for weak_any does not exist,
+ // and the definition can be imported. It can be treated similarly
+ // to an imported externally visible global value.
+ if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV))
+ return GlobalValue::AvailableExternallyLinkage;
+ else
+ return GlobalValue::ExternalLinkage;
+
+ case GlobalValue::AppendingLinkage:
+ // It would be incorrect to import an appending linkage variable,
+ // since it would cause global constructors/destructors to be
+ // executed multiple times. This should have already been handled
+ // by linkIfNeeded, and we will assert in shouldLinkFromSource
+ // if we try to import, so we simply return AppendingLinkage.
+ return GlobalValue::AppendingLinkage;
+
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::PrivateLinkage:
+ // If we are promoting the local to global scope, it is handled
+ // similarly to a normal externally visible global.
+ if (DoPromote) {
+ if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV))
+ return GlobalValue::AvailableExternallyLinkage;
+ else
+ return GlobalValue::ExternalLinkage;
+ }
+ // A non-promoted imported local definition stays local.
+ // The ThinLTO pass will eventually force-import their definitions.
+ return SGV->getLinkage();
+
+ case GlobalValue::ExternalWeakLinkage:
+ // External weak doesn't apply to definitions, must be a declaration.
+ assert(!doImportAsDefinition(SGV));
+ // Linkage stays external_weak.
+ return SGV->getLinkage();
+
+ case GlobalValue::CommonLinkage:
+ // Linkage stays common on definitions.
+ // The ThinLTO pass will eventually force-import their definitions.
+ return SGV->getLinkage();
+ }
+
+ llvm_unreachable("unknown linkage type");
+}
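+
+// Worked example of the rules above: a weak_odr function imported as a
+// definition becomes available_externally in the importing module (and is
+// later turned back into a declaration), while the exporting module keeps its
+// weak_odr copy; if only a declaration is imported, it becomes plain external.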
+
+void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
+
+ ValueInfo VI;
+ if (GV.hasName()) {
+ VI = ImportIndex.getValueInfo(GV.getGUID());
+ // Set synthetic function entry counts.
+ if (VI && ImportIndex.hasSyntheticEntryCounts()) {
+ if (Function *F = dyn_cast<Function>(&GV)) {
+ if (!F->isDeclaration()) {
+ for (const auto &S : VI.getSummaryList()) {
+ auto *FS = cast<FunctionSummary>(S->getBaseObject());
+ if (FS->modulePath() == M.getModuleIdentifier()) {
+ F->setEntryCount(Function::ProfileCount(FS->entryCount(),
+ Function::PCT_Synthetic));
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // We should always have a ValueInfo (i.e. GV in index) for definitions when
+ // we are exporting, and also when importing that value.
+ assert(VI || GV.isDeclaration() ||
+ (isPerformingImport() && !doImportAsDefinition(&GV)));
+
+ // Mark read/write-only variables which can be imported with specific
+ // attribute. We can't internalize them now because IRMover will fail
+ // to link variable definitions to their external declarations during
+ // ThinLTO import. We'll internalize read-only variables later, after
+ // import is finished. See internalizeGVsAfterImport.
+ //
+ // If global value dead stripping is not enabled in summary then
+ // propagateConstants hasn't been run. We can't internalize GV
+ // in such case.
+ if (!GV.isDeclaration() && VI && ImportIndex.withAttributePropagation()) {
+ if (GlobalVariable *V = dyn_cast<GlobalVariable>(&GV)) {
+ // We can have more than one local with the same GUID, in the case of
+ // same-named locals in different but same-named source files that were
+ // compiled in their respective directories (so the source file name
+ // and resulting GUID is the same). Find the one in this module.
+ // Handle the case where there is no summary found in this module. That
+ // can happen in the distributed ThinLTO backend, because the index only
+ // contains summaries from the source modules if they are being imported.
+ // We might have a non-null VI and get here even in that case if the name
+ // matches one in this module (e.g. weak or appending linkage).
+ auto *GVS = dyn_cast_or_null<GlobalVarSummary>(
+ ImportIndex.findSummaryInModule(VI, M.getModuleIdentifier()));
+ if (GVS &&
+ (ImportIndex.isReadOnly(GVS) || ImportIndex.isWriteOnly(GVS))) {
+ V->addAttribute("thinlto-internalize");
+        // Objects referenced by a writeonly GV initializer should not be
+        // promoted, because there is no read access to them on behalf of
+        // this writeonly GV. To avoid promotion we convert the GV
+        // initializer to 'zeroinitializer'. This effectively drops the
+        // references in the IR module (not in the combined index), so we
+        // can ignore them when computing the import. We do not export
+        // references of writeonly objects. See
+        // computeImportForReferencedGlobals.
+ if (ImportIndex.isWriteOnly(GVS))
+ V->setInitializer(Constant::getNullValue(V->getValueType()));
+ }
+ }
+ }
+
+ if (GV.hasLocalLinkage() && shouldPromoteLocalToGlobal(&GV, VI)) {
+ // Save the original name string before we rename GV below.
+ auto Name = GV.getName().str();
+ GV.setName(getPromotedName(&GV));
+ GV.setLinkage(getLinkage(&GV, /* DoPromote */ true));
+ assert(!GV.hasLocalLinkage());
+ GV.setVisibility(GlobalValue::HiddenVisibility);
+
+ // If we are renaming a COMDAT leader, ensure that we record the COMDAT
+ // for later renaming as well. This is required for COFF.
+ if (const auto *C = GV.getComdat())
+ if (C->getName() == Name)
+ RenamedComdats.try_emplace(C, M.getOrInsertComdat(GV.getName()));
+ } else
+ GV.setLinkage(getLinkage(&GV, /* DoPromote */ false));
+
+ // When ClearDSOLocalOnDeclarations is true, clear dso_local if GV is
+ // converted to a declaration, to disable direct access. Don't do this if GV
+ // is implicitly dso_local due to a non-default visibility.
+ if (ClearDSOLocalOnDeclarations &&
+ (GV.isDeclarationForLinker() ||
+ (isPerformingImport() && !doImportAsDefinition(&GV))) &&
+ !GV.isImplicitDSOLocal()) {
+ GV.setDSOLocal(false);
+ } else if (VI && VI.isDSOLocal(ImportIndex.withDSOLocalPropagation())) {
+ // If all summaries are dso_local, symbol gets resolved to a known local
+ // definition.
+ GV.setDSOLocal(true);
+ if (GV.hasDLLImportStorageClass())
+ GV.setDLLStorageClass(GlobalValue::DefaultStorageClass);
+ }
+
+ // Remove functions imported as available externally defs from comdats,
+ // as this is a declaration for the linker, and will be dropped eventually.
+ // It is illegal for comdats to contain declarations.
+ auto *GO = dyn_cast<GlobalObject>(&GV);
+ if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) {
+ // The IRMover should not have placed any imported declarations in
+ // a comdat, so the only declaration that should be in a comdat
+ // at this point would be a definition imported as available_externally.
+ assert(GO->hasAvailableExternallyLinkage() &&
+ "Expected comdat on definition (possibly available external)");
+ GO->setComdat(nullptr);
+ }
+}
+
+void FunctionImportGlobalProcessing::processGlobalsForThinLTO() {
+ for (GlobalVariable &GV : M.globals())
+ processGlobalForThinLTO(GV);
+ for (Function &SF : M)
+ processGlobalForThinLTO(SF);
+ for (GlobalAlias &GA : M.aliases())
+ processGlobalForThinLTO(GA);
+
+ // Replace any COMDATS that required renaming (because the COMDAT leader was
+ // promoted and renamed).
+ if (!RenamedComdats.empty())
+ for (auto &GO : M.global_objects())
+ if (auto *C = GO.getComdat()) {
+ auto Replacement = RenamedComdats.find(C);
+ if (Replacement != RenamedComdats.end())
+ GO.setComdat(Replacement->second);
+ }
+}
+
+bool FunctionImportGlobalProcessing::run() {
+ processGlobalsForThinLTO();
+ return false;
+}
+
+bool llvm::renameModuleForThinLTO(Module &M, const ModuleSummaryIndex &Index,
+ bool ClearDSOLocalOnDeclarations,
+ SetVector<GlobalValue *> *GlobalsToImport) {
+ FunctionImportGlobalProcessing ThinLTOProcessing(M, Index, GlobalsToImport,
+ ClearDSOLocalOnDeclarations);
+ return ThinLTOProcessing.run();
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/GlobalStatus.cpp
new file mode 100644
index 0000000000..c5aded3c45
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/GlobalStatus.cpp
@@ -0,0 +1,195 @@
+//===-- GlobalStatus.cpp - Compute status info for globals -----------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/GlobalStatus.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include <algorithm>
+#include <cassert>
+
+using namespace llvm;
+
+/// Return the stronger of the two orderings. If the two orderings are acquire
+/// and release, then return AcquireRelease.
+///
+static AtomicOrdering strongerOrdering(AtomicOrdering X, AtomicOrdering Y) {
+ if ((X == AtomicOrdering::Acquire && Y == AtomicOrdering::Release) ||
+ (Y == AtomicOrdering::Acquire && X == AtomicOrdering::Release))
+ return AtomicOrdering::AcquireRelease;
+ return (AtomicOrdering)std::max((unsigned)X, (unsigned)Y);
+}
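+
+// For example, strongerOrdering(Monotonic, Acquire) returns Acquire (the
+// stronger of the two), while strongerOrdering(Acquire, Release) is
+// special-cased to AcquireRelease because neither ordering subsumes the other.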
+
+/// It is safe to destroy a constant iff it is only used, transitively, by
+/// other constants. Note that while constants cannot be cyclic, they can
+/// share operands (forming a DAG), so we keep a visited set to avoid
+/// exponential runtime.
+bool llvm::isSafeToDestroyConstant(const Constant *C) {
+ SmallVector<const Constant *, 8> Worklist;
+ SmallPtrSet<const Constant *, 8> Visited;
+ Worklist.push_back(C);
+ while (!Worklist.empty()) {
+ const Constant *C = Worklist.pop_back_val();
+ if (!Visited.insert(C).second)
+ continue;
+ if (isa<GlobalValue>(C) || isa<ConstantData>(C))
+ return false;
+
+ for (const User *U : C->users()) {
+ if (const Constant *CU = dyn_cast<Constant>(U))
+ Worklist.push_back(CU);
+ else
+ return false;
+ }
+ }
+ return true;
+}
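+
+// For example, a ConstantExpr whose only transitive users are other dead
+// ConstantExprs is safe to destroy; if any transitive constant user is a
+// GlobalValue (say, a global whose initializer references it), or if it has
+// a non-constant user such as an instruction, it is not safe.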
+
+static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
+ SmallPtrSetImpl<const Value *> &VisitedUsers) {
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ if (GV->isExternallyInitialized())
+ GS.StoredType = GlobalStatus::StoredOnce;
+
+ for (const Use &U : V->uses()) {
+ const User *UR = U.getUser();
+ if (const Constant *C = dyn_cast<Constant>(UR)) {
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
+ if (CE && isa<PointerType>(CE->getType())) {
+ // Recursively analyze pointer-typed constant expressions.
+ // FIXME: Do we need to add constexpr selects to VisitedUsers?
+ if (analyzeGlobalAux(CE, GS, VisitedUsers))
+ return true;
+ } else {
+ // Ignore dead constant users.
+ if (!isSafeToDestroyConstant(C))
+ return true;
+ }
+ } else if (const Instruction *I = dyn_cast<Instruction>(UR)) {
+ if (!GS.HasMultipleAccessingFunctions) {
+ const Function *F = I->getParent()->getParent();
+ if (!GS.AccessingFunction)
+ GS.AccessingFunction = F;
+ else if (GS.AccessingFunction != F)
+ GS.HasMultipleAccessingFunctions = true;
+ }
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ GS.IsLoaded = true;
+ // Don't hack on volatile loads.
+ if (LI->isVolatile())
+ return true;
+ GS.Ordering = strongerOrdering(GS.Ordering, LI->getOrdering());
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Don't allow a store OF the address, only stores TO the address.
+ if (SI->getOperand(0) == V)
+ return true;
+
+ // Don't hack on volatile stores.
+ if (SI->isVolatile())
+ return true;
+
+ ++GS.NumStores;
+
+ GS.Ordering = strongerOrdering(GS.Ordering, SI->getOrdering());
+
+ // If this is a direct store to the global (i.e., the global is a scalar
+ // value, not an aggregate), keep more specific information about
+ // stores.
+ if (GS.StoredType != GlobalStatus::Stored) {
+ const Value *Ptr = SI->getPointerOperand()->stripPointerCasts();
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
+ Value *StoredVal = SI->getOperand(0);
+
+ if (Constant *C = dyn_cast<Constant>(StoredVal)) {
+ if (C->isThreadDependent()) {
+ // The stored value changes between threads; don't track it.
+ return true;
+ }
+ }
+
+ if (GV->hasInitializer() && StoredVal == GV->getInitializer()) {
+ if (GS.StoredType < GlobalStatus::InitializerStored)
+ GS.StoredType = GlobalStatus::InitializerStored;
+ } else if (isa<LoadInst>(StoredVal) &&
+ cast<LoadInst>(StoredVal)->getOperand(0) == GV) {
+ if (GS.StoredType < GlobalStatus::InitializerStored)
+ GS.StoredType = GlobalStatus::InitializerStored;
+ } else if (GS.StoredType < GlobalStatus::StoredOnce) {
+ GS.StoredType = GlobalStatus::StoredOnce;
+ GS.StoredOnceStore = SI;
+ } else if (GS.StoredType == GlobalStatus::StoredOnce &&
+ GS.getStoredOnceValue() == StoredVal) {
+ // noop.
+ } else {
+ GS.StoredType = GlobalStatus::Stored;
+ }
+ } else {
+ GS.StoredType = GlobalStatus::Stored;
+ }
+ }
+ } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I) ||
+ isa<AddrSpaceCastInst>(I)) {
+ // Skip over bitcasts and GEPs; we don't care about the type or offset
+ // of the pointer.
+ if (analyzeGlobalAux(I, GS, VisitedUsers))
+ return true;
+ } else if (isa<SelectInst>(I) || isa<PHINode>(I)) {
+ // Look through selects and PHIs to find if the pointer is
+ // conditionally accessed. Make sure we only visit an instruction
+ // once; otherwise, we can get infinite recursion or exponential
+ // compile time.
+ if (VisitedUsers.insert(I).second)
+ if (analyzeGlobalAux(I, GS, VisitedUsers))
+ return true;
+ } else if (isa<CmpInst>(I)) {
+ GS.IsCompared = true;
+ } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
+ if (MTI->isVolatile())
+ return true;
+ if (MTI->getArgOperand(0) == V)
+ GS.StoredType = GlobalStatus::Stored;
+ if (MTI->getArgOperand(1) == V)
+ GS.IsLoaded = true;
+ } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) {
+ assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!");
+ if (MSI->isVolatile())
+ return true;
+ GS.StoredType = GlobalStatus::Stored;
+ } else if (const auto *CB = dyn_cast<CallBase>(I)) {
+ if (!CB->isCallee(&U))
+ return true;
+ GS.IsLoaded = true;
+ } else {
+ return true; // Any other non-load instruction might take address!
+ }
+ } else {
+ // Otherwise must be some other user.
+ return true;
+ }
+ }
+
+ return false;
+}
+
+GlobalStatus::GlobalStatus() = default;
+
+bool GlobalStatus::analyzeGlobal(const Value *V, GlobalStatus &GS) {
+ SmallPtrSet<const Value *, 16> VisitedUsers;
+ return analyzeGlobalAux(V, GS, VisitedUsers);
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/GuardUtils.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/GuardUtils.cpp
new file mode 100644
index 0000000000..7c310f16d4
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/GuardUtils.cpp
@@ -0,0 +1,126 @@
+//===-- GuardUtils.cpp - Utils for work with guards -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Utils that are used to perform transformations related to guards and their
+// conditions.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/GuardUtils.h"
+#include "llvm/Analysis/GuardUtils.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+static cl::opt<uint32_t> PredicatePassBranchWeight(
+ "guards-predicate-pass-branch-weight", cl::Hidden, cl::init(1 << 20),
+ cl::desc("The probability of a guard failing is assumed to be the "
+ "reciprocal of this value (default = 1 << 20)"));
+
+void llvm::makeGuardControlFlowExplicit(Function *DeoptIntrinsic,
+ CallInst *Guard, bool UseWC) {
+ OperandBundleDef DeoptOB(*Guard->getOperandBundle(LLVMContext::OB_deopt));
+ SmallVector<Value *, 4> Args(drop_begin(Guard->args()));
+
+ auto *CheckBB = Guard->getParent();
+ auto *DeoptBlockTerm =
+ SplitBlockAndInsertIfThen(Guard->getArgOperand(0), Guard, true);
+
+ auto *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
+
+ // SplitBlockAndInsertIfThen inserts control flow that branches to
+ // DeoptBlockTerm if the condition is true. We want the opposite.
+ CheckBI->swapSuccessors();
+
+ CheckBI->getSuccessor(0)->setName("guarded");
+ CheckBI->getSuccessor(1)->setName("deopt");
+
+ if (auto *MD = Guard->getMetadata(LLVMContext::MD_make_implicit))
+ CheckBI->setMetadata(LLVMContext::MD_make_implicit, MD);
+
+ MDBuilder MDB(Guard->getContext());
+ CheckBI->setMetadata(LLVMContext::MD_prof,
+ MDB.createBranchWeights(PredicatePassBranchWeight, 1));
+
+ IRBuilder<> B(DeoptBlockTerm);
+ auto *DeoptCall = B.CreateCall(DeoptIntrinsic, Args, {DeoptOB}, "");
+
+ if (DeoptIntrinsic->getReturnType()->isVoidTy()) {
+ B.CreateRetVoid();
+ } else {
+ DeoptCall->setName("deoptcall");
+ B.CreateRet(DeoptCall);
+ }
+
+ DeoptCall->setCallingConv(Guard->getCallingConv());
+ DeoptBlockTerm->eraseFromParent();
+
+ if (UseWC) {
+ // We want the guard to be expressed as explicit control flow, but still be
+ // widenable. For that, we add Widenable Condition intrinsic call to the
+ // guard's condition.
+ IRBuilder<> B(CheckBI);
+ auto *WC = B.CreateIntrinsic(Intrinsic::experimental_widenable_condition,
+ {}, {}, nullptr, "widenable_cond");
+ CheckBI->setCondition(B.CreateAnd(CheckBI->getCondition(), WC,
+ "exiplicit_guard_cond"));
+ assert(isWidenableBranch(CheckBI) && "Branch must be widenable.");
+ }
+}
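+
+// Sketch of the rewrite above (hypothetical IR names): a guard such as
+//   call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"(...) ]
+// becomes explicit control flow of the form
+//   br i1 %cond, label %guarded, label %deopt   ; heavily weighted to pass
+// where the %deopt block calls DeoptIntrinsic (typically
+// @llvm.experimental.deoptimize) with the forwarded arguments and "deopt"
+// bundle, and, when UseWC is set, %cond is and-ed with
+// @llvm.experimental.widenable.condition() to keep the branch widenable.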
+
+
+void llvm::widenWidenableBranch(BranchInst *WidenableBR, Value *NewCond) {
+ assert(isWidenableBranch(WidenableBR) && "precondition");
+
+  // The temptingly trivial option is to produce something like this:
+ // br (and oldcond, newcond) where oldcond is assumed to contain a widenable
+ // condition, but that doesn't match the pattern parseWidenableBranch expects
+ // so we have to be more sophisticated.
+
+ Use *C, *WC;
+ BasicBlock *IfTrueBB, *IfFalseBB;
+ parseWidenableBranch(WidenableBR, C, WC, IfTrueBB, IfFalseBB);
+ if (!C) {
+ // br (wc()), ... form
+ IRBuilder<> B(WidenableBR);
+ WidenableBR->setCondition(B.CreateAnd(NewCond, WC->get()));
+ } else {
+ // br (wc & C), ... form
+ IRBuilder<> B(WidenableBR);
+ C->set(B.CreateAnd(NewCond, C->get()));
+ Instruction *WCAnd = cast<Instruction>(WidenableBR->getCondition());
+ // Condition is only guaranteed to dominate branch
+ WCAnd->moveBefore(WidenableBR);
+ }
+ assert(isWidenableBranch(WidenableBR) && "preserve widenabiliy");
+}
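+
+// Sketch of widening for the `br (wc & C)` form (hypothetical IR names):
+//   %wc   = call i1 @llvm.experimental.widenable.condition()
+//   %cond = and i1 %C, %wc
+//   br i1 %cond, label %guarded, label %deopt
+// becomes, after widenWidenableBranch(Br, %New),
+//   %newc = and i1 %New, %C
+//   %cond = and i1 %newc, %wc
+// which still matches the pattern parseWidenableBranch expects.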
+
+void llvm::setWidenableBranchCond(BranchInst *WidenableBR, Value *NewCond) {
+ assert(isWidenableBranch(WidenableBR) && "precondition");
+
+ Use *C, *WC;
+ BasicBlock *IfTrueBB, *IfFalseBB;
+ parseWidenableBranch(WidenableBR, C, WC, IfTrueBB, IfFalseBB);
+ if (!C) {
+ // br (wc()), ... form
+ IRBuilder<> B(WidenableBR);
+ WidenableBR->setCondition(B.CreateAnd(NewCond, WC->get()));
+ } else {
+ // br (wc & C), ... form
+ Instruction *WCAnd = cast<Instruction>(WidenableBR->getCondition());
+ // Condition is only guaranteed to dominate branch
+ WCAnd->moveBefore(WidenableBR);
+ C->set(NewCond);
+ }
+ assert(isWidenableBranch(WidenableBR) && "preserve widenabiliy");
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/HelloWorld.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/HelloWorld.cpp
new file mode 100644
index 0000000000..7019e9e445
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/HelloWorld.cpp
@@ -0,0 +1,17 @@
+//===-- HelloWorld.cpp - Example Transformations --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/HelloWorld.h"
+
+using namespace llvm;
+
+PreservedAnalyses HelloWorldPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ errs() << F.getName() << "\n";
+ return PreservedAnalyses::all();
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/InjectTLIMappings.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/InjectTLIMappings.cpp
new file mode 100644
index 0000000000..55bcb6f3b1
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/InjectTLIMappings.cpp
@@ -0,0 +1,176 @@
+//===- InjectTLIMappings.cpp - TLI to VFABI attribute injection ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Populates the VFABI attribute with the scalar-to-vector mappings
+// from the TargetLibraryInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/InjectTLIMappings.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DemandedBits.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "inject-tli-mappings"
+
+STATISTIC(NumCallInjected,
+ "Number of calls in which the mappings have been injected.");
+
+STATISTIC(NumVFDeclAdded,
+ "Number of function declarations that have been added.");
+STATISTIC(NumCompUsedAdded,
+ "Number of `@llvm.compiler.used` operands that have been added.");
+
+/// A helper function that adds the vector function declaration that
+/// vectorizes the CallInst CI with a vectorization factor of VF
+/// lanes. The TLI assumes that all parameters and the return type of
+/// CI (other than void) need to be widened to a VectorType of VF
+/// lanes.
+static void addVariantDeclaration(CallInst &CI, const ElementCount &VF,
+ const StringRef VFName) {
+ Module *M = CI.getModule();
+
+ // Add function declaration.
+ Type *RetTy = ToVectorTy(CI.getType(), VF);
+ SmallVector<Type *, 4> Tys;
+ for (Value *ArgOperand : CI.args())
+ Tys.push_back(ToVectorTy(ArgOperand->getType(), VF));
+ assert(!CI.getFunctionType()->isVarArg() &&
+ "VarArg functions are not supported.");
+ FunctionType *FTy = FunctionType::get(RetTy, Tys, /*isVarArg=*/false);
+ Function *VectorF =
+ Function::Create(FTy, Function::ExternalLinkage, VFName, M);
+ VectorF->copyAttributesFrom(CI.getCalledFunction());
+ ++NumVFDeclAdded;
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added to the module: `" << VFName
+ << "` of type " << *(VectorF->getType()) << "\n");
+
+ // Make function declaration (without a body) "sticky" in the IR by
+ // listing it in the @llvm.compiler.used intrinsic.
+ assert(!VectorF->size() && "VFABI attribute requires `@llvm.compiler.used` "
+ "only on declarations.");
+ appendToCompilerUsed(*M, {VectorF});
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << VFName
+ << "` to `@llvm.compiler.used`.\n");
+ ++NumCompUsedAdded;
+}
+
+static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) {
+ // This is needed to make sure we don't query the TLI for calls to
+ // bitcast of function pointers, like `%call = call i32 (i32*, ...)
+ // bitcast (i32 (...)* @goo to i32 (i32*, ...)*)(i32* nonnull %i)`,
+  // as such calls make `isFunctionVectorizable` raise an
+ // exception.
+ if (CI.isNoBuiltin() || !CI.getCalledFunction())
+ return;
+
+ StringRef ScalarName = CI.getCalledFunction()->getName();
+
+ // Nothing to be done if the TLI thinks the function is not
+ // vectorizable.
+ if (!TLI.isFunctionVectorizable(ScalarName))
+ return;
+ SmallVector<std::string, 8> Mappings;
+ VFABI::getVectorVariantNames(CI, Mappings);
+ Module *M = CI.getModule();
+ const SetVector<StringRef> OriginalSetOfMappings(Mappings.begin(),
+ Mappings.end());
+
+ auto AddVariantDecl = [&](const ElementCount &VF) {
+ const std::string TLIName =
+ std::string(TLI.getVectorizedFunction(ScalarName, VF));
+ if (!TLIName.empty()) {
+ std::string MangledName =
+ VFABI::mangleTLIVectorName(TLIName, ScalarName, CI.arg_size(), VF);
+ if (!OriginalSetOfMappings.count(MangledName)) {
+ Mappings.push_back(MangledName);
+ ++NumCallInjected;
+ }
+ Function *VariantF = M->getFunction(TLIName);
+ if (!VariantF)
+ addVariantDeclaration(CI, VF, TLIName);
+ }
+ };
+
+ // All VFs in the TLI are powers of 2.
+ ElementCount WidestFixedVF, WidestScalableVF;
+ TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
+
+ for (ElementCount VF = ElementCount::getFixed(2);
+ ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
+ AddVariantDecl(VF);
+
+ // TODO: Add scalable variants once we're able to test them.
+ assert(WidestScalableVF.isZero() &&
+ "Scalable vector mappings not yet supported");
+
+ VFABI::setVectorVariantNames(&CI, Mappings);
+}
+
+static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
+ for (auto &I : instructions(F))
+ if (auto CI = dyn_cast<CallInst>(&I))
+ addMappingsFromTLI(TLI, *CI);
+ // Even if the pass adds IR attributes, the analyses are preserved.
+ return false;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// New pass manager implementation.
+////////////////////////////////////////////////////////////////////////////////
+PreservedAnalyses InjectTLIMappings::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ runImpl(TLI, F);
+ // Even if the pass adds IR attributes, the analyses are preserved.
+ return PreservedAnalyses::all();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Legacy PM Implementation.
+////////////////////////////////////////////////////////////////////////////////
+bool InjectTLIMappingsLegacy::runOnFunction(Function &F) {
+ const TargetLibraryInfo &TLI =
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ return runImpl(TLI, F);
+}
+
+void InjectTLIMappingsLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<LoopAccessLegacyAnalysis>();
+ AU.addPreserved<DemandedBitsWrapperPass>();
+ AU.addPreserved<OptimizationRemarkEmitterWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Legacy Pass manager initialization
+////////////////////////////////////////////////////////////////////////////////
+char InjectTLIMappingsLegacy::ID = 0;
+
+INITIALIZE_PASS_BEGIN(InjectTLIMappingsLegacy, DEBUG_TYPE,
+ "Inject TLI Mappings", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(InjectTLIMappingsLegacy, DEBUG_TYPE, "Inject TLI Mappings",
+ false, false)
+
+FunctionPass *llvm::createInjectTLIMappingsLegacyPass() {
+ return new InjectTLIMappingsLegacy();
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/InlineFunction.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/InlineFunction.cpp
new file mode 100644
index 0000000000..399c9a4379
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/InlineFunction.cpp
@@ -0,0 +1,2915 @@
+//===- InlineFunction.cpp - Code to perform function inlining -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements inlining of a function into a call site, resolving
+// parameters and the return value as appropriate.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryProfileInfo.h"
+#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
+#include "llvm/Analysis/ObjCARCUtil.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#define DEBUG_TYPE "inline-function"
+
+using namespace llvm;
+using namespace llvm::memprof;
+using ProfileCount = Function::ProfileCount;
+
+static cl::opt<bool>
+EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true),
+ cl::Hidden,
+ cl::desc("Convert noalias attributes to metadata during inlining."));
+
+static cl::opt<bool>
+ UseNoAliasIntrinsic("use-noalias-intrinsic-during-inlining", cl::Hidden,
+ cl::init(true),
+ cl::desc("Use the llvm.experimental.noalias.scope.decl "
+ "intrinsic during inlining."));
+
+// Disabled by default, because the added alignment assumptions may increase
+// compile-time and block optimizations. This option is not suitable for use
+// with frontends that emit comprehensive parameter alignment annotations.
+static cl::opt<bool>
+PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
+ cl::init(false), cl::Hidden,
+ cl::desc("Convert align attributes to assumptions during inlining."));
+
+static cl::opt<bool> UpdateReturnAttributes(
+ "update-return-attrs", cl::init(true), cl::Hidden,
+ cl::desc("Update return attributes on calls within inlined body"));
+
+static cl::opt<unsigned> InlinerAttributeWindow(
+ "max-inst-checked-for-throw-during-inlining", cl::Hidden,
+ cl::desc("the maximum number of instructions analyzed for may throw during "
+ "attribute inference in inlined body"),
+ cl::init(4));
+
+namespace {
+
+ /// A class for recording information about inlining a landing pad.
+ class LandingPadInliningInfo {
+ /// Destination of the invoke's unwind.
+ BasicBlock *OuterResumeDest;
+
+ /// Destination for the callee's resume.
+ BasicBlock *InnerResumeDest = nullptr;
+
+ /// LandingPadInst associated with the invoke.
+ LandingPadInst *CallerLPad = nullptr;
+
+ /// PHI for EH values from landingpad insts.
+ PHINode *InnerEHValuesPHI = nullptr;
+
+ SmallVector<Value*, 8> UnwindDestPHIValues;
+
+ public:
+ LandingPadInliningInfo(InvokeInst *II)
+ : OuterResumeDest(II->getUnwindDest()) {
+ // If there are PHI nodes in the unwind destination block, we need to keep
+ // track of which values came into them from the invoke before removing
+ // the edge from this block.
+ BasicBlock *InvokeBB = II->getParent();
+ BasicBlock::iterator I = OuterResumeDest->begin();
+ for (; isa<PHINode>(I); ++I) {
+ // Save the value to use for this edge.
+ PHINode *PHI = cast<PHINode>(I);
+ UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
+ }
+
+ CallerLPad = cast<LandingPadInst>(I);
+ }
+
+ /// The outer unwind destination is the target of
+ /// unwind edges introduced for calls within the inlined function.
+ BasicBlock *getOuterResumeDest() const {
+ return OuterResumeDest;
+ }
+
+ BasicBlock *getInnerResumeDest();
+
+ LandingPadInst *getLandingPadInst() const { return CallerLPad; }
+
+ /// Forward the 'resume' instruction to the caller's landing pad block.
+ /// When the landing pad block has only one predecessor, this is
+ /// a simple branch. When there is more than one predecessor, we need to
+ /// split the landing pad block after the landingpad instruction and jump
+ /// to there.
+ void forwardResume(ResumeInst *RI,
+ SmallPtrSetImpl<LandingPadInst*> &InlinedLPads);
+
+ /// Add incoming-PHI values to the unwind destination block for the given
+ /// basic block, using the values for the original invoke's source block.
+ void addIncomingPHIValuesFor(BasicBlock *BB) const {
+ addIncomingPHIValuesForInto(BB, OuterResumeDest);
+ }
+
+ void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const {
+ BasicBlock::iterator I = dest->begin();
+ for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
+ PHINode *phi = cast<PHINode>(I);
+ phi->addIncoming(UnwindDestPHIValues[i], src);
+ }
+ }
+ };
+
+} // end anonymous namespace
+
+/// Get or create a target for the branch from ResumeInsts.
+BasicBlock *LandingPadInliningInfo::getInnerResumeDest() {
+ if (InnerResumeDest) return InnerResumeDest;
+
+ // Split the landing pad.
+ BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator();
+ InnerResumeDest =
+ OuterResumeDest->splitBasicBlock(SplitPoint,
+ OuterResumeDest->getName() + ".body");
+
+ // The number of incoming edges we expect to the inner landing pad.
+ const unsigned PHICapacity = 2;
+
+ // Create corresponding new PHIs for all the PHIs in the outer landing pad.
+ Instruction *InsertPoint = &InnerResumeDest->front();
+ BasicBlock::iterator I = OuterResumeDest->begin();
+ for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
+ PHINode *OuterPHI = cast<PHINode>(I);
+ PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity,
+ OuterPHI->getName() + ".lpad-body",
+ InsertPoint);
+ OuterPHI->replaceAllUsesWith(InnerPHI);
+ InnerPHI->addIncoming(OuterPHI, OuterResumeDest);
+ }
+
+ // Create a PHI for the exception values.
+ InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity,
+ "eh.lpad-body", InsertPoint);
+ CallerLPad->replaceAllUsesWith(InnerEHValuesPHI);
+ InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest);
+
+ // All done.
+ return InnerResumeDest;
+}
+
+/// Forward the 'resume' instruction to the caller's landing pad block.
+/// When the landing pad block has only one predecessor, this is a simple
+/// branch. When there is more than one predecessor, we need to split the
+/// landing pad block after the landingpad instruction and jump to there.
+void LandingPadInliningInfo::forwardResume(
+ ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) {
+ BasicBlock *Dest = getInnerResumeDest();
+ BasicBlock *Src = RI->getParent();
+
+ BranchInst::Create(Dest, Src);
+
+ // Update the PHIs in the destination. They were inserted in an order which
+ // makes this work.
+ addIncomingPHIValuesForInto(Src, Dest);
+
+ InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
+ RI->eraseFromParent();
+}
+
+/// Helper for getUnwindDestToken/getUnwindDestTokenHelper.
+static Value *getParentPad(Value *EHPad) {
+ if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad))
+ return FPI->getParentPad();
+ return cast<CatchSwitchInst>(EHPad)->getParentPad();
+}
+
+using UnwindDestMemoTy = DenseMap<Instruction *, Value *>;
+
+/// Helper for getUnwindDestToken that does the descendant-ward part of
+/// the search.
+static Value *getUnwindDestTokenHelper(Instruction *EHPad,
+ UnwindDestMemoTy &MemoMap) {
+ SmallVector<Instruction *, 8> Worklist(1, EHPad);
+
+ while (!Worklist.empty()) {
+ Instruction *CurrentPad = Worklist.pop_back_val();
+ // We only put pads on the worklist that aren't in the MemoMap. When
+ // we find an unwind dest for a pad we may update its ancestors, but
+ // the queue only ever contains uncles/great-uncles/etc. of CurrentPad,
+ // so they should never get updated while queued on the worklist.
+ assert(!MemoMap.count(CurrentPad));
+ Value *UnwindDestToken = nullptr;
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(CurrentPad)) {
+ if (CatchSwitch->hasUnwindDest()) {
+ UnwindDestToken = CatchSwitch->getUnwindDest()->getFirstNonPHI();
+ } else {
+ // Catchswitch doesn't have a 'nounwind' variant, and one might be
+ // annotated as "unwinds to caller" when really it's nounwind (see
+ // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the
+ // parent's unwind dest from this. We can check its catchpads'
+ // descendants, since they might include a cleanuppad with an
+ // "unwinds to caller" cleanupret, which can be trusted.
+ for (auto HI = CatchSwitch->handler_begin(),
+ HE = CatchSwitch->handler_end();
+ HI != HE && !UnwindDestToken; ++HI) {
+ BasicBlock *HandlerBlock = *HI;
+ auto *CatchPad = cast<CatchPadInst>(HandlerBlock->getFirstNonPHI());
+ for (User *Child : CatchPad->users()) {
+ // Intentionally ignore invokes here -- since the catchswitch is
+ // marked "unwind to caller", it would be a verifier error if it
+ // contained an invoke which unwinds out of it, so any invoke we'd
+ // encounter must unwind to some child of the catch.
+ if (!isa<CleanupPadInst>(Child) && !isa<CatchSwitchInst>(Child))
+ continue;
+
+ Instruction *ChildPad = cast<Instruction>(Child);
+ auto Memo = MemoMap.find(ChildPad);
+ if (Memo == MemoMap.end()) {
+ // Haven't figured out this child pad yet; queue it.
+ Worklist.push_back(ChildPad);
+ continue;
+ }
+ // We've already checked this child, but might have found that
+ // it offers no proof either way.
+ Value *ChildUnwindDestToken = Memo->second;
+ if (!ChildUnwindDestToken)
+ continue;
+ // We already know the child's unwind dest, which can either
+ // be ConstantTokenNone to indicate unwind to caller, or can
+ // be another child of the catchpad. Only the former indicates
+ // the unwind dest of the catchswitch.
+ if (isa<ConstantTokenNone>(ChildUnwindDestToken)) {
+ UnwindDestToken = ChildUnwindDestToken;
+ break;
+ }
+ assert(getParentPad(ChildUnwindDestToken) == CatchPad);
+ }
+ }
+ }
+ } else {
+ auto *CleanupPad = cast<CleanupPadInst>(CurrentPad);
+ for (User *U : CleanupPad->users()) {
+ if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) {
+ if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest())
+ UnwindDestToken = RetUnwindDest->getFirstNonPHI();
+ else
+ UnwindDestToken = ConstantTokenNone::get(CleanupPad->getContext());
+ break;
+ }
+ Value *ChildUnwindDestToken;
+ if (auto *Invoke = dyn_cast<InvokeInst>(U)) {
+ ChildUnwindDestToken = Invoke->getUnwindDest()->getFirstNonPHI();
+ } else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) {
+ Instruction *ChildPad = cast<Instruction>(U);
+ auto Memo = MemoMap.find(ChildPad);
+ if (Memo == MemoMap.end()) {
+ // Haven't resolved this child yet; queue it and keep searching.
+ Worklist.push_back(ChildPad);
+ continue;
+ }
+ // We've checked this child, but still need to ignore it if it
+ // had no proof either way.
+ ChildUnwindDestToken = Memo->second;
+ if (!ChildUnwindDestToken)
+ continue;
+ } else {
+ // Not a relevant user of the cleanuppad
+ continue;
+ }
+ // In a well-formed program, the child/invoke must either unwind to
+ // an(other) child of the cleanup, or exit the cleanup. In the
+ // first case, continue searching.
+ if (isa<Instruction>(ChildUnwindDestToken) &&
+ getParentPad(ChildUnwindDestToken) == CleanupPad)
+ continue;
+ UnwindDestToken = ChildUnwindDestToken;
+ break;
+ }
+ }
+ // If we haven't found an unwind dest for CurrentPad, we may have queued its
+ // children, so move on to the next in the worklist.
+ if (!UnwindDestToken)
+ continue;
+
+ // Now we know that CurrentPad unwinds to UnwindDestToken. It also exits
+ // any ancestors of CurrentPad up to but not including UnwindDestToken's
+ // parent pad. Record this in the memo map, and check to see if the
+ // original EHPad being queried is one of the ones exited.
+ Value *UnwindParent;
+ if (auto *UnwindPad = dyn_cast<Instruction>(UnwindDestToken))
+ UnwindParent = getParentPad(UnwindPad);
+ else
+ UnwindParent = nullptr;
+ bool ExitedOriginalPad = false;
+ for (Instruction *ExitedPad = CurrentPad;
+ ExitedPad && ExitedPad != UnwindParent;
+ ExitedPad = dyn_cast<Instruction>(getParentPad(ExitedPad))) {
+ // Skip over catchpads since they just follow their catchswitches.
+ if (isa<CatchPadInst>(ExitedPad))
+ continue;
+ MemoMap[ExitedPad] = UnwindDestToken;
+ ExitedOriginalPad |= (ExitedPad == EHPad);
+ }
+
+ if (ExitedOriginalPad)
+ return UnwindDestToken;
+
+ // Continue the search.
+ }
+
+ // No definitive information is contained within this funclet.
+ return nullptr;
+}
+
+/// Given an EH pad, find where it unwinds. If it unwinds to an EH pad,
+/// return that pad instruction. If it unwinds to caller, return
+/// ConstantTokenNone. If it does not have a definitive unwind destination,
+/// return nullptr.
+///
+/// This routine gets invoked for calls in funclets in inlinees when inlining
+/// an invoke. Since many funclets don't have calls inside them, it's queried
+/// on-demand rather than building a map of pads to unwind dests up front.
+/// Determining a funclet's unwind dest may require recursively searching its
+/// descendants, and also ancestors and cousins if the descendants don't provide
+/// an answer. Since most funclets will have their unwind dest immediately
+/// available as the unwind dest of a catchswitch or cleanupret, this routine
+/// searches top-down from the given pad and then up. To avoid worst-case
+/// quadratic run-time given that approach, it uses a memo map to avoid
+/// re-processing funclet trees. The callers that rewrite the IR as they go
+/// take advantage of this, for correctness, by checking/forcing rewritten
+/// pads' entries to match the original callee view.
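+ ///
+ /// An illustrative sketch (added commentary, not from the upstream source):
+ /// given a cleanup funclet such as
+ ///
+ ///   %cleanup = cleanuppad within none []
+ ///   ...
+ ///   cleanupret from %cleanup unwind label %ehdest
+ ///
+ /// querying %cleanup returns the first non-PHI instruction of %ehdest; a
+ /// "cleanupret ... unwind to caller" would instead yield ConstantTokenNone,
+ /// and a query on a catchpad is redirected to its owning catchswitch. Each
+ /// answer is memoized in MemoMap so repeated queries stay cheap.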
+static Value *getUnwindDestToken(Instruction *EHPad,
+ UnwindDestMemoTy &MemoMap) {
+ // Catchpads unwind to the same place as their catchswitch;
+ // redirct any queries on catchpads so the code below can
+ // deal with just catchswitches and cleanuppads.
+ if (auto *CPI = dyn_cast<CatchPadInst>(EHPad))
+ EHPad = CPI->getCatchSwitch();
+
+ // Check if we've already determined the unwind dest for this pad.
+ auto Memo = MemoMap.find(EHPad);
+ if (Memo != MemoMap.end())
+ return Memo->second;
+
+ // Search EHPad and, if necessary, its descendants.
+ Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap);
+ assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0));
+ if (UnwindDestToken)
+ return UnwindDestToken;
+
+ // No information is available for this EHPad from itself or any of its
+ // descendants. An unwind all the way out to a pad in the caller would
+ // need also to agree with the unwind dest of the parent funclet, so
+ // search up the chain to try to find a funclet with information. Put
+ // null entries in the memo map to avoid re-processing as we go up.
+ MemoMap[EHPad] = nullptr;
+#ifndef NDEBUG
+ SmallPtrSet<Instruction *, 4> TempMemos;
+ TempMemos.insert(EHPad);
+#endif
+ Instruction *LastUselessPad = EHPad;
+ Value *AncestorToken;
+ for (AncestorToken = getParentPad(EHPad);
+ auto *AncestorPad = dyn_cast<Instruction>(AncestorToken);
+ AncestorToken = getParentPad(AncestorToken)) {
+ // Skip over catchpads since they just follow their catchswitches.
+ if (isa<CatchPadInst>(AncestorPad))
+ continue;
+ // If the MemoMap had an entry mapping AncestorPad to nullptr, since we
+ // haven't yet called getUnwindDestTokenHelper for AncestorPad in this
+ // call to getUnwindDestToken, that would mean that AncestorPad had no
+ // information in itself, its descendants, or its ancestors. If that
+ // were the case, then we should also have recorded the lack of information
+ // for the descendant that we're coming from. So assert that we don't
+ // find a null entry in the MemoMap for AncestorPad.
+ assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]);
+ auto AncestorMemo = MemoMap.find(AncestorPad);
+ if (AncestorMemo == MemoMap.end()) {
+ UnwindDestToken = getUnwindDestTokenHelper(AncestorPad, MemoMap);
+ } else {
+ UnwindDestToken = AncestorMemo->second;
+ }
+ if (UnwindDestToken)
+ break;
+ LastUselessPad = AncestorPad;
+ MemoMap[LastUselessPad] = nullptr;
+#ifndef NDEBUG
+ TempMemos.insert(LastUselessPad);
+#endif
+ }
+
+ // We know that getUnwindDestTokenHelper was called on LastUselessPad and
+ // returned nullptr (and likewise for EHPad and any of its ancestors up to
+ // LastUselessPad), so LastUselessPad has no information from below. Since
+ // getUnwindDestTokenHelper must investigate all downward paths through
+ // no-information nodes to prove that a node has no information like this,
+ // and since any time it finds information it records it in the MemoMap for
+ // not just the immediately-containing funclet but also any ancestors also
+ // exited, it must be the case that, walking downward from LastUselessPad,
+ // visiting just those nodes which have not been mapped to an unwind dest
+ // by getUnwindDestTokenHelper (the nullptr TempMemos notwithstanding, since
+ // they are just used to keep getUnwindDestTokenHelper from repeating work),
+ // any node visited must have been exhaustively searched with no information
+ // for it found.
+ SmallVector<Instruction *, 8> Worklist(1, LastUselessPad);
+ while (!Worklist.empty()) {
+ Instruction *UselessPad = Worklist.pop_back_val();
+ auto Memo = MemoMap.find(UselessPad);
+ if (Memo != MemoMap.end() && Memo->second) {
+ // Here the name 'UselessPad' is a bit of a misnomer, because we've found
+ // that it is a funclet that does have information about unwinding to
+ // a particular destination; its parent was a useless pad.
+ // Since its parent has no information, the unwind edge must not escape
+ // the parent, and must target a sibling of this pad. This local unwind
+ // gives us no information about EHPad. Leave it and the subtree rooted
+ // at it alone.
+ assert(getParentPad(Memo->second) == getParentPad(UselessPad));
+ continue;
+ }
+ // We know we don't have information for UselessPad. If it has an entry in
+ // the MemoMap (mapping it to nullptr), it must be one of the TempMemos
+ // added on this invocation of getUnwindDestToken; if a previous invocation
+ // recorded nullptr, it would have had to prove that the ancestors of
+ // UselessPad, which include LastUselessPad, had no information, and that
+ // in turn would have required proving that the descendants of
+ // LastUselessPad, which include EHPad, have no information about
+ // LastUselessPad, which would imply that EHPad was mapped to nullptr in
+ // the MemoMap on that invocation, which isn't the case if we got here.
+ assert(!MemoMap.count(UselessPad) || TempMemos.count(UselessPad));
+ // Assert as we enumerate users that 'UselessPad' doesn't have any unwind
+ // information that we'd be contradicting by making a map entry for it
+ // (which is something that getUnwindDestTokenHelper must have proved for
+ // us to get here). Just assert on its direct users here; the checks in
+ // this downward walk at its descendants will verify that they don't have
+ // any unwind edges that exit 'UselessPad' either (i.e. they either have no
+ // unwind edges or unwind to a sibling).
+ MemoMap[UselessPad] = UnwindDestToken;
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UselessPad)) {
+ assert(CatchSwitch->getUnwindDest() == nullptr && "Expected useless pad");
+ for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) {
+ auto *CatchPad = HandlerBlock->getFirstNonPHI();
+ for (User *U : CatchPad->users()) {
+ assert(
+ (!isa<InvokeInst>(U) ||
+ (getParentPad(
+ cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) ==
+ CatchPad)) &&
+ "Expected useless pad");
+ if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
+ Worklist.push_back(cast<Instruction>(U));
+ }
+ }
+ } else {
+ assert(isa<CleanupPadInst>(UselessPad));
+ for (User *U : UselessPad->users()) {
+ assert(!isa<CleanupReturnInst>(U) && "Expected useless pad");
+ assert((!isa<InvokeInst>(U) ||
+ (getParentPad(
+ cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) ==
+ UselessPad)) &&
+ "Expected useless pad");
+ if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
+ Worklist.push_back(cast<Instruction>(U));
+ }
+ }
+ }
+
+ return UnwindDestToken;
+}
+
+/// When we inline a basic block into an invoke,
+/// we have to turn all of the calls that can throw into invokes.
+ /// This function analyzes BB to see if there are any calls, and if so,
+ /// it rewrites them to be invokes that jump to UnwindEdge; the callers then
+ /// fill in the PHI nodes of the unwind destination for the newly created
+ /// edge.
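+ ///
+ /// Illustrative sketch (added commentary, not from the upstream source): a
+ /// throwing call such as
+ ///   %r = call i32 @may_throw(i32 %x)
+ /// is rewritten roughly into
+ ///   %r = invoke i32 @may_throw(i32 %x)
+ ///           to label %split unwind label %UnwindEdge
+ /// where %split is the remainder of BB split off after the call; the actual
+ /// rewrite is performed by changeToInvokeAndSplitBasicBlock.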
+static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
+ BasicBlock *BB, BasicBlock *UnwindEdge,
+ UnwindDestMemoTy *FuncletUnwindMap = nullptr) {
+ for (Instruction &I : llvm::make_early_inc_range(*BB)) {
+ // We only need to check for function calls: inlined invoke
+ // instructions require no special handling.
+ CallInst *CI = dyn_cast<CallInst>(&I);
+
+ if (!CI || CI->doesNotThrow())
+ continue;
+
+ // We do not need to (and in fact, cannot) convert possibly throwing calls
+ // to @llvm.experimental.deoptimize (resp. @llvm.experimental.guard) into
+ // invokes. The caller's "segment" of the deoptimization continuation
+ // attached to the newly inlined @llvm.experimental.deoptimize
+ // (resp. @llvm.experimental.guard) call should contain the exception
+ // handling logic, if any.
+ if (auto *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize ||
+ F->getIntrinsicID() == Intrinsic::experimental_guard)
+ continue;
+
+ if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) {
+ // This call is nested inside a funclet. If that funclet has an unwind
+ // destination within the inlinee, then unwinding out of this call would
+ // be UB. Rewriting this call to an invoke which targets the inlined
+ // invoke's unwind dest would give the call's parent funclet multiple
+ // unwind destinations, which is something that subsequent EH table
+ // generation can't handle and that the verifier rejects. So when we
+ // see such a call, leave it as a call.
+ auto *FuncletPad = cast<Instruction>(FuncletBundle->Inputs[0]);
+ Value *UnwindDestToken =
+ getUnwindDestToken(FuncletPad, *FuncletUnwindMap);
+ if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
+ continue;
+#ifndef NDEBUG
+ Instruction *MemoKey;
+ if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad))
+ MemoKey = CatchPad->getCatchSwitch();
+ else
+ MemoKey = FuncletPad;
+ assert(FuncletUnwindMap->count(MemoKey) &&
+ (*FuncletUnwindMap)[MemoKey] == UnwindDestToken &&
+ "must get memoized to avoid confusing later searches");
+#endif // NDEBUG
+ }
+
+ changeToInvokeAndSplitBasicBlock(CI, UnwindEdge);
+ return BB;
+ }
+ return nullptr;
+}
+
+/// If we inlined an invoke site, we need to convert calls
+/// in the body of the inlined function into invokes.
+///
+/// II is the invoke instruction being inlined. FirstNewBlock is the first
+/// block of the inlined code (the last block is the end of the function),
+/// and InlineCodeInfo is information about the code that got inlined.
+static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock,
+ ClonedCodeInfo &InlinedCodeInfo) {
+ BasicBlock *InvokeDest = II->getUnwindDest();
+
+ Function *Caller = FirstNewBlock->getParent();
+
+ // The inlined code is currently at the end of the function, scan from the
+ // start of the inlined code to its end, checking for stuff we need to
+ // rewrite.
+ LandingPadInliningInfo Invoke(II);
+
+ // Get all of the inlined landing pad instructions.
+ SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
+ for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end();
+ I != E; ++I)
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator()))
+ InlinedLPads.insert(II->getLandingPadInst());
+
+ // Append the clauses from the outer landing pad instruction into the inlined
+ // landing pad instructions.
+ LandingPadInst *OuterLPad = Invoke.getLandingPadInst();
+ for (LandingPadInst *InlinedLPad : InlinedLPads) {
+ unsigned OuterNum = OuterLPad->getNumClauses();
+ InlinedLPad->reserveClauses(OuterNum);
+ for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx)
+ InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
+ if (OuterLPad->isCleanup())
+ InlinedLPad->setCleanup(true);
+ }
+
+ for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
+ BB != E; ++BB) {
+ if (InlinedCodeInfo.ContainsCalls)
+ if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
+ &*BB, Invoke.getOuterResumeDest()))
+ // Update any PHI nodes in the exceptional block to indicate that there
+ // is now a new entry in them.
+ Invoke.addIncomingPHIValuesFor(NewBB);
+
+ // Forward any resumes that are remaining here.
+ if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
+ Invoke.forwardResume(RI, InlinedLPads);
+ }
+
+ // Now that everything is happy, we have one final detail. The PHI nodes in
+ // the exception destination block still have entries due to the original
+ // invoke instruction. Eliminate these entries (which might even delete the
+ // PHI node) now.
+ InvokeDest->removePredecessor(II->getParent());
+}
+
+/// If we inlined an invoke site, we need to convert calls
+/// in the body of the inlined function into invokes.
+///
+/// II is the invoke instruction being inlined. FirstNewBlock is the first
+/// block of the inlined code (the last block is the end of the function),
+/// and InlineCodeInfo is information about the code that got inlined.
+static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
+ ClonedCodeInfo &InlinedCodeInfo) {
+ BasicBlock *UnwindDest = II->getUnwindDest();
+ Function *Caller = FirstNewBlock->getParent();
+
+ assert(UnwindDest->getFirstNonPHI()->isEHPad() && "unexpected BasicBlock!");
+
+ // If there are PHI nodes in the unwind destination block, we need to keep
+ // track of which values came into them from the invoke before removing the
+ // edge from this block.
+ SmallVector<Value *, 8> UnwindDestPHIValues;
+ BasicBlock *InvokeBB = II->getParent();
+ for (PHINode &PHI : UnwindDest->phis()) {
+ // Save the value to use for this edge.
+ UnwindDestPHIValues.push_back(PHI.getIncomingValueForBlock(InvokeBB));
+ }
+
+ // Add incoming-PHI values to the unwind destination block for the given basic
+ // block, using the values for the original invoke's source block.
+ auto UpdatePHINodes = [&](BasicBlock *Src) {
+ BasicBlock::iterator I = UnwindDest->begin();
+ for (Value *V : UnwindDestPHIValues) {
+ PHINode *PHI = cast<PHINode>(I);
+ PHI->addIncoming(V, Src);
+ ++I;
+ }
+ };
+
+ // This connects all the instructions which 'unwind to caller' to the invoke
+ // destination.
+ UnwindDestMemoTy FuncletUnwindMap;
+ for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
+ BB != E; ++BB) {
+ if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
+ if (CRI->unwindsToCaller()) {
+ auto *CleanupPad = CRI->getCleanupPad();
+ CleanupReturnInst::Create(CleanupPad, UnwindDest, CRI);
+ CRI->eraseFromParent();
+ UpdatePHINodes(&*BB);
+ // Finding a cleanupret with an unwind destination would confuse
+ // subsequent calls to getUnwindDestToken, so map the cleanuppad
+ // to short-circuit any such calls and recognize this as an "unwind
+ // to caller" cleanup.
+ assert(!FuncletUnwindMap.count(CleanupPad) ||
+ isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad]));
+ FuncletUnwindMap[CleanupPad] =
+ ConstantTokenNone::get(Caller->getContext());
+ }
+ }
+
+ Instruction *I = BB->getFirstNonPHI();
+ if (!I->isEHPad())
+ continue;
+
+ Instruction *Replacement = nullptr;
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
+ if (CatchSwitch->unwindsToCaller()) {
+ Value *UnwindDestToken;
+ if (auto *ParentPad =
+ dyn_cast<Instruction>(CatchSwitch->getParentPad())) {
+ // This catchswitch is nested inside another funclet. If that
+ // funclet has an unwind destination within the inlinee, then
+ // unwinding out of this catchswitch would be UB. Rewriting this
+ // catchswitch to unwind to the inlined invoke's unwind dest would
+ // give the parent funclet multiple unwind destinations, which is
+ // something that subsequent EH table generation can't handle and
+ // that the verifier rejects. So when we see such a call, leave it
+ // as "unwind to caller".
+ UnwindDestToken = getUnwindDestToken(ParentPad, FuncletUnwindMap);
+ if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
+ continue;
+ } else {
+ // This catchswitch has no parent to inherit constraints from, and
+ // none of its descendants can have an unwind edge that exits it and
+ // targets another funclet in the inlinee. It may or may not have a
+ // descendant that definitively has an unwind to caller. In either
+ // case, we'll have to assume that any unwinds out of it may need to
+ // be routed to the caller, so treat it as though it has a definitive
+ // unwind to caller.
+ UnwindDestToken = ConstantTokenNone::get(Caller->getContext());
+ }
+ auto *NewCatchSwitch = CatchSwitchInst::Create(
+ CatchSwitch->getParentPad(), UnwindDest,
+ CatchSwitch->getNumHandlers(), CatchSwitch->getName(),
+ CatchSwitch);
+ for (BasicBlock *PadBB : CatchSwitch->handlers())
+ NewCatchSwitch->addHandler(PadBB);
+ // Propagate info for the old catchswitch over to the new one in
+ // the unwind map. This also serves to short-circuit any subsequent
+ // checks for the unwind dest of this catchswitch, which would get
+ // confused if they found the outer handler in the callee.
+ FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken;
+ Replacement = NewCatchSwitch;
+ }
+ } else if (!isa<FuncletPadInst>(I)) {
+ llvm_unreachable("unexpected EHPad!");
+ }
+
+ if (Replacement) {
+ Replacement->takeName(I);
+ I->replaceAllUsesWith(Replacement);
+ I->eraseFromParent();
+ UpdatePHINodes(&*BB);
+ }
+ }
+
+ if (InlinedCodeInfo.ContainsCalls)
+ for (Function::iterator BB = FirstNewBlock->getIterator(),
+ E = Caller->end();
+ BB != E; ++BB)
+ if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
+ &*BB, UnwindDest, &FuncletUnwindMap))
+ // Update any PHI nodes in the exceptional block to indicate that there
+ // is now a new entry in them.
+ UpdatePHINodes(NewBB);
+
+ // Now that everything is happy, we have one final detail. The PHI nodes in
+ // the exception destination block still have entries due to the original
+ // invoke instruction. Eliminate these entries (which might even delete the
+ // PHI node) now.
+ UnwindDest->removePredecessor(InvokeBB);
+}
+
+static bool haveCommonPrefix(MDNode *MIBStackContext,
+ MDNode *CallsiteStackContext) {
+ assert(MIBStackContext->getNumOperands() > 0 &&
+ CallsiteStackContext->getNumOperands() > 0);
+ // Because of the context trimming performed during matching, the callsite
+ // context could have more stack ids than the MIB. We match up to the end of
+ // the shortest stack context.
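+ // For example (illustrative values only), an MIB stack of !{i64 1, i64 2}
+ // shares a common prefix with a callsite stack of !{i64 1, i64 2, i64 3},
+ // whereas !{i64 1, i64 4} does not, because the second ids differ.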
+ for (auto MIBStackIter = MIBStackContext->op_begin(),
+ CallsiteStackIter = CallsiteStackContext->op_begin();
+ MIBStackIter != MIBStackContext->op_end() &&
+ CallsiteStackIter != CallsiteStackContext->op_end();
+ MIBStackIter++, CallsiteStackIter++) {
+ auto *Val1 = mdconst::dyn_extract<ConstantInt>(*MIBStackIter);
+ auto *Val2 = mdconst::dyn_extract<ConstantInt>(*CallsiteStackIter);
+ assert(Val1 && Val2);
+ if (Val1->getZExtValue() != Val2->getZExtValue())
+ return false;
+ }
+ return true;
+}
+
+static void removeMemProfMetadata(CallBase *Call) {
+ Call->setMetadata(LLVMContext::MD_memprof, nullptr);
+}
+
+static void removeCallsiteMetadata(CallBase *Call) {
+ Call->setMetadata(LLVMContext::MD_callsite, nullptr);
+}
+
+static void updateMemprofMetadata(CallBase *CI,
+ const std::vector<Metadata *> &MIBList) {
+ assert(!MIBList.empty());
+ // Remove existing memprof, which will either be replaced or may not be needed
+ // if we are able to use a single allocation type function attribute.
+ removeMemProfMetadata(CI);
+ CallStackTrie CallStack;
+ for (Metadata *MIB : MIBList)
+ CallStack.addCallStack(cast<MDNode>(MIB));
+ bool MemprofMDAttached = CallStack.buildAndAttachMIBMetadata(CI);
+ assert(MemprofMDAttached == CI->hasMetadata(LLVMContext::MD_memprof));
+ if (!MemprofMDAttached)
+ // If we used a function attribute remove the callsite metadata as well.
+ removeCallsiteMetadata(CI);
+}
+
+// Update the metadata on the inlined copy ClonedCall of a call OrigCall in the
+// inlined callee body, based on the callsite metadata InlinedCallsiteMD from
+// the call that was inlined.
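+ // Illustrative sketch (added commentary, not from the upstream source): if
+ // the original callee-body call carried !callsite !{i64 1} and the call being
+ // inlined carried !callsite !{i64 2, i64 3}, the cloned call's !callsite
+ // becomes the concatenation !{i64 1, i64 2, i64 3}, and only !memprof MIB
+ // entries whose stack context shares a prefix with that new context remain
+ // on the clone.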
+static void propagateMemProfHelper(const CallBase *OrigCall,
+ CallBase *ClonedCall,
+ MDNode *InlinedCallsiteMD) {
+ MDNode *OrigCallsiteMD = ClonedCall->getMetadata(LLVMContext::MD_callsite);
+ MDNode *ClonedCallsiteMD = nullptr;
+ // Check if the call originally had callsite metadata, and update it for the
+ // new call in the inlined body.
+ if (OrigCallsiteMD) {
+ // The cloned call's context is now the concatenation of the original call's
+ // callsite metadata and the callsite metadata on the call where it was
+ // inlined.
+ ClonedCallsiteMD = MDNode::concatenate(OrigCallsiteMD, InlinedCallsiteMD);
+ ClonedCall->setMetadata(LLVMContext::MD_callsite, ClonedCallsiteMD);
+ }
+
+ // Update any memprof metadata on the cloned call.
+ MDNode *OrigMemProfMD = ClonedCall->getMetadata(LLVMContext::MD_memprof);
+ if (!OrigMemProfMD)
+ return;
+ // We currently expect that allocations with memprof metadata also have
+ // callsite metadata for the allocation's part of the context.
+ assert(OrigCallsiteMD);
+
+ // New call's MIB list.
+ std::vector<Metadata *> NewMIBList;
+
+ // For each MIB metadata, check if its call stack context starts with the
+ // new clone's callsite metadata. If so, that MIB goes onto the cloned call in
+ // the inlined body. If not, it stays on the out-of-line original call.
+ for (auto &MIBOp : OrigMemProfMD->operands()) {
+ MDNode *MIB = dyn_cast<MDNode>(MIBOp);
+ // Stack is first operand of MIB.
+ MDNode *StackMD = getMIBStackNode(MIB);
+ assert(StackMD);
+ // See if the new cloned callsite context matches this profiled context.
+ if (haveCommonPrefix(StackMD, ClonedCallsiteMD))
+ // Add it to the cloned call's MIB list.
+ NewMIBList.push_back(MIB);
+ }
+ if (NewMIBList.empty()) {
+ removeMemProfMetadata(ClonedCall);
+ removeCallsiteMetadata(ClonedCall);
+ return;
+ }
+ if (NewMIBList.size() < OrigMemProfMD->getNumOperands())
+ updateMemprofMetadata(ClonedCall, NewMIBList);
+}
+
+// Update memprof related metadata (!memprof and !callsite) based on the
+// inlining of Callee into the callsite at CB. The updates include merging the
+// inlined callee's callsite metadata with that of the inlined call,
+// and moving the subset of any memprof contexts to the inlined callee
+// allocations if they match the new inlined call stack.
+// FIXME: Replace memprof metadata with function attribute if all MIB end up
+// having the same behavior. Do other context trimming/merging optimizations
+// too.
+static void
+propagateMemProfMetadata(Function *Callee, CallBase &CB,
+ bool ContainsMemProfMetadata,
+ const ValueMap<const Value *, WeakTrackingVH> &VMap) {
+ MDNode *CallsiteMD = CB.getMetadata(LLVMContext::MD_callsite);
+ // Only need to update if the inlined callsite had callsite metadata, or if
+ // there was any memprof metadata inlined.
+ if (!CallsiteMD && !ContainsMemProfMetadata)
+ return;
+
+ // Propagate metadata onto the cloned calls in the inlined callee.
+ for (const auto &Entry : VMap) {
+ // See if this is a call that has been inlined and remapped, and not
+ // simplified away in the process.
+ auto *OrigCall = dyn_cast_or_null<CallBase>(Entry.first);
+ auto *ClonedCall = dyn_cast_or_null<CallBase>(Entry.second);
+ if (!OrigCall || !ClonedCall)
+ continue;
+ // If the inlined callsite did not have any callsite metadata, then it isn't
+ // involved in any profiled call contexts, and we can remove any memprof
+ // metadata on the cloned call.
+ if (!CallsiteMD) {
+ removeMemProfMetadata(ClonedCall);
+ removeCallsiteMetadata(ClonedCall);
+ continue;
+ }
+ propagateMemProfHelper(OrigCall, ClonedCall, CallsiteMD);
+ }
+}
+
+/// When inlining a call site that has !llvm.mem.parallel_loop_access,
+/// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should
+/// be propagated to all memory-accessing cloned instructions.
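+ ///
+ /// Illustrative sketch (added commentary, not from the upstream source): if
+ /// the call site carries !noalias !1, then after inlining every
+ /// memory-accessing cloned instruction has !1 concatenated onto whatever
+ /// !noalias metadata it already had, and the same merge is applied for the
+ /// other three metadata kinds handled below.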
+static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
+ Function::iterator FEnd) {
+ MDNode *MemParallelLoopAccess =
+ CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access);
+ MDNode *AccessGroup = CB.getMetadata(LLVMContext::MD_access_group);
+ MDNode *AliasScope = CB.getMetadata(LLVMContext::MD_alias_scope);
+ MDNode *NoAlias = CB.getMetadata(LLVMContext::MD_noalias);
+ if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias)
+ return;
+
+ for (BasicBlock &BB : make_range(FStart, FEnd)) {
+ for (Instruction &I : BB) {
+ // This metadata is only relevant for instructions that access memory.
+ if (!I.mayReadOrWriteMemory())
+ continue;
+
+ if (MemParallelLoopAccess) {
+ // TODO: This probably should not overwrite MemParallelLoopAccess.
+ MemParallelLoopAccess = MDNode::concatenate(
+ I.getMetadata(LLVMContext::MD_mem_parallel_loop_access),
+ MemParallelLoopAccess);
+ I.setMetadata(LLVMContext::MD_mem_parallel_loop_access,
+ MemParallelLoopAccess);
+ }
+
+ if (AccessGroup)
+ I.setMetadata(LLVMContext::MD_access_group, uniteAccessGroups(
+ I.getMetadata(LLVMContext::MD_access_group), AccessGroup));
+
+ if (AliasScope)
+ I.setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate(
+ I.getMetadata(LLVMContext::MD_alias_scope), AliasScope));
+
+ if (NoAlias)
+ I.setMetadata(LLVMContext::MD_noalias, MDNode::concatenate(
+ I.getMetadata(LLVMContext::MD_noalias), NoAlias));
+ }
+ }
+}
+
+/// Bundle operands of the inlined function must be added to inlined call sites.
+static void PropagateOperandBundles(Function::iterator InlinedBB,
+ Instruction *CallSiteEHPad) {
+ for (Instruction &II : llvm::make_early_inc_range(*InlinedBB)) {
+ CallBase *I = dyn_cast<CallBase>(&II);
+ if (!I)
+ continue;
+ // Skip call sites which already have a "funclet" bundle.
+ if (I->getOperandBundle(LLVMContext::OB_funclet))
+ continue;
+ // Skip call sites which are nounwind intrinsics (as long as they don't
+ // lower into regular function calls in the course of IR transformations).
+ auto *CalledFn =
+ dyn_cast<Function>(I->getCalledOperand()->stripPointerCasts());
+ if (CalledFn && CalledFn->isIntrinsic() && I->doesNotThrow() &&
+ !IntrinsicInst::mayLowerToFunctionCall(CalledFn->getIntrinsicID()))
+ continue;
+
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ I->getOperandBundlesAsDefs(OpBundles);
+ OpBundles.emplace_back("funclet", CallSiteEHPad);
+
+ Instruction *NewInst = CallBase::Create(I, OpBundles, I);
+ NewInst->takeName(I);
+ I->replaceAllUsesWith(NewInst);
+ I->eraseFromParent();
+ }
+}
+
+namespace {
+/// Utility for cloning !noalias and !alias.scope metadata. When a code region
+/// using scoped alias metadata is inlined, the aliasing relationships may not
+ /// hold between the two versions. It is necessary to create a deep clone of the
+/// metadata, putting the two versions in separate scope domains.
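+ ///
+ /// Illustrative sketch (added commentary, not from the upstream source): if a
+ /// function is inlined twice into the same caller (or into itself), reusing
+ /// its !alias.scope/!noalias nodes verbatim could make accesses from one
+ /// inlined copy appear noalias with respect to scopes that belong to the
+ /// other copy. Cloning the scope nodes and their domain per inlining keeps
+ /// the two copies' scopes distinct.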
+class ScopedAliasMetadataDeepCloner {
+ using MetadataMap = DenseMap<const MDNode *, TrackingMDNodeRef>;
+ SetVector<const MDNode *> MD;
+ MetadataMap MDMap;
+ void addRecursiveMetadataUses();
+
+public:
+ ScopedAliasMetadataDeepCloner(const Function *F);
+
+ /// Create a new clone of the scoped alias metadata, which will be used by
+ /// subsequent remap() calls.
+ void clone();
+
+ /// Remap instructions in the given range from the original to the cloned
+ /// metadata.
+ void remap(Function::iterator FStart, Function::iterator FEnd);
+};
+} // namespace
+
+ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner(
+ const Function *F) {
+ for (const BasicBlock &BB : *F) {
+ for (const Instruction &I : BB) {
+ if (const MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope))
+ MD.insert(M);
+ if (const MDNode *M = I.getMetadata(LLVMContext::MD_noalias))
+ MD.insert(M);
+
+ // We also need to clone the metadata in noalias intrinsics.
+ if (const auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
+ MD.insert(Decl->getScopeList());
+ }
+ }
+ addRecursiveMetadataUses();
+}
+
+void ScopedAliasMetadataDeepCloner::addRecursiveMetadataUses() {
+ SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end());
+ while (!Queue.empty()) {
+ const MDNode *M = cast<MDNode>(Queue.pop_back_val());
+ for (const Metadata *Op : M->operands())
+ if (const MDNode *OpMD = dyn_cast<MDNode>(Op))
+ if (MD.insert(OpMD))
+ Queue.push_back(OpMD);
+ }
+}
+
+void ScopedAliasMetadataDeepCloner::clone() {
+ assert(MDMap.empty() && "clone() already called ?");
+
+ SmallVector<TempMDTuple, 16> DummyNodes;
+ for (const MDNode *I : MD) {
+ DummyNodes.push_back(MDTuple::getTemporary(I->getContext(), std::nullopt));
+ MDMap[I].reset(DummyNodes.back().get());
+ }
+
+ // Create new metadata nodes to replace the dummy nodes, replacing old
+ // metadata references with either a dummy node or an already-created new
+ // node.
+ SmallVector<Metadata *, 4> NewOps;
+ for (const MDNode *I : MD) {
+ for (const Metadata *Op : I->operands()) {
+ if (const MDNode *M = dyn_cast<MDNode>(Op))
+ NewOps.push_back(MDMap[M]);
+ else
+ NewOps.push_back(const_cast<Metadata *>(Op));
+ }
+
+ MDNode *NewM = MDNode::get(I->getContext(), NewOps);
+ MDTuple *TempM = cast<MDTuple>(MDMap[I]);
+ assert(TempM->isTemporary() && "Expected temporary node");
+
+ TempM->replaceAllUsesWith(NewM);
+ NewOps.clear();
+ }
+}
+
+void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart,
+ Function::iterator FEnd) {
+ if (MDMap.empty())
+ return; // Nothing to do.
+
+ for (BasicBlock &BB : make_range(FStart, FEnd)) {
+ for (Instruction &I : BB) {
+ // TODO: The null checks for the MDMap.lookup() results should no longer
+ // be necessary.
+ if (MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope))
+ if (MDNode *MNew = MDMap.lookup(M))
+ I.setMetadata(LLVMContext::MD_alias_scope, MNew);
+
+ if (MDNode *M = I.getMetadata(LLVMContext::MD_noalias))
+ if (MDNode *MNew = MDMap.lookup(M))
+ I.setMetadata(LLVMContext::MD_noalias, MNew);
+
+ if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
+ if (MDNode *MNew = MDMap.lookup(Decl->getScopeList()))
+ Decl->setScopeList(MNew);
+ }
+ }
+}
+
+/// If the inlined function has noalias arguments,
+/// then add new alias scopes for each noalias argument, tag the mapped noalias
+/// parameters with noalias metadata specifying the new scope, and tag all
+/// non-derived loads, stores and memory intrinsics with the new alias scopes.
+static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
+ const DataLayout &DL, AAResults *CalleeAAR,
+ ClonedCodeInfo &InlinedFunctionInfo) {
+ if (!EnableNoAliasConversion)
+ return;
+
+ const Function *CalledFunc = CB.getCalledFunction();
+ SmallVector<const Argument *, 4> NoAliasArgs;
+
+ for (const Argument &Arg : CalledFunc->args())
+ if (CB.paramHasAttr(Arg.getArgNo(), Attribute::NoAlias) && !Arg.use_empty())
+ NoAliasArgs.push_back(&Arg);
+
+ if (NoAliasArgs.empty())
+ return;
+
+ // To do a good job, if a noalias variable is captured, we need to know if
+ // the capture point dominates the particular use we're considering.
+ DominatorTree DT;
+ DT.recalculate(const_cast<Function&>(*CalledFunc));
+
+ // noalias indicates that pointer values based on the argument do not alias
+ // pointer values which are not based on it. So we add a new "scope" for each
+ // noalias function argument. Accesses using pointers based on that argument
+ // become part of that alias scope, accesses using pointers not based on that
+ // argument are tagged as noalias with that scope.
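+ //
+ // Illustrative sketch (added commentary, not from the upstream source): for
+ // a callee such as
+ //   define void @f(ptr noalias %a, ptr %b)
+ // that stores through both pointers, the inlined store through %a is tagged
+ // !alias.scope with the new scope created for %a, while the inlined store
+ // through %b (provably not based on %a) is tagged !noalias with that same
+ // scope, so the caller can still disambiguate the two accesses.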
+
+ DenseMap<const Argument *, MDNode *> NewScopes;
+ MDBuilder MDB(CalledFunc->getContext());
+
+ // Create a new scope domain for this function.
+ MDNode *NewDomain =
+ MDB.createAnonymousAliasScopeDomain(CalledFunc->getName());
+ for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) {
+ const Argument *A = NoAliasArgs[i];
+
+ std::string Name = std::string(CalledFunc->getName());
+ if (A->hasName()) {
+ Name += ": %";
+ Name += A->getName();
+ } else {
+ Name += ": argument ";
+ Name += utostr(i);
+ }
+
+ // Note: We always create a new anonymous root here. This is true regardless
+ // of the linkage of the callee because the aliasing "scope" is not just a
+ // property of the callee, but also all control dependencies in the caller.
+ MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
+ NewScopes.insert(std::make_pair(A, NewScope));
+
+ if (UseNoAliasIntrinsic) {
+ // Introduce a llvm.experimental.noalias.scope.decl for the noalias
+ // argument.
+ MDNode *AScopeList = MDNode::get(CalledFunc->getContext(), NewScope);
+ auto *NoAliasDecl =
+ IRBuilder<>(&CB).CreateNoAliasScopeDeclaration(AScopeList);
+ // Ignore the result for now. The result will be used when the
+ // llvm.noalias intrinsic is introduced.
+ (void)NoAliasDecl;
+ }
+ }
+
+ // Iterate over all new instructions in the map; for all memory-access
+ // instructions, add the alias scope metadata.
+ for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
+ VMI != VMIE; ++VMI) {
+ if (const Instruction *I = dyn_cast<Instruction>(VMI->first)) {
+ if (!VMI->second)
+ continue;
+
+ Instruction *NI = dyn_cast<Instruction>(VMI->second);
+ if (!NI || InlinedFunctionInfo.isSimplified(I, NI))
+ continue;
+
+ bool IsArgMemOnlyCall = false, IsFuncCall = false;
+ SmallVector<const Value *, 2> PtrArgs;
+
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I))
+ PtrArgs.push_back(LI->getPointerOperand());
+ else if (const StoreInst *SI = dyn_cast<StoreInst>(I))
+ PtrArgs.push_back(SI->getPointerOperand());
+ else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
+ PtrArgs.push_back(VAAI->getPointerOperand());
+ else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I))
+ PtrArgs.push_back(CXI->getPointerOperand());
+ else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I))
+ PtrArgs.push_back(RMWI->getPointerOperand());
+ else if (const auto *Call = dyn_cast<CallBase>(I)) {
+ // If we know that the call does not access memory, then we'll still
+ // know that about the inlined clone of this call site, and we don't
+ // need to add metadata.
+ if (Call->doesNotAccessMemory())
+ continue;
+
+ IsFuncCall = true;
+ if (CalleeAAR) {
+ MemoryEffects ME = CalleeAAR->getMemoryEffects(Call);
+
+ // We'll retain this knowledge without additional metadata.
+ if (ME.onlyAccessesInaccessibleMem())
+ continue;
+
+ if (ME.onlyAccessesArgPointees())
+ IsArgMemOnlyCall = true;
+ }
+
+ for (Value *Arg : Call->args()) {
+ // Only care about pointer arguments. If a noalias argument is
+ // accessed through a non-pointer argument, it must be captured
+ // first (e.g. via ptrtoint), and we protect against captures below.
+ if (!Arg->getType()->isPointerTy())
+ continue;
+
+ PtrArgs.push_back(Arg);
+ }
+ }
+
+ // If we found no pointers, then this instruction is not suitable for
+ // pairing with an instruction to receive aliasing metadata.
+ // However, if this is a call, we might just alias with none of the
+ // noalias arguments.
+ if (PtrArgs.empty() && !IsFuncCall)
+ continue;
+
+ // It is possible that there is only one underlying object, but you
+ // need to go through several PHIs to see it, and thus could be
+ // repeated in the Objects list.
+ SmallPtrSet<const Value *, 4> ObjSet;
+ SmallVector<Metadata *, 4> Scopes, NoAliases;
+
+ SmallSetVector<const Argument *, 4> NAPtrArgs;
+ for (const Value *V : PtrArgs) {
+ SmallVector<const Value *, 4> Objects;
+ getUnderlyingObjects(V, Objects, /* LI = */ nullptr);
+
+ for (const Value *O : Objects)
+ ObjSet.insert(O);
+ }
+
+ // Figure out if we're derived from anything that is not a noalias
+ // argument.
+ bool RequiresNoCaptureBefore = false, UsesAliasingPtr = false,
+ UsesUnknownObject = false;
+ for (const Value *V : ObjSet) {
+ // Is this value a constant that cannot be derived from any pointer
+ // value (we need to exclude constant expressions, for example, that
+ // are formed from arithmetic on global symbols).
+ bool IsNonPtrConst = isa<ConstantInt>(V) || isa<ConstantFP>(V) ||
+ isa<ConstantPointerNull>(V) ||
+ isa<ConstantDataVector>(V) || isa<UndefValue>(V);
+ if (IsNonPtrConst)
+ continue;
+
+ // If this is anything other than a noalias argument, then we cannot
+ // completely describe the aliasing properties using alias.scope
+ // metadata (and, thus, won't add any).
+ if (const Argument *A = dyn_cast<Argument>(V)) {
+ if (!CB.paramHasAttr(A->getArgNo(), Attribute::NoAlias))
+ UsesAliasingPtr = true;
+ } else {
+ UsesAliasingPtr = true;
+ }
+
+ if (isEscapeSource(V)) {
+ // An escape source can only alias with a noalias argument if it has
+ // been captured beforehand.
+ RequiresNoCaptureBefore = true;
+ } else if (!isa<Argument>(V) && !isIdentifiedObject(V)) {
+ // If this is neither an escape source, nor some identified object
+ // (which cannot directly alias a noalias argument), nor some other
+ // argument (which, by definition, also cannot alias a noalias
+ // argument), conservatively do not make any assumptions.
+ UsesUnknownObject = true;
+ }
+ }
+
+ // Nothing we can do if the used underlying object cannot be reliably
+ // determined.
+ if (UsesUnknownObject)
+ continue;
+
+ // A function call can always get captured noalias pointers (via other
+ // parameters, globals, etc.).
+ if (IsFuncCall && !IsArgMemOnlyCall)
+ RequiresNoCaptureBefore = true;
+
+ // First, we want to figure out all of the sets with which we definitely
+ // don't alias. Iterate over all noalias sets, and add those for which:
+ // 1. The noalias argument is not in the set of objects from which we
+ // definitely derive.
+ // 2. The noalias argument has not yet been captured.
+ // An arbitrary function that might load pointers could see captured
+ // noalias arguments via other noalias arguments or globals, and so we
+ // must always check for prior capture.
+ for (const Argument *A : NoAliasArgs) {
+ if (ObjSet.contains(A))
+ continue; // May be based on a noalias argument.
+
+ // It might be tempting to skip the PointerMayBeCapturedBefore check if
+ // A->hasNoCaptureAttr() is true, but this is incorrect because
+ // nocapture only guarantees that no copies outlive the function, not
+ // that the value cannot be locally captured.
+ if (!RequiresNoCaptureBefore ||
+ !PointerMayBeCapturedBefore(A, /* ReturnCaptures */ false,
+ /* StoreCaptures */ false, I, &DT))
+ NoAliases.push_back(NewScopes[A]);
+ }
+
+ if (!NoAliases.empty())
+ NI->setMetadata(LLVMContext::MD_noalias,
+ MDNode::concatenate(
+ NI->getMetadata(LLVMContext::MD_noalias),
+ MDNode::get(CalledFunc->getContext(), NoAliases)));
+
+ // Next, we want to figure out all of the sets to which we might belong.
+ // We might belong to a set if the noalias argument is in the set of
+ // underlying objects. If there is some non-noalias argument in our list
+ // of underlying objects, then we cannot add a scope because the fact
+ // that some access does not alias with any set of our noalias arguments
+ // cannot itself guarantee that it does not alias with this access
+ // (because there is some pointer of unknown origin involved and the
+ // other access might also depend on this pointer). We also cannot add
+ // scopes to arbitrary functions unless we know they don't access any
+ // non-parameter pointer-values.
+ bool CanAddScopes = !UsesAliasingPtr;
+ if (CanAddScopes && IsFuncCall)
+ CanAddScopes = IsArgMemOnlyCall;
+
+ if (CanAddScopes)
+ for (const Argument *A : NoAliasArgs) {
+ if (ObjSet.count(A))
+ Scopes.push_back(NewScopes[A]);
+ }
+
+ if (!Scopes.empty())
+ NI->setMetadata(
+ LLVMContext::MD_alias_scope,
+ MDNode::concatenate(NI->getMetadata(LLVMContext::MD_alias_scope),
+ MDNode::get(CalledFunc->getContext(), Scopes)));
+ }
+ }
+}
+
+static bool MayContainThrowingOrExitingCall(Instruction *Begin,
+ Instruction *End) {
+
+ assert(Begin->getParent() == End->getParent() &&
+ "Expected to be in same basic block!");
+ return !llvm::isGuaranteedToTransferExecutionToSuccessor(
+ Begin->getIterator(), End->getIterator(), InlinerAttributeWindow + 1);
+}
+
+static AttrBuilder IdentifyValidAttributes(CallBase &CB) {
+
+ AttrBuilder AB(CB.getContext(), CB.getAttributes().getRetAttrs());
+ if (!AB.hasAttributes())
+ return AB;
+ AttrBuilder Valid(CB.getContext());
+ // Only allow these white listed attributes to be propagated back to the
+ // callee. This is because other attributes may only be valid on the call
+ // itself, i.e. attributes such as signext and zeroext.
+ if (auto DerefBytes = AB.getDereferenceableBytes())
+ Valid.addDereferenceableAttr(DerefBytes);
+ if (auto DerefOrNullBytes = AB.getDereferenceableOrNullBytes())
+ Valid.addDereferenceableOrNullAttr(DerefOrNullBytes);
+ if (AB.contains(Attribute::NoAlias))
+ Valid.addAttribute(Attribute::NoAlias);
+ if (AB.contains(Attribute::NonNull))
+ Valid.addAttribute(Attribute::NonNull);
+ return Valid;
+}
+
+static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) {
+ if (!UpdateReturnAttributes)
+ return;
+
+ AttrBuilder Valid = IdentifyValidAttributes(CB);
+ if (!Valid.hasAttributes())
+ return;
+ auto *CalledFunction = CB.getCalledFunction();
+ auto &Context = CalledFunction->getContext();
+
+ for (auto &BB : *CalledFunction) {
+ auto *RI = dyn_cast<ReturnInst>(BB.getTerminator());
+ if (!RI || !isa<CallBase>(RI->getOperand(0)))
+ continue;
+ auto *RetVal = cast<CallBase>(RI->getOperand(0));
+ // Check that the cloned RetVal exists and is a call, otherwise we cannot
+ // add the attributes on the cloned RetVal. Simplification during inlining
+ // could have transformed the cloned instruction.
+ auto *NewRetVal = dyn_cast_or_null<CallBase>(VMap.lookup(RetVal));
+ if (!NewRetVal)
+ continue;
+ // Backward propagation of attributes to the returned value may be incorrect
+ // if it is control flow dependent.
+ // Consider:
+ // @callee {
+ // %rv = call @foo()
+ // %rv2 = call @bar()
+ // if (%rv2 != null)
+ // return %rv2
+ // if (%rv == null)
+ // exit()
+ // return %rv
+ // }
+ // caller() {
+ // %val = call nonnull @callee()
+ // }
+ // Here we cannot add the nonnull attribute on either foo or bar. So, we
+ // limit the check to cases where both RetVal and RI are in the same basic
+ // block and there are no throwing/exiting instructions between them.
+ if (RI->getParent() != RetVal->getParent() ||
+ MayContainThrowingOrExitingCall(RetVal, RI))
+ continue;
+ // Add to the existing attributes of NewRetVal, i.e. the cloned call
+ // instruction.
+ // NB! When we have the same attribute already existing on NewRetVal, but
+ // with a differing value, the AttributeList's merge API honours the already
+ // existing attribute value (i.e. attributes such as dereferenceable,
+ // dereferenceable_or_null etc). See AttrBuilder::merge for more details.
+ AttributeList AL = NewRetVal->getAttributes();
+ AttributeList NewAL = AL.addRetAttributes(Context, Valid);
+ NewRetVal->setAttributes(NewAL);
+ }
+}
+
+/// If the inlined function has non-byval align arguments, then
+/// add @llvm.assume-based alignment assumptions to preserve this information.
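+ ///
+ /// Illustrative sketch (added commentary, not from the upstream source): for
+ /// a parameter declared "ptr align 16 %p" whose alignment cannot already be
+ /// proven at the call site, this emits (roughly) a call to @llvm.assume
+ /// carrying an "align"(ptr %arg, i64 16) operand bundle just before the call.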
+static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) {
+ if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache)
+ return;
+
+ AssumptionCache *AC = &IFI.GetAssumptionCache(*CB.getCaller());
+ auto &DL = CB.getCaller()->getParent()->getDataLayout();
+
+ // To avoid inserting redundant assumptions, we should check for assumptions
+ // already in the caller. To do this, we might need a DT of the caller.
+ DominatorTree DT;
+ bool DTCalculated = false;
+
+ Function *CalledFunc = CB.getCalledFunction();
+ for (Argument &Arg : CalledFunc->args()) {
+ if (!Arg.getType()->isPointerTy() || Arg.hasPassPointeeByValueCopyAttr() ||
+ Arg.hasNUses(0))
+ continue;
+ MaybeAlign Alignment = Arg.getParamAlign();
+ if (!Alignment)
+ continue;
+
+ if (!DTCalculated) {
+ DT.recalculate(*CB.getCaller());
+ DTCalculated = true;
+ }
+ // If we can already prove the asserted alignment in the context of the
+ // caller, then don't bother inserting the assumption.
+ Value *ArgVal = CB.getArgOperand(Arg.getArgNo());
+ if (getKnownAlignment(ArgVal, DL, &CB, AC, &DT) >= *Alignment)
+ continue;
+
+ CallInst *NewAsmp = IRBuilder<>(&CB).CreateAlignmentAssumption(
+ DL, ArgVal, Alignment->value());
+ AC->registerAssumption(cast<AssumeInst>(NewAsmp));
+ }
+}
+
+/// Once we have cloned code over from a callee into the caller,
+/// update the specified callgraph to reflect the changes we made.
+/// Note that it's possible that not all code was copied over, so only
+/// some edges of the callgraph may remain.
+static void UpdateCallGraphAfterInlining(CallBase &CB,
+ Function::iterator FirstNewBlock,
+ ValueToValueMapTy &VMap,
+ InlineFunctionInfo &IFI) {
+ CallGraph &CG = *IFI.CG;
+ const Function *Caller = CB.getCaller();
+ const Function *Callee = CB.getCalledFunction();
+ CallGraphNode *CalleeNode = CG[Callee];
+ CallGraphNode *CallerNode = CG[Caller];
+
+ // Since we inlined some uninlined call sites in the callee into the caller,
+ // add edges from the caller to all of the callees of the callee.
+ CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end();
+
+ // Consider the case where CalleeNode == CallerNode.
+ CallGraphNode::CalledFunctionsVector CallCache;
+ if (CalleeNode == CallerNode) {
+ CallCache.assign(I, E);
+ I = CallCache.begin();
+ E = CallCache.end();
+ }
+
+ for (; I != E; ++I) {
+ // Skip 'reference' call records.
+ if (!I->first)
+ continue;
+
+ const Value *OrigCall = *I->first;
+
+ ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
+ // Only copy the edge if the call was inlined!
+ if (VMI == VMap.end() || VMI->second == nullptr)
+ continue;
+
+ // If the call was inlined, but then constant folded, there is no edge to
+ // add. Check for this case.
+ auto *NewCall = dyn_cast<CallBase>(VMI->second);
+ if (!NewCall)
+ continue;
+
+ // We do not treat intrinsic calls like real function calls because we
+ // expect them to become inline code; do not add an edge for an intrinsic.
+ if (NewCall->getCalledFunction() &&
+ NewCall->getCalledFunction()->isIntrinsic())
+ continue;
+
+ // Remember that this call site got inlined for the client of
+ // InlineFunction.
+ IFI.InlinedCalls.push_back(NewCall);
+
+ // It's possible that inlining the callsite will cause it to go from an
+ // indirect to a direct call by resolving a function pointer. If this
+ // happens, set the callee of the new call site to a more precise
+ // destination. This can also happen if the call graph node of the caller
+ // was just unnecessarily imprecise.
+ if (!I->second->getFunction())
+ if (Function *F = NewCall->getCalledFunction()) {
+ // Indirect call site resolved to direct call.
+ CallerNode->addCalledFunction(NewCall, CG[F]);
+
+ continue;
+ }
+
+ CallerNode->addCalledFunction(NewCall, I->second);
+ }
+
+ // Update the call graph by deleting the edge from Callee to Caller. We must
+ // do this after the loop above in case Caller and Callee are the same.
+ CallerNode->removeCallEdgeFor(*cast<CallBase>(&CB));
+}
+
+static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src,
+ Module *M, BasicBlock *InsertBlock,
+ InlineFunctionInfo &IFI) {
+ IRBuilder<> Builder(InsertBlock, InsertBlock->begin());
+
+ Value *Size =
+ Builder.getInt64(M->getDataLayout().getTypeStoreSize(ByValType));
+
+ // Always generate a memcpy of alignment 1 here because we don't know
+ // the alignment of the src pointer. Other optimizations can infer
+ // better alignment.
+ Builder.CreateMemCpy(Dst, /*DstAlign*/ Align(1), Src,
+ /*SrcAlign*/ Align(1), Size);
+}
+
+/// When inlining a call site that has a byval argument,
+/// we have to make the implicit memcpy explicit by adding it.
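+ ///
+ /// Illustrative sketch (added commentary, not from the upstream source): for
+ /// an argument passed as "ptr byval(%struct.S) align 8 %p", inlining normally
+ /// materializes an alloca of %struct.S in the caller's entry block plus a
+ /// memcpy from %p into it (see HandleByValArgumentInit above); the copy is
+ /// elided when the callee only reads memory and the required alignment can
+ /// be proven or enforced.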
+static Value *HandleByValArgument(Type *ByValType, Value *Arg,
+ Instruction *TheCall,
+ const Function *CalledFunc,
+ InlineFunctionInfo &IFI,
+ MaybeAlign ByValAlignment) {
+ assert(cast<PointerType>(Arg->getType())
+ ->isOpaqueOrPointeeTypeMatches(ByValType));
+ Function *Caller = TheCall->getFunction();
+ const DataLayout &DL = Caller->getParent()->getDataLayout();
+
+ // If the called function is readonly, then it could not mutate the caller's
+ // copy of the byval'd memory. In this case, it is safe to elide the copy and
+ // temporary.
+ if (CalledFunc->onlyReadsMemory()) {
+ // If the byval argument has a specified alignment that is greater than the
+ // passed in pointer, then we either have to round up the input pointer or
+ // give up on this transformation.
+ if (ByValAlignment.valueOrOne() == 1)
+ return Arg;
+
+ AssumptionCache *AC =
+ IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
+
+ // If the pointer is already known to be sufficiently aligned, or if we can
+ // round it up to a larger alignment, then we don't need a temporary.
+ if (getOrEnforceKnownAlignment(Arg, *ByValAlignment, DL, TheCall, AC) >=
+ *ByValAlignment)
+ return Arg;
+
+ // Otherwise, we have to make a memcpy to get a safe alignment. This is bad
+ // for code quality, but rarely happens and is required for correctness.
+ }
+
+ // Create the alloca. If we have DataLayout, use nice alignment.
+ Align Alignment = DL.getPrefTypeAlign(ByValType);
+
+ // If the byval had an alignment specified, we *must* use at least that
+ // alignment, as it is required by the byval argument (and uses of the
+ // pointer inside the callee).
+ if (ByValAlignment)
+ Alignment = std::max(Alignment, *ByValAlignment);
+
+ Value *NewAlloca =
+ new AllocaInst(ByValType, DL.getAllocaAddrSpace(), nullptr, Alignment,
+ Arg->getName(), &*Caller->begin()->begin());
+ IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca));
+
+ // Uses of the argument in the function should use our new alloca
+ // instead.
+ return NewAlloca;
+}
+
+// Check whether this Value is used by a lifetime intrinsic.
+static bool isUsedByLifetimeMarker(Value *V) {
+ for (User *U : V->users())
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U))
+ if (II->isLifetimeStartOrEnd())
+ return true;
+ return false;
+}
+
+// Check whether the given alloca already has
+// lifetime.start or lifetime.end intrinsics.
+static bool hasLifetimeMarkers(AllocaInst *AI) {
+ Type *Ty = AI->getType();
+ Type *Int8PtrTy = Type::getInt8PtrTy(Ty->getContext(),
+ Ty->getPointerAddressSpace());
+ if (Ty == Int8PtrTy)
+ return isUsedByLifetimeMarker(AI);
+
+ // Do a scan to find all the casts to i8*.
+ for (User *U : AI->users()) {
+ if (U->getType() != Int8PtrTy) continue;
+ if (U->stripPointerCasts() != AI) continue;
+ if (isUsedByLifetimeMarker(U))
+ return true;
+ }
+ return false;
+}
+
+/// Return the result of AI->isStaticAlloca() if AI were moved to the entry
+/// block. Allocas used in inalloca calls and allocas of dynamic array size
+/// cannot be static.
+ static bool allocaWouldBeStaticInEntry(const AllocaInst *AI) {
+ return isa<Constant>(AI->getArraySize()) && !AI->isUsedWithInAlloca();
+}
+
+/// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL
+/// inlined at \p InlinedAt. \p IANodes is an inlined-at cache.
+static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt,
+ LLVMContext &Ctx,
+ DenseMap<const MDNode *, MDNode *> &IANodes) {
+ auto IA = DebugLoc::appendInlinedAt(OrigDL, InlinedAt, Ctx, IANodes);
+ return DILocation::get(Ctx, OrigDL.getLine(), OrigDL.getCol(),
+ OrigDL.getScope(), IA);
+}
+
+ /// Update inlined instructions' line numbers to
+ /// encode the location where these instructions are inlined.
+static void fixupLineNumbers(Function *Fn, Function::iterator FI,
+ Instruction *TheCall, bool CalleeHasDebugInfo) {
+ const DebugLoc &TheCallDL = TheCall->getDebugLoc();
+ if (!TheCallDL)
+ return;
+
+ auto &Ctx = Fn->getContext();
+ DILocation *InlinedAtNode = TheCallDL;
+
+ // Create a unique call site, not to be confused with any other call from the
+ // same location.
+ InlinedAtNode = DILocation::getDistinct(
+ Ctx, InlinedAtNode->getLine(), InlinedAtNode->getColumn(),
+ InlinedAtNode->getScope(), InlinedAtNode->getInlinedAt());
+
+ // Cache the inlined-at nodes as they're built so they are reused, without
+ // this every instruction's inlined-at chain would become distinct from each
+ // other.
+ DenseMap<const MDNode *, MDNode *> IANodes;
+
+ // Check if we are not generating inline line tables and want to use
+ // the call site location instead.
+ bool NoInlineLineTables = Fn->hasFnAttribute("no-inline-line-tables");
+
+ for (; FI != Fn->end(); ++FI) {
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
+ BI != BE; ++BI) {
+ // Loop metadata needs to be updated so that the start and end locs
+ // reference inlined-at locations.
+ auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode,
+ &IANodes](Metadata *MD) -> Metadata * {
+ if (auto *Loc = dyn_cast_or_null<DILocation>(MD))
+ return inlineDebugLoc(Loc, InlinedAtNode, Ctx, IANodes).get();
+ return MD;
+ };
+ updateLoopMetadataDebugLocations(*BI, updateLoopInfoLoc);
+
+ if (!NoInlineLineTables)
+ if (DebugLoc DL = BI->getDebugLoc()) {
+ DebugLoc IDL =
+ inlineDebugLoc(DL, InlinedAtNode, BI->getContext(), IANodes);
+ BI->setDebugLoc(IDL);
+ continue;
+ }
+
+ if (CalleeHasDebugInfo && !NoInlineLineTables)
+ continue;
+
+ // If the inlined instruction has no line number, or if inline info
+ // is not being generated, make it look as if it originates from the call
+ // location. This is important for ((__always_inline, __nodebug__))
+ // functions which must use caller location for all instructions in their
+ // function body.
+
+ // Don't update static allocas, as they may get moved later.
+ if (auto *AI = dyn_cast<AllocaInst>(BI))
+ if (allocaWouldBeStaticInEntry(AI))
+ continue;
+
+ BI->setDebugLoc(TheCallDL);
+ }
+
+ // Remove debug info intrinsics if we're not keeping inline info.
+ if (NoInlineLineTables) {
+ BasicBlock::iterator BI = FI->begin();
+ while (BI != FI->end()) {
+ if (isa<DbgInfoIntrinsic>(BI)) {
+ BI = BI->eraseFromParent();
+ continue;
+ }
+ ++BI;
+ }
+ }
+
+ }
+}
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "assignment-tracking"
+/// Find Alloca and linked DbgAssignIntrinsic for locals escaped by \p CB.
+static at::StorageToVarsMap collectEscapedLocals(const DataLayout &DL,
+ const CallBase &CB) {
+ at::StorageToVarsMap EscapedLocals;
+ SmallPtrSet<const Value *, 4> SeenBases;
+
+ LLVM_DEBUG(
+ errs() << "# Finding caller local variables escaped by callee\n");
+ for (const Value *Arg : CB.args()) {
+ LLVM_DEBUG(errs() << "INSPECT: " << *Arg << "\n");
+ if (!Arg->getType()->isPointerTy()) {
+ LLVM_DEBUG(errs() << " | SKIP: Not a pointer\n");
+ continue;
+ }
+
+ const Instruction *I = dyn_cast<Instruction>(Arg);
+ if (!I) {
+ LLVM_DEBUG(errs() << " | SKIP: Not result of instruction\n");
+ continue;
+ }
+
+ // Walk back to the base storage.
+ assert(Arg->getType()->isPtrOrPtrVectorTy());
+ APInt TmpOffset(DL.getIndexTypeSizeInBits(Arg->getType()), 0, false);
+ const AllocaInst *Base = dyn_cast<AllocaInst>(
+ Arg->stripAndAccumulateConstantOffsets(DL, TmpOffset, true));
+ if (!Base) {
+ LLVM_DEBUG(errs() << " | SKIP: Couldn't walk back to base storage\n");
+ continue;
+ }
+
+ assert(Base);
+ LLVM_DEBUG(errs() << " | BASE: " << *Base << "\n");
+ // We only need to process each base address once - skip any duplicates.
+ if (!SeenBases.insert(Base).second)
+ continue;
+
+ // Find all local variables associated with the backing storage.
+ for (auto *DAI : at::getAssignmentMarkers(Base)) {
+ // Skip variables from inlined functions - they are not local variables.
+ if (DAI->getDebugLoc().getInlinedAt())
+ continue;
+ LLVM_DEBUG(errs() << " > DEF : " << *DAI << "\n");
+ EscapedLocals[Base].insert(at::VarRecord(DAI));
+ }
+ }
+ return EscapedLocals;
+}
+
+static void trackInlinedStores(Function::iterator Start, Function::iterator End,
+ const CallBase &CB) {
+ LLVM_DEBUG(errs() << "trackInlinedStores into "
+ << Start->getParent()->getName() << " from "
+ << CB.getCalledFunction()->getName() << "\n");
+ std::unique_ptr<DataLayout> DL = std::make_unique<DataLayout>(CB.getModule());
+ at::trackAssignments(Start, End, collectEscapedLocals(*DL, CB), *DL);
+}
+
+/// Update inlined instructions' DIAssignID metadata. We need to do this
+/// otherwise a function inlined more than once into the same function
+/// will cause DIAssignID to be shared by many instructions.
+static void fixupAssignments(Function::iterator Start, Function::iterator End) {
+ // Map {Old, New} metadata. Not used directly - use GetNewID.
+ DenseMap<DIAssignID *, DIAssignID *> Map;
+ auto GetNewID = [&Map](Metadata *Old) {
+ DIAssignID *OldID = cast<DIAssignID>(Old);
+ if (DIAssignID *NewID = Map.lookup(OldID))
+ return NewID;
+ DIAssignID *NewID = DIAssignID::getDistinct(OldID->getContext());
+ Map[OldID] = NewID;
+ return NewID;
+ };
+ // Loop over all the inlined instructions. If we find a DIAssignID
+ // attachment or use, replace it with a new version.
+ for (auto BBI = Start; BBI != End; ++BBI) {
+ for (Instruction &I : *BBI) {
+ if (auto *ID = I.getMetadata(LLVMContext::MD_DIAssignID))
+ I.setMetadata(LLVMContext::MD_DIAssignID, GetNewID(ID));
+ else if (auto *DAI = dyn_cast<DbgAssignIntrinsic>(&I))
+ DAI->setAssignId(GetNewID(DAI->getAssignID()));
+ }
+ }
+}
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "inline-function"
+
+/// Update the block frequencies of the caller after a callee has been inlined.
+///
+/// Each block cloned into the caller has its block frequency scaled by the
+/// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of
+/// callee's entry block gets the same frequency as the callsite block and the
+/// relative frequencies of all cloned blocks remain the same after cloning.
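+ ///
+ /// Illustrative numbers (added commentary, not from the upstream source): if
+ /// the callee entry frequency is 100, one callee block has frequency 50, and
+ /// the call site block has frequency 10, the cloned copy of that block ends
+ /// up with frequency 50 * 10 / 100 = 5, preserving its 2:1 ratio relative to
+ /// the cloned entry block.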
+static void updateCallerBFI(BasicBlock *CallSiteBlock,
+ const ValueToValueMapTy &VMap,
+ BlockFrequencyInfo *CallerBFI,
+ BlockFrequencyInfo *CalleeBFI,
+ const BasicBlock &CalleeEntryBlock) {
+ SmallPtrSet<BasicBlock *, 16> ClonedBBs;
+ for (auto Entry : VMap) {
+ if (!isa<BasicBlock>(Entry.first) || !Entry.second)
+ continue;
+ auto *OrigBB = cast<BasicBlock>(Entry.first);
+ auto *ClonedBB = cast<BasicBlock>(Entry.second);
+ uint64_t Freq = CalleeBFI->getBlockFreq(OrigBB).getFrequency();
+ if (!ClonedBBs.insert(ClonedBB).second) {
+ // Multiple blocks in the callee might get mapped to one cloned block in
+ // the caller since we prune the callee as we clone it. When that happens,
+ // we want to use the maximum among the original blocks' frequencies.
+ uint64_t NewFreq = CallerBFI->getBlockFreq(ClonedBB).getFrequency();
+ if (NewFreq > Freq)
+ Freq = NewFreq;
+ }
+ CallerBFI->setBlockFreq(ClonedBB, Freq);
+ }
+ BasicBlock *EntryClone = cast<BasicBlock>(VMap.lookup(&CalleeEntryBlock));
+ CallerBFI->setBlockFreqAndScale(
+ EntryClone, CallerBFI->getBlockFreq(CallSiteBlock).getFrequency(),
+ ClonedBBs);
+}
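+
+// Worked example (illustrative numbers, not from the source): if the callsite
+// block has frequency 8 and the callee entry block has frequency 16, then a
+// callee block with frequency 32 ends up at 16 in the caller: the cloned entry
+// is pinned to the callsite frequency and the other cloned blocks are scaled
+// by the same 8/16 factor via setBlockFreqAndScale.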
+
+/// Update the branch metadata for cloned call instructions.
+static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
+ const ProfileCount &CalleeEntryCount,
+ const CallBase &TheCall, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *CallerBFI) {
+ if (CalleeEntryCount.isSynthetic() || CalleeEntryCount.getCount() < 1)
+ return;
+ auto CallSiteCount =
+ PSI ? PSI->getProfileCount(TheCall, CallerBFI) : std::nullopt;
+ int64_t CallCount =
+ std::min(CallSiteCount.value_or(0), CalleeEntryCount.getCount());
+ updateProfileCallee(Callee, -CallCount, &VMap);
+}
+
+void llvm::updateProfileCallee(
+ Function *Callee, int64_t EntryDelta,
+ const ValueMap<const Value *, WeakTrackingVH> *VMap) {
+ auto CalleeCount = Callee->getEntryCount();
+ if (!CalleeCount)
+ return;
+
+ const uint64_t PriorEntryCount = CalleeCount->getCount();
+
+  // Since CallSiteCount is an estimate, it could exceed the original callee
+  // count; clamp the new count at 0 to guard against underflow.
+ const uint64_t NewEntryCount =
+ (EntryDelta < 0 && static_cast<uint64_t>(-EntryDelta) > PriorEntryCount)
+ ? 0
+ : PriorEntryCount + EntryDelta;
+
+  // A VMap is provided only during inlining: use it to scale the profile
+  // weights of the call sites cloned into the caller.
+ if (VMap) {
+ uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount;
+ for (auto Entry : *VMap)
+ if (isa<CallInst>(Entry.first))
+ if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
+ CI->updateProfWeight(CloneEntryCount, PriorEntryCount);
+ }
+
+ if (EntryDelta) {
+ Callee->setEntryCount(NewEntryCount);
+
+ for (BasicBlock &BB : *Callee)
+ // No need to update the callsite if it is pruned during inlining.
+ if (!VMap || VMap->count(&BB))
+ for (Instruction &I : BB)
+ if (CallInst *CI = dyn_cast<CallInst>(&I))
+ CI->updateProfWeight(NewEntryCount, PriorEntryCount);
+ }
+}
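+
+// Worked example (illustrative numbers, not from the source): with a callee
+// entry count of 100 and an estimated callsite count of 30, updateCallProfile
+// passes EntryDelta = -30. The callee entry count drops to 70, the call sites
+// cloned into the caller have their weights scaled by 30/100, and the call
+// sites remaining in the callee body are scaled by 70/100.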
+
+/// An operand bundle "clang.arc.attachedcall" on a call indicates the call
+/// result is implicitly consumed by a call to retainRV or claimRV immediately
+/// after the call. This function inlines the retainRV/claimRV calls.
+///
+/// There are three cases to consider:
+///
+/// 1. If there is a call to autoreleaseRV that takes a pointer to the returned
+/// object in the callee return block, the autoreleaseRV call and the
+/// retainRV/claimRV call in the caller cancel out. If the call in the caller
+/// is a claimRV call, a call to objc_release is emitted.
+///
+/// 2. If there is a call in the callee return block that doesn't have operand
+/// bundle "clang.arc.attachedcall", the operand bundle on the original call
+/// is transferred to the call in the callee.
+///
+/// 3. Otherwise, a call to objc_retain is inserted if the call in the caller is
+/// a retainRV call.
+static void
+inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind,
+ const SmallVectorImpl<ReturnInst *> &Returns) {
+ Module *Mod = CB.getModule();
+ assert(objcarc::isRetainOrClaimRV(RVCallKind) && "unexpected ARC function");
+ bool IsRetainRV = RVCallKind == objcarc::ARCInstKind::RetainRV,
+ IsUnsafeClaimRV = !IsRetainRV;
+
+ for (auto *RI : Returns) {
+ Value *RetOpnd = objcarc::GetRCIdentityRoot(RI->getOperand(0));
+ bool InsertRetainCall = IsRetainRV;
+ IRBuilder<> Builder(RI->getContext());
+
+ // Walk backwards through the basic block looking for either a matching
+ // autoreleaseRV call or an unannotated call.
+ auto InstRange = llvm::make_range(++(RI->getIterator().getReverse()),
+ RI->getParent()->rend());
+ for (Instruction &I : llvm::make_early_inc_range(InstRange)) {
+ // Ignore casts.
+ if (isa<CastInst>(I))
+ continue;
+
+ if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ if (II->getIntrinsicID() != Intrinsic::objc_autoreleaseReturnValue ||
+ !II->hasNUses(0) ||
+ objcarc::GetRCIdentityRoot(II->getOperand(0)) != RetOpnd)
+ break;
+
+        // If we've found a matching autoreleaseRV call:
+ // - If claimRV is attached to the call, insert a call to objc_release
+ // and erase the autoreleaseRV call.
+ // - If retainRV is attached to the call, just erase the autoreleaseRV
+ // call.
+ if (IsUnsafeClaimRV) {
+ Builder.SetInsertPoint(II);
+ Function *IFn =
+ Intrinsic::getDeclaration(Mod, Intrinsic::objc_release);
+ Value *BC = Builder.CreateBitCast(RetOpnd, IFn->getArg(0)->getType());
+ Builder.CreateCall(IFn, BC, "");
+ }
+ II->eraseFromParent();
+ InsertRetainCall = false;
+ break;
+ }
+
+ auto *CI = dyn_cast<CallInst>(&I);
+
+ if (!CI)
+ break;
+
+ if (objcarc::GetRCIdentityRoot(CI) != RetOpnd ||
+ objcarc::hasAttachedCallOpBundle(CI))
+ break;
+
+ // If we've found an unannotated call that defines RetOpnd, add a
+ // "clang.arc.attachedcall" operand bundle.
+ Value *BundleArgs[] = {*objcarc::getAttachedARCFunction(&CB)};
+ OperandBundleDef OB("clang.arc.attachedcall", BundleArgs);
+ auto *NewCall = CallBase::addOperandBundle(
+ CI, LLVMContext::OB_clang_arc_attachedcall, OB, CI);
+ NewCall->copyMetadata(*CI);
+ CI->replaceAllUsesWith(NewCall);
+ CI->eraseFromParent();
+ InsertRetainCall = false;
+ break;
+ }
+
+ if (InsertRetainCall) {
+ // The retainRV is attached to the call and we've failed to find a
+ // matching autoreleaseRV or an annotated call in the callee. Emit a call
+ // to objc_retain.
+ Builder.SetInsertPoint(RI);
+ Function *IFn = Intrinsic::getDeclaration(Mod, Intrinsic::objc_retain);
+ Value *BC = Builder.CreateBitCast(RetOpnd, IFn->getArg(0)->getType());
+ Builder.CreateCall(IFn, BC, "");
+ }
+ }
+}
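+
+// Illustrative IR for case 1 above (a sketch with assumed names, not from this
+// file):
+//   ; caller
+//   %r = call ptr @callee() [ "clang.arc.attachedcall"(
+//            ptr @llvm.objc.retainAutoreleasedReturnValue) ]
+//   ; callee return block
+//   call ptr @llvm.objc.autoreleaseReturnValue(ptr %v)
+//   ret ptr %v
+// After inlining, the attached retainRV and the autoreleaseRV cancel out, so
+// the autoreleaseRV call is simply erased; had the bundle carried claimRV
+// instead, a call to objc_release on %v would also be emitted.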
+
+/// This function inlines the called function into the basic block of the
+/// caller. It returns a failure InlineResult if it is not possible to inline
+/// this call; the program is still in a well-defined state if that happens.
+///
+/// Note that this only does one level of inlining. For example, if the
+/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
+/// exists in the instruction stream. Similarly this will inline a recursive
+/// function by one level.
+llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
+ bool MergeAttributes,
+ AAResults *CalleeAAR,
+ bool InsertLifetime,
+ Function *ForwardVarArgsTo) {
+ assert(CB.getParent() && CB.getFunction() && "Instruction not in function!");
+
+ // FIXME: we don't inline callbr yet.
+ if (isa<CallBrInst>(CB))
+ return InlineResult::failure("We don't inline callbr yet.");
+
+ // If IFI has any state in it, zap it before we fill it in.
+ IFI.reset();
+
+ Function *CalledFunc = CB.getCalledFunction();
+ if (!CalledFunc || // Can't inline external function or indirect
+ CalledFunc->isDeclaration()) // call!
+ return InlineResult::failure("external or indirect");
+
+ // The inliner does not know how to inline through calls with operand bundles
+ // in general ...
+ if (CB.hasOperandBundles()) {
+ for (int i = 0, e = CB.getNumOperandBundles(); i != e; ++i) {
+ uint32_t Tag = CB.getOperandBundleAt(i).getTagID();
+ // ... but it knows how to inline through "deopt" operand bundles ...
+ if (Tag == LLVMContext::OB_deopt)
+ continue;
+ // ... and "funclet" operand bundles.
+ if (Tag == LLVMContext::OB_funclet)
+ continue;
+ if (Tag == LLVMContext::OB_clang_arc_attachedcall)
+ continue;
+ if (Tag == LLVMContext::OB_kcfi)
+ continue;
+
+ return InlineResult::failure("unsupported operand bundle");
+ }
+ }
+
+ // If the call to the callee cannot throw, set the 'nounwind' flag on any
+ // calls that we inline.
+ bool MarkNoUnwind = CB.doesNotThrow();
+
+ BasicBlock *OrigBB = CB.getParent();
+ Function *Caller = OrigBB->getParent();
+
+  // Do not inline a strictfp function into a non-strictfp one, since that
+  // would require converting all FP operations in the host function to
+  // constrained intrinsics.
+ if (CalledFunc->getAttributes().hasFnAttr(Attribute::StrictFP) &&
+ !Caller->getAttributes().hasFnAttr(Attribute::StrictFP)) {
+ return InlineResult::failure("incompatible strictfp attributes");
+ }
+
+ // GC poses two hazards to inlining, which only occur when the callee has GC:
+ // 1. If the caller has no GC, then the callee's GC must be propagated to the
+ // caller.
+ // 2. If the caller has a differing GC, it is invalid to inline.
+ if (CalledFunc->hasGC()) {
+ if (!Caller->hasGC())
+ Caller->setGC(CalledFunc->getGC());
+ else if (CalledFunc->getGC() != Caller->getGC())
+ return InlineResult::failure("incompatible GC");
+ }
+
+ // Get the personality function from the callee if it contains a landing pad.
+ Constant *CalledPersonality =
+ CalledFunc->hasPersonalityFn()
+ ? CalledFunc->getPersonalityFn()->stripPointerCasts()
+ : nullptr;
+
+ // Find the personality function used by the landing pads of the caller. If it
+ // exists, then check to see that it matches the personality function used in
+ // the callee.
+ Constant *CallerPersonality =
+ Caller->hasPersonalityFn()
+ ? Caller->getPersonalityFn()->stripPointerCasts()
+ : nullptr;
+ if (CalledPersonality) {
+ if (!CallerPersonality)
+ Caller->setPersonalityFn(CalledPersonality);
+ // If the personality functions match, then we can perform the
+ // inlining. Otherwise, we can't inline.
+ // TODO: This isn't 100% true. Some personality functions are proper
+ // supersets of others and can be used in place of the other.
+ else if (CalledPersonality != CallerPersonality)
+ return InlineResult::failure("incompatible personality");
+ }
+
+ // We need to figure out which funclet the callsite was in so that we may
+ // properly nest the callee.
+ Instruction *CallSiteEHPad = nullptr;
+ if (CallerPersonality) {
+ EHPersonality Personality = classifyEHPersonality(CallerPersonality);
+ if (isScopedEHPersonality(Personality)) {
+ std::optional<OperandBundleUse> ParentFunclet =
+ CB.getOperandBundle(LLVMContext::OB_funclet);
+ if (ParentFunclet)
+ CallSiteEHPad = cast<FuncletPadInst>(ParentFunclet->Inputs.front());
+
+ // OK, the inlining site is legal. What about the target function?
+
+ if (CallSiteEHPad) {
+ if (Personality == EHPersonality::MSVC_CXX) {
+ // The MSVC personality cannot tolerate catches getting inlined into
+ // cleanup funclets.
+ if (isa<CleanupPadInst>(CallSiteEHPad)) {
+ // Ok, the call site is within a cleanuppad. Let's check the callee
+ // for catchpads.
+ for (const BasicBlock &CalledBB : *CalledFunc) {
+ if (isa<CatchSwitchInst>(CalledBB.getFirstNonPHI()))
+ return InlineResult::failure("catch in cleanup funclet");
+ }
+ }
+ } else if (isAsynchronousEHPersonality(Personality)) {
+          // SEH is even less tolerant; there may not be any sort of exceptional
+ // funclet in the callee.
+ for (const BasicBlock &CalledBB : *CalledFunc) {
+ if (CalledBB.isEHPad())
+ return InlineResult::failure("SEH in cleanup funclet");
+ }
+ }
+ }
+ }
+ }
+
+ // Determine if we are dealing with a call in an EHPad which does not unwind
+ // to caller.
+ bool EHPadForCallUnwindsLocally = false;
+ if (CallSiteEHPad && isa<CallInst>(CB)) {
+ UnwindDestMemoTy FuncletUnwindMap;
+ Value *CallSiteUnwindDestToken =
+ getUnwindDestToken(CallSiteEHPad, FuncletUnwindMap);
+
+ EHPadForCallUnwindsLocally =
+ CallSiteUnwindDestToken &&
+ !isa<ConstantTokenNone>(CallSiteUnwindDestToken);
+ }
+
+ // Get an iterator to the last basic block in the function, which will have
+ // the new function inlined after it.
+ Function::iterator LastBlock = --Caller->end();
+
+ // Make sure to capture all of the return instructions from the cloned
+ // function.
+ SmallVector<ReturnInst*, 8> Returns;
+ ClonedCodeInfo InlinedFunctionInfo;
+ Function::iterator FirstNewBlock;
+
+ { // Scope to destroy VMap after cloning.
+ ValueToValueMapTy VMap;
+ struct ByValInit {
+ Value *Dst;
+ Value *Src;
+ Type *Ty;
+ };
+    // Keep a list of (dst, src) pairs to emit byval initializations.
+ SmallVector<ByValInit, 4> ByValInits;
+
+ // When inlining a function that contains noalias scope metadata,
+ // this metadata needs to be cloned so that the inlined blocks
+ // have different "unique scopes" at every call site.
+ // Track the metadata that must be cloned. Do this before other changes to
+ // the function, so that we do not get in trouble when inlining caller ==
+ // callee.
+ ScopedAliasMetadataDeepCloner SAMetadataCloner(CB.getCalledFunction());
+
+ auto &DL = Caller->getParent()->getDataLayout();
+
+ // Calculate the vector of arguments to pass into the function cloner, which
+ // matches up the formal to the actual argument values.
+ auto AI = CB.arg_begin();
+ unsigned ArgNo = 0;
+ for (Function::arg_iterator I = CalledFunc->arg_begin(),
+ E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
+ Value *ActualArg = *AI;
+
+      // When byval arguments are actually inlined, we need to make the copy
+      // implied by them explicit. However, we don't do this if the callee is
+      // readonly or readnone, because then the copy would be unneeded: the
+      // callee doesn't modify the struct.
+ if (CB.isByValArgument(ArgNo)) {
+ ActualArg = HandleByValArgument(CB.getParamByValType(ArgNo), ActualArg,
+ &CB, CalledFunc, IFI,
+ CalledFunc->getParamAlign(ArgNo));
+ if (ActualArg != *AI)
+ ByValInits.push_back(
+ {ActualArg, (Value *)*AI, CB.getParamByValType(ArgNo)});
+ }
+
+ VMap[&*I] = ActualArg;
+ }
+
+ // TODO: Remove this when users have been updated to the assume bundles.
+ // Add alignment assumptions if necessary. We do this before the inlined
+ // instructions are actually cloned into the caller so that we can easily
+ // check what will be known at the start of the inlined code.
+ AddAlignmentAssumptions(CB, IFI);
+
+ AssumptionCache *AC =
+ IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
+
+    // Preserve all attributes of the call and its parameters.
+ salvageKnowledge(&CB, AC);
+
+ // We want the inliner to prune the code as it copies. We would LOVE to
+ // have no dead or constant instructions leftover after inlining occurs
+ // (which can happen, e.g., because an argument was constant), but we'll be
+ // happy with whatever the cloner can do.
+ CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
+ /*ModuleLevelChanges=*/false, Returns, ".i",
+ &InlinedFunctionInfo);
+ // Remember the first block that is newly cloned over.
+ FirstNewBlock = LastBlock; ++FirstNewBlock;
+
+    // Insert retainRV/claimRV runtime calls.
+ objcarc::ARCInstKind RVCallKind = objcarc::getAttachedARCFunctionKind(&CB);
+ if (RVCallKind != objcarc::ARCInstKind::None)
+ inlineRetainOrClaimRVCalls(CB, RVCallKind, Returns);
+
+    // Update caller/callee profiles only when requested. For sample loader
+ // inlining, the context-sensitive inlinee profile doesn't need to be
+ // subtracted from callee profile, and the inlined clone also doesn't need
+ // to be scaled based on call site count.
+ if (IFI.UpdateProfile) {
+ if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr)
+ // Update the BFI of blocks cloned into the caller.
+ updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI,
+ CalledFunc->front());
+
+ if (auto Profile = CalledFunc->getEntryCount())
+ updateCallProfile(CalledFunc, VMap, *Profile, CB, IFI.PSI,
+ IFI.CallerBFI);
+ }
+
+ // Inject byval arguments initialization.
+ for (ByValInit &Init : ByValInits)
+ HandleByValArgumentInit(Init.Ty, Init.Dst, Init.Src, Caller->getParent(),
+ &*FirstNewBlock, IFI);
+
+ std::optional<OperandBundleUse> ParentDeopt =
+ CB.getOperandBundle(LLVMContext::OB_deopt);
+ if (ParentDeopt) {
+ SmallVector<OperandBundleDef, 2> OpDefs;
+
+ for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) {
+ CallBase *ICS = dyn_cast_or_null<CallBase>(VH);
+ if (!ICS)
+ continue; // instruction was DCE'd or RAUW'ed to undef
+
+ OpDefs.clear();
+
+ OpDefs.reserve(ICS->getNumOperandBundles());
+
+ for (unsigned COBi = 0, COBe = ICS->getNumOperandBundles(); COBi < COBe;
+ ++COBi) {
+ auto ChildOB = ICS->getOperandBundleAt(COBi);
+ if (ChildOB.getTagID() != LLVMContext::OB_deopt) {
+ // If the inlined call has other operand bundles, let them be
+ OpDefs.emplace_back(ChildOB);
+ continue;
+ }
+
+ // It may be useful to separate this logic (of handling operand
+ // bundles) out to a separate "policy" component if this gets crowded.
+ // Prepend the parent's deoptimization continuation to the newly
+ // inlined call's deoptimization continuation.
+ std::vector<Value *> MergedDeoptArgs;
+ MergedDeoptArgs.reserve(ParentDeopt->Inputs.size() +
+ ChildOB.Inputs.size());
+
+ llvm::append_range(MergedDeoptArgs, ParentDeopt->Inputs);
+ llvm::append_range(MergedDeoptArgs, ChildOB.Inputs);
+
+ OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs));
+ }
+
+ Instruction *NewI = CallBase::Create(ICS, OpDefs, ICS);
+
+ // Note: the RAUW does the appropriate fixup in VMap, so we need to do
+ // this even if the call returns void.
+ ICS->replaceAllUsesWith(NewI);
+
+ VH = nullptr;
+ ICS->eraseFromParent();
+ }
+ }
+
+ // Update the callgraph if requested.
+ if (IFI.CG)
+ UpdateCallGraphAfterInlining(CB, FirstNewBlock, VMap, IFI);
+
+ // For 'nodebug' functions, the associated DISubprogram is always null.
+ // Conservatively avoid propagating the callsite debug location to
+ // instructions inlined from a function whose DISubprogram is not null.
+ fixupLineNumbers(Caller, FirstNewBlock, &CB,
+ CalledFunc->getSubprogram() != nullptr);
+
+ if (isAssignmentTrackingEnabled(*Caller->getParent())) {
+ // Interpret inlined stores to caller-local variables as assignments.
+ trackInlinedStores(FirstNewBlock, Caller->end(), CB);
+
+ // Update DIAssignID metadata attachments and uses so that they are
+ // unique to this inlined instance.
+ fixupAssignments(FirstNewBlock, Caller->end());
+ }
+
+ // Now clone the inlined noalias scope metadata.
+ SAMetadataCloner.clone();
+ SAMetadataCloner.remap(FirstNewBlock, Caller->end());
+
+ // Add noalias metadata if necessary.
+ AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR, InlinedFunctionInfo);
+
+ // Clone return attributes on the callsite into the calls within the inlined
+ // function which feed into its return value.
+ AddReturnAttributes(CB, VMap);
+
+ propagateMemProfMetadata(CalledFunc, CB,
+ InlinedFunctionInfo.ContainsMemProfMetadata, VMap);
+
+ // Propagate metadata on the callsite if necessary.
+ PropagateCallSiteMetadata(CB, FirstNewBlock, Caller->end());
+
+ // Register any cloned assumptions.
+ if (IFI.GetAssumptionCache)
+ for (BasicBlock &NewBlock :
+ make_range(FirstNewBlock->getIterator(), Caller->end()))
+ for (Instruction &I : NewBlock)
+ if (auto *II = dyn_cast<CondGuardInst>(&I))
+ IFI.GetAssumptionCache(*Caller).registerAssumption(II);
+ }
+
+ // If there are any alloca instructions in the block that used to be the entry
+ // block for the callee, move them to the entry block of the caller. First
+ // calculate which instruction they should be inserted before. We insert the
+ // instructions at the end of the current alloca list.
+ {
+ BasicBlock::iterator InsertPoint = Caller->begin()->begin();
+ for (BasicBlock::iterator I = FirstNewBlock->begin(),
+ E = FirstNewBlock->end(); I != E; ) {
+ AllocaInst *AI = dyn_cast<AllocaInst>(I++);
+ if (!AI) continue;
+
+ // If the alloca is now dead, remove it. This often occurs due to code
+ // specialization.
+ if (AI->use_empty()) {
+ AI->eraseFromParent();
+ continue;
+ }
+
+ if (!allocaWouldBeStaticInEntry(AI))
+ continue;
+
+ // Keep track of the static allocas that we inline into the caller.
+ IFI.StaticAllocas.push_back(AI);
+
+ // Scan for the block of allocas that we can move over, and move them
+ // all at once.
+ while (isa<AllocaInst>(I) &&
+ !cast<AllocaInst>(I)->use_empty() &&
+ allocaWouldBeStaticInEntry(cast<AllocaInst>(I))) {
+ IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
+ ++I;
+ }
+
+ // Transfer all of the allocas over in a block. Using splice means
+ // that the instructions aren't removed from the symbol table, then
+ // reinserted.
+ Caller->getEntryBlock().splice(InsertPoint, &*FirstNewBlock,
+ AI->getIterator(), I);
+ }
+ }
+
+ SmallVector<Value*,4> VarArgsToForward;
+ SmallVector<AttributeSet, 4> VarArgsAttrs;
+ for (unsigned i = CalledFunc->getFunctionType()->getNumParams();
+ i < CB.arg_size(); i++) {
+ VarArgsToForward.push_back(CB.getArgOperand(i));
+ VarArgsAttrs.push_back(CB.getAttributes().getParamAttrs(i));
+ }
+
+ bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;
+ if (InlinedFunctionInfo.ContainsCalls) {
+ CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
+ if (CallInst *CI = dyn_cast<CallInst>(&CB))
+ CallSiteTailKind = CI->getTailCallKind();
+
+ // For inlining purposes, the "notail" marker is the same as no marker.
+ if (CallSiteTailKind == CallInst::TCK_NoTail)
+ CallSiteTailKind = CallInst::TCK_None;
+
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E;
+ ++BB) {
+ for (Instruction &I : llvm::make_early_inc_range(*BB)) {
+ CallInst *CI = dyn_cast<CallInst>(&I);
+ if (!CI)
+ continue;
+
+ // Forward varargs from inlined call site to calls to the
+ // ForwardVarArgsTo function, if requested, and to musttail calls.
+ if (!VarArgsToForward.empty() &&
+ ((ForwardVarArgsTo &&
+ CI->getCalledFunction() == ForwardVarArgsTo) ||
+ CI->isMustTailCall())) {
+ // Collect attributes for non-vararg parameters.
+ AttributeList Attrs = CI->getAttributes();
+ SmallVector<AttributeSet, 8> ArgAttrs;
+ if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) {
+ for (unsigned ArgNo = 0;
+ ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo)
+ ArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));
+ }
+
+ // Add VarArg attributes.
+ ArgAttrs.append(VarArgsAttrs.begin(), VarArgsAttrs.end());
+ Attrs = AttributeList::get(CI->getContext(), Attrs.getFnAttrs(),
+ Attrs.getRetAttrs(), ArgAttrs);
+ // Add VarArgs to existing parameters.
+ SmallVector<Value *, 6> Params(CI->args());
+ Params.append(VarArgsToForward.begin(), VarArgsToForward.end());
+ CallInst *NewCI = CallInst::Create(
+ CI->getFunctionType(), CI->getCalledOperand(), Params, "", CI);
+ NewCI->setDebugLoc(CI->getDebugLoc());
+ NewCI->setAttributes(Attrs);
+ NewCI->setCallingConv(CI->getCallingConv());
+ CI->replaceAllUsesWith(NewCI);
+ CI->eraseFromParent();
+ CI = NewCI;
+ }
+
+ if (Function *F = CI->getCalledFunction())
+ InlinedDeoptimizeCalls |=
+ F->getIntrinsicID() == Intrinsic::experimental_deoptimize;
+
+ // We need to reduce the strength of any inlined tail calls. For
+ // musttail, we have to avoid introducing potential unbounded stack
+ // growth. For example, if functions 'f' and 'g' are mutually recursive
+ // with musttail, we can inline 'g' into 'f' so long as we preserve
+ // musttail on the cloned call to 'f'. If either the inlined call site
+ // or the cloned call site is *not* musttail, the program already has
+ // one frame of stack growth, so it's safe to remove musttail. Here is
+ // a table of example transformations:
+ //
+ // f -> musttail g -> musttail f ==> f -> musttail f
+ // f -> musttail g -> tail f ==> f -> tail f
+ // f -> g -> musttail f ==> f -> f
+ // f -> g -> tail f ==> f -> f
+ //
+ // Inlined notail calls should remain notail calls.
+ CallInst::TailCallKind ChildTCK = CI->getTailCallKind();
+ if (ChildTCK != CallInst::TCK_NoTail)
+ ChildTCK = std::min(CallSiteTailKind, ChildTCK);
+ CI->setTailCallKind(ChildTCK);
+ InlinedMustTailCalls |= CI->isMustTailCall();
+
+ // Call sites inlined through a 'nounwind' call site should be
+ // 'nounwind' as well. However, avoid marking call sites explicitly
+ // where possible. This helps expose more opportunities for CSE after
+ // inlining, commonly when the callee is an intrinsic.
+ if (MarkNoUnwind && !CI->doesNotThrow())
+ CI->setDoesNotThrow();
+ }
+ }
+ }
+
+  // Leave lifetime markers for the static allocas, scoping them to the
+ // function we just inlined.
+ // We need to insert lifetime intrinsics even at O0 to avoid invalid
+ // access caused by multithreaded coroutines. The check
+ // `Caller->isPresplitCoroutine()` would affect AlwaysInliner at O0 only.
+ if ((InsertLifetime || Caller->isPresplitCoroutine()) &&
+ !IFI.StaticAllocas.empty()) {
+ IRBuilder<> builder(&FirstNewBlock->front());
+ for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
+ AllocaInst *AI = IFI.StaticAllocas[ai];
+ // Don't mark swifterror allocas. They can't have bitcast uses.
+ if (AI->isSwiftError())
+ continue;
+
+ // If the alloca is already scoped to something smaller than the whole
+ // function then there's no need to add redundant, less accurate markers.
+ if (hasLifetimeMarkers(AI))
+ continue;
+
+ // Try to determine the size of the allocation.
+ ConstantInt *AllocaSize = nullptr;
+ if (ConstantInt *AIArraySize =
+ dyn_cast<ConstantInt>(AI->getArraySize())) {
+ auto &DL = Caller->getParent()->getDataLayout();
+ Type *AllocaType = AI->getAllocatedType();
+ TypeSize AllocaTypeSize = DL.getTypeAllocSize(AllocaType);
+ uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
+
+ // Don't add markers for zero-sized allocas.
+ if (AllocaArraySize == 0)
+ continue;
+
+ // Check that array size doesn't saturate uint64_t and doesn't
+ // overflow when it's multiplied by type size.
+ if (!AllocaTypeSize.isScalable() &&
+ AllocaArraySize != std::numeric_limits<uint64_t>::max() &&
+ std::numeric_limits<uint64_t>::max() / AllocaArraySize >=
+ AllocaTypeSize.getFixedValue()) {
+ AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
+ AllocaArraySize * AllocaTypeSize);
+ }
+ }
+
+ builder.CreateLifetimeStart(AI, AllocaSize);
+ for (ReturnInst *RI : Returns) {
+ // Don't insert llvm.lifetime.end calls between a musttail or deoptimize
+ // call and a return. The return kills all local allocas.
+ if (InlinedMustTailCalls &&
+ RI->getParent()->getTerminatingMustTailCall())
+ continue;
+ if (InlinedDeoptimizeCalls &&
+ RI->getParent()->getTerminatingDeoptimizeCall())
+ continue;
+ IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize);
+ }
+ }
+ }
+
+ // If the inlined code contained dynamic alloca instructions, wrap the inlined
+ // code with llvm.stacksave/llvm.stackrestore intrinsics.
+ if (InlinedFunctionInfo.ContainsDynamicAllocas) {
+ Module *M = Caller->getParent();
+ // Get the two intrinsics we care about.
+ Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
+    Function *StackRestore =
+        Intrinsic::getDeclaration(M, Intrinsic::stackrestore);
+
+ // Insert the llvm.stacksave.
+ CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin())
+ .CreateCall(StackSave, {}, "savedstack");
+
+ // Insert a call to llvm.stackrestore before any return instructions in the
+ // inlined function.
+ for (ReturnInst *RI : Returns) {
+ // Don't insert llvm.stackrestore calls between a musttail or deoptimize
+ // call and a return. The return will restore the stack pointer.
+ if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall())
+ continue;
+ if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall())
+ continue;
+ IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr);
+ }
+ }
+
+ // If we are inlining for an invoke instruction, we must make sure to rewrite
+ // any call instructions into invoke instructions. This is sensitive to which
+ // funclet pads were top-level in the inlinee, so must be done before
+ // rewriting the "parent pad" links.
+ if (auto *II = dyn_cast<InvokeInst>(&CB)) {
+ BasicBlock *UnwindDest = II->getUnwindDest();
+ Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI();
+ if (isa<LandingPadInst>(FirstNonPHI)) {
+ HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo);
+ } else {
+ HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo);
+ }
+ }
+
+ // Update the lexical scopes of the new funclets and callsites.
+ // Anything that had 'none' as its parent is now nested inside the callsite's
+ // EHPad.
+ if (CallSiteEHPad) {
+ for (Function::iterator BB = FirstNewBlock->getIterator(),
+ E = Caller->end();
+ BB != E; ++BB) {
+ // Add bundle operands to inlined call sites.
+ PropagateOperandBundles(BB, CallSiteEHPad);
+
+ // It is problematic if the inlinee has a cleanupret which unwinds to
+ // caller and we inline it into a call site which doesn't unwind but into
+ // an EH pad that does. Such an edge must be dynamically unreachable.
+ // As such, we replace the cleanupret with unreachable.
+ if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(BB->getTerminator()))
+ if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally)
+ changeToUnreachable(CleanupRet);
+
+ Instruction *I = BB->getFirstNonPHI();
+ if (!I->isEHPad())
+ continue;
+
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
+ if (isa<ConstantTokenNone>(CatchSwitch->getParentPad()))
+ CatchSwitch->setParentPad(CallSiteEHPad);
+ } else {
+ auto *FPI = cast<FuncletPadInst>(I);
+ if (isa<ConstantTokenNone>(FPI->getParentPad()))
+ FPI->setParentPad(CallSiteEHPad);
+ }
+ }
+ }
+
+ if (InlinedDeoptimizeCalls) {
+ // We need to at least remove the deoptimizing returns from the Return set,
+ // so that the control flow from those returns does not get merged into the
+ // caller (but terminate it instead). If the caller's return type does not
+ // match the callee's return type, we also need to change the return type of
+ // the intrinsic.
+ if (Caller->getReturnType() == CB.getType()) {
+ llvm::erase_if(Returns, [](ReturnInst *RI) {
+ return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr;
+ });
+ } else {
+ SmallVector<ReturnInst *, 8> NormalReturns;
+ Function *NewDeoptIntrinsic = Intrinsic::getDeclaration(
+ Caller->getParent(), Intrinsic::experimental_deoptimize,
+ {Caller->getReturnType()});
+
+ for (ReturnInst *RI : Returns) {
+ CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall();
+ if (!DeoptCall) {
+ NormalReturns.push_back(RI);
+ continue;
+ }
+
+ // The calling convention on the deoptimize call itself may be bogus,
+ // since the code we're inlining may have undefined behavior (and may
+ // never actually execute at runtime); but all
+ // @llvm.experimental.deoptimize declarations have to have the same
+ // calling convention in a well-formed module.
+ auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv();
+ NewDeoptIntrinsic->setCallingConv(CallingConv);
+ auto *CurBB = RI->getParent();
+ RI->eraseFromParent();
+
+ SmallVector<Value *, 4> CallArgs(DeoptCall->args());
+
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ DeoptCall->getOperandBundlesAsDefs(OpBundles);
+ auto DeoptAttributes = DeoptCall->getAttributes();
+ DeoptCall->eraseFromParent();
+ assert(!OpBundles.empty() &&
+ "Expected at least the deopt operand bundle");
+
+ IRBuilder<> Builder(CurBB);
+ CallInst *NewDeoptCall =
+ Builder.CreateCall(NewDeoptIntrinsic, CallArgs, OpBundles);
+ NewDeoptCall->setCallingConv(CallingConv);
+ NewDeoptCall->setAttributes(DeoptAttributes);
+ if (NewDeoptCall->getType()->isVoidTy())
+ Builder.CreateRetVoid();
+ else
+ Builder.CreateRet(NewDeoptCall);
+ }
+
+ // Leave behind the normal returns so we can merge control flow.
+ std::swap(Returns, NormalReturns);
+ }
+ }
+
+ // Handle any inlined musttail call sites. In order for a new call site to be
+ // musttail, the source of the clone and the inlined call site must have been
+ // musttail. Therefore it's safe to return without merging control into the
+ // phi below.
+ if (InlinedMustTailCalls) {
+ // Check if we need to bitcast the result of any musttail calls.
+ Type *NewRetTy = Caller->getReturnType();
+ bool NeedBitCast = !CB.use_empty() && CB.getType() != NewRetTy;
+
+ // Handle the returns preceded by musttail calls separately.
+ SmallVector<ReturnInst *, 8> NormalReturns;
+ for (ReturnInst *RI : Returns) {
+ CallInst *ReturnedMustTail =
+ RI->getParent()->getTerminatingMustTailCall();
+ if (!ReturnedMustTail) {
+ NormalReturns.push_back(RI);
+ continue;
+ }
+ if (!NeedBitCast)
+ continue;
+
+ // Delete the old return and any preceding bitcast.
+ BasicBlock *CurBB = RI->getParent();
+ auto *OldCast = dyn_cast_or_null<BitCastInst>(RI->getReturnValue());
+ RI->eraseFromParent();
+ if (OldCast)
+ OldCast->eraseFromParent();
+
+ // Insert a new bitcast and return with the right type.
+ IRBuilder<> Builder(CurBB);
+ Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy));
+ }
+
+ // Leave behind the normal returns so we can merge control flow.
+ std::swap(Returns, NormalReturns);
+ }
+
+ // Now that all of the transforms on the inlined code have taken place but
+ // before we splice the inlined code into the CFG and lose track of which
+ // blocks were actually inlined, collect the call sites. We only do this if
+ // call graph updates weren't requested, as those provide value handle based
+ // tracking of inlined call sites instead. Calls to intrinsics are not
+ // collected because they are not inlineable.
+ if (InlinedFunctionInfo.ContainsCalls && !IFI.CG) {
+ // Otherwise just collect the raw call sites that were inlined.
+ for (BasicBlock &NewBB :
+ make_range(FirstNewBlock->getIterator(), Caller->end()))
+ for (Instruction &I : NewBB)
+ if (auto *CB = dyn_cast<CallBase>(&I))
+ if (!(CB->getCalledFunction() &&
+ CB->getCalledFunction()->isIntrinsic()))
+ IFI.InlinedCallSites.push_back(CB);
+ }
+
+ // If we cloned in _exactly one_ basic block, and if that block ends in a
+ // return instruction, we splice the body of the inlined callee directly into
+ // the calling basic block.
+ if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
+ // Move all of the instructions right before the call.
+ OrigBB->splice(CB.getIterator(), &*FirstNewBlock, FirstNewBlock->begin(),
+ FirstNewBlock->end());
+ // Remove the cloned basic block.
+ Caller->back().eraseFromParent();
+
+ // If the call site was an invoke instruction, add a branch to the normal
+ // destination.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
+ BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), &CB);
+ NewBr->setDebugLoc(Returns[0]->getDebugLoc());
+ }
+
+ // If the return instruction returned a value, replace uses of the call with
+ // uses of the returned value.
+ if (!CB.use_empty()) {
+ ReturnInst *R = Returns[0];
+ if (&CB == R->getReturnValue())
+ CB.replaceAllUsesWith(UndefValue::get(CB.getType()));
+ else
+ CB.replaceAllUsesWith(R->getReturnValue());
+ }
+ // Since we are now done with the Call/Invoke, we can delete it.
+ CB.eraseFromParent();
+
+ // Since we are now done with the return instruction, delete it also.
+ Returns[0]->eraseFromParent();
+
+ if (MergeAttributes)
+ AttributeFuncs::mergeAttributesForInlining(*Caller, *CalledFunc);
+
+ // We are now done with the inlining.
+ return InlineResult::success();
+ }
+
+ // Otherwise, we have the normal case, of more than one block to inline or
+ // multiple return sites.
+
+ // We want to clone the entire callee function into the hole between the
+ // "starter" and "ender" blocks. How we accomplish this depends on whether
+ // this is an invoke instruction or a call instruction.
+ BasicBlock *AfterCallBB;
+ BranchInst *CreatedBranchToNormalDest = nullptr;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
+
+ // Add an unconditional branch to make this look like the CallInst case...
+ CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), &CB);
+
+ // Split the basic block. This guarantees that no PHI nodes will have to be
+ // updated due to new incoming edges, and make the invoke case more
+ // symmetric to the call case.
+ AfterCallBB =
+ OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(),
+ CalledFunc->getName() + ".exit");
+
+ } else { // It's a call
+ // If this is a call instruction, we need to split the basic block that
+ // the call lives in.
+ //
+ AfterCallBB = OrigBB->splitBasicBlock(CB.getIterator(),
+ CalledFunc->getName() + ".exit");
+ }
+
+ if (IFI.CallerBFI) {
+ // Copy original BB's block frequency to AfterCallBB
+ IFI.CallerBFI->setBlockFreq(
+ AfterCallBB, IFI.CallerBFI->getBlockFreq(OrigBB).getFrequency());
+ }
+
+ // Change the branch that used to go to AfterCallBB to branch to the first
+ // basic block of the inlined function.
+ //
+ Instruction *Br = OrigBB->getTerminator();
+ assert(Br && Br->getOpcode() == Instruction::Br &&
+ "splitBasicBlock broken!");
+ Br->setOperand(0, &*FirstNewBlock);
+
+ // Now that the function is correct, make it a little bit nicer. In
+ // particular, move the basic blocks inserted from the end of the function
+ // into the space made by splitting the source basic block.
+ Caller->splice(AfterCallBB->getIterator(), Caller, FirstNewBlock,
+ Caller->end());
+
+ // Handle all of the return instructions that we just cloned in, and eliminate
+ // any users of the original call/invoke instruction.
+ Type *RTy = CalledFunc->getReturnType();
+
+ PHINode *PHI = nullptr;
+ if (Returns.size() > 1) {
+ // The PHI node should go at the front of the new basic block to merge all
+ // possible incoming values.
+ if (!CB.use_empty()) {
+ PHI = PHINode::Create(RTy, Returns.size(), CB.getName(),
+ &AfterCallBB->front());
+ // Anything that used the result of the function call should now use the
+ // PHI node as their operand.
+ CB.replaceAllUsesWith(PHI);
+ }
+
+ // Loop over all of the return instructions adding entries to the PHI node
+ // as appropriate.
+ if (PHI) {
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ ReturnInst *RI = Returns[i];
+ assert(RI->getReturnValue()->getType() == PHI->getType() &&
+ "Ret value not consistent in function!");
+ PHI->addIncoming(RI->getReturnValue(), RI->getParent());
+ }
+ }
+
+ // Add a branch to the merge points and remove return instructions.
+ DebugLoc Loc;
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ ReturnInst *RI = Returns[i];
+ BranchInst* BI = BranchInst::Create(AfterCallBB, RI);
+ Loc = RI->getDebugLoc();
+ BI->setDebugLoc(Loc);
+ RI->eraseFromParent();
+ }
+ // We need to set the debug location to *somewhere* inside the
+ // inlined function. The line number may be nonsensical, but the
+ // instruction will at least be associated with the right
+ // function.
+ if (CreatedBranchToNormalDest)
+ CreatedBranchToNormalDest->setDebugLoc(Loc);
+ } else if (!Returns.empty()) {
+ // Otherwise, if there is exactly one return value, just replace anything
+ // using the return value of the call with the computed value.
+ if (!CB.use_empty()) {
+ if (&CB == Returns[0]->getReturnValue())
+ CB.replaceAllUsesWith(UndefValue::get(CB.getType()));
+ else
+ CB.replaceAllUsesWith(Returns[0]->getReturnValue());
+ }
+
+ // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
+ BasicBlock *ReturnBB = Returns[0]->getParent();
+ ReturnBB->replaceAllUsesWith(AfterCallBB);
+
+ // Splice the code from the return block into the block that it will return
+ // to, which contains the code that was after the call.
+ AfterCallBB->splice(AfterCallBB->begin(), ReturnBB);
+
+ if (CreatedBranchToNormalDest)
+ CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc());
+
+    // Delete the return instruction and the now-empty ReturnBB.
+ Returns[0]->eraseFromParent();
+ ReturnBB->eraseFromParent();
+ } else if (!CB.use_empty()) {
+ // No returns, but something is using the return value of the call. Just
+ // nuke the result.
+ CB.replaceAllUsesWith(PoisonValue::get(CB.getType()));
+ }
+
+ // Since we are now done with the Call/Invoke, we can delete it.
+ CB.eraseFromParent();
+
+ // If we inlined any musttail calls and the original return is now
+ // unreachable, delete it. It can only contain a bitcast and ret.
+ if (InlinedMustTailCalls && pred_empty(AfterCallBB))
+ AfterCallBB->eraseFromParent();
+
+ // We should always be able to fold the entry block of the function into the
+ // single predecessor of the block...
+ assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
+ BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);
+
+ // Splice the code entry block into calling block, right before the
+ // unconditional branch.
+ CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes
+ OrigBB->splice(Br->getIterator(), CalleeEntry);
+
+ // Remove the unconditional branch.
+ Br->eraseFromParent();
+
+ // Now we can remove the CalleeEntry block, which is now empty.
+ CalleeEntry->eraseFromParent();
+
+ // If we inserted a phi node, check to see if it has a single value (e.g. all
+ // the entries are the same or undef). If so, remove the PHI so it doesn't
+ // block other optimizations.
+ if (PHI) {
+ AssumptionCache *AC =
+ IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
+ auto &DL = Caller->getParent()->getDataLayout();
+ if (Value *V = simplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) {
+ PHI->replaceAllUsesWith(V);
+ PHI->eraseFromParent();
+ }
+ }
+
+ if (MergeAttributes)
+ AttributeFuncs::mergeAttributesForInlining(*Caller, *CalledFunc);
+
+ return InlineResult::success();
+}
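+
+// Minimal usage sketch (illustrative only; GetAssumptionCache, PSI and the BFI
+// pointers are assumed to come from the calling pass and are not defined in
+// this file):
+//
+//   InlineFunctionInfo IFI(/*cg=*/nullptr, GetAssumptionCache, PSI,
+//                          CallerBFI, CalleeBFI);
+//   if (auto *CB = dyn_cast<CallBase>(&Inst))
+//     if (CB->getCalledFunction() == &Callee)
+//       if (InlineFunction(*CB, IFI, /*MergeAttributes=*/true).isSuccess())
+//         ++NumInlined;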
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/InstructionNamer.cpp
new file mode 100644
index 0000000000..f3499c9c8a
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/InstructionNamer.cpp
@@ -0,0 +1,78 @@
+//===- InstructionNamer.cpp - Give anonymous instructions names -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a little utility pass that gives instructions names; this is mostly
+// useful when diffing the effect of an optimization because deleting an
+// unnamed instruction can change all other instruction numbering, making the
+// diff very noisy.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/InstructionNamer.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils.h"
+
+using namespace llvm;
+
+namespace {
+void nameInstructions(Function &F) {
+ for (auto &Arg : F.args()) {
+ if (!Arg.hasName())
+ Arg.setName("arg");
+ }
+
+ for (BasicBlock &BB : F) {
+ if (!BB.hasName())
+ BB.setName("bb");
+
+ for (Instruction &I : BB) {
+ if (!I.hasName() && !I.getType()->isVoidTy())
+ I.setName("i");
+ }
+ }
+}
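+
+// Illustrative effect (example IR only): arguments become "arg", basic blocks
+// become "bb", and non-void instructions become "i", with numeric suffixes
+// added automatically to keep the names unique, e.g.
+//   %0 = add i32 %a, %b   ==>   %i = add i32 %a, %b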
+
+struct InstNamer : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ InstNamer() : FunctionPass(ID) {
+ initializeInstNamerPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &Info) const override {
+ Info.setPreservesAll();
+ }
+
+ bool runOnFunction(Function &F) override {
+ nameInstructions(F);
+ return true;
+ }
+};
+
+char InstNamer::ID = 0;
+} // namespace
+
+INITIALIZE_PASS(InstNamer, "instnamer",
+ "Assign names to anonymous instructions", false, false)
+char &llvm::InstructionNamerID = InstNamer::ID;
+//===----------------------------------------------------------------------===//
+//
+// InstructionNamer - Give any unnamed non-void instructions "i" names.
+//
+FunctionPass *llvm::createInstructionNamerPass() {
+ return new InstNamer();
+}
+
+PreservedAnalyses InstructionNamerPass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ nameInstructions(F);
+ return PreservedAnalyses::all();
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/IntegerDivision.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/IntegerDivision.cpp
new file mode 100644
index 0000000000..cea095408b
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/IntegerDivision.cpp
@@ -0,0 +1,639 @@
+//===-- IntegerDivision.cpp - Expand integer division ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an implementation of 32bit and 64bit scalar integer
+// division for targets that don't have native support. It's largely derived
+// from compiler-rt's implementations of __udivsi3 and __udivmoddi4,
+// but hand-tuned for targets that prefer less control flow.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/IntegerDivision.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "integer-division"
+
+/// Generate code to compute the remainder of two signed integers. Returns the
+/// remainder, which will have the sign of the dividend. Builder's insert point
+/// should be pointing where the caller wants code generated, e.g. at the srem
+/// instruction. This will generate a urem in the process, and Builder's insert
+/// point will be pointing at the urem (if present, i.e. not folded), ready to
+/// be expanded if the user wishes.
+static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor,
+ IRBuilder<> &Builder) {
+ unsigned BitWidth = Dividend->getType()->getIntegerBitWidth();
+ ConstantInt *Shift = Builder.getIntN(BitWidth, BitWidth - 1);
+
+ // Following instructions are generated for both i32 (shift 31) and
+ // i64 (shift 63).
+
+ // ; %dividend_sgn = ashr i32 %dividend, 31
+ // ; %divisor_sgn = ashr i32 %divisor, 31
+ // ; %dvd_xor = xor i32 %dividend, %dividend_sgn
+ // ; %dvs_xor = xor i32 %divisor, %divisor_sgn
+ // ; %u_dividend = sub i32 %dvd_xor, %dividend_sgn
+ // ; %u_divisor = sub i32 %dvs_xor, %divisor_sgn
+ // ; %urem = urem i32 %dividend, %divisor
+ // ; %xored = xor i32 %urem, %dividend_sgn
+ // ; %srem = sub i32 %xored, %dividend_sgn
+ Dividend = Builder.CreateFreeze(Dividend);
+ Divisor = Builder.CreateFreeze(Divisor);
+ Value *DividendSign = Builder.CreateAShr(Dividend, Shift);
+ Value *DivisorSign = Builder.CreateAShr(Divisor, Shift);
+ Value *DvdXor = Builder.CreateXor(Dividend, DividendSign);
+ Value *DvsXor = Builder.CreateXor(Divisor, DivisorSign);
+ Value *UDividend = Builder.CreateSub(DvdXor, DividendSign);
+ Value *UDivisor = Builder.CreateSub(DvsXor, DivisorSign);
+ Value *URem = Builder.CreateURem(UDividend, UDivisor);
+ Value *Xored = Builder.CreateXor(URem, DividendSign);
+ Value *SRem = Builder.CreateSub(Xored, DividendSign);
+
+ if (Instruction *URemInst = dyn_cast<Instruction>(URem))
+ Builder.SetInsertPoint(URemInst);
+
+ return SRem;
+}
+
+
+/// Generate code to compute the remainder of two unsigned integers. Returns the
+/// remainder. Builder's insert point should be pointing where the caller wants
+/// code generated, e.g. at the urem instruction. This will generate a udiv in
+/// the process, and Builder's insert point will be pointing at the udiv (if
+/// present, i.e. not folded), ready to be expanded if the user wishes.
+static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor,
+ IRBuilder<> &Builder) {
+ // Remainder = Dividend - Quotient*Divisor
+
+ // Following instructions are generated for both i32 and i64
+
+ // ; %quotient = udiv i32 %dividend, %divisor
+ // ; %product = mul i32 %divisor, %quotient
+ // ; %remainder = sub i32 %dividend, %product
+ Dividend = Builder.CreateFreeze(Dividend);
+ Divisor = Builder.CreateFreeze(Divisor);
+ Value *Quotient = Builder.CreateUDiv(Dividend, Divisor);
+ Value *Product = Builder.CreateMul(Divisor, Quotient);
+ Value *Remainder = Builder.CreateSub(Dividend, Product);
+
+ if (Instruction *UDiv = dyn_cast<Instruction>(Quotient))
+ Builder.SetInsertPoint(UDiv);
+
+ return Remainder;
+}
+
+/// Generate code to divide two signed integers. Returns the quotient, rounded
+/// towards 0. Builder's insert point should be pointing where the caller wants
+/// code generated, e.g. at the sdiv instruction. This will generate a udiv in
+/// the process, and Builder's insert point will be pointing at the udiv (if
+/// present, i.e. not folded), ready to be expanded if the user wishes.
+static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor,
+ IRBuilder<> &Builder) {
+ // Implementation taken from compiler-rt's __divsi3 and __divdi3
+
+ unsigned BitWidth = Dividend->getType()->getIntegerBitWidth();
+ ConstantInt *Shift = Builder.getIntN(BitWidth, BitWidth - 1);
+
+ // Following instructions are generated for both i32 (shift 31) and
+ // i64 (shift 63).
+
+ // ; %tmp = ashr i32 %dividend, 31
+ // ; %tmp1 = ashr i32 %divisor, 31
+ // ; %tmp2 = xor i32 %tmp, %dividend
+ // ; %u_dvnd = sub nsw i32 %tmp2, %tmp
+ // ; %tmp3 = xor i32 %tmp1, %divisor
+ // ; %u_dvsr = sub nsw i32 %tmp3, %tmp1
+ // ; %q_sgn = xor i32 %tmp1, %tmp
+ // ; %q_mag = udiv i32 %u_dvnd, %u_dvsr
+ // ; %tmp4 = xor i32 %q_mag, %q_sgn
+ // ; %q = sub i32 %tmp4, %q_sgn
+ Dividend = Builder.CreateFreeze(Dividend);
+ Divisor = Builder.CreateFreeze(Divisor);
+ Value *Tmp = Builder.CreateAShr(Dividend, Shift);
+ Value *Tmp1 = Builder.CreateAShr(Divisor, Shift);
+ Value *Tmp2 = Builder.CreateXor(Tmp, Dividend);
+ Value *U_Dvnd = Builder.CreateSub(Tmp2, Tmp);
+ Value *Tmp3 = Builder.CreateXor(Tmp1, Divisor);
+ Value *U_Dvsr = Builder.CreateSub(Tmp3, Tmp1);
+ Value *Q_Sgn = Builder.CreateXor(Tmp1, Tmp);
+ Value *Q_Mag = Builder.CreateUDiv(U_Dvnd, U_Dvsr);
+ Value *Tmp4 = Builder.CreateXor(Q_Mag, Q_Sgn);
+ Value *Q = Builder.CreateSub(Tmp4, Q_Sgn);
+
+ if (Instruction *UDiv = dyn_cast<Instruction>(Q_Mag))
+ Builder.SetInsertPoint(UDiv);
+
+ return Q;
+}
+
+/// Generates code to divide two unsigned scalar 32-bit or 64-bit integers.
+/// Returns the quotient, rounded towards 0. Builder's insert point should
+/// point where the caller wants code generated, e.g. at the udiv instruction.
+static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
+ IRBuilder<> &Builder) {
+ // The basic algorithm can be found in the compiler-rt project's
+ // implementation of __udivsi3.c. Here, we do a lower-level IR based approach
+ // that's been hand-tuned to lessen the amount of control flow involved.
+
+ // Some helper values
+ IntegerType *DivTy = cast<IntegerType>(Dividend->getType());
+ unsigned BitWidth = DivTy->getBitWidth();
+
+ ConstantInt *Zero = ConstantInt::get(DivTy, 0);
+ ConstantInt *One = ConstantInt::get(DivTy, 1);
+ ConstantInt *NegOne = ConstantInt::getSigned(DivTy, -1);
+ ConstantInt *MSB = ConstantInt::get(DivTy, BitWidth - 1);
+
+ ConstantInt *True = Builder.getTrue();
+
+ BasicBlock *IBB = Builder.GetInsertBlock();
+ Function *F = IBB->getParent();
+ Function *CTLZ = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
+ DivTy);
+
+ // Our CFG is going to look like:
+ // +---------------------+
+ // | special-cases |
+ // | ... |
+ // +---------------------+
+ // | |
+ // | +----------+
+ // | | bb1 |
+ // | | ... |
+ // | +----------+
+ // | | |
+ // | | +------------+
+ // | | | preheader |
+ // | | | ... |
+ // | | +------------+
+ // | | |
+ // | | | +---+
+ // | | | | |
+ // | | +------------+ |
+ // | | | do-while | |
+ // | | | ... | |
+ // | | +------------+ |
+ // | | | | |
+ // | +-----------+ +---+
+ // | | loop-exit |
+ // | | ... |
+ // | +-----------+
+ // | |
+ // +-------+
+ // | ... |
+ // | end |
+ // +-------+
+ BasicBlock *SpecialCases = Builder.GetInsertBlock();
+ SpecialCases->setName(Twine(SpecialCases->getName(), "_udiv-special-cases"));
+ BasicBlock *End = SpecialCases->splitBasicBlock(Builder.GetInsertPoint(),
+ "udiv-end");
+ BasicBlock *LoopExit = BasicBlock::Create(Builder.getContext(),
+ "udiv-loop-exit", F, End);
+ BasicBlock *DoWhile = BasicBlock::Create(Builder.getContext(),
+ "udiv-do-while", F, End);
+ BasicBlock *Preheader = BasicBlock::Create(Builder.getContext(),
+ "udiv-preheader", F, End);
+ BasicBlock *BB1 = BasicBlock::Create(Builder.getContext(),
+ "udiv-bb1", F, End);
+
+ // We'll be overwriting the terminator to insert our extra blocks
+ SpecialCases->getTerminator()->eraseFromParent();
+
+ // Same instructions are generated for both i32 (msb 31) and i64 (msb 63).
+
+ // First off, check for special cases: dividend or divisor is zero, divisor
+ // is greater than dividend, and divisor is 1.
+ // ; special-cases:
+ // ; %ret0_1 = icmp eq i32 %divisor, 0
+ // ; %ret0_2 = icmp eq i32 %dividend, 0
+ // ; %ret0_3 = or i1 %ret0_1, %ret0_2
+ // ; %tmp0 = tail call i32 @llvm.ctlz.i32(i32 %divisor, i1 true)
+ // ; %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %dividend, i1 true)
+ // ; %sr = sub nsw i32 %tmp0, %tmp1
+ // ; %ret0_4 = icmp ugt i32 %sr, 31
+ // ; %ret0 = select i1 %ret0_3, i1 true, i1 %ret0_4
+ // ; %retDividend = icmp eq i32 %sr, 31
+ // ; %retVal = select i1 %ret0, i32 0, i32 %dividend
+ // ; %earlyRet = select i1 %ret0, i1 true, %retDividend
+ // ; br i1 %earlyRet, label %end, label %bb1
+ Builder.SetInsertPoint(SpecialCases);
+ Divisor = Builder.CreateFreeze(Divisor);
+ Dividend = Builder.CreateFreeze(Dividend);
+ Value *Ret0_1 = Builder.CreateICmpEQ(Divisor, Zero);
+ Value *Ret0_2 = Builder.CreateICmpEQ(Dividend, Zero);
+ Value *Ret0_3 = Builder.CreateOr(Ret0_1, Ret0_2);
+ Value *Tmp0 = Builder.CreateCall(CTLZ, {Divisor, True});
+ Value *Tmp1 = Builder.CreateCall(CTLZ, {Dividend, True});
+ Value *SR = Builder.CreateSub(Tmp0, Tmp1);
+ Value *Ret0_4 = Builder.CreateICmpUGT(SR, MSB);
+ Value *Ret0 = Builder.CreateLogicalOr(Ret0_3, Ret0_4);
+ Value *RetDividend = Builder.CreateICmpEQ(SR, MSB);
+ Value *RetVal = Builder.CreateSelect(Ret0, Zero, Dividend);
+ Value *EarlyRet = Builder.CreateLogicalOr(Ret0, RetDividend);
+ Builder.CreateCondBr(EarlyRet, End, BB1);
+
+ // ; bb1: ; preds = %special-cases
+ // ; %sr_1 = add i32 %sr, 1
+ // ; %tmp2 = sub i32 31, %sr
+ // ; %q = shl i32 %dividend, %tmp2
+ // ; %skipLoop = icmp eq i32 %sr_1, 0
+ // ; br i1 %skipLoop, label %loop-exit, label %preheader
+ Builder.SetInsertPoint(BB1);
+ Value *SR_1 = Builder.CreateAdd(SR, One);
+ Value *Tmp2 = Builder.CreateSub(MSB, SR);
+ Value *Q = Builder.CreateShl(Dividend, Tmp2);
+ Value *SkipLoop = Builder.CreateICmpEQ(SR_1, Zero);
+ Builder.CreateCondBr(SkipLoop, LoopExit, Preheader);
+
+ // ; preheader: ; preds = %bb1
+ // ; %tmp3 = lshr i32 %dividend, %sr_1
+ // ; %tmp4 = add i32 %divisor, -1
+ // ; br label %do-while
+ Builder.SetInsertPoint(Preheader);
+ Value *Tmp3 = Builder.CreateLShr(Dividend, SR_1);
+ Value *Tmp4 = Builder.CreateAdd(Divisor, NegOne);
+ Builder.CreateBr(DoWhile);
+
+ // ; do-while: ; preds = %do-while, %preheader
+ // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ]
+ // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ]
+ // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ]
+ // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ]
+ // ; %tmp5 = shl i32 %r_1, 1
+ // ; %tmp6 = lshr i32 %q_2, 31
+ // ; %tmp7 = or i32 %tmp5, %tmp6
+ // ; %tmp8 = shl i32 %q_2, 1
+ // ; %q_1 = or i32 %carry_1, %tmp8
+ // ; %tmp9 = sub i32 %tmp4, %tmp7
+ // ; %tmp10 = ashr i32 %tmp9, 31
+ // ; %carry = and i32 %tmp10, 1
+ // ; %tmp11 = and i32 %tmp10, %divisor
+ // ; %r = sub i32 %tmp7, %tmp11
+ // ; %sr_2 = add i32 %sr_3, -1
+ // ; %tmp12 = icmp eq i32 %sr_2, 0
+ // ; br i1 %tmp12, label %loop-exit, label %do-while
+ Builder.SetInsertPoint(DoWhile);
+ PHINode *Carry_1 = Builder.CreatePHI(DivTy, 2);
+ PHINode *SR_3 = Builder.CreatePHI(DivTy, 2);
+ PHINode *R_1 = Builder.CreatePHI(DivTy, 2);
+ PHINode *Q_2 = Builder.CreatePHI(DivTy, 2);
+ Value *Tmp5 = Builder.CreateShl(R_1, One);
+ Value *Tmp6 = Builder.CreateLShr(Q_2, MSB);
+ Value *Tmp7 = Builder.CreateOr(Tmp5, Tmp6);
+ Value *Tmp8 = Builder.CreateShl(Q_2, One);
+ Value *Q_1 = Builder.CreateOr(Carry_1, Tmp8);
+ Value *Tmp9 = Builder.CreateSub(Tmp4, Tmp7);
+ Value *Tmp10 = Builder.CreateAShr(Tmp9, MSB);
+ Value *Carry = Builder.CreateAnd(Tmp10, One);
+ Value *Tmp11 = Builder.CreateAnd(Tmp10, Divisor);
+ Value *R = Builder.CreateSub(Tmp7, Tmp11);
+ Value *SR_2 = Builder.CreateAdd(SR_3, NegOne);
+ Value *Tmp12 = Builder.CreateICmpEQ(SR_2, Zero);
+ Builder.CreateCondBr(Tmp12, LoopExit, DoWhile);
+
+ // ; loop-exit: ; preds = %do-while, %bb1
+ // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ]
+ // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ]
+ // ; %tmp13 = shl i32 %q_3, 1
+ // ; %q_4 = or i32 %carry_2, %tmp13
+ // ; br label %end
+ Builder.SetInsertPoint(LoopExit);
+ PHINode *Carry_2 = Builder.CreatePHI(DivTy, 2);
+ PHINode *Q_3 = Builder.CreatePHI(DivTy, 2);
+ Value *Tmp13 = Builder.CreateShl(Q_3, One);
+ Value *Q_4 = Builder.CreateOr(Carry_2, Tmp13);
+ Builder.CreateBr(End);
+
+ // ; end: ; preds = %loop-exit, %special-cases
+ // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ]
+ // ; ret i32 %q_5
+ Builder.SetInsertPoint(End, End->begin());
+ PHINode *Q_5 = Builder.CreatePHI(DivTy, 2);
+
+ // Populate the Phis, since all values have now been created. Our Phis were:
+ // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ]
+ Carry_1->addIncoming(Zero, Preheader);
+ Carry_1->addIncoming(Carry, DoWhile);
+ // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ]
+ SR_3->addIncoming(SR_1, Preheader);
+ SR_3->addIncoming(SR_2, DoWhile);
+ // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ]
+ R_1->addIncoming(Tmp3, Preheader);
+ R_1->addIncoming(R, DoWhile);
+ // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ]
+ Q_2->addIncoming(Q, Preheader);
+ Q_2->addIncoming(Q_1, DoWhile);
+ // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ]
+ Carry_2->addIncoming(Zero, BB1);
+ Carry_2->addIncoming(Carry, DoWhile);
+ // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ]
+ Q_3->addIncoming(Q, BB1);
+ Q_3->addIncoming(Q_1, DoWhile);
+ // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ]
+ Q_5->addIncoming(Q_4, LoopExit);
+ Q_5->addIncoming(RetVal, SpecialCases);
+
+ return Q_5;
+}
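
As a rough illustration only (not from this patch, and omitting the clz-based early exits), the shift-and-subtract loop emitted by the blocks above corresponds to the following plain C++ restoring division; names are invented for the sketch.

#include <cstdint>

// Restoring shift-and-subtract division mirroring the do-while loop above;
// the special cases (zero divisor/dividend, clz-based skip) are simplified.
static uint32_t softUDiv32(uint32_t Dividend, uint32_t Divisor) {
  if (Divisor == 0 || Dividend == 0)
    return 0; // the emitted IR also yields 0 for these cases
  uint32_t Quotient = 0, Remainder = 0;
  for (int Bit = 31; Bit >= 0; --Bit) {
    Remainder = (Remainder << 1) | ((Dividend >> Bit) & 1); // shift in next bit
    if (Remainder >= Divisor) {                             // trial subtract
      Remainder -= Divisor;
      Quotient |= 1u << Bit;
    }
  }
  return Quotient;
}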
+
+/// Generate code to calculate the remainder of two integers, replacing Rem with
+/// the generated code. This currently generates code using the udiv expansion,
+/// but future work includes generating more specialized code, e.g. when more
+/// information about the operands is known.
+///
+/// Replace Rem with generated code.
+bool llvm::expandRemainder(BinaryOperator *Rem) {
+ assert((Rem->getOpcode() == Instruction::SRem ||
+ Rem->getOpcode() == Instruction::URem) &&
+ "Trying to expand remainder from a non-remainder function");
+
+ IRBuilder<> Builder(Rem);
+
+ assert(!Rem->getType()->isVectorTy() && "Div over vectors not supported");
+
+ // First prepare the sign if it's a signed remainder
+ if (Rem->getOpcode() == Instruction::SRem) {
+ Value *Remainder = generateSignedRemainderCode(Rem->getOperand(0),
+ Rem->getOperand(1), Builder);
+
+ // Check whether this is the insert point while Rem is still valid.
+ bool IsInsertPoint = Rem->getIterator() == Builder.GetInsertPoint();
+ Rem->replaceAllUsesWith(Remainder);
+ Rem->dropAllReferences();
+ Rem->eraseFromParent();
+
+ // If we didn't actually generate a urem instruction, we're done.
+ // This happens, for example, if the inputs were constant; in that case the
+ // Builder insertion point was unchanged.
+ if (IsInsertPoint)
+ return true;
+
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
+ Rem = BO;
+ }
+
+ Value *Remainder = generatedUnsignedRemainderCode(Rem->getOperand(0),
+ Rem->getOperand(1),
+ Builder);
+
+ Rem->replaceAllUsesWith(Remainder);
+ Rem->dropAllReferences();
+ Rem->eraseFromParent();
+
+ // Expand the udiv
+ if (BinaryOperator *UDiv = dyn_cast<BinaryOperator>(Builder.GetInsertPoint())) {
+ assert(UDiv->getOpcode() == Instruction::UDiv && "Non-udiv in expansion?");
+ expandDivision(UDiv);
+ }
+
+ return true;
+}
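
A hedged usage sketch (hypothetical driver code, not part of this file): build a trivial function containing a urem and hand it to the entry point above.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/IntegerDivision.h"
using namespace llvm;

// Hypothetical example: create i32 @urem32(i32, i32) and expand its urem.
void expandRemainderExample() {
  LLVMContext Ctx;
  Module M("example", Ctx);
  IRBuilder<> B(Ctx);
  auto *FT = FunctionType::get(B.getInt32Ty(),
                               {B.getInt32Ty(), B.getInt32Ty()}, false);
  Function *F = Function::Create(FT, Function::ExternalLinkage, "urem32", &M);
  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
  B.SetInsertPoint(Entry);
  Value *Rem = B.CreateURem(F->getArg(0), F->getArg(1), "rem");
  B.CreateRet(Rem);
  // Replaces the urem with the loop-based expansion and erases it.
  expandRemainder(cast<BinaryOperator>(Rem));
}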
+
+/// Generate code to divide two integers, replacing Div with the generated
+/// code. This currently generates code similarly to compiler-rt's
+/// implementations, but future work includes generating more specialized code
+/// when more information about the operands is known.
+///
+/// Replace Div with generated code.
+bool llvm::expandDivision(BinaryOperator *Div) {
+ assert((Div->getOpcode() == Instruction::SDiv ||
+ Div->getOpcode() == Instruction::UDiv) &&
+ "Trying to expand division from a non-division function");
+
+ IRBuilder<> Builder(Div);
+
+ assert(!Div->getType()->isVectorTy() && "Div over vectors not supported");
+
+ // First prepare the sign if it's a signed division
+ if (Div->getOpcode() == Instruction::SDiv) {
+ // Lower the code to unsigned division, and reset Div to point to the udiv.
+ Value *Quotient = generateSignedDivisionCode(Div->getOperand(0),
+ Div->getOperand(1), Builder);
+
+ // Check whether this is the insert point while Div is still valid.
+ bool IsInsertPoint = Div->getIterator() == Builder.GetInsertPoint();
+ Div->replaceAllUsesWith(Quotient);
+ Div->dropAllReferences();
+ Div->eraseFromParent();
+
+ // If we didn't actually generate a udiv instruction, we're done.
+ // This happens, for example, if the inputs were constant; in that case the
+ // Builder insertion point was unchanged.
+ if (IsInsertPoint)
+ return true;
+
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
+ Div = BO;
+ }
+
+ // Insert the unsigned division code
+ Value *Quotient = generateUnsignedDivisionCode(Div->getOperand(0),
+ Div->getOperand(1),
+ Builder);
+ Div->replaceAllUsesWith(Quotient);
+ Div->dropAllReferences();
+ Div->eraseFromParent();
+
+ return true;
+}
+
+/// Generate code to compute the remainder of two integers of bitwidth up to
+/// 32 bits. Uses the above routines and extends the inputs/truncates the
+/// outputs to operate in 32 bits; that is, these routines are good for targets
+/// that have no or very little support for smaller than 32 bit integer
+/// arithmetic.
+///
+/// Replace Rem with emulation code.
+bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) {
+ assert((Rem->getOpcode() == Instruction::SRem ||
+ Rem->getOpcode() == Instruction::URem) &&
+ "Trying to expand remainder from a non-remainder function");
+
+ Type *RemTy = Rem->getType();
+ assert(!RemTy->isVectorTy() && "Div over vectors not supported");
+
+ unsigned RemTyBitWidth = RemTy->getIntegerBitWidth();
+
+ assert(RemTyBitWidth <= 32 &&
+ "Div of bitwidth greater than 32 not supported");
+
+ if (RemTyBitWidth == 32)
+ return expandRemainder(Rem);
+
+ // If bitwidth smaller than 32 extend inputs, extend output and proceed
+ // with 32 bit division.
+ IRBuilder<> Builder(Rem);
+
+ Value *ExtDividend;
+ Value *ExtDivisor;
+ Value *ExtRem;
+ Value *Trunc;
+ Type *Int32Ty = Builder.getInt32Ty();
+
+ if (Rem->getOpcode() == Instruction::SRem) {
+ ExtDividend = Builder.CreateSExt(Rem->getOperand(0), Int32Ty);
+ ExtDivisor = Builder.CreateSExt(Rem->getOperand(1), Int32Ty);
+ ExtRem = Builder.CreateSRem(ExtDividend, ExtDivisor);
+ } else {
+ ExtDividend = Builder.CreateZExt(Rem->getOperand(0), Int32Ty);
+ ExtDivisor = Builder.CreateZExt(Rem->getOperand(1), Int32Ty);
+ ExtRem = Builder.CreateURem(ExtDividend, ExtDivisor);
+ }
+ Trunc = Builder.CreateTrunc(ExtRem, RemTy);
+
+ Rem->replaceAllUsesWith(Trunc);
+ Rem->dropAllReferences();
+ Rem->eraseFromParent();
+
+ return expandRemainder(cast<BinaryOperator>(ExtRem));
+}
+
+/// Generate code to compute the remainder of two integers of bitwidth up to
+/// 64 bits. Uses the above routines and extends the inputs/truncates the
+/// outputs to operate in 64 bits.
+///
+/// Replace Rem with emulation code.
+bool llvm::expandRemainderUpTo64Bits(BinaryOperator *Rem) {
+ assert((Rem->getOpcode() == Instruction::SRem ||
+ Rem->getOpcode() == Instruction::URem) &&
+ "Trying to expand remainder from a non-remainder function");
+
+ Type *RemTy = Rem->getType();
+ assert(!RemTy->isVectorTy() && "Div over vectors not supported");
+
+ unsigned RemTyBitWidth = RemTy->getIntegerBitWidth();
+
+ if (RemTyBitWidth >= 64)
+ return expandRemainder(Rem);
+
+ // If bitwidth smaller than 64 extend inputs, extend output and proceed
+ // with 64 bit division.
+ IRBuilder<> Builder(Rem);
+
+ Value *ExtDividend;
+ Value *ExtDivisor;
+ Value *ExtRem;
+ Value *Trunc;
+ Type *Int64Ty = Builder.getInt64Ty();
+
+ if (Rem->getOpcode() == Instruction::SRem) {
+ ExtDividend = Builder.CreateSExt(Rem->getOperand(0), Int64Ty);
+ ExtDivisor = Builder.CreateSExt(Rem->getOperand(1), Int64Ty);
+ ExtRem = Builder.CreateSRem(ExtDividend, ExtDivisor);
+ } else {
+ ExtDividend = Builder.CreateZExt(Rem->getOperand(0), Int64Ty);
+ ExtDivisor = Builder.CreateZExt(Rem->getOperand(1), Int64Ty);
+ ExtRem = Builder.CreateURem(ExtDividend, ExtDivisor);
+ }
+ Trunc = Builder.CreateTrunc(ExtRem, RemTy);
+
+ Rem->replaceAllUsesWith(Trunc);
+ Rem->dropAllReferences();
+ Rem->eraseFromParent();
+
+ return expandRemainder(cast<BinaryOperator>(ExtRem));
+}
+
+/// Generate code to divide two integers of bitwidth up to 32 bits. Uses the
+/// above routines and extends the inputs/truncates the outputs to operate
+/// in 32 bits; that is, these routines are good for targets that have no
+/// or very little support for smaller than 32 bit integer arithmetic.
+///
+/// Replace Div with emulation code.
+bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) {
+ assert((Div->getOpcode() == Instruction::SDiv ||
+ Div->getOpcode() == Instruction::UDiv) &&
+ "Trying to expand division from a non-division function");
+
+ Type *DivTy = Div->getType();
+ assert(!DivTy->isVectorTy() && "Div over vectors not supported");
+
+ unsigned DivTyBitWidth = DivTy->getIntegerBitWidth();
+
+ assert(DivTyBitWidth <= 32 && "Div of bitwidth greater than 32 not supported");
+
+ if (DivTyBitWidth == 32)
+ return expandDivision(Div);
+
+ // If bitwidth smaller than 32 extend inputs, extend output and proceed
+ // with 32 bit division.
+ IRBuilder<> Builder(Div);
+
+ Value *ExtDividend;
+ Value *ExtDivisor;
+ Value *ExtDiv;
+ Value *Trunc;
+ Type *Int32Ty = Builder.getInt32Ty();
+
+ if (Div->getOpcode() == Instruction::SDiv) {
+ ExtDividend = Builder.CreateSExt(Div->getOperand(0), Int32Ty);
+ ExtDivisor = Builder.CreateSExt(Div->getOperand(1), Int32Ty);
+ ExtDiv = Builder.CreateSDiv(ExtDividend, ExtDivisor);
+ } else {
+ ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int32Ty);
+ ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int32Ty);
+ ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);
+ }
+ Trunc = Builder.CreateTrunc(ExtDiv, DivTy);
+
+ Div->replaceAllUsesWith(Trunc);
+ Div->dropAllReferences();
+ Div->eraseFromParent();
+
+ return expandDivision(cast<BinaryOperator>(ExtDiv));
+}
+
+/// Generate code to divide two integers of bitwidth up to 64 bits. Uses the
+/// above routines and extends the inputs/truncates the outputs to operate
+/// in 64 bits.
+///
+/// Replace Div with emulation code.
+bool llvm::expandDivisionUpTo64Bits(BinaryOperator *Div) {
+ assert((Div->getOpcode() == Instruction::SDiv ||
+ Div->getOpcode() == Instruction::UDiv) &&
+ "Trying to expand division from a non-division function");
+
+ Type *DivTy = Div->getType();
+ assert(!DivTy->isVectorTy() && "Div over vectors not supported");
+
+ unsigned DivTyBitWidth = DivTy->getIntegerBitWidth();
+
+ if (DivTyBitWidth >= 64)
+ return expandDivision(Div);
+
+ // If bitwidth smaller than 64 extend inputs, extend output and proceed
+ // with 64 bit division.
+ IRBuilder<> Builder(Div);
+
+ Value *ExtDividend;
+ Value *ExtDivisor;
+ Value *ExtDiv;
+ Value *Trunc;
+ Type *Int64Ty = Builder.getInt64Ty();
+
+ if (Div->getOpcode() == Instruction::SDiv) {
+ ExtDividend = Builder.CreateSExt(Div->getOperand(0), Int64Ty);
+ ExtDivisor = Builder.CreateSExt(Div->getOperand(1), Int64Ty);
+ ExtDiv = Builder.CreateSDiv(ExtDividend, ExtDivisor);
+ } else {
+ ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int64Ty);
+ ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int64Ty);
+ ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);
+ }
+ Trunc = Builder.CreateTrunc(ExtDiv, DivTy);
+
+ Div->replaceAllUsesWith(Trunc);
+ Div->dropAllReferences();
+ Div->eraseFromParent();
+
+ return expandDivision(cast<BinaryOperator>(ExtDiv));
+}
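
Similarly, a hedged sketch (hypothetical, reusing the setup pattern of the earlier example) of driving the narrow-bitwidth helper: an i16 sdiv is widened to i32, expanded, and truncated back.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/IntegerDivision.h"
using namespace llvm;

// Hypothetical example: create i16 @sdiv16(i16, i16) and lower it through the
// widen-to-32-bit path above.
void expandNarrowDivisionExample(Module &M) {
  LLVMContext &Ctx = M.getContext();
  IRBuilder<> B(Ctx);
  auto *FT = FunctionType::get(B.getInt16Ty(),
                               {B.getInt16Ty(), B.getInt16Ty()}, false);
  Function *F = Function::Create(FT, Function::ExternalLinkage, "sdiv16", &M);
  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
  B.SetInsertPoint(Entry);
  Value *Div = B.CreateSDiv(F->getArg(0), F->getArg(1), "div");
  B.CreateRet(Div);
  // Sign-extends to i32, expands the 32-bit sdiv, and truncates the result.
  expandDivisionUpTo32Bits(cast<BinaryOperator>(Div));
}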
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LCSSA.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LCSSA.cpp
new file mode 100644
index 0000000000..af79dc456e
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/LCSSA.cpp
@@ -0,0 +1,519 @@
+//===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass transforms loops by placing phi nodes at the end of the loops for
+// all values that are live across the loop boundary. For example, it turns
+// the left into the right code:
+//
+// for (...) for (...)
+// if (c) if (c)
+// X1 = ... X1 = ...
+// else else
+// X2 = ... X2 = ...
+// X3 = phi(X1, X2) X3 = phi(X1, X2)
+// ... = X3 + 4 X4 = phi(X3)
+// ... = X4 + 4
+//
+// This is still valid LLVM; the extra phi nodes are purely redundant, and will
+// be trivially eliminated by InstCombine. The major benefit of this
+// transformation is that it makes many other loop optimizations, such as
+// LoopUnswitching, simpler.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LCSSA.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PredIteratorCache.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "lcssa"
+
+STATISTIC(NumLCSSA, "Number of live out of a loop variables");
+
+#ifdef EXPENSIVE_CHECKS
+static bool VerifyLoopLCSSA = true;
+#else
+static bool VerifyLoopLCSSA = false;
+#endif
+static cl::opt<bool, true>
+ VerifyLoopLCSSAFlag("verify-loop-lcssa", cl::location(VerifyLoopLCSSA),
+ cl::Hidden,
+ cl::desc("Verify loop lcssa form (time consuming)"));
+
+/// Return true if the specified block is in the list.
+static bool isExitBlock(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
+ return is_contained(ExitBlocks, BB);
+}
+
+/// For every instruction from the worklist, check to see if it has any uses
+/// that are outside the current loop. If so, insert LCSSA PHI nodes and
+/// rewrite the uses.
+bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
+ const DominatorTree &DT, const LoopInfo &LI,
+ ScalarEvolution *SE, IRBuilderBase &Builder,
+ SmallVectorImpl<PHINode *> *PHIsToRemove) {
+ SmallVector<Use *, 16> UsesToRewrite;
+ SmallSetVector<PHINode *, 16> LocalPHIsToRemove;
+ PredIteratorCache PredCache;
+ bool Changed = false;
+
+ IRBuilderBase::InsertPointGuard InsertPtGuard(Builder);
+
+ // Cache the Loop ExitBlocks across this loop. We expect to get a lot of
+ // instructions within the same loops; computing the exit blocks is
+ // expensive, and we're not mutating the loop structure.
+ SmallDenseMap<Loop*, SmallVector<BasicBlock *,1>> LoopExitBlocks;
+
+ while (!Worklist.empty()) {
+ UsesToRewrite.clear();
+
+ Instruction *I = Worklist.pop_back_val();
+ assert(!I->getType()->isTokenTy() && "Tokens shouldn't be in the worklist");
+ BasicBlock *InstBB = I->getParent();
+ Loop *L = LI.getLoopFor(InstBB);
+ assert(L && "Instruction belongs to a BB that's not part of a loop");
+ if (!LoopExitBlocks.count(L))
+ L->getExitBlocks(LoopExitBlocks[L]);
+ assert(LoopExitBlocks.count(L));
+ const SmallVectorImpl<BasicBlock *> &ExitBlocks = LoopExitBlocks[L];
+
+ if (ExitBlocks.empty())
+ continue;
+
+ for (Use &U : make_early_inc_range(I->uses())) {
+ Instruction *User = cast<Instruction>(U.getUser());
+ BasicBlock *UserBB = User->getParent();
+
+ // Skip uses in unreachable blocks.
+ if (!DT.isReachableFromEntry(UserBB)) {
+ U.set(PoisonValue::get(I->getType()));
+ continue;
+ }
+
+ // For practical purposes, we consider that the use in a PHI
+ // occurs in the respective predecessor block. For more info,
+ // see the `phi` doc in LangRef and the LCSSA doc.
+ if (auto *PN = dyn_cast<PHINode>(User))
+ UserBB = PN->getIncomingBlock(U);
+
+ if (InstBB != UserBB && !L->contains(UserBB))
+ UsesToRewrite.push_back(&U);
+ }
+
+ // If there are no uses outside the loop, exit with no change.
+ if (UsesToRewrite.empty())
+ continue;
+
+ ++NumLCSSA; // We are applying the transformation
+
+ // Invoke instructions are special in that their result value is not
+ // available along their unwind edge. The code below tests to see whether
+ // DomBB dominates the value, so adjust DomBB to the normal destination
+ // block, which is effectively where the value is first usable.
+ BasicBlock *DomBB = InstBB;
+ if (auto *Inv = dyn_cast<InvokeInst>(I))
+ DomBB = Inv->getNormalDest();
+
+ const DomTreeNode *DomNode = DT.getNode(DomBB);
+
+ SmallVector<PHINode *, 16> AddedPHIs;
+ SmallVector<PHINode *, 8> PostProcessPHIs;
+
+ SmallVector<PHINode *, 4> InsertedPHIs;
+ SSAUpdater SSAUpdate(&InsertedPHIs);
+ SSAUpdate.Initialize(I->getType(), I->getName());
+
+ // Force re-computation of I, as some users now need to use the new PHI
+ // node.
+ if (SE)
+ SE->forgetValue(I);
+
+ // Insert the LCSSA phi's into all of the exit blocks dominated by the
+ // value, and add them to the Phi's map.
+ for (BasicBlock *ExitBB : ExitBlocks) {
+ if (!DT.dominates(DomNode, DT.getNode(ExitBB)))
+ continue;
+
+ // If we already inserted something for this BB, don't reprocess it.
+ if (SSAUpdate.HasValueForBlock(ExitBB))
+ continue;
+ Builder.SetInsertPoint(&ExitBB->front());
+ PHINode *PN = Builder.CreatePHI(I->getType(), PredCache.size(ExitBB),
+ I->getName() + ".lcssa");
+ // Get the debug location from the original instruction.
+ PN->setDebugLoc(I->getDebugLoc());
+
+ // Add inputs from inside the loop for this PHI. This is valid
+ // because `I` dominates `ExitBB` (checked above). This implies
+ // that every incoming block/edge is dominated by `I` as well,
+ // i.e. we can add uses of `I` to those incoming edges/append to the incoming
+ // blocks without violating the SSA dominance property.
+ for (BasicBlock *Pred : PredCache.get(ExitBB)) {
+ PN->addIncoming(I, Pred);
+
+ // If the exit block has a predecessor not within the loop, arrange for
+ // the incoming value use corresponding to that predecessor to be
+ // rewritten in terms of a different LCSSA PHI.
+ if (!L->contains(Pred))
+ UsesToRewrite.push_back(
+ &PN->getOperandUse(PN->getOperandNumForIncomingValue(
+ PN->getNumIncomingValues() - 1)));
+ }
+
+ AddedPHIs.push_back(PN);
+
+ // Remember that this phi makes the value alive in this block.
+ SSAUpdate.AddAvailableValue(ExitBB, PN);
+
+ // LoopSimplify might fail to simplify some loops (e.g. when indirect
+ // branches are involved). In such situations, it might happen that an
+ // exit for Loop L1 is the header of a disjoint Loop L2. Thus, when we
+ // create PHIs in such an exit block, we are also inserting PHIs into L2's
+ // header. This could break LCSSA form for L2 because these inserted PHIs
+ // can also have uses outside of L2. Remember all PHIs in such situations
+ // so we can revisit them later on. FIXME: Remove this if indirectbr support
+ // into LoopSimplify gets improved.
+ if (auto *OtherLoop = LI.getLoopFor(ExitBB))
+ if (!L->contains(OtherLoop))
+ PostProcessPHIs.push_back(PN);
+ }
+
+ // Rewrite all uses outside the loop in terms of the new PHIs we just
+ // inserted.
+ for (Use *UseToRewrite : UsesToRewrite) {
+ Instruction *User = cast<Instruction>(UseToRewrite->getUser());
+ BasicBlock *UserBB = User->getParent();
+
+ // For practical purposes, we consider that the use in a PHI
+ // occurs in the respective predecessor block. For more info,
+ // see the `phi` doc in LangRef and the LCSSA doc.
+ if (auto *PN = dyn_cast<PHINode>(User))
+ UserBB = PN->getIncomingBlock(*UseToRewrite);
+
+ // If this use is in an exit block, rewrite to use the newly inserted PHI.
+ // This is required for correctness because SSAUpdate doesn't handle uses
+ // in the same block. It assumes the PHI we inserted is at the end of the
+ // block.
+ if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) {
+ UseToRewrite->set(&UserBB->front());
+ continue;
+ }
+
+ // If we added a single PHI, it must dominate all uses and we can directly
+ // rename it.
+ if (AddedPHIs.size() == 1) {
+ UseToRewrite->set(AddedPHIs[0]);
+ continue;
+ }
+
+ // Otherwise, do full PHI insertion.
+ SSAUpdate.RewriteUse(*UseToRewrite);
+ }
+
+ SmallVector<DbgValueInst *, 4> DbgValues;
+ llvm::findDbgValues(DbgValues, I);
+
+ // Update pre-existing debug value uses that reside outside the loop.
+ for (auto *DVI : DbgValues) {
+ BasicBlock *UserBB = DVI->getParent();
+ if (InstBB == UserBB || L->contains(UserBB))
+ continue;
+ // We currently only handle debug values residing in blocks that were
+ // traversed while rewriting the uses. If we inserted just a single PHI,
+ // we will handle all relevant debug values.
+ Value *V = AddedPHIs.size() == 1 ? AddedPHIs[0]
+ : SSAUpdate.FindValueForBlock(UserBB);
+ if (V)
+ DVI->replaceVariableLocationOp(I, V);
+ }
+
+ // SSAUpdater might have inserted phi-nodes inside other loops. We'll need
+ // to post-process them to keep LCSSA form.
+ for (PHINode *InsertedPN : InsertedPHIs) {
+ if (auto *OtherLoop = LI.getLoopFor(InsertedPN->getParent()))
+ if (!L->contains(OtherLoop))
+ PostProcessPHIs.push_back(InsertedPN);
+ }
+
+ // Post process PHI instructions that were inserted into another disjoint
+ // loop and update their exits properly.
+ for (auto *PostProcessPN : PostProcessPHIs)
+ if (!PostProcessPN->use_empty())
+ Worklist.push_back(PostProcessPN);
+
+ // Keep track of PHI nodes that we want to remove because they did not have
+ // any uses rewritten.
+ for (PHINode *PN : AddedPHIs)
+ if (PN->use_empty())
+ LocalPHIsToRemove.insert(PN);
+
+ Changed = true;
+ }
+
+ // Remove PHI nodes that did not have any uses rewritten or add them to
+ // PHIsToRemove, so the caller can remove them after some additional cleanup.
+ // We need to redo the use_empty() check here, because even if the PHI node
+ // wasn't used when added to LocalPHIsToRemove, later added PHI nodes can be
+ // using it. This cleanup is not guaranteed to handle trees/cycles of PHI
+ // nodes that are only used by each other. Such situations have only been
+ // noticed when the input IR contains unreachable code, and leaving some extra
+ // redundant PHI nodes in such situations is considered a minor problem.
+ if (PHIsToRemove) {
+ PHIsToRemove->append(LocalPHIsToRemove.begin(), LocalPHIsToRemove.end());
+ } else {
+ for (PHINode *PN : LocalPHIsToRemove)
+ if (PN->use_empty())
+ PN->eraseFromParent();
+ }
+ return Changed;
+}
+
+// Compute the set of BasicBlocks in the loop `L` dominating at least one exit.
+static void computeBlocksDominatingExits(
+ Loop &L, const DominatorTree &DT, SmallVector<BasicBlock *, 8> &ExitBlocks,
+ SmallSetVector<BasicBlock *, 8> &BlocksDominatingExits) {
+ // We start from the exit blocks, as every block trivially dominates itself
+ // (not strictly).
+ SmallVector<BasicBlock *, 8> BBWorklist(ExitBlocks);
+
+ while (!BBWorklist.empty()) {
+ BasicBlock *BB = BBWorklist.pop_back_val();
+
+ // Check if this is a loop header. If this is the case, we're done.
+ if (L.getHeader() == BB)
+ continue;
+
+ // Otherwise, add its immediate predecessor in the dominator tree to the
+ // worklist, unless we visited it already.
+ BasicBlock *IDomBB = DT.getNode(BB)->getIDom()->getBlock();
+
+ // Exit blocks can have an immediate dominator not belonging to the
+ // loop. If an exit block is immediately dominated by a block outside the
+ // loop, then not all paths from that dominator to the exit block go
+ // through the loop.
+ // Example:
+ //
+ // |---- A
+ // | |
+ // | B<--
+ // | | |
+ // |---> C --
+ // |
+ // D
+ //
+ // C is the exit block of the loop and it's immediately dominated by A,
+ // which doesn't belong to the loop.
+ if (!L.contains(IDomBB))
+ continue;
+
+ if (BlocksDominatingExits.insert(IDomBB))
+ BBWorklist.push_back(IDomBB);
+ }
+}
+
+bool llvm::formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
+ ScalarEvolution *SE) {
+ bool Changed = false;
+
+#ifdef EXPENSIVE_CHECKS
+ // Verify all sub-loops are in LCSSA form already.
+ for (Loop *SubLoop: L) {
+ (void)SubLoop; // Silence unused variable warning.
+ assert(SubLoop->isRecursivelyLCSSAForm(DT, *LI) && "Subloop not in LCSSA!");
+ }
+#endif
+
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ L.getExitBlocks(ExitBlocks);
+ if (ExitBlocks.empty())
+ return false;
+
+ SmallSetVector<BasicBlock *, 8> BlocksDominatingExits;
+
+ // We want to avoid use-scanning by leveraging dominance information:
+ // if a block doesn't dominate any of the loop exits, then none of the values
+ // defined in the loop can be used outside of it.
+ // We compute the set of blocks fulfilling this condition in advance by
+ // walking the dominator tree upwards until we hit a loop header.
+ computeBlocksDominatingExits(L, DT, ExitBlocks, BlocksDominatingExits);
+
+ SmallVector<Instruction *, 8> Worklist;
+
+ // Look at all the instructions in the loop, checking to see if they have uses
+ // outside the loop. If so, put them into the worklist to rewrite those uses.
+ for (BasicBlock *BB : BlocksDominatingExits) {
+ // Skip blocks that are part of any sub-loops; they must be in LCSSA
+ // already.
+ if (LI->getLoopFor(BB) != &L)
+ continue;
+ for (Instruction &I : *BB) {
+ // Reject two common cases fast: instructions with no uses (like stores)
+ // and instructions with one use that is in the same block as this.
+ if (I.use_empty() ||
+ (I.hasOneUse() && I.user_back()->getParent() == BB &&
+ !isa<PHINode>(I.user_back())))
+ continue;
+
+ // Tokens cannot be used in PHI nodes, so we skip over them.
+ // We can run into tokens which are live out of a loop with catchswitch
+ // instructions in Windows EH if the catchswitch has one catchpad which
+ // is inside the loop and another which is not.
+ if (I.getType()->isTokenTy())
+ continue;
+
+ Worklist.push_back(&I);
+ }
+ }
+
+ IRBuilder<> Builder(L.getHeader()->getContext());
+ Changed = formLCSSAForInstructions(Worklist, DT, *LI, SE, Builder);
+
+ // If we modified the code, remove any caches about the loop from SCEV to
+ // avoid dangling entries.
+ // FIXME: This is a big hammer, can we clear the cache more selectively?
+ if (SE && Changed)
+ SE->forgetLoop(&L);
+
+ assert(L.isLCSSAForm(DT));
+
+ return Changed;
+}
+
+/// Process a loop nest depth first.
+bool llvm::formLCSSARecursively(Loop &L, const DominatorTree &DT,
+ const LoopInfo *LI, ScalarEvolution *SE) {
+ bool Changed = false;
+
+ // Recurse depth-first through inner loops.
+ for (Loop *SubLoop : L.getSubLoops())
+ Changed |= formLCSSARecursively(*SubLoop, DT, LI, SE);
+
+ Changed |= formLCSSA(L, DT, LI, SE);
+ return Changed;
+}
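
A minimal sketch (assumed helper, not part of this file) of driving these utilities outside a pass: compute a DominatorTree and LoopInfo for a function, then put each top-level loop into LCSSA form.

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;

// Hypothetical helper: bring every loop in F into LCSSA form.
bool formLCSSAOnFunctionExample(Function &F) {
  DominatorTree DT(F);
  LoopInfo LI(DT);
  bool Changed = false;
  for (Loop *TopLevelLoop : LI)   // iterates the top-level loops of F
    Changed |= formLCSSARecursively(*TopLevelLoop, DT, &LI, /*SE=*/nullptr);
  return Changed;
}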
+
+/// Process all loops in the function, inner-most out.
+static bool formLCSSAOnAllLoops(const LoopInfo *LI, const DominatorTree &DT,
+ ScalarEvolution *SE) {
+ bool Changed = false;
+ for (const auto &L : *LI)
+ Changed |= formLCSSARecursively(*L, DT, LI, SE);
+ return Changed;
+}
+
+namespace {
+struct LCSSAWrapperPass : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ LCSSAWrapperPass() : FunctionPass(ID) {
+ initializeLCSSAWrapperPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ // Cached analysis information for the current function.
+ DominatorTree *DT;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+
+ bool runOnFunction(Function &F) override;
+ void verifyAnalysis() const override {
+ // This check is very expensive. On loop-intensive compiles it may cause up
+ // to a 10x slowdown. Currently it's disabled by default. LPPassManager
+ // always performs a limited form of LCSSA verification. Similar reasoning
+ // was used for the LoopInfo verifier.
+ if (VerifyLoopLCSSA) {
+ assert(all_of(*LI,
+ [&](Loop *L) {
+ return L->isRecursivelyLCSSAForm(*DT, *LI);
+ }) &&
+ "LCSSA form is broken!");
+ }
+ };
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG. It maintains both of these,
+ /// as well as the CFG. It also requires dominator information.
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
+ AU.addPreserved<BranchProbabilityInfoWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
+
+ // This is needed to perform LCSSA verification inside LPPassManager
+ AU.addRequired<LCSSAVerificationPass>();
+ AU.addPreserved<LCSSAVerificationPass>();
+ }
+};
+}
+
+char LCSSAWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LCSSAVerificationPass)
+INITIALIZE_PASS_END(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass",
+ false, false)
+
+Pass *llvm::createLCSSAPass() { return new LCSSAWrapperPass(); }
+char &llvm::LCSSAID = LCSSAWrapperPass::ID;
+
+/// Transform \p F into loop-closed SSA form.
+bool LCSSAWrapperPass::runOnFunction(Function &F) {
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
+ SE = SEWP ? &SEWP->getSE() : nullptr;
+
+ return formLCSSAOnAllLoops(LI, *DT, SE);
+}
+
+PreservedAnalyses LCSSAPass::run(Function &F, FunctionAnalysisManager &AM) {
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F);
+ if (!formLCSSAOnAllLoops(&LI, DT, SE))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ PA.preserve<ScalarEvolutionAnalysis>();
+ // BPI maps terminators to probabilities; since we don't modify the CFG, no
+ // updates are needed to preserve it.
+ PA.preserve<BranchProbabilityAnalysis>();
+ PA.preserve<MemorySSAAnalysis>();
+ return PA;
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
new file mode 100644
index 0000000000..5dd469c7af
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
@@ -0,0 +1,562 @@
+//===-- LibCallsShrinkWrap.cpp ----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass shrink-wraps a call to function if the result is not used.
+// The call can set errno but is otherwise side effect free. For example:
+// sqrt(val);
+// is transformed to
+// if (val < 0)
+// sqrt(val);
+// Even if the result of the library call is not being used, the compiler
+// cannot safely delete the call because the function can set errno on error
+// conditions.
+// Note that in many functions the error condition depends solely on the
+// incoming parameter. In this optimization, we generate the condition that
+// can lead to errno being set and use it to shrink-wrap the call. Since the
+// chances of hitting the error condition are low, the runtime call is
+// effectively eliminated.
+//
+// These partially dead calls are usually results of C++ abstraction penalty
+// exposed by inlining.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+#include <cmath>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "libcalls-shrinkwrap"
+
+STATISTIC(NumWrappedOneCond, "Number of One-Condition Wrappers Inserted");
+STATISTIC(NumWrappedTwoCond, "Number of Two-Condition Wrappers Inserted");
+
+namespace {
+class LibCallsShrinkWrapLegacyPass : public FunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit LibCallsShrinkWrapLegacyPass() : FunctionPass(ID) {
+ initializeLibCallsShrinkWrapLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
+};
+}
+
+char LibCallsShrinkWrapLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LibCallsShrinkWrapLegacyPass, "libcalls-shrinkwrap",
+ "Conditionally eliminate dead library calls", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(LibCallsShrinkWrapLegacyPass, "libcalls-shrinkwrap",
+ "Conditionally eliminate dead library calls", false, false)
+
+namespace {
+class LibCallsShrinkWrap : public InstVisitor<LibCallsShrinkWrap> {
+public:
+ LibCallsShrinkWrap(const TargetLibraryInfo &TLI, DominatorTree *DT)
+ : TLI(TLI), DT(DT){};
+ void visitCallInst(CallInst &CI) { checkCandidate(CI); }
+ bool perform() {
+ bool Changed = false;
+ for (auto &CI : WorkList) {
+ LLVM_DEBUG(dbgs() << "CDCE calls: " << CI->getCalledFunction()->getName()
+ << "\n");
+ if (perform(CI)) {
+ Changed = true;
+ LLVM_DEBUG(dbgs() << "Transformed\n");
+ }
+ }
+ return Changed;
+ }
+
+private:
+ bool perform(CallInst *CI);
+ void checkCandidate(CallInst &CI);
+ void shrinkWrapCI(CallInst *CI, Value *Cond);
+ bool performCallDomainErrorOnly(CallInst *CI, const LibFunc &Func);
+ bool performCallErrors(CallInst *CI, const LibFunc &Func);
+ bool performCallRangeErrorOnly(CallInst *CI, const LibFunc &Func);
+ Value *generateOneRangeCond(CallInst *CI, const LibFunc &Func);
+ Value *generateTwoRangeCond(CallInst *CI, const LibFunc &Func);
+ Value *generateCondForPow(CallInst *CI, const LibFunc &Func);
+
+ // Create an OR of two conditions.
+ Value *createOrCond(CallInst *CI, CmpInst::Predicate Cmp, float Val,
+ CmpInst::Predicate Cmp2, float Val2) {
+ IRBuilder<> BBBuilder(CI);
+ Value *Arg = CI->getArgOperand(0);
+ auto Cond2 = createCond(BBBuilder, Arg, Cmp2, Val2);
+ auto Cond1 = createCond(BBBuilder, Arg, Cmp, Val);
+ return BBBuilder.CreateOr(Cond1, Cond2);
+ }
+
+ // Create a single condition using IRBuilder.
+ Value *createCond(IRBuilder<> &BBBuilder, Value *Arg, CmpInst::Predicate Cmp,
+ float Val) {
+ Constant *V = ConstantFP::get(BBBuilder.getContext(), APFloat(Val));
+ if (!Arg->getType()->isFloatTy())
+ V = ConstantExpr::getFPExtend(V, Arg->getType());
+ return BBBuilder.CreateFCmp(Cmp, Arg, V);
+ }
+
+ // Create a single condition.
+ Value *createCond(CallInst *CI, CmpInst::Predicate Cmp, float Val) {
+ IRBuilder<> BBBuilder(CI);
+ Value *Arg = CI->getArgOperand(0);
+ return createCond(BBBuilder, Arg, Cmp, Val);
+ }
+
+ const TargetLibraryInfo &TLI;
+ DominatorTree *DT;
+ SmallVector<CallInst *, 16> WorkList;
+};
+} // end anonymous namespace
+
+// Perform the transformation to calls with errno set by domain error.
+bool LibCallsShrinkWrap::performCallDomainErrorOnly(CallInst *CI,
+ const LibFunc &Func) {
+ Value *Cond = nullptr;
+
+ switch (Func) {
+ case LibFunc_acos: // DomainError: (x < -1 || x > 1)
+ case LibFunc_acosf: // Same as acos
+ case LibFunc_acosl: // Same as acos
+ case LibFunc_asin: // DomainError: (x < -1 || x > 1)
+ case LibFunc_asinf: // Same as asin
+ case LibFunc_asinl: // Same as asin
+ {
+ ++NumWrappedTwoCond;
+ Cond = createOrCond(CI, CmpInst::FCMP_OLT, -1.0f, CmpInst::FCMP_OGT, 1.0f);
+ break;
+ }
+ case LibFunc_cos: // DomainError: (x == +inf || x == -inf)
+ case LibFunc_cosf: // Same as cos
+ case LibFunc_cosl: // Same as cos
+ case LibFunc_sin: // DomainError: (x == +inf || x == -inf)
+ case LibFunc_sinf: // Same as sin
+ case LibFunc_sinl: // Same as sin
+ {
+ ++NumWrappedTwoCond;
+ Cond = createOrCond(CI, CmpInst::FCMP_OEQ, INFINITY, CmpInst::FCMP_OEQ,
+ -INFINITY);
+ break;
+ }
+ case LibFunc_acosh: // DomainError: (x < 1)
+ case LibFunc_acoshf: // Same as acosh
+ case LibFunc_acoshl: // Same as acosh
+ {
+ ++NumWrappedOneCond;
+ Cond = createCond(CI, CmpInst::FCMP_OLT, 1.0f);
+ break;
+ }
+ case LibFunc_sqrt: // DomainError: (x < 0)
+ case LibFunc_sqrtf: // Same as sqrt
+ case LibFunc_sqrtl: // Same as sqrt
+ {
+ ++NumWrappedOneCond;
+ Cond = createCond(CI, CmpInst::FCMP_OLT, 0.0f);
+ break;
+ }
+ default:
+ return false;
+ }
+ shrinkWrapCI(CI, Cond);
+ return true;
+}
+
+// Perform the transformation to calls with errno set by range error.
+bool LibCallsShrinkWrap::performCallRangeErrorOnly(CallInst *CI,
+ const LibFunc &Func) {
+ Value *Cond = nullptr;
+
+ switch (Func) {
+ case LibFunc_cosh:
+ case LibFunc_coshf:
+ case LibFunc_coshl:
+ case LibFunc_exp:
+ case LibFunc_expf:
+ case LibFunc_expl:
+ case LibFunc_exp10:
+ case LibFunc_exp10f:
+ case LibFunc_exp10l:
+ case LibFunc_exp2:
+ case LibFunc_exp2f:
+ case LibFunc_exp2l:
+ case LibFunc_sinh:
+ case LibFunc_sinhf:
+ case LibFunc_sinhl: {
+ Cond = generateTwoRangeCond(CI, Func);
+ break;
+ }
+ case LibFunc_expm1: // RangeError: (709, inf)
+ case LibFunc_expm1f: // RangeError: (88, inf)
+ case LibFunc_expm1l: // RangeError: (11356, inf)
+ {
+ Cond = generateOneRangeCond(CI, Func);
+ break;
+ }
+ default:
+ return false;
+ }
+ shrinkWrapCI(CI, Cond);
+ return true;
+}
+
+// Perform the transformation to calls with errno set by combination of errors.
+bool LibCallsShrinkWrap::performCallErrors(CallInst *CI,
+ const LibFunc &Func) {
+ Value *Cond = nullptr;
+
+ switch (Func) {
+ case LibFunc_atanh: // DomainError: (x < -1 || x > 1)
+ // PoleError: (x == -1 || x == 1)
+ // Overall Cond: (x <= -1 || x >= 1)
+ case LibFunc_atanhf: // Same as atanh
+ case LibFunc_atanhl: // Same as atanh
+ {
+ ++NumWrappedTwoCond;
+ Cond = createOrCond(CI, CmpInst::FCMP_OLE, -1.0f, CmpInst::FCMP_OGE, 1.0f);
+ break;
+ }
+ case LibFunc_log: // DomainError: (x < 0)
+ // PoleError: (x == 0)
+ // Overall Cond: (x <= 0)
+ case LibFunc_logf: // Same as log
+ case LibFunc_logl: // Same as log
+ case LibFunc_log10: // Same as log
+ case LibFunc_log10f: // Same as log
+ case LibFunc_log10l: // Same as log
+ case LibFunc_log2: // Same as log
+ case LibFunc_log2f: // Same as log
+ case LibFunc_log2l: // Same as log
+ case LibFunc_logb: // Same as log
+ case LibFunc_logbf: // Same as log
+ case LibFunc_logbl: // Same as log
+ {
+ ++NumWrappedOneCond;
+ Cond = createCond(CI, CmpInst::FCMP_OLE, 0.0f);
+ break;
+ }
+ case LibFunc_log1p: // DomainError: (x < -1)
+ // PoleError: (x == -1)
+ // Overall Cond: (x <= -1)
+ case LibFunc_log1pf: // Same as log1p
+ case LibFunc_log1pl: // Same as log1p
+ {
+ ++NumWrappedOneCond;
+ Cond = createCond(CI, CmpInst::FCMP_OLE, -1.0f);
+ break;
+ }
+ case LibFunc_pow: // DomainError: x < 0 and y is noninteger
+ // PoleError: x == 0 and y < 0
+ // RangeError: overflow or underflow
+ case LibFunc_powf:
+ case LibFunc_powl: {
+ Cond = generateCondForPow(CI, Func);
+ if (Cond == nullptr)
+ return false;
+ break;
+ }
+ default:
+ return false;
+ }
+ assert(Cond && "performCallErrors should not see an empty condition");
+ shrinkWrapCI(CI, Cond);
+ return true;
+}
+
+// Checks whether CI is a candidate for shrink-wrapping and, if so, puts it
+// into the work list.
+void LibCallsShrinkWrap::checkCandidate(CallInst &CI) {
+ if (CI.isNoBuiltin())
+ return;
+ // A possible improvement is to handle the calls with the return value being
+ // used. If there is an API for a fast libcall implementation that does not
+ // set errno, we can use the same framework to direct/wrap the call to the
+ // fast API in the error-free path, and leave the original call in the slow
+ // path.
+ if (!CI.use_empty())
+ return;
+
+ LibFunc Func;
+ Function *Callee = CI.getCalledFunction();
+ if (!Callee)
+ return;
+ if (!TLI.getLibFunc(*Callee, Func) || !TLI.has(Func))
+ return;
+
+ if (CI.arg_empty())
+ return;
+ // TODO: Handle long double in other formats.
+ Type *ArgType = CI.getArgOperand(0)->getType();
+ if (!(ArgType->isFloatTy() || ArgType->isDoubleTy() ||
+ ArgType->isX86_FP80Ty()))
+ return;
+
+ WorkList.push_back(&CI);
+}
+
+// Generate the upper bound condition for RangeError.
+Value *LibCallsShrinkWrap::generateOneRangeCond(CallInst *CI,
+ const LibFunc &Func) {
+ float UpperBound;
+ switch (Func) {
+ case LibFunc_expm1: // RangeError: (709, inf)
+ UpperBound = 709.0f;
+ break;
+ case LibFunc_expm1f: // RangeError: (88, inf)
+ UpperBound = 88.0f;
+ break;
+ case LibFunc_expm1l: // RangeError: (11356, inf)
+ UpperBound = 11356.0f;
+ break;
+ default:
+ llvm_unreachable("Unhandled library call!");
+ }
+
+ ++NumWrappedOneCond;
+ return createCond(CI, CmpInst::FCMP_OGT, UpperBound);
+}
+
+// Generate the lower and upper bound condition for RangeError.
+Value *LibCallsShrinkWrap::generateTwoRangeCond(CallInst *CI,
+ const LibFunc &Func) {
+ float UpperBound, LowerBound;
+ switch (Func) {
+ case LibFunc_cosh: // RangeError: (x < -710 || x > 710)
+ case LibFunc_sinh: // Same as cosh
+ LowerBound = -710.0f;
+ UpperBound = 710.0f;
+ break;
+ case LibFunc_coshf: // RangeError: (x < -89 || x > 89)
+ case LibFunc_sinhf: // Same as coshf
+ LowerBound = -89.0f;
+ UpperBound = 89.0f;
+ break;
+ case LibFunc_coshl: // RangeError: (x < -11357 || x > 11357)
+ case LibFunc_sinhl: // Same as coshl
+ LowerBound = -11357.0f;
+ UpperBound = 11357.0f;
+ break;
+ case LibFunc_exp: // RangeError: (x < -745 || x > 709)
+ LowerBound = -745.0f;
+ UpperBound = 709.0f;
+ break;
+ case LibFunc_expf: // RangeError: (x < -103 || x > 88)
+ LowerBound = -103.0f;
+ UpperBound = 88.0f;
+ break;
+ case LibFunc_expl: // RangeError: (x < -11399 || x > 11356)
+ LowerBound = -11399.0f;
+ UpperBound = 11356.0f;
+ break;
+ case LibFunc_exp10: // RangeError: (x < -323 || x > 308)
+ LowerBound = -323.0f;
+ UpperBound = 308.0f;
+ break;
+ case LibFunc_exp10f: // RangeError: (x < -45 || x > 38)
+ LowerBound = -45.0f;
+ UpperBound = 38.0f;
+ break;
+ case LibFunc_exp10l: // RangeError: (x < -4950 || x > 4932)
+ LowerBound = -4950.0f;
+ UpperBound = 4932.0f;
+ break;
+ case LibFunc_exp2: // RangeError: (x < -1074 || x > 1023)
+ LowerBound = -1074.0f;
+ UpperBound = 1023.0f;
+ break;
+ case LibFunc_exp2f: // RangeError: (x < -149 || x > 127)
+ LowerBound = -149.0f;
+ UpperBound = 127.0f;
+ break;
+ case LibFunc_exp2l: // RangeError: (x < -16445 || x > 11383)
+ LowerBound = -16445.0f;
+ UpperBound = 11383.0f;
+ break;
+ default:
+ llvm_unreachable("Unhandled library call!");
+ }
+
+ ++NumWrappedTwoCond;
+ return createOrCond(CI, CmpInst::FCMP_OGT, UpperBound, CmpInst::FCMP_OLT,
+ LowerBound);
+}
+
+// For pow(x,y), We only handle the following cases:
+// (1) x is a constant && (x >= 1) && (x < MaxUInt8)
+// Cond is: (y > 127)
+// (2) x is a value coming from an integer type.
+// (2.1) if x's bit_size == 8
+// Cond: (x <= 0 || y > 128)
+// (2.2) if x's bit_size is 16
+// Cond: (x <= 0 || y > 64)
+// (2.3) if x's bit_size is 32
+// Cond: (x <= 0 || y > 32)
+// Support for powl(x,y) and powf(x,y) is TBD.
+//
+// Note that the condition can be more conservative than the actual condition
+// (i.e. we might invoke calls that will not set errno).
+//
+Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI,
+ const LibFunc &Func) {
+ // FIXME: LibFunc_powf and powl TBD.
+ if (Func != LibFunc_pow) {
+ LLVM_DEBUG(dbgs() << "Not handled powf() and powl()\n");
+ return nullptr;
+ }
+
+ Value *Base = CI->getArgOperand(0);
+ Value *Exp = CI->getArgOperand(1);
+ IRBuilder<> BBBuilder(CI);
+
+ // Constant Base case.
+ if (ConstantFP *CF = dyn_cast<ConstantFP>(Base)) {
+ double D = CF->getValueAPF().convertToDouble();
+ if (D < 1.0f || D > APInt::getMaxValue(8).getZExtValue()) {
+ LLVM_DEBUG(dbgs() << "Not handled pow(): constant base out of range\n");
+ return nullptr;
+ }
+
+ ++NumWrappedOneCond;
+ Constant *V = ConstantFP::get(CI->getContext(), APFloat(127.0f));
+ if (!Exp->getType()->isFloatTy())
+ V = ConstantExpr::getFPExtend(V, Exp->getType());
+ return BBBuilder.CreateFCmp(CmpInst::FCMP_OGT, Exp, V);
+ }
+
+ // If the Base value comes from an integer type.
+ Instruction *I = dyn_cast<Instruction>(Base);
+ if (!I) {
+ LLVM_DEBUG(dbgs() << "Not handled pow(): FP type base\n");
+ return nullptr;
+ }
+ unsigned Opcode = I->getOpcode();
+ if (Opcode == Instruction::UIToFP || Opcode == Instruction::SIToFP) {
+ unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ float UpperV = 0.0f;
+ if (BW == 8)
+ UpperV = 128.0f;
+ else if (BW == 16)
+ UpperV = 64.0f;
+ else if (BW == 32)
+ UpperV = 32.0f;
+ else {
+ LLVM_DEBUG(dbgs() << "Not handled pow(): type too wide\n");
+ return nullptr;
+ }
+
+ ++NumWrappedTwoCond;
+ Constant *V = ConstantFP::get(CI->getContext(), APFloat(UpperV));
+ Constant *V0 = ConstantFP::get(CI->getContext(), APFloat(0.0f));
+ if (!Exp->getType()->isFloatTy())
+ V = ConstantExpr::getFPExtend(V, Exp->getType());
+ if (!Base->getType()->isFloatTy())
+ V0 = ConstantExpr::getFPExtend(V0, Exp->getType());
+
+ Value *Cond = BBBuilder.CreateFCmp(CmpInst::FCMP_OGT, Exp, V);
+ Value *Cond0 = BBBuilder.CreateFCmp(CmpInst::FCMP_OLE, Base, V0);
+ return BBBuilder.CreateOr(Cond0, Cond);
+ }
+ LLVM_DEBUG(dbgs() << "Not handled pow(): base not from integer convert\n");
+ return nullptr;
+}
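
To restate the case analysis above outside of IR emission, here is a standalone mirror (illustrative only, not LLVM API code; the helper name is invented) of the exponent bound chosen for an integer-typed base.

#include <optional>

// Mirrors the bounds documented above: for a base converted from an integer
// of the given bit width, the emitted check is (x <= 0 || y > bound).
std::optional<float> powExponentBoundForBaseWidth(unsigned BitWidth) {
  switch (BitWidth) {
  case 8:  return 128.0f;
  case 16: return 64.0f;
  case 32: return 32.0f;
  default: return std::nullopt; // wider bases are not shrink-wrapped
  }
}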
+
+// Wrap the library call with the condition under which it can potentially set
+// errno.
+void LibCallsShrinkWrap::shrinkWrapCI(CallInst *CI, Value *Cond) {
+ assert(Cond != nullptr && "ShrinkWrapCI is not expecting an empty call inst");
+ MDNode *BranchWeights =
+ MDBuilder(CI->getContext()).createBranchWeights(1, 2000);
+
+ Instruction *NewInst =
+ SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights, DT);
+ BasicBlock *CallBB = NewInst->getParent();
+ CallBB->setName("cdce.call");
+ BasicBlock *SuccBB = CallBB->getSingleSuccessor();
+ assert(SuccBB && "The split block should have a single successor");
+ SuccBB->setName("cdce.end");
+ CI->removeFromParent();
+ CI->insertInto(CallBB, CallBB->getFirstInsertionPt());
+ LLVM_DEBUG(dbgs() << "== Basic Block After ==");
+ LLVM_DEBUG(dbgs() << *CallBB->getSinglePredecessor() << *CallBB
+ << *CallBB->getSingleSuccessor() << "\n");
+}
+
+// Perform the transformation to a single candidate.
+bool LibCallsShrinkWrap::perform(CallInst *CI) {
+ LibFunc Func;
+ Function *Callee = CI->getCalledFunction();
+ assert(Callee && "perform() should apply to a non-empty callee");
+ TLI.getLibFunc(*Callee, Func);
+ assert(Func && "perform() is not expecting an empty function");
+
+ if (performCallDomainErrorOnly(CI, Func) || performCallRangeErrorOnly(CI, Func))
+ return true;
+ return performCallErrors(CI, Func);
+}
+
+void LibCallsShrinkWrapLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
+
+static bool runImpl(Function &F, const TargetLibraryInfo &TLI,
+ DominatorTree *DT) {
+ if (F.hasFnAttribute(Attribute::OptimizeForSize))
+ return false;
+ LibCallsShrinkWrap CCDCE(TLI, DT);
+ CCDCE.visit(F);
+ bool Changed = CCDCE.perform();
+
+// Verify the dominator after we've updated it locally.
+ assert(!DT || DT->verify(DominatorTree::VerificationLevel::Fast));
+ return Changed;
+}
+
+bool LibCallsShrinkWrapLegacyPass::runOnFunction(Function &F) {
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ return runImpl(F, TLI, DT);
+}
+
+namespace llvm {
+char &LibCallsShrinkWrapPassID = LibCallsShrinkWrapLegacyPass::ID;
+
+// Public interface to LibCallsShrinkWrap pass.
+FunctionPass *createLibCallsShrinkWrapPass() {
+ return new LibCallsShrinkWrapLegacyPass();
+}
+
+PreservedAnalyses LibCallsShrinkWrapPass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
+ auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
+ if (!runImpl(F, TLI, DT))
+ return PreservedAnalyses::all();
+ auto PA = PreservedAnalyses();
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
+}
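
To round out the file, a hedged sketch (hypothetical driver, assuming only the new-pass-manager APIs used above) of applying LibCallsShrinkWrapPass to a single function.

#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
using namespace llvm;

// Hypothetical example: run the shrink-wrap pass on F via the new pass manager.
void runLibCallsShrinkWrapExample(Function &F) {
  PassBuilder PB;
  FunctionAnalysisManager FAM;
  PB.registerFunctionAnalyses(FAM); // registers TargetLibraryAnalysis, etc.
  FunctionPassManager FPM;
  FPM.addPass(LibCallsShrinkWrapPass());
  FPM.run(F, FAM);
}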
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/Local.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/Local.cpp
new file mode 100644
index 0000000000..31cdd2ee56
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/Local.cpp
@@ -0,0 +1,3518 @@
+//===- Local.cpp - Functions to perform local transformations -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions perform various local transformations to the
+// program.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumeBundleQueries.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsWebAssembly.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <map>
+#include <optional>
+#include <utility>
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+#define DEBUG_TYPE "local"
+
+STATISTIC(NumRemoved, "Number of unreachable basic blocks removed");
+STATISTIC(NumPHICSEs, "Number of PHI's that got CSE'd");
+
+static cl::opt<bool> PHICSEDebugHash(
+ "phicse-debug-hash",
+#ifdef EXPENSIVE_CHECKS
+ cl::init(true),
+#else
+ cl::init(false),
+#endif
+ cl::Hidden,
+ cl::desc("Perform extra assertion checking to verify that PHINodes's hash "
+ "function is well-behaved w.r.t. its isEqual predicate"));
+
+static cl::opt<unsigned> PHICSENumPHISmallSize(
+ "phicse-num-phi-smallsize", cl::init(32), cl::Hidden,
+ cl::desc(
+ "When the basic block contains not more than this number of PHI nodes, "
+ "perform a (faster!) exhaustive search instead of set-driven one."));
+
+// Max recursion depth for collectBitParts used when detecting bswap and
+// bitreverse idioms.
+static const unsigned BitPartRecursionMaxDepth = 48;
+
+//===----------------------------------------------------------------------===//
+// Local constant propagation.
+//
+
+/// ConstantFoldTerminator - If a terminator instruction is predicated on a
+/// constant value, convert it into an unconditional branch to the constant
+/// destination. This is a nontrivial operation because the successors of this
+/// basic block must have their PHI nodes updated.
+/// Also calls RecursivelyDeleteTriviallyDeadInstructions() on any branch/switch
+/// conditions and indirectbr addresses this might make dead if
+/// DeleteDeadConditions is true.
+bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
+ const TargetLibraryInfo *TLI,
+ DomTreeUpdater *DTU) {
+ Instruction *T = BB->getTerminator();
+ IRBuilder<> Builder(T);
+
+ // Branch - See if we are conditional jumping on constant
+ if (auto *BI = dyn_cast<BranchInst>(T)) {
+ if (BI->isUnconditional()) return false; // Can't optimize uncond branch
+
+ BasicBlock *Dest1 = BI->getSuccessor(0);
+ BasicBlock *Dest2 = BI->getSuccessor(1);
+
+ if (Dest2 == Dest1) { // Conditional branch to same location?
+ // This branch matches something like this:
+ // br bool %cond, label %Dest, label %Dest
+ // and changes it into: br label %Dest
+
+ // Let the basic block know that we are letting go of one copy of it.
+ assert(BI->getParent() && "Terminator not inserted in block!");
+ Dest1->removePredecessor(BI->getParent());
+
+ // Replace the conditional branch with an unconditional one.
+ BranchInst *NewBI = Builder.CreateBr(Dest1);
+
+ // Transfer the metadata to the new branch instruction.
+ NewBI->copyMetadata(*BI, {LLVMContext::MD_loop, LLVMContext::MD_dbg,
+ LLVMContext::MD_annotation});
+
+ Value *Cond = BI->getCondition();
+ BI->eraseFromParent();
+ if (DeleteDeadConditions)
+ RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
+ return true;
+ }
+
+ if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
+ // Are we branching on constant?
+ // YES. Change to unconditional branch...
+ BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2;
+ BasicBlock *OldDest = Cond->getZExtValue() ? Dest2 : Dest1;
+
+ // Let the basic block know that we are letting go of it. Based on this,
+ // it will adjust its PHI nodes.
+ OldDest->removePredecessor(BB);
+
+ // Replace the conditional branch with an unconditional one.
+ BranchInst *NewBI = Builder.CreateBr(Destination);
+
+ // Transfer the metadata to the new branch instruction.
+ NewBI->copyMetadata(*BI, {LLVMContext::MD_loop, LLVMContext::MD_dbg,
+ LLVMContext::MD_annotation});
+
+ BI->eraseFromParent();
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Delete, BB, OldDest}});
+ return true;
+ }
+
+ return false;
+ }
+
+ if (auto *SI = dyn_cast<SwitchInst>(T)) {
+ // If we are switching on a constant, we can convert the switch to an
+ // unconditional branch.
+ auto *CI = dyn_cast<ConstantInt>(SI->getCondition());
+ BasicBlock *DefaultDest = SI->getDefaultDest();
+ BasicBlock *TheOnlyDest = DefaultDest;
+
+ // If the default is unreachable, ignore it when searching for TheOnlyDest.
+ if (isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg()) &&
+ SI->getNumCases() > 0) {
+ TheOnlyDest = SI->case_begin()->getCaseSuccessor();
+ }
+
+ bool Changed = false;
+
+ // Figure out which case it goes to.
+ for (auto i = SI->case_begin(), e = SI->case_end(); i != e;) {
+ // Found case matching a constant operand?
+ if (i->getCaseValue() == CI) {
+ TheOnlyDest = i->getCaseSuccessor();
+ break;
+ }
+
+ // Check to see if this branch is going to the same place as the default
+ // dest. If so, eliminate it as an explicit compare.
+ if (i->getCaseSuccessor() == DefaultDest) {
+ MDNode *MD = getValidBranchWeightMDNode(*SI);
+ unsigned NCases = SI->getNumCases();
+ // Fold the case metadata into the default if there will be any branches
+ // left, unless the metadata doesn't match the switch.
+ if (NCases > 1 && MD) {
+ // Collect branch weights into a vector.
+ SmallVector<uint32_t, 8> Weights;
+ extractBranchWeights(MD, Weights);
+
+ // Merge weight of this case to the default weight.
+ unsigned idx = i->getCaseIndex();
+ // TODO: Add overflow check.
+ Weights[0] += Weights[idx+1];
+ // Remove weight for this case.
+ std::swap(Weights[idx+1], Weights.back());
+ Weights.pop_back();
+ SI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BB->getContext()).
+ createBranchWeights(Weights));
+ }
+ // Remove this entry.
+ BasicBlock *ParentBB = SI->getParent();
+ DefaultDest->removePredecessor(ParentBB);
+ i = SI->removeCase(i);
+ e = SI->case_end();
+
+ // Removing this case may have made the condition constant. In that
+ // case, update CI and restart iteration through the cases.
+ if (auto *NewCI = dyn_cast<ConstantInt>(SI->getCondition())) {
+ CI = NewCI;
+ i = SI->case_begin();
+ }
+
+ Changed = true;
+ continue;
+ }
+
+ // Otherwise, check to see if the switch only branches to one destination.
+ // We do this by resetting "TheOnlyDest" to null when we find two non-equal
+ // destinations.
+ if (i->getCaseSuccessor() != TheOnlyDest)
+ TheOnlyDest = nullptr;
+
+ // Increment this iterator as we haven't removed the case.
+ ++i;
+ }
+
+ if (CI && !TheOnlyDest) {
+ // We are branching on a constant that matches none of the cases; go to
+ // the default successor.
+ TheOnlyDest = SI->getDefaultDest();
+ }
+
+ // If we found a single destination that we can fold the switch into, do so
+ // now.
+ if (TheOnlyDest) {
+ // Insert the new branch.
+ Builder.CreateBr(TheOnlyDest);
+ BasicBlock *BB = SI->getParent();
+
+ SmallSet<BasicBlock *, 8> RemovedSuccessors;
+
+ // Remove entries from PHI nodes which we no longer branch to...
+ BasicBlock *SuccToKeep = TheOnlyDest;
+ for (BasicBlock *Succ : successors(SI)) {
+ if (DTU && Succ != TheOnlyDest)
+ RemovedSuccessors.insert(Succ);
+ // Found case matching a constant operand?
+ if (Succ == SuccToKeep) {
+ SuccToKeep = nullptr; // Don't modify the first branch to TheOnlyDest
+ } else {
+ Succ->removePredecessor(BB);
+ }
+ }
+
+ // Delete the old switch.
+ Value *Cond = SI->getCondition();
+ SI->eraseFromParent();
+ if (DeleteDeadConditions)
+ RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
+ if (DTU) {
+ std::vector<DominatorTree::UpdateType> Updates;
+ Updates.reserve(RemovedSuccessors.size());
+ for (auto *RemovedSuccessor : RemovedSuccessors)
+ Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
+ DTU->applyUpdates(Updates);
+ }
+ return true;
+ }
+
+ if (SI->getNumCases() == 1) {
+ // Otherwise, we can fold this switch into a conditional branch
+ // instruction if it has only one non-default destination.
+ auto FirstCase = *SI->case_begin();
+ Value *Cond = Builder.CreateICmpEQ(SI->getCondition(),
+ FirstCase.getCaseValue(), "cond");
+
+ // Insert the new branch.
+ BranchInst *NewBr = Builder.CreateCondBr(Cond,
+ FirstCase.getCaseSuccessor(),
+ SI->getDefaultDest());
+ SmallVector<uint32_t> Weights;
+ if (extractBranchWeights(*SI, Weights) && Weights.size() == 2) {
+ uint32_t DefWeight = Weights[0];
+ uint32_t CaseWeight = Weights[1];
+ // The TrueWeight should be the weight for the single case of SI.
+ NewBr->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BB->getContext())
+ .createBranchWeights(CaseWeight, DefWeight));
+ }
+
+ // Update make.implicit metadata to the newly-created conditional branch.
+ MDNode *MakeImplicitMD = SI->getMetadata(LLVMContext::MD_make_implicit);
+ if (MakeImplicitMD)
+ NewBr->setMetadata(LLVMContext::MD_make_implicit, MakeImplicitMD);
+
+ // Delete the old switch.
+ SI->eraseFromParent();
+ return true;
+ }
+ return Changed;
+ }
+
+ if (auto *IBI = dyn_cast<IndirectBrInst>(T)) {
+ // indirectbr blockaddress(@F, @BB) -> br label @BB
+ if (auto *BA =
+ dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) {
+ BasicBlock *TheOnlyDest = BA->getBasicBlock();
+ SmallSet<BasicBlock *, 8> RemovedSuccessors;
+
+ // Insert the new branch.
+ Builder.CreateBr(TheOnlyDest);
+
+ BasicBlock *SuccToKeep = TheOnlyDest;
+ for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
+ BasicBlock *DestBB = IBI->getDestination(i);
+ if (DTU && DestBB != TheOnlyDest)
+ RemovedSuccessors.insert(DestBB);
+ if (IBI->getDestination(i) == SuccToKeep) {
+ SuccToKeep = nullptr;
+ } else {
+ DestBB->removePredecessor(BB);
+ }
+ }
+ Value *Address = IBI->getAddress();
+ IBI->eraseFromParent();
+ if (DeleteDeadConditions)
+ // Delete pointer cast instructions.
+ RecursivelyDeleteTriviallyDeadInstructions(Address, TLI);
+
+ // Also zap the blockaddress constant if there are no users remaining,
+ // otherwise the destination is still marked as having its address taken.
+ if (BA->use_empty())
+ BA->destroyConstant();
+
+ // If we didn't find our destination in the IBI successor list, then we
+ // have undefined behavior. Replace the unconditional branch with an
+ // 'unreachable' instruction.
+ if (SuccToKeep) {
+ BB->getTerminator()->eraseFromParent();
+ new UnreachableInst(BB->getContext(), BB);
+ }
+
+ if (DTU) {
+ std::vector<DominatorTree::UpdateType> Updates;
+ Updates.reserve(RemovedSuccessors.size());
+ for (auto *RemovedSuccessor : RemovedSuccessors)
+ Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
+ DTU->applyUpdates(Updates);
+ }
+ return true;
+ }
+ }
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Local dead code elimination.
+//
+
+/// isInstructionTriviallyDead - Return true if the result produced by the
+/// instruction is not used, and the instruction has no side effects.
+///
+bool llvm::isInstructionTriviallyDead(Instruction *I,
+ const TargetLibraryInfo *TLI) {
+ if (!I->use_empty())
+ return false;
+ return wouldInstructionBeTriviallyDead(I, TLI);
+}
+
+bool llvm::wouldInstructionBeTriviallyDeadOnUnusedPaths(
+ Instruction *I, const TargetLibraryInfo *TLI) {
+ // Instructions that are "markers" and have implied meaning for the code
+ // around them (without explicit uses) are not dead on unused paths.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ if (II->getIntrinsicID() == Intrinsic::stacksave ||
+ II->getIntrinsicID() == Intrinsic::launder_invariant_group ||
+ II->isLifetimeStartOrEnd())
+ return false;
+ return wouldInstructionBeTriviallyDead(I, TLI);
+}
+
+bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
+ const TargetLibraryInfo *TLI) {
+ if (I->isTerminator())
+ return false;
+
+ // We don't want the landingpad-like instructions removed by anything this
+ // general.
+ if (I->isEHPad())
+ return false;
+
+ // We don't want debug info removed by anything this general, unless
+ // debug info is empty.
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) {
+ if (DDI->getAddress())
+ return false;
+ return true;
+ }
+ if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) {
+ if (DVI->hasArgList() || DVI->getValue(0))
+ return false;
+ return true;
+ }
+ if (DbgLabelInst *DLI = dyn_cast<DbgLabelInst>(I)) {
+ if (DLI->getLabel())
+ return false;
+ return true;
+ }
+
+ if (auto *CB = dyn_cast<CallBase>(I))
+ if (isRemovableAlloc(CB, TLI))
+ return true;
+
+ if (!I->willReturn()) {
+ auto *II = dyn_cast<IntrinsicInst>(I);
+ if (!II)
+ return false;
+
+ // TODO: These intrinsics are not safe to remove, because this may remove
+ // a well-defined trap.
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::wasm_trunc_signed:
+ case Intrinsic::wasm_trunc_unsigned:
+ case Intrinsic::ptrauth_auth:
+ case Intrinsic::ptrauth_resign:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ if (!I->mayHaveSideEffects())
+ return true;
+
+ // Special case intrinsics that "may have side effects" but can be deleted
+ // when dead.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ // Safe to delete llvm.stacksave and launder.invariant.group if dead.
+ if (II->getIntrinsicID() == Intrinsic::stacksave ||
+ II->getIntrinsicID() == Intrinsic::launder_invariant_group)
+ return true;
+
+ if (II->isLifetimeStartOrEnd()) {
+ auto *Arg = II->getArgOperand(1);
+ // Lifetime intrinsics are dead when their pointer operand is undef.
+ if (isa<UndefValue>(Arg))
+ return true;
+ // If the pointer operand is an alloca, global, or argument and the only
+ // uses are lifetime intrinsics then the intrinsics are dead.
+ if (isa<AllocaInst>(Arg) || isa<GlobalValue>(Arg) || isa<Argument>(Arg))
+ return llvm::all_of(Arg->uses(), [](Use &Use) {
+ if (IntrinsicInst *IntrinsicUse =
+ dyn_cast<IntrinsicInst>(Use.getUser()))
+ return IntrinsicUse->isLifetimeStartOrEnd();
+ return false;
+ });
+ return false;
+ }
+
+ // Assumptions are dead if their condition is trivially true. Guards on
+ // true are operationally no-ops. In the future we can consider more
+ // sophisticated tradeoffs for guards considering potential for check
+ // widening, but for now we keep things simple.
+ if ((II->getIntrinsicID() == Intrinsic::assume &&
+ isAssumeWithEmptyBundle(cast<AssumeInst>(*II))) ||
+ II->getIntrinsicID() == Intrinsic::experimental_guard) {
+ if (ConstantInt *Cond = dyn_cast<ConstantInt>(II->getArgOperand(0)))
+ return !Cond->isZero();
+
+ return false;
+ }
+
+ if (auto *FPI = dyn_cast<ConstrainedFPIntrinsic>(I)) {
+ std::optional<fp::ExceptionBehavior> ExBehavior =
+ FPI->getExceptionBehavior();
+ return *ExBehavior != fp::ebStrict;
+ }
+ }
+
+ if (auto *Call = dyn_cast<CallBase>(I)) {
+ if (Value *FreedOp = getFreedOperand(Call, TLI))
+ if (Constant *C = dyn_cast<Constant>(FreedOp))
+ return C->isNullValue() || isa<UndefValue>(C);
+ if (isMathLibCallNoop(Call, TLI))
+ return true;
+ }
+
+ // Non-volatile atomic loads from constants can be removed.
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ if (auto *GV = dyn_cast<GlobalVariable>(
+ LI->getPointerOperand()->stripPointerCasts()))
+ if (!LI->isVolatile() && GV->isConstant())
+ return true;
+
+ return false;
+}
+
+/// RecursivelyDeleteTriviallyDeadInstructions - If the specified value is a
+/// trivially dead instruction, delete it. If that makes any of its operands
+/// trivially dead, delete them too, recursively. Return true if any
+/// instructions were deleted.
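+ ///
+ /// For illustration only: deleting an unused non-volatile 'load' may leave
+ /// its address-computing 'getelementptr' without uses, in which case the
+ /// 'getelementptr' is deleted on a later iteration as well.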
+bool llvm::RecursivelyDeleteTriviallyDeadInstructions(
+ Value *V, const TargetLibraryInfo *TLI, MemorySSAUpdater *MSSAU,
+ std::function<void(Value *)> AboutToDeleteCallback) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I || !isInstructionTriviallyDead(I, TLI))
+ return false;
+
+ SmallVector<WeakTrackingVH, 16> DeadInsts;
+ DeadInsts.push_back(I);
+ RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU,
+ AboutToDeleteCallback);
+
+ return true;
+}
+
+bool llvm::RecursivelyDeleteTriviallyDeadInstructionsPermissive(
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts, const TargetLibraryInfo *TLI,
+ MemorySSAUpdater *MSSAU,
+ std::function<void(Value *)> AboutToDeleteCallback) {
+ unsigned S = 0, E = DeadInsts.size(), Alive = 0;
+ for (; S != E; ++S) {
+ auto *I = dyn_cast<Instruction>(DeadInsts[S]);
+ if (!I || !isInstructionTriviallyDead(I)) {
+ DeadInsts[S] = nullptr;
+ ++Alive;
+ }
+ }
+ if (Alive == E)
+ return false;
+ RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU,
+ AboutToDeleteCallback);
+ return true;
+}
+
+void llvm::RecursivelyDeleteTriviallyDeadInstructions(
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts, const TargetLibraryInfo *TLI,
+ MemorySSAUpdater *MSSAU,
+ std::function<void(Value *)> AboutToDeleteCallback) {
+ // Process the dead instruction list until empty.
+ while (!DeadInsts.empty()) {
+ Value *V = DeadInsts.pop_back_val();
+ Instruction *I = cast_or_null<Instruction>(V);
+ if (!I)
+ continue;
+ assert(isInstructionTriviallyDead(I, TLI) &&
+ "Live instruction found in dead worklist!");
+ assert(I->use_empty() && "Instructions with uses are not dead.");
+
+ // Don't lose the debug info while deleting the instructions.
+ salvageDebugInfo(*I);
+
+ if (AboutToDeleteCallback)
+ AboutToDeleteCallback(I);
+
+ // Null out all of the instruction's operands to see if any operand becomes
+ // dead as we go.
+ for (Use &OpU : I->operands()) {
+ Value *OpV = OpU.get();
+ OpU.set(nullptr);
+
+ if (!OpV->use_empty())
+ continue;
+
+ // If the operand is an instruction that became dead as we nulled out the
+ // operand, and if it is 'trivially' dead, delete it in a future loop
+ // iteration.
+ if (Instruction *OpI = dyn_cast<Instruction>(OpV))
+ if (isInstructionTriviallyDead(OpI, TLI))
+ DeadInsts.push_back(OpI);
+ }
+ if (MSSAU)
+ MSSAU->removeMemoryAccess(I);
+
+ I->eraseFromParent();
+ }
+}
+
+bool llvm::replaceDbgUsesWithUndef(Instruction *I) {
+ SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
+ findDbgUsers(DbgUsers, I);
+ for (auto *DII : DbgUsers)
+ DII->setKillLocation();
+ return !DbgUsers.empty();
+}
+
+/// areAllUsesEqual - Check whether the uses of a value are all the same.
+/// This is similar to Instruction::hasOneUse() except this will also return
+/// true when there are no uses or multiple uses that all refer to the same
+/// value.
+static bool areAllUsesEqual(Instruction *I) {
+ Value::user_iterator UI = I->user_begin();
+ Value::user_iterator UE = I->user_end();
+ if (UI == UE)
+ return true;
+
+ User *TheUse = *UI;
+ for (++UI; UI != UE; ++UI) {
+ if (*UI != TheUse)
+ return false;
+ }
+ return true;
+}
+
+/// RecursivelyDeleteDeadPHINode - If the specified value is an effectively
+/// dead PHI node, due to being a def-use chain of single-use nodes that
+/// either forms a cycle or is terminated by a trivially dead instruction,
+/// delete it. If that makes any of its operands trivially dead, delete them
+/// too, recursively. Return true if a change was made.
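+ ///
+ /// For illustration only: a single-use PHI whose user is an instruction that
+ /// in turn only feeds back into the PHI forms such a cycle; it is broken by
+ /// replacing the value with poison and deleting the chain.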
+bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN,
+ const TargetLibraryInfo *TLI,
+ llvm::MemorySSAUpdater *MSSAU) {
+ SmallPtrSet<Instruction*, 4> Visited;
+ for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects();
+ I = cast<Instruction>(*I->user_begin())) {
+ if (I->use_empty())
+ return RecursivelyDeleteTriviallyDeadInstructions(I, TLI, MSSAU);
+
+ // If we find an instruction more than once, we're on a cycle that
+ // won't prove fruitful.
+ if (!Visited.insert(I).second) {
+ // Break the cycle and delete the instruction and its operands.
+ I->replaceAllUsesWith(PoisonValue::get(I->getType()));
+ (void)RecursivelyDeleteTriviallyDeadInstructions(I, TLI, MSSAU);
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool
+simplifyAndDCEInstruction(Instruction *I,
+ SmallSetVector<Instruction *, 16> &WorkList,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ if (isInstructionTriviallyDead(I, TLI)) {
+ salvageDebugInfo(*I);
+
+ // Null out all of the instruction's operands to see if any operand becomes
+ // dead as we go.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Value *OpV = I->getOperand(i);
+ I->setOperand(i, nullptr);
+
+ if (!OpV->use_empty() || I == OpV)
+ continue;
+
+ // If the operand is an instruction that became dead as we nulled out the
+ // operand, and if it is 'trivially' dead, delete it in a future loop
+ // iteration.
+ if (Instruction *OpI = dyn_cast<Instruction>(OpV))
+ if (isInstructionTriviallyDead(OpI, TLI))
+ WorkList.insert(OpI);
+ }
+
+ I->eraseFromParent();
+
+ return true;
+ }
+
+ if (Value *SimpleV = simplifyInstruction(I, DL)) {
+ // Add the users to the worklist. CAREFUL: an instruction can use itself,
+ // in the case of a phi node.
+ for (User *U : I->users()) {
+ if (U != I) {
+ WorkList.insert(cast<Instruction>(U));
+ }
+ }
+
+ // Replace the instruction with its simplified value.
+ bool Changed = false;
+ if (!I->use_empty()) {
+ I->replaceAllUsesWith(SimpleV);
+ Changed = true;
+ }
+ if (isInstructionTriviallyDead(I, TLI)) {
+ I->eraseFromParent();
+ Changed = true;
+ }
+ return Changed;
+ }
+ return false;
+}
+
+/// SimplifyInstructionsInBlock - Scan the specified basic block and try to
+/// simplify any instructions in it and recursively delete dead instructions.
+///
+ /// This returns true if it changed the code; note that it can delete
+ /// instructions in other blocks as well as in this block.
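+ ///
+ /// For illustration only (hypothetical operands): 'add i32 %x, 0' is
+ /// simplified to '%x', its uses are rewritten, and the now-dead 'add' is
+ /// erased.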
+bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB,
+ const TargetLibraryInfo *TLI) {
+ bool MadeChange = false;
+ const DataLayout &DL = BB->getModule()->getDataLayout();
+
+#ifndef NDEBUG
+ // In debug builds, ensure that the terminator of the block is never replaced
+ // or deleted by these simplifications. The idea of simplification is that it
+ // cannot introduce new instructions, and there is no way to replace the
+ // terminator of a block without introducing a new instruction.
+ AssertingVH<Instruction> TerminatorVH(&BB->back());
+#endif
+
+ SmallSetVector<Instruction *, 16> WorkList;
+ // Iterate over the original function, only adding insts to the worklist
+ // if they actually need to be revisited. This avoids having to pre-init
+ // the worklist with the entire function's worth of instructions.
+ for (BasicBlock::iterator BI = BB->begin(), E = std::prev(BB->end());
+ BI != E;) {
+ assert(!BI->isTerminator());
+ Instruction *I = &*BI;
+ ++BI;
+
+ // We're visiting this instruction now, so make sure it's not in the
+ // worklist from an earlier visit.
+ if (!WorkList.count(I))
+ MadeChange |= simplifyAndDCEInstruction(I, WorkList, DL, TLI);
+ }
+
+ while (!WorkList.empty()) {
+ Instruction *I = WorkList.pop_back_val();
+ MadeChange |= simplifyAndDCEInstruction(I, WorkList, DL, TLI);
+ }
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Control Flow Graph Restructuring.
+//
+
+void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB,
+ DomTreeUpdater *DTU) {
+
+ // If DestBB has single-entry PHI nodes, fold them.
+ while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
+ Value *NewVal = PN->getIncomingValue(0);
+ // Replace a self-referencing PHI with poison; it must be dead.
+ if (NewVal == PN) NewVal = PoisonValue::get(PN->getType());
+ PN->replaceAllUsesWith(NewVal);
+ PN->eraseFromParent();
+ }
+
+ BasicBlock *PredBB = DestBB->getSinglePredecessor();
+ assert(PredBB && "Block doesn't have a single predecessor!");
+
+ bool ReplaceEntryBB = PredBB->isEntryBlock();
+
+ // DTU updates: Collect all the edges that enter
+ // PredBB. These dominator edges will be redirected to DestBB.
+ SmallVector<DominatorTree::UpdateType, 32> Updates;
+
+ if (DTU) {
+ // To avoid processing the same predecessor more than once.
+ SmallPtrSet<BasicBlock *, 2> SeenPreds;
+ Updates.reserve(Updates.size() + 2 * pred_size(PredBB) + 1);
+ for (BasicBlock *PredOfPredBB : predecessors(PredBB))
+ // This predecessor of PredBB may already have DestBB as a successor.
+ if (PredOfPredBB != PredBB)
+ if (SeenPreds.insert(PredOfPredBB).second)
+ Updates.push_back({DominatorTree::Insert, PredOfPredBB, DestBB});
+ SeenPreds.clear();
+ for (BasicBlock *PredOfPredBB : predecessors(PredBB))
+ if (SeenPreds.insert(PredOfPredBB).second)
+ Updates.push_back({DominatorTree::Delete, PredOfPredBB, PredBB});
+ Updates.push_back({DominatorTree::Delete, PredBB, DestBB});
+ }
+
+ // Zap anything that took the address of DestBB. Not doing this will give the
+ // address an invalid value.
+ if (DestBB->hasAddressTaken()) {
+ BlockAddress *BA = BlockAddress::get(DestBB);
+ Constant *Replacement =
+ ConstantInt::get(Type::getInt32Ty(BA->getContext()), 1);
+ BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement,
+ BA->getType()));
+ BA->destroyConstant();
+ }
+
+ // Anything that branched to PredBB now branches to DestBB.
+ PredBB->replaceAllUsesWith(DestBB);
+
+ // Splice all the instructions from PredBB to DestBB.
+ PredBB->getTerminator()->eraseFromParent();
+ DestBB->splice(DestBB->begin(), PredBB);
+ new UnreachableInst(PredBB->getContext(), PredBB);
+
+ // If the PredBB is the entry block of the function, move DestBB up to
+ // become the entry block after we erase PredBB.
+ if (ReplaceEntryBB)
+ DestBB->moveAfter(PredBB);
+
+ if (DTU) {
+ assert(PredBB->size() == 1 &&
+ isa<UnreachableInst>(PredBB->getTerminator()) &&
+ "The successor list of PredBB isn't empty before "
+ "applying corresponding DTU updates.");
+ DTU->applyUpdatesPermissive(Updates);
+ DTU->deleteBB(PredBB);
+ // Recalculation of DomTree is needed when updating a forward DomTree and
+ // the Entry BB is replaced.
+ if (ReplaceEntryBB && DTU->hasDomTree()) {
+ // The entry block was removed and there is no external interface for
+ // the dominator tree to be notified of this change. In this corner-case
+ // we recalculate the entire tree.
+ DTU->recalculate(*(DestBB->getParent()));
+ }
+ }
+
+ else {
+ PredBB->eraseFromParent(); // Nuke PredBB if DTU is nullptr.
+ }
+}
+
+/// Return true if we can choose one of these values to use in place of the
+/// other. Note that we will always choose the non-undef value to keep.
+static bool CanMergeValues(Value *First, Value *Second) {
+ return First == Second || isa<UndefValue>(First) || isa<UndefValue>(Second);
+}
+
+/// Return true if we can fold BB, an almost-empty BB ending in an unconditional
+/// branch to Succ, into Succ.
+///
+/// Assumption: Succ is the single successor for BB.
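+ ///
+ /// For illustration only: folding is rejected when a PHI in Succ would need
+ /// two different incoming values for the same predecessor, e.g. when a block
+ /// that is a predecessor of both BB and Succ already feeds that PHI with a
+ /// value different from the one flowing in through BB.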
+static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
+ assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
+
+ LLVM_DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into "
+ << Succ->getName() << "\n");
+ // Shortcut: if there is only a single predecessor, it must be BB and merging
+ // is always safe.
+ if (Succ->getSinglePredecessor()) return true;
+
+ // Make a list of the predecessors of BB
+ SmallPtrSet<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
+
+ // Look at all the phi nodes in Succ, to see if they present a conflict when
+ // merging these blocks
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+
+ // If the incoming value from BB is again a PHINode in
+ // BB which has the same incoming value for *PI as PN does, we can
+ // merge the phi nodes and then the blocks can still be merged
+ PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB));
+ if (BBPN && BBPN->getParent() == BB) {
+ for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) {
+ BasicBlock *IBB = PN->getIncomingBlock(PI);
+ if (BBPreds.count(IBB) &&
+ !CanMergeValues(BBPN->getIncomingValueForBlock(IBB),
+ PN->getIncomingValue(PI))) {
+ LLVM_DEBUG(dbgs()
+ << "Can't fold, phi node " << PN->getName() << " in "
+ << Succ->getName() << " is conflicting with "
+ << BBPN->getName() << " with regard to common predecessor "
+ << IBB->getName() << "\n");
+ return false;
+ }
+ }
+ } else {
+ Value* Val = PN->getIncomingValueForBlock(BB);
+ for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) {
+ // See if the incoming value for the common predecessor is equal to the
+ // one for BB, in which case this phi node will not prevent the merging
+ // of the block.
+ BasicBlock *IBB = PN->getIncomingBlock(PI);
+ if (BBPreds.count(IBB) &&
+ !CanMergeValues(Val, PN->getIncomingValue(PI))) {
+ LLVM_DEBUG(dbgs() << "Can't fold, phi node " << PN->getName()
+ << " in " << Succ->getName()
+ << " is conflicting with regard to common "
+ << "predecessor " << IBB->getName() << "\n");
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+using PredBlockVector = SmallVector<BasicBlock *, 16>;
+using IncomingValueMap = DenseMap<BasicBlock *, Value *>;
+
+/// Determines the value to use as the phi node input for a block.
+///
+ /// Select between \p OldVal and any value that we know flows from \p BB
+/// to a particular phi on the basis of which one (if either) is not
+/// undef. Update IncomingValues based on the selected value.
+///
+/// \param OldVal The value we are considering selecting.
+/// \param BB The block that the value flows in from.
+/// \param IncomingValues A map from block-to-value for other phi inputs
+/// that we have examined.
+///
+/// \returns the selected value.
+static Value *selectIncomingValueForBlock(Value *OldVal, BasicBlock *BB,
+ IncomingValueMap &IncomingValues) {
+ if (!isa<UndefValue>(OldVal)) {
+ assert((!IncomingValues.count(BB) ||
+ IncomingValues.find(BB)->second == OldVal) &&
+ "Expected OldVal to match incoming value from BB!");
+
+ IncomingValues.insert(std::make_pair(BB, OldVal));
+ return OldVal;
+ }
+
+ IncomingValueMap::const_iterator It = IncomingValues.find(BB);
+ if (It != IncomingValues.end()) return It->second;
+
+ return OldVal;
+}
+
+/// Create a map from block to value for the operands of a
+/// given phi.
+///
+/// Create a map from block to value for each non-undef value flowing
+/// into \p PN.
+///
+/// \param PN The phi we are collecting the map for.
+/// \param IncomingValues [out] The map from block to value for this phi.
+static void gatherIncomingValuesToPhi(PHINode *PN,
+ IncomingValueMap &IncomingValues) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *BB = PN->getIncomingBlock(i);
+ Value *V = PN->getIncomingValue(i);
+
+ if (!isa<UndefValue>(V))
+ IncomingValues.insert(std::make_pair(BB, V));
+ }
+}
+
+/// Replace the incoming undef values to a phi with the values
+/// from a block-to-value map.
+///
+/// \param PN The phi we are replacing the undefs in.
+/// \param IncomingValues A map from block to value.
+static void replaceUndefValuesInPhi(PHINode *PN,
+ const IncomingValueMap &IncomingValues) {
+ SmallVector<unsigned> TrueUndefOps;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = PN->getIncomingValue(i);
+
+ if (!isa<UndefValue>(V)) continue;
+
+ BasicBlock *BB = PN->getIncomingBlock(i);
+ IncomingValueMap::const_iterator It = IncomingValues.find(BB);
+
+ // Keep track of undef/poison incoming values. Those must match, so we fix
+ // them up below if needed.
+ // Note: this is conservatively correct, but we could try harder and group
+ // the undef values per incoming basic block.
+ if (It == IncomingValues.end()) {
+ TrueUndefOps.push_back(i);
+ continue;
+ }
+
+ // There is a defined value for this incoming block, so map this undef
+ // incoming value to the defined value.
+ PN->setIncomingValue(i, It->second);
+ }
+
+ // If there are both undef and poison values incoming, then convert those
+ // values to undef. It is invalid to have different values for the same
+ // incoming block.
+ unsigned PoisonCount = count_if(TrueUndefOps, [&](unsigned i) {
+ return isa<PoisonValue>(PN->getIncomingValue(i));
+ });
+ if (PoisonCount != 0 && PoisonCount != TrueUndefOps.size()) {
+ for (unsigned i : TrueUndefOps)
+ PN->setIncomingValue(i, UndefValue::get(PN->getType()));
+ }
+}
+
+/// Replace a value flowing from a block to a phi with
+/// potentially multiple instances of that value flowing from the
+/// block's predecessors to the phi.
+///
+/// \param BB The block with the value flowing into the phi.
+/// \param BBPreds The predecessors of BB.
+/// \param PN The phi that we are updating.
+static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB,
+ const PredBlockVector &BBPreds,
+ PHINode *PN) {
+ Value *OldVal = PN->removeIncomingValue(BB, false);
+ assert(OldVal && "No entry in PHI for Pred BB!");
+
+ IncomingValueMap IncomingValues;
+
+ // We are merging two blocks - BB, and the block containing PN - and
+ // as a result we need to redirect edges from the predecessors of BB
+ // to go to the block containing PN, and update PN
+ // accordingly. Since we allow merging blocks in the case where the
+ // predecessor and successor blocks both share some predecessors,
+ // and where some of those common predecessors might have undef
+ // values flowing into PN, we want to rewrite those values to be
+ // consistent with the non-undef values.
+
+ gatherIncomingValuesToPhi(PN, IncomingValues);
+
+ // If this incoming value is one of the PHI nodes in BB, the new entries
+ // in the PHI node are the entries from the old PHI.
+ if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) {
+ PHINode *OldValPN = cast<PHINode>(OldVal);
+ for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i) {
+ // Note that, since we are merging phi nodes and BB and Succ might
+ // have common predecessors, we could end up with a phi node with
+ // identical incoming branches. This will be cleaned up later (and
+ // will trigger asserts if we try to clean it up now, without also
+ // simplifying the corresponding conditional branch).
+ BasicBlock *PredBB = OldValPN->getIncomingBlock(i);
+ Value *PredVal = OldValPN->getIncomingValue(i);
+ Value *Selected = selectIncomingValueForBlock(PredVal, PredBB,
+ IncomingValues);
+
+ // And add a new incoming value for this predecessor for the
+ // newly retargeted branch.
+ PN->addIncoming(Selected, PredBB);
+ }
+ } else {
+ for (unsigned i = 0, e = BBPreds.size(); i != e; ++i) {
+ // Update existing incoming values in PN for this
+ // predecessor of BB.
+ BasicBlock *PredBB = BBPreds[i];
+ Value *Selected = selectIncomingValueForBlock(OldVal, PredBB,
+ IncomingValues);
+
+ // And add a new incoming value for this predecessor for the
+ // newly retargeted branch.
+ PN->addIncoming(Selected, PredBB);
+ }
+ }
+
+ replaceUndefValuesInPhi(PN, IncomingValues);
+}
+
+bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
+ DomTreeUpdater *DTU) {
+ assert(BB != &BB->getParent()->getEntryBlock() &&
+ "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!");
+
+ // We can't eliminate infinite loops.
+ BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0);
+ if (BB == Succ) return false;
+
+ // Check to see if merging these blocks would cause conflicts for any of the
+ // phi nodes in BB or Succ. If not, we can safely merge.
+ if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false;
+
+ // Check for cases where Succ has multiple predecessors and a PHI node in BB
+ // has uses which will not disappear when the PHI nodes are merged. It is
+ // possible to handle such cases, but difficult: it requires checking whether
+ // BB dominates Succ, which is non-trivial to calculate in the case where
+ // Succ has multiple predecessors. Also, it requires checking whether
+ // constructing the necessary self-referential PHI node doesn't introduce any
+ // conflicts; this isn't too difficult, but the previous code for doing this
+ // was incorrect.
+ //
+ // Note that if this check finds a live use, BB dominates Succ, so BB is
+ // something like a loop pre-header (or rarely, a part of an irreducible CFG);
+ // folding the branch isn't profitable in that case anyway.
+ if (!Succ->getSinglePredecessor()) {
+ BasicBlock::iterator BBI = BB->begin();
+ while (isa<PHINode>(*BBI)) {
+ for (Use &U : BBI->uses()) {
+ if (PHINode* PN = dyn_cast<PHINode>(U.getUser())) {
+ if (PN->getIncomingBlock(U) != BB)
+ return false;
+ } else {
+ return false;
+ }
+ }
+ ++BBI;
+ }
+ }
+
+ // If 'BB' and 'BB->Pred' are loop latches, bail out to preserve inner-loop
+ // metadata.
+ //
+ // FIXME: This is a stop-gap solution to preserve inner-loop metadata given
+ // current status (that loop metadata is implemented as metadata attached to
+ // the branch instruction in the loop latch block). To quote from review
+ // comments, "the current representation of loop metadata (using a loop latch
+ // terminator attachment) is known to be fundamentally broken. Loop latches
+ // are not uniquely associated with loops (both in that a latch can be part of
+ // multiple loops and a loop may have multiple latches). Loop headers are. The
+ // solution to this problem is also known: Add support for basic block
+ // metadata, and attach loop metadata to the loop header."
+ //
+ // Why bail out:
+ // In this case, we expect 'BB' is the latch for outer-loop and 'BB->Pred' is
+ // the latch for inner-loop (see reason below), so bail out to preserve
+ // inner-loop metadata rather than eliminating 'BB' and attaching its metadata
+ // to this inner-loop.
+ // - The reason we believe 'BB' and 'BB->Pred' have different inner-most
+ // loops: assuming 'BB' and 'BB->Pred' are from the same inner-most loop L,
+ // then 'BB' is the header and latch of 'L' and thereby 'L' must consist of
+ // one self-looping basic block, which is contradictory with the assumption.
+ //
+ // To illustrate how inner-loop metadata is dropped:
+ //
+ // CFG Before
+ //
+ // BB is while.cond.exit, attached with loop metadata md2.
+ // BB->Pred is for.body, attached with loop metadata md1.
+ //
+ // entry
+ // |
+ // v
+ // ---> while.cond -------------> while.end
+ // | |
+ // | v
+ // | while.body
+ // | |
+ // | v
+ // | for.body <---- (md1)
+ // | | |______|
+ // | v
+ // | while.cond.exit (md2)
+ // | |
+ // |_______|
+ //
+ // CFG After
+ //
+ // while.cond1 is the merge of while.cond.exit and while.cond above.
+ // for.body is attached with md2, and md1 is dropped.
+ // If LoopSimplify runs later (as part of a loop pass), it could create
+ // dedicated exits for the inner loop (essentially adding `while.cond.exit`
+ // back), but it won't see 'md1' nor restore it for the inner-loop.
+ //
+ // entry
+ // |
+ // v
+ // ---> while.cond1 -------------> while.end
+ // | |
+ // | v
+ // | while.body
+ // | |
+ // | v
+ // | for.body <---- (md2)
+ // |_______| |______|
+ if (Instruction *TI = BB->getTerminator())
+ if (TI->hasMetadata(LLVMContext::MD_loop))
+ for (BasicBlock *Pred : predecessors(BB))
+ if (Instruction *PredTI = Pred->getTerminator())
+ if (PredTI->hasMetadata(LLVMContext::MD_loop))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB);
+
+ SmallVector<DominatorTree::UpdateType, 32> Updates;
+ if (DTU) {
+ // To avoid processing the same predecessor more than once.
+ SmallPtrSet<BasicBlock *, 8> SeenPreds;
+ // All predecessors of BB will be moved to Succ.
+ SmallPtrSet<BasicBlock *, 8> PredsOfSucc(pred_begin(Succ), pred_end(Succ));
+ Updates.reserve(Updates.size() + 2 * pred_size(BB) + 1);
+ for (auto *PredOfBB : predecessors(BB))
+ // This predecessor of BB may already have Succ as a successor.
+ if (!PredsOfSucc.contains(PredOfBB))
+ if (SeenPreds.insert(PredOfBB).second)
+ Updates.push_back({DominatorTree::Insert, PredOfBB, Succ});
+ SeenPreds.clear();
+ for (auto *PredOfBB : predecessors(BB))
+ if (SeenPreds.insert(PredOfBB).second)
+ Updates.push_back({DominatorTree::Delete, PredOfBB, BB});
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
+ }
+
+ if (isa<PHINode>(Succ->begin())) {
+ // If there is more than one pred of succ, and there are PHI nodes in
+ // the successor, then we need to add incoming edges for the PHI nodes
+ //
+ const PredBlockVector BBPreds(predecessors(BB));
+
+ // Loop over all of the PHI nodes in the successor of BB.
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+
+ redirectValuesFromPredecessorsToPhi(BB, BBPreds, PN);
+ }
+ }
+
+ if (Succ->getSinglePredecessor()) {
+ // BB is the only predecessor of Succ, so Succ will end up with exactly
+ // the same predecessors BB had.
+
+ // Copy over any phi, debug or lifetime instruction.
+ BB->getTerminator()->eraseFromParent();
+ Succ->splice(Succ->getFirstNonPHI()->getIterator(), BB);
+ } else {
+ while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
+ // We explicitly check for such uses in CanPropagatePredecessorsForPHIs.
+ assert(PN->use_empty() && "There shouldn't be any uses here!");
+ PN->eraseFromParent();
+ }
+ }
+
+ // If the unconditional branch we replaced contains llvm.loop metadata, we
+ // add the metadata to the branch instructions in the predecessors.
+ unsigned LoopMDKind = BB->getContext().getMDKindID("llvm.loop");
+ Instruction *TI = BB->getTerminator();
+ if (TI)
+ if (MDNode *LoopMD = TI->getMetadata(LoopMDKind))
+ for (BasicBlock *Pred : predecessors(BB))
+ Pred->getTerminator()->setMetadata(LoopMDKind, LoopMD);
+
+ // Everything that jumped to BB now goes to Succ.
+ BB->replaceAllUsesWith(Succ);
+ if (!Succ->hasName()) Succ->takeName(BB);
+
+ // Clear the successor list of BB to match updates applying to DTU later.
+ if (BB->getTerminator())
+ BB->back().eraseFromParent();
+ new UnreachableInst(BB->getContext(), BB);
+ assert(succ_empty(BB) && "The successor list of BB isn't empty before "
+ "applying corresponding DTU updates.");
+
+ if (DTU)
+ DTU->applyUpdates(Updates);
+
+ DeleteDeadBlock(BB, DTU);
+
+ return true;
+}
+
+static bool EliminateDuplicatePHINodesNaiveImpl(BasicBlock *BB) {
+ // This implementation doesn't currently consider undef operands
+ // specially. Theoretically, two phis which are identical except for
+ // one having an undef where the other doesn't could be collapsed.
+
+ bool Changed = false;
+
+ // Examine each PHI.
+ // Note that increment of I must *NOT* be in the iteration_expression, since
+ // we don't want to immediately advance when we restart from the beginning.
+ for (auto I = BB->begin(); PHINode *PN = dyn_cast<PHINode>(I);) {
+ ++I;
+ // Is there an identical PHI node in this basic block?
+ // Note that we only compare PN against the PHIs that follow it (the upper
+ // triangle); the lower triangle was already checked in earlier iterations.
+ for (auto J = I; PHINode *DuplicatePN = dyn_cast<PHINode>(J); ++J) {
+ if (!DuplicatePN->isIdenticalToWhenDefined(PN))
+ continue;
+ // A duplicate. Replace this PHI with the base PHI.
+ ++NumPHICSEs;
+ DuplicatePN->replaceAllUsesWith(PN);
+ DuplicatePN->eraseFromParent();
+ Changed = true;
+
+ // The RAUW can change PHIs that we already visited.
+ I = BB->begin();
+ break; // Start over from the beginning.
+ }
+ }
+ return Changed;
+}
+
+static bool EliminateDuplicatePHINodesSetBasedImpl(BasicBlock *BB) {
+ // This implementation doesn't currently consider undef operands
+ // specially. Theoretically, two phis which are identical except for
+ // one having an undef where the other doesn't could be collapsed.
+
+ struct PHIDenseMapInfo {
+ static PHINode *getEmptyKey() {
+ return DenseMapInfo<PHINode *>::getEmptyKey();
+ }
+
+ static PHINode *getTombstoneKey() {
+ return DenseMapInfo<PHINode *>::getTombstoneKey();
+ }
+
+ static bool isSentinel(PHINode *PN) {
+ return PN == getEmptyKey() || PN == getTombstoneKey();
+ }
+
+ // WARNING: this logic must be kept in sync with
+ // Instruction::isIdenticalToWhenDefined()!
+ static unsigned getHashValueImpl(PHINode *PN) {
+ // Compute a hash value on the operands. Instcombine will likely have
+ // sorted them, which helps expose duplicates, but we have to check all
+ // the operands to be safe in case instcombine hasn't run.
+ return static_cast<unsigned>(hash_combine(
+ hash_combine_range(PN->value_op_begin(), PN->value_op_end()),
+ hash_combine_range(PN->block_begin(), PN->block_end())));
+ }
+
+ static unsigned getHashValue(PHINode *PN) {
+#ifndef NDEBUG
+ // If -phicse-debug-hash was specified, return a constant -- this
+ // will force all hashing to collide, so we'll exhaustively search
+ // the table for a match, and the assertion in isEqual will fire if
+ // there's a bug causing equal keys to hash differently.
+ if (PHICSEDebugHash)
+ return 0;
+#endif
+ return getHashValueImpl(PN);
+ }
+
+ static bool isEqualImpl(PHINode *LHS, PHINode *RHS) {
+ if (isSentinel(LHS) || isSentinel(RHS))
+ return LHS == RHS;
+ return LHS->isIdenticalTo(RHS);
+ }
+
+ static bool isEqual(PHINode *LHS, PHINode *RHS) {
+ // These comparisons are nontrivial, so assert that equality implies
+ // hash equality (DenseMap demands this as an invariant).
+ bool Result = isEqualImpl(LHS, RHS);
+ assert(!Result || (isSentinel(LHS) && LHS == RHS) ||
+ getHashValueImpl(LHS) == getHashValueImpl(RHS));
+ return Result;
+ }
+ };
+
+ // Set of unique PHINodes.
+ DenseSet<PHINode *, PHIDenseMapInfo> PHISet;
+ PHISet.reserve(4 * PHICSENumPHISmallSize);
+
+ // Examine each PHI.
+ bool Changed = false;
+ for (auto I = BB->begin(); PHINode *PN = dyn_cast<PHINode>(I++);) {
+ auto Inserted = PHISet.insert(PN);
+ if (!Inserted.second) {
+ // A duplicate. Replace this PHI with its duplicate.
+ ++NumPHICSEs;
+ PN->replaceAllUsesWith(*Inserted.first);
+ PN->eraseFromParent();
+ Changed = true;
+
+ // The RAUW can change PHIs that we already visited. Start over from the
+ // beginning.
+ PHISet.clear();
+ I = BB->begin();
+ }
+ }
+
+ return Changed;
+}
+
+bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
+ if (
+#ifndef NDEBUG
+ !PHICSEDebugHash &&
+#endif
+ hasNItemsOrLess(BB->phis(), PHICSENumPHISmallSize))
+ return EliminateDuplicatePHINodesNaiveImpl(BB);
+ return EliminateDuplicatePHINodesSetBasedImpl(BB);
+}
+
+/// If the specified pointer points to an object that we control, try to modify
+/// the object's alignment to PrefAlign. Returns a minimum known alignment of
+/// the value after the operation, which may be lower than PrefAlign.
+///
+ /// Increasing value alignment isn't often possible though. If alignment is
+/// important, a more reliable approach is to simply align all global variables
+/// and allocation instructions to their preferred alignment from the beginning.
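+ ///
+ /// For illustration only: an alloca currently aligned to 4 may be raised to
+ /// a requested preferred alignment of 16, provided that 16 does not exceed
+ /// the target's natural stack alignment.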
+static Align tryEnforceAlignment(Value *V, Align PrefAlign,
+ const DataLayout &DL) {
+ V = V->stripPointerCasts();
+
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ // TODO: Ideally, this function would not be called if PrefAlign is smaller
+ // than the current alignment, as the known bits calculation should have
+ // already taken it into account. However, this is not always the case,
+ // as computeKnownBits() has a depth limit, while stripPointerCasts()
+ // doesn't.
+ Align CurrentAlign = AI->getAlign();
+ if (PrefAlign <= CurrentAlign)
+ return CurrentAlign;
+
+ // If the preferred alignment is greater than the natural stack alignment
+ // then don't round up. This avoids dynamic stack realignment.
+ if (DL.exceedsNaturalStackAlignment(PrefAlign))
+ return CurrentAlign;
+ AI->setAlignment(PrefAlign);
+ return PrefAlign;
+ }
+
+ if (auto *GO = dyn_cast<GlobalObject>(V)) {
+ // TODO: as above, this shouldn't be necessary.
+ Align CurrentAlign = GO->getPointerAlignment(DL);
+ if (PrefAlign <= CurrentAlign)
+ return CurrentAlign;
+
+ // If there is a large requested alignment and we can, bump up the alignment
+ // of the global. If the memory we set aside for the global may not be the
+ // memory used by the final program then it is impossible for us to reliably
+ // enforce the preferred alignment.
+ if (!GO->canIncreaseAlignment())
+ return CurrentAlign;
+
+ GO->setAlignment(PrefAlign);
+ return PrefAlign;
+ }
+
+ return Align(1);
+}
+
+Align llvm::getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign,
+ const DataLayout &DL,
+ const Instruction *CxtI,
+ AssumptionCache *AC,
+ const DominatorTree *DT) {
+ assert(V->getType()->isPointerTy() &&
+ "getOrEnforceKnownAlignment expects a pointer!");
+
+ KnownBits Known = computeKnownBits(V, DL, 0, AC, CxtI, DT);
+ unsigned TrailZ = Known.countMinTrailingZeros();
+
+ // Avoid trouble with ridiculously large TrailZ values, such as
+ // those computed from a null pointer.
+ // LLVM doesn't support alignments larger than (1 << MaxAlignmentExponent).
+ TrailZ = std::min(TrailZ, +Value::MaxAlignmentExponent);
+
+ Align Alignment = Align(1ull << std::min(Known.getBitWidth() - 1, TrailZ));
+
+ if (PrefAlign && *PrefAlign > Alignment)
+ Alignment = std::max(Alignment, tryEnforceAlignment(V, *PrefAlign, DL));
+
+ // We don't need to make any adjustment.
+ return Alignment;
+}
+
+///===---------------------------------------------------------------------===//
+/// Dbg Intrinsic utilities
+///
+
+/// See if there is a dbg.value intrinsic for DIVar for the PHI node.
+static bool PhiHasDebugValue(DILocalVariable *DIVar,
+ DIExpression *DIExpr,
+ PHINode *APN) {
+ // Since we can't guarantee that the original dbg.declare intrinsic
+ // is removed by LowerDbgDeclare(), we need to make sure that we are
+ // not inserting the same dbg.value intrinsic over and over.
+ SmallVector<DbgValueInst *, 1> DbgValues;
+ findDbgValues(DbgValues, APN);
+ for (auto *DVI : DbgValues) {
+ assert(is_contained(DVI->getValues(), APN));
+ if ((DVI->getVariable() == DIVar) && (DVI->getExpression() == DIExpr))
+ return true;
+ }
+ return false;
+}
+
+/// Check if the alloc size of \p ValTy is large enough to cover the variable
+/// (or fragment of the variable) described by \p DII.
+///
+/// This is primarily intended as a helper for the different
+/// ConvertDebugDeclareToDebugValue functions. The dbg.declare/dbg.addr that is
+/// converted describes an alloca'd variable, so we need to use the
+/// alloc size of the value when doing the comparison. E.g. an i1 value will be
+/// identified as covering an n-bit fragment, if the store size of i1 is at
+/// least n bits.
+static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) {
+ const DataLayout &DL = DII->getModule()->getDataLayout();
+ TypeSize ValueSize = DL.getTypeAllocSizeInBits(ValTy);
+ if (std::optional<uint64_t> FragmentSize = DII->getFragmentSizeInBits()) {
+ assert(!ValueSize.isScalable() &&
+ "Fragments don't work on scalable types.");
+ return ValueSize.getFixedValue() >= *FragmentSize;
+ }
+ // We can't always calculate the size of the DI variable (e.g. if it is a
+ // VLA). Try to use the size of the alloca that the dbg intrinsic describes
+ // instead.
+ if (DII->isAddressOfVariable()) {
+ // DII should have exactly 1 location when it is an address.
+ assert(DII->getNumVariableLocationOps() == 1 &&
+ "address of variable must have exactly 1 location operand.");
+ if (auto *AI =
+ dyn_cast_or_null<AllocaInst>(DII->getVariableLocationOp(0))) {
+ if (std::optional<TypeSize> FragmentSize =
+ AI->getAllocationSizeInBits(DL)) {
+ return TypeSize::isKnownGE(ValueSize, *FragmentSize);
+ }
+ }
+ }
+ // Could not determine size of variable. Conservatively return false.
+ return false;
+}
+
+/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
+/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic.
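+ ///
+ /// For illustration only (hypothetical names): given 'dbg.declare(%x.addr,
+ /// !x, ...)' and 'store i32 %v, ptr %x.addr', a 'dbg.value' describing %v
+ /// for variable !x is inserted at the store.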
+void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
+ StoreInst *SI, DIBuilder &Builder) {
+ assert(DII->isAddressOfVariable() || isa<DbgAssignIntrinsic>(DII));
+ auto *DIVar = DII->getVariable();
+ assert(DIVar && "Missing variable");
+ auto *DIExpr = DII->getExpression();
+ Value *DV = SI->getValueOperand();
+
+ DebugLoc NewLoc = getDebugValueLoc(DII);
+
+ if (!valueCoversEntireFragment(DV->getType(), DII)) {
+ // FIXME: If storing to a part of the variable described by the dbg.declare,
+ // then we want to insert a dbg.value for the corresponding fragment.
+ LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: "
+ << *DII << '\n');
+ // For now, when there is a store to parts of the variable (but we do not
+ // know which part) we insert a dbg.value intrinsic to indicate that we
+ // know nothing about the variable's content.
+ DV = UndefValue::get(DV->getType());
+ Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI);
+ return;
+ }
+
+ Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI);
+}
+
+/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
+/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic.
+void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
+ LoadInst *LI, DIBuilder &Builder) {
+ auto *DIVar = DII->getVariable();
+ auto *DIExpr = DII->getExpression();
+ assert(DIVar && "Missing variable");
+
+ if (!valueCoversEntireFragment(LI->getType(), DII)) {
+ // FIXME: If only referring to a part of the variable described by the
+ // dbg.declare, then we want to insert a dbg.value for the corresponding
+ // fragment.
+ LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: "
+ << *DII << '\n');
+ return;
+ }
+
+ DebugLoc NewLoc = getDebugValueLoc(DII);
+
+ // We are now tracking the loaded value instead of the address. In the
+ // future if multi-location support is added to the IR, it might be
+ // preferable to keep tracking both the loaded value and the original
+ // address in case the alloca can not be elided.
+ Instruction *DbgValue = Builder.insertDbgValueIntrinsic(
+ LI, DIVar, DIExpr, NewLoc, (Instruction *)nullptr);
+ DbgValue->insertAfter(LI);
+}
+
+/// Inserts a llvm.dbg.value intrinsic after a phi that has an associated
+/// llvm.dbg.declare or llvm.dbg.addr intrinsic.
+void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
+ PHINode *APN, DIBuilder &Builder) {
+ auto *DIVar = DII->getVariable();
+ auto *DIExpr = DII->getExpression();
+ assert(DIVar && "Missing variable");
+
+ if (PhiHasDebugValue(DIVar, DIExpr, APN))
+ return;
+
+ if (!valueCoversEntireFragment(APN->getType(), DII)) {
+ // FIXME: If only referring to a part of the variable described by the
+ // dbg.declare, then we want to insert a dbg.value for the corresponding
+ // fragment.
+ LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: "
+ << *DII << '\n');
+ return;
+ }
+
+ BasicBlock *BB = APN->getParent();
+ auto InsertionPt = BB->getFirstInsertionPt();
+
+ DebugLoc NewLoc = getDebugValueLoc(DII);
+
+ // The block may be a catchswitch block, which does not have a valid
+ // insertion point.
+ // FIXME: Insert dbg.value markers in the successors when appropriate.
+ if (InsertionPt != BB->end())
+ Builder.insertDbgValueIntrinsic(APN, DIVar, DIExpr, NewLoc, &*InsertionPt);
+}
+
+/// Determine whether this alloca is either a VLA or an array.
+static bool isArray(AllocaInst *AI) {
+ return AI->isArrayAllocation() ||
+ (AI->getAllocatedType() && AI->getAllocatedType()->isArrayTy());
+}
+
+/// Determine whether this alloca is a structure.
+static bool isStructure(AllocaInst *AI) {
+ return AI->getAllocatedType() && AI->getAllocatedType()->isStructTy();
+}
+
+/// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set
+/// of llvm.dbg.value intrinsics.
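+ ///
+ /// For illustration only: for a scalar alloca described by a dbg.declare,
+ /// the dbg.declare is erased and each store to (and load from) the alloca is
+ /// annotated with a dbg.value tracking the stored (or loaded) value.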
+bool llvm::LowerDbgDeclare(Function &F) {
+ bool Changed = false;
+ DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false);
+ SmallVector<DbgDeclareInst *, 4> Dbgs;
+ for (auto &FI : F)
+ for (Instruction &BI : FI)
+ if (auto DDI = dyn_cast<DbgDeclareInst>(&BI))
+ Dbgs.push_back(DDI);
+
+ if (Dbgs.empty())
+ return Changed;
+
+ for (auto &I : Dbgs) {
+ DbgDeclareInst *DDI = I;
+ AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress());
+ // If this is an alloca for a scalar variable, insert a dbg.value
+ // at each load and store to the alloca and erase the dbg.declare.
+ // The dbg.values allow tracking a variable even if it is not
+ // stored on the stack, while the dbg.declare can only describe
+ // the stack slot (and at a lexical-scope granularity). Later
+ // passes will attempt to elide the stack slot.
+ if (!AI || isArray(AI) || isStructure(AI))
+ continue;
+
+ // A volatile load/store means that the alloca can't be elided anyway.
+ if (llvm::any_of(AI->users(), [](User *U) -> bool {
+ if (LoadInst *LI = dyn_cast<LoadInst>(U))
+ return LI->isVolatile();
+ if (StoreInst *SI = dyn_cast<StoreInst>(U))
+ return SI->isVolatile();
+ return false;
+ }))
+ continue;
+
+ SmallVector<const Value *, 8> WorkList;
+ WorkList.push_back(AI);
+ while (!WorkList.empty()) {
+ const Value *V = WorkList.pop_back_val();
+ for (const auto &AIUse : V->uses()) {
+ User *U = AIUse.getUser();
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (AIUse.getOperandNo() == 1)
+ ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
+ } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
+ // This is a call by-value or some other instruction that takes a
+ // pointer to the variable. Insert a *value* intrinsic that describes
+ // the variable by dereferencing the alloca.
+ if (!CI->isLifetimeStartOrEnd()) {
+ DebugLoc NewLoc = getDebugValueLoc(DDI);
+ auto *DerefExpr =
+ DIExpression::append(DDI->getExpression(), dwarf::DW_OP_deref);
+ DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), DerefExpr,
+ NewLoc, CI);
+ }
+ } else if (BitCastInst *BI = dyn_cast<BitCastInst>(U)) {
+ if (BI->getType()->isPointerTy())
+ WorkList.push_back(BI);
+ }
+ }
+ }
+ DDI->eraseFromParent();
+ Changed = true;
+ }
+
+ if (Changed)
+ for (BasicBlock &BB : F)
+ RemoveRedundantDbgInstrs(&BB);
+
+ return Changed;
+}
+
+/// Propagate dbg.value intrinsics through the newly inserted PHIs.
+void llvm::insertDebugValuesForPHIs(BasicBlock *BB,
+ SmallVectorImpl<PHINode *> &InsertedPHIs) {
+ assert(BB && "No BasicBlock to clone dbg.value(s) from.");
+ if (InsertedPHIs.size() == 0)
+ return;
+
+ // Map existing PHI nodes to their dbg.values.
+ ValueToValueMapTy DbgValueMap;
+ for (auto &I : *BB) {
+ if (auto DbgII = dyn_cast<DbgVariableIntrinsic>(&I)) {
+ for (Value *V : DbgII->location_ops())
+ if (auto *Loc = dyn_cast_or_null<PHINode>(V))
+ DbgValueMap.insert({Loc, DbgII});
+ }
+ }
+ if (DbgValueMap.size() == 0)
+ return;
+
+ // Map a pair of the destination BB and old dbg.value to the new dbg.value,
+ // so that if a dbg.value is being rewritten to use more than one of the
+ // inserted PHIs in the same destination BB, we can update the same dbg.value
+ // with all the new PHIs instead of creating one copy for each.
+ MapVector<std::pair<BasicBlock *, DbgVariableIntrinsic *>,
+ DbgVariableIntrinsic *>
+ NewDbgValueMap;
+ // Then iterate through the new PHIs and look to see if they use one of the
+ // previously mapped PHIs. If so, create a new dbg.value intrinsic that will
+ // propagate the info through the new PHI. If we use more than one new PHI in
+ // a single destination BB with the same old dbg.value, merge the updates so
+ // that we get a single new dbg.value with all the new PHIs.
+ for (auto *PHI : InsertedPHIs) {
+ BasicBlock *Parent = PHI->getParent();
+ // Avoid inserting an intrinsic into an EH block.
+ if (Parent->getFirstNonPHI()->isEHPad())
+ continue;
+ for (auto *VI : PHI->operand_values()) {
+ auto V = DbgValueMap.find(VI);
+ if (V != DbgValueMap.end()) {
+ auto *DbgII = cast<DbgVariableIntrinsic>(V->second);
+ auto NewDI = NewDbgValueMap.find({Parent, DbgII});
+ if (NewDI == NewDbgValueMap.end()) {
+ auto *NewDbgII = cast<DbgVariableIntrinsic>(DbgII->clone());
+ NewDI = NewDbgValueMap.insert({{Parent, DbgII}, NewDbgII}).first;
+ }
+ DbgVariableIntrinsic *NewDbgII = NewDI->second;
+ // If PHI contains VI as an operand more than once, we may have
+ // already replaced it in NewDbgII; confirm that it is still present.
+ if (is_contained(NewDbgII->location_ops(), VI))
+ NewDbgII->replaceVariableLocationOp(VI, PHI);
+ }
+ }
+ }
+ // Insert the new dbg.values into their destination blocks.
+ for (auto DI : NewDbgValueMap) {
+ BasicBlock *Parent = DI.first.first;
+ auto *NewDbgII = DI.second;
+ auto InsertionPt = Parent->getFirstInsertionPt();
+ assert(InsertionPt != Parent->end() && "Ill-formed basic block");
+ NewDbgII->insertBefore(&*InsertionPt);
+ }
+}
+
+bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
+ DIBuilder &Builder, uint8_t DIExprFlags,
+ int Offset) {
+ auto DbgAddrs = FindDbgAddrUses(Address);
+ for (DbgVariableIntrinsic *DII : DbgAddrs) {
+ const DebugLoc &Loc = DII->getDebugLoc();
+ auto *DIVar = DII->getVariable();
+ auto *DIExpr = DII->getExpression();
+ assert(DIVar && "Missing variable");
+ DIExpr = DIExpression::prepend(DIExpr, DIExprFlags, Offset);
+ // Insert llvm.dbg.declare immediately before DII, and remove old
+ // llvm.dbg.declare.
+ Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, DII);
+ DII->eraseFromParent();
+ }
+ return !DbgAddrs.empty();
+}
+
+static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress,
+ DIBuilder &Builder, int Offset) {
+ const DebugLoc &Loc = DVI->getDebugLoc();
+ auto *DIVar = DVI->getVariable();
+ auto *DIExpr = DVI->getExpression();
+ assert(DIVar && "Missing variable");
+
+ // This is an alloca-based llvm.dbg.value. The first thing it should do with
+ // the alloca pointer is dereference it. Otherwise we don't know how to handle
+ // it and give up.
+ if (!DIExpr || DIExpr->getNumElements() < 1 ||
+ DIExpr->getElement(0) != dwarf::DW_OP_deref)
+ return;
+
+ // Insert the offset before the first deref.
+ // We could just change the offset argument of dbg.value, but it's unsigned...
+ if (Offset)
+ DIExpr = DIExpression::prepend(DIExpr, 0, Offset);
+
+ Builder.insertDbgValueIntrinsic(NewAddress, DIVar, DIExpr, Loc, DVI);
+ DVI->eraseFromParent();
+}
+
+void llvm::replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
+ DIBuilder &Builder, int Offset) {
+ if (auto *L = LocalAsMetadata::getIfExists(AI))
+ if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L))
+ for (Use &U : llvm::make_early_inc_range(MDV->uses()))
+ if (auto *DVI = dyn_cast<DbgValueInst>(U.getUser()))
+ replaceOneDbgValueForAlloca(DVI, NewAllocaAddress, Builder, Offset);
+}
+
+/// Salvage debug information for \p I where possible; if salvaging is not
+/// possible, mark the debug uses of \p I as undef.
+void llvm::salvageDebugInfo(Instruction &I) {
+ SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
+ findDbgUsers(DbgUsers, &I);
+ salvageDebugInfoForDbgValues(I, DbgUsers);
+}
+
+/// Salvage the address component of \p DAI.
+static void salvageDbgAssignAddress(DbgAssignIntrinsic *DAI) {
+ Instruction *I = dyn_cast<Instruction>(DAI->getAddress());
+ // Only instructions can be salvaged at the moment.
+ if (!I)
+ return;
+
+ assert(!DAI->getAddressExpression()->getFragmentInfo().has_value() &&
+ "address-expression shouldn't have fragment info");
+
+ // The address component of a dbg.assign cannot be variadic.
+ uint64_t CurrentLocOps = 0;
+ SmallVector<Value *, 4> AdditionalValues;
+ SmallVector<uint64_t, 16> Ops;
+ Value *NewV = salvageDebugInfoImpl(*I, CurrentLocOps, Ops, AdditionalValues);
+
+ // Check if the salvage failed.
+ if (!NewV)
+ return;
+
+ DIExpression *SalvagedExpr = DIExpression::appendOpsToArg(
+ DAI->getAddressExpression(), Ops, 0, /*StackValue=*/false);
+ assert(!SalvagedExpr->getFragmentInfo().has_value() &&
+ "address-expression shouldn't have fragment info");
+
+ // Salvage succeeds if no additional values are required.
+ if (AdditionalValues.empty()) {
+ DAI->setAddress(NewV);
+ DAI->setAddressExpression(SalvagedExpr);
+ } else {
+ DAI->setKillAddress();
+ }
+}
+
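+// Rewrite each debug user of \p I so that it no longer refers to \p I: the
+// effect of \p I is folded into the user's DIExpression once per appearance
+// of \p I among its location operands. Users that cannot be rewritten within
+// the limits below get their locations killed; if nothing could be salvaged
+// at all, every user's location is killed.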
+void llvm::salvageDebugInfoForDbgValues(
+ Instruction &I, ArrayRef<DbgVariableIntrinsic *> DbgUsers) {
+ // These are arbitrarily chosen limits on the maximum number of values and
+ // the maximum size of a debug expression we can salvage up to, used for
+ // performance reasons.
+ const unsigned MaxDebugArgs = 16;
+ const unsigned MaxExpressionSize = 128;
+ bool Salvaged = false;
+
+ for (auto *DII : DbgUsers) {
+ if (auto *DAI = dyn_cast<DbgAssignIntrinsic>(DII)) {
+ if (DAI->getAddress() == &I) {
+ salvageDbgAssignAddress(DAI);
+ Salvaged = true;
+ }
+ if (DAI->getValue() != &I)
+ continue;
+ }
+
+ // Do not add DW_OP_stack_value for DbgDeclare and DbgAddr, because they
+ // are implicitly pointing out the value as a DWARF memory location
+ // description.
+ bool StackValue = isa<DbgValueInst>(DII);
+ auto DIILocation = DII->location_ops();
+ assert(
+ is_contained(DIILocation, &I) &&
+ "DbgVariableIntrinsic must use salvaged instruction as its location");
+ SmallVector<Value *, 4> AdditionalValues;
+ // `I` may appear more than once in DII's location ops, and each use of `I`
+ // must be updated in the DIExpression and potentially have additional
+ // values added; thus we call salvageDebugInfoImpl for each `I` instance in
+ // DIILocation.
+ Value *Op0 = nullptr;
+ DIExpression *SalvagedExpr = DII->getExpression();
+ auto LocItr = find(DIILocation, &I);
+ while (SalvagedExpr && LocItr != DIILocation.end()) {
+ SmallVector<uint64_t, 16> Ops;
+ unsigned LocNo = std::distance(DIILocation.begin(), LocItr);
+ uint64_t CurrentLocOps = SalvagedExpr->getNumLocationOperands();
+ Op0 = salvageDebugInfoImpl(I, CurrentLocOps, Ops, AdditionalValues);
+ if (!Op0)
+ break;
+ SalvagedExpr =
+ DIExpression::appendOpsToArg(SalvagedExpr, Ops, LocNo, StackValue);
+ LocItr = std::find(++LocItr, DIILocation.end(), &I);
+ }
+ // salvageDebugInfoImpl should fail either when examining the first element
+ // of DbgUsers, or for none of them.
+ if (!Op0)
+ break;
+
+ DII->replaceVariableLocationOp(&I, Op0);
+ bool IsValidSalvageExpr = SalvagedExpr->getNumElements() <= MaxExpressionSize;
+ if (AdditionalValues.empty() && IsValidSalvageExpr) {
+ DII->setExpression(SalvagedExpr);
+ } else if (isa<DbgValueInst>(DII) && !isa<DbgAssignIntrinsic>(DII) &&
+ IsValidSalvageExpr &&
+ DII->getNumVariableLocationOps() + AdditionalValues.size() <=
+ MaxDebugArgs) {
+ DII->addVariableLocationOps(AdditionalValues, SalvagedExpr);
+ } else {
+ // Do not salvage using DIArgList for dbg.addr/dbg.declare, as it is
+ // not currently supported in those instructions. Do not salvage using
+ // DIArgList for dbg.assign yet. FIXME: support this.
+ // Also do not salvage if the resulting DIArgList would contain an
+ // unreasonably large number of values.
+ DII->setKillLocation();
+ }
+ LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n');
+ Salvaged = true;
+ }
+
+ if (Salvaged)
+ return;
+
+ for (auto *DII : DbgUsers)
+ DII->setKillLocation();
+}
+
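+/// Rewrite the offset computation of \p GEP as DIExpression opcodes appended
+/// to \p Opcodes: the constant offset is folded in directly, and each
+/// variable index becomes an extra location operand in \p AdditionalValues.
+/// Returns the GEP's pointer operand, or nullptr if the offsets cannot be
+/// determined.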
+Value *getSalvageOpsForGEP(GetElementPtrInst *GEP, const DataLayout &DL,
+ uint64_t CurrentLocOps,
+ SmallVectorImpl<uint64_t> &Opcodes,
+ SmallVectorImpl<Value *> &AdditionalValues) {
+ unsigned BitWidth = DL.getIndexSizeInBits(GEP->getPointerAddressSpace());
+ // Rewrite a GEP into a DIExpression.
+ MapVector<Value *, APInt> VariableOffsets;
+ APInt ConstantOffset(BitWidth, 0);
+ if (!GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset))
+ return nullptr;
+ if (!VariableOffsets.empty() && !CurrentLocOps) {
+ Opcodes.insert(Opcodes.begin(), {dwarf::DW_OP_LLVM_arg, 0});
+ CurrentLocOps = 1;
+ }
+ for (auto Offset : VariableOffsets) {
+ AdditionalValues.push_back(Offset.first);
+ assert(Offset.second.isStrictlyPositive() &&
+ "Expected strictly positive multiplier for offset.");
+ Opcodes.append({dwarf::DW_OP_LLVM_arg, CurrentLocOps++, dwarf::DW_OP_constu,
+ Offset.second.getZExtValue(), dwarf::DW_OP_mul,
+ dwarf::DW_OP_plus});
+ }
+ DIExpression::appendOffset(Opcodes, ConstantOffset.getSExtValue());
+ return GEP->getOperand(0);
+}
+
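+/// Map an LLVM binary opcode to the corresponding DWARF expression operator,
+/// or return 0 if there is no direct equivalent.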
+uint64_t getDwarfOpForBinOp(Instruction::BinaryOps Opcode) {
+ switch (Opcode) {
+ case Instruction::Add:
+ return dwarf::DW_OP_plus;
+ case Instruction::Sub:
+ return dwarf::DW_OP_minus;
+ case Instruction::Mul:
+ return dwarf::DW_OP_mul;
+ case Instruction::SDiv:
+ return dwarf::DW_OP_div;
+ case Instruction::SRem:
+ return dwarf::DW_OP_mod;
+ case Instruction::Or:
+ return dwarf::DW_OP_or;
+ case Instruction::And:
+ return dwarf::DW_OP_and;
+ case Instruction::Xor:
+ return dwarf::DW_OP_xor;
+ case Instruction::Shl:
+ return dwarf::DW_OP_shl;
+ case Instruction::LShr:
+ return dwarf::DW_OP_shr;
+ case Instruction::AShr:
+ return dwarf::DW_OP_shra;
+ default:
+ // TODO: Salvage from each kind of binop we know about.
+ return 0;
+ }
+}
+
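+/// Rewrite the binary operator \p BI as DIExpression opcodes. A constant
+/// integer RHS is folded directly into the expression (add/sub become a plain
+/// offset); any other RHS becomes an extra location operand. Returns the LHS
+/// to keep tracking, or nullptr if the operation has no DWARF representation.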
+Value *getSalvageOpsForBinOp(BinaryOperator *BI, uint64_t CurrentLocOps,
+ SmallVectorImpl<uint64_t> &Opcodes,
+ SmallVectorImpl<Value *> &AdditionalValues) {
+ // Handle binary operations with constant integer operands as a special case.
+ auto *ConstInt = dyn_cast<ConstantInt>(BI->getOperand(1));
+ // Values wider than 64 bits cannot be represented within a DIExpression.
+ if (ConstInt && ConstInt->getBitWidth() > 64)
+ return nullptr;
+
+ Instruction::BinaryOps BinOpcode = BI->getOpcode();
+ // Push any Constant Int operand onto the expression stack.
+ if (ConstInt) {
+ uint64_t Val = ConstInt->getSExtValue();
+ // Add or Sub Instructions with a constant operand can potentially be
+ // simplified.
+ if (BinOpcode == Instruction::Add || BinOpcode == Instruction::Sub) {
+ uint64_t Offset = BinOpcode == Instruction::Add ? Val : -int64_t(Val);
+ DIExpression::appendOffset(Opcodes, Offset);
+ return BI->getOperand(0);
+ }
+ Opcodes.append({dwarf::DW_OP_constu, Val});
+ } else {
+ if (!CurrentLocOps) {
+ Opcodes.append({dwarf::DW_OP_LLVM_arg, 0});
+ CurrentLocOps = 1;
+ }
+ Opcodes.append({dwarf::DW_OP_LLVM_arg, CurrentLocOps});
+ AdditionalValues.push_back(BI->getOperand(1));
+ }
+
+ // Add salvaged binary operator to expression stack, if it has a valid
+ // representation in a DIExpression.
+ uint64_t DwarfBinOp = getDwarfOpForBinOp(BinOpcode);
+ if (!DwarfBinOp)
+ return nullptr;
+ Opcodes.push_back(DwarfBinOp);
+ return BI->getOperand(0);
+}
+
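+// Produce DIExpression opcodes (and, where needed, extra location operands)
+// that recover the value of \p I from its operands. Currently handles no-op
+// and integer-like casts, GEPs, and binary operators; returns the operand to
+// track in place of \p I, or nullptr if \p I cannot be salvaged.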
+Value *llvm::salvageDebugInfoImpl(Instruction &I, uint64_t CurrentLocOps,
+ SmallVectorImpl<uint64_t> &Ops,
+ SmallVectorImpl<Value *> &AdditionalValues) {
+ auto &M = *I.getModule();
+ auto &DL = M.getDataLayout();
+
+ if (auto *CI = dyn_cast<CastInst>(&I)) {
+ Value *FromValue = CI->getOperand(0);
+ // No-op casts are irrelevant for debug info.
+ if (CI->isNoopCast(DL)) {
+ return FromValue;
+ }
+
+ Type *Type = CI->getType();
+ if (Type->isPointerTy())
+ Type = DL.getIntPtrType(Type);
+ // Casts other than Trunc, SExt, or ZExt to scalar types cannot be salvaged.
+ if (Type->isVectorTy() ||
+ !(isa<TruncInst>(&I) || isa<SExtInst>(&I) || isa<ZExtInst>(&I) ||
+ isa<IntToPtrInst>(&I) || isa<PtrToIntInst>(&I)))
+ return nullptr;
+
+ llvm::Type *FromType = FromValue->getType();
+ if (FromType->isPointerTy())
+ FromType = DL.getIntPtrType(FromType);
+
+ unsigned FromTypeBitSize = FromType->getScalarSizeInBits();
+ unsigned ToTypeBitSize = Type->getScalarSizeInBits();
+
+ auto ExtOps = DIExpression::getExtOps(FromTypeBitSize, ToTypeBitSize,
+ isa<SExtInst>(&I));
+ Ops.append(ExtOps.begin(), ExtOps.end());
+ return FromValue;
+ }
+
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(&I))
+ return getSalvageOpsForGEP(GEP, DL, CurrentLocOps, Ops, AdditionalValues);
+ if (auto *BI = dyn_cast<BinaryOperator>(&I))
+ return getSalvageOpsForBinOp(BI, CurrentLocOps, Ops, AdditionalValues);
+
+ // *Not* to do: we should not attempt to salvage load instructions,
+ // because the validity and lifetime of a dbg.value containing
+ // DW_OP_deref becomes difficult to analyze. See PR40628 for examples.
+ return nullptr;
+}
+
+/// A replacement for a dbg.value expression.
+using DbgValReplacement = std::optional<DIExpression *>;
+
+/// Point debug users of \p From to \p To using exprs given by \p RewriteExpr,
+/// possibly moving/undefing users to prevent use-before-def. Returns true if
+/// changes are made.
+static bool rewriteDebugUsers(
+ Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT,
+ function_ref<DbgValReplacement(DbgVariableIntrinsic &DII)> RewriteExpr) {
+ // Find debug users of From.
+ SmallVector<DbgVariableIntrinsic *, 1> Users;
+ findDbgUsers(Users, &From);
+ if (Users.empty())
+ return false;
+
+ // Prevent use-before-def of To.
+ bool Changed = false;
+ SmallPtrSet<DbgVariableIntrinsic *, 1> UndefOrSalvage;
+ if (isa<Instruction>(&To)) {
+ bool DomPointAfterFrom = From.getNextNonDebugInstruction() == &DomPoint;
+
+ for (auto *DII : Users) {
+ // It's common to see a debug user between From and DomPoint. Move it
+ // after DomPoint to preserve the variable update without any reordering.
+ if (DomPointAfterFrom && DII->getNextNonDebugInstruction() == &DomPoint) {
+ LLVM_DEBUG(dbgs() << "MOVE: " << *DII << '\n');
+ DII->moveAfter(&DomPoint);
+ Changed = true;
+
+ // Users which otherwise aren't dominated by the replacement value must
+ // be salvaged or deleted.
+ } else if (!DT.dominates(&DomPoint, DII)) {
+ UndefOrSalvage.insert(DII);
+ }
+ }
+ }
+
+ // Update debug users without use-before-def risk.
+ for (auto *DII : Users) {
+ if (UndefOrSalvage.count(DII))
+ continue;
+
+ DbgValReplacement DVR = RewriteExpr(*DII);
+ if (!DVR)
+ continue;
+
+ DII->replaceVariableLocationOp(&From, &To);
+ DII->setExpression(*DVR);
+ LLVM_DEBUG(dbgs() << "REWRITE: " << *DII << '\n');
+ Changed = true;
+ }
+
+ if (!UndefOrSalvage.empty()) {
+ // Try to salvage the remaining debug users.
+ salvageDebugInfo(From);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// Check if a bitcast between a value of type \p FromTy to type \p ToTy would
+/// losslessly preserve the bits and semantics of the value. This predicate is
+/// symmetric, i.e swapping \p FromTy and \p ToTy should give the same result.
+///
+/// Note that Type::canLosslesslyBitCastTo is not suitable here because it
+/// allows semantically inequivalent bitcasts, such as <2 x i64> -> <4 x i32>,
+/// and also does not allow lossless pointer <-> integer conversions.
+static bool isBitCastSemanticsPreserving(const DataLayout &DL, Type *FromTy,
+ Type *ToTy) {
+ // Trivially compatible types.
+ if (FromTy == ToTy)
+ return true;
+
+ // Handle compatible pointer <-> integer conversions.
+ if (FromTy->isIntOrPtrTy() && ToTy->isIntOrPtrTy()) {
+ bool SameSize = DL.getTypeSizeInBits(FromTy) == DL.getTypeSizeInBits(ToTy);
+ bool LosslessConversion = !DL.isNonIntegralPointerType(FromTy) &&
+ !DL.isNonIntegralPointerType(ToTy);
+ return SameSize && LosslessConversion;
+ }
+
+ // TODO: This is not exhaustive.
+ return false;
+}
+
+bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To,
+ Instruction &DomPoint, DominatorTree &DT) {
+ // Exit early if From has no debug users.
+ if (!From.isUsedByMetadata())
+ return false;
+
+ assert(&From != &To && "Can't replace something with itself");
+
+ Type *FromTy = From.getType();
+ Type *ToTy = To.getType();
+
+ auto Identity = [&](DbgVariableIntrinsic &DII) -> DbgValReplacement {
+ return DII.getExpression();
+ };
+
+ // Handle no-op conversions.
+ Module &M = *From.getModule();
+ const DataLayout &DL = M.getDataLayout();
+ if (isBitCastSemanticsPreserving(DL, FromTy, ToTy))
+ return rewriteDebugUsers(From, To, DomPoint, DT, Identity);
+
+ // Handle integer-to-integer widening and narrowing.
+ // FIXME: Use DW_OP_convert when it's available everywhere.
+ if (FromTy->isIntegerTy() && ToTy->isIntegerTy()) {
+ uint64_t FromBits = FromTy->getPrimitiveSizeInBits();
+ uint64_t ToBits = ToTy->getPrimitiveSizeInBits();
+ assert(FromBits != ToBits && "Unexpected no-op conversion");
+
+ // When the width of the result grows, assume that a debugger will only
+ // access the low `FromBits` bits when inspecting the source variable.
+ if (FromBits < ToBits)
+ return rewriteDebugUsers(From, To, DomPoint, DT, Identity);
+
+ // The width of the result has shrunk. Use sign/zero extension to describe
+ // the source variable's high bits.
+ auto SignOrZeroExt = [&](DbgVariableIntrinsic &DII) -> DbgValReplacement {
+ DILocalVariable *Var = DII.getVariable();
+
+ // Without knowing signedness, sign/zero extension isn't possible.
+ auto Signedness = Var->getSignedness();
+ if (!Signedness)
+ return std::nullopt;
+
+ bool Signed = *Signedness == DIBasicType::Signedness::Signed;
+ return DIExpression::appendExt(DII.getExpression(), ToBits, FromBits,
+ Signed);
+ };
+ return rewriteDebugUsers(From, To, DomPoint, DT, SignOrZeroExt);
+ }
+
+ // TODO: Floating-point conversions, vectors.
+ return false;
+}
+
+std::pair<unsigned, unsigned>
+llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
+ unsigned NumDeadInst = 0;
+ unsigned NumDeadDbgInst = 0;
+ // Delete the instructions backwards, as it has a reduced likelihood of
+ // having to update as many def-use and use-def chains.
+ Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
+ while (EndInst != &BB->front()) {
+ // Delete the next to last instruction.
+ Instruction *Inst = &*--EndInst->getIterator();
+ if (!Inst->use_empty() && !Inst->getType()->isTokenTy())
+ Inst->replaceAllUsesWith(PoisonValue::get(Inst->getType()));
+ if (Inst->isEHPad() || Inst->getType()->isTokenTy()) {
+ EndInst = Inst;
+ continue;
+ }
+ if (isa<DbgInfoIntrinsic>(Inst))
+ ++NumDeadDbgInst;
+ else
+ ++NumDeadInst;
+ Inst->eraseFromParent();
+ }
+ return {NumDeadInst, NumDeadDbgInst};
+}
+
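+// Replace \p I and every instruction after it in its block with
+// 'unreachable', updating successor PHI nodes and, when provided, the
+// dominator tree and MemorySSA. Returns the number of instructions removed.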
+unsigned llvm::changeToUnreachable(Instruction *I, bool PreserveLCSSA,
+ DomTreeUpdater *DTU,
+ MemorySSAUpdater *MSSAU) {
+ BasicBlock *BB = I->getParent();
+
+ if (MSSAU)
+ MSSAU->changeToUnreachable(I);
+
+ SmallSet<BasicBlock *, 8> UniqueSuccessors;
+
+ // Loop over all of the successors, removing BB's entry from any PHI
+ // nodes.
+ for (BasicBlock *Successor : successors(BB)) {
+ Successor->removePredecessor(BB, PreserveLCSSA);
+ if (DTU)
+ UniqueSuccessors.insert(Successor);
+ }
+ auto *UI = new UnreachableInst(I->getContext(), I);
+ UI->setDebugLoc(I->getDebugLoc());
+
+ // All instructions after this are dead.
+ unsigned NumInstrsRemoved = 0;
+ BasicBlock::iterator BBI = I->getIterator(), BBE = BB->end();
+ while (BBI != BBE) {
+ if (!BBI->use_empty())
+ BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
+ BBI++->eraseFromParent();
+ ++NumInstrsRemoved;
+ }
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+ Updates.reserve(UniqueSuccessors.size());
+ for (BasicBlock *UniqueSuccessor : UniqueSuccessors)
+ Updates.push_back({DominatorTree::Delete, BB, UniqueSuccessor});
+ DTU->applyUpdates(Updates);
+ }
+ return NumInstrsRemoved;
+}
+
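+// Build (but do not insert) a CallInst equivalent to \p II: same callee,
+// arguments, operand bundles, calling convention, attributes, and metadata,
+// with the invoke's branch-weight metadata converted for a call.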
+CallInst *llvm::createCallMatchingInvoke(InvokeInst *II) {
+ SmallVector<Value *, 8> Args(II->args());
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ II->getOperandBundlesAsDefs(OpBundles);
+ CallInst *NewCall = CallInst::Create(II->getFunctionType(),
+ II->getCalledOperand(), Args, OpBundles);
+ NewCall->setCallingConv(II->getCallingConv());
+ NewCall->setAttributes(II->getAttributes());
+ NewCall->setDebugLoc(II->getDebugLoc());
+ NewCall->copyMetadata(*II);
+
+ // If the invoke had profile metadata, try converting it for the CallInst.
+ uint64_t TotalWeight;
+ if (NewCall->extractProfTotalWeight(TotalWeight)) {
+ // Set the total weight if it fits into i32, otherwise reset.
+ MDBuilder MDB(NewCall->getContext());
+ auto NewWeights = uint32_t(TotalWeight) != TotalWeight
+ ? nullptr
+ : MDB.createBranchWeights({uint32_t(TotalWeight)});
+ NewCall->setMetadata(LLVMContext::MD_prof, NewWeights);
+ }
+
+ return NewCall;
+}
+
+// changeToCall - Convert the specified invoke into a normal call.
+CallInst *llvm::changeToCall(InvokeInst *II, DomTreeUpdater *DTU) {
+ CallInst *NewCall = createCallMatchingInvoke(II);
+ NewCall->takeName(II);
+ NewCall->insertBefore(II);
+ II->replaceAllUsesWith(NewCall);
+
+ // Follow the call by a branch to the normal destination.
+ BasicBlock *NormalDestBB = II->getNormalDest();
+ BranchInst::Create(NormalDestBB, II);
+
+ // Update PHI nodes in the unwind destination
+ BasicBlock *BB = II->getParent();
+ BasicBlock *UnwindDestBB = II->getUnwindDest();
+ UnwindDestBB->removePredecessor(BB);
+ II->eraseFromParent();
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDestBB}});
+ return NewCall;
+}
+
+BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
+ BasicBlock *UnwindEdge,
+ DomTreeUpdater *DTU) {
+ BasicBlock *BB = CI->getParent();
+
+ // Convert this function call into an invoke instruction. First, split the
+ // basic block.
+ BasicBlock *Split = SplitBlock(BB, CI, DTU, /*LI=*/nullptr, /*MSSAU*/ nullptr,
+ CI->getName() + ".noexc");
+
+ // Delete the unconditional branch inserted by SplitBlock
+ BB->back().eraseFromParent();
+
+ // Create the new invoke instruction.
+ SmallVector<Value *, 8> InvokeArgs(CI->args());
+ SmallVector<OperandBundleDef, 1> OpBundles;
+
+ CI->getOperandBundlesAsDefs(OpBundles);
+
+ // Note: we're round tripping operand bundles through memory here, and that
+ // can potentially be avoided with a cleverer API design that we do not have
+ // as of this time.
+
+ InvokeInst *II =
+ InvokeInst::Create(CI->getFunctionType(), CI->getCalledOperand(), Split,
+ UnwindEdge, InvokeArgs, OpBundles, CI->getName(), BB);
+ II->setDebugLoc(CI->getDebugLoc());
+ II->setCallingConv(CI->getCallingConv());
+ II->setAttributes(CI->getAttributes());
+ II->setMetadata(LLVMContext::MD_prof, CI->getMetadata(LLVMContext::MD_prof));
+
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, BB, UnwindEdge}});
+
+ // Make sure that anything using the call now uses the invoke! This also
+ // updates the CallGraph if present, because it uses a WeakTrackingVH.
+ CI->replaceAllUsesWith(II);
+
+ // Delete the original call
+ Split->front().eraseFromParent();
+ return Split;
+}
+
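+/// Depth-first walk over the blocks reachable from the entry block, recording
+/// them in \p Reachable. Along the way, obviously undefined constructs (false
+/// assumes and guards, calls through null or undef callees, stores to null or
+/// undef, code following no-return calls) are turned into 'unreachable',
+/// nounwind invokes become calls, duplicate catchswitch handlers are dropped,
+/// and terminators are constant-folded. Returns true if any change was made.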
+static bool markAliveBlocks(Function &F,
+ SmallPtrSetImpl<BasicBlock *> &Reachable,
+ DomTreeUpdater *DTU = nullptr) {
+ SmallVector<BasicBlock*, 128> Worklist;
+ BasicBlock *BB = &F.front();
+ Worklist.push_back(BB);
+ Reachable.insert(BB);
+ bool Changed = false;
+ do {
+ BB = Worklist.pop_back_val();
+
+ // Do a quick scan of the basic block, turning any obviously unreachable
+ // instructions into LLVM unreachable insts. The instruction combining pass
+ // canonicalizes unreachable insts into stores to null or undef.
+ for (Instruction &I : *BB) {
+ if (auto *CI = dyn_cast<CallInst>(&I)) {
+ Value *Callee = CI->getCalledOperand();
+ // Handle intrinsic calls.
+ if (Function *F = dyn_cast<Function>(Callee)) {
+ auto IntrinsicID = F->getIntrinsicID();
+ // Assumptions that are known to be false are equivalent to
+ // unreachable. Also, if the condition is undefined, then we make the
+ // choice most beneficial to the optimizer, and choose that to also be
+ // unreachable.
+ if (IntrinsicID == Intrinsic::assume) {
+ if (match(CI->getArgOperand(0), m_CombineOr(m_Zero(), m_Undef()))) {
+ // Don't insert a call to llvm.trap right before the unreachable.
+ changeToUnreachable(CI, false, DTU);
+ Changed = true;
+ break;
+ }
+ } else if (IntrinsicID == Intrinsic::experimental_guard) {
+ // A call to the guard intrinsic bails out of the current
+ // compilation unit if the predicate passed to it is false. If the
+ // predicate is a constant false, then we know the guard will bail
+ // out of the current compile unconditionally, so all code following
+ // it is dead.
+ //
+ // Note: unlike in llvm.assume, it is not "obviously profitable" for
+ // guards to treat `undef` as `false` since a guard on `undef` can
+ // still be useful for widening.
+ if (match(CI->getArgOperand(0), m_Zero()))
+ if (!isa<UnreachableInst>(CI->getNextNode())) {
+ changeToUnreachable(CI->getNextNode(), false, DTU);
+ Changed = true;
+ break;
+ }
+ }
+ } else if ((isa<ConstantPointerNull>(Callee) &&
+ !NullPointerIsDefined(CI->getFunction(),
+ cast<PointerType>(Callee->getType())
+ ->getAddressSpace())) ||
+ isa<UndefValue>(Callee)) {
+ changeToUnreachable(CI, false, DTU);
+ Changed = true;
+ break;
+ }
+ if (CI->doesNotReturn() && !CI->isMustTailCall()) {
+ // If we found a call to a no-return function, insert an unreachable
+ // instruction after it. Make sure there isn't *already* one there
+ // though.
+ if (!isa<UnreachableInst>(CI->getNextNode())) {
+ // Don't insert a call to llvm.trap right before the unreachable.
+ changeToUnreachable(CI->getNextNode(), false, DTU);
+ Changed = true;
+ }
+ break;
+ }
+ } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
+ // Store to undef and store to null are undefined and used to signal
+ // that they should be changed to unreachable by passes that can't
+ // modify the CFG.
+
+ // Don't touch volatile stores.
+ if (SI->isVolatile()) continue;
+
+ Value *Ptr = SI->getOperand(1);
+
+ if (isa<UndefValue>(Ptr) ||
+ (isa<ConstantPointerNull>(Ptr) &&
+ !NullPointerIsDefined(SI->getFunction(),
+ SI->getPointerAddressSpace()))) {
+ changeToUnreachable(SI, false, DTU);
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ Instruction *Terminator = BB->getTerminator();
+ if (auto *II = dyn_cast<InvokeInst>(Terminator)) {
+ // Turn invokes that call 'nounwind' functions into ordinary calls.
+ Value *Callee = II->getCalledOperand();
+ if ((isa<ConstantPointerNull>(Callee) &&
+ !NullPointerIsDefined(BB->getParent())) ||
+ isa<UndefValue>(Callee)) {
+ changeToUnreachable(II, false, DTU);
+ Changed = true;
+ } else {
+ if (II->doesNotReturn() &&
+ !isa<UnreachableInst>(II->getNormalDest()->front())) {
+ // If we found an invoke of a no-return function,
+ // create a new empty basic block with an `unreachable` terminator,
+ // and set it as the normal destination for the invoke,
+ // unless that is already the case.
+ // Note that the original normal destination could have other uses.
+ BasicBlock *OrigNormalDest = II->getNormalDest();
+ OrigNormalDest->removePredecessor(II->getParent());
+ LLVMContext &Ctx = II->getContext();
+ BasicBlock *UnreachableNormalDest = BasicBlock::Create(
+ Ctx, OrigNormalDest->getName() + ".unreachable",
+ II->getFunction(), OrigNormalDest);
+ new UnreachableInst(Ctx, UnreachableNormalDest);
+ II->setNormalDest(UnreachableNormalDest);
+ if (DTU)
+ DTU->applyUpdates(
+ {{DominatorTree::Delete, BB, OrigNormalDest},
+ {DominatorTree::Insert, BB, UnreachableNormalDest}});
+ Changed = true;
+ }
+ if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(&F)) {
+ if (II->use_empty() && !II->mayHaveSideEffects()) {
+ // jump to the normal destination branch.
+ BasicBlock *NormalDestBB = II->getNormalDest();
+ BasicBlock *UnwindDestBB = II->getUnwindDest();
+ BranchInst::Create(NormalDestBB, II);
+ UnwindDestBB->removePredecessor(II->getParent());
+ II->eraseFromParent();
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDestBB}});
+ } else
+ changeToCall(II, DTU);
+ Changed = true;
+ }
+ }
+ } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Terminator)) {
+ // Remove catchpads which cannot be reached.
+ struct CatchPadDenseMapInfo {
+ static CatchPadInst *getEmptyKey() {
+ return DenseMapInfo<CatchPadInst *>::getEmptyKey();
+ }
+
+ static CatchPadInst *getTombstoneKey() {
+ return DenseMapInfo<CatchPadInst *>::getTombstoneKey();
+ }
+
+ static unsigned getHashValue(CatchPadInst *CatchPad) {
+ return static_cast<unsigned>(hash_combine_range(
+ CatchPad->value_op_begin(), CatchPad->value_op_end()));
+ }
+
+ static bool isEqual(CatchPadInst *LHS, CatchPadInst *RHS) {
+ if (LHS == getEmptyKey() || LHS == getTombstoneKey() ||
+ RHS == getEmptyKey() || RHS == getTombstoneKey())
+ return LHS == RHS;
+ return LHS->isIdenticalTo(RHS);
+ }
+ };
+
+ SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
+ // Set of unique CatchPads.
+ SmallDenseMap<CatchPadInst *, detail::DenseSetEmpty, 4,
+ CatchPadDenseMapInfo, detail::DenseSetPair<CatchPadInst *>>
+ HandlerSet;
+ detail::DenseSetEmpty Empty;
+ for (CatchSwitchInst::handler_iterator I = CatchSwitch->handler_begin(),
+ E = CatchSwitch->handler_end();
+ I != E; ++I) {
+ BasicBlock *HandlerBB = *I;
+ if (DTU)
+ ++NumPerSuccessorCases[HandlerBB];
+ auto *CatchPad = cast<CatchPadInst>(HandlerBB->getFirstNonPHI());
+ if (!HandlerSet.insert({CatchPad, Empty}).second) {
+ if (DTU)
+ --NumPerSuccessorCases[HandlerBB];
+ CatchSwitch->removeHandler(I);
+ --I;
+ --E;
+ Changed = true;
+ }
+ }
+ if (DTU) {
+ std::vector<DominatorTree::UpdateType> Updates;
+ for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
+ if (I.second == 0)
+ Updates.push_back({DominatorTree::Delete, BB, I.first});
+ DTU->applyUpdates(Updates);
+ }
+ }
+
+ Changed |= ConstantFoldTerminator(BB, true, nullptr, DTU);
+ for (BasicBlock *Successor : successors(BB))
+ if (Reachable.insert(Successor).second)
+ Worklist.push_back(Successor);
+ } while (!Worklist.empty());
+ return Changed;
+}
+
+Instruction *llvm::removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU) {
+ Instruction *TI = BB->getTerminator();
+
+ if (auto *II = dyn_cast<InvokeInst>(TI))
+ return changeToCall(II, DTU);
+
+ Instruction *NewTI;
+ BasicBlock *UnwindDest;
+
+ if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
+ NewTI = CleanupReturnInst::Create(CRI->getCleanupPad(), nullptr, CRI);
+ UnwindDest = CRI->getUnwindDest();
+ } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(TI)) {
+ auto *NewCatchSwitch = CatchSwitchInst::Create(
+ CatchSwitch->getParentPad(), nullptr, CatchSwitch->getNumHandlers(),
+ CatchSwitch->getName(), CatchSwitch);
+ for (BasicBlock *PadBB : CatchSwitch->handlers())
+ NewCatchSwitch->addHandler(PadBB);
+
+ NewTI = NewCatchSwitch;
+ UnwindDest = CatchSwitch->getUnwindDest();
+ } else {
+ llvm_unreachable("Could not find unwind successor");
+ }
+
+ NewTI->takeName(TI);
+ NewTI->setDebugLoc(TI->getDebugLoc());
+ UnwindDest->removePredecessor(BB);
+ TI->replaceAllUsesWith(NewTI);
+ TI->eraseFromParent();
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDest}});
+ return NewTI;
+}
+
+/// removeUnreachableBlocks - Remove blocks that are not reachable, even
+/// if they are in a dead cycle. Return true if a change was made, false
+/// otherwise.
+bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
+ MemorySSAUpdater *MSSAU) {
+ SmallPtrSet<BasicBlock *, 16> Reachable;
+ bool Changed = markAliveBlocks(F, Reachable, DTU);
+
+ // If there are unreachable blocks in the CFG...
+ if (Reachable.size() == F.size())
+ return Changed;
+
+ assert(Reachable.size() < F.size());
+
+ // Are there any blocks left to actually delete?
+ SmallSetVector<BasicBlock *, 8> BlocksToRemove;
+ for (BasicBlock &BB : F) {
+ // Skip reachable basic blocks
+ if (Reachable.count(&BB))
+ continue;
+ // Skip already-deleted blocks
+ if (DTU && DTU->isBBPendingDeletion(&BB))
+ continue;
+ BlocksToRemove.insert(&BB);
+ }
+
+ if (BlocksToRemove.empty())
+ return Changed;
+
+ Changed = true;
+ NumRemoved += BlocksToRemove.size();
+
+ if (MSSAU)
+ MSSAU->removeBlocks(BlocksToRemove);
+
+ DeleteDeadBlocks(BlocksToRemove.takeVector(), DTU);
+
+ return Changed;
+}
+
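+// Merge \p J's metadata into \p K for the kinds listed in \p KnownIDs, using
+// a conservative combination for each kind (most generic TBAA/range/fpmath,
+// intersected noalias, and so on). \p DoesKMove indicates that K is being
+// hoisted or sunk, which affects how position-sensitive kinds such as !range
+// and !nonnull are handled.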
+void llvm::combineMetadata(Instruction *K, const Instruction *J,
+ ArrayRef<unsigned> KnownIDs, bool DoesKMove) {
+ SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
+ K->dropUnknownNonDebugMetadata(KnownIDs);
+ K->getAllMetadataOtherThanDebugLoc(Metadata);
+ for (const auto &MD : Metadata) {
+ unsigned Kind = MD.first;
+ MDNode *JMD = J->getMetadata(Kind);
+ MDNode *KMD = MD.second;
+
+ switch (Kind) {
+ default:
+ K->setMetadata(Kind, nullptr); // Remove unknown metadata
+ break;
+ case LLVMContext::MD_dbg:
+ llvm_unreachable("getAllMetadataOtherThanDebugLoc returned a MD_dbg");
+ case LLVMContext::MD_DIAssignID:
+ K->mergeDIAssignID(J);
+ break;
+ case LLVMContext::MD_tbaa:
+ K->setMetadata(Kind, MDNode::getMostGenericTBAA(JMD, KMD));
+ break;
+ case LLVMContext::MD_alias_scope:
+ K->setMetadata(Kind, MDNode::getMostGenericAliasScope(JMD, KMD));
+ break;
+ case LLVMContext::MD_noalias:
+ case LLVMContext::MD_mem_parallel_loop_access:
+ K->setMetadata(Kind, MDNode::intersect(JMD, KMD));
+ break;
+ case LLVMContext::MD_access_group:
+ K->setMetadata(LLVMContext::MD_access_group,
+ intersectAccessGroups(K, J));
+ break;
+ case LLVMContext::MD_range:
+
+ // If K does move, use most generic range. Otherwise keep the range of
+ // K.
+ if (DoesKMove)
+ // FIXME: If K does move, we should drop the range info and nonnull.
+ // Currently this function is used with DoesKMove in passes
+ // doing hoisting/sinking and the current behavior of using the
+ // most generic range is correct in those cases.
+ K->setMetadata(Kind, MDNode::getMostGenericRange(JMD, KMD));
+ break;
+ case LLVMContext::MD_fpmath:
+ K->setMetadata(Kind, MDNode::getMostGenericFPMath(JMD, KMD));
+ break;
+ case LLVMContext::MD_invariant_load:
+ // Only set the !invariant.load if it is present in both instructions.
+ K->setMetadata(Kind, JMD);
+ break;
+ case LLVMContext::MD_nonnull:
+ // If K does move, keep nonnull if it is present in both instructions.
+ if (DoesKMove)
+ K->setMetadata(Kind, JMD);
+ break;
+ case LLVMContext::MD_invariant_group:
+ // Preserve !invariant.group in K.
+ break;
+ case LLVMContext::MD_align:
+ K->setMetadata(Kind,
+ MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
+ break;
+ case LLVMContext::MD_dereferenceable:
+ case LLVMContext::MD_dereferenceable_or_null:
+ K->setMetadata(Kind,
+ MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
+ break;
+ case LLVMContext::MD_preserve_access_index:
+ // Preserve !preserve.access.index in K.
+ break;
+ }
+ }
+ // Set !invariant.group from J if J has it. If both instructions have it
+ // then we will just pick it from J - even when they are different.
+ // Also make sure that K is a load or store - e.g. combining a bitcast with a
+ // load could produce a bitcast with invariant.group metadata, which is
+ // invalid.
+ // FIXME: we should try to preserve both invariant.group md if they are
+ // different, but right now instruction can only have one invariant.group.
+ if (auto *JMD = J->getMetadata(LLVMContext::MD_invariant_group))
+ if (isa<LoadInst>(K) || isa<StoreInst>(K))
+ K->setMetadata(LLVMContext::MD_invariant_group, JMD);
+}
+
+void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J,
+ bool KDominatesJ) {
+ unsigned KnownIDs[] = {
+ LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_range,
+ LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull,
+ LLVMContext::MD_invariant_group, LLVMContext::MD_align,
+ LLVMContext::MD_dereferenceable,
+ LLVMContext::MD_dereferenceable_or_null,
+ LLVMContext::MD_access_group, LLVMContext::MD_preserve_access_index};
+ combineMetadata(K, J, KnownIDs, KDominatesJ);
+}
+
+void llvm::copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source) {
+ SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
+ Source.getAllMetadata(MD);
+ MDBuilder MDB(Dest.getContext());
+ Type *NewType = Dest.getType();
+ const DataLayout &DL = Source.getModule()->getDataLayout();
+ for (const auto &MDPair : MD) {
+ unsigned ID = MDPair.first;
+ MDNode *N = MDPair.second;
+ // Note, essentially every kind of metadata should be preserved here! This
+ // routine is supposed to clone a load instruction changing *only its type*.
+ // The only metadata it makes sense to drop is metadata which is invalidated
+ // when the pointer type changes. This should essentially never be the case
+ // in LLVM, but we explicitly switch over only known metadata to be
+ // conservatively correct. If you are adding metadata to LLVM which pertains
+ // to loads, you almost certainly want to add it here.
+ switch (ID) {
+ case LLVMContext::MD_dbg:
+ case LLVMContext::MD_tbaa:
+ case LLVMContext::MD_prof:
+ case LLVMContext::MD_fpmath:
+ case LLVMContext::MD_tbaa_struct:
+ case LLVMContext::MD_invariant_load:
+ case LLVMContext::MD_alias_scope:
+ case LLVMContext::MD_noalias:
+ case LLVMContext::MD_nontemporal:
+ case LLVMContext::MD_mem_parallel_loop_access:
+ case LLVMContext::MD_access_group:
+ case LLVMContext::MD_noundef:
+ // All of these directly apply.
+ Dest.setMetadata(ID, N);
+ break;
+
+ case LLVMContext::MD_nonnull:
+ copyNonnullMetadata(Source, N, Dest);
+ break;
+
+ case LLVMContext::MD_align:
+ case LLVMContext::MD_dereferenceable:
+ case LLVMContext::MD_dereferenceable_or_null:
+ // These only directly apply if the new type is also a pointer.
+ if (NewType->isPointerTy())
+ Dest.setMetadata(ID, N);
+ break;
+
+ case LLVMContext::MD_range:
+ copyRangeMetadata(DL, Source, N, Dest);
+ break;
+ }
+ }
+}
+
+void llvm::patchReplacementInstruction(Instruction *I, Value *Repl) {
+ auto *ReplInst = dyn_cast<Instruction>(Repl);
+ if (!ReplInst)
+ return;
+
+ // Patch the replacement so that it is not more restrictive than the value
+ // being replaced.
+ // Note that if 'I' is a load being replaced by some operation,
+ // for example, by an arithmetic operation, then andIRFlags()
+ // would just erase all math flags from the original arithmetic
+ // operation, which is clearly not wanted and not needed.
+ if (!isa<LoadInst>(I))
+ ReplInst->andIRFlags(I);
+
+ // FIXME: If both the original and replacement value are part of the
+ // same control-flow region (meaning that the execution of one
+ // guarantees the execution of the other), then we can combine the
+ // noalias scopes here and do better than the general conservative
+ // answer used in combineMetadata().
+
+ // In general, GVN unifies expressions over different control-flow
+ // regions, and so we need a conservative combination of the noalias
+ // scopes.
+ static const unsigned KnownIDs[] = {
+ LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_range,
+ LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
+ LLVMContext::MD_invariant_group, LLVMContext::MD_nonnull,
+ LLVMContext::MD_access_group, LLVMContext::MD_preserve_access_index};
+ combineMetadata(ReplInst, I, KnownIDs, false);
+}
+
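+/// Shared implementation of the replaceDominatedUsesWith() overloads below:
+/// rewrite every use of \p From that \p Dominates reports as dominated by
+/// \p Root to use \p To instead, and return the number of uses replaced.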
+template <typename RootType, typename DominatesFn>
+static unsigned replaceDominatedUsesWith(Value *From, Value *To,
+ const RootType &Root,
+ const DominatesFn &Dominates) {
+ assert(From->getType() == To->getType());
+
+ unsigned Count = 0;
+ for (Use &U : llvm::make_early_inc_range(From->uses())) {
+ if (!Dominates(Root, U))
+ continue;
+ U.set(To);
+ LLVM_DEBUG(dbgs() << "Replace dominated use of '" << From->getName()
+ << "' as " << *To << " in " << *U << "\n");
+ ++Count;
+ }
+ return Count;
+}
+
+unsigned llvm::replaceNonLocalUsesWith(Instruction *From, Value *To) {
+ assert(From->getType() == To->getType());
+ auto *BB = From->getParent();
+ unsigned Count = 0;
+
+ for (Use &U : llvm::make_early_inc_range(From->uses())) {
+ auto *I = cast<Instruction>(U.getUser());
+ if (I->getParent() == BB)
+ continue;
+ U.set(To);
+ ++Count;
+ }
+ return Count;
+}
+
+unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
+ DominatorTree &DT,
+ const BasicBlockEdge &Root) {
+ auto Dominates = [&DT](const BasicBlockEdge &Root, const Use &U) {
+ return DT.dominates(Root, U);
+ };
+ return ::replaceDominatedUsesWith(From, To, Root, Dominates);
+}
+
+unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
+ DominatorTree &DT,
+ const BasicBlock *BB) {
+ auto Dominates = [&DT](const BasicBlock *BB, const Use &U) {
+ return DT.dominates(BB, U);
+ };
+ return ::replaceDominatedUsesWith(From, To, BB, Dominates);
+}
+
+bool llvm::callsGCLeafFunction(const CallBase *Call,
+ const TargetLibraryInfo &TLI) {
+ // Check if the function is specifically marked as a gc leaf function.
+ if (Call->hasFnAttr("gc-leaf-function"))
+ return true;
+ if (const Function *F = Call->getCalledFunction()) {
+ if (F->hasFnAttribute("gc-leaf-function"))
+ return true;
+
+ if (auto IID = F->getIntrinsicID()) {
+ // Most LLVM intrinsics do not take safepoints.
+ return IID != Intrinsic::experimental_gc_statepoint &&
+ IID != Intrinsic::experimental_deoptimize &&
+ IID != Intrinsic::memcpy_element_unordered_atomic &&
+ IID != Intrinsic::memmove_element_unordered_atomic;
+ }
+ }
+
+ // Lib calls can be materialized by some passes, and won't be
+ // marked as 'gc-leaf-function.' All available Libcalls are
+ // GC-leaf.
+ LibFunc LF;
+ if (TLI.getLibFunc(*Call, LF)) {
+ return TLI.has(LF);
+ }
+
+ return false;
+}
+
+void llvm::copyNonnullMetadata(const LoadInst &OldLI, MDNode *N,
+ LoadInst &NewLI) {
+ auto *NewTy = NewLI.getType();
+
+ // This only directly applies if the new type is also a pointer.
+ if (NewTy->isPointerTy()) {
+ NewLI.setMetadata(LLVMContext::MD_nonnull, N);
+ return;
+ }
+
+ // The only other translation we can do is to integral loads with !range
+ // metadata.
+ if (!NewTy->isIntegerTy())
+ return;
+
+ MDBuilder MDB(NewLI.getContext());
+ const Value *Ptr = OldLI.getPointerOperand();
+ auto *ITy = cast<IntegerType>(NewTy);
+ auto *NullInt = ConstantExpr::getPtrToInt(
+ ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
+ auto *NonNullInt = ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
+ NewLI.setMetadata(LLVMContext::MD_range,
+ MDB.createRange(NonNullInt, NullInt));
+}
+
+void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
+ MDNode *N, LoadInst &NewLI) {
+ auto *NewTy = NewLI.getType();
+ // Simply copy the metadata if the type did not change.
+ if (NewTy == OldLI.getType()) {
+ NewLI.setMetadata(LLVMContext::MD_range, N);
+ return;
+ }
+
+ // Give up unless it is converted to a pointer where there is a single very
+ // valuable mapping we can do reliably.
+ // FIXME: It would be nice to propagate this in more ways, but the type
+ // conversions make it hard.
+ if (!NewTy->isPointerTy())
+ return;
+
+ unsigned BitWidth = DL.getPointerTypeSizeInBits(NewTy);
+ if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
+ MDNode *NN = MDNode::get(OldLI.getContext(), std::nullopt);
+ NewLI.setMetadata(LLVMContext::MD_nonnull, NN);
+ }
+}
+
+void llvm::dropDebugUsers(Instruction &I) {
+ SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
+ findDbgUsers(DbgUsers, &I);
+ for (auto *DII : DbgUsers)
+ DII->eraseFromParent();
+}
+
+void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
+ BasicBlock *BB) {
+ // Since we are moving the instructions out of their basic block, we do not
+ // retain their original debug locations (DILocations) or debug intrinsic
+ // instructions.
+ //
+ // Doing so would degrade the debugging experience and adversely affect the
+ // accuracy of profiling information.
+ //
+ // Currently, when hoisting the instructions, we take the following actions:
+ // - Remove their debug intrinsic instructions.
+ // - Set their debug locations to the values from the insertion point.
+ //
+ // As per PR39141 (comment #8), the more fundamental reason why the dbg.values
+ // need to be deleted, is because there will not be any instructions with a
+ // DILocation in either branch left after performing the transformation. We
+ // can only insert a dbg.value after the two branches are joined again.
+ //
+ // See PR38762, PR39243 for more details.
+ //
+ // TODO: Extend llvm.dbg.value to take more than one SSA Value (PR39141) to
+ // encode predicated DIExpressions that yield different results on different
+ // code paths.
+
+ for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
+ Instruction *I = &*II;
+ I->dropUndefImplyingAttrsAndUnknownMetadata();
+ if (I->isUsedByMetadata())
+ dropDebugUsers(*I);
+ if (I->isDebugOrPseudoInst()) {
+ // Remove DbgInfo and pseudo probe Intrinsics.
+ II = I->eraseFromParent();
+ continue;
+ }
+ I->setDebugLoc(InsertPt->getDebugLoc());
+ ++II;
+ }
+ DomBlock->splice(InsertPt->getIterator(), BB, BB->begin(),
+ BB->getTerminator()->getIterator());
+}
+
+namespace {
+
+/// A potential constituent of a bitreverse or bswap expression. See
+/// collectBitParts for a fuller explanation.
+struct BitPart {
+ BitPart(Value *P, unsigned BW) : Provider(P) {
+ Provenance.resize(BW);
+ }
+
+ /// The Value that this is a bitreverse/bswap of.
+ Value *Provider;
+
+ /// The "provenance" of each bit. Provenance[A] = B means that bit A
+ /// in Provider becomes bit B in the result of this expression.
+ SmallVector<int8_t, 32> Provenance; // int8_t means max size is i128.
+
+ enum { Unset = -1 };
+};
+
+} // end anonymous namespace
+
+/// Analyze the specified subexpression and see if it is capable of providing
+/// pieces of a bswap or bitreverse. The subexpression provides a potential
+/// piece of a bswap or bitreverse if it can be proved that each non-zero bit in
+/// the output of the expression came from a corresponding bit in some other
+/// value. This function is recursive, and the end result is a mapping of
+/// bitnumber to bitnumber. It is the caller's responsibility to validate that
+/// the bitnumber to bitnumber mapping is correct for a bswap or bitreverse.
+///
+/// For example, if the current subexpression is "(shl i32 %X, 24)" then we know
+/// that the expression deposits the low byte of %X into the high byte of the
+/// result and that all other bits are zero. This expression is accepted and a
+/// BitPart is returned with Provider set to %X and Provenance[24-31] set to
+/// [0-7].
+///
+/// For vector types, all analysis is performed at the per-element level. No
+/// cross-element analysis is supported (shuffle/insertion/reduction), and all
+/// constant masks must be splatted across all elements.
+///
+/// To avoid revisiting values, the BitPart results are memoized into the
+/// provided map. To avoid unnecessary copying of BitParts, BitParts are
+/// constructed in-place in the \c BPS map. Because of this \c BPS needs to
+/// store BitParts objects, not pointers. As we need the concept of a nullptr
+/// BitParts (Value has been analyzed and the analysis failed), we use an
+/// Optional type instead to provide the same functionality.
+///
+/// Because we pass around references into \c BPS, we must use a container that
+/// does not invalidate internal references (std::map instead of DenseMap).
+static const std::optional<BitPart> &
+collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
+ std::map<Value *, std::optional<BitPart>> &BPS, int Depth,
+ bool &FoundRoot) {
+ auto I = BPS.find(V);
+ if (I != BPS.end())
+ return I->second;
+
+ auto &Result = BPS[V] = std::nullopt;
+ auto BitWidth = V->getType()->getScalarSizeInBits();
+
+ // Can't do integer/elements > 128 bits.
+ if (BitWidth > 128)
+ return Result;
+
+ // Prevent stack overflow by limiting the recursion depth
+ if (Depth == BitPartRecursionMaxDepth) {
+ LLVM_DEBUG(dbgs() << "collectBitParts max recursion depth reached.\n");
+ return Result;
+ }
+
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ Value *X, *Y;
+ const APInt *C;
+
+ // If this is an or instruction, it may be an inner node of the bswap.
+ if (match(V, m_Or(m_Value(X), m_Value(Y)))) {
+ // Check we have both sources and they are from the same provider.
+ const auto &A = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS,
+ Depth + 1, FoundRoot);
+ if (!A || !A->Provider)
+ return Result;
+
+ const auto &B = collectBitParts(Y, MatchBSwaps, MatchBitReversals, BPS,
+ Depth + 1, FoundRoot);
+ if (!B || A->Provider != B->Provider)
+ return Result;
+
+ // Try and merge the two together.
+ Result = BitPart(A->Provider, BitWidth);
+ for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx) {
+ if (A->Provenance[BitIdx] != BitPart::Unset &&
+ B->Provenance[BitIdx] != BitPart::Unset &&
+ A->Provenance[BitIdx] != B->Provenance[BitIdx])
+ return Result = std::nullopt;
+
+ if (A->Provenance[BitIdx] == BitPart::Unset)
+ Result->Provenance[BitIdx] = B->Provenance[BitIdx];
+ else
+ Result->Provenance[BitIdx] = A->Provenance[BitIdx];
+ }
+
+ return Result;
+ }
+
+ // If this is a logical shift by a constant, recurse then shift the result.
+ if (match(V, m_LogicalShift(m_Value(X), m_APInt(C)))) {
+ const APInt &BitShift = *C;
+
+ // Ensure the shift amount is defined.
+ if (BitShift.uge(BitWidth))
+ return Result;
+
+ // For bswap-only, limit shift amounts to whole bytes, for an early exit.
+ if (!MatchBitReversals && (BitShift.getZExtValue() % 8) != 0)
+ return Result;
+
+ const auto &Res = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS,
+ Depth + 1, FoundRoot);
+ if (!Res)
+ return Result;
+ Result = Res;
+
+ // Perform the "shift" on BitProvenance.
+ auto &P = Result->Provenance;
+ if (I->getOpcode() == Instruction::Shl) {
+ P.erase(std::prev(P.end(), BitShift.getZExtValue()), P.end());
+ P.insert(P.begin(), BitShift.getZExtValue(), BitPart::Unset);
+ } else {
+ P.erase(P.begin(), std::next(P.begin(), BitShift.getZExtValue()));
+ P.insert(P.end(), BitShift.getZExtValue(), BitPart::Unset);
+ }
+
+ return Result;
+ }
+
+ // If this is a logical 'and' with a mask that clears bits, recurse then
+ // unset the appropriate bits.
+ if (match(V, m_And(m_Value(X), m_APInt(C)))) {
+ const APInt &AndMask = *C;
+
+ // Check that the mask allows a multiple of 8 bits for a bswap, for an
+ // early exit.
+ unsigned NumMaskedBits = AndMask.countPopulation();
+ if (!MatchBitReversals && (NumMaskedBits % 8) != 0)
+ return Result;
+
+ const auto &Res = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS,
+ Depth + 1, FoundRoot);
+ if (!Res)
+ return Result;
+ Result = Res;
+
+ for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx)
+ // If the AndMask is zero for this bit, clear the bit.
+ if (AndMask[BitIdx] == 0)
+ Result->Provenance[BitIdx] = BitPart::Unset;
+ return Result;
+ }
+
+ // If this is a zext instruction zero extend the result.
+ if (match(V, m_ZExt(m_Value(X)))) {
+ const auto &Res = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS,
+ Depth + 1, FoundRoot);
+ if (!Res)
+ return Result;
+
+ Result = BitPart(Res->Provider, BitWidth);
+ auto NarrowBitWidth = X->getType()->getScalarSizeInBits();
+ for (unsigned BitIdx = 0; BitIdx < NarrowBitWidth; ++BitIdx)
+ Result->Provenance[BitIdx] = Res->Provenance[BitIdx];
+ for (unsigned BitIdx = NarrowBitWidth; BitIdx < BitWidth; ++BitIdx)
+ Result->Provenance[BitIdx] = BitPart::Unset;
+ return Result;
+ }
+
+ // If this is a truncate instruction, extract the lower bits.
+ if (match(V, m_Trunc(m_Value(X)))) {
+ const auto &Res = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS,
+ Depth + 1, FoundRoot);
+ if (!Res)
+ return Result;
+
+ Result = BitPart(Res->Provider, BitWidth);
+ for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx)
+ Result->Provenance[BitIdx] = Res->Provenance[BitIdx];
+ return Result;
+ }
+
+ // BITREVERSE - most likely due to us previously matching a partial
+ // bitreverse.
+ if (match(V, m_BitReverse(m_Value(X)))) {
+ const auto &Res = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS,
+ Depth + 1, FoundRoot);
+ if (!Res)
+ return Result;
+
+ Result = BitPart(Res->Provider, BitWidth);
+ for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx)
+ Result->Provenance[(BitWidth - 1) - BitIdx] = Res->Provenance[BitIdx];
+ return Result;
+ }
+
+ // BSWAP - most likely due to us previously matching a partial bswap.
+ if (match(V, m_BSwap(m_Value(X)))) {
+ const auto &Res = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS,
+ Depth + 1, FoundRoot);
+ if (!Res)
+ return Result;
+
+ unsigned ByteWidth = BitWidth / 8;
+ Result = BitPart(Res->Provider, BitWidth);
+ for (unsigned ByteIdx = 0; ByteIdx < ByteWidth; ++ByteIdx) {
+ unsigned ByteBitOfs = ByteIdx * 8;
+ for (unsigned BitIdx = 0; BitIdx < 8; ++BitIdx)
+ Result->Provenance[(BitWidth - 8 - ByteBitOfs) + BitIdx] =
+ Res->Provenance[ByteBitOfs + BitIdx];
+ }
+ return Result;
+ }
+
+ // Funnel 'double' shifts take 3 operands, 2 inputs and the shift
+ // amount (modulo).
+ // fshl(X,Y,Z): (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+ // fshr(X,Y,Z): (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+ if (match(V, m_FShl(m_Value(X), m_Value(Y), m_APInt(C))) ||
+ match(V, m_FShr(m_Value(X), m_Value(Y), m_APInt(C)))) {
+ // We can treat fshr as a fshl by flipping the modulo amount.
+ unsigned ModAmt = C->urem(BitWidth);
+ if (cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::fshr)
+ ModAmt = BitWidth - ModAmt;
+
+ // For bswap-only, limit shift amounts to whole bytes, for an early exit.
+ if (!MatchBitReversals && (ModAmt % 8) != 0)
+ return Result;
+
+ // Check we have both sources and they are from the same provider.
+ const auto &LHS = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS,
+ Depth + 1, FoundRoot);
+ if (!LHS || !LHS->Provider)
+ return Result;
+
+ const auto &RHS = collectBitParts(Y, MatchBSwaps, MatchBitReversals, BPS,
+ Depth + 1, FoundRoot);
+ if (!RHS || LHS->Provider != RHS->Provider)
+ return Result;
+
+ unsigned StartBitRHS = BitWidth - ModAmt;
+ Result = BitPart(LHS->Provider, BitWidth);
+ for (unsigned BitIdx = 0; BitIdx < StartBitRHS; ++BitIdx)
+ Result->Provenance[BitIdx + ModAmt] = LHS->Provenance[BitIdx];
+ for (unsigned BitIdx = 0; BitIdx < ModAmt; ++BitIdx)
+ Result->Provenance[BitIdx] = RHS->Provenance[BitIdx + StartBitRHS];
+ return Result;
+ }
+ }
+
+ // If we've already found a root input value then we're never going to merge
+ // these back together.
+ if (FoundRoot)
+ return Result;
+
+ // Okay, we got to something that isn't a shift, 'or', 'and', etc. This must
+ // be the root input value to the bswap/bitreverse.
+ FoundRoot = true;
+ Result = BitPart(V, BitWidth);
+ for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx)
+ Result->Provenance[BitIdx] = BitIdx;
+ return Result;
+}
+
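+/// For a bswap, bit From of the input maps to bit To of the result exactly
+/// when the bit keeps its position within its byte and the byte order is
+/// reversed.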
+static bool bitTransformIsCorrectForBSwap(unsigned From, unsigned To,
+ unsigned BitWidth) {
+ if (From % 8 != To % 8)
+ return false;
+ // Convert from bit indices to byte indices and check for a byte reversal.
+ From >>= 3;
+ To >>= 3;
+ BitWidth >>= 3;
+ return From == BitWidth - To - 1;
+}
+
+static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To,
+ unsigned BitWidth) {
+ return From == BitWidth - To - 1;
+}
+
+bool llvm::recognizeBSwapOrBitReverseIdiom(
+ Instruction *I, bool MatchBSwaps, bool MatchBitReversals,
+ SmallVectorImpl<Instruction *> &InsertedInsts) {
+ if (!match(I, m_Or(m_Value(), m_Value())) &&
+ !match(I, m_FShl(m_Value(), m_Value(), m_Value())) &&
+ !match(I, m_FShr(m_Value(), m_Value(), m_Value())))
+ return false;
+ if (!MatchBSwaps && !MatchBitReversals)
+ return false;
+ Type *ITy = I->getType();
+ if (!ITy->isIntOrIntVectorTy() || ITy->getScalarSizeInBits() > 128)
+ return false; // Can't do integer/elements > 128 bits.
+
+ // Try to find all the pieces corresponding to the bswap.
+ bool FoundRoot = false;
+ std::map<Value *, std::optional<BitPart>> BPS;
+ const auto &Res =
+ collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS, 0, FoundRoot);
+ if (!Res)
+ return false;
+ ArrayRef<int8_t> BitProvenance = Res->Provenance;
+ assert(all_of(BitProvenance,
+ [](int8_t I) { return I == BitPart::Unset || 0 <= I; }) &&
+ "Illegal bit provenance index");
+
+ // If the upper bits are zero, then attempt to perform as a truncated op.
+ Type *DemandedTy = ITy;
+ if (BitProvenance.back() == BitPart::Unset) {
+ while (!BitProvenance.empty() && BitProvenance.back() == BitPart::Unset)
+ BitProvenance = BitProvenance.drop_back();
+ if (BitProvenance.empty())
+ return false; // TODO - handle null value?
+ DemandedTy = Type::getIntNTy(I->getContext(), BitProvenance.size());
+ if (auto *IVecTy = dyn_cast<VectorType>(ITy))
+ DemandedTy = VectorType::get(DemandedTy, IVecTy);
+ }
+
+ // Check BitProvenance hasn't found a source larger than the result type.
+ unsigned DemandedBW = DemandedTy->getScalarSizeInBits();
+ if (DemandedBW > ITy->getScalarSizeInBits())
+ return false;
+
+ // Now, is the bit permutation correct for a bswap or a bitreverse? We can
+ // only byteswap values with an even number of bytes.
+ APInt DemandedMask = APInt::getAllOnes(DemandedBW);
+ bool OKForBSwap = MatchBSwaps && (DemandedBW % 16) == 0;
+ bool OKForBitReverse = MatchBitReversals;
+ for (unsigned BitIdx = 0;
+ (BitIdx < DemandedBW) && (OKForBSwap || OKForBitReverse); ++BitIdx) {
+ if (BitProvenance[BitIdx] == BitPart::Unset) {
+ DemandedMask.clearBit(BitIdx);
+ continue;
+ }
+ OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[BitIdx], BitIdx,
+ DemandedBW);
+ OKForBitReverse &= bitTransformIsCorrectForBitReverse(BitProvenance[BitIdx],
+ BitIdx, DemandedBW);
+ }
+
+ Intrinsic::ID Intrin;
+ if (OKForBSwap)
+ Intrin = Intrinsic::bswap;
+ else if (OKForBitReverse)
+ Intrin = Intrinsic::bitreverse;
+ else
+ return false;
+
+ Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, DemandedTy);
+ Value *Provider = Res->Provider;
+
+ // We may need to truncate the provider.
+ if (DemandedTy != Provider->getType()) {
+ auto *Trunc =
+ CastInst::CreateIntegerCast(Provider, DemandedTy, false, "trunc", I);
+ InsertedInsts.push_back(Trunc);
+ Provider = Trunc;
+ }
+
+ Instruction *Result = CallInst::Create(F, Provider, "rev", I);
+ InsertedInsts.push_back(Result);
+
+ if (!DemandedMask.isAllOnes()) {
+ auto *Mask = ConstantInt::get(DemandedTy, DemandedMask);
+ Result = BinaryOperator::Create(Instruction::And, Result, Mask, "mask", I);
+ InsertedInsts.push_back(Result);
+ }
+
+ // We may need to zeroextend back to the result type.
+ if (ITy != Result->getType()) {
+ auto *ExtInst = CastInst::CreateIntegerCast(Result, ITy, false, "zext", I);
+ InsertedInsts.push_back(ExtInst);
+ }
+
+ return true;
+}
+
+// CodeGen has special handling for some string functions that may replace
+// them with target-specific intrinsics. Since that'd skip our interceptors
+// in ASan/MSan/TSan/DFSan, and thus make us miss some memory accesses,
+// we mark affected calls as NoBuiltin, which will disable optimization
+// in CodeGen.
+void llvm::maybeMarkSanitizerLibraryCallNoBuiltin(
+ CallInst *CI, const TargetLibraryInfo *TLI) {
+ Function *F = CI->getCalledFunction();
+ LibFunc Func;
+ if (F && !F->hasLocalLinkage() && F->hasName() &&
+ TLI->getLibFunc(F->getName(), Func) && TLI->hasOptimizedCodeGen(Func) &&
+ !F->doesNotAccessMemory())
+ CI->addFnAttr(Attribute::NoBuiltin);
+}
+
+bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) {
+ // We can't have a PHI with a metadata type.
+ if (I->getOperand(OpIdx)->getType()->isMetadataTy())
+ return false;
+
+ // Early exit.
+ if (!isa<Constant>(I->getOperand(OpIdx)))
+ return true;
+
+ switch (I->getOpcode()) {
+ default:
+ return true;
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ const auto &CB = cast<CallBase>(*I);
+
+ // Can't handle inline asm. Skip it.
+ if (CB.isInlineAsm())
+ return false;
+
+ // Constant bundle operands may need to retain their constant-ness for
+ // correctness.
+ if (CB.isBundleOperand(OpIdx))
+ return false;
+
+ if (OpIdx < CB.arg_size()) {
+ // Some variadic intrinsics require constants in the variadic arguments,
+ // which currently aren't markable as immarg.
+ if (isa<IntrinsicInst>(CB) &&
+ OpIdx >= CB.getFunctionType()->getNumParams()) {
+ // This is known to be OK for stackmap.
+ return CB.getIntrinsicID() == Intrinsic::experimental_stackmap;
+ }
+
+ // gcroot is a special case, since it requires a constant argument which
+ // isn't also required to be a simple ConstantInt.
+ if (CB.getIntrinsicID() == Intrinsic::gcroot)
+ return false;
+
+ // Some intrinsic operands are required to be immediates.
+ return !CB.paramHasAttr(OpIdx, Attribute::ImmArg);
+ }
+
+ // Replacing the called function operand of an intrinsic is never allowed,
+ // but it may be possible for an ordinary call.
+ return !isa<IntrinsicInst>(CB);
+ }
+ case Instruction::ShuffleVector:
+ // Shufflevector masks are constant.
+ return OpIdx != 2;
+ case Instruction::Switch:
+ case Instruction::ExtractValue:
+ // All operands apart from the first are constant.
+ return OpIdx == 0;
+ case Instruction::InsertValue:
+ // All operands apart from the first and the second are constant.
+ return OpIdx < 2;
+ case Instruction::Alloca:
+ // Static allocas (constant size in the entry block) are handled by
+ // prologue/epilogue insertion so they're free anyway. We definitely don't
+ // want to make them non-constant.
+ return !cast<AllocaInst>(I)->isStaticAlloca();
+ case Instruction::GetElementPtr:
+ if (OpIdx == 0)
+ return true;
+ gep_type_iterator It = gep_type_begin(I);
+ for (auto E = std::next(It, OpIdx); It != E; ++It)
+ if (It.isStruct())
+ return false;
+ return true;
+ }
+}
+
+Value *llvm::invertCondition(Value *Condition) {
+ // First: Check if it's a constant
+ if (Constant *C = dyn_cast<Constant>(Condition))
+ return ConstantExpr::getNot(C);
+
+ // Second: If the condition is already inverted, return the original value
+ Value *NotCondition;
+ if (match(Condition, m_Not(m_Value(NotCondition))))
+ return NotCondition;
+
+ BasicBlock *Parent = nullptr;
+ Instruction *Inst = dyn_cast<Instruction>(Condition);
+ if (Inst)
+ Parent = Inst->getParent();
+ else if (Argument *Arg = dyn_cast<Argument>(Condition))
+ Parent = &Arg->getParent()->getEntryBlock();
+ assert(Parent && "Unsupported condition to invert");
+
+ // Third: Check all the users for an invert
+ for (User *U : Condition->users())
+ if (Instruction *I = dyn_cast<Instruction>(U))
+ if (I->getParent() == Parent && match(I, m_Not(m_Specific(Condition))))
+ return I;
+
+ // Last option: Create a new instruction
+ auto *Inverted =
+ BinaryOperator::CreateNot(Condition, Condition->getName() + ".inv");
+ if (Inst && !isa<PHINode>(Inst))
+ Inverted->insertAfter(Inst);
+ else
+ Inverted->insertBefore(&*Parent->getFirstInsertionPt());
+ return Inverted;
+}
+
+bool llvm::inferAttributesFromOthers(Function &F) {
+ // Note: We explicitly check for attributes rather than using cover functions
+ // because some of the cover functions include the logic being implemented.
+
+ bool Changed = false;
+ // readnone + not convergent implies nosync
+ if (!F.hasFnAttribute(Attribute::NoSync) &&
+ F.doesNotAccessMemory() && !F.isConvergent()) {
+ F.setNoSync();
+ Changed = true;
+ }
+
+ // readonly implies nofree
+ if (!F.hasFnAttribute(Attribute::NoFree) && F.onlyReadsMemory()) {
+ F.setDoesNotFreeMemory();
+ Changed = true;
+ }
+
+ // willreturn implies mustprogress
+ if (!F.hasFnAttribute(Attribute::MustProgress) && F.willReturn()) {
+ F.setMustProgress();
+ Changed = true;
+ }
+
+ // TODO: There are a bunch of cases of restrictive memory effects we
+ // can infer by inspecting arguments of argmemonly-ish functions.
+
+ return Changed;
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LoopPeel.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LoopPeel.cpp
new file mode 100644
index 0000000000..2acbe90023
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/LoopPeel.cpp
@@ -0,0 +1,1040 @@
+//===- LoopPeel.cpp -------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Loop Peeling Utilities.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LoopPeel.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopSimplify.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <optional>
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+#define DEBUG_TYPE "loop-peel"
+
+STATISTIC(NumPeeled, "Number of loops peeled");
+
+static cl::opt<unsigned> UnrollPeelCount(
+ "unroll-peel-count", cl::Hidden,
+ cl::desc("Set the unroll peeling count, for testing purposes"));
+
+static cl::opt<bool>
+ UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
+ cl::desc("Allows loops to be peeled when the dynamic "
+ "trip count is known to be low."));
+
+static cl::opt<bool>
+ UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling",
+ cl::init(false), cl::Hidden,
+ cl::desc("Allows loop nests to be peeled."));
+
+static cl::opt<unsigned> UnrollPeelMaxCount(
+ "unroll-peel-max-count", cl::init(7), cl::Hidden,
+ cl::desc("Max average trip count which will cause loop peeling."));
+
+static cl::opt<unsigned> UnrollForcePeelCount(
+ "unroll-force-peel-count", cl::init(0), cl::Hidden,
+ cl::desc("Force a peel count regardless of profiling information."));
+
+static cl::opt<bool> DisableAdvancedPeeling(
+ "disable-advanced-peeling", cl::init(false), cl::Hidden,
+ cl::desc(
+ "Disable advance peeling. Issues for convergent targets (D134803)."));
+
+static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
+
+// Check whether we are capable of peeling this loop.
+bool llvm::canPeel(const Loop *L) {
+ // Make sure the loop is in simplified form
+ if (!L->isLoopSimplifyForm())
+ return false;
+ if (!DisableAdvancedPeeling)
+ return true;
+
+ SmallVector<BasicBlock *, 4> Exits;
+ L->getUniqueNonLatchExitBlocks(Exits);
+ // The latch must either be the only exiting block, or every non-latch exit
+ // block must have either a deopt or unreachable terminator, or be part of a
+ // chain of blocks whose last block is terminated by deopt or unreachable.
+ // Both deopt and unreachable terminators are a strong indication that those
+ // exits are not taken. Note that this is a profitability check, not a
+ // legality check. Also note that LoopPeeling currently can only update the
+ // branch weights of latch blocks, and branch weights of edges to deopt or
+ // unreachable blocks do not need updating.
+ return llvm::all_of(Exits, IsBlockFollowedByDeoptOrUnreachable);
+}
+
+namespace {
+
+// As a loop is peeled, it may be the case that Phi nodes become
+// loop-invariant (i.e., known because there is only one choice).
+// For example, consider the following function:
+// void g(int);
+// void binary() {
+// int x = 0;
+// int y = 0;
+// int a = 0;
+// for(int i = 0; i <100000; ++i) {
+// g(x);
+// x = y;
+// g(a);
+// y = a + 1;
+// a = 5;
+// }
+// }
+// Peeling 3 iterations is beneficial because the values for x, y and a
+// become known. The IR for this loop looks something like the following:
+//
+// %i = phi i32 [ 0, %entry ], [ %inc, %if.end ]
+// %a = phi i32 [ 0, %entry ], [ 5, %if.end ]
+// %y = phi i32 [ 0, %entry ], [ %add, %if.end ]
+// %x = phi i32 [ 0, %entry ], [ %y, %if.end ]
+// ...
+// tail call void @_Z1gi(i32 signext %x)
+// tail call void @_Z1gi(i32 signext %a)
+// %add = add nuw nsw i32 %a, 1
+// %inc = add nuw nsw i32 %i, 1
+// %exitcond = icmp eq i32 %inc, 100000
+// br i1 %exitcond, label %for.cond.cleanup, label %for.body
+//
+// The arguments for the calls to g will become known after 3 iterations
+// of the loop, because the phi nodes' values become known after 3 iterations
+// of the loop (i.e., they are known on the 4th iteration, so peel 3 iterations).
+// The first iteration has g(0), g(0); the second has g(0), g(5); the
+// third has g(1), g(5) and the fourth (and all subsequent) have g(6), g(5).
+// Now consider the phi nodes:
+// %a is a phi with constants so it is determined after iteration 1.
+// %y is a phi based on a constant and %a so it is determined on
+// the iteration after %a is determined, so iteration 2.
+// %x is a phi based on a constant and %y so it is determined on
+// the iteration after %y, so iteration 3.
+// %i is based on itself (and is an induction variable) so it is
+// never determined.
+// This means that peeling off 3 iterations will result in being able to
+// remove the phi nodes for %a, %y, and %x. The arguments for the
+// corresponding calls to g are determined and the code for computing
+// x, y, and a can be removed.
+//
+// The PhiAnalyzer class calculates how many times a loop should be
+// peeled based on the above analysis of the phi nodes in the loop while
+// respecting the maximum specified.
+class PhiAnalyzer {
+public:
+ PhiAnalyzer(const Loop &L, unsigned MaxIterations);
+
+ // Calculate the sufficient minimum number of iterations of the loop to peel
+ // such that phi instructions become determined (subject to allowable limits)
+ std::optional<unsigned> calculateIterationsToPeel();
+
+protected:
+ using PeelCounter = std::optional<unsigned>;
+ const PeelCounter Unknown = std::nullopt;
+
+ // Add 1, respecting Unknown, and return Unknown if the result exceeds MaxIterations
+ PeelCounter addOne(PeelCounter PC) const {
+ if (PC == Unknown)
+ return Unknown;
+ return (*PC + 1 <= MaxIterations) ? PeelCounter{*PC + 1} : Unknown;
+ }
+
+ // Calculate the number of iterations after which the given value
+ // becomes an invariant.
+ PeelCounter calculate(const Value &);
+
+ const Loop &L;
+ const unsigned MaxIterations;
+
+ // Map of Values to number of iterations to invariance
+ SmallDenseMap<const Value *, PeelCounter> IterationsToInvariance;
+};
+
+PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations)
+ : L(L), MaxIterations(MaxIterations) {
+ assert(canPeel(&L) && "loop is not suitable for peeling");
+ assert(MaxIterations > 0 && "no peeling is allowed?");
+}
+
+// This function calculates the number of iterations after which the value
+// becomes an invariant. The pre-calculated values are memoized in a map.
+// N.B. This number will be Unknown or <= MaxIterations.
+// The function is calculated according to the following definition:
+// Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
+// F(%x) = G(%y) + 1 (N.B. [MaxIterations | Unknown] + 1 => Unknown)
+// G(%y) = 0 if %y is a loop invariant
+// G(%y) = G(%BackEdgeValue) if %y is a phi in the header block
+// G(%y) = TODO: if %y is an expression based on phis and loop invariants
+// The example looks like:
+// %x = phi(0, %a) <-- becomes invariant starting from 3rd iteration.
+// %y = phi(0, 5)
+// %a = %y + 1
+// G(%y) = Unknown otherwise (including phi not in header block)
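+//
+// As a worked example (following the rules implemented below) for the small
+// loop above:
+//   calculate(%y): %y is a header phi; its back-edge input 5 is loop
+//                  invariant, so G(5) = 0 and the result is 0 + 1 = 1.
+//   calculate(%a): %a = %y + 1 is a binary op, so it takes the max of its
+//                  operands: max(calculate(%y), calculate(1)) = max(1, 0) = 1.
+//   calculate(%x): %x is a header phi whose back-edge input is %a, so the
+//                  result is 1 + 1 = 2, i.e. %x becomes invariant starting
+//                  from the 3rd iteration, matching the note above.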
+PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) {
+ // If we already know the answer, take it from the map.
+ auto I = IterationsToInvariance.find(&V);
+ if (I != IterationsToInvariance.end())
+ return I->second;
+
+ // Place Unknown in the map to avoid infinite recursion. Such
+ // cycles can never resolve to an invariant.
+ IterationsToInvariance[&V] = Unknown;
+
+ if (L.isLoopInvariant(&V))
+ // Loop invariant so known at start.
+ return (IterationsToInvariance[&V] = 0);
+ if (const PHINode *Phi = dyn_cast<PHINode>(&V)) {
+ if (Phi->getParent() != L.getHeader()) {
+ // Phi is not in header block so Unknown.
+ assert(IterationsToInvariance[&V] == Unknown && "unexpected value saved");
+ return Unknown;
+ }
+ // We need to analyze the input from the back edge and add 1.
+ Value *Input = Phi->getIncomingValueForBlock(L.getLoopLatch());
+ PeelCounter Iterations = calculate(*Input);
+ assert(IterationsToInvariance[Input] == Iterations &&
+ "unexpected value saved");
+ return (IterationsToInvariance[Phi] = addOne(Iterations));
+ }
+ if (const Instruction *I = dyn_cast<Instruction>(&V)) {
+ if (isa<CmpInst>(I) || I->isBinaryOp()) {
+ // Binary instructions get the max of the operands.
+ PeelCounter LHS = calculate(*I->getOperand(0));
+ if (LHS == Unknown)
+ return Unknown;
+ PeelCounter RHS = calculate(*I->getOperand(1));
+ if (RHS == Unknown)
+ return Unknown;
+ return (IterationsToInvariance[I] = {std::max(*LHS, *RHS)});
+ }
+ if (I->isCast())
+ // Cast instructions get the value of the operand.
+ return (IterationsToInvariance[I] = calculate(*I->getOperand(0)));
+ }
+ // TODO: handle more expressions
+
+ // Everything else is Unknown.
+ assert(IterationsToInvariance[&V] == Unknown && "unexpected value saved");
+ return Unknown;
+}
+
+std::optional<unsigned> PhiAnalyzer::calculateIterationsToPeel() {
+ unsigned Iterations = 0;
+ for (auto &PHI : L.getHeader()->phis()) {
+ PeelCounter ToInvariance = calculate(PHI);
+ if (ToInvariance != Unknown) {
+ assert(*ToInvariance <= MaxIterations && "bad result in phi analysis");
+ Iterations = std::max(Iterations, *ToInvariance);
+ if (Iterations == MaxIterations)
+ break;
+ }
+ }
+ assert((Iterations <= MaxIterations) && "bad result in phi analysis");
+ return Iterations ? std::optional<unsigned>(Iterations) : std::nullopt;
+}
+
+} // unnamed namespace
+
+// Try to find any invariant memory reads that will become dereferenceable in
+// the remainder loop after peeling. The load must also be used (transitively)
+// by an exit condition. Returns the number of iterations to peel off (at the
+// moment either 0 or 1).
+static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L,
+ DominatorTree &DT,
+ AssumptionCache *AC) {
+ // Skip loops with a single exiting block, because there should be no benefit
+ // for the heuristic below.
+ if (L.getExitingBlock())
+ return 0;
+
+ // All non-latch exit blocks must have an UnreachableInst terminator.
+ // Otherwise the heuristic below may not be profitable.
+ SmallVector<BasicBlock *, 4> Exits;
+ L.getUniqueNonLatchExitBlocks(Exits);
+ if (any_of(Exits, [](const BasicBlock *BB) {
+ return !isa<UnreachableInst>(BB->getTerminator());
+ }))
+ return 0;
+
+ // Now look for invariant loads that dominate the latch and are not known to
+ // be dereferenceable. If there are such loads and no writes, they will become
+ // dereferenceable in the loop if the first iteration is peeled off. Also
+ // collect the set of instructions controlled by such loads. Only peel if an
+ // exit condition uses (transitively) such a load.
+ BasicBlock *Header = L.getHeader();
+ BasicBlock *Latch = L.getLoopLatch();
+ SmallPtrSet<Value *, 8> LoadUsers;
+ const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
+ for (BasicBlock *BB : L.blocks()) {
+ for (Instruction &I : *BB) {
+ if (I.mayWriteToMemory())
+ return 0;
+
+ auto Iter = LoadUsers.find(&I);
+ if (Iter != LoadUsers.end()) {
+ for (Value *U : I.users())
+ LoadUsers.insert(U);
+ }
+ // Do not look for reads in the header; they can already be hoisted
+ // without peeling.
+ if (BB == Header)
+ continue;
+ if (auto *LI = dyn_cast<LoadInst>(&I)) {
+ Value *Ptr = LI->getPointerOperand();
+ if (DT.dominates(BB, Latch) && L.isLoopInvariant(Ptr) &&
+ !isDereferenceablePointer(Ptr, LI->getType(), DL, LI, AC, &DT))
+ for (Value *U : I.users())
+ LoadUsers.insert(U);
+ }
+ }
+ }
+ SmallVector<BasicBlock *> ExitingBlocks;
+ L.getExitingBlocks(ExitingBlocks);
+ if (any_of(ExitingBlocks, [&LoadUsers](BasicBlock *Exiting) {
+ return LoadUsers.contains(Exiting->getTerminator());
+ }))
+ return 1;
+ return 0;
+}
+
+// Return the number of iterations to peel off that make conditions in the
+// body true/false. For example, if we peel 2 iterations off the loop below,
+// the condition i < 2 can be evaluated at compile time.
+// for (i = 0; i < n; i++) {
+// if (i < 2)
+// ..
+// else
+// ..
+// }
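+// As a worked example for the loop above, with {0,+,1} as the AddRec for i
+// and 2 as the invariant right-hand side:
+//   peel count 0: IterVal = 0, "0 < 2" is known true  -> peel one more
+//   peel count 1: IterVal = 1, "1 < 2" is known true  -> peel one more
+//   peel count 2: IterVal = 2, "2 < 2" is no longer known true, but its
+//                 inverse "2 >= 2" is known, so the desired peel count
+//                 becomes 2 and the branch folds in the remainder loop.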
+static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
+ ScalarEvolution &SE) {
+ assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
+ unsigned DesiredPeelCount = 0;
+
+ for (auto *BB : L.blocks()) {
+ auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || BI->isUnconditional())
+ continue;
+
+ // Ignore loop exit condition.
+ if (L.getLoopLatch() == BB)
+ continue;
+
+ Value *Condition = BI->getCondition();
+ Value *LeftVal, *RightVal;
+ CmpInst::Predicate Pred;
+ if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
+ continue;
+
+ const SCEV *LeftSCEV = SE.getSCEV(LeftVal);
+ const SCEV *RightSCEV = SE.getSCEV(RightVal);
+
+ // Do not consider predicates that are known to be true or false
+ // independently of the loop iteration.
+ if (SE.evaluatePredicate(Pred, LeftSCEV, RightSCEV))
+ continue;
+
+ // Check if we have a condition with one AddRec and one non AddRec
+ // expression. Normalize LeftSCEV to be the AddRec.
+ if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
+ if (isa<SCEVAddRecExpr>(RightSCEV)) {
+ std::swap(LeftSCEV, RightSCEV);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ } else
+ continue;
+ }
+
+ const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);
+
+ // Avoid huge SCEV computations in the loop below, make sure we only
+ // consider AddRecs of the loop we are trying to peel.
+ if (!LeftAR->isAffine() || LeftAR->getLoop() != &L)
+ continue;
+ if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) &&
+ !SE.getMonotonicPredicateType(LeftAR, Pred))
+ continue;
+
+ // Check if extending the current DesiredPeelCount lets us evaluate Pred
+ // or !Pred in the loop body statically.
+ unsigned NewPeelCount = DesiredPeelCount;
+
+ const SCEV *IterVal = LeftAR->evaluateAtIteration(
+ SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE);
+
+ // If the original condition is not known, get the negated predicate
+ // (which holds on the else branch) and check if it is known. This allows
+ // us to peel off iterations that make the original condition false.
+ if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV))
+ Pred = ICmpInst::getInversePredicate(Pred);
+
+ const SCEV *Step = LeftAR->getStepRecurrence(SE);
+ const SCEV *NextIterVal = SE.getAddExpr(IterVal, Step);
+ auto PeelOneMoreIteration = [&IterVal, &NextIterVal, &SE, Step,
+ &NewPeelCount]() {
+ IterVal = NextIterVal;
+ NextIterVal = SE.getAddExpr(IterVal, Step);
+ NewPeelCount++;
+ };
+
+ auto CanPeelOneMoreIteration = [&NewPeelCount, &MaxPeelCount]() {
+ return NewPeelCount < MaxPeelCount;
+ };
+
+ while (CanPeelOneMoreIteration() &&
+ SE.isKnownPredicate(Pred, IterVal, RightSCEV))
+ PeelOneMoreIteration();
+
+ // With *that* peel count, does the predicate !Pred become known in the
+ // first iteration of the loop body after peeling?
+ if (!SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
+ RightSCEV))
+ continue; // If not, give up.
+
+ // However, for equality comparisons, that isn't always sufficient to
+ // eliminate the comparison in the loop body; we may need to peel one more
+ // iteration. See if that makes !Pred become unknown again.
+ if (ICmpInst::isEquality(Pred) &&
+ !SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), NextIterVal,
+ RightSCEV) &&
+ !SE.isKnownPredicate(Pred, IterVal, RightSCEV) &&
+ SE.isKnownPredicate(Pred, NextIterVal, RightSCEV)) {
+ if (!CanPeelOneMoreIteration())
+ continue; // Need to peel one more iteration, but can't. Give up.
+ PeelOneMoreIteration(); // Great!
+ }
+
+ DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount);
+ }
+
+ return DesiredPeelCount;
+}
+
+/// This "heuristic" exactly matches implicit behavior which used to exist
+/// inside getLoopEstimatedTripCount. It was added here to keep an
+/// improvement inside that API from causing peeling to become more aggressive.
+/// This should probably be removed.
+static bool violatesLegacyMultiExitLoopCheck(Loop *L) {
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch)
+ return true;
+
+ BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
+ if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
+ return true;
+
+ assert((LatchBR->getSuccessor(0) == L->getHeader() ||
+ LatchBR->getSuccessor(1) == L->getHeader()) &&
+ "At least one edge out of the latch must go to the header");
+
+ SmallVector<BasicBlock *, 4> ExitBlocks;
+ L->getUniqueNonLatchExitBlocks(ExitBlocks);
+ return any_of(ExitBlocks, [](const BasicBlock *EB) {
+ return !EB->getTerminatingDeoptimizeCall();
+ });
+}
+
+
+// Compute the number of iterations we want to peel off and store it in PP.PeelCount.
+void llvm::computePeelCount(Loop *L, unsigned LoopSize,
+ TargetTransformInfo::PeelingPreferences &PP,
+ unsigned TripCount, DominatorTree &DT,
+ ScalarEvolution &SE, AssumptionCache *AC,
+ unsigned Threshold) {
+ assert(LoopSize > 0 && "Zero loop size is not allowed!");
+ // Save the PP.PeelCount value set by the target in
+ // TTI.getPeelingPreferences or by the flag -unroll-peel-count.
+ unsigned TargetPeelCount = PP.PeelCount;
+ PP.PeelCount = 0;
+ if (!canPeel(L))
+ return;
+
+ // Only try to peel innermost loops by default.
+ // The constraint can be relaxed by the target in TTI.getPeelingPreferences
+ // or by the flag -unroll-allow-loop-nests-peeling.
+ if (!PP.AllowLoopNestsPeeling && !L->isInnermost())
+ return;
+
+ // If the user provided a peel count, use that.
+ bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
+ if (UserPeelCount) {
+ LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
+ << " iterations.\n");
+ PP.PeelCount = UnrollForcePeelCount;
+ PP.PeelProfiledIterations = true;
+ return;
+ }
+
+ // Skip peeling if it's disabled.
+ if (!PP.AllowPeeling)
+ return;
+
+ // Check that we can peel at least one iteration.
+ if (2 * LoopSize > Threshold)
+ return;
+
+ unsigned AlreadyPeeled = 0;
+ if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
+ AlreadyPeeled = *Peeled;
+ // Stop if we already peeled off the maximum number of iterations.
+ if (AlreadyPeeled >= UnrollPeelMaxCount)
+ return;
+
+ // Pay respect to limitations implied by loop size and the max peel count.
+ unsigned MaxPeelCount = UnrollPeelMaxCount;
+ MaxPeelCount = std::min(MaxPeelCount, Threshold / LoopSize - 1);
+
+ // Start the max computation with the PP.PeelCount value set by the target
+ // in TTI.getPeelingPreferences or by the flag -unroll-peel-count.
+ unsigned DesiredPeelCount = TargetPeelCount;
+
+ // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
+ // iterations of the loop. For this we compute the number of iterations after
+ // which every Phi is guaranteed to become an invariant, and try to peel the
+ // maximum number of iterations among these values, thus turning all those
+ // Phis into invariants.
+ if (MaxPeelCount > DesiredPeelCount) {
+ // Check how many iterations are useful for resolving Phis
+ auto NumPeels = PhiAnalyzer(*L, MaxPeelCount).calculateIterationsToPeel();
+ if (NumPeels)
+ DesiredPeelCount = std::max(DesiredPeelCount, *NumPeels);
+ }
+
+ DesiredPeelCount = std::max(DesiredPeelCount,
+ countToEliminateCompares(*L, MaxPeelCount, SE));
+
+ if (DesiredPeelCount == 0)
+ DesiredPeelCount = peelToTurnInvariantLoadsDerefencebale(*L, DT, AC);
+
+ if (DesiredPeelCount > 0) {
+ DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
+ // Consider max peel count limitation.
+ assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
+ if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
+ LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
+ << " iteration(s) to turn"
+ << " some Phis into invariants.\n");
+ PP.PeelCount = DesiredPeelCount;
+ PP.PeelProfiledIterations = false;
+ return;
+ }
+ }
+
+ // Bail if we know the statically calculated trip count.
+ // In that case we prefer partial unrolling instead.
+ if (TripCount)
+ return;
+
+ // Do not apply profile-based peeling if it is disabled.
+ if (!PP.PeelProfiledIterations)
+ return;
+ // If we don't know the trip count, but have reason to believe the average
+ // trip count is low, peeling should be beneficial, since we will usually
+ // hit the peeled section.
+ // We only do this in the presence of profile information, since otherwise
+ // our estimates of the trip count are not reliable enough.
+ if (L->getHeader()->getParent()->hasProfileData()) {
+ if (violatesLegacyMultiExitLoopCheck(L))
+ return;
+ std::optional<unsigned> EstimatedTripCount = getLoopEstimatedTripCount(L);
+ if (!EstimatedTripCount)
+ return;
+
+ LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is "
+ << *EstimatedTripCount << "\n");
+
+ if (*EstimatedTripCount) {
+ if (*EstimatedTripCount + AlreadyPeeled <= MaxPeelCount) {
+ unsigned PeelCount = *EstimatedTripCount;
+ LLVM_DEBUG(dbgs() << "Peeling first " << PeelCount << " iterations.\n");
+ PP.PeelCount = PeelCount;
+ return;
+ }
+ LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n");
+ LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
+ LLVM_DEBUG(dbgs() << "Loop cost: " << LoopSize << "\n");
+ LLVM_DEBUG(dbgs() << "Max peel cost: " << Threshold << "\n");
+ LLVM_DEBUG(dbgs() << "Max peel count by cost: "
+ << (Threshold / LoopSize - 1) << "\n");
+ }
+ }
+}
+
+struct WeightInfo {
+ // Weights for current iteration.
+ SmallVector<uint32_t> Weights;
+ // Weights to subtract after each iteration.
+ const SmallVector<uint32_t> SubWeights;
+};
+
+/// Update the branch weights of an exiting block of a peeled-off loop
+/// iteration.
+/// Let F be the weight of the edge that continues (falls through) into the
+/// loop, and let E be the weight of the edge to an exit.
+/// F/(F+E) is the probability of staying in the loop and E/(F+E) is the
+/// probability of exiting.
+/// Then the estimated exit count is EC = F / E.
+/// For the I-th (counting from 0) peeled-off iteration we set the weights for
+/// the peeled exit as (EC - I, 1). This gives a reasonable distribution: the
+/// probability of exiting, 1/(EC-I), increases, while the estimated exit count
+/// in the remainder loop decreases by I.
+/// To avoid dealing with division rounding we can simply multiply both parts
+/// of the weights by E and use the weights (F - I * E, E).
+static void updateBranchWeights(Instruction *Term, WeightInfo &Info) {
+ MDBuilder MDB(Term->getContext());
+ Term->setMetadata(LLVMContext::MD_prof,
+ MDB.createBranchWeights(Info.Weights));
+ for (auto [Idx, SubWeight] : enumerate(Info.SubWeights))
+ if (SubWeight != 0)
+ Info.Weights[Idx] = Info.Weights[Idx] > SubWeight
+ ? Info.Weights[Idx] - SubWeight
+ : 1;
+}
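+// Numeric illustration of the scheme described above updateBranchWeights():
+// with latch weights F = 99 (fallthrough) and E = 1 (exit), the estimated
+// exit count is 99, and the fallthrough entry of SubWeights is E = 1 while
+// the exit entry is 0. The peeled copies therefore get weights (99, 1),
+// (98, 1), (97, 1), ..., i.e. (F - I * E, E) for the I-th peeled iteration,
+// and the exit weight itself stays unchanged.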
+
+/// Initialize the weights for all exiting blocks.
+static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos,
+ Loop *L) {
+ SmallVector<BasicBlock *> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (BasicBlock *ExitingBlock : ExitingBlocks) {
+ Instruction *Term = ExitingBlock->getTerminator();
+ SmallVector<uint32_t> Weights;
+ if (!extractBranchWeights(*Term, Weights))
+ continue;
+
+ // See the comment on updateBranchWeights() for an explanation of what we
+ // do here.
+ uint32_t FallThroughWeights = 0;
+ uint32_t ExitWeights = 0;
+ for (auto [Succ, Weight] : zip(successors(Term), Weights)) {
+ if (L->contains(Succ))
+ FallThroughWeights += Weight;
+ else
+ ExitWeights += Weight;
+ }
+
+ // Don't try to update weights for the degenerate case.
+ if (FallThroughWeights == 0)
+ continue;
+
+ SmallVector<uint32_t> SubWeights;
+ for (auto [Succ, Weight] : zip(successors(Term), Weights)) {
+ if (!L->contains(Succ)) {
+ // Exit weights stay the same.
+ SubWeights.push_back(0);
+ continue;
+ }
+
+ // Subtract exit weights on each iteration, distributed across all
+ // fallthrough edges.
+ double W = (double)Weight / (double)FallThroughWeights;
+ SubWeights.push_back((uint32_t)(ExitWeights * W));
+ }
+
+ WeightInfos.insert({Term, {std::move(Weights), std::move(SubWeights)}});
+ }
+}
+
+/// Update the weights of original exiting block after peeling off all
+/// iterations.
+static void fixupBranchWeights(Instruction *Term, const WeightInfo &Info) {
+ MDBuilder MDB(Term->getContext());
+ Term->setMetadata(LLVMContext::MD_prof,
+ MDB.createBranchWeights(Info.Weights));
+}
+
+/// Clones the body of the loop L, putting it between \p InsertTop and \p
+/// InsertBot.
+/// \param IterNumber The serial number of the iteration currently being
+/// peeled off.
+/// \param ExitEdges The exit edges of the original loop.
+/// \param[out] NewBlocks A list of the blocks in the newly created clone
+/// \param[out] VMap The value map between the loop and the new clone.
+/// \param LoopBlocks A helper for DFS-traversal of the loop.
+/// \param LVMap A value-map that maps instructions from the original loop to
+/// instructions in the last peeled-off iteration.
+static void cloneLoopBlocks(
+ Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot,
+ SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
+ SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
+ ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
+ LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes,
+ ScalarEvolution &SE) {
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+ BasicBlock *PreHeader = L->getLoopPreheader();
+
+ Function *F = Header->getParent();
+ LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
+ LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
+ Loop *ParentLoop = L->getParentLoop();
+
+ // For each block in the original loop, create a new copy,
+ // and update the value map with the newly created values.
+ for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
+ BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F);
+ NewBlocks.push_back(NewBB);
+
+ // If an original block is an immediate child of the loop L, its copy
+ // is a child of a ParentLoop after peeling. If a block is a child of
+ // a nested loop, it is handled in the cloneLoop() call below.
+ if (ParentLoop && LI->getLoopFor(*BB) == L)
+ ParentLoop->addBasicBlockToLoop(NewBB, *LI);
+
+ VMap[*BB] = NewBB;
+
+ // If dominator tree is available, insert nodes to represent cloned blocks.
+ if (DT) {
+ if (Header == *BB)
+ DT->addNewBlock(NewBB, InsertTop);
+ else {
+ DomTreeNode *IDom = DT->getNode(*BB)->getIDom();
+ // VMap must contain entry for IDom, as the iteration order is RPO.
+ DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
+ }
+ }
+ }
+
+ {
+ // Identify what other metadata depends on the cloned version. After
+ // cloning, replace the metadata with the corrected version for both
+ // memory instructions and noalias intrinsics.
+ std::string Ext = (Twine("Peel") + Twine(IterNumber)).str();
+ cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
+ Header->getContext(), Ext);
+ }
+
+ // Recursively create the new Loop objects for nested loops, if any,
+ // to preserve LoopInfo.
+ for (Loop *ChildLoop : *L) {
+ cloneLoop(ChildLoop, ParentLoop, VMap, LI, nullptr);
+ }
+
+ // Hook-up the control flow for the newly inserted blocks.
+ // The new header is hooked up directly to the "top", which is either
+ // the original loop preheader (for the first iteration) or the previous
+ // iteration's exiting block (for every other iteration)
+ InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header]));
+
+ // Similarly, for the latch:
+ // The original exiting edge is still hooked up to the loop exit.
+ // The backedge now goes to the "bottom", which is either the loop's real
+ // header (for the last peeled iteration) or the copied header of the next
+ // iteration (for every other iteration)
+ BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
+ auto *LatchTerm = cast<Instruction>(NewLatch->getTerminator());
+ for (unsigned idx = 0, e = LatchTerm->getNumSuccessors(); idx < e; ++idx)
+ if (LatchTerm->getSuccessor(idx) == Header) {
+ LatchTerm->setSuccessor(idx, InsertBot);
+ break;
+ }
+ if (DT)
+ DT->changeImmediateDominator(InsertBot, NewLatch);
+
+ // The new copy of the loop body starts with a bunch of PHI nodes
+ // that pick an incoming value from either the preheader, or the previous
+ // loop iteration. Since this copy is no longer part of the loop, we
+ // resolve this statically:
+ // For the first iteration, we use the value from the preheader directly.
+ // For any other iteration, we replace the phi with the value generated by
+ // the immediately preceding clone of the loop body (which represents
+ // the previous iteration).
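+ // For illustration (with a hypothetical induction variable): given
+ //   %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
+ // the peeled copy of iteration 0 simply uses the constant 0, and the copy of
+ // iteration 1 uses the clone of %iv.next produced by the iteration-0 copy
+ // (looked up through LVMap), so no phi is needed in the peeled code.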
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
+ if (IterNumber == 0) {
+ VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader);
+ } else {
+ Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch);
+ Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
+ if (LatchInst && L->contains(LatchInst))
+ VMap[&*I] = LVMap[LatchInst];
+ else
+ VMap[&*I] = LatchVal;
+ }
+ NewPHI->eraseFromParent();
+ }
+
+ // Fix up the outgoing values - we need to add a value for the iteration
+ // we've just created. Note that this must happen *after* the incoming
+ // values are adjusted, since the value going out of the latch may also be
+ // a value coming into the header.
+ for (auto Edge : ExitEdges)
+ for (PHINode &PHI : Edge.second->phis()) {
+ Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first);
+ Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
+ if (LatchInst && L->contains(LatchInst))
+ LatchVal = VMap[LatchVal];
+ PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
+ SE.forgetValue(&PHI);
+ }
+
+ // LastValueMap is updated with the values for the current loop
+ // which are used the next time this function is called.
+ for (auto KV : VMap)
+ LVMap[KV.first] = KV.second;
+}
+
+TargetTransformInfo::PeelingPreferences
+llvm::gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ const TargetTransformInfo &TTI,
+ std::optional<bool> UserAllowPeeling,
+ std::optional<bool> UserAllowProfileBasedPeeling,
+ bool UnrollingSpecficValues) {
+ TargetTransformInfo::PeelingPreferences PP;
+
+ // Set the default values.
+ PP.PeelCount = 0;
+ PP.AllowPeeling = true;
+ PP.AllowLoopNestsPeeling = false;
+ PP.PeelProfiledIterations = true;
+
+ // Get the target-specific values.
+ TTI.getPeelingPreferences(L, SE, PP);
+
+ // User specified values using cl::opt.
+ if (UnrollingSpecficValues) {
+ if (UnrollPeelCount.getNumOccurrences() > 0)
+ PP.PeelCount = UnrollPeelCount;
+ if (UnrollAllowPeeling.getNumOccurrences() > 0)
+ PP.AllowPeeling = UnrollAllowPeeling;
+ if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0)
+ PP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling;
+ }
+
+ // User-specified values provided by argument.
+ if (UserAllowPeeling)
+ PP.AllowPeeling = *UserAllowPeeling;
+ if (UserAllowProfileBasedPeeling)
+ PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
+
+ return PP;
+}
+
+/// Peel off the first \p PeelCount iterations of loop \p L.
+///
+/// Note that this does not peel them off as a single straight-line block.
+/// Rather, each iteration is peeled off separately, and needs to check the
+/// exit condition.
+/// For loops that dynamically execute \p PeelCount iterations or less
+/// this provides a benefit, since the peeled off iterations, which account
+/// for the bulk of dynamic execution, can be further simplified by scalar
+/// optimizations.
+bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
+ ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC,
+ bool PreserveLCSSA, ValueToValueMapTy &LVMap) {
+ assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
+ assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
+
+ LoopBlocksDFS LoopBlocks(L);
+ LoopBlocks.perform(LI);
+
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *PreHeader = L->getLoopPreheader();
+ BasicBlock *Latch = L->getLoopLatch();
+ SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitEdges;
+ L->getExitEdges(ExitEdges);
+
+ // Remember the dominators of blocks we might reach through exits, so we can
+ // change them later. The immediate dominator of such a block might change,
+ // because we add more routes that can lead to the exit: it can now also be
+ // reached from the peeled iterations.
+ DenseMap<BasicBlock *, BasicBlock *> NonLoopBlocksIDom;
+ for (auto *BB : L->blocks()) {
+ auto *BBDomNode = DT.getNode(BB);
+ SmallVector<BasicBlock *, 16> ChildrenToUpdate;
+ for (auto *ChildDomNode : BBDomNode->children()) {
+ auto *ChildBB = ChildDomNode->getBlock();
+ if (!L->contains(ChildBB))
+ ChildrenToUpdate.push_back(ChildBB);
+ }
+ // The new idom of the block will be the nearest common dominator
+ // of all copies of the previous idom. This is equivalent to the
+ // nearest common dominator of the previous idom and the first latch,
+ // which dominates all copies of the previous idom.
+ BasicBlock *NewIDom = DT.findNearestCommonDominator(BB, Latch);
+ for (auto *ChildBB : ChildrenToUpdate)
+ NonLoopBlocksIDom[ChildBB] = NewIDom;
+ }
+
+ Function *F = Header->getParent();
+
+ // Set up all the necessary basic blocks. It is convenient to split the
+ // preheader into 3 parts - two blocks to anchor the peeled copy of the loop
+ // body, and a new preheader for the "real" loop.
+
+ // Peeling the first iteration transforms.
+ //
+ // PreHeader:
+ // ...
+ // Header:
+ // LoopBody
+ // If (cond) goto Header
+ // Exit:
+ //
+ // into
+ //
+ // InsertTop:
+ // LoopBody
+ // If (!cond) goto Exit
+ // InsertBot:
+ // NewPreHeader:
+ // ...
+ // Header:
+ // LoopBody
+ // If (cond) goto Header
+ // Exit:
+ //
+ // Each following iteration will split the current bottom anchor in two,
+ // and put the new copy of the loop body between these two blocks. That is,
+ // after peeling another iteration from the example above, we'll split
+ // InsertBot, and get:
+ //
+ // InsertTop:
+ // LoopBody
+ // If (!cond) goto Exit
+ // InsertBot:
+ // LoopBody
+ // If (!cond) goto Exit
+ // InsertBot.next:
+ // NewPreHeader:
+ // ...
+ // Header:
+ // LoopBody
+ // If (cond) goto Header
+ // Exit:
+
+ BasicBlock *InsertTop = SplitEdge(PreHeader, Header, &DT, LI);
+ BasicBlock *InsertBot =
+ SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);
+ BasicBlock *NewPreHeader =
+ SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
+
+ InsertTop->setName(Header->getName() + ".peel.begin");
+ InsertBot->setName(Header->getName() + ".peel.next");
+ NewPreHeader->setName(PreHeader->getName() + ".peel.newph");
+
+ Instruction *LatchTerm =
+ cast<Instruction>(cast<BasicBlock>(Latch)->getTerminator());
+
+ // If we have branch weight information, we'll want to update it for the
+ // newly created branches.
+ DenseMap<Instruction *, WeightInfo> Weights;
+ initBranchWeights(Weights, L);
+
+ // Identify what noalias metadata is inside the loop: if it is inside the
+ // loop, the associated metadata must be cloned for each iteration.
+ SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
+ identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
+
+ // For each peeled-off iteration, make a copy of the loop.
+ for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
+ SmallVector<BasicBlock *, 8> NewBlocks;
+ ValueToValueMapTy VMap;
+
+ cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
+ LoopBlocks, VMap, LVMap, &DT, LI,
+ LoopLocalNoAliasDeclScopes, *SE);
+
+ // Remap to use values from the current iteration instead of the
+ // previous one.
+ remapInstructionsInBlocks(NewBlocks, VMap);
+
+ // Update IDoms of the blocks reachable through exits.
+ if (Iter == 0)
+ for (auto BBIDom : NonLoopBlocksIDom)
+ DT.changeImmediateDominator(BBIDom.first,
+ cast<BasicBlock>(LVMap[BBIDom.second]));
+#ifdef EXPENSIVE_CHECKS
+ assert(DT.verify(DominatorTree::VerificationLevel::Fast));
+#endif
+
+ for (auto &[Term, Info] : Weights) {
+ auto *TermCopy = cast<Instruction>(VMap[Term]);
+ updateBranchWeights(TermCopy, Info);
+ }
+
+ // Remove Loop metadata from the latch branch instruction
+ // because it is not the Loop's latch branch anymore.
+ auto *LatchTermCopy = cast<Instruction>(VMap[LatchTerm]);
+ LatchTermCopy->setMetadata(LLVMContext::MD_loop, nullptr);
+
+ InsertTop = InsertBot;
+ InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
+ InsertBot->setName(Header->getName() + ".peel.next");
+
+ F->splice(InsertTop->getIterator(), F, NewBlocks[0]->getIterator(),
+ F->end());
+ }
+
+ // Now adjust the phi nodes in the loop header to get their initial values
+ // from the last peeled-off iteration instead of the preheader.
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PHI = cast<PHINode>(I);
+ Value *NewVal = PHI->getIncomingValueForBlock(Latch);
+ Instruction *LatchInst = dyn_cast<Instruction>(NewVal);
+ if (LatchInst && L->contains(LatchInst))
+ NewVal = LVMap[LatchInst];
+
+ PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
+ }
+
+ for (const auto &[Term, Info] : Weights)
+ fixupBranchWeights(Term, Info);
+
+ // Update Metadata for count of peeled off iterations.
+ unsigned AlreadyPeeled = 0;
+ if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
+ AlreadyPeeled = *Peeled;
+ addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount);
+
+ if (Loop *ParentLoop = L->getParentLoop())
+ L = ParentLoop;
+
+ // We modified the loop, update SE.
+ SE->forgetTopmostLoop(L);
+
+#ifdef EXPENSIVE_CHECKS
+ // Finally, the DomTree must be correct.
+ assert(DT.verify(DominatorTree::VerificationLevel::Fast));
+#endif
+
+ // FIXME: Incrementally update loop-simplify
+ simplifyLoop(L, &DT, LI, SE, AC, nullptr, PreserveLCSSA);
+
+ NumPeeled++;
+
+ return true;
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LoopRotationUtils.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LoopRotationUtils.cpp
new file mode 100644
index 0000000000..1a9eaf2421
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -0,0 +1,845 @@
+//===----------------- LoopRotationUtils.cpp -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides utilities to convert a loop into a loop with bottom test.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LoopRotationUtils.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-rotate"
+
+STATISTIC(NumNotRotatedDueToHeaderSize,
+ "Number of loops not rotated due to the header size");
+STATISTIC(NumInstrsHoisted,
+ "Number of instructions hoisted into loop preheader");
+STATISTIC(NumInstrsDuplicated,
+ "Number of instructions cloned into loop preheader");
+STATISTIC(NumRotated, "Number of loops rotated");
+
+static cl::opt<bool>
+ MultiRotate("loop-rotate-multi", cl::init(false), cl::Hidden,
+ cl::desc("Allow loop rotation multiple times in order to reach "
+ "a better latch exit"));
+
+namespace {
+/// A simple loop rotation transformation.
+class LoopRotate {
+ const unsigned MaxHeaderSize;
+ LoopInfo *LI;
+ const TargetTransformInfo *TTI;
+ AssumptionCache *AC;
+ DominatorTree *DT;
+ ScalarEvolution *SE;
+ MemorySSAUpdater *MSSAU;
+ const SimplifyQuery &SQ;
+ bool RotationOnly;
+ bool IsUtilMode;
+ bool PrepareForLTO;
+
+public:
+ LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
+ const TargetTransformInfo *TTI, AssumptionCache *AC,
+ DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
+ const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode,
+ bool PrepareForLTO)
+ : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE),
+ MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly),
+ IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO) {}
+ bool processLoop(Loop *L);
+
+private:
+ bool rotateLoop(Loop *L, bool SimplifiedLatch);
+ bool simplifyLoopLatch(Loop *L);
+};
+} // end anonymous namespace
+
+/// Insert (K, V) pair into the ValueToValueMap, and verify the key did not
+/// previously exist in the map, and the value was inserted.
+static void InsertNewValueIntoMap(ValueToValueMapTy &VM, Value *K, Value *V) {
+ bool Inserted = VM.insert({K, V}).second;
+ assert(Inserted);
+ (void)Inserted;
+}
+/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
+/// old header into the preheader. If there were uses of the values produced by
+/// these instructions that were outside of the loop, we have to insert PHI nodes
+/// to merge the two values. Do this now.
+static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
+ BasicBlock *OrigPreheader,
+ ValueToValueMapTy &ValueMap,
+ ScalarEvolution *SE,
+ SmallVectorImpl<PHINode*> *InsertedPHIs) {
+ // Remove PHI node entries that are no longer live.
+ BasicBlock::iterator I, E = OrigHeader->end();
+ for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
+
+ // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
+ // as necessary.
+ SSAUpdater SSA(InsertedPHIs);
+ for (I = OrigHeader->begin(); I != E; ++I) {
+ Value *OrigHeaderVal = &*I;
+
+ // If there are no uses of the value (e.g. because it returns void), there
+ // is nothing to rewrite.
+ if (OrigHeaderVal->use_empty())
+ continue;
+
+ Value *OrigPreHeaderVal = ValueMap.lookup(OrigHeaderVal);
+
+ // The value now exists in two versions: the initial value in the preheader
+ // and the loop "next" value in the original header.
+ SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
+ // Force re-computation of OrigHeaderVal, as some users now need to use the
+ // new PHI node.
+ if (SE)
+ SE->forgetValue(OrigHeaderVal);
+ SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
+ SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
+
+ // Visit each use of the OrigHeader instruction.
+ for (Use &U : llvm::make_early_inc_range(OrigHeaderVal->uses())) {
+ // SSAUpdater can't handle a non-PHI use in the same block as an
+ // earlier def. We can easily handle those cases manually.
+ Instruction *UserInst = cast<Instruction>(U.getUser());
+ if (!isa<PHINode>(UserInst)) {
+ BasicBlock *UserBB = UserInst->getParent();
+
+ // The original users in the OrigHeader are already using the
+ // original definitions.
+ if (UserBB == OrigHeader)
+ continue;
+
+ // Users in the OrigPreHeader need to use the value to which the
+ // original definitions are mapped.
+ if (UserBB == OrigPreheader) {
+ U = OrigPreHeaderVal;
+ continue;
+ }
+ }
+
+ // Anything else can be handled by SSAUpdater.
+ SSA.RewriteUse(U);
+ }
+
+ // Replace MetadataAsValue(ValueAsMetadata(OrigHeaderVal)) uses in debug
+ // intrinsics.
+ SmallVector<DbgValueInst *, 1> DbgValues;
+ llvm::findDbgValues(DbgValues, OrigHeaderVal);
+ for (auto &DbgValue : DbgValues) {
+ // The original users in the OrigHeader are already using the original
+ // definitions.
+ BasicBlock *UserBB = DbgValue->getParent();
+ if (UserBB == OrigHeader)
+ continue;
+
+ // Users in the OrigPreHeader need to use the value to which the
+ // original definitions are mapped, and anything else can be handled by
+ // the SSAUpdater. To avoid adding PHINodes, check if the value is
+ // available in UserBB; if not, substitute undef.
+ Value *NewVal;
+ if (UserBB == OrigPreheader)
+ NewVal = OrigPreHeaderVal;
+ else if (SSA.HasValueForBlock(UserBB))
+ NewVal = SSA.GetValueInMiddleOfBlock(UserBB);
+ else
+ NewVal = UndefValue::get(OrigHeaderVal->getType());
+ DbgValue->replaceVariableLocationOp(OrigHeaderVal, NewVal);
+ }
+ }
+}
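+// For illustration (with hypothetical value names): if the original header
+// defined %v and %v was used in an exit block E, then after rotation E can be
+// reached both from the old preheader (which now holds the cloned %v) and
+// from the old header. SSAUpdater rewrites the use in E to a new phi along
+// the lines of
+//   %v.merge = phi [ %v.clone, %OrigPreheader ], [ %v, %OrigHeader ]
+// so both the initial and the loop "next" version of the value reach their
+// users correctly.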
+
+// Assuming both header and latch are exiting, look for a phi which is only
+// used outside the loop (via an LCSSA phi) in the exit from the header.
+// This means that rotating the loop can remove the phi.
+static bool profitableToRotateLoopExitingLatch(Loop *L) {
+ BasicBlock *Header = L->getHeader();
+ BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator());
+ assert(BI && BI->isConditional() && "need header with conditional exit");
+ BasicBlock *HeaderExit = BI->getSuccessor(0);
+ if (L->contains(HeaderExit))
+ HeaderExit = BI->getSuccessor(1);
+
+ for (auto &Phi : Header->phis()) {
+ // Look for uses of this phi in the loop/via exits other than the header.
+ if (llvm::any_of(Phi.users(), [HeaderExit](const User *U) {
+ return cast<Instruction>(U)->getParent() != HeaderExit;
+ }))
+ continue;
+ return true;
+ }
+ return false;
+}
+
+// Check that the latch exit is deoptimizing (which means it is very unlikely
+// to be taken) and that there is another, non-deoptimizing exit from the loop.
+// If we rotate latch to that exit our loop has a better chance of being fully
+// canonical.
+//
+// It can give false positives in some rare cases.
+static bool canRotateDeoptimizingLatchExit(Loop *L) {
+ BasicBlock *Latch = L->getLoopLatch();
+ assert(Latch && "need latch");
+ BranchInst *BI = dyn_cast<BranchInst>(Latch->getTerminator());
+ // Need normal exiting latch.
+ if (!BI || !BI->isConditional())
+ return false;
+
+ BasicBlock *Exit = BI->getSuccessor(1);
+ if (L->contains(Exit))
+ Exit = BI->getSuccessor(0);
+
+ // Latch exit is non-deoptimizing, no need to rotate.
+ if (!Exit->getPostdominatingDeoptimizeCall())
+ return false;
+
+ SmallVector<BasicBlock *, 4> Exits;
+ L->getUniqueExitBlocks(Exits);
+ if (!Exits.empty()) {
+ // There is at least one non-deoptimizing exit.
+ //
+ // Note, that BasicBlock::getPostdominatingDeoptimizeCall is not exact,
+ // as it can conservatively return false for deoptimizing exits with
+ // complex enough control flow down to deoptimize call.
+ //
+ // That means here we can report success for a case where
+ // all exits are deoptimizing but one of them has complex enough
+ // control flow (e.g. with loops).
+ //
+ // That should be a very rare case and false positives for this function
+ // have compile-time effect only.
+ return any_of(Exits, [](const BasicBlock *BB) {
+ return !BB->getPostdominatingDeoptimizeCall();
+ });
+ }
+ return false;
+}
+
+/// Rotate loop LP. Return true if the loop is rotated.
+///
+/// \param SimplifiedLatch is true if the latch was just folded into the final
+/// loop exit. In this case we may want to rotate even though the new latch is
+/// now an exiting branch. This rotation would have happened had the latch not
+/// been simplified. However, if SimplifiedLatch is false, then we avoid
+/// rotating loops in which the latch exits to avoid excessive or endless
+/// rotation. LoopRotate should be repeatable and converge to a canonical
+/// form. This property is satisfied because simplifying the loop latch can only
+/// happen once across multiple invocations of the LoopRotate pass.
+///
+/// If -loop-rotate-multi is enabled we can do multiple rotations in one go
+/// so to reach a suitable (non-deoptimizing) exit.
+bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
+ // If the loop has only one block then there is not much to rotate.
+ if (L->getBlocks().size() == 1)
+ return false;
+
+ bool Rotated = false;
+ do {
+ BasicBlock *OrigHeader = L->getHeader();
+ BasicBlock *OrigLatch = L->getLoopLatch();
+
+ BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
+ if (!BI || BI->isUnconditional())
+ return Rotated;
+
+ // If the loop header is not one of the loop exiting blocks then
+ // either this loop is already rotated or it is not
+ // suitable for loop rotation transformations.
+ if (!L->isLoopExiting(OrigHeader))
+ return Rotated;
+
+ // If the loop does not have a single latch block, it is not suitable for
+ // rotation.
+ if (!OrigLatch)
+ return Rotated;
+
+ // Rotate if either the loop latch does *not* exit the loop, or if the loop
+ // latch was just simplified. Or if we think it will be profitable.
+ if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode == false &&
+ !profitableToRotateLoopExitingLatch(L) &&
+ !canRotateDeoptimizingLatchExit(L))
+ return Rotated;
+
+ // Check size of original header and reject loop if it is very big or we can't
+ // duplicate blocks inside it.
+ {
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(L, AC, EphValues);
+
+ CodeMetrics Metrics;
+ Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues, PrepareForLTO);
+ if (Metrics.notDuplicatable) {
+ LLVM_DEBUG(
+ dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
+ << " instructions: ";
+ L->dump());
+ return Rotated;
+ }
+ if (Metrics.convergent) {
+ LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains convergent "
+ "instructions: ";
+ L->dump());
+ return Rotated;
+ }
+ if (!Metrics.NumInsts.isValid()) {
+ LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains instructions"
+ " with invalid cost: ";
+ L->dump());
+ return Rotated;
+ }
+ if (Metrics.NumInsts > MaxHeaderSize) {
+ LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains "
+ << Metrics.NumInsts
+ << " instructions, which is more than the threshold ("
+ << MaxHeaderSize << " instructions): ";
+ L->dump());
+ ++NumNotRotatedDueToHeaderSize;
+ return Rotated;
+ }
+
+ // When preparing for LTO, avoid rotating loops with calls that could be
+ // inlined during the LTO stage.
+ if (PrepareForLTO && Metrics.NumInlineCandidates > 0)
+ return Rotated;
+ }
+
+ // Now, this loop is suitable for rotation.
+ BasicBlock *OrigPreheader = L->getLoopPreheader();
+
+ // If the loop could not be converted to canonical form, it must have an
+ // indirectbr in it, just give up.
+ if (!OrigPreheader || !L->hasDedicatedExits())
+ return Rotated;
+
+ // Anything ScalarEvolution may know about this loop or the PHI nodes
+ // in its header will soon be invalidated. We should also invalidate
+ // all outer loops because insertion and deletion of blocks that happens
+ // during the rotation may violate invariants related to backedge taken
+ // infos in them.
+ if (SE) {
+ SE->forgetTopmostLoop(L);
+ // We may hoist some instructions out of loop. In case if they were cached
+ // as "loop variant" or "loop computable", these caches must be dropped.
+ // We also may fold basic blocks, so cached block dispositions also need
+ // to be dropped.
+ SE->forgetBlockAndLoopDispositions();
+ }
+
+ LLVM_DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
+ // Find the new loop header. NewHeader is the header's one and only successor
+ // that is inside the loop; the header's other successor is outside the
+ // loop. Otherwise the loop is not suitable for rotation.
+ BasicBlock *Exit = BI->getSuccessor(0);
+ BasicBlock *NewHeader = BI->getSuccessor(1);
+ if (L->contains(Exit))
+ std::swap(Exit, NewHeader);
+ assert(NewHeader && "Unable to determine new loop header");
+ assert(L->contains(NewHeader) && !L->contains(Exit) &&
+ "Unable to determine loop header and exit blocks");
+
+ // This code assumes that the new header has exactly one predecessor.
+ // Remove any single-entry PHI nodes in it.
+ assert(NewHeader->getSinglePredecessor() &&
+ "New header doesn't have one pred!");
+ FoldSingleEntryPHINodes(NewHeader);
+
+ // Begin by walking OrigHeader and populating ValueMap with an entry for
+ // each Instruction.
+ BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
+ ValueToValueMapTy ValueMap, ValueMapMSSA;
+
+ // For PHI nodes, the value available in OldPreHeader is just the
+ // incoming value from OldPreHeader.
+ for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ InsertNewValueIntoMap(ValueMap, PN,
+ PN->getIncomingValueForBlock(OrigPreheader));
+
+ // For the rest of the instructions, either hoist to the OrigPreheader if
+ // possible or create a clone in the OldPreHeader if not.
+ Instruction *LoopEntryBranch = OrigPreheader->getTerminator();
+
+ // Record all debug intrinsics preceding LoopEntryBranch to avoid
+ // duplication.
+ using DbgIntrinsicHash =
+ std::pair<std::pair<hash_code, DILocalVariable *>, DIExpression *>;
+ auto makeHash = [](DbgVariableIntrinsic *D) -> DbgIntrinsicHash {
+ auto VarLocOps = D->location_ops();
+ return {{hash_combine_range(VarLocOps.begin(), VarLocOps.end()),
+ D->getVariable()},
+ D->getExpression()};
+ };
+ SmallDenseSet<DbgIntrinsicHash, 8> DbgIntrinsics;
+ for (Instruction &I : llvm::drop_begin(llvm::reverse(*OrigPreheader))) {
+ if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I))
+ DbgIntrinsics.insert(makeHash(DII));
+ else
+ break;
+ }
+
+ // Remember the local noalias scope declarations in the header. After the
+ // rotation, they must be duplicated and the scope must be cloned. This
+ // avoids unwanted interaction across iterations.
+ SmallVector<NoAliasScopeDeclInst *, 6> NoAliasDeclInstructions;
+ for (Instruction &I : *OrigHeader)
+ if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
+ NoAliasDeclInstructions.push_back(Decl);
+
+ while (I != E) {
+ Instruction *Inst = &*I++;
+
+ // If the instruction's operands are invariant and it doesn't read or write
+ // memory, then it is safe to hoist. Doing this doesn't change the order of
+ // execution in the preheader, but does prevent the instruction from
+ // executing in each iteration of the loop. This means it is safe to hoist
+ // something that might trap, but isn't safe to hoist something that reads
+ // memory (without proving that the loop doesn't write).
+ if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() &&
+ !Inst->mayWriteToMemory() && !Inst->isTerminator() &&
+ !isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) {
+ Inst->moveBefore(LoopEntryBranch);
+ ++NumInstrsHoisted;
+ continue;
+ }
+
+ // Otherwise, create a duplicate of the instruction.
+ Instruction *C = Inst->clone();
+ ++NumInstrsDuplicated;
+
+ // Eagerly remap the operands of the instruction.
+ RemapInstruction(C, ValueMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+
+ // Avoid inserting the same intrinsic twice.
+ if (auto *DII = dyn_cast<DbgVariableIntrinsic>(C))
+ if (DbgIntrinsics.count(makeHash(DII))) {
+ C->deleteValue();
+ continue;
+ }
+
+ // With the operands remapped, see if the instruction constant folds or is
+    // otherwise simplifiable. This commonly occurs because the entry from PHI
+ // nodes allows icmps and other instructions to fold.
+ Value *V = simplifyInstruction(C, SQ);
+ if (V && LI->replacementPreservesLCSSAForm(C, V)) {
+ // If so, then delete the temporary instruction and stick the folded value
+ // in the map.
+ InsertNewValueIntoMap(ValueMap, Inst, V);
+ if (!C->mayHaveSideEffects()) {
+ C->deleteValue();
+ C = nullptr;
+ }
+ } else {
+ InsertNewValueIntoMap(ValueMap, Inst, C);
+ }
+ if (C) {
+ // Otherwise, stick the new instruction into the new block!
+ C->setName(Inst->getName());
+ C->insertBefore(LoopEntryBranch);
+
+ if (auto *II = dyn_cast<AssumeInst>(C))
+ AC->registerAssumption(II);
+ // MemorySSA cares whether the cloned instruction was inserted or not, and
+ // not whether it can be remapped to a simplified value.
+ if (MSSAU)
+ InsertNewValueIntoMap(ValueMapMSSA, Inst, C);
+ }
+ }
+
+ if (!NoAliasDeclInstructions.empty()) {
+ // There are noalias scope declarations:
+ // (general):
+ // Original: OrigPre { OrigHeader NewHeader ... Latch }
+ // after: (OrigPre+OrigHeader') { NewHeader ... Latch OrigHeader }
+ //
+ // with D: llvm.experimental.noalias.scope.decl,
+ // U: !noalias or !alias.scope depending on D
+ // ... { D U1 U2 } can transform into:
+ // (0) : ... { D U1 U2 } // no relevant rotation for this part
+ // (1) : ... D' { U1 U2 D } // D is part of OrigHeader
+ // (2) : ... D' U1' { U2 D U1 } // D, U1 are part of OrigHeader
+ //
+ // We now want to transform:
+ // (1) -> : ... D' { D U1 U2 D'' }
+ // (2) -> : ... D' U1' { D U2 D'' U1'' }
+ // D: original llvm.experimental.noalias.scope.decl
+ // D', U1': duplicate with replaced scopes
+ // D'', U1'': different duplicate with replaced scopes
+ // This ensures a safe fallback to 'may_alias' introduced by the rotate,
+    // as U1'' and U1' scopes will not be compatible w.r.t. the local restrict.
+
+    // Clone the llvm.experimental.noalias.scope.decl again for the NewHeader.
+ Instruction *NewHeaderInsertionPoint = &(*NewHeader->getFirstNonPHI());
+ for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions) {
+ LLVM_DEBUG(dbgs() << " Cloning llvm.experimental.noalias.scope.decl:"
+ << *NAD << "\n");
+ Instruction *NewNAD = NAD->clone();
+ NewNAD->insertBefore(NewHeaderInsertionPoint);
+ }
+
+ // Scopes must now be duplicated, once for OrigHeader and once for
+ // OrigPreHeader'.
+ {
+ auto &Context = NewHeader->getContext();
+
+ SmallVector<MDNode *, 8> NoAliasDeclScopes;
+ for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions)
+ NoAliasDeclScopes.push_back(NAD->getScopeList());
+
+ LLVM_DEBUG(dbgs() << " Updating OrigHeader scopes\n");
+ cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, {OrigHeader}, Context,
+ "h.rot");
+ LLVM_DEBUG(OrigHeader->dump());
+
+ // Keep the compile time impact low by only adapting the inserted block
+ // of instructions in the OrigPreHeader. This might result in slightly
+ // more aliasing between these instructions and those that were already
+ // present, but it will be much faster when the original PreHeader is
+ // large.
+ LLVM_DEBUG(dbgs() << " Updating part of OrigPreheader scopes\n");
+ auto *FirstDecl =
+ cast<Instruction>(ValueMap[*NoAliasDeclInstructions.begin()]);
+ auto *LastInst = &OrigPreheader->back();
+ cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, FirstDecl, LastInst,
+ Context, "pre.rot");
+ LLVM_DEBUG(OrigPreheader->dump());
+
+ LLVM_DEBUG(dbgs() << " Updated NewHeader:\n");
+ LLVM_DEBUG(NewHeader->dump());
+ }
+ }
+
+ // Along with all the other instructions, we just cloned OrigHeader's
+ // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
+ // successors by duplicating their incoming values for OrigHeader.
+ for (BasicBlock *SuccBB : successors(OrigHeader))
+ for (BasicBlock::iterator BI = SuccBB->begin();
+ PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+ PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
+
+ // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
+ // OrigPreHeader's old terminator (the original branch into the loop), and
+ // remove the corresponding incoming values from the PHI nodes in OrigHeader.
+ LoopEntryBranch->eraseFromParent();
+
+ // Update MemorySSA before the rewrite call below changes the 1:1
+ // instruction:cloned_instruction_or_value mapping.
+ if (MSSAU) {
+ InsertNewValueIntoMap(ValueMapMSSA, OrigHeader, OrigPreheader);
+ MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader,
+ ValueMapMSSA);
+ }
+
+ SmallVector<PHINode*, 2> InsertedPHIs;
+ // If there were any uses of instructions in the duplicated block outside the
+ // loop, update them, inserting PHI nodes as required
+ RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap, SE,
+ &InsertedPHIs);
+
+ // Attach dbg.value intrinsics to the new phis if that phi uses a value that
+ // previously had debug metadata attached. This keeps the debug info
+ // up-to-date in the loop body.
+ if (!InsertedPHIs.empty())
+ insertDebugValuesForPHIs(OrigHeader, InsertedPHIs);
+
+ // NewHeader is now the header of the loop.
+ L->moveToHeader(NewHeader);
+ assert(L->getHeader() == NewHeader && "Latch block is our new header");
+
+ // Inform DT about changes to the CFG.
+ if (DT) {
+    // The OrigPreheader now branches to the NewHeader and Exit. Then, inform
+    // the DT about the removal of the edge from OrigPreheader to OrigHeader.
+ SmallVector<DominatorTree::UpdateType, 3> Updates;
+ Updates.push_back({DominatorTree::Insert, OrigPreheader, Exit});
+ Updates.push_back({DominatorTree::Insert, OrigPreheader, NewHeader});
+ Updates.push_back({DominatorTree::Delete, OrigPreheader, OrigHeader});
+
+ if (MSSAU) {
+ MSSAU->applyUpdates(Updates, *DT, /*UpdateDT=*/true);
+ if (VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+ } else {
+ DT->applyUpdates(Updates);
+ }
+ }
+
+ // At this point, we've finished our major CFG changes. As part of cloning
+ // the loop into the preheader we've simplified instructions and the
+ // duplicated conditional branch may now be branching on a constant. If it is
+ // branching on a constant and if that constant means that we enter the loop,
+ // then we fold away the cond branch to an uncond branch. This simplifies the
+ // loop in cases important for nested loops, and it also means we don't have
+ // to split as many edges.
+ BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
+ assert(PHBI->isConditional() && "Should be clone of BI condbr!");
+ if (!isa<ConstantInt>(PHBI->getCondition()) ||
+ PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero()) !=
+ NewHeader) {
+ // The conditional branch can't be folded, handle the general case.
+ // Split edges as necessary to preserve LoopSimplify form.
+
+ // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
+ // thus is not a preheader anymore.
+ // Split the edge to form a real preheader.
+ BasicBlock *NewPH = SplitCriticalEdge(
+ OrigPreheader, NewHeader,
+ CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA());
+ NewPH->setName(NewHeader->getName() + ".lr.ph");
+
+ // Preserve canonical loop form, which means that 'Exit' should have only
+ // one predecessor. Note that Exit could be an exit block for multiple
+ // nested loops, causing both of the edges to now be critical and need to
+ // be split.
+ SmallVector<BasicBlock *, 4> ExitPreds(predecessors(Exit));
+ bool SplitLatchEdge = false;
+ for (BasicBlock *ExitPred : ExitPreds) {
+ // We only need to split loop exit edges.
+ Loop *PredLoop = LI->getLoopFor(ExitPred);
+ if (!PredLoop || PredLoop->contains(Exit) ||
+ isa<IndirectBrInst>(ExitPred->getTerminator()))
+ continue;
+ SplitLatchEdge |= L->getLoopLatch() == ExitPred;
+ BasicBlock *ExitSplit = SplitCriticalEdge(
+ ExitPred, Exit,
+ CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA());
+ ExitSplit->moveBefore(Exit);
+ }
+ assert(SplitLatchEdge &&
+ "Despite splitting all preds, failed to split latch exit?");
+ (void)SplitLatchEdge;
+ } else {
+    // We can fold the conditional branch in the preheader; this makes things
+    // simpler. The first step is to remove the extra edge to the Exit block.
+ Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
+ BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI);
+ NewBI->setDebugLoc(PHBI->getDebugLoc());
+ PHBI->eraseFromParent();
+
+ // With our CFG finalized, update DomTree if it is available.
+ if (DT) DT->deleteEdge(OrigPreheader, Exit);
+
+ // Update MSSA too, if available.
+ if (MSSAU)
+ MSSAU->removeEdge(OrigPreheader, Exit);
+ }
+
+ assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
+ assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
+ // Now that the CFG and DomTree are in a consistent state again, try to merge
+ // the OrigHeader block into OrigLatch. This will succeed if they are
+ // connected by an unconditional branch. This is just a cleanup so the
+ // emitted code isn't too gross in this common case.
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ BasicBlock *PredBB = OrigHeader->getUniquePredecessor();
+ bool DidMerge = MergeBlockIntoPredecessor(OrigHeader, &DTU, LI, MSSAU);
+ if (DidMerge)
+ RemoveRedundantDbgInstrs(PredBB);
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
+ LLVM_DEBUG(dbgs() << "LoopRotation: into "; L->dump());
+
+ ++NumRotated;
+
+ Rotated = true;
+ SimplifiedLatch = false;
+
+    // If the new latch is a deoptimizing exit, repeat the rotation while
+    // possible. A deoptimizing latch exit is not a typical case, so we just
+    // loop over it.
+    // TODO: if this becomes a performance bottleneck, extend the rotation
+    // algorithm to handle multiple rotations in one go.
+ } while (MultiRotate && canRotateDeoptimizingLatchExit(L));
+
+ return true;
+}
+
+/// Determine whether the instructions in this range may be safely and cheaply
+/// speculated. This is not an important enough situation to develop complex
+/// heuristics. We handle a single arithmetic instruction along with any type
+/// conversions.
+static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
+ BasicBlock::iterator End, Loop *L) {
+ bool seenIncrement = false;
+ bool MultiExitLoop = false;
+
+ if (!L->getExitingBlock())
+ MultiExitLoop = true;
+
+ for (BasicBlock::iterator I = Begin; I != End; ++I) {
+
+ if (!isSafeToSpeculativelyExecute(&*I))
+ return false;
+
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+
+ switch (I->getOpcode()) {
+ default:
+ return false;
+ case Instruction::GetElementPtr:
+ // GEPs are cheap if all indices are constant.
+ if (!cast<GEPOperator>(I)->hasAllConstantIndices())
+ return false;
+ // fall-thru to increment case
+ [[fallthrough]];
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr: {
+ Value *IVOpnd =
+ !isa<Constant>(I->getOperand(0))
+ ? I->getOperand(0)
+ : !isa<Constant>(I->getOperand(1)) ? I->getOperand(1) : nullptr;
+ if (!IVOpnd)
+ return false;
+
+ // If increment operand is used outside of the loop, this speculation
+ // could cause extra live range interference.
+ if (MultiExitLoop) {
+ for (User *UseI : IVOpnd->users()) {
+ auto *UserInst = cast<Instruction>(UseI);
+ if (!L->contains(UserInst))
+ return false;
+ }
+ }
+
+ if (seenIncrement)
+ return false;
+ seenIncrement = true;
+ break;
+ }
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // ignore type conversions
+ break;
+ }
+ }
+ return true;
+}
+
+/// Fold the loop tail into the loop exit by speculating the loop tail
+/// instructions. Typically, this is a single post-increment. In the case of a
+/// simple 2-block loop, hoisting the increment can be much better than
+/// duplicating the entire loop header. In the case of loops with early exits,
+/// rotation will not work anyway, but simplifyLoopLatch will put the loop in
+/// canonical form so downstream passes can handle it.
+///
+/// I don't believe this invalidates SCEV.
+bool LoopRotate::simplifyLoopLatch(Loop *L) {
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch || Latch->hasAddressTaken())
+ return false;
+
+ BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
+ if (!Jmp || !Jmp->isUnconditional())
+ return false;
+
+ BasicBlock *LastExit = Latch->getSinglePredecessor();
+ if (!LastExit || !L->isLoopExiting(LastExit))
+ return false;
+
+ BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
+ if (!BI)
+ return false;
+
+ if (!shouldSpeculateInstrs(Latch->begin(), Jmp->getIterator(), L))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
+ << LastExit->getName() << "\n");
+
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ MergeBlockIntoPredecessor(Latch, &DTU, LI, MSSAU, nullptr,
+ /*PredecessorWithTwoSuccessors=*/true);
+
+ if (SE) {
+    // Merging blocks may remove blocks referenced in the block disposition
+    // cache. Clear the cache.
+ SE->forgetBlockAndLoopDispositions();
+ }
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
+ return true;
+}
+
+/// Rotate \c L, and return true if any modification was made.
+bool LoopRotate::processLoop(Loop *L) {
+ // Save the loop metadata.
+ MDNode *LoopMD = L->getLoopID();
+
+ bool SimplifiedLatch = false;
+
+ // Simplify the loop latch before attempting to rotate the header
+ // upward. Rotation may not be needed if the loop tail can be folded into the
+ // loop exit.
+ if (!RotationOnly)
+ SimplifiedLatch = simplifyLoopLatch(L);
+
+ bool MadeChange = rotateLoop(L, SimplifiedLatch);
+ assert((!MadeChange || L->isLoopExiting(L->getLoopLatch())) &&
+ "Loop latch should be exiting after loop-rotate.");
+
+ // Restore the loop metadata.
+ // NB! We presume LoopRotation DOESN'T ADD its own metadata.
+ if ((MadeChange || SimplifiedLatch) && LoopMD)
+ L->setLoopID(LoopMD);
+
+ return MadeChange || SimplifiedLatch;
+}
+
+/// Utility to convert a loop into a loop with a bottom test.
+bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI,
+ AssumptionCache *AC, DominatorTree *DT,
+ ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
+ const SimplifyQuery &SQ, bool RotationOnly = true,
+ unsigned Threshold = unsigned(-1),
+ bool IsUtilMode = true, bool PrepareForLTO) {
+ LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly,
+ IsUtilMode, PrepareForLTO);
+ return LR.processLoop(L);
+}
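+
+// A minimal usage sketch of the wrapper above (illustrative only; the pass
+// context and the Threshold value of 16 are assumptions, not part of this
+// file). It presumes the caller already holds valid LI/TTI/AC/DT/SE analyses
+// for the function F that owns the loops:
+//
+//   SimplifyQuery SQ(F.getParent()->getDataLayout());
+//   bool Changed = false;
+//   for (Loop *L : LI->getLoopsInPreorder())
+//     Changed |= LoopRotation(L, LI, TTI, AC, DT, SE, /*MSSAU=*/nullptr, SQ,
+//                             /*RotationOnly=*/true, /*Threshold=*/16,
+//                             /*IsUtilMode=*/true, /*PrepareForLTO=*/false);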
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LoopSimplify.cpp
new file mode 100644
index 0000000000..87a0e54e27
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/LoopSimplify.cpp
@@ -0,0 +1,921 @@
+//===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs several transformations to transform natural loops into a
+// simpler form, which makes subsequent analyses and transformations simpler and
+// more effective.
+//
+// Loop pre-header insertion guarantees that there is a single, non-critical
+// entry edge from outside of the loop to the loop header. This simplifies a
+// number of analyses and transformations, such as LICM.
+//
+// Loop exit-block insertion guarantees that all exit blocks from the loop
+// (blocks which are outside of the loop that have predecessors inside of the
+// loop) only have predecessors from inside of the loop (and are thus dominated
+// by the loop header). This simplifies transformations such as store-sinking
+// that are built into LICM.
+//
+// This pass also guarantees that loops will have exactly one backedge.
+//
+// Indirectbr instructions introduce several complications. If the loop
+// contains or is entered by an indirectbr instruction, it may not be possible
+// to transform the loop and make these guarantees. Client code should check
+// that these conditions are true before relying on them.
+//
+// Similar complications arise from callbr instructions, particularly in
+// asm-goto where blockaddress expressions are used.
+//
+// Note that the simplifycfg pass will clean up blocks which are split out but
+// end up being unnecessary, so usage of this pass should not pessimize
+// generated code.
+//
+// This pass obviously modifies the CFG, but updates loop information and
+// dominator information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LoopSimplify.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-simplify"
+
+STATISTIC(NumNested , "Number of nested loops split out");
+
+// If the new block isn't already placed right after an 'outside block' block,
+// move it there. This prevents the preheader from being placed inside the loop
+// body, e.g. when the loop hasn't been rotated.
+static void placeSplitBlockCarefully(BasicBlock *NewBB,
+ SmallVectorImpl<BasicBlock *> &SplitPreds,
+ Loop *L) {
+ // Check to see if NewBB is already well placed.
+ Function::iterator BBI = --NewBB->getIterator();
+ for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
+ if (&*BBI == SplitPreds[i])
+ return;
+ }
+
+ // If it isn't already after an outside block, move it after one. This is
+ // always good as it makes the uncond branch from the outside block into a
+ // fall-through.
+
+ // Figure out *which* outside block to put this after. Prefer an outside
+ // block that neighbors a BB actually in the loop.
+ BasicBlock *FoundBB = nullptr;
+ for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
+ Function::iterator BBI = SplitPreds[i]->getIterator();
+ if (++BBI != NewBB->getParent()->end() && L->contains(&*BBI)) {
+ FoundBB = SplitPreds[i];
+ break;
+ }
+ }
+
+ // If our heuristic for a *good* bb to place this after doesn't find
+ // anything, just pick something. It's likely better than leaving it within
+ // the loop.
+ if (!FoundBB)
+ FoundBB = SplitPreds[0];
+ NewBB->moveAfter(FoundBB);
+}
+
+/// InsertPreheaderForLoop - Once we discover that a loop doesn't have a
+/// preheader, this method is called to insert one. This method has two phases:
+/// preheader insertion and analysis updating.
+///
+BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
+ LoopInfo *LI, MemorySSAUpdater *MSSAU,
+ bool PreserveLCSSA) {
+ BasicBlock *Header = L->getHeader();
+
+ // Compute the set of predecessors of the loop that are not in the loop.
+ SmallVector<BasicBlock*, 8> OutsideBlocks;
+ for (BasicBlock *P : predecessors(Header)) {
+ if (!L->contains(P)) { // Coming in from outside the loop?
+ // If the loop is branched to from an indirect terminator, we won't
+ // be able to fully transform the loop, because it prohibits
+ // edge splitting.
+ if (isa<IndirectBrInst>(P->getTerminator()))
+ return nullptr;
+
+ // Keep track of it.
+ OutsideBlocks.push_back(P);
+ }
+ }
+
+ // Split out the loop pre-header.
+ BasicBlock *PreheaderBB;
+ PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", DT,
+ LI, MSSAU, PreserveLCSSA);
+ if (!PreheaderBB)
+ return nullptr;
+
+ LLVM_DEBUG(dbgs() << "LoopSimplify: Creating pre-header "
+ << PreheaderBB->getName() << "\n");
+
+ // Make sure that NewBB is put someplace intelligent, which doesn't mess up
+ // code layout too horribly.
+ placeSplitBlockCarefully(PreheaderBB, OutsideBlocks, L);
+
+ return PreheaderBB;
+}
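+
+// Illustrative call pattern (a sketch; the surrounding transform and its
+// analysis pointers are assumed, not defined here): make sure a loop has a
+// preheader before running a transform that requires one, bailing out when an
+// indirectbr predecessor makes the entry edge unsplittable.
+//
+//   if (!L->getLoopPreheader())
+//     if (!InsertPreheaderForLoop(L, DT, LI, /*MSSAU=*/nullptr,
+//                                 /*PreserveLCSSA=*/true))
+//       return false;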
+
+/// Add the specified block, and all of its predecessors, to the specified set,
+/// if it's not already in there. Stop predecessor traversal when we reach
+/// StopBlock.
+static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
+ SmallPtrSetImpl<BasicBlock *> &Blocks) {
+ SmallVector<BasicBlock *, 8> Worklist;
+ Worklist.push_back(InputBB);
+ do {
+ BasicBlock *BB = Worklist.pop_back_val();
+ if (Blocks.insert(BB).second && BB != StopBlock)
+ // If BB is not already processed and it is not a stop block then
+ // insert its predecessor in the work list
+ append_range(Worklist, predecessors(BB));
+ } while (!Worklist.empty());
+}
+
+/// The first part of loop-nestification is to find a PHI node that tells
+/// us how to partition the loops.
+static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT,
+ AssumptionCache *AC) {
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
+ PHINode *PN = cast<PHINode>(I);
+ ++I;
+ if (Value *V = simplifyInstruction(PN, {DL, nullptr, DT, AC})) {
+ // This is a degenerate PHI already, don't modify it!
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ continue;
+ }
+
+ // Scan this PHI node looking for a use of the PHI node by itself.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == PN &&
+ L->contains(PN->getIncomingBlock(i)))
+ // We found something tasty to remove.
+ return PN;
+ }
+ return nullptr;
+}
+
+/// If this loop has multiple backedges, try to pull one of them out into
+/// a nested loop.
+///
+/// This is important for code that looks like
+/// this:
+///
+/// Loop:
+/// ...
+/// br cond, Loop, Next
+/// ...
+/// br cond2, Loop, Out
+///
+/// To identify this common case, we look at the PHI nodes in the header of the
+/// loop. PHI nodes with unchanging values on one backedge correspond to values
+/// that change in the "outer" loop, but not in the "inner" loop.
+///
+/// If we are able to separate out a loop, return the new outer loop that was
+/// created.
+///
+static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
+ DominatorTree *DT, LoopInfo *LI,
+ ScalarEvolution *SE, bool PreserveLCSSA,
+ AssumptionCache *AC, MemorySSAUpdater *MSSAU) {
+ // Don't try to separate loops without a preheader.
+ if (!Preheader)
+ return nullptr;
+
+ // Treat the presence of convergent functions conservatively. The
+ // transformation is invalid if calls to certain convergent
+ // functions (like an AMDGPU barrier) get included in the resulting
+ // inner loop. But blocks meant for the inner loop will be
+ // identified later at a point where it's too late to abort the
+ // transformation. Also, the convergent attribute is not really
+ // sufficient to express the semantics of functions that are
+ // affected by this transformation. So we choose to back off if such
+ // a function call is present until a better alternative becomes
+ // available. This is similar to the conservative treatment of
+ // convergent function calls in GVNHoist and JumpThreading.
+ for (auto *BB : L->blocks()) {
+ for (auto &II : *BB) {
+ if (auto CI = dyn_cast<CallBase>(&II)) {
+ if (CI->isConvergent()) {
+ return nullptr;
+ }
+ }
+ }
+ }
+
+ // The header is not a landing pad; preheader insertion should ensure this.
+ BasicBlock *Header = L->getHeader();
+ assert(!Header->isEHPad() && "Can't insert backedge to EH pad");
+
+ PHINode *PN = findPHIToPartitionLoops(L, DT, AC);
+ if (!PN) return nullptr; // No known way to partition.
+
+ // Pull out all predecessors that have varying values in the loop. This
+ // handles the case when a PHI node has multiple instances of itself as
+ // arguments.
+ SmallVector<BasicBlock*, 8> OuterLoopPreds;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ if (PN->getIncomingValue(i) != PN ||
+ !L->contains(PN->getIncomingBlock(i))) {
+ // We can't split indirect control flow edges.
+ if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator()))
+ return nullptr;
+ OuterLoopPreds.push_back(PN->getIncomingBlock(i));
+ }
+ }
+ LLVM_DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
+
+ // If ScalarEvolution is around and knows anything about values in
+ // this loop, tell it to forget them, because we're about to
+ // substantially change it.
+ if (SE)
+ SE->forgetLoop(L);
+
+ BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer",
+ DT, LI, MSSAU, PreserveLCSSA);
+
+ // Make sure that NewBB is put someplace intelligent, which doesn't mess up
+ // code layout too horribly.
+ placeSplitBlockCarefully(NewBB, OuterLoopPreds, L);
+
+ // Create the new outer loop.
+ Loop *NewOuter = LI->AllocateLoop();
+
+ // Change the parent loop to use the outer loop as its child now.
+ if (Loop *Parent = L->getParentLoop())
+ Parent->replaceChildLoopWith(L, NewOuter);
+ else
+ LI->changeTopLevelLoop(L, NewOuter);
+
+ // L is now a subloop of our outer loop.
+ NewOuter->addChildLoop(L);
+
+ for (BasicBlock *BB : L->blocks())
+ NewOuter->addBlockEntry(BB);
+
+ // Now reset the header in L, which had been moved by
+ // SplitBlockPredecessors for the outer loop.
+ L->moveToHeader(Header);
+
+ // Determine which blocks should stay in L and which should be moved out to
+ // the Outer loop now.
+ SmallPtrSet<BasicBlock *, 4> BlocksInL;
+ for (BasicBlock *P : predecessors(Header)) {
+ if (DT->dominates(Header, P))
+ addBlockAndPredsToSet(P, Header, BlocksInL);
+ }
+
+ // Scan all of the loop children of L, moving them to OuterLoop if they are
+ // not part of the inner loop.
+ const std::vector<Loop*> &SubLoops = L->getSubLoops();
+ for (size_t I = 0; I != SubLoops.size(); )
+ if (BlocksInL.count(SubLoops[I]->getHeader()))
+ ++I; // Loop remains in L
+ else
+ NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I));
+
+ SmallVector<BasicBlock *, 8> OuterLoopBlocks;
+ OuterLoopBlocks.push_back(NewBB);
+ // Now that we know which blocks are in L and which need to be moved to
+ // OuterLoop, move any blocks that need it.
+ for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
+ BasicBlock *BB = L->getBlocks()[i];
+ if (!BlocksInL.count(BB)) {
+ // Move this block to the parent, updating the exit blocks sets
+ L->removeBlockFromLoop(BB);
+ if ((*LI)[BB] == L) {
+ LI->changeLoopFor(BB, NewOuter);
+ OuterLoopBlocks.push_back(BB);
+ }
+ --i;
+ }
+ }
+
+ // Split edges to exit blocks from the inner loop, if they emerged in the
+ // process of separating the outer one.
+ formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA);
+
+ if (PreserveLCSSA) {
+ // Fix LCSSA form for L. Some values, which previously were only used inside
+ // L, can now be used in NewOuter loop. We need to insert phi-nodes for them
+ // in corresponding exit blocks.
+    // We don't need to form LCSSA recursively, because there cannot be uses
+    // inside the newly created outer loop of defs from inner loops: such uses
+    // would already go through an LCSSA phi node.
+ formLCSSA(*L, *DT, LI, SE);
+
+ assert(NewOuter->isRecursivelyLCSSAForm(*DT, *LI) &&
+ "LCSSA is broken after separating nested loops!");
+ }
+
+ return NewOuter;
+}
+
+/// This method is called when the specified loop has more than one
+/// backedge in it.
+///
+/// If this occurs, revector all of these backedges to target a new basic block
+/// and have that block branch to the loop header. This ensures that loops
+/// have exactly one backedge.
+static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
+ DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU) {
+ assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
+
+ // Get information about the loop
+ BasicBlock *Header = L->getHeader();
+ Function *F = Header->getParent();
+
+ // Unique backedge insertion currently depends on having a preheader.
+ if (!Preheader)
+ return nullptr;
+
+ // The header is not an EH pad; preheader insertion should ensure this.
+ assert(!Header->isEHPad() && "Can't insert backedge to EH pad");
+
+ // Figure out which basic blocks contain back-edges to the loop header.
+ std::vector<BasicBlock*> BackedgeBlocks;
+ for (BasicBlock *P : predecessors(Header)) {
+ // Indirect edges cannot be split, so we must fail if we find one.
+ if (isa<IndirectBrInst>(P->getTerminator()))
+ return nullptr;
+
+ if (P != Preheader) BackedgeBlocks.push_back(P);
+ }
+
+ // Create and insert the new backedge block...
+ BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(),
+ Header->getName() + ".backedge", F);
+ BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);
+ BETerminator->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc());
+
+ LLVM_DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "
+ << BEBlock->getName() << "\n");
+
+ // Move the new backedge block to right after the last backedge block.
+ Function::iterator InsertPos = ++BackedgeBlocks.back()->getIterator();
+ F->splice(InsertPos, F, BEBlock->getIterator());
+
+ // Now that the block has been inserted into the function, create PHI nodes in
+ // the backedge block which correspond to any PHI nodes in the header block.
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(),
+ PN->getName()+".be", BETerminator);
+
+ // Loop over the PHI node, moving all entries except the one for the
+ // preheader over to the new PHI node.
+ unsigned PreheaderIdx = ~0U;
+ bool HasUniqueIncomingValue = true;
+ Value *UniqueValue = nullptr;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *IBB = PN->getIncomingBlock(i);
+ Value *IV = PN->getIncomingValue(i);
+ if (IBB == Preheader) {
+ PreheaderIdx = i;
+ } else {
+ NewPN->addIncoming(IV, IBB);
+ if (HasUniqueIncomingValue) {
+ if (!UniqueValue)
+ UniqueValue = IV;
+ else if (UniqueValue != IV)
+ HasUniqueIncomingValue = false;
+ }
+ }
+ }
+
+ // Delete all of the incoming values from the old PN except the preheader's
+ assert(PreheaderIdx != ~0U && "PHI has no preheader entry??");
+ if (PreheaderIdx != 0) {
+ PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx));
+ PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx));
+ }
+ // Nuke all entries except the zero'th.
+ for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i)
+ PN->removeIncomingValue(e-i, false);
+
+ // Finally, add the newly constructed PHI node as the entry for the BEBlock.
+ PN->addIncoming(NewPN, BEBlock);
+
+ // As an optimization, if all incoming values in the new PhiNode (which is a
+ // subset of the incoming values of the old PHI node) have the same value,
+ // eliminate the PHI Node.
+ if (HasUniqueIncomingValue) {
+ NewPN->replaceAllUsesWith(UniqueValue);
+ NewPN->eraseFromParent();
+ }
+ }
+
+ // Now that all of the PHI nodes have been inserted and adjusted, modify the
+ // backedge blocks to jump to the BEBlock instead of the header.
+ // If one of the backedges has llvm.loop metadata attached, we remove
+ // it from the backedge and add it to BEBlock.
+ unsigned LoopMDKind = BEBlock->getContext().getMDKindID("llvm.loop");
+ MDNode *LoopMD = nullptr;
+ for (BasicBlock *BB : BackedgeBlocks) {
+ Instruction *TI = BB->getTerminator();
+ if (!LoopMD)
+ LoopMD = TI->getMetadata(LoopMDKind);
+ TI->setMetadata(LoopMDKind, nullptr);
+ TI->replaceSuccessorWith(Header, BEBlock);
+ }
+ BEBlock->getTerminator()->setMetadata(LoopMDKind, LoopMD);
+
+ //===--- Update all analyses which we must preserve now -----------------===//
+
+ // Update Loop Information - we know that this block is now in the current
+ // loop and all parent loops.
+ L->addBasicBlockToLoop(BEBlock, *LI);
+
+ // Update dominator information
+ DT->splitBlock(BEBlock);
+
+ if (MSSAU)
+ MSSAU->updatePhisWhenInsertingUniqueBackedgeBlock(Header, Preheader,
+ BEBlock);
+
+ return BEBlock;
+}
+
+/// Simplify one loop and queue further loops for simplification.
+static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
+ DominatorTree *DT, LoopInfo *LI,
+ ScalarEvolution *SE, AssumptionCache *AC,
+ MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
+ bool Changed = false;
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
+ReprocessLoop:
+
+ // Check to see that no blocks (other than the header) in this loop have
+ // predecessors that are not in the loop. This is not valid for natural
+ // loops, but can occur if the blocks are unreachable. Since they are
+ // unreachable we can just shamelessly delete those CFG edges!
+ for (BasicBlock *BB : L->blocks()) {
+ if (BB == L->getHeader())
+ continue;
+
+ SmallPtrSet<BasicBlock*, 4> BadPreds;
+ for (BasicBlock *P : predecessors(BB))
+ if (!L->contains(P))
+ BadPreds.insert(P);
+
+ // Delete each unique out-of-loop (and thus dead) predecessor.
+ for (BasicBlock *P : BadPreds) {
+
+ LLVM_DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
+ << P->getName() << "\n");
+
+ // Zap the dead pred's terminator and replace it with unreachable.
+ Instruction *TI = P->getTerminator();
+ changeToUnreachable(TI, PreserveLCSSA,
+ /*DTU=*/nullptr, MSSAU);
+ Changed = true;
+ }
+ }
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
+ // If there are exiting blocks with branches on undef, resolve the undef in
+ // the direction which will exit the loop. This will help simplify loop
+ // trip count computations.
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (BasicBlock *ExitingBlock : ExitingBlocks)
+ if (BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()))
+ if (BI->isConditional()) {
+ if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {
+
+ LLVM_DEBUG(dbgs()
+ << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
+ << ExitingBlock->getName() << "\n");
+
+ BI->setCondition(ConstantInt::get(Cond->getType(),
+ !L->contains(BI->getSuccessor(0))));
+
+ Changed = true;
+ }
+ }
+
+ // Does the loop already have a preheader? If so, don't insert one.
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ Preheader = InsertPreheaderForLoop(L, DT, LI, MSSAU, PreserveLCSSA);
+ if (Preheader)
+ Changed = true;
+ }
+
+ // Next, check to make sure that all exit nodes of the loop only have
+ // predecessors that are inside of the loop. This check guarantees that the
+ // loop preheader/header will dominate the exit blocks. If the exit block has
+ // predecessors from outside of the loop, split the edge now.
+ if (formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA))
+ Changed = true;
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
+ // If the header has more than two predecessors at this point (from the
+ // preheader and from multiple backedges), we must adjust the loop.
+ BasicBlock *LoopLatch = L->getLoopLatch();
+ if (!LoopLatch) {
+ // If this is really a nested loop, rip it out into a child loop. Don't do
+    // this for loops with a giant number of backedges; just factor them into a
+ // common backedge instead.
+ if (L->getNumBackEdges() < 8) {
+ if (Loop *OuterL = separateNestedLoop(L, Preheader, DT, LI, SE,
+ PreserveLCSSA, AC, MSSAU)) {
+ ++NumNested;
+ // Enqueue the outer loop as it should be processed next in our
+ // depth-first nest walk.
+ Worklist.push_back(OuterL);
+
+ // This is a big restructuring change, reprocess the whole loop.
+ Changed = true;
+ // GCC doesn't tail recursion eliminate this.
+ // FIXME: It isn't clear we can't rely on LLVM to TRE this.
+ goto ReprocessLoop;
+ }
+ }
+
+ // If we either couldn't, or didn't want to, identify nesting of the loops,
+ // insert a new block that all backedges target, then make it jump to the
+ // loop header.
+ LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI, MSSAU);
+ if (LoopLatch)
+ Changed = true;
+ }
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+
+ // Scan over the PHI nodes in the loop header. Since they now have only two
+ // incoming values (the loop is canonicalized), we may have simplified the PHI
+ // down to 'X = phi [X, Y]', which should be replaced with 'Y'.
+ PHINode *PN;
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ (PN = dyn_cast<PHINode>(I++)); )
+ if (Value *V = simplifyInstruction(PN, {DL, nullptr, DT, AC})) {
+ if (SE) SE->forgetValue(PN);
+ if (!PreserveLCSSA || LI->replacementPreservesLCSSAForm(PN, V)) {
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ Changed = true;
+ }
+ }
+
+ // If this loop has multiple exits and the exits all go to the same
+ // block, attempt to merge the exits. This helps several passes, such
+ // as LoopRotation, which do not support loops with multiple exits.
+ // SimplifyCFG also does this (and this code uses the same utility
+ // function), however this code is loop-aware, where SimplifyCFG is
+ // not. That gives it the advantage of being able to hoist
+ // loop-invariant instructions out of the way to open up more
+ // opportunities, and the disadvantage of having the responsibility
+ // to preserve dominator information.
+ auto HasUniqueExitBlock = [&]() {
+ BasicBlock *UniqueExit = nullptr;
+ for (auto *ExitingBB : ExitingBlocks)
+ for (auto *SuccBB : successors(ExitingBB)) {
+ if (L->contains(SuccBB))
+ continue;
+
+ if (!UniqueExit)
+ UniqueExit = SuccBB;
+ else if (UniqueExit != SuccBB)
+ return false;
+ }
+
+ return true;
+ };
+ if (HasUniqueExitBlock()) {
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitingBlock = ExitingBlocks[i];
+ if (!ExitingBlock->getSinglePredecessor()) continue;
+ BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!BI || !BI->isConditional()) continue;
+ CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
+ if (!CI || CI->getParent() != ExitingBlock) continue;
+
+ // Attempt to hoist out all instructions except for the
+ // comparison and the branch.
+ bool AllInvariant = true;
+ bool AnyInvariant = false;
+ for (auto I = ExitingBlock->instructionsWithoutDebug().begin(); &*I != BI; ) {
+ Instruction *Inst = &*I++;
+ if (Inst == CI)
+ continue;
+ if (!L->makeLoopInvariant(
+ Inst, AnyInvariant,
+ Preheader ? Preheader->getTerminator() : nullptr, MSSAU, SE)) {
+ AllInvariant = false;
+ break;
+ }
+ }
+ if (AnyInvariant)
+ Changed = true;
+ if (!AllInvariant) continue;
+
+ // The block has now been cleared of all instructions except for
+ // a comparison and a conditional branch. SimplifyCFG may be able
+ // to fold it now.
+ if (!FoldBranchToCommonDest(BI, /*DTU=*/nullptr, MSSAU))
+ continue;
+
+ // Success. The block is now dead, so remove it from the loop,
+ // update the dominator tree and delete it.
+ LLVM_DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
+ << ExitingBlock->getName() << "\n");
+
+ assert(pred_empty(ExitingBlock));
+ Changed = true;
+ LI->removeBlock(ExitingBlock);
+
+ DomTreeNode *Node = DT->getNode(ExitingBlock);
+ while (!Node->isLeaf()) {
+ DomTreeNode *Child = Node->back();
+ DT->changeImmediateDominator(Child, Node->getIDom());
+ }
+ DT->eraseNode(ExitingBlock);
+ if (MSSAU) {
+ SmallSetVector<BasicBlock *, 8> ExitBlockSet;
+ ExitBlockSet.insert(ExitingBlock);
+ MSSAU->removeBlocks(ExitBlockSet);
+ }
+
+ BI->getSuccessor(0)->removePredecessor(
+ ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA);
+ BI->getSuccessor(1)->removePredecessor(
+ ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA);
+ ExitingBlock->eraseFromParent();
+ }
+ }
+
+ // Changing exit conditions for blocks may affect exit counts of this loop and
+  // any of its parents, so we must invalidate the entire subtree if we've made
+ // any changes.
+ if (Changed && SE)
+ SE->forgetTopmostLoop(L);
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
+ return Changed;
+}
+
+bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
+ ScalarEvolution *SE, AssumptionCache *AC,
+ MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
+ bool Changed = false;
+
+#ifndef NDEBUG
+ // If we're asked to preserve LCSSA, the loop nest needs to start in LCSSA
+ // form.
+ if (PreserveLCSSA) {
+ assert(DT && "DT not available.");
+ assert(LI && "LI not available.");
+ assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
+ "Requested to preserve LCSSA, but it's already broken.");
+ }
+#endif
+
+ // Worklist maintains our depth-first queue of loops in this nest to process.
+ SmallVector<Loop *, 4> Worklist;
+ Worklist.push_back(L);
+
+ // Walk the worklist from front to back, pushing newly found sub loops onto
+ // the back. This will let us process loops from back to front in depth-first
+ // order. We can use this simple process because loops form a tree.
+ for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
+ Loop *L2 = Worklist[Idx];
+ Worklist.append(L2->begin(), L2->end());
+ }
+
+ while (!Worklist.empty())
+ Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, DT, LI, SE,
+ AC, MSSAU, PreserveLCSSA);
+
+ return Changed;
+}
+
+namespace {
+ struct LoopSimplify : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ LoopSimplify() : FunctionPass(ID) {
+ initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+
+ // We need loop information to identify the loops...
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved<DependenceAnalysisWrapperPass>();
+ AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
+ AU.addPreserved<BranchProbabilityInfoWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
+ }
+
+ /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
+ void verifyAnalysis() const override;
+ };
+}
+
+char LoopSimplify::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
+ "Canonicalize natural loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
+ "Canonicalize natural loops", false, false)
+
+// Publicly exposed interface to pass...
+char &llvm::LoopSimplifyID = LoopSimplify::ID;
+Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
+
+/// runOnFunction - Run down all loops in the CFG (recursively, but we could do
+/// it in any convenient order) inserting preheaders...
+///
+bool LoopSimplify::runOnFunction(Function &F) {
+ bool Changed = false;
+ LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
+ ScalarEvolution *SE = SEWP ? &SEWP->getSE() : nullptr;
+ AssumptionCache *AC =
+ &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ MemorySSA *MSSA = nullptr;
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
+ if (MSSAAnalysis) {
+ MSSA = &MSSAAnalysis->getMSSA();
+ MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
+ }
+
+ bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+
+ // Simplify each loop nest in the function.
+ for (auto *L : *LI)
+ Changed |= simplifyLoop(L, DT, LI, SE, AC, MSSAU.get(), PreserveLCSSA);
+
+#ifndef NDEBUG
+ if (PreserveLCSSA) {
+ bool InLCSSA = all_of(
+ *LI, [&](Loop *L) { return L->isRecursivelyLCSSAForm(*DT, *LI); });
+ assert(InLCSSA && "LCSSA is broken after loop-simplify.");
+ }
+#endif
+ return Changed;
+}
+
+PreservedAnalyses LoopSimplifyPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ bool Changed = false;
+ LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);
+ DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ ScalarEvolution *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F);
+ AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
+ auto *MSSAAnalysis = AM.getCachedResult<MemorySSAAnalysis>(F);
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (MSSAAnalysis) {
+ auto *MSSA = &MSSAAnalysis->getMSSA();
+ MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
+ }
+
+  // Note that we don't preserve LCSSA in the new PM; if you need it, run LCSSA
+  // after simplifying the loops. MemorySSA is preserved if it exists.
+ for (auto *L : *LI)
+ Changed |=
+ simplifyLoop(L, DT, LI, SE, AC, MSSAU.get(), /*PreserveLCSSA*/ false);
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<LoopAnalysis>();
+ PA.preserve<ScalarEvolutionAnalysis>();
+ PA.preserve<DependenceAnalysis>();
+ if (MSSAAnalysis)
+ PA.preserve<MemorySSAAnalysis>();
+  // BPI maps conditional terminators to probabilities. LoopSimplify can insert
+  // blocks, but it does so only by splitting existing blocks and edges. This
+  // results in the interesting property that all new terminators inserted are
+  // unconditional branches which do not appear in BPI. All deletions are
+  // handled via ValueHandle callbacks within BPI.
+ PA.preserve<BranchProbabilityAnalysis>();
+ return PA;
+}
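+
+// A sketch of wiring the pass above into a new-pass-manager pipeline (the
+// analysis-manager setup needed to actually run MPM is assumed and omitted):
+//
+//   FunctionPassManager FPM;
+//   FPM.addPass(LoopSimplifyPass());
+//   ModulePassManager MPM;
+//   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));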
+
+// FIXME: Restore this code when we re-enable verification in verifyAnalysis
+// below.
+#if 0
+static void verifyLoop(Loop *L) {
+ // Verify subloops.
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ verifyLoop(*I);
+
+ // It used to be possible to just assert L->isLoopSimplifyForm(), however
+ // with the introduction of indirectbr, there are now cases where it's
+ // not possible to transform a loop as necessary. We can at least check
+ // that there is an indirectbr near any time there's trouble.
+
+ // Indirectbr can interfere with preheader and unique backedge insertion.
+ if (!L->getLoopPreheader() || !L->getLoopLatch()) {
+ bool HasIndBrPred = false;
+ for (BasicBlock *Pred : predecessors(L->getHeader()))
+ if (isa<IndirectBrInst>(Pred->getTerminator())) {
+ HasIndBrPred = true;
+ break;
+ }
+ assert(HasIndBrPred &&
+ "LoopSimplify has no excuse for missing loop header info!");
+ (void)HasIndBrPred;
+ }
+
+ // Indirectbr can interfere with exit block canonicalization.
+ if (!L->hasDedicatedExits()) {
+ bool HasIndBrExiting = false;
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) {
+ HasIndBrExiting = true;
+ break;
+ }
+ }
+
+ assert(HasIndBrExiting &&
+ "LoopSimplify has no excuse for missing exit block info!");
+ (void)HasIndBrExiting;
+ }
+}
+#endif
+
+void LoopSimplify::verifyAnalysis() const {
+ // FIXME: This routine is being called mid-way through the loop pass manager
+ // as loop passes destroy this analysis. That's actually fine, but we have no
+ // way of expressing that here. Once all of the passes that destroy this are
+ // hoisted out of the loop pass manager we can add back verification here.
+#if 0
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+ verifyLoop(*I);
+#endif
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LoopUnroll.cpp
new file mode 100644
index 0000000000..e8f585b4a9
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/LoopUnroll.cpp
@@ -0,0 +1,908 @@
+//===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some loop unrolling utilities. It does not define any
+// actual pass or policy, but provides a single function to perform loop
+// unrolling.
+//
+// The process of unrolling can produce extraneous basic blocks linked with
+// unconditional branches. This will be corrected in the future.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/ilist_iterator.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/ValueMap.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GenericDomTree.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopSimplify.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <algorithm>
+#include <assert.h>
+#include <numeric>
+#include <type_traits>
+#include <vector>
+
+namespace llvm {
+class DataLayout;
+class Value;
+} // namespace llvm
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-unroll"
+
+// TODO: Should these be here or in LoopUnroll?
+STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
+STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
+STATISTIC(NumUnrolledNotLatch, "Number of loops unrolled without a conditional "
+ "latch (completely or otherwise)");
+
+static cl::opt<bool>
+UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
+ cl::desc("Allow runtime unrolled loops to be unrolled "
+ "with epilog instead of prolog."));
+
+static cl::opt<bool>
+UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden,
+ cl::desc("Verify domtree after unrolling"),
+#ifdef EXPENSIVE_CHECKS
+ cl::init(true)
+#else
+ cl::init(false)
+#endif
+ );
+
+static cl::opt<bool>
+UnrollVerifyLoopInfo("unroll-verify-loopinfo", cl::Hidden,
+ cl::desc("Verify loopinfo after unrolling"),
+#ifdef EXPENSIVE_CHECKS
+ cl::init(true)
+#else
+ cl::init(false)
+#endif
+ );
+
+
+/// Check if unrolling created a situation where we need to insert phi nodes to
+/// preserve LCSSA form.
+/// \param Blocks is a vector of basic blocks representing unrolled loop.
+/// \param L is the outer loop.
+/// It's possible that some of the blocks are in L, and some are not. In this
+/// case, if there is a use outside L and the definition is inside L, we need to
+/// insert a phi-node, otherwise LCSSA will be broken.
+/// The function is just a helper function for llvm::UnrollLoop that returns
+/// true if this situation occurs, indicating that LCSSA needs to be fixed.
+static bool needToInsertPhisForLCSSA(Loop *L,
+ const std::vector<BasicBlock *> &Blocks,
+ LoopInfo *LI) {
+ for (BasicBlock *BB : Blocks) {
+ if (LI->getLoopFor(BB) == L)
+ continue;
+ for (Instruction &I : *BB) {
+ for (Use &U : I.operands()) {
+ if (const auto *Def = dyn_cast<Instruction>(U)) {
+ Loop *DefLoop = LI->getLoopFor(Def->getParent());
+ if (!DefLoop)
+ continue;
+ if (DefLoop->contains(L))
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+}
+
+/// Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary
+/// and adds a mapping from the original loop to the new loop to NewLoops.
+/// Returns nullptr if no new loop was created, and otherwise a pointer to the
+/// original loop that OriginalBB was part of.
+const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB,
+ BasicBlock *ClonedBB, LoopInfo *LI,
+ NewLoopsMap &NewLoops) {
+  // Figure out which loop the original block is in.
+ const Loop *OldLoop = LI->getLoopFor(OriginalBB);
+ assert(OldLoop && "Should (at least) be in the loop being unrolled!");
+
+ Loop *&NewLoop = NewLoops[OldLoop];
+ if (!NewLoop) {
+ // Found a new sub-loop.
+ assert(OriginalBB == OldLoop->getHeader() &&
+ "Header should be first in RPO");
+
+ NewLoop = LI->AllocateLoop();
+ Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop());
+
+ if (NewLoopParent)
+ NewLoopParent->addChildLoop(NewLoop);
+ else
+ LI->addTopLevelLoop(NewLoop);
+
+ NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
+ return OldLoop;
+ } else {
+ NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
+ return nullptr;
+ }
+}
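+// Typical use during cloning (see UnrollLoop below): when the returned
+// OldLoop is non-null, the caller looks up NewLoops[OldLoop] and queues the
+// freshly created loop for later re-simplification.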
+
+/// The function chooses which type of unroll (epilog or prolog) is more
+/// profitable.
+/// Epilog unroll is more profitable when there is a PHI that starts from a
+/// constant. In this case the epilog will leave the PHI starting from the
+/// constant, but the prolog will convert it to a non-constant.
+///
+/// loop:
+/// PN = PHI [I, Latch], [CI, PreHeader]
+/// I = foo(PN)
+/// ...
+///
+/// Epilog unroll case.
+/// loop:
+/// PN = PHI [I2, Latch], [CI, PreHeader]
+/// I1 = foo(PN)
+/// I2 = foo(I1)
+/// ...
+/// Prolog unroll case.
+/// NewPN = PHI [PrologI, Prolog], [CI, PreHeader]
+/// loop:
+/// PN = PHI [I2, Latch], [NewPN, PreHeader]
+/// I1 = foo(PN)
+/// I2 = foo(I1)
+/// ...
+///
+static bool isEpilogProfitable(Loop *L) {
+ BasicBlock *PreHeader = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ assert(PreHeader && Header);
+ for (const PHINode &PN : Header->phis()) {
+ if (isa<ConstantInt>(PN.getIncomingValueForBlock(PreHeader)))
+ return true;
+ }
+ return false;
+}
+
+/// Perform some cleanup and simplifications on loops after unrolling. It is
+/// useful to simplify the IV's in the new loop, as well as do a quick
+/// simplify/dce pass of the instructions.
+void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
+ ScalarEvolution *SE, DominatorTree *DT,
+ AssumptionCache *AC,
+ const TargetTransformInfo *TTI) {
+ // Simplify any new induction variables in the partially unrolled loop.
+ if (SE && SimplifyIVs) {
+ SmallVector<WeakTrackingVH, 16> DeadInsts;
+ simplifyLoopIVs(L, SE, DT, LI, TTI, DeadInsts);
+
+ // Aggressively clean up dead instructions that simplifyLoopIVs already
+ // identified. Any remaining should be cleaned up below.
+ while (!DeadInsts.empty()) {
+ Value *V = DeadInsts.pop_back_val();
+ if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
+ RecursivelyDeleteTriviallyDeadInstructions(Inst);
+ }
+ }
+
+ // At this point, the code is well formed. Perform constprop, instsimplify,
+ // and dce.
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ SmallVector<WeakTrackingVH, 16> DeadInsts;
+ for (BasicBlock *BB : L->getBlocks()) {
+ for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
+ if (Value *V = simplifyInstruction(&Inst, {DL, nullptr, DT, AC}))
+ if (LI->replacementPreservesLCSSAForm(&Inst, V))
+ Inst.replaceAllUsesWith(V);
+ if (isInstructionTriviallyDead(&Inst))
+ DeadInsts.emplace_back(&Inst);
+ }
+ // We can't do recursive deletion until we're done iterating, as we might
+ // have a phi which (potentially indirectly) uses instructions later in
+ // the block we're iterating through.
+ RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
+ }
+}
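+// As a small example of the cleanup above: after unrolling, a cloned
+// instruction such as
+//   %iv.next.1 = add i32 0, 1
+// simplifies to the constant 1 via simplifyInstruction, its uses are
+// rewritten, and the now-dead instruction is removed by the trivial-DCE
+// sweep (a sketch; the exact instructions depend on the unrolled loop).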
+
+/// Unroll the given loop by Count. The loop must be in LCSSA form. Unrolling
+/// can only fail when the loop's latch block is not terminated by a conditional
+/// branch instruction. However, if the trip count (and multiple) are not known,
+/// loop unrolling will mostly produce more code that is no faster.
+///
+/// If Runtime is true then UnrollLoop will try to insert a prologue or
+/// epilogue that ensures the latch has a trip multiple of Count. UnrollLoop
+/// will not runtime-unroll the loop if computing the run-time trip count will
+/// be expensive and AllowExpensiveTripCount is false.
+///
+/// The LoopInfo Analysis that is passed will be kept consistent.
+///
+/// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
+/// DominatorTree if they are non-null.
+///
+/// If RemainderLoop is non-null, it will receive the remainder loop (if
+/// required and not fully unrolled).
+LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
+ ScalarEvolution *SE, DominatorTree *DT,
+ AssumptionCache *AC,
+ const TargetTransformInfo *TTI,
+ OptimizationRemarkEmitter *ORE,
+ bool PreserveLCSSA, Loop **RemainderLoop) {
+ assert(DT && "DomTree is required");
+
+ if (!L->getLoopPreheader()) {
+ LLVM_DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
+ return LoopUnrollResult::Unmodified;
+ }
+
+ if (!L->getLoopLatch()) {
+ LLVM_DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n");
+ return LoopUnrollResult::Unmodified;
+ }
+
+ // Loops with indirectbr cannot be cloned.
+ if (!L->isSafeToClone()) {
+ LLVM_DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n");
+ return LoopUnrollResult::Unmodified;
+ }
+
+ if (L->getHeader()->hasAddressTaken()) {
+ // The loop-rotate pass can be helpful to avoid this in many cases.
+ LLVM_DEBUG(
+ dbgs() << " Won't unroll loop: address of header block is taken.\n");
+ return LoopUnrollResult::Unmodified;
+ }
+
+ assert(ULO.Count > 0);
+
+ // All these values should be taken only after peeling because they might have
+ // changed.
+ BasicBlock *Preheader = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ SmallVector<BasicBlock *, 4> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ std::vector<BasicBlock *> OriginalLoopBlocks = L->getBlocks();
+
+ const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L);
+ const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L);
+
+ // Effectively "DCE" unrolled iterations that are beyond the max tripcount
+ // and will never be executed.
+ if (MaxTripCount && ULO.Count > MaxTripCount)
+ ULO.Count = MaxTripCount;
+
+ struct ExitInfo {
+ unsigned TripCount;
+ unsigned TripMultiple;
+ unsigned BreakoutTrip;
+ bool ExitOnTrue;
+ BasicBlock *FirstExitingBlock = nullptr;
+ SmallVector<BasicBlock *> ExitingBlocks;
+ };
+ DenseMap<BasicBlock *, ExitInfo> ExitInfos;
+ SmallVector<BasicBlock *, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (auto *ExitingBlock : ExitingBlocks) {
+ // The folding code is not prepared to deal with non-branch instructions
+ // right now.
+ auto *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!BI)
+ continue;
+
+ ExitInfo &Info = ExitInfos.try_emplace(ExitingBlock).first->second;
+ Info.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
+ Info.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
+ if (Info.TripCount != 0) {
+ Info.BreakoutTrip = Info.TripCount % ULO.Count;
+ Info.TripMultiple = 0;
+ } else {
+ Info.BreakoutTrip = Info.TripMultiple =
+ (unsigned)std::gcd(ULO.Count, Info.TripMultiple);
+ }
+ Info.ExitOnTrue = !L->contains(BI->getSuccessor(0));
+ Info.ExitingBlocks.push_back(ExitingBlock);
+ LLVM_DEBUG(dbgs() << " Exiting block %" << ExitingBlock->getName()
+ << ": TripCount=" << Info.TripCount
+ << ", TripMultiple=" << Info.TripMultiple
+ << ", BreakoutTrip=" << Info.BreakoutTrip << "\n");
+ }
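+  // Worked example (sketch): with an exact TripCount of 10 and ULO.Count of
+  // 4, BreakoutTrip becomes 10 % 4 == 2 and TripMultiple is cleared to 0.
+  // With an unknown trip count but a known TripMultiple of 6 and ULO.Count of
+  // 4, both BreakoutTrip and TripMultiple become gcd(4, 6) == 2.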
+
+ // Are we eliminating the loop control altogether? Note that we can know
+ // we're eliminating the backedge without knowing exactly which iteration
+ // of the unrolled body exits.
+ const bool CompletelyUnroll = ULO.Count == MaxTripCount;
+
+ const bool PreserveOnlyFirst = CompletelyUnroll && MaxOrZero;
+
+ // There's no point in performing runtime unrolling if this unroll count
+ // results in a full unroll.
+ if (CompletelyUnroll)
+ ULO.Runtime = false;
+
+ // Go through all exits of L and see if there are any phi-nodes there. We just
+ // conservatively assume that they're inserted to preserve LCSSA form, which
+ // means that complete unrolling might break this form. We need to either fix
+ // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For
+ // now we just recompute LCSSA for the outer loop, but it should be possible
+ // to fix it in-place.
+ bool NeedToFixLCSSA =
+ PreserveLCSSA && CompletelyUnroll &&
+ any_of(ExitBlocks,
+ [](const BasicBlock *BB) { return isa<PHINode>(BB->begin()); });
+
+ // The current loop unroll pass can unroll loops that have
+ // (1) single latch; and
+ // (2a) latch is unconditional; or
+ // (2b) latch is conditional and is an exiting block
+ // FIXME: The implementation can be extended to work with more complicated
+ // cases, e.g. loops with multiple latches.
+ BranchInst *LatchBI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
+
+  // A conditional branch that exits the loop can, in some cases, be optimized
+  // to an unconditional branch in the unrolled loop.
+ bool LatchIsExiting = L->isLoopExiting(LatchBlock);
+ if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) {
+ LLVM_DEBUG(
+ dbgs() << "Can't unroll; a conditional latch must exit the loop");
+ return LoopUnrollResult::Unmodified;
+ }
+
+ // Loops containing convergent instructions cannot use runtime unrolling,
+ // as the prologue/epilogue may add additional control-dependencies to
+ // convergent operations.
+ LLVM_DEBUG(
+ {
+ bool HasConvergent = false;
+ for (auto &BB : L->blocks())
+ for (auto &I : *BB)
+ if (auto *CB = dyn_cast<CallBase>(&I))
+ HasConvergent |= CB->isConvergent();
+ assert((!HasConvergent || !ULO.Runtime) &&
+ "Can't runtime unroll if loop contains a convergent operation.");
+ });
+
+ bool EpilogProfitability =
+ UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog
+ : isEpilogProfitable(L);
+
+ if (ULO.Runtime &&
+ !UnrollRuntimeLoopRemainder(L, ULO.Count, ULO.AllowExpensiveTripCount,
+ EpilogProfitability, ULO.UnrollRemainder,
+ ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI,
+ PreserveLCSSA, RemainderLoop)) {
+ if (ULO.Force)
+ ULO.Runtime = false;
+ else {
+ LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be "
+ "generated when assuming runtime trip count\n");
+ return LoopUnrollResult::Unmodified;
+ }
+ }
+
+ using namespace ore;
+ // Report the unrolling decision.
+ if (CompletelyUnroll) {
+ LLVM_DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
+ << " with trip count " << ULO.Count << "!\n");
+ if (ORE)
+ ORE->emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
+ L->getHeader())
+ << "completely unrolled loop with "
+ << NV("UnrollCount", ULO.Count) << " iterations";
+ });
+ } else {
+ LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by "
+ << ULO.Count);
+ if (ULO.Runtime)
+ LLVM_DEBUG(dbgs() << " with run-time trip count");
+ LLVM_DEBUG(dbgs() << "!\n");
+
+ if (ORE)
+ ORE->emit([&]() {
+ OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
+ L->getHeader());
+ Diag << "unrolled loop by a factor of " << NV("UnrollCount", ULO.Count);
+ if (ULO.Runtime)
+ Diag << " with run-time trip count";
+ return Diag;
+ });
+ }
+
+ // We are going to make changes to this loop. SCEV may be keeping cached info
+ // about it, in particular about backedge taken count. The changes we make
+ // are guaranteed to invalidate this information for our loop. It is tempting
+ // to only invalidate the loop being unrolled, but it is incorrect as long as
+ // all exiting branches from all inner loops have impact on the outer loops,
+ // and if something changes inside them then any of outer loops may also
+ // change. When we forget outermost loop, we also forget all contained loops
+ // and this is what we need here.
+ if (SE) {
+ if (ULO.ForgetAllSCEV)
+ SE->forgetAllLoops();
+ else {
+ SE->forgetTopmostLoop(L);
+ SE->forgetBlockAndLoopDispositions();
+ }
+ }
+
+ if (!LatchIsExiting)
+ ++NumUnrolledNotLatch;
+
+ // For the first iteration of the loop, we should use the precloned values for
+ // PHI nodes. Insert associations now.
+ ValueToValueMapTy LastValueMap;
+ std::vector<PHINode*> OrigPHINode;
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ OrigPHINode.push_back(cast<PHINode>(I));
+ }
+
+ std::vector<BasicBlock *> Headers;
+ std::vector<BasicBlock *> Latches;
+ Headers.push_back(Header);
+ Latches.push_back(LatchBlock);
+
+ // The current on-the-fly SSA update requires blocks to be processed in
+ // reverse postorder so that LastValueMap contains the correct value at each
+ // exit.
+ LoopBlocksDFS DFS(L);
+ DFS.perform(LI);
+
+ // Stash the DFS iterators before adding blocks to the loop.
+ LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
+ LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
+
+ std::vector<BasicBlock*> UnrolledLoopBlocks = L->getBlocks();
+
+ // Loop Unrolling might create new loops. While we do preserve LoopInfo, we
+ // might break loop-simplified form for these loops (as they, e.g., would
+ // share the same exit blocks). We'll keep track of loops for which we can
+ // break this so that later we can re-simplify them.
+ SmallSetVector<Loop *, 4> LoopsToSimplify;
+ for (Loop *SubLoop : *L)
+ LoopsToSimplify.insert(SubLoop);
+
+ // When a FSDiscriminator is enabled, we don't need to add the multiply
+ // factors to the discriminators.
+ if (Header->getParent()->shouldEmitDebugInfoForProfiling() &&
+ !EnableFSDiscriminator)
+ for (BasicBlock *BB : L->getBlocks())
+ for (Instruction &I : *BB)
+ if (!isa<DbgInfoIntrinsic>(&I))
+ if (const DILocation *DIL = I.getDebugLoc()) {
+ auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(ULO.Count);
+ if (NewDIL)
+ I.setDebugLoc(*NewDIL);
+ else
+ LLVM_DEBUG(dbgs()
+ << "Failed to create new discriminator: "
+ << DIL->getFilename() << " Line: " << DIL->getLine());
+ }
+
+ // Identify what noalias metadata is inside the loop: if it is inside the
+ // loop, the associated metadata must be cloned for each iteration.
+ SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
+ identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
+
+ // We place the unrolled iterations immediately after the original loop
+ // latch. This is a reasonable default placement if we don't have block
+ // frequencies, and if we do, well the layout will be adjusted later.
+ auto BlockInsertPt = std::next(LatchBlock->getIterator());
+ for (unsigned It = 1; It != ULO.Count; ++It) {
+ SmallVector<BasicBlock *, 8> NewBlocks;
+ SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
+ NewLoops[L] = L;
+
+ for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
+ ValueToValueMapTy VMap;
+ BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
+ Header->getParent()->insert(BlockInsertPt, New);
+
+ assert((*BB != Header || LI->getLoopFor(*BB) == L) &&
+ "Header should not be in a sub-loop");
+ // Tell LI about New.
+ const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops);
+ if (OldLoop)
+ LoopsToSimplify.insert(NewLoops[OldLoop]);
+
+ if (*BB == Header)
+ // Loop over all of the PHI nodes in the block, changing them to use
+ // the incoming values from the previous block.
+ for (PHINode *OrigPHI : OrigPHINode) {
+ PHINode *NewPHI = cast<PHINode>(VMap[OrigPHI]);
+ Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
+ if (Instruction *InValI = dyn_cast<Instruction>(InVal))
+ if (It > 1 && L->contains(InValI))
+ InVal = LastValueMap[InValI];
+ VMap[OrigPHI] = InVal;
+ NewPHI->eraseFromParent();
+ }
+
+ // Update our running map of newest clones
+ LastValueMap[*BB] = New;
+ for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
+ VI != VE; ++VI)
+ LastValueMap[VI->first] = VI->second;
+
+ // Add phi entries for newly created values to all exit blocks.
+ for (BasicBlock *Succ : successors(*BB)) {
+ if (L->contains(Succ))
+ continue;
+ for (PHINode &PHI : Succ->phis()) {
+ Value *Incoming = PHI.getIncomingValueForBlock(*BB);
+ ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
+ if (It != LastValueMap.end())
+ Incoming = It->second;
+ PHI.addIncoming(Incoming, New);
+ SE->forgetValue(&PHI);
+ }
+ }
+ // Keep track of new headers and latches as we create them, so that
+ // we can insert the proper branches later.
+ if (*BB == Header)
+ Headers.push_back(New);
+ if (*BB == LatchBlock)
+ Latches.push_back(New);
+
+ // Keep track of the exiting block and its successor block contained in
+ // the loop for the current iteration.
+ auto ExitInfoIt = ExitInfos.find(*BB);
+ if (ExitInfoIt != ExitInfos.end())
+ ExitInfoIt->second.ExitingBlocks.push_back(New);
+
+ NewBlocks.push_back(New);
+ UnrolledLoopBlocks.push_back(New);
+
+ // Update DomTree: since we just copy the loop body, and each copy has a
+ // dedicated entry block (copy of the header block), this header's copy
+ // dominates all copied blocks. That means, dominance relations in the
+ // copied body are the same as in the original body.
+ if (*BB == Header)
+ DT->addNewBlock(New, Latches[It - 1]);
+ else {
+ auto BBDomNode = DT->getNode(*BB);
+ auto BBIDom = BBDomNode->getIDom();
+ BasicBlock *OriginalBBIDom = BBIDom->getBlock();
+ DT->addNewBlock(
+ New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
+ }
+ }
+
+ // Remap all instructions in the most recent iteration
+ remapInstructionsInBlocks(NewBlocks, LastValueMap);
+ for (BasicBlock *NewBlock : NewBlocks)
+ for (Instruction &I : *NewBlock)
+ if (auto *II = dyn_cast<AssumeInst>(&I))
+ AC->registerAssumption(II);
+
+ {
+ // Identify what other metadata depends on the cloned version. After
+ // cloning, replace the metadata with the corrected version for both
+ // memory instructions and noalias intrinsics.
+ std::string ext = (Twine("It") + Twine(It)).str();
+ cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
+ Header->getContext(), ext);
+ }
+ }
+
+ // Loop over the PHI nodes in the original block, setting incoming values.
+ for (PHINode *PN : OrigPHINode) {
+ if (CompletelyUnroll) {
+ PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
+ PN->eraseFromParent();
+ } else if (ULO.Count > 1) {
+ Value *InVal = PN->removeIncomingValue(LatchBlock, false);
+ // If this value was defined in the loop, take the value defined by the
+ // last iteration of the loop.
+ if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
+ if (L->contains(InValI))
+ InVal = LastValueMap[InVal];
+ }
+ assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch");
+ PN->addIncoming(InVal, Latches.back());
+ }
+ }
+
+ // Connect latches of the unrolled iterations to the headers of the next
+ // iteration. Currently they point to the header of the same iteration.
+ for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+ unsigned j = (i + 1) % e;
+ Latches[i]->getTerminator()->replaceSuccessorWith(Headers[i], Headers[j]);
+ }
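+  // Sketch for Count == 3: latch 0's edge to header 0 is redirected to
+  // header 1, latch 1's to header 2, and latch 2's (j wraps to 0) back to
+  // header 0, which becomes the sole remaining backedge (and is removed
+  // later if the loop is completely unrolled).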
+
+ // Update dominators of blocks we might reach through exits.
+ // Immediate dominator of such block might change, because we add more
+ // routes which can lead to the exit: we can now reach it from the copied
+ // iterations too.
+ if (ULO.Count > 1) {
+ for (auto *BB : OriginalLoopBlocks) {
+ auto *BBDomNode = DT->getNode(BB);
+ SmallVector<BasicBlock *, 16> ChildrenToUpdate;
+ for (auto *ChildDomNode : BBDomNode->children()) {
+ auto *ChildBB = ChildDomNode->getBlock();
+ if (!L->contains(ChildBB))
+ ChildrenToUpdate.push_back(ChildBB);
+ }
+ // The new idom of the block will be the nearest common dominator
+ // of all copies of the previous idom. This is equivalent to the
+ // nearest common dominator of the previous idom and the first latch,
+ // which dominates all copies of the previous idom.
+ BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, LatchBlock);
+ for (auto *ChildBB : ChildrenToUpdate)
+ DT->changeImmediateDominator(ChildBB, NewIDom);
+ }
+ }
+
+ assert(!UnrollVerifyDomtree ||
+ DT->verify(DominatorTree::VerificationLevel::Fast));
+
+ SmallVector<DominatorTree::UpdateType> DTUpdates;
+ auto SetDest = [&](BasicBlock *Src, bool WillExit, bool ExitOnTrue) {
+ auto *Term = cast<BranchInst>(Src->getTerminator());
+ const unsigned Idx = ExitOnTrue ^ WillExit;
+ BasicBlock *Dest = Term->getSuccessor(Idx);
+ BasicBlock *DeadSucc = Term->getSuccessor(1-Idx);
+
+ // Remove predecessors from all non-Dest successors.
+ DeadSucc->removePredecessor(Src, /* KeepOneInputPHIs */ true);
+
+ // Replace the conditional branch with an unconditional one.
+ BranchInst::Create(Dest, Term);
+ Term->eraseFromParent();
+
+ DTUpdates.emplace_back(DominatorTree::Delete, Src, DeadSucc);
+ };
+
+ auto WillExit = [&](const ExitInfo &Info, unsigned i, unsigned j,
+ bool IsLatch) -> std::optional<bool> {
+ if (CompletelyUnroll) {
+ if (PreserveOnlyFirst) {
+ if (i == 0)
+ return std::nullopt;
+ return j == 0;
+ }
+ // Complete (but possibly inexact) unrolling
+ if (j == 0)
+ return true;
+ if (Info.TripCount && j != Info.TripCount)
+ return false;
+ return std::nullopt;
+ }
+
+ if (ULO.Runtime) {
+ // If runtime unrolling inserts a prologue, information about non-latch
+ // exits may be stale.
+ if (IsLatch && j != 0)
+ return false;
+ return std::nullopt;
+ }
+
+ if (j != Info.BreakoutTrip &&
+ (Info.TripMultiple == 0 || j % Info.TripMultiple != 0)) {
+ // If we know the trip count or a multiple of it, we can safely use an
+ // unconditional branch for some iterations.
+ return false;
+ }
+ return std::nullopt;
+ };
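+  // Continuing the TripCount == 10, Count == 4 example from above: only the
+  // copy whose destination index j equals BreakoutTrip (2) keeps its
+  // conditional exit; for every other copy WillExit returns false and the
+  // branch is folded to an unconditional one below (a sketch of the common
+  // non-runtime, partial-unroll case).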
+
+ // Fold branches for iterations where we know that they will exit or not
+ // exit.
+ for (auto &Pair : ExitInfos) {
+ ExitInfo &Info = Pair.second;
+ for (unsigned i = 0, e = Info.ExitingBlocks.size(); i != e; ++i) {
+ // The branch destination.
+ unsigned j = (i + 1) % e;
+ bool IsLatch = Pair.first == LatchBlock;
+ std::optional<bool> KnownWillExit = WillExit(Info, i, j, IsLatch);
+ if (!KnownWillExit) {
+ if (!Info.FirstExitingBlock)
+ Info.FirstExitingBlock = Info.ExitingBlocks[i];
+ continue;
+ }
+
+      // We don't fold known-exiting branches for non-latch exits here;
+      // leaving them in place ensures that all loop blocks and all exit
+      // blocks remain reachable in the CFG.
+ // TODO: We could fold these branches, but it would require much more
+ // sophisticated updates to LoopInfo.
+ if (*KnownWillExit && !IsLatch) {
+ if (!Info.FirstExitingBlock)
+ Info.FirstExitingBlock = Info.ExitingBlocks[i];
+ continue;
+ }
+
+ SetDest(Info.ExitingBlocks[i], *KnownWillExit, Info.ExitOnTrue);
+ }
+ }
+
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+ DomTreeUpdater *DTUToUse = &DTU;
+ if (ExitingBlocks.size() == 1 && ExitInfos.size() == 1) {
+ // Manually update the DT if there's a single exiting node. In that case
+ // there's a single exit node and it is sufficient to update the nodes
+ // immediately dominated by the original exiting block. They will become
+ // dominated by the first exiting block that leaves the loop after
+ // unrolling. Note that the CFG inside the loop does not change, so there's
+ // no need to update the DT inside the unrolled loop.
+ DTUToUse = nullptr;
+ auto &[OriginalExit, Info] = *ExitInfos.begin();
+ if (!Info.FirstExitingBlock)
+ Info.FirstExitingBlock = Info.ExitingBlocks.back();
+ for (auto *C : to_vector(DT->getNode(OriginalExit)->children())) {
+ if (L->contains(C->getBlock()))
+ continue;
+ C->setIDom(DT->getNode(Info.FirstExitingBlock));
+ }
+ } else {
+ DTU.applyUpdates(DTUpdates);
+ }
+
+ // When completely unrolling, the last latch becomes unreachable.
+ if (!LatchIsExiting && CompletelyUnroll) {
+ // There is no need to update the DT here, because there must be a unique
+ // latch. Hence if the latch is not exiting it must directly branch back to
+ // the original loop header and does not dominate any nodes.
+ assert(LatchBlock->getSingleSuccessor() && "Loop with multiple latches?");
+ changeToUnreachable(Latches.back()->getTerminator(), PreserveLCSSA);
+ }
+
+ // Merge adjacent basic blocks, if possible.
+ for (BasicBlock *Latch : Latches) {
+ BranchInst *Term = dyn_cast<BranchInst>(Latch->getTerminator());
+ assert((Term ||
+ (CompletelyUnroll && !LatchIsExiting && Latch == Latches.back())) &&
+ "Need a branch as terminator, except when fully unrolling with "
+ "unconditional latch");
+ if (Term && Term->isUnconditional()) {
+ BasicBlock *Dest = Term->getSuccessor(0);
+ BasicBlock *Fold = Dest->getUniquePredecessor();
+ if (MergeBlockIntoPredecessor(Dest, /*DTU=*/DTUToUse, LI,
+ /*MSSAU=*/nullptr, /*MemDep=*/nullptr,
+ /*PredecessorWithTwoSuccessors=*/false,
+ DTUToUse ? nullptr : DT)) {
+ // Dest has been folded into Fold. Update our worklists accordingly.
+ std::replace(Latches.begin(), Latches.end(), Dest, Fold);
+ llvm::erase_value(UnrolledLoopBlocks, Dest);
+ }
+ }
+ }
+
+ if (DTUToUse) {
+ // Apply updates to the DomTree.
+ DT = &DTU.getDomTree();
+ }
+ assert(!UnrollVerifyDomtree ||
+ DT->verify(DominatorTree::VerificationLevel::Fast));
+
+ // At this point, the code is well formed. We now simplify the unrolled loop,
+ // doing constant propagation and dead code elimination as we go.
+ simplifyLoopAfterUnroll(L, !CompletelyUnroll && ULO.Count > 1, LI, SE, DT, AC,
+ TTI);
+
+ NumCompletelyUnrolled += CompletelyUnroll;
+ ++NumUnrolled;
+
+ Loop *OuterL = L->getParentLoop();
+ // Update LoopInfo if the loop is completely removed.
+ if (CompletelyUnroll)
+ LI->erase(L);
+
+  // LoopInfo should now be valid; confirm that.
+ if (UnrollVerifyLoopInfo)
+ LI->verify(*DT);
+
+ // After complete unrolling most of the blocks should be contained in OuterL.
+ // However, some of them might happen to be out of OuterL (e.g. if they
+ // precede a loop exit). In this case we might need to insert PHI nodes in
+ // order to preserve LCSSA form.
+ // We don't need to check this if we already know that we need to fix LCSSA
+ // form.
+ // TODO: For now we just recompute LCSSA for the outer loop in this case, but
+ // it should be possible to fix it in-place.
+ if (PreserveLCSSA && OuterL && CompletelyUnroll && !NeedToFixLCSSA)
+ NeedToFixLCSSA |= ::needToInsertPhisForLCSSA(OuterL, UnrolledLoopBlocks, LI);
+
+ // Make sure that loop-simplify form is preserved. We want to simplify
+ // at least one layer outside of the loop that was unrolled so that any
+ // changes to the parent loop exposed by the unrolling are considered.
+ if (OuterL) {
+ // OuterL includes all loops for which we can break loop-simplify, so
+ // it's sufficient to simplify only it (it'll recursively simplify inner
+ // loops too).
+ if (NeedToFixLCSSA) {
+ // LCSSA must be performed on the outermost affected loop. The unrolled
+ // loop's last loop latch is guaranteed to be in the outermost loop
+ // after LoopInfo's been updated by LoopInfo::erase.
+ Loop *LatchLoop = LI->getLoopFor(Latches.back());
+ Loop *FixLCSSALoop = OuterL;
+ if (!FixLCSSALoop->contains(LatchLoop))
+ while (FixLCSSALoop->getParentLoop() != LatchLoop)
+ FixLCSSALoop = FixLCSSALoop->getParentLoop();
+
+ formLCSSARecursively(*FixLCSSALoop, *DT, LI, SE);
+ } else if (PreserveLCSSA) {
+ assert(OuterL->isLCSSAForm(*DT) &&
+ "Loops should be in LCSSA form after loop-unroll.");
+ }
+
+ // TODO: That potentially might be compile-time expensive. We should try
+ // to fix the loop-simplified form incrementally.
+ simplifyLoop(OuterL, DT, LI, SE, AC, nullptr, PreserveLCSSA);
+ } else {
+ // Simplify loops for which we might've broken loop-simplify form.
+ for (Loop *SubLoop : LoopsToSimplify)
+ simplifyLoop(SubLoop, DT, LI, SE, AC, nullptr, PreserveLCSSA);
+ }
+
+ return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled
+ : LoopUnrollResult::PartiallyUnrolled;
+}
+
+/// Given an llvm.loop loop id metadata node, returns the loop hint metadata
+/// node with the given name (for example, "llvm.loop.unroll.count"). If no
+/// such metadata node exists, then nullptr is returned.
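+/// For illustration, a loop id typically has the form (sketch):
+///   !0 = distinct !{!0, !1}
+///   !1 = !{!"llvm.loop.unroll.count", i32 4}
+/// so GetUnrollMetadata(!0, "llvm.loop.unroll.count") would return the !1
+/// node.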
+MDNode *llvm::GetUnrollMetadata(MDNode *LoopID, StringRef Name) {
+ // First operand should refer to the loop id itself.
+ assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
+ assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
+
+ for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
+ MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+ if (!MD)
+ continue;
+
+ MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ if (!S)
+ continue;
+
+ if (Name.equals(S->getString()))
+ return MD;
+ }
+ return nullptr;
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LoopUnrollAndJam.cpp
new file mode 100644
index 0000000000..b125e952ec
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -0,0 +1,999 @@
+//===-- LoopUnrollAndJam.cpp - Loop unrolling utilities -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements loop unroll and jam as a routine, much like
+// LoopUnroll.cpp implements loop unroll.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/MustExecute.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/ValueMap.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GenericDomTree.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <assert.h>
+#include <memory>
+#include <type_traits>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-unroll-and-jam"
+
+STATISTIC(NumUnrolledAndJammed, "Number of loops unroll and jammed");
+STATISTIC(NumCompletelyUnrolledAndJammed,
+          "Number of loops completely unroll and jammed");
+
+typedef SmallPtrSet<BasicBlock *, 4> BasicBlockSet;
+
+// Partition blocks in an outer/inner loop pair into blocks before and after
+// the loop
+static bool partitionLoopBlocks(Loop &L, BasicBlockSet &ForeBlocks,
+ BasicBlockSet &AftBlocks, DominatorTree &DT) {
+ Loop *SubLoop = L.getSubLoops()[0];
+ BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
+
+ for (BasicBlock *BB : L.blocks()) {
+ if (!SubLoop->contains(BB)) {
+ if (DT.dominates(SubLoopLatch, BB))
+ AftBlocks.insert(BB);
+ else
+ ForeBlocks.insert(BB);
+ }
+ }
+
+ // Check that all blocks in ForeBlocks together dominate the subloop
+ // TODO: This might ideally be done better with a dominator/postdominators.
+ BasicBlock *SubLoopPreHeader = SubLoop->getLoopPreheader();
+ for (BasicBlock *BB : ForeBlocks) {
+ if (BB == SubLoopPreHeader)
+ continue;
+ Instruction *TI = BB->getTerminator();
+ for (BasicBlock *Succ : successors(TI))
+ if (!ForeBlocks.count(Succ))
+ return false;
+ }
+
+ return true;
+}
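+// Sketch: for the canonical "for (i) { Fore; for (j) { ... }; Aft }" shape
+// described further below, the non-subloop blocks dominated by the subloop
+// latch form AftBlocks and the remaining non-subloop blocks form ForeBlocks;
+// the function returns false if any Fore block other than the subloop
+// preheader has a successor outside the Fore set.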
+
+/// Partition blocks in a loop nest into blocks before and after each inner
+/// loop.
+static bool partitionOuterLoopBlocks(
+ Loop &Root, Loop &JamLoop, BasicBlockSet &JamLoopBlocks,
+ DenseMap<Loop *, BasicBlockSet> &ForeBlocksMap,
+ DenseMap<Loop *, BasicBlockSet> &AftBlocksMap, DominatorTree &DT) {
+ JamLoopBlocks.insert(JamLoop.block_begin(), JamLoop.block_end());
+
+ for (Loop *L : Root.getLoopsInPreorder()) {
+ if (L == &JamLoop)
+ break;
+
+ if (!partitionLoopBlocks(*L, ForeBlocksMap[L], AftBlocksMap[L], DT))
+ return false;
+ }
+
+ return true;
+}
+
+// TODO: Remove when UnrollAndJamLoop is changed to support unroll-and-jamming
+// loop nests more than 2 levels deep.
+static bool partitionOuterLoopBlocks(Loop *L, Loop *SubLoop,
+ BasicBlockSet &ForeBlocks,
+ BasicBlockSet &SubLoopBlocks,
+ BasicBlockSet &AftBlocks,
+ DominatorTree *DT) {
+ SubLoopBlocks.insert(SubLoop->block_begin(), SubLoop->block_end());
+ return partitionLoopBlocks(*L, ForeBlocks, AftBlocks, *DT);
+}
+
+// Looks at the phi nodes in Header for values coming from Latch. For these
+// instructions and all their operands, Visit is called, recursing through any
+// operands defined in AftBlocks. Returns false if Visit returns false,
+// otherwise returns true. This is used to process the instructions in the
+// Aft blocks that need to be moved before the subloop. It is used in two
+// places: once to check that the required set of instructions can be moved
+// before the loop, and then to collect the instructions to actually move in
+// moveHeaderPhiOperandsToForeBlocks.
+template <typename T>
+static bool processHeaderPhiOperands(BasicBlock *Header, BasicBlock *Latch,
+ BasicBlockSet &AftBlocks, T Visit) {
+ SmallPtrSet<Instruction *, 8> VisitedInstr;
+
+ std::function<bool(Instruction * I)> ProcessInstr = [&](Instruction *I) {
+ if (VisitedInstr.count(I))
+ return true;
+
+ VisitedInstr.insert(I);
+
+ if (AftBlocks.count(I->getParent()))
+ for (auto &U : I->operands())
+ if (Instruction *II = dyn_cast<Instruction>(U))
+ if (!ProcessInstr(II))
+ return false;
+
+ return Visit(I);
+ };
+
+ for (auto &Phi : Header->phis()) {
+ Value *V = Phi.getIncomingValueForBlock(Latch);
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (!ProcessInstr(I))
+ return false;
+ }
+
+ return true;
+}
+
+// Move the phi operands of Header from Latch out of AftBlocks to InsertLoc.
+static void moveHeaderPhiOperandsToForeBlocks(BasicBlock *Header,
+ BasicBlock *Latch,
+ Instruction *InsertLoc,
+ BasicBlockSet &AftBlocks) {
+ // We need to ensure we move the instructions in the correct order,
+ // starting with the earliest required instruction and moving forward.
+ processHeaderPhiOperands(Header, Latch, AftBlocks,
+ [&AftBlocks, &InsertLoc](Instruction *I) {
+ if (AftBlocks.count(I->getParent()))
+ I->moveBefore(InsertLoc);
+ return true;
+ });
+}
+
+/*
+ This method performs Unroll and Jam. For a simple loop like:
+ for (i = ..)
+ Fore(i)
+ for (j = ..)
+ SubLoop(i, j)
+ Aft(i)
+
+ Instead of doing normal inner or outer unrolling, we do:
+ for (i = .., i+=2)
+ Fore(i)
+ Fore(i+1)
+ for (j = ..)
+ SubLoop(i, j)
+ SubLoop(i+1, j)
+ Aft(i)
+ Aft(i+1)
+
+  So the outer loop is essentially unrolled and then the inner loops are fused
+ ("jammed") together into a single loop. This can increase speed when there
+ are loads in SubLoop that are invariant to i, as they become shared between
+ the now jammed inner loops.
+
+  We do this by splitting the blocks in the loop into Fore, Subloop and Aft.
+ Fore blocks are those before the inner loop, Aft are those after. Normal
+ Unroll code is used to copy each of these sets of blocks and the results are
+ combined together into the final form above.
+
+ isSafeToUnrollAndJam should be used prior to calling this to make sure the
+  unrolling will be valid. Checking profitability is also advisable.
+
+ If EpilogueLoop is non-null, it receives the epilogue loop (if it was
+ necessary to create one and not fully unrolled).
+*/
+LoopUnrollResult
+llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
+ unsigned TripMultiple, bool UnrollRemainder,
+ LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
+ AssumptionCache *AC, const TargetTransformInfo *TTI,
+ OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop) {
+
+ // When we enter here we should have already checked that it is safe
+ BasicBlock *Header = L->getHeader();
+ assert(Header && "No header.");
+ assert(L->getSubLoops().size() == 1);
+ Loop *SubLoop = *L->begin();
+
+ // Don't enter the unroll code if there is nothing to do.
+ if (TripCount == 0 && Count < 2) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; almost nothing to do\n");
+ return LoopUnrollResult::Unmodified;
+ }
+
+ assert(Count > 0);
+ assert(TripMultiple > 0);
+ assert(TripCount == 0 || TripCount % TripMultiple == 0);
+
+ // Are we eliminating the loop control altogether?
+ bool CompletelyUnroll = (Count == TripCount);
+
+ // We use the runtime remainder in cases where we don't know trip multiple
+ if (TripMultiple % Count != 0) {
+ if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false,
+ /*UseEpilogRemainder*/ true,
+ UnrollRemainder, /*ForgetAllSCEV*/ false,
+ LI, SE, DT, AC, TTI, true, EpilogueLoop)) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be "
+ "generated when assuming runtime trip count\n");
+ return LoopUnrollResult::Unmodified;
+ }
+ }
+
+ // Notify ScalarEvolution that the loop will be substantially changed,
+ // if not outright eliminated.
+ if (SE) {
+ SE->forgetLoop(L);
+ SE->forgetBlockAndLoopDispositions();
+ }
+
+ using namespace ore;
+ // Report the unrolling decision.
+ if (CompletelyUnroll) {
+ LLVM_DEBUG(dbgs() << "COMPLETELY UNROLL AND JAMMING loop %"
+ << Header->getName() << " with trip count " << TripCount
+ << "!\n");
+ ORE->emit(OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
+ L->getHeader())
+ << "completely unroll and jammed loop with "
+ << NV("UnrollCount", TripCount) << " iterations");
+ } else {
+ auto DiagBuilder = [&]() {
+ OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
+ L->getHeader());
+ return Diag << "unroll and jammed loop by a factor of "
+ << NV("UnrollCount", Count);
+ };
+
+ LLVM_DEBUG(dbgs() << "UNROLL AND JAMMING loop %" << Header->getName()
+ << " by " << Count);
+ if (TripMultiple != 1) {
+ LLVM_DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
+ ORE->emit([&]() {
+ return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple)
+ << " trips per branch";
+ });
+ } else {
+ LLVM_DEBUG(dbgs() << " with run-time trip count");
+ ORE->emit([&]() { return DiagBuilder() << " with run-time trip count"; });
+ }
+ LLVM_DEBUG(dbgs() << "!\n");
+ }
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ assert(Preheader && "No preheader");
+ assert(LatchBlock && "No latch block");
+ BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
+ assert(BI && !BI->isUnconditional());
+ bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
+ BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
+ bool SubLoopContinueOnTrue = SubLoop->contains(
+ SubLoop->getLoopLatch()->getTerminator()->getSuccessor(0));
+
+ // Partition blocks in an outer/inner loop pair into blocks before and after
+ // the loop
+ BasicBlockSet SubLoopBlocks;
+ BasicBlockSet ForeBlocks;
+ BasicBlockSet AftBlocks;
+ partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks, AftBlocks,
+ DT);
+
+ // We keep track of the entering/first and exiting/last block of each of
+ // Fore/SubLoop/Aft in each iteration. This helps make the stapling up of
+ // blocks easier.
+ std::vector<BasicBlock *> ForeBlocksFirst;
+ std::vector<BasicBlock *> ForeBlocksLast;
+ std::vector<BasicBlock *> SubLoopBlocksFirst;
+ std::vector<BasicBlock *> SubLoopBlocksLast;
+ std::vector<BasicBlock *> AftBlocksFirst;
+ std::vector<BasicBlock *> AftBlocksLast;
+ ForeBlocksFirst.push_back(Header);
+ ForeBlocksLast.push_back(SubLoop->getLoopPreheader());
+ SubLoopBlocksFirst.push_back(SubLoop->getHeader());
+ SubLoopBlocksLast.push_back(SubLoop->getExitingBlock());
+ AftBlocksFirst.push_back(SubLoop->getExitBlock());
+ AftBlocksLast.push_back(L->getExitingBlock());
+ // Maps Blocks[0] -> Blocks[It]
+ ValueToValueMapTy LastValueMap;
+
+ // Move any instructions from fore phi operands from AftBlocks into Fore.
+ moveHeaderPhiOperandsToForeBlocks(
+ Header, LatchBlock, ForeBlocksLast[0]->getTerminator(), AftBlocks);
+
+ // The current on-the-fly SSA update requires blocks to be processed in
+ // reverse postorder so that LastValueMap contains the correct value at each
+ // exit.
+ LoopBlocksDFS DFS(L);
+ DFS.perform(LI);
+ // Stash the DFS iterators before adding blocks to the loop.
+ LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
+ LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
+
+ // When a FSDiscriminator is enabled, we don't need to add the multiply
+ // factors to the discriminators.
+ if (Header->getParent()->shouldEmitDebugInfoForProfiling() &&
+ !EnableFSDiscriminator)
+ for (BasicBlock *BB : L->getBlocks())
+ for (Instruction &I : *BB)
+ if (!isa<DbgInfoIntrinsic>(&I))
+ if (const DILocation *DIL = I.getDebugLoc()) {
+ auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(Count);
+ if (NewDIL)
+ I.setDebugLoc(*NewDIL);
+ else
+ LLVM_DEBUG(dbgs()
+ << "Failed to create new discriminator: "
+ << DIL->getFilename() << " Line: " << DIL->getLine());
+ }
+
+ // Copy all blocks
+ for (unsigned It = 1; It != Count; ++It) {
+ SmallVector<BasicBlock *, 8> NewBlocks;
+ // Maps Blocks[It] -> Blocks[It-1]
+ DenseMap<Value *, Value *> PrevItValueMap;
+ SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
+ NewLoops[L] = L;
+ NewLoops[SubLoop] = SubLoop;
+
+ for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
+ ValueToValueMapTy VMap;
+ BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
+ Header->getParent()->insert(Header->getParent()->end(), New);
+
+ // Tell LI about New.
+ addClonedBlockToLoopInfo(*BB, New, LI, NewLoops);
+
+ if (ForeBlocks.count(*BB)) {
+ if (*BB == ForeBlocksFirst[0])
+ ForeBlocksFirst.push_back(New);
+ if (*BB == ForeBlocksLast[0])
+ ForeBlocksLast.push_back(New);
+ } else if (SubLoopBlocks.count(*BB)) {
+ if (*BB == SubLoopBlocksFirst[0])
+ SubLoopBlocksFirst.push_back(New);
+ if (*BB == SubLoopBlocksLast[0])
+ SubLoopBlocksLast.push_back(New);
+ } else if (AftBlocks.count(*BB)) {
+ if (*BB == AftBlocksFirst[0])
+ AftBlocksFirst.push_back(New);
+ if (*BB == AftBlocksLast[0])
+ AftBlocksLast.push_back(New);
+ } else {
+ llvm_unreachable("BB being cloned should be in Fore/Sub/Aft");
+ }
+
+ // Update our running maps of newest clones
+ PrevItValueMap[New] = (It == 1 ? *BB : LastValueMap[*BB]);
+ LastValueMap[*BB] = New;
+ for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
+ VI != VE; ++VI) {
+ PrevItValueMap[VI->second] =
+ const_cast<Value *>(It == 1 ? VI->first : LastValueMap[VI->first]);
+ LastValueMap[VI->first] = VI->second;
+ }
+
+ NewBlocks.push_back(New);
+
+ // Update DomTree:
+ if (*BB == ForeBlocksFirst[0])
+ DT->addNewBlock(New, ForeBlocksLast[It - 1]);
+ else if (*BB == SubLoopBlocksFirst[0])
+ DT->addNewBlock(New, SubLoopBlocksLast[It - 1]);
+ else if (*BB == AftBlocksFirst[0])
+ DT->addNewBlock(New, AftBlocksLast[It - 1]);
+ else {
+ // Each set of blocks (Fore/Sub/Aft) will have the same internal domtree
+ // structure.
+ auto BBDomNode = DT->getNode(*BB);
+ auto BBIDom = BBDomNode->getIDom();
+ BasicBlock *OriginalBBIDom = BBIDom->getBlock();
+ assert(OriginalBBIDom);
+ assert(LastValueMap[cast<Value>(OriginalBBIDom)]);
+ DT->addNewBlock(
+ New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
+ }
+ }
+
+ // Remap all instructions in the most recent iteration
+ remapInstructionsInBlocks(NewBlocks, LastValueMap);
+ for (BasicBlock *NewBlock : NewBlocks) {
+ for (Instruction &I : *NewBlock) {
+ if (auto *II = dyn_cast<AssumeInst>(&I))
+ AC->registerAssumption(II);
+ }
+ }
+
+ // Alter the ForeBlocks phi's, pointing them at the latest version of the
+ // value from the previous iteration's phis
+ for (PHINode &Phi : ForeBlocksFirst[It]->phis()) {
+ Value *OldValue = Phi.getIncomingValueForBlock(AftBlocksLast[It]);
+ assert(OldValue && "should have incoming edge from Aft[It]");
+ Value *NewValue = OldValue;
+ if (Value *PrevValue = PrevItValueMap[OldValue])
+ NewValue = PrevValue;
+
+ assert(Phi.getNumOperands() == 2);
+ Phi.setIncomingBlock(0, ForeBlocksLast[It - 1]);
+ Phi.setIncomingValue(0, NewValue);
+ Phi.removeIncomingValue(1);
+ }
+ }
+
+ // Now that all the basic blocks for the unrolled iterations are in place,
+ // finish up connecting the blocks and phi nodes. At this point LastValueMap
+ // is the last unrolled iterations values.
+
+ // Update Phis in BB from OldBB to point to NewBB and use the latest value
+ // from LastValueMap
+ auto updatePHIBlocksAndValues = [](BasicBlock *BB, BasicBlock *OldBB,
+ BasicBlock *NewBB,
+ ValueToValueMapTy &LastValueMap) {
+ for (PHINode &Phi : BB->phis()) {
+ for (unsigned b = 0; b < Phi.getNumIncomingValues(); ++b) {
+ if (Phi.getIncomingBlock(b) == OldBB) {
+ Value *OldValue = Phi.getIncomingValue(b);
+ if (Value *LastValue = LastValueMap[OldValue])
+ Phi.setIncomingValue(b, LastValue);
+ Phi.setIncomingBlock(b, NewBB);
+ break;
+ }
+ }
+ }
+ };
+ // Move all the phis from Src into Dest
+ auto movePHIs = [](BasicBlock *Src, BasicBlock *Dest) {
+ Instruction *insertPoint = Dest->getFirstNonPHI();
+ while (PHINode *Phi = dyn_cast<PHINode>(Src->begin()))
+ Phi->moveBefore(insertPoint);
+ };
+
+ // Update the PHI values outside the loop to point to the last block
+ updatePHIBlocksAndValues(LoopExit, AftBlocksLast[0], AftBlocksLast.back(),
+ LastValueMap);
+
+ // Update ForeBlocks successors and phi nodes
+ BranchInst *ForeTerm =
+ cast<BranchInst>(ForeBlocksLast.back()->getTerminator());
+ assert(ForeTerm->getNumSuccessors() == 1 && "Expecting one successor");
+ ForeTerm->setSuccessor(0, SubLoopBlocksFirst[0]);
+
+ if (CompletelyUnroll) {
+ while (PHINode *Phi = dyn_cast<PHINode>(ForeBlocksFirst[0]->begin())) {
+ Phi->replaceAllUsesWith(Phi->getIncomingValueForBlock(Preheader));
+ Phi->eraseFromParent();
+ }
+ } else {
+ // Update the PHI values to point to the last aft block
+ updatePHIBlocksAndValues(ForeBlocksFirst[0], AftBlocksLast[0],
+ AftBlocksLast.back(), LastValueMap);
+ }
+
+ for (unsigned It = 1; It != Count; It++) {
+ // Remap ForeBlock successors from previous iteration to this
+ BranchInst *ForeTerm =
+ cast<BranchInst>(ForeBlocksLast[It - 1]->getTerminator());
+ assert(ForeTerm->getNumSuccessors() == 1 && "Expecting one successor");
+ ForeTerm->setSuccessor(0, ForeBlocksFirst[It]);
+ }
+
+ // Subloop successors and phis
+ BranchInst *SubTerm =
+ cast<BranchInst>(SubLoopBlocksLast.back()->getTerminator());
+ SubTerm->setSuccessor(!SubLoopContinueOnTrue, SubLoopBlocksFirst[0]);
+ SubTerm->setSuccessor(SubLoopContinueOnTrue, AftBlocksFirst[0]);
+ SubLoopBlocksFirst[0]->replacePhiUsesWith(ForeBlocksLast[0],
+ ForeBlocksLast.back());
+ SubLoopBlocksFirst[0]->replacePhiUsesWith(SubLoopBlocksLast[0],
+ SubLoopBlocksLast.back());
+
+ for (unsigned It = 1; It != Count; It++) {
+ // Replace the conditional branch of the previous iteration subloop with an
+ // unconditional one to this one
+ BranchInst *SubTerm =
+ cast<BranchInst>(SubLoopBlocksLast[It - 1]->getTerminator());
+ BranchInst::Create(SubLoopBlocksFirst[It], SubTerm);
+ SubTerm->eraseFromParent();
+
+ SubLoopBlocksFirst[It]->replacePhiUsesWith(ForeBlocksLast[It],
+ ForeBlocksLast.back());
+ SubLoopBlocksFirst[It]->replacePhiUsesWith(SubLoopBlocksLast[It],
+ SubLoopBlocksLast.back());
+ movePHIs(SubLoopBlocksFirst[It], SubLoopBlocksFirst[0]);
+ }
+
+ // Aft blocks successors and phis
+ BranchInst *AftTerm = cast<BranchInst>(AftBlocksLast.back()->getTerminator());
+ if (CompletelyUnroll) {
+ BranchInst::Create(LoopExit, AftTerm);
+ AftTerm->eraseFromParent();
+ } else {
+ AftTerm->setSuccessor(!ContinueOnTrue, ForeBlocksFirst[0]);
+ assert(AftTerm->getSuccessor(ContinueOnTrue) == LoopExit &&
+ "Expecting the ContinueOnTrue successor of AftTerm to be LoopExit");
+ }
+ AftBlocksFirst[0]->replacePhiUsesWith(SubLoopBlocksLast[0],
+ SubLoopBlocksLast.back());
+
+ for (unsigned It = 1; It != Count; It++) {
+ // Replace the conditional branch of the previous iteration subloop with an
+ // unconditional one to this one
+ BranchInst *AftTerm =
+ cast<BranchInst>(AftBlocksLast[It - 1]->getTerminator());
+ BranchInst::Create(AftBlocksFirst[It], AftTerm);
+ AftTerm->eraseFromParent();
+
+ AftBlocksFirst[It]->replacePhiUsesWith(SubLoopBlocksLast[It],
+ SubLoopBlocksLast.back());
+ movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]);
+ }
+
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+ // Dominator Tree. Remove the old links between Fore, Sub and Aft, adding the
+ // new ones required.
+ if (Count != 1) {
+ SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
+ DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete, ForeBlocksLast[0],
+ SubLoopBlocksFirst[0]);
+ DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete,
+ SubLoopBlocksLast[0], AftBlocksFirst[0]);
+
+ DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
+ ForeBlocksLast.back(), SubLoopBlocksFirst[0]);
+ DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
+ SubLoopBlocksLast.back(), AftBlocksFirst[0]);
+ DTU.applyUpdatesPermissive(DTUpdates);
+ }
+
+ // Merge adjacent basic blocks, if possible.
+ SmallPtrSet<BasicBlock *, 16> MergeBlocks;
+ MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end());
+ MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end());
+ MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end());
+
+ MergeBlockSuccessorsIntoGivenBlocks(MergeBlocks, L, &DTU, LI);
+
+ // Apply updates to the DomTree.
+ DT = &DTU.getDomTree();
+
+ // At this point, the code is well formed. We now do a quick sweep over the
+ // inserted code, doing constant propagation and dead code elimination as we
+ // go.
+ simplifyLoopAfterUnroll(SubLoop, true, LI, SE, DT, AC, TTI);
+ simplifyLoopAfterUnroll(L, !CompletelyUnroll && Count > 1, LI, SE, DT, AC,
+ TTI);
+
+ NumCompletelyUnrolledAndJammed += CompletelyUnroll;
+ ++NumUnrolledAndJammed;
+
+ // Update LoopInfo if the loop is completely removed.
+ if (CompletelyUnroll)
+ LI->erase(L);
+
+#ifndef NDEBUG
+ // We shouldn't have done anything to break loop simplify form or LCSSA.
+ Loop *OutestLoop = SubLoop->getParentLoop()
+ ? SubLoop->getParentLoop()->getParentLoop()
+ ? SubLoop->getParentLoop()->getParentLoop()
+ : SubLoop->getParentLoop()
+ : SubLoop;
+ assert(DT->verify());
+ LI->verify(*DT);
+ assert(OutestLoop->isRecursivelyLCSSAForm(*DT, *LI));
+ if (!CompletelyUnroll)
+ assert(L->isLoopSimplifyForm());
+ assert(SubLoop->isLoopSimplifyForm());
+ SE->verify();
+#endif
+
+ return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled
+ : LoopUnrollResult::PartiallyUnrolled;
+}
+
+static bool getLoadsAndStores(BasicBlockSet &Blocks,
+ SmallVector<Instruction *, 4> &MemInstr) {
+ // Scan the BBs and collect legal loads and stores.
+ // Returns false if non-simple loads/stores are found.
+ for (BasicBlock *BB : Blocks) {
+ for (Instruction &I : *BB) {
+ if (auto *Ld = dyn_cast<LoadInst>(&I)) {
+ if (!Ld->isSimple())
+ return false;
+ MemInstr.push_back(&I);
+ } else if (auto *St = dyn_cast<StoreInst>(&I)) {
+ if (!St->isSimple())
+ return false;
+ MemInstr.push_back(&I);
+ } else if (I.mayReadOrWriteMemory()) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+static bool preservesForwardDependence(Instruction *Src, Instruction *Dst,
+ unsigned UnrollLevel, unsigned JamLevel,
+ bool Sequentialized, Dependence *D) {
+  // UnrollLevel might carry the dependency Src --> Dst.
+  // Does a different (deeper) loop still carry it after unrolling?
+ for (unsigned CurLoopDepth = UnrollLevel + 1; CurLoopDepth <= JamLevel;
+ ++CurLoopDepth) {
+ auto JammedDir = D->getDirection(CurLoopDepth);
+ if (JammedDir == Dependence::DVEntry::LT)
+ return true;
+
+ if (JammedDir & Dependence::DVEntry::GT)
+ return false;
+ }
+
+ return true;
+}
+
+static bool preservesBackwardDependence(Instruction *Src, Instruction *Dst,
+ unsigned UnrollLevel, unsigned JamLevel,
+ bool Sequentialized, Dependence *D) {
+ // UnrollLevel might carry the dependency Dst --> Src
+ for (unsigned CurLoopDepth = UnrollLevel + 1; CurLoopDepth <= JamLevel;
+ ++CurLoopDepth) {
+ auto JammedDir = D->getDirection(CurLoopDepth);
+ if (JammedDir == Dependence::DVEntry::GT)
+ return true;
+
+ if (JammedDir & Dependence::DVEntry::LT)
+ return false;
+ }
+
+ // Backward dependencies are only preserved if not interleaved.
+ return Sequentialized;
+}
+
+// Check whether it is semantically safe to reorder Src and Dst under
+// unroll-and-jam, considering any potential dependency between them.
+//
+// @param UnrollLevel The level of the loop being unrolled
+// @param JamLevel The level of the loop being jammed; if Src and Dst are on
+// different levels, the outermost common loop counts as jammed level
+//
+// @return true if is safe and false if there is a dependency violation.
+static bool checkDependency(Instruction *Src, Instruction *Dst,
+ unsigned UnrollLevel, unsigned JamLevel,
+ bool Sequentialized, DependenceInfo &DI) {
+ assert(UnrollLevel <= JamLevel &&
+ "Expecting JamLevel to be at least UnrollLevel");
+
+ if (Src == Dst)
+ return true;
+ // Ignore Input dependencies.
+ if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
+ return true;
+
+ // Check whether unroll-and-jam may violate a dependency.
+ // By construction, every dependency will be lexicographically non-negative
+  // (otherwise it would violate the current execution order), such as
+ // (0,0,>,*,*)
+ // Unroll-and-jam changes the GT execution of two executions to the same
+ // iteration of the chosen unroll level. That is, a GT dependence becomes a GE
+ // dependence (or EQ, if we fully unrolled the loop) at the loop's position:
+ // (0,0,>=,*,*)
+ // Now, the dependency is not necessarily non-negative anymore, i.e.
+ // unroll-and-jam may violate correctness.
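+  // A concrete sketch: when unroll-and-jamming the outer loop of a 2-deep
+  // nest (UnrollLevel == 1, JamLevel == 2), a dependence with direction
+  // vector (<, >) is rejected below, because after jamming the inner loop
+  // would have to run "backwards" across the fused iterations, whereas a
+  // dependence with direction vector (<, <) is preserved and accepted.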
+ std::unique_ptr<Dependence> D = DI.depends(Src, Dst, true);
+ if (!D)
+ return true;
+ assert(D->isOrdered() && "Expected an output, flow or anti dep.");
+
+ if (D->isConfused()) {
+ LLVM_DEBUG(dbgs() << " Confused dependency between:\n"
+ << " " << *Src << "\n"
+ << " " << *Dst << "\n");
+ return false;
+ }
+
+ // If outer levels (levels enclosing the loop being unroll-and-jammed) have a
+ // non-equal direction, then the locations accessed in the inner levels cannot
+  // overlap in memory. We assume the indexes never overlap into neighboring
+ // dimensions.
+ for (unsigned CurLoopDepth = 1; CurLoopDepth < UnrollLevel; ++CurLoopDepth)
+ if (!(D->getDirection(CurLoopDepth) & Dependence::DVEntry::EQ))
+ return true;
+
+ auto UnrollDirection = D->getDirection(UnrollLevel);
+
+ // If the distance carried by the unrolled loop is 0, then after unrolling
+ // that distance will become non-zero resulting in non-overlapping accesses in
+ // the inner loops.
+ if (UnrollDirection == Dependence::DVEntry::EQ)
+ return true;
+
+ if (UnrollDirection & Dependence::DVEntry::LT &&
+ !preservesForwardDependence(Src, Dst, UnrollLevel, JamLevel,
+ Sequentialized, D.get()))
+ return false;
+
+ if (UnrollDirection & Dependence::DVEntry::GT &&
+ !preservesBackwardDependence(Src, Dst, UnrollLevel, JamLevel,
+ Sequentialized, D.get()))
+ return false;
+
+ return true;
+}
+
+static bool
+checkDependencies(Loop &Root, const BasicBlockSet &SubLoopBlocks,
+ const DenseMap<Loop *, BasicBlockSet> &ForeBlocksMap,
+ const DenseMap<Loop *, BasicBlockSet> &AftBlocksMap,
+ DependenceInfo &DI, LoopInfo &LI) {
+ SmallVector<BasicBlockSet, 8> AllBlocks;
+ for (Loop *L : Root.getLoopsInPreorder())
+ if (ForeBlocksMap.find(L) != ForeBlocksMap.end())
+ AllBlocks.push_back(ForeBlocksMap.lookup(L));
+ AllBlocks.push_back(SubLoopBlocks);
+ for (Loop *L : Root.getLoopsInPreorder())
+ if (AftBlocksMap.find(L) != AftBlocksMap.end())
+ AllBlocks.push_back(AftBlocksMap.lookup(L));
+
+ unsigned LoopDepth = Root.getLoopDepth();
+ SmallVector<Instruction *, 4> EarlierLoadsAndStores;
+ SmallVector<Instruction *, 4> CurrentLoadsAndStores;
+ for (BasicBlockSet &Blocks : AllBlocks) {
+ CurrentLoadsAndStores.clear();
+ if (!getLoadsAndStores(Blocks, CurrentLoadsAndStores))
+ return false;
+
+ Loop *CurLoop = LI.getLoopFor((*Blocks.begin())->front().getParent());
+ unsigned CurLoopDepth = CurLoop->getLoopDepth();
+
+ for (auto *Earlier : EarlierLoadsAndStores) {
+ Loop *EarlierLoop = LI.getLoopFor(Earlier->getParent());
+ unsigned EarlierDepth = EarlierLoop->getLoopDepth();
+ unsigned CommonLoopDepth = std::min(EarlierDepth, CurLoopDepth);
+ for (auto *Later : CurrentLoadsAndStores) {
+ if (!checkDependency(Earlier, Later, LoopDepth, CommonLoopDepth, false,
+ DI))
+ return false;
+ }
+ }
+
+ size_t NumInsts = CurrentLoadsAndStores.size();
+ for (size_t I = 0; I < NumInsts; ++I) {
+ for (size_t J = I; J < NumInsts; ++J) {
+ if (!checkDependency(CurrentLoadsAndStores[I], CurrentLoadsAndStores[J],
+ LoopDepth, CurLoopDepth, true, DI))
+ return false;
+ }
+ }
+
+ EarlierLoadsAndStores.append(CurrentLoadsAndStores.begin(),
+ CurrentLoadsAndStores.end());
+ }
+ return true;
+}
+
+static bool isEligibleLoopForm(const Loop &Root) {
+ // Root must have a child.
+ if (Root.getSubLoops().size() != 1)
+ return false;
+
+ const Loop *L = &Root;
+ do {
+ // All loops in Root need to be in simplify and rotated form.
+ if (!L->isLoopSimplifyForm())
+ return false;
+
+ if (!L->isRotatedForm())
+ return false;
+
+ if (L->getHeader()->hasAddressTaken()) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Address taken\n");
+ return false;
+ }
+
+ unsigned SubLoopsSize = L->getSubLoops().size();
+ if (SubLoopsSize == 0)
+ return true;
+
+ // Only one child is allowed.
+ if (SubLoopsSize != 1)
+ return false;
+
+ // Only loops with a single exit block can be unrolled and jammed.
+ // The function getExitBlock() is used for this check, rather than
+ // getUniqueExitBlock() to ensure loops with multiple exit edges are
+ // disallowed.
+ if (!L->getExitBlock()) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; only loops with single exit "
+ "blocks can be unrolled and jammed.\n");
+ return false;
+ }
+
+ // Only loops with a single exiting block can be unrolled and jammed.
+ if (!L->getExitingBlock()) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; only loops with single "
+ "exiting blocks can be unrolled and jammed.\n");
+ return false;
+ }
+
+ L = L->getSubLoops()[0];
+ } while (L);
+
+ return true;
+}
+
+static Loop *getInnerMostLoop(Loop *L) {
+ while (!L->getSubLoops().empty())
+ L = L->getSubLoops()[0];
+ return L;
+}
+
+bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
+ DependenceInfo &DI, LoopInfo &LI) {
+ if (!isEligibleLoopForm(*L)) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Ineligible loop form\n");
+ return false;
+ }
+
+ /* We currently handle outer loops like this:
+ |
+ ForeFirst <------\ }
+ Blocks | } ForeBlocks of L
+ ForeLast | }
+ | |
+ ... |
+ | |
+ ForeFirst <----\ | }
+ Blocks | | } ForeBlocks of an inner loop of L
+ ForeLast | | }
+ | | |
+ JamLoopFirst <\ | | }
+ Blocks | | | } JamLoopBlocks of the innermost loop
+ JamLoopLast -/ | | }
+ | | |
+ AftFirst | | }
+ Blocks | | } AftBlocks of an inner loop of L
+ AftLast ------/ | }
+ | |
+ ... |
+ | |
+ AftFirst | }
+ Blocks | } AftBlocks of L
+ AftLast --------/ }
+ |
+
+ There can be (theoretically) any number of blocks in ForeBlocks, SubLoopBlocks
+ and AftBlocks, provided that there is one edge from Fores to SubLoops,
+ one edge from SubLoops to Afts and a single outer loop exit (from Afts).
+ In practice we currently limit Aft blocks to a single block, and limit
+ things further in the profitability checks of the unroll and jam pass.
+
+ Because of the way we rearrange basic blocks, we also require that
+ the Fore blocks of L on all unrolled iterations are safe to move before the
+ blocks of the direct child of L of all iterations. So we require that the
+ phi node looping operands of ForeHeader can be moved to at least the end of
+ ForeEnd, so that we can arrange cloned Fore Blocks before the subloop and
+ match up PHIs correctly.
+
+ i.e. The old order of blocks used to be
+ (F1)1 (F2)1 J1_1 J1_2 (A2)1 (A1)1 (F1)2 (F2)2 J2_1 J2_2 (A2)2 (A1)2.
+ It needs to be safe to transform this to
+ (F1)1 (F1)2 (F2)1 (F2)2 J1_1 J1_2 J2_1 J2_2 (A2)1 (A2)2 (A1)1 (A1)2.
+
+ There are then a number of checks along the lines of no calls, no
+ exceptions, inner loop IV is consistent, etc. Note that for loops requiring
+ runtime unrolling, UnrollRuntimeLoopRemainder can also fail in
+ UnrollAndJamLoop if the trip count cannot be easily calculated.
+ */
+
+ // Split blocks into Fore/SubLoop/Aft based on dominators
+ Loop *JamLoop = getInnerMostLoop(L);
+ BasicBlockSet SubLoopBlocks;
+ DenseMap<Loop *, BasicBlockSet> ForeBlocksMap;
+ DenseMap<Loop *, BasicBlockSet> AftBlocksMap;
+ if (!partitionOuterLoopBlocks(*L, *JamLoop, SubLoopBlocks, ForeBlocksMap,
+ AftBlocksMap, DT)) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Incompatible loop layout\n");
+ return false;
+ }
+
+ // Aft blocks may need to move instructions to fore blocks, which becomes more
+ // difficult if there are multiple (potentially conditionally executed)
+ // blocks. For now we just exclude loops with multiple aft blocks.
+ if (AftBlocksMap[L].size() != 1) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Can't currently handle "
+ "multiple blocks after the loop\n");
+ return false;
+ }
+
+ // Check inner loop backedge count is consistent on all iterations of the
+ // outer loop
+ if (any_of(L->getLoopsInPreorder(), [&SE](Loop *SubLoop) {
+ return !hasIterationCountInvariantInParent(SubLoop, SE);
+ })) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Inner loop iteration count is "
+ "not consistent on each iteration\n");
+ return false;
+ }
+
+ // Check the loop safety info for exceptions.
+ SimpleLoopSafetyInfo LSI;
+ LSI.computeLoopSafetyInfo(L);
+ if (LSI.anyBlockMayThrow()) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Something may throw\n");
+ return false;
+ }
+
+ // We've ruled out the easy stuff and now need to check that there are no
+ // interdependencies which may prevent us from moving:
+ // ForeBlocks before Subloop and AftBlocks.
+ // Subloop before AftBlocks.
+ // ForeBlock phi operands before the subloop
+
+ // Make sure we can move all instructions we need to before the subloop
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+ BasicBlockSet AftBlocks = AftBlocksMap[L];
+ Loop *SubLoop = L->getSubLoops()[0];
+ if (!processHeaderPhiOperands(
+ Header, Latch, AftBlocks, [&AftBlocks, &SubLoop](Instruction *I) {
+ if (SubLoop->contains(I->getParent()))
+ return false;
+ if (AftBlocks.count(I->getParent())) {
+ // If we hit a phi node in afts we know we are done (probably
+ // LCSSA)
+ if (isa<PHINode>(I))
+ return false;
+ // Can't move instructions with side effects or memory
+ // reads/writes
+ if (I->mayHaveSideEffects() || I->mayReadOrWriteMemory())
+ return false;
+ }
+ // Keep going
+ return true;
+ })) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't move required "
+ "instructions after subloop to before it\n");
+ return false;
+ }
+
+ // Check for memory dependencies which prohibit the unrolling we are doing.
+ // Because of the way we are unrolling Fore/Sub/Aft blocks, we need to check
+ // there are no dependencies between Fore-Sub, Fore-Aft, Sub-Aft and Sub-Sub.
+ if (!checkDependencies(*L, SubLoopBlocks, ForeBlocksMap, AftBlocksMap, DI,
+ LI)) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; failed dependency check\n");
+ return false;
+ }
+
+ return true;
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LoopUnrollRuntime.cpp
new file mode 100644
index 0000000000..b19156bcb4
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -0,0 +1,1008 @@
+//===-- LoopUnrollRuntime.cpp - Runtime Loop unrolling utilities ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some loop unrolling utilities for loops with run-time
+// trip counts. See LoopUnroll.cpp for unrolling loops with compile-time
+// trip counts.
+//
+// The functions in this file are used to generate extra code when the
+// run-time trip count modulo the unroll factor is not 0. When this is the
+// case, we need to generate code to execute these 'left over' iterations.
+//
+// The current strategy generates an if-then-else sequence prior to the
+// unrolled loop to execute the 'left over' iterations before or after the
+// unrolled loop.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ProfDataUtils.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include <algorithm>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-unroll"
+
+STATISTIC(NumRuntimeUnrolled,
+ "Number of loops unrolled with run-time trip counts");
+static cl::opt<bool> UnrollRuntimeMultiExit(
+ "unroll-runtime-multi-exit", cl::init(false), cl::Hidden,
+ cl::desc("Allow runtime unrolling for loops with multiple exits, when "
+ "epilog is generated"));
+static cl::opt<bool> UnrollRuntimeOtherExitPredictable(
+ "unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden,
+ cl::desc("Assume the non latch exit block to be predictable"));
+
+/// Connect the unrolling prolog code to the original loop.
+/// The unrolling prolog code contains code to execute the
+/// 'extra' iterations if the run-time trip count modulo the
+/// unroll count is non-zero.
+///
+/// This function performs the following:
+/// - Create PHI nodes at prolog end block to combine values
+/// that exit the prolog code and jump around the prolog.
+/// - Add a PHI operand to a PHI node at the loop exit block
+/// for values that exit the prolog and go around the loop.
+/// - Branch around the original loop if the trip count is less
+/// than the unroll factor.
+///
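+/// A hedged sketch (value names assumed) of the PHI this creates in PrologExit
+/// for a loop-carried value %x:
+///   %x.unr = phi [ <incoming from PreHeader or undef>, %PreHeader ],
+///                [ %x.prol, %PrologLatch ]
+/// The PHI in the loop header (or the LCSSA PHI in LatchExit) is then rewired
+/// to use %x.unr instead of the old incoming value.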
+static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
+ BasicBlock *PrologExit,
+ BasicBlock *OriginalLoopLatchExit,
+ BasicBlock *PreHeader, BasicBlock *NewPreHeader,
+ ValueToValueMapTy &VMap, DominatorTree *DT,
+ LoopInfo *LI, bool PreserveLCSSA,
+ ScalarEvolution &SE) {
+ // Loop structure should be the following:
+ // Preheader
+ // PrologHeader
+ // ...
+ // PrologLatch
+ // PrologExit
+ // NewPreheader
+ // Header
+ // ...
+ // Latch
+ // LatchExit
+ BasicBlock *Latch = L->getLoopLatch();
+ assert(Latch && "Loop must have a latch");
+ BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
+
+ // Create a PHI node for each outgoing value from the original loop
+ // (which means it is an outgoing value from the prolog code too).
+ // The new PHI node is inserted in the prolog end basic block.
+ // The new PHI node value is added as an operand of a PHI node in either
+ // the loop header or the loop exit block.
+ for (BasicBlock *Succ : successors(Latch)) {
+ for (PHINode &PN : Succ->phis()) {
+ // Add a new PHI node to the prolog end block and add the
+ // appropriate incoming values.
+ // TODO: This code assumes that the PrologExit (or the LatchExit block for
+ // prolog loop) contains only one predecessor from the loop, i.e. the
+ // PrologLatch. When supporting multiple-exiting block loops, we can have
+ // two or more blocks that have the LatchExit as the target in the
+ // original loop.
+ PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
+ PrologExit->getFirstNonPHI());
+ // Adding a value to the new PHI node from the original loop preheader.
+ // This is the value that skips all the prolog code.
+ if (L->contains(&PN)) {
+ // Succ is loop header.
+ NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader),
+ PreHeader);
+ } else {
+ // Succ is LatchExit.
+ NewPN->addIncoming(UndefValue::get(PN.getType()), PreHeader);
+ }
+
+ Value *V = PN.getIncomingValueForBlock(Latch);
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (L->contains(I)) {
+ V = VMap.lookup(I);
+ }
+ }
+ // Adding a value to the new PHI node from the last prolog block
+ // that was created.
+ NewPN->addIncoming(V, PrologLatch);
+
+ // Update the existing PHI node operand with the value from the
+ // new PHI node. How this is done depends on if the existing
+ // PHI node is in the original loop block, or the exit block.
+ if (L->contains(&PN))
+ PN.setIncomingValueForBlock(NewPreHeader, NewPN);
+ else
+ PN.addIncoming(NewPN, PrologExit);
+ SE.forgetValue(&PN);
+ }
+ }
+
+ // Make sure that created prolog loop is in simplified form
+ SmallVector<BasicBlock *, 4> PrologExitPreds;
+ Loop *PrologLoop = LI->getLoopFor(PrologLatch);
+ if (PrologLoop) {
+ for (BasicBlock *PredBB : predecessors(PrologExit))
+ if (PrologLoop->contains(PredBB))
+ PrologExitPreds.push_back(PredBB);
+
+ SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI,
+ nullptr, PreserveLCSSA);
+ }
+
+ // Create a branch around the original loop, which is taken if there are no
+ // iterations remaining to be executed after running the prologue.
+ Instruction *InsertPt = PrologExit->getTerminator();
+ IRBuilder<> B(InsertPt);
+
+ assert(Count != 0 && "nonsensical Count!");
+
+ // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1)
+ // This means %xtraiter is (BECount + 1) and all of the iterations of this
+ // loop were executed by the prologue. Note that if BECount <u (Count - 1)
+ // then (BECount + 1) cannot unsigned-overflow.
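+ // Assumed worked example: Count = 8 and BECount = 5 gives a trip count of 6;
+ // the prologue already executed all 6 iterations (xtraiter == 6), and since
+ // 5 <u 7 the branch created below skips the unrolled loop entirely.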
+ Value *BrLoopExit =
+ B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
+ // Split the exit to maintain loop canonicalization guarantees
+ SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit));
+ SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
+ nullptr, PreserveLCSSA);
+ // Add the branch to the exit block (around the unrolled loop)
+ B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
+ InsertPt->eraseFromParent();
+ if (DT) {
+ auto *NewDom = DT->findNearestCommonDominator(OriginalLoopLatchExit,
+ PrologExit);
+ DT->changeImmediateDominator(OriginalLoopLatchExit, NewDom);
+ }
+}
+
+/// Connect the unrolling epilog code to the original loop.
+/// The unrolling epilog code contains code to execute the
+/// 'extra' iterations if the run-time trip count modulo the
+/// unroll count is non-zero.
+///
+/// This function performs the following:
+/// - Update PHI nodes at the unrolling loop exit and epilog loop exit
+/// - Create PHI nodes at the unrolling loop exit to combine
+/// values that exit the unrolling loop code and jump around it.
+/// - Update PHI operands in the epilog loop by the new PHI nodes
+/// - Branch around the epilog loop if extra iters (ModVal) is zero.
+///
+static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
+ BasicBlock *Exit, BasicBlock *PreHeader,
+ BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
+ ValueToValueMapTy &VMap, DominatorTree *DT,
+ LoopInfo *LI, bool PreserveLCSSA,
+ ScalarEvolution &SE) {
+ BasicBlock *Latch = L->getLoopLatch();
+ assert(Latch && "Loop must have a latch");
+ BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
+
+ // Loop structure should be the following:
+ //
+ // PreHeader
+ // NewPreHeader
+ // Header
+ // ...
+ // Latch
+ // NewExit (PN)
+ // EpilogPreHeader
+ // EpilogHeader
+ // ...
+ // EpilogLatch
+ // Exit (EpilogPN)
+
+ // Update PHI nodes at NewExit and Exit.
+ for (PHINode &PN : NewExit->phis()) {
+ // PN should be used in another PHI located in Exit block as
+ // Exit was split by SplitBlockPredecessors into Exit and NewExit
+ // Basically it should look like:
+ // NewExit:
+ // PN = PHI [I, Latch]
+ // ...
+ // Exit:
+ // EpilogPN = PHI [PN, EpilogPreHeader], [X, Exit2], [Y, Exit2.epil]
+ //
+ // Exits from non-latch blocks point to the original exit block and the
+ // epilogue edges have already been added.
+ //
+ // There is EpilogPreHeader incoming block instead of NewExit as
+ // NewExit was spilt 1 more time to get EpilogPreHeader.
+ assert(PN.hasOneUse() && "The phi should have 1 use");
+ PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser());
+ assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");
+
+ // Add incoming PreHeader from branch around the Loop
+ PN.addIncoming(UndefValue::get(PN.getType()), PreHeader);
+ SE.forgetValue(&PN);
+
+ Value *V = PN.getIncomingValueForBlock(Latch);
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I && L->contains(I))
+ // If value comes from an instruction in the loop add VMap value.
+ V = VMap.lookup(I);
+ // For values defined outside the loop, constants and undef values,
+ // add the value itself.
+ EpilogPN->addIncoming(V, EpilogLatch);
+
+ assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
+ "EpilogPN should have EpilogPreHeader incoming block");
+ // Change EpilogPreHeader incoming block to NewExit.
+ EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
+ NewExit);
+ // Now PHIs should look like:
+ // NewExit:
+ // PN = PHI [I, Latch], [undef, PreHeader]
+ // ...
+ // Exit:
+ // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
+ }
+
+ // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).
+ // Update corresponding PHI nodes in epilog loop.
+ for (BasicBlock *Succ : successors(Latch)) {
+ // Skip this as we already updated phis in exit blocks.
+ if (!L->contains(Succ))
+ continue;
+ for (PHINode &PN : Succ->phis()) {
+ // Add new PHI nodes to the loop exit block and update epilog
+ // PHIs with the new PHI values.
+ PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
+ NewExit->getFirstNonPHI());
+ // Adding a value to the new PHI node from the unrolling loop preheader.
+ NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader);
+ // Adding a value to the new PHI node from the unrolling loop latch.
+ NewPN->addIncoming(PN.getIncomingValueForBlock(Latch), Latch);
+
+ // Update the existing PHI node operand with the value from the new PHI
+ // node. Corresponding instruction in epilog loop should be PHI.
+ PHINode *VPN = cast<PHINode>(VMap[&PN]);
+ VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN);
+ }
+ }
+
+ Instruction *InsertPt = NewExit->getTerminator();
+ IRBuilder<> B(InsertPt);
+ Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
+ assert(Exit && "Loop must have a single exit block only");
+ // Split the epilogue exit to maintain loop canonicalization guarantees
+ SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
+ SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr,
+ PreserveLCSSA);
+ // Add the branch to the exit block (around the unrolling loop)
+ B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
+ InsertPt->eraseFromParent();
+ if (DT) {
+ auto *NewDom = DT->findNearestCommonDominator(Exit, NewExit);
+ DT->changeImmediateDominator(Exit, NewDom);
+ }
+
+ // Split the main loop exit to maintain canonicalization guarantees.
+ SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
+ SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, nullptr,
+ PreserveLCSSA);
+}
+
+/// Create a clone of the blocks in a loop and connect them together. A new
+/// loop will be created including all cloned blocks, and the new loop is
+/// given its own counter that runs for NewIter iterations.
+/// The cloned blocks should be inserted between InsertTop and InsertBot.
+/// InsertTop should be new preheader, InsertBot new loop exit.
+/// Returns the new cloned loop that is created.
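+/// A hedged sketch of the remainder counter inserted below (prolog case; block
+/// and value names are assumed):
+///   header.prol:
+///     %prol.iter = phi [ 0, %InsertTop ], [ %prol.iter.next, %latch.prol ]
+///   latch.prol:
+///     %prol.iter.next = add %prol.iter, 1
+///     %prol.iter.cmp = icmp ne %prol.iter.next, %NewIter
+///     br i1 %prol.iter.cmp, label %header.prol, label %InsertBot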
+static Loop *
+CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
+ const bool UnrollRemainder,
+ BasicBlock *InsertTop,
+ BasicBlock *InsertBot, BasicBlock *Preheader,
+ std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
+ ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
+ StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+ Function *F = Header->getParent();
+ LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
+ LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
+ Loop *ParentLoop = L->getParentLoop();
+ NewLoopsMap NewLoops;
+ NewLoops[ParentLoop] = ParentLoop;
+
+ // For each block in the original loop, create a new copy,
+ // and update the value map with the newly created values.
+ for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
+ BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
+ NewBlocks.push_back(NewBB);
+
+ addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);
+
+ VMap[*BB] = NewBB;
+ if (Header == *BB) {
+ // For the first block, add a CFG connection to this newly
+ // created block.
+ InsertTop->getTerminator()->setSuccessor(0, NewBB);
+ }
+
+ if (DT) {
+ if (Header == *BB) {
+ // The header is dominated by the preheader.
+ DT->addNewBlock(NewBB, InsertTop);
+ } else {
+ // Copy information from original loop to unrolled loop.
+ BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock();
+ DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
+ }
+ }
+
+ if (Latch == *BB) {
+ // For the last block, create a loop back to cloned head.
+ VMap.erase((*BB)->getTerminator());
+ // Use an incrementing IV. Pre-incr/post-incr is backedge/trip count.
+ // Subtle: NewIter can be 0 if we wrapped when computing the trip count,
+ // thus we must compare the post-increment (wrapping) value.
+ BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
+ BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
+ IRBuilder<> Builder(LatchBR);
+ PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
+ suffix + ".iter",
+ FirstLoopBB->getFirstNonPHI());
+ auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
+ auto *One = ConstantInt::get(NewIdx->getType(), 1);
+ Value *IdxNext =
+     Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
+ Value *IdxCmp =
+     Builder.CreateICmpNE(IdxNext, NewIter, NewIdx->getName() + ".cmp");
+ Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
+ NewIdx->addIncoming(Zero, InsertTop);
+ NewIdx->addIncoming(IdxNext, NewBB);
+ LatchBR->eraseFromParent();
+ }
+ }
+
+ // Change the incoming values to the ones defined in the preheader or
+ // cloned loop.
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
+ unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
+ NewPHI->setIncomingBlock(idx, InsertTop);
+ BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
+ idx = NewPHI->getBasicBlockIndex(Latch);
+ Value *InVal = NewPHI->getIncomingValue(idx);
+ NewPHI->setIncomingBlock(idx, NewLatch);
+ if (Value *V = VMap.lookup(InVal))
+ NewPHI->setIncomingValue(idx, V);
+ }
+
+ Loop *NewLoop = NewLoops[L];
+ assert(NewLoop && "L should have been cloned");
+ MDNode *LoopID = NewLoop->getLoopID();
+
+ // Only add loop metadata if the loop is not going to be completely
+ // unrolled.
+ if (UnrollRemainder)
+ return NewLoop;
+
+ std::optional<MDNode *> NewLoopID = makeFollowupLoopID(
+ LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder});
+ if (NewLoopID) {
+ NewLoop->setLoopID(*NewLoopID);
+
+ // Do not setLoopAlreadyUnrolled if loop attributes have been defined
+ // explicitly.
+ return NewLoop;
+ }
+
+ // Add unroll disable metadata to disable future unrolling for this loop.
+ NewLoop->setLoopAlreadyUnrolled();
+ return NewLoop;
+}
+
+/// Returns true if we can profitably unroll the multi-exit loop L. Currently,
+/// we return true only if UnrollRuntimeMultiExit is set to true.
+static bool canProfitablyUnrollMultiExitLoop(
+ Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
+ bool UseEpilogRemainder) {
+
+ // Priority goes to UnrollRuntimeMultiExit if it's supplied.
+ if (UnrollRuntimeMultiExit.getNumOccurrences())
+ return UnrollRuntimeMultiExit;
+
+ // The main pain point with multi-exit loop unrolling is that once unrolled,
+ // we will not be able to merge all blocks into straight-line code.
+ // There are branches within the unrolled loop that go to the OtherExits.
+ // The second point is the increase in code size, but this is true
+ // irrespective of multiple exits.
+
+ // Note: Both the heuristics below are coarse grained. We are essentially
+ // enabling unrolling of loops that have a single side exit other than the
+ // normal LatchExit (i.e. exiting into a deoptimize block).
+ // The heuristics considered are:
+ // 1. low number of branches in the unrolled version.
+ // 2. high predictability of these extra branches.
+ // We avoid unrolling loops that have more than two exiting blocks. This
+ // limits the total number of branches in the unrolled loop to be at most
+ // the unroll factor (since one of the exiting blocks is the latch block).
+ SmallVector<BasicBlock*, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ if (ExitingBlocks.size() > 2)
+ return false;
+
+ // Allow unrolling of loops with no non-latch exit blocks.
+ if (OtherExits.size() == 0)
+ return true;
+
+ // The second heuristic is that L has one exit other than the latchexit and
+ // that exit is a deoptimize block. We know that deoptimize blocks are rarely
+ // taken, which also implies the branch leading to the deoptimize block is
+ // highly predictable. When UnrollRuntimeOtherExitPredictable is specified, we
+ // assume the other exit branch is predictable even if it has no deoptimize
+ // call.
+ return (OtherExits.size() == 1 &&
+ (UnrollRuntimeOtherExitPredictable ||
+ OtherExits[0]->getTerminatingDeoptimizeCall()));
+ // TODO: These can be fine-tuned further to consider code size or deopt states
+ // that are captured by the deoptimize exit block.
+ // Also, we can extend this to support more cases, if we actually
+ // know of kinds of multiexit loops that would benefit from unrolling.
+}
+
+// Assign the maximum possible trip count as the back edge weight for the
+// remainder loop if the original loop comes with a branch weight.
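+// Assumed example: original latch weights of 99 (backedge) : 1 (exit) with
+// UnrollFactor == 4 give the remainder latch weights of 3 (backedge) : 1
+// (exit), matching the at most UnrollFactor - 1 remainder iterations.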
+static void updateLatchBranchWeightsForRemainderLoop(Loop *OrigLoop,
+ Loop *RemainderLoop,
+ uint64_t UnrollFactor) {
+ uint64_t TrueWeight, FalseWeight;
+ BranchInst *LatchBR =
+ cast<BranchInst>(OrigLoop->getLoopLatch()->getTerminator());
+ if (!extractBranchWeights(*LatchBR, TrueWeight, FalseWeight))
+ return;
+ uint64_t ExitWeight = LatchBR->getSuccessor(0) == OrigLoop->getHeader()
+ ? FalseWeight
+ : TrueWeight;
+ assert(UnrollFactor > 1);
+ uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight;
+ BasicBlock *Header = RemainderLoop->getHeader();
+ BasicBlock *Latch = RemainderLoop->getLoopLatch();
+ auto *RemainderLatchBR = cast<BranchInst>(Latch->getTerminator());
+ unsigned HeaderIdx = (RemainderLatchBR->getSuccessor(0) == Header ? 0 : 1);
+ MDBuilder MDB(RemainderLatchBR->getContext());
+ MDNode *WeightNode =
+ HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
+ : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
+ RemainderLatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
+}
+
+/// Calculate ModVal = (BECount + 1) % Count on the abstract integer domain
+/// accounting for the possibility of unsigned overflow in the 2s complement
+/// domain. Preconditions:
+/// 1) TripCount = BECount + 1 (allowing overflow)
+/// 2) Log2(Count) <= BitWidth(BECount)
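+///
+/// Assumed worked example: Count = 3, BECount = 8 (trip count 9). The
+/// non-power-of-two path computes (8 urem 3) + 1 == 3 and then 3 urem 3 == 0,
+/// which matches 9 % 3; the second urem folds the "+1 reached Count" case back
+/// to zero. For a power-of-two Count the mask TripCount & (Count - 1) is used
+/// instead and stays correct even when TripCount has wrapped to zero.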
+static Value *CreateTripRemainder(IRBuilder<> &B, Value *BECount,
+ Value *TripCount, unsigned Count) {
+ // Note that TripCount is BECount + 1.
+ if (isPowerOf2_32(Count))
+ // If the expression is zero, then either:
+ // 1. There are no iterations to be run in the prolog/epilog loop.
+ // OR
+ // 2. The addition computing TripCount overflowed.
+ //
+ // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
+ // the number of iterations that remain to be run in the original loop is a
+ // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (a
+ // precondition of this method).
+ return B.CreateAnd(TripCount, Count - 1, "xtraiter");
+
+ // As (BECount + 1) can potentially unsigned overflow we count
+ // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
+ Constant *CountC = ConstantInt::get(BECount->getType(), Count);
+ Value *ModValTmp = B.CreateURem(BECount, CountC);
+ Value *ModValAdd = B.CreateAdd(ModValTmp,
+ ConstantInt::get(ModValTmp->getType(), 1));
+ // At that point (BECount % Count) + 1 could be equal to Count.
+ // To handle this case we need to take mod by Count one more time.
+ return B.CreateURem(ModValAdd, CountC, "xtraiter");
+}
+
+
+/// Insert code in the prolog/epilog code when unrolling a loop with a
+/// run-time trip-count.
+///
+/// This method assumes that the loop unroll factor is total number
+/// of loop bodies in the loop after unrolling. (Some folks refer
+/// to the unroll factor as the number of *extra* copies added).
+/// We assume also that the loop unroll factor is a power-of-two. So, after
+/// unrolling the loop, the number of loop bodies executed is 2,
+/// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch
+/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
+/// the switch instruction is generated.
+///
+/// ***Prolog case***
+/// extraiters = tripcount % loopfactor
+/// if (extraiters == 0) jump Loop:
+/// else jump Prol:
+/// Prol: LoopBody;
+/// extraiters -= 1 // Omitted if unroll factor is 2.
+/// if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2.
+/// if (tripcount < loopfactor) jump End:
+/// Loop:
+/// ...
+/// End:
+///
+/// ***Epilog case***
+/// extraiters = tripcount % loopfactor
+/// if (tripcount < loopfactor) jump LoopExit:
+/// unroll_iters = tripcount - extraiters
+/// Loop: LoopBody; (executes unroll_iter times);
+/// unroll_iter -= 1
+/// if (unroll_iter != 0) jump Loop:
+/// LoopExit:
+/// if (extraiters == 0) jump EpilExit:
+/// Epil: LoopBody; (executes extraiters times)
+/// extraiters -= 1 // Omitted if unroll factor is 2.
+/// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2.
+/// EpilExit:
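+///
+/// Assumed end-to-end example: tripcount = 7, loopfactor = 4. In the epilog
+/// scheme extraiters = 3 and unroll_iter = 4, so the unrolled loop runs 4
+/// iterations and the epilog runs the remaining 3. In the prolog scheme the
+/// prolog runs 3 iterations first and, since 7 >= 4, the unrolled loop then
+/// runs the other 4.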
+
+bool llvm::UnrollRuntimeLoopRemainder(
+ Loop *L, unsigned Count, bool AllowExpensiveTripCount,
+ bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV,
+ LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
+ const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop) {
+ LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
+ LLVM_DEBUG(L->dump());
+ LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
+ : dbgs() << "Using prolog remainder.\n");
+
+ // Make sure the loop is in canonical form.
+ if (!L->isLoopSimplifyForm()) {
+ LLVM_DEBUG(dbgs() << "Not in simplify form!\n");
+ return false;
+ }
+
+ // Guaranteed by LoopSimplifyForm.
+ BasicBlock *Latch = L->getLoopLatch();
+ BasicBlock *Header = L->getHeader();
+
+ BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
+
+ if (!LatchBR || LatchBR->isUnconditional()) {
+ // The loop-rotate pass can be helpful to avoid this in many cases.
+ LLVM_DEBUG(
+ dbgs()
+ << "Loop latch not terminated by a conditional branch.\n");
+ return false;
+ }
+
+ unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
+ BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);
+
+ if (L->contains(LatchExit)) {
+ // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
+ // targets of the Latch be an exit block out of the loop.
+ LLVM_DEBUG(
+ dbgs()
+ << "One of the loop latch successors must be the exit block.\n");
+ return false;
+ }
+
+ // These are exit blocks other than the target of the latch exiting block.
+ SmallVector<BasicBlock *, 4> OtherExits;
+ L->getUniqueNonLatchExitBlocks(OtherExits);
+ // Support only single exit and exiting block unless multi-exit loop
+ // unrolling is enabled.
+ if (!L->getExitingBlock() || OtherExits.size()) {
+ // We rely on LCSSA form being preserved when the exit blocks are transformed.
+ // (Note that only an off-by-default mode of the old PM disables PreserveLCSSA.)
+ if (!PreserveLCSSA)
+ return false;
+
+ if (!canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit,
+ UseEpilogRemainder)) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "
+ "enabled!\n");
+ return false;
+ }
+ }
+ // Use Scalar Evolution to compute the trip count. This allows more loops to
+ // be unrolled than relying on induction var simplification.
+ if (!SE)
+ return false;
+
+ // Only unroll loops with a computable trip count.
+ // We calculate the backedge count by using getExitCount on the Latch block,
+ // which is proven to be the only exiting block in this loop. This is the same as
+ // calculating getBackedgeTakenCount on the loop (which computes SCEV for all
+ // exiting blocks).
+ const SCEV *BECountSC = SE->getExitCount(L, Latch);
+ if (isa<SCEVCouldNotCompute>(BECountSC)) {
+ LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n");
+ return false;
+ }
+
+ unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();
+
+ // Add 1 since the backedge count doesn't include the first loop iteration.
+ // (Note that overflow can occur, this is handled explicitly below)
+ const SCEV *TripCountSC =
+ SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
+ if (isa<SCEVCouldNotCompute>(TripCountSC)) {
+ LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n");
+ return false;
+ }
+
+ BasicBlock *PreHeader = L->getLoopPreheader();
+ BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
+ const DataLayout &DL = Header->getModule()->getDataLayout();
+ SCEVExpander Expander(*SE, DL, "loop-unroll");
+ if (!AllowExpensiveTripCount &&
+ Expander.isHighCostExpansion(TripCountSC, L, SCEVCheapExpansionBudget,
+ TTI, PreHeaderBR)) {
+ LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n");
+ return false;
+ }
+
+ // This constraint lets us deal with an overflowing trip count easily; see the
+ // comment on ModVal below.
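+ // Assumed example: with an i8 backedge count (BEWidth == 8), Count == 512 is
+ // rejected here since Log2_32(512) == 9 > 8, while Count == 256 still passes
+ // (Log2_32(256) == 8), the boundary case CreateTripRemainder handles when the
+ // trip count wraps to zero.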
+ if (Log2_32(Count) > BEWidth) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Count failed constraint on overflow trip count calculation.\n");
+ return false;
+ }
+
+ // Loop structure is the following:
+ //
+ // PreHeader
+ // Header
+ // ...
+ // Latch
+ // LatchExit
+
+ BasicBlock *NewPreHeader;
+ BasicBlock *NewExit = nullptr;
+ BasicBlock *PrologExit = nullptr;
+ BasicBlock *EpilogPreHeader = nullptr;
+ BasicBlock *PrologPreHeader = nullptr;
+
+ if (UseEpilogRemainder) {
+ // If epilog remainder
+ // Split PreHeader to insert a branch around loop for unrolling.
+ NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
+ NewPreHeader->setName(PreHeader->getName() + ".new");
+ // Split LatchExit to create phi nodes from branch above.
+ NewExit = SplitBlockPredecessors(LatchExit, {Latch}, ".unr-lcssa", DT, LI,
+ nullptr, PreserveLCSSA);
+ // NewExit gets its DebugLoc from LatchExit, which is not part of the
+ // original Loop. Fix this by giving NewExit's terminator the DebugLoc of
+ // the loop header's terminator.
+ auto *NewExitTerminator = NewExit->getTerminator();
+ NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc());
+ // Split NewExit to insert epilog remainder loop.
+ EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI);
+ EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
+
+ // If the latch exits from multiple levels of nested loops, then
+ // by assumption there must be another loop exit which branches to the
+ // outer loop and we must adjust the loop for the newly inserted blocks
+ // to account for the fact that our epilogue is still in the same outer
+ // loop. Note that this leaves loopinfo temporarily out of sync with the
+ // CFG until the actual epilogue loop is inserted.
+ if (auto *ParentL = L->getParentLoop())
+ if (LI->getLoopFor(LatchExit) != ParentL) {
+ LI->removeBlock(NewExit);
+ ParentL->addBasicBlockToLoop(NewExit, *LI);
+ LI->removeBlock(EpilogPreHeader);
+ ParentL->addBasicBlockToLoop(EpilogPreHeader, *LI);
+ }
+
+ } else {
+ // If prolog remainder
+ // Split the original preheader twice to insert prolog remainder loop
+ PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
+ PrologPreHeader->setName(Header->getName() + ".prol.preheader");
+ PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
+ DT, LI);
+ PrologExit->setName(Header->getName() + ".prol.loopexit");
+ // Split PrologExit to get NewPreHeader.
+ NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
+ NewPreHeader->setName(PreHeader->getName() + ".new");
+ }
+ // Loop structure should be the following:
+ // Epilog Prolog
+ //
+ // PreHeader PreHeader
+ // *NewPreHeader *PrologPreHeader
+ // Header *PrologExit
+ // ... *NewPreHeader
+ // Latch Header
+ // *NewExit ...
+ // *EpilogPreHeader Latch
+ // LatchExit LatchExit
+
+ // Calculate conditions for branch around loop for unrolling
+ // in epilog case and around prolog remainder loop in prolog case.
+ // Compute the number of extra iterations required, which is:
+ // extra iterations = run-time trip count % loop unroll factor
+ PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
+ IRBuilder<> B(PreHeaderBR);
+ Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
+ PreHeaderBR);
+ Value *BECount;
+ // If there are other exits before the latch, that may cause the latch exit
+ // branch to never be executed, and the latch exit count may be poison.
+ // In this case, freeze the TripCount and base BECount on the frozen
+ // TripCount. We will introduce two branches using these values, and it's
+ // important that they see a consistent value (which would not be guaranteed
+ // if they were frozen independently).
+ if ((!OtherExits.empty() || !SE->loopHasNoAbnormalExits(L)) &&
+ !isGuaranteedNotToBeUndefOrPoison(TripCount, AC, PreHeaderBR, DT)) {
+ TripCount = B.CreateFreeze(TripCount);
+ BECount =
+ B.CreateAdd(TripCount, ConstantInt::get(TripCount->getType(), -1));
+ } else {
+ // If we don't need to freeze, use SCEVExpander for BECount as well, to
+ // allow slightly better value reuse.
+ BECount =
+ Expander.expandCodeFor(BECountSC, BECountSC->getType(), PreHeaderBR);
+ }
+
+ Value * const ModVal = CreateTripRemainder(B, BECount, TripCount, Count);
+
+ Value *BranchVal =
+ UseEpilogRemainder ? B.CreateICmpULT(BECount,
+ ConstantInt::get(BECount->getType(),
+ Count - 1)) :
+ B.CreateIsNotNull(ModVal, "lcmp.mod");
+ BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
+ BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
+ // Branch to either remainder (extra iterations) loop or unrolling loop.
+ B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
+ PreHeaderBR->eraseFromParent();
+ if (DT) {
+ if (UseEpilogRemainder)
+ DT->changeImmediateDominator(NewExit, PreHeader);
+ else
+ DT->changeImmediateDominator(PrologExit, PreHeader);
+ }
+ Function *F = Header->getParent();
+ // Get an ordered list of blocks in the loop to help with the ordering of the
+ // cloned blocks in the prolog/epilog code
+ LoopBlocksDFS LoopBlocks(L);
+ LoopBlocks.perform(LI);
+
+ //
+ // For each extra loop iteration, create a copy of the loop's basic blocks
+ // and generate a condition that branches to the copy depending on the
+ // number of 'left over' iterations.
+ //
+ std::vector<BasicBlock *> NewBlocks;
+ ValueToValueMapTy VMap;
+
+ // Clone all the basic blocks in the loop. If Count is 2, we don't clone
+ // the loop, otherwise we create a cloned loop to execute the extra
+ // iterations. This function adds the appropriate CFG connections.
+ BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
+ BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
+ Loop *remainderLoop = CloneLoopBlocks(
+ L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot,
+ NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
+
+ // Assign the maximum possible trip count as the back edge weight for the
+ // remainder loop if the original loop comes with a branch weight.
+ if (remainderLoop && !UnrollRemainder)
+ updateLatchBranchWeightsForRemainderLoop(L, remainderLoop, Count);
+
+ // Insert the cloned blocks into the function.
+ F->splice(InsertBot->getIterator(), F, NewBlocks[0]->getIterator(), F->end());
+
+ // Now the loop blocks are cloned and the other exiting blocks from the
+ // remainder are connected to the original Loop's exit blocks. The remaining
+ // work is to update the phi nodes in the original loop, and take in the
+ // values from the cloned region.
+ for (auto *BB : OtherExits) {
+ // Given we preserve LCSSA form, we know that the values used outside the
+ // loop will be used through these phi nodes at the exit blocks that are
+ // transformed below.
+ for (PHINode &PN : BB->phis()) {
+ unsigned oldNumOperands = PN.getNumIncomingValues();
+ // Add the incoming values from the remainder code to the end of the phi
+ // node.
+ for (unsigned i = 0; i < oldNumOperands; i++) {
+ auto *PredBB = PN.getIncomingBlock(i);
+ if (PredBB == Latch)
+ // The latch exit is handled separately; see ConnectProlog / ConnectEpilog.
+ continue;
+ if (!L->contains(PredBB))
+ // Even if we had dedicated exits, the code above inserted an
+ // extra branch which can reach the latch exit.
+ continue;
+
+ auto *V = PN.getIncomingValue(i);
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (L->contains(I))
+ V = VMap.lookup(I);
+ PN.addIncoming(V, cast<BasicBlock>(VMap[PredBB]));
+ }
+ }
+#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
+ for (BasicBlock *SuccBB : successors(BB)) {
+ assert(!(llvm::is_contained(OtherExits, SuccBB) || SuccBB == LatchExit) &&
+ "Breaks the definition of dedicated exits!");
+ }
+#endif
+ }
+
+ // Update the immediate dominator of the exit blocks and blocks that are
+ // reachable from the exit blocks. This is needed because we now have paths
+ // from both the original loop and the remainder code reaching the exit
+ // blocks. While the IDom of these exit blocks were from the original loop,
+ // now the IDom is the preheader (which decides whether the original loop or
+ // remainder code should run).
+ if (DT && !L->getExitingBlock()) {
+ SmallVector<BasicBlock *, 16> ChildrenToUpdate;
+ // NB! We have to examine the dom children of all loop blocks, not just
+ // those which are the IDom of the exit blocks. This is because blocks
+ // reachable from the exit blocks can have their IDom as the nearest common
+ // dominator of the exit blocks.
+ for (auto *BB : L->blocks()) {
+ auto *DomNodeBB = DT->getNode(BB);
+ for (auto *DomChild : DomNodeBB->children()) {
+ auto *DomChildBB = DomChild->getBlock();
+ if (!L->contains(LI->getLoopFor(DomChildBB)))
+ ChildrenToUpdate.push_back(DomChildBB);
+ }
+ }
+ for (auto *BB : ChildrenToUpdate)
+ DT->changeImmediateDominator(BB, PreHeader);
+ }
+
+ // Loop structure should be the following:
+ // Epilog Prolog
+ //
+ // PreHeader PreHeader
+ // NewPreHeader PrologPreHeader
+ // Header PrologHeader
+ // ... ...
+ // Latch PrologLatch
+ // NewExit PrologExit
+ // EpilogPreHeader NewPreHeader
+ // EpilogHeader Header
+ // ... ...
+ // EpilogLatch Latch
+ // LatchExit LatchExit
+
+ // Rewrite the cloned instruction operands to use the values created when the
+ // clone is created.
+ for (BasicBlock *BB : NewBlocks) {
+ for (Instruction &I : *BB) {
+ RemapInstruction(&I, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ }
+ }
+
+ if (UseEpilogRemainder) {
+ // Connect the epilog code to the original loop and update the
+ // PHI functions.
+ ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader,
+ NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE);
+
+ // Update counter in loop for unrolling.
+ // Use an incrementing IV. Pre-incr/post-incr is backedge/trip count.
+ // Subtle: TestVal can be 0 if we wrapped when computing the trip count,
+ // thus we must compare the post-increment (wrapping) value.
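+ // Assumed example: for an i32 loop with BECount == UINT32_MAX the expanded
+ // trip count wraps to 0, so ModVal and TestVal are both 0; the post-increment
+ // compare only becomes false once the counter itself wraps back to 0, i.e.
+ // after the full 2^32 iterations, which is the intended count.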
+ IRBuilder<> B2(NewPreHeader->getTerminator());
+ Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
+ BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
+ PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
+ Header->getFirstNonPHI());
+ B2.SetInsertPoint(LatchBR);
+ auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
+ auto *One = ConstantInt::get(NewIdx->getType(), 1);
+ Value *IdxNext = B2.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
+ auto Pred = LatchBR->getSuccessor(0) == Header ? ICmpInst::ICMP_NE
+                                                : ICmpInst::ICMP_EQ;
+ Value *IdxCmp =
+     B2.CreateICmp(Pred, IdxNext, TestVal, NewIdx->getName() + ".ncmp");
+ NewIdx->addIncoming(Zero, NewPreHeader);
+ NewIdx->addIncoming(IdxNext, Latch);
+ LatchBR->setCondition(IdxCmp);
+ } else {
+ // Connect the prolog code to the original loop and update the
+ // PHI functions.
+ ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader,
+ NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE);
+ }
+
+ // If this loop is nested, then the loop unroller changes the code in any
+ // of its parent loops, so the Scalar Evolution pass needs to be run again.
+ SE->forgetTopmostLoop(L);
+
+ // Verify that the Dom Tree and Loop Info are correct.
+#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
+ if (DT) {
+ assert(DT->verify(DominatorTree::VerificationLevel::Full));
+ LI->verify(*DT);
+ }
+#endif
+
+ // For unroll factor 2 remainder loop will have 1 iteration.
+ if (Count == 2 && DT && LI && SE) {
+ // TODO: This code could probably be pulled out into a helper function
+ // (e.g. breakLoopBackedgeAndSimplify) and reused in loop-deletion.
+ BasicBlock *RemainderLatch = remainderLoop->getLoopLatch();
+ assert(RemainderLatch);
+ SmallVector<BasicBlock*> RemainderBlocks(remainderLoop->getBlocks().begin(),
+ remainderLoop->getBlocks().end());
+ breakLoopBackedge(remainderLoop, *DT, *SE, *LI, nullptr);
+ remainderLoop = nullptr;
+
+ // Simplify loop values after breaking the backedge
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ SmallVector<WeakTrackingVH, 16> DeadInsts;
+ for (BasicBlock *BB : RemainderBlocks) {
+ for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
+ if (Value *V = simplifyInstruction(&Inst, {DL, nullptr, DT, AC}))
+ if (LI->replacementPreservesLCSSAForm(&Inst, V))
+ Inst.replaceAllUsesWith(V);
+ if (isInstructionTriviallyDead(&Inst))
+ DeadInsts.emplace_back(&Inst);
+ }
+ // We can't do recursive deletion until we're done iterating, as we might
+ // have a phi which (potentially indirectly) uses instructions later in
+ // the block we're iterating through.
+ RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
+ }
+
+ // Merge latch into exit block.
+ auto *ExitBB = RemainderLatch->getSingleSuccessor();
+ assert(ExitBB && "required after breaking cond br backedge");
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ MergeBlockIntoPredecessor(ExitBB, &DTU, LI);
+ }
+
+ // Canonicalize to LoopSimplifyForm both original and remainder loops. We
+ // cannot rely on the LoopUnrollPass to do this because it only does
+ // canonicalization for parent/subloops and not the sibling loops.
+ if (OtherExits.size() > 0) {
+ // Generate dedicated exit blocks for the original loop, to preserve
+ // LoopSimplifyForm.
+ formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA);
+ // Generate dedicated exit blocks for the remainder loop if one exists, to
+ // preserve LoopSimplifyForm.
+ if (remainderLoop)
+ formDedicatedExitBlocks(remainderLoop, DT, LI, nullptr, PreserveLCSSA);
+ }
+
+ auto UnrollResult = LoopUnrollResult::Unmodified;
+ if (remainderLoop && UnrollRemainder) {
+ LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
+ UnrollResult =
+ UnrollLoop(remainderLoop,
+ {/*Count*/ Count - 1, /*Force*/ false, /*Runtime*/ false,
+ /*AllowExpensiveTripCount*/ false,
+ /*UnrollRemainder*/ false, ForgetAllSCEV},
+ LI, SE, DT, AC, TTI, /*ORE*/ nullptr, PreserveLCSSA);
+ }
+
+ if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
+ *ResultLoop = remainderLoop;
+ NumRuntimeUnrolled++;
+ return true;
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LoopUtils.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LoopUtils.cpp
new file mode 100644
index 0000000000..7df8651ede
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/LoopUtils.cpp
@@ -0,0 +1,1877 @@
+//===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines common loop utility functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/PriorityWorklist.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InstSimplifyFolder.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+#define DEBUG_TYPE "loop-utils"
+
+static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
+static const char *LLVMLoopDisableLICM = "llvm.licm.disable";
+
+bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU,
+ bool PreserveLCSSA) {
+ bool Changed = false;
+
+ // We re-use a vector for the in-loop predecessors.
+ SmallVector<BasicBlock *, 4> InLoopPredecessors;
+
+ auto RewriteExit = [&](BasicBlock *BB) {
+ assert(InLoopPredecessors.empty() &&
+ "Must start with an empty predecessors list!");
+ auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); });
+
+ // See if there are any non-loop predecessors of this exit block and
+ // keep track of the in-loop predecessors.
+ bool IsDedicatedExit = true;
+ for (auto *PredBB : predecessors(BB))
+ if (L->contains(PredBB)) {
+ if (isa<IndirectBrInst>(PredBB->getTerminator()))
+ // We cannot rewrite exiting edges from an indirectbr.
+ return false;
+
+ InLoopPredecessors.push_back(PredBB);
+ } else {
+ IsDedicatedExit = false;
+ }
+
+ assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!");
+
+ // Nothing to do if this is already a dedicated exit.
+ if (IsDedicatedExit)
+ return false;
+
+ auto *NewExitBB = SplitBlockPredecessors(
+ BB, InLoopPredecessors, ".loopexit", DT, LI, MSSAU, PreserveLCSSA);
+
+ if (!NewExitBB)
+ LLVM_DEBUG(
+ dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
+ << *L << "\n");
+ else
+ LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
+ << NewExitBB->getName() << "\n");
+ return true;
+ };
+
+ // Walk the exit blocks directly rather than building up a data structure for
+ // them, but only visit each one once.
+ SmallPtrSet<BasicBlock *, 4> Visited;
+ for (auto *BB : L->blocks())
+ for (auto *SuccBB : successors(BB)) {
+ // We're looking for exit blocks so skip in-loop successors.
+ if (L->contains(SuccBB))
+ continue;
+
+ // Visit each exit block exactly once.
+ if (!Visited.insert(SuccBB).second)
+ continue;
+
+ Changed |= RewriteExit(SuccBB);
+ }
+
+ return Changed;
+}
+
+/// Returns the instructions that use values defined in the loop.
+SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) {
+ SmallVector<Instruction *, 8> UsedOutside;
+
+ for (auto *Block : L->getBlocks())
+ // FIXME: I believe that this could use copy_if if the Inst reference could
+ // be adapted into a pointer.
+ for (auto &Inst : *Block) {
+ auto Users = Inst.users();
+ if (any_of(Users, [&](User *U) {
+ auto *Use = cast<Instruction>(U);
+ return !L->contains(Use->getParent());
+ }))
+ UsedOutside.push_back(&Inst);
+ }
+
+ return UsedOutside;
+}
+
+void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) {
+ // By definition, all loop passes need the LoopInfo analysis and the
+ // Dominator tree it depends on. Because they all participate in the loop
+ // pass manager, they must also preserve these.
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+
+ // We must also preserve LoopSimplify and LCSSA. We locally access their IDs
+ // here because users shouldn't directly get them from this header.
+ extern char &LoopSimplifyID;
+ extern char &LCSSAID;
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ // This is used in the LPPassManager to perform LCSSA verification on passes
+ // which preserve lcssa form
+ AU.addRequired<LCSSAVerificationPass>();
+ AU.addPreserved<LCSSAVerificationPass>();
+
+ // Loop passes are designed to run inside of a loop pass manager which means
+ // that any function analyses they require must be required by the first loop
+ // pass in the manager (so that it is computed before the loop pass manager
+ // runs) and preserved by all loop passes in the manager. To make this
+ // reasonably robust, the set needed for most loop passes is maintained here.
+ // If your loop pass requires an analysis not listed here, you will need to
+ // carefully audit the loop pass manager nesting structure that results.
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ // FIXME: When all loop passes preserve MemorySSA, it can be required and
+ // preserved here instead of the individual handling in each pass.
+}
+
+/// Manually defined generic "LoopPass" dependency initialization. This is used
+/// to initialize the exact set of passes from above in \c
+/// getLoopAnalysisUsage. It can be used within a loop pass's initialization
+/// with:
+///
+/// INITIALIZE_PASS_DEPENDENCY(LoopPass)
+///
+/// As-if "LoopPass" were a pass.
+void llvm::initializeLoopPassPass(PassRegistry &Registry) {
+ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+ INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+}
+
+/// Create MDNode for input string.
+static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) {
+ LLVMContext &Context = TheLoop->getHeader()->getContext();
+ Metadata *MDs[] = {
+ MDString::get(Context, Name),
+ ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))};
+ return MDNode::get(Context, MDs);
+}
+
+/// Set input string into loop metadata by keeping other values intact.
+/// If the string is already in loop metadata update value if it is
+/// different.
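+///
+/// Illustrative result (a sketch, metadata numbers assumed): calling this with
+/// StringMD == "llvm.loop.unroll.count" and V == 4 on a loop without metadata
+/// produces roughly
+///   !llvm.loop !0
+///   !0 = distinct !{!0, !1}
+///   !1 = !{!"llvm.loop.unroll.count", i32 4}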
+void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD,
+ unsigned V) {
+ SmallVector<Metadata *, 4> MDs(1);
+ // If the loop already has metadata, retain it.
+ MDNode *LoopID = TheLoop->getLoopID();
+ if (LoopID) {
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
+ MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
+ // If it is of form key = value, try to parse it.
+ if (Node->getNumOperands() == 2) {
+ MDString *S = dyn_cast<MDString>(Node->getOperand(0));
+ if (S && S->getString().equals(StringMD)) {
+ ConstantInt *IntMD =
+ mdconst::extract_or_null<ConstantInt>(Node->getOperand(1));
+ if (IntMD && IntMD->getSExtValue() == V)
+ // It is already in place. Do nothing.
+ return;
+ // We need to update the value, so just skip it here and it will
+ // be added after copying the other existing nodes.
+ continue;
+ }
+ }
+ MDs.push_back(Node);
+ }
+ }
+ // Add new metadata.
+ MDs.push_back(createStringMetadata(TheLoop, StringMD, V));
+ // Replace current metadata node with new one.
+ LLVMContext &Context = TheLoop->getHeader()->getContext();
+ MDNode *NewLoopID = MDNode::get(Context, MDs);
+ // Set operand 0 to refer to the loop id itself.
+ NewLoopID->replaceOperandWith(0, NewLoopID);
+ TheLoop->setLoopID(NewLoopID);
+}
+
+std::optional<ElementCount>
+llvm::getOptionalElementCountLoopAttribute(const Loop *TheLoop) {
+ std::optional<int> Width =
+ getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width");
+
+ if (Width) {
+ std::optional<int> IsScalable = getOptionalIntLoopAttribute(
+ TheLoop, "llvm.loop.vectorize.scalable.enable");
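+ // A non-zero scalable.enable value requests a scalable (vscale-based)
+ // element count; otherwise the width is treated as fixed.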
+ return ElementCount::get(*Width, IsScalable.value_or(false));
+ }
+
+ return std::nullopt;
+}
+
+std::optional<MDNode *> llvm::makeFollowupLoopID(
+ MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions,
+ const char *InheritOptionsExceptPrefix, bool AlwaysNew) {
+ if (!OrigLoopID) {
+ if (AlwaysNew)
+ return nullptr;
+ return std::nullopt;
+ }
+
+ assert(OrigLoopID->getOperand(0) == OrigLoopID);
+
+ bool InheritAllAttrs = !InheritOptionsExceptPrefix;
+ bool InheritSomeAttrs =
+ InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[0] != '\0';
+ SmallVector<Metadata *, 8> MDs;
+ MDs.push_back(nullptr);
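+ // The first operand is a placeholder for the node's self-reference; it is
+ // filled in once the new loop ID has been created below.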
+
+ bool Changed = false;
+ if (InheritAllAttrs || InheritSomeAttrs) {
+ for (const MDOperand &Existing : drop_begin(OrigLoopID->operands())) {
+ MDNode *Op = cast<MDNode>(Existing.get());
+
+ auto InheritThisAttribute = [InheritSomeAttrs,
+ InheritOptionsExceptPrefix](MDNode *Op) {
+ if (!InheritSomeAttrs)
+ return false;
+
+ // Skip malformed attribute metadata nodes.
+ if (Op->getNumOperands() == 0)
+ return true;
+ Metadata *NameMD = Op->getOperand(0).get();
+ if (!isa<MDString>(NameMD))
+ return true;
+ StringRef AttrName = cast<MDString>(NameMD)->getString();
+
+ // Do not inherit excluded attributes.
+ return !AttrName.startswith(InheritOptionsExceptPrefix);
+ };
+
+ if (InheritThisAttribute(Op))
+ MDs.push_back(Op);
+ else
+ Changed = true;
+ }
+ } else {
+ // Modified if we dropped at least one attribute.
+ Changed = OrigLoopID->getNumOperands() > 1;
+ }
+
+ bool HasAnyFollowup = false;
+ for (StringRef OptionName : FollowupOptions) {
+ MDNode *FollowupNode = findOptionMDForLoopID(OrigLoopID, OptionName);
+ if (!FollowupNode)
+ continue;
+
+ HasAnyFollowup = true;
+ for (const MDOperand &Option : drop_begin(FollowupNode->operands())) {
+ MDs.push_back(Option.get());
+ Changed = true;
+ }
+ }
+
+ // Attributes of the followup loop are not specified explicitly, so signal to
+ // the transformation pass to add suitable attributes.
+ if (!AlwaysNew && !HasAnyFollowup)
+ return std::nullopt;
+
+ // If no attributes were added or removed, the previous loop ID can be reused.
+ if (!AlwaysNew && !Changed)
+ return OrigLoopID;
+
+ // No attributes is equivalent to having no !llvm.loop metadata at all.
+ if (MDs.size() == 1)
+ return nullptr;
+
+ // Build the new loop ID.
+ MDTuple *FollowupLoopID = MDNode::get(OrigLoopID->getContext(), MDs);
+ FollowupLoopID->replaceOperandWith(0, FollowupLoopID);
+ return FollowupLoopID;
+}
+
+bool llvm::hasDisableAllTransformsHint(const Loop *L) {
+ return getBooleanLoopAttribute(L, LLVMLoopDisableNonforced);
+}
+
+bool llvm::hasDisableLICMTransformsHint(const Loop *L) {
+ return getBooleanLoopAttribute(L, LLVMLoopDisableLICM);
+}
+
+TransformationMode llvm::hasUnrollTransformation(const Loop *L) {
+ if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
+ return TM_SuppressedByUser;
+
+ std::optional<int> Count =
+ getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count");
+ if (Count)
+ return *Count == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
+
+ if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"))
+ return TM_ForcedByUser;
+
+ if (getBooleanLoopAttribute(L, "llvm.loop.unroll.full"))
+ return TM_ForcedByUser;
+
+ if (hasDisableAllTransformsHint(L))
+ return TM_Disable;
+
+ return TM_Unspecified;
+}
+
+TransformationMode llvm::hasUnrollAndJamTransformation(const Loop *L) {
+ if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable"))
+ return TM_SuppressedByUser;
+
+ std::optional<int> Count =
+ getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count");
+ if (Count)
+ return *Count == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
+
+ if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable"))
+ return TM_ForcedByUser;
+
+ if (hasDisableAllTransformsHint(L))
+ return TM_Disable;
+
+ return TM_Unspecified;
+}
+
+TransformationMode llvm::hasVectorizeTransformation(const Loop *L) {
+ std::optional<bool> Enable =
+ getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable");
+
+ if (Enable == false)
+ return TM_SuppressedByUser;
+
+ std::optional<ElementCount> VectorizeWidth =
+ getOptionalElementCountLoopAttribute(L);
+ std::optional<int> InterleaveCount =
+ getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
+
+ // 'Forcing' vector width and interleave count to one effectively disables
+ // this transformation.
+ if (Enable == true && VectorizeWidth && VectorizeWidth->isScalar() &&
+ InterleaveCount == 1)
+ return TM_SuppressedByUser;
+
+ if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
+ return TM_Disable;
+
+ if (Enable == true)
+ return TM_ForcedByUser;
+
+ if ((VectorizeWidth && VectorizeWidth->isScalar()) && InterleaveCount == 1)
+ return TM_Disable;
+
+ if ((VectorizeWidth && VectorizeWidth->isVector()) || InterleaveCount > 1)
+ return TM_Enable;
+
+ if (hasDisableAllTransformsHint(L))
+ return TM_Disable;
+
+ return TM_Unspecified;
+}
+
+TransformationMode llvm::hasDistributeTransformation(const Loop *L) {
+ if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable"))
+ return TM_ForcedByUser;
+
+ if (hasDisableAllTransformsHint(L))
+ return TM_Disable;
+
+ return TM_Unspecified;
+}
+
+TransformationMode llvm::hasLICMVersioningTransformation(const Loop *L) {
+ if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable"))
+ return TM_SuppressedByUser;
+
+ if (hasDisableAllTransformsHint(L))
+ return TM_Disable;
+
+ return TM_Unspecified;
+}
+
+/// Does a BFS from a given node to all of its children inside a given loop.
+/// The returned vector of nodes includes the starting point.
+SmallVector<DomTreeNode *, 16>
+llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) {
+ SmallVector<DomTreeNode *, 16> Worklist;
+ auto AddRegionToWorklist = [&](DomTreeNode *DTN) {
+ // Only include subregions in the top level loop.
+ BasicBlock *BB = DTN->getBlock();
+ if (CurLoop->contains(BB))
+ Worklist.push_back(DTN);
+ };
+
+ AddRegionToWorklist(N);
+
+ for (size_t I = 0; I < Worklist.size(); I++) {
+ for (DomTreeNode *Child : Worklist[I]->children())
+ AddRegionToWorklist(Child);
+ }
+
+ return Worklist;
+}
+
+void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
+ LoopInfo *LI, MemorySSA *MSSA) {
+ assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!");
+ auto *Preheader = L->getLoopPreheader();
+ assert(Preheader && "Preheader should exist!");
+
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (MSSA)
+ MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
+
+ // Now that we know the removal is safe, remove the loop by changing the
+ // branch from the preheader to go to the single exit block.
+ //
+ // Because we're deleting a large chunk of code at once, the sequence in which
+ // we remove things is very important to avoid invalidation issues.
+
+ // Tell ScalarEvolution that the loop is deleted. Do this before
+ // deleting the loop so that ScalarEvolution can look at the loop
+ // to determine what it needs to clean up.
+ if (SE) {
+ SE->forgetLoop(L);
+ SE->forgetBlockAndLoopDispositions();
+ }
+
+ Instruction *OldTerm = Preheader->getTerminator();
+ assert(!OldTerm->mayHaveSideEffects() &&
+ "Preheader must end with a side-effect-free terminator");
+ assert(OldTerm->getNumSuccessors() == 1 &&
+ "Preheader must have a single successor");
+ // Connect the preheader to the exit block. Keep the old edge to the header
+ // around to perform the dominator tree update in two separate steps
+ // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge
+ // preheader -> header.
+ //
+ //
+ // 0. Preheader 1. Preheader 2. Preheader
+ // | | | |
+ // V | V |
+ // Header <--\ | Header <--\ | Header <--\
+ // | | | | | | | | | | |
+ // | V | | | V | | | V |
+ // | Body --/ | | Body --/ | | Body --/
+ // V V V V V
+ // Exit Exit Exit
+ //
+ // By doing this in two separate steps we can perform the dominator tree
+ // update without using the batch update API.
+ //
+ // Even when the loop is never executed, we cannot remove the edge from the
+ // source block to the exit block. Consider the case where the unexecuted loop
+ // branches back to an outer loop. If we deleted the loop and removed the edge
+ // coming to this inner loop, this will break the outer loop structure (by
+ // deleting the backedge of the outer loop). If the outer loop is indeed a
+ // non-loop, it will be deleted in a future iteration of loop deletion pass.
+ IRBuilder<> Builder(OldTerm);
+
+ auto *ExitBlock = L->getUniqueExitBlock();
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ if (ExitBlock) {
+ assert(ExitBlock && "Should have a unique exit block!");
+ assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");
+
+ Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
+ // Remove the old branch. The conditional branch becomes a new terminator.
+ OldTerm->eraseFromParent();
+
+ // Rewrite phis in the exit block to get their inputs from the Preheader
+ // instead of the exiting block.
+ for (PHINode &P : ExitBlock->phis()) {
+ // Set the zero'th element of Phi to be from the preheader and remove all
+ // other incoming values. Given the loop has dedicated exits, all other
+ // incoming values must be from the exiting blocks.
+ int PredIndex = 0;
+ P.setIncomingBlock(PredIndex, Preheader);
+ // Removes all incoming values from all other exiting blocks (including
+ // duplicate values from an exiting block).
+ // Nuke all entries except the zero'th entry which is the preheader entry.
+ // NOTE! We need to remove Incoming Values in the reverse order as done
+ // below, to keep the indices valid for deletion (removeIncomingValues
+ // updates getNumIncomingValues and shifts all values down into the
+ // operand being deleted).
+ for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i)
+ P.removeIncomingValue(e - i, false);
+
+ assert((P.getNumIncomingValues() == 1 &&
+ P.getIncomingBlock(PredIndex) == Preheader) &&
+ "Should have exactly one value and that's from the preheader!");
+ }
+
+ if (DT) {
+ DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}});
+ if (MSSA) {
+ MSSAU->applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}},
+ *DT);
+ if (VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+ }
+ }
+
+ // Disconnect the loop body by branching directly to its exit.
+ Builder.SetInsertPoint(Preheader->getTerminator());
+ Builder.CreateBr(ExitBlock);
+ // Remove the old branch.
+ Preheader->getTerminator()->eraseFromParent();
+ } else {
+ assert(L->hasNoExitBlocks() &&
+ "Loop should have either zero or one exit blocks.");
+
+ Builder.SetInsertPoint(OldTerm);
+ Builder.CreateUnreachable();
+ Preheader->getTerminator()->eraseFromParent();
+ }
+
+ if (DT) {
+ DTU.applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}});
+ if (MSSA) {
+ MSSAU->applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}},
+ *DT);
+ SmallSetVector<BasicBlock *, 8> DeadBlockSet(L->block_begin(),
+ L->block_end());
+ MSSAU->removeBlocks(DeadBlockSet);
+ if (VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+ }
+ }
+
+ // Use a set to deduplicate and a vector to guarantee deterministic ordering.
+ llvm::SmallDenseSet<DebugVariable, 4> DeadDebugSet;
+ llvm::SmallVector<DbgVariableIntrinsic *, 4> DeadDebugInst;
+
+ if (ExitBlock) {
+ // Given LCSSA form is satisfied, we should not have users of instructions
+ // within the dead loop outside of the loop. However, LCSSA doesn't take
+ // unreachable uses into account. We handle them here.
+ // We could do this after dropping all references (in which case all users in
+ // the loop would already be eliminated, leaving us less work to do), but
+ // according to the API doc of User::dropAllReferences the only valid
+ // operation after dropping references is deletion. So let's first substitute
+ // all uses of instructions from the loop with a poison value of the
+ // corresponding type.
+ for (auto *Block : L->blocks())
+ for (Instruction &I : *Block) {
+ auto *Poison = PoisonValue::get(I.getType());
+ for (Use &U : llvm::make_early_inc_range(I.uses())) {
+ if (auto *Usr = dyn_cast<Instruction>(U.getUser()))
+ if (L->contains(Usr->getParent()))
+ continue;
+ // If we have a DT, we can check that any use outside the loop occurs
+ // only in an unreachable block.
+ if (DT)
+ assert(!DT->isReachableFromEntry(U) &&
+ "Unexpected user in reachable block");
+ U.set(Poison);
+ }
+ auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I);
+ if (!DVI)
+ continue;
+ if (!DeadDebugSet.insert(DebugVariable(DVI)).second)
+ continue;
+ DeadDebugInst.push_back(DVI);
+ }
+
+ // After the loop has been deleted all the values defined and modified
+ // inside the loop are going to be unavailable.
+ // Since debug values in the loop have been deleted, inserting an undef
+ // dbg.value truncates the range of any dbg.value before the loop where the
+ // loop used to be. This is particularly important for constant values.
+ Instruction *InsertDbgValueBefore = ExitBlock->getFirstNonPHI();
+ assert(InsertDbgValueBefore &&
+ "There should be a non-PHI instruction in exit block, else these "
+ "instructions will have no parent.");
+ for (auto *DVI : DeadDebugInst) {
+ DVI->setKillLocation();
+ DVI->moveBefore(InsertDbgValueBefore);
+ }
+ }
+
+ // Remove the block from the reference counting scheme, so that we can
+ // delete it freely later.
+ for (auto *Block : L->blocks())
+ Block->dropAllReferences();
+
+ if (MSSA && VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+
+ if (LI) {
+ // Erase the instructions and the blocks without having to worry
+ // about ordering because we already dropped the references.
+ // NOTE: This iteration is safe because erasing the block does not remove
+ // its entry from the loop's block list. We do that in the next section.
+ for (BasicBlock *BB : L->blocks())
+ BB->eraseFromParent();
+
+ // Finally, the blocks from loopinfo. This has to happen late because
+ // otherwise our loop iterators won't work.
+
+ SmallPtrSet<BasicBlock *, 8> blocks;
+ blocks.insert(L->block_begin(), L->block_end());
+ for (BasicBlock *BB : blocks)
+ LI->removeBlock(BB);
+
+ // The last step is to update LoopInfo now that we've eliminated this loop.
+ // Note: LoopInfo::erase removes the given loop and relinks its subloops with
+ // its parent, while removeLoop/removeChildLoop remove the given loop without
+ // relinking its subloops, which is what we want.
+ if (Loop *ParentLoop = L->getParentLoop()) {
+ Loop::iterator I = find(*ParentLoop, L);
+ assert(I != ParentLoop->end() && "Couldn't find loop");
+ ParentLoop->removeChildLoop(I);
+ } else {
+ Loop::iterator I = find(*LI, L);
+ assert(I != LI->end() && "Couldn't find loop");
+ LI->removeLoop(I);
+ }
+ LI->destroy(L);
+ }
+}
+
+void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
+ LoopInfo &LI, MemorySSA *MSSA) {
+ auto *Latch = L->getLoopLatch();
+ assert(Latch && "multiple latches not yet supported");
+ auto *Header = L->getHeader();
+ Loop *OutermostLoop = L->getOutermostLoop();
+
+ SE.forgetLoop(L);
+ SE.forgetBlockAndLoopDispositions();
+
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (MSSA)
+ MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
+
+ // Update the CFG and domtree. We chose to special case a couple of
+ // common cases for code quality and test readability reasons.
+ [&]() -> void {
+ if (auto *BI = dyn_cast<BranchInst>(Latch->getTerminator())) {
+ if (!BI->isConditional()) {
+ DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
+ (void)changeToUnreachable(BI, /*PreserveLCSSA*/ true, &DTU,
+ MSSAU.get());
+ return;
+ }
+
+ // Conditional latch/exit - note that latch can be shared by inner
+ // and outer loop so the other target doesn't need to be an exit.
+ if (L->isLoopExiting(Latch)) {
+ // TODO: Generalize ConstantFoldTerminator so that it can be used
+ // here without invalidating LCSSA or MemorySSA. (Tricky case for
+ // LCSSA: header is an exit block of a preceding sibling loop w/o
+ // dedicated exits.)
+ const unsigned ExitIdx = L->contains(BI->getSuccessor(0)) ? 1 : 0;
+ BasicBlock *ExitBB = BI->getSuccessor(ExitIdx);
+
+ DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
+ Header->removePredecessor(Latch, true);
+
+ IRBuilder<> Builder(BI);
+ auto *NewBI = Builder.CreateBr(ExitBB);
+ // Transfer the metadata to the new branch instruction (minus the
+ // loop info since this is no longer a loop)
+ NewBI->copyMetadata(*BI, {LLVMContext::MD_dbg,
+ LLVMContext::MD_annotation});
+
+ BI->eraseFromParent();
+ DTU.applyUpdates({{DominatorTree::Delete, Latch, Header}});
+ if (MSSA)
+ MSSAU->applyUpdates({{DominatorTree::Delete, Latch, Header}}, DT);
+ return;
+ }
+ }
+
+ // General case. By splitting the backedge and then explicitly making it
+ // unreachable, we gracefully handle corner cases such as switch and invoke
+ // terminators.
+ auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get());
+
+ DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
+ (void)changeToUnreachable(BackedgeBB->getTerminator(),
+ /*PreserveLCSSA*/ true, &DTU, MSSAU.get());
+ }();
+
+ // Erase (and destroy) this loop instance. Handles relinking sub-loops
+ // and blocks within the loop as needed.
+ LI.erase(L);
+
+ // If the loop we broke had a parent, then changeToUnreachable might have
+ // caused a block to be removed from the parent loop (see loop_nest_lcssa
+ // test case in zero-btc.ll for an example), thus changing the parent's
+ // exit blocks. If that happened, we need to rebuild LCSSA on the outermost
+ // loop which might have had a block removed.
+ if (OutermostLoop != L)
+ formLCSSARecursively(*OutermostLoop, DT, &LI, &SE);
+}
+
+
+/// Checks if \p L has an exiting latch branch. There may also be other
+/// exiting blocks. Returns the branch instruction terminating the loop
+/// latch if the above check succeeds, nullptr otherwise.
+static BranchInst *getExpectedExitLoopLatchBranch(Loop *L) {
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch)
+ return nullptr;
+
+ BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
+ if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
+ return nullptr;
+
+ assert((LatchBR->getSuccessor(0) == L->getHeader() ||
+ LatchBR->getSuccessor(1) == L->getHeader()) &&
+ "At least one edge out of the latch must go to the header");
+
+ return LatchBR;
+}
+
+/// Return the estimated trip count for any exiting branch which dominates
+/// the loop latch.
+static std::optional<uint64_t> getEstimatedTripCount(BranchInst *ExitingBranch,
+ Loop *L,
+ uint64_t &OrigExitWeight) {
+ // To estimate the number of times the loop body was executed, we want to
+ // know the number of times the backedge was taken, vs. the number of times
+ // we exited the loop.
+ uint64_t LoopWeight, ExitWeight;
+ if (!extractBranchWeights(*ExitingBranch, LoopWeight, ExitWeight))
+ return std::nullopt;
+
+ if (L->contains(ExitingBranch->getSuccessor(1)))
+ std::swap(LoopWeight, ExitWeight);
+
+ if (!ExitWeight)
+ // Don't have a way to return predicated infinite
+ return std::nullopt;
+
+ OrigExitWeight = ExitWeight;
+
+ // Estimated exit count is the ratio of the loop weight to the weight of the
+ // edge exiting the loop, rounded to nearest.
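+ // For example, branch weights of 127 (staying in the loop) and 1 (exiting)
+ // give an estimated exit count of 127 and an estimated trip count of 128.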
+ uint64_t ExitCount = llvm::divideNearest(LoopWeight, ExitWeight);
+ // Estimated trip count is one plus estimated exit count.
+ return ExitCount + 1;
+}
+
+std::optional<unsigned>
+llvm::getLoopEstimatedTripCount(Loop *L,
+ unsigned *EstimatedLoopInvocationWeight) {
+ // Currently we take the estimated exit count only from the loop latch,
+ // ignoring other exiting blocks. This can overestimate the trip count
+ // if we exit through another exit, but can never underestimate it.
+ // TODO: incorporate information from other exits
+ if (BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L)) {
+ uint64_t ExitWeight;
+ if (std::optional<uint64_t> EstTripCount =
+ getEstimatedTripCount(LatchBranch, L, ExitWeight)) {
+ if (EstimatedLoopInvocationWeight)
+ *EstimatedLoopInvocationWeight = ExitWeight;
+ return *EstTripCount;
+ }
+ }
+ return std::nullopt;
+}
+
+bool llvm::setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,
+ unsigned EstimatedloopInvocationWeight) {
+ // At the moment we only support changing the estimated trip count of the
+ // latch branch. We could extend this API to manipulate estimated trip
+ // counts for any exit.
+ BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L);
+ if (!LatchBranch)
+ return false;
+
+ // Calculate taken and exit weights.
+ unsigned LatchExitWeight = 0;
+ unsigned BackedgeTakenWeight = 0;
+
+ if (EstimatedTripCount > 0) {
+ LatchExitWeight = EstimatedloopInvocationWeight;
+ BackedgeTakenWeight = (EstimatedTripCount - 1) * LatchExitWeight;
+ }
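+ // For example, EstimatedTripCount = 100 with an invocation weight of 1
+ // yields branch weights of 99 (backedge taken) and 1 (latch exit).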
+
+ // Swap the weights if the backedge is taken when the condition is "false".
+ if (LatchBranch->getSuccessor(0) != L->getHeader())
+ std::swap(BackedgeTakenWeight, LatchExitWeight);
+
+ MDBuilder MDB(LatchBranch->getContext());
+
+ // Set/Update profile metadata.
+ LatchBranch->setMetadata(
+ LLVMContext::MD_prof,
+ MDB.createBranchWeights(BackedgeTakenWeight, LatchExitWeight));
+
+ return true;
+}
+
+bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
+ ScalarEvolution &SE) {
+ Loop *OuterL = InnerLoop->getParentLoop();
+ if (!OuterL)
+ return true;
+
+ // Get the backedge taken count for the inner loop
+ BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
+ const SCEV *InnerLoopBECountSC = SE.getExitCount(InnerLoop, InnerLoopLatch);
+ if (isa<SCEVCouldNotCompute>(InnerLoopBECountSC) ||
+ !InnerLoopBECountSC->getType()->isIntegerTy())
+ return false;
+
+ // Get whether count is invariant to the outer loop
+ ScalarEvolution::LoopDisposition LD =
+ SE.getLoopDisposition(InnerLoopBECountSC, OuterL);
+ if (LD != ScalarEvolution::LoopInvariant)
+ return false;
+
+ return true;
+}
+
+CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
+ switch (RK) {
+ default:
+ llvm_unreachable("Unknown min/max recurrence kind");
+ case RecurKind::UMin:
+ return CmpInst::ICMP_ULT;
+ case RecurKind::UMax:
+ return CmpInst::ICMP_UGT;
+ case RecurKind::SMin:
+ return CmpInst::ICMP_SLT;
+ case RecurKind::SMax:
+ return CmpInst::ICMP_SGT;
+ case RecurKind::FMin:
+ return CmpInst::FCMP_OLT;
+ case RecurKind::FMax:
+ return CmpInst::FCMP_OGT;
+ }
+}
+
+Value *llvm::createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal,
+ RecurKind RK, Value *Left, Value *Right) {
+ if (auto VTy = dyn_cast<VectorType>(Left->getType()))
+ StartVal = Builder.CreateVectorSplat(VTy->getElementCount(), StartVal);
+ Value *Cmp =
+ Builder.CreateCmp(CmpInst::ICMP_NE, Left, StartVal, "rdx.select.cmp");
+ return Builder.CreateSelect(Cmp, Left, Right, "rdx.select");
+}
+
+Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
+ Value *Right) {
+ CmpInst::Predicate Pred = getMinMaxReductionPredicate(RK);
+ Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp");
+ Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
+ return Select;
+}
+
+// Helper to generate an ordered reduction.
+Value *llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
+ unsigned Op, RecurKind RdxKind) {
+ unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
+
+ // Extract and apply reduction ops in ascending order:
+ // e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ...) + Scl[VF-1]
+ Value *Result = Acc;
+ for (unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) {
+ Value *Ext =
+ Builder.CreateExtractElement(Src, Builder.getInt32(ExtractIdx));
+
+ if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
+ Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext,
+ "bin.rdx");
+ } else {
+ assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) &&
+ "Invalid min/max");
+ Result = createMinMaxOp(Builder, RdxKind, Result, Ext);
+ }
+ }
+
+ return Result;
+}
+
+// Helper to generate a log2 shuffle reduction.
+Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src,
+ unsigned Op, RecurKind RdxKind) {
+ unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
+ // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
+ // and vector ops, reducing the set of values being computed by half each
+ // round.
+ assert(isPowerOf2_32(VF) &&
+ "Reduction emission only supported for pow2 vectors!");
+ // Note: fast-math flags are controlled by the builder configuration
+ // and are assumed to apply to all generated arithmetic instructions. Other
+ // poison-generating flags (nsw/nuw/inbounds/inrange/exact) are not part
+ // of the builder configuration, and since they're not passed explicitly,
+ // will never be relevant here. Note that it would be generally unsound to
+ // propagate these from an intrinsic call to the expansion anyway, as we
+ // change the order of operations.
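+ // For example, with VF = 4 the reduction proceeds as:
+ //   <a0,a1,a2,a3> op <a2,a3,u,u> -> <b0,b1,u,u>
+ //   <b0,b1,u,u>   op <b1,u,u,u>  -> <c0,u,u,u>
+ // and c0 (element 0) holds the reduced value.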
+ Value *TmpVec = Src;
+ SmallVector<int, 32> ShuffleMask(VF);
+ for (unsigned i = VF; i != 1; i >>= 1) {
+ // Move the upper half of the vector to the lower half.
+ for (unsigned j = 0; j != i / 2; ++j)
+ ShuffleMask[j] = i / 2 + j;
+
+ // Fill the rest of the mask with undef.
+ std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1);
+
+ Value *Shuf = Builder.CreateShuffleVector(TmpVec, ShuffleMask, "rdx.shuf");
+
+ if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
+ TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
+ "bin.rdx");
+ } else {
+ assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) &&
+ "Invalid min/max");
+ TmpVec = createMinMaxOp(Builder, RdxKind, TmpVec, Shuf);
+ }
+ }
+ // The result is in the first element of the vector.
+ return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
+}
+
+Value *llvm::createSelectCmpTargetReduction(IRBuilderBase &Builder,
+ const TargetTransformInfo *TTI,
+ Value *Src,
+ const RecurrenceDescriptor &Desc,
+ PHINode *OrigPhi) {
+ assert(RecurrenceDescriptor::isSelectCmpRecurrenceKind(
+ Desc.getRecurrenceKind()) &&
+ "Unexpected reduction kind");
+ Value *InitVal = Desc.getRecurrenceStartValue();
+ Value *NewVal = nullptr;
+
+ // First use the original phi to determine the new value we're trying to
+ // select from in the loop.
+ SelectInst *SI = nullptr;
+ for (auto *U : OrigPhi->users()) {
+ if ((SI = dyn_cast<SelectInst>(U)))
+ break;
+ }
+ assert(SI && "One user of the original phi should be a select");
+
+ if (SI->getTrueValue() == OrigPhi)
+ NewVal = SI->getFalseValue();
+ else {
+ assert(SI->getFalseValue() == OrigPhi &&
+ "At least one input to the select should be the original Phi");
+ NewVal = SI->getTrueValue();
+ }
+
+ // Create a splat vector with the new value and compare this to the vector
+ // we want to reduce.
+ ElementCount EC = cast<VectorType>(Src->getType())->getElementCount();
+ Value *Right = Builder.CreateVectorSplat(EC, InitVal);
+ Value *Cmp =
+ Builder.CreateCmp(CmpInst::ICMP_NE, Src, Right, "rdx.select.cmp");
+
+ // If any predicate is true it means that we want to select the new value.
+ Cmp = Builder.CreateOrReduce(Cmp);
+ return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select");
+}
+
+Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
+ const TargetTransformInfo *TTI,
+ Value *Src, RecurKind RdxKind) {
+ auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType();
+ switch (RdxKind) {
+ case RecurKind::Add:
+ return Builder.CreateAddReduce(Src);
+ case RecurKind::Mul:
+ return Builder.CreateMulReduce(Src);
+ case RecurKind::And:
+ return Builder.CreateAndReduce(Src);
+ case RecurKind::Or:
+ return Builder.CreateOrReduce(Src);
+ case RecurKind::Xor:
+ return Builder.CreateXorReduce(Src);
+ case RecurKind::FMulAdd:
+ case RecurKind::FAdd:
+ return Builder.CreateFAddReduce(ConstantFP::getNegativeZero(SrcVecEltTy),
+ Src);
+ case RecurKind::FMul:
+ return Builder.CreateFMulReduce(ConstantFP::get(SrcVecEltTy, 1.0), Src);
+ case RecurKind::SMax:
+ return Builder.CreateIntMaxReduce(Src, true);
+ case RecurKind::SMin:
+ return Builder.CreateIntMinReduce(Src, true);
+ case RecurKind::UMax:
+ return Builder.CreateIntMaxReduce(Src, false);
+ case RecurKind::UMin:
+ return Builder.CreateIntMinReduce(Src, false);
+ case RecurKind::FMax:
+ return Builder.CreateFPMaxReduce(Src);
+ case RecurKind::FMin:
+ return Builder.CreateFPMinReduce(Src);
+ default:
+ llvm_unreachable("Unhandled opcode");
+ }
+}
+
+Value *llvm::createTargetReduction(IRBuilderBase &B,
+ const TargetTransformInfo *TTI,
+ const RecurrenceDescriptor &Desc, Value *Src,
+ PHINode *OrigPhi) {
+ // TODO: Support in-order reductions based on the recurrence descriptor.
+ // All ops in the reduction inherit fast-math-flags from the recurrence
+ // descriptor.
+ IRBuilderBase::FastMathFlagGuard FMFGuard(B);
+ B.setFastMathFlags(Desc.getFastMathFlags());
+
+ RecurKind RK = Desc.getRecurrenceKind();
+ if (RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK))
+ return createSelectCmpTargetReduction(B, TTI, Src, Desc, OrigPhi);
+
+ return createSimpleTargetReduction(B, TTI, Src, RK);
+}
+
+Value *llvm::createOrderedReduction(IRBuilderBase &B,
+ const RecurrenceDescriptor &Desc,
+ Value *Src, Value *Start) {
+ assert((Desc.getRecurrenceKind() == RecurKind::FAdd ||
+ Desc.getRecurrenceKind() == RecurKind::FMulAdd) &&
+ "Unexpected reduction kind");
+ assert(Src->getType()->isVectorTy() && "Expected a vector type");
+ assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
+
+ return B.CreateFAddReduce(Start, Src);
+}
+
+void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue,
+ bool IncludeWrapFlags) {
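+ // Intersect the IR flags (wrap flags, exact, fast-math, etc.) of the scalar
+ // instructions in VL so that I keeps only flags valid for all of them.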
+ auto *VecOp = dyn_cast<Instruction>(I);
+ if (!VecOp)
+ return;
+ auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0])
+ : dyn_cast<Instruction>(OpValue);
+ if (!Intersection)
+ return;
+ const unsigned Opcode = Intersection->getOpcode();
+ VecOp->copyIRFlags(Intersection, IncludeWrapFlags);
+ for (auto *V : VL) {
+ auto *Instr = dyn_cast<Instruction>(V);
+ if (!Instr)
+ continue;
+ if (OpValue == nullptr || Opcode == Instr->getOpcode())
+ VecOp->andIRFlags(V);
+ }
+}
+
+bool llvm::isKnownNegativeInLoop(const SCEV *S, const Loop *L,
+ ScalarEvolution &SE) {
+ const SCEV *Zero = SE.getZero(S->getType());
+ return SE.isAvailableAtLoopEntry(S, L) &&
+ SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, S, Zero);
+}
+
+bool llvm::isKnownNonNegativeInLoop(const SCEV *S, const Loop *L,
+ ScalarEvolution &SE) {
+ const SCEV *Zero = SE.getZero(S->getType());
+ return SE.isAvailableAtLoopEntry(S, L) &&
+ SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGE, S, Zero);
+}
+
+bool llvm::cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
+ bool Signed) {
+ unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
+ APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) :
+ APInt::getMinValue(BitWidth);
+ auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ return SE.isAvailableAtLoopEntry(S, L) &&
+ SE.isLoopEntryGuardedByCond(L, Predicate, S,
+ SE.getConstant(Min));
+}
+
+bool llvm::cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
+ bool Signed) {
+ unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
+ APInt Max = Signed ? APInt::getSignedMaxValue(BitWidth) :
+ APInt::getMaxValue(BitWidth);
+ auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ return SE.isAvailableAtLoopEntry(S, L) &&
+ SE.isLoopEntryGuardedByCond(L, Predicate, S,
+ SE.getConstant(Max));
+}
+
+//===----------------------------------------------------------------------===//
+// rewriteLoopExitValues - Optimize IV users outside the loop.
+// As a side effect, reduces the amount of IV processing within the loop.
+//===----------------------------------------------------------------------===//
+
+static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) {
+ SmallPtrSet<const Instruction *, 8> Visited;
+ SmallVector<const Instruction *, 8> WorkList;
+ Visited.insert(I);
+ WorkList.push_back(I);
+ while (!WorkList.empty()) {
+ const Instruction *Curr = WorkList.pop_back_val();
+ // This use is outside the loop, nothing to do.
+ if (!L->contains(Curr))
+ continue;
+ // Do we assume it is a "hard" use which will not be eliminated easily?
+ if (Curr->mayHaveSideEffects())
+ return true;
+ // Otherwise, add all its users to worklist.
+ for (const auto *U : Curr->users()) {
+ auto *UI = cast<Instruction>(U);
+ if (Visited.insert(UI).second)
+ WorkList.push_back(UI);
+ }
+ }
+ return false;
+}
+
+// Collect information about PHI nodes which can be transformed in
+// rewriteLoopExitValues.
+struct RewritePhi {
+ PHINode *PN; // For which PHI node is this replacement?
+ unsigned Ith; // For which incoming value?
+ const SCEV *ExpansionSCEV; // The SCEV of the incoming value we are rewriting.
+ Instruction *ExpansionPoint; // Where we'd like to expand that SCEV?
+ bool HighCost; // Is this expansion a high-cost?
+
+ RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt,
+ bool H)
+ : PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt),
+ HighCost(H) {}
+};
+
+// Check whether it is possible to delete the loop after rewriting exit
+// value. If it is possible, ignore ReplaceExitValue and do rewriting
+// aggressively.
+static bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
+ BasicBlock *Preheader = L->getLoopPreheader();
+ // If there is no preheader, the loop will not be deleted.
+ if (!Preheader)
+ return false;
+
+ // In the LoopDeletion pass a loop can be deleted even when
+ // ExitingBlocks.size() > 1; we skip the multiple-ExitingBlocks case here for
+ // simplicity.
+ // TODO: If we see a test case where a loop with multiple exiting blocks can
+ // be deleted after exit value rewriting, we can enhance the logic here.
+ SmallVector<BasicBlock *, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ L->getUniqueExitBlocks(ExitBlocks);
+ if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
+ return false;
+
+ BasicBlock *ExitBlock = ExitBlocks[0];
+ BasicBlock::iterator BI = ExitBlock->begin();
+ while (PHINode *P = dyn_cast<PHINode>(BI)) {
+ Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
+
+ // If the Incoming value of P is found in RewritePhiSet, we know it
+ // could be rewritten to use a loop-invariant value in the transformation
+ // phase later. Skip it in the loop invariant check below.
+ bool found = false;
+ for (const RewritePhi &Phi : RewritePhiSet) {
+ unsigned i = Phi.Ith;
+ if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
+ found = true;
+ break;
+ }
+ }
+
+ Instruction *I;
+ if (!found && (I = dyn_cast<Instruction>(Incoming)))
+ if (!L->hasLoopInvariantOperands(I))
+ return false;
+
+ ++BI;
+ }
+
+ for (auto *BB : L->blocks())
+ if (llvm::any_of(*BB, [](Instruction &I) {
+ return I.mayHaveSideEffects();
+ }))
+ return false;
+
+ return true;
+}
+
+/// Checks if it is safe to call InductionDescriptor::isInductionPHI for \p Phi,
+/// and returns true if this Phi is an induction phi in the loop. When
+/// isInductionPHI returns true, \p ID will also be set by isInductionPHI.
+static bool checkIsIndPhi(PHINode *Phi, Loop *L, ScalarEvolution *SE,
+ InductionDescriptor &ID) {
+ if (!Phi)
+ return false;
+ if (!L->getLoopPreheader())
+ return false;
+ if (Phi->getParent() != L->getHeader())
+ return false;
+ return InductionDescriptor::isInductionPHI(Phi, L, SE, ID);
+}
+
+int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
+ ScalarEvolution *SE,
+ const TargetTransformInfo *TTI,
+ SCEVExpander &Rewriter, DominatorTree *DT,
+ ReplaceExitVal ReplaceExitValue,
+ SmallVector<WeakTrackingVH, 16> &DeadInsts) {
+ // Check a pre-condition.
+ assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
+ "Indvars did not preserve LCSSA!");
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getUniqueExitBlocks(ExitBlocks);
+
+ SmallVector<RewritePhi, 8> RewritePhiSet;
+ // Find all values that are computed inside the loop, but used outside of it.
+ // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
+ // the exit blocks of the loop to find them.
+ for (BasicBlock *ExitBB : ExitBlocks) {
+ // If there are no PHI nodes in this exit block, then no values defined
+ // inside the loop are used on this path, skip it.
+ PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
+ if (!PN) continue;
+
+ unsigned NumPreds = PN->getNumIncomingValues();
+
+ // Iterate over all of the PHI nodes.
+ BasicBlock::iterator BBI = ExitBB->begin();
+ while ((PN = dyn_cast<PHINode>(BBI++))) {
+ if (PN->use_empty())
+ continue; // dead use, don't replace it
+
+ if (!SE->isSCEVable(PN->getType()))
+ continue;
+
+ // Iterate over all of the values in all the PHI nodes.
+ for (unsigned i = 0; i != NumPreds; ++i) {
+ // If the value being merged in is not integer or is not defined
+ // in the loop, skip it.
+ Value *InVal = PN->getIncomingValue(i);
+ if (!isa<Instruction>(InVal))
+ continue;
+
+ // If this pred is for a subloop, not L itself, skip it.
+ if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
+ continue; // The Block is in a subloop, skip it.
+
+ // Check that InVal is defined in the loop.
+ Instruction *Inst = cast<Instruction>(InVal);
+ if (!L->contains(Inst))
+ continue;
+
+ // Find exit values which are induction variables in the loop and are
+ // otherwise unused in the loop, their only uses being the exit-block PHI
+ // node and the induction variable update binary operator.
+ // The exit value can be replaced with the final value when it is cheap
+ // to do so.
+ if (ReplaceExitValue == UnusedIndVarInLoop) {
+ InductionDescriptor ID;
+ PHINode *IndPhi = dyn_cast<PHINode>(Inst);
+ if (IndPhi) {
+ if (!checkIsIndPhi(IndPhi, L, SE, ID))
+ continue;
+ // This is an induction PHI. Check that the only users are PHI
+ // nodes, and induction variable update binary operators.
+ if (llvm::any_of(Inst->users(), [&](User *U) {
+ if (!isa<PHINode>(U) && !isa<BinaryOperator>(U))
+ return true;
+ BinaryOperator *B = dyn_cast<BinaryOperator>(U);
+ if (B && B != ID.getInductionBinOp())
+ return true;
+ return false;
+ }))
+ continue;
+ } else {
+ // If it is not an induction phi, it must be an induction update
+ // binary operator with an induction phi user.
+ BinaryOperator *B = dyn_cast<BinaryOperator>(Inst);
+ if (!B)
+ continue;
+ if (llvm::any_of(Inst->users(), [&](User *U) {
+ PHINode *Phi = dyn_cast<PHINode>(U);
+ if (Phi != PN && !checkIsIndPhi(Phi, L, SE, ID))
+ return true;
+ return false;
+ }))
+ continue;
+ if (B != ID.getInductionBinOp())
+ continue;
+ }
+ }
+
+ // Okay, this instruction has a user outside of the current loop
+ // and varies predictably *inside* the loop. Evaluate the value it
+ // contains when the loop exits, if possible. We prefer to start with
+ // expressions which are true for all exits (so as to maximize
+ // expression reuse by the SCEVExpander), but resort to per-exit
+ // evaluation if that fails.
+ const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
+ if (isa<SCEVCouldNotCompute>(ExitValue) ||
+ !SE->isLoopInvariant(ExitValue, L) ||
+ !Rewriter.isSafeToExpand(ExitValue)) {
+ // TODO: This should probably be sunk into SCEV in some way; maybe a
+ // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for
+ // most SCEV expressions and other recurrence types (e.g. shift
+ // recurrences). Is there existing code we can reuse?
+ const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
+ if (isa<SCEVCouldNotCompute>(ExitCount))
+ continue;
+ if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
+ if (AddRec->getLoop() == L)
+ ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
+ if (isa<SCEVCouldNotCompute>(ExitValue) ||
+ !SE->isLoopInvariant(ExitValue, L) ||
+ !Rewriter.isSafeToExpand(ExitValue))
+ continue;
+ }
+
+ // Computing the value outside of the loop brings no benefit if it is
+ // definitely used inside the loop in a way which can not be optimized
+ // away. Avoid doing so unless we know we have a value which computes
+ // the ExitValue already. TODO: This should be merged into SCEV
+ // expander to leverage its knowledge of existing expressions.
+ if (ReplaceExitValue != AlwaysRepl && !isa<SCEVConstant>(ExitValue) &&
+ !isa<SCEVUnknown>(ExitValue) && hasHardUserWithinLoop(L, Inst))
+ continue;
+
+ // Check if expansions of this SCEV would count as being high cost.
+ bool HighCost = Rewriter.isHighCostExpansion(
+ ExitValue, L, SCEVCheapExpansionBudget, TTI, Inst);
+
+ // Note that we must not perform expansions until after
+ // we query *all* the costs, because if we perform a temporary expansion
+ // in between, one that we might not intend to keep, said expansion
+ // *may* affect the cost calculation of the next SCEVs we'll query,
+ // and the next SCEV may erroneously get a smaller cost.
+
+ // Collect all the candidate PHINodes to be rewritten.
+ Instruction *InsertPt =
+ (isa<PHINode>(Inst) || isa<LandingPadInst>(Inst)) ?
+ &*Inst->getParent()->getFirstInsertionPt() : Inst;
+ RewritePhiSet.emplace_back(PN, i, ExitValue, InsertPt, HighCost);
+ }
+ }
+ }
+
+ // TODO: evaluate whether it is beneficial to change how we calculate
+ // high-cost: if we have SCEV 'A' which we know we will expand, should we
+ // calculate the cost of other SCEVs after expanding SCEV 'A', thus
+ // potentially giving a cost bonus to those other SCEVs?
+
+ bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
+ int NumReplaced = 0;
+
+ // Transformation.
+ for (const RewritePhi &Phi : RewritePhiSet) {
+ PHINode *PN = Phi.PN;
+
+ // Only do the rewrite when the ExitValue can be expanded cheaply.
+ // If LoopCanBeDel is true, rewrite exit value aggressively.
+ if ((ReplaceExitValue == OnlyCheapRepl ||
+ ReplaceExitValue == UnusedIndVarInLoop) &&
+ !LoopCanBeDel && Phi.HighCost)
+ continue;
+
+ Value *ExitVal = Rewriter.expandCodeFor(
+ Phi.ExpansionSCEV, Phi.PN->getType(), Phi.ExpansionPoint);
+
+ LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = " << *ExitVal
+ << '\n'
+ << " LoopVal = " << *(Phi.ExpansionPoint) << "\n");
+
+#ifndef NDEBUG
+ // If we reuse an instruction from a loop which is neither L nor one of
+ // its containing loops, we end up breaking LCSSA form for this loop by
+ // creating a new use of its instruction.
+ if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
+ if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
+ if (EVL != L)
+ assert(EVL->contains(L) && "LCSSA breach detected!");
+#endif
+
+ NumReplaced++;
+ Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
+ PN->setIncomingValue(Phi.Ith, ExitVal);
+ // It's necessary to tell ScalarEvolution about this explicitly so that
+ // it can walk the def-use list and forget all SCEVs, as it may not be
+ // watching the PHI itself. Once the new exit value is in place, there
+ // may not be a def-use connection between the loop and every instruction
+ // which got a SCEVAddRecExpr for that loop.
+ SE->forgetValue(PN);
+
+ // If this instruction is dead now, delete it. Don't do it now to avoid
+ // invalidating iterators.
+ if (isInstructionTriviallyDead(Inst, TLI))
+ DeadInsts.push_back(Inst);
+
+ // Replace PN with ExitVal if that is legal and does not break LCSSA.
+ if (PN->getNumIncomingValues() == 1 &&
+ LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
+ PN->replaceAllUsesWith(ExitVal);
+ PN->eraseFromParent();
+ }
+ }
+
+ // The insertion point instruction may have been deleted; clear it out
+ // so that the rewriter doesn't trip over it later.
+ Rewriter.clearInsertPoint();
+ return NumReplaced;
+}
+
+/// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for
+/// \p OrigLoop.
+void llvm::setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop,
+ Loop *RemainderLoop, uint64_t UF) {
+ assert(UF > 0 && "Zero unrolled factor is not supported");
+ assert(UnrolledLoop != RemainderLoop &&
+ "Unrolled and Remainder loops are expected to distinct");
+
+ // Get number of iterations in the original scalar loop.
+ unsigned OrigLoopInvocationWeight = 0;
+ std::optional<unsigned> OrigAverageTripCount =
+ getLoopEstimatedTripCount(OrigLoop, &OrigLoopInvocationWeight);
+ if (!OrigAverageTripCount)
+ return;
+
+ // Calculate number of iterations in unrolled loop.
+ unsigned UnrolledAverageTripCount = *OrigAverageTripCount / UF;
+ // Calculate number of iterations for remainder loop.
+ unsigned RemainderAverageTripCount = *OrigAverageTripCount % UF;
+
+ setLoopEstimatedTripCount(UnrolledLoop, UnrolledAverageTripCount,
+ OrigLoopInvocationWeight);
+ setLoopEstimatedTripCount(RemainderLoop, RemainderAverageTripCount,
+ OrigLoopInvocationWeight);
+}
+
+/// Utility that implements appending of loops onto a worklist.
+/// Loops are added in preorder (analogous to reverse postorder for trees),
+/// and the worklist is processed LIFO.
+template <typename RangeT>
+void llvm::appendReversedLoopsToWorklist(
+ RangeT &&Loops, SmallPriorityWorklist<Loop *, 4> &Worklist) {
+ // We use an internal worklist to build up the preorder traversal without
+ // recursion.
+ SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist;
+
+ // We walk the initial sequence of loops in reverse because we generally want
+ // to visit defs before uses and the worklist is LIFO.
+ for (Loop *RootL : Loops) {
+ assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");
+ assert(PreOrderWorklist.empty() &&
+ "Must start with an empty preorder walk worklist.");
+ PreOrderWorklist.push_back(RootL);
+ do {
+ Loop *L = PreOrderWorklist.pop_back_val();
+ PreOrderWorklist.append(L->begin(), L->end());
+ PreOrderLoops.push_back(L);
+ } while (!PreOrderWorklist.empty());
+
+ Worklist.insert(std::move(PreOrderLoops));
+ PreOrderLoops.clear();
+ }
+}
+
+template <typename RangeT>
+void llvm::appendLoopsToWorklist(RangeT &&Loops,
+ SmallPriorityWorklist<Loop *, 4> &Worklist) {
+ appendReversedLoopsToWorklist(reverse(Loops), Worklist);
+}
+
+template void llvm::appendLoopsToWorklist<ArrayRef<Loop *> &>(
+ ArrayRef<Loop *> &Loops, SmallPriorityWorklist<Loop *, 4> &Worklist);
+
+template void
+llvm::appendLoopsToWorklist<Loop &>(Loop &L,
+ SmallPriorityWorklist<Loop *, 4> &Worklist);
+
+void llvm::appendLoopsToWorklist(LoopInfo &LI,
+ SmallPriorityWorklist<Loop *, 4> &Worklist) {
+ appendReversedLoopsToWorklist(LI, Worklist);
+}
+
+Loop *llvm::cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
+ LoopInfo *LI, LPPassManager *LPM) {
+ Loop &New = *LI->AllocateLoop();
+ if (PL)
+ PL->addChildLoop(&New);
+ else
+ LI->addTopLevelLoop(&New);
+
+ if (LPM)
+ LPM->addLoop(New);
+
+ // Add all of the blocks in L to the new loop.
+ for (BasicBlock *BB : L->blocks())
+ if (LI->getLoopFor(BB) == L)
+ New.addBasicBlockToLoop(cast<BasicBlock>(VM[BB]), *LI);
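+ // Blocks owned by subloops are registered when the subloops themselves are
+ // cloned below, since addBasicBlockToLoop also adds a block to all parents.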
+
+ // Add all of the subloops to the new loop.
+ for (Loop *I : *L)
+ cloneLoop(I, &New, VM, LI, LPM);
+
+ return &New;
+}
+
+/// IR Values for the lower and upper bounds of a pointer evolution. We
+/// need to use value-handles because SCEV expansion can invalidate previously
+/// expanded values. Thus expansion of a pointer can invalidate the bounds for
+/// a previous one.
+struct PointerBounds {
+ TrackingVH<Value> Start;
+ TrackingVH<Value> End;
+};
+
+/// Expand code for the lower and upper bound of the pointer group \p CG
+/// in \p TheLoop. \return the values for the bounds.
+static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG,
+ Loop *TheLoop, Instruction *Loc,
+ SCEVExpander &Exp) {
+ LLVMContext &Ctx = Loc->getContext();
+ Type *PtrArithTy = Type::getInt8PtrTy(Ctx, CG->AddressSpace);
+
+ Value *Start = nullptr, *End = nullptr;
+ LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
+ Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
+ End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
+ if (CG->NeedsFreeze) {
+ IRBuilder<> Builder(Loc);
+ Start = Builder.CreateFreeze(Start, Start->getName() + ".fr");
+ End = Builder.CreateFreeze(End, End->getName() + ".fr");
+ }
+ LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n");
+ return {Start, End};
+}
+
+/// Turns a collection of checks into a collection of expanded upper and
+/// lower bounds for both pointers in the check.
+static SmallVector<std::pair<PointerBounds, PointerBounds>, 4>
+expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L,
+ Instruction *Loc, SCEVExpander &Exp) {
+ SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;
+
+ // Here we're relying on the SCEV Expander's cache to only emit code for the
+ // same bounds once.
+ transform(PointerChecks, std::back_inserter(ChecksWithBounds),
+ [&](const RuntimePointerCheck &Check) {
+ PointerBounds First = expandBounds(Check.first, L, Loc, Exp),
+ Second = expandBounds(Check.second, L, Loc, Exp);
+ return std::make_pair(First, Second);
+ });
+
+ return ChecksWithBounds;
+}
+
+Value *llvm::addRuntimeChecks(
+ Instruction *Loc, Loop *TheLoop,
+ const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
+ SCEVExpander &Exp) {
+ // TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible.
+ // TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if possible
+ auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, Exp);
+
+ LLVMContext &Ctx = Loc->getContext();
+ IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx,
+ Loc->getModule()->getDataLayout());
+ ChkBuilder.SetInsertPoint(Loc);
+ // Our instructions might fold to a constant.
+ Value *MemoryRuntimeCheck = nullptr;
+
+ for (const auto &Check : ExpandedChecks) {
+ const PointerBounds &A = Check.first, &B = Check.second;
+ // Check if two pointers (A and B) conflict where conflict is computed as:
+ // start(A) <= end(B) && start(B) <= end(A)
+ unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
+ unsigned AS1 = B.Start->getType()->getPointerAddressSpace();
+
+ assert((AS0 == B.End->getType()->getPointerAddressSpace()) &&
+ (AS1 == A.End->getType()->getPointerAddressSpace()) &&
+ "Trying to bounds check pointers with different address spaces");
+
+ Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
+ Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
+
+ Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc");
+ Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc");
+ Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc");
+ Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc");
+
+ // [A|B].Start points to the first accessed byte under base [A|B].
+ // [A|B].End points to the last accessed byte, plus one.
+ // There is no conflict when the intervals are disjoint:
+ // NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
+ //
+ // bound0 = (B.Start < A.End)
+ // bound1 = (A.Start < B.End)
+ // IsConflict = bound0 & bound1
+ Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0");
+ Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1");
+ Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
+ if (MemoryRuntimeCheck) {
+ IsConflict =
+ ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
+ }
+ MemoryRuntimeCheck = IsConflict;
+ }
+
+ return MemoryRuntimeCheck;
+}
+
+Value *llvm::addDiffRuntimeChecks(
+ Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander,
+ function_ref<Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC) {
+
+ LLVMContext &Ctx = Loc->getContext();
+ IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx,
+ Loc->getModule()->getDataLayout());
+ ChkBuilder.SetInsertPoint(Loc);
+ // Our instructions might fold to a constant.
+ Value *MemoryRuntimeCheck = nullptr;
+
+ for (const auto &C : Checks) {
+ Type *Ty = C.SinkStart->getType();
+ // Compute VF * IC * AccessSize.
+ auto *VFTimesUFTimesSize =
+ ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
+ ConstantInt::get(Ty, IC * C.AccessSize));
+ Value *Sink = Expander.expandCodeFor(C.SinkStart, Ty, Loc);
+ Value *Src = Expander.expandCodeFor(C.SrcStart, Ty, Loc);
+ if (C.NeedsFreeze) {
+ IRBuilder<> Builder(Loc);
+ Sink = Builder.CreateFreeze(Sink, Sink->getName() + ".fr");
+ Src = Builder.CreateFreeze(Src, Src->getName() + ".fr");
+ }
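+ // Report a conflict if the sink starts fewer than VF * IC * AccessSize
+ // bytes after the source (unsigned distance check).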
+ Value *Diff = ChkBuilder.CreateSub(Sink, Src);
+ Value *IsConflict =
+ ChkBuilder.CreateICmpULT(Diff, VFTimesUFTimesSize, "diff.check");
+
+ if (MemoryRuntimeCheck) {
+ IsConflict =
+ ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
+ }
+ MemoryRuntimeCheck = IsConflict;
+ }
+
+ return MemoryRuntimeCheck;
+}
+
+std::optional<IVConditionInfo>
+llvm::hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold,
+ const MemorySSA &MSSA, AAResults &AA) {
+ auto *TI = dyn_cast<BranchInst>(L.getHeader()->getTerminator());
+ if (!TI || !TI->isConditional())
+ return {};
+
+ auto *CondI = dyn_cast<CmpInst>(TI->getCondition());
+ // The case with the condition outside the loop should already be handled
+ // earlier.
+ if (!CondI || !L.contains(CondI))
+ return {};
+
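+ // Collect the in-loop instructions computing the condition; these are the
+ // candidates to duplicate outside the loop for partial unswitching.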
+ SmallVector<Instruction *> InstToDuplicate;
+ InstToDuplicate.push_back(CondI);
+
+ SmallVector<Value *, 4> WorkList;
+ WorkList.append(CondI->op_begin(), CondI->op_end());
+
+ SmallVector<MemoryAccess *, 4> AccessesToCheck;
+ SmallVector<MemoryLocation, 4> AccessedLocs;
+ while (!WorkList.empty()) {
+ Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val());
+ if (!I || !L.contains(I))
+ continue;
+
+ // TODO: support additional instructions.
+ if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I))
+ return {};
+
+ // Do not duplicate volatile and atomic loads.
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ if (LI->isVolatile() || LI->isAtomic())
+ return {};
+
+ InstToDuplicate.push_back(I);
+ if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
+ if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
+ // Queue the defining access to check for alias checks.
+ AccessesToCheck.push_back(MemUse->getDefiningAccess());
+ AccessedLocs.push_back(MemoryLocation::get(I));
+ } else {
+ // MemoryDefs may clobber the location or may be atomic memory
+ // operations. Bail out.
+ return {};
+ }
+ }
+ WorkList.append(I->op_begin(), I->op_end());
+ }
+
+ if (InstToDuplicate.empty())
+ return {};
+
+ SmallVector<BasicBlock *, 4> ExitingBlocks;
+ L.getExitingBlocks(ExitingBlocks);
+ auto HasNoClobbersOnPath =
+ [&L, &AA, &AccessedLocs, &ExitingBlocks, &InstToDuplicate,
+ MSSAThreshold](BasicBlock *Succ, BasicBlock *Header,
+ SmallVector<MemoryAccess *, 4> AccessesToCheck)
+ -> std::optional<IVConditionInfo> {
+ IVConditionInfo Info;
+ // First, collect all blocks in the loop that are on a path from Succ
+ // to the header.
+ SmallVector<BasicBlock *, 4> WorkList;
+ WorkList.push_back(Succ);
+ WorkList.push_back(Header);
+ SmallPtrSet<BasicBlock *, 4> Seen;
+ Seen.insert(Header);
+ Info.PathIsNoop &=
+ all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); });
+
+ while (!WorkList.empty()) {
+ BasicBlock *Current = WorkList.pop_back_val();
+ if (!L.contains(Current))
+ continue;
+ const auto &SeenIns = Seen.insert(Current);
+ if (!SeenIns.second)
+ continue;
+
+ Info.PathIsNoop &= all_of(
+ *Current, [](Instruction &I) { return !I.mayHaveSideEffects(); });
+ WorkList.append(succ_begin(Current), succ_end(Current));
+ }
+
+ // Require at least 2 blocks on a path through the loop. This skips
+ // paths that directly exit the loop.
+ if (Seen.size() < 2)
+ return {};
+
+ // Next, check if there are any MemoryDefs that are on the path through
+ // the loop (in the Seen set) and they may-alias any of the locations in
+ // AccessedLocs. If that is the case, they may modify the condition and
+ // partial unswitching is not possible.
+ SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
+ while (!AccessesToCheck.empty()) {
+ MemoryAccess *Current = AccessesToCheck.pop_back_val();
+ auto SeenI = SeenAccesses.insert(Current);
+ if (!SeenI.second || !Seen.contains(Current->getBlock()))
+ continue;
+
+ // Bail out if we have exceeded the threshold.
+ if (SeenAccesses.size() >= MSSAThreshold)
+ return {};
+
+ // MemoryUses are read-only accesses.
+ if (isa<MemoryUse>(Current))
+ continue;
+
+ // For a MemoryDef, check if it aliases any of the locations feeding
+ // the original condition.
+ if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
+ if (any_of(AccessedLocs, [&AA, CurrentDef](MemoryLocation &Loc) {
+ return isModSet(
+ AA.getModRefInfo(CurrentDef->getMemoryInst(), Loc));
+ }))
+ return {};
+ }
+
+ for (Use &U : Current->uses())
+ AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
+ }
+
+ // We could also allow loops with known trip counts without mustprogress,
+ // but ScalarEvolution may not be available.
+ Info.PathIsNoop &= isMustProgress(&L);
+
+ // If the path is considered a no-op so far, check if it reaches a
+ // single exit block without any phis. This ensures no values from the
+ // loop are used outside of the loop.
+ if (Info.PathIsNoop) {
+ for (auto *Exiting : ExitingBlocks) {
+ if (!Seen.contains(Exiting))
+ continue;
+ for (auto *Succ : successors(Exiting)) {
+ if (L.contains(Succ))
+ continue;
+
+ Info.PathIsNoop &= Succ->phis().empty() &&
+ (!Info.ExitForPath || Info.ExitForPath == Succ);
+ if (!Info.PathIsNoop)
+ break;
+ assert((!Info.ExitForPath || Info.ExitForPath == Succ) &&
+ "cannot have multiple exit blocks");
+ Info.ExitForPath = Succ;
+ }
+ }
+ }
+ if (!Info.ExitForPath)
+ Info.PathIsNoop = false;
+
+ Info.InstToDuplicate = InstToDuplicate;
+ return Info;
+ };
+
+ // If we branch to the same successor, partial unswitching will not be
+ // beneficial.
+ if (TI->getSuccessor(0) == TI->getSuccessor(1))
+ return {};
+
+ if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L.getHeader(),
+ AccessesToCheck)) {
+ Info->KnownValue = ConstantInt::getTrue(TI->getContext());
+ return Info;
+ }
+ if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L.getHeader(),
+ AccessesToCheck)) {
+ Info->KnownValue = ConstantInt::getFalse(TI->getContext());
+ return Info;
+ }
+
+ return {};
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LoopVersioning.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LoopVersioning.cpp
new file mode 100644
index 0000000000..17e71cf5a6
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/LoopVersioning.cpp
@@ -0,0 +1,356 @@
+//===- LoopVersioning.cpp - Utility to version a loop ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a utility class to perform loop versioning. The versioned
+// loop speculates that otherwise may-aliasing memory accesses don't overlap and
+// emits checks to prove this.
+//
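+// After versioning, the CFG looks roughly like this (block and value names
+// are illustrative):
+//
+//   lver.check:                      ; runtime memory / SCEV checks
+//     %lver.safe = or i1 %memcheck, %scevcheck
+//     br i1 %lver.safe, label %ph.lver.orig, label %ph
+//
+//   ph.lver.orig:                    ; fall-back: original loop, checks failed
+//   ph:                              ; versioned loop, may assume no aliasing
+//
+// and both loop copies rejoin in the original exit block.
+//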
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LoopVersioning.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/InstSimplifyFolder.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ AnnotateNoAlias("loop-version-annotate-no-alias", cl::init(true),
+ cl::Hidden,
+ cl::desc("Add no-alias annotation for instructions that "
+ "are disambiguated by memchecks"));
+
+LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI,
+ ArrayRef<RuntimePointerCheck> Checks, Loop *L,
+ LoopInfo *LI, DominatorTree *DT,
+ ScalarEvolution *SE)
+ : VersionedLoop(L), AliasChecks(Checks.begin(), Checks.end()),
+ Preds(LAI.getPSE().getPredicate()), LAI(LAI), LI(LI), DT(DT),
+ SE(SE) {
+}
+
+void LoopVersioning::versionLoop(
+ const SmallVectorImpl<Instruction *> &DefsUsedOutside) {
+ assert(VersionedLoop->getUniqueExitBlock() && "No single exit block");
+ assert(VersionedLoop->isLoopSimplifyForm() &&
+ "Loop is not in loop-simplify form");
+
+ Value *MemRuntimeCheck;
+ Value *SCEVRuntimeCheck;
+ Value *RuntimeCheck = nullptr;
+
+ // Add the memcheck in the original preheader (this is empty initially).
+ BasicBlock *RuntimeCheckBB = VersionedLoop->getLoopPreheader();
+ const auto &RtPtrChecking = *LAI.getRuntimePointerChecking();
+
+ SCEVExpander Exp2(*RtPtrChecking.getSE(),
+ VersionedLoop->getHeader()->getModule()->getDataLayout(),
+ "induction");
+ MemRuntimeCheck = addRuntimeChecks(RuntimeCheckBB->getTerminator(),
+ VersionedLoop, AliasChecks, Exp2);
+
+ SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(),
+ "scev.check");
+ SCEVRuntimeCheck =
+ Exp.expandCodeForPredicate(&Preds, RuntimeCheckBB->getTerminator());
+
+ IRBuilder<InstSimplifyFolder> Builder(
+ RuntimeCheckBB->getContext(),
+ InstSimplifyFolder(RuntimeCheckBB->getModule()->getDataLayout()));
+ if (MemRuntimeCheck && SCEVRuntimeCheck) {
+ Builder.SetInsertPoint(RuntimeCheckBB->getTerminator());
+ RuntimeCheck =
+ Builder.CreateOr(MemRuntimeCheck, SCEVRuntimeCheck, "lver.safe");
+ } else
+ RuntimeCheck = MemRuntimeCheck ? MemRuntimeCheck : SCEVRuntimeCheck;
+
+ assert(RuntimeCheck && "called even though we don't need "
+ "any runtime checks");
+
+ // Rename the block to make the IR more readable.
+ RuntimeCheckBB->setName(VersionedLoop->getHeader()->getName() +
+ ".lver.check");
+
+ // Create empty preheader for the loop (and after cloning for the
+ // non-versioned loop).
+ BasicBlock *PH =
+ SplitBlock(RuntimeCheckBB, RuntimeCheckBB->getTerminator(), DT, LI,
+ nullptr, VersionedLoop->getHeader()->getName() + ".ph");
+
+ // Clone the loop including the preheader.
+ //
+ // FIXME: This does not currently preserve SimplifyLoop because the exit
+ // block is a join between the two loops.
+ SmallVector<BasicBlock *, 8> NonVersionedLoopBlocks;
+ NonVersionedLoop =
+ cloneLoopWithPreheader(PH, RuntimeCheckBB, VersionedLoop, VMap,
+ ".lver.orig", LI, DT, NonVersionedLoopBlocks);
+ remapInstructionsInBlocks(NonVersionedLoopBlocks, VMap);
+
+ // Insert the conditional branch based on the result of the memchecks.
+ Instruction *OrigTerm = RuntimeCheckBB->getTerminator();
+ Builder.SetInsertPoint(OrigTerm);
+ Builder.CreateCondBr(RuntimeCheck, NonVersionedLoop->getLoopPreheader(),
+ VersionedLoop->getLoopPreheader());
+ OrigTerm->eraseFromParent();
+
+ // The loops merge in the original exit block. This is now dominated by the
+ // memchecking block.
+ DT->changeImmediateDominator(VersionedLoop->getExitBlock(), RuntimeCheckBB);
+
+ // Adds the necessary PHI nodes for the versioned loops based on the
+ // loop-defined values used outside of the loop.
+ addPHINodes(DefsUsedOutside);
+ formDedicatedExitBlocks(NonVersionedLoop, DT, LI, nullptr, true);
+ formDedicatedExitBlocks(VersionedLoop, DT, LI, nullptr, true);
+ assert(NonVersionedLoop->isLoopSimplifyForm() &&
+ VersionedLoop->isLoopSimplifyForm() &&
+ "The versioned loops should be in simplify form.");
+}
+
+void LoopVersioning::addPHINodes(
+ const SmallVectorImpl<Instruction *> &DefsUsedOutside) {
+ BasicBlock *PHIBlock = VersionedLoop->getExitBlock();
+ assert(PHIBlock && "No single successor to loop exit block");
+ PHINode *PN;
+
+ // First add a single-operand PHI for each instruction in DefsUsedOutside if
+ // one does not exist yet.
+ for (auto *Inst : DefsUsedOutside) {
+ // See if we have a single-operand PHI with the value defined by the
+ // original loop.
+ for (auto I = PHIBlock->begin(); (PN = dyn_cast<PHINode>(I)); ++I) {
+ if (PN->getIncomingValue(0) == Inst) {
+ SE->forgetValue(PN);
+ break;
+ }
+ }
+ // If not, create it.
+ if (!PN) {
+ PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver",
+ &PHIBlock->front());
+ SmallVector<User*, 8> UsersToUpdate;
+ for (User *U : Inst->users())
+ if (!VersionedLoop->contains(cast<Instruction>(U)->getParent()))
+ UsersToUpdate.push_back(U);
+ for (User *U : UsersToUpdate)
+ U->replaceUsesOfWith(Inst, PN);
+ PN->addIncoming(Inst, VersionedLoop->getExitingBlock());
+ }
+ }
+
+ // Then for each PHI add the operand for the edge from the cloned loop.
+ for (auto I = PHIBlock->begin(); (PN = dyn_cast<PHINode>(I)); ++I) {
+ assert(PN->getNumOperands() == 1 &&
+ "Exit block should only have on predecessor");
+
+ // If the definition was cloned, use the cloned value; otherwise use the
+ // original value.
+ Value *ClonedValue = PN->getIncomingValue(0);
+ auto Mapped = VMap.find(ClonedValue);
+ if (Mapped != VMap.end())
+ ClonedValue = Mapped->second;
+
+ PN->addIncoming(ClonedValue, NonVersionedLoop->getExitingBlock());
+ }
+}
+
+void LoopVersioning::prepareNoAliasMetadata() {
+ // We need to turn the no-alias relation between pointer checking groups into
+ // no-aliasing annotations between instructions.
+ //
+ // We accomplish this by mapping each pointer checking group (a set of
+ // pointers memchecked together) to an alias scope and then also mapping each
+ // group to the list of scopes it can't alias.
+
+ const RuntimePointerChecking *RtPtrChecking = LAI.getRuntimePointerChecking();
+ LLVMContext &Context = VersionedLoop->getHeader()->getContext();
+
+ // First allocate an aliasing scope for each pointer checking group.
+ //
+ // While traversing through the checking groups in the loop, also create a
+ // reverse map from pointers to the pointer checking group they were assigned
+ // to.
+ MDBuilder MDB(Context);
+ MDNode *Domain = MDB.createAnonymousAliasScopeDomain("LVerDomain");
+
+ for (const auto &Group : RtPtrChecking->CheckingGroups) {
+ GroupToScope[&Group] = MDB.createAnonymousAliasScope(Domain);
+
+ for (unsigned PtrIdx : Group.Members)
+ PtrToGroup[RtPtrChecking->getPointerInfo(PtrIdx).PointerValue] = &Group;
+ }
+
+ // Go through the checks and for each pointer group, collect the scopes for
+ // each non-aliasing pointer group.
+ DenseMap<const RuntimeCheckingPtrGroup *, SmallVector<Metadata *, 4>>
+ GroupToNonAliasingScopes;
+
+ for (const auto &Check : AliasChecks)
+ GroupToNonAliasingScopes[Check.first].push_back(GroupToScope[Check.second]);
+
+ // Finally, transform the above into the scope-list MDNodes that the metadata
+ // actually uses.
+
+ for (auto Pair : GroupToNonAliasingScopes)
+ GroupToNonAliasingScopeList[Pair.first] = MDNode::get(Context, Pair.second);
+}
+
+void LoopVersioning::annotateLoopWithNoAlias() {
+ if (!AnnotateNoAlias)
+ return;
+
+ // First prepare the maps.
+ prepareNoAliasMetadata();
+
+ // Add the scope and no-alias metadata to the instructions.
+ for (Instruction *I : LAI.getDepChecker().getMemoryInstructions()) {
+ annotateInstWithNoAlias(I);
+ }
+}
+
+void LoopVersioning::annotateInstWithNoAlias(Instruction *VersionedInst,
+ const Instruction *OrigInst) {
+ if (!AnnotateNoAlias)
+ return;
+
+ LLVMContext &Context = VersionedLoop->getHeader()->getContext();
+ const Value *Ptr = isa<LoadInst>(OrigInst)
+ ? cast<LoadInst>(OrigInst)->getPointerOperand()
+ : cast<StoreInst>(OrigInst)->getPointerOperand();
+
+ // Find the group for the pointer and then add the scope metadata.
+ auto Group = PtrToGroup.find(Ptr);
+ if (Group != PtrToGroup.end()) {
+ VersionedInst->setMetadata(
+ LLVMContext::MD_alias_scope,
+ MDNode::concatenate(
+ VersionedInst->getMetadata(LLVMContext::MD_alias_scope),
+ MDNode::get(Context, GroupToScope[Group->second])));
+
+ // Add the no-alias metadata.
+ auto NonAliasingScopeList = GroupToNonAliasingScopeList.find(Group->second);
+ if (NonAliasingScopeList != GroupToNonAliasingScopeList.end())
+ VersionedInst->setMetadata(
+ LLVMContext::MD_noalias,
+ MDNode::concatenate(
+ VersionedInst->getMetadata(LLVMContext::MD_noalias),
+ NonAliasingScopeList->second));
+ }
+}
+
+namespace {
+bool runImpl(LoopInfo *LI, LoopAccessInfoManager &LAIs, DominatorTree *DT,
+ ScalarEvolution *SE) {
+ // Build up a worklist of inner-loops to version. This is necessary as the
+ // act of versioning a loop creates new loops and can invalidate iterators
+ // across the loops.
+ SmallVector<Loop *, 8> Worklist;
+
+ for (Loop *TopLevelLoop : *LI)
+ for (Loop *L : depth_first(TopLevelLoop))
+ // We only handle inner-most loops.
+ if (L->isInnermost())
+ Worklist.push_back(L);
+
+ // Now walk the identified inner loops.
+ bool Changed = false;
+ for (Loop *L : Worklist) {
+ if (!L->isLoopSimplifyForm() || !L->isRotatedForm() ||
+ !L->getExitingBlock())
+ continue;
+ const LoopAccessInfo &LAI = LAIs.getInfo(*L);
+ if (!LAI.hasConvergentOp() &&
+ (LAI.getNumRuntimePointerChecks() ||
+ !LAI.getPSE().getPredicate().isAlwaysTrue())) {
+ LoopVersioning LVer(LAI, LAI.getRuntimePointerChecking()->getChecks(), L,
+ LI, DT, SE);
+ LVer.versionLoop();
+ LVer.annotateLoopWithNoAlias();
+ Changed = true;
+ LAIs.clear();
+ }
+ }
+
+ return Changed;
+}
+
+/// Also expose this as a pass. Currently this is only used for
+/// unit-testing. It adds all memchecks necessary to remove all may-aliasing
+/// array accesses from the loop.
+class LoopVersioningLegacyPass : public FunctionPass {
+public:
+ LoopVersioningLegacyPass() : FunctionPass(ID) {
+ initializeLoopVersioningLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto &LAIs = getAnalysis<LoopAccessLegacyAnalysis>().getLAIs();
+ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+
+ return runImpl(LI, LAIs, DT, SE);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<LoopAccessLegacyAnalysis>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ }
+
+ static char ID;
+};
+}
+
+#define LVER_OPTION "loop-versioning"
+#define DEBUG_TYPE LVER_OPTION
+
+char LoopVersioningLegacyPass::ID;
+static const char LVer_name[] = "Loop Versioning";
+
+INITIALIZE_PASS_BEGIN(LoopVersioningLegacyPass, LVER_OPTION, LVer_name, false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(LoopVersioningLegacyPass, LVER_OPTION, LVer_name, false,
+ false)
+
+namespace llvm {
+FunctionPass *createLoopVersioningLegacyPass() {
+ return new LoopVersioningLegacyPass();
+}
+
+PreservedAnalyses LoopVersioningPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ LoopAccessInfoManager &LAIs = AM.getResult<LoopAccessAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+
+ if (runImpl(&LI, LAIs, &DT, &SE))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
+} // namespace llvm
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LowerAtomic.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LowerAtomic.cpp
new file mode 100644
index 0000000000..b6f40de0da
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/LowerAtomic.cpp
@@ -0,0 +1,114 @@
+//===- LowerAtomic.cpp - Lower atomic intrinsics --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers atomic intrinsics to non-atomic form for use in a known
+// non-preemptible environment.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LowerAtomic.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "loweratomic"
+
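+// Lower a cmpxchg to a plain, non-atomic load/compare/select/store sequence.
+// Illustrative IR (types and ordering shown only as an example):
+//
+//   %pair = cmpxchg ptr %p, i32 %cmp, i32 %new monotonic monotonic
+//
+// becomes roughly
+//
+//   %orig  = load i32, ptr %p
+//   %equal = icmp eq i32 %orig, %cmp
+//   %res   = select i1 %equal, i32 %new, i32 %orig
+//   store i32 %res, ptr %p
+//
+// with the {i32, i1} result pair rebuilt via insertvalue.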
+bool llvm::lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
+ IRBuilder<> Builder(CXI);
+ Value *Ptr = CXI->getPointerOperand();
+ Value *Cmp = CXI->getCompareOperand();
+ Value *Val = CXI->getNewValOperand();
+
+ LoadInst *Orig = Builder.CreateLoad(Val->getType(), Ptr);
+ Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
+ Value *Res = Builder.CreateSelect(Equal, Val, Orig);
+ Builder.CreateStore(Res, Ptr);
+
+ Res = Builder.CreateInsertValue(PoisonValue::get(CXI->getType()), Orig, 0);
+ Res = Builder.CreateInsertValue(Res, Equal, 1);
+
+ CXI->replaceAllUsesWith(Res);
+ CXI->eraseFromParent();
+ return true;
+}
+
+Value *llvm::buildAtomicRMWValue(AtomicRMWInst::BinOp Op,
+ IRBuilderBase &Builder, Value *Loaded,
+ Value *Val) {
+ Value *NewVal;
+ switch (Op) {
+ case AtomicRMWInst::Xchg:
+ return Val;
+ case AtomicRMWInst::Add:
+ return Builder.CreateAdd(Loaded, Val, "new");
+ case AtomicRMWInst::Sub:
+ return Builder.CreateSub(Loaded, Val, "new");
+ case AtomicRMWInst::And:
+ return Builder.CreateAnd(Loaded, Val, "new");
+ case AtomicRMWInst::Nand:
+ return Builder.CreateNot(Builder.CreateAnd(Loaded, Val), "new");
+ case AtomicRMWInst::Or:
+ return Builder.CreateOr(Loaded, Val, "new");
+ case AtomicRMWInst::Xor:
+ return Builder.CreateXor(Loaded, Val, "new");
+ case AtomicRMWInst::Max:
+ NewVal = Builder.CreateICmpSGT(Loaded, Val);
+ return Builder.CreateSelect(NewVal, Loaded, Val, "new");
+ case AtomicRMWInst::Min:
+ NewVal = Builder.CreateICmpSLE(Loaded, Val);
+ return Builder.CreateSelect(NewVal, Loaded, Val, "new");
+ case AtomicRMWInst::UMax:
+ NewVal = Builder.CreateICmpUGT(Loaded, Val);
+ return Builder.CreateSelect(NewVal, Loaded, Val, "new");
+ case AtomicRMWInst::UMin:
+ NewVal = Builder.CreateICmpULE(Loaded, Val);
+ return Builder.CreateSelect(NewVal, Loaded, Val, "new");
+ case AtomicRMWInst::FAdd:
+ return Builder.CreateFAdd(Loaded, Val, "new");
+ case AtomicRMWInst::FSub:
+ return Builder.CreateFSub(Loaded, Val, "new");
+ case AtomicRMWInst::FMax:
+ return Builder.CreateMaxNum(Loaded, Val);
+ case AtomicRMWInst::FMin:
+ return Builder.CreateMinNum(Loaded, Val);
+ case AtomicRMWInst::UIncWrap: {
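+ // uinc_wrap: new = (old u>= val) ? 0 : old + 1.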
+ Constant *One = ConstantInt::get(Loaded->getType(), 1);
+ Value *Inc = Builder.CreateAdd(Loaded, One);
+ Value *Cmp = Builder.CreateICmpUGE(Loaded, Val);
+ Constant *Zero = ConstantInt::get(Loaded->getType(), 0);
+ return Builder.CreateSelect(Cmp, Zero, Inc, "new");
+ }
+ case AtomicRMWInst::UDecWrap: {
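+ // udec_wrap: new = (old == 0 || old u> val) ? val : old - 1.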
+ Constant *Zero = ConstantInt::get(Loaded->getType(), 0);
+ Constant *One = ConstantInt::get(Loaded->getType(), 1);
+
+ Value *Dec = Builder.CreateSub(Loaded, One);
+ Value *CmpEq0 = Builder.CreateICmpEQ(Loaded, Zero);
+ Value *CmpOldGtVal = Builder.CreateICmpUGT(Loaded, Val);
+ Value *Or = Builder.CreateOr(CmpEq0, CmpOldGtVal);
+ return Builder.CreateSelect(Or, Val, Dec, "new");
+ }
+ default:
+ llvm_unreachable("Unknown atomic op");
+ }
+}
+
+bool llvm::lowerAtomicRMWInst(AtomicRMWInst *RMWI) {
+ IRBuilder<> Builder(RMWI);
+ Value *Ptr = RMWI->getPointerOperand();
+ Value *Val = RMWI->getValOperand();
+
+ LoadInst *Orig = Builder.CreateLoad(Val->getType(), Ptr);
+ Value *Res = buildAtomicRMWValue(RMWI->getOperation(), Builder, Orig, Val);
+ Builder.CreateStore(Res, Ptr);
+ RMWI->replaceAllUsesWith(Orig);
+ RMWI->eraseFromParent();
+ return true;
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LowerGlobalDtors.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LowerGlobalDtors.cpp
new file mode 100644
index 0000000000..195c274ff1
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/LowerGlobalDtors.cpp
@@ -0,0 +1,221 @@
+//===-- LowerGlobalDtors.cpp - Lower @llvm.global_dtors -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Lower @llvm.global_dtors.
+///
+/// Implement @llvm.global_dtors by creating wrapper functions that are
+/// registered in @llvm.global_ctors and which contain a call to
+/// `__cxa_atexit` to register their destructor functions.
+///
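+/// For a destructor `dtor` at priority P associated with object A, the pass
+/// emits roughly the following (names illustrative):
+///
+///   static void call_dtors.P.A(void *) { dtor(); }
+///   static void register_call_dtors.P.A() {
+///     if (__cxa_atexit(&call_dtors.P.A, nullptr, &__dso_handle) != 0)
+///       __builtin_trap();
+///   }
+///
+/// and appends register_call_dtors.P.A to @llvm.global_ctors at priority P.
+///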
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LowerGlobalDtors.h"
+
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <map>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "lower-global-dtors"
+
+namespace {
+class LowerGlobalDtorsLegacyPass final : public ModulePass {
+ StringRef getPassName() const override {
+ return "Lower @llvm.global_dtors via `__cxa_atexit`";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ ModulePass::getAnalysisUsage(AU);
+ }
+
+ bool runOnModule(Module &M) override;
+
+public:
+ static char ID;
+ LowerGlobalDtorsLegacyPass() : ModulePass(ID) {
+ initializeLowerGlobalDtorsLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+};
+} // End anonymous namespace
+
+char LowerGlobalDtorsLegacyPass::ID = 0;
+INITIALIZE_PASS(LowerGlobalDtorsLegacyPass, DEBUG_TYPE,
+ "Lower @llvm.global_dtors via `__cxa_atexit`", false, false)
+
+ModulePass *llvm::createLowerGlobalDtorsLegacyPass() {
+ return new LowerGlobalDtorsLegacyPass();
+}
+
+static bool runImpl(Module &M);
+bool LowerGlobalDtorsLegacyPass::runOnModule(Module &M) { return runImpl(M); }
+
+PreservedAnalyses LowerGlobalDtorsPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ bool Changed = runImpl(M);
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
+
+static bool runImpl(Module &M) {
+ GlobalVariable *GV = M.getGlobalVariable("llvm.global_dtors");
+ if (!GV || !GV->hasInitializer())
+ return false;
+
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (!InitList)
+ return false;
+
+ // Validate @llvm.global_dtors' type.
+ auto *ETy = dyn_cast<StructType>(InitList->getType()->getElementType());
+ if (!ETy || ETy->getNumElements() != 3 ||
+ !ETy->getTypeAtIndex(0U)->isIntegerTy() ||
+ !ETy->getTypeAtIndex(1U)->isPointerTy() ||
+ !ETy->getTypeAtIndex(2U)->isPointerTy())
+ return false; // Not (int, ptr, ptr).
+
+ // Collect the contents of @llvm.global_dtors, ordered by priority. Within a
+ // priority, sequences of destructors with the same associated object are
+ // recorded so that we can register them as a group.
+ std::map<
+ uint16_t,
+ std::vector<std::pair<Constant *, std::vector<Constant *>>>
+ > DtorFuncs;
+ for (Value *O : InitList->operands()) {
+ auto *CS = dyn_cast<ConstantStruct>(O);
+ if (!CS)
+ continue; // Malformed.
+
+ auto *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
+ if (!Priority)
+ continue; // Malformed.
+ uint16_t PriorityValue = Priority->getLimitedValue(UINT16_MAX);
+
+ Constant *DtorFunc = CS->getOperand(1);
+ if (DtorFunc->isNullValue())
+ break; // Found a null terminator, skip the rest.
+
+ Constant *Associated = CS->getOperand(2);
+ Associated = cast<Constant>(Associated->stripPointerCasts());
+
+ auto &AtThisPriority = DtorFuncs[PriorityValue];
+ if (AtThisPriority.empty() || AtThisPriority.back().first != Associated) {
+ std::vector<Constant *> NewList;
+ NewList.push_back(DtorFunc);
+ AtThisPriority.push_back(std::make_pair(Associated, NewList));
+ } else {
+ AtThisPriority.back().second.push_back(DtorFunc);
+ }
+ }
+ if (DtorFuncs.empty())
+ return false;
+
+ // extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d);
+ LLVMContext &C = M.getContext();
+ PointerType *VoidStar = Type::getInt8PtrTy(C);
+ Type *AtExitFuncArgs[] = {VoidStar};
+ FunctionType *AtExitFuncTy =
+ FunctionType::get(Type::getVoidTy(C), AtExitFuncArgs,
+ /*isVarArg=*/false);
+
+ FunctionCallee AtExit = M.getOrInsertFunction(
+ "__cxa_atexit",
+ FunctionType::get(Type::getInt32Ty(C),
+ {PointerType::get(AtExitFuncTy, 0), VoidStar, VoidStar},
+ /*isVarArg=*/false));
+
+ // Declare __dso_handle.
+ Type *DsoHandleTy = Type::getInt8Ty(C);
+ Constant *DsoHandle = M.getOrInsertGlobal("__dso_handle", DsoHandleTy, [&] {
+ auto *GV = new GlobalVariable(M, DsoHandleTy, /*isConstant=*/true,
+ GlobalVariable::ExternalWeakLinkage, nullptr,
+ "__dso_handle");
+ GV->setVisibility(GlobalVariable::HiddenVisibility);
+ return GV;
+ });
+
+ // For each unique priority level and associated symbol, generate a function
+ // to call all the destructors at that level, and a function that registers
+ // the former with __cxa_atexit.
+ for (auto &PriorityAndMore : DtorFuncs) {
+ uint16_t Priority = PriorityAndMore.first;
+ uint64_t Id = 0;
+ auto &AtThisPriority = PriorityAndMore.second;
+ for (auto &AssociatedAndMore : AtThisPriority) {
+ Constant *Associated = AssociatedAndMore.first;
+ auto ThisId = Id++;
+
+ Function *CallDtors = Function::Create(
+ AtExitFuncTy, Function::PrivateLinkage,
+ "call_dtors" +
+ (Priority != UINT16_MAX ? (Twine(".") + Twine(Priority))
+ : Twine()) +
+ (AtThisPriority.size() > 1 ? Twine("$") + Twine(ThisId)
+ : Twine()) +
+ (!Associated->isNullValue() ? (Twine(".") + Associated->getName())
+ : Twine()),
+ &M);
+ BasicBlock *BB = BasicBlock::Create(C, "body", CallDtors);
+ FunctionType *VoidVoid = FunctionType::get(Type::getVoidTy(C),
+ /*isVarArg=*/false);
+
+ for (auto *Dtor : reverse(AssociatedAndMore.second))
+ CallInst::Create(VoidVoid, Dtor, "", BB);
+ ReturnInst::Create(C, BB);
+
+ Function *RegisterCallDtors = Function::Create(
+ VoidVoid, Function::PrivateLinkage,
+ "register_call_dtors" +
+ (Priority != UINT16_MAX ? (Twine(".") + Twine(Priority))
+ : Twine()) +
+ (AtThisPriority.size() > 1 ? Twine("$") + Twine(ThisId)
+ : Twine()) +
+ (!Associated->isNullValue() ? (Twine(".") + Associated->getName())
+ : Twine()),
+ &M);
+ BasicBlock *EntryBB = BasicBlock::Create(C, "entry", RegisterCallDtors);
+ BasicBlock *FailBB = BasicBlock::Create(C, "fail", RegisterCallDtors);
+ BasicBlock *RetBB = BasicBlock::Create(C, "return", RegisterCallDtors);
+
+ Value *Null = ConstantPointerNull::get(VoidStar);
+ Value *Args[] = {CallDtors, Null, DsoHandle};
+ Value *Res = CallInst::Create(AtExit, Args, "call", EntryBB);
+ Value *Cmp = new ICmpInst(*EntryBB, ICmpInst::ICMP_NE, Res,
+ Constant::getNullValue(Res->getType()));
+ BranchInst::Create(FailBB, RetBB, Cmp, EntryBB);
+
+ // If `__cxa_atexit` hits out-of-memory, trap, so that we don't misbehave.
+ // This should be very rare, because if the process is running out of
+ // memory before main has even started, something is wrong.
+ CallInst::Create(Intrinsic::getDeclaration(&M, Intrinsic::trap), "",
+ FailBB);
+ new UnreachableInst(C, FailBB);
+
+ ReturnInst::Create(C, RetBB);
+
+ // Now register the registration function with @llvm.global_ctors.
+ appendToGlobalCtors(M, RegisterCallDtors, Priority, Associated);
+ }
+ }
+
+ // Now that we've lowered everything, remove @llvm.global_dtors.
+ GV->eraseFromParent();
+
+ return true;
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LowerIFunc.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LowerIFunc.cpp
new file mode 100644
index 0000000000..18ae0bbe2e
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/LowerIFunc.cpp
@@ -0,0 +1,27 @@
+//===- LowerIFunc.cpp -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements replacing calls to ifuncs by introducing indirect calls.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LowerIFunc.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+/// Replace all call users of ifuncs in the module.
+PreservedAnalyses LowerIFuncPass::run(Module &M, ModuleAnalysisManager &AM) {
+ if (M.ifunc_empty())
+ return PreservedAnalyses::all();
+
+ lowerGlobalIFuncUsersAsGlobalCtor(M, {});
+ return PreservedAnalyses::none();
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LowerInvoke.cpp
new file mode 100644
index 0000000000..6d788857c1
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/LowerInvoke.cpp
@@ -0,0 +1,95 @@
+//===- LowerInvoke.cpp - Eliminate Invoke instructions --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is designed for use by code generators which do not yet
+// support stack unwinding. This pass converts 'invoke' instructions to 'call'
+// instructions, so that any exception-handling 'landingpad' blocks become dead
+// code (which can be removed by running the '-simplifycfg' pass afterwards).
+//
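+// For example,
+//
+//   %r = invoke i32 @f() to label %cont unwind label %lpad
+//
+// becomes
+//
+//   %r = call i32 @f()
+//   br label %cont
+//
+// and the %lpad landing pad becomes unreachable dead code.
+//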
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LowerInvoke.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "lowerinvoke"
+
+STATISTIC(NumInvokes, "Number of invokes replaced");
+
+namespace {
+ class LowerInvokeLegacyPass : public FunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit LowerInvokeLegacyPass() : FunctionPass(ID) {
+ initializeLowerInvokeLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override;
+ };
+}
+
+char LowerInvokeLegacyPass::ID = 0;
+INITIALIZE_PASS(LowerInvokeLegacyPass, "lowerinvoke",
+ "Lower invoke and unwind, for unwindless code generators",
+ false, false)
+
+static bool runImpl(Function &F) {
+ bool Changed = false;
+ for (BasicBlock &BB : F)
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB.getTerminator())) {
+ SmallVector<Value *, 16> CallArgs(II->args());
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ II->getOperandBundlesAsDefs(OpBundles);
+ // Insert a normal call instruction...
+ CallInst *NewCall =
+ CallInst::Create(II->getFunctionType(), II->getCalledOperand(),
+ CallArgs, OpBundles, "", II);
+ NewCall->takeName(II);
+ NewCall->setCallingConv(II->getCallingConv());
+ NewCall->setAttributes(II->getAttributes());
+ NewCall->setDebugLoc(II->getDebugLoc());
+ II->replaceAllUsesWith(NewCall);
+
+ // Insert an unconditional branch to the normal destination.
+ BranchInst::Create(II->getNormalDest(), II);
+
+ // Remove any PHI node entries from the exception destination.
+ II->getUnwindDest()->removePredecessor(&BB);
+
+ // Remove the invoke instruction now.
+ II->eraseFromParent();
+
+ ++NumInvokes;
+ Changed = true;
+ }
+ return Changed;
+}
+
+bool LowerInvokeLegacyPass::runOnFunction(Function &F) {
+ return runImpl(F);
+}
+
+namespace llvm {
+char &LowerInvokePassID = LowerInvokeLegacyPass::ID;
+
+// Public Interface To the LowerInvoke pass.
+FunctionPass *createLowerInvokePass() { return new LowerInvokeLegacyPass(); }
+
+PreservedAnalyses LowerInvokePass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ bool Changed = runImpl(F);
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LowerMemIntrinsics.cpp
new file mode 100644
index 0000000000..165740b552
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -0,0 +1,605 @@
+//===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <optional>
+
+using namespace llvm;
+
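+// Lower a memcpy with a compile-time-constant length into a counted
+// load/store loop over a target-chosen operand type, followed by a
+// straight-line sequence of narrower loads/stores for the remaining bytes.
+// No residual loop is needed because the remainder is known statically.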
+void llvm::createMemCpyLoopKnownSize(
+ Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr,
+ ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile,
+ bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI,
+ std::optional<uint32_t> AtomicElementSize) {
+ // No need to expand zero length copies.
+ if (CopyLen->isZero())
+ return;
+
+ BasicBlock *PreLoopBB = InsertBefore->getParent();
+ BasicBlock *PostLoopBB = nullptr;
+ Function *ParentFunc = PreLoopBB->getParent();
+ LLVMContext &Ctx = PreLoopBB->getContext();
+ const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
+ MDBuilder MDB(Ctx);
+ MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
+ StringRef Name = "MemCopyAliasScope";
+ MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
+
+ unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
+ unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
+
+ Type *TypeOfCopyLen = CopyLen->getType();
+ Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
+ Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
+ AtomicElementSize);
+ assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
+ "Atomic memcpy lowering is not supported for vector operand type");
+
+ unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
+ assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
+ "Atomic memcpy lowering is not supported for selected operand size");
+
+ uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize;
+
+ if (LoopEndCount != 0) {
+ // Split
+ PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split");
+ BasicBlock *LoopBB =
+ BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB);
+ PreLoopBB->getTerminator()->setSuccessor(0, LoopBB);
+
+ IRBuilder<> PLBuilder(PreLoopBB->getTerminator());
+
+ // Cast the Src and Dst pointers to pointers to the loop operand type (if
+ // needed).
+ PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
+ PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
+ if (SrcAddr->getType() != SrcOpType) {
+ SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
+ }
+ if (DstAddr->getType() != DstOpType) {
+ DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
+ }
+
+ Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));
+ Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
+
+ IRBuilder<> LoopBuilder(LoopBB);
+ PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index");
+ LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB);
+ // Loop Body
+ Value *SrcGEP =
+ LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
+ LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
+ PartSrcAlign, SrcIsVolatile);
+ if (!CanOverlap) {
+ // Set alias scope for loads.
+ Load->setMetadata(LLVMContext::MD_alias_scope,
+ MDNode::get(Ctx, NewScope));
+ }
+ Value *DstGEP =
+ LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
+ StoreInst *Store = LoopBuilder.CreateAlignedStore(
+ Load, DstGEP, PartDstAlign, DstIsVolatile);
+ if (!CanOverlap) {
+ // Indicate that stores don't overlap loads.
+ Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
+ }
+ if (AtomicElementSize) {
+ Load->setAtomic(AtomicOrdering::Unordered);
+ Store->setAtomic(AtomicOrdering::Unordered);
+ }
+ Value *NewIndex =
+ LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U));
+ LoopIndex->addIncoming(NewIndex, LoopBB);
+
+ // Create the loop branch condition.
+ Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount);
+ LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI),
+ LoopBB, PostLoopBB);
+ }
+
+ uint64_t BytesCopied = LoopEndCount * LoopOpSize;
+ uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied;
+ if (RemainingBytes) {
+ IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI()
+ : InsertBefore);
+
+ SmallVector<Type *, 5> RemainingOps;
+ TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
+ SrcAS, DstAS, SrcAlign.value(),
+ DstAlign.value(), AtomicElementSize);
+
+ for (auto *OpTy : RemainingOps) {
+ Align PartSrcAlign(commonAlignment(SrcAlign, BytesCopied));
+ Align PartDstAlign(commonAlignment(DstAlign, BytesCopied));
+
+ // Calculate the new index
+ unsigned OperandSize = DL.getTypeStoreSize(OpTy);
+ assert(
+ (!AtomicElementSize || OperandSize % *AtomicElementSize == 0) &&
+ "Atomic memcpy lowering is not supported for selected operand size");
+
+ uint64_t GepIndex = BytesCopied / OperandSize;
+ assert(GepIndex * OperandSize == BytesCopied &&
+ "Division should have no Remainder!");
+ // Cast source to operand type and load
+ PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS);
+ Value *CastedSrc = SrcAddr->getType() == SrcPtrType
+ ? SrcAddr
+ : RBuilder.CreateBitCast(SrcAddr, SrcPtrType);
+ Value *SrcGEP = RBuilder.CreateInBoundsGEP(
+ OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex));
+ LoadInst *Load =
+ RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile);
+ if (!CanOverlap) {
+ // Set alias scope for loads.
+ Load->setMetadata(LLVMContext::MD_alias_scope,
+ MDNode::get(Ctx, NewScope));
+ }
+ // Cast destination to operand type and store.
+ PointerType *DstPtrType = PointerType::get(OpTy, DstAS);
+ Value *CastedDst = DstAddr->getType() == DstPtrType
+ ? DstAddr
+ : RBuilder.CreateBitCast(DstAddr, DstPtrType);
+ Value *DstGEP = RBuilder.CreateInBoundsGEP(
+ OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex));
+ StoreInst *Store = RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
+ DstIsVolatile);
+ if (!CanOverlap) {
+ // Indicate that stores don't overlap loads.
+ Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
+ }
+ if (AtomicElementSize) {
+ Load->setAtomic(AtomicOrdering::Unordered);
+ Store->setAtomic(AtomicOrdering::Unordered);
+ }
+ BytesCopied += OperandSize;
+ }
+ }
+ assert(BytesCopied == CopyLen->getZExtValue() &&
+ "Bytes copied should match size in the call!");
+}
+
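+// Lower a memcpy with a runtime length into a main load/store loop over a
+// target-chosen operand type plus, when needed, a residual loop that copies
+// the tail one byte (or one atomic element) at a time. In pseudo code:
+//
+//   count    = CopyLen / LoopOpSize;
+//   residual = CopyLen % LoopOpSize;
+//   for (i = 0; i != count; ++i)    copy LoopOpSize bytes at i * LoopOpSize;
+//   for (j = 0; j != residual; ++j) copy 1 element at count * LoopOpSize + j;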
+void llvm::createMemCpyLoopUnknownSize(
+ Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
+ Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile,
+ bool CanOverlap, const TargetTransformInfo &TTI,
+ std::optional<uint32_t> AtomicElementSize) {
+ BasicBlock *PreLoopBB = InsertBefore->getParent();
+ BasicBlock *PostLoopBB =
+ PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");
+
+ Function *ParentFunc = PreLoopBB->getParent();
+ const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
+ LLVMContext &Ctx = PreLoopBB->getContext();
+ MDBuilder MDB(Ctx);
+ MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
+ StringRef Name = "MemCopyAliasScope";
+ MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
+
+ unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
+ unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
+
+ Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
+ Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
+ AtomicElementSize);
+ assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
+ "Atomic memcpy lowering is not supported for vector operand type");
+ unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
+ assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
+ "Atomic memcpy lowering is not supported for selected operand size");
+
+ IRBuilder<> PLBuilder(PreLoopBB->getTerminator());
+
+ PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
+ PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
+ if (SrcAddr->getType() != SrcOpType) {
+ SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
+ }
+ if (DstAddr->getType() != DstOpType) {
+ DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
+ }
+
+ // Calculate the loop trip count, and remaining bytes to copy after the loop.
+ Type *CopyLenType = CopyLen->getType();
+ IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType);
+ assert(ILengthType &&
+ "expected size argument to memcpy to be an integer type!");
+ Type *Int8Type = Type::getInt8Ty(Ctx);
+ bool LoopOpIsInt8 = LoopOpType == Int8Type;
+ ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
+ Value *RuntimeLoopCount =
+ LoopOpIsInt8 ? CopyLen : PLBuilder.CreateUDiv(CopyLen, CILoopOpSize);
+ BasicBlock *LoopBB =
+ BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB);
+ IRBuilder<> LoopBuilder(LoopBB);
+
+ Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
+ Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));
+
+ PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index");
+ LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);
+
+ Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
+ LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
+ PartSrcAlign, SrcIsVolatile);
+ if (!CanOverlap) {
+ // Set alias scope for loads.
+ Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope));
+ }
+ Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
+ StoreInst *Store =
+ LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
+ if (!CanOverlap) {
+ // Indicate that stores don't overlap loads.
+ Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
+ }
+ if (AtomicElementSize) {
+ Load->setAtomic(AtomicOrdering::Unordered);
+ Store->setAtomic(AtomicOrdering::Unordered);
+ }
+ Value *NewIndex =
+ LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
+ LoopIndex->addIncoming(NewIndex, LoopBB);
+
+ bool requiresResidual =
+ !LoopOpIsInt8 && !(AtomicElementSize && LoopOpSize == AtomicElementSize);
+ if (requiresResidual) {
+ Type *ResLoopOpType = AtomicElementSize
+ ? Type::getIntNTy(Ctx, *AtomicElementSize * 8)
+ : Int8Type;
+ unsigned ResLoopOpSize = DL.getTypeStoreSize(ResLoopOpType);
+ assert(ResLoopOpSize == (AtomicElementSize ? *AtomicElementSize : 1) &&
+ "Store size is expected to match type size");
+
+ // Compute the residual byte count and the number of bytes copied by the
+ // main loop.
+ Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
+ Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);
+
+ // Loop body for the residual copy.
+ BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual",
+ PreLoopBB->getParent(),
+ PostLoopBB);
+ // Residual loop header.
+ BasicBlock *ResHeaderBB = BasicBlock::Create(
+ Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr);
+
+ // Need to update the pre-loop basic block to branch to the correct place:
+ // branch to the main loop if the iteration count is non-zero, to the
+ // residual loop if the copy size is smaller than one iteration of the main
+ // loop but non-zero, and past the residual loop if the memcpy size is zero.
+ ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
+ PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
+ LoopBB, ResHeaderBB);
+ PreLoopBB->getTerminator()->eraseFromParent();
+
+ LoopBuilder.CreateCondBr(
+ LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
+ ResHeaderBB);
+
+ // Determine if we need to branch to the residual loop or bypass it.
+ IRBuilder<> RHBuilder(ResHeaderBB);
+ RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero),
+ ResLoopBB, PostLoopBB);
+
+ // Copy the residual with a single-element load/store loop.
+ IRBuilder<> ResBuilder(ResLoopBB);
+ PHINode *ResidualIndex =
+ ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index");
+ ResidualIndex->addIncoming(Zero, ResHeaderBB);
+
+ Value *SrcAsResLoopOpType = ResBuilder.CreateBitCast(
+ SrcAddr, PointerType::get(ResLoopOpType, SrcAS));
+ Value *DstAsResLoopOpType = ResBuilder.CreateBitCast(
+ DstAddr, PointerType::get(ResLoopOpType, DstAS));
+ Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
+ Value *SrcGEP = ResBuilder.CreateInBoundsGEP(
+ ResLoopOpType, SrcAsResLoopOpType, FullOffset);
+ LoadInst *Load = ResBuilder.CreateAlignedLoad(ResLoopOpType, SrcGEP,
+ PartSrcAlign, SrcIsVolatile);
+ if (!CanOverlap) {
+ // Set alias scope for loads.
+ Load->setMetadata(LLVMContext::MD_alias_scope,
+ MDNode::get(Ctx, NewScope));
+ }
+ Value *DstGEP = ResBuilder.CreateInBoundsGEP(
+ ResLoopOpType, DstAsResLoopOpType, FullOffset);
+ StoreInst *Store = ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
+ DstIsVolatile);
+ if (!CanOverlap) {
+ // Indicate that stores don't overlap loads.
+ Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
+ }
+ if (AtomicElementSize) {
+ Load->setAtomic(AtomicOrdering::Unordered);
+ Store->setAtomic(AtomicOrdering::Unordered);
+ }
+ Value *ResNewIndex = ResBuilder.CreateAdd(
+ ResidualIndex, ConstantInt::get(CopyLenType, ResLoopOpSize));
+ ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);
+
+ // Create the loop branch condition.
+ ResBuilder.CreateCondBr(
+ ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB,
+ PostLoopBB);
+ } else {
+ // In this case the loop operand type was a byte, and there is no need for a
+ // residual loop to copy the remaining memory after the main loop.
+ // We do however need to patch up the control flow by creating the
+ // terminators for the preloop block and the memcpy loop.
+ ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
+ PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
+ LoopBB, PostLoopBB);
+ PreLoopBB->getTerminator()->eraseFromParent();
+ LoopBuilder.CreateCondBr(
+ LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
+ PostLoopBB);
+ }
+}
+
+// Lower memmove to IR. memmove is required to correctly copy overlapping memory
+// regions; therefore, it has to check the relative positions of the source and
+// destination pointers and choose the copy direction accordingly.
+//
+// The code below is an IR rendition of this C function:
+//
+// void* memmove(void* dst, const void* src, size_t n) {
+// unsigned char* d = dst;
+// const unsigned char* s = src;
+// if (s < d) {
+// // copy backwards
+// while (n--) {
+// d[n] = s[n];
+// }
+// } else {
+// // copy forward
+// for (size_t i = 0; i < n; ++i) {
+// d[i] = s[i];
+// }
+// }
+// return dst;
+// }
+static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
+ Value *DstAddr, Value *CopyLen, Align SrcAlign,
+ Align DstAlign, bool SrcIsVolatile,
+ bool DstIsVolatile) {
+ Type *TypeOfCopyLen = CopyLen->getType();
+ BasicBlock *OrigBB = InsertBefore->getParent();
+ Function *F = OrigBB->getParent();
+ const DataLayout &DL = F->getParent()->getDataLayout();
+
+ // TODO: Use different element type if possible?
+ IRBuilder<> CastBuilder(InsertBefore);
+ Type *EltTy = CastBuilder.getInt8Ty();
+ Type *PtrTy =
+ CastBuilder.getInt8PtrTy(SrcAddr->getType()->getPointerAddressSpace());
+ SrcAddr = CastBuilder.CreateBitCast(SrcAddr, PtrTy);
+ DstAddr = CastBuilder.CreateBitCast(DstAddr, PtrTy);
+
+ // Create a comparison of src and dst, based on which we jump to either
+ // the forward-copy part of the function (if src >= dst) or the backwards-copy
+ // part (if src < dst).
+ // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
+ // structure. Its block terminators (unconditional branches) are replaced by
+ // the appropriate conditional branches when the loop is built.
+ ICmpInst *PtrCompare = new ICmpInst(InsertBefore, ICmpInst::ICMP_ULT,
+ SrcAddr, DstAddr, "compare_src_dst");
+ Instruction *ThenTerm, *ElseTerm;
+ SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore, &ThenTerm,
+ &ElseTerm);
+
+ // Each part of the function consists of two blocks:
+ // copy_backwards: used to skip the loop when n == 0
+ // copy_backwards_loop: the actual backwards loop BB
+ // copy_forward: used to skip the loop when n == 0
+ // copy_forward_loop: the actual forward loop BB
+ BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
+ CopyBackwardsBB->setName("copy_backwards");
+ BasicBlock *CopyForwardBB = ElseTerm->getParent();
+ CopyForwardBB->setName("copy_forward");
+ BasicBlock *ExitBB = InsertBefore->getParent();
+ ExitBB->setName("memmove_done");
+
+ unsigned PartSize = DL.getTypeStoreSize(EltTy);
+ Align PartSrcAlign(commonAlignment(SrcAlign, PartSize));
+ Align PartDstAlign(commonAlignment(DstAlign, PartSize));
+
+ // Initial comparison of n == 0 that lets us skip the loops altogether. Shared
+ // between both backwards and forward copy clauses.
+ ICmpInst *CompareN =
+ new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen,
+ ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0");
+
+ // Copying backwards.
+ BasicBlock *LoopBB =
+ BasicBlock::Create(F->getContext(), "copy_backwards_loop", F, CopyForwardBB);
+ IRBuilder<> LoopBuilder(LoopBB);
+ PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
+ Value *IndexPtr = LoopBuilder.CreateSub(
+ LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
+ Value *Element = LoopBuilder.CreateAlignedLoad(
+ EltTy, LoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, IndexPtr),
+ PartSrcAlign, "element");
+ LoopBuilder.CreateAlignedStore(
+ Element, LoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, IndexPtr),
+ PartDstAlign);
+ LoopBuilder.CreateCondBr(
+ LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)),
+ ExitBB, LoopBB);
+ LoopPhi->addIncoming(IndexPtr, LoopBB);
+ LoopPhi->addIncoming(CopyLen, CopyBackwardsBB);
+ BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm);
+ ThenTerm->eraseFromParent();
+
+ // Copying forward.
+ BasicBlock *FwdLoopBB =
+ BasicBlock::Create(F->getContext(), "copy_forward_loop", F, ExitBB);
+ IRBuilder<> FwdLoopBuilder(FwdLoopBB);
+ PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
+ Value *SrcGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, FwdCopyPhi);
+ Value *FwdElement =
+ FwdLoopBuilder.CreateAlignedLoad(EltTy, SrcGEP, PartSrcAlign, "element");
+ Value *DstGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, FwdCopyPhi);
+ FwdLoopBuilder.CreateAlignedStore(FwdElement, DstGEP, PartDstAlign);
+ Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
+ FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment");
+ FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen),
+ ExitBB, FwdLoopBB);
+ FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB);
+ FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB);
+
+ BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm);
+ ElseTerm->eraseFromParent();
+}
+
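+// Lower a memset into a counted store loop guarded by an initial length
+// check, roughly:
+//
+//   if (CopyLen != 0)
+//     for (i = 0; i != CopyLen; ++i)
+//       Dst[i] = SetValue;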
+static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
+ Value *CopyLen, Value *SetValue, Align DstAlign,
+ bool IsVolatile) {
+ Type *TypeOfCopyLen = CopyLen->getType();
+ BasicBlock *OrigBB = InsertBefore->getParent();
+ Function *F = OrigBB->getParent();
+ const DataLayout &DL = F->getParent()->getDataLayout();
+ BasicBlock *NewBB =
+ OrigBB->splitBasicBlock(InsertBefore, "split");
+ BasicBlock *LoopBB
+ = BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB);
+
+ IRBuilder<> Builder(OrigBB->getTerminator());
+
+ // Cast pointer to the type of value getting stored
+ unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
+ DstAddr = Builder.CreateBitCast(DstAddr,
+ PointerType::get(SetValue->getType(), dstAS));
+
+ Builder.CreateCondBr(
+ Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
+ LoopBB);
+ OrigBB->getTerminator()->eraseFromParent();
+
+ unsigned PartSize = DL.getTypeStoreSize(SetValue->getType());
+ Align PartAlign(commonAlignment(DstAlign, PartSize));
+
+ IRBuilder<> LoopBuilder(LoopBB);
+ PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
+ LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);
+
+ LoopBuilder.CreateAlignedStore(
+ SetValue,
+ LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
+ PartAlign, IsVolatile);
+
+ Value *NewIndex =
+ LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
+ LoopIndex->addIncoming(NewIndex, LoopBB);
+
+ LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
+ NewBB);
+}
+
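+// Return true unless ScalarEvolution can prove, at the memcpy call site, that
+// the source and destination pointers are distinct. A false result lets the
+// lowering attach alias-scope / noalias metadata to the generated loads and
+// stores.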
+template <typename T>
+static bool canOverlap(MemTransferBase<T> *Memcpy, ScalarEvolution *SE) {
+ if (SE) {
+ auto *SrcSCEV = SE->getSCEV(Memcpy->getRawSource());
+ auto *DestSCEV = SE->getSCEV(Memcpy->getRawDest());
+ if (SE->isKnownPredicateAt(CmpInst::ICMP_NE, SrcSCEV, DestSCEV, Memcpy))
+ return false;
+ }
+ return true;
+}
+
+void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
+ const TargetTransformInfo &TTI,
+ ScalarEvolution *SE) {
+ bool CanOverlap = canOverlap(Memcpy, SE);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
+ createMemCpyLoopKnownSize(
+ /* InsertBefore */ Memcpy,
+ /* SrcAddr */ Memcpy->getRawSource(),
+ /* DstAddr */ Memcpy->getRawDest(),
+ /* CopyLen */ CI,
+ /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
+ /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
+ /* SrcIsVolatile */ Memcpy->isVolatile(),
+ /* DstIsVolatile */ Memcpy->isVolatile(),
+ /* CanOverlap */ CanOverlap,
+ /* TargetTransformInfo */ TTI);
+ } else {
+ createMemCpyLoopUnknownSize(
+ /* InsertBefore */ Memcpy,
+ /* SrcAddr */ Memcpy->getRawSource(),
+ /* DstAddr */ Memcpy->getRawDest(),
+ /* CopyLen */ Memcpy->getLength(),
+ /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
+ /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
+ /* SrcIsVolatile */ Memcpy->isVolatile(),
+ /* DstIsVolatile */ Memcpy->isVolatile(),
+ /* CanOverlap */ CanOverlap,
+ /* TargetTransformInfo */ TTI);
+ }
+}
+
+void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) {
+ createMemMoveLoop(/* InsertBefore */ Memmove,
+ /* SrcAddr */ Memmove->getRawSource(),
+ /* DstAddr */ Memmove->getRawDest(),
+ /* CopyLen */ Memmove->getLength(),
+ /* SrcAlign */ Memmove->getSourceAlign().valueOrOne(),
+ /* DestAlign */ Memmove->getDestAlign().valueOrOne(),
+ /* SrcIsVolatile */ Memmove->isVolatile(),
+ /* DstIsVolatile */ Memmove->isVolatile());
+}
+
+void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
+ createMemSetLoop(/* InsertBefore */ Memset,
+ /* DstAddr */ Memset->getRawDest(),
+ /* CopyLen */ Memset->getLength(),
+ /* SetValue */ Memset->getValue(),
+ /* Alignment */ Memset->getDestAlign().valueOrOne(),
+ Memset->isVolatile());
+}
+
+void llvm::expandAtomicMemCpyAsLoop(AtomicMemCpyInst *AtomicMemcpy,
+ const TargetTransformInfo &TTI,
+ ScalarEvolution *SE) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(AtomicMemcpy->getLength())) {
+ createMemCpyLoopKnownSize(
+ /* InsertBefore */ AtomicMemcpy,
+ /* SrcAddr */ AtomicMemcpy->getRawSource(),
+ /* DstAddr */ AtomicMemcpy->getRawDest(),
+ /* CopyLen */ CI,
+ /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
+ /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
+ /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
+ /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
+ /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
+ /* TargetTransformInfo */ TTI,
+ /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
+ } else {
+ createMemCpyLoopUnknownSize(
+ /* InsertBefore */ AtomicMemcpy,
+ /* SrcAddr */ AtomicMemcpy->getRawSource(),
+ /* DstAddr */ AtomicMemcpy->getRawDest(),
+ /* CopyLen */ AtomicMemcpy->getLength(),
+ /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
+ /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
+ /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
+ /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
+ /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
+ /* TargetTransformInfo */ TTI,
+ /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
+ }
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/LowerSwitch.cpp
new file mode 100644
index 0000000000..227de425ff
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/LowerSwitch.cpp
@@ -0,0 +1,611 @@
+//===- LowerSwitch.cpp - Eliminate Switch instructions --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The LowerSwitch transformation rewrites switch instructions with a sequence
+// of branches, which allows targets to get away with not implementing the
+// switch instruction until it is convenient.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LowerSwitch.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "lower-switch"
+
+namespace {
+
+struct IntRange {
+ APInt Low, High;
+};
+
+} // end anonymous namespace
+
+namespace {
+// Return true iff R is covered by Ranges.
+bool IsInRanges(const IntRange &R, const std::vector<IntRange> &Ranges) {
+ // Note: Ranges must be sorted, non-overlapping and non-adjacent.
+
+ // Find the first range whose High field is >= R.High,
+ // then check if the Low field is <= R.Low. If so, we
+ // have a Range that covers R.
+ auto I = llvm::lower_bound(
+ Ranges, R, [](IntRange A, IntRange B) { return A.High.slt(B.High); });
+ return I != Ranges.end() && I->Low.sle(R.Low);
+}
+
+struct CaseRange {
+ ConstantInt *Low;
+ ConstantInt *High;
+ BasicBlock *BB;
+
+ CaseRange(ConstantInt *low, ConstantInt *high, BasicBlock *bb)
+ : Low(low), High(high), BB(bb) {}
+};
+
+using CaseVector = std::vector<CaseRange>;
+using CaseItr = std::vector<CaseRange>::iterator;
+
+/// The comparison function for sorting the switch case values in the vector.
+/// WARNING: Case ranges should be disjoint!
+struct CaseCmp {
+ bool operator()(const CaseRange &C1, const CaseRange &C2) {
+ const ConstantInt *CI1 = cast<const ConstantInt>(C1.Low);
+ const ConstantInt *CI2 = cast<const ConstantInt>(C2.High);
+ return CI1->getValue().slt(CI2->getValue());
+ }
+};
+
+/// Used for debugging purposes.
+LLVM_ATTRIBUTE_USED
+raw_ostream &operator<<(raw_ostream &O, const CaseVector &C) {
+ O << "[";
+
+ for (CaseVector::const_iterator B = C.begin(), E = C.end(); B != E;) {
+ O << "[" << B->Low->getValue() << ", " << B->High->getValue() << "]";
+ if (++B != E)
+ O << ", ";
+ }
+
+ return O << "]";
+}
+
+/// Update the first occurrence of the "switch statement" BB in the PHI
+/// node with the "new" BB. The other occurrences will:
+///
+/// 1) Be updated by subsequent calls to this function. Switch statements may
+/// have more than one outgoing edge into the same BB if they all have the
+/// same value. When the switch statement is converted, these incoming edges
+/// now come from multiple BBs.
+/// 2) Be removed if subsequent incoming values now share the same case, i.e.,
+/// multiple outgoing edges are condensed into one. This is necessary to keep
+/// the number of phi values equal to the number of branches to SuccBB.
+void FixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
+ const APInt &NumMergedCases) {
+ for (auto &I : SuccBB->phis()) {
+ PHINode *PN = cast<PHINode>(&I);
+
+ // Only update the first occurrence if NewBB exists.
+ unsigned Idx = 0, E = PN->getNumIncomingValues();
+ APInt LocalNumMergedCases = NumMergedCases;
+ for (; Idx != E && NewBB; ++Idx) {
+ if (PN->getIncomingBlock(Idx) == OrigBB) {
+ PN->setIncomingBlock(Idx, NewBB);
+ break;
+ }
+ }
+
+ // Skip the updated incoming block so that it will not be removed.
+ if (NewBB)
+ ++Idx;
+
+ // Remove additional occurrences coming from condensed cases and keep the
+ // number of incoming values equal to the number of branches to SuccBB.
+ SmallVector<unsigned, 8> Indices;
+ for (; LocalNumMergedCases.ugt(0) && Idx < E; ++Idx)
+ if (PN->getIncomingBlock(Idx) == OrigBB) {
+ Indices.push_back(Idx);
+ LocalNumMergedCases -= 1;
+ }
+    // Remove incoming values in reverse order to avoid invalidating the
+    // indices of entries that have not yet been removed.
+ for (unsigned III : llvm::reverse(Indices))
+ PN->removeIncomingValue(III);
+ }
+}
+
+/// Create a new leaf block for the binary lookup tree. It checks if the
+/// switch's value == the case's value. If not, then it jumps to the default
+/// branch. At this point in the tree, the value can't be another valid case
+/// value, so the jump to the "default" branch is warranted.
+BasicBlock *NewLeafBlock(CaseRange &Leaf, Value *Val, ConstantInt *LowerBound,
+ ConstantInt *UpperBound, BasicBlock *OrigBlock,
+ BasicBlock *Default) {
+ Function *F = OrigBlock->getParent();
+ BasicBlock *NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock");
+ F->insert(++OrigBlock->getIterator(), NewLeaf);
+
+ // Emit comparison
+ ICmpInst *Comp = nullptr;
+ if (Leaf.Low == Leaf.High) {
+ // Make the seteq instruction...
+ Comp =
+ new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val, Leaf.Low, "SwitchLeaf");
+ } else {
+ // Make range comparison
+ if (Leaf.Low == LowerBound) {
+ // Val >= Min && Val <= Hi --> Val <= Hi
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High,
+ "SwitchLeaf");
+ } else if (Leaf.High == UpperBound) {
+ // Val <= Max && Val >= Lo --> Val >= Lo
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SGE, Val, Leaf.Low,
+ "SwitchLeaf");
+ } else if (Leaf.Low->isZero()) {
+ // Val >= 0 && Val <= Hi --> Val <=u Hi
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High,
+ "SwitchLeaf");
+ } else {
+ // Emit V-Lo <=u Hi-Lo
+ Constant *NegLo = ConstantExpr::getNeg(Leaf.Low);
+ Instruction *Add = BinaryOperator::CreateAdd(
+ Val, NegLo, Val->getName() + ".off", NewLeaf);
+ Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High);
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound,
+ "SwitchLeaf");
+ }
+ }
+
+ // Make the conditional branch...
+ BasicBlock *Succ = Leaf.BB;
+ BranchInst::Create(Succ, Default, Comp, NewLeaf);
+
+ // Update the PHI incoming value/block for the default.
+ for (auto &I : Default->phis()) {
+ PHINode *PN = cast<PHINode>(&I);
+ auto *V = PN->getIncomingValueForBlock(OrigBlock);
+ PN->addIncoming(V, NewLeaf);
+ }
+
+ // If there were any PHI nodes in this successor, rewrite one entry
+ // from OrigBlock to come from NewLeaf.
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ // Remove all but one incoming entries from the cluster
+ APInt Range = Leaf.High->getValue() - Leaf.Low->getValue();
+ for (APInt j(Range.getBitWidth(), 0, true); j.slt(Range); ++j) {
+ PN->removeIncomingValue(OrigBlock);
+ }
+
+ int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
+ assert(BlockIdx != -1 && "Switch didn't go to this successor??");
+ PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf);
+ }
+
+ return NewLeaf;
+}
+
+/// Convert the switch statement into a binary lookup of the case values.
+/// The function recursively builds this tree. LowerBound and UpperBound are
+/// used to keep track of the bounds for Val that have already been checked by
+/// a block emitted by one of the previous calls to SwitchConvert in the call
+/// stack.
+BasicBlock *SwitchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
+ ConstantInt *UpperBound, Value *Val,
+ BasicBlock *Predecessor, BasicBlock *OrigBlock,
+ BasicBlock *Default,
+ const std::vector<IntRange> &UnreachableRanges) {
+ assert(LowerBound && UpperBound && "Bounds must be initialized");
+ unsigned Size = End - Begin;
+
+ if (Size == 1) {
+ // Check if the Case Range is perfectly squeezed in between
+ // already checked Upper and Lower bounds. If it is then we can avoid
+ // emitting the code that checks if the value actually falls in the range
+ // because the bounds already tell us so.
+ if (Begin->Low == LowerBound && Begin->High == UpperBound) {
+ APInt NumMergedCases = UpperBound->getValue() - LowerBound->getValue();
+ FixPhis(Begin->BB, OrigBlock, Predecessor, NumMergedCases);
+ return Begin->BB;
+ }
+ return NewLeafBlock(*Begin, Val, LowerBound, UpperBound, OrigBlock,
+ Default);
+ }
+
+ unsigned Mid = Size / 2;
+ std::vector<CaseRange> LHS(Begin, Begin + Mid);
+ LLVM_DEBUG(dbgs() << "LHS: " << LHS << "\n");
+ std::vector<CaseRange> RHS(Begin + Mid, End);
+ LLVM_DEBUG(dbgs() << "RHS: " << RHS << "\n");
+
+ CaseRange &Pivot = *(Begin + Mid);
+ LLVM_DEBUG(dbgs() << "Pivot ==> [" << Pivot.Low->getValue() << ", "
+ << Pivot.High->getValue() << "]\n");
+
+  // NewLowerBound here can never be the minimal representable integer.
+  // This is because it is computed from a case range that is never
+  // the smallest, so there is always a case range with a smaller
+  // value.
+ ConstantInt *NewLowerBound = Pivot.Low;
+
+  // Because NewLowerBound is never the smallest representable integer,
+  // it is safe here to subtract one.
+ ConstantInt *NewUpperBound = ConstantInt::get(NewLowerBound->getContext(),
+ NewLowerBound->getValue() - 1);
+
+ if (!UnreachableRanges.empty()) {
+ // Check if the gap between LHS's highest and NewLowerBound is unreachable.
+ APInt GapLow = LHS.back().High->getValue() + 1;
+ APInt GapHigh = NewLowerBound->getValue() - 1;
+ IntRange Gap = {GapLow, GapHigh};
+ if (GapHigh.sge(GapLow) && IsInRanges(Gap, UnreachableRanges))
+ NewUpperBound = LHS.back().High;
+ }
+
+ LLVM_DEBUG(dbgs() << "LHS Bounds ==> [" << LowerBound->getValue() << ", "
+ << NewUpperBound->getValue() << "]\n"
+ << "RHS Bounds ==> [" << NewLowerBound->getValue() << ", "
+ << UpperBound->getValue() << "]\n");
+
+ // Create a new node that checks if the value is < pivot. Go to the
+ // left branch if it is and right branch if not.
+ Function *F = OrigBlock->getParent();
+ BasicBlock *NewNode = BasicBlock::Create(Val->getContext(), "NodeBlock");
+
+ ICmpInst *Comp = new ICmpInst(ICmpInst::ICMP_SLT, Val, Pivot.Low, "Pivot");
+
+ BasicBlock *LBranch =
+ SwitchConvert(LHS.begin(), LHS.end(), LowerBound, NewUpperBound, Val,
+ NewNode, OrigBlock, Default, UnreachableRanges);
+ BasicBlock *RBranch =
+ SwitchConvert(RHS.begin(), RHS.end(), NewLowerBound, UpperBound, Val,
+ NewNode, OrigBlock, Default, UnreachableRanges);
+
+ F->insert(++OrigBlock->getIterator(), NewNode);
+ Comp->insertInto(NewNode, NewNode->end());
+
+ BranchInst::Create(LBranch, RBranch, Comp, NewNode);
+ return NewNode;
+}
+
+/// Transform simple list of \p SI's cases into list of CaseRange's \p Cases.
+/// \post \p Cases wouldn't contain references to \p SI's default BB.
+/// \returns Number of \p SI's cases that do not reference \p SI's default BB.
+unsigned Clusterify(CaseVector &Cases, SwitchInst *SI) {
+ unsigned NumSimpleCases = 0;
+
+ // Start with "simple" cases
+ for (auto Case : SI->cases()) {
+ if (Case.getCaseSuccessor() == SI->getDefaultDest())
+ continue;
+ Cases.push_back(CaseRange(Case.getCaseValue(), Case.getCaseValue(),
+ Case.getCaseSuccessor()));
+ ++NumSimpleCases;
+ }
+
+ llvm::sort(Cases, CaseCmp());
+
+ // Merge case into clusters
+ if (Cases.size() >= 2) {
+ CaseItr I = Cases.begin();
+ for (CaseItr J = std::next(I), E = Cases.end(); J != E; ++J) {
+ const APInt &nextValue = J->Low->getValue();
+ const APInt &currentValue = I->High->getValue();
+ BasicBlock *nextBB = J->BB;
+ BasicBlock *currentBB = I->BB;
+
+ // If the two neighboring cases go to the same destination, merge them
+ // into a single case.
+ assert(nextValue.sgt(currentValue) &&
+ "Cases should be strictly ascending");
+ if ((nextValue == currentValue + 1) && (currentBB == nextBB)) {
+ I->High = J->High;
+ // FIXME: Combine branch weights.
+ } else if (++I != J) {
+ *I = *J;
+ }
+ }
+ Cases.erase(std::next(I), Cases.end());
+ }
+
+ return NumSimpleCases;
+}
+
+/// Replace the specified switch instruction with a sequence of chained if-then
+/// insts in a balanced binary search.
+void ProcessSwitchInst(SwitchInst *SI,
+ SmallPtrSetImpl<BasicBlock *> &DeleteList,
+ AssumptionCache *AC, LazyValueInfo *LVI) {
+ BasicBlock *OrigBlock = SI->getParent();
+ Function *F = OrigBlock->getParent();
+ Value *Val = SI->getCondition(); // The value we are switching on...
+ BasicBlock *Default = SI->getDefaultDest();
+
+ // Don't handle unreachable blocks. If there are successors with phis, this
+ // would leave them behind with missing predecessors.
+ if ((OrigBlock != &F->getEntryBlock() && pred_empty(OrigBlock)) ||
+ OrigBlock->getSinglePredecessor() == OrigBlock) {
+ DeleteList.insert(OrigBlock);
+ return;
+ }
+
+ // Prepare cases vector.
+ CaseVector Cases;
+ const unsigned NumSimpleCases = Clusterify(Cases, SI);
+ IntegerType *IT = cast<IntegerType>(SI->getCondition()->getType());
+ const unsigned BitWidth = IT->getBitWidth();
+  // Explicitly use higher precision to prevent unsigned overflow where
+  // `UnsignedMax - 0 + 1 == 0`.
+ APInt UnsignedZero(BitWidth + 1, 0);
+ APInt UnsignedMax = APInt::getMaxValue(BitWidth);
+ LLVM_DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
+ << ". Total non-default cases: " << NumSimpleCases
+ << "\nCase clusters: " << Cases << "\n");
+
+ // If there is only the default destination, just branch.
+ if (Cases.empty()) {
+ BranchInst::Create(Default, OrigBlock);
+ // Remove all the references from Default's PHIs to OrigBlock, but one.
+ FixPhis(Default, OrigBlock, OrigBlock, UnsignedMax);
+ SI->eraseFromParent();
+ return;
+ }
+
+ ConstantInt *LowerBound = nullptr;
+ ConstantInt *UpperBound = nullptr;
+ bool DefaultIsUnreachableFromSwitch = false;
+
+ if (isa<UnreachableInst>(Default->getFirstNonPHIOrDbg())) {
+ // Make the bounds tightly fitted around the case value range, because we
+ // know that the value passed to the switch must be exactly one of the case
+ // values.
+ LowerBound = Cases.front().Low;
+ UpperBound = Cases.back().High;
+ DefaultIsUnreachableFromSwitch = true;
+ } else {
+    // Constraining the range of the value being switched over helps eliminate
+    // unreachable BBs and minimize the number of `add` instructions
+    // NewLeafBlock ends up emitting. Running CorrelatedValuePropagation after
+ // LowerSwitch isn't as good, and also much more expensive in terms of
+ // compile time for the following reasons:
+ // 1. it processes many kinds of instructions, not just switches;
+ // 2. even if limited to icmp instructions only, it will have to process
+ // roughly C icmp's per switch, where C is the number of cases in the
+ // switch, while LowerSwitch only needs to call LVI once per switch.
+ const DataLayout &DL = F->getParent()->getDataLayout();
+ KnownBits Known = computeKnownBits(Val, DL, /*Depth=*/0, AC, SI);
+ // TODO Shouldn't this create a signed range?
+ ConstantRange KnownBitsRange =
+ ConstantRange::fromKnownBits(Known, /*IsSigned=*/false);
+ const ConstantRange LVIRange = LVI->getConstantRange(Val, SI);
+ ConstantRange ValRange = KnownBitsRange.intersectWith(LVIRange);
+ // We delegate removal of unreachable non-default cases to other passes. In
+ // the unlikely event that some of them survived, we just conservatively
+ // maintain the invariant that all the cases lie between the bounds. This
+ // may, however, still render the default case effectively unreachable.
+ const APInt &Low = Cases.front().Low->getValue();
+ const APInt &High = Cases.back().High->getValue();
+ APInt Min = APIntOps::smin(ValRange.getSignedMin(), Low);
+ APInt Max = APIntOps::smax(ValRange.getSignedMax(), High);
+
+ LowerBound = ConstantInt::get(SI->getContext(), Min);
+ UpperBound = ConstantInt::get(SI->getContext(), Max);
+ DefaultIsUnreachableFromSwitch = (Min + (NumSimpleCases - 1) == Max);
+ }
+
+ std::vector<IntRange> UnreachableRanges;
+
+ if (DefaultIsUnreachableFromSwitch) {
+ DenseMap<BasicBlock *, APInt> Popularity;
+ APInt MaxPop(UnsignedZero);
+ BasicBlock *PopSucc = nullptr;
+
+ APInt SignedMax = APInt::getSignedMaxValue(BitWidth);
+ APInt SignedMin = APInt::getSignedMinValue(BitWidth);
+ IntRange R = {SignedMin, SignedMax};
+ UnreachableRanges.push_back(R);
+ for (const auto &I : Cases) {
+ const APInt &Low = I.Low->getValue();
+ const APInt &High = I.High->getValue();
+
+ IntRange &LastRange = UnreachableRanges.back();
+ if (LastRange.Low.eq(Low)) {
+ // There is nothing left of the previous range.
+ UnreachableRanges.pop_back();
+ } else {
+ // Terminate the previous range.
+ assert(Low.sgt(LastRange.Low));
+ LastRange.High = Low - 1;
+ }
+ if (High.ne(SignedMax)) {
+ IntRange R = {High + 1, SignedMax};
+ UnreachableRanges.push_back(R);
+ }
+
+ // Count popularity.
+ assert(High.sge(Low) && "Popularity shouldn't be negative.");
+ APInt N = High.sext(BitWidth + 1) - Low.sext(BitWidth + 1) + 1;
+      // Explicit insert to make sure the bitwidths of the APInts match.
+ APInt &Pop = Popularity.insert({I.BB, APInt(UnsignedZero)}).first->second;
+ if ((Pop += N).ugt(MaxPop)) {
+ MaxPop = Pop;
+ PopSucc = I.BB;
+ }
+ }
+#ifndef NDEBUG
+ /* UnreachableRanges should be sorted and the ranges non-adjacent. */
+ for (auto I = UnreachableRanges.begin(), E = UnreachableRanges.end();
+ I != E; ++I) {
+ assert(I->Low.sle(I->High));
+ auto Next = I + 1;
+ if (Next != E) {
+ assert(Next->Low.sgt(I->High));
+ }
+ }
+#endif
+
+ // As the default block in the switch is unreachable, update the PHI nodes
+ // (remove all of the references to the default block) to reflect this.
+ const unsigned NumDefaultEdges = SI->getNumCases() + 1 - NumSimpleCases;
+ for (unsigned I = 0; I < NumDefaultEdges; ++I)
+ Default->removePredecessor(OrigBlock);
+
+ // Use the most popular block as the new default, reducing the number of
+ // cases.
+ Default = PopSucc;
+ llvm::erase_if(Cases,
+ [PopSucc](const CaseRange &R) { return R.BB == PopSucc; });
+
+ // If there are no cases left, just branch.
+ if (Cases.empty()) {
+ BranchInst::Create(Default, OrigBlock);
+ SI->eraseFromParent();
+ // As all the cases have been replaced with a single branch, only keep
+ // one entry in the PHI nodes.
+ if (!MaxPop.isZero())
+ for (APInt I(UnsignedZero); I.ult(MaxPop - 1); ++I)
+ PopSucc->removePredecessor(OrigBlock);
+ return;
+ }
+
+    // If the condition was a PHI node with the switch block as a predecessor,
+    // removing predecessors may have caused the condition to be erased.
+    // Getting the condition value again here protects against that.
+ Val = SI->getCondition();
+ }
+
+ BasicBlock *SwitchBlock =
+ SwitchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val,
+ OrigBlock, OrigBlock, Default, UnreachableRanges);
+
+ // We have added incoming values for newly-created predecessors in
+ // NewLeafBlock(). The only meaningful work we offload to FixPhis() is to
+  // remove the incoming values from OrigBlock. In the special case where
+  // SwitchBlock is the same as Default, the PHIs in Default have already been
+  // fixed inside SwitchConvert().
+ if (SwitchBlock != Default)
+ FixPhis(Default, OrigBlock, nullptr, UnsignedMax);
+
+ // Branch to our shiny new if-then stuff...
+ BranchInst::Create(SwitchBlock, OrigBlock);
+
+ // We are now done with the switch instruction, delete it.
+ BasicBlock *OldDefault = SI->getDefaultDest();
+ SI->eraseFromParent();
+
+ // If the Default block has no more predecessors just add it to DeleteList.
+ if (pred_empty(OldDefault))
+ DeleteList.insert(OldDefault);
+}
+
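+/// Lower every SwitchInst in \p F to a chain of branches, collecting blocks
+/// that become dead in the process (e.g. a default destination that loses all
+/// of its predecessors) and deleting them afterwards.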
+bool LowerSwitch(Function &F, LazyValueInfo *LVI, AssumptionCache *AC) {
+ bool Changed = false;
+ SmallPtrSet<BasicBlock *, 8> DeleteList;
+
+ // We use make_early_inc_range here so that we don't traverse new blocks.
+ for (BasicBlock &Cur : llvm::make_early_inc_range(F)) {
+ // If the block is a dead Default block that will be deleted later, don't
+ // waste time processing it.
+ if (DeleteList.count(&Cur))
+ continue;
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur.getTerminator())) {
+ Changed = true;
+ ProcessSwitchInst(SI, DeleteList, AC, LVI);
+ }
+ }
+
+ for (BasicBlock *BB : DeleteList) {
+ LVI->eraseBlock(BB);
+ DeleteDeadBlock(BB);
+ }
+
+ return Changed;
+}
+
+/// Replace all SwitchInst instructions with chained branch instructions.
+class LowerSwitchLegacyPass : public FunctionPass {
+public:
+ // Pass identification, replacement for typeid
+ static char ID;
+
+ LowerSwitchLegacyPass() : FunctionPass(ID) {
+ initializeLowerSwitchLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LazyValueInfoWrapperPass>();
+ }
+};
+
+} // end anonymous namespace
+
+char LowerSwitchLegacyPass::ID = 0;
+
+// Publicly exposed interface to pass...
+char &llvm::LowerSwitchID = LowerSwitchLegacyPass::ID;
+
+INITIALIZE_PASS_BEGIN(LowerSwitchLegacyPass, "lowerswitch",
+ "Lower SwitchInst's to branches", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
+INITIALIZE_PASS_END(LowerSwitchLegacyPass, "lowerswitch",
+ "Lower SwitchInst's to branches", false, false)
+
+// createLowerSwitchPass - Interface to this file...
+FunctionPass *llvm::createLowerSwitchPass() {
+ return new LowerSwitchLegacyPass();
+}
+
+bool LowerSwitchLegacyPass::runOnFunction(Function &F) {
+ LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
+ auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>();
+ AssumptionCache *AC = ACT ? &ACT->getAssumptionCache(F) : nullptr;
+ return LowerSwitch(F, LVI, AC);
+}
+
+PreservedAnalyses LowerSwitchPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F);
+ AssumptionCache *AC = AM.getCachedResult<AssumptionAnalysis>(F);
+ return LowerSwitch(F, LVI, AC) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/MatrixUtils.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/MatrixUtils.cpp
new file mode 100644
index 0000000000..e218773cf5
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/MatrixUtils.cpp
@@ -0,0 +1,104 @@
+//===- MatrixUtils.cpp - Utilities to lower matrix intrinsics ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Utilities for generating tiled loops for matrix operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/MatrixUtils.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Type.h"
+
+using namespace llvm;
+
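+// Emit one loop level (header, body and latch) with an i64 induction variable
+// that starts at 0 and is bumped by Step in the latch until it reaches Bound.
+// The preheader is redirected into the new header, the blocks are registered
+// with the DomTreeUpdater and LoopInfo, and the body block is returned so the
+// caller can nest further loops inside it.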
+BasicBlock *TileInfo::CreateLoop(BasicBlock *Preheader, BasicBlock *Exit,
+ Value *Bound, Value *Step, StringRef Name,
+ IRBuilderBase &B, DomTreeUpdater &DTU, Loop *L,
+ LoopInfo &LI) {
+ LLVMContext &Ctx = Preheader->getContext();
+ BasicBlock *Header = BasicBlock::Create(
+ Preheader->getContext(), Name + ".header", Preheader->getParent(), Exit);
+ BasicBlock *Body = BasicBlock::Create(Header->getContext(), Name + ".body",
+ Header->getParent(), Exit);
+ BasicBlock *Latch = BasicBlock::Create(Header->getContext(), Name + ".latch",
+ Header->getParent(), Exit);
+
+  Type *I64Ty = Type::getInt64Ty(Ctx);
+  BranchInst::Create(Body, Header);
+  BranchInst::Create(Latch, Body);
+  PHINode *IV =
+      PHINode::Create(I64Ty, 2, Name + ".iv", Header->getTerminator());
+  IV->addIncoming(ConstantInt::get(I64Ty, 0), Preheader);
+
+ B.SetInsertPoint(Latch);
+ Value *Inc = B.CreateAdd(IV, Step, Name + ".step");
+ Value *Cond = B.CreateICmpNE(Inc, Bound, Name + ".cond");
+ BranchInst::Create(Header, Exit, Cond, Latch);
+ IV->addIncoming(Inc, Latch);
+
+ BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
+ BasicBlock *Tmp = PreheaderBr->getSuccessor(0);
+ PreheaderBr->setSuccessor(0, Header);
+ DTU.applyUpdatesPermissive({
+ {DominatorTree::Delete, Preheader, Tmp},
+ {DominatorTree::Insert, Header, Body},
+ {DominatorTree::Insert, Body, Latch},
+ {DominatorTree::Insert, Latch, Header},
+ {DominatorTree::Insert, Latch, Exit},
+ {DominatorTree::Insert, Preheader, Header},
+ });
+
+ L->addBasicBlockToLoop(Header, LI);
+ L->addBasicBlockToLoop(Body, LI);
+ L->addBasicBlockToLoop(Latch, LI);
+ return Body;
+}
+
+// Creates the following loop nest skeleton:
+// for C = 0; C < NumColumns; C += TileSize
+// for R = 0; R < NumRows; R += TileSize
+// for K = 0; K < Inner ; K += TileSize
+BasicBlock *TileInfo::CreateTiledLoops(BasicBlock *Start, BasicBlock *End,
+ IRBuilderBase &B, DomTreeUpdater &DTU,
+ LoopInfo &LI) {
+ Loop *ColumnLoopInfo = LI.AllocateLoop();
+ Loop *RowLoopInfo = LI.AllocateLoop();
+ Loop *KLoopInfo = LI.AllocateLoop();
+ RowLoopInfo->addChildLoop(KLoopInfo);
+ ColumnLoopInfo->addChildLoop(RowLoopInfo);
+ if (Loop *ParentL = LI.getLoopFor(Start))
+ ParentL->addChildLoop(ColumnLoopInfo);
+ else
+ LI.addTopLevelLoop(ColumnLoopInfo);
+
+ BasicBlock *ColBody =
+ CreateLoop(Start, End, B.getInt64(NumColumns), B.getInt64(TileSize),
+ "cols", B, DTU, ColumnLoopInfo, LI);
+ ColumnLoop.Latch = ColBody->getSingleSuccessor();
+ BasicBlock *RowBody =
+ CreateLoop(ColBody, ColumnLoop.Latch, B.getInt64(NumRows),
+ B.getInt64(TileSize), "rows", B, DTU, RowLoopInfo, LI);
+ RowLoop.Latch = RowBody->getSingleSuccessor();
+
+ BasicBlock *InnerBody =
+ CreateLoop(RowBody, RowLoop.Latch, B.getInt64(NumInner),
+ B.getInt64(TileSize), "inner", B, DTU, KLoopInfo, LI);
+ KLoop.Latch = InnerBody->getSingleSuccessor();
+ ColumnLoop.Header = ColBody->getSinglePredecessor();
+ RowLoop.Header = RowBody->getSinglePredecessor();
+ KLoop.Header = InnerBody->getSinglePredecessor();
+ RowLoop.Index = &*RowLoop.Header->begin();
+ ColumnLoop.Index = &*ColumnLoop.Header->begin();
+ KLoop.Index = &*KLoop.Header->begin();
+
+ return InnerBody;
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/Mem2Reg.cpp
new file mode 100644
index 0000000000..5ad7aeb463
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/Mem2Reg.cpp
@@ -0,0 +1,116 @@
+//===- Mem2Reg.cpp - The -mem2reg pass, a wrapper around the Utils lib ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is a simple pass wrapper around the PromoteMemToReg function call
+// exposed by the Utils library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Mem2Reg.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "mem2reg"
+
+STATISTIC(NumPromoted, "Number of alloca's promoted");
+
+static bool promoteMemoryToRegister(Function &F, DominatorTree &DT,
+ AssumptionCache &AC) {
+ std::vector<AllocaInst *> Allocas;
+ BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
+ bool Changed = false;
+
+ while (true) {
+ Allocas.clear();
+
+ // Find allocas that are safe to promote, by looking at all instructions in
+ // the entry node
+ for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca?
+ if (isAllocaPromotable(AI))
+ Allocas.push_back(AI);
+
+ if (Allocas.empty())
+ break;
+
+ PromoteMemToReg(Allocas, DT, &AC);
+ NumPromoted += Allocas.size();
+ Changed = true;
+ }
+ return Changed;
+}
+
+PreservedAnalyses PromotePass::run(Function &F, FunctionAnalysisManager &AM) {
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ if (!promoteMemoryToRegister(F, DT, AC))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
+
+namespace {
+
+struct PromoteLegacyPass : public FunctionPass {
+ // Pass identification, replacement for typeid
+ static char ID;
+
+ PromoteLegacyPass() : FunctionPass(ID) {
+ initializePromoteLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ // runOnFunction - To run this pass, first we calculate the alloca
+ // instructions that are safe for promotion, then we promote each one.
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ AssumptionCache &AC =
+ getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ return promoteMemoryToRegister(F, DT, AC);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.setPreservesCFG();
+ }
+};
+
+} // end anonymous namespace
+
+char PromoteLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(PromoteLegacyPass, "mem2reg", "Promote Memory to "
+ "Register",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(PromoteLegacyPass, "mem2reg", "Promote Memory to Register",
+ false, false)
+
+// createPromoteMemoryToRegister - Provide an entry point to create this pass.
+FunctionPass *llvm::createPromoteMemoryToRegisterPass() {
+ return new PromoteLegacyPass();
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/MemoryOpRemark.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/MemoryOpRemark.cpp
new file mode 100644
index 0000000000..899928c085
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/MemoryOpRemark.cpp
@@ -0,0 +1,410 @@
+//===-- MemoryOpRemark.cpp - Auto-init remark analysis --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the analysis for the "auto-init" remark.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/MemoryOpRemark.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include <optional>
+
+using namespace llvm;
+using namespace llvm::ore;
+
+MemoryOpRemark::~MemoryOpRemark() = default;
+
+bool MemoryOpRemark::canHandle(const Instruction *I, const TargetLibraryInfo &TLI) {
+ if (isa<StoreInst>(I))
+ return true;
+
+ if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::memcpy_inline:
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ case Intrinsic::memcpy_element_unordered_atomic:
+ case Intrinsic::memmove_element_unordered_atomic:
+ case Intrinsic::memset_element_unordered_atomic:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ if (auto *CI = dyn_cast<CallInst>(I)) {
+ auto *CF = CI->getCalledFunction();
+ if (!CF)
+ return false;
+
+ if (!CF->hasName())
+ return false;
+
+ LibFunc LF;
+ bool KnownLibCall = TLI.getLibFunc(*CF, LF) && TLI.has(LF);
+ if (!KnownLibCall)
+ return false;
+
+ switch (LF) {
+ case LibFunc_memcpy_chk:
+ case LibFunc_mempcpy_chk:
+ case LibFunc_memset_chk:
+ case LibFunc_memmove_chk:
+ case LibFunc_memcpy:
+ case LibFunc_mempcpy:
+ case LibFunc_memset:
+ case LibFunc_memmove:
+ case LibFunc_bzero:
+ case LibFunc_bcopy:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ return false;
+}
+
+void MemoryOpRemark::visit(const Instruction *I) {
+ // For some of them, we can provide more information:
+
+ // For stores:
+ // * size
+ // * volatile / atomic
+ if (auto *SI = dyn_cast<StoreInst>(I)) {
+ visitStore(*SI);
+ return;
+ }
+
+ // For intrinsics:
+ // * user-friendly name
+ // * size
+ if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+ visitIntrinsicCall(*II);
+ return;
+ }
+
+ // For calls:
+ // * known/unknown function (e.g. the compiler knows bzero, but it doesn't
+ // know my_bzero)
+ // * memory operation size
+ if (auto *CI = dyn_cast<CallInst>(I)) {
+ visitCall(*CI);
+ return;
+ }
+
+ visitUnknown(*I);
+}
+
+std::string MemoryOpRemark::explainSource(StringRef Type) const {
+ return (Type + ".").str();
+}
+
+StringRef MemoryOpRemark::remarkName(RemarkKind RK) const {
+ switch (RK) {
+ case RK_Store:
+ return "MemoryOpStore";
+ case RK_Unknown:
+ return "MemoryOpUnknown";
+ case RK_IntrinsicCall:
+ return "MemoryOpIntrinsicCall";
+ case RK_Call:
+ return "MemoryOpCall";
+ }
+ llvm_unreachable("missing RemarkKind case");
+}
+
+static void inlineVolatileOrAtomicWithExtraArgs(bool *Inline, bool Volatile,
+ bool Atomic,
+ DiagnosticInfoIROptimization &R) {
+ if (Inline && *Inline)
+ R << " Inlined: " << NV("StoreInlined", true) << ".";
+ if (Volatile)
+ R << " Volatile: " << NV("StoreVolatile", true) << ".";
+ if (Atomic)
+ R << " Atomic: " << NV("StoreAtomic", true) << ".";
+  // Emit the false cases under ExtraArgs. This won't show them in the remark
+  // message, but they will end up in the serialized remarks.
+ if ((Inline && !*Inline) || !Volatile || !Atomic)
+ R << setExtraArgs();
+ if (Inline && !*Inline)
+ R << " Inlined: " << NV("StoreInlined", false) << ".";
+ if (!Volatile)
+ R << " Volatile: " << NV("StoreVolatile", false) << ".";
+ if (!Atomic)
+ R << " Atomic: " << NV("StoreAtomic", false) << ".";
+}
+
+static std::optional<uint64_t>
+getSizeInBytes(std::optional<uint64_t> SizeInBits) {
+ if (!SizeInBits || *SizeInBits % 8 != 0)
+ return std::nullopt;
+ return *SizeInBits / 8;
+}
+
+template<typename ...Ts>
+std::unique_ptr<DiagnosticInfoIROptimization>
+MemoryOpRemark::makeRemark(Ts... Args) {
+ switch (diagnosticKind()) {
+ case DK_OptimizationRemarkAnalysis:
+ return std::make_unique<OptimizationRemarkAnalysis>(Args...);
+ case DK_OptimizationRemarkMissed:
+ return std::make_unique<OptimizationRemarkMissed>(Args...);
+ default:
+ llvm_unreachable("unexpected DiagnosticKind");
+ }
+}
+
+void MemoryOpRemark::visitStore(const StoreInst &SI) {
+ bool Volatile = SI.isVolatile();
+ bool Atomic = SI.isAtomic();
+ int64_t Size = DL.getTypeStoreSize(SI.getOperand(0)->getType());
+
+ auto R = makeRemark(RemarkPass.data(), remarkName(RK_Store), &SI);
+ *R << explainSource("Store") << "\nStore size: " << NV("StoreSize", Size)
+ << " bytes.";
+ visitPtr(SI.getOperand(1), /*IsRead=*/false, *R);
+ inlineVolatileOrAtomicWithExtraArgs(nullptr, Volatile, Atomic, *R);
+ ORE.emit(*R);
+}
+
+void MemoryOpRemark::visitUnknown(const Instruction &I) {
+ auto R = makeRemark(RemarkPass.data(), remarkName(RK_Unknown), &I);
+ *R << explainSource("Initialization");
+ ORE.emit(*R);
+}
+
+void MemoryOpRemark::visitIntrinsicCall(const IntrinsicInst &II) {
+ SmallString<32> CallTo;
+ bool Atomic = false;
+ bool Inline = false;
+ switch (II.getIntrinsicID()) {
+ case Intrinsic::memcpy_inline:
+ CallTo = "memcpy";
+ Inline = true;
+ break;
+ case Intrinsic::memcpy:
+ CallTo = "memcpy";
+ break;
+ case Intrinsic::memmove:
+ CallTo = "memmove";
+ break;
+ case Intrinsic::memset:
+ CallTo = "memset";
+ break;
+ case Intrinsic::memcpy_element_unordered_atomic:
+ CallTo = "memcpy";
+ Atomic = true;
+ break;
+ case Intrinsic::memmove_element_unordered_atomic:
+ CallTo = "memmove";
+ Atomic = true;
+ break;
+ case Intrinsic::memset_element_unordered_atomic:
+ CallTo = "memset";
+ Atomic = true;
+ break;
+ default:
+ return visitUnknown(II);
+ }
+
+ auto R = makeRemark(RemarkPass.data(), remarkName(RK_IntrinsicCall), &II);
+ visitCallee(CallTo.str(), /*KnownLibCall=*/true, *R);
+ visitSizeOperand(II.getOperand(2), *R);
+
+ auto *CIVolatile = dyn_cast<ConstantInt>(II.getOperand(3));
+ // No such thing as a memory intrinsic that is both atomic and volatile.
+ bool Volatile = !Atomic && CIVolatile && CIVolatile->getZExtValue();
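+  // Describe the pointers involved: memcpy/memmove variants read the source
+  // and write the destination, while memset variants only write the
+  // destination.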
+ switch (II.getIntrinsicID()) {
+ case Intrinsic::memcpy_inline:
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy_element_unordered_atomic:
+ visitPtr(II.getOperand(1), /*IsRead=*/true, *R);
+ visitPtr(II.getOperand(0), /*IsRead=*/false, *R);
+ break;
+ case Intrinsic::memset:
+ case Intrinsic::memset_element_unordered_atomic:
+ visitPtr(II.getOperand(0), /*IsRead=*/false, *R);
+ break;
+ }
+ inlineVolatileOrAtomicWithExtraArgs(&Inline, Volatile, Atomic, *R);
+ ORE.emit(*R);
+}
+
+void MemoryOpRemark::visitCall(const CallInst &CI) {
+ Function *F = CI.getCalledFunction();
+ if (!F)
+ return visitUnknown(CI);
+
+ LibFunc LF;
+ bool KnownLibCall = TLI.getLibFunc(*F, LF) && TLI.has(LF);
+ auto R = makeRemark(RemarkPass.data(), remarkName(RK_Call), &CI);
+ visitCallee(F, KnownLibCall, *R);
+ visitKnownLibCall(CI, LF, *R);
+ ORE.emit(*R);
+}
+
+template <typename FTy>
+void MemoryOpRemark::visitCallee(FTy F, bool KnownLibCall,
+ DiagnosticInfoIROptimization &R) {
+ R << "Call to ";
+ if (!KnownLibCall)
+ R << NV("UnknownLibCall", "unknown") << " function ";
+ R << NV("Callee", F) << explainSource("");
+}
+
+void MemoryOpRemark::visitKnownLibCall(const CallInst &CI, LibFunc LF,
+ DiagnosticInfoIROptimization &R) {
+ switch (LF) {
+ default:
+ return;
+ case LibFunc_memset_chk:
+ case LibFunc_memset:
+ visitSizeOperand(CI.getOperand(2), R);
+ visitPtr(CI.getOperand(0), /*IsRead=*/false, R);
+ break;
+ case LibFunc_bzero:
+ visitSizeOperand(CI.getOperand(1), R);
+ visitPtr(CI.getOperand(0), /*IsRead=*/false, R);
+ break;
+ case LibFunc_memcpy_chk:
+ case LibFunc_mempcpy_chk:
+ case LibFunc_memmove_chk:
+ case LibFunc_memcpy:
+ case LibFunc_mempcpy:
+ case LibFunc_memmove:
+ case LibFunc_bcopy:
+ visitSizeOperand(CI.getOperand(2), R);
+ visitPtr(CI.getOperand(1), /*IsRead=*/true, R);
+ visitPtr(CI.getOperand(0), /*IsRead=*/false, R);
+ break;
+ }
+}
+
+void MemoryOpRemark::visitSizeOperand(Value *V, DiagnosticInfoIROptimization &R) {
+ if (auto *Len = dyn_cast<ConstantInt>(V)) {
+ uint64_t Size = Len->getZExtValue();
+ R << " Memory operation size: " << NV("StoreSize", Size) << " bytes.";
+ }
+}
+
+static std::optional<StringRef> nameOrNone(const Value *V) {
+ if (V->hasName())
+ return V->getName();
+ return std::nullopt;
+}
+
+void MemoryOpRemark::visitVariable(const Value *V,
+ SmallVectorImpl<VariableInfo> &Result) {
+ if (auto *GV = dyn_cast<GlobalVariable>(V)) {
+ auto *Ty = GV->getValueType();
+ uint64_t Size = DL.getTypeSizeInBits(Ty).getFixedValue();
+ VariableInfo Var{nameOrNone(GV), Size};
+ if (!Var.isEmpty())
+ Result.push_back(std::move(Var));
+ return;
+ }
+
+ // If we find some information in the debug info, take that.
+ bool FoundDI = false;
+ // Try to get an llvm.dbg.declare, which has a DILocalVariable giving us the
+ // real debug info name and size of the variable.
+ for (const DbgVariableIntrinsic *DVI :
+ FindDbgAddrUses(const_cast<Value *>(V))) {
+ if (DILocalVariable *DILV = DVI->getVariable()) {
+ std::optional<uint64_t> DISize = getSizeInBytes(DILV->getSizeInBits());
+ VariableInfo Var{DILV->getName(), DISize};
+ if (!Var.isEmpty()) {
+ Result.push_back(std::move(Var));
+ FoundDI = true;
+ }
+ }
+ }
+ if (FoundDI) {
+ assert(!Result.empty());
+ return;
+ }
+
+ const auto *AI = dyn_cast<AllocaInst>(V);
+ if (!AI)
+ return;
+
+ // If not, get it from the alloca.
+ std::optional<TypeSize> TySize = AI->getAllocationSize(DL);
+ std::optional<uint64_t> Size =
+ TySize ? std::optional(TySize->getFixedValue()) : std::nullopt;
+ VariableInfo Var{nameOrNone(AI), Size};
+ if (!Var.isEmpty())
+ Result.push_back(std::move(Var));
+}
+
+void MemoryOpRemark::visitPtr(Value *Ptr, bool IsRead, DiagnosticInfoIROptimization &R) {
+ // Find if Ptr is a known variable we can give more information on.
+ SmallVector<Value *, 2> Objects;
+ getUnderlyingObjectsForCodeGen(Ptr, Objects);
+ SmallVector<VariableInfo, 2> VIs;
+ for (const Value *V : Objects)
+ visitVariable(V, VIs);
+
+ if (VIs.empty()) {
+ bool CanBeNull;
+ bool CanBeFreed;
+ uint64_t Size = Ptr->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
+ if (!Size)
+ return;
+ VIs.push_back({std::nullopt, Size});
+ }
+
+ R << (IsRead ? "\n Read Variables: " : "\n Written Variables: ");
+ for (unsigned i = 0; i < VIs.size(); ++i) {
+ const VariableInfo &VI = VIs[i];
+ assert(!VI.isEmpty() && "No extra content to display.");
+ if (i != 0)
+ R << ", ";
+ if (VI.Name)
+ R << NV(IsRead ? "RVarName" : "WVarName", *VI.Name);
+ else
+ R << NV(IsRead ? "RVarName" : "WVarName", "<unknown>");
+ if (VI.Size)
+ R << " (" << NV(IsRead ? "RVarSize" : "WVarSize", *VI.Size) << " bytes)";
+ }
+ R << ".";
+}
+
+bool AutoInitRemark::canHandle(const Instruction *I) {
+ if (!I->hasMetadata(LLVMContext::MD_annotation))
+ return false;
+ return any_of(I->getMetadata(LLVMContext::MD_annotation)->operands(),
+ [](const MDOperand &Op) {
+ return cast<MDString>(Op.get())->getString() == "auto-init";
+ });
+}
+
+std::string AutoInitRemark::explainSource(StringRef Type) const {
+ return (Type + " inserted by -ftrivial-auto-var-init.").str();
+}
+
+StringRef AutoInitRemark::remarkName(RemarkKind RK) const {
+ switch (RK) {
+ case RK_Store:
+ return "AutoInitStore";
+ case RK_Unknown:
+ return "AutoInitUnknownInstruction";
+ case RK_IntrinsicCall:
+ return "AutoInitIntrinsicCall";
+ case RK_Call:
+ return "AutoInitCall";
+ }
+ llvm_unreachable("missing RemarkKind case");
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/MemoryTaggingSupport.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/MemoryTaggingSupport.cpp
new file mode 100644
index 0000000000..1e42d74916
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/MemoryTaggingSupport.cpp
@@ -0,0 +1,219 @@
+//== MemoryTaggingSupport.cpp - helpers for memory tagging implementations ===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares common infrastructure for HWAddressSanitizer and
+// Aarch64StackTagging.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
+
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/StackSafetyAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+
+namespace llvm {
+namespace memtag {
+namespace {
+bool maybeReachableFromEachOther(const SmallVectorImpl<IntrinsicInst *> &Insts,
+ const DominatorTree *DT, const LoopInfo *LI,
+ size_t MaxLifetimes) {
+ // If we have too many lifetime ends, give up, as the algorithm below is N^2.
+ if (Insts.size() > MaxLifetimes)
+ return true;
+ for (size_t I = 0; I < Insts.size(); ++I) {
+ for (size_t J = 0; J < Insts.size(); ++J) {
+ if (I == J)
+ continue;
+ if (isPotentiallyReachable(Insts[I], Insts[J], nullptr, DT, LI))
+ return true;
+ }
+ }
+ return false;
+}
+} // namespace
+
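+// Invoke Callback at the points where the tag for an alloca should be reset:
+// on the lifetime.end markers when every reachable function exit is covered by
+// one, or directly on the reachable exits otherwise. Returns false in the
+// latter case so the caller knows to drop the lifetime markers for this
+// alloca.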
+bool forAllReachableExits(const DominatorTree &DT, const PostDominatorTree &PDT,
+ const LoopInfo &LI, const Instruction *Start,
+ const SmallVectorImpl<IntrinsicInst *> &Ends,
+ const SmallVectorImpl<Instruction *> &RetVec,
+ llvm::function_ref<void(Instruction *)> Callback) {
+ if (Ends.size() == 1 && PDT.dominates(Ends[0], Start)) {
+ Callback(Ends[0]);
+ return true;
+ }
+ SmallPtrSet<BasicBlock *, 2> EndBlocks;
+ for (auto *End : Ends) {
+ EndBlocks.insert(End->getParent());
+ }
+ SmallVector<Instruction *, 8> ReachableRetVec;
+ unsigned NumCoveredExits = 0;
+ for (auto *RI : RetVec) {
+ if (!isPotentiallyReachable(Start, RI, nullptr, &DT, &LI))
+ continue;
+ ReachableRetVec.push_back(RI);
+ // If there is an end in the same basic block as the return, we know for
+ // sure that the return is covered. Otherwise, we can check whether there
+ // is a way to reach the RI from the start of the lifetime without passing
+ // through an end.
+ if (EndBlocks.count(RI->getParent()) > 0 ||
+ !isPotentiallyReachable(Start, RI, &EndBlocks, &DT, &LI)) {
+ ++NumCoveredExits;
+ }
+ }
+ // If there's a mix of covered and non-covered exits, just put the untag
+ // on exits, so we avoid the redundancy of untagging twice.
+ if (NumCoveredExits == ReachableRetVec.size()) {
+ for (auto *End : Ends)
+ Callback(End);
+ } else {
+ for (auto *RI : ReachableRetVec)
+ Callback(RI);
+ // We may have inserted untag outside of the lifetime interval.
+ // Signal the caller to remove the lifetime end call for this alloca.
+ return false;
+ }
+ return true;
+}
+
+bool isStandardLifetime(const SmallVectorImpl<IntrinsicInst *> &LifetimeStart,
+ const SmallVectorImpl<IntrinsicInst *> &LifetimeEnd,
+ const DominatorTree *DT, const LoopInfo *LI,
+ size_t MaxLifetimes) {
+ // An alloca that has exactly one start and end in every possible execution.
+ // If it has multiple ends, they have to be unreachable from each other, so
+ // at most one of them is actually used for each execution of the function.
+ return LifetimeStart.size() == 1 &&
+ (LifetimeEnd.size() == 1 ||
+ (LifetimeEnd.size() > 0 &&
+ !maybeReachableFromEachOther(LifetimeEnd, DT, LI, MaxLifetimes)));
+}
+
+Instruction *getUntagLocationIfFunctionExit(Instruction &Inst) {
+ if (isa<ReturnInst>(Inst)) {
+ if (CallInst *CI = Inst.getParent()->getTerminatingMustTailCall())
+ return CI;
+ return &Inst;
+ }
+ if (isa<ResumeInst, CleanupReturnInst>(Inst)) {
+ return &Inst;
+ }
+ return nullptr;
+}
+
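+// Collect per-function information needed by the tagging passes: interesting
+// allocas together with their lifetime markers and debug intrinsics, calls
+// that may return twice, and the exit instructions where tags must be reset.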
+void StackInfoBuilder::visit(Instruction &Inst) {
+ if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
+ if (CI->canReturnTwice()) {
+ Info.CallsReturnTwice = true;
+ }
+ }
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
+ if (isInterestingAlloca(*AI)) {
+ Info.AllocasToInstrument[AI].AI = AI;
+ }
+ return;
+ }
+ auto *II = dyn_cast<IntrinsicInst>(&Inst);
+ if (II && (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end)) {
+ AllocaInst *AI = findAllocaForValue(II->getArgOperand(1));
+ if (!AI) {
+ Info.UnrecognizedLifetimes.push_back(&Inst);
+ return;
+ }
+ if (!isInterestingAlloca(*AI))
+ return;
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+ Info.AllocasToInstrument[AI].LifetimeStart.push_back(II);
+ else
+ Info.AllocasToInstrument[AI].LifetimeEnd.push_back(II);
+ return;
+ }
+ if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&Inst)) {
+ for (Value *V : DVI->location_ops()) {
+ if (auto *AI = dyn_cast_or_null<AllocaInst>(V)) {
+ if (!isInterestingAlloca(*AI))
+ continue;
+ AllocaInfo &AInfo = Info.AllocasToInstrument[AI];
+ auto &DVIVec = AInfo.DbgVariableIntrinsics;
+ if (DVIVec.empty() || DVIVec.back() != DVI)
+ DVIVec.push_back(DVI);
+ }
+ }
+ }
+ Instruction *ExitUntag = getUntagLocationIfFunctionExit(Inst);
+ if (ExitUntag)
+ Info.RetVec.push_back(ExitUntag);
+}
+
+bool StackInfoBuilder::isInterestingAlloca(const AllocaInst &AI) {
+ return (AI.getAllocatedType()->isSized() &&
+ // FIXME: instrument dynamic allocas, too
+ AI.isStaticAlloca() &&
+ // alloca() may be called with 0 size, ignore it.
+ memtag::getAllocaSizeInBytes(AI) > 0 &&
+ // We are only interested in allocas not promotable to registers.
+ // Promotable allocas are common under -O0.
+ !isAllocaPromotable(&AI) &&
+ // inalloca allocas are not treated as static, and we don't want
+ // dynamic alloca instrumentation for them as well.
+ !AI.isUsedWithInAlloca() &&
+ // swifterror allocas are register promoted by ISel
+ !AI.isSwiftError()) &&
+ // safe allocas are not interesting
+ !(SSI && SSI->isSafe(AI));
+}
+
+uint64_t getAllocaSizeInBytes(const AllocaInst &AI) {
+ auto DL = AI.getModule()->getDataLayout();
+ return *AI.getAllocationSize(DL);
+}
+
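+// Raise the alloca's alignment to at least \p Alignment and, when the size is
+// not already a multiple of that alignment, replace the alloca with one whose
+// type is wrapped in a struct with a trailing i8 padding array so the
+// allocation covers a whole number of aligned granules.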
+void alignAndPadAlloca(memtag::AllocaInfo &Info, llvm::Align Alignment) {
+ const Align NewAlignment = std::max(Info.AI->getAlign(), Alignment);
+ Info.AI->setAlignment(NewAlignment);
+ auto &Ctx = Info.AI->getFunction()->getContext();
+
+ uint64_t Size = getAllocaSizeInBytes(*Info.AI);
+ uint64_t AlignedSize = alignTo(Size, Alignment);
+ if (Size == AlignedSize)
+ return;
+
+ // Add padding to the alloca.
+ Type *AllocatedType =
+ Info.AI->isArrayAllocation()
+ ? ArrayType::get(
+ Info.AI->getAllocatedType(),
+ cast<ConstantInt>(Info.AI->getArraySize())->getZExtValue())
+ : Info.AI->getAllocatedType();
+ Type *PaddingType = ArrayType::get(Type::getInt8Ty(Ctx), AlignedSize - Size);
+ Type *TypeWithPadding = StructType::get(AllocatedType, PaddingType);
+ auto *NewAI = new AllocaInst(TypeWithPadding, Info.AI->getAddressSpace(),
+ nullptr, "", Info.AI);
+ NewAI->takeName(Info.AI);
+ NewAI->setAlignment(Info.AI->getAlign());
+ NewAI->setUsedWithInAlloca(Info.AI->isUsedWithInAlloca());
+ NewAI->setSwiftError(Info.AI->isSwiftError());
+ NewAI->copyMetadata(*Info.AI);
+
+ Value *NewPtr = NewAI;
+
+ // TODO: Remove when typed pointers dropped
+ if (Info.AI->getType() != NewAI->getType())
+ NewPtr = new BitCastInst(NewAI, Info.AI->getType(), "", Info.AI);
+
+ Info.AI->replaceAllUsesWith(NewPtr);
+ Info.AI->eraseFromParent();
+ Info.AI = NewAI;
+}
+
+} // namespace memtag
+} // namespace llvm
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/MetaRenamer.cpp
new file mode 100644
index 0000000000..0ea210671b
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/MetaRenamer.cpp
@@ -0,0 +1,251 @@
+//===- MetaRenamer.cpp - Rename everything with metasyntactic names -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass renames everything with metasyntactic names. The intent is to use
+// this pass after bugpoint reduction to conceal the nature of the original
+// program.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/MetaRenamer.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/TypeFinder.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils.h"
+
+using namespace llvm;
+
+static cl::opt<std::string> RenameExcludeFunctionPrefixes(
+ "rename-exclude-function-prefixes",
+ cl::desc("Prefixes for functions that don't need to be renamed, separated "
+ "by a comma"),
+ cl::Hidden);
+
+static cl::opt<std::string> RenameExcludeAliasPrefixes(
+ "rename-exclude-alias-prefixes",
+ cl::desc("Prefixes for aliases that don't need to be renamed, separated "
+ "by a comma"),
+ cl::Hidden);
+
+static cl::opt<std::string> RenameExcludeGlobalPrefixes(
+ "rename-exclude-global-prefixes",
+ cl::desc(
+ "Prefixes for global values that don't need to be renamed, separated "
+ "by a comma"),
+ cl::Hidden);
+
+static cl::opt<std::string> RenameExcludeStructPrefixes(
+ "rename-exclude-struct-prefixes",
+ cl::desc("Prefixes for structs that don't need to be renamed, separated "
+ "by a comma"),
+ cl::Hidden);
+
+static const char *const metaNames[] = {
+ // See http://en.wikipedia.org/wiki/Metasyntactic_variable
+ "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge",
+ "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam"
+};
+
+namespace {
+// This PRNG is from the ISO C spec. It is intentionally simple and
+// unsuitable for cryptographic use. We're just looking for enough
+// variety to surprise and delight users.
+struct PRNG {
+ unsigned long next;
+
+ void srand(unsigned int seed) { next = seed; }
+
+ int rand() {
+ next = next * 1103515245 + 12345;
+ return (unsigned int)(next / 65536) % 32768;
+ }
+};
+
+struct Renamer {
+ Renamer(unsigned int seed) { prng.srand(seed); }
+
+ const char *newName() {
+ return metaNames[prng.rand() % std::size(metaNames)];
+ }
+
+ PRNG prng;
+};
+
+static void
+parseExcludedPrefixes(StringRef PrefixesStr,
+ SmallVectorImpl<StringRef> &ExcludedPrefixes) {
+ for (;;) {
+ auto PrefixesSplit = PrefixesStr.split(',');
+ if (PrefixesSplit.first.empty())
+ break;
+ ExcludedPrefixes.push_back(PrefixesSplit.first);
+ PrefixesStr = PrefixesSplit.second;
+ }
+}
+
+void MetaRename(Function &F) {
+ for (Argument &Arg : F.args())
+ if (!Arg.getType()->isVoidTy())
+ Arg.setName("arg");
+
+ for (auto &BB : F) {
+ BB.setName("bb");
+
+ for (auto &I : BB)
+ if (!I.getType()->isVoidTy())
+ I.setName("tmp");
+ }
+}
+
+void MetaRename(Module &M,
+ function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
+ // Seed our PRNG with simple additive sum of ModuleID. We're looking to
+ // simply avoid always having the same function names, and we need to
+ // remain deterministic.
+ unsigned int randSeed = 0;
+ for (auto C : M.getModuleIdentifier())
+ randSeed += C;
+
+ Renamer renamer(randSeed);
+
+ SmallVector<StringRef, 8> ExcludedAliasesPrefixes;
+ SmallVector<StringRef, 8> ExcludedGlobalsPrefixes;
+ SmallVector<StringRef, 8> ExcludedStructsPrefixes;
+ SmallVector<StringRef, 8> ExcludedFuncPrefixes;
+ parseExcludedPrefixes(RenameExcludeAliasPrefixes, ExcludedAliasesPrefixes);
+ parseExcludedPrefixes(RenameExcludeGlobalPrefixes, ExcludedGlobalsPrefixes);
+ parseExcludedPrefixes(RenameExcludeStructPrefixes, ExcludedStructsPrefixes);
+ parseExcludedPrefixes(RenameExcludeFunctionPrefixes, ExcludedFuncPrefixes);
+
+ auto IsNameExcluded = [](StringRef &Name,
+ SmallVectorImpl<StringRef> &ExcludedPrefixes) {
+ return any_of(ExcludedPrefixes,
+ [&Name](auto &Prefix) { return Name.startswith(Prefix); });
+ };
+
+ // Rename all aliases
+ for (GlobalAlias &GA : M.aliases()) {
+ StringRef Name = GA.getName();
+ if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) ||
+ IsNameExcluded(Name, ExcludedAliasesPrefixes))
+ continue;
+
+ GA.setName("alias");
+ }
+
+ // Rename all global variables
+ for (GlobalVariable &GV : M.globals()) {
+ StringRef Name = GV.getName();
+ if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) ||
+ IsNameExcluded(Name, ExcludedGlobalsPrefixes))
+ continue;
+
+ GV.setName("global");
+ }
+
+ // Rename all struct types
+ TypeFinder StructTypes;
+ StructTypes.run(M, true);
+ for (StructType *STy : StructTypes) {
+ StringRef Name = STy->getName();
+ if (STy->isLiteral() || Name.empty() ||
+ IsNameExcluded(Name, ExcludedStructsPrefixes))
+ continue;
+
+ SmallString<128> NameStorage;
+ STy->setName(
+ (Twine("struct.") + renamer.newName()).toStringRef(NameStorage));
+ }
+
+ // Rename all functions
+ for (auto &F : M) {
+ StringRef Name = F.getName();
+ LibFunc Tmp;
+ // Leave library functions alone because their presence or absence could
+ // affect the behavior of other passes.
+ if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) ||
+ GetTLI(F).getLibFunc(F, Tmp) ||
+ IsNameExcluded(Name, ExcludedFuncPrefixes))
+ continue;
+
+ // Leave @main alone. The output of -metarenamer might be passed to
+ // lli for execution and the latter needs a main entry point.
+ if (Name != "main")
+ F.setName(renamer.newName());
+
+ MetaRename(F);
+ }
+}
+
+struct MetaRenamer : public ModulePass {
+ // Pass identification, replacement for typeid
+ static char ID;
+
+ MetaRenamer() : ModulePass(ID) {
+ initializeMetaRenamerPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.setPreservesAll();
+ }
+
+ bool runOnModule(Module &M) override {
+ auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
+ return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ };
+ MetaRename(M, GetTLI);
+ return true;
+ }
+};
+
+} // end anonymous namespace
+
+char MetaRenamer::ID = 0;
+
+INITIALIZE_PASS_BEGIN(MetaRenamer, "metarenamer",
+ "Assign new names to everything", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(MetaRenamer, "metarenamer",
+ "Assign new names to everything", false, false)
+
+//===----------------------------------------------------------------------===//
+//
+// MetaRenamer - Rename everything with metasyntactic names.
+//
+ModulePass *llvm::createMetaRenamerPass() {
+ return new MetaRenamer();
+}
+
+PreservedAnalyses MetaRenamerPass::run(Module &M, ModuleAnalysisManager &AM) {
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ };
+ MetaRename(M, GetTLI);
+
+ return PreservedAnalyses::all();
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/MisExpect.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/MisExpect.cpp
new file mode 100644
index 0000000000..6f5a25a268
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/MisExpect.cpp
@@ -0,0 +1,214 @@
+//===--- MisExpect.cpp - Check the use of llvm.expect with PGO data -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This contains code to emit warnings for potentially incorrect usage of the
+// llvm.expect intrinsic. This utility extracts the threshold values from
+// metadata associated with the instrumented Branch or Switch instruction. The
+// threshold values are then used to determine if a warning should be emitted.
+//
+// MisExpect's implementation relies on two assumptions about how branch weights
+// are managed in LLVM.
+//
+// 1) Frontend profiling weights are always in place before llvm.expect is
+// lowered in LowerExpectIntrinsic.cpp. Frontend based instrumentation therefore
+// needs to extract the branch weights and then compare them to the weights
+// being added by the llvm.expect intrinsic lowering.
+//
+// 2) Sampling and IR based profiles will *only* have branch weight metadata
+// before profiling data is consulted if they are from a lowered llvm.expect
+// intrinsic. These profiles thus always extract the expected weights and then
+// compare them to the weights collected during profiling to determine if a
+// diagnostic message is warranted.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/MisExpect.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/ProfDataUtils.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormatVariadic.h"
+#include <algorithm>
+#include <cstdint>
+#include <functional>
+#include <numeric>
+
+#define DEBUG_TYPE "misexpect"
+
+using namespace llvm;
+using namespace misexpect;
+
+namespace llvm {
+
+// Command line option to enable/disable the warning when profile data suggests
+// a mismatch with the use of the llvm.expect intrinsic
+static cl::opt<bool> PGOWarnMisExpect(
+ "pgo-warn-misexpect", cl::init(false), cl::Hidden,
+ cl::desc("Use this option to turn on/off "
+ "warnings about incorrect usage of llvm.expect intrinsics."));
+
+static cl::opt<uint32_t> MisExpectTolerance(
+ "misexpect-tolerance", cl::init(0),
+ cl::desc("Prevents emiting diagnostics when profile counts are "
+ "within N% of the threshold.."));
+
+} // namespace llvm
+
+namespace {
+
+bool isMisExpectDiagEnabled(LLVMContext &Ctx) {
+ return PGOWarnMisExpect || Ctx.getMisExpectWarningRequested();
+}
+
+uint32_t getMisExpectTolerance(LLVMContext &Ctx) {
+ return std::max(static_cast<uint32_t>(MisExpectTolerance),
+ Ctx.getDiagnosticsMisExpectTolerance());
+}
+
+Instruction *getInstCondition(Instruction *I) {
+ assert(I != nullptr && "MisExpect target Instruction cannot be nullptr");
+ Instruction *Ret = nullptr;
+ if (auto *B = dyn_cast<BranchInst>(I)) {
+ Ret = dyn_cast<Instruction>(B->getCondition());
+ }
+ // TODO: Find a way to resolve condition location for switches
+ // Using the condition of the switch seems to often resolve to an earlier
+ // point in the program, i.e. the calculation of the switch condition, rather
+ // than the switch's location in the source code. Thus, we should use the
+ // instruction to get source code locations rather than the condition to
+ // improve diagnostic output, such as the caret. If the same problem exists
+ // for branch instructions, then we should remove this function and directly
+ // use the instruction
+ //
+ else if (auto *S = dyn_cast<SwitchInst>(I)) {
+ Ret = dyn_cast<Instruction>(S->getCondition());
+ }
+ return Ret ? Ret : I;
+}
+
+void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx,
+ uint64_t ProfCount, uint64_t TotalCount) {
+ double PercentageCorrect = (double)ProfCount / TotalCount;
+ auto PerString =
+ formatv("{0:P} ({1} / {2})", PercentageCorrect, ProfCount, TotalCount);
+ auto RemStr = formatv(
+ "Potential performance regression from use of the llvm.expect intrinsic: "
+ "Annotation was correct on {0} of profiled executions.",
+ PerString);
+ Twine Msg(PerString);
+ Instruction *Cond = getInstCondition(I);
+ if (isMisExpectDiagEnabled(Ctx))
+ Ctx.diagnose(DiagnosticInfoMisExpect(Cond, Msg));
+ OptimizationRemarkEmitter ORE(I->getParent()->getParent());
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "misexpect", Cond) << RemStr.str());
+}
+
+} // namespace
+
+namespace llvm {
+namespace misexpect {
+
+void verifyMisExpect(Instruction &I, ArrayRef<uint32_t> RealWeights,
+ ArrayRef<uint32_t> ExpectedWeights) {
+ // To determine if we emit a diagnostic, we need to compare the branch weights
+ // from the profile to those added by the llvm.expect intrinsic.
+ // So first, we extract the "likely" and "unlikely" weights from
+  // ExpectedWeights and determine the correct weight in the profile to compare
+ // against.
+ uint64_t LikelyBranchWeight = 0,
+ UnlikelyBranchWeight = std::numeric_limits<uint32_t>::max();
+ size_t MaxIndex = 0;
+ for (size_t Idx = 0, End = ExpectedWeights.size(); Idx < End; Idx++) {
+ uint32_t V = ExpectedWeights[Idx];
+ if (LikelyBranchWeight < V) {
+ LikelyBranchWeight = V;
+ MaxIndex = Idx;
+ }
+ if (UnlikelyBranchWeight > V) {
+ UnlikelyBranchWeight = V;
+ }
+ }
+
+ const uint64_t ProfiledWeight = RealWeights[MaxIndex];
+ const uint64_t RealWeightsTotal =
+ std::accumulate(RealWeights.begin(), RealWeights.end(), (uint64_t)0,
+ std::plus<uint64_t>());
+ const uint64_t NumUnlikelyTargets = RealWeights.size() - 1;
+
+ uint64_t TotalBranchWeight =
+ LikelyBranchWeight + (UnlikelyBranchWeight * NumUnlikelyTargets);
+
+ // FIXME: When we've addressed sample profiling, restore the assertion
+ //
+  // We cannot calculate branch probability if either of these invariants is
+  // violated. However, MisExpect diagnostics should not prevent code from
+  // compiling, so we simply forgo emitting diagnostics here and return early.
+ // assert((TotalBranchWeight >= LikelyBranchWeight) && (TotalBranchWeight > 0)
+ // && "TotalBranchWeight is less than the Likely branch weight");
+ if ((TotalBranchWeight == 0) || (TotalBranchWeight <= LikelyBranchWeight))
+ return;
+
+ // To determine our threshold value we need to obtain the branch probability
+ // for the weights added by llvm.expect and use that proportion to calculate
+ // our threshold based on the collected profile data.
+  auto LikelyProbability = BranchProbability::getBranchProbability(
+      LikelyBranchWeight, TotalBranchWeight);
+
+  uint64_t ScaledThreshold = LikelyProbability.scale(RealWeightsTotal);
+
+ // clamp tolerance range to [0, 100)
+ auto Tolerance = getMisExpectTolerance(I.getContext());
+ Tolerance = std::clamp(Tolerance, 0u, 99u);
+
+ // Allow users to relax checking by N% i.e., if they use a 5% tolerance,
+ // then we check against 0.95*ScaledThreshold
+ if (Tolerance > 0)
+ ScaledThreshold *= (1.0 - Tolerance / 100.0);
+
+ // When the profile weight is below the threshold, we emit the diagnostic
+ if (ProfiledWeight < ScaledThreshold)
+ emitMisexpectDiagnostic(&I, I.getContext(), ProfiledWeight,
+ RealWeightsTotal);
+}
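+
+// Illustrative walk-through with invented weights (not from any real profile):
+// given ExpectedWeights = {2000, 1}, the annotated branch is expected with
+// probability 2000/2001. If the profile reports RealWeights = {190, 10}, then
+// RealWeightsTotal = 200 and ScaledThreshold is roughly 199; ProfiledWeight =
+// 190 falls below it, so a diagnostic is emitted. Passing
+// -misexpect-tolerance=5 lowers the threshold to roughly 189, suppressing it.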
+
+void checkBackendInstrumentation(Instruction &I,
+ const ArrayRef<uint32_t> RealWeights) {
+ SmallVector<uint32_t> ExpectedWeights;
+ if (!extractBranchWeights(I, ExpectedWeights))
+ return;
+ verifyMisExpect(I, RealWeights, ExpectedWeights);
+}
+
+void checkFrontendInstrumentation(Instruction &I,
+ const ArrayRef<uint32_t> ExpectedWeights) {
+ SmallVector<uint32_t> RealWeights;
+ if (!extractBranchWeights(I, RealWeights))
+ return;
+ verifyMisExpect(I, RealWeights, ExpectedWeights);
+}
+
+void checkExpectAnnotations(Instruction &I,
+ const ArrayRef<uint32_t> ExistingWeights,
+ bool IsFrontend) {
+ if (IsFrontend) {
+ checkFrontendInstrumentation(I, ExistingWeights);
+ } else {
+ checkBackendInstrumentation(I, ExistingWeights);
+ }
+}
+
+} // namespace misexpect
+} // namespace llvm
+#undef DEBUG_TYPE
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/ModuleUtils.cpp
new file mode 100644
index 0000000000..6d17a46695
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/ModuleUtils.cpp
@@ -0,0 +1,475 @@
+//===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions performs manipulations on Modules.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/xxhash.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "moduleutils"
+
+static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F,
+ int Priority, Constant *Data) {
+ IRBuilder<> IRB(M.getContext());
+ FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
+
+ // Get the current set of static global constructors and add the new ctor
+ // to the list.
+ SmallVector<Constant *, 16> CurrentCtors;
+ StructType *EltTy = StructType::get(
+ IRB.getInt32Ty(), PointerType::get(FnTy, F->getAddressSpace()),
+ IRB.getInt8PtrTy());
+
+ if (GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName)) {
+ if (Constant *Init = GVCtor->getInitializer()) {
+ unsigned n = Init->getNumOperands();
+ CurrentCtors.reserve(n + 1);
+ for (unsigned i = 0; i != n; ++i)
+ CurrentCtors.push_back(cast<Constant>(Init->getOperand(i)));
+ }
+ GVCtor->eraseFromParent();
+ }
+
+ // Build a 3 field global_ctor entry. We don't take a comdat key.
+ Constant *CSVals[3];
+ CSVals[0] = IRB.getInt32(Priority);
+ CSVals[1] = F;
+ CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy())
+ : Constant::getNullValue(IRB.getInt8PtrTy());
+ Constant *RuntimeCtorInit =
+ ConstantStruct::get(EltTy, ArrayRef(CSVals, EltTy->getNumElements()));
+
+ CurrentCtors.push_back(RuntimeCtorInit);
+
+ // Create a new initializer.
+ ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size());
+ Constant *NewInit = ConstantArray::get(AT, CurrentCtors);
+
+ // Create the new global variable and replace all uses of
+ // the old global variable with the new one.
+ (void)new GlobalVariable(M, NewInit->getType(), false,
+ GlobalValue::AppendingLinkage, NewInit, ArrayName);
+}
+
+void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) {
+ appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data);
+}
+
+void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) {
+ appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data);
+}
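+
+// Rough illustration of the result (opaque-pointer IR syntax, names invented):
+// after appendToGlobalCtors(M, F, 65535, nullptr) the module holds an entry
+// such as
+//   @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }]
+//       [{ i32, ptr, ptr } { i32 65535, ptr @F, ptr null }]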
+
+static void collectUsedGlobals(GlobalVariable *GV,
+ SmallSetVector<Constant *, 16> &Init) {
+ if (!GV || !GV->hasInitializer())
+ return;
+
+ auto *CA = cast<ConstantArray>(GV->getInitializer());
+ for (Use &Op : CA->operands())
+ Init.insert(cast<Constant>(Op));
+}
+
+static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) {
+ GlobalVariable *GV = M.getGlobalVariable(Name);
+
+ SmallSetVector<Constant *, 16> Init;
+ collectUsedGlobals(GV, Init);
+ if (GV)
+ GV->eraseFromParent();
+
+ Type *ArrayEltTy = llvm::Type::getInt8PtrTy(M.getContext());
+ for (auto *V : Values)
+ Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy));
+
+ if (Init.empty())
+ return;
+
+ ArrayType *ATy = ArrayType::get(ArrayEltTy, Init.size());
+ GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
+ ConstantArray::get(ATy, Init.getArrayRef()),
+ Name);
+ GV->setSection("llvm.metadata");
+}
+
+void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) {
+ appendToUsedList(M, "llvm.used", Values);
+}
+
+void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) {
+ appendToUsedList(M, "llvm.compiler.used", Values);
+}
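+
+// For example (illustrative only, with an invented global @foo), appendToUsed
+// keeps @foo alive by emitting something like
+//   @llvm.used = appending global [1 x ptr] [ptr @foo], section "llvm.metadata"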
+
+static void removeFromUsedList(Module &M, StringRef Name,
+ function_ref<bool(Constant *)> ShouldRemove) {
+ GlobalVariable *GV = M.getNamedGlobal(Name);
+ if (!GV)
+ return;
+
+ SmallSetVector<Constant *, 16> Init;
+ collectUsedGlobals(GV, Init);
+
+ Type *ArrayEltTy = cast<ArrayType>(GV->getValueType())->getElementType();
+
+ SmallVector<Constant *, 16> NewInit;
+ for (Constant *MaybeRemoved : Init) {
+ if (!ShouldRemove(MaybeRemoved->stripPointerCasts()))
+ NewInit.push_back(MaybeRemoved);
+ }
+
+ if (!NewInit.empty()) {
+ ArrayType *ATy = ArrayType::get(ArrayEltTy, NewInit.size());
+ GlobalVariable *NewGV =
+ new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
+ ConstantArray::get(ATy, NewInit), "", GV,
+ GV->getThreadLocalMode(), GV->getAddressSpace());
+ NewGV->setSection(GV->getSection());
+ NewGV->takeName(GV);
+ }
+
+ GV->eraseFromParent();
+}
+
+void llvm::removeFromUsedLists(Module &M,
+ function_ref<bool(Constant *)> ShouldRemove) {
+ removeFromUsedList(M, "llvm.used", ShouldRemove);
+ removeFromUsedList(M, "llvm.compiler.used", ShouldRemove);
+}
+
+void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) {
+ if (!M.getModuleFlag("kcfi"))
+ return;
+ // Matches CodeGenModule::CreateKCFITypeId in Clang.
+ LLVMContext &Ctx = M.getContext();
+ MDBuilder MDB(Ctx);
+ F.setMetadata(
+ LLVMContext::MD_kcfi_type,
+ MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
+ Type::getInt32Ty(Ctx),
+ static_cast<uint32_t>(xxHash64(MangledType))))));
+ // If the module was compiled with -fpatchable-function-entry, ensure
+ // we use the same patchable-function-prefix.
+ if (auto *MD = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("kcfi-offset"))) {
+ if (unsigned Offset = MD->getZExtValue())
+ F.addFnAttr("patchable-function-prefix", std::to_string(Offset));
+ }
+}
+
+FunctionCallee llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
+ ArrayRef<Type *> InitArgTypes,
+ bool Weak) {
+ assert(!InitName.empty() && "Expected init function name");
+ auto *VoidTy = Type::getVoidTy(M.getContext());
+ auto *FnTy = FunctionType::get(VoidTy, InitArgTypes, false);
+ auto FnCallee = M.getOrInsertFunction(InitName, FnTy);
+ auto *Fn = cast<Function>(FnCallee.getCallee());
+ if (Weak && Fn->isDeclaration())
+ Fn->setLinkage(Function::ExternalWeakLinkage);
+ return FnCallee;
+}
+
+Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) {
+ Function *Ctor = Function::createWithDefaultAttr(
+ FunctionType::get(Type::getVoidTy(M.getContext()), false),
+ GlobalValue::InternalLinkage, M.getDataLayout().getProgramAddressSpace(),
+ CtorName, &M);
+ Ctor->addFnAttr(Attribute::NoUnwind);
+ setKCFIType(M, *Ctor, "_ZTSFvvE"); // void (*)(void)
+ BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor);
+ ReturnInst::Create(M.getContext(), CtorBB);
+ // Ensure Ctor cannot be discarded, even if in a comdat.
+ appendToUsed(M, {Ctor});
+ return Ctor;
+}
+
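+// Shape of the generated ctor, sketched as pseudo-IR (names illustrative):
+//   non-weak: ctor() { call @init(args); optional call @version_check(); ret }
+//   weak:     ctor() { entry:    br (@init != null), %callfunc, %ret
+//                      callfunc: call @init(args); optional version check;
+//                                br %ret
+//                      ret:      ret void }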
+std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions(
+ Module &M, StringRef CtorName, StringRef InitName,
+ ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
+ StringRef VersionCheckName, bool Weak) {
+ assert(!InitName.empty() && "Expected init function name");
+ assert(InitArgs.size() == InitArgTypes.size() &&
+ "Sanitizer's init function expects different number of arguments");
+ FunctionCallee InitFunction =
+ declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak);
+ Function *Ctor = createSanitizerCtor(M, CtorName);
+ IRBuilder<> IRB(M.getContext());
+
+ BasicBlock *RetBB = &Ctor->getEntryBlock();
+ if (Weak) {
+ RetBB->setName("ret");
+ auto *EntryBB = BasicBlock::Create(M.getContext(), "entry", Ctor, RetBB);
+ auto *CallInitBB =
+ BasicBlock::Create(M.getContext(), "callfunc", Ctor, RetBB);
+ auto *InitFn = cast<Function>(InitFunction.getCallee());
+ auto *InitFnPtr =
+ PointerType::get(InitFn->getType(), InitFn->getAddressSpace());
+ IRB.SetInsertPoint(EntryBB);
+ Value *InitNotNull =
+ IRB.CreateICmpNE(InitFn, ConstantPointerNull::get(InitFnPtr));
+ IRB.CreateCondBr(InitNotNull, CallInitBB, RetBB);
+ IRB.SetInsertPoint(CallInitBB);
+ } else {
+ IRB.SetInsertPoint(RetBB->getTerminator());
+ }
+
+ IRB.CreateCall(InitFunction, InitArgs);
+ if (!VersionCheckName.empty()) {
+ FunctionCallee VersionCheckFunction = M.getOrInsertFunction(
+ VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
+ AttributeList());
+ IRB.CreateCall(VersionCheckFunction, {});
+ }
+
+ if (Weak)
+ IRB.CreateBr(RetBB);
+
+ return std::make_pair(Ctor, InitFunction);
+}
+
+std::pair<Function *, FunctionCallee>
+llvm::getOrCreateSanitizerCtorAndInitFunctions(
+ Module &M, StringRef CtorName, StringRef InitName,
+ ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
+ function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback,
+ StringRef VersionCheckName, bool Weak) {
+ assert(!CtorName.empty() && "Expected ctor function name");
+
+ if (Function *Ctor = M.getFunction(CtorName))
+ // FIXME: Sink this logic into the module, similar to the handling of
+ // globals. This will make moving to a concurrent model much easier.
+ if (Ctor->arg_empty() ||
+ Ctor->getReturnType() == Type::getVoidTy(M.getContext()))
+ return {Ctor,
+ declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak)};
+
+ Function *Ctor;
+ FunctionCallee InitFunction;
+ std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions(
+ M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName, Weak);
+ FunctionsCreatedCallback(Ctor, InitFunction);
+ return std::make_pair(Ctor, InitFunction);
+}
+
+void llvm::filterDeadComdatFunctions(
+ SmallVectorImpl<Function *> &DeadComdatFunctions) {
+ SmallPtrSet<Function *, 32> MaybeDeadFunctions;
+ SmallPtrSet<Comdat *, 32> MaybeDeadComdats;
+ for (Function *F : DeadComdatFunctions) {
+ MaybeDeadFunctions.insert(F);
+ if (Comdat *C = F->getComdat())
+ MaybeDeadComdats.insert(C);
+ }
+
+ // Find comdats for which all users are dead now.
+ SmallPtrSet<Comdat *, 32> DeadComdats;
+ for (Comdat *C : MaybeDeadComdats) {
+ auto IsUserDead = [&](GlobalObject *GO) {
+ auto *F = dyn_cast<Function>(GO);
+ return F && MaybeDeadFunctions.contains(F);
+ };
+ if (all_of(C->getUsers(), IsUserDead))
+ DeadComdats.insert(C);
+ }
+
+ // Only keep functions which have no comdat or a dead comdat.
+ erase_if(DeadComdatFunctions, [&](Function *F) {
+ Comdat *C = F->getComdat();
+ return C && !DeadComdats.contains(C);
+ });
+}
+
+std::string llvm::getUniqueModuleId(Module *M) {
+ MD5 Md5;
+ bool ExportsSymbols = false;
+ auto AddGlobal = [&](GlobalValue &GV) {
+ if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
+ !GV.hasExternalLinkage() || GV.hasComdat())
+ return;
+ ExportsSymbols = true;
+ Md5.update(GV.getName());
+ Md5.update(ArrayRef<uint8_t>{0});
+ };
+
+ for (auto &F : *M)
+ AddGlobal(F);
+ for (auto &GV : M->globals())
+ AddGlobal(GV);
+ for (auto &GA : M->aliases())
+ AddGlobal(GA);
+ for (auto &IF : M->ifuncs())
+ AddGlobal(IF);
+
+ if (!ExportsSymbols)
+ return "";
+
+ MD5::MD5Result R;
+ Md5.final(R);
+
+ SmallString<32> Str;
+ MD5::stringifyResult(R, Str);
+ return ("." + Str).str();
+}
+
+void VFABI::setVectorVariantNames(CallInst *CI,
+ ArrayRef<std::string> VariantMappings) {
+ if (VariantMappings.empty())
+ return;
+
+ SmallString<256> Buffer;
+ llvm::raw_svector_ostream Out(Buffer);
+ for (const std::string &VariantMapping : VariantMappings)
+ Out << VariantMapping << ",";
+ // Get rid of the trailing ','.
+ assert(!Buffer.str().empty() && "Must have at least one char.");
+ Buffer.pop_back();
+
+ Module *M = CI->getModule();
+#ifndef NDEBUG
+ for (const std::string &VariantMapping : VariantMappings) {
+ LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << VariantMapping << "'\n");
+ std::optional<VFInfo> VI = VFABI::tryDemangleForVFABI(VariantMapping, *M);
+ assert(VI && "Cannot add an invalid VFABI name.");
+ assert(M->getNamedValue(VI->VectorName) &&
+ "Cannot add variant to attribute: "
+ "vector function declaration is missing.");
+ }
+#endif
+ CI->addFnAttr(
+ Attribute::get(M->getContext(), MappingsAttrName, Buffer.str()));
+}
+
+void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf,
+ StringRef SectionName, Align Alignment) {
+ // Embed the memory buffer into the module.
+ Constant *ModuleConstant = ConstantDataArray::get(
+ M.getContext(), ArrayRef(Buf.getBufferStart(), Buf.getBufferSize()));
+ GlobalVariable *GV = new GlobalVariable(
+ M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage,
+ ModuleConstant, "llvm.embedded.object");
+ GV->setSection(SectionName);
+ GV->setAlignment(Alignment);
+
+ LLVMContext &Ctx = M.getContext();
+ NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.embedded.objects");
+ Metadata *MDVals[] = {ConstantAsMetadata::get(GV),
+ MDString::get(Ctx, SectionName)};
+
+ MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
+ GV->setMetadata(LLVMContext::MD_exclude, llvm::MDNode::get(Ctx, {}));
+
+ appendToCompilerUsed(M, GV);
+}
+
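+// Rough before/after sketch (illustrative IR, invented names): an ifunc such
+// as
+//   @f = ifunc i32 (), ptr @f_resolver
+// is lowered to an internal table filled in by a new constructor,
+//   @tbl = internal global [1 x ptr] poison
+//   ctor: store ptr (call @f_resolver()), @tbl[0]
+// and each instruction that used @f now loads the pointer from @tbl[0] instead.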
+bool llvm::lowerGlobalIFuncUsersAsGlobalCtor(
+ Module &M, ArrayRef<GlobalIFunc *> FilteredIFuncsToLower) {
+ SmallVector<GlobalIFunc *, 32> AllIFuncs;
+ ArrayRef<GlobalIFunc *> IFuncsToLower = FilteredIFuncsToLower;
+ if (FilteredIFuncsToLower.empty()) { // Default to lowering all ifuncs
+ for (GlobalIFunc &GI : M.ifuncs())
+ AllIFuncs.push_back(&GI);
+ IFuncsToLower = AllIFuncs;
+ }
+
+ bool UnhandledUsers = false;
+ LLVMContext &Ctx = M.getContext();
+ const DataLayout &DL = M.getDataLayout();
+
+ PointerType *TableEntryTy =
+ Ctx.supportsTypedPointers()
+ ? PointerType::get(Type::getInt8Ty(Ctx), DL.getProgramAddressSpace())
+ : PointerType::get(Ctx, DL.getProgramAddressSpace());
+
+ ArrayType *FuncPtrTableTy =
+ ArrayType::get(TableEntryTy, IFuncsToLower.size());
+
+ Align PtrAlign = DL.getABITypeAlign(TableEntryTy);
+
+ // Create a global table of function pointers we'll initialize in a global
+ // constructor.
+ auto *FuncPtrTable = new GlobalVariable(
+ M, FuncPtrTableTy, false, GlobalValue::InternalLinkage,
+ PoisonValue::get(FuncPtrTableTy), "", nullptr,
+ GlobalVariable::NotThreadLocal, DL.getDefaultGlobalsAddressSpace());
+ FuncPtrTable->setAlignment(PtrAlign);
+
+ // Create a function to initialize the function pointer table.
+ Function *NewCtor = Function::Create(
+ FunctionType::get(Type::getVoidTy(Ctx), false), Function::InternalLinkage,
+ DL.getProgramAddressSpace(), "", &M);
+
+ BasicBlock *BB = BasicBlock::Create(Ctx, "", NewCtor);
+ IRBuilder<> InitBuilder(BB);
+
+ size_t TableIndex = 0;
+ for (GlobalIFunc *GI : IFuncsToLower) {
+ Function *ResolvedFunction = GI->getResolverFunction();
+
+ // We don't know what to pass to a resolver function taking arguments
+ //
+ // FIXME: Is this even valid? clang and gcc don't complain but this
+ // probably should be invalid IR. We could just pass through undef.
+ if (!std::empty(ResolvedFunction->getFunctionType()->params())) {
+ LLVM_DEBUG(dbgs() << "Not lowering ifunc resolver function "
+ << ResolvedFunction->getName() << " with parameters\n");
+ UnhandledUsers = true;
+ continue;
+ }
+
+ // Initialize the function pointer table.
+ CallInst *ResolvedFunc = InitBuilder.CreateCall(ResolvedFunction);
+ Value *Casted = InitBuilder.CreatePointerCast(ResolvedFunc, TableEntryTy);
+ Constant *GEP = cast<Constant>(InitBuilder.CreateConstInBoundsGEP2_32(
+ FuncPtrTableTy, FuncPtrTable, 0, TableIndex++));
+ InitBuilder.CreateAlignedStore(Casted, GEP, PtrAlign);
+
+ // Update all users to load a pointer from the global table.
+ for (User *User : make_early_inc_range(GI->users())) {
+ Instruction *UserInst = dyn_cast<Instruction>(User);
+ if (!UserInst) {
+ // TODO: Should handle constantexpr casts in user instructions. Probably
+ // can't do much about constant initializers.
+ UnhandledUsers = true;
+ continue;
+ }
+
+ IRBuilder<> UseBuilder(UserInst);
+ LoadInst *ResolvedTarget =
+ UseBuilder.CreateAlignedLoad(TableEntryTy, GEP, PtrAlign);
+ Value *ResolvedCast =
+ UseBuilder.CreatePointerCast(ResolvedTarget, GI->getType());
+ UserInst->replaceUsesOfWith(GI, ResolvedCast);
+ }
+
+ // If we handled all users, erase the ifunc.
+ if (GI->use_empty())
+ GI->eraseFromParent();
+ }
+
+ InitBuilder.CreateRetVoid();
+
+ PointerType *ConstantDataTy = Ctx.supportsTypedPointers()
+ ? PointerType::get(Type::getInt8Ty(Ctx), 0)
+ : PointerType::get(Ctx, 0);
+
+ // TODO: Is this the right priority? Probably should be before any other
+ // constructors?
+ const int Priority = 10;
+ appendToGlobalCtors(M, NewCtor, Priority,
+ ConstantPointerNull::get(ConstantDataTy));
+ return UnhandledUsers;
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/NameAnonGlobals.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/NameAnonGlobals.cpp
new file mode 100644
index 0000000000..d4ab450406
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/NameAnonGlobals.cpp
@@ -0,0 +1,90 @@
+//===- NameAnonGlobals.cpp - ThinLTO Support: Name Unnamed Globals --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements naming anonymous globals to make sure they can be
+// referred to by ThinLTO.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/NameAnonGlobals.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+namespace {
+// Compute a "unique" hash for the module based on the name of the public
+// globals.
+class ModuleHasher {
+ Module &TheModule;
+ std::string TheHash;
+
+public:
+ ModuleHasher(Module &M) : TheModule(M) {}
+
+ /// Return the lazily computed hash.
+ std::string &get() {
+ if (!TheHash.empty())
+ // Cache hit :)
+ return TheHash;
+
+ MD5 Hasher;
+ for (auto &F : TheModule) {
+ if (F.isDeclaration() || F.hasLocalLinkage() || !F.hasName())
+ continue;
+ auto Name = F.getName();
+ Hasher.update(Name);
+ }
+ for (auto &GV : TheModule.globals()) {
+ if (GV.isDeclaration() || GV.hasLocalLinkage() || !GV.hasName())
+ continue;
+ auto Name = GV.getName();
+ Hasher.update(Name);
+ }
+
+ // Now return the result.
+ MD5::MD5Result Hash;
+ Hasher.final(Hash);
+ SmallString<32> Result;
+ MD5::stringifyResult(Hash, Result);
+ TheHash = std::string(Result.str());
+ return TheHash;
+ }
+};
+} // end anonymous namespace
+
+// Rename all the anon globals in the module
+bool llvm::nameUnamedGlobals(Module &M) {
+ bool Changed = false;
+ ModuleHasher ModuleHash(M);
+ int count = 0;
+ auto RenameIfNeed = [&](GlobalValue &GV) {
+ if (GV.hasName())
+ return;
+ GV.setName(Twine("anon.") + ModuleHash.get() + "." + Twine(count++));
+ Changed = true;
+ };
+ for (auto &GO : M.global_objects())
+ RenameIfNeed(GO);
+ for (auto &GA : M.aliases())
+ RenameIfNeed(GA);
+
+ return Changed;
+}
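+
+// For example, with a (made-up, truncated) module hash of "ab12cd34" the first
+// unnamed global becomes "anon.ab12cd34.0", the next "anon.ab12cd34.1", etc.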
+
+PreservedAnalyses NameAnonGlobalPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ if (!nameUnamedGlobals(M))
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/PredicateInfo.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/PredicateInfo.cpp
new file mode 100644
index 0000000000..1f16ba78bd
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/PredicateInfo.cpp
@@ -0,0 +1,948 @@
+//===-- PredicateInfo.cpp - PredicateInfo Builder--------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------===//
+//
+// This file implements the PredicateInfo class.
+//
+//===----------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/PredicateInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/IR/AssemblyAnnotationWriter.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugCounter.h"
+#include "llvm/Support/FormattedStream.h"
+#include <algorithm>
+#define DEBUG_TYPE "predicateinfo"
+using namespace llvm;
+using namespace PatternMatch;
+
+INITIALIZE_PASS_BEGIN(PredicateInfoPrinterLegacyPass, "print-predicateinfo",
+ "PredicateInfo Printer", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_END(PredicateInfoPrinterLegacyPass, "print-predicateinfo",
+ "PredicateInfo Printer", false, false)
+static cl::opt<bool> VerifyPredicateInfo(
+ "verify-predicateinfo", cl::init(false), cl::Hidden,
+ cl::desc("Verify PredicateInfo in legacy printer pass."));
+DEBUG_COUNTER(RenameCounter, "predicateinfo-rename",
+ "Controls which variables are renamed with predicateinfo");
+
+// Maximum number of conditions considered for renaming for each branch/assume.
+// This limits renaming of deep and/or chains.
+static const unsigned MaxCondsPerBranch = 8;
+
+namespace {
+// Given a predicate info that is a type of branching terminator, get the
+// branching block.
+const BasicBlock *getBranchBlock(const PredicateBase *PB) {
+ assert(isa<PredicateWithEdge>(PB) &&
+ "Only branches and switches should have PHIOnly defs that "
+ "require branch blocks.");
+ return cast<PredicateWithEdge>(PB)->From;
+}
+
+// Given a predicate info that is a type of branching terminator, get the
+// branching terminator.
+static Instruction *getBranchTerminator(const PredicateBase *PB) {
+ assert(isa<PredicateWithEdge>(PB) &&
+ "Not a predicate info type we know how to get a terminator from.");
+ return cast<PredicateWithEdge>(PB)->From->getTerminator();
+}
+
+// Given a predicate info that is a type of branching terminator, get the
+// edge this predicate info represents
+std::pair<BasicBlock *, BasicBlock *> getBlockEdge(const PredicateBase *PB) {
+ assert(isa<PredicateWithEdge>(PB) &&
+ "Not a predicate info type we know how to get an edge from.");
+ const auto *PEdge = cast<PredicateWithEdge>(PB);
+ return std::make_pair(PEdge->From, PEdge->To);
+}
+}
+
+namespace llvm {
+enum LocalNum {
+ // Operations that must appear first in the block.
+ LN_First,
+ // Operations that are somewhere in the middle of the block, and are sorted on
+ // demand.
+ LN_Middle,
+ // Operations that must appear last in a block, like successor phi node uses.
+ LN_Last
+};
+
+// Associate global and local DFS info with defs and uses, so we can sort them
+// into a global domination ordering.
+struct ValueDFS {
+ int DFSIn = 0;
+ int DFSOut = 0;
+ unsigned int LocalNum = LN_Middle;
+ // Only one of Def or Use will be set.
+ Value *Def = nullptr;
+ Use *U = nullptr;
+ // Neither PInfo nor EdgeOnly participate in the ordering
+ PredicateBase *PInfo = nullptr;
+ bool EdgeOnly = false;
+};
+
+// Perform a strict weak ordering on instructions and arguments.
+static bool valueComesBefore(const Value *A, const Value *B) {
+ auto *ArgA = dyn_cast_or_null<Argument>(A);
+ auto *ArgB = dyn_cast_or_null<Argument>(B);
+ if (ArgA && !ArgB)
+ return true;
+ if (ArgB && !ArgA)
+ return false;
+ if (ArgA && ArgB)
+ return ArgA->getArgNo() < ArgB->getArgNo();
+ return cast<Instruction>(A)->comesBefore(cast<Instruction>(B));
+}
+
+// This compares ValueDFS structures. Doing so allows us to walk the minimum
+// number of instructions necessary to compute our def/use ordering.
+struct ValueDFS_Compare {
+ DominatorTree &DT;
+ ValueDFS_Compare(DominatorTree &DT) : DT(DT) {}
+
+ bool operator()(const ValueDFS &A, const ValueDFS &B) const {
+ if (&A == &B)
+ return false;
+    // The only case we can't directly compare them is when they are in the
+    // same block, and both have localnum == middle. In that case, we have to
+    // use comesBefore to see what the real ordering is, because they are in
+    // the same basic block.
+
+ assert((A.DFSIn != B.DFSIn || A.DFSOut == B.DFSOut) &&
+ "Equal DFS-in numbers imply equal out numbers");
+ bool SameBlock = A.DFSIn == B.DFSIn;
+
+ // We want to put the def that will get used for a given set of phi uses,
+ // before those phi uses.
+ // So we sort by edge, then by def.
+ // Note that only phi nodes uses and defs can come last.
+ if (SameBlock && A.LocalNum == LN_Last && B.LocalNum == LN_Last)
+ return comparePHIRelated(A, B);
+
+ bool isADef = A.Def;
+ bool isBDef = B.Def;
+ if (!SameBlock || A.LocalNum != LN_Middle || B.LocalNum != LN_Middle)
+ return std::tie(A.DFSIn, A.LocalNum, isADef) <
+ std::tie(B.DFSIn, B.LocalNum, isBDef);
+ return localComesBefore(A, B);
+ }
+
+ // For a phi use, or a non-materialized def, return the edge it represents.
+ std::pair<BasicBlock *, BasicBlock *> getBlockEdge(const ValueDFS &VD) const {
+ if (!VD.Def && VD.U) {
+ auto *PHI = cast<PHINode>(VD.U->getUser());
+ return std::make_pair(PHI->getIncomingBlock(*VD.U), PHI->getParent());
+ }
+ // This is really a non-materialized def.
+ return ::getBlockEdge(VD.PInfo);
+ }
+
+ // For two phi related values, return the ordering.
+ bool comparePHIRelated(const ValueDFS &A, const ValueDFS &B) const {
+ BasicBlock *ASrc, *ADest, *BSrc, *BDest;
+ std::tie(ASrc, ADest) = getBlockEdge(A);
+ std::tie(BSrc, BDest) = getBlockEdge(B);
+
+#ifndef NDEBUG
+ // This function should only be used for values in the same BB, check that.
+ DomTreeNode *DomASrc = DT.getNode(ASrc);
+ DomTreeNode *DomBSrc = DT.getNode(BSrc);
+ assert(DomASrc->getDFSNumIn() == (unsigned)A.DFSIn &&
+ "DFS numbers for A should match the ones of the source block");
+ assert(DomBSrc->getDFSNumIn() == (unsigned)B.DFSIn &&
+ "DFS numbers for B should match the ones of the source block");
+ assert(A.DFSIn == B.DFSIn && "Values must be in the same block");
+#endif
+ (void)ASrc;
+ (void)BSrc;
+
+ // Use DFS numbers to compare destination blocks, to guarantee a
+ // deterministic order.
+ DomTreeNode *DomADest = DT.getNode(ADest);
+ DomTreeNode *DomBDest = DT.getNode(BDest);
+ unsigned AIn = DomADest->getDFSNumIn();
+ unsigned BIn = DomBDest->getDFSNumIn();
+ bool isADef = A.Def;
+ bool isBDef = B.Def;
+ assert((!A.Def || !A.U) && (!B.Def || !B.U) &&
+ "Def and U cannot be set at the same time");
+ // Now sort by edge destination and then defs before uses.
+ return std::tie(AIn, isADef) < std::tie(BIn, isBDef);
+ }
+
+ // Get the definition of an instruction that occurs in the middle of a block.
+ Value *getMiddleDef(const ValueDFS &VD) const {
+ if (VD.Def)
+ return VD.Def;
+ // It's possible for the defs and uses to be null. For branches, the local
+    // numbering will say the placed predicateinfos should go first (i.e.,
+    // LN_First), so we won't be in this function. For assumes, we will end
+    // up here, because we need to order the def we will place relative to the
+ // assume. So for the purpose of ordering, we pretend the def is right
+ // after the assume, because that is where we will insert the info.
+ if (!VD.U) {
+ assert(VD.PInfo &&
+ "No def, no use, and no predicateinfo should not occur");
+ assert(isa<PredicateAssume>(VD.PInfo) &&
+ "Middle of block should only occur for assumes");
+ return cast<PredicateAssume>(VD.PInfo)->AssumeInst->getNextNode();
+ }
+ return nullptr;
+ }
+
+ // Return either the Def, if it's not null, or the user of the Use, if the def
+ // is null.
+ const Instruction *getDefOrUser(const Value *Def, const Use *U) const {
+ if (Def)
+ return cast<Instruction>(Def);
+ return cast<Instruction>(U->getUser());
+ }
+
+ // This performs the necessary local basic block ordering checks to tell
+ // whether A comes before B, where both are in the same basic block.
+ bool localComesBefore(const ValueDFS &A, const ValueDFS &B) const {
+ auto *ADef = getMiddleDef(A);
+ auto *BDef = getMiddleDef(B);
+
+ // See if we have real values or uses. If we have real values, we are
+ // guaranteed they are instructions or arguments. No matter what, we are
+ // guaranteed they are in the same block if they are instructions.
+ auto *ArgA = dyn_cast_or_null<Argument>(ADef);
+ auto *ArgB = dyn_cast_or_null<Argument>(BDef);
+
+ if (ArgA || ArgB)
+ return valueComesBefore(ArgA, ArgB);
+
+ auto *AInst = getDefOrUser(ADef, A.U);
+ auto *BInst = getDefOrUser(BDef, B.U);
+ return valueComesBefore(AInst, BInst);
+ }
+};
+
+class PredicateInfoBuilder {
+ // Used to store information about each value we might rename.
+ struct ValueInfo {
+ SmallVector<PredicateBase *, 4> Infos;
+ };
+
+ PredicateInfo &PI;
+ Function &F;
+ DominatorTree &DT;
+ AssumptionCache &AC;
+
+ // This stores info about each operand or comparison result we make copies
+ // of. The real ValueInfos start at index 1, index 0 is unused so that we
+ // can more easily detect invalid indexing.
+ SmallVector<ValueInfo, 32> ValueInfos;
+
+ // This gives the index into the ValueInfos array for a given Value. Because
+ // 0 is not a valid Value Info index, you can use DenseMap::lookup and tell
+ // whether it returned a valid result.
+ DenseMap<Value *, unsigned int> ValueInfoNums;
+
+ // The set of edges along which we can only handle phi uses, due to critical
+ // edges.
+ DenseSet<std::pair<BasicBlock *, BasicBlock *>> EdgeUsesOnly;
+
+ ValueInfo &getOrCreateValueInfo(Value *);
+ const ValueInfo &getValueInfo(Value *) const;
+
+ void processAssume(IntrinsicInst *, BasicBlock *,
+ SmallVectorImpl<Value *> &OpsToRename);
+ void processBranch(BranchInst *, BasicBlock *,
+ SmallVectorImpl<Value *> &OpsToRename);
+ void processSwitch(SwitchInst *, BasicBlock *,
+ SmallVectorImpl<Value *> &OpsToRename);
+ void renameUses(SmallVectorImpl<Value *> &OpsToRename);
+ void addInfoFor(SmallVectorImpl<Value *> &OpsToRename, Value *Op,
+ PredicateBase *PB);
+
+ typedef SmallVectorImpl<ValueDFS> ValueDFSStack;
+ void convertUsesToDFSOrdered(Value *, SmallVectorImpl<ValueDFS> &);
+ Value *materializeStack(unsigned int &, ValueDFSStack &, Value *);
+ bool stackIsInScope(const ValueDFSStack &, const ValueDFS &) const;
+ void popStackUntilDFSScope(ValueDFSStack &, const ValueDFS &);
+
+public:
+ PredicateInfoBuilder(PredicateInfo &PI, Function &F, DominatorTree &DT,
+ AssumptionCache &AC)
+ : PI(PI), F(F), DT(DT), AC(AC) {
+ // Push an empty operand info so that we can detect 0 as not finding one
+ ValueInfos.resize(1);
+ }
+
+ void buildPredicateInfo();
+};
+
+bool PredicateInfoBuilder::stackIsInScope(const ValueDFSStack &Stack,
+ const ValueDFS &VDUse) const {
+ if (Stack.empty())
+ return false;
+ // If it's a phi only use, make sure it's for this phi node edge, and that the
+ // use is in a phi node. If it's anything else, and the top of the stack is
+ // EdgeOnly, we need to pop the stack. We deliberately sort phi uses next to
+ // the defs they must go with so that we can know it's time to pop the stack
+ // when we hit the end of the phi uses for a given def.
+ if (Stack.back().EdgeOnly) {
+ if (!VDUse.U)
+ return false;
+ auto *PHI = dyn_cast<PHINode>(VDUse.U->getUser());
+ if (!PHI)
+ return false;
+ // Check edge
+ BasicBlock *EdgePred = PHI->getIncomingBlock(*VDUse.U);
+ if (EdgePred != getBranchBlock(Stack.back().PInfo))
+ return false;
+
+ // Use dominates, which knows how to handle edge dominance.
+ return DT.dominates(getBlockEdge(Stack.back().PInfo), *VDUse.U);
+ }
+
+ return (VDUse.DFSIn >= Stack.back().DFSIn &&
+ VDUse.DFSOut <= Stack.back().DFSOut);
+}
+
+void PredicateInfoBuilder::popStackUntilDFSScope(ValueDFSStack &Stack,
+ const ValueDFS &VD) {
+ while (!Stack.empty() && !stackIsInScope(Stack, VD))
+ Stack.pop_back();
+}
+
+// Convert the uses of Op into a vector of uses, associating global and local
+// DFS info with each one.
+void PredicateInfoBuilder::convertUsesToDFSOrdered(
+ Value *Op, SmallVectorImpl<ValueDFS> &DFSOrderedSet) {
+ for (auto &U : Op->uses()) {
+ if (auto *I = dyn_cast<Instruction>(U.getUser())) {
+ ValueDFS VD;
+ // Put the phi node uses in the incoming block.
+ BasicBlock *IBlock;
+ if (auto *PN = dyn_cast<PHINode>(I)) {
+ IBlock = PN->getIncomingBlock(U);
+ // Make phi node users appear last in the incoming block
+ // they are from.
+ VD.LocalNum = LN_Last;
+ } else {
+ // If it's not a phi node use, it is somewhere in the middle of the
+ // block.
+ IBlock = I->getParent();
+ VD.LocalNum = LN_Middle;
+ }
+ DomTreeNode *DomNode = DT.getNode(IBlock);
+ // It's possible our use is in an unreachable block. Skip it if so.
+ if (!DomNode)
+ continue;
+ VD.DFSIn = DomNode->getDFSNumIn();
+ VD.DFSOut = DomNode->getDFSNumOut();
+ VD.U = &U;
+ DFSOrderedSet.push_back(VD);
+ }
+ }
+}
+
+bool shouldRename(Value *V) {
+ // Only want real values, not constants. Additionally, operands with one use
+ // are only being used in the comparison, which means they will not be useful
+ // for us to consider for predicateinfo.
+ return (isa<Instruction>(V) || isa<Argument>(V)) && !V->hasOneUse();
+}
+
+// Collect relevant operations from Comparison that we may want to insert copies
+// for.
+void collectCmpOps(CmpInst *Comparison, SmallVectorImpl<Value *> &CmpOperands) {
+ auto *Op0 = Comparison->getOperand(0);
+ auto *Op1 = Comparison->getOperand(1);
+ if (Op0 == Op1)
+ return;
+
+ CmpOperands.push_back(Op0);
+ CmpOperands.push_back(Op1);
+}
+
+// Add Op, PB to the list of value infos for Op, and mark Op to be renamed.
+void PredicateInfoBuilder::addInfoFor(SmallVectorImpl<Value *> &OpsToRename,
+ Value *Op, PredicateBase *PB) {
+ auto &OperandInfo = getOrCreateValueInfo(Op);
+ if (OperandInfo.Infos.empty())
+ OpsToRename.push_back(Op);
+ PI.AllInfos.push_back(PB);
+ OperandInfo.Infos.push_back(PB);
+}
+
+// Process an assume instruction and place relevant operations we want to rename
+// into OpsToRename.
+void PredicateInfoBuilder::processAssume(
+ IntrinsicInst *II, BasicBlock *AssumeBB,
+ SmallVectorImpl<Value *> &OpsToRename) {
+ SmallVector<Value *, 4> Worklist;
+ SmallPtrSet<Value *, 4> Visited;
+ Worklist.push_back(II->getOperand(0));
+ while (!Worklist.empty()) {
+ Value *Cond = Worklist.pop_back_val();
+ if (!Visited.insert(Cond).second)
+ continue;
+ if (Visited.size() > MaxCondsPerBranch)
+ break;
+
+ Value *Op0, *Op1;
+ if (match(Cond, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
+ Worklist.push_back(Op1);
+ Worklist.push_back(Op0);
+ }
+
+ SmallVector<Value *, 4> Values;
+ Values.push_back(Cond);
+ if (auto *Cmp = dyn_cast<CmpInst>(Cond))
+ collectCmpOps(Cmp, Values);
+
+ for (Value *V : Values) {
+ if (shouldRename(V)) {
+ auto *PA = new PredicateAssume(V, II, Cond);
+ addInfoFor(OpsToRename, V, PA);
+ }
+ }
+ }
+}
+
+// Process a block terminating branch, and place relevant operations to be
+// renamed into OpsToRename.
+void PredicateInfoBuilder::processBranch(
+ BranchInst *BI, BasicBlock *BranchBB,
+ SmallVectorImpl<Value *> &OpsToRename) {
+ BasicBlock *FirstBB = BI->getSuccessor(0);
+ BasicBlock *SecondBB = BI->getSuccessor(1);
+
+ for (BasicBlock *Succ : {FirstBB, SecondBB}) {
+ bool TakenEdge = Succ == FirstBB;
+      // Don't try to insert on a self-edge. This is mainly because it will be
+      // eliminated during renaming anyway.
+ if (Succ == BranchBB)
+ continue;
+
+ SmallVector<Value *, 4> Worklist;
+ SmallPtrSet<Value *, 4> Visited;
+ Worklist.push_back(BI->getCondition());
+ while (!Worklist.empty()) {
+ Value *Cond = Worklist.pop_back_val();
+ if (!Visited.insert(Cond).second)
+ continue;
+ if (Visited.size() > MaxCondsPerBranch)
+ break;
+
+ Value *Op0, *Op1;
+ if (TakenEdge ? match(Cond, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))
+ : match(Cond, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
+ Worklist.push_back(Op1);
+ Worklist.push_back(Op0);
+ }
+
+ SmallVector<Value *, 4> Values;
+ Values.push_back(Cond);
+ if (auto *Cmp = dyn_cast<CmpInst>(Cond))
+ collectCmpOps(Cmp, Values);
+
+ for (Value *V : Values) {
+ if (shouldRename(V)) {
+ PredicateBase *PB =
+ new PredicateBranch(V, BranchBB, Succ, Cond, TakenEdge);
+ addInfoFor(OpsToRename, V, PB);
+ if (!Succ->getSinglePredecessor())
+ EdgeUsesOnly.insert({BranchBB, Succ});
+ }
+ }
+ }
+ }
+}
+// Process a block terminating switch, and place relevant operations to be
+// renamed into OpsToRename.
+void PredicateInfoBuilder::processSwitch(
+ SwitchInst *SI, BasicBlock *BranchBB,
+ SmallVectorImpl<Value *> &OpsToRename) {
+ Value *Op = SI->getCondition();
+ if ((!isa<Instruction>(Op) && !isa<Argument>(Op)) || Op->hasOneUse())
+ return;
+
+ // Remember how many outgoing edges there are to every successor.
+ SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
+ for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *TargetBlock = SI->getSuccessor(i);
+ ++SwitchEdges[TargetBlock];
+ }
+
+ // Now propagate info for each case value
+ for (auto C : SI->cases()) {
+ BasicBlock *TargetBlock = C.getCaseSuccessor();
+ if (SwitchEdges.lookup(TargetBlock) == 1) {
+ PredicateSwitch *PS = new PredicateSwitch(
+ Op, SI->getParent(), TargetBlock, C.getCaseValue(), SI);
+ addInfoFor(OpsToRename, Op, PS);
+ if (!TargetBlock->getSinglePredecessor())
+ EdgeUsesOnly.insert({BranchBB, TargetBlock});
+ }
+ }
+}
+
+// Build predicate info for our function
+void PredicateInfoBuilder::buildPredicateInfo() {
+ DT.updateDFSNumbers();
+ // Collect operands to rename from all conditional branch terminators, as well
+ // as assume statements.
+ SmallVector<Value *, 8> OpsToRename;
+ for (auto *DTN : depth_first(DT.getRootNode())) {
+ BasicBlock *BranchBB = DTN->getBlock();
+ if (auto *BI = dyn_cast<BranchInst>(BranchBB->getTerminator())) {
+ if (!BI->isConditional())
+ continue;
+ // Can't insert conditional information if they all go to the same place.
+ if (BI->getSuccessor(0) == BI->getSuccessor(1))
+ continue;
+ processBranch(BI, BranchBB, OpsToRename);
+ } else if (auto *SI = dyn_cast<SwitchInst>(BranchBB->getTerminator())) {
+ processSwitch(SI, BranchBB, OpsToRename);
+ }
+ }
+ for (auto &Assume : AC.assumptions()) {
+ if (auto *II = dyn_cast_or_null<IntrinsicInst>(Assume))
+ if (DT.isReachableFromEntry(II->getParent()))
+ processAssume(II, II->getParent(), OpsToRename);
+ }
+ // Now rename all our operations.
+ renameUses(OpsToRename);
+}
+
+// Given the renaming stack, make all the operands currently on the stack real
+// by inserting them into the IR. Return the last operation's value.
+Value *PredicateInfoBuilder::materializeStack(unsigned int &Counter,
+ ValueDFSStack &RenameStack,
+ Value *OrigOp) {
+ // Find the first thing we have to materialize
+ auto RevIter = RenameStack.rbegin();
+ for (; RevIter != RenameStack.rend(); ++RevIter)
+ if (RevIter->Def)
+ break;
+
+ size_t Start = RevIter - RenameStack.rbegin();
+ // The maximum number of things we should be trying to materialize at once
+  // right now is 4, depending on whether we had an assume and a branch that
+  // both used and-of conditions.
+ for (auto RenameIter = RenameStack.end() - Start;
+ RenameIter != RenameStack.end(); ++RenameIter) {
+ auto *Op =
+ RenameIter == RenameStack.begin() ? OrigOp : (RenameIter - 1)->Def;
+ ValueDFS &Result = *RenameIter;
+ auto *ValInfo = Result.PInfo;
+ ValInfo->RenamedOp = (RenameStack.end() - Start) == RenameStack.begin()
+ ? OrigOp
+ : (RenameStack.end() - Start - 1)->Def;
+ // For edge predicates, we can just place the operand in the block before
+ // the terminator. For assume, we have to place it right before the assume
+ // to ensure we dominate all of our uses. Always insert right before the
+ // relevant instruction (terminator, assume), so that we insert in proper
+ // order in the case of multiple predicateinfo in the same block.
+ // The number of named values is used to detect if a new declaration was
+ // added. If so, that declaration is tracked so that it can be removed when
+    // the analysis is done. The corner case where a new declaration results in
+    // a name clash and the old name gets renamed is not considered, as that
+    // would represent an invalid module.
+ if (isa<PredicateWithEdge>(ValInfo)) {
+ IRBuilder<> B(getBranchTerminator(ValInfo));
+ auto NumDecls = F.getParent()->getNumNamedValues();
+ Function *IF = Intrinsic::getDeclaration(
+ F.getParent(), Intrinsic::ssa_copy, Op->getType());
+ if (NumDecls != F.getParent()->getNumNamedValues())
+ PI.CreatedDeclarations.insert(IF);
+ CallInst *PIC =
+ B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++));
+ PI.PredicateMap.insert({PIC, ValInfo});
+ Result.Def = PIC;
+ } else {
+ auto *PAssume = dyn_cast<PredicateAssume>(ValInfo);
+ assert(PAssume &&
+ "Should not have gotten here without it being an assume");
+ // Insert the predicate directly after the assume. While it also holds
+ // directly before it, assume(i1 true) is not a useful fact.
+ IRBuilder<> B(PAssume->AssumeInst->getNextNode());
+ auto NumDecls = F.getParent()->getNumNamedValues();
+ Function *IF = Intrinsic::getDeclaration(
+ F.getParent(), Intrinsic::ssa_copy, Op->getType());
+ if (NumDecls != F.getParent()->getNumNamedValues())
+ PI.CreatedDeclarations.insert(IF);
+ CallInst *PIC = B.CreateCall(IF, Op);
+ PI.PredicateMap.insert({PIC, ValInfo});
+ Result.Def = PIC;
+ }
+ }
+ return RenameStack.back().Def;
+}
+
+// Instead of the standard SSA renaming algorithm, which is O(Number of
+// instructions), and walks the entire dominator tree, we walk only the defs +
+// uses. The standard SSA renaming algorithm does not really rely on the
+// dominator tree except to order the stack push/pops of the renaming stacks, so
+// that defs end up getting pushed before hitting the correct uses. This does
+// not require the dominator tree, only the *order* of the dominator tree. The
+// complete and correct ordering of the defs and uses, in dominator tree is
+// contained in the DFS numbering of the dominator tree. So we sort the defs and
+// uses into the DFS ordering, and then just use the renaming stack as per
+// normal, pushing when we hit a def (which is a predicateinfo instruction),
+// popping when we are out of the dfs scope for that def, and replacing any uses
+// with top of stack if it exists. In order to handle liveness without
+// propagating liveness info, we don't actually insert the predicateinfo
+// instruction def until we see a use that it would dominate. Once we see such
+// a use, we materialize the predicateinfo instruction in the right place and
+// use it.
+//
+// TODO: Use this algorithm to perform fast single-variable renaming in
+// promotememtoreg and memoryssa.
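+// Toy example (invented names): for
+//   br i1 (icmp sgt i32 %x, 0), label %then, label %else
+// a copy "%x.0 = call i32 @llvm.ssa.copy.i32(i32 %x)" is inserted before the
+// branch and recorded as holding on the %then edge; uses of %x dominated by
+// that edge are rewritten to %x.0, so consumers can look up the ">0" fact in
+// PredicateMap.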
+void PredicateInfoBuilder::renameUses(SmallVectorImpl<Value *> &OpsToRename) {
+ ValueDFS_Compare Compare(DT);
+ // Compute liveness, and rename in O(uses) per Op.
+ for (auto *Op : OpsToRename) {
+ LLVM_DEBUG(dbgs() << "Visiting " << *Op << "\n");
+ unsigned Counter = 0;
+ SmallVector<ValueDFS, 16> OrderedUses;
+ const auto &ValueInfo = getValueInfo(Op);
+ // Insert the possible copies into the def/use list.
+ // They will become real copies if we find a real use for them, and never
+ // created otherwise.
+ for (const auto &PossibleCopy : ValueInfo.Infos) {
+ ValueDFS VD;
+ // Determine where we are going to place the copy by the copy type.
+ // The predicate info for branches always come first, they will get
+ // materialized in the split block at the top of the block.
+ // The predicate info for assumes will be somewhere in the middle,
+ // it will get materialized in front of the assume.
+ if (const auto *PAssume = dyn_cast<PredicateAssume>(PossibleCopy)) {
+ VD.LocalNum = LN_Middle;
+ DomTreeNode *DomNode = DT.getNode(PAssume->AssumeInst->getParent());
+ if (!DomNode)
+ continue;
+ VD.DFSIn = DomNode->getDFSNumIn();
+ VD.DFSOut = DomNode->getDFSNumOut();
+ VD.PInfo = PossibleCopy;
+ OrderedUses.push_back(VD);
+ } else if (isa<PredicateWithEdge>(PossibleCopy)) {
+ // If we can only do phi uses, we treat it like it's in the branch
+ // block, and handle it specially. We know that it goes last, and only
+ // dominate phi uses.
+ auto BlockEdge = getBlockEdge(PossibleCopy);
+ if (EdgeUsesOnly.count(BlockEdge)) {
+ VD.LocalNum = LN_Last;
+ auto *DomNode = DT.getNode(BlockEdge.first);
+ if (DomNode) {
+ VD.DFSIn = DomNode->getDFSNumIn();
+ VD.DFSOut = DomNode->getDFSNumOut();
+ VD.PInfo = PossibleCopy;
+ VD.EdgeOnly = true;
+ OrderedUses.push_back(VD);
+ }
+ } else {
+ // Otherwise, we are in the split block (even though we perform
+ // insertion in the branch block).
+ // Insert a possible copy at the split block and before the branch.
+ VD.LocalNum = LN_First;
+ auto *DomNode = DT.getNode(BlockEdge.second);
+ if (DomNode) {
+ VD.DFSIn = DomNode->getDFSNumIn();
+ VD.DFSOut = DomNode->getDFSNumOut();
+ VD.PInfo = PossibleCopy;
+ OrderedUses.push_back(VD);
+ }
+ }
+ }
+ }
+
+ convertUsesToDFSOrdered(Op, OrderedUses);
+ // Here we require a stable sort because we do not bother to try to
+ // assign an order to the operands the uses represent. Thus, two
+ // uses in the same instruction do not have a strict sort order
+ // currently and will be considered equal. We could get rid of the
+ // stable sort by creating one if we wanted.
+ llvm::stable_sort(OrderedUses, Compare);
+ SmallVector<ValueDFS, 8> RenameStack;
+ // For each use, sorted into dfs order, push values and replaces uses with
+ // top of stack, which will represent the reaching def.
+ for (auto &VD : OrderedUses) {
+ // We currently do not materialize copy over copy, but we should decide if
+ // we want to.
+ bool PossibleCopy = VD.PInfo != nullptr;
+ if (RenameStack.empty()) {
+ LLVM_DEBUG(dbgs() << "Rename Stack is empty\n");
+ } else {
+ LLVM_DEBUG(dbgs() << "Rename Stack Top DFS numbers are ("
+ << RenameStack.back().DFSIn << ","
+ << RenameStack.back().DFSOut << ")\n");
+ }
+
+ LLVM_DEBUG(dbgs() << "Current DFS numbers are (" << VD.DFSIn << ","
+ << VD.DFSOut << ")\n");
+
+ bool ShouldPush = (VD.Def || PossibleCopy);
+ bool OutOfScope = !stackIsInScope(RenameStack, VD);
+ if (OutOfScope || ShouldPush) {
+ // Sync to our current scope.
+ popStackUntilDFSScope(RenameStack, VD);
+ if (ShouldPush) {
+ RenameStack.push_back(VD);
+ }
+ }
+      // If we get to this point and the stack is empty, we must have a use
+      // with no renaming needed; just skip it.
+ if (RenameStack.empty())
+ continue;
+ // Skip values, only want to rename the uses
+ if (VD.Def || PossibleCopy)
+ continue;
+ if (!DebugCounter::shouldExecute(RenameCounter)) {
+ LLVM_DEBUG(dbgs() << "Skipping execution due to debug counter\n");
+ continue;
+ }
+ ValueDFS &Result = RenameStack.back();
+
+ // If the possible copy dominates something, materialize our stack up to
+ // this point. This ensures every comparison that affects our operation
+ // ends up with predicateinfo.
+ if (!Result.Def)
+ Result.Def = materializeStack(Counter, RenameStack, Op);
+
+ LLVM_DEBUG(dbgs() << "Found replacement " << *Result.Def << " for "
+ << *VD.U->get() << " in " << *(VD.U->getUser())
+ << "\n");
+ assert(DT.dominates(cast<Instruction>(Result.Def), *VD.U) &&
+ "Predicateinfo def should have dominated this use");
+ VD.U->set(Result.Def);
+ }
+ }
+}
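+
+// Illustrative sketch (not part of the upstream LLVM sources above): a
+// minimal, self-contained rendering of the renaming scheme renameUses()
+// implements, using hypothetical names and no LLVM types. Items are sorted by
+// dominator-tree DFS numbers, a def is pushed on a stack when visited, the
+// stack is popped once the walk leaves a def's (DFSIn, DFSOut) interval, and
+// each use is resolved to the def currently on top of the stack.
+namespace renaming_sketch {
+struct SketchItem {
+  unsigned DFSIn = 0, DFSOut = 0; // DFS interval of the item's dominator node
+  bool IsDef = false;             // defs push; uses read the stack top
+  int Id = -1;                    // hypothetical identifier for reporting
+};
+
+// For the I-th use encountered (in sorted order), UseDefs[I] receives the Id
+// of its reaching def, or -1 if no def dominates it.
+inline void renameSorted(const SketchItem *Items, unsigned NumItems,
+                         int *UseDefs) {
+  SketchItem Stack[64]; // small fixed capacity, enough for a sketch
+  unsigned Top = 0, UseIdx = 0;
+  for (unsigned I = 0; I != NumItems; ++I) {
+    const SketchItem &It = Items[I];
+    // Pop defs whose DFS interval no longer contains the current item.
+    while (Top != 0 && !(Stack[Top - 1].DFSIn <= It.DFSIn &&
+                         It.DFSOut <= Stack[Top - 1].DFSOut))
+      --Top;
+    if (It.IsDef)
+      Stack[Top++] = It; // this def now reaches everything inside its scope
+    else
+      UseDefs[UseIdx++] = (Top != 0) ? Stack[Top - 1].Id : -1;
+  }
+}
+} // namespace renaming_sketch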
+
+PredicateInfoBuilder::ValueInfo &
+PredicateInfoBuilder::getOrCreateValueInfo(Value *Operand) {
+ auto OIN = ValueInfoNums.find(Operand);
+ if (OIN == ValueInfoNums.end()) {
+ // This will grow it
+ ValueInfos.resize(ValueInfos.size() + 1);
+ // This will use the new size and give us a 0 based number of the info
+ auto InsertResult = ValueInfoNums.insert({Operand, ValueInfos.size() - 1});
+ assert(InsertResult.second && "Value info number already existed?");
+ return ValueInfos[InsertResult.first->second];
+ }
+ return ValueInfos[OIN->second];
+}
+
+const PredicateInfoBuilder::ValueInfo &
+PredicateInfoBuilder::getValueInfo(Value *Operand) const {
+ auto OINI = ValueInfoNums.lookup(Operand);
+ assert(OINI != 0 && "Operand was not really in the Value Info Numbers");
+ assert(OINI < ValueInfos.size() &&
+ "Value Info Number greater than size of Value Info Table");
+ return ValueInfos[OINI];
+}
+
+PredicateInfo::PredicateInfo(Function &F, DominatorTree &DT,
+ AssumptionCache &AC)
+ : F(F) {
+ PredicateInfoBuilder Builder(*this, F, DT, AC);
+ Builder.buildPredicateInfo();
+}
+
+// Remove all declarations we created. The PredicateInfo consumers are
+// responsible for removing the ssa_copy calls that were created.
+PredicateInfo::~PredicateInfo() {
+ // Collect function pointers in set first, as SmallSet uses a SmallVector
+ // internally and we have to remove the asserting value handles first.
+ SmallPtrSet<Function *, 20> FunctionPtrs;
+ for (const auto &F : CreatedDeclarations)
+ FunctionPtrs.insert(&*F);
+ CreatedDeclarations.clear();
+
+ for (Function *F : FunctionPtrs) {
+ assert(F->user_begin() == F->user_end() &&
+ "PredicateInfo consumer did not remove all SSA copies.");
+ F->eraseFromParent();
+ }
+}
+
+std::optional<PredicateConstraint> PredicateBase::getConstraint() const {
+ switch (Type) {
+ case PT_Assume:
+ case PT_Branch: {
+ bool TrueEdge = true;
+ if (auto *PBranch = dyn_cast<PredicateBranch>(this))
+ TrueEdge = PBranch->TrueEdge;
+
+ if (Condition == RenamedOp) {
+ return {{CmpInst::ICMP_EQ,
+ TrueEdge ? ConstantInt::getTrue(Condition->getType())
+ : ConstantInt::getFalse(Condition->getType())}};
+ }
+
+ CmpInst *Cmp = dyn_cast<CmpInst>(Condition);
+ if (!Cmp) {
+ // TODO: Make this an assertion once RenamedOp is fully accurate.
+ return std::nullopt;
+ }
+
+ CmpInst::Predicate Pred;
+ Value *OtherOp;
+ if (Cmp->getOperand(0) == RenamedOp) {
+ Pred = Cmp->getPredicate();
+ OtherOp = Cmp->getOperand(1);
+ } else if (Cmp->getOperand(1) == RenamedOp) {
+ Pred = Cmp->getSwappedPredicate();
+ OtherOp = Cmp->getOperand(0);
+ } else {
+ // TODO: Make this an assertion once RenamedOp is fully accurate.
+ return std::nullopt;
+ }
+
+ // Invert predicate along false edge.
+ if (!TrueEdge)
+ Pred = CmpInst::getInversePredicate(Pred);
+
+ return {{Pred, OtherOp}};
+ }
+ case PT_Switch:
+ if (Condition != RenamedOp) {
+ // TODO: Make this an assertion once RenamedOp is fully accurate.
+ return std::nullopt;
+ }
+
+ return {{CmpInst::ICMP_EQ, cast<PredicateSwitch>(this)->CaseValue}};
+ }
+ llvm_unreachable("Unknown predicate type");
+}
+
+void PredicateInfo::verifyPredicateInfo() const {}
+
+char PredicateInfoPrinterLegacyPass::ID = 0;
+
+PredicateInfoPrinterLegacyPass::PredicateInfoPrinterLegacyPass()
+ : FunctionPass(ID) {
+ initializePredicateInfoPrinterLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
+void PredicateInfoPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<DominatorTreeWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+}
+
+// Replace ssa_copy calls created by PredicateInfo with their operand.
+static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) {
+ for (Instruction &Inst : llvm::make_early_inc_range(instructions(F))) {
+ const auto *PI = PredInfo.getPredicateInfoFor(&Inst);
+ auto *II = dyn_cast<IntrinsicInst>(&Inst);
+ if (!PI || !II || II->getIntrinsicID() != Intrinsic::ssa_copy)
+ continue;
+
+ Inst.replaceAllUsesWith(II->getOperand(0));
+ Inst.eraseFromParent();
+ }
+}
+
+bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) {
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ auto PredInfo = std::make_unique<PredicateInfo>(F, DT, AC);
+ PredInfo->print(dbgs());
+ if (VerifyPredicateInfo)
+ PredInfo->verifyPredicateInfo();
+
+ replaceCreatedSSACopys(*PredInfo, F);
+ return false;
+}
+
+PreservedAnalyses PredicateInfoPrinterPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ OS << "PredicateInfo for function: " << F.getName() << "\n";
+ auto PredInfo = std::make_unique<PredicateInfo>(F, DT, AC);
+ PredInfo->print(OS);
+
+ replaceCreatedSSACopys(*PredInfo, F);
+ return PreservedAnalyses::all();
+}
+
+/// An assembly annotator class to print PredicateInfo information in
+/// comments.
+class PredicateInfoAnnotatedWriter : public AssemblyAnnotationWriter {
+ friend class PredicateInfo;
+ const PredicateInfo *PredInfo;
+
+public:
+ PredicateInfoAnnotatedWriter(const PredicateInfo *M) : PredInfo(M) {}
+
+ void emitBasicBlockStartAnnot(const BasicBlock *BB,
+ formatted_raw_ostream &OS) override {}
+
+ void emitInstructionAnnot(const Instruction *I,
+ formatted_raw_ostream &OS) override {
+ if (const auto *PI = PredInfo->getPredicateInfoFor(I)) {
+ OS << "; Has predicate info\n";
+ if (const auto *PB = dyn_cast<PredicateBranch>(PI)) {
+ OS << "; branch predicate info { TrueEdge: " << PB->TrueEdge
+ << " Comparison:" << *PB->Condition << " Edge: [";
+ PB->From->printAsOperand(OS);
+ OS << ",";
+ PB->To->printAsOperand(OS);
+ OS << "]";
+ } else if (const auto *PS = dyn_cast<PredicateSwitch>(PI)) {
+ OS << "; switch predicate info { CaseValue: " << *PS->CaseValue
+ << " Switch:" << *PS->Switch << " Edge: [";
+ PS->From->printAsOperand(OS);
+ OS << ",";
+ PS->To->printAsOperand(OS);
+ OS << "]";
+ } else if (const auto *PA = dyn_cast<PredicateAssume>(PI)) {
+ OS << "; assume predicate info {"
+ << " Comparison:" << *PA->Condition;
+ }
+ OS << ", RenamedOp: ";
+ PI->RenamedOp->printAsOperand(OS, false);
+ OS << " }\n";
+ }
+ }
+};
+
+void PredicateInfo::print(raw_ostream &OS) const {
+ PredicateInfoAnnotatedWriter Writer(this);
+ F.print(OS, &Writer);
+}
+
+void PredicateInfo::dump() const {
+ PredicateInfoAnnotatedWriter Writer(this);
+ F.print(dbgs(), &Writer);
+}
+
+PreservedAnalyses PredicateInfoVerifierPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ std::make_unique<PredicateInfo>(F, DT, AC)->verifyPredicateInfo();
+
+ return PreservedAnalyses::all();
+}
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
new file mode 100644
index 0000000000..75ea9dc5df
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -0,0 +1,1111 @@
+//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file promotes memory references to be register references. It promotes
+// alloca instructions which only have loads and stores as uses. An alloca is
+// transformed by using iterated dominator frontiers to place PHI nodes, then
+// traversing the function in depth-first order to rewrite loads and stores as
+// appropriate.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/IteratedDominanceFrontier.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "mem2reg"
+
+STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
+STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store");
+STATISTIC(NumDeadAlloca, "Number of dead alloca's removed");
+STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");
+
+bool llvm::isAllocaPromotable(const AllocaInst *AI) {
+ // Only allow direct and non-volatile loads and stores...
+ for (const User *U : AI->users()) {
+ if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ // Note that atomic loads can be transformed; atomic semantics do
+ // not have any meaning for a local alloca.
+ if (LI->isVolatile() || LI->getType() != AI->getAllocatedType())
+ return false;
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (SI->getValueOperand() == AI ||
+ SI->getValueOperand()->getType() != AI->getAllocatedType())
+ return false; // Don't allow a store OF the AI, only INTO the AI.
+ // Note that atomic stores can be transformed; atomic semantics do
+ // not have any meaning for a local alloca.
+ if (SI->isVolatile())
+ return false;
+ } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+ if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
+ return false;
+ } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ if (!onlyUsedByLifetimeMarkersOrDroppableInsts(BCI))
+ return false;
+ } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+ if (!GEPI->hasAllZeroIndices())
+ return false;
+ if (!onlyUsedByLifetimeMarkersOrDroppableInsts(GEPI))
+ return false;
+ } else if (const AddrSpaceCastInst *ASCI = dyn_cast<AddrSpaceCastInst>(U)) {
+ if (!onlyUsedByLifetimeMarkers(ASCI))
+ return false;
+ } else {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+namespace {
+
+/// Helper for updating assignment tracking debug info when promoting allocas.
+class AssignmentTrackingInfo {
+ /// DbgAssignIntrinsics linked to the alloca with at most one per variable
+  /// fragment. (i.e. this may not be a comprehensive set if there are multiple
+ /// dbg.assigns for one variable fragment).
+ SmallVector<DbgVariableIntrinsic *> DbgAssigns;
+
+public:
+ void init(AllocaInst *AI) {
+ SmallSet<DebugVariable, 2> Vars;
+ for (DbgAssignIntrinsic *DAI : at::getAssignmentMarkers(AI)) {
+ if (Vars.insert(DebugVariable(DAI)).second)
+ DbgAssigns.push_back(DAI);
+ }
+ }
+
+  /// Update assignment tracking debug info for the to-be-deleted store
+  /// \p ToDelete that stores to this alloca.
+ void updateForDeletedStore(StoreInst *ToDelete, DIBuilder &DIB) const {
+ // There's nothing to do if the alloca doesn't have any variables using
+ // assignment tracking.
+ if (DbgAssigns.empty()) {
+ assert(at::getAssignmentMarkers(ToDelete).empty());
+ return;
+ }
+
+ // Just leave dbg.assign intrinsics in place and remember that we've seen
+ // one for each variable fragment.
+ SmallSet<DebugVariable, 2> VarHasDbgAssignForStore;
+ for (DbgAssignIntrinsic *DAI : at::getAssignmentMarkers(ToDelete))
+ VarHasDbgAssignForStore.insert(DebugVariable(DAI));
+
+ // It's possible for variables using assignment tracking to have no
+ // dbg.assign linked to this store. These are variables in DbgAssigns that
+ // are missing from VarHasDbgAssignForStore. Since there isn't a dbg.assign
+ // to mark the assignment - and the store is going to be deleted - insert a
+ // dbg.value to do that now. An untracked store may be either one that
+ // cannot be represented using assignment tracking (non-const offset or
+ // size) or one that is trackable but has had its DIAssignID attachment
+ // dropped accidentally.
+ for (auto *DAI : DbgAssigns) {
+ if (VarHasDbgAssignForStore.contains(DebugVariable(DAI)))
+ continue;
+ ConvertDebugDeclareToDebugValue(DAI, ToDelete, DIB);
+ }
+ }
+
+  /// Update assignment tracking debug info for the newly inserted PHI
+  /// \p NewPhi.
+ void updateForNewPhi(PHINode *NewPhi, DIBuilder &DIB) const {
+ // Regardless of the position of dbg.assigns relative to stores, the
+ // incoming values into a new PHI should be the same for the (imaginary)
+ // debug-phi.
+ for (auto *DAI : DbgAssigns)
+ ConvertDebugDeclareToDebugValue(DAI, NewPhi, DIB);
+ }
+
+ void clear() { DbgAssigns.clear(); }
+ bool empty() { return DbgAssigns.empty(); }
+};
+
+struct AllocaInfo {
+ using DbgUserVec = SmallVector<DbgVariableIntrinsic *, 1>;
+
+ SmallVector<BasicBlock *, 32> DefiningBlocks;
+ SmallVector<BasicBlock *, 32> UsingBlocks;
+
+ StoreInst *OnlyStore;
+ BasicBlock *OnlyBlock;
+ bool OnlyUsedInOneBlock;
+
+ /// Debug users of the alloca - does not include dbg.assign intrinsics.
+ DbgUserVec DbgUsers;
+ /// Helper to update assignment tracking debug info.
+ AssignmentTrackingInfo AssignmentTracking;
+
+ void clear() {
+ DefiningBlocks.clear();
+ UsingBlocks.clear();
+ OnlyStore = nullptr;
+ OnlyBlock = nullptr;
+ OnlyUsedInOneBlock = true;
+ DbgUsers.clear();
+ AssignmentTracking.clear();
+ }
+
+ /// Scan the uses of the specified alloca, filling in the AllocaInfo used
+ /// by the rest of the pass to reason about the uses of this alloca.
+ void AnalyzeAlloca(AllocaInst *AI) {
+ clear();
+
+ // As we scan the uses of the alloca instruction, keep track of stores,
+ // and decide whether all of the loads and stores to the alloca are within
+ // the same basic block.
+ for (User *U : AI->users()) {
+ Instruction *User = cast<Instruction>(U);
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Remember the basic blocks which define new values for the alloca
+ DefiningBlocks.push_back(SI->getParent());
+ OnlyStore = SI;
+ } else {
+ LoadInst *LI = cast<LoadInst>(User);
+ // Otherwise it must be a load instruction, keep track of variable
+ // reads.
+ UsingBlocks.push_back(LI->getParent());
+ }
+
+ if (OnlyUsedInOneBlock) {
+ if (!OnlyBlock)
+ OnlyBlock = User->getParent();
+ else if (OnlyBlock != User->getParent())
+ OnlyUsedInOneBlock = false;
+ }
+ }
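+    // Collect the debug users of the alloca, but keep dbg.assign intrinsics
+    // out of DbgUsers; those are tracked separately via AssignmentTrackingInfo
+    // below.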
+ DbgUserVec AllDbgUsers;
+ findDbgUsers(AllDbgUsers, AI);
+ std::copy_if(AllDbgUsers.begin(), AllDbgUsers.end(),
+ std::back_inserter(DbgUsers), [](DbgVariableIntrinsic *DII) {
+ return !isa<DbgAssignIntrinsic>(DII);
+ });
+ AssignmentTracking.init(AI);
+ }
+};
+
+/// Data package used by RenamePass().
+struct RenamePassData {
+ using ValVector = std::vector<Value *>;
+ using LocationVector = std::vector<DebugLoc>;
+
+ RenamePassData(BasicBlock *B, BasicBlock *P, ValVector V, LocationVector L)
+ : BB(B), Pred(P), Values(std::move(V)), Locations(std::move(L)) {}
+
+ BasicBlock *BB;
+ BasicBlock *Pred;
+ ValVector Values;
+ LocationVector Locations;
+};
+
+/// This assigns and keeps a per-bb relative ordering of load/store
+/// instructions in the block that directly load or store an alloca.
+///
+/// This functionality is important because it avoids scanning large basic
+/// blocks multiple times when promoting many allocas in the same block.
+class LargeBlockInfo {
+ /// For each instruction that we track, keep the index of the
+ /// instruction.
+ ///
+ /// The index starts out as the number of the instruction from the start of
+ /// the block.
+ DenseMap<const Instruction *, unsigned> InstNumbers;
+
+public:
+
+ /// This code only looks at accesses to allocas.
+ static bool isInterestingInstruction(const Instruction *I) {
+ return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
+ (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
+ }
+
+ /// Get or calculate the index of the specified instruction.
+ unsigned getInstructionIndex(const Instruction *I) {
+ assert(isInterestingInstruction(I) &&
+ "Not a load/store to/from an alloca?");
+
+ // If we already have this instruction number, return it.
+ DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
+ if (It != InstNumbers.end())
+ return It->second;
+
+ // Scan the whole block to get the instruction. This accumulates
+ // information for every interesting instruction in the block, in order to
+    // avoid gratuitous rescans.
+ const BasicBlock *BB = I->getParent();
+ unsigned InstNo = 0;
+ for (const Instruction &BBI : *BB)
+ if (isInterestingInstruction(&BBI))
+ InstNumbers[&BBI] = InstNo++;
+ It = InstNumbers.find(I);
+
+ assert(It != InstNumbers.end() && "Didn't insert instruction?");
+ return It->second;
+ }
+
+ void deleteValue(const Instruction *I) { InstNumbers.erase(I); }
+
+ void clear() { InstNumbers.clear(); }
+};
+
+struct PromoteMem2Reg {
+ /// The alloca instructions being promoted.
+ std::vector<AllocaInst *> Allocas;
+
+ DominatorTree &DT;
+ DIBuilder DIB;
+
+ /// A cache of @llvm.assume intrinsics used by SimplifyInstruction.
+ AssumptionCache *AC;
+
+ const SimplifyQuery SQ;
+
+ /// Reverse mapping of Allocas.
+ DenseMap<AllocaInst *, unsigned> AllocaLookup;
+
+ /// The PhiNodes we're adding.
+ ///
+ /// That map is used to simplify some Phi nodes as we iterate over it, so
+ /// it should have deterministic iterators. We could use a MapVector, but
+ /// since we already maintain a map from BasicBlock* to a stable numbering
+ /// (BBNumbers), the DenseMap is more efficient (also supports removal).
+ DenseMap<std::pair<unsigned, unsigned>, PHINode *> NewPhiNodes;
+
+ /// For each PHI node, keep track of which entry in Allocas it corresponds
+ /// to.
+ DenseMap<PHINode *, unsigned> PhiToAllocaMap;
+
+ /// For each alloca, we keep track of the dbg.declare intrinsic that
+ /// describes it, if any, so that we can convert it to a dbg.value
+ /// intrinsic if the alloca gets promoted.
+ SmallVector<AllocaInfo::DbgUserVec, 8> AllocaDbgUsers;
+
+ /// For each alloca, keep an instance of a helper class that gives us an easy
+ /// way to update assignment tracking debug info if the alloca is promoted.
+ SmallVector<AssignmentTrackingInfo, 8> AllocaATInfo;
+
+ /// The set of basic blocks the renamer has already visited.
+ SmallPtrSet<BasicBlock *, 16> Visited;
+
+  /// Contains a stable numbering of basic blocks to avoid non-deterministic
+ /// behavior.
+ DenseMap<BasicBlock *, unsigned> BBNumbers;
+
+ /// Lazily compute the number of predecessors a block has.
+ DenseMap<const BasicBlock *, unsigned> BBNumPreds;
+
+public:
+ PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
+ AssumptionCache *AC)
+ : Allocas(Allocas.begin(), Allocas.end()), DT(DT),
+ DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false),
+ AC(AC), SQ(DT.getRoot()->getParent()->getParent()->getDataLayout(),
+ nullptr, &DT, AC) {}
+
+ void run();
+
+private:
+ void RemoveFromAllocasList(unsigned &AllocaIdx) {
+ Allocas[AllocaIdx] = Allocas.back();
+ Allocas.pop_back();
+ --AllocaIdx;
+ }
+
+ unsigned getNumPreds(const BasicBlock *BB) {
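+    // BBNumPreds caches pred_size(BB) + 1; the extra 1 lets the
+    // default-constructed value 0 mean "not computed yet" even for blocks
+    // with no predecessors.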
+ unsigned &NP = BBNumPreds[BB];
+ if (NP == 0)
+ NP = pred_size(BB) + 1;
+ return NP - 1;
+ }
+
+ void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
+ const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
+ SmallPtrSetImpl<BasicBlock *> &LiveInBlocks);
+ void RenamePass(BasicBlock *BB, BasicBlock *Pred,
+ RenamePassData::ValVector &IncVals,
+ RenamePassData::LocationVector &IncLocs,
+ std::vector<RenamePassData> &Worklist);
+ bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
+};
+
+} // end anonymous namespace
+
+/// Given a LoadInst LI this adds assume(LI != null) after it.
+static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) {
+ Function *AssumeIntrinsic =
+ Intrinsic::getDeclaration(LI->getModule(), Intrinsic::assume);
+ ICmpInst *LoadNotNull = new ICmpInst(ICmpInst::ICMP_NE, LI,
+ Constant::getNullValue(LI->getType()));
+ LoadNotNull->insertAfter(LI);
+ CallInst *CI = CallInst::Create(AssumeIntrinsic, {LoadNotNull});
+ CI->insertAfter(LoadNotNull);
+ AC->registerAssumption(cast<AssumeInst>(CI));
+}
+
+static void convertMetadataToAssumes(LoadInst *LI, Value *Val,
+ const DataLayout &DL, AssumptionCache *AC,
+ const DominatorTree *DT) {
+ // If the load was marked as nonnull we don't want to lose that information
+ // when we erase this Load. So we preserve it with an assume. As !nonnull
+ // returns poison while assume violations are immediate undefined behavior,
+ // we can only do this if the value is known non-poison.
+ if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+ LI->getMetadata(LLVMContext::MD_noundef) &&
+ !isKnownNonZero(Val, DL, 0, AC, LI, DT))
+ addAssumeNonNull(AC, LI);
+}
+
+static void removeIntrinsicUsers(AllocaInst *AI) {
+ // Knowing that this alloca is promotable, we know that it's safe to kill all
+ // instructions except for load and store.
+
+ for (Use &U : llvm::make_early_inc_range(AI->uses())) {
+ Instruction *I = cast<Instruction>(U.getUser());
+ if (isa<LoadInst>(I) || isa<StoreInst>(I))
+ continue;
+
+ // Drop the use of AI in droppable instructions.
+ if (I->isDroppable()) {
+ I->dropDroppableUse(U);
+ continue;
+ }
+
+ if (!I->getType()->isVoidTy()) {
+ // The only users of this bitcast/GEP instruction are lifetime intrinsics.
+      // Follow the use/def chain to erase them now instead of leaving them
+      // for dead code elimination later.
+ for (Use &UU : llvm::make_early_inc_range(I->uses())) {
+ Instruction *Inst = cast<Instruction>(UU.getUser());
+
+ // Drop the use of I in droppable instructions.
+ if (Inst->isDroppable()) {
+ Inst->dropDroppableUse(UU);
+ continue;
+ }
+ Inst->eraseFromParent();
+ }
+ }
+ I->eraseFromParent();
+ }
+}
+
+/// Rewrite as many loads as possible given a single store.
+///
+/// When there is only a single store, we can use the domtree to trivially
+/// replace all of the dominated loads with the stored value. Do so, and return
+/// true if this has successfully promoted the alloca entirely. If this returns
+/// false there were some loads which were not dominated by the single store
+/// and thus must be phi-ed with undef. We fall back to the standard alloca
+/// promotion algorithm in that case.
+static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
+ LargeBlockInfo &LBI, const DataLayout &DL,
+ DominatorTree &DT, AssumptionCache *AC) {
+ StoreInst *OnlyStore = Info.OnlyStore;
+ bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
+ BasicBlock *StoreBB = OnlyStore->getParent();
+ int StoreIndex = -1;
+
+ // Clear out UsingBlocks. We will reconstruct it here if needed.
+ Info.UsingBlocks.clear();
+
+ for (User *U : make_early_inc_range(AI->users())) {
+ Instruction *UserInst = cast<Instruction>(U);
+ if (UserInst == OnlyStore)
+ continue;
+ LoadInst *LI = cast<LoadInst>(UserInst);
+
+ // Okay, if we have a load from the alloca, we want to replace it with the
+ // only value stored to the alloca. We can do this if the value is
+ // dominated by the store. If not, we use the rest of the mem2reg machinery
+ // to insert the phi nodes as needed.
+ if (!StoringGlobalVal) { // Non-instructions are always dominated.
+ if (LI->getParent() == StoreBB) {
+ // If we have a use that is in the same block as the store, compare the
+ // indices of the two instructions to see which one came first. If the
+ // load came before the store, we can't handle it.
+ if (StoreIndex == -1)
+ StoreIndex = LBI.getInstructionIndex(OnlyStore);
+
+ if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
+ // Can't handle this load, bail out.
+ Info.UsingBlocks.push_back(StoreBB);
+ continue;
+ }
+ } else if (!DT.dominates(StoreBB, LI->getParent())) {
+ // If the load and store are in different blocks, use BB dominance to
+ // check their relationships. If the store doesn't dom the use, bail
+ // out.
+ Info.UsingBlocks.push_back(LI->getParent());
+ continue;
+ }
+ }
+
+ // Otherwise, we *can* safely rewrite this load.
+ Value *ReplVal = OnlyStore->getOperand(0);
+ // If the replacement value is the load, this must occur in unreachable
+ // code.
+ if (ReplVal == LI)
+ ReplVal = PoisonValue::get(LI->getType());
+
+ convertMetadataToAssumes(LI, ReplVal, DL, AC, &DT);
+ LI->replaceAllUsesWith(ReplVal);
+ LI->eraseFromParent();
+ LBI.deleteValue(LI);
+ }
+
+ // Finally, after the scan, check to see if the store is all that is left.
+ if (!Info.UsingBlocks.empty())
+ return false; // If not, we'll have to fall back for the remainder.
+
+ DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
+ // Update assignment tracking info for the store we're going to delete.
+ Info.AssignmentTracking.updateForDeletedStore(Info.OnlyStore, DIB);
+
+ // Record debuginfo for the store and remove the declaration's
+ // debuginfo.
+ for (DbgVariableIntrinsic *DII : Info.DbgUsers) {
+ if (DII->isAddressOfVariable()) {
+ ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB);
+ DII->eraseFromParent();
+ } else if (DII->getExpression()->startsWithDeref()) {
+ DII->eraseFromParent();
+ }
+ }
+
+ // Remove dbg.assigns linked to the alloca as these are now redundant.
+ at::deleteAssignmentMarkers(AI);
+
+ // Remove the (now dead) store and alloca.
+ Info.OnlyStore->eraseFromParent();
+ LBI.deleteValue(Info.OnlyStore);
+
+ AI->eraseFromParent();
+ return true;
+}
+
+/// Many allocas are only used within a single basic block. If this is the
+/// case, avoid traversing the CFG and inserting a lot of potentially useless
+/// PHI nodes by just performing a single linear pass over the basic block
+/// using the Alloca.
+///
+/// If we cannot promote this alloca (because it is read before it is written),
+/// return false. This is necessary in cases where, due to control flow, the
+/// alloca is undefined only on some control flow paths. e.g. code like
+/// this is correct in LLVM IR:
+/// // A is an alloca with no stores so far
+/// for (...) {
+/// int t = *A;
+/// if (!first_iteration)
+/// use(t);
+/// *A = 42;
+/// }
+static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
+ LargeBlockInfo &LBI,
+ const DataLayout &DL,
+ DominatorTree &DT,
+ AssumptionCache *AC) {
+ // The trickiest case to handle is when we have large blocks. Because of this,
+ // this code is optimized assuming that large blocks happen. This does not
+ // significantly pessimize the small block case. This uses LargeBlockInfo to
+ // make it efficient to get the index of various operations in the block.
+
+ // Walk the use-def list of the alloca, getting the locations of all stores.
+ using StoresByIndexTy = SmallVector<std::pair<unsigned, StoreInst *>, 64>;
+ StoresByIndexTy StoresByIndex;
+
+ for (User *U : AI->users())
+ if (StoreInst *SI = dyn_cast<StoreInst>(U))
+ StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));
+
+ // Sort the stores by their index, making it efficient to do a lookup with a
+ // binary search.
+ llvm::sort(StoresByIndex, less_first());
+
+ // Walk all of the loads from this alloca, replacing them with the nearest
+ // store above them, if any.
+ for (User *U : make_early_inc_range(AI->users())) {
+ LoadInst *LI = dyn_cast<LoadInst>(U);
+ if (!LI)
+ continue;
+
+ unsigned LoadIdx = LBI.getInstructionIndex(LI);
+
+ // Find the nearest store that has a lower index than this load.
+ StoresByIndexTy::iterator I = llvm::lower_bound(
+ StoresByIndex,
+ std::make_pair(LoadIdx, static_cast<StoreInst *>(nullptr)),
+ less_first());
+ Value *ReplVal;
+ if (I == StoresByIndex.begin()) {
+ if (StoresByIndex.empty())
+ // If there are no stores, the load takes the undef value.
+ ReplVal = UndefValue::get(LI->getType());
+ else
+ // There is no store before this load, bail out (load may be affected
+ // by the following stores - see main comment).
+ return false;
+ } else {
+ // Otherwise, there was a store before this load, the load takes its
+ // value.
+ ReplVal = std::prev(I)->second->getOperand(0);
+ }
+
+ convertMetadataToAssumes(LI, ReplVal, DL, AC, &DT);
+
+ // If the replacement value is the load, this must occur in unreachable
+ // code.
+ if (ReplVal == LI)
+ ReplVal = PoisonValue::get(LI->getType());
+
+ LI->replaceAllUsesWith(ReplVal);
+ LI->eraseFromParent();
+ LBI.deleteValue(LI);
+ }
+
+ // Remove the (now dead) stores and alloca.
+ DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
+ while (!AI->use_empty()) {
+ StoreInst *SI = cast<StoreInst>(AI->user_back());
+ // Update assignment tracking info for the store we're going to delete.
+ Info.AssignmentTracking.updateForDeletedStore(SI, DIB);
+ // Record debuginfo for the store before removing it.
+ for (DbgVariableIntrinsic *DII : Info.DbgUsers) {
+ if (DII->isAddressOfVariable()) {
+ ConvertDebugDeclareToDebugValue(DII, SI, DIB);
+ }
+ }
+ SI->eraseFromParent();
+ LBI.deleteValue(SI);
+ }
+
+ // Remove dbg.assigns linked to the alloca as these are now redundant.
+ at::deleteAssignmentMarkers(AI);
+ AI->eraseFromParent();
+
+ // The alloca's debuginfo can be removed as well.
+ for (DbgVariableIntrinsic *DII : Info.DbgUsers)
+ if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref())
+ DII->eraseFromParent();
+
+ ++NumLocalPromoted;
+ return true;
+}
+
+void PromoteMem2Reg::run() {
+ Function &F = *DT.getRoot()->getParent();
+
+ AllocaDbgUsers.resize(Allocas.size());
+ AllocaATInfo.resize(Allocas.size());
+
+ AllocaInfo Info;
+ LargeBlockInfo LBI;
+ ForwardIDFCalculator IDF(DT);
+
+ for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
+ AllocaInst *AI = Allocas[AllocaNum];
+
+ assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!");
+ assert(AI->getParent()->getParent() == &F &&
+ "All allocas should be in the same function, which is same as DF!");
+
+ removeIntrinsicUsers(AI);
+
+ if (AI->use_empty()) {
+ // If there are no uses of the alloca, just delete it now.
+ AI->eraseFromParent();
+
+ // Remove the alloca from the Allocas list, since it has been processed
+ RemoveFromAllocasList(AllocaNum);
+ ++NumDeadAlloca;
+ continue;
+ }
+
+ // Calculate the set of read and write-locations for each alloca. This is
+ // analogous to finding the 'uses' and 'definitions' of each variable.
+ Info.AnalyzeAlloca(AI);
+
+ // If there is only a single store to this value, replace any loads of
+ // it that are directly dominated by the definition with the value stored.
+ if (Info.DefiningBlocks.size() == 1) {
+ if (rewriteSingleStoreAlloca(AI, Info, LBI, SQ.DL, DT, AC)) {
+ // The alloca has been processed, move on.
+ RemoveFromAllocasList(AllocaNum);
+ ++NumSingleStore;
+ continue;
+ }
+ }
+
+ // If the alloca is only read and written in one basic block, just perform a
+ // linear sweep over the block to eliminate it.
+ if (Info.OnlyUsedInOneBlock &&
+ promoteSingleBlockAlloca(AI, Info, LBI, SQ.DL, DT, AC)) {
+ // The alloca has been processed, move on.
+ RemoveFromAllocasList(AllocaNum);
+ continue;
+ }
+
+ // If we haven't computed a numbering for the BB's in the function, do so
+ // now.
+ if (BBNumbers.empty()) {
+ unsigned ID = 0;
+ for (auto &BB : F)
+ BBNumbers[&BB] = ID++;
+ }
+
+ // Remember the dbg.declare intrinsic describing this alloca, if any.
+ if (!Info.DbgUsers.empty())
+ AllocaDbgUsers[AllocaNum] = Info.DbgUsers;
+ if (!Info.AssignmentTracking.empty())
+ AllocaATInfo[AllocaNum] = Info.AssignmentTracking;
+
+ // Keep the reverse mapping of the 'Allocas' array for the rename pass.
+ AllocaLookup[Allocas[AllocaNum]] = AllocaNum;
+
+ // Unique the set of defining blocks for efficient lookup.
+ SmallPtrSet<BasicBlock *, 32> DefBlocks(Info.DefiningBlocks.begin(),
+ Info.DefiningBlocks.end());
+
+ // Determine which blocks the value is live in. These are blocks which lead
+ // to uses.
+ SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
+ ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
+
+ // At this point, we're committed to promoting the alloca using IDF's, and
+ // the standard SSA construction algorithm. Determine which blocks need phi
+ // nodes and see if we can optimize out some work by avoiding insertion of
+ // dead phi nodes.
+ IDF.setLiveInBlocks(LiveInBlocks);
+ IDF.setDefiningBlocks(DefBlocks);
+ SmallVector<BasicBlock *, 32> PHIBlocks;
+ IDF.calculate(PHIBlocks);
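+    // Sort the PHI-insertion blocks by the stable block numbering so that the
+    // order in which phis are created is deterministic.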
+ llvm::sort(PHIBlocks, [this](BasicBlock *A, BasicBlock *B) {
+ return BBNumbers.find(A)->second < BBNumbers.find(B)->second;
+ });
+
+ unsigned CurrentVersion = 0;
+ for (BasicBlock *BB : PHIBlocks)
+ QueuePhiNode(BB, AllocaNum, CurrentVersion);
+ }
+
+ if (Allocas.empty())
+ return; // All of the allocas must have been trivial!
+
+ LBI.clear();
+
+  // Set the incoming values for the entry block to be undef values for all of
+  // the allocas. We do this in case there is a load of a value that has not
+  // been stored yet. In this case, it will get this undef value.
+ RenamePassData::ValVector Values(Allocas.size());
+ for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
+ Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());
+
+ // When handling debug info, treat all incoming values as if they have unknown
+ // locations until proven otherwise.
+ RenamePassData::LocationVector Locations(Allocas.size());
+
+  // Walk all basic blocks in the function, performing the SSA rename algorithm
+  // and inserting the phi nodes we marked as necessary.
+ std::vector<RenamePassData> RenamePassWorkList;
+ RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values),
+ std::move(Locations));
+ do {
+ RenamePassData RPD = std::move(RenamePassWorkList.back());
+ RenamePassWorkList.pop_back();
+ // RenamePass may add new worklist entries.
+ RenamePass(RPD.BB, RPD.Pred, RPD.Values, RPD.Locations, RenamePassWorkList);
+ } while (!RenamePassWorkList.empty());
+
+ // The renamer uses the Visited set to avoid infinite loops. Clear it now.
+ Visited.clear();
+
+ // Remove the allocas themselves from the function.
+ for (Instruction *A : Allocas) {
+ // Remove dbg.assigns linked to the alloca as these are now redundant.
+ at::deleteAssignmentMarkers(A);
+ // If there are any uses of the alloca instructions left, they must be in
+ // unreachable basic blocks that were not processed by walking the dominator
+ // tree. Just delete the users now.
+ if (!A->use_empty())
+ A->replaceAllUsesWith(PoisonValue::get(A->getType()));
+ A->eraseFromParent();
+ }
+
+ // Remove alloca's dbg.declare intrinsics from the function.
+ for (auto &DbgUsers : AllocaDbgUsers) {
+ for (auto *DII : DbgUsers)
+ if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref())
+ DII->eraseFromParent();
+ }
+
+ // Loop over all of the PHI nodes and see if there are any that we can get
+ // rid of because they merge all of the same incoming values. This can
+ // happen due to undef values coming into the PHI nodes. This process is
+ // iterative, because eliminating one PHI node can cause others to be removed.
+ bool EliminatedAPHI = true;
+ while (EliminatedAPHI) {
+ EliminatedAPHI = false;
+
+ // Iterating over NewPhiNodes is deterministic, so it is safe to try to
+ // simplify and RAUW them as we go. If it was not, we could add uses to
+ // the values we replace with in a non-deterministic order, thus creating
+ // non-deterministic def->use chains.
+ for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
+ I = NewPhiNodes.begin(),
+ E = NewPhiNodes.end();
+ I != E;) {
+ PHINode *PN = I->second;
+
+ // If this PHI node merges one value and/or undefs, get the value.
+ if (Value *V = simplifyInstruction(PN, SQ)) {
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ NewPhiNodes.erase(I++);
+ EliminatedAPHI = true;
+ continue;
+ }
+ ++I;
+ }
+ }
+
+ // At this point, the renamer has added entries to PHI nodes for all reachable
+ // code. Unfortunately, there may be unreachable blocks which the renamer
+ // hasn't traversed. If this is the case, the PHI nodes may not
+ // have incoming values for all predecessors. Loop over all PHI nodes we have
+ // created, inserting undef values if they are missing any incoming values.
+ for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
+ I = NewPhiNodes.begin(),
+ E = NewPhiNodes.end();
+ I != E; ++I) {
+ // We want to do this once per basic block. As such, only process a block
+ // when we find the PHI that is the first entry in the block.
+ PHINode *SomePHI = I->second;
+ BasicBlock *BB = SomePHI->getParent();
+ if (&BB->front() != SomePHI)
+ continue;
+
+    // Only do work here if the PHI nodes are missing incoming values. We
+ // know that all PHI nodes that were inserted in a block will have the same
+ // number of incoming values, so we can just check any of them.
+ if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
+ continue;
+
+ // Get the preds for BB.
+ SmallVector<BasicBlock *, 16> Preds(predecessors(BB));
+
+ // Ok, now we know that all of the PHI nodes are missing entries for some
+ // basic blocks. Start by sorting the incoming predecessors for efficient
+ // access.
+ auto CompareBBNumbers = [this](BasicBlock *A, BasicBlock *B) {
+ return BBNumbers.find(A)->second < BBNumbers.find(B)->second;
+ };
+ llvm::sort(Preds, CompareBBNumbers);
+
+ // Now we loop through all BB's which have entries in SomePHI and remove
+ // them from the Preds list.
+ for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
+ // Do a log(n) search of the Preds list for the entry we want.
+ SmallVectorImpl<BasicBlock *>::iterator EntIt = llvm::lower_bound(
+ Preds, SomePHI->getIncomingBlock(i), CompareBBNumbers);
+ assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
+ "PHI node has entry for a block which is not a predecessor!");
+
+ // Remove the entry
+ Preds.erase(EntIt);
+ }
+
+ // At this point, the blocks left in the preds list must have dummy
+    // entries inserted into every PHI node in the block. Update all the phi
+ // nodes in this block that we are inserting (there could be phis before
+ // mem2reg runs).
+ unsigned NumBadPreds = SomePHI->getNumIncomingValues();
+ BasicBlock::iterator BBI = BB->begin();
+ while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
+ SomePHI->getNumIncomingValues() == NumBadPreds) {
+ Value *UndefVal = UndefValue::get(SomePHI->getType());
+ for (BasicBlock *Pred : Preds)
+ SomePHI->addIncoming(UndefVal, Pred);
+ }
+ }
+
+ NewPhiNodes.clear();
+}
+
+/// Determine which blocks the value is live in.
+///
+/// These are blocks which lead to uses. Knowing this allows us to avoid
+/// inserting PHI nodes into blocks which don't lead to uses (thus, the
+/// inserted phi nodes would be dead).
+void PromoteMem2Reg::ComputeLiveInBlocks(
+ AllocaInst *AI, AllocaInfo &Info,
+ const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
+ SmallPtrSetImpl<BasicBlock *> &LiveInBlocks) {
+ // To determine liveness, we must iterate through the predecessors of blocks
+ // where the def is live. Blocks are added to the worklist if we need to
+ // check their predecessors. Start with all the using blocks.
+ SmallVector<BasicBlock *, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
+ Info.UsingBlocks.end());
+
+ // If any of the using blocks is also a definition block, check to see if the
+ // definition occurs before or after the use. If it happens before the use,
+ // the value isn't really live-in.
+ for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
+ BasicBlock *BB = LiveInBlockWorklist[i];
+ if (!DefBlocks.count(BB))
+ continue;
+
+ // Okay, this is a block that both uses and defines the value. If the first
+ // reference to the alloca is a def (store), then we know it isn't live-in.
+ for (BasicBlock::iterator I = BB->begin();; ++I) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (SI->getOperand(1) != AI)
+ continue;
+
+ // We found a store to the alloca before a load. The alloca is not
+ // actually live-in here.
+ LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
+ LiveInBlockWorklist.pop_back();
+ --i;
+ --e;
+ break;
+ }
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ // Okay, we found a load before a store to the alloca. It is actually
+ // live into this block.
+ if (LI->getOperand(0) == AI)
+ break;
+ }
+ }
+
+ // Now that we have a set of blocks where the phi is live-in, recursively add
+ // their predecessors until we find the full region the value is live.
+ while (!LiveInBlockWorklist.empty()) {
+ BasicBlock *BB = LiveInBlockWorklist.pop_back_val();
+
+ // The block really is live in here, insert it into the set. If already in
+ // the set, then it has already been processed.
+ if (!LiveInBlocks.insert(BB).second)
+ continue;
+
+    // Since the value is live into BB, it is either defined in a predecessor or
+    // live into it too. Add the preds to the worklist unless they are a
+ // defining block.
+ for (BasicBlock *P : predecessors(BB)) {
+ // The value is not live into a predecessor if it defines the value.
+ if (DefBlocks.count(P))
+ continue;
+
+ // Otherwise it is, add to the worklist.
+ LiveInBlockWorklist.push_back(P);
+ }
+ }
+}
+
+/// Queue a phi-node to be added to a basic-block for a specific Alloca.
+///
+/// Returns true if there wasn't already a phi-node for that variable
+bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
+ unsigned &Version) {
+ // Look up the basic-block in question.
+ PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)];
+
+ // If the BB already has a phi node added for the i'th alloca then we're done!
+ if (PN)
+ return false;
+
+ // Create a PhiNode using the dereferenced type... and add the phi-node to the
+ // BasicBlock.
+ PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
+ Allocas[AllocaNo]->getName() + "." + Twine(Version++),
+ &BB->front());
+ ++NumPHIInsert;
+ PhiToAllocaMap[PN] = AllocaNo;
+ return true;
+}
+
+/// Update the debug location of a phi. \p ApplyMergedLoc indicates whether to
+/// create a merged location incorporating \p DL, or to set \p DL directly.
+static void updateForIncomingValueLocation(PHINode *PN, DebugLoc DL,
+ bool ApplyMergedLoc) {
+ if (ApplyMergedLoc)
+ PN->applyMergedLocation(PN->getDebugLoc(), DL);
+ else
+ PN->setDebugLoc(DL);
+}
+
+/// Recursively traverse the CFG of the function, renaming loads and
+/// stores to the allocas which we are promoting.
+///
+/// IncomingVals indicates what value each Alloca contains on exit from the
+/// predecessor block Pred.
+void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
+ RenamePassData::ValVector &IncomingVals,
+ RenamePassData::LocationVector &IncomingLocs,
+ std::vector<RenamePassData> &Worklist) {
+NextIteration:
+ // If we are inserting any phi nodes into this BB, they will already be in the
+ // block.
+ if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) {
+ // If we have PHI nodes to update, compute the number of edges from Pred to
+ // BB.
+ if (PhiToAllocaMap.count(APN)) {
+ // We want to be able to distinguish between PHI nodes being inserted by
+ // this invocation of mem2reg from those phi nodes that already existed in
+ // the IR before mem2reg was run. We determine that APN is being inserted
+ // because it is missing incoming edges. All other PHI nodes being
+ // inserted by this pass of mem2reg will have the same number of incoming
+ // operands so far. Remember this count.
+ unsigned NewPHINumOperands = APN->getNumOperands();
+
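+      // A single predecessor can reach BB along more than one CFG edge (for
+      // example, a switch with several cases targeting BB), and the PHI needs
+      // one incoming entry per edge.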
+ unsigned NumEdges = llvm::count(successors(Pred), BB);
+ assert(NumEdges && "Must be at least one edge from Pred to BB!");
+
+ // Add entries for all the phis.
+ BasicBlock::iterator PNI = BB->begin();
+ do {
+ unsigned AllocaNo = PhiToAllocaMap[APN];
+
+ // Update the location of the phi node.
+ updateForIncomingValueLocation(APN, IncomingLocs[AllocaNo],
+ APN->getNumIncomingValues() > 0);
+
+ // Add N incoming values to the PHI node.
+ for (unsigned i = 0; i != NumEdges; ++i)
+ APN->addIncoming(IncomingVals[AllocaNo], Pred);
+
+ // The currently active variable for this block is now the PHI.
+ IncomingVals[AllocaNo] = APN;
+ AllocaATInfo[AllocaNo].updateForNewPhi(APN, DIB);
+ for (DbgVariableIntrinsic *DII : AllocaDbgUsers[AllocaNo])
+ if (DII->isAddressOfVariable())
+ ConvertDebugDeclareToDebugValue(DII, APN, DIB);
+
+ // Get the next phi node.
+ ++PNI;
+ APN = dyn_cast<PHINode>(PNI);
+ if (!APN)
+ break;
+
+ // Verify that it is missing entries. If not, it is not being inserted
+ // by this mem2reg invocation so we want to ignore it.
+ } while (APN->getNumOperands() == NewPHINumOperands);
+ }
+ }
+
+ // Don't revisit blocks.
+ if (!Visited.insert(BB).second)
+ return;
+
+ for (BasicBlock::iterator II = BB->begin(); !II->isTerminator();) {
+ Instruction *I = &*II++; // get the instruction, increment iterator
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
+ if (!Src)
+ continue;
+
+ DenseMap<AllocaInst *, unsigned>::iterator AI = AllocaLookup.find(Src);
+ if (AI == AllocaLookup.end())
+ continue;
+
+ Value *V = IncomingVals[AI->second];
+ convertMetadataToAssumes(LI, V, SQ.DL, AC, &DT);
+
+ // Anything using the load now uses the current value.
+ LI->replaceAllUsesWith(V);
+ LI->eraseFromParent();
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Delete this instruction and mark the name as the current holder of the
+ // value
+ AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
+ if (!Dest)
+ continue;
+
+ DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
+ if (ai == AllocaLookup.end())
+ continue;
+
+ // what value were we writing?
+ unsigned AllocaNo = ai->second;
+ IncomingVals[AllocaNo] = SI->getOperand(0);
+
+ // Record debuginfo for the store before removing it.
+ IncomingLocs[AllocaNo] = SI->getDebugLoc();
+ AllocaATInfo[AllocaNo].updateForDeletedStore(SI, DIB);
+ for (DbgVariableIntrinsic *DII : AllocaDbgUsers[ai->second])
+ if (DII->isAddressOfVariable())
+ ConvertDebugDeclareToDebugValue(DII, SI, DIB);
+ SI->eraseFromParent();
+ }
+ }
+
+ // 'Recurse' to our successors.
+ succ_iterator I = succ_begin(BB), E = succ_end(BB);
+ if (I == E)
+ return;
+
+ // Keep track of the successors so we don't visit the same successor twice
+ SmallPtrSet<BasicBlock *, 8> VisitedSuccs;
+
+ // Handle the first successor without using the worklist.
+ VisitedSuccs.insert(*I);
+ Pred = BB;
+ BB = *I;
+ ++I;
+
+ for (; I != E; ++I)
+ if (VisitedSuccs.insert(*I).second)
+ Worklist.emplace_back(*I, Pred, IncomingVals, IncomingLocs);
+
+ goto NextIteration;
+}
+
+void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
+ AssumptionCache *AC) {
+ // If there is nothing to do, bail out...
+ if (Allocas.empty())
+ return;
+
+ PromoteMem2Reg(Allocas, DT, AC).run();
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/RelLookupTableConverter.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/RelLookupTableConverter.cpp
new file mode 100644
index 0000000000..c9ff94dc97
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/RelLookupTableConverter.cpp
@@ -0,0 +1,221 @@
+//===- RelLookupTableConverterPass - Rel Table Conv -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the relative lookup table converter, which converts
+// lookup tables to relative lookup tables to make them PIC-friendly.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/RelLookupTableConverter.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+
+using namespace llvm;
+
+static bool shouldConvertToRelLookupTable(Module &M, GlobalVariable &GV) {
+  // If the lookup table has more than one user, do not generate a relative
+  // lookup table; this keeps the analysis done by this pass simple.
+  // TODO: Add support for lookup tables with multiple uses. For example,
+  // multiple users can appear when a function that uses a lookup table gets
+  // inlined into multiple call sites.
+ if (!GV.hasInitializer() ||
+ !GV.isConstant() ||
+ !GV.hasOneUse())
+ return false;
+
+ GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(GV.use_begin()->getUser());
+ if (!GEP || !GEP->hasOneUse() ||
+ GV.getValueType() != GEP->getSourceElementType())
+ return false;
+
+ LoadInst *Load = dyn_cast<LoadInst>(GEP->use_begin()->getUser());
+ if (!Load || !Load->hasOneUse() ||
+ Load->getType() != GEP->getResultElementType())
+ return false;
+
+  // If the original lookup table does not have local linkage or is
+  // not dso_local, do not generate a relative lookup table.
+  // This optimization creates a relative lookup table that consists of
+  // offsets between the start of the lookup table and its elements.
+  // To be able to generate these offsets, the relative lookup table and
+  // its elements must have local linkage and be dso_local, which means
+  // that they must resolve to symbols within the same linkage unit.
+ if (!GV.hasLocalLinkage() ||
+ !GV.isDSOLocal() ||
+ !GV.isImplicitDSOLocal())
+ return false;
+
+ ConstantArray *Array = dyn_cast<ConstantArray>(GV.getInitializer());
+ if (!Array)
+ return false;
+
+ // If values are not 64-bit pointers, do not generate a relative lookup table.
+ const DataLayout &DL = M.getDataLayout();
+ Type *ElemType = Array->getType()->getElementType();
+ if (!ElemType->isPointerTy() || DL.getPointerTypeSizeInBits(ElemType) != 64)
+ return false;
+
+ for (const Use &Op : Array->operands()) {
+ Constant *ConstOp = cast<Constant>(&Op);
+ GlobalValue *GVOp;
+ APInt Offset;
+
+    // If an operand is not a constant offset from a global value,
+    // do not generate a relative lookup table.
+ if (!IsConstantOffsetFromGlobal(ConstOp, GVOp, Offset, DL))
+ return false;
+
+    // If the operand is mutable, do not generate a relative lookup table.
+    auto *GlobalVarOp = dyn_cast<GlobalVariable>(GVOp);
+    if (!GlobalVarOp || !GlobalVarOp->isConstant())
+      return false;
+
+    if (!GlobalVarOp->hasLocalLinkage() ||
+        !GlobalVarOp->isDSOLocal() ||
+        !GlobalVarOp->isImplicitDSOLocal())
+ return false;
+ }
+
+ return true;
+}
+
+static GlobalVariable *createRelLookupTable(Function &Func,
+ GlobalVariable &LookupTable) {
+ Module &M = *Func.getParent();
+ ConstantArray *LookupTableArr =
+ cast<ConstantArray>(LookupTable.getInitializer());
+ unsigned NumElts = LookupTableArr->getType()->getNumElements();
+ ArrayType *IntArrayTy =
+ ArrayType::get(Type::getInt32Ty(M.getContext()), NumElts);
+
+ GlobalVariable *RelLookupTable = new GlobalVariable(
+ M, IntArrayTy, LookupTable.isConstant(), LookupTable.getLinkage(),
+ nullptr, "reltable." + Func.getName(), &LookupTable,
+ LookupTable.getThreadLocalMode(), LookupTable.getAddressSpace(),
+ LookupTable.isExternallyInitialized());
+
+ uint64_t Idx = 0;
+ SmallVector<Constant *, 64> RelLookupTableContents(NumElts);
+
+ for (Use &Operand : LookupTableArr->operands()) {
+ Constant *Element = cast<Constant>(Operand);
+ Type *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext());
+ Constant *Base = llvm::ConstantExpr::getPtrToInt(RelLookupTable, IntPtrTy);
+ Constant *Target = llvm::ConstantExpr::getPtrToInt(Element, IntPtrTy);
+ Constant *Sub = llvm::ConstantExpr::getSub(Target, Base);
+ Constant *RelOffset =
+ llvm::ConstantExpr::getTrunc(Sub, Type::getInt32Ty(M.getContext()));
+ RelLookupTableContents[Idx++] = RelOffset;
+ }
+
+ Constant *Initializer =
+ ConstantArray::get(IntArrayTy, RelLookupTableContents);
+ RelLookupTable->setInitializer(Initializer);
+ RelLookupTable->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ RelLookupTable->setAlignment(llvm::Align(4));
+ return RelLookupTable;
+}
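+
+// Illustrative sketch (not part of the upstream LLVM sources above): what the
+// 32-bit table built by createRelLookupTable() means at run time, using a
+// hypothetical helper name. Each entry stores "element address - table
+// address", and @llvm.load.relative recovers the pointer by adding the
+// sign-extended entry back to the table's own address; int32_t is assumed to
+// be visible through the existing includes.
+inline const void *loadRelativeSketch(const int32_t *Table, unsigned Index) {
+  const char *Base = reinterpret_cast<const char *>(Table);
+  return Base + Table[Index]; // table base + 32-bit relative offset
+}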
+
+static void convertToRelLookupTable(GlobalVariable &LookupTable) {
+ GetElementPtrInst *GEP =
+ cast<GetElementPtrInst>(LookupTable.use_begin()->getUser());
+ LoadInst *Load = cast<LoadInst>(GEP->use_begin()->getUser());
+
+ Module &M = *LookupTable.getParent();
+ BasicBlock *BB = GEP->getParent();
+ IRBuilder<> Builder(BB);
+ Function &Func = *BB->getParent();
+
+ // Generate an array that consists of relative offsets.
+ GlobalVariable *RelLookupTable = createRelLookupTable(Func, LookupTable);
+
+ // Place new instruction sequence before GEP.
+ Builder.SetInsertPoint(GEP);
+ Value *Index = GEP->getOperand(2);
+ IntegerType *IntTy = cast<IntegerType>(Index->getType());
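+  // Each entry in the relative table is an i32 (4 bytes), so shifting the
+  // index left by 2 yields the byte offset expected by load.relative.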
+ Value *Offset =
+ Builder.CreateShl(Index, ConstantInt::get(IntTy, 2), "reltable.shift");
+
+  // Insert the call to the load.relative intrinsic before the load.
+  // The GEP might not be immediately followed by the load: it can be hoisted
+  // out of a loop, or another instruction might be inserted between them.
+ Builder.SetInsertPoint(Load);
+ Function *LoadRelIntrinsic = llvm::Intrinsic::getDeclaration(
+ &M, Intrinsic::load_relative, {Index->getType()});
+ Value *Base = Builder.CreateBitCast(RelLookupTable, Builder.getInt8PtrTy());
+
+ // Create a call to load.relative intrinsic that computes the target address
+ // by adding base address (lookup table address) and relative offset.
+ Value *Result = Builder.CreateCall(LoadRelIntrinsic, {Base, Offset},
+ "reltable.intrinsic");
+
+ // Create a bitcast instruction if necessary.
+ if (Load->getType() != Builder.getInt8PtrTy())
+ Result = Builder.CreateBitCast(Result, Load->getType(), "reltable.bitcast");
+
+  // Replace the load with the newly generated instruction sequence.
+ Load->replaceAllUsesWith(Result);
+ // Remove Load and GEP instructions.
+ Load->eraseFromParent();
+ GEP->eraseFromParent();
+}
+
+// Convert lookup tables to relative lookup tables in the module.
+static bool convertToRelativeLookupTables(
+ Module &M, function_ref<TargetTransformInfo &(Function &)> GetTTI) {
+ for (Function &F : M) {
+ if (F.isDeclaration())
+ continue;
+
+ // Check if we have a target that supports relative lookup tables.
+ if (!GetTTI(F).shouldBuildRelLookupTables())
+ return false;
+
+ // We assume that the result is independent of the checked function.
+ break;
+ }
+
+ bool Changed = false;
+
+ for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
+ if (!shouldConvertToRelLookupTable(M, GV))
+ continue;
+
+ convertToRelLookupTable(GV);
+
+ // Remove the original lookup table.
+ GV.eraseFromParent();
+
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+PreservedAnalyses RelLookupTableConverterPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+ auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
+ return FAM.getResult<TargetIRAnalysis>(F);
+ };
+
+ if (!convertToRelativeLookupTables(M, GetTTI))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/SCCPSolver.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/SCCPSolver.cpp
new file mode 100644
index 0000000000..8d03a0d8a2
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/SCCPSolver.cpp
@@ -0,0 +1,1922 @@
+//===- SCCPSolver.cpp - SCCP Utility --------------------------- *- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// This file implements the Sparse Conditional Constant Propagation (SCCP)
+// utility.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SCCPSolver.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueLattice.h"
+#include "llvm/Analysis/ValueLatticeUtils.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <cassert>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "sccp"
+
+// The maximum number of range extensions allowed for operations requiring
+// widening.
+static const unsigned MaxNumRangeExtensions = 10;
+
+/// Returns MergeOptions with MaxWidenSteps set to MaxNumRangeExtensions.
+static ValueLatticeElement::MergeOptions getMaxWidenStepsOpts() {
+ return ValueLatticeElement::MergeOptions().setMaxWidenSteps(
+ MaxNumRangeExtensions);
+}
+
+namespace llvm {
+
+bool SCCPSolver::isConstant(const ValueLatticeElement &LV) {
+ return LV.isConstant() ||
+ (LV.isConstantRange() && LV.getConstantRange().isSingleElement());
+}
+
+bool SCCPSolver::isOverdefined(const ValueLatticeElement &LV) {
+ return !LV.isUnknownOrUndef() && !SCCPSolver::isConstant(LV);
+}
+
+static bool canRemoveInstruction(Instruction *I) {
+ if (wouldInstructionBeTriviallyDead(I))
+ return true;
+
+ // Some instructions can be handled but are rejected above. Catch
+ // those cases by falling through to here.
+ // TODO: Mark globals as being constant earlier, so
+ // TODO: wouldInstructionBeTriviallyDead() knows that atomic loads
+ // TODO: are safe to remove.
+ return isa<LoadInst>(I);
+}
+
+bool SCCPSolver::tryToReplaceWithConstant(Value *V) {
+ Constant *Const = nullptr;
+ if (V->getType()->isStructTy()) {
+ std::vector<ValueLatticeElement> IVs = getStructLatticeValueFor(V);
+ if (llvm::any_of(IVs, isOverdefined))
+ return false;
+ std::vector<Constant *> ConstVals;
+ auto *ST = cast<StructType>(V->getType());
+ for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
+ ValueLatticeElement V = IVs[i];
+ ConstVals.push_back(SCCPSolver::isConstant(V)
+ ? getConstant(V)
+ : UndefValue::get(ST->getElementType(i)));
+ }
+ Const = ConstantStruct::get(ST, ConstVals);
+ } else {
+ const ValueLatticeElement &IV = getLatticeValueFor(V);
+ if (isOverdefined(IV))
+ return false;
+
+ Const = SCCPSolver::isConstant(IV) ? getConstant(IV)
+ : UndefValue::get(V->getType());
+ }
+ assert(Const && "Constant is nullptr here!");
+
+  // Replacing `musttail` instructions with a constant breaks the `musttail`
+  // invariant unless the call itself can be removed.
+ // Calls with "clang.arc.attachedcall" implicitly use the return value and
+ // those uses cannot be updated with a constant.
+ CallBase *CB = dyn_cast<CallBase>(V);
+ if (CB && ((CB->isMustTailCall() &&
+ !canRemoveInstruction(CB)) ||
+ CB->getOperandBundle(LLVMContext::OB_clang_arc_attachedcall))) {
+ Function *F = CB->getCalledFunction();
+
+ // Don't zap returns of the callee
+ if (F)
+ addToMustPreserveReturnsInFunctions(F);
+
+ LLVM_DEBUG(dbgs() << " Can\'t treat the result of call " << *CB
+ << " as a constant\n");
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << " Constant: " << *Const << " = " << *V << '\n');
+
+ // Replaces all of the uses of a variable with uses of the constant.
+ V->replaceAllUsesWith(Const);
+ return true;
+}
+
+/// Try to replace signed instructions with their unsigned equivalent.
+static bool replaceSignedInst(SCCPSolver &Solver,
+ SmallPtrSetImpl<Value *> &InsertedValues,
+ Instruction &Inst) {
+ // Determine if a signed value is known to be >= 0.
+ auto isNonNegative = [&Solver](Value *V) {
+    // If this value was constant-folded, it may not have a solver entry.
+    // Handle integer constants directly; any other constant is treated
+    // conservatively as potentially negative.
+ if (auto *C = dyn_cast<Constant>(V)) {
+ auto *CInt = dyn_cast<ConstantInt>(C);
+ return CInt && !CInt->isNegative();
+ }
+ const ValueLatticeElement &IV = Solver.getLatticeValueFor(V);
+ return IV.isConstantRange(/*UndefAllowed=*/false) &&
+ IV.getConstantRange().isAllNonNegative();
+ };
+
+ Instruction *NewInst = nullptr;
+ switch (Inst.getOpcode()) {
+ // Note: We do not fold sitofp -> uitofp here because that could be more
+ // expensive in codegen and may not be reversible in the backend.
+ case Instruction::SExt: {
+ // If the source value is not negative, this is a zext.
+ Value *Op0 = Inst.getOperand(0);
+ if (InsertedValues.count(Op0) || !isNonNegative(Op0))
+ return false;
+ NewInst = new ZExtInst(Op0, Inst.getType(), "", &Inst);
+ break;
+ }
+ case Instruction::AShr: {
+ // If the shifted value is not negative, this is a logical shift right.
+ Value *Op0 = Inst.getOperand(0);
+ if (InsertedValues.count(Op0) || !isNonNegative(Op0))
+ return false;
+ NewInst = BinaryOperator::CreateLShr(Op0, Inst.getOperand(1), "", &Inst);
+ break;
+ }
+ case Instruction::SDiv:
+ case Instruction::SRem: {
+ // If both operands are not negative, this is the same as udiv/urem.
+ Value *Op0 = Inst.getOperand(0), *Op1 = Inst.getOperand(1);
+ if (InsertedValues.count(Op0) || InsertedValues.count(Op1) ||
+ !isNonNegative(Op0) || !isNonNegative(Op1))
+ return false;
+ auto NewOpcode = Inst.getOpcode() == Instruction::SDiv ? Instruction::UDiv
+ : Instruction::URem;
+ NewInst = BinaryOperator::Create(NewOpcode, Op0, Op1, "", &Inst);
+ break;
+ }
+ default:
+ return false;
+ }
+
+ // Wire up the new instruction and update state.
+ assert(NewInst && "Expected replacement instruction");
+ NewInst->takeName(&Inst);
+ InsertedValues.insert(NewInst);
+ Inst.replaceAllUsesWith(NewInst);
+ Solver.removeLatticeValueFor(&Inst);
+ Inst.eraseFromParent();
+ return true;
+}
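+
+// Hedged example of the rewrite performed above (IR names are hypothetical):
+// if the solver has proven that %x only holds non-negative values, e.g. the
+// range [0, 100), then
+//
+//   %e = sext i8 %x to i32      becomes   %e = zext i8 %x to i32
+//   %d = sdiv i32 %x1, %x2      becomes   %d = udiv i32 %x1, %x2
+//
+// (the sdiv/srem case requires both operands to be proven non-negative). The
+// replacement takes over the old name and is recorded in InsertedValues so
+// that later rewrites do not query the solver for it, since it has no lattice
+// entry.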
+
+bool SCCPSolver::simplifyInstsInBlock(BasicBlock &BB,
+ SmallPtrSetImpl<Value *> &InsertedValues,
+ Statistic &InstRemovedStat,
+ Statistic &InstReplacedStat) {
+ bool MadeChanges = false;
+ for (Instruction &Inst : make_early_inc_range(BB)) {
+ if (Inst.getType()->isVoidTy())
+ continue;
+ if (tryToReplaceWithConstant(&Inst)) {
+ if (canRemoveInstruction(&Inst))
+ Inst.eraseFromParent();
+
+ MadeChanges = true;
+ ++InstRemovedStat;
+ } else if (replaceSignedInst(*this, InsertedValues, Inst)) {
+ MadeChanges = true;
+ ++InstReplacedStat;
+ }
+ }
+ return MadeChanges;
+}
+
+bool SCCPSolver::removeNonFeasibleEdges(BasicBlock *BB, DomTreeUpdater &DTU,
+ BasicBlock *&NewUnreachableBB) const {
+ SmallPtrSet<BasicBlock *, 8> FeasibleSuccessors;
+ bool HasNonFeasibleEdges = false;
+ for (BasicBlock *Succ : successors(BB)) {
+ if (isEdgeFeasible(BB, Succ))
+ FeasibleSuccessors.insert(Succ);
+ else
+ HasNonFeasibleEdges = true;
+ }
+
+ // All edges feasible, nothing to do.
+ if (!HasNonFeasibleEdges)
+ return false;
+
+ // SCCP can only determine non-feasible edges for br, switch and indirectbr.
+ Instruction *TI = BB->getTerminator();
+ assert((isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
+ isa<IndirectBrInst>(TI)) &&
+ "Terminator must be a br, switch or indirectbr");
+
+ if (FeasibleSuccessors.size() == 0) {
+ // Branch on undef/poison, replace with unreachable.
+ SmallPtrSet<BasicBlock *, 8> SeenSuccs;
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+ for (BasicBlock *Succ : successors(BB)) {
+ Succ->removePredecessor(BB);
+ if (SeenSuccs.insert(Succ).second)
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
+ }
+ TI->eraseFromParent();
+ new UnreachableInst(BB->getContext(), BB);
+ DTU.applyUpdatesPermissive(Updates);
+ } else if (FeasibleSuccessors.size() == 1) {
+ // Replace with an unconditional branch to the only feasible successor.
+ BasicBlock *OnlyFeasibleSuccessor = *FeasibleSuccessors.begin();
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+ bool HaveSeenOnlyFeasibleSuccessor = false;
+ for (BasicBlock *Succ : successors(BB)) {
+ if (Succ == OnlyFeasibleSuccessor && !HaveSeenOnlyFeasibleSuccessor) {
+ // Don't remove the edge to the only feasible successor the first time
+ // we see it. We still do need to remove any multi-edges to it though.
+ HaveSeenOnlyFeasibleSuccessor = true;
+ continue;
+ }
+
+ Succ->removePredecessor(BB);
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
+ }
+
+ BranchInst::Create(OnlyFeasibleSuccessor, BB);
+ TI->eraseFromParent();
+ DTU.applyUpdatesPermissive(Updates);
+ } else if (FeasibleSuccessors.size() > 1) {
+ SwitchInstProfUpdateWrapper SI(*cast<SwitchInst>(TI));
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+
+    // If the default destination is infeasible, it will never be taken. Replace
+    // it with a new block containing a single unreachable instruction.
+ BasicBlock *DefaultDest = SI->getDefaultDest();
+ if (!FeasibleSuccessors.contains(DefaultDest)) {
+ if (!NewUnreachableBB) {
+ NewUnreachableBB =
+ BasicBlock::Create(DefaultDest->getContext(), "default.unreachable",
+ DefaultDest->getParent(), DefaultDest);
+ new UnreachableInst(DefaultDest->getContext(), NewUnreachableBB);
+ }
+
+ SI->setDefaultDest(NewUnreachableBB);
+ Updates.push_back({DominatorTree::Delete, BB, DefaultDest});
+ Updates.push_back({DominatorTree::Insert, BB, NewUnreachableBB});
+ }
+
+ for (auto CI = SI->case_begin(); CI != SI->case_end();) {
+ if (FeasibleSuccessors.contains(CI->getCaseSuccessor())) {
+ ++CI;
+ continue;
+ }
+
+ BasicBlock *Succ = CI->getCaseSuccessor();
+ Succ->removePredecessor(BB);
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
+ SI.removeCase(CI);
+ // Don't increment CI, as we removed a case.
+ }
+
+ DTU.applyUpdatesPermissive(Updates);
+ } else {
+ llvm_unreachable("Must have at least one feasible successor");
+ }
+ return true;
+}
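+
+// Hedged illustration of the switch case above (hypothetical IR): if the
+// solver proves %c is always 0 or 1, then in
+//
+//   switch i32 %c, label %default [ i32 0, label %a
+//                                   i32 1, label %b ]
+//
+// both cases stay feasible, but %default is redirected to a shared
+// "default.unreachable" block containing a single unreachable instruction,
+// and the corresponding DominatorTree edges are updated through the DTU.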
+
+/// Helper class for SCCPSolver. This implements the instruction visitor and
+/// holds all the state.
+class SCCPInstVisitor : public InstVisitor<SCCPInstVisitor> {
+ const DataLayout &DL;
+ std::function<const TargetLibraryInfo &(Function &)> GetTLI;
+ SmallPtrSet<BasicBlock *, 8> BBExecutable; // The BBs that are executable.
+ DenseMap<Value *, ValueLatticeElement>
+ ValueState; // The state each value is in.
+
+ /// StructValueState - This maintains ValueState for values that have
+ /// StructType, for example for formal arguments, calls, insertelement, etc.
+ DenseMap<std::pair<Value *, unsigned>, ValueLatticeElement> StructValueState;
+
+  /// TrackedGlobals - If we are tracking any values for the contents of a
+  /// global variable, we keep a mapping from the constant accessor to the
+  /// element of the global, to the currently known value. If the value becomes
+  /// overdefined, its entry is simply removed from this map.
+ DenseMap<GlobalVariable *, ValueLatticeElement> TrackedGlobals;
+
+ /// TrackedRetVals - If we are tracking arguments into and the return
+ /// value out of a function, it will have an entry in this map, indicating
+ /// what the known return value for the function is.
+ MapVector<Function *, ValueLatticeElement> TrackedRetVals;
+
+ /// TrackedMultipleRetVals - Same as TrackedRetVals, but used for functions
+ /// that return multiple values.
+ MapVector<std::pair<Function *, unsigned>, ValueLatticeElement>
+ TrackedMultipleRetVals;
+
+ /// MRVFunctionsTracked - Each function in TrackedMultipleRetVals is
+ /// represented here for efficient lookup.
+ SmallPtrSet<Function *, 16> MRVFunctionsTracked;
+
+ /// A list of functions whose return cannot be modified.
+ SmallPtrSet<Function *, 16> MustPreserveReturnsInFunctions;
+
+  /// TrackingIncomingArguments - This is the set of functions whose arguments
+  /// we make optimistic assumptions about and try to prove as constants.
+ SmallPtrSet<Function *, 16> TrackingIncomingArguments;
+
+ /// The reason for two worklists is that overdefined is the lowest state
+ /// on the lattice, and moving things to overdefined as fast as possible
+ /// makes SCCP converge much faster.
+ ///
+ /// By having a separate worklist, we accomplish this because everything
+ /// possibly overdefined will become overdefined at the soonest possible
+ /// point.
+ SmallVector<Value *, 64> OverdefinedInstWorkList;
+ SmallVector<Value *, 64> InstWorkList;
+
+ // The BasicBlock work list
+ SmallVector<BasicBlock *, 64> BBWorkList;
+
+ /// KnownFeasibleEdges - Entries in this set are edges which have already had
+ /// PHI nodes retriggered.
+ using Edge = std::pair<BasicBlock *, BasicBlock *>;
+ DenseSet<Edge> KnownFeasibleEdges;
+
+ DenseMap<Function *, AnalysisResultsForFn> AnalysisResults;
+ DenseMap<Value *, SmallPtrSet<User *, 2>> AdditionalUsers;
+
+ LLVMContext &Ctx;
+
+private:
+ ConstantInt *getConstantInt(const ValueLatticeElement &IV) const {
+ return dyn_cast_or_null<ConstantInt>(getConstant(IV));
+ }
+
+ // pushToWorkList - Helper for markConstant/markOverdefined
+ void pushToWorkList(ValueLatticeElement &IV, Value *V);
+
+ // Helper to push \p V to the worklist, after updating it to \p IV. Also
+ // prints a debug message with the updated value.
+ void pushToWorkListMsg(ValueLatticeElement &IV, Value *V);
+
+ // markConstant - Make a value be marked as "constant". If the value
+ // is not already a constant, add it to the instruction work list so that
+ // the users of the instruction are updated later.
+ bool markConstant(ValueLatticeElement &IV, Value *V, Constant *C,
+ bool MayIncludeUndef = false);
+
+ bool markConstant(Value *V, Constant *C) {
+ assert(!V->getType()->isStructTy() && "structs should use mergeInValue");
+ return markConstant(ValueState[V], V, C);
+ }
+
+ // markOverdefined - Make a value be marked as "overdefined". If the
+ // value is not already overdefined, add it to the overdefined instruction
+ // work list so that the users of the instruction are updated later.
+ bool markOverdefined(ValueLatticeElement &IV, Value *V);
+
+ /// Merge \p MergeWithV into \p IV and push \p V to the worklist, if \p IV
+ /// changes.
+ bool mergeInValue(ValueLatticeElement &IV, Value *V,
+ ValueLatticeElement MergeWithV,
+ ValueLatticeElement::MergeOptions Opts = {
+ /*MayIncludeUndef=*/false, /*CheckWiden=*/false});
+
+ bool mergeInValue(Value *V, ValueLatticeElement MergeWithV,
+ ValueLatticeElement::MergeOptions Opts = {
+ /*MayIncludeUndef=*/false, /*CheckWiden=*/false}) {
+ assert(!V->getType()->isStructTy() &&
+ "non-structs should use markConstant");
+ return mergeInValue(ValueState[V], V, MergeWithV, Opts);
+ }
+
+ /// getValueState - Return the ValueLatticeElement object that corresponds to
+ /// the value. This function handles the case when the value hasn't been seen
+ /// yet by properly seeding constants etc.
+ ValueLatticeElement &getValueState(Value *V) {
+ assert(!V->getType()->isStructTy() && "Should use getStructValueState");
+
+ auto I = ValueState.insert(std::make_pair(V, ValueLatticeElement()));
+ ValueLatticeElement &LV = I.first->second;
+
+ if (!I.second)
+ return LV; // Common case, already in the map.
+
+ if (auto *C = dyn_cast<Constant>(V))
+ LV.markConstant(C); // Constants are constant
+
+ // All others are unknown by default.
+ return LV;
+ }
+
+ /// getStructValueState - Return the ValueLatticeElement object that
+ /// corresponds to the value/field pair. This function handles the case when
+ /// the value hasn't been seen yet by properly seeding constants etc.
+ ValueLatticeElement &getStructValueState(Value *V, unsigned i) {
+ assert(V->getType()->isStructTy() && "Should use getValueState");
+ assert(i < cast<StructType>(V->getType())->getNumElements() &&
+ "Invalid element #");
+
+ auto I = StructValueState.insert(
+ std::make_pair(std::make_pair(V, i), ValueLatticeElement()));
+ ValueLatticeElement &LV = I.first->second;
+
+ if (!I.second)
+ return LV; // Common case, already in the map.
+
+ if (auto *C = dyn_cast<Constant>(V)) {
+ Constant *Elt = C->getAggregateElement(i);
+
+ if (!Elt)
+ LV.markOverdefined(); // Unknown sort of constant.
+ else
+ LV.markConstant(Elt); // Constants are constant.
+ }
+
+    // All others are unknown by default.
+ return LV;
+ }
+
+ /// markEdgeExecutable - Mark a basic block as executable, adding it to the BB
+ /// work list if it is not already executable.
+ bool markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest);
+
+ // getFeasibleSuccessors - Return a vector of booleans to indicate which
+ // successors are reachable from a given terminator instruction.
+ void getFeasibleSuccessors(Instruction &TI, SmallVectorImpl<bool> &Succs);
+
+ // OperandChangedState - This method is invoked on all of the users of an
+  // instruction whose state has just changed in some way. Based on this
+ // information, we need to update the specified user of this instruction.
+ void operandChangedState(Instruction *I) {
+ if (BBExecutable.count(I->getParent())) // Inst is executable?
+ visit(*I);
+ }
+
+ // Add U as additional user of V.
+ void addAdditionalUser(Value *V, User *U) {
+ auto Iter = AdditionalUsers.insert({V, {}});
+ Iter.first->second.insert(U);
+ }
+
+ // Mark I's users as changed, including AdditionalUsers.
+ void markUsersAsChanged(Value *I) {
+ // Functions include their arguments in the use-list. Changed function
+ // values mean that the result of the function changed. We only need to
+ // update the call sites with the new function result and do not have to
+ // propagate the call arguments.
+ if (isa<Function>(I)) {
+ for (User *U : I->users()) {
+ if (auto *CB = dyn_cast<CallBase>(U))
+ handleCallResult(*CB);
+ }
+ } else {
+ for (User *U : I->users())
+ if (auto *UI = dyn_cast<Instruction>(U))
+ operandChangedState(UI);
+ }
+
+ auto Iter = AdditionalUsers.find(I);
+ if (Iter != AdditionalUsers.end()) {
+ // Copy additional users before notifying them of changes, because new
+ // users may be added, potentially invalidating the iterator.
+ SmallVector<Instruction *, 2> ToNotify;
+ for (User *U : Iter->second)
+ if (auto *UI = dyn_cast<Instruction>(U))
+ ToNotify.push_back(UI);
+ for (Instruction *UI : ToNotify)
+ operandChangedState(UI);
+ }
+ }
+ void handleCallOverdefined(CallBase &CB);
+ void handleCallResult(CallBase &CB);
+ void handleCallArguments(CallBase &CB);
+ void handleExtractOfWithOverflow(ExtractValueInst &EVI,
+ const WithOverflowInst *WO, unsigned Idx);
+
+private:
+ friend class InstVisitor<SCCPInstVisitor>;
+
+ // visit implementations - Something changed in this instruction. Either an
+ // operand made a transition, or the instruction is newly executable. Change
+ // the value type of I to reflect these changes if appropriate.
+ void visitPHINode(PHINode &I);
+
+ // Terminators
+
+ void visitReturnInst(ReturnInst &I);
+ void visitTerminator(Instruction &TI);
+
+ void visitCastInst(CastInst &I);
+ void visitSelectInst(SelectInst &I);
+ void visitUnaryOperator(Instruction &I);
+ void visitBinaryOperator(Instruction &I);
+ void visitCmpInst(CmpInst &I);
+ void visitExtractValueInst(ExtractValueInst &EVI);
+ void visitInsertValueInst(InsertValueInst &IVI);
+
+ void visitCatchSwitchInst(CatchSwitchInst &CPI) {
+ markOverdefined(&CPI);
+ visitTerminator(CPI);
+ }
+
+ // Instructions that cannot be folded away.
+
+ void visitStoreInst(StoreInst &I);
+ void visitLoadInst(LoadInst &I);
+ void visitGetElementPtrInst(GetElementPtrInst &I);
+
+ void visitInvokeInst(InvokeInst &II) {
+ visitCallBase(II);
+ visitTerminator(II);
+ }
+
+ void visitCallBrInst(CallBrInst &CBI) {
+ visitCallBase(CBI);
+ visitTerminator(CBI);
+ }
+
+ void visitCallBase(CallBase &CB);
+ void visitResumeInst(ResumeInst &I) { /*returns void*/
+ }
+ void visitUnreachableInst(UnreachableInst &I) { /*returns void*/
+ }
+ void visitFenceInst(FenceInst &I) { /*returns void*/
+ }
+
+ void visitInstruction(Instruction &I);
+
+public:
+ void addAnalysis(Function &F, AnalysisResultsForFn A) {
+ AnalysisResults.insert({&F, std::move(A)});
+ }
+
+ void visitCallInst(CallInst &I) { visitCallBase(I); }
+
+ bool markBlockExecutable(BasicBlock *BB);
+
+ const PredicateBase *getPredicateInfoFor(Instruction *I) {
+ auto A = AnalysisResults.find(I->getParent()->getParent());
+ if (A == AnalysisResults.end())
+ return nullptr;
+ return A->second.PredInfo->getPredicateInfoFor(I);
+ }
+
+ const LoopInfo &getLoopInfo(Function &F) {
+ auto A = AnalysisResults.find(&F);
+ assert(A != AnalysisResults.end() && A->second.LI &&
+ "Need LoopInfo analysis results for function.");
+ return *A->second.LI;
+ }
+
+ DomTreeUpdater getDTU(Function &F) {
+ auto A = AnalysisResults.find(&F);
+ assert(A != AnalysisResults.end() && "Need analysis results for function.");
+ return {A->second.DT, A->second.PDT, DomTreeUpdater::UpdateStrategy::Lazy};
+ }
+
+ SCCPInstVisitor(const DataLayout &DL,
+ std::function<const TargetLibraryInfo &(Function &)> GetTLI,
+ LLVMContext &Ctx)
+ : DL(DL), GetTLI(GetTLI), Ctx(Ctx) {}
+
+ void trackValueOfGlobalVariable(GlobalVariable *GV) {
+ // We only track the contents of scalar globals.
+ if (GV->getValueType()->isSingleValueType()) {
+ ValueLatticeElement &IV = TrackedGlobals[GV];
+ IV.markConstant(GV->getInitializer());
+ }
+ }
+
+ void addTrackedFunction(Function *F) {
+ // Add an entry, F -> undef.
+ if (auto *STy = dyn_cast<StructType>(F->getReturnType())) {
+ MRVFunctionsTracked.insert(F);
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ TrackedMultipleRetVals.insert(
+ std::make_pair(std::make_pair(F, i), ValueLatticeElement()));
+ } else if (!F->getReturnType()->isVoidTy())
+ TrackedRetVals.insert(std::make_pair(F, ValueLatticeElement()));
+ }
+
+ void addToMustPreserveReturnsInFunctions(Function *F) {
+ MustPreserveReturnsInFunctions.insert(F);
+ }
+
+ bool mustPreserveReturn(Function *F) {
+ return MustPreserveReturnsInFunctions.count(F);
+ }
+
+ void addArgumentTrackedFunction(Function *F) {
+ TrackingIncomingArguments.insert(F);
+ }
+
+ bool isArgumentTrackedFunction(Function *F) {
+ return TrackingIncomingArguments.count(F);
+ }
+
+ void solve();
+
+ bool resolvedUndefsIn(Function &F);
+
+ bool isBlockExecutable(BasicBlock *BB) const {
+ return BBExecutable.count(BB);
+ }
+
+ bool isEdgeFeasible(BasicBlock *From, BasicBlock *To) const;
+
+ std::vector<ValueLatticeElement> getStructLatticeValueFor(Value *V) const {
+ std::vector<ValueLatticeElement> StructValues;
+ auto *STy = dyn_cast<StructType>(V->getType());
+ assert(STy && "getStructLatticeValueFor() can be called only on structs");
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ auto I = StructValueState.find(std::make_pair(V, i));
+ assert(I != StructValueState.end() && "Value not in valuemap!");
+ StructValues.push_back(I->second);
+ }
+ return StructValues;
+ }
+
+ void removeLatticeValueFor(Value *V) { ValueState.erase(V); }
+
+ const ValueLatticeElement &getLatticeValueFor(Value *V) const {
+ assert(!V->getType()->isStructTy() &&
+ "Should use getStructLatticeValueFor");
+ DenseMap<Value *, ValueLatticeElement>::const_iterator I =
+ ValueState.find(V);
+ assert(I != ValueState.end() &&
+ "V not found in ValueState nor Paramstate map!");
+ return I->second;
+ }
+
+ const MapVector<Function *, ValueLatticeElement> &getTrackedRetVals() {
+ return TrackedRetVals;
+ }
+
+ const DenseMap<GlobalVariable *, ValueLatticeElement> &getTrackedGlobals() {
+ return TrackedGlobals;
+ }
+
+ const SmallPtrSet<Function *, 16> getMRVFunctionsTracked() {
+ return MRVFunctionsTracked;
+ }
+
+ void markOverdefined(Value *V) {
+ if (auto *STy = dyn_cast<StructType>(V->getType()))
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ markOverdefined(getStructValueState(V, i), V);
+ else
+ markOverdefined(ValueState[V], V);
+ }
+
+ bool isStructLatticeConstant(Function *F, StructType *STy);
+
+ Constant *getConstant(const ValueLatticeElement &LV) const;
+ ConstantRange getConstantRange(const ValueLatticeElement &LV, Type *Ty) const;
+
+ SmallPtrSetImpl<Function *> &getArgumentTrackedFunctions() {
+ return TrackingIncomingArguments;
+ }
+
+ void markArgInFuncSpecialization(Function *F,
+ const SmallVectorImpl<ArgInfo> &Args);
+
+ void markFunctionUnreachable(Function *F) {
+ for (auto &BB : *F)
+ BBExecutable.erase(&BB);
+ }
+
+ void solveWhileResolvedUndefsIn(Module &M) {
+ bool ResolvedUndefs = true;
+ while (ResolvedUndefs) {
+ solve();
+ ResolvedUndefs = false;
+ for (Function &F : M)
+ ResolvedUndefs |= resolvedUndefsIn(F);
+ }
+ }
+
+ void solveWhileResolvedUndefsIn(SmallVectorImpl<Function *> &WorkList) {
+ bool ResolvedUndefs = true;
+ while (ResolvedUndefs) {
+ solve();
+ ResolvedUndefs = false;
+ for (Function *F : WorkList)
+ ResolvedUndefs |= resolvedUndefsIn(*F);
+ }
+ }
+};
+
+} // namespace llvm
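+
+// Hedged usage sketch (illustrative only; the real drivers live in SCCP.cpp
+// and the IPSCCP code, and the local names here are hypothetical). A client
+// of SCCPSolver typically seeds the entry block and the arguments, runs the
+// solver to a fixed point, and then queries the lattice:
+//
+//   SCCPSolver Solver(DL, GetTLI, F.getContext());
+//   Solver.markBlockExecutable(&F.front());
+//   for (Argument &A : F.args())
+//     Solver.markOverdefined(&A);
+//   bool ResolvedUndefs = true;
+//   while (ResolvedUndefs) {
+//     Solver.solve();
+//     ResolvedUndefs = Solver.resolvedUndefsIn(F);
+//   }
+//   // Then use isBlockExecutable()/getLatticeValueFor() to rewrite F.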
+
+bool SCCPInstVisitor::markBlockExecutable(BasicBlock *BB) {
+ if (!BBExecutable.insert(BB).second)
+ return false;
+ LLVM_DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << '\n');
+ BBWorkList.push_back(BB); // Add the block to the work list!
+ return true;
+}
+
+void SCCPInstVisitor::pushToWorkList(ValueLatticeElement &IV, Value *V) {
+ if (IV.isOverdefined())
+ return OverdefinedInstWorkList.push_back(V);
+ InstWorkList.push_back(V);
+}
+
+void SCCPInstVisitor::pushToWorkListMsg(ValueLatticeElement &IV, Value *V) {
+ LLVM_DEBUG(dbgs() << "updated " << IV << ": " << *V << '\n');
+ pushToWorkList(IV, V);
+}
+
+bool SCCPInstVisitor::markConstant(ValueLatticeElement &IV, Value *V,
+ Constant *C, bool MayIncludeUndef) {
+ if (!IV.markConstant(C, MayIncludeUndef))
+ return false;
+ LLVM_DEBUG(dbgs() << "markConstant: " << *C << ": " << *V << '\n');
+ pushToWorkList(IV, V);
+ return true;
+}
+
+bool SCCPInstVisitor::markOverdefined(ValueLatticeElement &IV, Value *V) {
+ if (!IV.markOverdefined())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "markOverdefined: ";
+ if (auto *F = dyn_cast<Function>(V)) dbgs()
+ << "Function '" << F->getName() << "'\n";
+ else dbgs() << *V << '\n');
+ // Only instructions go on the work list
+ pushToWorkList(IV, V);
+ return true;
+}
+
+bool SCCPInstVisitor::isStructLatticeConstant(Function *F, StructType *STy) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ const auto &It = TrackedMultipleRetVals.find(std::make_pair(F, i));
+ assert(It != TrackedMultipleRetVals.end());
+ ValueLatticeElement LV = It->second;
+ if (!SCCPSolver::isConstant(LV))
+ return false;
+ }
+ return true;
+}
+
+Constant *SCCPInstVisitor::getConstant(const ValueLatticeElement &LV) const {
+ if (LV.isConstant())
+ return LV.getConstant();
+
+ if (LV.isConstantRange()) {
+ const auto &CR = LV.getConstantRange();
+ if (CR.getSingleElement())
+ return ConstantInt::get(Ctx, *CR.getSingleElement());
+ }
+ return nullptr;
+}
+
+ConstantRange
+SCCPInstVisitor::getConstantRange(const ValueLatticeElement &LV,
+ Type *Ty) const {
+ assert(Ty->isIntOrIntVectorTy() && "Should be int or int vector");
+ if (LV.isConstantRange())
+ return LV.getConstantRange();
+ return ConstantRange::getFull(Ty->getScalarSizeInBits());
+}
+
+void SCCPInstVisitor::markArgInFuncSpecialization(
+ Function *F, const SmallVectorImpl<ArgInfo> &Args) {
+ assert(!Args.empty() && "Specialization without arguments");
+ assert(F->arg_size() == Args[0].Formal->getParent()->arg_size() &&
+ "Functions should have the same number of arguments");
+
+ auto Iter = Args.begin();
+ Argument *NewArg = F->arg_begin();
+ Argument *OldArg = Args[0].Formal->getParent()->arg_begin();
+ for (auto End = F->arg_end(); NewArg != End; ++NewArg, ++OldArg) {
+
+ LLVM_DEBUG(dbgs() << "SCCP: Marking argument "
+ << NewArg->getNameOrAsOperand() << "\n");
+
+ if (Iter != Args.end() && OldArg == Iter->Formal) {
+ // Mark the argument constants in the new function.
+ markConstant(NewArg, Iter->Actual);
+ ++Iter;
+ } else if (ValueState.count(OldArg)) {
+ // For the remaining arguments in the new function, copy the lattice state
+ // over from the old function.
+ //
+ // Note: This previously looked like this:
+ // ValueState[NewArg] = ValueState[OldArg];
+ // This is incorrect because the DenseMap class may resize the underlying
+ // memory when inserting `NewArg`, which will invalidate the reference to
+ // `OldArg`. Instead, we make sure `NewArg` exists before setting it.
+ auto &NewValue = ValueState[NewArg];
+ NewValue = ValueState[OldArg];
+ pushToWorkList(NewValue, NewArg);
+ }
+ }
+}
+
+void SCCPInstVisitor::visitInstruction(Instruction &I) {
+ // All the instructions we don't do any special handling for just
+ // go to overdefined.
+ LLVM_DEBUG(dbgs() << "SCCP: Don't know how to handle: " << I << '\n');
+ markOverdefined(&I);
+}
+
+bool SCCPInstVisitor::mergeInValue(ValueLatticeElement &IV, Value *V,
+ ValueLatticeElement MergeWithV,
+ ValueLatticeElement::MergeOptions Opts) {
+ if (IV.mergeIn(MergeWithV, Opts)) {
+ pushToWorkList(IV, V);
+ LLVM_DEBUG(dbgs() << "Merged " << MergeWithV << " into " << *V << " : "
+ << IV << "\n");
+ return true;
+ }
+ return false;
+}
+
+bool SCCPInstVisitor::markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) {
+ if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second)
+ return false; // This edge is already known to be executable!
+
+ if (!markBlockExecutable(Dest)) {
+ // If the destination is already executable, we just made an *edge*
+ // feasible that wasn't before. Revisit the PHI nodes in the block
+ // because they have potentially new operands.
+ LLVM_DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName()
+ << " -> " << Dest->getName() << '\n');
+
+ for (PHINode &PN : Dest->phis())
+ visitPHINode(PN);
+ }
+ return true;
+}
+
+// getFeasibleSuccessors - Return a vector of booleans to indicate which
+// successors are reachable from a given terminator instruction.
+void SCCPInstVisitor::getFeasibleSuccessors(Instruction &TI,
+ SmallVectorImpl<bool> &Succs) {
+ Succs.resize(TI.getNumSuccessors());
+ if (auto *BI = dyn_cast<BranchInst>(&TI)) {
+ if (BI->isUnconditional()) {
+ Succs[0] = true;
+ return;
+ }
+
+ ValueLatticeElement BCValue = getValueState(BI->getCondition());
+ ConstantInt *CI = getConstantInt(BCValue);
+ if (!CI) {
+ // Overdefined condition variables, and branches on unfoldable constant
+ // conditions, mean the branch could go either way.
+ if (!BCValue.isUnknownOrUndef())
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+ // Constant condition variables mean the branch can only go a single way.
+ Succs[CI->isZero()] = true;
+ return;
+ }
+
+  // Unwinding instructions' successors are always executable.
+ if (TI.isExceptionalTerminator()) {
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+ if (auto *SI = dyn_cast<SwitchInst>(&TI)) {
+ if (!SI->getNumCases()) {
+ Succs[0] = true;
+ return;
+ }
+ const ValueLatticeElement &SCValue = getValueState(SI->getCondition());
+ if (ConstantInt *CI = getConstantInt(SCValue)) {
+ Succs[SI->findCaseValue(CI)->getSuccessorIndex()] = true;
+ return;
+ }
+
+ // TODO: Switch on undef is UB. Stop passing false once the rest of LLVM
+ // is ready.
+ if (SCValue.isConstantRange(/*UndefAllowed=*/false)) {
+ const ConstantRange &Range = SCValue.getConstantRange();
+ for (const auto &Case : SI->cases()) {
+ const APInt &CaseValue = Case.getCaseValue()->getValue();
+ if (Range.contains(CaseValue))
+ Succs[Case.getSuccessorIndex()] = true;
+ }
+
+ // TODO: Determine whether default case is reachable.
+ Succs[SI->case_default()->getSuccessorIndex()] = true;
+ return;
+ }
+
+ // Overdefined or unknown condition? All destinations are executable!
+ if (!SCValue.isUnknownOrUndef())
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+  // For an indirect branch whose address is a blockaddress, we mark only
+  // that target as executable.
+ if (auto *IBR = dyn_cast<IndirectBrInst>(&TI)) {
+ // Casts are folded by visitCastInst.
+ ValueLatticeElement IBRValue = getValueState(IBR->getAddress());
+ BlockAddress *Addr = dyn_cast_or_null<BlockAddress>(getConstant(IBRValue));
+ if (!Addr) { // Overdefined or unknown condition?
+ // All destinations are executable!
+ if (!IBRValue.isUnknownOrUndef())
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+ BasicBlock *T = Addr->getBasicBlock();
+ assert(Addr->getFunction() == T->getParent() &&
+ "Block address of a different function ?");
+ for (unsigned i = 0; i < IBR->getNumSuccessors(); ++i) {
+ // This is the target.
+ if (IBR->getDestination(i) == T) {
+ Succs[i] = true;
+ return;
+ }
+ }
+
+ // If we didn't find our destination in the IBR successor list, then we
+    // have undefined behavior. It's ok to assume no successor is executable.
+ return;
+ }
+
+ // In case of callbr, we pessimistically assume that all successors are
+ // feasible.
+ if (isa<CallBrInst>(&TI)) {
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+ LLVM_DEBUG(dbgs() << "Unknown terminator instruction: " << TI << '\n');
+ llvm_unreachable("SCCP: Don't know how to handle this terminator!");
+}
+
+// isEdgeFeasible - Return true if the control flow edge from the 'From' basic
+// block to the 'To' basic block is currently feasible.
+bool SCCPInstVisitor::isEdgeFeasible(BasicBlock *From, BasicBlock *To) const {
+ // Check if we've called markEdgeExecutable on the edge yet. (We could
+ // be more aggressive and try to consider edges which haven't been marked
+ // yet, but there isn't any need.)
+ return KnownFeasibleEdges.count(Edge(From, To));
+}
+
+// visit Implementations - Something changed in this instruction, either an
+// operand made a transition, or the instruction is newly executable. Change
+// the value type of I to reflect these changes if appropriate. This method
+// makes sure to do the following actions:
+//
+// 1. If a phi node merges two constants in, and has conflicting value coming
+// from different branches, or if the PHI node merges in an overdefined
+// value, then the PHI node becomes overdefined.
+// 2. If a phi node merges only constants in, and they all agree on value, the
+// PHI node becomes a constant value equal to that.
+// 3. If V <- x (op) y && isConstant(x) && isConstant(y) V = Constant
+// 4. If V <- x (op) y && (isOverdefined(x) || isOverdefined(y)) V = Overdefined
+// 5. If V <- MEM or V <- CALL or V <- (unknown) then V = Overdefined
+// 6. If a conditional branch has a value that is constant, make the selected
+// destination executable
+// 7. If a conditional branch has a value that is overdefined, make all
+// successors executable.
+void SCCPInstVisitor::visitPHINode(PHINode &PN) {
+ // If this PN returns a struct, just mark the result overdefined.
+ // TODO: We could do a lot better than this if code actually uses this.
+ if (PN.getType()->isStructTy())
+ return (void)markOverdefined(&PN);
+
+ if (getValueState(&PN).isOverdefined())
+ return; // Quick exit
+
+ // Super-extra-high-degree PHI nodes are unlikely to ever be marked constant,
+ // and slow us down a lot. Just mark them overdefined.
+ if (PN.getNumIncomingValues() > 64)
+ return (void)markOverdefined(&PN);
+
+ unsigned NumActiveIncoming = 0;
+
+ // Look at all of the executable operands of the PHI node. If any of them
+ // are overdefined, the PHI becomes overdefined as well. If they are all
+ // constant, and they agree with each other, the PHI becomes the identical
+ // constant. If they are constant and don't agree, the PHI is a constant
+ // range. If there are no executable operands, the PHI remains unknown.
+ ValueLatticeElement PhiState = getValueState(&PN);
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent()))
+ continue;
+
+ ValueLatticeElement IV = getValueState(PN.getIncomingValue(i));
+ PhiState.mergeIn(IV);
+ NumActiveIncoming++;
+ if (PhiState.isOverdefined())
+ break;
+ }
+
+ // We allow up to 1 range extension per active incoming value and one
+ // additional extension. Note that we manually adjust the number of range
+ // extensions to match the number of active incoming values. This helps to
+ // limit multiple extensions caused by the same incoming value, if other
+ // incoming values are equal.
+ mergeInValue(&PN, PhiState,
+ ValueLatticeElement::MergeOptions().setMaxWidenSteps(
+ NumActiveIncoming + 1));
+ ValueLatticeElement &PhiStateRef = getValueState(&PN);
+ PhiStateRef.setNumRangeExtensions(
+ std::max(NumActiveIncoming, PhiStateRef.getNumRangeExtensions()));
+}
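+
+// Hedged illustration (hypothetical IR): in
+//
+//   %p = phi i32 [ 7, %bb1 ], [ 7, %bb2 ]
+//
+// every feasible incoming value agrees, so %p stays the constant 7. In
+//
+//   %q = phi i32 [ 1, %bb1 ], [ 3, %bb2 ]
+//
+// the disagreeing constants merge into the constant range [1, 4) instead of
+// going straight to overdefined, subject to the widening limit computed above.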
+
+void SCCPInstVisitor::visitReturnInst(ReturnInst &I) {
+ if (I.getNumOperands() == 0)
+ return; // ret void
+
+ Function *F = I.getParent()->getParent();
+ Value *ResultOp = I.getOperand(0);
+
+ // If we are tracking the return value of this function, merge it in.
+ if (!TrackedRetVals.empty() && !ResultOp->getType()->isStructTy()) {
+ auto TFRVI = TrackedRetVals.find(F);
+ if (TFRVI != TrackedRetVals.end()) {
+ mergeInValue(TFRVI->second, F, getValueState(ResultOp));
+ return;
+ }
+ }
+
+ // Handle functions that return multiple values.
+ if (!TrackedMultipleRetVals.empty()) {
+ if (auto *STy = dyn_cast<StructType>(ResultOp->getType()))
+ if (MRVFunctionsTracked.count(F))
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ mergeInValue(TrackedMultipleRetVals[std::make_pair(F, i)], F,
+ getStructValueState(ResultOp, i));
+ }
+}
+
+void SCCPInstVisitor::visitTerminator(Instruction &TI) {
+ SmallVector<bool, 16> SuccFeasible;
+ getFeasibleSuccessors(TI, SuccFeasible);
+
+ BasicBlock *BB = TI.getParent();
+
+ // Mark all feasible successors executable.
+ for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i)
+ if (SuccFeasible[i])
+ markEdgeExecutable(BB, TI.getSuccessor(i));
+}
+
+void SCCPInstVisitor::visitCastInst(CastInst &I) {
+ // ResolvedUndefsIn might mark I as overdefined. Bail out, even if we would
+ // discover a concrete value later.
+ if (ValueState[&I].isOverdefined())
+ return;
+
+ ValueLatticeElement OpSt = getValueState(I.getOperand(0));
+ if (OpSt.isUnknownOrUndef())
+ return;
+
+ if (Constant *OpC = getConstant(OpSt)) {
+ // Fold the constant as we build.
+ Constant *C = ConstantFoldCastOperand(I.getOpcode(), OpC, I.getType(), DL);
+ markConstant(&I, C);
+ } else if (I.getDestTy()->isIntegerTy() &&
+ I.getSrcTy()->isIntOrIntVectorTy()) {
+ auto &LV = getValueState(&I);
+ ConstantRange OpRange = getConstantRange(OpSt, I.getSrcTy());
+
+ Type *DestTy = I.getDestTy();
+ // Vectors where all elements have the same known constant range are treated
+ // as a single constant range in the lattice. When bitcasting such vectors,
+    // there is a mismatch between the width of the lattice value (single
+ // constant range) and the original operands (vector). Go to overdefined in
+ // that case.
+ if (I.getOpcode() == Instruction::BitCast &&
+ I.getOperand(0)->getType()->isVectorTy() &&
+ OpRange.getBitWidth() < DL.getTypeSizeInBits(DestTy))
+ return (void)markOverdefined(&I);
+
+ ConstantRange Res =
+ OpRange.castOp(I.getOpcode(), DL.getTypeSizeInBits(DestTy));
+ mergeInValue(LV, &I, ValueLatticeElement::getRange(Res));
+ } else
+ markOverdefined(&I);
+}
+
+void SCCPInstVisitor::handleExtractOfWithOverflow(ExtractValueInst &EVI,
+ const WithOverflowInst *WO,
+ unsigned Idx) {
+ Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
+ ValueLatticeElement L = getValueState(LHS);
+ ValueLatticeElement R = getValueState(RHS);
+ addAdditionalUser(LHS, &EVI);
+ addAdditionalUser(RHS, &EVI);
+ if (L.isUnknownOrUndef() || R.isUnknownOrUndef())
+ return; // Wait to resolve.
+
+ Type *Ty = LHS->getType();
+ ConstantRange LR = getConstantRange(L, Ty);
+ ConstantRange RR = getConstantRange(R, Ty);
+ if (Idx == 0) {
+ ConstantRange Res = LR.binaryOp(WO->getBinaryOp(), RR);
+ mergeInValue(&EVI, ValueLatticeElement::getRange(Res));
+ } else {
+ assert(Idx == 1 && "Index can only be 0 or 1");
+ ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
+ WO->getBinaryOp(), RR, WO->getNoWrapKind());
+ if (NWRegion.contains(LR))
+ return (void)markConstant(&EVI, ConstantInt::getFalse(EVI.getType()));
+ markOverdefined(&EVI);
+ }
+}
+
+void SCCPInstVisitor::visitExtractValueInst(ExtractValueInst &EVI) {
+  // If this returns a struct, mark all elements overdefined; we don't track
+ // structs in structs.
+ if (EVI.getType()->isStructTy())
+ return (void)markOverdefined(&EVI);
+
+ // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would
+ // discover a concrete value later.
+ if (ValueState[&EVI].isOverdefined())
+ return (void)markOverdefined(&EVI);
+
+ // If this is extracting from more than one level of struct, we don't know.
+ if (EVI.getNumIndices() != 1)
+ return (void)markOverdefined(&EVI);
+
+ Value *AggVal = EVI.getAggregateOperand();
+ if (AggVal->getType()->isStructTy()) {
+ unsigned i = *EVI.idx_begin();
+ if (auto *WO = dyn_cast<WithOverflowInst>(AggVal))
+ return handleExtractOfWithOverflow(EVI, WO, i);
+ ValueLatticeElement EltVal = getStructValueState(AggVal, i);
+ mergeInValue(getValueState(&EVI), &EVI, EltVal);
+ } else {
+ // Otherwise, must be extracting from an array.
+ return (void)markOverdefined(&EVI);
+ }
+}
+
+void SCCPInstVisitor::visitInsertValueInst(InsertValueInst &IVI) {
+ auto *STy = dyn_cast<StructType>(IVI.getType());
+ if (!STy)
+ return (void)markOverdefined(&IVI);
+
+ // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would
+ // discover a concrete value later.
+ if (SCCPSolver::isOverdefined(ValueState[&IVI]))
+ return (void)markOverdefined(&IVI);
+
+  // If this has more than one index, we can't handle it; drive all results to
+  // overdefined.
+ if (IVI.getNumIndices() != 1)
+ return (void)markOverdefined(&IVI);
+
+ Value *Aggr = IVI.getAggregateOperand();
+ unsigned Idx = *IVI.idx_begin();
+
+ // Compute the result based on what we're inserting.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ // This passes through all values that aren't the inserted element.
+ if (i != Idx) {
+ ValueLatticeElement EltVal = getStructValueState(Aggr, i);
+ mergeInValue(getStructValueState(&IVI, i), &IVI, EltVal);
+ continue;
+ }
+
+ Value *Val = IVI.getInsertedValueOperand();
+ if (Val->getType()->isStructTy())
+ // We don't track structs in structs.
+ markOverdefined(getStructValueState(&IVI, i), &IVI);
+ else {
+ ValueLatticeElement InVal = getValueState(Val);
+ mergeInValue(getStructValueState(&IVI, i), &IVI, InVal);
+ }
+ }
+}
+
+void SCCPInstVisitor::visitSelectInst(SelectInst &I) {
+ // If this select returns a struct, just mark the result overdefined.
+ // TODO: We could do a lot better than this if code actually uses this.
+ if (I.getType()->isStructTy())
+ return (void)markOverdefined(&I);
+
+ // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would
+ // discover a concrete value later.
+ if (ValueState[&I].isOverdefined())
+ return (void)markOverdefined(&I);
+
+ ValueLatticeElement CondValue = getValueState(I.getCondition());
+ if (CondValue.isUnknownOrUndef())
+ return;
+
+ if (ConstantInt *CondCB = getConstantInt(CondValue)) {
+ Value *OpVal = CondCB->isZero() ? I.getFalseValue() : I.getTrueValue();
+ mergeInValue(&I, getValueState(OpVal));
+ return;
+ }
+
+ // Otherwise, the condition is overdefined or a constant we can't evaluate.
+ // See if we can produce something better than overdefined based on the T/F
+ // value.
+ ValueLatticeElement TVal = getValueState(I.getTrueValue());
+ ValueLatticeElement FVal = getValueState(I.getFalseValue());
+
+ bool Changed = ValueState[&I].mergeIn(TVal);
+ Changed |= ValueState[&I].mergeIn(FVal);
+ if (Changed)
+ pushToWorkListMsg(ValueState[&I], &I);
+}
+
+// Handle Unary Operators.
+void SCCPInstVisitor::visitUnaryOperator(Instruction &I) {
+ ValueLatticeElement V0State = getValueState(I.getOperand(0));
+
+ ValueLatticeElement &IV = ValueState[&I];
+ // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would
+ // discover a concrete value later.
+ if (SCCPSolver::isOverdefined(IV))
+ return (void)markOverdefined(&I);
+
+ // If something is unknown/undef, wait for it to resolve.
+ if (V0State.isUnknownOrUndef())
+ return;
+
+ if (SCCPSolver::isConstant(V0State))
+ if (Constant *C = ConstantFoldUnaryOpOperand(I.getOpcode(),
+ getConstant(V0State), DL))
+ return (void)markConstant(IV, &I, C);
+
+ markOverdefined(&I);
+}
+
+// Handle Binary Operators.
+void SCCPInstVisitor::visitBinaryOperator(Instruction &I) {
+ ValueLatticeElement V1State = getValueState(I.getOperand(0));
+ ValueLatticeElement V2State = getValueState(I.getOperand(1));
+
+ ValueLatticeElement &IV = ValueState[&I];
+ if (IV.isOverdefined())
+ return;
+
+ // If something is undef, wait for it to resolve.
+ if (V1State.isUnknownOrUndef() || V2State.isUnknownOrUndef())
+ return;
+
+ if (V1State.isOverdefined() && V2State.isOverdefined())
+ return (void)markOverdefined(&I);
+
+ // If either of the operands is a constant, try to fold it to a constant.
+ // TODO: Use information from notconstant better.
+ if ((V1State.isConstant() || V2State.isConstant())) {
+ Value *V1 = SCCPSolver::isConstant(V1State) ? getConstant(V1State)
+ : I.getOperand(0);
+ Value *V2 = SCCPSolver::isConstant(V2State) ? getConstant(V2State)
+ : I.getOperand(1);
+ Value *R = simplifyBinOp(I.getOpcode(), V1, V2, SimplifyQuery(DL));
+ auto *C = dyn_cast_or_null<Constant>(R);
+ if (C) {
+ // Conservatively assume that the result may be based on operands that may
+ // be undef. Note that we use mergeInValue to combine the constant with
+ // the existing lattice value for I, as different constants might be found
+      // after one of the operands goes to overdefined, e.g. due to one operand
+ // being a special floating value.
+ ValueLatticeElement NewV;
+ NewV.markConstant(C, /*MayIncludeUndef=*/true);
+ return (void)mergeInValue(&I, NewV);
+ }
+ }
+
+ // Only use ranges for binary operators on integers.
+ if (!I.getType()->isIntegerTy())
+ return markOverdefined(&I);
+
+ // Try to simplify to a constant range.
+ ConstantRange A = getConstantRange(V1State, I.getType());
+ ConstantRange B = getConstantRange(V2State, I.getType());
+ ConstantRange R = A.binaryOp(cast<BinaryOperator>(&I)->getOpcode(), B);
+ mergeInValue(&I, ValueLatticeElement::getRange(R));
+
+ // TODO: Currently we do not exploit special values that produce something
+ // better than overdefined with an overdefined operand for vector or floating
+ // point types, like and <4 x i32> overdefined, zeroinitializer.
+}
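+
+// Hedged example of the range path above (hypothetical ranges): if %a is
+// known to lie in [0, 4) and %b in [1, 3), then "add i32 %a, %b" merges in
+// the range [1, 6); the result only becomes overdefined when no useful range
+// information remains.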
+
+// Handle ICmpInst instruction.
+void SCCPInstVisitor::visitCmpInst(CmpInst &I) {
+  // Do not cache this lookup; getValueState calls later in the function might
+ // invalidate the reference.
+ if (SCCPSolver::isOverdefined(ValueState[&I]))
+ return (void)markOverdefined(&I);
+
+ Value *Op1 = I.getOperand(0);
+ Value *Op2 = I.getOperand(1);
+
+  // Use the operands' lattice states, which include constant range info if
+  // available.
+ auto V1State = getValueState(Op1);
+ auto V2State = getValueState(Op2);
+
+ Constant *C = V1State.getCompare(I.getPredicate(), I.getType(), V2State, DL);
+ if (C) {
+ ValueLatticeElement CV;
+ CV.markConstant(C);
+ mergeInValue(&I, CV);
+ return;
+ }
+
+  // If the operands are still unknown, wait for them to resolve.
+ if ((V1State.isUnknownOrUndef() || V2State.isUnknownOrUndef()) &&
+ !SCCPSolver::isConstant(ValueState[&I]))
+ return;
+
+ markOverdefined(&I);
+}
+
+// Handle getelementptr instructions. If all operands are constants then we
+// can turn this into a getelementptr ConstantExpr.
+void SCCPInstVisitor::visitGetElementPtrInst(GetElementPtrInst &I) {
+ if (SCCPSolver::isOverdefined(ValueState[&I]))
+ return (void)markOverdefined(&I);
+
+ SmallVector<Constant *, 8> Operands;
+ Operands.reserve(I.getNumOperands());
+
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ ValueLatticeElement State = getValueState(I.getOperand(i));
+ if (State.isUnknownOrUndef())
+ return; // Operands are not resolved yet.
+
+ if (SCCPSolver::isOverdefined(State))
+ return (void)markOverdefined(&I);
+
+ if (Constant *C = getConstant(State)) {
+ Operands.push_back(C);
+ continue;
+ }
+
+ return (void)markOverdefined(&I);
+ }
+
+ Constant *Ptr = Operands[0];
+ auto Indices = ArrayRef(Operands.begin() + 1, Operands.end());
+ Constant *C =
+ ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr, Indices);
+ markConstant(&I, C);
+}
+
+void SCCPInstVisitor::visitStoreInst(StoreInst &SI) {
+ // If this store is of a struct, ignore it.
+ if (SI.getOperand(0)->getType()->isStructTy())
+ return;
+
+ if (TrackedGlobals.empty() || !isa<GlobalVariable>(SI.getOperand(1)))
+ return;
+
+ GlobalVariable *GV = cast<GlobalVariable>(SI.getOperand(1));
+ auto I = TrackedGlobals.find(GV);
+ if (I == TrackedGlobals.end())
+ return;
+
+ // Get the value we are storing into the global, then merge it.
+ mergeInValue(I->second, GV, getValueState(SI.getOperand(0)),
+ ValueLatticeElement::MergeOptions().setCheckWiden(false));
+ if (I->second.isOverdefined())
+ TrackedGlobals.erase(I); // No need to keep tracking this!
+}
+
+static ValueLatticeElement getValueFromMetadata(const Instruction *I) {
+ if (MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
+ if (I->getType()->isIntegerTy())
+ return ValueLatticeElement::getRange(
+ getConstantRangeFromMetadata(*Ranges));
+ if (I->hasMetadata(LLVMContext::MD_nonnull))
+ return ValueLatticeElement::getNot(
+ ConstantPointerNull::get(cast<PointerType>(I->getType())));
+ return ValueLatticeElement::getOverdefined();
+}
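+
+// Hedged example (hypothetical IR): a load annotated with
+//
+//   %v = load i32, ptr %p, !range !{i32 0, i32 10}
+//
+// is seeded with the lattice range [0, 10), a pointer load carrying !nonnull
+// becomes "not null", and anything else starts out overdefined.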
+
+// Handle load instructions. If the operand is a constant pointer to a constant
+// global, we can replace the load with the loaded constant value!
+void SCCPInstVisitor::visitLoadInst(LoadInst &I) {
+ // If this load is of a struct or the load is volatile, just mark the result
+ // as overdefined.
+ if (I.getType()->isStructTy() || I.isVolatile())
+ return (void)markOverdefined(&I);
+
+ // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would
+ // discover a concrete value later.
+ if (ValueState[&I].isOverdefined())
+ return (void)markOverdefined(&I);
+
+ ValueLatticeElement PtrVal = getValueState(I.getOperand(0));
+ if (PtrVal.isUnknownOrUndef())
+ return; // The pointer is not resolved yet!
+
+ ValueLatticeElement &IV = ValueState[&I];
+
+ if (SCCPSolver::isConstant(PtrVal)) {
+ Constant *Ptr = getConstant(PtrVal);
+
+ // load null is undefined.
+ if (isa<ConstantPointerNull>(Ptr)) {
+ if (NullPointerIsDefined(I.getFunction(), I.getPointerAddressSpace()))
+ return (void)markOverdefined(IV, &I);
+ else
+ return;
+ }
+
+ // Transform load (constant global) into the value loaded.
+ if (auto *GV = dyn_cast<GlobalVariable>(Ptr)) {
+ if (!TrackedGlobals.empty()) {
+ // If we are tracking this global, merge in the known value for it.
+ auto It = TrackedGlobals.find(GV);
+ if (It != TrackedGlobals.end()) {
+ mergeInValue(IV, &I, It->second, getMaxWidenStepsOpts());
+ return;
+ }
+ }
+ }
+
+ // Transform load from a constant into a constant if possible.
+ if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, I.getType(), DL))
+ return (void)markConstant(IV, &I, C);
+ }
+
+ // Fall back to metadata.
+ mergeInValue(&I, getValueFromMetadata(&I));
+}
+
+void SCCPInstVisitor::visitCallBase(CallBase &CB) {
+ handleCallResult(CB);
+ handleCallArguments(CB);
+}
+
+void SCCPInstVisitor::handleCallOverdefined(CallBase &CB) {
+ Function *F = CB.getCalledFunction();
+
+ // Void return and not tracking callee, just bail.
+ if (CB.getType()->isVoidTy())
+ return;
+
+ // Always mark struct return as overdefined.
+ if (CB.getType()->isStructTy())
+ return (void)markOverdefined(&CB);
+
+ // Otherwise, if we have a single return value case, and if the function is
+ // a declaration, maybe we can constant fold it.
+ if (F && F->isDeclaration() && canConstantFoldCallTo(&CB, F)) {
+ SmallVector<Constant *, 8> Operands;
+ for (const Use &A : CB.args()) {
+ if (A.get()->getType()->isStructTy())
+ return markOverdefined(&CB); // Can't handle struct args.
+ if (A.get()->getType()->isMetadataTy())
+ continue; // Carried in CB, not allowed in Operands.
+ ValueLatticeElement State = getValueState(A);
+
+ if (State.isUnknownOrUndef())
+ return; // Operands are not resolved yet.
+ if (SCCPSolver::isOverdefined(State))
+ return (void)markOverdefined(&CB);
+ assert(SCCPSolver::isConstant(State) && "Unknown state!");
+ Operands.push_back(getConstant(State));
+ }
+
+ if (SCCPSolver::isOverdefined(getValueState(&CB)))
+ return (void)markOverdefined(&CB);
+
+ // If we can constant fold this, mark the result of the call as a
+ // constant.
+ if (Constant *C = ConstantFoldCall(&CB, F, Operands, &GetTLI(*F)))
+ return (void)markConstant(&CB, C);
+ }
+
+ // Fall back to metadata.
+ mergeInValue(&CB, getValueFromMetadata(&CB));
+}
+
+void SCCPInstVisitor::handleCallArguments(CallBase &CB) {
+ Function *F = CB.getCalledFunction();
+ // If this is a local function that doesn't have its address taken, mark its
+ // entry block executable and merge in the actual arguments to the call into
+ // the formal arguments of the function.
+ if (TrackingIncomingArguments.count(F)) {
+ markBlockExecutable(&F->front());
+
+ // Propagate information from this call site into the callee.
+ auto CAI = CB.arg_begin();
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
+ ++AI, ++CAI) {
+ // If this argument is byval, and if the function is not readonly, there
+ // will be an implicit copy formed of the input aggregate.
+ if (AI->hasByValAttr() && !F->onlyReadsMemory()) {
+ markOverdefined(&*AI);
+ continue;
+ }
+
+ if (auto *STy = dyn_cast<StructType>(AI->getType())) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ ValueLatticeElement CallArg = getStructValueState(*CAI, i);
+ mergeInValue(getStructValueState(&*AI, i), &*AI, CallArg,
+ getMaxWidenStepsOpts());
+ }
+ } else
+ mergeInValue(&*AI, getValueState(*CAI), getMaxWidenStepsOpts());
+ }
+ }
+}
+
+void SCCPInstVisitor::handleCallResult(CallBase &CB) {
+ Function *F = CB.getCalledFunction();
+
+ if (auto *II = dyn_cast<IntrinsicInst>(&CB)) {
+ if (II->getIntrinsicID() == Intrinsic::ssa_copy) {
+ if (ValueState[&CB].isOverdefined())
+ return;
+
+ Value *CopyOf = CB.getOperand(0);
+ ValueLatticeElement CopyOfVal = getValueState(CopyOf);
+ const auto *PI = getPredicateInfoFor(&CB);
+ assert(PI && "Missing predicate info for ssa.copy");
+
+ const std::optional<PredicateConstraint> &Constraint =
+ PI->getConstraint();
+ if (!Constraint) {
+ mergeInValue(ValueState[&CB], &CB, CopyOfVal);
+ return;
+ }
+
+ CmpInst::Predicate Pred = Constraint->Predicate;
+ Value *OtherOp = Constraint->OtherOp;
+
+ // Wait until OtherOp is resolved.
+ if (getValueState(OtherOp).isUnknown()) {
+ addAdditionalUser(OtherOp, &CB);
+ return;
+ }
+
+ ValueLatticeElement CondVal = getValueState(OtherOp);
+ ValueLatticeElement &IV = ValueState[&CB];
+ if (CondVal.isConstantRange() || CopyOfVal.isConstantRange()) {
+ auto ImposedCR =
+ ConstantRange::getFull(DL.getTypeSizeInBits(CopyOf->getType()));
+
+ // Get the range imposed by the condition.
+ if (CondVal.isConstantRange())
+ ImposedCR = ConstantRange::makeAllowedICmpRegion(
+ Pred, CondVal.getConstantRange());
+
+ // Combine range info for the original value with the new range from the
+ // condition.
+ auto CopyOfCR = getConstantRange(CopyOfVal, CopyOf->getType());
+ auto NewCR = ImposedCR.intersectWith(CopyOfCR);
+ // If the existing information is != x, do not use the information from
+ // a chained predicate, as the != x information is more likely to be
+ // helpful in practice.
+ if (!CopyOfCR.contains(NewCR) && CopyOfCR.getSingleMissingElement())
+ NewCR = CopyOfCR;
+
+ // The new range is based on a branch condition. That guarantees that
+ // neither of the compare operands can be undef in the branch targets,
+ // unless we have conditions that are always true/false (e.g. icmp ule
+ // i32 %a, i32_max). For the latter, an overdefined/empty range will be
+ // inferred, but the branch will get folded accordingly anyway.
+ addAdditionalUser(OtherOp, &CB);
+ mergeInValue(
+ IV, &CB,
+ ValueLatticeElement::getRange(NewCR, /*MayIncludeUndef*/ false));
+ return;
+ } else if (Pred == CmpInst::ICMP_EQ &&
+ (CondVal.isConstant() || CondVal.isNotConstant())) {
+ // For non-integer values or integer constant expressions, only
+ // propagate equal constants or not-constants.
+ addAdditionalUser(OtherOp, &CB);
+ mergeInValue(IV, &CB, CondVal);
+ return;
+ } else if (Pred == CmpInst::ICMP_NE && CondVal.isConstant()) {
+ // Propagate inequalities.
+ addAdditionalUser(OtherOp, &CB);
+ mergeInValue(IV, &CB,
+ ValueLatticeElement::getNot(CondVal.getConstant()));
+ return;
+ }
+
+ return (void)mergeInValue(IV, &CB, CopyOfVal);
+ }
+
+ if (ConstantRange::isIntrinsicSupported(II->getIntrinsicID())) {
+ // Compute result range for intrinsics supported by ConstantRange.
+ // Do this even if we don't know a range for all operands, as we may
+ // still know something about the result range, e.g. of abs(x).
+ SmallVector<ConstantRange, 2> OpRanges;
+ for (Value *Op : II->args()) {
+ const ValueLatticeElement &State = getValueState(Op);
+ OpRanges.push_back(getConstantRange(State, Op->getType()));
+ }
+
+ ConstantRange Result =
+ ConstantRange::intrinsic(II->getIntrinsicID(), OpRanges);
+ return (void)mergeInValue(II, ValueLatticeElement::getRange(Result));
+ }
+ }
+
+ // The common case is that we aren't tracking the callee, either because we
+ // are not doing interprocedural analysis or the callee is indirect, or is
+ // external. Handle these cases first.
+ if (!F || F->isDeclaration())
+ return handleCallOverdefined(CB);
+
+ // If this is a single/zero retval case, see if we're tracking the function.
+ if (auto *STy = dyn_cast<StructType>(F->getReturnType())) {
+ if (!MRVFunctionsTracked.count(F))
+ return handleCallOverdefined(CB); // Not tracking this callee.
+
+ // If we are tracking this callee, propagate the result of the function
+ // into this call site.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ mergeInValue(getStructValueState(&CB, i), &CB,
+ TrackedMultipleRetVals[std::make_pair(F, i)],
+ getMaxWidenStepsOpts());
+ } else {
+ auto TFRVI = TrackedRetVals.find(F);
+ if (TFRVI == TrackedRetVals.end())
+ return handleCallOverdefined(CB); // Not tracking this callee.
+
+ // If so, propagate the return value of the callee into this call result.
+ mergeInValue(&CB, TFRVI->second, getMaxWidenStepsOpts());
+ }
+}
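+
+// Illustrative sketch of the ssa.copy handling above (the IR names are made
+// up for the example): given
+//
+//   %c = icmp ult i32 %x, 10
+//   br i1 %c, label %then, label %else
+//
+// PredicateInfo inserts a constrained copy of %x into %then:
+//
+//   %x.0 = call i32 @llvm.ssa.copy.i32(i32 %x)
+//
+// handleCallResult then intersects the range already known for %x with the
+// region imposed by the predicate (ult 10, i.e. [0, 10)), so the lattice
+// value of %x.0 becomes that refined range.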
+
+void SCCPInstVisitor::solve() {
+ // Process the work lists until they are empty!
+ while (!BBWorkList.empty() || !InstWorkList.empty() ||
+ !OverdefinedInstWorkList.empty()) {
+ // Process the overdefined instruction's work list first, which drives other
+ // things to overdefined more quickly.
+ while (!OverdefinedInstWorkList.empty()) {
+ Value *I = OverdefinedInstWorkList.pop_back_val();
+
+ LLVM_DEBUG(dbgs() << "\nPopped off OI-WL: " << *I << '\n');
+
+ // "I" got into the work list because it either made the transition from
+ // bottom to constant, or to overdefined.
+ //
+ // Anything on this worklist that is overdefined need not be visited
+ // since all of its users will have already been marked as overdefined.
+ // Update all of the users of this instruction's value.
+ //
+ markUsersAsChanged(I);
+ }
+
+ // Process the instruction work list.
+ while (!InstWorkList.empty()) {
+ Value *I = InstWorkList.pop_back_val();
+
+ LLVM_DEBUG(dbgs() << "\nPopped off I-WL: " << *I << '\n');
+
+ // "I" got into the work list because it made the transition from undef to
+ // constant.
+ //
+ // Anything on this worklist that is overdefined need not be visited
+ // since all of its users will have already been marked as overdefined.
+ // Update all of the users of this instruction's value.
+ //
+ if (I->getType()->isStructTy() || !getValueState(I).isOverdefined())
+ markUsersAsChanged(I);
+ }
+
+ // Process the basic block work list.
+ while (!BBWorkList.empty()) {
+ BasicBlock *BB = BBWorkList.pop_back_val();
+
+ LLVM_DEBUG(dbgs() << "\nPopped off BBWL: " << *BB << '\n');
+
+ // Notify all instructions in this basic block that they are newly
+ // executable.
+ visit(BB);
+ }
+ }
+}
+
+/// While solving the dataflow for a function, we don't compute a result for
+/// operations with an undef operand, to allow undef to be lowered to a
+/// constant later. For example, constant folding of "zext i8 undef to i16"
+/// would result in "i16 0", and if undef is later lowered to "i8 1", then the
+/// zext result would become "i16 1" and would result into an overdefined
+/// lattice value once merged with the previous result. Not computing the
+/// result of the zext (treating undef the same as unknown) allows us to handle
+/// a later undef->constant lowering more optimally.
+///
+/// However, if the operand remains undef when the solver returns, we do need
+/// to assign some result to the instruction (otherwise we would treat it as
+/// unreachable). For simplicity, we mark any instructions that are still
+/// unknown as overdefined.
+bool SCCPInstVisitor::resolvedUndefsIn(Function &F) {
+ bool MadeChange = false;
+ for (BasicBlock &BB : F) {
+ if (!BBExecutable.count(&BB))
+ continue;
+
+ for (Instruction &I : BB) {
+ // Look for instructions which produce undef values.
+ if (I.getType()->isVoidTy())
+ continue;
+
+ if (auto *STy = dyn_cast<StructType>(I.getType())) {
+ // Only a few things that can be structs matter for undef.
+
+ // Tracked calls must never be marked overdefined in resolvedUndefsIn.
+ if (auto *CB = dyn_cast<CallBase>(&I))
+ if (Function *F = CB->getCalledFunction())
+ if (MRVFunctionsTracked.count(F))
+ continue;
+
+ // extractvalue and insertvalue don't need to be marked; they are
+ // tracked as precisely as their operands.
+ if (isa<ExtractValueInst>(I) || isa<InsertValueInst>(I))
+ continue;
+ // Send the results of everything else to overdefined. We could be
+ // more precise than this but it isn't worth bothering.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ ValueLatticeElement &LV = getStructValueState(&I, i);
+ if (LV.isUnknown()) {
+ markOverdefined(LV, &I);
+ MadeChange = true;
+ }
+ }
+ continue;
+ }
+
+ ValueLatticeElement &LV = getValueState(&I);
+ if (!LV.isUnknown())
+ continue;
+
+ // There are two reasons a call can have an undef result
+ // 1. It could be tracked.
+ // 2. It could be constant-foldable.
+ // Because of the way we solve return values, tracked calls must
+ // never be marked overdefined in resolvedUndefsIn.
+ if (auto *CB = dyn_cast<CallBase>(&I))
+ if (Function *F = CB->getCalledFunction())
+ if (TrackedRetVals.count(F))
+ continue;
+
+ if (isa<LoadInst>(I)) {
+ // A load here means one of two things: a load of undef from a global,
+ // or a load from an unknown pointer. Either way, having it return undef
+ // is okay.
+ continue;
+ }
+
+ markOverdefined(&I);
+ MadeChange = true;
+ }
+ }
+
+ LLVM_DEBUG(if (MadeChange) dbgs()
+ << "\nResolved undefs in " << F.getName() << '\n');
+
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// SCCPSolver implementations
+//
+SCCPSolver::SCCPSolver(
+ const DataLayout &DL,
+ std::function<const TargetLibraryInfo &(Function &)> GetTLI,
+ LLVMContext &Ctx)
+ : Visitor(new SCCPInstVisitor(DL, std::move(GetTLI), Ctx)) {}
+
+SCCPSolver::~SCCPSolver() = default;
+
+void SCCPSolver::addAnalysis(Function &F, AnalysisResultsForFn A) {
+ return Visitor->addAnalysis(F, std::move(A));
+}
+
+bool SCCPSolver::markBlockExecutable(BasicBlock *BB) {
+ return Visitor->markBlockExecutable(BB);
+}
+
+const PredicateBase *SCCPSolver::getPredicateInfoFor(Instruction *I) {
+ return Visitor->getPredicateInfoFor(I);
+}
+
+const LoopInfo &SCCPSolver::getLoopInfo(Function &F) {
+ return Visitor->getLoopInfo(F);
+}
+
+DomTreeUpdater SCCPSolver::getDTU(Function &F) { return Visitor->getDTU(F); }
+
+void SCCPSolver::trackValueOfGlobalVariable(GlobalVariable *GV) {
+ Visitor->trackValueOfGlobalVariable(GV);
+}
+
+void SCCPSolver::addTrackedFunction(Function *F) {
+ Visitor->addTrackedFunction(F);
+}
+
+void SCCPSolver::addToMustPreserveReturnsInFunctions(Function *F) {
+ Visitor->addToMustPreserveReturnsInFunctions(F);
+}
+
+bool SCCPSolver::mustPreserveReturn(Function *F) {
+ return Visitor->mustPreserveReturn(F);
+}
+
+void SCCPSolver::addArgumentTrackedFunction(Function *F) {
+ Visitor->addArgumentTrackedFunction(F);
+}
+
+bool SCCPSolver::isArgumentTrackedFunction(Function *F) {
+ return Visitor->isArgumentTrackedFunction(F);
+}
+
+void SCCPSolver::solve() { Visitor->solve(); }
+
+bool SCCPSolver::resolvedUndefsIn(Function &F) {
+ return Visitor->resolvedUndefsIn(F);
+}
+
+void SCCPSolver::solveWhileResolvedUndefsIn(Module &M) {
+ Visitor->solveWhileResolvedUndefsIn(M);
+}
+
+void
+SCCPSolver::solveWhileResolvedUndefsIn(SmallVectorImpl<Function *> &WorkList) {
+ Visitor->solveWhileResolvedUndefsIn(WorkList);
+}
+
+bool SCCPSolver::isBlockExecutable(BasicBlock *BB) const {
+ return Visitor->isBlockExecutable(BB);
+}
+
+bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) const {
+ return Visitor->isEdgeFeasible(From, To);
+}
+
+std::vector<ValueLatticeElement>
+SCCPSolver::getStructLatticeValueFor(Value *V) const {
+ return Visitor->getStructLatticeValueFor(V);
+}
+
+void SCCPSolver::removeLatticeValueFor(Value *V) {
+ return Visitor->removeLatticeValueFor(V);
+}
+
+const ValueLatticeElement &SCCPSolver::getLatticeValueFor(Value *V) const {
+ return Visitor->getLatticeValueFor(V);
+}
+
+const MapVector<Function *, ValueLatticeElement> &
+SCCPSolver::getTrackedRetVals() {
+ return Visitor->getTrackedRetVals();
+}
+
+const DenseMap<GlobalVariable *, ValueLatticeElement> &
+SCCPSolver::getTrackedGlobals() {
+ return Visitor->getTrackedGlobals();
+}
+
+const SmallPtrSet<Function *, 16> SCCPSolver::getMRVFunctionsTracked() {
+ return Visitor->getMRVFunctionsTracked();
+}
+
+void SCCPSolver::markOverdefined(Value *V) { Visitor->markOverdefined(V); }
+
+bool SCCPSolver::isStructLatticeConstant(Function *F, StructType *STy) {
+ return Visitor->isStructLatticeConstant(F, STy);
+}
+
+Constant *SCCPSolver::getConstant(const ValueLatticeElement &LV) const {
+ return Visitor->getConstant(LV);
+}
+
+SmallPtrSetImpl<Function *> &SCCPSolver::getArgumentTrackedFunctions() {
+ return Visitor->getArgumentTrackedFunctions();
+}
+
+void SCCPSolver::markArgInFuncSpecialization(
+ Function *F, const SmallVectorImpl<ArgInfo> &Args) {
+ Visitor->markArgInFuncSpecialization(F, Args);
+}
+
+void SCCPSolver::markFunctionUnreachable(Function *F) {
+ Visitor->markFunctionUnreachable(F);
+}
+
+void SCCPSolver::visit(Instruction *I) { Visitor->visit(I); }
+
+void SCCPSolver::visitCall(CallInst &I) { Visitor->visitCall(I); }
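+
+// A minimal usage sketch of the wrapper API above (F and GetTLI stand for a
+// function being analyzed and a TargetLibraryInfo getter; real clients such
+// as the SCCP passes also configure argument/return tracking before solving):
+//
+//   SCCPSolver Solver(F.getParent()->getDataLayout(), GetTLI, F.getContext());
+//   Solver.markBlockExecutable(&F.front());
+//   for (Argument &A : F.args())
+//     Solver.markOverdefined(&A);         // unknown incoming arguments
+//   Solver.solve();
+//   while (Solver.resolvedUndefsIn(F))    // give leftover undefs a state
+//     Solver.solve();
+//   // Query results with Solver.getLatticeValueFor(&I) for instructions in
+//   // blocks where Solver.isBlockExecutable(...) holds.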
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/SSAUpdater.cpp
new file mode 100644
index 0000000000..2520aa5d9d
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/SSAUpdater.cpp
@@ -0,0 +1,482 @@
+//===- SSAUpdater.cpp - Unstructured SSA Update Tool ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SSAUpdater class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
+#include <cassert>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ssaupdater"
+
+using AvailableValsTy = DenseMap<BasicBlock *, Value *>;
+
+static AvailableValsTy &getAvailableVals(void *AV) {
+ return *static_cast<AvailableValsTy*>(AV);
+}
+
+SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode *> *NewPHI)
+ : InsertedPHIs(NewPHI) {}
+
+SSAUpdater::~SSAUpdater() {
+ delete static_cast<AvailableValsTy*>(AV);
+}
+
+void SSAUpdater::Initialize(Type *Ty, StringRef Name) {
+ if (!AV)
+ AV = new AvailableValsTy();
+ else
+ getAvailableVals(AV).clear();
+ ProtoType = Ty;
+ ProtoName = std::string(Name);
+}
+
+bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const {
+ return getAvailableVals(AV).count(BB);
+}
+
+Value *SSAUpdater::FindValueForBlock(BasicBlock *BB) const {
+ return getAvailableVals(AV).lookup(BB);
+}
+
+void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
+ assert(ProtoType && "Need to initialize SSAUpdater");
+ assert(ProtoType == V->getType() &&
+ "All rewritten values must have the same type");
+ getAvailableVals(AV)[BB] = V;
+}
+
+static bool IsEquivalentPHI(PHINode *PHI,
+ SmallDenseMap<BasicBlock *, Value *, 8> &ValueMapping) {
+ unsigned PHINumValues = PHI->getNumIncomingValues();
+ if (PHINumValues != ValueMapping.size())
+ return false;
+
+ // Scan the phi to see if it matches.
+ for (unsigned i = 0, e = PHINumValues; i != e; ++i)
+ if (ValueMapping[PHI->getIncomingBlock(i)] !=
+ PHI->getIncomingValue(i)) {
+ return false;
+ }
+
+ return true;
+}
+
+Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) {
+ Value *Res = GetValueAtEndOfBlockInternal(BB);
+ return Res;
+}
+
+Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
+ // If there is no definition of the renamed variable in this block, just use
+ // GetValueAtEndOfBlock to do our work.
+ if (!HasValueForBlock(BB))
+ return GetValueAtEndOfBlock(BB);
+
+ // Otherwise, we have the hard case. Get the live-in values for each
+ // predecessor.
+ SmallVector<std::pair<BasicBlock *, Value *>, 8> PredValues;
+ Value *SingularValue = nullptr;
+
+ // We can get our predecessor info by walking the pred_iterator list, but it
+ // is relatively slow. If we already have PHI nodes in this block, walk one
+ // of them to get the predecessor list instead.
+ if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
+ for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PredBB = SomePhi->getIncomingBlock(i);
+ Value *PredVal = GetValueAtEndOfBlock(PredBB);
+ PredValues.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (i == 0)
+ SingularValue = PredVal;
+ else if (PredVal != SingularValue)
+ SingularValue = nullptr;
+ }
+ } else {
+ bool isFirstPred = true;
+ for (BasicBlock *PredBB : predecessors(BB)) {
+ Value *PredVal = GetValueAtEndOfBlock(PredBB);
+ PredValues.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (isFirstPred) {
+ SingularValue = PredVal;
+ isFirstPred = false;
+ } else if (PredVal != SingularValue)
+ SingularValue = nullptr;
+ }
+ }
+
+ // If there are no predecessors, just return undef.
+ if (PredValues.empty())
+ return UndefValue::get(ProtoType);
+
+ // Otherwise, if all the merged values are the same, just use it.
+ if (SingularValue)
+ return SingularValue;
+
+ // Otherwise, we do need a PHI: check to see if we already have one available
+ // in this block that produces the right value.
+ if (isa<PHINode>(BB->begin())) {
+ SmallDenseMap<BasicBlock *, Value *, 8> ValueMapping(PredValues.begin(),
+ PredValues.end());
+ for (PHINode &SomePHI : BB->phis()) {
+ if (IsEquivalentPHI(&SomePHI, ValueMapping))
+ return &SomePHI;
+ }
+ }
+
+ // Ok, we have no way out, insert a new one now.
+ PHINode *InsertedPHI = PHINode::Create(ProtoType, PredValues.size(),
+ ProtoName, &BB->front());
+
+ // Fill in all the predecessors of the PHI.
+ for (const auto &PredValue : PredValues)
+ InsertedPHI->addIncoming(PredValue.second, PredValue.first);
+
+ // See if the PHI node can be merged to a single value. This can happen in
+ // loop cases when we get a PHI of itself and one other value.
+ if (Value *V =
+ simplifyInstruction(InsertedPHI, BB->getModule()->getDataLayout())) {
+ InsertedPHI->eraseFromParent();
+ return V;
+ }
+
+ // Set the DebugLoc of the inserted PHI, if available.
+ DebugLoc DL;
+ if (const Instruction *I = BB->getFirstNonPHI())
+ DL = I->getDebugLoc();
+ InsertedPHI->setDebugLoc(DL);
+
+ // If the client wants to know about all new instructions, tell it.
+ if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+
+ LLVM_DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
+ return InsertedPHI;
+}
+
+void SSAUpdater::RewriteUse(Use &U) {
+ Instruction *User = cast<Instruction>(U.getUser());
+
+ Value *V;
+ if (PHINode *UserPN = dyn_cast<PHINode>(User))
+ V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
+ else
+ V = GetValueInMiddleOfBlock(User->getParent());
+
+ U.set(V);
+}
+
+void SSAUpdater::RewriteUseAfterInsertions(Use &U) {
+ Instruction *User = cast<Instruction>(U.getUser());
+
+ Value *V;
+ if (PHINode *UserPN = dyn_cast<PHINode>(User))
+ V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
+ else
+ V = GetValueAtEndOfBlock(User->getParent());
+
+ U.set(V);
+}
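+
+// A minimal usage sketch of the API above (BB1/BB2, V1/V2 and U are
+// placeholders): rewrite a cross-block use of a value that now has several
+// definitions.
+//
+//   SmallVector<PHINode *, 8> NewPHIs;
+//   SSAUpdater SSA(&NewPHIs);
+//   SSA.Initialize(V1->getType(), "promoted");
+//   SSA.AddAvailableValue(BB1, V1);   // value live out of BB1
+//   SSA.AddAvailableValue(BB2, V2);   // value live out of BB2
+//   SSA.RewriteUse(U);                // U is a Use in some other block;
+//                                     // PHIs are inserted where needed.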
+
+namespace llvm {
+
+template<>
+class SSAUpdaterTraits<SSAUpdater> {
+public:
+ using BlkT = BasicBlock;
+ using ValT = Value *;
+ using PhiT = PHINode;
+ using BlkSucc_iterator = succ_iterator;
+
+ static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return succ_begin(BB); }
+ static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return succ_end(BB); }
+
+ class PHI_iterator {
+ private:
+ PHINode *PHI;
+ unsigned idx;
+
+ public:
+ explicit PHI_iterator(PHINode *P) // begin iterator
+ : PHI(P), idx(0) {}
+ PHI_iterator(PHINode *P, bool) // end iterator
+ : PHI(P), idx(PHI->getNumIncomingValues()) {}
+
+ PHI_iterator &operator++() { ++idx; return *this; }
+ bool operator==(const PHI_iterator& x) const { return idx == x.idx; }
+ bool operator!=(const PHI_iterator& x) const { return !operator==(x); }
+
+ Value *getIncomingValue() { return PHI->getIncomingValue(idx); }
+ BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); }
+ };
+
+ static PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); }
+ static PHI_iterator PHI_end(PhiT *PHI) {
+ return PHI_iterator(PHI, true);
+ }
+
+ /// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds
+ /// vector, set Info->NumPreds, and allocate space in Info->Preds.
+ static void FindPredecessorBlocks(BasicBlock *BB,
+ SmallVectorImpl<BasicBlock *> *Preds) {
+ // We can get our predecessor info by walking the pred_iterator list,
+ // but it is relatively slow. If we already have PHI nodes in this
+ // block, walk one of them to get the predecessor list instead.
+ if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin()))
+ append_range(*Preds, SomePhi->blocks());
+ else
+ append_range(*Preds, predecessors(BB));
+ }
+
+ /// GetUndefVal - Get an undefined value of the same type as the value
+ /// being handled.
+ static Value *GetUndefVal(BasicBlock *BB, SSAUpdater *Updater) {
+ return UndefValue::get(Updater->ProtoType);
+ }
+
+ /// CreateEmptyPHI - Create a new PHI instruction in the specified block.
+ /// Reserve space for the operands but do not fill them in yet.
+ static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds,
+ SSAUpdater *Updater) {
+ PHINode *PHI = PHINode::Create(Updater->ProtoType, NumPreds,
+ Updater->ProtoName, &BB->front());
+ return PHI;
+ }
+
+ /// AddPHIOperand - Add the specified value as an operand of the PHI for
+ /// the specified predecessor block.
+ static void AddPHIOperand(PHINode *PHI, Value *Val, BasicBlock *Pred) {
+ PHI->addIncoming(Val, Pred);
+ }
+
+ /// ValueIsPHI - Check if a value is a PHI.
+ static PHINode *ValueIsPHI(Value *Val, SSAUpdater *Updater) {
+ return dyn_cast<PHINode>(Val);
+ }
+
+ /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source
+ /// operands, i.e., it was just added.
+ static PHINode *ValueIsNewPHI(Value *Val, SSAUpdater *Updater) {
+ PHINode *PHI = ValueIsPHI(Val, Updater);
+ if (PHI && PHI->getNumIncomingValues() == 0)
+ return PHI;
+ return nullptr;
+ }
+
+ /// GetPHIValue - For the specified PHI instruction, return the value
+ /// that it defines.
+ static Value *GetPHIValue(PHINode *PHI) {
+ return PHI;
+ }
+};
+
+} // end namespace llvm
+
+/// Check to see if AvailableVals has an entry for the specified BB and if so,
+/// return it. If not, construct SSA form by first calculating the required
+/// placement of PHIs and then inserting new PHIs where needed.
+Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ if (Value *V = AvailableVals[BB])
+ return V;
+
+ SSAUpdaterImpl<SSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
+ return Impl.GetValue(BB);
+}
+
+//===----------------------------------------------------------------------===//
+// LoadAndStorePromoter Implementation
+//===----------------------------------------------------------------------===//
+
+LoadAndStorePromoter::
+LoadAndStorePromoter(ArrayRef<const Instruction *> Insts,
+ SSAUpdater &S, StringRef BaseName) : SSA(S) {
+ if (Insts.empty()) return;
+
+ const Value *SomeVal;
+ if (const LoadInst *LI = dyn_cast<LoadInst>(Insts[0]))
+ SomeVal = LI;
+ else
+ SomeVal = cast<StoreInst>(Insts[0])->getOperand(0);
+
+ if (BaseName.empty())
+ BaseName = SomeVal->getName();
+ SSA.Initialize(SomeVal->getType(), BaseName);
+}
+
+void LoadAndStorePromoter::run(const SmallVectorImpl<Instruction *> &Insts) {
+ // First step: bucket up uses of the alloca by the block they occur in.
+ // This is important because we have to handle multiple defs/uses in a block
+ // ourselves: SSAUpdater is purely for cross-block references.
+ DenseMap<BasicBlock *, TinyPtrVector<Instruction *>> UsesByBlock;
+
+ for (Instruction *User : Insts)
+ UsesByBlock[User->getParent()].push_back(User);
+
+ // Okay, now we can iterate over all the blocks in the function with uses,
+ // processing them. Keep track of which loads are loading a live-in value.
+ // Walk the uses in the use-list order to be deterministic.
+ SmallVector<LoadInst *, 32> LiveInLoads;
+ DenseMap<Value *, Value *> ReplacedLoads;
+
+ for (Instruction *User : Insts) {
+ BasicBlock *BB = User->getParent();
+ TinyPtrVector<Instruction *> &BlockUses = UsesByBlock[BB];
+
+ // If this block has already been processed, ignore this repeat use.
+ if (BlockUses.empty()) continue;
+
+ // Okay, this is the first use in the block. If this block just has a
+ // single user in it, we can rewrite it trivially.
+ if (BlockUses.size() == 1) {
+ // If it is a store, it is a trivial def of the value in the block.
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ updateDebugInfo(SI);
+ SSA.AddAvailableValue(BB, SI->getOperand(0));
+ } else
+ // Otherwise it is a load, queue it to rewrite as a live-in load.
+ LiveInLoads.push_back(cast<LoadInst>(User));
+ BlockUses.clear();
+ continue;
+ }
+
+ // Otherwise, check to see if this block is all loads.
+ bool HasStore = false;
+ for (Instruction *I : BlockUses) {
+ if (isa<StoreInst>(I)) {
+ HasStore = true;
+ break;
+ }
+ }
+
+ // If so, we can queue them all as live in loads. We don't have an
+ // efficient way to tell which one is first in the block and don't want to
+ // scan large blocks, so just add all loads as live ins.
+ if (!HasStore) {
+ for (Instruction *I : BlockUses)
+ LiveInLoads.push_back(cast<LoadInst>(I));
+ BlockUses.clear();
+ continue;
+ }
+
+ // Otherwise, we have mixed loads and stores (or just a bunch of stores).
+ // Since SSAUpdater is purely for cross-block values, we need to determine
+ // the order of these instructions in the block. If the first use in the
+ // block is a load, then it uses the live in value. The last store defines
+ // the live out value. We handle this by doing a linear scan of the block.
+ Value *StoredValue = nullptr;
+ for (Instruction &I : *BB) {
+ if (LoadInst *L = dyn_cast<LoadInst>(&I)) {
+ // If this is a load from an unrelated pointer, ignore it.
+ if (!isInstInList(L, Insts)) continue;
+
+ // If we haven't seen a store yet, this is a live in use, otherwise
+ // use the stored value.
+ if (StoredValue) {
+ replaceLoadWithValue(L, StoredValue);
+ L->replaceAllUsesWith(StoredValue);
+ ReplacedLoads[L] = StoredValue;
+ } else {
+ LiveInLoads.push_back(L);
+ }
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+ // If this is a store to an unrelated pointer, ignore it.
+ if (!isInstInList(SI, Insts)) continue;
+ updateDebugInfo(SI);
+
+ // Remember that this is the active value in the block.
+ StoredValue = SI->getOperand(0);
+ }
+ }
+
+ // The last stored value that happened is the live-out for the block.
+ assert(StoredValue && "Already checked that there is a store in block");
+ SSA.AddAvailableValue(BB, StoredValue);
+ BlockUses.clear();
+ }
+
+ // Okay, now we rewrite all loads that use live-in values in the loop,
+ // inserting PHI nodes as necessary.
+ for (LoadInst *ALoad : LiveInLoads) {
+ Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
+ replaceLoadWithValue(ALoad, NewVal);
+
+ // Avoid assertions in unreachable code.
+ if (NewVal == ALoad) NewVal = PoisonValue::get(NewVal->getType());
+ ALoad->replaceAllUsesWith(NewVal);
+ ReplacedLoads[ALoad] = NewVal;
+ }
+
+ // Allow the client to do stuff before we start nuking things.
+ doExtraRewritesBeforeFinalDeletion();
+
+ // Now that everything is rewritten, delete the old instructions from the
+ // function. They should all be dead now.
+ for (Instruction *User : Insts) {
+ if (!shouldDelete(User))
+ continue;
+
+ // If this is a load that still has uses, then the load must have been added
+ // as a live value in the SSAUpdater data structure for a block (e.g. because
+ // the loaded value was stored later). In this case, we need to recursively
+ // propagate the updates until we get to the real value.
+ if (!User->use_empty()) {
+ Value *NewVal = ReplacedLoads[User];
+ assert(NewVal && "not a replaced load?");
+
+ // Propagate down to the ultimate replacee. The intermediate loads
+ // could theoretically already have been deleted, so we don't want to
+ // dereference the Value*'s.
+ DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal);
+ while (RLI != ReplacedLoads.end()) {
+ NewVal = RLI->second;
+ RLI = ReplacedLoads.find(NewVal);
+ }
+
+ replaceLoadWithValue(cast<LoadInst>(User), NewVal);
+ User->replaceAllUsesWith(NewVal);
+ }
+
+ instructionDeleted(User);
+ User->eraseFromParent();
+ }
+}
+
+bool
+LoadAndStorePromoter::isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction *> &Insts)
+ const {
+ return is_contained(Insts, I);
+}
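+
+// A minimal usage sketch (assuming Insts is a SmallVector<Instruction *, 8>
+// holding every load and store of a single promotable location; clients
+// typically subclass LoadAndStorePromoter to override hooks such as
+// isInstInList or replaceLoadWithValue):
+//
+//   SmallVector<PHINode *, 8> NewPHIs;
+//   SSAUpdater SSA(&NewPHIs);
+//   LoadAndStorePromoter Promoter(Insts, SSA, "promoted");
+//   Promoter.run(Insts);  // rewrites the loads, inserts PHIs, deletes Insts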
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/SSAUpdaterBulk.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/SSAUpdaterBulk.cpp
new file mode 100644
index 0000000000..cad7ff64c0
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/SSAUpdaterBulk.cpp
@@ -0,0 +1,184 @@
+//===- SSAUpdaterBulk.cpp - Unstructured SSA Update Tool ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SSAUpdaterBulk class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
+#include "llvm/Analysis/IteratedDominanceFrontier.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/Value.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ssaupdaterbulk"
+
+/// Helper function for finding a block which should have a value for the given
+/// user. For PHI-nodes this block is the corresponding predecessor, for other
+/// instructions it's their parent block.
+static BasicBlock *getUserBB(Use *U) {
+ auto *User = cast<Instruction>(U->getUser());
+
+ if (auto *UserPN = dyn_cast<PHINode>(User))
+ return UserPN->getIncomingBlock(*U);
+ else
+ return User->getParent();
+}
+
+/// Add a new variable to the SSA rewriter. This needs to be called before
+/// AddAvailableValue or AddUse calls.
+unsigned SSAUpdaterBulk::AddVariable(StringRef Name, Type *Ty) {
+ unsigned Var = Rewrites.size();
+ LLVM_DEBUG(dbgs() << "SSAUpdater: Var=" << Var << ": initialized with Ty = "
+ << *Ty << ", Name = " << Name << "\n");
+ RewriteInfo RI(Name, Ty);
+ Rewrites.push_back(RI);
+ return Var;
+}
+
+/// Indicate that a rewritten value is available in the specified block with the
+/// specified value.
+void SSAUpdaterBulk::AddAvailableValue(unsigned Var, BasicBlock *BB, Value *V) {
+ assert(Var < Rewrites.size() && "Variable not found!");
+ LLVM_DEBUG(dbgs() << "SSAUpdater: Var=" << Var
+ << ": added new available value " << *V << " in "
+ << BB->getName() << "\n");
+ Rewrites[Var].Defines[BB] = V;
+}
+
+/// Record a use of the symbolic value. This use will be updated with a
+/// rewritten value when RewriteAllUses is called.
+void SSAUpdaterBulk::AddUse(unsigned Var, Use *U) {
+ assert(Var < Rewrites.size() && "Variable not found!");
+ LLVM_DEBUG(dbgs() << "SSAUpdater: Var=" << Var << ": added a use " << *U->get()
+ << " in " << getUserBB(U)->getName() << "\n");
+ Rewrites[Var].Uses.push_back(U);
+}
+
+// Compute the value at the given block BB. We should either already know it,
+// or we should be able to recursively reach it by going up the dominator tree.
+Value *SSAUpdaterBulk::computeValueAt(BasicBlock *BB, RewriteInfo &R,
+ DominatorTree *DT) {
+ if (!R.Defines.count(BB)) {
+ if (DT->isReachableFromEntry(BB) && PredCache.get(BB).size()) {
+ BasicBlock *IDom = DT->getNode(BB)->getIDom()->getBlock();
+ Value *V = computeValueAt(IDom, R, DT);
+ R.Defines[BB] = V;
+ } else
+ R.Defines[BB] = UndefValue::get(R.Ty);
+ }
+ return R.Defines[BB];
+}
+
+/// Given sets of UsingBlocks and DefBlocks, compute the set of LiveInBlocks.
+/// This is basically a subgraph limited by DefBlocks and UsingBlocks.
+static void
+ComputeLiveInBlocks(const SmallPtrSetImpl<BasicBlock *> &UsingBlocks,
+ const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
+ SmallPtrSetImpl<BasicBlock *> &LiveInBlocks,
+ PredIteratorCache &PredCache) {
+ // To determine liveness, we must iterate through the predecessors of blocks
+ // where the def is live. Blocks are added to the worklist if we need to
+ // check their predecessors. Start with all the using blocks.
+ SmallVector<BasicBlock *, 64> LiveInBlockWorklist(UsingBlocks.begin(),
+ UsingBlocks.end());
+
+ // Now that we have a set of blocks where the phi is live-in, recursively add
+ // their predecessors until we find the full region where the value is live.
+ while (!LiveInBlockWorklist.empty()) {
+ BasicBlock *BB = LiveInBlockWorklist.pop_back_val();
+
+ // The block really is live in here, insert it into the set. If already in
+ // the set, then it has already been processed.
+ if (!LiveInBlocks.insert(BB).second)
+ continue;
+
+ // Since the value is live into BB, it is either defined in a predecessor or
+ // live into it too. Add the preds to the worklist unless they are a
+ // defining block.
+ for (BasicBlock *P : PredCache.get(BB)) {
+ // The value is not live into a predecessor if it defines the value.
+ if (DefBlocks.count(P))
+ continue;
+
+ // Otherwise it is, add to the worklist.
+ LiveInBlockWorklist.push_back(P);
+ }
+ }
+}
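+
+// A small worked example for the helper above: in a diamond CFG
+// Entry -> {B, C} -> D with a definition of the value in B and a use in D,
+// UsingBlocks = {D} and DefBlocks = {B}. The worklist walk inserts D, then
+// its predecessor C (B is skipped as a defining block), then Entry, so
+// LiveInBlocks = {D, C, Entry}.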
+
+/// Perform all the necessary updates, including new PHI-nodes insertion and the
+/// requested uses update.
+void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT,
+ SmallVectorImpl<PHINode *> *InsertedPHIs) {
+ for (auto &R : Rewrites) {
+ // Compute locations for new phi-nodes.
+ // For that we need to initialize DefBlocks from definitions in R.Defines,
+ // UsingBlocks from uses in R.Uses, then compute LiveInBlocks, and then use
+ // this set for computing iterated dominance frontier (IDF).
+ // The IDF blocks are the blocks where we need to insert new phi-nodes.
+ ForwardIDFCalculator IDF(*DT);
+ LLVM_DEBUG(dbgs() << "SSAUpdater: rewriting " << R.Uses.size()
+ << " use(s)\n");
+
+ SmallPtrSet<BasicBlock *, 2> DefBlocks;
+ for (auto &Def : R.Defines)
+ DefBlocks.insert(Def.first);
+ IDF.setDefiningBlocks(DefBlocks);
+
+ SmallPtrSet<BasicBlock *, 2> UsingBlocks;
+ for (Use *U : R.Uses)
+ UsingBlocks.insert(getUserBB(U));
+
+ SmallVector<BasicBlock *, 32> IDFBlocks;
+ SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
+ ComputeLiveInBlocks(UsingBlocks, DefBlocks, LiveInBlocks, PredCache);
+ IDF.resetLiveInBlocks();
+ IDF.setLiveInBlocks(LiveInBlocks);
+ IDF.calculate(IDFBlocks);
+
+ // We've computed IDF, now insert new phi-nodes there.
+ SmallVector<PHINode *, 4> InsertedPHIsForVar;
+ for (auto *FrontierBB : IDFBlocks) {
+ IRBuilder<> B(FrontierBB, FrontierBB->begin());
+ PHINode *PN = B.CreatePHI(R.Ty, 0, R.Name);
+ R.Defines[FrontierBB] = PN;
+ InsertedPHIsForVar.push_back(PN);
+ if (InsertedPHIs)
+ InsertedPHIs->push_back(PN);
+ }
+
+ // Fill in arguments of the inserted PHIs.
+ for (auto *PN : InsertedPHIsForVar) {
+ BasicBlock *PBB = PN->getParent();
+ for (BasicBlock *Pred : PredCache.get(PBB))
+ PN->addIncoming(computeValueAt(Pred, R, DT), Pred);
+ }
+
+ // Rewrite actual uses with the inserted definitions.
+ SmallPtrSet<Use *, 4> ProcessedUses;
+ for (Use *U : R.Uses) {
+ if (!ProcessedUses.insert(U).second)
+ continue;
+ Value *V = computeValueAt(getUserBB(U), R, DT);
+ Value *OldVal = U->get();
+ assert(OldVal && "Invalid use!");
+ // Notify the users of the existing value that it is being replaced.
+ if (OldVal != V && OldVal->hasValueHandle())
+ ValueHandleBase::ValueIsRAUWd(OldVal, V);
+ LLVM_DEBUG(dbgs() << "SSAUpdater: replacing " << *OldVal << " with " << *V
+ << "\n");
+ U->set(V);
+ }
+ }
+}
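+
+// A minimal usage sketch (V0/V1, BB0/BB1, U and DT are placeholders): batch
+// several rewrites and apply them in one pass.
+//
+//   SSAUpdaterBulk Updater;
+//   unsigned Var = Updater.AddVariable("x", V0->getType());
+//   Updater.AddAvailableValue(Var, BB0, V0);
+//   Updater.AddAvailableValue(Var, BB1, V1);
+//   Updater.AddUse(Var, &U);                // U is a Use of the old value
+//   SmallVector<PHINode *, 4> NewPHIs;
+//   Updater.RewriteAllUses(&DT, &NewPHIs);  // DT is the DominatorTree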
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/SampleProfileInference.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/SampleProfileInference.cpp
new file mode 100644
index 0000000000..691ee00bd8
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/SampleProfileInference.cpp
@@ -0,0 +1,1347 @@
+//===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a profile inference algorithm. Given incomplete and
+// possibly imprecise block counts, the algorithm reconstructs realistic block
+// and edge counts that satisfy flow conservation rules, while minimally
+// modifying the input block counts.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SampleProfileInference.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include <queue>
+#include <set>
+#include <stack>
+
+using namespace llvm;
+#define DEBUG_TYPE "sample-profile-inference"
+
+namespace {
+
+static cl::opt<bool> SampleProfileEvenFlowDistribution(
+ "sample-profile-even-flow-distribution", cl::init(true), cl::Hidden,
+ cl::desc("Try to evenly distribute flow when there are multiple equally "
+ "likely options."));
+
+static cl::opt<bool> SampleProfileRebalanceUnknown(
+ "sample-profile-rebalance-unknown", cl::init(true), cl::Hidden,
+ cl::desc("Evenly re-distribute flow among unknown subgraphs."));
+
+static cl::opt<bool> SampleProfileJoinIslands(
+ "sample-profile-join-islands", cl::init(true), cl::Hidden,
+ cl::desc("Join isolated components having positive flow."));
+
+static cl::opt<unsigned> SampleProfileProfiCostBlockInc(
+ "sample-profile-profi-cost-block-inc", cl::init(10), cl::Hidden,
+ cl::desc("The cost of increasing a block's count by one."));
+
+static cl::opt<unsigned> SampleProfileProfiCostBlockDec(
+ "sample-profile-profi-cost-block-dec", cl::init(20), cl::Hidden,
+ cl::desc("The cost of decreasing a block's count by one."));
+
+static cl::opt<unsigned> SampleProfileProfiCostBlockEntryInc(
+ "sample-profile-profi-cost-block-entry-inc", cl::init(40), cl::Hidden,
+ cl::desc("The cost of increasing the entry block's count by one."));
+
+static cl::opt<unsigned> SampleProfileProfiCostBlockEntryDec(
+ "sample-profile-profi-cost-block-entry-dec", cl::init(10), cl::Hidden,
+ cl::desc("The cost of decreasing the entry block's count by one."));
+
+static cl::opt<unsigned> SampleProfileProfiCostBlockZeroInc(
+ "sample-profile-profi-cost-block-zero-inc", cl::init(11), cl::Hidden,
+ cl::desc("The cost of increasing a count of zero-weight block by one."));
+
+static cl::opt<unsigned> SampleProfileProfiCostBlockUnknownInc(
+ "sample-profile-profi-cost-block-unknown-inc", cl::init(0), cl::Hidden,
+ cl::desc("The cost of increasing an unknown block's count by one."));
+
+/// A value indicating an infinite flow/capacity/weight of a block/edge.
+/// Not using numeric_limits<int64_t>::max(), as the values can be summed up
+/// during the execution.
+static constexpr int64_t INF = ((int64_t)1) << 50;
+
+/// The minimum-cost maximum flow algorithm.
+///
+/// The algorithm finds the maximum flow of minimum cost on a given (directed)
+/// network using a modified version of the classical Moore-Bellman-Ford
+/// approach. The algorithm applies a number of augmentation iterations in which
+/// flow is sent along paths of positive capacity from the source to the sink.
+/// The worst-case time complexity of the implementation is O(v(f)*m*n),
+/// where m is the number of edges, n is the number of vertices, and v(f) is the
+/// value of the maximum flow. However, the observed running time on typical
+/// instances is sub-quadratic, that is, o(n^2).
+///
+/// The input is a set of edges with specified costs and capacities, and a pair
+/// of nodes (source and sink). The output is the flow along each edge of the
+/// minimum total cost respecting the given edge capacities.
+class MinCostMaxFlow {
+public:
+ MinCostMaxFlow(const ProfiParams &Params) : Params(Params) {}
+
+ // Initialize algorithm's data structures for a network of a given size.
+ void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
+ Source = SourceNode;
+ Target = SinkNode;
+
+ Nodes = std::vector<Node>(NodeCount);
+ Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
+ if (Params.EvenFlowDistribution)
+ AugmentingEdges =
+ std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
+ }
+
+ // Run the algorithm.
+ int64_t run() {
+ LLVM_DEBUG(dbgs() << "Starting profi for " << Nodes.size() << " nodes\n");
+
+ // Iteratively find an augmentation path/dag in the network and send the
+ // flow along its edges
+ size_t AugmentationIters = applyFlowAugmentation();
+
+ // Compute the total flow and its cost
+ int64_t TotalCost = 0;
+ int64_t TotalFlow = 0;
+ for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
+ for (auto &Edge : Edges[Src]) {
+ if (Edge.Flow > 0) {
+ TotalCost += Edge.Cost * Edge.Flow;
+ if (Src == Source)
+ TotalFlow += Edge.Flow;
+ }
+ }
+ }
+ LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
+ << " iterations with " << TotalFlow << " total flow"
+ << " of " << TotalCost << " cost\n");
+ (void)TotalFlow;
+ (void)AugmentationIters;
+ return TotalCost;
+ }
+
+ /// Adding an edge to the network with a specified capacity and a cost.
+ /// Multiple edges between a pair of nodes are allowed but self-edges
+ /// are not supported.
+ void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
+ assert(Capacity > 0 && "adding an edge of zero capacity");
+ assert(Src != Dst && "loop edges are not supported");
+
+ Edge SrcEdge;
+ SrcEdge.Dst = Dst;
+ SrcEdge.Cost = Cost;
+ SrcEdge.Capacity = Capacity;
+ SrcEdge.Flow = 0;
+ SrcEdge.RevEdgeIndex = Edges[Dst].size();
+
+ Edge DstEdge;
+ DstEdge.Dst = Src;
+ DstEdge.Cost = -Cost;
+ DstEdge.Capacity = 0;
+ DstEdge.Flow = 0;
+ DstEdge.RevEdgeIndex = Edges[Src].size();
+
+ Edges[Src].push_back(SrcEdge);
+ Edges[Dst].push_back(DstEdge);
+ }
+
+ /// Adding an edge to the network of infinite capacity and a given cost.
+ void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
+ addEdge(Src, Dst, INF, Cost);
+ }
+
+ /// Get the total flow from a given source node.
+ /// Returns a list of pairs (target node, amount of flow to the target).
+ const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
+ std::vector<std::pair<uint64_t, int64_t>> Flow;
+ for (const auto &Edge : Edges[Src]) {
+ if (Edge.Flow > 0)
+ Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
+ }
+ return Flow;
+ }
+
+ /// Get the total flow between a pair of nodes.
+ int64_t getFlow(uint64_t Src, uint64_t Dst) const {
+ int64_t Flow = 0;
+ for (const auto &Edge : Edges[Src]) {
+ if (Edge.Dst == Dst) {
+ Flow += Edge.Flow;
+ }
+ }
+ return Flow;
+ }
+
+private:
+ /// Iteratively find an augmentation path/dag in the network and send the
+ /// flow along its edges. The method returns the number of applied iterations.
+ size_t applyFlowAugmentation() {
+ size_t AugmentationIters = 0;
+ while (findAugmentingPath()) {
+ uint64_t PathCapacity = computeAugmentingPathCapacity();
+ while (PathCapacity > 0) {
+ bool Progress = false;
+ if (Params.EvenFlowDistribution) {
+ // Identify node/edge candidates for augmentation
+ identifyShortestEdges(PathCapacity);
+
+ // Find an augmenting DAG
+ auto AugmentingOrder = findAugmentingDAG();
+
+ // Apply the DAG augmentation
+ Progress = augmentFlowAlongDAG(AugmentingOrder);
+ PathCapacity = computeAugmentingPathCapacity();
+ }
+
+ if (!Progress) {
+ augmentFlowAlongPath(PathCapacity);
+ PathCapacity = 0;
+ }
+
+ AugmentationIters++;
+ }
+ }
+ return AugmentationIters;
+ }
+
+ /// Compute the capacity of the canonical augmenting path. If the path is
+ /// saturated (that is, no flow can be sent along the path), then return 0.
+ uint64_t computeAugmentingPathCapacity() {
+ uint64_t PathCapacity = INF;
+ uint64_t Now = Target;
+ while (Now != Source) {
+ uint64_t Pred = Nodes[Now].ParentNode;
+ auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
+
+ assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow");
+ uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow);
+ PathCapacity = std::min(PathCapacity, EdgeCapacity);
+
+ Now = Pred;
+ }
+ return PathCapacity;
+ }
+
+ /// Check for existence of an augmenting path with a positive capacity.
+ bool findAugmentingPath() {
+ // Initialize data structures
+ for (auto &Node : Nodes) {
+ Node.Distance = INF;
+ Node.ParentNode = uint64_t(-1);
+ Node.ParentEdgeIndex = uint64_t(-1);
+ Node.Taken = false;
+ }
+
+ std::queue<uint64_t> Queue;
+ Queue.push(Source);
+ Nodes[Source].Distance = 0;
+ Nodes[Source].Taken = true;
+ while (!Queue.empty()) {
+ uint64_t Src = Queue.front();
+ Queue.pop();
+ Nodes[Src].Taken = false;
+ // Although the residual network contains edges with negative costs
+ // (in particular, backward edges), it can be shown that there are no
+ // negative-weight cycles and the following two invariants are maintained:
+ // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
+ // where Dist is the length of the shortest path between two nodes. This
+ // allows us to prune the search-space of the path-finding algorithm using
+ // the following early-stop criteria:
+ // -- If we find a path with zero-distance from Source to Target, stop the
+ // search, as the path is the shortest since Dist[Source, Target] >= 0;
+ // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
+ // process node V, as it is guaranteed _not_ to be on a shortest path
+ // from Source to Target; it follows from inequalities
+ // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
+ // >= Dist[Source, V]
+ if (!Params.EvenFlowDistribution && Nodes[Target].Distance == 0)
+ break;
+ if (Nodes[Src].Distance > Nodes[Target].Distance)
+ continue;
+
+ // Process adjacent edges
+ for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
+ auto &Edge = Edges[Src][EdgeIdx];
+ if (Edge.Flow < Edge.Capacity) {
+ uint64_t Dst = Edge.Dst;
+ int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
+ if (Nodes[Dst].Distance > NewDistance) {
+ // Update the distance and the parent node/edge
+ Nodes[Dst].Distance = NewDistance;
+ Nodes[Dst].ParentNode = Src;
+ Nodes[Dst].ParentEdgeIndex = EdgeIdx;
+ // Add the node to the queue, if it is not there yet
+ if (!Nodes[Dst].Taken) {
+ Queue.push(Dst);
+ Nodes[Dst].Taken = true;
+ }
+ }
+ }
+ }
+ }
+
+ return Nodes[Target].Distance != INF;
+ }
+
+ /// Update the current flow along the augmenting path.
+ void augmentFlowAlongPath(uint64_t PathCapacity) {
+ assert(PathCapacity > 0 && "found an incorrect augmenting path");
+ uint64_t Now = Target;
+ while (Now != Source) {
+ uint64_t Pred = Nodes[Now].ParentNode;
+ auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
+ auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
+
+ Edge.Flow += PathCapacity;
+ RevEdge.Flow -= PathCapacity;
+
+ Now = Pred;
+ }
+ }
+
+ /// Find an Augmenting DAG order using a modified version of DFS in which we
+ /// can visit a node multiple times. In the DFS search, when scanning each
+ /// edge out of a node, continue search at Edge.Dst endpoint if it has not
+ /// been discovered yet and its NumCalls < MaxDfsCalls. The algorithm
+ /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time.
+ /// It returns an Augmenting Order (Taken nodes in decreasing Finish time)
+ /// that starts with Source and ends with Target.
+ std::vector<uint64_t> findAugmentingDAG() {
+ // We use a stack-based implementation of DFS to avoid recursion.
+ // Defining DFS data structures:
+ // A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that
+ // - we are currently visiting Nodes[NodeIdx] and
+ // - the next edge to scan is Edges[NodeIdx][EdgeIdx]
+ typedef std::pair<uint64_t, uint64_t> StackItemType;
+ std::stack<StackItemType> Stack;
+ std::vector<uint64_t> AugmentingOrder;
+
+ // Phase 0: Initialize Node attributes and Time for DFS run
+ for (auto &Node : Nodes) {
+ Node.Discovery = 0;
+ Node.Finish = 0;
+ Node.NumCalls = 0;
+ Node.Taken = false;
+ }
+ uint64_t Time = 0;
+ // Mark Target as Taken
+ // Taken attribute will be propagated backwards from Target towards Source
+ Nodes[Target].Taken = true;
+
+ // Phase 1: Start DFS traversal from Source
+ Stack.emplace(Source, 0);
+ Nodes[Source].Discovery = ++Time;
+ while (!Stack.empty()) {
+ auto NodeIdx = Stack.top().first;
+ auto EdgeIdx = Stack.top().second;
+
+ // If we haven't scanned all edges out of NodeIdx, continue scanning
+ if (EdgeIdx < Edges[NodeIdx].size()) {
+ auto &Edge = Edges[NodeIdx][EdgeIdx];
+ auto &Dst = Nodes[Edge.Dst];
+ Stack.top().second++;
+
+ if (Edge.OnShortestPath) {
+ // If we haven't seen Edge.Dst so far, continue DFS search there
+ if (Dst.Discovery == 0 && Dst.NumCalls < MaxDfsCalls) {
+ Dst.Discovery = ++Time;
+ Stack.emplace(Edge.Dst, 0);
+ Dst.NumCalls++;
+ } else if (Dst.Taken && Dst.Finish != 0) {
+ // Otherwise, if Edge.Dst already has a path to Target, then NodeIdx
+ // reaches Target through it as well; mark NodeIdx as Taken.
+ Nodes[NodeIdx].Taken = true;
+ }
+ }
+ } else {
+ // If we are done scanning all edges out of NodeIdx
+ Stack.pop();
+ // If we haven't found a path from NodeIdx to Target, forget about it
+ if (!Nodes[NodeIdx].Taken) {
+ Nodes[NodeIdx].Discovery = 0;
+ } else {
+ // If we have found a path from NodeIdx to Target, then finish NodeIdx
+ // and propagate Taken flag to DFS parent unless at the Source
+ Nodes[NodeIdx].Finish = ++Time;
+ // NodeIdx == Source if and only if the stack is empty
+ if (NodeIdx != Source) {
+ assert(!Stack.empty() && "empty stack while running dfs");
+ Nodes[Stack.top().first].Taken = true;
+ }
+ AugmentingOrder.push_back(NodeIdx);
+ }
+ }
+ }
+ // Nodes were collected in increasing Finish time, but the augmenting order
+ // needs decreasing Finish time, so reverse the sequence.
+ std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());
+
+ // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges
+ for (size_t Src : AugmentingOrder) {
+ AugmentingEdges[Src].clear();
+ for (auto &Edge : Edges[Src]) {
+ uint64_t Dst = Edge.Dst;
+ if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&
+ Nodes[Dst].Finish < Nodes[Src].Finish) {
+ AugmentingEdges[Src].push_back(&Edge);
+ }
+ }
+ assert((Src == Target || !AugmentingEdges[Src].empty()) &&
+ "incorrectly constructed augmenting edges");
+ }
+
+ return AugmentingOrder;
+ }
+
+ /// Update the current flow along the given (acyclic) subgraph specified by
+ /// the vertex order, AugmentingOrder. The objective is to send as much flow
+ /// as possible while evenly distributing flow among successors of each node.
+ /// After the update at least one edge is saturated.
+ bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) {
+ // Phase 0: Initialization
+ for (uint64_t Src : AugmentingOrder) {
+ Nodes[Src].FracFlow = 0;
+ Nodes[Src].IntFlow = 0;
+ for (auto &Edge : AugmentingEdges[Src]) {
+ Edge->AugmentedFlow = 0;
+ }
+ }
+
+ // Phase 1: Send a unit of fractional flow along the DAG
+ uint64_t MaxFlowAmount = INF;
+ Nodes[Source].FracFlow = 1.0;
+ for (uint64_t Src : AugmentingOrder) {
+ assert((Src == Target || Nodes[Src].FracFlow > 0.0) &&
+ "incorrectly computed fractional flow");
+ // Distribute flow evenly among successors of Src
+ uint64_t Degree = AugmentingEdges[Src].size();
+ for (auto &Edge : AugmentingEdges[Src]) {
+ double EdgeFlow = Nodes[Src].FracFlow / Degree;
+ Nodes[Edge->Dst].FracFlow += EdgeFlow;
+ if (Edge->Capacity == INF)
+ continue;
+ uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
+ MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);
+ }
+ }
+ // Stop early if we cannot send any (integral) flow from Source to Target
+ if (MaxFlowAmount == 0)
+ return false;
+
+ // Phase 2: Send an integral flow of MaxFlowAmount
+ Nodes[Source].IntFlow = MaxFlowAmount;
+ for (uint64_t Src : AugmentingOrder) {
+ if (Src == Target)
+ break;
+ // Distribute flow evenly among successors of Src, rounding up to make
+ // sure all flow is sent
+ uint64_t Degree = AugmentingEdges[Src].size();
+ // We are guaranteed that Nodes[Src].IntFlow <= SuccFlow * Degree
+ uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
+ for (auto &Edge : AugmentingEdges[Src]) {
+ uint64_t Dst = Edge->Dst;
+ uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);
+ EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow));
+ Nodes[Dst].IntFlow += EdgeFlow;
+ Nodes[Src].IntFlow -= EdgeFlow;
+ Edge->AugmentedFlow += EdgeFlow;
+ }
+ }
+ assert(Nodes[Target].IntFlow <= MaxFlowAmount);
+ Nodes[Target].IntFlow = 0;
+
+ // Phase 3: Send excess flow back traversing the nodes backwards.
+ // Because of rounding, not all flow can be sent along the edges of Src.
+ // Hence, send the remaining flow back to maintain flow conservation.
+ for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) {
+ uint64_t Src = AugmentingOrder[Idx - 1];
+ // Try to send excess flow back along each edge.
+ // Make sure we only send back flow we just augmented (AugmentedFlow).
+ for (auto &Edge : AugmentingEdges[Src]) {
+ uint64_t Dst = Edge->Dst;
+ if (Nodes[Dst].IntFlow == 0)
+ continue;
+ uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
+ Nodes[Dst].IntFlow -= EdgeFlow;
+ Nodes[Src].IntFlow += EdgeFlow;
+ Edge->AugmentedFlow -= EdgeFlow;
+ }
+ }
+
+ // Phase 4: Update flow values along all edges
+ bool HasSaturatedEdges = false;
+ for (uint64_t Src : AugmentingOrder) {
+ // Verify that we have sent all the excess flow from the node
+ assert(Src == Source || Nodes[Src].IntFlow == 0);
+ for (auto &Edge : AugmentingEdges[Src]) {
+ assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
+ // Update flow values along the edge and its reverse copy
+ auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
+ Edge->Flow += Edge->AugmentedFlow;
+ RevEdge.Flow -= Edge->AugmentedFlow;
+ if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
+ HasSaturatedEdges = true;
+ }
+ }
+
+ // The augmentation is successful iff at least one edge becomes saturated
+ return HasSaturatedEdges;
+ }
+
+ /// Identify candidate (shortest) edges for augmentation.
+ void identifyShortestEdges(uint64_t PathCapacity) {
+ assert(PathCapacity > 0 && "found an incorrect augmenting DAG");
+ // To make sure the augmentation DAG contains only edges with large residual
+ // capacity, we prune all edges whose capacity is below a fraction of
+ // the capacity of the augmented path.
+ // (All edges of the path itself are always in the DAG)
+ uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1));
+
+ // Decide which edges are on a shortest path from Source to Target
+ for (size_t Src = 0; Src < Nodes.size(); Src++) {
+ // An edge cannot be augmenting if the endpoint has large distance
+ if (Nodes[Src].Distance > Nodes[Target].Distance)
+ continue;
+
+ for (auto &Edge : Edges[Src]) {
+ uint64_t Dst = Edge.Dst;
+ Edge.OnShortestPath =
+ Src != Target && Dst != Source &&
+ Nodes[Dst].Distance <= Nodes[Target].Distance &&
+ Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&
+ Edge.Capacity > Edge.Flow &&
+ uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
+ }
+ }
+ }
+
+ /// Maximum number of DFS iterations for DAG finding.
+ static constexpr uint64_t MaxDfsCalls = 10;
+
+ /// A node in a flow network.
+ struct Node {
+ /// The cost of the cheapest path from the source to the current node.
+ int64_t Distance;
+ /// The node preceding the current one in the path.
+ uint64_t ParentNode;
+ /// The index of the edge between ParentNode and the current node.
+ uint64_t ParentEdgeIndex;
+ /// An indicator of whether the current node is in a queue.
+ bool Taken;
+
+ /// Data fields utilized in DAG-augmentation:
+ /// Fractional flow.
+ double FracFlow;
+ /// Integral flow.
+ uint64_t IntFlow;
+ /// Discovery time.
+ uint64_t Discovery;
+ /// Finish time.
+ uint64_t Finish;
+ /// NumCalls.
+ uint64_t NumCalls;
+ };
+
+ /// An edge in a flow network.
+ struct Edge {
+ /// The cost of the edge.
+ int64_t Cost;
+ /// The capacity of the edge.
+ int64_t Capacity;
+ /// The current flow on the edge.
+ int64_t Flow;
+ /// The destination node of the edge.
+ uint64_t Dst;
+ /// The index of the reverse edge between Dst and the current node.
+ uint64_t RevEdgeIndex;
+
+ /// Data fields utilized in DAG-augmentation:
+ /// Whether the edge is currently on a shortest path from Source to Target.
+ bool OnShortestPath;
+ /// Extra flow along the edge.
+ uint64_t AugmentedFlow;
+ };
+
+ /// The set of network nodes.
+ std::vector<Node> Nodes;
+ /// The set of network edges.
+ std::vector<std::vector<Edge>> Edges;
+ /// Source node of the flow.
+ uint64_t Source;
+ /// Target (sink) node of the flow.
+ uint64_t Target;
+ /// Augmenting edges.
+ std::vector<std::vector<Edge *>> AugmentingEdges;
+ /// Params for flow computation.
+ const ProfiParams &Params;
+};
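+
+// A minimal usage sketch for the solver above (P stands for a ProfiParams
+// value; node numbering is arbitrary): a three-node chain with source 0 and
+// sink 2.
+//
+//   MinCostMaxFlow Flow(P);
+//   Flow.initialize(/*NodeCount=*/3, /*SourceNode=*/0, /*SinkNode=*/2);
+//   Flow.addEdge(0, 1, /*Capacity=*/10, /*Cost=*/1);
+//   Flow.addEdge(1, 2, /*Capacity=*/10, /*Cost=*/2);
+//   int64_t TotalCost = Flow.run();       // sends 10 units, cost 10*1 + 10*2
+//   int64_t F01 = Flow.getFlow(0, 1);     // 10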
+
+/// A post-processing adjustment of the control flow. It applies two steps by
+/// rerouting some flow and making it more realistic:
+///
+/// - First, it removes all isolated components ("islands") with a positive flow
+/// that are unreachable from the entry block. For every such component, we
+/// find the shortest from the entry to an exit passing through the component,
+/// and increase the flow by one unit along the path.
+///
+/// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
+/// with no sampled counts. Then it rebalances the flow that goes through such
+/// a subgraph so that each branch is taken with probability 50%.
+/// An unknown subgraph is such that for every two nodes u and v:
+/// - u dominates v and u is not unknown;
+/// - v post-dominates u; and
+/// - all inner-nodes of all (u,v)-paths are unknown.
+///
+class FlowAdjuster {
+public:
+ FlowAdjuster(const ProfiParams &Params, FlowFunction &Func)
+ : Params(Params), Func(Func) {}
+
+ /// Apply the post-processing.
+ void run() {
+ if (Params.JoinIslands) {
+ // Adjust the flow to get rid of isolated components
+ joinIsolatedComponents();
+ }
+
+ if (Params.RebalanceUnknown) {
+ // Rebalance the flow inside unknown subgraphs
+ rebalanceUnknownSubgraphs();
+ }
+ }
+
+private:
+ void joinIsolatedComponents() {
+ // Find blocks that are reachable from the source
+ auto Visited = BitVector(NumBlocks(), false);
+ findReachable(Func.Entry, Visited);
+
+ // Iterate over all non-reachable blocks and adjust their weights
+ for (uint64_t I = 0; I < NumBlocks(); I++) {
+ auto &Block = Func.Blocks[I];
+ if (Block.Flow > 0 && !Visited[I]) {
+ // Find a path from the entry to an exit passing through the block I
+ auto Path = findShortestPath(I);
+ // Increase the flow along the path
+ assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
+ "incorrectly computed path adjusting control flow");
+ Func.Blocks[Func.Entry].Flow += 1;
+ for (auto &Jump : Path) {
+ Jump->Flow += 1;
+ Func.Blocks[Jump->Target].Flow += 1;
+ // Update reachability
+ findReachable(Jump->Target, Visited);
+ }
+ }
+ }
+ }
+
+ /// Run BFS from a given block along the jumps with a positive flow and mark
+ /// all reachable blocks.
+ void findReachable(uint64_t Src, BitVector &Visited) {
+ if (Visited[Src])
+ return;
+ std::queue<uint64_t> Queue;
+ Queue.push(Src);
+ Visited[Src] = true;
+ while (!Queue.empty()) {
+ Src = Queue.front();
+ Queue.pop();
+ for (auto *Jump : Func.Blocks[Src].SuccJumps) {
+ uint64_t Dst = Jump->Target;
+ if (Jump->Flow > 0 && !Visited[Dst]) {
+ Queue.push(Dst);
+ Visited[Dst] = true;
+ }
+ }
+ }
+ }
+
+ /// Find the shortest path from the entry block to an exit block passing
+ /// through a given block.
+ std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
+ // A path from the entry block to BlockIdx
+ auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
+ // A path from BlockIdx to an exit block
+ auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
+
+ // Concatenate the two paths
+ std::vector<FlowJump *> Result;
+ Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
+ Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
+ return Result;
+ }
+
+ /// Apply the Dijkstra algorithm to find the shortest path from a given
+ /// Source to a given Target block.
+  /// If Target == AnyExitBlock, then the path ends at an exit block.
+ std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
+ // Quit early, if possible
+ if (Source == Target)
+ return std::vector<FlowJump *>();
+ if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
+ return std::vector<FlowJump *>();
+
+ // Initialize data structures
+ auto Distance = std::vector<int64_t>(NumBlocks(), INF);
+ auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
+ Distance[Source] = 0;
+ std::set<std::pair<uint64_t, uint64_t>> Queue;
+ Queue.insert(std::make_pair(Distance[Source], Source));
+
+ // Run the Dijkstra algorithm
+ while (!Queue.empty()) {
+ uint64_t Src = Queue.begin()->second;
+ Queue.erase(Queue.begin());
+ // If we found a solution, quit early
+ if (Src == Target ||
+ (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
+ break;
+
+ for (auto *Jump : Func.Blocks[Src].SuccJumps) {
+ uint64_t Dst = Jump->Target;
+ int64_t JumpDist = jumpDistance(Jump);
+ if (Distance[Dst] > Distance[Src] + JumpDist) {
+ Queue.erase(std::make_pair(Distance[Dst], Dst));
+
+ Distance[Dst] = Distance[Src] + JumpDist;
+ Parent[Dst] = Jump;
+
+ Queue.insert(std::make_pair(Distance[Dst], Dst));
+ }
+ }
+ }
+ // If Target is not provided, find the closest exit block
+ if (Target == AnyExitBlock) {
+ for (uint64_t I = 0; I < NumBlocks(); I++) {
+ if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
+ if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
+ Target = I;
+ }
+ }
+ }
+ }
+ assert(Parent[Target] != nullptr && "a path does not exist");
+
+ // Extract the constructed path
+ std::vector<FlowJump *> Result;
+ uint64_t Now = Target;
+ while (Now != Source) {
+ assert(Now == Parent[Now]->Target && "incorrect parent jump");
+ Result.push_back(Parent[Now]);
+ Now = Parent[Now]->Source;
+ }
+ // Reverse the path, since it is extracted from Target to Source
+ std::reverse(Result.begin(), Result.end());
+ return Result;
+ }
+
+  /// The distance value of a given jump, used when computing shortest paths.
+  /// In order to encourage the path to use blocks/jumps with a large positive
+  /// flow, and to avoid drastically changing the branch probabilities of
+  /// outgoing edges, the jump distance is set:
+  ///   - to minimize the number of unlikely jumps used and, subject to that,
+  ///   - to minimize the number of Flow == 0 jumps used and, subject to that,
+  ///   - to minimize the total multiplicative Flow increase for the remaining
+  ///     edges.
+  /// To capture this objective with integer distances, we round off fractional
+  /// parts to a multiple of 1 / BaseDistance.
+ int64_t jumpDistance(FlowJump *Jump) const {
+ if (Jump->IsUnlikely)
+ return Params.CostUnlikely;
+ uint64_t BaseDistance =
+ std::max(FlowAdjuster::MinBaseDistance,
+ std::min(Func.Blocks[Func.Entry].Flow,
+ Params.CostUnlikely / (2 * (NumBlocks() + 1))));
+ if (Jump->Flow > 0)
+ return BaseDistance + BaseDistance / Jump->Flow;
+ return 2 * BaseDistance * (NumBlocks() + 1);
+ };
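+  // As a rough illustration, assuming the default MinBaseDistance of 10000 is
+  // the value BaseDistance resolves to:
+  //   - an unlikely jump costs Params.CostUnlikely;
+  //   - a jump with Flow == 100 costs 10000 + 10000 / 100 = 10100;
+  //   - a jump with Flow == 0 in a 4-block function costs
+  //     2 * 10000 * (4 + 1) = 100000,
+  // so shortest paths strongly prefer jumps that already carry flow.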
+
+ uint64_t NumBlocks() const { return Func.Blocks.size(); }
+
+ /// Rebalance unknown subgraphs so that the flow is split evenly across the
+ /// outgoing branches of every block of the subgraph. The method iterates over
+ /// blocks with known weight and identifies unknown subgraphs rooted at the
+ /// blocks. Then it verifies if flow rebalancing is feasible and applies it.
+ void rebalanceUnknownSubgraphs() {
+ // Try to find unknown subgraphs from each block
+ for (const FlowBlock &SrcBlock : Func.Blocks) {
+ // Verify if rebalancing rooted at SrcBlock is feasible
+ if (!canRebalanceAtRoot(&SrcBlock))
+ continue;
+
+      // Find an unknown subgraph starting at SrcBlock. Along the way,
+ // fill in known destinations and intermediate unknown blocks.
+ std::vector<FlowBlock *> UnknownBlocks;
+ std::vector<FlowBlock *> KnownDstBlocks;
+ findUnknownSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks);
+
+ // Verify if rebalancing of the subgraph is feasible. If the search is
+ // successful, find the unique destination block (which can be null)
+ FlowBlock *DstBlock = nullptr;
+ if (!canRebalanceSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks,
+ DstBlock))
+ continue;
+
+ // We cannot rebalance subgraphs containing cycles among unknown blocks
+ if (!isAcyclicSubgraph(&SrcBlock, DstBlock, UnknownBlocks))
+ continue;
+
+ // Rebalance the flow
+ rebalanceUnknownSubgraph(&SrcBlock, DstBlock, UnknownBlocks);
+ }
+ }
+
+ /// Verify if rebalancing rooted at a given block is possible.
+ bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
+ // Do not attempt to find unknown subgraphs from an unknown or a
+ // zero-flow block
+ if (SrcBlock->HasUnknownWeight || SrcBlock->Flow == 0)
+ return false;
+
+    // Do not attempt to process subgraphs from a block w/o unknown successors
+ bool HasUnknownSuccs = false;
+ for (auto *Jump : SrcBlock->SuccJumps) {
+ if (Func.Blocks[Jump->Target].HasUnknownWeight) {
+ HasUnknownSuccs = true;
+ break;
+ }
+ }
+ if (!HasUnknownSuccs)
+ return false;
+
+ return true;
+ }
+
+ /// Find an unknown subgraph starting at block SrcBlock. The method sets
+ /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks.
+ void findUnknownSubgraph(const FlowBlock *SrcBlock,
+ std::vector<FlowBlock *> &KnownDstBlocks,
+ std::vector<FlowBlock *> &UnknownBlocks) {
+ // Run BFS from SrcBlock and make sure all paths are going through unknown
+ // blocks and end at a known DstBlock
+ auto Visited = BitVector(NumBlocks(), false);
+ std::queue<uint64_t> Queue;
+
+ Queue.push(SrcBlock->Index);
+ Visited[SrcBlock->Index] = true;
+ while (!Queue.empty()) {
+ auto &Block = Func.Blocks[Queue.front()];
+ Queue.pop();
+ // Process blocks reachable from Block
+ for (auto *Jump : Block.SuccJumps) {
+ // If Jump can be ignored, skip it
+ if (ignoreJump(SrcBlock, nullptr, Jump))
+ continue;
+
+ uint64_t Dst = Jump->Target;
+ // If Dst has been visited, skip Jump
+ if (Visited[Dst])
+ continue;
+ // Process block Dst
+ Visited[Dst] = true;
+ if (!Func.Blocks[Dst].HasUnknownWeight) {
+ KnownDstBlocks.push_back(&Func.Blocks[Dst]);
+ } else {
+ Queue.push(Dst);
+ UnknownBlocks.push_back(&Func.Blocks[Dst]);
+ }
+ }
+ }
+ }
+
+ /// Verify if rebalancing of the subgraph is feasible. If the checks are
+ /// successful, set the unique destination block, DstBlock (can be null).
+ bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
+ const std::vector<FlowBlock *> &KnownDstBlocks,
+ const std::vector<FlowBlock *> &UnknownBlocks,
+ FlowBlock *&DstBlock) {
+ // If the list of unknown blocks is empty, we don't need rebalancing
+ if (UnknownBlocks.empty())
+ return false;
+
+ // If there are multiple known sinks, we can't rebalance
+ if (KnownDstBlocks.size() > 1)
+ return false;
+ DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();
+
+ // Verify sinks of the subgraph
+ for (auto *Block : UnknownBlocks) {
+ if (Block->SuccJumps.empty()) {
+ // If there are multiple (known and unknown) sinks, we can't rebalance
+ if (DstBlock != nullptr)
+ return false;
+ continue;
+ }
+ size_t NumIgnoredJumps = 0;
+ for (auto *Jump : Block->SuccJumps) {
+ if (ignoreJump(SrcBlock, DstBlock, Jump))
+ NumIgnoredJumps++;
+ }
+ // If there is a non-sink block in UnknownBlocks with all jumps ignored,
+ // then we can't rebalance
+ if (NumIgnoredJumps == Block->SuccJumps.size())
+ return false;
+ }
+
+ return true;
+ }
+
+  /// Decide whether the Jump is ignored while processing an unknown subgraph
+ /// rooted at basic block SrcBlock with the destination block, DstBlock.
+ bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
+ const FlowJump *Jump) {
+ // Ignore unlikely jumps with zero flow
+ if (Jump->IsUnlikely && Jump->Flow == 0)
+ return true;
+
+ auto JumpSource = &Func.Blocks[Jump->Source];
+ auto JumpTarget = &Func.Blocks[Jump->Target];
+
+ // Do not ignore jumps coming into DstBlock
+ if (DstBlock != nullptr && JumpTarget == DstBlock)
+ return false;
+
+ // Ignore jumps out of SrcBlock to known blocks
+ if (!JumpTarget->HasUnknownWeight && JumpSource == SrcBlock)
+ return true;
+
+ // Ignore jumps to known blocks with zero flow
+ if (!JumpTarget->HasUnknownWeight && JumpTarget->Flow == 0)
+ return true;
+
+ return false;
+ }
+
+ /// Verify if the given unknown subgraph is acyclic, and if yes, reorder
+ /// UnknownBlocks in the topological order (so that all jumps are "forward").
+ bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
+ std::vector<FlowBlock *> &UnknownBlocks) {
+ // Extract local in-degrees in the considered subgraph
+ auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
+ auto fillInDegree = [&](const FlowBlock *Block) {
+ for (auto *Jump : Block->SuccJumps) {
+ if (ignoreJump(SrcBlock, DstBlock, Jump))
+ continue;
+ LocalInDegree[Jump->Target]++;
+ }
+ };
+ fillInDegree(SrcBlock);
+ for (auto *Block : UnknownBlocks) {
+ fillInDegree(Block);
+ }
+ // A loop containing SrcBlock
+ if (LocalInDegree[SrcBlock->Index] > 0)
+ return false;
+
+ std::vector<FlowBlock *> AcyclicOrder;
+ std::queue<uint64_t> Queue;
+ Queue.push(SrcBlock->Index);
+ while (!Queue.empty()) {
+ FlowBlock *Block = &Func.Blocks[Queue.front()];
+ Queue.pop();
+ // Stop propagation once we reach DstBlock, if any
+ if (DstBlock != nullptr && Block == DstBlock)
+ break;
+
+ // Keep an acyclic order of unknown blocks
+ if (Block->HasUnknownWeight && Block != SrcBlock)
+ AcyclicOrder.push_back(Block);
+
+ // Add to the queue all successors with zero local in-degree
+ for (auto *Jump : Block->SuccJumps) {
+ if (ignoreJump(SrcBlock, DstBlock, Jump))
+ continue;
+ uint64_t Dst = Jump->Target;
+ LocalInDegree[Dst]--;
+ if (LocalInDegree[Dst] == 0) {
+ Queue.push(Dst);
+ }
+ }
+ }
+
+ // If there is a cycle in the subgraph, AcyclicOrder contains only a subset
+ // of all blocks
+ if (UnknownBlocks.size() != AcyclicOrder.size())
+ return false;
+ UnknownBlocks = AcyclicOrder;
+ return true;
+ }
+
+ /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and
+ /// having UnknownBlocks intermediate blocks.
+ void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
+ const FlowBlock *DstBlock,
+ const std::vector<FlowBlock *> &UnknownBlocks) {
+ assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
+
+    // Distribute flow from the source block
+ uint64_t BlockFlow = 0;
+ // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
+ for (auto *Jump : SrcBlock->SuccJumps) {
+ if (ignoreJump(SrcBlock, DstBlock, Jump))
+ continue;
+ BlockFlow += Jump->Flow;
+ }
+ rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);
+
+    // Distribute flow from the remaining blocks
+ for (auto *Block : UnknownBlocks) {
+ assert(Block->HasUnknownWeight && "incorrect unknown subgraph");
+ uint64_t BlockFlow = 0;
+ // Block's flow is the sum of incoming flows
+ for (auto *Jump : Block->PredJumps) {
+ BlockFlow += Jump->Flow;
+ }
+ Block->Flow = BlockFlow;
+ rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
+ }
+ }
+
+ /// Redistribute flow for a block in a subgraph rooted at SrcBlock,
+ /// and ending at DstBlock.
+ void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
+ const FlowBlock *Block, uint64_t BlockFlow) {
+ // Process all successor jumps and update corresponding flow values
+ size_t BlockDegree = 0;
+ for (auto *Jump : Block->SuccJumps) {
+ if (ignoreJump(SrcBlock, DstBlock, Jump))
+ continue;
+ BlockDegree++;
+ }
+ // If all successor jumps of the block are ignored, skip it
+ if (DstBlock == nullptr && BlockDegree == 0)
+ return;
+ assert(BlockDegree > 0 && "all outgoing jumps are ignored");
+
+    // Each of the Block's successors gets the following amount of flow; the
+    // value is rounded up so that all of the flow is propagated.
+ uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
+ for (auto *Jump : Block->SuccJumps) {
+ if (ignoreJump(SrcBlock, DstBlock, Jump))
+ continue;
+ uint64_t Flow = std::min(SuccFlow, BlockFlow);
+ Jump->Flow = Flow;
+ BlockFlow -= Flow;
+ }
+ assert(BlockFlow == 0 && "not all flow is propagated");
+ }
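+  // A small worked example of the split above: with BlockFlow == 5 and two
+  // non-ignored successors, SuccFlow = (5 + 2 - 1) / 2 = 3, so the first jump
+  // receives 3 units and the second receives min(3, 5 - 3) = 2, leaving
+  // BlockFlow == 0 as the final assertion requires.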
+
+ /// A constant indicating an arbitrary exit block of a function.
+ static constexpr uint64_t AnyExitBlock = uint64_t(-1);
+ /// Minimum BaseDistance for the jump distance values in island joining.
+ static constexpr uint64_t MinBaseDistance = 10000;
+
+ /// Params for flow computation.
+ const ProfiParams &Params;
+ /// The function.
+ FlowFunction &Func;
+};
+
+std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
+ const FlowBlock &Block);
+std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
+ const FlowJump &Jump);
+
+/// Initialize the flow network for a given function.
+///
+/// Every block is split into two nodes that are responsible for (i) an
+/// incoming flow, (ii) an outgoing flow; they penalize an increase or a
+/// reduction of the block weight.
+void initializeNetwork(const ProfiParams &Params, MinCostMaxFlow &Network,
+ FlowFunction &Func) {
+ uint64_t NumBlocks = Func.Blocks.size();
+ assert(NumBlocks > 1 && "Too few blocks in a function");
+ uint64_t NumJumps = Func.Jumps.size();
+ assert(NumJumps > 0 && "Too few jumps in a function");
+
+ // Introducing dummy source/sink pairs to allow flow circulation.
+  // The nodes corresponding to blocks of the function have indices in
+ // the range [0 .. 2 * NumBlocks); the dummy sources/sinks are indexed by the
+ // next four values.
+ uint64_t S = 2 * NumBlocks;
+ uint64_t T = S + 1;
+ uint64_t S1 = S + 2;
+ uint64_t T1 = S + 3;
+
+ Network.initialize(2 * NumBlocks + 4, S1, T1);
+
+ // Initialize nodes of the flow network
+ for (uint64_t B = 0; B < NumBlocks; B++) {
+ auto &Block = Func.Blocks[B];
+
+ // Split every block into two auxiliary nodes to allow
+ // increase/reduction of the block count.
+ uint64_t Bin = 2 * B;
+ uint64_t Bout = 2 * B + 1;
+
+ // Edges from S and to T
+ if (Block.isEntry()) {
+ Network.addEdge(S, Bin, 0);
+ } else if (Block.isExit()) {
+ Network.addEdge(Bout, T, 0);
+ }
+
+ // Assign costs for increasing/decreasing the block counts
+ auto [AuxCostInc, AuxCostDec] = assignBlockCosts(Params, Block);
+
+ // Add the corresponding edges to the network
+ Network.addEdge(Bin, Bout, AuxCostInc);
+ if (Block.Weight > 0) {
+ Network.addEdge(Bout, Bin, Block.Weight, AuxCostDec);
+ Network.addEdge(S1, Bout, Block.Weight, 0);
+ Network.addEdge(Bin, T1, Block.Weight, 0);
+ }
+ }
+
+ // Initialize edges of the flow network
+ for (uint64_t J = 0; J < NumJumps; J++) {
+ auto &Jump = Func.Jumps[J];
+
+ // Get the endpoints corresponding to the jump
+ uint64_t Jin = 2 * Jump.Source + 1;
+ uint64_t Jout = 2 * Jump.Target;
+
+ // Assign costs for increasing/decreasing the jump counts
+ auto [AuxCostInc, AuxCostDec] = assignJumpCosts(Params, Jump);
+
+ // Add the corresponding edges to the network
+ Network.addEdge(Jin, Jout, AuxCostInc);
+ if (Jump.Weight > 0) {
+ Network.addEdge(Jout, Jin, Jump.Weight, AuxCostDec);
+ Network.addEdge(S1, Jout, Jump.Weight, 0);
+ Network.addEdge(Jin, T1, Jump.Weight, 0);
+ }
+ }
+
+ // Make sure we have a valid flow circulation
+ Network.addEdge(T, S, 0);
+}
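+// To make the indexing above concrete: in a function with 3 blocks, block B is
+// represented by the pair of nodes 2*B (incoming) and 2*B + 1 (outgoing), so
+// block nodes occupy indices 0..5, and the dummy nodes are S = 6, T = 7,
+// S1 = 8, T1 = 9 (the latter two being the source/target passed to
+// Network.initialize). A jump from block 1 to block 2 becomes an edge from
+// node 3 (= 2*1 + 1) to node 4 (= 2*2).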
+
+/// Assign costs for increasing/decreasing the block counts.
+std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
+ const FlowBlock &Block) {
+ // Modifying the weight of an unlikely block is expensive
+ if (Block.IsUnlikely)
+ return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);
+
+ // Assign default values for the costs
+ int64_t CostInc = Params.CostBlockInc;
+ int64_t CostDec = Params.CostBlockDec;
+ // Update the costs depending on the block metadata
+ if (Block.HasUnknownWeight) {
+ CostInc = Params.CostBlockUnknownInc;
+ CostDec = 0;
+ } else {
+ // Increasing the count for "cold" blocks with zero initial count is more
+ // expensive than for "hot" ones
+ if (Block.Weight == 0)
+ CostInc = Params.CostBlockZeroInc;
+ // Modifying the count of the entry block is expensive
+ if (Block.isEntry()) {
+ CostInc = Params.CostBlockEntryInc;
+ CostDec = Params.CostBlockEntryDec;
+ }
+ }
+ return std::make_pair(CostInc, CostDec);
+}
+
+/// Assign costs for increasing/decreasing the jump counts.
+std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
+ const FlowJump &Jump) {
+ // Modifying the weight of an unlikely jump is expensive
+ if (Jump.IsUnlikely)
+ return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);
+
+ // Assign default values for the costs
+ int64_t CostInc = Params.CostJumpInc;
+ int64_t CostDec = Params.CostJumpDec;
+  // Update the costs depending on the jump metadata
+ if (Jump.Source + 1 == Jump.Target) {
+ // Adjusting the fall-through branch
+ CostInc = Params.CostJumpFTInc;
+ CostDec = Params.CostJumpFTDec;
+ }
+ if (Jump.HasUnknownWeight) {
+ // The cost is different for fall-through and non-fall-through branches
+ if (Jump.Source + 1 == Jump.Target)
+ CostInc = Params.CostJumpUnknownFTInc;
+ else
+ CostInc = Params.CostJumpUnknownInc;
+ CostDec = 0;
+ } else {
+    assert(Jump.Weight > 0 && "found zero-weight jump with a known weight");
+ }
+ return std::make_pair(CostInc, CostDec);
+}
+
+/// Extract resulting block and edge counts from the flow network.
+void extractWeights(const ProfiParams &Params, MinCostMaxFlow &Network,
+ FlowFunction &Func) {
+ uint64_t NumBlocks = Func.Blocks.size();
+ uint64_t NumJumps = Func.Jumps.size();
+
+ // Extract resulting jump counts
+ for (uint64_t J = 0; J < NumJumps; J++) {
+ auto &Jump = Func.Jumps[J];
+ uint64_t SrcOut = 2 * Jump.Source + 1;
+ uint64_t DstIn = 2 * Jump.Target;
+
+ int64_t Flow = 0;
+ int64_t AuxFlow = Network.getFlow(SrcOut, DstIn);
+ if (Jump.Source != Jump.Target)
+ Flow = int64_t(Jump.Weight) + AuxFlow;
+ else
+ Flow = int64_t(Jump.Weight) + (AuxFlow > 0 ? AuxFlow : 0);
+
+ Jump.Flow = Flow;
+ assert(Flow >= 0 && "negative jump flow");
+ }
+
+ // Extract resulting block counts
+ auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
+ auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
+ for (auto &Jump : Func.Jumps) {
+ InFlow[Jump.Target] += Jump.Flow;
+ OutFlow[Jump.Source] += Jump.Flow;
+ }
+ for (uint64_t B = 0; B < NumBlocks; B++) {
+ auto &Block = Func.Blocks[B];
+ Block.Flow = std::max(OutFlow[B], InFlow[B]);
+ }
+}
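+// For instance, if a non-self-loop jump has Weight == 5 and the network puts
+// AuxFlow == -2 on the corresponding edge, the extracted count is
+// 5 + (-2) = 3; for a self-loop, a negative AuxFlow is clamped to zero, so the
+// jump keeps its original weight of 5.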
+
+#ifndef NDEBUG
+/// Verify that the provided block/jump weights are as expected.
+void verifyInput(const FlowFunction &Func) {
+ // Verify the entry block
+ assert(Func.Entry == 0 && Func.Blocks[0].isEntry());
+ for (size_t I = 1; I < Func.Blocks.size(); I++) {
+ assert(!Func.Blocks[I].isEntry() && "multiple entry blocks");
+ }
+ // Verify CFG jumps
+ for (auto &Block : Func.Blocks) {
+ assert((!Block.isEntry() || !Block.isExit()) &&
+ "a block cannot be an entry and an exit");
+ }
+ // Verify input block weights
+ for (auto &Block : Func.Blocks) {
+ assert((!Block.HasUnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
+ "non-zero weight of a block w/o weight except for an entry");
+ }
+ // Verify input jump weights
+ for (auto &Jump : Func.Jumps) {
+ assert((!Jump.HasUnknownWeight || Jump.Weight == 0) &&
+ "non-zero weight of a jump w/o weight");
+ }
+}
+
+/// Verify that the computed flow values satisfy flow conservation rules.
+void verifyOutput(const FlowFunction &Func) {
+ const uint64_t NumBlocks = Func.Blocks.size();
+ auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
+ auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
+ for (const auto &Jump : Func.Jumps) {
+ InFlow[Jump.Target] += Jump.Flow;
+ OutFlow[Jump.Source] += Jump.Flow;
+ }
+
+ uint64_t TotalInFlow = 0;
+ uint64_t TotalOutFlow = 0;
+ for (uint64_t I = 0; I < NumBlocks; I++) {
+ auto &Block = Func.Blocks[I];
+ if (Block.isEntry()) {
+ TotalInFlow += Block.Flow;
+ assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
+ } else if (Block.isExit()) {
+ TotalOutFlow += Block.Flow;
+ assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
+ } else {
+ assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
+ assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
+ }
+ }
+ assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
+
+ // Verify that there are no isolated flow components
+ // One could modify FlowFunction to hold edges indexed by the sources, which
+  // would avoid creating this object
+ auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
+ for (const auto &Jump : Func.Jumps) {
+ if (Jump.Flow > 0) {
+ PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
+ }
+ }
+
+ // Run BFS from the source along edges with positive flow
+ std::queue<uint64_t> Queue;
+ auto Visited = BitVector(NumBlocks, false);
+ Queue.push(Func.Entry);
+ Visited[Func.Entry] = true;
+ while (!Queue.empty()) {
+ uint64_t Src = Queue.front();
+ Queue.pop();
+ for (uint64_t Dst : PositiveFlowEdges[Src]) {
+ if (!Visited[Dst]) {
+ Queue.push(Dst);
+ Visited[Dst] = true;
+ }
+ }
+ }
+
+ // Verify that every block that has a positive flow is reached from the source
+ // along edges with a positive flow
+ for (uint64_t I = 0; I < NumBlocks; I++) {
+ auto &Block = Func.Blocks[I];
+ assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
+ }
+}
+#endif
+
+} // end of anonymous namespace
+
+/// Apply the profile inference algorithm for a given function
+void llvm::applyFlowInference(const ProfiParams &Params, FlowFunction &Func) {
+#ifndef NDEBUG
+ // Verify the input data
+ verifyInput(Func);
+#endif
+
+ // Create and apply an inference network model
+ auto InferenceNetwork = MinCostMaxFlow(Params);
+ initializeNetwork(Params, InferenceNetwork, Func);
+ InferenceNetwork.run();
+
+ // Extract flow values for every block and every edge
+ extractWeights(Params, InferenceNetwork, Func);
+
+ // Post-processing adjustments to the flow
+ auto Adjuster = FlowAdjuster(Params, Func);
+ Adjuster.run();
+
+#ifndef NDEBUG
+ // Verify the result
+ verifyOutput(Func);
+#endif
+}
+
+/// Apply the profile inference algorithm for a given flow function
+void llvm::applyFlowInference(FlowFunction &Func) {
+ ProfiParams Params;
+ // Set the params from the command-line flags.
+ Params.EvenFlowDistribution = SampleProfileEvenFlowDistribution;
+ Params.RebalanceUnknown = SampleProfileRebalanceUnknown;
+ Params.JoinIslands = SampleProfileJoinIslands;
+ Params.CostBlockInc = SampleProfileProfiCostBlockInc;
+ Params.CostBlockDec = SampleProfileProfiCostBlockDec;
+ Params.CostBlockEntryInc = SampleProfileProfiCostBlockEntryInc;
+ Params.CostBlockEntryDec = SampleProfileProfiCostBlockEntryDec;
+ Params.CostBlockZeroInc = SampleProfileProfiCostBlockZeroInc;
+ Params.CostBlockUnknownInc = SampleProfileProfiCostBlockUnknownInc;
+
+ applyFlowInference(Params, Func);
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
new file mode 100644
index 0000000000..f7ae6ad844
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
@@ -0,0 +1,185 @@
+//===- SampleProfileLoaderBaseUtil.cpp - Profile loader Util func ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SampleProfileLoader base utility functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+namespace llvm {
+
+cl::opt<unsigned> SampleProfileMaxPropagateIterations(
+ "sample-profile-max-propagate-iterations", cl::init(100),
+ cl::desc("Maximum number of iterations to go through when propagating "
+ "sample block/edge weights through the CFG."));
+
+cl::opt<unsigned> SampleProfileRecordCoverage(
+ "sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"),
+ cl::desc("Emit a warning if less than N% of records in the input profile "
+ "are matched to the IR."));
+
+cl::opt<unsigned> SampleProfileSampleCoverage(
+ "sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"),
+ cl::desc("Emit a warning if less than N% of samples in the input profile "
+ "are matched to the IR."));
+
+cl::opt<bool> NoWarnSampleUnused(
+ "no-warn-sample-unused", cl::init(false), cl::Hidden,
+ cl::desc("Use this option to turn off/on warnings about function with "
+ "samples but without debug information to use those samples. "));
+
+cl::opt<bool> SampleProfileUseProfi(
+ "sample-profile-use-profi", cl::Hidden,
+ cl::desc("Use profi to infer block and edge counts."));
+
+namespace sampleprofutil {
+
+/// Return true if the given callsite is hot wrt to hot cutoff threshold.
+///
+/// Functions that were inlined in the original binary will be represented
+/// in the inline stack in the sample profile. If the profile shows that
+/// the original inline decision was "good" (i.e., the callsite is executed
+/// frequently), then we will recreate the inline decision and apply the
+/// profile from the inlined callsite.
+///
+/// To decide whether an inlined callsite is hot, we compare the callsite
+/// sample count with the hot cutoff computed by ProfileSummaryInfo; it is
+/// regarded as hot if the count is above the cutoff value.
+///
+/// When ProfileAccurateForSymsInList is enabled and profile symbol list
+/// is present, functions in the profile symbol list but without profile will
+/// be regarded as cold and much less inlining will happen in CGSCC inlining
+/// pass, so we tend to lower the hot criteria here to allow more early
+/// inlining to happen for warm callsites, which is helpful for performance.
+bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI,
+ bool ProfAccForSymsInList) {
+ if (!CallsiteFS)
+ return false; // The callsite was not inlined in the original binary.
+
+ assert(PSI && "PSI is expected to be non null");
+ uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples();
+ if (ProfAccForSymsInList)
+ return !PSI->isColdCount(CallsiteTotalSamples);
+ else
+ return PSI->isHotCount(CallsiteTotalSamples);
+}
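+// The decision above can be summarized as follows (a restatement of the two
+// modes, not an additional heuristic):
+//   ProfAccForSymsInList == true : hot unless PSI classifies the callsite's
+//                                  total samples as cold;
+//   ProfAccForSymsInList == false: hot only if PSI classifies the callsite's
+//                                  total samples as hot.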
+
+/// Mark as used the sample record for the given function samples at
+/// (LineOffset, Discriminator).
+///
+/// \returns true if this is the first time we mark the given record.
+bool SampleCoverageTracker::markSamplesUsed(const FunctionSamples *FS,
+ uint32_t LineOffset,
+ uint32_t Discriminator,
+ uint64_t Samples) {
+ LineLocation Loc(LineOffset, Discriminator);
+ unsigned &Count = SampleCoverage[FS][Loc];
+ bool FirstTime = (++Count == 1);
+ if (FirstTime)
+ TotalUsedSamples += Samples;
+ return FirstTime;
+}
+
+/// Return the number of sample records that were applied from this profile.
+///
+/// This count does not include records from cold inlined callsites.
+unsigned
+SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS,
+ ProfileSummaryInfo *PSI) const {
+ auto I = SampleCoverage.find(FS);
+
+ // The size of the coverage map for FS represents the number of records
+ // that were marked used at least once.
+ unsigned Count = (I != SampleCoverage.end()) ? I->second.size() : 0;
+
+ // If there are inlined callsites in this function, count the samples found
+ // in the respective bodies. However, do not bother counting callees with 0
+ // total samples, these are callees that were never invoked at runtime.
+ for (const auto &I : FS->getCallsiteSamples())
+ for (const auto &J : I.second) {
+ const FunctionSamples *CalleeSamples = &J.second;
+ if (callsiteIsHot(CalleeSamples, PSI, ProfAccForSymsInList))
+ Count += countUsedRecords(CalleeSamples, PSI);
+ }
+
+ return Count;
+}
+
+/// Return the number of sample records in the body of this profile.
+///
+/// This count does not include records from cold inlined callsites.
+unsigned
+SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS,
+ ProfileSummaryInfo *PSI) const {
+ unsigned Count = FS->getBodySamples().size();
+
+ // Only count records in hot callsites.
+ for (const auto &I : FS->getCallsiteSamples())
+ for (const auto &J : I.second) {
+ const FunctionSamples *CalleeSamples = &J.second;
+ if (callsiteIsHot(CalleeSamples, PSI, ProfAccForSymsInList))
+ Count += countBodyRecords(CalleeSamples, PSI);
+ }
+
+ return Count;
+}
+
+/// Return the number of samples collected in the body of this profile.
+///
+/// This count does not include samples from cold inlined callsites.
+uint64_t
+SampleCoverageTracker::countBodySamples(const FunctionSamples *FS,
+ ProfileSummaryInfo *PSI) const {
+ uint64_t Total = 0;
+ for (const auto &I : FS->getBodySamples())
+ Total += I.second.getSamples();
+
+ // Only count samples in hot callsites.
+ for (const auto &I : FS->getCallsiteSamples())
+ for (const auto &J : I.second) {
+ const FunctionSamples *CalleeSamples = &J.second;
+ if (callsiteIsHot(CalleeSamples, PSI, ProfAccForSymsInList))
+ Total += countBodySamples(CalleeSamples, PSI);
+ }
+
+ return Total;
+}
+
+/// Return the fraction of sample records used in this profile.
+///
+/// The returned value is an unsigned integer in the range 0-100 indicating
+/// the percentage of sample records that were used while applying this
+/// profile to the associated function.
+unsigned SampleCoverageTracker::computeCoverage(unsigned Used,
+ unsigned Total) const {
+ assert(Used <= Total &&
+ "number of used records cannot exceed the total number of records");
+ return Total > 0 ? Used * 100 / Total : 100;
+}
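+// For example, with Used == 45 and Total == 60 the reported coverage is
+// 45 * 100 / 60 = 75 (percent); with Total == 0 the function conservatively
+// reports full coverage (100).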
+
+/// Create a global variable to flag that FSDiscriminators are used.
+void createFSDiscriminatorVariable(Module *M) {
+ const char *FSDiscriminatorVar = "__llvm_fs_discriminator__";
+ if (M->getGlobalVariable(FSDiscriminatorVar))
+ return;
+
+ auto &Context = M->getContext();
+ // Place this variable to llvm.used so it won't be GC'ed.
+ appendToUsed(*M, {new GlobalVariable(*M, Type::getInt1Ty(Context), true,
+ GlobalValue::WeakODRLinkage,
+ ConstantInt::getTrue(Context),
+ FSDiscriminatorVar)});
+}
+
+} // end of namespace sampleprofutil
+} // end of namespace llvm
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/SanitizerStats.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/SanitizerStats.cpp
new file mode 100644
index 0000000000..fd21ee4cc4
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/SanitizerStats.cpp
@@ -0,0 +1,106 @@
+//===- SanitizerStats.cpp - Sanitizer statistics gathering ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements code generation for sanitizer statistics gathering.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SanitizerStats.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+SanitizerStatReport::SanitizerStatReport(Module *M) : M(M) {
+ StatTy = ArrayType::get(Type::getInt8PtrTy(M->getContext()), 2);
+ EmptyModuleStatsTy = makeModuleStatsTy();
+
+ ModuleStatsGV = new GlobalVariable(*M, EmptyModuleStatsTy, false,
+ GlobalValue::InternalLinkage, nullptr);
+}
+
+ArrayType *SanitizerStatReport::makeModuleStatsArrayTy() {
+ return ArrayType::get(StatTy, Inits.size());
+}
+
+StructType *SanitizerStatReport::makeModuleStatsTy() {
+ return StructType::get(M->getContext(), {Type::getInt8PtrTy(M->getContext()),
+ Type::getInt32Ty(M->getContext()),
+ makeModuleStatsArrayTy()});
+}
+
+void SanitizerStatReport::create(IRBuilder<> &B, SanitizerStatKind SK) {
+ Function *F = B.GetInsertBlock()->getParent();
+ Module *M = F->getParent();
+ PointerType *Int8PtrTy = B.getInt8PtrTy();
+ IntegerType *IntPtrTy = B.getIntPtrTy(M->getDataLayout());
+ ArrayType *StatTy = ArrayType::get(Int8PtrTy, 2);
+
+ Inits.push_back(ConstantArray::get(
+ StatTy,
+ {Constant::getNullValue(Int8PtrTy),
+ ConstantExpr::getIntToPtr(
+ ConstantInt::get(IntPtrTy, uint64_t(SK) << (IntPtrTy->getBitWidth() -
+ kSanitizerStatKindBits)),
+ Int8PtrTy)}));
+
+ FunctionType *StatReportTy =
+ FunctionType::get(B.getVoidTy(), Int8PtrTy, false);
+ FunctionCallee StatReport =
+ M->getOrInsertFunction("__sanitizer_stat_report", StatReportTy);
+
+ auto InitAddr = ConstantExpr::getGetElementPtr(
+ EmptyModuleStatsTy, ModuleStatsGV,
+ ArrayRef<Constant *>{
+ ConstantInt::get(IntPtrTy, 0), ConstantInt::get(B.getInt32Ty(), 2),
+ ConstantInt::get(IntPtrTy, Inits.size() - 1),
+ });
+ B.CreateCall(StatReport, ConstantExpr::getBitCast(InitAddr, Int8PtrTy));
+}
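+// A note on the constant built above: the statistic kind is packed into the
+// topmost kSanitizerStatKindBits bits of a pointer-sized integer, i.e. the
+// emitted value is SK << (IntPtrTy->getBitWidth() - kSanitizerStatKindBits);
+// on a typical 64-bit target that is SK << (64 - kSanitizerStatKindBits), and
+// the low bits of the constant are left as zero.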
+
+void SanitizerStatReport::finish() {
+ if (Inits.empty()) {
+ ModuleStatsGV->eraseFromParent();
+ return;
+ }
+
+ PointerType *Int8PtrTy = Type::getInt8PtrTy(M->getContext());
+ IntegerType *Int32Ty = Type::getInt32Ty(M->getContext());
+ Type *VoidTy = Type::getVoidTy(M->getContext());
+
+ // Create a new ModuleStatsGV to replace the old one. We can't just set the
+ // old one's initializer because its type is different.
+ auto NewModuleStatsGV = new GlobalVariable(
+ *M, makeModuleStatsTy(), false, GlobalValue::InternalLinkage,
+ ConstantStruct::getAnon(
+ {Constant::getNullValue(Int8PtrTy),
+ ConstantInt::get(Int32Ty, Inits.size()),
+ ConstantArray::get(makeModuleStatsArrayTy(), Inits)}));
+ ModuleStatsGV->replaceAllUsesWith(
+ ConstantExpr::getBitCast(NewModuleStatsGV, ModuleStatsGV->getType()));
+ ModuleStatsGV->eraseFromParent();
+
+ // Create a global constructor to register NewModuleStatsGV.
+ auto F = Function::Create(FunctionType::get(VoidTy, false),
+ GlobalValue::InternalLinkage, "", M);
+ auto BB = BasicBlock::Create(M->getContext(), "", F);
+ IRBuilder<> B(BB);
+
+ FunctionType *StatInitTy = FunctionType::get(VoidTy, Int8PtrTy, false);
+ FunctionCallee StatInit =
+ M->getOrInsertFunction("__sanitizer_stat_init", StatInitTy);
+
+ B.CreateCall(StatInit, ConstantExpr::getBitCast(NewModuleStatsGV, Int8PtrTy));
+ B.CreateRetVoid();
+
+ appendToGlobalCtors(*M, F, 0);
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
new file mode 100644
index 0000000000..24f1966edd
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -0,0 +1,2678 @@
+//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the scalar evolution expander,
+// which is used to generate the code corresponding to a given scalar evolution
+// expression.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+
+#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS
+#define SCEV_DEBUG_WITH_TYPE(TYPE, X) DEBUG_WITH_TYPE(TYPE, X)
+#else
+#define SCEV_DEBUG_WITH_TYPE(TYPE, X)
+#endif
+
+using namespace llvm;
+
+cl::opt<unsigned> llvm::SCEVCheapExpansionBudget(
+ "scev-cheap-expansion-budget", cl::Hidden, cl::init(4),
+ cl::desc("When performing SCEV expansion only if it is cheap to do, this "
+ "controls the budget that is considered cheap (default = 4)"));
+
+using namespace PatternMatch;
+
+/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
+/// reusing an existing cast if a suitable one (= dominating IP) exists, or
+/// creating a new one.
+Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
+ Instruction::CastOps Op,
+ BasicBlock::iterator IP) {
+ // This function must be called with the builder having a valid insertion
+ // point. It doesn't need to be the actual IP where the uses of the returned
+ // cast will be added, but it must dominate such IP.
+ // We use this precondition to produce a cast that will dominate all its
+ // uses. In particular, this is crucial for the case where the builder's
+ // insertion point *is* the point where we were asked to put the cast.
+ // Since we don't know the builder's insertion point is actually
+ // where the uses will be added (only that it dominates it), we are
+ // not allowed to move it.
+ BasicBlock::iterator BIP = Builder.GetInsertPoint();
+
+ Value *Ret = nullptr;
+
+ // Check to see if there is already a cast!
+ for (User *U : V->users()) {
+ if (U->getType() != Ty)
+ continue;
+ CastInst *CI = dyn_cast<CastInst>(U);
+ if (!CI || CI->getOpcode() != Op)
+ continue;
+
+ // Found a suitable cast that is at IP or comes before IP. Use it. Note that
+ // the cast must also properly dominate the Builder's insertion point.
+ if (IP->getParent() == CI->getParent() && &*BIP != CI &&
+ (&*IP == CI || CI->comesBefore(&*IP))) {
+ Ret = CI;
+ break;
+ }
+ }
+
+ // Create a new cast.
+ if (!Ret) {
+ SCEVInsertPointGuard Guard(Builder, this);
+ Builder.SetInsertPoint(&*IP);
+ Ret = Builder.CreateCast(Op, V, Ty, V->getName());
+ }
+
+ // We assert at the end of the function since IP might point to an
+ // instruction with different dominance properties than a cast
+ // (an invoke for example) and not dominate BIP (but the cast does).
+ assert(!isa<Instruction>(Ret) ||
+ SE.DT.dominates(cast<Instruction>(Ret), &*BIP));
+
+ return Ret;
+}
+
+BasicBlock::iterator
+SCEVExpander::findInsertPointAfter(Instruction *I,
+ Instruction *MustDominate) const {
+ BasicBlock::iterator IP = ++I->getIterator();
+ if (auto *II = dyn_cast<InvokeInst>(I))
+ IP = II->getNormalDest()->begin();
+
+ while (isa<PHINode>(IP))
+ ++IP;
+
+ if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) {
+ ++IP;
+ } else if (isa<CatchSwitchInst>(IP)) {
+ IP = MustDominate->getParent()->getFirstInsertionPt();
+ } else {
+ assert(!IP->isEHPad() && "unexpected eh pad!");
+ }
+
+ // Adjust insert point to be after instructions inserted by the expander, so
+ // we can re-use already inserted instructions. Avoid skipping past the
+ // original \p MustDominate, in case it is an inserted instruction.
+ while (isInsertedInstruction(&*IP) && &*IP != MustDominate)
+ ++IP;
+
+ return IP;
+}
+
+BasicBlock::iterator
+SCEVExpander::GetOptimalInsertionPointForCastOf(Value *V) const {
+ // Cast the argument at the beginning of the entry block, after
+ // any bitcasts of other arguments.
+ if (Argument *A = dyn_cast<Argument>(V)) {
+ BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
+ while ((isa<BitCastInst>(IP) &&
+ isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
+ cast<BitCastInst>(IP)->getOperand(0) != A) ||
+ isa<DbgInfoIntrinsic>(IP))
+ ++IP;
+ return IP;
+ }
+
+ // Cast the instruction immediately after the instruction.
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return findInsertPointAfter(I, &*Builder.GetInsertPoint());
+
+ // Otherwise, this must be some kind of a constant,
+ // so let's plop this cast into the function's entry block.
+ assert(isa<Constant>(V) &&
+ "Expected the cast argument to be a global/constant");
+ return Builder.GetInsertBlock()
+ ->getParent()
+ ->getEntryBlock()
+ .getFirstInsertionPt();
+}
+
+/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
+/// which must be possible with a noop cast, doing what we can to share
+/// the casts.
+Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
+ Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
+ assert((Op == Instruction::BitCast ||
+ Op == Instruction::PtrToInt ||
+ Op == Instruction::IntToPtr) &&
+ "InsertNoopCastOfTo cannot perform non-noop casts!");
+ assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
+ "InsertNoopCastOfTo cannot change sizes!");
+
+ // inttoptr only works for integral pointers. For non-integral pointers, we
+ // can create a GEP on i8* null with the integral value as index. Note that
+ // it is safe to use GEP of null instead of inttoptr here, because only
+ // expressions already based on a GEP of null should be converted to pointers
+ // during expansion.
+ if (Op == Instruction::IntToPtr) {
+ auto *PtrTy = cast<PointerType>(Ty);
+ if (DL.isNonIntegralPointerType(PtrTy)) {
+ auto *Int8PtrTy = Builder.getInt8PtrTy(PtrTy->getAddressSpace());
+ assert(DL.getTypeAllocSize(Builder.getInt8Ty()) == 1 &&
+ "alloc size of i8 must by 1 byte for the GEP to be correct");
+ auto *GEP = Builder.CreateGEP(
+ Builder.getInt8Ty(), Constant::getNullValue(Int8PtrTy), V, "uglygep");
+ return Builder.CreateBitCast(GEP, Ty);
+ }
+ }
+ // Short-circuit unnecessary bitcasts.
+ if (Op == Instruction::BitCast) {
+ if (V->getType() == Ty)
+ return V;
+ if (CastInst *CI = dyn_cast<CastInst>(V)) {
+ if (CI->getOperand(0)->getType() == Ty)
+ return CI->getOperand(0);
+ }
+ }
+ // Short-circuit unnecessary inttoptr<->ptrtoint casts.
+ if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
+ SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
+ if (CastInst *CI = dyn_cast<CastInst>(V))
+ if ((CI->getOpcode() == Instruction::PtrToInt ||
+ CI->getOpcode() == Instruction::IntToPtr) &&
+ SE.getTypeSizeInBits(CI->getType()) ==
+ SE.getTypeSizeInBits(CI->getOperand(0)->getType()))
+ return CI->getOperand(0);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if ((CE->getOpcode() == Instruction::PtrToInt ||
+ CE->getOpcode() == Instruction::IntToPtr) &&
+ SE.getTypeSizeInBits(CE->getType()) ==
+ SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
+ return CE->getOperand(0);
+ }
+
+ // Fold a cast of a constant.
+ if (Constant *C = dyn_cast<Constant>(V))
+ return ConstantExpr::getCast(Op, C, Ty);
+
+ // Try to reuse existing cast, or insert one.
+ return ReuseOrCreateCast(V, Ty, Op, GetOptimalInsertionPointForCastOf(V));
+}
+
+/// InsertBinop - Insert the specified binary operator, doing a small amount
+/// of work to avoid inserting an obviously redundant operation, and hoisting
+/// to an outer loop when the opportunity is there and it is safe.
+Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
+ Value *LHS, Value *RHS,
+ SCEV::NoWrapFlags Flags, bool IsSafeToHoist) {
+ // Fold a binop with constant operands.
+ if (Constant *CLHS = dyn_cast<Constant>(LHS))
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, DL))
+ return Res;
+
+ // Do a quick scan to see if we have this binop nearby. If so, reuse it.
+ unsigned ScanLimit = 6;
+ BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
+ // Scanning starts from the last instruction before the insertion point.
+ BasicBlock::iterator IP = Builder.GetInsertPoint();
+ if (IP != BlockBegin) {
+ --IP;
+ for (; ScanLimit; --IP, --ScanLimit) {
+ // Don't count dbg.value against the ScanLimit, to avoid perturbing the
+ // generated code.
+ if (isa<DbgInfoIntrinsic>(IP))
+ ScanLimit++;
+
+ auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) {
+ // Ensure that no-wrap flags match.
+ if (isa<OverflowingBinaryOperator>(I)) {
+ if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW))
+ return true;
+ if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW))
+ return true;
+ }
+ // Conservatively, do not use any instruction which has any of exact
+ // flags installed.
+ if (isa<PossiblyExactOperator>(I) && I->isExact())
+ return true;
+ return false;
+ };
+ if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
+ IP->getOperand(1) == RHS && !canGenerateIncompatiblePoison(&*IP))
+ return &*IP;
+ if (IP == BlockBegin) break;
+ }
+ }
+
+ // Save the original insertion point so we can restore it when we're done.
+ DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
+ SCEVInsertPointGuard Guard(Builder, this);
+
+ if (IsSafeToHoist) {
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
+
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader->getTerminator());
+ }
+ }
+
+ // If we haven't found this binop, insert it.
+ // TODO: Use the Builder, which will make CreateBinOp below fold with
+ // InstSimplifyFolder.
+ Instruction *BO = Builder.Insert(BinaryOperator::Create(Opcode, LHS, RHS));
+ BO->setDebugLoc(Loc);
+ if (Flags & SCEV::FlagNUW)
+ BO->setHasNoUnsignedWrap();
+ if (Flags & SCEV::FlagNSW)
+ BO->setHasNoSignedWrap();
+
+ return BO;
+}
+
+/// FactorOutConstant - Test if S is divisible by Factor, using signed
+/// division. If so, update S with Factor divided out and return true.
+/// S need not be evenly divisible if a reasonable remainder can be
+/// computed.
+static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder,
+ const SCEV *Factor, ScalarEvolution &SE,
+ const DataLayout &DL) {
+ // Everything is divisible by one.
+ if (Factor->isOne())
+ return true;
+
+ // x/x == 1.
+ if (S == Factor) {
+ S = SE.getConstant(S->getType(), 1);
+ return true;
+ }
+
+ // For a Constant, check for a multiple of the given factor.
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
+ // 0/x == 0.
+ if (C->isZero())
+ return true;
+ // Check for divisibility.
+ if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) {
+ ConstantInt *CI =
+ ConstantInt::get(SE.getContext(), C->getAPInt().sdiv(FC->getAPInt()));
+ // If the quotient is zero and the remainder is non-zero, reject
+ // the value at this scale. It will be considered for subsequent
+ // smaller scales.
+ if (!CI->isZero()) {
+ const SCEV *Div = SE.getConstant(CI);
+ S = Div;
+ Remainder = SE.getAddExpr(
+ Remainder, SE.getConstant(C->getAPInt().srem(FC->getAPInt())));
+ return true;
+ }
+ }
+ }
+
+ // In a Mul, check if there is a constant operand which is a multiple
+ // of the given factor.
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
+ // Size is known, check if there is a constant operand which is a multiple
+ // of the given factor. If so, we can factor it.
+ if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor))
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
+ if (!C->getAPInt().srem(FC->getAPInt())) {
+ SmallVector<const SCEV *, 4> NewMulOps(M->operands());
+ NewMulOps[0] = SE.getConstant(C->getAPInt().sdiv(FC->getAPInt()));
+ S = SE.getMulExpr(NewMulOps);
+ return true;
+ }
+ }
+
+ // In an AddRec, check if both start and step are divisible.
+ if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEV *Step = A->getStepRecurrence(SE);
+ const SCEV *StepRem = SE.getConstant(Step->getType(), 0);
+ if (!FactorOutConstant(Step, StepRem, Factor, SE, DL))
+ return false;
+ if (!StepRem->isZero())
+ return false;
+ const SCEV *Start = A->getStart();
+ if (!FactorOutConstant(Start, Remainder, Factor, SE, DL))
+ return false;
+ S = SE.getAddRecExpr(Start, Step, A->getLoop(),
+ A->getNoWrapFlags(SCEV::FlagNW));
+ return true;
+ }
+
+ return false;
+}
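+// A couple of concrete cases of the factoring above: for S == 10 and
+// Factor == 4, S becomes 2 (10 sdiv 4) and the remainder 2 (10 srem 4) is
+// added to Remainder; for an addrec such as {8,+,4} with Factor == 4, both the
+// start and the step are divisible, so S becomes {2,+,1}.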
+
+/// SimplifyAddOperands - Sort and simplify a list of add operands. NumAddRecs
+/// is the number of SCEVAddRecExprs present, which are kept at the end of
+/// the list.
+///
+static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops,
+ Type *Ty,
+ ScalarEvolution &SE) {
+ unsigned NumAddRecs = 0;
+ for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i)
+ ++NumAddRecs;
+ // Group Ops into non-addrecs and addrecs.
+ SmallVector<const SCEV *, 8> NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs);
+ SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end());
+ // Let ScalarEvolution sort and simplify the non-addrecs list.
+ const SCEV *Sum = NoAddRecs.empty() ?
+ SE.getConstant(Ty, 0) :
+ SE.getAddExpr(NoAddRecs);
+ // If it returned an add, use the operands. Otherwise it simplified
+ // the sum into a single value, so just use that.
+ Ops.clear();
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum))
+ append_range(Ops, Add->operands());
+ else if (!Sum->isZero())
+ Ops.push_back(Sum);
+ // Then append the addrecs.
+ Ops.append(AddRecs.begin(), AddRecs.end());
+}
+
+/// SplitAddRecs - Flatten a list of add operands, moving addrec start values
+/// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,c}.
+/// This helps expose more opportunities for folding parts of the expressions
+/// into GEP indices.
+///
+static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
+ Type *Ty,
+ ScalarEvolution &SE) {
+ // Find the addrecs.
+ SmallVector<const SCEV *, 8> AddRecs;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) {
+ const SCEV *Start = A->getStart();
+ if (Start->isZero()) break;
+ const SCEV *Zero = SE.getConstant(Ty, 0);
+ AddRecs.push_back(SE.getAddRecExpr(Zero,
+ A->getStepRecurrence(SE),
+ A->getLoop(),
+ A->getNoWrapFlags(SCEV::FlagNW)));
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) {
+ Ops[i] = Zero;
+ append_range(Ops, Add->operands());
+ e += Add->getNumOperands();
+ } else {
+ Ops[i] = Start;
+ }
+ }
+ if (!AddRecs.empty()) {
+ // Add the addrecs onto the end of the list.
+ Ops.append(AddRecs.begin(), AddRecs.end());
+ // Resort the operand list, moving any constants to the front.
+ SimplifyAddOperands(Ops, Ty, SE);
+ }
+}
+
+/// expandAddToGEP - Expand an addition expression with a pointer type into
+/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
+/// BasicAliasAnalysis and other passes analyze the result. See the rules
+/// for getelementptr vs. inttoptr in
+/// http://llvm.org/docs/LangRef.html#pointeraliasing
+/// for details.
+///
+/// Design note: The correctness of using getelementptr here depends on
+/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
+/// they may introduce pointer arithmetic which may not be safely converted
+/// into getelementptr.
+///
+/// Design note: It might seem desirable for this function to be more
+/// loop-aware. If some of the indices are loop-invariant while others
+/// aren't, it might seem desirable to emit multiple GEPs, keeping the
+/// loop-invariant portions of the overall computation outside the loop.
+/// However, there are a few reasons this is not done here. Hoisting simple
+/// arithmetic is a low-level optimization that often isn't very
+/// important until late in the optimization process. In fact, passes
+/// like InstructionCombining will combine GEPs, even if it means
+/// pushing loop-invariant computation down into loops, so even if the
+/// GEPs were split here, the work would quickly be undone. The
+/// LoopStrengthReduction pass, which is usually run quite late (and
+/// after the last InstructionCombining pass), takes care of hoisting
+/// loop-invariant portions of expressions, after considering what
+/// can be folded using target addressing modes.
+///
+Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
+ const SCEV *const *op_end,
+ PointerType *PTy,
+ Type *Ty,
+ Value *V) {
+ SmallVector<Value *, 4> GepIndices;
+ SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
+ bool AnyNonZeroIndices = false;
+
+ // Split AddRecs up into parts as either of the parts may be usable
+ // without the other.
+ SplitAddRecs(Ops, Ty, SE);
+
+ Type *IntIdxTy = DL.getIndexType(PTy);
+
+ // For opaque pointers, always generate i8 GEP.
+ if (!PTy->isOpaque()) {
+ // Descend down the pointer's type and attempt to convert the other
+ // operands into GEP indices, at each level. The first index in a GEP
+ // indexes into the array implied by the pointer operand; the rest of
+ // the indices index into the element or field type selected by the
+ // preceding index.
+ Type *ElTy = PTy->getNonOpaquePointerElementType();
+ for (;;) {
+ // If the scale size is not 0, attempt to factor out a scale for
+ // array indexing.
+ SmallVector<const SCEV *, 8> ScaledOps;
+ if (ElTy->isSized()) {
+ const SCEV *ElSize = SE.getSizeOfExpr(IntIdxTy, ElTy);
+ if (!ElSize->isZero()) {
+ SmallVector<const SCEV *, 8> NewOps;
+ for (const SCEV *Op : Ops) {
+ const SCEV *Remainder = SE.getConstant(Ty, 0);
+ if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) {
+ // Op now has ElSize factored out.
+ ScaledOps.push_back(Op);
+ if (!Remainder->isZero())
+ NewOps.push_back(Remainder);
+ AnyNonZeroIndices = true;
+ } else {
+ // The operand was not divisible, so add it to the list of
+ // operands we'll scan next iteration.
+ NewOps.push_back(Op);
+ }
+ }
+ // If we made any changes, update Ops.
+ if (!ScaledOps.empty()) {
+ Ops = NewOps;
+ SimplifyAddOperands(Ops, Ty, SE);
+ }
+ }
+ }
+
+ // Record the scaled array index for this level of the type. If
+ // we didn't find any operands that could be factored, tentatively
+ // assume that element zero was selected (since the zero offset
+ // would obviously be folded away).
+ Value *Scaled =
+ ScaledOps.empty()
+ ? Constant::getNullValue(Ty)
+ : expandCodeForImpl(SE.getAddExpr(ScaledOps), Ty);
+ GepIndices.push_back(Scaled);
+
+ // Collect struct field index operands.
+ while (StructType *STy = dyn_cast<StructType>(ElTy)) {
+ bool FoundFieldNo = false;
+ // An empty struct has no fields.
+ if (STy->getNumElements() == 0) break;
+ // Field offsets are known. See if a constant offset falls within any of
+ // the struct fields.
+ if (Ops.empty())
+ break;
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
+ if (SE.getTypeSizeInBits(C->getType()) <= 64) {
+ const StructLayout &SL = *DL.getStructLayout(STy);
+ uint64_t FullOffset = C->getValue()->getZExtValue();
+ if (FullOffset < SL.getSizeInBytes()) {
+ unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
+ GepIndices.push_back(
+ ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
+ ElTy = STy->getTypeAtIndex(ElIdx);
+ Ops[0] =
+ SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
+ AnyNonZeroIndices = true;
+ FoundFieldNo = true;
+ }
+ }
+ // If no struct field offsets were found, tentatively assume that
+ // field zero was selected (since the zero offset would obviously
+ // be folded away).
+ if (!FoundFieldNo) {
+ ElTy = STy->getTypeAtIndex(0u);
+ GepIndices.push_back(
+ Constant::getNullValue(Type::getInt32Ty(Ty->getContext())));
+ }
+ }
+
+ if (ArrayType *ATy = dyn_cast<ArrayType>(ElTy))
+ ElTy = ATy->getElementType();
+ else
+ // FIXME: Handle VectorType.
+        // E.g., if ElTy is a scalable vector, then ElSize is not a
+        // compile-time constant and therefore cannot be factored out. The
+        // generated IR is then less ideal: base 'V' is cast to i8* and an
+        // ugly getelementptr is emitted over that.
+ break;
+ }
+ }
+
+ // If none of the operands were convertible to proper GEP indices, cast
+ // the base to i8* and do an ugly getelementptr with that. It's still
+ // better than ptrtoint+arithmetic+inttoptr at least.
+ if (!AnyNonZeroIndices) {
+ // Cast the base to i8*.
+ if (!PTy->isOpaque())
+ V = InsertNoopCastOfTo(V,
+ Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));
+
+ assert(!isa<Instruction>(V) ||
+ SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
+
+ // Expand the operands for a plain byte offset.
+ Value *Idx = expandCodeForImpl(SE.getAddExpr(Ops), Ty);
+
+ // Fold a GEP with constant operands.
+ if (Constant *CLHS = dyn_cast<Constant>(V))
+ if (Constant *CRHS = dyn_cast<Constant>(Idx))
+ return Builder.CreateGEP(Builder.getInt8Ty(), CLHS, CRHS);
+
+ // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
+ unsigned ScanLimit = 6;
+ BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
+ // Scanning starts from the last instruction before the insertion point.
+ BasicBlock::iterator IP = Builder.GetInsertPoint();
+ if (IP != BlockBegin) {
+ --IP;
+ for (; ScanLimit; --IP, --ScanLimit) {
+ // Don't count dbg.value against the ScanLimit, to avoid perturbing the
+ // generated code.
+ if (isa<DbgInfoIntrinsic>(IP))
+ ScanLimit++;
+ if (IP->getOpcode() == Instruction::GetElementPtr &&
+ IP->getOperand(0) == V && IP->getOperand(1) == Idx &&
+ cast<GEPOperator>(&*IP)->getSourceElementType() ==
+ Type::getInt8Ty(Ty->getContext()))
+ return &*IP;
+ if (IP == BlockBegin) break;
+ }
+ }
+
+ // Save the original insertion point so we can restore it when we're done.
+ SCEVInsertPointGuard Guard(Builder, this);
+
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
+
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader->getTerminator());
+ }
+
+ // Emit a GEP.
+ return Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep");
+ }
+
+ {
+ SCEVInsertPointGuard Guard(Builder, this);
+
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(V)) break;
+
+ bool AnyIndexNotLoopInvariant = any_of(
+ GepIndices, [L](Value *Op) { return !L->isLoopInvariant(Op); });
+
+ if (AnyIndexNotLoopInvariant)
+ break;
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
+
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader->getTerminator());
+ }
+
+ // Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
+ // because ScalarEvolution may have changed the address arithmetic to
+ // compute a value which is beyond the end of the allocated object.
+ Value *Casted = V;
+ if (V->getType() != PTy)
+ Casted = InsertNoopCastOfTo(Casted, PTy);
+ Value *GEP = Builder.CreateGEP(PTy->getNonOpaquePointerElementType(),
+ Casted, GepIndices, "scevgep");
+ Ops.push_back(SE.getUnknown(GEP));
+ }
+
+ return expand(SE.getAddExpr(Ops));
+}
+
+Value *SCEVExpander::expandAddToGEP(const SCEV *Op, PointerType *PTy, Type *Ty,
+ Value *V) {
+ const SCEV *const Ops[1] = {Op};
+ return expandAddToGEP(Ops, Ops + 1, PTy, Ty, V);
+}
+
+/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
+/// SCEV expansion. If they are nested, this is the most nested. If they are
+/// neighboring, pick the later.
+static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
+ DominatorTree &DT) {
+ if (!A) return B;
+ if (!B) return A;
+ if (A->contains(B)) return B;
+ if (B->contains(A)) return A;
+ if (DT.dominates(A->getHeader(), B->getHeader())) return B;
+ if (DT.dominates(B->getHeader(), A->getHeader())) return A;
+ return A; // Arbitrarily break the tie.
+}
+
+/// getRelevantLoop - Get the most relevant loop associated with the given
+/// expression, according to PickMostRelevantLoop.
+const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
+ // Test whether we've already computed the most relevant loop for this SCEV.
+ auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ switch (S->getSCEVType()) {
+ case scConstant:
+ return nullptr; // A constant has no relevant loops.
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend:
+ case scPtrToInt:
+ case scAddExpr:
+ case scMulExpr:
+ case scUDivExpr:
+ case scAddRecExpr:
+ case scUMaxExpr:
+ case scSMaxExpr:
+ case scUMinExpr:
+ case scSMinExpr:
+ case scSequentialUMinExpr: {
+ const Loop *L = nullptr;
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ L = AR->getLoop();
+ for (const SCEV *Op : S->operands())
+ L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT);
+ return RelevantLoops[S] = L;
+ }
+ case scUnknown: {
+ const SCEVUnknown *U = cast<SCEVUnknown>(S);
+ if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
+ return Pair.first->second = SE.LI.getLoopFor(I->getParent());
+ // A non-instruction has no relevant loops.
+ return nullptr;
+ }
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ }
+ llvm_unreachable("Unexpected SCEV type!");
+}
+
+namespace {
+
+/// LoopCompare - Compare loops by PickMostRelevantLoop.
+class LoopCompare {
+ DominatorTree &DT;
+public:
+ explicit LoopCompare(DominatorTree &dt) : DT(dt) {}
+
+ bool operator()(std::pair<const Loop *, const SCEV *> LHS,
+ std::pair<const Loop *, const SCEV *> RHS) const {
+    // Sort pointer operands to the front so they can serve as GEP bases.
+ if (LHS.second->getType()->isPointerTy() !=
+ RHS.second->getType()->isPointerTy())
+ return LHS.second->getType()->isPointerTy();
+
+ // Compare loops with PickMostRelevantLoop.
+ if (LHS.first != RHS.first)
+ return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;
+
+ // If one operand is a non-constant negative and the other is not,
+ // put the non-constant negative on the right so that a sub can
+ // be used instead of a negate and add.
+ if (LHS.second->isNonConstantNegative()) {
+ if (!RHS.second->isNonConstantNegative())
+ return false;
+ } else if (RHS.second->isNonConstantNegative())
+ return true;
+
+ // Otherwise they are equivalent according to this comparison.
+ return false;
+ }
+};
+
+}
+
+Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+
+ // Collect all the add operands in a loop, along with their associated loops.
+ // Iterate in reverse so that constants are emitted last, all else equal, and
+ // so that pointer operands are inserted first, which the code below relies on
+ // to form more involved GEPs.
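+  // For example, for an add of a pointer %base with 4*%i and 16, the loop
+  // below expands %base first and then folds the remaining operands into one
+  // or more GEPs on it, rather than emitting ptrtoint/add/inttoptr sequences.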
+ SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
+ for (const SCEV *Op : reverse(S->operands()))
+ OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));
+
+ // Sort by loop. Use a stable sort so that constants follow non-constants and
+ // pointer operands precede non-pointer operands.
+ llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));
+
+ // Emit instructions to add all the operands. Hoist as much as possible
+ // out of loops, and form meaningful getelementptrs where possible.
+ Value *Sum = nullptr;
+ for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) {
+ const Loop *CurLoop = I->first;
+ const SCEV *Op = I->second;
+ if (!Sum) {
+ // This is the first operand. Just expand it.
+ Sum = expand(Op);
+ ++I;
+ continue;
+ }
+
+ assert(!Op->getType()->isPointerTy() && "Only first op can be pointer");
+ if (PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
+ // The running sum expression is a pointer. Try to form a getelementptr
+ // at this level with that as the base.
+ SmallVector<const SCEV *, 4> NewOps;
+ for (; I != E && I->first == CurLoop; ++I) {
+        // If the operand is a SCEVUnknown wrapping a non-instruction value,
+        // peek through it to enable more of it to be folded into the GEP.
+ const SCEV *X = I->second;
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
+ if (!isa<Instruction>(U->getValue()))
+ X = SE.getSCEV(U->getValue());
+ NewOps.push_back(X);
+ }
+ Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
+ } else if (Op->isNonConstantNegative()) {
+ // Instead of doing a negate and add, just do a subtract.
+ Value *W = expandCodeForImpl(SE.getNegativeSCEV(Op), Ty);
+ Sum = InsertNoopCastOfTo(Sum, Ty);
+ Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap,
+ /*IsSafeToHoist*/ true);
+ ++I;
+ } else {
+ // A simple add.
+ Value *W = expandCodeForImpl(Op, Ty);
+ Sum = InsertNoopCastOfTo(Sum, Ty);
+ // Canonicalize a constant to the RHS.
+ if (isa<Constant>(Sum)) std::swap(Sum, W);
+ Sum = InsertBinop(Instruction::Add, Sum, W, S->getNoWrapFlags(),
+ /*IsSafeToHoist*/ true);
+ ++I;
+ }
+ }
+
+ return Sum;
+}
+
+Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+
+ // Collect all the mul operands in a loop, along with their associated loops.
+ // Iterate in reverse so that constants are emitted last, all else equal.
+ SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
+ for (const SCEV *Op : reverse(S->operands()))
+ OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));
+
+ // Sort by loop. Use a stable sort so that constants follow non-constants.
+ llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));
+
+ // Emit instructions to mul all the operands. Hoist as much as possible
+ // out of loops.
+ Value *Prod = nullptr;
+ auto I = OpsAndLoops.begin();
+
+ // Expand the calculation of X pow N in the following manner:
+ // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
+ // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK).
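+  //
+  // For instance, with Exponent = 5 (binary 101) the lambda below computes
+  // X^2 and X^4 by repeated squaring and multiplies into Result only the
+  // powers whose bit is set in Exponent, yielding X * X^4 = X^5.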
+ const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops, &Ty]() {
+ auto E = I;
+ // Calculate how many times the same operand from the same loop is included
+ // into this power.
+ uint64_t Exponent = 0;
+ const uint64_t MaxExponent = UINT64_MAX >> 1;
+    // No one sane will ever try to calculate such huge exponents, but if we
+    // need this, we stop at UINT64_MAX / 2 so that the doubling of BinExp in
+    // the loop below can never wrap a uint64_t.
+ while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) {
+ ++Exponent;
+ ++E;
+ }
+ assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?");
+
+ // Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them
+ // that are needed into the result.
+ Value *P = expandCodeForImpl(I->second, Ty);
+ Value *Result = nullptr;
+ if (Exponent & 1)
+ Result = P;
+ for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
+ P = InsertBinop(Instruction::Mul, P, P, SCEV::FlagAnyWrap,
+ /*IsSafeToHoist*/ true);
+ if (Exponent & BinExp)
+ Result = Result ? InsertBinop(Instruction::Mul, Result, P,
+ SCEV::FlagAnyWrap,
+ /*IsSafeToHoist*/ true)
+ : P;
+ }
+
+ I = E;
+ assert(Result && "Nothing was expanded?");
+ return Result;
+ };
+
+ while (I != OpsAndLoops.end()) {
+ if (!Prod) {
+ // This is the first operand. Just expand it.
+ Prod = ExpandOpBinPowN();
+ } else if (I->second->isAllOnesValue()) {
+ // Instead of doing a multiply by negative one, just do a negate.
+ Prod = InsertNoopCastOfTo(Prod, Ty);
+ Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod,
+ SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
+ ++I;
+ } else {
+ // A simple mul.
+ Value *W = ExpandOpBinPowN();
+ Prod = InsertNoopCastOfTo(Prod, Ty);
+ // Canonicalize a constant to the RHS.
+ if (isa<Constant>(Prod)) std::swap(Prod, W);
+ const APInt *RHS;
+ if (match(W, m_Power2(RHS))) {
+ // Canonicalize Prod*(1<<C) to Prod<<C.
+ assert(!Ty->isVectorTy() && "vector types are not SCEVable");
+ auto NWFlags = S->getNoWrapFlags();
+        // Clear the nsw flag if the shl would produce a poison value.
+ if (RHS->logBase2() == RHS->getBitWidth() - 1)
+ NWFlags = ScalarEvolution::clearFlags(NWFlags, SCEV::FlagNSW);
+ Prod = InsertBinop(Instruction::Shl, Prod,
+ ConstantInt::get(Ty, RHS->logBase2()), NWFlags,
+ /*IsSafeToHoist*/ true);
+ } else {
+ Prod = InsertBinop(Instruction::Mul, Prod, W, S->getNoWrapFlags(),
+ /*IsSafeToHoist*/ true);
+ }
+ }
+ }
+
+ return Prod;
+}
+
+Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+
+ Value *LHS = expandCodeForImpl(S->getLHS(), Ty);
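+  // A udiv by a constant power of two is emitted as a logical shift right,
+  // e.g. a divide by 8 becomes a lshr by 3.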
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
+ const APInt &RHS = SC->getAPInt();
+ if (RHS.isPowerOf2())
+ return InsertBinop(Instruction::LShr, LHS,
+ ConstantInt::get(Ty, RHS.logBase2()),
+ SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
+ }
+
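+  // Division by zero is undefined, so only mark the expansion as safe to
+  // hoist when the divisor is known to be non-zero.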
+ Value *RHS = expandCodeForImpl(S->getRHS(), Ty);
+ return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap,
+ /*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
+}
+
+/// Determine if this is a well-behaved chain of instructions leading back to
+/// the PHI. If so, it may be reused by expanded expressions.
+bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
+ const Loop *L) {
+ if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
+ (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV)))
+ return false;
+ // If any of the operands don't dominate the insert position, bail.
+ // Addrec operands are always loop-invariant, so this can only happen
+ // if there are instructions which haven't been hoisted.
+ if (L == IVIncInsertLoop) {
+ for (Use &Op : llvm::drop_begin(IncV->operands()))
+ if (Instruction *OInst = dyn_cast<Instruction>(Op))
+ if (!SE.DT.dominates(OInst, IVIncInsertPos))
+ return false;
+ }
+ // Advance to the next instruction.
+ IncV = dyn_cast<Instruction>(IncV->getOperand(0));
+ if (!IncV)
+ return false;
+
+ if (IncV->mayHaveSideEffects())
+ return false;
+
+ if (IncV == PN)
+ return true;
+
+ return isNormalAddRecExprPHI(PN, IncV, L);
+}
+
+/// getIVIncOperand returns an induction variable increment's induction
+/// variable operand.
+///
+/// If allowScale is set, any type of GEP is allowed as long as the nonIV
+/// operands dominate InsertPos.
+///
+/// If allowScale is not set, ensure that a GEP increment conforms to one of the
+/// simple patterns generated by getAddRecExprPHILiterally and
+/// expandAddToGEP. If the pattern isn't recognized, return NULL.
+Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
+ Instruction *InsertPos,
+ bool allowScale) {
+ if (IncV == InsertPos)
+ return nullptr;
+
+ switch (IncV->getOpcode()) {
+ default:
+ return nullptr;
+ // Check for a simple Add/Sub or GEP of a loop invariant step.
+ case Instruction::Add:
+ case Instruction::Sub: {
+ Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
+ if (!OInst || SE.DT.dominates(OInst, InsertPos))
+ return dyn_cast<Instruction>(IncV->getOperand(0));
+ return nullptr;
+ }
+ case Instruction::BitCast:
+ return dyn_cast<Instruction>(IncV->getOperand(0));
+ case Instruction::GetElementPtr:
+ for (Use &U : llvm::drop_begin(IncV->operands())) {
+ if (isa<Constant>(U))
+ continue;
+ if (Instruction *OInst = dyn_cast<Instruction>(U)) {
+ if (!SE.DT.dominates(OInst, InsertPos))
+ return nullptr;
+ }
+ if (allowScale) {
+ // allow any kind of GEP as long as it can be hoisted.
+ continue;
+ }
+ // This must be a pointer addition of constants (pretty), which is already
+ // handled, or some number of address-size elements (ugly). Ugly geps
+ // have 2 operands. i1* is used by the expander to represent an
+ // address-size element.
+ if (IncV->getNumOperands() != 2)
+ return nullptr;
+ unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace();
+ if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS)
+ && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS))
+ return nullptr;
+ break;
+ }
+ return dyn_cast<Instruction>(IncV->getOperand(0));
+ }
+}
+
+/// If the insert point of the current builder or any of the builders on the
+/// stack of saved builders has 'I' as its insert point, update it to point to
+/// the instruction after 'I'. This is intended to be used when the instruction
+/// 'I' is being moved. If this fixup is not done and 'I' is moved to a
+/// different block, the inconsistent insert point (with a mismatched
+/// Instruction and Block) can lead to an instruction being inserted in a block
+/// other than its parent.
+void SCEVExpander::fixupInsertPoints(Instruction *I) {
+ BasicBlock::iterator It(*I);
+ BasicBlock::iterator NewInsertPt = std::next(It);
+ if (Builder.GetInsertPoint() == It)
+ Builder.SetInsertPoint(&*NewInsertPt);
+ for (auto *InsertPtGuard : InsertPointGuards)
+ if (InsertPtGuard->GetInsertPoint() == It)
+ InsertPtGuard->SetInsertPoint(NewInsertPt);
+}
+
+/// hoistIVInc - Attempt to hoist a simple IV increment above InsertPos to make
+/// it available to other uses in this loop. Recursively hoist any operands,
+/// until we reach a value that dominates InsertPos.
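+///
+/// If RecomputePoisonFlags is set, poison-generating flags on the hoisted
+/// increments are dropped and then re-derived from SCEV in their new context.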
+bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos,
+ bool RecomputePoisonFlags) {
+ auto FixupPoisonFlags = [this](Instruction *I) {
+ // Drop flags that are potentially inferred from old context and infer flags
+ // in new context.
+ I->dropPoisonGeneratingFlags();
+ if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I))
+ if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
+ auto *BO = cast<BinaryOperator>(I);
+ BO->setHasNoUnsignedWrap(
+ ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == SCEV::FlagNUW);
+ BO->setHasNoSignedWrap(
+ ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == SCEV::FlagNSW);
+ }
+ };
+
+ if (SE.DT.dominates(IncV, InsertPos)) {
+ if (RecomputePoisonFlags)
+ FixupPoisonFlags(IncV);
+ return true;
+ }
+
+ // InsertPos must itself dominate IncV so that IncV's new position satisfies
+ // its existing users.
+ if (isa<PHINode>(InsertPos) ||
+ !SE.DT.dominates(InsertPos->getParent(), IncV->getParent()))
+ return false;
+
+ if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos))
+ return false;
+
+ // Check that the chain of IV operands leading back to Phi can be hoisted.
+ SmallVector<Instruction*, 4> IVIncs;
+ for(;;) {
+ Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true);
+ if (!Oper)
+ return false;
+ // IncV is safe to hoist.
+ IVIncs.push_back(IncV);
+ IncV = Oper;
+ if (SE.DT.dominates(IncV, InsertPos))
+ break;
+ }
+ for (Instruction *I : llvm::reverse(IVIncs)) {
+ fixupInsertPoints(I);
+ I->moveBefore(InsertPos);
+ if (RecomputePoisonFlags)
+ FixupPoisonFlags(I);
+ }
+ return true;
+}
+
+/// Determine if this cyclic phi is in a form that would have been generated by
+/// LSR. We don't care if the phi was actually expanded in this pass, as long
+/// as it is in a low-cost form, for example, no implied multiplication. This
+/// should match any patterns generated by getAddRecExprPHILiterally and
+/// expandAddToGEP.
+bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
+ const Loop *L) {
+ for(Instruction *IVOper = IncV;
+ (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(),
+ /*allowScale=*/false));) {
+ if (IVOper == PN)
+ return true;
+ }
+ return false;
+}
+
+/// expandIVInc - Expand an IV increment at Builder's current InsertPos.
+/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
+/// need to materialize IV increments elsewhere to handle difficult situations.
+Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
+ Type *ExpandTy, Type *IntTy,
+ bool useSubtract) {
+ Value *IncV;
+ // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
+ if (ExpandTy->isPointerTy()) {
+ PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
+ // If the step isn't constant, don't use an implicitly scaled GEP, because
+ // that would require a multiply inside the loop.
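+    // (With the usual data layout an i1 element has an alloc size of one
+    // byte, so the i1* GEP below treats StepV as a plain byte offset rather
+    // than a scaled element index.)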
+ if (!isa<ConstantInt>(StepV))
+ GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
+ GEPPtrTy->getAddressSpace());
+ IncV = expandAddToGEP(SE.getSCEV(StepV), GEPPtrTy, IntTy, PN);
+ if (IncV->getType() != PN->getType())
+ IncV = Builder.CreateBitCast(IncV, PN->getType());
+ } else {
+ IncV = useSubtract ?
+ Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
+ Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
+ }
+ return IncV;
+}
+
+/// Check whether we can cheaply express the requested SCEV in terms of
+/// the available PHI SCEV by truncation and/or inversion of the step.
+static bool canBeCheaplyTransformed(ScalarEvolution &SE,
+ const SCEVAddRecExpr *Phi,
+ const SCEVAddRecExpr *Requested,
+ bool &InvertStep) {
+ // We can't transform to match a pointer PHI.
+ if (Phi->getType()->isPointerTy())
+ return false;
+
+ Type *PhiTy = SE.getEffectiveSCEVType(Phi->getType());
+ Type *RequestedTy = SE.getEffectiveSCEVType(Requested->getType());
+
+ if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
+ return false;
+
+ // Try truncate it if necessary.
+ Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy));
+ if (!Phi)
+ return false;
+
+ // Check whether truncation will help.
+ if (Phi == Requested) {
+ InvertStep = false;
+ return true;
+ }
+
+ // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
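+  // (E.g. with start R = 10, the sequence 10, 9, 8, ... equals 10 minus the
+  // sequence 0, 1, 2, ....)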
+ if (SE.getMinusSCEV(Requested->getStart(), Requested) == Phi) {
+ InvertStep = true;
+ return true;
+ }
+
+ return false;
+}
+
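+/// Return true if the increment of this addrec provably cannot wrap in the
+/// signed sense: the add is also evaluated in a type twice as wide, and if
+/// sign-extending after the add gives the same result as adding the
+/// sign-extended operands, the narrow add cannot have overflowed.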
+static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
+ if (!isa<IntegerType>(AR->getType()))
+ return false;
+
+ unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
+ Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
+ const SCEV *Step = AR->getStepRecurrence(SE);
+ const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy),
+ SE.getSignExtendExpr(AR, WideTy));
+ const SCEV *ExtendAfterOp =
+ SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy);
+ return ExtendAfterOp == OpAfterExtend;
+}
+
+static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
+ if (!isa<IntegerType>(AR->getType()))
+ return false;
+
+ unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
+ Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
+ const SCEV *Step = AR->getStepRecurrence(SE);
+ const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy),
+ SE.getZeroExtendExpr(AR, WideTy));
+ const SCEV *ExtendAfterOp =
+ SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy);
+ return ExtendAfterOp == OpAfterExtend;
+}
+
+/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
+/// the base addrec, which is the addrec without any non-loop-dominating
+/// values, and return the PHI.
+PHINode *
+SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
+ const Loop *L,
+ Type *ExpandTy,
+ Type *IntTy,
+ Type *&TruncTy,
+ bool &InvertStep) {
+ assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position");
+
+ // Reuse a previously-inserted PHI, if present.
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (LatchBlock) {
+ PHINode *AddRecPhiMatch = nullptr;
+ Instruction *IncV = nullptr;
+ TruncTy = nullptr;
+ InvertStep = false;
+
+ // Only try partially matching scevs that need truncation and/or
+ // step-inversion if we know this loop is outside the current loop.
+ bool TryNonMatchingSCEV =
+ IVIncInsertLoop &&
+ SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());
+
+ for (PHINode &PN : L->getHeader()->phis()) {
+ if (!SE.isSCEVable(PN.getType()))
+ continue;
+
+      // We should not look for an incomplete PHI. Getting the SCEV for an
+      // incomplete PHI has no meaning at all.
+ if (!PN.isComplete()) {
+ SCEV_DEBUG_WITH_TYPE(
+ DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n");
+ continue;
+ }
+
+ const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
+ if (!PhiSCEV)
+ continue;
+
+ bool IsMatchingSCEV = PhiSCEV == Normalized;
+ // We only handle truncation and inversion of phi recurrences for the
+ // expanded expression if the expanded expression's loop dominates the
+ // loop we insert to. Check now, so we can bail out early.
+ if (!IsMatchingSCEV && !TryNonMatchingSCEV)
+ continue;
+
+ // TODO: this possibly can be reworked to avoid this cast at all.
+ Instruction *TempIncV =
+ dyn_cast<Instruction>(PN.getIncomingValueForBlock(LatchBlock));
+ if (!TempIncV)
+ continue;
+
+ // Check whether we can reuse this PHI node.
+ if (LSRMode) {
+ if (!isExpandedAddRecExprPHI(&PN, TempIncV, L))
+ continue;
+ } else {
+ if (!isNormalAddRecExprPHI(&PN, TempIncV, L))
+ continue;
+ }
+
+ // Stop if we have found an exact match SCEV.
+ if (IsMatchingSCEV) {
+ IncV = TempIncV;
+ TruncTy = nullptr;
+ InvertStep = false;
+ AddRecPhiMatch = &PN;
+ break;
+ }
+
+ // Try whether the phi can be translated into the requested form
+ // (truncated and/or offset by a constant).
+ if ((!TruncTy || InvertStep) &&
+ canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
+        // Record the phi node. But don't stop; we might still find an exact
+        // match later.
+ AddRecPhiMatch = &PN;
+ IncV = TempIncV;
+ TruncTy = SE.getEffectiveSCEVType(Normalized->getType());
+ }
+ }
+
+ if (AddRecPhiMatch) {
+ // Ok, the add recurrence looks usable.
+ // Remember this PHI, even in post-inc mode.
+ InsertedValues.insert(AddRecPhiMatch);
+ // Remember the increment.
+ rememberInstruction(IncV);
+ // Those values were not actually inserted but re-used.
+ ReusedValues.insert(AddRecPhiMatch);
+ ReusedValues.insert(IncV);
+ return AddRecPhiMatch;
+ }
+ }
+
+ // Save the original insertion point so we can restore it when we're done.
+ SCEVInsertPointGuard Guard(Builder, this);
+
+ // Another AddRec may need to be recursively expanded below. For example, if
+ // this AddRec is quadratic, the StepV may itself be an AddRec in this
+ // loop. Remove this loop from the PostIncLoops set before expanding such
+ // AddRecs. Otherwise, we cannot find a valid position for the step
+ // (i.e. StepV can never dominate its loop header). Ideally, we could do
+ // SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
+ // so it's not worth implementing SmallPtrSet::swap.
+ PostIncLoopSet SavedPostIncLoops = PostIncLoops;
+ PostIncLoops.clear();
+
+ // Expand code for the start value into the loop preheader.
+ assert(L->getLoopPreheader() &&
+ "Can't expand add recurrences without a loop preheader!");
+ Value *StartV =
+ expandCodeForImpl(Normalized->getStart(), ExpandTy,
+ L->getLoopPreheader()->getTerminator());
+
+  // StartV must have been inserted into L's preheader to dominate the new
+  // phi.
+ assert(!isa<Instruction>(StartV) ||
+ SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
+ L->getHeader()));
+
+ // Expand code for the step value. Do this before creating the PHI so that PHI
+ // reuse code doesn't see an incomplete PHI.
+ const SCEV *Step = Normalized->getStepRecurrence(SE);
+ // If the stride is negative, insert a sub instead of an add for the increment
+ // (unless it's a constant, because subtracts of constants are canonicalized
+ // to adds).
+ bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
+ if (useSubtract)
+ Step = SE.getNegativeSCEV(Step);
+ // Expand the step somewhere that dominates the loop header.
+ Value *StepV = expandCodeForImpl(
+ Step, IntTy, &*L->getHeader()->getFirstInsertionPt());
+
+ // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
+ // we actually do emit an addition. It does not apply if we emit a
+ // subtraction.
+ bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized);
+ bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized);
+
+ // Create the PHI.
+ BasicBlock *Header = L->getHeader();
+ Builder.SetInsertPoint(Header, Header->begin());
+ pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
+ PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE),
+ Twine(IVName) + ".iv");
+
+ // Create the step instructions and populate the PHI.
+ for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
+ BasicBlock *Pred = *HPI;
+
+ // Add a start value.
+ if (!L->contains(Pred)) {
+ PN->addIncoming(StartV, Pred);
+ continue;
+ }
+
+ // Create a step value and add it to the PHI.
+ // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the
+ // instructions at IVIncInsertPos.
+ Instruction *InsertPos = L == IVIncInsertLoop ?
+ IVIncInsertPos : Pred->getTerminator();
+ Builder.SetInsertPoint(InsertPos);
+ Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
+
+ if (isa<OverflowingBinaryOperator>(IncV)) {
+ if (IncrementIsNUW)
+ cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap();
+ if (IncrementIsNSW)
+ cast<BinaryOperator>(IncV)->setHasNoSignedWrap();
+ }
+ PN->addIncoming(IncV, Pred);
+ }
+
+ // After expanding subexpressions, restore the PostIncLoops set so the caller
+ // can ensure that IVIncrement dominates the current uses.
+ PostIncLoops = SavedPostIncLoops;
+
+ // Remember this PHI, even in post-inc mode. LSR SCEV-based salvaging is most
+ // effective when we are able to use an IV inserted here, so record it.
+ InsertedValues.insert(PN);
+ InsertedIVs.push_back(PN);
+ return PN;
+}
+
+Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
+ Type *STy = S->getType();
+ Type *IntTy = SE.getEffectiveSCEVType(STy);
+ const Loop *L = S->getLoop();
+
+ // Determine a normalized form of this expression, which is the expression
+ // before any post-inc adjustment is made.
+ const SCEVAddRecExpr *Normalized = S;
+ if (PostIncLoops.count(L)) {
+ PostIncLoopSet Loops;
+ Loops.insert(L);
+ Normalized = cast<SCEVAddRecExpr>(normalizeForPostIncUse(S, Loops, SE));
+ }
+
+ // Strip off any non-loop-dominating component from the addrec start.
+ const SCEV *Start = Normalized->getStart();
+ const SCEV *PostLoopOffset = nullptr;
+ if (!SE.properlyDominates(Start, L->getHeader())) {
+ PostLoopOffset = Start;
+ Start = SE.getConstant(Normalized->getType(), 0);
+ Normalized = cast<SCEVAddRecExpr>(
+ SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE),
+ Normalized->getLoop(),
+ Normalized->getNoWrapFlags(SCEV::FlagNW)));
+ }
+
+ // Strip off any non-loop-dominating component from the addrec step.
+ const SCEV *Step = Normalized->getStepRecurrence(SE);
+ const SCEV *PostLoopScale = nullptr;
+ if (!SE.dominates(Step, L->getHeader())) {
+ PostLoopScale = Step;
+ Step = SE.getConstant(Normalized->getType(), 1);
+ if (!Start->isZero()) {
+ // The normalization below assumes that Start is constant zero, so if
+      // it isn't, re-associate Start to PostLoopOffset.
+ assert(!PostLoopOffset && "Start not-null but PostLoopOffset set?");
+ PostLoopOffset = Start;
+ Start = SE.getConstant(Normalized->getType(), 0);
+ }
+ Normalized =
+ cast<SCEVAddRecExpr>(SE.getAddRecExpr(
+ Start, Step, Normalized->getLoop(),
+ Normalized->getNoWrapFlags(SCEV::FlagNW)));
+ }
+
+ // Expand the core addrec. If we need post-loop scaling, force it to
+ // expand to an integer type to avoid the need for additional casting.
+ Type *ExpandTy = PostLoopScale ? IntTy : STy;
+ // We can't use a pointer type for the addrec if the pointer type is
+ // non-integral.
+ Type *AddRecPHIExpandTy =
+ DL.isNonIntegralPointerType(STy) ? Normalized->getType() : ExpandTy;
+
+ // In some cases, we decide to reuse an existing phi node but need to truncate
+ // it and/or invert the step.
+ Type *TruncTy = nullptr;
+ bool InvertStep = false;
+ PHINode *PN = getAddRecExprPHILiterally(Normalized, L, AddRecPHIExpandTy,
+ IntTy, TruncTy, InvertStep);
+
+ // Accommodate post-inc mode, if necessary.
+ Value *Result;
+ if (!PostIncLoops.count(L))
+ Result = PN;
+ else {
+ // In PostInc mode, use the post-incremented value.
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ assert(LatchBlock && "PostInc mode requires a unique loop latch!");
+ Result = PN->getIncomingValueForBlock(LatchBlock);
+
+ // We might be introducing a new use of the post-inc IV that is not poison
+ // safe, in which case we should drop poison generating flags. Only keep
+ // those flags for which SCEV has proven that they always hold.
+ if (isa<OverflowingBinaryOperator>(Result)) {
+ auto *I = cast<Instruction>(Result);
+ if (!S->hasNoUnsignedWrap())
+ I->setHasNoUnsignedWrap(false);
+ if (!S->hasNoSignedWrap())
+ I->setHasNoSignedWrap(false);
+ }
+
+ // For an expansion to use the postinc form, the client must call
+ // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
+ // or dominated by IVIncInsertPos.
+ if (isa<Instruction>(Result) &&
+ !SE.DT.dominates(cast<Instruction>(Result),
+ &*Builder.GetInsertPoint())) {
+ // The induction variable's postinc expansion does not dominate this use.
+ // IVUsers tries to prevent this case, so it is rare. However, it can
+ // happen when an IVUser outside the loop is not dominated by the latch
+ // block. Adjusting IVIncInsertPos before expansion begins cannot handle
+ // all cases. Consider a phi outside whose operand is replaced during
+ // expansion with the value of the postinc user. Without fundamentally
+ // changing the way postinc users are tracked, the only remedy is
+ // inserting an extra IV increment. StepV might fold into PostLoopOffset,
+ // but hopefully expandCodeFor handles that.
+ bool useSubtract =
+ !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
+ if (useSubtract)
+ Step = SE.getNegativeSCEV(Step);
+ Value *StepV;
+ {
+ // Expand the step somewhere that dominates the loop header.
+ SCEVInsertPointGuard Guard(Builder, this);
+ StepV = expandCodeForImpl(
+ Step, IntTy, &*L->getHeader()->getFirstInsertionPt());
+ }
+ Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
+ }
+ }
+
+ // We have decided to reuse an induction variable of a dominating loop. Apply
+ // truncation and/or inversion of the step.
+ if (TruncTy) {
+ Type *ResTy = Result->getType();
+ // Normalize the result type.
+ if (ResTy != SE.getEffectiveSCEVType(ResTy))
+ Result = InsertNoopCastOfTo(Result, SE.getEffectiveSCEVType(ResTy));
+ // Truncate the result.
+ if (TruncTy != Result->getType())
+ Result = Builder.CreateTrunc(Result, TruncTy);
+
+ // Invert the result.
+ if (InvertStep)
+ Result = Builder.CreateSub(
+ expandCodeForImpl(Normalized->getStart(), TruncTy), Result);
+ }
+
+ // Re-apply any non-loop-dominating scale.
+ if (PostLoopScale) {
+ assert(S->isAffine() && "Can't linearly scale non-affine recurrences.");
+ Result = InsertNoopCastOfTo(Result, IntTy);
+ Result = Builder.CreateMul(Result,
+ expandCodeForImpl(PostLoopScale, IntTy));
+ }
+
+ // Re-apply any non-loop-dominating offset.
+ if (PostLoopOffset) {
+ if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
+ if (Result->getType()->isIntegerTy()) {
+ Value *Base = expandCodeForImpl(PostLoopOffset, ExpandTy);
+ Result = expandAddToGEP(SE.getUnknown(Result), PTy, IntTy, Base);
+ } else {
+ Result = expandAddToGEP(PostLoopOffset, PTy, IntTy, Result);
+ }
+ } else {
+ Result = InsertNoopCastOfTo(Result, IntTy);
+ Result = Builder.CreateAdd(
+ Result, expandCodeForImpl(PostLoopOffset, IntTy));
+ }
+ }
+
+ return Result;
+}
+
+Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
+ // In canonical mode we compute the addrec as an expression of a canonical IV
+ // using evaluateAtIteration and expand the resulting SCEV expression. This
+ // way we avoid introducing new IVs to carry on the computation of the addrec
+ // throughout the loop.
+ //
+ // For nested addrecs evaluateAtIteration might need a canonical IV of a
+ // type wider than the addrec itself. Emitting a canonical IV of the
+ // proper type might produce non-legal types, for example expanding an i64
+ // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall
+ // back to non-canonical mode for nested addrecs.
+ if (!CanonicalMode || (S->getNumOperands() > 2))
+ return expandAddRecExprLiterally(S);
+
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ const Loop *L = S->getLoop();
+
+ // First check for an existing canonical IV in a suitable type.
+ PHINode *CanonicalIV = nullptr;
+ if (PHINode *PN = L->getCanonicalInductionVariable())
+ if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
+ CanonicalIV = PN;
+
+ // Rewrite an AddRec in terms of the canonical induction variable, if
+  // its type is narrower.
+ if (CanonicalIV &&
+ SE.getTypeSizeInBits(CanonicalIV->getType()) > SE.getTypeSizeInBits(Ty) &&
+ !S->getType()->isPointerTy()) {
+ SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
+ for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
+ NewOps[i] = SE.getAnyExtendExpr(S->getOperand(i), CanonicalIV->getType());
+ Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
+ S->getNoWrapFlags(SCEV::FlagNW)));
+ BasicBlock::iterator NewInsertPt =
+ findInsertPointAfter(cast<Instruction>(V), &*Builder.GetInsertPoint());
+ V = expandCodeForImpl(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr,
+ &*NewInsertPt);
+ return V;
+ }
+
+ // {X,+,F} --> X + {0,+,F}
+ if (!S->getStart()->isZero()) {
+ if (PointerType *PTy = dyn_cast<PointerType>(S->getType())) {
+ Value *StartV = expand(SE.getPointerBase(S));
+ assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!");
+ return expandAddToGEP(SE.removePointerBase(S), PTy, Ty, StartV);
+ }
+
+ SmallVector<const SCEV *, 4> NewOps(S->operands());
+ NewOps[0] = SE.getConstant(Ty, 0);
+ const SCEV *Rest = SE.getAddRecExpr(NewOps, L,
+ S->getNoWrapFlags(SCEV::FlagNW));
+
+ // Just do a normal add. Pre-expand the operands to suppress folding.
+ //
+ // The LHS and RHS values are factored out of the expand call to make the
+ // output independent of the argument evaluation order.
+ const SCEV *AddExprLHS = SE.getUnknown(expand(S->getStart()));
+ const SCEV *AddExprRHS = SE.getUnknown(expand(Rest));
+ return expand(SE.getAddExpr(AddExprLHS, AddExprRHS));
+ }
+
+ // If we don't yet have a canonical IV, create one.
+ if (!CanonicalIV) {
+ // Create and insert the PHI node for the induction variable in the
+ // specified loop.
+ BasicBlock *Header = L->getHeader();
+ pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
+ CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar",
+ &Header->front());
+ rememberInstruction(CanonicalIV);
+
+ SmallSet<BasicBlock *, 4> PredSeen;
+ Constant *One = ConstantInt::get(Ty, 1);
+ for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
+ BasicBlock *HP = *HPI;
+ if (!PredSeen.insert(HP).second) {
+ // There must be an incoming value for each predecessor, even the
+ // duplicates!
+ CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP);
+ continue;
+ }
+
+ if (L->contains(HP)) {
+ // Insert a unit add instruction right before the terminator
+ // corresponding to the back-edge.
+ Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
+ "indvar.next",
+ HP->getTerminator());
+ Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
+ rememberInstruction(Add);
+ CanonicalIV->addIncoming(Add, HP);
+ } else {
+ CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
+ }
+ }
+ }
+
+ // {0,+,1} --> Insert a canonical induction variable into the loop!
+ if (S->isAffine() && S->getOperand(1)->isOne()) {
+ assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
+ "IVs with types different from the canonical IV should "
+ "already have been handled!");
+ return CanonicalIV;
+ }
+
+ // {0,+,F} --> {0,+,1} * F
+
+ // If this is a simple linear addrec, emit it now as a special case.
+ if (S->isAffine()) // {0,+,F} --> i*F
+ return
+ expand(SE.getTruncateOrNoop(
+ SE.getMulExpr(SE.getUnknown(CanonicalIV),
+ SE.getNoopOrAnyExtend(S->getOperand(1),
+ CanonicalIV->getType())),
+ Ty));
+
+ // If this is a chain of recurrences, turn it into a closed form, using the
+ // folders, then expandCodeFor the closed form. This allows the folders to
+ // simplify the expression without having to build a bunch of special code
+ // into this folder.
+ const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV.
+
+ // Promote S up to the canonical IV type, if the cast is foldable.
+ const SCEV *NewS = S;
+ const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
+ if (isa<SCEVAddRecExpr>(Ext))
+ NewS = Ext;
+
+ const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
+
+ // Truncate the result down to the original type, if needed.
+ const SCEV *T = SE.getTruncateOrNoop(V, Ty);
+ return expand(T);
+}
+
+Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) {
+ Value *V =
+ expandCodeForImpl(S->getOperand(), S->getOperand()->getType());
+ return ReuseOrCreateCast(V, S->getType(), CastInst::PtrToInt,
+ GetOptimalInsertionPointForCastOf(V));
+}
+
+Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Value *V = expandCodeForImpl(
+ S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType())
+ );
+ return Builder.CreateTrunc(V, Ty);
+}
+
+Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Value *V = expandCodeForImpl(
+ S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType())
+ );
+ return Builder.CreateZExt(V, Ty);
+}
+
+Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Value *V = expandCodeForImpl(
+ S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType())
+ );
+ return Builder.CreateSExt(V, Ty);
+}
+
+Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S,
+ Intrinsic::ID IntrinID, Twine Name,
+ bool IsSequential) {
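+  // For the sequential (umin_seq) form, every operand except the first is
+  // frozen: a plain umin would propagate poison from any operand, while the
+  // sequential semantics only let poison escape from operands that are
+  // reached before the first zero.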
+ Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
+ Type *Ty = LHS->getType();
+ if (IsSequential)
+ LHS = Builder.CreateFreeze(LHS);
+ for (int i = S->getNumOperands() - 2; i >= 0; --i) {
+ Value *RHS = expandCodeForImpl(S->getOperand(i), Ty);
+ if (IsSequential && i != 0)
+ RHS = Builder.CreateFreeze(RHS);
+ Value *Sel;
+ if (Ty->isIntegerTy())
+ Sel = Builder.CreateIntrinsic(IntrinID, {Ty}, {LHS, RHS},
+ /*FMFSource=*/nullptr, Name);
+ else {
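+      // The min/max intrinsics are only defined for integer types, so for
+      // pointer-typed operands fall back to an explicit icmp + select.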
+ Value *ICmp =
+ Builder.CreateICmp(MinMaxIntrinsic::getPredicate(IntrinID), LHS, RHS);
+ Sel = Builder.CreateSelect(ICmp, LHS, RHS, Name);
+ }
+ LHS = Sel;
+ }
+ return LHS;
+}
+
+Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
+ return expandMinMaxExpr(S, Intrinsic::smax, "smax");
+}
+
+Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
+ return expandMinMaxExpr(S, Intrinsic::umax, "umax");
+}
+
+Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
+ return expandMinMaxExpr(S, Intrinsic::smin, "smin");
+}
+
+Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
+ return expandMinMaxExpr(S, Intrinsic::umin, "umin");
+}
+
+Value *SCEVExpander::visitSequentialUMinExpr(const SCEVSequentialUMinExpr *S) {
+ return expandMinMaxExpr(S, Intrinsic::umin, "umin", /*IsSequential*/true);
+}
+
+Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty,
+ Instruction *IP) {
+ setInsertPoint(IP);
+ Value *V = expandCodeForImpl(SH, Ty);
+ return V;
+}
+
+Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty) {
+ // Expand the code for this SCEV.
+ Value *V = expand(SH);
+
+ if (Ty) {
+ assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
+ "non-trivial casts should be done with the SCEVs directly!");
+ V = InsertNoopCastOfTo(V, Ty);
+ }
+ return V;
+}
+
+Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S,
+ const Instruction *InsertPt) {
+ // If the expansion is not in CanonicalMode, and the SCEV contains any
+ // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
+ if (!CanonicalMode && SE.containsAddRecurrence(S))
+ return nullptr;
+
+ // If S is a constant, it may be worse to reuse an existing Value.
+ if (isa<SCEVConstant>(S))
+ return nullptr;
+
+ // Choose a Value from the set which dominates the InsertPt.
+ // InsertPt should be inside the Value's parent loop so as not to break
+ // the LCSSA form.
+ for (Value *V : SE.getSCEVValues(S)) {
+ Instruction *EntInst = dyn_cast<Instruction>(V);
+ if (!EntInst)
+ continue;
+
+ assert(EntInst->getFunction() == InsertPt->getFunction());
+ if (S->getType() == V->getType() &&
+ SE.DT.dominates(EntInst, InsertPt) &&
+ (SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
+ SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
+ return V;
+ }
+ return nullptr;
+}
+
+// The expansion of SCEV will either reuse a previous Value in ExprValueMap,
+// or expand the SCEV literally. Specifically, if the expansion is in LSRMode,
+// and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded
+// literally, to prevent LSR's transformed SCEV from being reverted. Otherwise,
+// the expansion will try to reuse Value from ExprValueMap, and only when it
+// fails, expand the SCEV literally.
+Value *SCEVExpander::expand(const SCEV *S) {
+ // Compute an insertion point for this SCEV object. Hoist the instructions
+ // as far out in the loop nest as possible.
+ Instruction *InsertPt = &*Builder.GetInsertPoint();
+
+  // We can move the insertion point only if there are no div or rem
+  // operations; otherwise we risk hoisting it above a check for a zero
+  // denominator.
+ auto SafeToHoist = [](const SCEV *S) {
+ return !SCEVExprContains(S, [](const SCEV *S) {
+ if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
+ if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
+ // Division by non-zero constants can be hoisted.
+ return SC->getValue()->isZero();
+ // All other divisions should not be moved as they may be
+ // divisions by zero and should be kept within the
+ // conditions of the surrounding loops that guard their
+ // execution (see PR35406).
+ return true;
+ }
+ return false;
+ });
+ };
+ if (SafeToHoist(S)) {
+ for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
+ L = L->getParentLoop()) {
+ if (SE.isLoopInvariant(S, L)) {
+ if (!L) break;
+ if (BasicBlock *Preheader = L->getLoopPreheader())
+ InsertPt = Preheader->getTerminator();
+ else
+ // LSR sets the insertion point for AddRec start/step values to the
+ // block start to simplify value reuse, even though it's an invalid
+ // position. SCEVExpander must correct for this in all cases.
+ InsertPt = &*L->getHeader()->getFirstInsertionPt();
+ } else {
+ // If the SCEV is computable at this level, insert it into the header
+ // after the PHIs (and after any other instructions that we've inserted
+ // there) so that it is guaranteed to dominate any user inside the loop.
+ if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
+ InsertPt = &*L->getHeader()->getFirstInsertionPt();
+
+ while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
+ (isInsertedInstruction(InsertPt) ||
+ isa<DbgInfoIntrinsic>(InsertPt))) {
+ InsertPt = &*std::next(InsertPt->getIterator());
+ }
+ break;
+ }
+ }
+ }
+
+ // Check to see if we already expanded this here.
+ auto I = InsertedExpressions.find(std::make_pair(S, InsertPt));
+ if (I != InsertedExpressions.end())
+ return I->second;
+
+ SCEVInsertPointGuard Guard(Builder, this);
+ Builder.SetInsertPoint(InsertPt);
+
+ // Expand the expression into instructions.
+ Value *V = FindValueInExprValueMap(S, InsertPt);
+ if (!V) {
+ V = visit(S);
+ V = fixupLCSSAFormFor(V);
+ } else {
+ // If we're reusing an existing instruction, we are effectively CSEing two
+ // copies of the instruction (with potentially different flags). As such,
+ // we need to drop any poison generating flags unless we can prove that
+ // said flags must be valid for all new users.
+ if (auto *I = dyn_cast<Instruction>(V))
+ if (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I))
+ I->dropPoisonGeneratingFlags();
+ }
+ // Remember the expanded value for this SCEV at this location.
+ //
+ // This is independent of PostIncLoops. The mapped value simply materializes
+ // the expression at this insertion point. If the mapped value happened to be
+ // a postinc expansion, it could be reused by a non-postinc user, but only if
+ // its insertion point was already at the head of the loop.
+ InsertedExpressions[std::make_pair(S, InsertPt)] = V;
+ return V;
+}
+
+void SCEVExpander::rememberInstruction(Value *I) {
+ auto DoInsert = [this](Value *V) {
+ if (!PostIncLoops.empty())
+ InsertedPostIncValues.insert(V);
+ else
+ InsertedValues.insert(V);
+ };
+ DoInsert(I);
+}
+
+/// replaceCongruentIVs - Check for congruent phis in this loop header and
+/// replace them with their most canonical representative. Return the number of
+/// phis eliminated.
+///
+/// This does not depend on any SCEVExpander state but should be used in
+/// the same context that SCEVExpander is used.
+unsigned
+SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts,
+ const TargetTransformInfo *TTI) {
+  // Find integer phis, sorted below in order of decreasing width.
+ SmallVector<PHINode*, 8> Phis;
+ for (PHINode &PN : L->getHeader()->phis())
+ Phis.push_back(&PN);
+
+ if (TTI)
+ // Use stable_sort to preserve order of equivalent PHIs, so the order
+ // of the sorted Phis is the same from run to run on the same loop.
+ llvm::stable_sort(Phis, [](Value *LHS, Value *RHS) {
+ // Put pointers at the back and make sure pointer < pointer = false.
+ if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
+ return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
+ return RHS->getType()->getPrimitiveSizeInBits().getFixedValue() <
+ LHS->getType()->getPrimitiveSizeInBits().getFixedValue();
+ });
+
+ unsigned NumElim = 0;
+ DenseMap<const SCEV *, PHINode *> ExprToIVMap;
+ // Process phis from wide to narrow. Map wide phis to their truncation
+ // so narrow phis can reuse them.
+ for (PHINode *Phi : Phis) {
+ auto SimplifyPHINode = [&](PHINode *PN) -> Value * {
+ if (Value *V = simplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC}))
+ return V;
+ if (!SE.isSCEVable(PN->getType()))
+ return nullptr;
+ auto *Const = dyn_cast<SCEVConstant>(SE.getSCEV(PN));
+ if (!Const)
+ return nullptr;
+ return Const->getValue();
+ };
+
+ // Fold constant phis. They may be congruent to other constant phis and
+ // would confuse the logic below that expects proper IVs.
+ if (Value *V = SimplifyPHINode(Phi)) {
+ if (V->getType() != Phi->getType())
+ continue;
+ SE.forgetValue(Phi);
+ Phi->replaceAllUsesWith(V);
+ DeadInsts.emplace_back(Phi);
+ ++NumElim;
+ SCEV_DEBUG_WITH_TYPE(DebugType,
+ dbgs() << "INDVARS: Eliminated constant iv: " << *Phi
+ << '\n');
+ continue;
+ }
+
+ if (!SE.isSCEVable(Phi->getType()))
+ continue;
+
+ PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
+ if (!OrigPhiRef) {
+ OrigPhiRef = Phi;
+ if (Phi->getType()->isIntegerTy() && TTI &&
+ TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
+ // This phi can be freely truncated to the narrowest phi type. Map the
+ // truncated expression to it so it will be reused for narrow types.
+ const SCEV *TruncExpr =
+ SE.getTruncateExpr(SE.getSCEV(Phi), Phis.back()->getType());
+ ExprToIVMap[TruncExpr] = Phi;
+ }
+ continue;
+ }
+
+ // Replacing a pointer phi with an integer phi or vice-versa doesn't make
+ // sense.
+ if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
+ continue;
+
+ if (BasicBlock *LatchBlock = L->getLoopLatch()) {
+ Instruction *OrigInc = dyn_cast<Instruction>(
+ OrigPhiRef->getIncomingValueForBlock(LatchBlock));
+ Instruction *IsomorphicInc =
+ dyn_cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
+
+ if (OrigInc && IsomorphicInc) {
+ // If this phi has the same width but is more canonical, replace the
+ // original with it. As part of the "more canonical" determination,
+ // respect a prior decision to use an IV chain.
+ if (OrigPhiRef->getType() == Phi->getType() &&
+ !(ChainedPhis.count(Phi) ||
+ isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L)) &&
+ (ChainedPhis.count(Phi) ||
+ isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) {
+ std::swap(OrigPhiRef, Phi);
+ std::swap(OrigInc, IsomorphicInc);
+ }
+ // Replacing the congruent phi is sufficient because acyclic
+ // redundancy elimination, CSE/GVN, should handle the
+ // rest. However, once SCEV proves that a phi is congruent,
+ // it's often the head of an IV user cycle that is isomorphic
+ // with the original phi. It's worth eagerly cleaning up the
+ // common case of a single IV increment so that DeleteDeadPHIs
+ // can remove cycles that had postinc uses.
+ // Because we may potentially introduce a new use of OrigIV that didn't
+ // exist before at this point, its poison flags need readjustment.
+ const SCEV *TruncExpr =
+ SE.getTruncateOrNoop(SE.getSCEV(OrigInc), IsomorphicInc->getType());
+ if (OrigInc != IsomorphicInc &&
+ TruncExpr == SE.getSCEV(IsomorphicInc) &&
+ SE.LI.replacementPreservesLCSSAForm(IsomorphicInc, OrigInc) &&
+ hoistIVInc(OrigInc, IsomorphicInc, /*RecomputePoisonFlags*/ true)) {
+ SCEV_DEBUG_WITH_TYPE(
+ DebugType, dbgs() << "INDVARS: Eliminated congruent iv.inc: "
+ << *IsomorphicInc << '\n');
+ Value *NewInc = OrigInc;
+ if (OrigInc->getType() != IsomorphicInc->getType()) {
+ Instruction *IP = nullptr;
+ if (PHINode *PN = dyn_cast<PHINode>(OrigInc))
+ IP = &*PN->getParent()->getFirstInsertionPt();
+ else
+ IP = OrigInc->getNextNode();
+
+ IRBuilder<> Builder(IP);
+ Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
+ NewInc = Builder.CreateTruncOrBitCast(
+ OrigInc, IsomorphicInc->getType(), IVName);
+ }
+ IsomorphicInc->replaceAllUsesWith(NewInc);
+ DeadInsts.emplace_back(IsomorphicInc);
+ }
+ }
+ }
+ SCEV_DEBUG_WITH_TYPE(DebugType,
+ dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi
+ << '\n');
+ SCEV_DEBUG_WITH_TYPE(
+ DebugType, dbgs() << "INDVARS: Original iv: " << *OrigPhiRef << '\n');
+ ++NumElim;
+ Value *NewIV = OrigPhiRef;
+ if (OrigPhiRef->getType() != Phi->getType()) {
+ IRBuilder<> Builder(&*L->getHeader()->getFirstInsertionPt());
+ Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
+ NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
+ }
+ Phi->replaceAllUsesWith(NewIV);
+ DeadInsts.emplace_back(Phi);
+ }
+ return NumElim;
+}
+
+Value *SCEVExpander::getRelatedExistingExpansion(const SCEV *S,
+ const Instruction *At,
+ Loop *L) {
+ using namespace llvm::PatternMatch;
+
+ SmallVector<BasicBlock *, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ // Look for suitable value in simple conditions at the loop exits.
+ for (BasicBlock *BB : ExitingBlocks) {
+ ICmpInst::Predicate Pred;
+ Instruction *LHS, *RHS;
+
+ if (!match(BB->getTerminator(),
+ m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
+ m_BasicBlock(), m_BasicBlock())))
+ continue;
+
+ if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
+ return LHS;
+
+ if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
+ return RHS;
+ }
+
+ // Use expand's logic which is used for reusing a previous Value in
+ // ExprValueMap. Note that we don't currently model the cost of
+ // needing to drop poison generating flags on the instruction if we
+ // want to reuse it. We effectively assume that has zero cost.
+ return FindValueInExprValueMap(S, At);
+}
+
+template<typename T> static InstructionCost costAndCollectOperands(
+ const SCEVOperand &WorkItem, const TargetTransformInfo &TTI,
+ TargetTransformInfo::TargetCostKind CostKind,
+ SmallVectorImpl<SCEVOperand> &Worklist) {
+
+ const T *S = cast<T>(WorkItem.S);
+ InstructionCost Cost = 0;
+ // Object to help map SCEV operands to expanded IR instructions.
+ struct OperationIndices {
+ OperationIndices(unsigned Opc, size_t min, size_t max) :
+ Opcode(Opc), MinIdx(min), MaxIdx(max) { }
+ unsigned Opcode;
+ size_t MinIdx;
+ size_t MaxIdx;
+ };
+
+ // Collect the operations of all the instructions that will be needed to
+ // expand the SCEVExpr. This is so that when we come to cost the operands,
+ // we know what the generated user(s) will be.
+ SmallVector<OperationIndices, 2> Operations;
+
+ auto CastCost = [&](unsigned Opcode) -> InstructionCost {
+ Operations.emplace_back(Opcode, 0, 0);
+ return TTI.getCastInstrCost(Opcode, S->getType(),
+ S->getOperand(0)->getType(),
+ TTI::CastContextHint::None, CostKind);
+ };
+
+ auto ArithCost = [&](unsigned Opcode, unsigned NumRequired,
+ unsigned MinIdx = 0,
+ unsigned MaxIdx = 1) -> InstructionCost {
+ Operations.emplace_back(Opcode, MinIdx, MaxIdx);
+ return NumRequired *
+ TTI.getArithmeticInstrCost(Opcode, S->getType(), CostKind);
+ };
+
+ auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, unsigned MinIdx,
+ unsigned MaxIdx) -> InstructionCost {
+ Operations.emplace_back(Opcode, MinIdx, MaxIdx);
+ Type *OpType = S->getType();
+ return NumRequired * TTI.getCmpSelInstrCost(
+ Opcode, OpType, CmpInst::makeCmpResultType(OpType),
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ };
+
+ switch (S->getSCEVType()) {
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ case scUnknown:
+ case scConstant:
+ return 0;
+ case scPtrToInt:
+ Cost = CastCost(Instruction::PtrToInt);
+ break;
+ case scTruncate:
+ Cost = CastCost(Instruction::Trunc);
+ break;
+ case scZeroExtend:
+ Cost = CastCost(Instruction::ZExt);
+ break;
+ case scSignExtend:
+ Cost = CastCost(Instruction::SExt);
+ break;
+ case scUDivExpr: {
+ unsigned Opcode = Instruction::UDiv;
+ if (auto *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
+ if (SC->getAPInt().isPowerOf2())
+ Opcode = Instruction::LShr;
+ Cost = ArithCost(Opcode, 1);
+ break;
+ }
+ case scAddExpr:
+ Cost = ArithCost(Instruction::Add, S->getNumOperands() - 1);
+ break;
+ case scMulExpr:
+ // TODO: this is a very pessimistic cost model for Mul, because of the
+ // binary exponentiation (Bin Pow) algorithm actually used by the expander;
+ // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
+ Cost = ArithCost(Instruction::Mul, S->getNumOperands() - 1);
+ break;
+ case scSMaxExpr:
+ case scUMaxExpr:
+ case scSMinExpr:
+ case scUMinExpr:
+ case scSequentialUMinExpr: {
+ // FIXME: should this ask for the cost of intrinsics?
+ // The reduction tree.
+ Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1);
+ Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2);
+ switch (S->getSCEVType()) {
+ case scSequentialUMinExpr: {
+ // The safety net against poison.
+ // FIXME: this is broken.
+ Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 0);
+ Cost += ArithCost(Instruction::Or,
+ S->getNumOperands() > 2 ? S->getNumOperands() - 2 : 0);
+ Cost += CmpSelCost(Instruction::Select, 1, 0, 1);
+ break;
+ }
+ default:
+ assert(!isa<SCEVSequentialMinMaxExpr>(S) &&
+ "Unhandled SCEV expression type?");
+ break;
+ }
+ break;
+ }
+ case scAddRecExpr: {
+ // In this polynomial, we may have some zero operands, and we shouldn't
+ // really charge for those. So how many non-zero coefficients are there?
+ int NumTerms = llvm::count_if(S->operands(), [](const SCEV *Op) {
+ return !Op->isZero();
+ });
+
+ assert(NumTerms >= 1 && "Polynomial should have at least one term.");
+ assert(!(*std::prev(S->operands().end()))->isZero() &&
+ "Last operand should not be zero");
+
+ // Ignoring constant term (operand 0), how many of the coefficients are u> 1?
+ int NumNonZeroDegreeNonOneTerms =
+ llvm::count_if(S->operands(), [](const SCEV *Op) {
+ auto *SConst = dyn_cast<SCEVConstant>(Op);
+ return !SConst || SConst->getAPInt().ugt(1);
+ });
+
+ // Much like with a normal add expr, the polynomial will require
+ // one less addition than the number of its terms.
+ InstructionCost AddCost = ArithCost(Instruction::Add, NumTerms - 1,
+ /*MinIdx*/ 1, /*MaxIdx*/ 1);
+ // Here, *each* one of those will require a multiplication.
+ InstructionCost MulCost =
+ ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms);
+ Cost = AddCost + MulCost;
+
+ // What is the degree of this polynomial?
+ int PolyDegree = S->getNumOperands() - 1;
+ assert(PolyDegree >= 1 && "Should be at least affine.");
+
+ // The final term will be:
+ // Op_{PolyDegree} * x ^ {PolyDegree}
+ // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations.
+ // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for
+ // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free.
+ // FIXME: this is conservatively correct, but might be overly pessimistic.
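+ // For illustration: with PolyDegree == 3, x^3 is computed as x * x^2,
+ // i.e. two multiplications, and x^2 falls out along the way for free.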
+ Cost += MulCost * (PolyDegree - 1);
+ break;
+ }
+ }
+
+ for (auto &CostOp : Operations) {
+ for (auto SCEVOp : enumerate(S->operands())) {
+ // Clamp the index to account for multiple IR operations being chained.
+ size_t MinIdx = std::max(SCEVOp.index(), CostOp.MinIdx);
+ size_t OpIdx = std::min(MinIdx, CostOp.MaxIdx);
+ Worklist.emplace_back(CostOp.Opcode, OpIdx, SCEVOp.value());
+ }
+ }
+ return Cost;
+}
+
+bool SCEVExpander::isHighCostExpansionHelper(
+ const SCEVOperand &WorkItem, Loop *L, const Instruction &At,
+ InstructionCost &Cost, unsigned Budget, const TargetTransformInfo &TTI,
+ SmallPtrSetImpl<const SCEV *> &Processed,
+ SmallVectorImpl<SCEVOperand> &Worklist) {
+ if (Cost > Budget)
+ return true; // Already run out of budget, give up.
+
+ const SCEV *S = WorkItem.S;
+ // Was the cost of expansion of this expression already accounted for?
+ if (!isa<SCEVConstant>(S) && !Processed.insert(S).second)
+ return false; // We have already accounted for this expression.
+
+ // If we can find an existing value for this scev available at the point "At"
+ // then consider the expression cheap.
+ if (getRelatedExistingExpansion(S, &At, L))
+ return false; // Consider the expression to be free.
+
+ TargetTransformInfo::TargetCostKind CostKind =
+ L->getHeader()->getParent()->hasMinSize()
+ ? TargetTransformInfo::TCK_CodeSize
+ : TargetTransformInfo::TCK_RecipThroughput;
+
+ switch (S->getSCEVType()) {
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ case scUnknown:
+ // Assume to be zero-cost.
+ return false;
+ case scConstant: {
+ // Only evaluate the costs of constants when optimizing for size.
+ if (CostKind != TargetTransformInfo::TCK_CodeSize)
+ return false;
+ const APInt &Imm = cast<SCEVConstant>(S)->getAPInt();
+ Type *Ty = S->getType();
+ Cost += TTI.getIntImmCostInst(
+ WorkItem.ParentOpcode, WorkItem.OperandIdx, Imm, Ty, CostKind);
+ return Cost > Budget;
+ }
+ case scTruncate:
+ case scPtrToInt:
+ case scZeroExtend:
+ case scSignExtend: {
+ Cost +=
+ costAndCollectOperands<SCEVCastExpr>(WorkItem, TTI, CostKind, Worklist);
+ return false; // Will answer upon next entry into this function.
+ }
+ case scUDivExpr: {
+ // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
+ // HowManyLessThans produced to compute a precise expression, rather than a
+ // UDiv from the user's code. If we can't find a UDiv in the code with some
+ // simple searching, we need to account for its cost.
+
+ // At the beginning of this function we already tried to find an existing
+ // value for plain 'S'. Now try to look up 'S + 1', since it is a common
+ // pattern involving division. This is just a simple search heuristic.
+ if (getRelatedExistingExpansion(
+ SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L))
+ return false; // Consider it to be free.
+
+ Cost +=
+ costAndCollectOperands<SCEVUDivExpr>(WorkItem, TTI, CostKind, Worklist);
+ return false; // Will answer upon next entry into this function.
+ }
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr:
+ case scUMinExpr:
+ case scSMinExpr:
+ case scSequentialUMinExpr: {
+ assert(cast<SCEVNAryExpr>(S)->getNumOperands() > 1 &&
+ "Nary expr should have more than 1 operand.");
+ // The simple nary expr will require one less op (or pair of ops)
+ // than the number of its terms.
+ Cost +=
+ costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind, Worklist);
+ return Cost > Budget;
+ }
+ case scAddRecExpr: {
+ assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= 2 &&
+ "Polynomial should be at least linear");
+ Cost += costAndCollectOperands<SCEVAddRecExpr>(
+ WorkItem, TTI, CostKind, Worklist);
+ return Cost > Budget;
+ }
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+}
+
+Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
+ Instruction *IP) {
+ assert(IP);
+ switch (Pred->getKind()) {
+ case SCEVPredicate::P_Union:
+ return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP);
+ case SCEVPredicate::P_Compare:
+ return expandComparePredicate(cast<SCEVComparePredicate>(Pred), IP);
+ case SCEVPredicate::P_Wrap: {
+ auto *AddRecPred = cast<SCEVWrapPredicate>(Pred);
+ return expandWrapPredicate(AddRecPred, IP);
+ }
+ }
+ llvm_unreachable("Unknown SCEV predicate type");
+}
+
+Value *SCEVExpander::expandComparePredicate(const SCEVComparePredicate *Pred,
+ Instruction *IP) {
+ Value *Expr0 =
+ expandCodeForImpl(Pred->getLHS(), Pred->getLHS()->getType(), IP);
+ Value *Expr1 =
+ expandCodeForImpl(Pred->getRHS(), Pred->getRHS()->getType(), IP);
+
+ Builder.SetInsertPoint(IP);
+ auto InvPred = ICmpInst::getInversePredicate(Pred->getPredicate());
+ auto *I = Builder.CreateICmp(InvPred, Expr0, Expr1, "ident.check");
+ return I;
+}
+
+Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
+ Instruction *Loc, bool Signed) {
+ assert(AR->isAffine() && "Cannot generate RT check for "
+ "non-affine expression");
+
+ // FIXME: It is highly suspicious that we're ignoring the predicates here.
+ SmallVector<const SCEVPredicate *, 4> Pred;
+ const SCEV *ExitCount =
+ SE.getPredicatedBackedgeTakenCount(AR->getLoop(), Pred);
+
+ assert(!isa<SCEVCouldNotCompute>(ExitCount) && "Invalid loop count");
+
+ const SCEV *Step = AR->getStepRecurrence(SE);
+ const SCEV *Start = AR->getStart();
+
+ Type *ARTy = AR->getType();
+ unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType());
+ unsigned DstBits = SE.getTypeSizeInBits(ARTy);
+
+ // The expression {Start,+,Step} has nusw/nssw if
+ // Step < 0, Start - |Step| * Backedge <= Start
+ // Step >= 0, Start + |Step| * Backedge > Start
+ // and |Step| * Backedge doesn't unsigned overflow.
+
+ IntegerType *CountTy = IntegerType::get(Loc->getContext(), SrcBits);
+ Builder.SetInsertPoint(Loc);
+ Value *TripCountVal = expandCodeForImpl(ExitCount, CountTy, Loc);
+
+ IntegerType *Ty =
+ IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
+
+ Value *StepValue = expandCodeForImpl(Step, Ty, Loc);
+ Value *NegStepValue =
+ expandCodeForImpl(SE.getNegativeSCEV(Step), Ty, Loc);
+ Value *StartValue = expandCodeForImpl(Start, ARTy, Loc);
+
+ ConstantInt *Zero =
+ ConstantInt::get(Loc->getContext(), APInt::getZero(DstBits));
+
+ Builder.SetInsertPoint(Loc);
+ // Compute |Step|
+ Value *StepCompare = Builder.CreateICmp(ICmpInst::ICMP_SLT, StepValue, Zero);
+ Value *AbsStep = Builder.CreateSelect(StepCompare, NegStepValue, StepValue);
+
+ // Compute |Step| * Backedge
+ // Compute:
+ // 1. Start + |Step| * Backedge < Start
+ // 2. Start - |Step| * Backedge > Start
+ //
+ // And select either 1. or 2. depending on whether step is positive or
+ // negative. If Step is known to be positive or negative, only create
+ // either 1. or 2.
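+ // Sketch of the check emitted below (assuming both checks are needed and
+ // ARTy is an integer type; names are illustrative):
+ // %mul = umul.with.overflow(|Step|, BackedgeTakenCount)
+ // %add = add Start, %mul.result ; %cmp.lt = icmp (s|u)lt %add, Start
+ // %sub = sub Start, %mul.result ; %cmp.gt = icmp (s|u)gt %sub, Start
+ // %end = select (Step < 0), %cmp.gt, %cmp.lt
+ // result = or %end, %mul.overflow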
+ auto ComputeEndCheck = [&]() -> Value * {
+ // Checking <u 0 is always false.
+ if (!Signed && Start->isZero() && SE.isKnownPositive(Step))
+ return ConstantInt::getFalse(Loc->getContext());
+
+ // Get the backedge taken count and truncate or extend it to the AR type.
+ Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);
+
+ Value *MulV, *OfMul;
+ if (Step->isOne()) {
+ // Special-case Step of one. The potentially costly `umul_with_overflow`
+ // isn't needed: there is never an overflow, so to avoid artificially
+ // inflating the cost of the check, directly emit the optimized IR.
+ MulV = TruncTripCount;
+ OfMul = ConstantInt::getFalse(MulV->getContext());
+ } else {
+ auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
+ Intrinsic::umul_with_overflow, Ty);
+ CallInst *Mul =
+ Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
+ MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
+ OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
+ }
+
+ Value *Add = nullptr, *Sub = nullptr;
+ bool NeedPosCheck = !SE.isKnownNegative(Step);
+ bool NeedNegCheck = !SE.isKnownPositive(Step);
+
+ if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) {
+ StartValue = InsertNoopCastOfTo(
+ StartValue, Builder.getInt8PtrTy(ARPtrTy->getAddressSpace()));
+ Value *NegMulV = Builder.CreateNeg(MulV);
+ if (NeedPosCheck)
+ Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV);
+ if (NeedNegCheck)
+ Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV);
+ } else {
+ if (NeedPosCheck)
+ Add = Builder.CreateAdd(StartValue, MulV);
+ if (NeedNegCheck)
+ Sub = Builder.CreateSub(StartValue, MulV);
+ }
+
+ Value *EndCompareLT = nullptr;
+ Value *EndCompareGT = nullptr;
+ Value *EndCheck = nullptr;
+ if (NeedPosCheck)
+ EndCheck = EndCompareLT = Builder.CreateICmp(
+ Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
+ if (NeedNegCheck)
+ EndCheck = EndCompareGT = Builder.CreateICmp(
+ Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
+ if (NeedPosCheck && NeedNegCheck) {
+ // Select the answer based on the sign of Step.
+ EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
+ }
+ return Builder.CreateOr(EndCheck, OfMul);
+ };
+ Value *EndCheck = ComputeEndCheck();
+
+ // If the backedge taken count type is larger than the AR type,
+ // check that we don't drop any bits by truncating it. If we are
+ // dropping bits, then we have overflow (unless the step is zero).
+ if (SE.getTypeSizeInBits(CountTy) > SE.getTypeSizeInBits(Ty)) {
+ auto MaxVal = APInt::getMaxValue(DstBits).zext(SrcBits);
+ auto *BackedgeCheck =
+ Builder.CreateICmp(ICmpInst::ICMP_UGT, TripCountVal,
+ ConstantInt::get(Loc->getContext(), MaxVal));
+ BackedgeCheck = Builder.CreateAnd(
+ BackedgeCheck, Builder.CreateICmp(ICmpInst::ICMP_NE, StepValue, Zero));
+
+ EndCheck = Builder.CreateOr(EndCheck, BackedgeCheck);
+ }
+
+ return EndCheck;
+}
+
+Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred,
+ Instruction *IP) {
+ const auto *A = cast<SCEVAddRecExpr>(Pred->getExpr());
+ Value *NSSWCheck = nullptr, *NUSWCheck = nullptr;
+
+ // Add a check for NUSW
+ if (Pred->getFlags() & SCEVWrapPredicate::IncrementNUSW)
+ NUSWCheck = generateOverflowCheck(A, IP, false);
+
+ // Add a check for NSSW
+ if (Pred->getFlags() & SCEVWrapPredicate::IncrementNSSW)
+ NSSWCheck = generateOverflowCheck(A, IP, true);
+
+ if (NUSWCheck && NSSWCheck)
+ return Builder.CreateOr(NUSWCheck, NSSWCheck);
+
+ if (NUSWCheck)
+ return NUSWCheck;
+
+ if (NSSWCheck)
+ return NSSWCheck;
+
+ return ConstantInt::getFalse(IP->getContext());
+}
+
+Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union,
+ Instruction *IP) {
+ // Loop over all checks in this set.
+ SmallVector<Value *> Checks;
+ for (const auto *Pred : Union->getPredicates()) {
+ Checks.push_back(expandCodeForPredicate(Pred, IP));
+ Builder.SetInsertPoint(IP);
+ }
+
+ if (Checks.empty())
+ return ConstantInt::getFalse(IP->getContext());
+ return Builder.CreateOr(Checks);
+}
+
+Value *SCEVExpander::fixupLCSSAFormFor(Value *V) {
+ auto *DefI = dyn_cast<Instruction>(V);
+ if (!PreserveLCSSA || !DefI)
+ return V;
+
+ Instruction *InsertPt = &*Builder.GetInsertPoint();
+ Loop *DefLoop = SE.LI.getLoopFor(DefI->getParent());
+ Loop *UseLoop = SE.LI.getLoopFor(InsertPt->getParent());
+ if (!DefLoop || UseLoop == DefLoop || DefLoop->contains(UseLoop))
+ return V;
+
+ // Create a temporary instruction at the current insertion point, so we
+ // can hand it off to the helper to create LCSSA PHIs if required for the
+ // new use.
+ // FIXME: Ideally formLCSSAForInstructions (used in fixupLCSSAFormFor)
+ // would accept an insertion point and return an LCSSA phi for that
+ // insertion point, so there is no need to insert & remove the temporary
+ // instruction.
+ Type *ToTy;
+ if (DefI->getType()->isIntegerTy())
+ ToTy = DefI->getType()->getPointerTo();
+ else
+ ToTy = Type::getInt32Ty(DefI->getContext());
+ Instruction *User =
+ CastInst::CreateBitOrPointerCast(DefI, ToTy, "tmp.lcssa.user", InsertPt);
+ auto RemoveUserOnExit =
+ make_scope_exit([User]() { User->eraseFromParent(); });
+
+ SmallVector<Instruction *, 1> ToUpdate;
+ ToUpdate.push_back(DefI);
+ SmallVector<PHINode *, 16> PHIsToRemove;
+ formLCSSAForInstructions(ToUpdate, SE.DT, SE.LI, &SE, Builder, &PHIsToRemove);
+ for (PHINode *PN : PHIsToRemove) {
+ if (!PN->use_empty())
+ continue;
+ InsertedValues.erase(PN);
+ InsertedPostIncValues.erase(PN);
+ PN->eraseFromParent();
+ }
+
+ return User->getOperand(0);
+}
+
+namespace {
+// Search for a SCEV subexpression that is not safe to expand. Any expression
+// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
+// UDiv expressions. We don't know if the UDiv is derived from an IR divide
+// instruction, but the important thing is that we prove the denominator is
+// nonzero before expansion.
+//
+// IVUsers already checks that IV-derived expressions are safe. So this check is
+// only needed when the expression includes some subexpression that is not IV
+// derived.
+//
+// Currently, we only allow division by a value provably non-zero here.
+//
+// We cannot generally expand recurrences unless the step dominates the loop
+// header. The expander handles the special case of affine recurrences by
+// scaling the recurrence outside the loop, but this technique isn't generally
+// applicable. Expanding a nested recurrence outside a loop requires computing
+// binomial coefficients. This could be done, but the recurrence has to be in a
+// perfectly reduced form, which can't be guaranteed.
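+//
+// For example (with illustrative SCEVs): (%n /u %m) is only considered safe
+// to expand when %m is provably non-zero, since the expanded udiv could
+// otherwise trap.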
+struct SCEVFindUnsafe {
+ ScalarEvolution &SE;
+ bool CanonicalMode;
+ bool IsUnsafe = false;
+
+ SCEVFindUnsafe(ScalarEvolution &SE, bool CanonicalMode)
+ : SE(SE), CanonicalMode(CanonicalMode) {}
+
+ bool follow(const SCEV *S) {
+ if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
+ if (!SE.isKnownNonZero(D->getRHS())) {
+ IsUnsafe = true;
+ return false;
+ }
+ }
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEV *Step = AR->getStepRecurrence(SE);
+ if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) {
+ IsUnsafe = true;
+ return false;
+ }
+
+ // For non-affine addrecs or in non-canonical mode we need a preheader
+ // to insert into.
+ if (!AR->getLoop()->getLoopPreheader() &&
+ (!CanonicalMode || !AR->isAffine())) {
+ IsUnsafe = true;
+ return false;
+ }
+ }
+ return true;
+ }
+ bool isDone() const { return IsUnsafe; }
+};
+} // namespace
+
+bool SCEVExpander::isSafeToExpand(const SCEV *S) const {
+ SCEVFindUnsafe Search(SE, CanonicalMode);
+ visitAll(S, Search);
+ return !Search.IsUnsafe;
+}
+
+bool SCEVExpander::isSafeToExpandAt(const SCEV *S,
+ const Instruction *InsertionPoint) const {
+ if (!isSafeToExpand(S))
+ return false;
+ // We have to prove that the expanded site of S dominates InsertionPoint.
+ // This is easy when not in the same block, but hard when S is an instruction
+ // to be expanded somewhere inside the same block as our insertion point.
+ // What we really need here is something analogous to an OrderedBasicBlock,
+ // but for the moment, we paper over the problem by handling two common and
+ // cheap to check cases.
+ if (SE.properlyDominates(S, InsertionPoint->getParent()))
+ return true;
+ if (SE.dominates(S, InsertionPoint->getParent())) {
+ if (InsertionPoint->getParent()->getTerminator() == InsertionPoint)
+ return true;
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S))
+ if (llvm::is_contained(InsertionPoint->operand_values(), U->getValue()))
+ return true;
+ }
+ return false;
+}
+
+void SCEVExpanderCleaner::cleanup() {
+ // Result is used, nothing to remove.
+ if (ResultUsed)
+ return;
+
+ auto InsertedInstructions = Expander.getAllInsertedInstructions();
+#ifndef NDEBUG
+ SmallPtrSet<Instruction *, 8> InsertedSet(InsertedInstructions.begin(),
+ InsertedInstructions.end());
+ (void)InsertedSet;
+#endif
+ // Remove sets with value handles.
+ Expander.clear();
+
+ // Remove all inserted instructions.
+ for (Instruction *I : reverse(InsertedInstructions)) {
+#ifndef NDEBUG
+ assert(all_of(I->users(),
+ [&InsertedSet](Value *U) {
+ return InsertedSet.contains(cast<Instruction>(U));
+ }) &&
+ "removed instruction should only be used by instructions inserted "
+ "during expansion");
+#endif
+ assert(!I->getType()->isVoidTy() &&
+ "inserted instruction should have non-void types");
+ I->replaceAllUsesWith(PoisonValue::get(I->getType()));
+ I->eraseFromParent();
+ }
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/SimplifyCFG.cpp
new file mode 100644
index 0000000000..9e0483966d
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -0,0 +1,7341 @@
+//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Peephole optimize the CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/Sequence.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/GuardUtils.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/NoFolder.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <algorithm>
+#include <cassert>
+#include <climits>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <map>
+#include <optional>
+#include <set>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "simplifycfg"
+
+cl::opt<bool> llvm::RequireAndPreserveDomTree(
+ "simplifycfg-require-and-preserve-domtree", cl::Hidden,
+ cl::desc("Temporary development switch used to gradually uplift SimplifyCFG "
+ "into preserving DomTree"));
+
+// Chosen as 2 so as to be cheap, but still to have enough power to fold
+// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
+// To catch this, we need to fold a compare and a select, hence '2' being the
+// minimum reasonable default.
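+// Illustration (made-up IR): a clamp is a min feeding a max, e.g.
+// %lo.bound = select i1 %cmp.lo, i32 %lo, i32 %x
+// %cmp.hi = icmp sgt i32 %lo.bound, %hi
+// %res = select i1 %cmp.hi, i32 %hi, i32 %lo.bound
+// so catching it means being able to fold a compare and a select.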
+static cl::opt<unsigned> PHINodeFoldingThreshold(
+ "phi-node-folding-threshold", cl::Hidden, cl::init(2),
+ cl::desc(
+ "Control the amount of phi node folding to perform (default = 2)"));
+
+static cl::opt<unsigned> TwoEntryPHINodeFoldingThreshold(
+ "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
+ cl::desc("Control the maximal total instruction cost that we are willing "
+ "to speculatively execute to fold a 2-entry PHI node into a "
+ "select (default = 4)"));
+
+static cl::opt<bool>
+ HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
+ cl::desc("Hoist common instructions up to the parent block"));
+
+static cl::opt<unsigned>
+ HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
+ cl::init(20),
+ cl::desc("Allow reordering across at most this many "
+ "instructions when hoisting"));
+
+static cl::opt<bool>
+ SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
+ cl::desc("Sink common instructions down to the end block"));
+
+static cl::opt<bool> HoistCondStores(
+ "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
+ cl::desc("Hoist conditional stores if an unconditional store precedes"));
+
+static cl::opt<bool> MergeCondStores(
+ "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
+ cl::desc("Hoist conditional stores even if an unconditional store does not "
+ "precede - hoist multiple conditional stores into a single "
+ "predicated store"));
+
+static cl::opt<bool> MergeCondStoresAggressively(
+ "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
+ cl::desc("When merging conditional stores, do so even if the resultant "
+ "basic blocks are unlikely to be if-converted as a result"));
+
+static cl::opt<bool> SpeculateOneExpensiveInst(
+ "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
+ cl::desc("Allow exactly one expensive instruction to be speculatively "
+ "executed"));
+
+static cl::opt<unsigned> MaxSpeculationDepth(
+ "max-speculation-depth", cl::Hidden, cl::init(10),
+ cl::desc("Limit maximum recursion depth when calculating costs of "
+ "speculatively executed instructions"));
+
+static cl::opt<int>
+ MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
+ cl::init(10),
+ cl::desc("Max size of a block which is still considered "
+ "small enough to thread through"));
+
+// Two is chosen to allow one negation and a logical combine.
+static cl::opt<unsigned>
+ BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
+ cl::init(2),
+ cl::desc("Maximum cost of combining conditions when "
+ "folding branches"));
+
+static cl::opt<unsigned> BranchFoldToCommonDestVectorMultiplier(
+ "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
+ cl::init(2),
+ cl::desc("Multiplier to apply to threshold when determining whether or not "
+ "to fold branch to common destination when vector operations are "
+ "present"));
+
+static cl::opt<bool> EnableMergeCompatibleInvokes(
+ "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
+ cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
+
+static cl::opt<unsigned> MaxSwitchCasesPerResult(
+ "max-switch-cases-per-result", cl::Hidden, cl::init(16),
+ cl::desc("Limit cases to analyze when converting a switch to select"));
+
+STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
+STATISTIC(NumLinearMaps,
+ "Number of switch instructions turned into linear mapping");
+STATISTIC(NumLookupTables,
+ "Number of switch instructions turned into lookup tables");
+STATISTIC(
+ NumLookupTablesHoles,
+ "Number of switch instructions turned into lookup tables (holes checked)");
+STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
+STATISTIC(NumFoldValueComparisonIntoPredecessors,
+ "Number of value comparisons folded into predecessor basic blocks");
+STATISTIC(NumFoldBranchToCommonDest,
+ "Number of branches folded into predecessor basic block");
+STATISTIC(
+ NumHoistCommonCode,
+ "Number of common instruction 'blocks' hoisted up to the begin block");
+STATISTIC(NumHoistCommonInstrs,
+ "Number of common instructions hoisted up to the begin block");
+STATISTIC(NumSinkCommonCode,
+ "Number of common instruction 'blocks' sunk down to the end block");
+STATISTIC(NumSinkCommonInstrs,
+ "Number of common instructions sunk down to the end block");
+STATISTIC(NumSpeculations, "Number of speculatively executed instructions");
+STATISTIC(NumInvokes,
+ "Number of invokes with empty resume blocks simplified into calls");
+STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
+STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
+
+namespace {
+
+// The first field contains the value that the switch produces when a certain
+// case group is selected, and the second field is a vector containing the
+// cases composing the case group.
+using SwitchCaseResultVectorTy =
+ SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>;
+
+// The first field contains the phi node that generates a result of the switch
+// and the second field contains the value generated for a certain case in the
+// switch for that PHI.
+using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
+
+/// ValueEqualityComparisonCase - Represents a case of a switch.
+struct ValueEqualityComparisonCase {
+ ConstantInt *Value;
+ BasicBlock *Dest;
+
+ ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
+ : Value(Value), Dest(Dest) {}
+
+ bool operator<(ValueEqualityComparisonCase RHS) const {
+ // Comparing pointers is ok as we only rely on the order for uniquing.
+ return Value < RHS.Value;
+ }
+
+ bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
+};
+
+class SimplifyCFGOpt {
+ const TargetTransformInfo &TTI;
+ DomTreeUpdater *DTU;
+ const DataLayout &DL;
+ ArrayRef<WeakVH> LoopHeaders;
+ const SimplifyCFGOptions &Options;
+ bool Resimplify;
+
+ Value *isValueEqualityComparison(Instruction *TI);
+ BasicBlock *GetValueEqualityComparisonCases(
+ Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
+ bool SimplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
+ BasicBlock *Pred,
+ IRBuilder<> &Builder);
+ bool PerformValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
+ Instruction *PTI,
+ IRBuilder<> &Builder);
+ bool FoldValueComparisonIntoPredecessors(Instruction *TI,
+ IRBuilder<> &Builder);
+
+ bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
+ bool simplifySingleResume(ResumeInst *RI);
+ bool simplifyCommonResume(ResumeInst *RI);
+ bool simplifyCleanupReturn(CleanupReturnInst *RI);
+ bool simplifyUnreachable(UnreachableInst *UI);
+ bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
+ bool simplifyIndirectBr(IndirectBrInst *IBI);
+ bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
+ bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
+ bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
+
+ bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
+ IRBuilder<> &Builder);
+
+ bool HoistThenElseCodeToIf(BranchInst *BI, const TargetTransformInfo &TTI,
+ bool EqTermsOnly);
+ bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
+ const TargetTransformInfo &TTI);
+ bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
+ BasicBlock *TrueBB, BasicBlock *FalseBB,
+ uint32_t TrueWeight, uint32_t FalseWeight);
+ bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
+ const DataLayout &DL);
+ bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
+ bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
+ bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
+
+public:
+ SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
+ const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
+ const SimplifyCFGOptions &Opts)
+ : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
+ assert((!DTU || !DTU->hasPostDomTree()) &&
+ "SimplifyCFG is not yet capable of maintaining validity of a "
+ "PostDomTree, so don't ask for it.");
+ }
+
+ bool simplifyOnce(BasicBlock *BB);
+ bool run(BasicBlock *BB);
+
+ // Helper to set Resimplify and return change indication.
+ bool requestResimplify() {
+ Resimplify = true;
+ return true;
+ }
+};
+
+} // end anonymous namespace
+
+/// Return true if all the PHI nodes in the basic block \p BB
+/// receive compatible (identical) incoming values when coming from
+/// all of the predecessor blocks that are specified in \p IncomingBlocks.
+///
+/// Note that if the values aren't exactly identical, but \p EquivalenceSet
+/// is provided, and *both* of the values are present in the set,
+/// then they are considered equal.
+static bool IncomingValuesAreCompatible(
+ BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
+ SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
+ assert(IncomingBlocks.size() == 2 &&
+ "Only for a pair of incoming blocks at the time!");
+
+ // FIXME: it is okay if one of the incoming values is an `undef` value,
+ // iff the other incoming value is guaranteed to be a non-poison value.
+ // FIXME: it is okay if one of the incoming values is a `poison` value.
+ return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
+ Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
+ Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
+ if (IV0 == IV1)
+ return true;
+ if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
+ EquivalenceSet->contains(IV1))
+ return true;
+ return false;
+ });
+}
+
+/// Return true if it is safe to merge these two
+/// terminator instructions together.
+static bool
+SafeToMergeTerminators(Instruction *SI1, Instruction *SI2,
+ SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
+ if (SI1 == SI2)
+ return false; // Can't merge with self!
+
+ // It is not safe to merge these two switch instructions if they have a common
+ // successor, and if that successor has a PHI node, and if *that* PHI node has
+ // conflicting incoming values from the two switch blocks.
+ BasicBlock *SI1BB = SI1->getParent();
+ BasicBlock *SI2BB = SI2->getParent();
+
+ SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
+ bool Fail = false;
+ for (BasicBlock *Succ : successors(SI2BB)) {
+ if (!SI1Succs.count(Succ))
+ continue;
+ if (IncomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
+ continue;
+ Fail = true;
+ if (FailBlocks)
+ FailBlocks->insert(Succ);
+ else
+ break;
+ }
+
+ return !Fail;
+}
+
+/// Update PHI nodes in Succ to indicate that there will now be entries in it
+/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
+/// will be the same as those coming in from ExistPred, an existing predecessor
+/// of Succ.
+static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
+ BasicBlock *ExistPred,
+ MemorySSAUpdater *MSSAU = nullptr) {
+ for (PHINode &PN : Succ->phis())
+ PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
+ if (MSSAU)
+ if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
+ MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
+}
+
+/// Compute an abstract "cost" of speculating the given instruction,
+/// which is assumed to be safe to speculate. TCC_Free means cheap,
+/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
+/// expensive.
+static InstructionCost computeSpeculationCost(const User *I,
+ const TargetTransformInfo &TTI) {
+ assert((!isa<Instruction>(I) ||
+ isSafeToSpeculativelyExecute(cast<Instruction>(I))) &&
+ "Instruction is not safe to speculatively execute!");
+ return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency);
+}
+
+/// If we have a merge point of an "if condition" as accepted above,
+/// return true if the specified value dominates the block. We
+/// don't handle the true generality of domination here, just a special case
+/// which works well enough for us.
+///
+/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
+/// see if V (which must be an instruction) and its recursive operands
+/// that do not dominate BB have a combined cost lower than Budget and
+/// are non-trapping. If both are true, the instruction is inserted into the
+/// set and true is returned.
+///
+/// The cost for most non-trapping instructions is defined as 1 except for
+/// Select whose cost is 2.
+///
+/// After this function returns, Cost is increased by the cost of
+/// V plus its non-dominating operands. If that cost is greater than
+/// Budget, false is returned and Cost is undefined.
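+///
+/// For example (illustrative): when turning
+/// 'if (c) { t = a + b; } ... phi [t, then], [a, entry]'
+/// into a select, 't = a + b' does not dominate the merge block, but it is
+/// cheap and safe to speculate, so it is added to AggressiveInsts and its
+/// cost is counted against Budget.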
+static bool dominatesMergePoint(Value *V, BasicBlock *BB,
+ SmallPtrSetImpl<Instruction *> &AggressiveInsts,
+ InstructionCost &Cost,
+ InstructionCost Budget,
+ const TargetTransformInfo &TTI,
+ unsigned Depth = 0) {
+ // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
+ // so limit the recursion depth.
+ // TODO: While this recursion limit does prevent pathological behavior, it
+ // would be better to track visited instructions to avoid cycles.
+ if (Depth == MaxSpeculationDepth)
+ return false;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) {
+ // Non-instructions dominate all instructions and can be executed
+ // unconditionally.
+ return true;
+ }
+ BasicBlock *PBB = I->getParent();
+
+ // We don't want to allow weird loops that might have the "if condition" in
+ // the bottom of this block.
+ if (PBB == BB)
+ return false;
+
+ // If this instruction is defined in a block that contains an unconditional
+ // branch to BB, then it must be in the 'conditional' part of the "if
+ // statement". If not, it definitely dominates the region.
+ BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
+ if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
+ return true;
+
+ // If we have seen this instruction before, don't count it again.
+ if (AggressiveInsts.count(I))
+ return true;
+
+ // Okay, it looks like the instruction IS in the "condition". Check to
+ // see if it's a cheap instruction to unconditionally compute, and if it
+ // only uses stuff defined outside of the condition. If so, hoist it out.
+ if (!isSafeToSpeculativelyExecute(I))
+ return false;
+
+ Cost += computeSpeculationCost(I, TTI);
+
+ // Allow exactly one instruction to be speculated regardless of its cost
+ // (as long as it is safe to do so).
+ // This is intended to flatten the CFG even if the instruction is a division
+ // or other expensive operation. The speculation of an expensive instruction
+ // is expected to be undone in CodeGenPrepare if the speculation has not
+ // enabled further IR optimizations.
+ if (Cost > Budget &&
+ (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
+ !Cost.isValid()))
+ return false;
+
+ // Okay, we can only really hoist these out if their operands do
+ // not take us over the cost threshold.
+ for (Use &Op : I->operands())
+ if (!dominatesMergePoint(Op, BB, AggressiveInsts, Cost, Budget, TTI,
+ Depth + 1))
+ return false;
+ // Okay, it's safe to do this! Remember this instruction.
+ AggressiveInsts.insert(I);
+ return true;
+}
+
+/// Extract ConstantInt from value, looking through IntToPtr
+/// and PointerNullValue. Return NULL if value is not a constant int.
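+/// For example (illustrative): 'inttoptr (i64 42 to ptr)' yields the i64
+/// constant 42, and a null pointer constant yields 0 of the pointer-sized
+/// integer type.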
+static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {
+ // Normal constant int.
+ ConstantInt *CI = dyn_cast<ConstantInt>(V);
+ if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy() ||
+ DL.isNonIntegralPointerType(V->getType()))
+ return CI;
+
+ // This is some kind of pointer constant. Turn it into a pointer-sized
+ // ConstantInt if possible.
+ IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
+
+ // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
+ if (isa<ConstantPointerNull>(V))
+ return ConstantInt::get(PtrTy, 0);
+
+ // IntToPtr const int.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::IntToPtr)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
+ // The constant is very likely to have the right type already.
+ if (CI->getType() == PtrTy)
+ return CI;
+ else
+ return cast<ConstantInt>(
+ ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false));
+ }
+ return nullptr;
+}
+
+namespace {
+
+/// Given a chain of or (||) or and (&&) comparison of a value against a
+/// constant, this will try to recover the information required for a switch
+/// structure.
+/// It will depth-first traverse the chain of comparisons, looking for patterns
+/// like %a == 12 or %a < 4 and combine them to produce a set of integers
+/// representing the different cases for the switch.
+/// Note that if the chain is composed of '||' it will build the set of elements
+/// that match the comparisons (i.e. any of these values satisfies the chain),
+/// while for a chain of '&&' it will build the set of elements that make the
+/// test fail.
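+///
+/// For example (illustrative): gathering (%a == 2 || %a == 5 || %a == 7)
+/// yields CompValue == %a and Vals == {2, 5, 7}, while gathering
+/// (%a != 2 && %a != 5) yields Vals == {2, 5}, the values for which the
+/// '&&' chain fails.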
+struct ConstantComparesGatherer {
+ const DataLayout &DL;
+
+ /// Value found for the switch comparison
+ Value *CompValue = nullptr;
+
+ /// Extra clause to be checked before the switch
+ Value *Extra = nullptr;
+
+ /// Set of integers to match in switch
+ SmallVector<ConstantInt *, 8> Vals;
+
+ /// Number of comparisons matched in the and/or chain
+ unsigned UsedICmps = 0;
+
+ /// Construct and compute the result for the comparison instruction Cond
+ ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
+ gather(Cond);
+ }
+
+ ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
+ ConstantComparesGatherer &
+ operator=(const ConstantComparesGatherer &) = delete;
+
+private:
+ /// Try to set the current value used for the comparison; it succeeds only if
+ /// it wasn't set before or if the new value is the same as the old one.
+ bool setValueOnce(Value *NewVal) {
+ if (CompValue && CompValue != NewVal)
+ return false;
+ CompValue = NewVal;
+ return (CompValue != nullptr);
+ }
+
+ /// Try to match Instruction "I" as a comparison against a constant and
+ /// populate the vector Vals with the set of values that match (or do not
+ /// match, depending on isEQ).
+ /// Return false on failure. On success, the Value the comparison matched
+ /// against is placed in CompValue.
+ /// If CompValue is already set, the function is expected to fail if a match
+ /// is found but the value compared to is different.
+ bool matchInstruction(Instruction *I, bool isEQ) {
+ // If this is an icmp against a constant, handle this as one of the cases.
+ ICmpInst *ICI;
+ ConstantInt *C;
+ if (!((ICI = dyn_cast<ICmpInst>(I)) &&
+ (C = GetConstantInt(I->getOperand(1), DL)))) {
+ return false;
+ }
+
+ Value *RHSVal;
+ const APInt *RHSC;
+
+ // Pattern match a special case
+ // (x & ~2^z) == y --> x == y || x == y|2^z
+ // This undoes a transformation done by instcombine to fuse 2 compares.
+ if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
+ // It's a little bit hard to see why the following transformations are
+ // correct. Here is a CVC3 program to verify them for 64-bit values:
+
+ /*
+ ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
+ x : BITVECTOR(64);
+ y : BITVECTOR(64);
+ z : BITVECTOR(64);
+ mask : BITVECTOR(64) = BVSHL(ONE, z);
+ QUERY( (y & ~mask = y) =>
+ ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
+ );
+ QUERY( (y | mask = y) =>
+ ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
+ );
+ */
+
+ // Please note that each pattern must be a dual implication (<--> or
+ // iff). A one-directional implication can create spurious matches. If the
+ // implication is only one-way, an unsatisfiable condition on the left
+ // side can imply a satisfiable condition on the right side. Dual
+ // implication ensures that satisfiable conditions are transformed to
+ // other satisfiable conditions and unsatisfiable conditions are
+ // transformed to other unsatisfiable conditions.
+
+ // Here is a concrete example of an unsatisfiable condition on the left
+ // implying a satisfiable condition on the right:
+ //
+ // mask = (1 << z)
+ // (x & ~mask) == y --> (x == y || x == (y | mask))
+ //
+ // Substituting y = 3, z = 0 yields:
+ // (x & -2) == 3 --> (x == 3 || x == 2)
+
+ // Pattern match a special case:
+ /*
+ QUERY( (y & ~mask = y) =>
+ ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
+ );
+ */
+ if (match(ICI->getOperand(0),
+ m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
+ APInt Mask = ~*RHSC;
+ if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
+ // If we already have a value for the switch, it has to match!
+ if (!setValueOnce(RHSVal))
+ return false;
+
+ Vals.push_back(C);
+ Vals.push_back(
+ ConstantInt::get(C->getContext(),
+ C->getValue() | Mask));
+ UsedICmps++;
+ return true;
+ }
+ }
+
+ // Pattern match a special case:
+ /*
+ QUERY( (y | mask = y) =>
+ ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
+ );
+ */
+ if (match(ICI->getOperand(0),
+ m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
+ APInt Mask = *RHSC;
+ if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
+ // If we already have a value for the switch, it has to match!
+ if (!setValueOnce(RHSVal))
+ return false;
+
+ Vals.push_back(C);
+ Vals.push_back(ConstantInt::get(C->getContext(),
+ C->getValue() & ~Mask));
+ UsedICmps++;
+ return true;
+ }
+ }
+
+ // If we already have a value for the switch, it has to match!
+ if (!setValueOnce(ICI->getOperand(0)))
+ return false;
+
+ UsedICmps++;
+ Vals.push_back(C);
+ return ICI->getOperand(0);
+ }
+
+ // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
+ ConstantRange Span =
+ ConstantRange::makeExactICmpRegion(ICI->getPredicate(), C->getValue());
+
+ // Shift the range if the compare is fed by an add. This is the range
+ // compare idiom as emitted by instcombine.
+ Value *CandidateVal = I->getOperand(0);
+ if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
+ Span = Span.subtract(*RHSC);
+ CandidateVal = RHSVal;
+ }
+
+ // If this is an and/!= check, then we are looking to build the set of
+ // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
+ // x != 0 && x != 1.
+ if (!isEQ)
+ Span = Span.inverse();
+
+ // If there are a ton of values, we don't want to make a ginormous switch.
+ if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
+ return false;
+ }
+
+ // If we already have a value for the switch, it has to match!
+ if (!setValueOnce(CandidateVal))
+ return false;
+
+ // Add all values from the range to the set
+ for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
+ Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
+
+ UsedICmps++;
+ return true;
+ }
+
+ /// Given a potentially 'or'd or 'and'd together collection of icmp
+ /// eq/ne/lt/gt instructions that compare a value against a constant, extract
+ /// the value being compared, and stick the list constants into the Vals
+ /// vector.
+ /// One "Extra" case is allowed to differ from the other.
+ void gather(Value *V) {
+ bool isEQ = match(V, m_LogicalOr(m_Value(), m_Value()));
+
+ // Keep a stack (SmallVector for efficiency) for depth-first traversal
+ SmallVector<Value *, 8> DFT;
+ SmallPtrSet<Value *, 8> Visited;
+
+ // Initialize
+ Visited.insert(V);
+ DFT.push_back(V);
+
+ while (!DFT.empty()) {
+ V = DFT.pop_back_val();
+
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ // If it is a || (or && depending on isEQ), process the operands.
+ Value *Op0, *Op1;
+ if (isEQ ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
+ : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
+ if (Visited.insert(Op1).second)
+ DFT.push_back(Op1);
+ if (Visited.insert(Op0).second)
+ DFT.push_back(Op0);
+
+ continue;
+ }
+
+ // Try to match the current instruction
+ if (matchInstruction(I, isEQ))
+ // Match succeeded, continue the loop
+ continue;
+ }
+
+ // One element of the sequence of || (or &&) could not be matched as a
+ // comparison against the same value as the others.
+ // We allow only one "Extra" case to be checked before the switch.
+ if (!Extra) {
+ Extra = V;
+ continue;
+ }
+ // Failed to parse a proper sequence, abort now
+ CompValue = nullptr;
+ break;
+ }
+ }
+};
+
+} // end anonymous namespace
+
+static void EraseTerminatorAndDCECond(Instruction *TI,
+ MemorySSAUpdater *MSSAU = nullptr) {
+ Instruction *Cond = nullptr;
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ Cond = dyn_cast<Instruction>(SI->getCondition());
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional())
+ Cond = dyn_cast<Instruction>(BI->getCondition());
+ } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
+ Cond = dyn_cast<Instruction>(IBI->getAddress());
+ }
+
+ TI->eraseFromParent();
+ if (Cond)
+ RecursivelyDeleteTriviallyDeadInstructions(Cond, nullptr, MSSAU);
+}
+
+/// Return the comparison value if the specified terminator checks whether a
+/// value is equal to a constant integer value; otherwise return null.
+Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
+ Value *CV = nullptr;
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ // Do not permit merging of large switch instructions into their
+ // predecessors unless there is only one predecessor.
+ if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
+ CV = SI->getCondition();
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
+ if (BI->isConditional() && BI->getCondition()->hasOneUse())
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
+ if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL))
+ CV = ICI->getOperand(0);
+ }
+
+ // Unwrap any lossless ptrtoint cast.
+ if (CV) {
+ if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
+ Value *Ptr = PTII->getPointerOperand();
+ if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
+ CV = Ptr;
+ }
+ }
+ return CV;
+}
+
+/// Given a value comparison instruction,
+/// decode all of the 'cases' that it represents and return the 'default' block.
+BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases(
+ Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ Cases.reserve(SI->getNumCases());
+ for (auto Case : SI->cases())
+ Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
+ Case.getCaseSuccessor()));
+ return SI->getDefaultDest();
+ }
+
+ BranchInst *BI = cast<BranchInst>(TI);
+ ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
+ BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE);
+ Cases.push_back(ValueEqualityComparisonCase(
+ GetConstantInt(ICI->getOperand(1), DL), Succ));
+ return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
+}
+
+/// Given a vector of bb/value pairs, remove any entries
+/// in the list that match the specified block.
+static void
+EliminateBlockCases(BasicBlock *BB,
+ std::vector<ValueEqualityComparisonCase> &Cases) {
+ llvm::erase_value(Cases, BB);
+}
+
+/// Return true if there are any keys in C1 that exist in C2 as well.
+static bool ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
+ std::vector<ValueEqualityComparisonCase> &C2) {
+ std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
+
+ // Make V1 be smaller than V2.
+ if (V1->size() > V2->size())
+ std::swap(V1, V2);
+
+ if (V1->empty())
+ return false;
+ if (V1->size() == 1) {
+ // Just scan V2.
+ ConstantInt *TheVal = (*V1)[0].Value;
+ for (const ValueEqualityComparisonCase &VECC : *V2)
+ if (TheVal == VECC.Value)
+ return true;
+ }
+
+ // Otherwise, just sort both lists and compare element by element.
+ array_pod_sort(V1->begin(), V1->end());
+ array_pod_sort(V2->begin(), V2->end());
+ unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
+ while (i1 != e1 && i2 != e2) {
+ if ((*V1)[i1].Value == (*V2)[i2].Value)
+ return true;
+ if ((*V1)[i1].Value < (*V2)[i2].Value)
+ ++i1;
+ else
+ ++i2;
+ }
+ return false;
+}
+
+// Set branch weights on SwitchInst. This sets the metadata if there is at
+// least one non-zero weight.
+static void setBranchWeights(SwitchInst *SI, ArrayRef<uint32_t> Weights) {
+ // Check that there is at least one non-zero weight. Otherwise, pass
+ // nullptr to setMetadata which will erase the existing metadata.
+ MDNode *N = nullptr;
+ if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; }))
+ N = MDBuilder(SI->getParent()->getContext()).createBranchWeights(Weights);
+ SI->setMetadata(LLVMContext::MD_prof, N);
+}
+
+// Similar to the above, but for branch and select instructions that take
+// exactly 2 weights.
+static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
+ uint32_t FalseWeight) {
+ assert(isa<BranchInst>(I) || isa<SelectInst>(I));
+ // Check that there is at least one non-zero weight. Otherwise, pass
+ // nullptr to setMetadata which will erase the existing metadata.
+ MDNode *N = nullptr;
+ if (TrueWeight || FalseWeight)
+ N = MDBuilder(I->getParent()->getContext())
+ .createBranchWeights(TrueWeight, FalseWeight);
+ I->setMetadata(LLVMContext::MD_prof, N);
+}
+
+/// If TI is known to be a terminator instruction and its block is known to
+/// only have a single predecessor block, check to see if that predecessor is
+/// also a value comparison with the same value, and if that comparison
+/// determines the outcome of this comparison. If so, simplify TI. This does a
+/// very limited form of jump threading.
+bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
+ Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
+ Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
+ if (!PredVal)
+ return false; // Not a value comparison in predecessor.
+
+ Value *ThisVal = isValueEqualityComparison(TI);
+ assert(ThisVal && "This isn't a value comparison!!");
+ if (ThisVal != PredVal)
+ return false; // Different predicates.
+
+ // TODO: Preserve branch weight metadata, similarly to how
+ // FoldValueComparisonIntoPredecessors preserves it.
+
+ // Find out information about when control will move from Pred to TI's block.
+ std::vector<ValueEqualityComparisonCase> PredCases;
+ BasicBlock *PredDef =
+ GetValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
+ EliminateBlockCases(PredDef, PredCases); // Remove default from cases.
+
+ // Find information about how control leaves this block.
+ std::vector<ValueEqualityComparisonCase> ThisCases;
+ BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases);
+ EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
+
+ // If TI's block is the default block from Pred's comparison, potentially
+ // simplify TI based on this knowledge.
+ if (PredDef == TI->getParent()) {
+ // If we are here, we know that the value is none of those cases listed in
+ // PredCases. If there are any cases in ThisCases that are in PredCases, we
+ // can simplify TI.
+ if (!ValuesOverlap(PredCases, ThisCases))
+ return false;
+
+ if (isa<BranchInst>(TI)) {
+ // Okay, one of the successors of this condbr is dead. Convert it to an
+ // unconditional branch.
+ assert(ThisCases.size() == 1 && "Branch can only have one case!");
+ // Insert the new branch.
+ Instruction *NI = Builder.CreateBr(ThisDef);
+ (void)NI;
+
+ // Remove PHI node entries for the dead edge.
+ ThisCases[0].Dest->removePredecessor(PredDef);
+
+ LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI << "Leaving: " << *NI
+ << "\n");
+
+ EraseTerminatorAndDCECond(TI);
+
+ if (DTU)
+ DTU->applyUpdates(
+ {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
+
+ return true;
+ }
+
+ SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
+ // Okay, TI has cases that are statically dead, prune them away.
+ SmallPtrSet<Constant *, 16> DeadCases;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ DeadCases.insert(PredCases[i].Value);
+
+ LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI);
+
+ SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
+ for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
+ --i;
+ auto *Successor = i->getCaseSuccessor();
+ if (DTU)
+ ++NumPerSuccessorCases[Successor];
+ if (DeadCases.count(i->getCaseValue())) {
+ Successor->removePredecessor(PredDef);
+ SI.removeCase(i);
+ if (DTU)
+ --NumPerSuccessorCases[Successor];
+ }
+ }
+
+ if (DTU) {
+ std::vector<DominatorTree::UpdateType> Updates;
+ for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
+ if (I.second == 0)
+ Updates.push_back({DominatorTree::Delete, PredDef, I.first});
+ DTU->applyUpdates(Updates);
+ }
+
+ LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
+ return true;
+ }
+
+ // Otherwise, TI's block must correspond to some matched value. Find out
+ // which value (or set of values) this is.
+ ConstantInt *TIV = nullptr;
+ BasicBlock *TIBB = TI->getParent();
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].Dest == TIBB) {
+ if (TIV)
+ return false; // Cannot handle multiple values coming to this block.
+ TIV = PredCases[i].Value;
+ }
+ assert(TIV && "No edge from pred to succ?");
+
+ // Okay, we found the one constant that our value can be if we get into TI's
+ // BB. Find out which successor will unconditionally be branched to.
+ BasicBlock *TheRealDest = nullptr;
+ for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
+ if (ThisCases[i].Value == TIV) {
+ TheRealDest = ThisCases[i].Dest;
+ break;
+ }
+
+ // If not handled by any explicit cases, it is handled by the default case.
+ if (!TheRealDest)
+ TheRealDest = ThisDef;
+
+ SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
+
+ // Remove PHI node entries for dead edges.
+ BasicBlock *CheckEdge = TheRealDest;
+ for (BasicBlock *Succ : successors(TIBB))
+ if (Succ != CheckEdge) {
+ if (Succ != TheRealDest)
+ RemovedSuccs.insert(Succ);
+ Succ->removePredecessor(TIBB);
+ } else
+ CheckEdge = nullptr;
+
+ // Insert the new branch.
+ Instruction *NI = Builder.CreateBr(TheRealDest);
+ (void)NI;
+
+ LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI << "Leaving: " << *NI
+ << "\n");
+
+ EraseTerminatorAndDCECond(TI);
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 2> Updates;
+ Updates.reserve(RemovedSuccs.size());
+ for (auto *RemovedSucc : RemovedSuccs)
+ Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
+ DTU->applyUpdates(Updates);
+ }
+ return true;
+}
+
+namespace {
+
+/// This class implements a stable ordering of constant
+/// integers that does not depend on their address. This is important for
+/// applications that sort ConstantInt's to ensure uniqueness.
+struct ConstantIntOrdering {
+ bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
+ return LHS->getValue().ult(RHS->getValue());
+ }
+};
+
+} // end anonymous namespace
+
+static int ConstantIntSortPredicate(ConstantInt *const *P1,
+ ConstantInt *const *P2) {
+ const ConstantInt *LHS = *P1;
+ const ConstantInt *RHS = *P2;
+ if (LHS == RHS)
+ return 0;
+ return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
+}
+
+/// Get the weights of a given terminator; the default weight is at the front
+/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
+/// metadata.
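+/// For example (hypothetical weights): for 'br i1 (icmp eq ...), %T, %F' with
+/// !prof weights {10, 90}, Weights is filled with {90, 10}, i.e. the default
+/// (false) weight comes first, matching the switch layout.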
+static void GetBranchWeights(Instruction *TI,
+ SmallVectorImpl<uint64_t> &Weights) {
+ MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
+ assert(MD);
+ for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) {
+ ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(i));
+ Weights.push_back(CI->getValue().getZExtValue());
+ }
+
+ // If TI is a conditional eq, the default case is the false case,
+ // and the corresponding branch-weight data is at index 2. We swap the
+ // default weight to be the first entry.
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ assert(Weights.size() == 2);
+ ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ std::swap(Weights.front(), Weights.back());
+ }
+}
+
+/// Keep halving the weights until all can fit in uint32_t.
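+/// For example (hypothetical values), {1 << 40, 8} needs 41 bits for its
+/// maximum, so every weight is shifted right by 9 bits, yielding {1 << 31, 0}.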
+static void FitWeights(MutableArrayRef<uint64_t> Weights) {
+ uint64_t Max = *std::max_element(Weights.begin(), Weights.end());
+ if (Max > UINT_MAX) {
+ unsigned Offset = 32 - countLeadingZeros(Max);
+ for (uint64_t &I : Weights)
+ I >>= Offset;
+ }
+}
+
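+/// Clone every non-debug, non-terminator "bonus" instruction of BB into
+/// PredBlock just before its terminator, remapping operands through VMap, and
+/// redirect block-closed-SSA PHI uses that come in over the predecessor edge
+/// to the cloned instructions.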
+static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
+ BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
+ Instruction *PTI = PredBlock->getTerminator();
+
+ // If we have bonus instructions, clone them into the predecessor block.
+ // Note that there may be multiple predecessor blocks, so we cannot move
+ // bonus instructions to a predecessor block.
+ for (Instruction &BonusInst : *BB) {
+ if (isa<DbgInfoIntrinsic>(BonusInst) || BonusInst.isTerminator())
+ continue;
+
+ Instruction *NewBonusInst = BonusInst.clone();
+
+ if (PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) {
+ // Unless the instruction has the same !dbg location as the original
+ // branch, drop it. When we fold the bonus instructions we want to make
+ // sure we reset their debug locations in order to avoid stepping on
+ // dead code caused by folding dead branches.
+ NewBonusInst->setDebugLoc(DebugLoc());
+ }
+
+ RemapInstruction(NewBonusInst, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ VMap[&BonusInst] = NewBonusInst;
+
+ // If we moved a load, we cannot any longer claim any knowledge about
+ // its potential value. The previous information might have been valid
+ // only given the branch precondition.
+ // For an analogous reason, we must also drop all the metadata whose
+ // semantics we don't understand. We *can* preserve !annotation, because
+ // it is tied to the instruction itself, not the value or position.
+    // Similarly, strip attributes on call parameters that may cause UB in the
+    // location the call is moved to.
+ NewBonusInst->dropUndefImplyingAttrsAndUnknownMetadata(
+ LLVMContext::MD_annotation);
+
+ NewBonusInst->insertInto(PredBlock, PTI->getIterator());
+ NewBonusInst->takeName(&BonusInst);
+ BonusInst.setName(NewBonusInst->getName() + ".old");
+
+ // Update (liveout) uses of bonus instructions,
+ // now that the bonus instruction has been cloned into predecessor.
+ // Note that we expect to be in a block-closed SSA form for this to work!
+ for (Use &U : make_early_inc_range(BonusInst.uses())) {
+ auto *UI = cast<Instruction>(U.getUser());
+ auto *PN = dyn_cast<PHINode>(UI);
+ if (!PN) {
+ assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
+ "If the user is not a PHI node, then it should be in the same "
+ "block as, and come after, the original bonus instruction.");
+ continue; // Keep using the original bonus instruction.
+ }
+ // Is this the block-closed SSA form PHI node?
+ if (PN->getIncomingBlock(U) == BB)
+ continue; // Great, keep using the original bonus instruction.
+      // The only other alternative is a use coming from the predecessor block;
+      // in that case we should refer to the cloned bonus instruction.
+ assert(PN->getIncomingBlock(U) == PredBlock &&
+ "Not in block-closed SSA form?");
+ U.set(NewBonusInst);
+ }
+ }
+}
+
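+/// Fold the value-equality terminator TI into the terminator PTI of its
+/// predecessor, which is known to compare the same value CV. The predecessor's
+/// terminator is rebuilt as a single switch covering the combined cases, and
+/// branch-weight metadata is combined when either terminator carries it.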
+bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding(
+ Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
+ BasicBlock *BB = TI->getParent();
+ BasicBlock *Pred = PTI->getParent();
+
+ SmallVector<DominatorTree::UpdateType, 32> Updates;
+
+ // Figure out which 'cases' to copy from SI to PSI.
+ std::vector<ValueEqualityComparisonCase> BBCases;
+ BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);
+
+ std::vector<ValueEqualityComparisonCase> PredCases;
+ BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);
+
+ // Based on whether the default edge from PTI goes to BB or not, fill in
+ // PredCases and PredDefault with the new switch cases we would like to
+ // build.
+ SmallMapVector<BasicBlock *, int, 8> NewSuccessors;
+
+ // Update the branch weight metadata along the way
+ SmallVector<uint64_t, 8> Weights;
+ bool PredHasWeights = hasBranchWeightMD(*PTI);
+ bool SuccHasWeights = hasBranchWeightMD(*TI);
+
+ if (PredHasWeights) {
+ GetBranchWeights(PTI, Weights);
+ // branch-weight metadata is inconsistent here.
+ if (Weights.size() != 1 + PredCases.size())
+ PredHasWeights = SuccHasWeights = false;
+ } else if (SuccHasWeights)
+ // If there are no predecessor weights but there are successor weights,
+ // populate Weights with 1, which will later be scaled to the sum of
+ // successor's weights
+ Weights.assign(1 + PredCases.size(), 1);
+
+ SmallVector<uint64_t, 8> SuccWeights;
+ if (SuccHasWeights) {
+ GetBranchWeights(TI, SuccWeights);
+ // branch-weight metadata is inconsistent here.
+ if (SuccWeights.size() != 1 + BBCases.size())
+ PredHasWeights = SuccHasWeights = false;
+ } else if (PredHasWeights)
+ SuccWeights.assign(1 + BBCases.size(), 1);
+
+ if (PredDefault == BB) {
+ // If this is the default destination from PTI, only the edges in TI
+ // that don't occur in PTI, or that branch to BB will be activated.
+ std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].Dest != BB)
+ PTIHandled.insert(PredCases[i].Value);
+ else {
+ // The default destination is BB, we don't need explicit targets.
+ std::swap(PredCases[i], PredCases.back());
+
+ if (PredHasWeights || SuccHasWeights) {
+ // Increase weight for the default case.
+ Weights[0] += Weights[i + 1];
+ std::swap(Weights[i + 1], Weights.back());
+ Weights.pop_back();
+ }
+
+ PredCases.pop_back();
+ --i;
+ --e;
+ }
+
+ // Reconstruct the new switch statement we will be building.
+ if (PredDefault != BBDefault) {
+ PredDefault->removePredecessor(Pred);
+ if (DTU && PredDefault != BB)
+ Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
+ PredDefault = BBDefault;
+ ++NewSuccessors[BBDefault];
+ }
+
+ unsigned CasesFromPred = Weights.size();
+ uint64_t ValidTotalSuccWeight = 0;
+ for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
+ if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
+ PredCases.push_back(BBCases[i]);
+ ++NewSuccessors[BBCases[i].Dest];
+ if (SuccHasWeights || PredHasWeights) {
+ // The default weight is at index 0, so weight for the ith case
+ // should be at index i+1. Scale the cases from successor by
+ // PredDefaultWeight (Weights[0]).
+ Weights.push_back(Weights[0] * SuccWeights[i + 1]);
+ ValidTotalSuccWeight += SuccWeights[i + 1];
+ }
+ }
+
+ if (SuccHasWeights || PredHasWeights) {
+ ValidTotalSuccWeight += SuccWeights[0];
+ // Scale the cases from predecessor by ValidTotalSuccWeight.
+ for (unsigned i = 1; i < CasesFromPred; ++i)
+ Weights[i] *= ValidTotalSuccWeight;
+ // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
+ Weights[0] *= SuccWeights[0];
+ }
+ } else {
+ // If this is not the default destination from PSI, only the edges
+ // in SI that occur in PSI with a destination of BB will be
+ // activated.
+ std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
+ std::map<ConstantInt *, uint64_t> WeightsForHandled;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].Dest == BB) {
+ PTIHandled.insert(PredCases[i].Value);
+
+ if (PredHasWeights || SuccHasWeights) {
+ WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
+ std::swap(Weights[i + 1], Weights.back());
+ Weights.pop_back();
+ }
+
+ std::swap(PredCases[i], PredCases.back());
+ PredCases.pop_back();
+ --i;
+ --e;
+ }
+
+ // Okay, now we know which constants were sent to BB from the
+ // predecessor. Figure out where they will all go now.
+ for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
+ if (PTIHandled.count(BBCases[i].Value)) {
+ // If this is one we are capable of getting...
+ if (PredHasWeights || SuccHasWeights)
+ Weights.push_back(WeightsForHandled[BBCases[i].Value]);
+ PredCases.push_back(BBCases[i]);
+ ++NewSuccessors[BBCases[i].Dest];
+ PTIHandled.erase(BBCases[i].Value); // This constant is taken care of
+ }
+
+ // If there are any constants vectored to BB that TI doesn't handle,
+ // they must go to the default destination of TI.
+ for (ConstantInt *I : PTIHandled) {
+ if (PredHasWeights || SuccHasWeights)
+ Weights.push_back(WeightsForHandled[I]);
+ PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
+ ++NewSuccessors[BBDefault];
+ }
+ }
+
+ // Okay, at this point, we know which new successor Pred will get. Make
+ // sure we update the number of entries in the PHI nodes for these
+ // successors.
+ SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
+ if (DTU) {
+ SuccsOfPred = {succ_begin(Pred), succ_end(Pred)};
+ Updates.reserve(Updates.size() + NewSuccessors.size());
+ }
+ for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
+ NewSuccessors) {
+ for (auto I : seq(0, NewSuccessor.second)) {
+ (void)I;
+ AddPredecessorToBlock(NewSuccessor.first, Pred, BB);
+ }
+ if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
+ Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
+ }
+
+ Builder.SetInsertPoint(PTI);
+ // Convert pointer to int before we switch.
+ if (CV->getType()->isPointerTy()) {
+ CV =
+ Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
+ }
+
+ // Now that the successors are updated, create the new Switch instruction.
+ SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
+ NewSI->setDebugLoc(PTI->getDebugLoc());
+ for (ValueEqualityComparisonCase &V : PredCases)
+ NewSI->addCase(V.Value, V.Dest);
+
+ if (PredHasWeights || SuccHasWeights) {
+    // Halve the weights if any of them cannot fit in a uint32_t.
+ FitWeights(Weights);
+
+ SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
+
+ setBranchWeights(NewSI, MDWeights);
+ }
+
+ EraseTerminatorAndDCECond(PTI);
+
+ // Okay, last check. If BB is still a successor of PSI, then we must
+ // have an infinite loop case. If so, add an infinitely looping block
+ // to handle the case to preserve the behavior of the code.
+ BasicBlock *InfLoopBlock = nullptr;
+ for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
+ if (NewSI->getSuccessor(i) == BB) {
+ if (!InfLoopBlock) {
+ // Insert it at the end of the function, because it's either code,
+ // or it won't matter if it's hot. :)
+ InfLoopBlock =
+ BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
+ BranchInst::Create(InfLoopBlock, InfLoopBlock);
+ if (DTU)
+ Updates.push_back(
+ {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
+ }
+ NewSI->setSuccessor(i, InfLoopBlock);
+ }
+
+ if (DTU) {
+ if (InfLoopBlock)
+ Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
+
+ Updates.push_back({DominatorTree::Delete, Pred, BB});
+
+ DTU->applyUpdates(Updates);
+ }
+
+ ++NumFoldValueComparisonIntoPredecessors;
+ return true;
+}
+
+/// The specified terminator is a value equality comparison instruction
+/// (either a switch or a branch on "X == c").
+/// See if any of the predecessors of the terminator block are value comparisons
+/// on the same value. If so, and if safe to do so, fold them together.
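+/// Illustrative sketch (hypothetical IR): a predecessor ending in
+/// 'switch i32 %x, ...' and a block ending in 'br i1 (icmp eq i32 %x, 3), ...'
+/// are folded into a single switch on %x in the predecessor.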
+bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI,
+ IRBuilder<> &Builder) {
+ BasicBlock *BB = TI->getParent();
+ Value *CV = isValueEqualityComparison(TI); // CondVal
+ assert(CV && "Not a comparison?");
+
+ bool Changed = false;
+
+ SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
+ while (!Preds.empty()) {
+ BasicBlock *Pred = Preds.pop_back_val();
+ Instruction *PTI = Pred->getTerminator();
+
+ // Don't try to fold into itself.
+ if (Pred == BB)
+ continue;
+
+ // See if the predecessor is a comparison with the same value.
+ Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
+ if (PCV != CV)
+ continue;
+
+ SmallSetVector<BasicBlock *, 4> FailBlocks;
+ if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) {
+ for (auto *Succ : FailBlocks) {
+ if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
+ return false;
+ }
+ }
+
+ PerformValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
+ Changed = true;
+ }
+ return Changed;
+}
+
+// If we would need to insert a select that uses the value of this invoke
+// (comments in HoistThenElseCodeToIf explain why we would need to do this), we
+// can't hoist the invoke, as there is nowhere to put the select in this case.
+static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
+ Instruction *I1, Instruction *I2) {
+ for (BasicBlock *Succ : successors(BB1)) {
+ for (const PHINode &PN : Succ->phis()) {
+ Value *BB1V = PN.getIncomingValueForBlock(BB1);
+ Value *BB2V = PN.getIncomingValueForBlock(BB2);
+ if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+// Get interesting characteristics of instructions that `HoistThenElseCodeToIf`
+// didn't hoist. They restrict what kind of instructions can be reordered
+// across.
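+// For example, once a store has been skipped (setting SkipSideEffect), a later
+// load in that block can no longer be hoisted past it.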
+enum SkipFlags {
+ SkipReadMem = 1,
+ SkipSideEffect = 2,
+ SkipImplicitControlFlow = 4
+};
+
+static unsigned skippedInstrFlags(Instruction *I) {
+ unsigned Flags = 0;
+ if (I->mayReadFromMemory())
+ Flags |= SkipReadMem;
+ // We can't arbitrarily move around allocas, e.g. moving allocas (especially
+ // inalloca) across stacksave/stackrestore boundaries.
+ if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
+ Flags |= SkipSideEffect;
+ if (!isGuaranteedToTransferExecutionToSuccessor(I))
+ Flags |= SkipImplicitControlFlow;
+ return Flags;
+}
+
+// Returns true if it is safe to reorder an instruction across preceding
+// instructions in a basic block.
+static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
+ // Don't reorder a store over a load.
+ if ((Flags & SkipReadMem) && I->mayWriteToMemory())
+ return false;
+
+ // If we have seen an instruction with side effects, it's unsafe to reorder an
+ // instruction which reads memory or itself has side effects.
+ if ((Flags & SkipSideEffect) &&
+ (I->mayReadFromMemory() || I->mayHaveSideEffects()))
+ return false;
+
+ // Reordering across an instruction which does not necessarily transfer
+ // control to the next instruction is speculation.
+ if ((Flags & SkipImplicitControlFlow) && !isSafeToSpeculativelyExecute(I))
+ return false;
+
+ // Hoisting of llvm.deoptimize is only legal together with the next return
+ // instruction, which this pass is not always able to do.
+ if (auto *CB = dyn_cast<CallBase>(I))
+ if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
+ return false;
+
+ // It's also unsafe/illegal to hoist an instruction above its instruction
+ // operands
+ BasicBlock *BB = I->getParent();
+ for (Value *Op : I->operands()) {
+ if (auto *J = dyn_cast<Instruction>(Op))
+ if (J->getParent() == BB)
+ return false;
+ }
+
+ return true;
+}
+
+static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
+
+/// Given a conditional branch that goes to BB1 and BB2, hoist any common code
+/// in the two blocks up into the branch block. The caller of this function
+/// guarantees that BI's block dominates BB1 and BB2. If EqTermsOnly is given,
+/// only perform hoisting in case both blocks only contain a terminator. In that
+/// case, only the original BI will be replaced and selects for PHIs are added.
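+/// Illustrative sketch (hypothetical IR): if both successors of BI begin with
+/// an identical 'add i32 %a, %b', that add is hoisted into BI's block and the
+/// duplicate in the other successor is erased.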
+bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI,
+ const TargetTransformInfo &TTI,
+ bool EqTermsOnly) {
+ // This does very trivial matching, with limited scanning, to find identical
+ // instructions in the two blocks. In particular, we don't want to get into
+ // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As
+ // such, we currently just scan for obviously identical instructions in an
+ // identical order, possibly separated by the same number of non-identical
+ // instructions.
+ BasicBlock *BB1 = BI->getSuccessor(0); // The true destination.
+ BasicBlock *BB2 = BI->getSuccessor(1); // The false destination
+
+  // If either of the blocks has its address taken, then we can't do this fold,
+  // because the code we'd hoist would no longer run when we jump into the block
+  // by its address.
+ if (BB1->hasAddressTaken() || BB2->hasAddressTaken())
+ return false;
+
+ BasicBlock::iterator BB1_Itr = BB1->begin();
+ BasicBlock::iterator BB2_Itr = BB2->begin();
+
+ Instruction *I1 = &*BB1_Itr++, *I2 = &*BB2_Itr++;
+ // Skip debug info if it is not identical.
+ DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
+ DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
+ if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
+ while (isa<DbgInfoIntrinsic>(I1))
+ I1 = &*BB1_Itr++;
+ while (isa<DbgInfoIntrinsic>(I2))
+ I2 = &*BB2_Itr++;
+ }
+ if (isa<PHINode>(I1))
+ return false;
+
+ BasicBlock *BIParent = BI->getParent();
+
+ bool Changed = false;
+
+ auto _ = make_scope_exit([&]() {
+ if (Changed)
+ ++NumHoistCommonCode;
+ });
+
+ // Check if only hoisting terminators is allowed. This does not add new
+ // instructions to the hoist location.
+ if (EqTermsOnly) {
+ // Skip any debug intrinsics, as they are free to hoist.
+ auto *I1NonDbg = &*skipDebugIntrinsics(I1->getIterator());
+ auto *I2NonDbg = &*skipDebugIntrinsics(I2->getIterator());
+ if (!I1NonDbg->isIdenticalToWhenDefined(I2NonDbg))
+ return false;
+ if (!I1NonDbg->isTerminator())
+ return false;
+ // Now we know that we only need to hoist debug intrinsics and the
+ // terminator. Let the loop below handle those 2 cases.
+ }
+
+ // Count how many instructions were not hoisted so far. There's a limit on how
+ // many instructions we skip, serving as a compilation time control as well as
+  // preventing excessive increase of live ranges.
+ unsigned NumSkipped = 0;
+
+  // Record any skipped instructions that may read memory, write memory or have
+ // side effects, or have implicit control flow.
+ unsigned SkipFlagsBB1 = 0;
+ unsigned SkipFlagsBB2 = 0;
+
+ for (;;) {
+ // If we are hoisting the terminator instruction, don't move one (making a
+ // broken BB), instead clone it, and remove BI.
+ if (I1->isTerminator() || I2->isTerminator()) {
+ // If any instructions remain in the block, we cannot hoist terminators.
+ if (NumSkipped || !I1->isIdenticalToWhenDefined(I2))
+ return Changed;
+ goto HoistTerminator;
+ }
+
+ if (I1->isIdenticalToWhenDefined(I2)) {
+ // Even if the instructions are identical, it may not be safe to hoist
+ // them if we have skipped over instructions with side effects or their
+ // operands weren't hoisted.
+ if (!isSafeToHoistInstr(I1, SkipFlagsBB1) ||
+ !isSafeToHoistInstr(I2, SkipFlagsBB2))
+ return Changed;
+
+ // If we're going to hoist a call, make sure that the two instructions
+ // we're commoning/hoisting are both marked with musttail, or neither of
+ // them is marked as such. Otherwise, we might end up in a situation where
+ // we hoist from a block where the terminator is a `ret` to a block where
+ // the terminator is a `br`, and `musttail` calls expect to be followed by
+ // a return.
+ auto *C1 = dyn_cast<CallInst>(I1);
+ auto *C2 = dyn_cast<CallInst>(I2);
+ if (C1 && C2)
+ if (C1->isMustTailCall() != C2->isMustTailCall())
+ return Changed;
+
+ if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
+ return Changed;
+
+ // If any of the two call sites has nomerge attribute, stop hoisting.
+ if (const auto *CB1 = dyn_cast<CallBase>(I1))
+ if (CB1->cannotMerge())
+ return Changed;
+ if (const auto *CB2 = dyn_cast<CallBase>(I2))
+ if (CB2->cannotMerge())
+ return Changed;
+
+ if (isa<DbgInfoIntrinsic>(I1) || isa<DbgInfoIntrinsic>(I2)) {
+ assert(isa<DbgInfoIntrinsic>(I1) && isa<DbgInfoIntrinsic>(I2));
+ // The debug location is an integral part of a debug info intrinsic
+ // and can't be separated from it or replaced. Instead of attempting
+ // to merge locations, simply hoist both copies of the intrinsic.
+ BIParent->splice(BI->getIterator(), BB1, I1->getIterator());
+ BIParent->splice(BI->getIterator(), BB2, I2->getIterator());
+ } else {
+ // For a normal instruction, we just move one to right before the
+ // branch, then replace all uses of the other with the first. Finally,
+ // we remove the now redundant second instruction.
+ BIParent->splice(BI->getIterator(), BB1, I1->getIterator());
+ if (!I2->use_empty())
+ I2->replaceAllUsesWith(I1);
+ I1->andIRFlags(I2);
+ unsigned KnownIDs[] = {LLVMContext::MD_tbaa,
+ LLVMContext::MD_range,
+ LLVMContext::MD_fpmath,
+ LLVMContext::MD_invariant_load,
+ LLVMContext::MD_nonnull,
+ LLVMContext::MD_invariant_group,
+ LLVMContext::MD_align,
+ LLVMContext::MD_dereferenceable,
+ LLVMContext::MD_dereferenceable_or_null,
+ LLVMContext::MD_mem_parallel_loop_access,
+ LLVMContext::MD_access_group,
+ LLVMContext::MD_preserve_access_index};
+ combineMetadata(I1, I2, KnownIDs, true);
+
+ // I1 and I2 are being combined into a single instruction. Its debug
+ // location is the merged locations of the original instructions.
+ I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
+
+ I2->eraseFromParent();
+ }
+ Changed = true;
+ ++NumHoistCommonInstrs;
+ } else {
+ if (NumSkipped >= HoistCommonSkipLimit)
+ return Changed;
+ // We are about to skip over a pair of non-identical instructions. Record
+ // if any have characteristics that would prevent reordering instructions
+ // across them.
+ SkipFlagsBB1 |= skippedInstrFlags(I1);
+ SkipFlagsBB2 |= skippedInstrFlags(I2);
+ ++NumSkipped;
+ }
+
+ I1 = &*BB1_Itr++;
+ I2 = &*BB2_Itr++;
+ // Skip debug info if it is not identical.
+ DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
+ DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
+ if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
+ while (isa<DbgInfoIntrinsic>(I1))
+ I1 = &*BB1_Itr++;
+ while (isa<DbgInfoIntrinsic>(I2))
+ I2 = &*BB2_Itr++;
+ }
+ }
+
+ return Changed;
+
+HoistTerminator:
+ // It may not be possible to hoist an invoke.
+ // FIXME: Can we define a safety predicate for CallBr?
+ if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
+ return Changed;
+
+ // TODO: callbr hoisting currently disabled pending further study.
+ if (isa<CallBrInst>(I1))
+ return Changed;
+
+ for (BasicBlock *Succ : successors(BB1)) {
+ for (PHINode &PN : Succ->phis()) {
+ Value *BB1V = PN.getIncomingValueForBlock(BB1);
+ Value *BB2V = PN.getIncomingValueForBlock(BB2);
+ if (BB1V == BB2V)
+ continue;
+
+      // Check for passingValueIsAlwaysUndefined here because we would rather
+      // eliminate undefined control flow than convert it to a select.
+ if (passingValueIsAlwaysUndefined(BB1V, &PN) ||
+ passingValueIsAlwaysUndefined(BB2V, &PN))
+ return Changed;
+ }
+ }
+
+ // Okay, it is safe to hoist the terminator.
+ Instruction *NT = I1->clone();
+ NT->insertInto(BIParent, BI->getIterator());
+ if (!NT->getType()->isVoidTy()) {
+ I1->replaceAllUsesWith(NT);
+ I2->replaceAllUsesWith(NT);
+ NT->takeName(I1);
+ }
+ Changed = true;
+ ++NumHoistCommonInstrs;
+
+ // Ensure terminator gets a debug location, even an unknown one, in case
+ // it involves inlinable calls.
+ NT->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
+
+ // PHIs created below will adopt NT's merged DebugLoc.
+ IRBuilder<NoFolder> Builder(NT);
+
+ // Hoisting one of the terminators from our successor is a great thing.
+ // Unfortunately, the successors of the if/else blocks may have PHI nodes in
+ // them. If they do, all PHI entries for BB1/BB2 must agree for all PHI
+  // nodes, so we insert a select instruction to compute the final result.
+ std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
+ for (BasicBlock *Succ : successors(BB1)) {
+ for (PHINode &PN : Succ->phis()) {
+ Value *BB1V = PN.getIncomingValueForBlock(BB1);
+ Value *BB2V = PN.getIncomingValueForBlock(BB2);
+ if (BB1V == BB2V)
+ continue;
+
+ // These values do not agree. Insert a select instruction before NT
+ // that determines the right value.
+ SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
+ if (!SI) {
+ // Propagate fast-math-flags from phi node to its replacement select.
+ IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+ if (isa<FPMathOperator>(PN))
+ Builder.setFastMathFlags(PN.getFastMathFlags());
+
+ SI = cast<SelectInst>(
+ Builder.CreateSelect(BI->getCondition(), BB1V, BB2V,
+ BB1V->getName() + "." + BB2V->getName(), BI));
+ }
+
+ // Make the PHI node use the select for all incoming values for BB1/BB2
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
+ if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
+ PN.setIncomingValue(i, SI);
+ }
+ }
+
+ SmallVector<DominatorTree::UpdateType, 4> Updates;
+
+ // Update any PHI nodes in our new successors.
+ for (BasicBlock *Succ : successors(BB1)) {
+ AddPredecessorToBlock(Succ, BIParent, BB1);
+ if (DTU)
+ Updates.push_back({DominatorTree::Insert, BIParent, Succ});
+ }
+
+ if (DTU)
+ for (BasicBlock *Succ : successors(BI))
+ Updates.push_back({DominatorTree::Delete, BIParent, Succ});
+
+ EraseTerminatorAndDCECond(BI);
+ if (DTU)
+ DTU->applyUpdates(Updates);
+ return Changed;
+}
+
+// Check lifetime markers.
+static bool isLifeTimeMarker(const Instruction *I) {
+ if (auto II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ return true;
+ }
+ }
+ return false;
+}
+
+// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
+// into variables.
+static bool replacingOperandWithVariableIsCheap(const Instruction *I,
+ int OpIdx) {
+ return !isa<IntrinsicInst>(I);
+}
+
+// All instructions in Insts belong to different blocks that all unconditionally
+// branch to a common successor. Analyze each instruction and return true if it
+// would be possible to sink them into their successor, creating one common
+// instruction instead. For every value that would have to be provided by a
+// PHI node (because an operand varies in each input block), add it to
+// PHIOperands.
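+// For example (hypothetical IR), 'store i32 1, ptr %p' in one predecessor and
+// 'store i32 2, ptr %p' in another can be sunk into the common successor
+// (provided %p is not an alloca); the differing value operand is recorded in
+// PHIOperands so the caller can build a PHI for it.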
+static bool canSinkInstructions(
+ ArrayRef<Instruction *> Insts,
+ DenseMap<Instruction *, SmallVector<Value *, 4>> &PHIOperands) {
+ // Prune out obviously bad instructions to move. Each instruction must have
+ // exactly zero or one use, and we check later that use is by a single, common
+ // PHI instruction in the successor.
+ bool HasUse = !Insts.front()->user_empty();
+ for (auto *I : Insts) {
+ // These instructions may change or break semantics if moved.
+ if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
+ I->getType()->isTokenTy())
+ return false;
+
+ // Do not try to sink an instruction in an infinite loop - it can cause
+ // this algorithm to infinite loop.
+ if (I->getParent()->getSingleSuccessor() == I->getParent())
+ return false;
+
+ // Conservatively return false if I is an inline-asm instruction. Sinking
+ // and merging inline-asm instructions can potentially create arguments
+ // that cannot satisfy the inline-asm constraints.
+ // If the instruction has nomerge attribute, return false.
+ if (const auto *C = dyn_cast<CallBase>(I))
+ if (C->isInlineAsm() || C->cannotMerge())
+ return false;
+
+ // Each instruction must have zero or one use.
+ if (HasUse && !I->hasOneUse())
+ return false;
+ if (!HasUse && !I->user_empty())
+ return false;
+ }
+
+ const Instruction *I0 = Insts.front();
+ for (auto *I : Insts)
+ if (!I->isSameOperationAs(I0))
+ return false;
+
+ // All instructions in Insts are known to be the same opcode. If they have a
+ // use, check that the only user is a PHI or in the same block as the
+ // instruction, because if a user is in the same block as an instruction we're
+ // contemplating sinking, it must already be determined to be sinkable.
+ if (HasUse) {
+ auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
+ auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0);
+ if (!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool {
+ auto *U = cast<Instruction>(*I->user_begin());
+ return (PNUse &&
+ PNUse->getParent() == Succ &&
+ PNUse->getIncomingValueForBlock(I->getParent()) == I) ||
+ U->getParent() == I->getParent();
+ }))
+ return false;
+ }
+
+ // Because SROA can't handle speculating stores of selects, try not to sink
+ // loads, stores or lifetime markers of allocas when we'd have to create a
+ // PHI for the address operand. Also, because it is likely that loads or
+ // stores of allocas will disappear when Mem2Reg/SROA is run, don't sink
+ // them.
+ // This can cause code churn which can have unintended consequences down
+ // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244.
+ // FIXME: This is a workaround for a deficiency in SROA - see
+ // https://llvm.org/bugs/show_bug.cgi?id=30188
+ if (isa<StoreInst>(I0) && any_of(Insts, [](const Instruction *I) {
+ return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
+ }))
+ return false;
+ if (isa<LoadInst>(I0) && any_of(Insts, [](const Instruction *I) {
+ return isa<AllocaInst>(I->getOperand(0)->stripPointerCasts());
+ }))
+ return false;
+ if (isLifeTimeMarker(I0) && any_of(Insts, [](const Instruction *I) {
+ return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
+ }))
+ return false;
+
+  // For calls to be sinkable, they must all be indirect or share one callee.
+ // I.e. if we have two direct calls to different callees, we don't want to
+ // turn that into an indirect call. Likewise, if we have an indirect call,
+ // and a direct call, we don't actually want to have a single indirect call.
+ if (isa<CallBase>(I0)) {
+ auto IsIndirectCall = [](const Instruction *I) {
+ return cast<CallBase>(I)->isIndirectCall();
+ };
+ bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
+ bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
+ if (HaveIndirectCalls) {
+ if (!AllCallsAreIndirect)
+ return false;
+ } else {
+ // All callees must be identical.
+ Value *Callee = nullptr;
+ for (const Instruction *I : Insts) {
+ Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
+ if (!Callee)
+ Callee = CurrCallee;
+ else if (Callee != CurrCallee)
+ return false;
+ }
+ }
+ }
+
+ for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
+ Value *Op = I0->getOperand(OI);
+ if (Op->getType()->isTokenTy())
+ // Don't touch any operand of token type.
+ return false;
+
+ auto SameAsI0 = [&I0, OI](const Instruction *I) {
+ assert(I->getNumOperands() == I0->getNumOperands());
+ return I->getOperand(OI) == I0->getOperand(OI);
+ };
+ if (!all_of(Insts, SameAsI0)) {
+ if ((isa<Constant>(Op) && !replacingOperandWithVariableIsCheap(I0, OI)) ||
+ !canReplaceOperandWithVariable(I0, OI))
+ // We can't create a PHI from this GEP.
+ return false;
+ for (auto *I : Insts)
+ PHIOperands[I].push_back(I->getOperand(OI));
+ }
+ }
+ return true;
+}
+
+// Assuming canSinkInstructions(Blocks) has returned true, sink the last
+// instruction of every block in Blocks to their common successor, commoning
+// into one instruction.
+static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
+ auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
+
+ // canSinkInstructions returning true guarantees that every block has at
+ // least one non-terminator instruction.
+ SmallVector<Instruction*,4> Insts;
+ for (auto *BB : Blocks) {
+ Instruction *I = BB->getTerminator();
+ do {
+ I = I->getPrevNode();
+ } while (isa<DbgInfoIntrinsic>(I) && I != &BB->front());
+ if (!isa<DbgInfoIntrinsic>(I))
+ Insts.push_back(I);
+ }
+
+ // The only checking we need to do now is that all users of all instructions
+ // are the same PHI node. canSinkInstructions should have checked this but
+ // it is slightly over-aggressive - it gets confused by commutative
+ // instructions so double-check it here.
+ Instruction *I0 = Insts.front();
+ if (!I0->user_empty()) {
+ auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
+ if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool {
+ auto *U = cast<Instruction>(*I->user_begin());
+ return U == PNUse;
+ }))
+ return false;
+ }
+
+ // We don't need to do any more checking here; canSinkInstructions should
+ // have done it all for us.
+ SmallVector<Value*, 4> NewOperands;
+ for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
+ // This check is different to that in canSinkInstructions. There, we
+ // cared about the global view once simplifycfg (and instcombine) have
+ // completed - it takes into account PHIs that become trivially
+ // simplifiable. However here we need a more local view; if an operand
+ // differs we create a PHI and rely on instcombine to clean up the very
+ // small mess we may make.
+ bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
+ return I->getOperand(O) != I0->getOperand(O);
+ });
+ if (!NeedPHI) {
+ NewOperands.push_back(I0->getOperand(O));
+ continue;
+ }
+
+ // Create a new PHI in the successor block and populate it.
+ auto *Op = I0->getOperand(O);
+ assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
+ auto *PN = PHINode::Create(Op->getType(), Insts.size(),
+ Op->getName() + ".sink", &BBEnd->front());
+ for (auto *I : Insts)
+ PN->addIncoming(I->getOperand(O), I->getParent());
+ NewOperands.push_back(PN);
+ }
+
+ // Arbitrarily use I0 as the new "common" instruction; remap its operands
+ // and move it to the start of the successor block.
+ for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
+ I0->getOperandUse(O).set(NewOperands[O]);
+ I0->moveBefore(&*BBEnd->getFirstInsertionPt());
+
+ // Update metadata and IR flags, and merge debug locations.
+ for (auto *I : Insts)
+ if (I != I0) {
+ // The debug location for the "common" instruction is the merged locations
+ // of all the commoned instructions. We start with the original location
+ // of the "common" instruction and iteratively merge each location in the
+ // loop below.
+      // This is an N-way merge, which will be inefficient if I0 is a CallInst.
+      // However, since an N-way merge on CallInsts is rare, we use the
+      // simplified API here instead of the more complex N-way-merge API.
+ I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
+ combineMetadataForCSE(I0, I, true);
+ I0->andIRFlags(I);
+ }
+
+ if (!I0->user_empty()) {
+    // canSinkInstructions checked that all instructions were used by
+ // one and only one PHI node. Find that now, RAUW it to our common
+ // instruction and nuke it.
+ auto *PN = cast<PHINode>(*I0->user_begin());
+ PN->replaceAllUsesWith(I0);
+ PN->eraseFromParent();
+ }
+
+ // Finally nuke all instructions apart from the common instruction.
+ for (auto *I : Insts) {
+ if (I == I0)
+ continue;
+ // The remaining uses are debug users, replace those with the common inst.
+ // In most (all?) cases this just introduces a use-before-def.
+ assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
+ I->replaceAllUsesWith(I0);
+ I->eraseFromParent();
+ }
+
+ return true;
+}
+
+namespace {
+
+ // LockstepReverseIterator - Iterates through instructions
+ // in a set of blocks in reverse order from the first non-terminator.
+ // For example (assume all blocks have size n):
+ // LockstepReverseIterator I([B1, B2, B3]);
+ // *I-- = [B1[n], B2[n], B3[n]];
+ // *I-- = [B1[n-1], B2[n-1], B3[n-1]];
+ // *I-- = [B1[n-2], B2[n-2], B3[n-2]];
+ // ...
+ class LockstepReverseIterator {
+ ArrayRef<BasicBlock*> Blocks;
+ SmallVector<Instruction*,4> Insts;
+ bool Fail;
+
+ public:
+ LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : Blocks(Blocks) {
+ reset();
+ }
+
+ void reset() {
+ Fail = false;
+ Insts.clear();
+ for (auto *BB : Blocks) {
+ Instruction *Inst = BB->getTerminator();
+ for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
+ Inst = Inst->getPrevNode();
+ if (!Inst) {
+ // Block wasn't big enough.
+ Fail = true;
+ return;
+ }
+ Insts.push_back(Inst);
+ }
+ }
+
+ bool isValid() const {
+ return !Fail;
+ }
+
+ void operator--() {
+ if (Fail)
+ return;
+ for (auto *&Inst : Insts) {
+ for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
+ Inst = Inst->getPrevNode();
+ // Already at beginning of block.
+ if (!Inst) {
+ Fail = true;
+ return;
+ }
+ }
+ }
+
+ void operator++() {
+ if (Fail)
+ return;
+ for (auto *&Inst : Insts) {
+ for (Inst = Inst->getNextNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
+ Inst = Inst->getNextNode();
+ // Already at end of block.
+ if (!Inst) {
+ Fail = true;
+ return;
+ }
+ }
+ }
+
+ ArrayRef<Instruction*> operator * () const {
+ return Insts;
+ }
+ };
+
+} // end anonymous namespace
+
+/// Check whether BB's predecessors end with unconditional branches. If so,
+/// sink any common code from the predecessors to BB.
+static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
+ DomTreeUpdater *DTU) {
+ // We support two situations:
+ // (1) all incoming arcs are unconditional
+ // (2) there are non-unconditional incoming arcs
+ //
+ // (2) is very common in switch defaults and
+ // else-if patterns;
+ //
+ // if (a) f(1);
+ // else if (b) f(2);
+ //
+ // produces:
+ //
+ // [if]
+ // / \
+ // [f(1)] [if]
+ // | | \
+ // | | |
+ // | [f(2)]|
+ // \ | /
+ // [ end ]
+ //
+ // [end] has two unconditional predecessor arcs and one conditional. The
+ // conditional refers to the implicit empty 'else' arc. This conditional
+ // arc can also be caused by an empty default block in a switch.
+ //
+ // In this case, we attempt to sink code from all *unconditional* arcs.
+ // If we can sink instructions from these arcs (determined during the scan
+ // phase below) we insert a common successor for all unconditional arcs and
+ // connect that to [end], to enable sinking:
+ //
+ // [if]
+ // / \
+ // [x(1)] [if]
+ // | | \
+ // | | \
+ // | [x(2)] |
+ // \ / |
+ // [sink.split] |
+ // \ /
+ // [ end ]
+ //
+ SmallVector<BasicBlock*,4> UnconditionalPreds;
+ bool HaveNonUnconditionalPredecessors = false;
+ for (auto *PredBB : predecessors(BB)) {
+ auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
+ if (PredBr && PredBr->isUnconditional())
+ UnconditionalPreds.push_back(PredBB);
+ else
+ HaveNonUnconditionalPredecessors = true;
+ }
+ if (UnconditionalPreds.size() < 2)
+ return false;
+
+ // We take a two-step approach to tail sinking. First we scan from the end of
+ // each block upwards in lockstep. If the n'th instruction from the end of each
+ // block can be sunk, those instructions are added to ValuesToSink and we
+ // carry on. If we can sink an instruction but need to PHI-merge some operands
+ // (because they're not identical in each instruction) we add these to
+ // PHIOperands.
+ int ScanIdx = 0;
+ SmallPtrSet<Value*,4> InstructionsToSink;
+ DenseMap<Instruction*, SmallVector<Value*,4>> PHIOperands;
+ LockstepReverseIterator LRI(UnconditionalPreds);
+ while (LRI.isValid() &&
+ canSinkInstructions(*LRI, PHIOperands)) {
+ LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
+ << "\n");
+ InstructionsToSink.insert((*LRI).begin(), (*LRI).end());
+ ++ScanIdx;
+ --LRI;
+ }
+
+ // If no instructions can be sunk, early-return.
+ if (ScanIdx == 0)
+ return false;
+
+ bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
+
+ if (!followedByDeoptOrUnreachable) {
+ // Okay, we *could* sink last ScanIdx instructions. But how many can we
+ // actually sink before encountering instruction that is unprofitable to
+ // sink?
+ auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
+ unsigned NumPHIdValues = 0;
+ for (auto *I : *LRI)
+ for (auto *V : PHIOperands[I]) {
+ if (!InstructionsToSink.contains(V))
+ ++NumPHIdValues;
+ // FIXME: this check is overly optimistic. We may end up not sinking
+ // said instruction, due to the very same profitability check.
+ // See @creating_too_many_phis in sink-common-code.ll.
+ }
+ LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
+ unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();
+ if ((NumPHIdValues % UnconditionalPreds.size()) != 0)
+ NumPHIInsts++;
+
+ return NumPHIInsts <= 1;
+ };
+
+ // We've determined that we are going to sink last ScanIdx instructions,
+ // and recorded them in InstructionsToSink. Now, some instructions may be
+ // unprofitable to sink. But that determination depends on the instructions
+ // that we are going to sink.
+
+ // First, forward scan: find the first instruction unprofitable to sink,
+ // recording all the ones that are profitable to sink.
+    // FIXME: would it be better, after we detect that not all are profitable,
+    // to either record the profitable ones, or erase the unprofitable ones?
+ // Maybe we need to choose (at runtime) the one that will touch least
+ // instrs?
+ LRI.reset();
+ int Idx = 0;
+ SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
+ while (Idx < ScanIdx) {
+ if (!ProfitableToSinkInstruction(LRI)) {
+ // Too many PHIs would be created.
+ LLVM_DEBUG(
+ dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
+ break;
+ }
+ InstructionsProfitableToSink.insert((*LRI).begin(), (*LRI).end());
+ --LRI;
+ ++Idx;
+ }
+
+ // If no instructions can be sunk, early-return.
+ if (Idx == 0)
+ return false;
+
+ // Did we determine that (only) some instructions are unprofitable to sink?
+ if (Idx < ScanIdx) {
+ // Okay, some instructions are unprofitable.
+ ScanIdx = Idx;
+ InstructionsToSink = InstructionsProfitableToSink;
+
+ // But, that may make other instructions unprofitable, too.
+ // So, do a backward scan, do any earlier instructions become
+ // unprofitable?
+ assert(
+ !ProfitableToSinkInstruction(LRI) &&
+ "We already know that the last instruction is unprofitable to sink");
+ ++LRI;
+ --Idx;
+ while (Idx >= 0) {
+ // If we detect that an instruction becomes unprofitable to sink,
+ // all earlier instructions won't be sunk either,
+ // so preemptively keep InstructionsProfitableToSink in sync.
+ // FIXME: is this the most performant approach?
+ for (auto *I : *LRI)
+ InstructionsProfitableToSink.erase(I);
+ if (!ProfitableToSinkInstruction(LRI)) {
+ // Everything starting with this instruction won't be sunk.
+ ScanIdx = Idx;
+ InstructionsToSink = InstructionsProfitableToSink;
+ }
+ ++LRI;
+ --Idx;
+ }
+ }
+
+ // If no instructions can be sunk, early-return.
+ if (ScanIdx == 0)
+ return false;
+ }
+
+ bool Changed = false;
+
+ if (HaveNonUnconditionalPredecessors) {
+ if (!followedByDeoptOrUnreachable) {
+ // It is always legal to sink common instructions from unconditional
+ // predecessors. However, if not all predecessors are unconditional,
+ // this transformation might be pessimizing. So as a rule of thumb,
+ // don't do it unless we'd sink at least one non-speculatable instruction.
+ // See https://bugs.llvm.org/show_bug.cgi?id=30244
+ LRI.reset();
+ int Idx = 0;
+ bool Profitable = false;
+ while (Idx < ScanIdx) {
+ if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
+ Profitable = true;
+ break;
+ }
+ --LRI;
+ ++Idx;
+ }
+ if (!Profitable)
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
+ // We have a conditional edge and we're going to sink some instructions.
+ // Insert a new block postdominating all blocks we're going to sink from.
+ if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
+ // Edges couldn't be split.
+ return false;
+ Changed = true;
+ }
+
+ // Now that we've analyzed all potential sinking candidates, perform the
+ // actual sink. We iteratively sink the last non-terminator of the source
+ // blocks into their common successor unless doing so would require too
+ // many PHI instructions to be generated (currently only one PHI is allowed
+ // per sunk instruction).
+ //
+ // We can use InstructionsToSink to discount values needing PHI-merging that will
+ // actually be sunk in a later iteration. This allows us to be more
+ // aggressive in what we sink. This does allow a false positive where we
+ // sink presuming a later value will also be sunk, but stop half way through
+ // and never actually sink it which means we produce more PHIs than intended.
+ // This is unlikely in practice though.
+ int SinkIdx = 0;
+ for (; SinkIdx != ScanIdx; ++SinkIdx) {
+ LLVM_DEBUG(dbgs() << "SINK: Sink: "
+ << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
+ << "\n");
+
+ // Because we've sunk every instruction in turn, the current instruction to
+ // sink is always at index 0.
+ LRI.reset();
+
+ if (!sinkLastInstruction(UnconditionalPreds)) {
+ LLVM_DEBUG(
+ dbgs()
+ << "SINK: stopping here, failed to actually sink instruction!\n");
+ break;
+ }
+
+ NumSinkCommonInstrs++;
+ Changed = true;
+ }
+ if (SinkIdx != 0)
+ ++NumSinkCommonCode;
+ return Changed;
+}
+
+namespace {
+
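+// Partition the `invoke`s that unwind to a common landingpad into sets whose
+// members are mutually mergeable as per shouldBelongToSameSet().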
+struct CompatibleSets {
+ using SetTy = SmallVector<InvokeInst *, 2>;
+
+ SmallVector<SetTy, 1> Sets;
+
+ static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
+
+ SetTy &getCompatibleSet(InvokeInst *II);
+
+ void insert(InvokeInst *II);
+};
+
+CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
+ // Perform a linear scan over all the existing sets, see if the new `invoke`
+ // is compatible with any particular set. Since we know that all the `invokes`
+ // within a set are compatible, only check the first `invoke` in each set.
+ // WARNING: at worst, this has quadratic complexity.
+ for (CompatibleSets::SetTy &Set : Sets) {
+ if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
+ return Set;
+ }
+
+ // Otherwise, we either had no sets yet, or this invoke forms a new set.
+ return Sets.emplace_back();
+}
+
+void CompatibleSets::insert(InvokeInst *II) {
+ getCompatibleSet(II).emplace_back(II);
+}
+
+bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
+ assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
+
+ // Can we theoretically merge these `invoke`s?
+ auto IsIllegalToMerge = [](InvokeInst *II) {
+ return II->cannotMerge() || II->isInlineAsm();
+ };
+ if (any_of(Invokes, IsIllegalToMerge))
+ return false;
+
+ // Either both `invoke`s must be direct,
+ // or both `invoke`s must be indirect.
+ auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
+ bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
+ bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
+ if (HaveIndirectCalls) {
+ if (!AllCallsAreIndirect)
+ return false;
+ } else {
+ // All callees must be identical.
+ Value *Callee = nullptr;
+ for (InvokeInst *II : Invokes) {
+ Value *CurrCallee = II->getCalledOperand();
+ assert(CurrCallee && "There is always a called operand.");
+ if (!Callee)
+ Callee = CurrCallee;
+ else if (Callee != CurrCallee)
+ return false;
+ }
+ }
+
+ // Either both `invoke`s must not have a normal destination,
+  // or both `invoke`s must have a normal destination.
+ auto HasNormalDest = [](InvokeInst *II) {
+ return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
+ };
+ if (any_of(Invokes, HasNormalDest)) {
+    // Do not merge an `invoke` that does not have a normal destination with
+    // one that does have a normal destination, even though doing so would be
+    // legal.
+ if (!all_of(Invokes, HasNormalDest))
+ return false;
+
+ // All normal destinations must be identical.
+ BasicBlock *NormalBB = nullptr;
+ for (InvokeInst *II : Invokes) {
+ BasicBlock *CurrNormalBB = II->getNormalDest();
+ assert(CurrNormalBB && "There is always a 'continue to' basic block.");
+ if (!NormalBB)
+ NormalBB = CurrNormalBB;
+ else if (NormalBB != CurrNormalBB)
+ return false;
+ }
+
+ // In the normal destination, the incoming values for these two `invoke`s
+ // must be compatible.
+ SmallPtrSet<Value *, 16> EquivalenceSet(Invokes.begin(), Invokes.end());
+ if (!IncomingValuesAreCompatible(
+ NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
+ &EquivalenceSet))
+ return false;
+ }
+
+#ifndef NDEBUG
+ // All unwind destinations must be identical.
+ // We know that because we have started from said unwind destination.
+ BasicBlock *UnwindBB = nullptr;
+ for (InvokeInst *II : Invokes) {
+ BasicBlock *CurrUnwindBB = II->getUnwindDest();
+ assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
+ if (!UnwindBB)
+ UnwindBB = CurrUnwindBB;
+ else
+ assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
+ }
+#endif
+
+ // In the unwind destination, the incoming values for these two `invoke`s
+ // must be compatible.
+ if (!IncomingValuesAreCompatible(
+ Invokes.front()->getUnwindDest(),
+ {Invokes[0]->getParent(), Invokes[1]->getParent()}))
+ return false;
+
+ // Ignoring arguments, these `invoke`s must be identical,
+ // including operand bundles.
+ const InvokeInst *II0 = Invokes.front();
+ for (auto *II : Invokes.drop_front())
+ if (!II->isSameOperationAs(II0))
+ return false;
+
+ // Can we theoretically form the data operands for the merged `invoke`?
+ auto IsIllegalToMergeArguments = [](auto Ops) {
+ Type *Ty = std::get<0>(Ops)->getType();
+ assert(Ty == std::get<1>(Ops)->getType() && "Incompatible types?");
+ return Ty->isTokenTy() && std::get<0>(Ops) != std::get<1>(Ops);
+ };
+ assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
+ if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
+ IsIllegalToMergeArguments))
+ return false;
+
+ return true;
+}
+
+} // namespace
+
+// Merge all invokes in the provided set, all of which are compatible
+// as per the `CompatibleSets::shouldBelongToSameSet()`.
+static void MergeCompatibleInvokesImpl(ArrayRef<InvokeInst *> Invokes,
+ DomTreeUpdater *DTU) {
+ assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
+
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+ if (DTU)
+ Updates.reserve(2 + 3 * Invokes.size());
+
+ bool HasNormalDest =
+ !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
+
+ // Clone one of the invokes into a new basic block.
+ // Since they are all compatible, it doesn't matter which invoke is cloned.
+ InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
+ InvokeInst *II0 = Invokes.front();
+ BasicBlock *II0BB = II0->getParent();
+ BasicBlock *InsertBeforeBlock =
+ II0->getParent()->getIterator()->getNextNode();
+ Function *Func = II0BB->getParent();
+ LLVMContext &Ctx = II0->getContext();
+
+ BasicBlock *MergedInvokeBB = BasicBlock::Create(
+ Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
+
+ auto *MergedInvoke = cast<InvokeInst>(II0->clone());
+ // NOTE: all invokes have the same attributes, so no handling needed.
+ MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
+
+ if (!HasNormalDest) {
+ // This set does not have a normal destination,
+ // so just form a new block with unreachable terminator.
+ BasicBlock *MergedNormalDest = BasicBlock::Create(
+ Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
+ new UnreachableInst(Ctx, MergedNormalDest);
+ MergedInvoke->setNormalDest(MergedNormalDest);
+ }
+
+    // The unwind destination, however, remains identical for all invokes here.
+
+ return MergedInvoke;
+ }();
+
+ if (DTU) {
+ // Predecessor blocks that contained these invokes will now branch to
+ // the new block that contains the merged invoke, ...
+ for (InvokeInst *II : Invokes)
+ Updates.push_back(
+ {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
+
+ // ... which has the new `unreachable` block as normal destination,
+ // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
+ for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
+ Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
+ SuccBBOfMergedInvoke});
+
+ // Since predecessor blocks now unconditionally branch to a new block,
+ // they no longer branch to their original successors.
+ for (InvokeInst *II : Invokes)
+ for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
+ Updates.push_back(
+ {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
+ }
+
+ bool IsIndirectCall = Invokes[0]->isIndirectCall();
+
+ // Form the merged operands for the merged invoke.
+ for (Use &U : MergedInvoke->operands()) {
+ // Only PHI together the indirect callees and data operands.
+ if (MergedInvoke->isCallee(&U)) {
+ if (!IsIndirectCall)
+ continue;
+ } else if (!MergedInvoke->isDataOperand(&U))
+ continue;
+
+ // Don't create trivial PHI's with all-identical incoming values.
+ bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
+ return II->getOperand(U.getOperandNo()) != U.get();
+ });
+ if (!NeedPHI)
+ continue;
+
+ // Form a PHI out of all the data ops under this index.
+ PHINode *PN = PHINode::Create(
+ U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke);
+ for (InvokeInst *II : Invokes)
+ PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
+
+ U.set(PN);
+ }
+
+ // We've ensured that each PHI node has compatible (identical) incoming values
+ // when coming from each of the `invoke`s in the current merge set,
+ // so update the PHI nodes accordingly.
+ for (BasicBlock *Succ : successors(MergedInvoke))
+ AddPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
+ /*ExistPred=*/Invokes.front()->getParent());
+
+ // And finally, replace the original `invoke`s with an unconditional branch
+ // to the block with the merged `invoke`. Also, give that merged `invoke`
+ // the merged debugloc of all the original `invoke`s.
+ const DILocation *MergedDebugLoc = nullptr;
+ for (InvokeInst *II : Invokes) {
+ // Compute the debug location common to all the original `invoke`s.
+ if (!MergedDebugLoc)
+ MergedDebugLoc = II->getDebugLoc();
+ else
+ MergedDebugLoc =
+ DILocation::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
+
+ // And replace the old `invoke` with an unconditional branch
+ // to the block with the merged `invoke`.
+ for (BasicBlock *OrigSuccBB : successors(II->getParent()))
+ OrigSuccBB->removePredecessor(II->getParent());
+ BranchInst::Create(MergedInvoke->getParent(), II->getParent());
+ II->replaceAllUsesWith(MergedInvoke);
+ II->eraseFromParent();
+ ++NumInvokesMerged;
+ }
+ MergedInvoke->setDebugLoc(MergedDebugLoc);
+ ++NumInvokeSetsFormed;
+
+ if (DTU)
+ DTU->applyUpdates(Updates);
+}
+
+/// If this block is a `landingpad` exception handling block, categorize all
+/// the predecessor `invoke`s into sets, with all `invoke`s in each set
+/// being "mergeable" together, and then merge invokes in each set together.
+///
+/// This is a weird mix of hoisting and sinking. Visually, it goes from:
+///          [...]        [...]
+///            |            |
+///        [invoke0]    [invoke1]
+///           / \          / \
+///     [cont0] [landingpad] [cont1]
+/// to:
+///      [...] [...]
+///        \     /
+///       [invoke]
+///        /    \
+///   [cont] [landingpad]
+///
+/// But of course we can only do that if the invokes share the `landingpad`,
+/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
+/// and the invoked functions are "compatible".
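+///
+/// For illustration only, a rough sketch of the IR shape this targets (names
+/// are invented for the example; the real merged block and phi names come
+/// from the cloned invoke's block):
+/// \code
+///   bb0:
+///     invoke void @f(i32 %a) to label %cont unwind label %lpad
+///   bb1:
+///     invoke void @f(i32 %b) to label %cont unwind label %lpad
+/// \endcode
+/// becomes, roughly:
+/// \code
+///   bb0:
+///     br label %bb0.invoke
+///   bb1:
+///     br label %bb0.invoke
+///   bb0.invoke:
+///     %arg = phi i32 [ %a, %bb0 ], [ %b, %bb1 ]
+///     invoke void @f(i32 %arg) to label %cont unwind label %lpad
+/// \endcode
+/// with any phis in %cont and %lpad updated accordingly.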
+static bool MergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU) {
+ if (!EnableMergeCompatibleInvokes)
+ return false;
+
+ bool Changed = false;
+
+ // FIXME: generalize to all exception handling blocks?
+ if (!BB->isLandingPad())
+ return Changed;
+
+ CompatibleSets Grouper;
+
+ // Record all the predecessors of this `landingpad`. As per verifier,
+ // the only allowed predecessor is the unwind edge of an `invoke`.
+ // We want to group "compatible" `invokes` into the same set to be merged.
+ for (BasicBlock *PredBB : predecessors(BB))
+ Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
+
+ // And now, merge `invoke`s that were grouped together.
+ for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
+ if (Invokes.size() < 2)
+ continue;
+ Changed = true;
+ MergeCompatibleInvokesImpl(Invokes, DTU);
+ }
+
+ return Changed;
+}
+
+namespace {
+/// Track ephemeral values, which should be ignored for cost-modelling
+/// purposes. Requires walking instructions in reverse order.
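+///
+/// For illustration only (not how the pass spells it), instructions must be
+/// fed bottom-up, since an instruction is ephemeral only if all of its users
+/// have already been classified as ephemeral:
+/// \code
+///   EphemeralValueTracker Tracker;
+///   for (Instruction &I : reverse(*SomeBB))
+///     if (Tracker.track(&I))
+///       ; // I only feeds llvm.assume, directly or transitively.
+/// \endcode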
+class EphemeralValueTracker {
+ SmallPtrSet<const Instruction *, 32> EphValues;
+
+ bool isEphemeral(const Instruction *I) {
+ if (isa<AssumeInst>(I))
+ return true;
+ return !I->mayHaveSideEffects() && !I->isTerminator() &&
+ all_of(I->users(), [&](const User *U) {
+ return EphValues.count(cast<Instruction>(U));
+ });
+ }
+
+public:
+ bool track(const Instruction *I) {
+ if (isEphemeral(I)) {
+ EphValues.insert(I);
+ return true;
+ }
+ return false;
+ }
+
+ bool contains(const Instruction *I) const { return EphValues.contains(I); }
+};
+} // namespace
+
+/// Determine if we can hoist or sink a sole store instruction out of a
+/// conditional block.
+///
+/// We are looking for code like the following:
+/// BrBB:
+/// store i32 %add, i32* %arrayidx2
+/// ... // No other stores or function calls (we could be calling a memory
+/// ... // function).
+/// %cmp = icmp ult %x, %y
+/// br i1 %cmp, label %EndBB, label %ThenBB
+/// ThenBB:
+/// store i32 %add5, i32* %arrayidx2
+/// br label EndBB
+/// EndBB:
+/// ...
+/// We are going to transform this into:
+/// BrBB:
+/// store i32 %add, i32* %arrayidx2
+/// ... //
+/// %cmp = icmp ult %x, %y
+/// %add.add5 = select i1 %cmp, i32 %add, %add5
+/// store i32 %add.add5, i32* %arrayidx2
+/// ...
+///
+/// \return The pointer to the value of the previous store if the store can be
+/// hoisted into the predecessor block. nullptr otherwise.
+static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
+ BasicBlock *StoreBB, BasicBlock *EndBB) {
+ StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
+ if (!StoreToHoist)
+ return nullptr;
+
+ // Volatile or atomic.
+ if (!StoreToHoist->isSimple())
+ return nullptr;
+
+ Value *StorePtr = StoreToHoist->getPointerOperand();
+ Type *StoreTy = StoreToHoist->getValueOperand()->getType();
+
+ // Look for a store to the same pointer in BrBB.
+ unsigned MaxNumInstToLookAt = 9;
+ // Skip pseudo probe intrinsic calls which are not really killing any memory
+ // accesses.
+ for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
+ if (!MaxNumInstToLookAt)
+ break;
+ --MaxNumInstToLookAt;
+
+ // Could be a call to a function that affects memory, like free().
+ if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
+ return nullptr;
+
+ if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
+ // Found the previous store to same location and type. Make sure it is
+ // simple, to avoid introducing a spurious non-atomic write after an
+ // atomic write.
+ if (SI->getPointerOperand() == StorePtr &&
+ SI->getValueOperand()->getType() == StoreTy && SI->isSimple())
+ // Found the previous store, return its value operand.
+ return SI->getValueOperand();
+ return nullptr; // Unknown store.
+ }
+
+ if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
+ if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
+ LI->isSimple()) {
+ // Local objects (created by an `alloca` instruction) are always
+ // writable, so once we are past a read from a location it is valid to
+ // also write to that same location.
+ // If the address of the local object never escapes the function, that
+ // means it's never concurrently read or written, hence moving the store
+ // from under the condition will not introduce a data race.
+ auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(StorePtr));
+ if (AI && !PointerMayBeCaptured(AI, false, true))
+ // Found a previous load, return it.
+ return LI;
+ }
+ // The load didn't work out, but we may still find a store.
+ }
+ }
+
+ return nullptr;
+}
+
+/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
+/// converted to selects.
+static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
+ BasicBlock *EndBB,
+ unsigned &SpeculatedInstructions,
+ InstructionCost &Cost,
+ const TargetTransformInfo &TTI) {
+ TargetTransformInfo::TargetCostKind CostKind =
+ BB->getParent()->hasMinSize()
+ ? TargetTransformInfo::TCK_CodeSize
+ : TargetTransformInfo::TCK_SizeAndLatency;
+
+ bool HaveRewritablePHIs = false;
+ for (PHINode &PN : EndBB->phis()) {
+ Value *OrigV = PN.getIncomingValueForBlock(BB);
+ Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
+
+ // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf.
+ // Skip PHIs which are trivial.
+ if (ThenV == OrigV)
+ continue;
+
+ Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
+
+ // Don't convert to selects if we could remove undefined behavior instead.
+ if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
+ passingValueIsAlwaysUndefined(ThenV, &PN))
+ return false;
+
+ HaveRewritablePHIs = true;
+ ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
+ ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
+ if (!OrigCE && !ThenCE)
+ continue; // Known cheap (FIXME: Maybe not true for aggregates).
+
+ InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
+ InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
+ InstructionCost MaxCost =
+ 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
+ if (OrigCost + ThenCost > MaxCost)
+ return false;
+
+ // Account for the cost of an unfolded ConstantExpr which could end up
+ // getting expanded into Instructions.
+ // FIXME: This doesn't account for how many operations are combined in the
+ // constant expression.
+ ++SpeculatedInstructions;
+ if (SpeculatedInstructions > 1)
+ return false;
+ }
+
+ return HaveRewritablePHIs;
+}
+
+/// Speculate a conditional basic block flattening the CFG.
+///
+/// Note that this is a very risky transform currently. Speculating
+/// instructions like this is most often not desirable. Instead, there is an MI
+/// pass which can do it with full awareness of the resource constraints.
+/// However, some cases are "obvious" and we should handle directly. An example of
+/// this is speculating a single, reasonably cheap instruction.
+///
+/// There is only one distinct advantage to flattening the CFG at the IR level:
+/// it makes very common but simplistic optimizations such as are common in
+/// instcombine and the DAG combiner more powerful by removing CFG edges and
+/// modeling their effects with easier to reason about SSA value graphs.
+///
+///
+/// An illustration of this transform is turning this IR:
+/// \code
+/// BB:
+/// %cmp = icmp ult %x, %y
+/// br i1 %cmp, label %EndBB, label %ThenBB
+/// ThenBB:
+/// %sub = sub %x, %y
+/// br label EndBB
+/// EndBB:
+/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
+/// ...
+/// \endcode
+///
+/// Into this IR:
+/// \code
+/// BB:
+/// %cmp = icmp ult %x, %y
+/// %sub = sub %x, %y
+/// %cond = select i1 %cmp, 0, %sub
+/// ...
+/// \endcode
+///
+/// \returns true if the conditional block is removed.
+bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
+ const TargetTransformInfo &TTI) {
+ // Be conservative for now. FP select instruction can often be expensive.
+ Value *BrCond = BI->getCondition();
+ if (isa<FCmpInst>(BrCond))
+ return false;
+
+ BasicBlock *BB = BI->getParent();
+ BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
+ InstructionCost Budget =
+ PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
+
+ // If ThenBB is actually on the false edge of the conditional branch, remember
+ // to swap the select operands later.
+ bool Invert = false;
+ if (ThenBB != BI->getSuccessor(0)) {
+ assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
+ Invert = true;
+ }
+ assert(EndBB == BI->getSuccessor(!Invert) && "No edge from 'if' block to end block");
+
+ // If the branch is non-unpredictable, and is predicted to *not* branch to
+ // the `then` block, then avoid speculating it.
+ if (!BI->getMetadata(LLVMContext::MD_unpredictable)) {
+ uint64_t TWeight, FWeight;
+ if (extractBranchWeights(*BI, TWeight, FWeight) &&
+ (TWeight + FWeight) != 0) {
+ uint64_t EndWeight = Invert ? TWeight : FWeight;
+ BranchProbability BIEndProb =
+ BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
+ BranchProbability Likely = TTI.getPredictableBranchThreshold();
+ if (BIEndProb >= Likely)
+ return false;
+ }
+ }
+
+ // Keep a count of how many times instructions are used within ThenBB when
+ // they are candidates for sinking into ThenBB. Specifically:
+ // - They are defined in BB, and
+ // - They have no side effects, and
+ // - All of their uses are in ThenBB.
+ SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
+
+ SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
+
+ unsigned SpeculatedInstructions = 0;
+ Value *SpeculatedStoreValue = nullptr;
+ StoreInst *SpeculatedStore = nullptr;
+ EphemeralValueTracker EphTracker;
+ for (Instruction &I : reverse(drop_end(*ThenBB))) {
+ // Skip debug info.
+ if (isa<DbgInfoIntrinsic>(I)) {
+ SpeculatedDbgIntrinsics.push_back(&I);
+ continue;
+ }
+
+ // Skip pseudo probes. The consequence is we lose track of the branch
+ // probability for ThenBB, which is fine since the optimization here takes
+ // place regardless of the branch probability.
+ if (isa<PseudoProbeInst>(I)) {
+ // The probe should be deleted so that it will not be over-counted when
+ // the samples collected on the non-conditional path are counted towards
+ // the conditional path. We leave it for the counts inference algorithm to
+ // figure out a proper count for an unknown probe.
+ SpeculatedDbgIntrinsics.push_back(&I);
+ continue;
+ }
+
+ // Ignore ephemeral values, they will be dropped by the transform.
+ if (EphTracker.track(&I))
+ continue;
+
+ // Only speculatively execute a single instruction (not counting the
+ // terminator) for now.
+ ++SpeculatedInstructions;
+ if (SpeculatedInstructions > 1)
+ return false;
+
+ // Don't hoist the instruction if it's unsafe or expensive.
+ if (!isSafeToSpeculativelyExecute(&I) &&
+ !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(
+ &I, BB, ThenBB, EndBB))))
+ return false;
+ if (!SpeculatedStoreValue &&
+ computeSpeculationCost(&I, TTI) >
+ PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
+ return false;
+
+ // Store the store speculation candidate.
+ if (SpeculatedStoreValue)
+ SpeculatedStore = cast<StoreInst>(&I);
+
+ // Do not hoist the instruction if any of its operands are defined but not
+ // used in BB. The transformation will prevent the operand from
+ // being sunk into the use block.
+ for (Use &Op : I.operands()) {
+ Instruction *OpI = dyn_cast<Instruction>(Op);
+ if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
+ continue; // Not a candidate for sinking.
+
+ ++SinkCandidateUseCounts[OpI];
+ }
+ }
+
+ // Consider any sink candidates which are only used in ThenBB as costs for
+ // speculation. Note, while we iterate over a DenseMap here, we are summing
+ // and so iteration order isn't significant.
+ for (const auto &[Inst, Count] : SinkCandidateUseCounts)
+ if (Inst->hasNUses(Count)) {
+ ++SpeculatedInstructions;
+ if (SpeculatedInstructions > 1)
+ return false;
+ }
+
+ // Check that we can insert the selects and that it's not too expensive to do
+ // so.
+ bool Convert = SpeculatedStore != nullptr;
+ InstructionCost Cost = 0;
+ Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
+ SpeculatedInstructions,
+ Cost, TTI);
+ if (!Convert || Cost > Budget)
+ return false;
+
+ // If we get here, we can hoist the instruction and if-convert.
+ LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
+
+ // Insert a select of the value of the speculated store.
+ if (SpeculatedStoreValue) {
+ IRBuilder<NoFolder> Builder(BI);
+ Value *OrigV = SpeculatedStore->getValueOperand();
+ Value *TrueV = SpeculatedStore->getValueOperand();
+ Value *FalseV = SpeculatedStoreValue;
+ if (Invert)
+ std::swap(TrueV, FalseV);
+ Value *S = Builder.CreateSelect(
+ BrCond, TrueV, FalseV, "spec.store.select", BI);
+ SpeculatedStore->setOperand(0, S);
+ SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
+ SpeculatedStore->getDebugLoc());
+ // The value stored is still conditional, but the store itself is now
+ // unconditionally executed, so we must be sure that any linked dbg.assign
+ // intrinsics are tracking the new stored value (the result of the
+ // select). If we don't, and the store were to be removed by another pass
+ // (e.g. DSE), then we'd eventually end up emitting a location describing
+ // the conditional value, unconditionally.
+ //
+ // === Before this transformation ===
+ // pred:
+ // store %one, %x.dest, !DIAssignID !1
+ // dbg.assign %one, "x", ..., !1, ...
+ // br %cond if.then
+ //
+ // if.then:
+ // store %two, %x.dest, !DIAssignID !2
+ // dbg.assign %two, "x", ..., !2, ...
+ //
+ // === After this transformation ===
+ // pred:
+ // store %one, %x.dest, !DIAssignID !1
+ // dbg.assign %one, "x", ..., !1
+ // ...
+ // %merge = select %cond, %two, %one
+ // store %merge, %x.dest, !DIAssignID !2
+ // dbg.assign %merge, "x", ..., !2
+ for (auto *DAI : at::getAssignmentMarkers(SpeculatedStore)) {
+ if (any_of(DAI->location_ops(), [&](Value *V) { return V == OrigV; }))
+ DAI->replaceVariableLocationOp(OrigV, S);
+ }
+ }
+
+ // Metadata can be dependent on the condition we are hoisting above.
+ // Conservatively strip all metadata on the instruction. Drop the debug loc
+ // to avoid making it appear as if the condition is a constant, which would
+ // be misleading while debugging.
+ // Similarly strip attributes that maybe dependent on condition we are
+ // hoisting above.
+ for (auto &I : make_early_inc_range(*ThenBB)) {
+ if (!SpeculatedStoreValue || &I != SpeculatedStore) {
+ // Don't update the DILocation of dbg.assign intrinsics.
+ if (!isa<DbgAssignIntrinsic>(&I))
+ I.setDebugLoc(DebugLoc());
+ }
+ I.dropUndefImplyingAttrsAndUnknownMetadata();
+
+ // Drop ephemeral values.
+ if (EphTracker.contains(&I)) {
+ I.replaceAllUsesWith(PoisonValue::get(I.getType()));
+ I.eraseFromParent();
+ }
+ }
+
+ // Hoist the instructions.
+ BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
+ std::prev(ThenBB->end()));
+
+ // Insert selects and rewrite the PHI operands.
+ IRBuilder<NoFolder> Builder(BI);
+ for (PHINode &PN : EndBB->phis()) {
+ unsigned OrigI = PN.getBasicBlockIndex(BB);
+ unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
+ Value *OrigV = PN.getIncomingValue(OrigI);
+ Value *ThenV = PN.getIncomingValue(ThenI);
+
+ // Skip PHIs which are trivial.
+ if (OrigV == ThenV)
+ continue;
+
+ // Create a select whose true value is the speculatively executed value and
+ // false value is the pre-existing value. Swap them if the branch
+ // destinations were inverted.
+ Value *TrueV = ThenV, *FalseV = OrigV;
+ if (Invert)
+ std::swap(TrueV, FalseV);
+ Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
+ PN.setIncomingValue(OrigI, V);
+ PN.setIncomingValue(ThenI, V);
+ }
+
+ // Remove speculated dbg intrinsics.
+ // FIXME: Is it possible to do this in a more elegant way? Moving/merging the
+ // dbg value for the different flows and inserting it after the select.
+ for (Instruction *I : SpeculatedDbgIntrinsics) {
+ // We still want to know that an assignment took place so don't remove
+ // dbg.assign intrinsics.
+ if (!isa<DbgAssignIntrinsic>(I))
+ I->eraseFromParent();
+ }
+
+ ++NumSpeculations;
+ return true;
+}
+
+/// Return true if we can thread a branch across this block.
+static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
+ int Size = 0;
+ EphemeralValueTracker EphTracker;
+
+ // Walk the block in reverse so that we can identify ephemeral values properly
+ // (values only feeding assumes).
+ for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
+ // Can't fold blocks that contain noduplicate or convergent calls.
+ if (CallInst *CI = dyn_cast<CallInst>(&I))
+ if (CI->cannotDuplicate() || CI->isConvergent())
+ return false;
+
+ // Ignore ephemeral values which are deleted during codegen.
+ // We will delete Phis while threading, so Phis should not be accounted in
+ // block's size.
+ if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
+ if (Size++ > MaxSmallBlockSize)
+ return false; // Don't clone large BB's.
+ }
+
+ // We can only support instructions that do not define values that are
+ // live outside of the current basic block.
+ for (User *U : I.users()) {
+ Instruction *UI = cast<Instruction>(U);
+ if (UI->getParent() != BB || isa<PHINode>(UI))
+ return false;
+ }
+
+ // Looks ok, continue checking.
+ }
+
+ return true;
+}
+
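+// For example (an informal sketch): if `From` ends in
+//   br i1 %v, label %To, label %Other
+// and the two successors are distinct, then %v is known to be true on the
+// From->To edge (and false on the From->Other edge).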
+static ConstantInt *getKnownValueOnEdge(Value *V, BasicBlock *From,
+ BasicBlock *To) {
+ // Don't look past the block defining the value, we might get the value from
+ // a previous loop iteration.
+ auto *I = dyn_cast<Instruction>(V);
+ if (I && I->getParent() == To)
+ return nullptr;
+
+ // We know the value if the From block branches on it.
+ auto *BI = dyn_cast<BranchInst>(From->getTerminator());
+ if (BI && BI->isConditional() && BI->getCondition() == V &&
+ BI->getSuccessor(0) != BI->getSuccessor(1))
+ return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
+ : ConstantInt::getFalse(BI->getContext());
+
+ return nullptr;
+}
+
+/// If we have a conditional branch on something for which we know the constant
+/// value in predecessors (e.g. a phi node in the current block), thread edges
+/// from the predecessor to their ultimate destination.
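+///
+/// A minimal sketch of the situation (all names invented for the example):
+/// \code
+///   pred:
+///     br label %bb
+///   bb:
+///     %cond = phi i1 [ true, %pred ], [ %c, %other ]
+///     br i1 %cond, label %t, label %f
+/// \endcode
+/// On the pred->bb edge %cond is known to be true, so pred can be rerouted
+/// (through a block holding clones of bb's non-PHI instructions) straight to
+/// %t.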
+static std::optional<bool>
+FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
+ const DataLayout &DL,
+ AssumptionCache *AC) {
+ SmallMapVector<ConstantInt *, SmallSetVector<BasicBlock *, 2>, 2> KnownValues;
+ BasicBlock *BB = BI->getParent();
+ Value *Cond = BI->getCondition();
+ PHINode *PN = dyn_cast<PHINode>(Cond);
+ if (PN && PN->getParent() == BB) {
+ // Degenerate case of a single entry PHI.
+ if (PN->getNumIncomingValues() == 1) {
+ FoldSingleEntryPHINodes(PN->getParent());
+ return true;
+ }
+
+ for (Use &U : PN->incoming_values())
+ if (auto *CB = dyn_cast<ConstantInt>(U))
+ KnownValues[CB].insert(PN->getIncomingBlock(U));
+ } else {
+ for (BasicBlock *Pred : predecessors(BB)) {
+ if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
+ KnownValues[CB].insert(Pred);
+ }
+ }
+
+ if (KnownValues.empty())
+ return false;
+
+ // Now we know that this block has multiple preds and two succs.
+ // Check that the block is small enough and values defined in the block are
+ // not used outside of it.
+ if (!BlockIsSimpleEnoughToThreadThrough(BB))
+ return false;
+
+ for (const auto &Pair : KnownValues) {
+ // Okay, we now know that all edges from PredBB should be revectored to
+ // branch to RealDest.
+ ConstantInt *CB = Pair.first;
+ ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
+ BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
+
+ if (RealDest == BB)
+ continue; // Skip self loops.
+
+ // Skip if the predecessor's terminator is an indirect branch.
+ if (any_of(PredBBs, [](BasicBlock *PredBB) {
+ return isa<IndirectBrInst>(PredBB->getTerminator());
+ }))
+ continue;
+
+ LLVM_DEBUG({
+ dbgs() << "Condition " << *Cond << " in " << BB->getName()
+ << " has value " << *Pair.first << " in predecessors:\n";
+ for (const BasicBlock *PredBB : Pair.second)
+ dbgs() << " " << PredBB->getName() << "\n";
+ dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
+ });
+
+ // Split the predecessors we are threading into a new edge block. We'll
+ // clone the instructions into this block, and then redirect it to RealDest.
+ BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
+
+ // TODO: These just exist to reduce test diff, we can drop them if we like.
+ EdgeBB->setName(RealDest->getName() + ".critedge");
+ EdgeBB->moveBefore(RealDest);
+
+ // Update PHI nodes.
+ AddPredecessorToBlock(RealDest, EdgeBB, BB);
+
+ // BB may have instructions that are being threaded over. Clone these
+ // instructions into EdgeBB. We know that there will be no uses of the
+ // cloned instructions outside of EdgeBB.
+ BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
+ DenseMap<Value *, Value *> TranslateMap; // Track translated values.
+ TranslateMap[Cond] = CB;
+ for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
+ if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
+ TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
+ continue;
+ }
+ // Clone the instruction.
+ Instruction *N = BBI->clone();
+ if (BBI->hasName())
+ N->setName(BBI->getName() + ".c");
+
+ // Update operands due to translation.
+ for (Use &Op : N->operands()) {
+ DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(Op);
+ if (PI != TranslateMap.end())
+ Op = PI->second;
+ }
+
+ // Check for trivial simplification.
+ if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
+ if (!BBI->use_empty())
+ TranslateMap[&*BBI] = V;
+ if (!N->mayHaveSideEffects()) {
+ N->deleteValue(); // Instruction folded away, don't need actual inst
+ N = nullptr;
+ }
+ } else {
+ if (!BBI->use_empty())
+ TranslateMap[&*BBI] = N;
+ }
+ if (N) {
+ // Insert the new instruction into its new home.
+ N->insertInto(EdgeBB, InsertPt);
+
+ // Register the new instruction with the assumption cache if necessary.
+ if (auto *Assume = dyn_cast<AssumeInst>(N))
+ if (AC)
+ AC->registerAssumption(Assume);
+ }
+ }
+
+ BB->removePredecessor(EdgeBB);
+ BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
+ EdgeBI->setSuccessor(0, RealDest);
+ EdgeBI->setDebugLoc(BI->getDebugLoc());
+
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 2> Updates;
+ Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
+ Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
+ DTU->applyUpdates(Updates);
+ }
+
+ // For simplicity, we created a separate basic block for the edge. Merge
+ // it back into the predecessor if possible. This not only avoids
+ // unnecessary SimplifyCFG iterations, but also makes sure that we don't
+ // bypass the check for trivial cycles above.
+ MergeBlockIntoPredecessor(EdgeBB, DTU);
+
+ // Signal repeat, simplifying any other constants.
+ return std::nullopt;
+ }
+
+ return false;
+}
+
+static bool FoldCondBranchOnValueKnownInPredecessor(BranchInst *BI,
+ DomTreeUpdater *DTU,
+ const DataLayout &DL,
+ AssumptionCache *AC) {
+ std::optional<bool> Result;
+ bool EverChanged = false;
+ do {
+ // Note that std::nullopt means "we changed things, but recurse further."
+ Result = FoldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC);
+ EverChanged |= Result == std::nullopt || *Result;
+ } while (Result == std::nullopt);
+ return EverChanged;
+}
+
+/// Given a BB that starts with the specified two-entry PHI node,
+/// see if we can eliminate it.
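+///
+/// For illustration (a sketch; the exact placement of the select follows the
+/// code below):
+/// \code
+///   entry:
+///     br i1 %c, label %then, label %merge
+///   then:
+///     br label %merge
+///   merge:
+///     %phi = phi i32 [ %a, %entry ], [ %b, %then ]
+/// \endcode
+/// can become
+/// \code
+///   entry:
+///     %phi = select i1 %c, i32 %b, i32 %a
+///     br label %merge
+/// \endcode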
+static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
+ DomTreeUpdater *DTU, const DataLayout &DL) {
+ // Ok, this is a two entry PHI node. Check to see if this is a simple "if
+ // statement", which has a very simple dominance structure. Basically, we
+ // are trying to find the condition that is being branched on, which
+ // subsequently causes this merge to happen. We really want control
+ // dependence information for this check, but simplifycfg can't keep it up
+ // to date, and this catches most of the cases we care about anyway.
+ BasicBlock *BB = PN->getParent();
+
+ BasicBlock *IfTrue, *IfFalse;
+ BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
+ if (!DomBI)
+ return false;
+ Value *IfCond = DomBI->getCondition();
+ // Don't bother if the branch will be constant folded trivially.
+ if (isa<ConstantInt>(IfCond))
+ return false;
+
+ BasicBlock *DomBlock = DomBI->getParent();
+ SmallVector<BasicBlock *, 2> IfBlocks;
+ llvm::copy_if(
+ PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
+ return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
+ });
+ assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
+ "Will have either one or two blocks to speculate.");
+
+ // If the branch is non-unpredictable, see if we either predictably jump to
+ // the merge bb (if we have only a single 'then' block), or if we predictably
+ // jump to one specific 'then' block (if we have two of them).
+ // It isn't beneficial to speculatively execute the code
+ // from the block that we know is predictably not entered.
+ if (!DomBI->getMetadata(LLVMContext::MD_unpredictable)) {
+ uint64_t TWeight, FWeight;
+ if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
+ (TWeight + FWeight) != 0) {
+ BranchProbability BITrueProb =
+ BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
+ BranchProbability Likely = TTI.getPredictableBranchThreshold();
+ BranchProbability BIFalseProb = BITrueProb.getCompl();
+ if (IfBlocks.size() == 1) {
+ BranchProbability BIBBProb =
+ DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
+ if (BIBBProb >= Likely)
+ return false;
+ } else {
+ if (BITrueProb >= Likely || BIFalseProb >= Likely)
+ return false;
+ }
+ }
+ }
+
+ // Don't try to fold an unreachable block. For example, the phi node itself
+ // can't be the candidate if-condition for a select that we want to form.
+ if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
+ if (IfCondPhiInst->getParent() == BB)
+ return false;
+
+ // Okay, we found that we can merge this two-entry phi node into a select.
+ // Doing so would require us to fold *all* two entry phi nodes in this block.
+ // At some point this becomes non-profitable (particularly if the target
+ // doesn't support cmov's). Only do this transformation if there are two or
+ // fewer PHI nodes in this block.
+ unsigned NumPhis = 0;
+ for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
+ if (NumPhis > 2)
+ return false;
+
+ // Loop over the PHI's seeing if we can promote them all to select
+ // instructions. While we are at it, keep track of the instructions
+ // that need to be moved to the dominating block.
+ SmallPtrSet<Instruction *, 4> AggressiveInsts;
+ InstructionCost Cost = 0;
+ InstructionCost Budget =
+ TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
+
+ bool Changed = false;
+ for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
+ PHINode *PN = cast<PHINode>(II++);
+ if (Value *V = simplifyInstruction(PN, {DL, PN})) {
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ Changed = true;
+ continue;
+ }
+
+ if (!dominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts,
+ Cost, Budget, TTI) ||
+ !dominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts,
+ Cost, Budget, TTI))
+ return Changed;
+ }
+
+ // If we folded the first phi, PN dangles at this point. Refresh it. If
+ // we ran out of PHIs then we simplified them all.
+ PN = dyn_cast<PHINode>(BB->begin());
+ if (!PN)
+ return true;
+
+ // Return true if at least one of these is a 'not', and another is either
+ // a 'not' too, or a constant.
+ auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
+ if (!match(V0, m_Not(m_Value())))
+ std::swap(V0, V1);
+ auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
+ return match(V0, m_Not(m_Value())) && match(V1, Invertible);
+ };
+
+ // Don't fold i1 branches on PHIs which contain binary operators or
+ // (possibly inverted) select form of or/ands, unless one of
+ // the incoming values is an 'not' and another one is freely invertible.
+ // These can often be turned into switches and other things.
+ auto IsBinOpOrAnd = [](Value *V) {
+ return match(
+ V, m_CombineOr(
+ m_BinOp(),
+ m_CombineOr(m_Select(m_Value(), m_ImmConstant(), m_Value()),
+ m_Select(m_Value(), m_Value(), m_ImmConstant()))));
+ };
+ if (PN->getType()->isIntegerTy(1) &&
+ (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
+ IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
+ !CanHoistNotFromBothValues(PN->getIncomingValue(0),
+ PN->getIncomingValue(1)))
+ return Changed;
+
+ // If all PHI nodes are promotable, check to make sure that all instructions
+ // in the predecessor blocks can be promoted as well. If not, we won't be able
+ // to get rid of the control flow, so it's not worth promoting to select
+ // instructions.
+ for (BasicBlock *IfBlock : IfBlocks)
+ for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
+ if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
+ // This is not an aggressive instruction that we can promote.
+ // Because of this, we won't be able to get rid of the control flow, so
+ // the xform is not worth it.
+ return Changed;
+ }
+
+ // If either of the blocks has its address taken, we can't do this fold.
+ if (any_of(IfBlocks,
+ [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
+ return Changed;
+
+ LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond
+ << " T: " << IfTrue->getName()
+ << " F: " << IfFalse->getName() << "\n");
+
+ // If we can still promote the PHI nodes after this gauntlet of tests,
+ // do all of the PHI's now.
+
+ // Move all 'aggressive' instructions, which are defined in the
+ // conditional parts of the if's up to the dominating block.
+ for (BasicBlock *IfBlock : IfBlocks)
+ hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
+
+ IRBuilder<NoFolder> Builder(DomBI);
+ // Propagate fast-math-flags from phi nodes to replacement selects.
+ IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+ while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+ if (isa<FPMathOperator>(PN))
+ Builder.setFastMathFlags(PN->getFastMathFlags());
+
+ // Change the PHI node into a select instruction.
+ Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
+ Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
+
+ Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", DomBI);
+ PN->replaceAllUsesWith(Sel);
+ Sel->takeName(PN);
+ PN->eraseFromParent();
+ }
+
+ // At this point, all IfBlocks are empty, so our if statement
+ // has been flattened. Change DomBlock to jump directly to our new block to
+ // avoid other simplifycfg's kicking in on the diamond.
+ Builder.CreateBr(BB);
+
+ SmallVector<DominatorTree::UpdateType, 3> Updates;
+ if (DTU) {
+ Updates.push_back({DominatorTree::Insert, DomBlock, BB});
+ for (auto *Successor : successors(DomBlock))
+ Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
+ }
+
+ DomBI->eraseFromParent();
+ if (DTU)
+ DTU->applyUpdates(Updates);
+
+ return true;
+}
+
+static Value *createLogicalOp(IRBuilderBase &Builder,
+ Instruction::BinaryOps Opc, Value *LHS,
+ Value *RHS, const Twine &Name = "") {
+ // Try to relax logical op to binary op.
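+ // (Informal note: `and i1 %a, %b` is poison whenever %b is poison, even if
+ // %a is false, while `select i1 %a, i1 %b, i1 false` is not; the plain binop
+ // is therefore only used when impliesPoison(RHS, LHS) shows that whenever
+ // RHS is poison, LHS already is.)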
+ if (impliesPoison(RHS, LHS))
+ return Builder.CreateBinOp(Opc, LHS, RHS, Name);
+ if (Opc == Instruction::And)
+ return Builder.CreateLogicalAnd(LHS, RHS, Name);
+ if (Opc == Instruction::Or)
+ return Builder.CreateLogicalOr(LHS, RHS, Name);
+ llvm_unreachable("Invalid logical opcode");
+}
+
+/// Return true if either PBI or BI has branch weight available, and store
+/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
+/// not have branch weight, use 1:1 as its weight.
+static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
+ uint64_t &PredTrueWeight,
+ uint64_t &PredFalseWeight,
+ uint64_t &SuccTrueWeight,
+ uint64_t &SuccFalseWeight) {
+ bool PredHasWeights =
+ extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
+ bool SuccHasWeights =
+ extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
+ if (PredHasWeights || SuccHasWeights) {
+ if (!PredHasWeights)
+ PredTrueWeight = PredFalseWeight = 1;
+ if (!SuccHasWeights)
+ SuccTrueWeight = SuccFalseWeight = 1;
+ return true;
+ } else {
+ return false;
+ }
+}
+
+/// Determine if the two branches share a common destination, and deduce the
+/// logical operation (and whether the predecessor's condition must be
+/// inverted) that joins the branches' conditions to arrive at the common
+/// destination, if doing so would be profitable.
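+///
+/// For example (labels invented): given
+/// \code
+///   PBI:  br i1 %p, label %common, label %bb
+///   BI:   br i1 %q, label %common, label %other
+/// \endcode
+/// both branches reach %common when their condition is true, so (modulo
+/// profitability) the result is {%common, Or, false}: branch on `%p | %q`
+/// with no inversion of the predecessor's condition.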
+static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
+shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI,
+ const TargetTransformInfo *TTI) {
+ assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
+ "Both blocks must end with a conditional branches.");
+ assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
+ "PredBB must be a predecessor of BB.");
+
+ // We have the potential to fold the conditions together, but if the
+ // predecessor branch is predictable, we may not want to merge them.
+ uint64_t PTWeight, PFWeight;
+ BranchProbability PBITrueProb, Likely;
+ if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
+ extractBranchWeights(*PBI, PTWeight, PFWeight) &&
+ (PTWeight + PFWeight) != 0) {
+ PBITrueProb =
+ BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
+ Likely = TTI->getPredictableBranchThreshold();
+ }
+
+ if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
+ // Speculate the 2nd condition unless the 1st is probably true.
+ if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
+ return {{BI->getSuccessor(0), Instruction::Or, false}};
+ } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
+ // Speculate the 2nd condition unless the 1st is probably false.
+ if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
+ return {{BI->getSuccessor(1), Instruction::And, false}};
+ } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
+ // Speculate the 2nd condition unless the 1st is probably true.
+ if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
+ return {{BI->getSuccessor(1), Instruction::And, true}};
+ } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
+ // Speculate the 2nd condition unless the 1st is probably false.
+ if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
+ return {{BI->getSuccessor(0), Instruction::Or, true}};
+ }
+ return std::nullopt;
+}
+
+static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
+ DomTreeUpdater *DTU,
+ MemorySSAUpdater *MSSAU,
+ const TargetTransformInfo *TTI) {
+ BasicBlock *BB = BI->getParent();
+ BasicBlock *PredBlock = PBI->getParent();
+
+ // Determine if the two branches share a common destination.
+ BasicBlock *CommonSucc;
+ Instruction::BinaryOps Opc;
+ bool InvertPredCond;
+ std::tie(CommonSucc, Opc, InvertPredCond) =
+ *shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI);
+
+ LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
+
+ IRBuilder<> Builder(PBI);
+ // The builder is used to create instructions to eliminate the branch in BB.
+ // If BB's terminator has !annotation metadata, add it to the new
+ // instructions.
+ Builder.CollectMetadataToCopy(BB->getTerminator(),
+ {LLVMContext::MD_annotation});
+
+ // If we need to invert the condition in the pred block to match, do so now.
+ if (InvertPredCond) {
+ Value *NewCond = PBI->getCondition();
+ if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) {
+ CmpInst *CI = cast<CmpInst>(NewCond);
+ CI->setPredicate(CI->getInversePredicate());
+ } else {
+ NewCond =
+ Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not");
+ }
+
+ PBI->setCondition(NewCond);
+ PBI->swapSuccessors();
+ }
+
+ BasicBlock *UniqueSucc =
+ PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
+
+ // Before cloning instructions, notify the successor basic block that it
+ // is about to have a new predecessor. This will update PHI nodes,
+ // which will allow us to update live-out uses of bonus instructions.
+ AddPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
+
+ // Try to update branch weights.
+ uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
+ if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
+ SuccTrueWeight, SuccFalseWeight)) {
+ SmallVector<uint64_t, 8> NewWeights;
+
+ if (PBI->getSuccessor(0) == BB) {
+ // PBI: br i1 %x, BB, FalseDest
+ // BI: br i1 %y, UniqueSucc, FalseDest
+ // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
+ NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
+ // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
+ // TrueWeight for PBI * FalseWeight for BI.
+ // We assume that total weights of a BranchInst can fit into 32 bits.
+ // Therefore, we will not have overflow using 64-bit arithmetic.
+ NewWeights.push_back(PredFalseWeight *
+ (SuccFalseWeight + SuccTrueWeight) +
+ PredTrueWeight * SuccFalseWeight);
+ } else {
+ // PBI: br i1 %x, TrueDest, BB
+ // BI: br i1 %y, TrueDest, UniqueSucc
+ // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
+ // FalseWeight for PBI * TrueWeight for BI.
+ NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
+ PredFalseWeight * SuccTrueWeight);
+ // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
+ NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
+ }
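+ // Worked example for the first case above (weights invented): with PBI at
+ // 3:1 and BI at 1:1, the merged weights are {3*1, 1*(1+1) + 3*1} = {3, 5},
+ // i.e. the folded branch is taken 3/8 of the time, matching 0.75 * 0.5.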
+
+ // Halve the weights if any of them cannot fit in an uint32_t
+ FitWeights(NewWeights);
+
+ SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end());
+ setBranchWeights(PBI, MDWeights[0], MDWeights[1]);
+
+ // TODO: If BB is reachable from all paths through PredBlock, then we
+ // could replace PBI's branch probabilities with BI's.
+ } else
+ PBI->setMetadata(LLVMContext::MD_prof, nullptr);
+
+ // Now, update the CFG.
+ PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
+
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
+ {DominatorTree::Delete, PredBlock, BB}});
+
+ // If BI was a loop latch, it may have had associated loop metadata.
+ // We need to copy it to the new latch, that is, PBI.
+ if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
+ PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
+
+ ValueToValueMapTy VMap; // maps original values to cloned values
+ CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap);
+
+ // Now that the Cond was cloned into the predecessor basic block,
+ // or/and the two conditions together.
+ Value *BICond = VMap[BI->getCondition()];
+ PBI->setCondition(
+ createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
+
+ // Copy any debug value intrinsics into the end of PredBlock.
+ for (Instruction &I : *BB) {
+ if (isa<DbgInfoIntrinsic>(I)) {
+ Instruction *NewI = I.clone();
+ RemapInstruction(NewI, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ NewI->insertBefore(PBI);
+ }
+ }
+
+ ++NumFoldBranchToCommonDest;
+ return true;
+}
+
+/// Return true if an instruction's type or any of its operands' types is a
+/// vector type.
+static bool isVectorOp(Instruction &I) {
+ return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
+ return U->getType()->isVectorTy();
+ });
+}
+
+/// If this basic block is simple enough, and if a predecessor branches to us
+/// and one of our successors, fold the block into the predecessor and use
+/// logical operations to pick the right destination.
+bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
+ MemorySSAUpdater *MSSAU,
+ const TargetTransformInfo *TTI,
+ unsigned BonusInstThreshold) {
+ // If this block ends with an unconditional branch,
+ // let SpeculativelyExecuteBB() deal with it.
+ if (!BI->isConditional())
+ return false;
+
+ BasicBlock *BB = BI->getParent();
+ TargetTransformInfo::TargetCostKind CostKind =
+ BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize
+ : TargetTransformInfo::TCK_SizeAndLatency;
+
+ Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
+
+ if (!Cond ||
+ (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond) &&
+ !isa<SelectInst>(Cond)) ||
+ Cond->getParent() != BB || !Cond->hasOneUse())
+ return false;
+
+ // Finally, don't infinitely unroll conditional loops.
+ if (is_contained(successors(BB), BB))
+ return false;
+
+ // Which predecessors do we want to deal with?
+ SmallVector<BasicBlock *, 8> Preds;
+ for (BasicBlock *PredBlock : predecessors(BB)) {
+ BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
+
+ // Check that we have two conditional branches. If there is a PHI node in
+ // the common successor, verify that the same value flows in from both
+ // blocks.
+ if (!PBI || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI))
+ continue;
+
+ // Determine if the two branches share a common destination.
+ BasicBlock *CommonSucc;
+ Instruction::BinaryOps Opc;
+ bool InvertPredCond;
+ if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
+ std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
+ else
+ continue;
+
+ // Check the cost of inserting the necessary logic before performing the
+ // transformation.
+ if (TTI) {
+ Type *Ty = BI->getCondition()->getType();
+ InstructionCost Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
+ if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
+ !isa<CmpInst>(PBI->getCondition())))
+ Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
+
+ if (Cost > BranchFoldThreshold)
+ continue;
+ }
+
+ // Ok, we do want to deal with this predecessor. Record it.
+ Preds.emplace_back(PredBlock);
+ }
+
+ // If there aren't any predecessors into which we can fold,
+ // don't bother checking the cost.
+ if (Preds.empty())
+ return false;
+
+ // Only allow this transformation if computing the condition doesn't involve
+ // too many instructions and these involved instructions can be executed
+ // unconditionally. We denote all involved instructions except the condition
+ // as "bonus instructions", and only allow this transformation when the
+ // number of the bonus instructions we'll need to create when cloning into
+ // each predecessor does not exceed a certain threshold.
+ unsigned NumBonusInsts = 0;
+ bool SawVectorOp = false;
+ const unsigned PredCount = Preds.size();
+ for (Instruction &I : *BB) {
+ // Don't check the branch condition comparison itself.
+ if (&I == Cond)
+ continue;
+ // Ignore dbg intrinsics, and the terminator.
+ if (isa<DbgInfoIntrinsic>(I) || isa<BranchInst>(I))
+ continue;
+ // I must be safe to execute unconditionally.
+ if (!isSafeToSpeculativelyExecute(&I))
+ return false;
+ SawVectorOp |= isVectorOp(I);
+
+ // Account for the cost of duplicating this instruction into each
+ // predecessor. Ignore free instructions.
+ if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
+ TargetTransformInfo::TCC_Free) {
+ NumBonusInsts += PredCount;
+
+ // Early exit once we reach the limit.
+ if (NumBonusInsts >
+ BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
+ return false;
+ }
+
+ auto IsBCSSAUse = [BB, &I](Use &U) {
+ auto *UI = cast<Instruction>(U.getUser());
+ if (auto *PN = dyn_cast<PHINode>(UI))
+ return PN->getIncomingBlock(U) == BB;
+ return UI->getParent() == BB && I.comesBefore(UI);
+ };
+
+ // Does this instruction require rewriting of uses?
+ if (!all_of(I.uses(), IsBCSSAUse))
+ return false;
+ }
+ if (NumBonusInsts >
+ BonusInstThreshold *
+ (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
+ return false;
+
+ // Ok, we have the budget. Perform the transformation.
+ for (BasicBlock *PredBlock : Preds) {
+ auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
+ return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
+ }
+ return false;
+}
+
+// If there is only one store in BB1 and BB2, return it, otherwise return
+// nullptr.
+static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) {
+ StoreInst *S = nullptr;
+ for (auto *BB : {BB1, BB2}) {
+ if (!BB)
+ continue;
+ for (auto &I : *BB)
+ if (auto *SI = dyn_cast<StoreInst>(&I)) {
+ if (S)
+ // Multiple stores seen.
+ return nullptr;
+ else
+ S = SI;
+ }
+ }
+ return S;
+}
+
+static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
+ Value *AlternativeV = nullptr) {
+ // PHI is going to be a PHI node that allows the value V that is defined in
+ // BB to be referenced in BB's only successor.
+ //
+ // If AlternativeV is nullptr, the only value we care about in PHI is V. It
+ // doesn't matter to us what the other operand is (it'll never get used). We
+ // could just create a new PHI with an undef incoming value, but that could
+ // increase register pressure if EarlyCSE/InstCombine can't fold it with some
+ // other PHI. So here we directly look for some PHI in BB's successor with V
+ // as an incoming operand. If we find one, we use it, else we create a new
+ // one.
+ //
+ // If AlternativeV is not nullptr, we care about both incoming values in PHI.
+ // PHI must be exactly: phi <ty> [ %V, %BB ], [ %AlternativeV, %OtherBB ]
+ // where OtherBB is the single other predecessor of BB's only successor.
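+ //
+ // For example (illustrative): if BB defines %v and the successor already has
+ //   %p = phi i32 [ %v, %BB ], [ %w, %OtherBB ]
+ // then with AlternativeV == %w we simply reuse %p rather than creating a new
+ // PHI node.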
+ PHINode *PHI = nullptr;
+ BasicBlock *Succ = BB->getSingleSuccessor();
+
+ for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
+ if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
+ PHI = cast<PHINode>(I);
+ if (!AlternativeV)
+ break;
+
+ assert(Succ->hasNPredecessors(2));
+ auto PredI = pred_begin(Succ);
+ BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
+ if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
+ break;
+ PHI = nullptr;
+ }
+ if (PHI)
+ return PHI;
+
+ // If V is not an instruction defined in BB, just return it.
+ if (!AlternativeV &&
+ (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
+ return V;
+
+ PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge", &Succ->front());
+ PHI->addIncoming(V, BB);
+ for (BasicBlock *PredBB : predecessors(Succ))
+ if (PredBB != BB)
+ PHI->addIncoming(
+ AlternativeV ? AlternativeV : UndefValue::get(V->getType()), PredBB);
+ return PHI;
+}
+
+static bool mergeConditionalStoreToAddress(
+ BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
+ BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
+ DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
+ // For every pointer, there must be exactly two stores, one coming from
+ // PTB or PFB, and the other from QTB or QFB. We don't support more than one
+ // store (to any address) in PTB,PFB or QTB,QFB.
+ // FIXME: We could relax this restriction with a bit more work and performance
+ // testing.
+ StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
+ StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
+ if (!PStore || !QStore)
+ return false;
+
+ // Now check the stores are compatible.
+ if (!QStore->isUnordered() || !PStore->isUnordered() ||
+ PStore->getValueOperand()->getType() !=
+ QStore->getValueOperand()->getType())
+ return false;
+
+ // Check that sinking the store won't cause program behavior changes. Sinking
+ // the store out of the Q blocks won't change any behavior as we're sinking
+ // from a block to its unconditional successor. But we're moving a store from
+ // the P blocks down through the middle block (QBI) and past both QFB and QTB.
+ // So we need to check that there are no aliasing loads or stores in
+ // QBI, QTB and QFB. We also need to check there are no conflicting memory
+ // operations between PStore and the end of its parent block.
+ //
+ // The ideal way to do this is to query AliasAnalysis, but we don't
+ // preserve AA currently so that is dangerous. Be super safe and just
+ // check there are no other memory operations at all.
+ for (auto &I : *QFB->getSinglePredecessor())
+ if (I.mayReadOrWriteMemory())
+ return false;
+ for (auto &I : *QFB)
+ if (&I != QStore && I.mayReadOrWriteMemory())
+ return false;
+ if (QTB)
+ for (auto &I : *QTB)
+ if (&I != QStore && I.mayReadOrWriteMemory())
+ return false;
+ for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
+ I != E; ++I)
+ if (&*I != PStore && I->mayReadOrWriteMemory())
+ return false;
+
+ // If we're not in aggressive mode, we only optimize if we have some
+ // confidence that by optimizing we'll allow P and/or Q to be if-converted.
+ auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
+ if (!BB)
+ return true;
+ // Heuristic: if the block can be if-converted/phi-folded and the
+ // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
+ // thread this store.
+ InstructionCost Cost = 0;
+ InstructionCost Budget =
+ PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
+ for (auto &I : BB->instructionsWithoutDebug(false)) {
+ // Consider terminator instruction to be free.
+ if (I.isTerminator())
+ continue;
+ // If this is one of the stores that we want to speculate out of this BB,
+ // then don't count its cost, consider it to be free.
+ if (auto *S = dyn_cast<StoreInst>(&I))
+ if (llvm::is_contained(FreeStores, S))
+ continue;
+ // Else, we have a white-list of instructions that we are okay speculating.
+ if (!isa<BinaryOperator>(I) && !isa<GetElementPtrInst>(I))
+ return false; // Not in white-list - not worthwhile folding.
+ // And finally, if this is a non-free instruction that we are okay
+ // speculating, ensure that we consider the speculation budget.
+ Cost +=
+ TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
+ if (Cost > Budget)
+ return false; // Eagerly refuse to fold as soon as we're out of budget.
+ }
+ assert(Cost <= Budget &&
+ "When we run out of budget we will eagerly return from within the "
+ "per-instruction loop.");
+ return true;
+ };
+
+ const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
+ if (!MergeCondStoresAggressively &&
+ (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
+ !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
+ return false;
+
+ // If PostBB has more than two predecessors, we need to split it so we can
+ // sink the store.
+ if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
+ // We know that QFB's only successor is PostBB. And QFB has a single
+ // predecessor. If QTB exists, then its only successor is also PostBB.
+ // If QTB does not exist, then QFB's only predecessor has a conditional
+ // branch to QFB and PostBB.
+ BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
+ BasicBlock *NewBB =
+ SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
+ if (!NewBB)
+ return false;
+ PostBB = NewBB;
+ }
+
+ // OK, we're going to sink the stores to PostBB. The store has to be
+ // conditional though, so first create the predicate.
+ Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator())
+ ->getCondition();
+ Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator())
+ ->getCondition();
+
+ Value *PPHI = ensureValueAvailableInSuccessor(PStore->getValueOperand(),
+ PStore->getParent());
+ Value *QPHI = ensureValueAvailableInSuccessor(QStore->getValueOperand(),
+ QStore->getParent(), PPHI);
+
+ IRBuilder<> QB(&*PostBB->getFirstInsertionPt());
+
+ Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond);
+ Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond);
+
+ if (InvertPCond)
+ PPred = QB.CreateNot(PPred);
+ if (InvertQCond)
+ QPred = QB.CreateNot(QPred);
+ Value *CombinedPred = QB.CreateOr(PPred, QPred);
+
+ auto *T = SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(),
+ /*Unreachable=*/false,
+ /*BranchWeights=*/nullptr, DTU);
+ QB.SetInsertPoint(T);
+ StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
+ SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
+ // Choose the minimum alignment. If we could prove both stores execute, we
+ // could use biggest one. In this case, though, we only know that one of the
+ // stores executes. And we don't know it's safe to take the alignment from a
+ // store that doesn't execute.
+ SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
+
+ QStore->eraseFromParent();
+ PStore->eraseFromParent();
+
+ return true;
+}
+
+static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
+ DomTreeUpdater *DTU, const DataLayout &DL,
+ const TargetTransformInfo &TTI) {
+ // The intention here is to find diamonds or triangles (see below) where each
+ // conditional block contains a store to the same address. Both of these
+ // stores are conditional, so they can't be unconditionally sunk. But it may
+ // be profitable to speculatively sink the stores into one merged store at the
+ // end, and predicate the merged store on the union of the two conditions of
+ // PBI and QBI.
+ //
+ // This can reduce the number of stores executed if both of the conditions are
+ // true, and can allow the blocks to become small enough to be if-converted.
+ // This optimization will also chain, so that ladders of test-and-set
+ // sequences can be if-converted away.
+ //
+ // We only deal with simple diamonds or triangles:
+ //
+ //     PBI        or   PBI        or a combination of the two
+ //    /   \             | \
+ //  PTB   PFB           |  PFB
+ //    \   /             | /
+ //     QBI              QBI
+ //    /   \             | \
+ //  QTB   QFB           |  QFB
+ //    \   /             | /
+ //    PostBB           PostBB
+ //
+ // We model triangles as a type of diamond with a nullptr "true" block.
+ // Triangles are canonicalized so that the fallthrough edge is represented by
+ // a true condition, as in the diagram above.
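+ //
+ // Informally, the end result is a single store of the suitably phi-merged
+ // value in PostBB, guarded by the disjunction of the two store conditions,
+ // roughly: `if (p) *a = x; if (q) *a = y;` -> one conditional store under
+ // `p | q` (with inversions applied for the triangle fallthrough cases).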
+ BasicBlock *PTB = PBI->getSuccessor(0);
+ BasicBlock *PFB = PBI->getSuccessor(1);
+ BasicBlock *QTB = QBI->getSuccessor(0);
+ BasicBlock *QFB = QBI->getSuccessor(1);
+ BasicBlock *PostBB = QFB->getSingleSuccessor();
+
+ // Make sure we have a good guess for PostBB. If QTB's only successor is
+ // QFB, then QFB is a better PostBB.
+ if (QTB->getSingleSuccessor() == QFB)
+ PostBB = QFB;
+
+ // If we couldn't find a good PostBB, stop.
+ if (!PostBB)
+ return false;
+
+ bool InvertPCond = false, InvertQCond = false;
+ // Canonicalize fallthroughs to the true branches.
+ if (PFB == QBI->getParent()) {
+ std::swap(PFB, PTB);
+ InvertPCond = true;
+ }
+ if (QFB == PostBB) {
+ std::swap(QFB, QTB);
+ InvertQCond = true;
+ }
+
+ // From this point on we can assume PTB or QTB may be fallthroughs but PFB
+ // and QFB may not. Model fallthroughs as a nullptr block.
+ if (PTB == QBI->getParent())
+ PTB = nullptr;
+ if (QTB == PostBB)
+ QTB = nullptr;
+
+ // Legality bailouts. We must have at least the non-fallthrough blocks and
+ // the post-dominating block, and the non-fallthroughs must only have one
+ // predecessor.
+ auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
+ return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
+ };
+ if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
+ !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
+ return false;
+ if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
+ (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
+ return false;
+ if (!QBI->getParent()->hasNUses(2))
+ return false;
+
+ // OK, this is a sequence of two diamonds or triangles.
+ // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
+ SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
+ for (auto *BB : {PTB, PFB}) {
+ if (!BB)
+ continue;
+ for (auto &I : *BB)
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I))
+ PStoreAddresses.insert(SI->getPointerOperand());
+ }
+ for (auto *BB : {QTB, QFB}) {
+ if (!BB)
+ continue;
+ for (auto &I : *BB)
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I))
+ QStoreAddresses.insert(SI->getPointerOperand());
+ }
+
+ set_intersect(PStoreAddresses, QStoreAddresses);
+ // set_intersect mutates PStoreAddresses in place. Rename it here to make it
+ // clear what it contains.
+ auto &CommonAddresses = PStoreAddresses;
+
+ bool Changed = false;
+ for (auto *Address : CommonAddresses)
+ Changed |=
+ mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
+ InvertPCond, InvertQCond, DTU, DL, TTI);
+ return Changed;
+}
+
+/// If the previous block ended with a widenable branch, determine if reusing
+/// the target block is profitable and legal. This will have the effect of
+/// "widening" PBI, but doesn't require us to reason about hosting safety.
+static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
+ DomTreeUpdater *DTU) {
+ // TODO: This can be generalized in two important ways:
+ // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
+ // values from the PBI edge.
+ // 2) We can sink side-effecting instructions into BI's fallthrough
+ // successor provided they don't contribute to the computation of
+ // BI's condition.
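+ // For reference, a widenable branch conceptually has the form:
+ //   %wc = call i1 @llvm.experimental.widenable.condition()
+ //   %c  = and i1 %cond, %wc
+ //   br i1 %c, label %IfTrueBB, label %IfFalseBB
+ // Redirecting a failing check in IfTrueBB to IfFalseBB merely "widens" the
+ // set of inputs that take the IfFalseBB path, which is always legal for a
+ // widenable condition.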
+ Value *CondWB, *WC;
+ BasicBlock *IfTrueBB, *IfFalseBB;
+ if (!parseWidenableBranch(PBI, CondWB, WC, IfTrueBB, IfFalseBB) ||
+ IfTrueBB != BI->getParent() || !BI->getParent()->getSinglePredecessor())
+ return false;
+ if (!IfFalseBB->phis().empty())
+ return false; // TODO
+ // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
+ // may undo the transform done here.
+ // TODO: There might be a more fine-grained solution to this.
+ if (!llvm::succ_empty(IfFalseBB))
+ return false;
+ // Use lambda to lazily compute expensive condition after cheap ones.
+ auto NoSideEffects = [](BasicBlock &BB) {
+ return llvm::none_of(BB, [](const Instruction &I) {
+ return I.mayWriteToMemory() || I.mayHaveSideEffects();
+ });
+ };
+ if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
+ BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
+ NoSideEffects(*BI->getParent())) {
+ auto *OldSuccessor = BI->getSuccessor(1);
+ OldSuccessor->removePredecessor(BI->getParent());
+ BI->setSuccessor(1, IfFalseBB);
+ if (DTU)
+ DTU->applyUpdates(
+ {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
+ {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
+ return true;
+ }
+ if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
+ BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
+ NoSideEffects(*BI->getParent())) {
+ auto *OldSuccessor = BI->getSuccessor(0);
+ OldSuccessor->removePredecessor(BI->getParent());
+ BI->setSuccessor(0, IfFalseBB);
+ if (DTU)
+ DTU->applyUpdates(
+ {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
+ {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
+ return true;
+ }
+ return false;
+}
+
+/// If we have a conditional branch as a predecessor of another block,
+/// this function tries to simplify it. We know
+/// that PBI and BI are both conditional branches, and BI is in one of the
+/// successor blocks of PBI - PBI branches to BI.
+static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
+ DomTreeUpdater *DTU,
+ const DataLayout &DL,
+ const TargetTransformInfo &TTI) {
+ assert(PBI->isConditional() && BI->isConditional());
+ BasicBlock *BB = BI->getParent();
+
+ // If this block ends with a branch instruction, and if there is a
+ // predecessor that ends on a branch of the same condition, make
+ // this conditional branch redundant.
+ if (PBI->getCondition() == BI->getCondition() &&
+ PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
+ // Okay, the outcome of this conditional branch is statically
+ // knowable. If this block had a single pred, handle specially, otherwise
+ // FoldCondBranchOnValueKnownInPredecessor() will handle it.
+ if (BB->getSinglePredecessor()) {
+ // Turn this into a branch on constant.
+ bool CondIsTrue = PBI->getSuccessor(0) == BB;
+ BI->setCondition(
+ ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
+ return true; // Nuke the branch on constant.
+ }
+ }
+
+ // If the previous block ended with a widenable branch, determine if reusing
+ // the target block is profitable and legal. This will have the effect of
+ // "widening" PBI, but doesn't require us to reason about hosting safety.
+ if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
+ return true;
+
+ // If both branches are conditional and both contain stores to the same
+ // address, remove the stores from the conditionals and create a conditional
+ // merged store at the end.
+ if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
+ return true;
+
+ // If this is a conditional branch in an empty block, and if any
+ // predecessors are a conditional branch to one of our destinations,
+ // fold the conditions into logical ops and one cond br.
+
+ // Ignore dbg intrinsics.
+ if (&*BB->instructionsWithoutDebug(false).begin() != BI)
+ return false;
+
+ int PBIOp, BIOp;
+ if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
+ PBIOp = 0;
+ BIOp = 0;
+ } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
+ PBIOp = 0;
+ BIOp = 1;
+ } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
+ PBIOp = 1;
+ BIOp = 0;
+ } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
+ PBIOp = 1;
+ BIOp = 1;
+ } else {
+ return false;
+ }
+
+ // Check to make sure that the other destination of this branch
+ // isn't BB itself. If so, this is an infinite loop that will
+ // keep getting unwound.
+ if (PBI->getSuccessor(PBIOp) == BB)
+ return false;
+
+ // Do not perform this transformation if it would require
+ // insertion of a large number of select instructions. For targets
+ // without predication/cmovs, this is a big pessimization.
+
+ BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
+ BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
+ unsigned NumPhis = 0;
+ for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
+ ++II, ++NumPhis) {
+ if (NumPhis > 2) // Disable this xform.
+ return false;
+ }
+
+ // Finally, if everything is ok, fold the branches to logical ops.
+ BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
+
+ LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
+ << "AND: " << *BI->getParent());
+
+ SmallVector<DominatorTree::UpdateType, 5> Updates;
+
+ // If OtherDest *is* BB, then BB is a basic block with a single conditional
+ // branch in it, where one edge (OtherDest) goes back to itself but the other
+ // exits. We don't *know* that the program avoids the infinite loop
+ // (even though that seems likely). If we do this xform naively, we'll end up
+ // recursively unpeeling the loop. Since we know that (after the xform is
+ // done) that the block *is* infinite if reached, we just make it an obviously
+ // infinite loop with no cond branch.
+ if (OtherDest == BB) {
+ // Insert it at the end of the function, because it's either dead code,
+ // or it won't matter if it's hot. :)
+ BasicBlock *InfLoopBlock =
+ BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
+ BranchInst::Create(InfLoopBlock, InfLoopBlock);
+ if (DTU)
+ Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
+ OtherDest = InfLoopBlock;
+ }
+
+ LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
+
+ // BI may have other predecessors. Because of this, we leave
+ // it alone, but modify PBI.
+
+ // Make sure we get to CommonDest on True&True directions.
+ Value *PBICond = PBI->getCondition();
+ IRBuilder<NoFolder> Builder(PBI);
+ if (PBIOp)
+ PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
+
+ Value *BICond = BI->getCondition();
+ if (BIOp)
+ BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
+
+ // Merge the conditions.
+ Value *Cond =
+ createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
+
+ // Modify PBI to branch on the new condition to the new dests.
+ PBI->setCondition(Cond);
+ PBI->setSuccessor(0, CommonDest);
+ PBI->setSuccessor(1, OtherDest);
+
+ if (DTU) {
+ Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
+ Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
+
+ DTU->applyUpdates(Updates);
+ }
+
+ // Update branch weight for PBI.
+ uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
+ uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
+ bool HasWeights =
+ extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
+ SuccTrueWeight, SuccFalseWeight);
+ if (HasWeights) {
+ PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
+ PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
+ SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
+ SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
+ // The weight to CommonDest should be PredCommon * SuccTotal +
+ // PredOther * SuccCommon.
+ // The weight to OtherDest should be PredOther * SuccOther.
+ uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
+ PredOther * SuccCommon,
+ PredOther * SuccOther};
+ // Halve the weights if any of them cannot fit in a uint32_t
+ FitWeights(NewWeights);
+
+ setBranchWeights(PBI, NewWeights[0], NewWeights[1]);
+ }
+
+ // OtherDest may have phi nodes. If so, add an entry from PBI's
+ // block that are identical to the entries for BI's block.
+ AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);
+
+ // We know that the CommonDest already had an edge from PBI to
+ // it. If it has PHIs though, the PHIs may have different
+ // entries for BB and PBI's BB. If so, insert a select to make
+ // them agree.
+ for (PHINode &PN : CommonDest->phis()) {
+ Value *BIV = PN.getIncomingValueForBlock(BB);
+ unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
+ Value *PBIV = PN.getIncomingValue(PBBIdx);
+ if (BIV != PBIV) {
+ // Insert a select in PBI to pick the right value.
+ SelectInst *NV = cast<SelectInst>(
+ Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
+ PN.setIncomingValue(PBBIdx, NV);
+ // Although the select has the same condition as PBI, the original branch
+ // weights for PBI do not apply to the new select because the select's
+ // 'logical' edges are incoming edges of the phi that is eliminated, not
+ // the outgoing edges of PBI.
+ if (HasWeights) {
+ uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
+ uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
+ uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
+ uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
+ // The weight to PredCommonDest should be PredCommon * SuccTotal.
+ // The weight to PredOtherDest should be PredOther * SuccCommon.
+ uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
+ PredOther * SuccCommon};
+
+ FitWeights(NewWeights);
+
+ setBranchWeights(NV, NewWeights[0], NewWeights[1]);
+ }
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
+ LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
+
+ // This basic block is probably dead. We know it has at least
+ // one fewer predecessor.
+ return true;
+}
+
+// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
+// true or to FalseBB if Cond is false.
+// Takes care of updating the successors and removing the old terminator.
+// Also makes sure not to introduce new successors by assuming that edges to
+// non-successor TrueBBs and FalseBBs aren't reachable.
+bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm,
+ Value *Cond, BasicBlock *TrueBB,
+ BasicBlock *FalseBB,
+ uint32_t TrueWeight,
+ uint32_t FalseWeight) {
+ auto *BB = OldTerm->getParent();
+ // Remove any superfluous successor edges from the CFG.
+ // First, figure out which successors to preserve.
+ // If TrueBB and FalseBB are equal, only try to preserve one copy of that
+ // successor.
+ BasicBlock *KeepEdge1 = TrueBB;
+ BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
+
+ SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
+
+ // Then remove the rest.
+ for (BasicBlock *Succ : successors(OldTerm)) {
+ // Make sure only to keep exactly one copy of each edge.
+ if (Succ == KeepEdge1)
+ KeepEdge1 = nullptr;
+ else if (Succ == KeepEdge2)
+ KeepEdge2 = nullptr;
+ else {
+ Succ->removePredecessor(BB,
+ /*KeepOneInputPHIs=*/true);
+
+ if (Succ != TrueBB && Succ != FalseBB)
+ RemovedSuccessors.insert(Succ);
+ }
+ }
+
+ IRBuilder<> Builder(OldTerm);
+ Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
+
+ // Insert an appropriate new terminator.
+ if (!KeepEdge1 && !KeepEdge2) {
+ if (TrueBB == FalseBB) {
+ // We were only looking for one successor, and it was present.
+ // Create an unconditional branch to it.
+ Builder.CreateBr(TrueBB);
+ } else {
+ // We found both of the successors we were looking for.
+ // Create a conditional branch sharing the condition of the select.
+ BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
+ if (TrueWeight != FalseWeight)
+ setBranchWeights(NewBI, TrueWeight, FalseWeight);
+ }
+ } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
+ // Neither of the selected blocks was a successor, so this
+ // terminator must be unreachable.
+ new UnreachableInst(OldTerm->getContext(), OldTerm);
+ } else {
+ // One of the selected values was a successor, but the other wasn't.
+ // Insert an unconditional branch to the one that was found;
+ // the edge to the one that wasn't must be unreachable.
+ if (!KeepEdge1) {
+ // Only TrueBB was found.
+ Builder.CreateBr(TrueBB);
+ } else {
+ // Only FalseBB was found.
+ Builder.CreateBr(FalseBB);
+ }
+ }
+
+ EraseTerminatorAndDCECond(OldTerm);
+
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 2> Updates;
+ Updates.reserve(RemovedSuccessors.size());
+ for (auto *RemovedSuccessor : RemovedSuccessors)
+ Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
+ DTU->applyUpdates(Updates);
+ }
+
+ return true;
+}
+
+// Replaces
+// (switch (select cond, X, Y)) on constant X, Y
+// with a branch - conditional if X and Y lead to distinct BBs,
+// unconditional otherwise.
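+// For example (illustrative IR):
+//   %v = select i1 %c, i32 1, i32 4
+//   switch i32 %v, label %def [ i32 1, label %A
+//                               i32 4, label %B ]
+// becomes: br i1 %c, label %A, label %B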
+bool SimplifyCFGOpt::SimplifySwitchOnSelect(SwitchInst *SI,
+ SelectInst *Select) {
+ // Check for constant integer values in the select.
+ ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
+ ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
+ if (!TrueVal || !FalseVal)
+ return false;
+
+ // Find the relevant condition and destinations.
+ Value *Condition = Select->getCondition();
+ BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
+ BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
+
+ // Get weight for TrueBB and FalseBB.
+ uint32_t TrueWeight = 0, FalseWeight = 0;
+ SmallVector<uint64_t, 8> Weights;
+ bool HasWeights = hasBranchWeightMD(*SI);
+ if (HasWeights) {
+ GetBranchWeights(SI, Weights);
+ if (Weights.size() == 1 + SI->getNumCases()) {
+ TrueWeight =
+ (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
+ FalseWeight =
+ (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
+ }
+ }
+
+ // Perform the actual simplification.
+ return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
+ FalseWeight);
+}
+
+// Replaces
+// (indirectbr (select cond, blockaddress(@fn, BlockA),
+// blockaddress(@fn, BlockB)))
+// with
+// (br cond, BlockA, BlockB).
+bool SimplifyCFGOpt::SimplifyIndirectBrOnSelect(IndirectBrInst *IBI,
+ SelectInst *SI) {
+ // Check that both operands of the select are block addresses.
+ BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
+ BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
+ if (!TBA || !FBA)
+ return false;
+
+ // Extract the actual blocks.
+ BasicBlock *TrueBB = TBA->getBasicBlock();
+ BasicBlock *FalseBB = FBA->getBasicBlock();
+
+ // Perform the actual simplification.
+ return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
+ 0);
+}
+
+/// This is called when we find an icmp instruction
+/// (a seteq/setne with a constant) as the only instruction in a
+/// block that ends with an uncond branch. We are looking for a very specific
+/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
+/// this case, we merge the first two "or's of icmp" into a switch, but then
+/// the default value goes to an uncond block with a seteq in it, and we get
+/// something like:
+///
+/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
+/// DEFAULT:
+/// %tmp = icmp eq i8 %A, 92
+/// br label %end
+/// end:
+/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
+///
+/// We prefer to split the edge to 'end' so that there is a true/false entry to
+/// the PHI, merging the third icmp into the switch.
+bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
+ ICmpInst *ICI, IRBuilder<> &Builder) {
+ BasicBlock *BB = ICI->getParent();
+
+ // If the block has any PHIs in it or the icmp has multiple uses, it is too
+ // complex.
+ if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
+ return false;
+
+ Value *V = ICI->getOperand(0);
+ ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
+
+ // The pattern we're looking for is where our only predecessor is a switch on
+ // 'V' and this block is the default case for the switch. In this case we can
+ // fold the compared value into the switch to simplify things.
+ BasicBlock *Pred = BB->getSinglePredecessor();
+ if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
+ return false;
+
+ SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
+ if (SI->getCondition() != V)
+ return false;
+
+ // If BB is reachable on a non-default case, then we simply know the value of
+ // V in this block. Substitute it and constant fold the icmp instruction
+ // away.
+ if (SI->getDefaultDest() != BB) {
+ ConstantInt *VVal = SI->findCaseDest(BB);
+ assert(VVal && "Should have a unique destination value");
+ ICI->setOperand(0, VVal);
+
+ if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
+ ICI->replaceAllUsesWith(V);
+ ICI->eraseFromParent();
+ }
+ // BB is now empty, so it is likely to simplify away.
+ return requestResimplify();
+ }
+
+ // Ok, the block is reachable from the default dest. If the constant we're
+ // comparing exists in one of the other edges, then we can constant fold ICI
+ // and zap it.
+ if (SI->findCaseValue(Cst) != SI->case_default()) {
+ Value *V;
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ V = ConstantInt::getFalse(BB->getContext());
+ else
+ V = ConstantInt::getTrue(BB->getContext());
+
+ ICI->replaceAllUsesWith(V);
+ ICI->eraseFromParent();
+ // BB is now empty, so it is likely to simplify away.
+ return requestResimplify();
+ }
+
+ // The use of the icmp has to be in the 'end' block, by the only PHI node in
+ // the block.
+ BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
+ PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
+ if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
+ isa<PHINode>(++BasicBlock::iterator(PHIUse)))
+ return false;
+
+ // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
+ // true in the PHI.
+ Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
+ Constant *NewCst = ConstantInt::getFalse(BB->getContext());
+
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ std::swap(DefaultCst, NewCst);
+
+ // Replace ICI (which is used by the PHI for the default value) with true or
+ // false depending on if it is EQ or NE.
+ ICI->replaceAllUsesWith(DefaultCst);
+ ICI->eraseFromParent();
+
+ SmallVector<DominatorTree::UpdateType, 2> Updates;
+
+ // Okay, the switch goes to this block on a default value. Add an edge from
+ // the switch to the merge point on the compared value.
+ BasicBlock *NewBB =
+ BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
+ {
+ SwitchInstProfUpdateWrapper SIW(*SI);
+ auto W0 = SIW.getSuccessorWeight(0);
+ SwitchInstProfUpdateWrapper::CaseWeightOpt NewW;
+ if (W0) {
+ NewW = ((uint64_t(*W0) + 1) >> 1);
+ SIW.setSuccessorWeight(0, *NewW);
+ }
+ SIW.addCase(Cst, NewBB, NewW);
+ if (DTU)
+ Updates.push_back({DominatorTree::Insert, Pred, NewBB});
+ }
+
+ // NewBB branches to the phi block, add the uncond branch and the phi entry.
+ Builder.SetInsertPoint(NewBB);
+ Builder.SetCurrentDebugLocation(SI->getDebugLoc());
+ Builder.CreateBr(SuccBlock);
+ PHIUse->addIncoming(NewCst, NewBB);
+ if (DTU) {
+ Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
+ DTU->applyUpdates(Updates);
+ }
+ return true;
+}
+
+/// The specified branch is a conditional branch.
+/// Check to see if it is branching on an or/and chain of icmp instructions, and
+/// fold it into a switch instruction if so.
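+/// For example (illustrative IR):
+///   %a = icmp eq i32 %x, 0
+///   %b = icmp eq i32 %x, 1
+///   %o = or i1 %a, %b
+///   br i1 %o, label %T, label %F
+/// becomes a switch on %x with cases 0 and 1 branching to %T and a default
+/// destination of %F.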
+bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI,
+ IRBuilder<> &Builder,
+ const DataLayout &DL) {
+ Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
+ if (!Cond)
+ return false;
+
+ // Change br (X == 0 | X == 1), T, F into a switch instruction.
+ // If this is a bunch of seteq's or'd together, or if it's a bunch of
+ // 'setne's and'ed together, collect them.
+
+ // Try to gather values from a chain of and/or to be turned into a switch
+ ConstantComparesGatherer ConstantCompare(Cond, DL);
+ // Unpack the result
+ SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
+ Value *CompVal = ConstantCompare.CompValue;
+ unsigned UsedICmps = ConstantCompare.UsedICmps;
+ Value *ExtraCase = ConstantCompare.Extra;
+
+ // If we didn't have a multiply compared value, fail.
+ if (!CompVal)
+ return false;
+
+ // Avoid turning single icmps into a switch.
+ if (UsedICmps <= 1)
+ return false;
+
+ bool TrueWhenEqual = match(Cond, m_LogicalOr(m_Value(), m_Value()));
+
+ // There might be duplicate constants in the list, which the switch
+ // instruction can't handle, remove them now.
+ array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
+ Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
+
+ // If Extra was used, we require at least two switch values to do the
+ // transformation. A switch with one value is just a conditional branch.
+ if (ExtraCase && Values.size() < 2)
+ return false;
+
+ // TODO: Preserve branch weight metadata, similarly to how
+ // FoldValueComparisonIntoPredecessors preserves it.
+
+ // Figure out which block is which destination.
+ BasicBlock *DefaultBB = BI->getSuccessor(1);
+ BasicBlock *EdgeBB = BI->getSuccessor(0);
+ if (!TrueWhenEqual)
+ std::swap(DefaultBB, EdgeBB);
+
+ BasicBlock *BB = BI->getParent();
+
+ LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
+ << " cases into SWITCH. BB is:\n"
+ << *BB);
+
+ SmallVector<DominatorTree::UpdateType, 2> Updates;
+
+ // If there are any extra values that couldn't be folded into the switch
+ // then we evaluate them with an explicit branch first. Split the block
+ // right before the condbr to handle it.
+ if (ExtraCase) {
+ BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
+ /*MSSAU=*/nullptr, "switch.early.test");
+
+ // Remove the uncond branch added to the old block.
+ Instruction *OldTI = BB->getTerminator();
+ Builder.SetInsertPoint(OldTI);
+
+ // This can introduce unintended UB if ExtraCase is poison. Before the
+ // transformation, ExtraCase might not be evaluated at all, depending on the
+ // condition, so no UB is raised. Afterwards we evaluate ExtraCase before
+ // checking the condition, which could raise UB. Freezing ExtraCase avoids
+ // this.
+ AssumptionCache *AC = Options.AC;
+
+ if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
+ ExtraCase = Builder.CreateFreeze(ExtraCase);
+
+ if (TrueWhenEqual)
+ Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
+ else
+ Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
+
+ OldTI->eraseFromParent();
+
+ if (DTU)
+ Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
+
+ // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
+ // for the edge we just added.
+ AddPredecessorToBlock(EdgeBB, BB, NewBB);
+
+ LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
+ << "\nEXTRABB = " << *BB);
+ BB = NewBB;
+ }
+
+ Builder.SetInsertPoint(BI);
+ // Convert pointer to int before we switch.
+ if (CompVal->getType()->isPointerTy()) {
+ CompVal = Builder.CreatePtrToInt(
+ CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
+ }
+
+ // Create the new switch instruction now.
+ SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
+
+ // Add all of the 'cases' to the switch instruction.
+ for (unsigned i = 0, e = Values.size(); i != e; ++i)
+ New->addCase(Values[i], EdgeBB);
+
+ // We added edges from BB to EdgeBB. As such, if there were any
+ // PHI nodes in EdgeBB, they need entries to be added corresponding to
+ // the number of edges added.
+ for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
+ PHINode *PN = cast<PHINode>(BBI);
+ Value *InVal = PN->getIncomingValueForBlock(BB);
+ for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
+ PN->addIncoming(InVal, BB);
+ }
+
+ // Erase the old branch instruction.
+ EraseTerminatorAndDCECond(BI);
+ if (DTU)
+ DTU->applyUpdates(Updates);
+
+ LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
+ return true;
+}
+
+bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
+ if (isa<PHINode>(RI->getValue()))
+ return simplifyCommonResume(RI);
+ else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) &&
+ RI->getValue() == RI->getParent()->getFirstNonPHI())
+ // The resume must unwind the exception that caused control to branch here.
+ return simplifySingleResume(RI);
+
+ return false;
+}
+
+// Check if cleanup block is empty
+static bool isCleanupBlockEmpty(iterator_range<BasicBlock::iterator> R) {
+ for (Instruction &I : R) {
+ auto *II = dyn_cast<IntrinsicInst>(&I);
+ if (!II)
+ return false;
+
+ Intrinsic::ID IntrinsicID = II->getIntrinsicID();
+ switch (IntrinsicID) {
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ case Intrinsic::dbg_label:
+ case Intrinsic::lifetime_end:
+ break;
+ default:
+ return false;
+ }
+ }
+ return true;
+}
+
+// Simplify resume that is shared by several landing pads (phi of landing pad).
+bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
+ BasicBlock *BB = RI->getParent();
+
+ // Check that there are no other instructions except for debug and lifetime
+ // intrinsics between the phi's and resume instruction.
+ if (!isCleanupBlockEmpty(
+ make_range(RI->getParent()->getFirstNonPHI(), BB->getTerminator())))
+ return false;
+
+ SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
+ auto *PhiLPInst = cast<PHINode>(RI->getValue());
+
+ // Check incoming blocks to see if any of them are trivial.
+ for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
+ Idx++) {
+ auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
+ auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
+
+ // If the block has other successors, we can not delete it because
+ // it has other dependents.
+ if (IncomingBB->getUniqueSuccessor() != BB)
+ continue;
+
+ auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI());
+ // Not the landing pad that caused the control to branch here.
+ if (IncomingValue != LandingPad)
+ continue;
+
+ if (isCleanupBlockEmpty(
+ make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
+ TrivialUnwindBlocks.insert(IncomingBB);
+ }
+
+ // If no trivial unwind blocks, don't do any simplifications.
+ if (TrivialUnwindBlocks.empty())
+ return false;
+
+ // Turn all invokes that unwind here into calls.
+ for (auto *TrivialBB : TrivialUnwindBlocks) {
+ // Blocks that will be simplified should be removed from the phi node.
+ // Note there could be multiple edges to the resume block, and we need
+ // to remove them all.
+ while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
+ BB->removePredecessor(TrivialBB, true);
+
+ for (BasicBlock *Pred :
+ llvm::make_early_inc_range(predecessors(TrivialBB))) {
+ removeUnwindEdge(Pred, DTU);
+ ++NumInvokes;
+ }
+
+ // In each SimplifyCFG run, only the current processed block can be erased.
+ // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
+ // of erasing TrivialBB, we only remove the branch to the common resume
+ // block so that we can later erase the resume block since it has no
+ // predecessors.
+ TrivialBB->getTerminator()->eraseFromParent();
+ new UnreachableInst(RI->getContext(), TrivialBB);
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
+ }
+
+ // Delete the resume block if all its predecessors have been removed.
+ if (pred_empty(BB))
+ DeleteDeadBlock(BB, DTU);
+
+ return !TrivialUnwindBlocks.empty();
+}
+
+// Simplify resume that is only used by a single (non-phi) landing pad.
+bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
+ BasicBlock *BB = RI->getParent();
+ auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHI());
+ assert(RI->getValue() == LPInst &&
+ "Resume must unwind the exception that caused control to branch here");
+
+ // Check that there are no other instructions except for debug intrinsics.
+ if (!isCleanupBlockEmpty(
+ make_range<Instruction *>(LPInst->getNextNode(), RI)))
+ return false;
+
+ // Turn all invokes that unwind here into calls and delete the basic block.
+ for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) {
+ removeUnwindEdge(Pred, DTU);
+ ++NumInvokes;
+ }
+
+ // The landingpad is now unreachable. Zap it.
+ DeleteDeadBlock(BB, DTU);
+ return true;
+}
+
+static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) {
+ // If this is a trivial cleanup pad that executes no instructions, it can be
+ // eliminated. If the cleanup pad continues to the caller, any predecessor
+ // that is an EH pad will be updated to continue to the caller and any
+ // predecessor that terminates with an invoke instruction will have its invoke
+ // instruction converted to a call instruction. If the cleanup pad being
+ // simplified does not continue to the caller, each predecessor will be
+ // updated to continue to the unwind destination of the cleanup pad being
+ // simplified.
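+ // For illustration (hypothetical IR), a block of the form:
+ //   %cp = cleanuppad within none []
+ //   cleanupret from %cp unwind label %next
+ // performs no cleanup work, so unwinding predecessors can be retargeted
+ // straight to %next (or to the caller if there is no unwind destination).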
+ BasicBlock *BB = RI->getParent();
+ CleanupPadInst *CPInst = RI->getCleanupPad();
+ if (CPInst->getParent() != BB)
+ // This isn't an empty cleanup.
+ return false;
+
+ // We cannot kill the pad if it has multiple uses. This typically arises
+ // from unreachable basic blocks.
+ if (!CPInst->hasOneUse())
+ return false;
+
+ // Check that there are no other instructions except for benign intrinsics.
+ if (!isCleanupBlockEmpty(
+ make_range<Instruction *>(CPInst->getNextNode(), RI)))
+ return false;
+
+ // If the cleanup return we are simplifying unwinds to the caller, this will
+ // set UnwindDest to nullptr.
+ BasicBlock *UnwindDest = RI->getUnwindDest();
+ Instruction *DestEHPad = UnwindDest ? UnwindDest->getFirstNonPHI() : nullptr;
+
+ // We're about to remove BB from the control flow. Before we do, sink any
+ // PHINodes into the unwind destination. Doing this before changing the
+ // control flow avoids some potentially slow checks, since we can currently
+ // be certain that UnwindDest and BB have no common predecessors (since they
+ // are both EH pads).
+ if (UnwindDest) {
+ // First, go through the PHI nodes in UnwindDest and update any nodes that
+ // reference the block we are removing
+ for (PHINode &DestPN : UnwindDest->phis()) {
+ int Idx = DestPN.getBasicBlockIndex(BB);
+ // Since BB unwinds to UnwindDest, it has to be in the PHI node.
+ assert(Idx != -1);
+ // This PHI node has an incoming value that corresponds to a control
+ // path through the cleanup pad we are removing. If the incoming
+ // value is in the cleanup pad, it must be a PHINode (because we
+ // verified above that the block is otherwise empty). Otherwise, the
+ // value is either a constant or a value that dominates the cleanup
+ // pad being removed.
+ //
+ // Because BB and UnwindDest are both EH pads, all of their
+ // predecessors must unwind to these blocks, and since no instruction
+ // can have multiple unwind destinations, there will be no overlap in
+ // incoming blocks between SrcPN and DestPN.
+ Value *SrcVal = DestPN.getIncomingValue(Idx);
+ PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
+
+ bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
+ for (auto *Pred : predecessors(BB)) {
+ Value *Incoming =
+ NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
+ DestPN.addIncoming(Incoming, Pred);
+ }
+ }
+
+ // Sink any remaining PHI nodes directly into UnwindDest.
+ Instruction *InsertPt = DestEHPad;
+ for (PHINode &PN : make_early_inc_range(BB->phis())) {
+ if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
+ // If the PHI node has no uses or all of its uses are in this basic
+ // block (meaning they are debug or lifetime intrinsics), just leave
+ // it. It will be erased when we erase BB below.
+ continue;
+
+ // Otherwise, sink this PHI node into UnwindDest.
+ // Any predecessors to UnwindDest which are not already represented
+ // must be back edges which inherit the value from the path through
+ // BB. In this case, the PHI value must reference itself.
+ for (auto *pred : predecessors(UnwindDest))
+ if (pred != BB)
+ PN.addIncoming(&PN, pred);
+ PN.moveBefore(InsertPt);
+ // Also, add a dummy incoming value for the original BB itself,
+ // so that the PHI is well-formed until we drop said predecessor.
+ PN.addIncoming(PoisonValue::get(PN.getType()), BB);
+ }
+ }
+
+ std::vector<DominatorTree::UpdateType> Updates;
+
+ // We use make_early_inc_range here because we will remove all predecessors.
+ for (BasicBlock *PredBB : llvm::make_early_inc_range(predecessors(BB))) {
+ if (UnwindDest == nullptr) {
+ if (DTU) {
+ DTU->applyUpdates(Updates);
+ Updates.clear();
+ }
+ removeUnwindEdge(PredBB, DTU);
+ ++NumInvokes;
+ } else {
+ BB->removePredecessor(PredBB);
+ Instruction *TI = PredBB->getTerminator();
+ TI->replaceUsesOfWith(BB, UnwindDest);
+ if (DTU) {
+ Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
+ Updates.push_back({DominatorTree::Delete, PredBB, BB});
+ }
+ }
+ }
+
+ if (DTU)
+ DTU->applyUpdates(Updates);
+
+ DeleteDeadBlock(BB, DTU);
+
+ return true;
+}
+
+// Try to merge two cleanuppads together.
+static bool mergeCleanupPad(CleanupReturnInst *RI) {
+ // Skip any cleanuprets which unwind to caller, there is nothing to merge
+ // with.
+ BasicBlock *UnwindDest = RI->getUnwindDest();
+ if (!UnwindDest)
+ return false;
+
+ // If this cleanupret isn't the only predecessor of the successor cleanuppad,
+ // it wouldn't be safe to merge without code duplication.
+ if (UnwindDest->getSinglePredecessor() != RI->getParent())
+ return false;
+
+ // Verify that our cleanuppad's unwind destination is another cleanuppad.
+ auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
+ if (!SuccessorCleanupPad)
+ return false;
+
+ CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
+ // Replace any uses of the successor cleanuppad with the predecessor pad.
+ // The only uses of the successor cleanuppad should be its own cleanupret and
+ // the funclet bundle operands of calls within it.
+ SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
+ // Remove the old cleanuppad.
+ SuccessorCleanupPad->eraseFromParent();
+ // Now, we simply replace the cleanupret with a branch to the unwind
+ // destination.
+ BranchInst::Create(UnwindDest, RI->getParent());
+ RI->eraseFromParent();
+
+ return true;
+}
+
+bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
+ // It is possible to transiently have an undef cleanuppad operand because we
+ // have deleted some, but not all, dead blocks.
+ // Eventually, this block will be deleted.
+ if (isa<UndefValue>(RI->getOperand(0)))
+ return false;
+
+ if (mergeCleanupPad(RI))
+ return true;
+
+ if (removeEmptyCleanup(RI, DTU))
+ return true;
+
+ return false;
+}
+
+// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
+bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
+ BasicBlock *BB = UI->getParent();
+
+ bool Changed = false;
+
+ // If there are any instructions immediately before the unreachable that can
+ // be removed, do so.
+ while (UI->getIterator() != BB->begin()) {
+ BasicBlock::iterator BBI = UI->getIterator();
+ --BBI;
+
+ if (!isGuaranteedToTransferExecutionToSuccessor(&*BBI))
+ break; // Can not drop any more instructions. We're done here.
+ // Otherwise, this instruction can be freely erased,
+ // even if it is not side-effect free.
+
+ // Note that deleting EH's here is in fact okay, although it involves a bit
+ // of subtle reasoning. If this inst is an EH, all the predecessors of this
+ // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
+ // and we can therefore guarantee this block will be erased.
+
+ // Delete this instruction (any uses are guaranteed to be dead)
+ BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
+ BBI->eraseFromParent();
+ Changed = true;
+ }
+
+ // If the unreachable instruction is the first in the block, take a gander
+ // at all of the predecessors of this instruction, and simplify them.
+ if (&BB->front() != UI)
+ return Changed;
+
+ std::vector<DominatorTree::UpdateType> Updates;
+
+ SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ auto *Predecessor = Preds[i];
+ Instruction *TI = Predecessor->getTerminator();
+ IRBuilder<> Builder(TI);
+ if (auto *BI = dyn_cast<BranchInst>(TI)) {
+ // We could either have a proper unconditional branch,
+ // or a degenerate conditional branch with matching destinations.
+ if (all_of(BI->successors(),
+ [BB](auto *Successor) { return Successor == BB; })) {
+ new UnreachableInst(TI->getContext(), TI);
+ TI->eraseFromParent();
+ Changed = true;
+ } else {
+ assert(BI->isConditional() && "Can't get here with an uncond branch.");
+ Value* Cond = BI->getCondition();
+ assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
+ "The destinations are guaranteed to be different here.");
+ if (BI->getSuccessor(0) == BB) {
+ Builder.CreateAssumption(Builder.CreateNot(Cond));
+ Builder.CreateBr(BI->getSuccessor(1));
+ } else {
+ assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
+ Builder.CreateAssumption(Cond);
+ Builder.CreateBr(BI->getSuccessor(0));
+ }
+ EraseTerminatorAndDCECond(BI);
+ Changed = true;
+ }
+ if (DTU)
+ Updates.push_back({DominatorTree::Delete, Predecessor, BB});
+ } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
+ SwitchInstProfUpdateWrapper SU(*SI);
+ for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
+ if (i->getCaseSuccessor() != BB) {
+ ++i;
+ continue;
+ }
+ BB->removePredecessor(SU->getParent());
+ i = SU.removeCase(i);
+ e = SU->case_end();
+ Changed = true;
+ }
+ // Note that the default destination can't be removed!
+ if (DTU && SI->getDefaultDest() != BB)
+ Updates.push_back({DominatorTree::Delete, Predecessor, BB});
+ } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
+ if (II->getUnwindDest() == BB) {
+ if (DTU) {
+ DTU->applyUpdates(Updates);
+ Updates.clear();
+ }
+ auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
+ if (!CI->doesNotThrow())
+ CI->setDoesNotThrow();
+ Changed = true;
+ }
+ } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
+ if (CSI->getUnwindDest() == BB) {
+ if (DTU) {
+ DTU->applyUpdates(Updates);
+ Updates.clear();
+ }
+ removeUnwindEdge(TI->getParent(), DTU);
+ Changed = true;
+ continue;
+ }
+
+ for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
+ E = CSI->handler_end();
+ I != E; ++I) {
+ if (*I == BB) {
+ CSI->removeHandler(I);
+ --I;
+ --E;
+ Changed = true;
+ }
+ }
+ if (DTU)
+ Updates.push_back({DominatorTree::Delete, Predecessor, BB});
+ if (CSI->getNumHandlers() == 0) {
+ if (CSI->hasUnwindDest()) {
+ // Redirect all predecessors of the block containing CatchSwitchInst
+ // to instead branch to the CatchSwitchInst's unwind destination.
+ if (DTU) {
+ for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
+ Updates.push_back({DominatorTree::Insert,
+ PredecessorOfPredecessor,
+ CSI->getUnwindDest()});
+ Updates.push_back({DominatorTree::Delete,
+ PredecessorOfPredecessor, Predecessor});
+ }
+ }
+ Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
+ } else {
+ // Rewrite all preds to unwind to caller (or from invoke to call).
+ if (DTU) {
+ DTU->applyUpdates(Updates);
+ Updates.clear();
+ }
+ SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
+ for (BasicBlock *EHPred : EHPreds)
+ removeUnwindEdge(EHPred, DTU);
+ }
+ // The catchswitch is no longer reachable.
+ new UnreachableInst(CSI->getContext(), CSI);
+ CSI->eraseFromParent();
+ Changed = true;
+ }
+ } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
+ (void)CRI;
+ assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
+ "Expected to always have an unwind to BB.");
+ if (DTU)
+ Updates.push_back({DominatorTree::Delete, Predecessor, BB});
+ new UnreachableInst(TI->getContext(), TI);
+ TI->eraseFromParent();
+ Changed = true;
+ }
+ }
+
+ if (DTU)
+ DTU->applyUpdates(Updates);
+
+ // If this block is now dead, remove it.
+ if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
+ DeleteDeadBlock(BB, DTU);
+ return true;
+ }
+
+ return Changed;
+}
+
+static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) {
+ assert(Cases.size() >= 1);
+
+ array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);
+ for (size_t I = 1, E = Cases.size(); I != E; ++I) {
+ if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
+ return false;
+ }
+ return true;
+}
+
+static void createUnreachableSwitchDefault(SwitchInst *Switch,
+ DomTreeUpdater *DTU) {
+ LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
+ auto *BB = Switch->getParent();
+ auto *OrigDefaultBlock = Switch->getDefaultDest();
+ OrigDefaultBlock->removePredecessor(BB);
+ BasicBlock *NewDefaultBlock = BasicBlock::Create(
+ BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
+ OrigDefaultBlock);
+ new UnreachableInst(Switch->getContext(), NewDefaultBlock);
+ Switch->setDefaultDest(&*NewDefaultBlock);
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 2> Updates;
+ Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
+ if (!is_contained(successors(BB), OrigDefaultBlock))
+ Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
+ DTU->applyUpdates(Updates);
+ }
+}
+
+/// Turn a switch into an integer range comparison and branch.
+/// Switches with more than 2 destinations are ignored.
+/// Switches with 1 destination are also ignored.
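+/// For example (illustrative IR), a switch over contiguous cases 3, 4 and 5
+/// that all go to %A, with default %B, can become:
+///   %off = add i32 %x, -3
+///   %cmp = icmp ult i32 %off, 3
+///   br i1 %cmp, label %A, label %B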
+bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
+ IRBuilder<> &Builder) {
+ assert(SI->getNumCases() > 1 && "Degenerate switch?");
+
+ bool HasDefault =
+ !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
+
+ auto *BB = SI->getParent();
+
+ // Partition the cases into two sets with different destinations.
+ BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
+ BasicBlock *DestB = nullptr;
+ SmallVector<ConstantInt *, 16> CasesA;
+ SmallVector<ConstantInt *, 16> CasesB;
+
+ for (auto Case : SI->cases()) {
+ BasicBlock *Dest = Case.getCaseSuccessor();
+ if (!DestA)
+ DestA = Dest;
+ if (Dest == DestA) {
+ CasesA.push_back(Case.getCaseValue());
+ continue;
+ }
+ if (!DestB)
+ DestB = Dest;
+ if (Dest == DestB) {
+ CasesB.push_back(Case.getCaseValue());
+ continue;
+ }
+ return false; // More than two destinations.
+ }
+ if (!DestB)
+ return false; // All destinations are the same and the default is unreachable
+
+ assert(DestA && DestB &&
+ "Single-destination switch should have been folded.");
+ assert(DestA != DestB);
+ assert(DestB != SI->getDefaultDest());
+ assert(!CasesB.empty() && "There must be non-default cases.");
+ assert(!CasesA.empty() || HasDefault);
+
+ // Figure out if one of the sets of cases form a contiguous range.
+ SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
+ BasicBlock *ContiguousDest = nullptr;
+ BasicBlock *OtherDest = nullptr;
+ if (!CasesA.empty() && CasesAreContiguous(CasesA)) {
+ ContiguousCases = &CasesA;
+ ContiguousDest = DestA;
+ OtherDest = DestB;
+ } else if (CasesAreContiguous(CasesB)) {
+ ContiguousCases = &CasesB;
+ ContiguousDest = DestB;
+ OtherDest = DestA;
+ } else
+ return false;
+
+ // Start building the compare and branch.
+
+ Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
+ Constant *NumCases =
+ ConstantInt::get(Offset->getType(), ContiguousCases->size());
+
+ Value *Sub = SI->getCondition();
+ if (!Offset->isNullValue())
+ Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
+
+ Value *Cmp;
+ // If NumCases overflowed, then all possible values jump to the successor.
+ if (NumCases->isNullValue() && !ContiguousCases->empty())
+ Cmp = ConstantInt::getTrue(SI->getContext());
+ else
+ Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
+ BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);
+
+ // Update weight for the newly-created conditional branch.
+ if (hasBranchWeightMD(*SI)) {
+ SmallVector<uint64_t, 8> Weights;
+ GetBranchWeights(SI, Weights);
+ if (Weights.size() == 1 + SI->getNumCases()) {
+ uint64_t TrueWeight = 0;
+ uint64_t FalseWeight = 0;
+ for (size_t I = 0, E = Weights.size(); I != E; ++I) {
+ if (SI->getSuccessor(I) == ContiguousDest)
+ TrueWeight += Weights[I];
+ else
+ FalseWeight += Weights[I];
+ }
+ while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
+ TrueWeight /= 2;
+ FalseWeight /= 2;
+ }
+ setBranchWeights(NewBI, TrueWeight, FalseWeight);
+ }
+ }
+
+ // Prune obsolete incoming values off the successors' PHI nodes.
+ for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
+ unsigned PreviousEdges = ContiguousCases->size();
+ if (ContiguousDest == SI->getDefaultDest())
+ ++PreviousEdges;
+ for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
+ cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
+ }
+ for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
+ unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
+ if (OtherDest == SI->getDefaultDest())
+ ++PreviousEdges;
+ for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
+ cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
+ }
+
+ // Clean up the default block - it may have phis or other instructions before
+ // the unreachable terminator.
+ if (!HasDefault)
+ createUnreachableSwitchDefault(SI, DTU);
+
+ auto *UnreachableDefault = SI->getDefaultDest();
+
+ // Drop the switch.
+ SI->eraseFromParent();
+
+ if (!HasDefault && DTU)
+ DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});
+
+ return true;
+}
+
+/// Compute masked bits for the condition of a switch
+/// and use it to remove dead cases.
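+/// For example (illustrative): if the known bits show that the low bit of the
+/// condition is always zero, any case with an odd value can never be taken
+/// and is removed; if the known bits prove the remaining cases cover every
+/// possible condition value, the default destination is made unreachable too.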
+static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
+ AssumptionCache *AC,
+ const DataLayout &DL) {
+ Value *Cond = SI->getCondition();
+ KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI);
+
+ // We can also eliminate cases by determining that their values are outside of
+ // the limited range of the condition based on how many significant (non-sign)
+ // bits are in the condition value.
+ unsigned MaxSignificantBitsInCond =
+ ComputeMaxSignificantBits(Cond, DL, 0, AC, SI);
+
+ // Gather dead cases.
+ SmallVector<ConstantInt *, 8> DeadCases;
+ SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
+ SmallVector<BasicBlock *, 8> UniqueSuccessors;
+ for (const auto &Case : SI->cases()) {
+ auto *Successor = Case.getCaseSuccessor();
+ if (DTU) {
+ if (!NumPerSuccessorCases.count(Successor))
+ UniqueSuccessors.push_back(Successor);
+ ++NumPerSuccessorCases[Successor];
+ }
+ const APInt &CaseVal = Case.getCaseValue()->getValue();
+ if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
+ (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) {
+ DeadCases.push_back(Case.getCaseValue());
+ if (DTU)
+ --NumPerSuccessorCases[Successor];
+ LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
+ << " is dead.\n");
+ }
+ }
+
+ // If we can prove that the cases must cover all possible values, the
+ // default destination becomes dead and we can remove it. If we know some
+ // of the bits in the value, we can use that to more precisely compute the
+ // number of possible unique case values.
+ bool HasDefault =
+ !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
+ const unsigned NumUnknownBits =
+ Known.getBitWidth() - (Known.Zero | Known.One).countPopulation();
+ assert(NumUnknownBits <= Known.getBitWidth());
+ if (HasDefault && DeadCases.empty() &&
+ NumUnknownBits < 64 /* avoid overflow */ &&
+ SI->getNumCases() == (1ULL << NumUnknownBits)) {
+ createUnreachableSwitchDefault(SI, DTU);
+ return true;
+ }
+
+ if (DeadCases.empty())
+ return false;
+
+ SwitchInstProfUpdateWrapper SIW(*SI);
+ for (ConstantInt *DeadCase : DeadCases) {
+ SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
+ assert(CaseI != SI->case_default() &&
+ "Case was not found. Probably mistake in DeadCases forming.");
+ // Prune unused values from PHI nodes.
+ CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
+ SIW.removeCase(CaseI);
+ }
+
+ if (DTU) {
+ std::vector<DominatorTree::UpdateType> Updates;
+ for (auto *Successor : UniqueSuccessors)
+ if (NumPerSuccessorCases[Successor] == 0)
+ Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
+ DTU->applyUpdates(Updates);
+ }
+
+ return true;
+}
+
+/// If BB would be eligible for simplification by
+/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
+/// by an unconditional branch), look at the phi node for BB in the successor
+/// block and see if the incoming value is equal to CaseValue. If so, return
+/// the phi node, and set PhiIndex to BB's index in the phi node.
+static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
+ BasicBlock *BB, int *PhiIndex) {
+ if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
+ return nullptr; // BB must be empty to be a candidate for simplification.
+ if (!BB->getSinglePredecessor())
+ return nullptr; // BB must be dominated by the switch.
+
+ BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!Branch || !Branch->isUnconditional())
+ return nullptr; // Terminator must be unconditional branch.
+
+ BasicBlock *Succ = Branch->getSuccessor(0);
+
+ for (PHINode &PHI : Succ->phis()) {
+ int Idx = PHI.getBasicBlockIndex(BB);
+ assert(Idx >= 0 && "PHI has no entry for predecessor?");
+
+ Value *InValue = PHI.getIncomingValue(Idx);
+ if (InValue != CaseValue)
+ continue;
+
+ *PhiIndex = Idx;
+ return &PHI;
+ }
+
+ return nullptr;
+}
+
+/// Try to forward the condition of a switch instruction to a phi node
+/// dominated by the switch, if that would mean that some of the destination
+/// blocks of the switch can be folded away. Return true if a change is made.
+static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
+ using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
+
+ ForwardingNodesMap ForwardingNodes;
+ BasicBlock *SwitchBlock = SI->getParent();
+ bool Changed = false;
+ for (const auto &Case : SI->cases()) {
+ ConstantInt *CaseValue = Case.getCaseValue();
+ BasicBlock *CaseDest = Case.getCaseSuccessor();
+
+ // Replace phi operands in successor blocks that are using the constant case
+ // value rather than the switch condition variable:
+ // switchbb:
+ // switch i32 %x, label %default [
+ // i32 17, label %succ
+ // ...
+ // succ:
+ // %r = phi i32 ... [ 17, %switchbb ] ...
+ // -->
+ // %r = phi i32 ... [ %x, %switchbb ] ...
+
+ for (PHINode &Phi : CaseDest->phis()) {
+ // This only works if there is exactly 1 incoming edge from the switch to
+ // a phi. If there is >1, that means multiple cases of the switch map to 1
+ // value in the phi, and that phi value is not the switch condition. Thus,
+ // this transform would not make sense (the phi would be invalid because
+ // a phi can't have different incoming values from the same block).
+ int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
+ if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
+ count(Phi.blocks(), SwitchBlock) == 1) {
+ Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
+ Changed = true;
+ }
+ }
+
+ // Collect phi nodes that are indirectly using this switch's case constants.
+ int PhiIdx;
+ if (auto *Phi = FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
+ ForwardingNodes[Phi].push_back(PhiIdx);
+ }
+
+ for (auto &ForwardingNode : ForwardingNodes) {
+ PHINode *Phi = ForwardingNode.first;
+ SmallVectorImpl<int> &Indexes = ForwardingNode.second;
+ if (Indexes.size() < 2)
+ continue;
+
+ for (int Index : Indexes)
+ Phi->setIncomingValue(Index, SI->getCondition());
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// Return true if the backend will be able to handle
+/// initializing an array of constants like C.
+static bool ValidLookupTableConstant(Constant *C, const TargetTransformInfo &TTI) {
+ if (C->isThreadDependent())
+ return false;
+ if (C->isDLLImportDependent())
+ return false;
+
+ if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
+ !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) &&
+ !isa<UndefValue>(C) && !isa<ConstantExpr>(C))
+ return false;
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ // Pointer casts and in-bounds GEPs will not prohibit the backend from
+ // materializing the array of constants.
+ Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
+ if (StrippedC == C || !ValidLookupTableConstant(StrippedC, TTI))
+ return false;
+ }
+
+ if (!TTI.shouldBuildLookupTablesForConstant(C))
+ return false;
+
+ return true;
+}
+
+/// If V is a Constant, return it. Otherwise, try to look up
+/// its constant value in ConstantPool, returning 0 if it's not there.
+static Constant *
+LookupConstant(Value *V,
+ const SmallDenseMap<Value *, Constant *> &ConstantPool) {
+ if (Constant *C = dyn_cast<Constant>(V))
+ return C;
+ return ConstantPool.lookup(V);
+}
+
+/// Try to fold instruction I into a constant. This works for
+/// simple instructions such as binary operations where both operands are
+/// constant or can be replaced by constants from the ConstantPool. Returns the
+/// resulting constant on success, 0 otherwise.
+static Constant *
+ConstantFold(Instruction *I, const DataLayout &DL,
+ const SmallDenseMap<Value *, Constant *> &ConstantPool) {
+ if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
+ Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
+ if (!A)
+ return nullptr;
+ if (A->isAllOnesValue())
+ return LookupConstant(Select->getTrueValue(), ConstantPool);
+ if (A->isNullValue())
+ return LookupConstant(Select->getFalseValue(), ConstantPool);
+ return nullptr;
+ }
+
+ SmallVector<Constant *, 4> COps;
+ for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
+ if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool))
+ COps.push_back(A);
+ else
+ return nullptr;
+ }
+
+ return ConstantFoldInstOperands(I, COps, DL);
+}
+
+/// Try to determine the resulting constant values in phi nodes
+/// at the common destination basic block, *CommonDest, for one of the case
+/// destinations CaseDest corresponding to value CaseVal (null for the default
+/// case), of a switch instruction SI.
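+/// For example (illustrative IR), for case value 7 with a case block:
+///   %y = add i32 %x, 1
+///   br label %common
+/// where %common has "phi i32 [ %y, %case ], ...", this records the pair
+/// (phi, 8), since the condition %x is known to be 7 on this edge.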
+static bool
+getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
+ BasicBlock **CommonDest,
+ SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
+ const DataLayout &DL, const TargetTransformInfo &TTI) {
+ // The block from which we enter the common destination.
+ BasicBlock *Pred = SI->getParent();
+
+ // If CaseDest is empty except for some side-effect free instructions through
+ // which we can constant-propagate the CaseVal, continue to its successor.
+ SmallDenseMap<Value *, Constant *> ConstantPool;
+ ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
+ for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
+ if (I.isTerminator()) {
+ // If the terminator is a simple branch, continue to the next block.
+ if (I.getNumSuccessors() != 1 || I.isExceptionalTerminator())
+ return false;
+ Pred = CaseDest;
+ CaseDest = I.getSuccessor(0);
+ } else if (Constant *C = ConstantFold(&I, DL, ConstantPool)) {
+ // Instruction is side-effect free and constant.
+
+ // If the instruction has uses outside this block or a phi node slot for
+ // the block, it is not safe to bypass the instruction since it would then
+ // no longer dominate all its uses.
+ for (auto &Use : I.uses()) {
+ User *User = Use.getUser();
+ if (Instruction *I = dyn_cast<Instruction>(User))
+ if (I->getParent() == CaseDest)
+ continue;
+ if (PHINode *Phi = dyn_cast<PHINode>(User))
+ if (Phi->getIncomingBlock(Use) == CaseDest)
+ continue;
+ return false;
+ }
+
+ ConstantPool.insert(std::make_pair(&I, C));
+ } else {
+ break;
+ }
+ }
+
+ // If we did not have a CommonDest before, use the current one.
+ if (!*CommonDest)
+ *CommonDest = CaseDest;
+ // If the destination isn't the common one, abort.
+ if (CaseDest != *CommonDest)
+ return false;
+
+ // Get the values for this case from phi nodes in the destination block.
+ for (PHINode &PHI : (*CommonDest)->phis()) {
+ int Idx = PHI.getBasicBlockIndex(Pred);
+ if (Idx == -1)
+ continue;
+
+ Constant *ConstVal =
+ LookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
+ if (!ConstVal)
+ return false;
+
+ // Be conservative about which kinds of constants we support.
+ if (!ValidLookupTableConstant(ConstVal, TTI))
+ return false;
+
+ Res.push_back(std::make_pair(&PHI, ConstVal));
+ }
+
+ return Res.size() > 0;
+}
+
+// Helper function used to add CaseVal to the list of cases that generate
+// Result. Returns the updated number of cases that generate this result.
+static size_t mapCaseToResult(ConstantInt *CaseVal,
+ SwitchCaseResultVectorTy &UniqueResults,
+ Constant *Result) {
+ for (auto &I : UniqueResults) {
+ if (I.first == Result) {
+ I.second.push_back(CaseVal);
+ return I.second.size();
+ }
+ }
+ UniqueResults.push_back(
+ std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
+ return 1;
+}
+
+// Helper function that initializes a map containing
+// results for the PHI node of the common destination block for a switch
+// instruction. Returns false if multiple PHI nodes have been found or if
+// there is not a common destination block for the switch.
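+//
+// Purely illustrative example: for a switch where cases 1 and 3 produce 10 at
+// the phi and case 2 produces 20, UniqueResults ends up as
+//   { (i32 10, {1, 3}), (i32 20, {2}) }
+// and DefaultResult holds the default destination's constant, if there is one.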
+static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI,
+ BasicBlock *&CommonDest,
+ SwitchCaseResultVectorTy &UniqueResults,
+ Constant *&DefaultResult,
+ const DataLayout &DL,
+ const TargetTransformInfo &TTI,
+ uintptr_t MaxUniqueResults) {
+ for (const auto &I : SI->cases()) {
+ ConstantInt *CaseVal = I.getCaseValue();
+
+ // Resulting value at phi nodes for this case value.
+ SwitchCaseResultsTy Results;
+ if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
+ DL, TTI))
+ return false;
+
+ // Only one value per case is permitted.
+ if (Results.size() > 1)
+ return false;
+
+ // Add the case->result mapping to UniqueResults.
+ const size_t NumCasesForResult =
+ mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
+
+ // Early out if there are too many cases for this result.
+ if (NumCasesForResult > MaxSwitchCasesPerResult)
+ return false;
+
+ // Early out if there are too many unique results.
+ if (UniqueResults.size() > MaxUniqueResults)
+ return false;
+
+ // Check the PHI consistency.
+ if (!PHI)
+ PHI = Results[0].first;
+ else if (PHI != Results[0].first)
+ return false;
+ }
+ // Find the default result value.
+ SmallVector<std::pair<PHINode *, Constant *>, 1> DefaultResults;
+ BasicBlock *DefaultDest = SI->getDefaultDest();
+ getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
+ DL, TTI);
+ // If the default value is not found abort unless the default destination
+ // is unreachable.
+ DefaultResult =
+ DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
+ if ((!DefaultResult &&
+ !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())))
+ return false;
+
+ return true;
+}
+
+// Helper function that checks if it is possible to transform a switch with only
+// two cases (or two cases + default) that produces a result into a select.
+// TODO: Handle switches with more than 2 cases that map to the same result.
+static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
+ Constant *DefaultResult, Value *Condition,
+ IRBuilder<> &Builder) {
+ // If we are selecting between only two cases transform into a simple
+ // select or a two-way select if default is possible.
+ // Example:
+ // switch (a) { %0 = icmp eq i32 %a, 10
+ // case 10: return 42; %1 = select i1 %0, i32 42, i32 4
+ // case 20: return 2; ----> %2 = icmp eq i32 %a, 20
+ // default: return 4; %3 = select i1 %2, i32 2, i32 %1
+ // }
+ if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
+ ResultVector[1].second.size() == 1) {
+ ConstantInt *FirstCase = ResultVector[0].second[0];
+ ConstantInt *SecondCase = ResultVector[1].second[0];
+ Value *SelectValue = ResultVector[1].first;
+ if (DefaultResult) {
+ Value *ValueCompare =
+ Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
+ SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
+ DefaultResult, "switch.select");
+ }
+ Value *ValueCompare =
+ Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
+ return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
+ SelectValue, "switch.select");
+ }
+
+  // Handle the degenerate case where multiple cases map to the same result.
+ if (ResultVector.size() == 1 && DefaultResult) {
+ ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
+ unsigned CaseCount = CaseValues.size();
+    // Groups of 2^n case values that map to the same result:
+ // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
+ // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
+ // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
+ if (isPowerOf2_32(CaseCount)) {
+ ConstantInt *MinCaseVal = CaseValues[0];
+      // Find the minimal case value.
+ for (auto *Case : CaseValues)
+ if (Case->getValue().slt(MinCaseVal->getValue()))
+ MinCaseVal = Case;
+
+      // Mark the bits touched by the case values.
+ APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
+ for (auto *Case : CaseValues)
+ BitMask |= (Case->getValue() - MinCaseVal->getValue());
+
+      // Check whether the cases mapping to the same result cover every value
+      // representable by the touched bits.
+ if (BitMask.countPopulation() == Log2_32(CaseCount)) {
+ if (!MinCaseVal->isNullValue())
+ Condition = Builder.CreateSub(Condition, MinCaseVal);
+ Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
+ Value *Cmp = Builder.CreateICmpEQ(
+ And, Constant::getNullValue(And->getType()), "switch.selectcmp");
+ return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
+ }
+ }
+
+    // Handle the degenerate case where exactly two case values share a result.
+ if (CaseValues.size() == 2) {
+ Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
+ "switch.selectcmp.case1");
+ Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
+ "switch.selectcmp.case2");
+ Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
+ return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
+ }
+ }
+
+ return nullptr;
+}
+
+// Helper function to clean up a switch instruction that has been converted into
+// a select, fixing up PHI nodes and basic blocks.
+static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI,
+ Value *SelectValue,
+ IRBuilder<> &Builder,
+ DomTreeUpdater *DTU) {
+ std::vector<DominatorTree::UpdateType> Updates;
+
+ BasicBlock *SelectBB = SI->getParent();
+ BasicBlock *DestBB = PHI->getParent();
+
+ if (DTU && !is_contained(predecessors(DestBB), SelectBB))
+ Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
+ Builder.CreateBr(DestBB);
+
+ // Remove the switch.
+
+ while (PHI->getBasicBlockIndex(SelectBB) >= 0)
+ PHI->removeIncomingValue(SelectBB);
+ PHI->addIncoming(SelectValue, SelectBB);
+
+ SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
+ for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
+ BasicBlock *Succ = SI->getSuccessor(i);
+
+ if (Succ == DestBB)
+ continue;
+ Succ->removePredecessor(SelectBB);
+ if (DTU && RemovedSuccessors.insert(Succ).second)
+ Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
+ }
+ SI->eraseFromParent();
+ if (DTU)
+ DTU->applyUpdates(Updates);
+}
+
+/// If a switch is only used to initialize one or more phi nodes in a common
+/// successor block with only two different constant values, try to replace the
+/// switch with a select. Returns true if the fold was made.
+static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
+ DomTreeUpdater *DTU, const DataLayout &DL,
+ const TargetTransformInfo &TTI) {
+ Value *const Cond = SI->getCondition();
+ PHINode *PHI = nullptr;
+ BasicBlock *CommonDest = nullptr;
+ Constant *DefaultResult;
+ SwitchCaseResultVectorTy UniqueResults;
+ // Collect all the cases that will deliver the same value from the switch.
+ if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
+ DL, TTI, /*MaxUniqueResults*/ 2))
+ return false;
+
+ assert(PHI != nullptr && "PHI for value select not found");
+ Builder.SetInsertPoint(SI);
+ Value *SelectValue =
+ foldSwitchToSelect(UniqueResults, DefaultResult, Cond, Builder);
+ if (!SelectValue)
+ return false;
+
+ removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
+ return true;
+}
+
+namespace {
+
+/// This class represents a lookup table that can be used to replace a switch.
+class SwitchLookupTable {
+public:
+ /// Create a lookup table to use as a switch replacement with the contents
+ /// of Values, using DefaultValue to fill any holes in the table.
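+  ///
+  /// Illustrative example (hypothetical numbers): with Offset 1, TableSize 4,
+  /// Values {(1, 10), (2, 20), (4, 40)} and DefaultValue 0, the table contents
+  /// are [10, 20, 0, 40]; the constructor then picks the cheapest
+  /// representation (single value, linear map, bitmap, or array) for them.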
+ SwitchLookupTable(
+ Module &M, uint64_t TableSize, ConstantInt *Offset,
+ const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
+ Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);
+
+ /// Build instructions with Builder to retrieve the value at
+ /// the position given by Index in the lookup table.
+ Value *BuildLookup(Value *Index, IRBuilder<> &Builder);
+
+ /// Return true if a table with TableSize elements of
+ /// type ElementType would fit in a target-legal register.
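+  ///
+  /// For instance (illustrative only), 8 elements of type i8 need 64 bits and
+  /// fit on a target with legal 64-bit integers, whereas 8 elements of type
+  /// i32 need 256 bits and do not.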
+ static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
+ Type *ElementType);
+
+private:
+ // Depending on the contents of the table, it can be represented in
+ // different ways.
+ enum {
+ // For tables where each element contains the same value, we just have to
+ // store that single value and return it for each lookup.
+ SingleValueKind,
+
+    // For tables where there is a linear relationship between the table index
+    // and the values, we calculate the result with a simple multiplication
+    // and addition instead of a table lookup.
+ LinearMapKind,
+
+ // For small tables with integer elements, we can pack them into a bitmap
+ // that fits into a target-legal register. Values are retrieved by
+ // shift and mask operations.
+ BitMapKind,
+
+ // The table is stored as an array of values. Values are retrieved by load
+ // instructions from the table.
+ ArrayKind
+ } Kind;
+
+ // For SingleValueKind, this is the single value.
+ Constant *SingleValue = nullptr;
+
+ // For BitMapKind, this is the bitmap.
+ ConstantInt *BitMap = nullptr;
+ IntegerType *BitMapElementTy = nullptr;
+
+ // For LinearMapKind, these are the constants used to derive the value.
+ ConstantInt *LinearOffset = nullptr;
+ ConstantInt *LinearMultiplier = nullptr;
+
+ // For ArrayKind, this is the array.
+ GlobalVariable *Array = nullptr;
+};
+
+} // end anonymous namespace
+
+SwitchLookupTable::SwitchLookupTable(
+ Module &M, uint64_t TableSize, ConstantInt *Offset,
+ const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
+ Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) {
+ assert(Values.size() && "Can't build lookup table without values!");
+ assert(TableSize >= Values.size() && "Can't fit values in table!");
+
+ // If all values in the table are equal, this is that value.
+ SingleValue = Values.begin()->second;
+
+ Type *ValueType = Values.begin()->second->getType();
+
+ // Build up the table contents.
+ SmallVector<Constant *, 64> TableContents(TableSize);
+ for (size_t I = 0, E = Values.size(); I != E; ++I) {
+ ConstantInt *CaseVal = Values[I].first;
+ Constant *CaseRes = Values[I].second;
+ assert(CaseRes->getType() == ValueType);
+
+ uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
+ TableContents[Idx] = CaseRes;
+
+ if (CaseRes != SingleValue)
+ SingleValue = nullptr;
+ }
+
+ // Fill in any holes in the table with the default result.
+ if (Values.size() < TableSize) {
+ assert(DefaultValue &&
+ "Need a default value to fill the lookup table holes.");
+ assert(DefaultValue->getType() == ValueType);
+ for (uint64_t I = 0; I < TableSize; ++I) {
+ if (!TableContents[I])
+ TableContents[I] = DefaultValue;
+ }
+
+ if (DefaultValue != SingleValue)
+ SingleValue = nullptr;
+ }
+
+ // If each element in the table contains the same value, we only need to store
+ // that single value.
+ if (SingleValue) {
+ Kind = SingleValueKind;
+ return;
+ }
+
+ // Check if we can derive the value with a linear transformation from the
+ // table index.
+ if (isa<IntegerType>(ValueType)) {
+ bool LinearMappingPossible = true;
+ APInt PrevVal;
+ APInt DistToPrev;
+ assert(TableSize >= 2 && "Should be a SingleValue table.");
+    // Check that the distance between consecutive values is always the same.
+ for (uint64_t I = 0; I < TableSize; ++I) {
+ ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
+ if (!ConstVal) {
+        // This is an undef. We could deal with it, but undefs in lookup tables
+        // are rare. It's probably not worth the additional complexity.
+ LinearMappingPossible = false;
+ break;
+ }
+ const APInt &Val = ConstVal->getValue();
+ if (I != 0) {
+ APInt Dist = Val - PrevVal;
+ if (I == 1) {
+ DistToPrev = Dist;
+ } else if (Dist != DistToPrev) {
+ LinearMappingPossible = false;
+ break;
+ }
+ }
+ PrevVal = Val;
+ }
+ if (LinearMappingPossible) {
+ LinearOffset = cast<ConstantInt>(TableContents[0]);
+ LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
+ Kind = LinearMapKind;
+ ++NumLinearMaps;
+ return;
+ }
+ }
+
+ // If the type is integer and the table fits in a register, build a bitmap.
+ if (WouldFitInRegister(DL, TableSize, ValueType)) {
+ IntegerType *IT = cast<IntegerType>(ValueType);
+ APInt TableInt(TableSize * IT->getBitWidth(), 0);
+ for (uint64_t I = TableSize; I > 0; --I) {
+ TableInt <<= IT->getBitWidth();
+ // Insert values into the bitmap. Undef values are set to zero.
+ if (!isa<UndefValue>(TableContents[I - 1])) {
+ ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
+ TableInt |= Val->getValue().zext(TableInt.getBitWidth());
+ }
+ }
+ BitMap = ConstantInt::get(M.getContext(), TableInt);
+ BitMapElementTy = IT;
+ Kind = BitMapKind;
+ ++NumBitMaps;
+ return;
+ }
+
+ // Store the table in an array.
+ ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
+ Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
+
+ Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
+ GlobalVariable::PrivateLinkage, Initializer,
+ "switch.table." + FuncName);
+ Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+  // Set the alignment to that of an array element. We will only be loading one
+  // value out of it.
+ Array->setAlignment(DL.getPrefTypeAlign(ValueType));
+ Kind = ArrayKind;
+}
+
+Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
+ switch (Kind) {
+ case SingleValueKind:
+ return SingleValue;
+ case LinearMapKind: {
+ // Derive the result value from the input value.
+ Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
+ false, "switch.idx.cast");
+ if (!LinearMultiplier->isOne())
+ Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult");
+ if (!LinearOffset->isZero())
+ Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset");
+ return Result;
+ }
+ case BitMapKind: {
+ // Type of the bitmap (e.g. i59).
+ IntegerType *MapTy = BitMap->getType();
+
+ // Cast Index to the same type as the bitmap.
+ // Note: The Index is <= the number of elements in the table, so
+ // truncating it to the width of the bitmask is safe.
+ Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
+
+ // Multiply the shift amount by the element width.
+ ShiftAmt = Builder.CreateMul(
+ ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
+ "switch.shiftamt");
+
+ // Shift down.
+ Value *DownShifted =
+ Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
+ // Mask off.
+ return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
+ }
+ case ArrayKind: {
+ // Make sure the table index will not overflow when treated as signed.
+ IntegerType *IT = cast<IntegerType>(Index->getType());
+ uint64_t TableSize =
+ Array->getInitializer()->getType()->getArrayNumElements();
+ if (TableSize > (1ULL << std::min(IT->getBitWidth() - 1, 63u)))
+ Index = Builder.CreateZExt(
+ Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1),
+ "switch.tableidx.zext");
+
+ Value *GEPIndices[] = {Builder.getInt32(0), Index};
+ Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
+ GEPIndices, "switch.gep");
+ return Builder.CreateLoad(
+ cast<ArrayType>(Array->getValueType())->getElementType(), GEP,
+ "switch.load");
+ }
+ }
+ llvm_unreachable("Unknown lookup table kind!");
+}
+
+bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL,
+ uint64_t TableSize,
+ Type *ElementType) {
+ auto *IT = dyn_cast<IntegerType>(ElementType);
+ if (!IT)
+ return false;
+ // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
+ // are <= 15, we could try to narrow the type.
+
+ // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
+ if (TableSize >= UINT_MAX / IT->getBitWidth())
+ return false;
+ return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
+}
+
+static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI,
+ const DataLayout &DL) {
+ // Allow any legal type.
+ if (TTI.isTypeLegal(Ty))
+ return true;
+
+ auto *IT = dyn_cast<IntegerType>(Ty);
+ if (!IT)
+ return false;
+
+ // Also allow power of 2 integer types that have at least 8 bits and fit in
+ // a register. These types are common in frontend languages and targets
+ // usually support loads of these types.
+ // TODO: We could relax this to any integer that fits in a register and rely
+ // on ABI alignment and padding in the table to allow the load to be widened.
+ // Or we could widen the constants and truncate the load.
+ unsigned BitWidth = IT->getBitWidth();
+ return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
+ DL.fitsInLegalInteger(IT->getBitWidth());
+}
+
+static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
+ // 40% is the default density for building a jump table in optsize/minsize
+ // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
+ // function was based on.
+ const uint64_t MinDensity = 40;
+
+ if (CaseRange >= UINT64_MAX / 100)
+ return false; // Avoid multiplication overflows below.
+
+ return NumCases * 100 >= CaseRange * MinDensity;
+}
+
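+// Overload taking the (sorted) list of case values. Worked example with
+// illustrative numbers: values {1, 3, 5, 9} span a range of 9 with 4 cases,
+// and 4 * 100 >= 9 * 40, so the switch counts as dense.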
+static bool isSwitchDense(ArrayRef<int64_t> Values) {
+ uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
+ uint64_t Range = Diff + 1;
+ if (Range < Diff)
+ return false; // Overflow.
+
+ return isSwitchDense(Values.size(), Range);
+}
+
+/// Determine whether a lookup table should be built for this switch, based on
+/// the number of cases, size of the table, and the types of the results.
+// TODO: We could support larger than legal types by limiting based on the
+// number of loads required and/or table size. If the constants are small we
+// could use smaller table entries and extend after the load.
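+//
+// Illustrative example (assuming a typical 64-bit target): a table of 8 i8
+// results packs into 64 bits and fits in a register, so it is always built;
+// a table of 8 i32 results does not fit, so it is only built if the case
+// values are dense enough.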
+static bool
+ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
+ const TargetTransformInfo &TTI, const DataLayout &DL,
+ const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
+ if (SI->getNumCases() > TableSize)
+ return false; // TableSize overflowed.
+
+ bool AllTablesFitInRegister = true;
+ bool HasIllegalType = false;
+ for (const auto &I : ResultTypes) {
+ Type *Ty = I.second;
+
+ // Saturate this flag to true.
+ HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
+
+ // Saturate this flag to false.
+ AllTablesFitInRegister =
+ AllTablesFitInRegister &&
+ SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty);
+
+ // If both flags saturate, we're done. NOTE: This *only* works with
+ // saturating flags, and all flags have to saturate first due to the
+ // non-deterministic behavior of iterating over a dense map.
+ if (HasIllegalType && !AllTablesFitInRegister)
+ break;
+ }
+
+ // If each table would fit in a register, we should build it anyway.
+ if (AllTablesFitInRegister)
+ return true;
+
+ // Don't build a table that doesn't fit in-register if it has illegal types.
+ if (HasIllegalType)
+ return false;
+
+ return isSwitchDense(SI->getNumCases(), TableSize);
+}
+
+static bool ShouldUseSwitchConditionAsTableIndex(
+ ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
+ bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes,
+ const DataLayout &DL, const TargetTransformInfo &TTI) {
+ if (MinCaseVal.isNullValue())
+ return true;
+ if (MinCaseVal.isNegative() ||
+ MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
+ !HasDefaultResults)
+ return false;
+ return all_of(ResultTypes, [&](const auto &KV) {
+ return SwitchLookupTable::WouldFitInRegister(
+ DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */,
+ KV.second /* ResultType */);
+ });
+}
+
+/// Try to reuse the switch table index compare. Following pattern:
+/// \code
+/// if (idx < tablesize)
+/// r = table[idx]; // table does not contain default_value
+/// else
+/// r = default_value;
+/// if (r != default_value)
+/// ...
+/// \endcode
+/// Is optimized to:
+/// \code
+/// cond = idx < tablesize;
+/// if (cond)
+/// r = table[idx];
+/// else
+/// r = default_value;
+/// if (cond)
+/// ...
+/// \endcode
+/// Jump threading will then eliminate the second if(cond).
+static void reuseTableCompare(
+ User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
+ Constant *DefaultValue,
+ const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
+ ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser);
+ if (!CmpInst)
+ return;
+
+ // We require that the compare is in the same block as the phi so that jump
+ // threading can do its work afterwards.
+ if (CmpInst->getParent() != PhiBlock)
+ return;
+
+ Constant *CmpOp1 = dyn_cast<Constant>(CmpInst->getOperand(1));
+ if (!CmpOp1)
+ return;
+
+ Value *RangeCmp = RangeCheckBranch->getCondition();
+ Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
+ Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
+
+ // Check if the compare with the default value is constant true or false.
+ Constant *DefaultConst = ConstantExpr::getICmp(CmpInst->getPredicate(),
+ DefaultValue, CmpOp1, true);
+ if (DefaultConst != TrueConst && DefaultConst != FalseConst)
+ return;
+
+ // Check if the compare with the case values is distinct from the default
+ // compare result.
+ for (auto ValuePair : Values) {
+ Constant *CaseConst = ConstantExpr::getICmp(CmpInst->getPredicate(),
+ ValuePair.second, CmpOp1, true);
+ if (!CaseConst || CaseConst == DefaultConst ||
+ (CaseConst != TrueConst && CaseConst != FalseConst))
+ return;
+ }
+
+ // Check if the branch instruction dominates the phi node. It's a simple
+ // dominance check, but sufficient for our needs.
+ // Although this check is invariant in the calling loops, it's better to do it
+ // at this late stage. Practically we do it at most once for a switch.
+ BasicBlock *BranchBlock = RangeCheckBranch->getParent();
+ for (BasicBlock *Pred : predecessors(PhiBlock)) {
+ if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
+ return;
+ }
+
+ if (DefaultConst == FalseConst) {
+ // The compare yields the same result. We can replace it.
+ CmpInst->replaceAllUsesWith(RangeCmp);
+ ++NumTableCmpReuses;
+ } else {
+ // The compare yields the same result, just inverted. We can replace it.
+ Value *InvertedTableCmp = BinaryOperator::CreateXor(
+ RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
+ RangeCheckBranch);
+ CmpInst->replaceAllUsesWith(InvertedTableCmp);
+ ++NumTableCmpReuses;
+ }
+}
+
+/// If the switch is only used to initialize one or more phi nodes in a common
+/// successor block with different constant values, replace the switch with
+/// lookup tables.
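+///
+/// Illustrative sketch only (source-level view, not the exact IR produced):
+/// \code
+///   switch (x) {                         static const int t[] = {13, 42, 7};
+///   case 0: r = 13; break;               if ((unsigned)x < 3)
+///   case 1: r = 42; break;      ---->      r = t[x];
+///   case 2: r = 7;  break;               else
+///   default: r = 0;                        r = 0;
+///   }
+/// \endcode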
+static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
+ DomTreeUpdater *DTU, const DataLayout &DL,
+ const TargetTransformInfo &TTI) {
+ assert(SI->getNumCases() > 1 && "Degenerate switch?");
+
+ BasicBlock *BB = SI->getParent();
+ Function *Fn = BB->getParent();
+  // Only build lookup tables when the target supports them and the
+  // "no-jump-tables" function attribute is not set.
+ if (!TTI.shouldBuildLookupTables() ||
+ (Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
+ return false;
+
+ // FIXME: If the switch is too sparse for a lookup table, perhaps we could
+ // split off a dense part and build a lookup table for that.
+
+ // FIXME: This creates arrays of GEPs to constant strings, which means each
+ // GEP needs a runtime relocation in PIC code. We should just build one big
+ // string and lookup indices into that.
+
+  // Ignore switches with fewer than three cases. Lookup tables will not make
+  // them faster, so we don't analyze them.
+ if (SI->getNumCases() < 3)
+ return false;
+
+ // Figure out the corresponding result for each case value and phi node in the
+ // common destination, as well as the min and max case values.
+ assert(!SI->cases().empty());
+ SwitchInst::CaseIt CI = SI->case_begin();
+ ConstantInt *MinCaseVal = CI->getCaseValue();
+ ConstantInt *MaxCaseVal = CI->getCaseValue();
+
+ BasicBlock *CommonDest = nullptr;
+
+ using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
+ SmallDenseMap<PHINode *, ResultListTy> ResultLists;
+
+ SmallDenseMap<PHINode *, Constant *> DefaultResults;
+ SmallDenseMap<PHINode *, Type *> ResultTypes;
+ SmallVector<PHINode *, 4> PHIs;
+
+ for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
+ ConstantInt *CaseVal = CI->getCaseValue();
+ if (CaseVal->getValue().slt(MinCaseVal->getValue()))
+ MinCaseVal = CaseVal;
+ if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
+ MaxCaseVal = CaseVal;
+
+ // Resulting value at phi nodes for this case value.
+ using ResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
+ ResultsTy Results;
+ if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
+ Results, DL, TTI))
+ return false;
+
+ // Append the result from this case to the list for each phi.
+ for (const auto &I : Results) {
+ PHINode *PHI = I.first;
+ Constant *Value = I.second;
+ if (!ResultLists.count(PHI))
+ PHIs.push_back(PHI);
+ ResultLists[PHI].push_back(std::make_pair(CaseVal, Value));
+ }
+ }
+
+ // Keep track of the result types.
+ for (PHINode *PHI : PHIs) {
+ ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
+ }
+
+ uint64_t NumResults = ResultLists[PHIs[0]].size();
+
+ // If the table has holes, we need a constant result for the default case
+ // or a bitmask that fits in a register.
+ SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
+ bool HasDefaultResults =
+ getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
+ DefaultResultsList, DL, TTI);
+
+ for (const auto &I : DefaultResultsList) {
+ PHINode *PHI = I.first;
+ Constant *Result = I.second;
+ DefaultResults[PHI] = Result;
+ }
+
+ bool UseSwitchConditionAsTableIndex = ShouldUseSwitchConditionAsTableIndex(
+ *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
+ uint64_t TableSize;
+ if (UseSwitchConditionAsTableIndex)
+ TableSize = MaxCaseVal->getLimitedValue() + 1;
+ else
+ TableSize =
+ (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
+
+ bool TableHasHoles = (NumResults < TableSize);
+ bool NeedMask = (TableHasHoles && !HasDefaultResults);
+ if (NeedMask) {
+ // As an extra penalty for the validity test we require more cases.
+ if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
+ return false;
+ if (!DL.fitsInLegalInteger(TableSize))
+ return false;
+ }
+
+ if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
+ return false;
+
+ std::vector<DominatorTree::UpdateType> Updates;
+
+ // Create the BB that does the lookups.
+ Module &Mod = *CommonDest->getParent()->getParent();
+ BasicBlock *LookupBB = BasicBlock::Create(
+ Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
+
+ // Compute the table index value.
+ Builder.SetInsertPoint(SI);
+ Value *TableIndex;
+ ConstantInt *TableIndexOffset;
+ if (UseSwitchConditionAsTableIndex) {
+ TableIndexOffset = ConstantInt::get(MaxCaseVal->getType(), 0);
+ TableIndex = SI->getCondition();
+ } else {
+ TableIndexOffset = MinCaseVal;
+ TableIndex =
+ Builder.CreateSub(SI->getCondition(), TableIndexOffset, "switch.tableidx");
+ }
+
+ // Compute the maximum table size representable by the integer type we are
+ // switching upon.
+ unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
+ uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
+ assert(MaxTableSize >= TableSize &&
+ "It is impossible for a switch to have more entries than the max "
+ "representable value of its input integer type's size.");
+
+ // If the default destination is unreachable, or if the lookup table covers
+ // all values of the conditional variable, branch directly to the lookup table
+ // BB. Otherwise, check that the condition is within the case range.
+ const bool DefaultIsReachable =
+ !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
+ const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
+ BranchInst *RangeCheckBranch = nullptr;
+
+ if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
+ Builder.CreateBr(LookupBB);
+ if (DTU)
+ Updates.push_back({DominatorTree::Insert, BB, LookupBB});
+    // Note: We call removePredecessor later since we need to be able to get the
+    // PHI value for the default case in case we're using a bit mask.
+ } else {
+ Value *Cmp = Builder.CreateICmpULT(
+ TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
+ RangeCheckBranch =
+ Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
+ if (DTU)
+ Updates.push_back({DominatorTree::Insert, BB, LookupBB});
+ }
+
+ // Populate the BB that does the lookups.
+ Builder.SetInsertPoint(LookupBB);
+
+ if (NeedMask) {
+ // Before doing the lookup, we do the hole check. The LookupBB is therefore
+ // re-purposed to do the hole check, and we create a new LookupBB.
+ BasicBlock *MaskBB = LookupBB;
+ MaskBB->setName("switch.hole_check");
+ LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
+ CommonDest->getParent(), CommonDest);
+
+ // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
+ // unnecessary illegal types.
+ uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
+ APInt MaskInt(TableSizePowOf2, 0);
+ APInt One(TableSizePowOf2, 1);
+ // Build bitmask; fill in a 1 bit for every case.
+ const ResultListTy &ResultList = ResultLists[PHIs[0]];
+ for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
+ uint64_t Idx = (ResultList[I].first->getValue() - TableIndexOffset->getValue())
+ .getLimitedValue();
+ MaskInt |= One << Idx;
+ }
+ ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
+
+ // Get the TableIndex'th bit of the bitmask.
+ // If this bit is 0 (meaning hole) jump to the default destination,
+ // else continue with table lookup.
+ IntegerType *MapTy = TableMask->getType();
+ Value *MaskIndex =
+ Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
+ Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
+ Value *LoBit = Builder.CreateTrunc(
+ Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
+ Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
+ if (DTU) {
+ Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
+ Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
+ }
+ Builder.SetInsertPoint(LookupBB);
+ AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
+ }
+
+ if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
+ // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
+ // do not delete PHINodes here.
+ SI->getDefaultDest()->removePredecessor(BB,
+ /*KeepOneInputPHIs=*/true);
+ if (DTU)
+ Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
+ }
+
+ for (PHINode *PHI : PHIs) {
+ const ResultListTy &ResultList = ResultLists[PHI];
+
+ // If using a bitmask, use any value to fill the lookup table holes.
+ Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
+ StringRef FuncName = Fn->getName();
+ SwitchLookupTable Table(Mod, TableSize, TableIndexOffset, ResultList, DV,
+ DL, FuncName);
+
+ Value *Result = Table.BuildLookup(TableIndex, Builder);
+
+ // Do a small peephole optimization: re-use the switch table compare if
+ // possible.
+ if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
+ BasicBlock *PhiBlock = PHI->getParent();
+ // Search for compare instructions which use the phi.
+ for (auto *User : PHI->users()) {
+ reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList);
+ }
+ }
+
+ PHI->addIncoming(Result, LookupBB);
+ }
+
+ Builder.CreateBr(CommonDest);
+ if (DTU)
+ Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
+
+ // Remove the switch.
+ SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
+ for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
+ BasicBlock *Succ = SI->getSuccessor(i);
+
+ if (Succ == SI->getDefaultDest())
+ continue;
+ Succ->removePredecessor(BB);
+ if (DTU && RemovedSuccessors.insert(Succ).second)
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
+ }
+ SI->eraseFromParent();
+
+ if (DTU)
+ DTU->applyUpdates(Updates);
+
+ ++NumLookupTables;
+ if (NeedMask)
+ ++NumLookupTablesHoles;
+ return true;
+}
+
+/// Try to transform a switch that has "holes" in it to a contiguous sequence
+/// of cases.
+///
+/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
+/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
+///
+/// This converts a sparse switch into a dense switch which allows better
+/// lowering and could also allow transforming into a lookup table.
+static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
+ const DataLayout &DL,
+ const TargetTransformInfo &TTI) {
+ auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
+ if (CondTy->getIntegerBitWidth() > 64 ||
+ !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
+ return false;
+ // Only bother with this optimization if there are more than 3 switch cases;
+ // SDAG will only bother creating jump tables for 4 or more cases.
+ if (SI->getNumCases() < 4)
+ return false;
+
+ // This transform is agnostic to the signedness of the input or case values. We
+ // can treat the case values as signed or unsigned. We can optimize more common
+ // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
+ // as signed.
+ SmallVector<int64_t,4> Values;
+ for (const auto &C : SI->cases())
+ Values.push_back(C.getCaseValue()->getValue().getSExtValue());
+ llvm::sort(Values);
+
+ // If the switch is already dense, there's nothing useful to do here.
+ if (isSwitchDense(Values))
+ return false;
+
+ // First, transform the values such that they start at zero and ascend.
+ int64_t Base = Values[0];
+ for (auto &V : Values)
+ V -= (uint64_t)(Base);
+
+ // Now we have signed numbers that have been shifted so that, given enough
+ // precision, there are no negative values. Since the rest of the transform
+ // is bitwise only, we switch now to an unsigned representation.
+
+ // This transform can be done speculatively because it is so cheap - it
+ // results in a single rotate operation being inserted.
+ // FIXME: It's possible that optimizing a switch on powers of two might also
+ // be beneficial - flag values are often powers of two and we could use a CLZ
+ // as the key function.
+
+ // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
+ // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
+ // less than 64.
+ unsigned Shift = 64;
+ for (auto &V : Values)
+ Shift = std::min(Shift, countTrailingZeros((uint64_t)V));
+ assert(Shift < 64);
+ if (Shift > 0)
+ for (auto &V : Values)
+ V = (int64_t)((uint64_t)V >> Shift);
+
+ if (!isSwitchDense(Values))
+ // Transform didn't create a dense switch.
+ return false;
+
+ // The obvious transform is to shift the switch condition right and emit a
+  // check that the condition is actually cleanly divisible by the GCD, i.e.
+  //   C & ((1 << Shift) - 1) == 0
+  // inserting a new CFG edge to handle the case where it doesn't divide cleanly.
+ //
+ // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
+ // shift and puts the shifted-off bits in the uppermost bits. If any of these
+ // are nonzero then the switch condition will be very large and will hit the
+ // default case.
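+  //
+  // Worked example (illustrative numbers): for cases {8, 12, 16, 20} we get
+  // Base = 8 and Shift = 2, so case 12 becomes rotr(12 - 8, 2) = 1. A value
+  // like 13, which does not divide cleanly, becomes rotr(5, 2); that sets a
+  // high bit, producing a huge value that falls through to the default case.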
+
+ auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
+ Builder.SetInsertPoint(SI);
+ auto *ShiftC = ConstantInt::get(Ty, Shift);
+ auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
+ auto *LShr = Builder.CreateLShr(Sub, ShiftC);
+ auto *Shl = Builder.CreateShl(Sub, Ty->getBitWidth() - Shift);
+ auto *Rot = Builder.CreateOr(LShr, Shl);
+ SI->replaceUsesOfWith(SI->getCondition(), Rot);
+
+ for (auto Case : SI->cases()) {
+ auto *Orig = Case.getCaseValue();
+ auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
+ Case.setValue(
+ cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue()))));
+ }
+ return true;
+}
+
+bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
+ BasicBlock *BB = SI->getParent();
+
+ if (isValueEqualityComparison(SI)) {
+ // If we only have one predecessor, and if it is a branch on this value,
+ // see if that predecessor totally determines the outcome of this switch.
+ if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
+ if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
+ return requestResimplify();
+
+ Value *Cond = SI->getCondition();
+ if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
+ if (SimplifySwitchOnSelect(SI, Select))
+ return requestResimplify();
+
+ // If the block only contains the switch, see if we can fold the block
+ // away into any preds.
+ if (SI == &*BB->instructionsWithoutDebug(false).begin())
+ if (FoldValueComparisonIntoPredecessors(SI, Builder))
+ return requestResimplify();
+ }
+
+ // Try to transform the switch into an icmp and a branch.
+ // The conversion from switch to comparison may lose information on
+ // impossible switch values, so disable it early in the pipeline.
+ if (Options.ConvertSwitchRangeToICmp && TurnSwitchRangeIntoICmp(SI, Builder))
+ return requestResimplify();
+
+ // Remove unreachable cases.
+ if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
+ return requestResimplify();
+
+ if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
+ return requestResimplify();
+
+ if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
+ return requestResimplify();
+
+ // The conversion from switch to lookup tables results in difficult-to-analyze
+ // code and makes pruning branches much harder. This is a problem if the
+ // switch expression itself can still be restricted as a result of inlining or
+ // CVP. Therefore, only apply this transformation during late stages of the
+ // optimisation pipeline.
+ if (Options.ConvertSwitchToLookupTable &&
+ SwitchToLookupTable(SI, Builder, DTU, DL, TTI))
+ return requestResimplify();
+
+ if (ReduceSwitchRange(SI, Builder, DL, TTI))
+ return requestResimplify();
+
+ return false;
+}
+
+bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
+ BasicBlock *BB = IBI->getParent();
+ bool Changed = false;
+
+ // Eliminate redundant destinations.
+ SmallPtrSet<Value *, 8> Succs;
+ SmallSetVector<BasicBlock *, 8> RemovedSuccs;
+ for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
+ BasicBlock *Dest = IBI->getDestination(i);
+ if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
+ if (!Dest->hasAddressTaken())
+ RemovedSuccs.insert(Dest);
+ Dest->removePredecessor(BB);
+ IBI->removeDestination(i);
+ --i;
+ --e;
+ Changed = true;
+ }
+ }
+
+ if (DTU) {
+ std::vector<DominatorTree::UpdateType> Updates;
+ Updates.reserve(RemovedSuccs.size());
+ for (auto *RemovedSucc : RemovedSuccs)
+ Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
+ DTU->applyUpdates(Updates);
+ }
+
+ if (IBI->getNumDestinations() == 0) {
+ // If the indirectbr has no successors, change it to unreachable.
+ new UnreachableInst(IBI->getContext(), IBI);
+ EraseTerminatorAndDCECond(IBI);
+ return true;
+ }
+
+ if (IBI->getNumDestinations() == 1) {
+ // If the indirectbr has one successor, change it to a direct branch.
+ BranchInst::Create(IBI->getDestination(0), IBI);
+ EraseTerminatorAndDCECond(IBI);
+ return true;
+ }
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
+ if (SimplifyIndirectBrOnSelect(IBI, SI))
+ return requestResimplify();
+ }
+ return Changed;
+}
+
+/// Given a block with only a single landing pad and an unconditional branch,
+/// try to find another basic block with which this one can be merged. This
+/// handles cases where we have multiple invokes with unique landing pads, but
+/// a shared handler.
+///
+/// We specifically choose to not worry about merging non-empty blocks
+/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
+/// practice, the optimizer produces empty landing pad blocks quite frequently
+/// when dealing with exception dense code. (see: instcombine, gvn, if-else
+/// sinking in this file)
+///
+/// This is primarily a code size optimization. We need to avoid performing
+/// any transform which might inhibit optimization (such as our ability to
+/// specialize a particular handler via tail commoning). We do this by not
+/// merging any blocks which require us to introduce a phi. Since the same
+/// values are flowing through both blocks, we don't lose any ability to
+/// specialize. If anything, we make such specialization more likely.
+///
+/// TODO - This transformation could remove entries from a phi in the target
+/// block when the inputs in the phi are the same for the two blocks being
+/// merged. In some cases, this could result in removal of the PHI entirely.
+static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
+ BasicBlock *BB, DomTreeUpdater *DTU) {
+ auto Succ = BB->getUniqueSuccessor();
+ assert(Succ);
+ // If there's a phi in the successor block, we'd likely have to introduce
+ // a phi into the merged landing pad block.
+ if (isa<PHINode>(*Succ->begin()))
+ return false;
+
+ for (BasicBlock *OtherPred : predecessors(Succ)) {
+ if (BB == OtherPred)
+ continue;
+ BasicBlock::iterator I = OtherPred->begin();
+ LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
+ if (!LPad2 || !LPad2->isIdenticalTo(LPad))
+ continue;
+ for (++I; isa<DbgInfoIntrinsic>(I); ++I)
+ ;
+ BranchInst *BI2 = dyn_cast<BranchInst>(I);
+ if (!BI2 || !BI2->isIdenticalTo(BI))
+ continue;
+
+ std::vector<DominatorTree::UpdateType> Updates;
+
+ // We've found an identical block. Update our predecessors to take that
+ // path instead and make ourselves dead.
+ SmallSetVector<BasicBlock *, 16> UniquePreds(pred_begin(BB), pred_end(BB));
+ for (BasicBlock *Pred : UniquePreds) {
+ InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
+ assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
+ "unexpected successor");
+ II->setUnwindDest(OtherPred);
+ if (DTU) {
+ Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
+ Updates.push_back({DominatorTree::Delete, Pred, BB});
+ }
+ }
+
+ // The debug info in OtherPred doesn't cover the merged control flow that
+ // used to go through BB. We need to delete it or update it.
+ for (Instruction &Inst : llvm::make_early_inc_range(*OtherPred))
+ if (isa<DbgInfoIntrinsic>(Inst))
+ Inst.eraseFromParent();
+
+ SmallSetVector<BasicBlock *, 16> UniqueSuccs(succ_begin(BB), succ_end(BB));
+ for (BasicBlock *Succ : UniqueSuccs) {
+ Succ->removePredecessor(BB);
+ if (DTU)
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
+ }
+
+ IRBuilder<> Builder(BI);
+ Builder.CreateUnreachable();
+ BI->eraseFromParent();
+ if (DTU)
+ DTU->applyUpdates(Updates);
+ return true;
+ }
+ return false;
+}
+
+bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
+ return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
+ : simplifyCondBranch(Branch, Builder);
+}
+
+bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
+ IRBuilder<> &Builder) {
+ BasicBlock *BB = BI->getParent();
+ BasicBlock *Succ = BI->getSuccessor(0);
+
+ // If the Terminator is the only non-phi instruction, simplify the block.
+ // If LoopHeader is provided, check if the block or its successor is a loop
+ // header. (This is for early invocations before loop simplify and
+ // vectorization to keep canonical loop forms for nested loops. These blocks
+ // can be eliminated when the pass is invoked later in the back-end.)
+  // Note that if BB has only one predecessor then we do not introduce a new
+  // backedge, so we can eliminate BB.
+ bool NeedCanonicalLoop =
+ Options.NeedCanonicalLoop &&
+ (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
+ (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
+ BasicBlock::iterator I = BB->getFirstNonPHIOrDbg(true)->getIterator();
+ if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
+ !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
+ return true;
+
+ // If the only instruction in the block is a seteq/setne comparison against a
+ // constant, try to simplify the block.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
+ if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
+ for (++I; isa<DbgInfoIntrinsic>(I); ++I)
+ ;
+ if (I->isTerminator() &&
+ tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
+ return true;
+ }
+
+ // See if we can merge an empty landing pad block with another which is
+ // equivalent.
+ if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
+ for (++I; isa<DbgInfoIntrinsic>(I); ++I)
+ ;
+ if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB, DTU))
+ return true;
+ }
+
+ // If this basic block is ONLY a compare and a branch, and if a predecessor
+ // branches to us and our successor, fold the comparison into the
+ // predecessor and use logical operations to update the incoming value
+ // for PHI nodes in common successor.
+ if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
+ Options.BonusInstThreshold))
+ return requestResimplify();
+ return false;
+}
+
+static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) {
+ BasicBlock *PredPred = nullptr;
+ for (auto *P : predecessors(BB)) {
+ BasicBlock *PPred = P->getSinglePredecessor();
+ if (!PPred || (PredPred && PredPred != PPred))
+ return nullptr;
+ PredPred = PPred;
+ }
+ return PredPred;
+}
+
+bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
+ assert(
+ !isa<ConstantInt>(BI->getCondition()) &&
+ BI->getSuccessor(0) != BI->getSuccessor(1) &&
+ "Tautological conditional branch should have been eliminated already.");
+
+ BasicBlock *BB = BI->getParent();
+ if (!Options.SimplifyCondBranch)
+ return false;
+
+ // Conditional branch
+ if (isValueEqualityComparison(BI)) {
+ // If we only have one predecessor, and if it is a branch on this value,
+ // see if that predecessor totally determines the outcome of this
+ // switch.
+ if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
+ if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
+ return requestResimplify();
+
+ // This block must be empty, except for the setcond inst, if it exists.
+ // Ignore dbg and pseudo intrinsics.
+ auto I = BB->instructionsWithoutDebug(true).begin();
+ if (&*I == BI) {
+ if (FoldValueComparisonIntoPredecessors(BI, Builder))
+ return requestResimplify();
+ } else if (&*I == cast<Instruction>(BI->getCondition())) {
+ ++I;
+ if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
+ return requestResimplify();
+ }
+ }
+
+ // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
+ if (SimplifyBranchOnICmpChain(BI, Builder, DL))
+ return true;
+
+ // If this basic block has dominating predecessor blocks and the dominating
+ // blocks' conditions imply BI's condition, we know the direction of BI.
+ std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
+ if (Imp) {
+ // Turn this into a branch on constant.
+ auto *OldCond = BI->getCondition();
+ ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
+ : ConstantInt::getFalse(BB->getContext());
+ BI->setCondition(TorF);
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+ return requestResimplify();
+ }
+
+ // If this basic block is ONLY a compare and a branch, and if a predecessor
+ // branches to us and one of our successors, fold the comparison into the
+ // predecessor and use logical operations to pick the right destination.
+ if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
+ Options.BonusInstThreshold))
+ return requestResimplify();
+
+ // We have a conditional branch to two blocks that are only reachable
+ // from BI. We know that the condbr dominates the two blocks, so see if
+ // there is any identical code in the "then" and "else" blocks. If so, we
+ // can hoist it up to the branching block.
+ if (BI->getSuccessor(0)->getSinglePredecessor()) {
+ if (BI->getSuccessor(1)->getSinglePredecessor()) {
+ if (HoistCommon &&
+ HoistThenElseCodeToIf(BI, TTI, !Options.HoistCommonInsts))
+ return requestResimplify();
+ } else {
+ // If Successor #1 has multiple preds, we may be able to conditionally
+ // execute Successor #0 if it branches to Successor #1.
+ Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
+ if (Succ0TI->getNumSuccessors() == 1 &&
+ Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
+ if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI))
+ return requestResimplify();
+ }
+ } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
+ // If Successor #0 has multiple preds, we may be able to conditionally
+ // execute Successor #1 if it branches to Successor #0.
+ Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
+ if (Succ1TI->getNumSuccessors() == 1 &&
+ Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
+ if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI))
+ return requestResimplify();
+ }
+
+ // If this is a branch on something for which we know the constant value in
+ // predecessors (e.g. a phi node in the current block), thread control
+ // through this block.
+ if (FoldCondBranchOnValueKnownInPredecessor(BI, DTU, DL, Options.AC))
+ return requestResimplify();
+
+ // Scan predecessor blocks for conditional branches.
+ for (BasicBlock *Pred : predecessors(BB))
+ if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
+ if (PBI != BI && PBI->isConditional())
+ if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
+ return requestResimplify();
+
+ // Look for diamond patterns.
+ if (MergeCondStores)
+ if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
+ if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
+ if (PBI != BI && PBI->isConditional())
+ if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
+ return requestResimplify();
+
+ return false;
+}
+
+/// Check if passing a value to an instruction will cause undefined behavior.
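+///
+/// For example (illustrative), passing a null pointer V as the address of a
+/// non-volatile load or store I is undefined behavior in address spaces where
+/// null is not a valid pointer, as is passing undef to a noundef call argument.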
+static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I,
+                                          bool PtrValueMayBeModified = false) {
+ Constant *C = dyn_cast<Constant>(V);
+ if (!C)
+ return false;
+
+ if (I->use_empty())
+ return false;
+
+ if (C->isNullValue() || isa<UndefValue>(C)) {
+    // Only look at the first use to avoid hurting compile time with long use lists.
+ auto *Use = cast<Instruction>(*I->user_begin());
+ // Bail out if Use is not in the same BB as I or Use == I or Use comes
+ // before I in the block. The latter two can be the case if Use is a PHI
+ // node.
+ if (Use->getParent() != I->getParent() || Use == I || Use->comesBefore(I))
+ return false;
+
+    // Now make sure that there are no instructions in between that can alter
+    // control flow (e.g. calls).
+ auto InstrRange =
+ make_range(std::next(I->getIterator()), Use->getIterator());
+ if (any_of(InstrRange, [](Instruction &I) {
+ return !isGuaranteedToTransferExecutionToSuccessor(&I);
+ }))
+ return false;
+
+ // Look through GEPs. A load from a GEP derived from NULL is still undefined
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
+ if (GEP->getPointerOperand() == I) {
+ if (!GEP->isInBounds() || !GEP->hasAllZeroIndices())
+ PtrValueMayBeModified = true;
+ return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
+ }
+
+ // Look through bitcasts.
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(Use))
+ return passingValueIsAlwaysUndefined(V, BC, PtrValueMayBeModified);
+
+ // Load from null is undefined.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Use))
+ if (!LI->isVolatile())
+ return !NullPointerIsDefined(LI->getFunction(),
+ LI->getPointerAddressSpace());
+
+ // Store to null is undefined.
+ if (StoreInst *SI = dyn_cast<StoreInst>(Use))
+ if (!SI->isVolatile())
+ return (!NullPointerIsDefined(SI->getFunction(),
+ SI->getPointerAddressSpace())) &&
+ SI->getPointerOperand() == I;
+
+ if (auto *CB = dyn_cast<CallBase>(Use)) {
+ if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
+ return false;
+ // A call to null is undefined.
+ if (CB->getCalledOperand() == I)
+ return true;
+
+ if (C->isNullValue()) {
+ for (const llvm::Use &Arg : CB->args())
+ if (Arg == I) {
+ unsigned ArgIdx = CB->getArgOperandNo(&Arg);
+ if (CB->isPassingUndefUB(ArgIdx) &&
+ CB->paramHasAttr(ArgIdx, Attribute::NonNull)) {
+              // Passing null to a nonnull+noundef argument is undefined.
+ return !PtrValueMayBeModified;
+ }
+ }
+ } else if (isa<UndefValue>(C)) {
+ // Passing undef to a noundef argument is undefined.
+ for (const llvm::Use &Arg : CB->args())
+ if (Arg == I) {
+ unsigned ArgIdx = CB->getArgOperandNo(&Arg);
+ if (CB->isPassingUndefUB(ArgIdx)) {
+ // Passing undef to a noundef argument is undefined.
+ return true;
+ }
+ }
+ }
+ }
+ }
+ return false;
+}
+
+/// If BB has an incoming value that will always trigger undefined behavior
+/// (e.g. null pointer dereference), remove the branch leading here.
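+///
+/// Illustrative summary: if a phi in BB has an incoming null value that is
+/// only used as the address of a load, the edge from that predecessor can
+/// never be taken in a well-defined program. Unconditional branches into BB
+/// become 'unreachable', conditional branches drop the dead destination, and
+/// switch edges are redirected to a fresh unreachable block.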
+static bool removeUndefIntroducingPredecessor(BasicBlock *BB,
+ DomTreeUpdater *DTU) {
+ for (PHINode &PHI : BB->phis())
+ for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
+ if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
+ BasicBlock *Predecessor = PHI.getIncomingBlock(i);
+ Instruction *T = Predecessor->getTerminator();
+ IRBuilder<> Builder(T);
+ if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
+ BB->removePredecessor(Predecessor);
+ // Turn unconditional branches into unreachables and remove the dead
+ // destination from conditional branches.
+ if (BI->isUnconditional())
+ Builder.CreateUnreachable();
+ else {
+ // Preserve guarding condition in assume, because it might not be
+ // inferrable from any dominating condition.
+ Value *Cond = BI->getCondition();
+ if (BI->getSuccessor(0) == BB)
+ Builder.CreateAssumption(Builder.CreateNot(Cond));
+ else
+ Builder.CreateAssumption(Cond);
+ Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
+ : BI->getSuccessor(0));
+ }
+ BI->eraseFromParent();
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
+ return true;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
+ // Redirect all branches leading to UB into
+ // a newly created unreachable block.
+ BasicBlock *Unreachable = BasicBlock::Create(
+ Predecessor->getContext(), "unreachable", BB->getParent(), BB);
+ Builder.SetInsertPoint(Unreachable);
+ // The new block contains only one instruction: Unreachable
+ Builder.CreateUnreachable();
+ for (const auto &Case : SI->cases())
+ if (Case.getCaseSuccessor() == BB) {
+ BB->removePredecessor(Predecessor);
+ Case.setSuccessor(Unreachable);
+ }
+ if (SI->getDefaultDest() == BB) {
+ BB->removePredecessor(Predecessor);
+ SI->setDefaultDest(Unreachable);
+ }
+
+ if (DTU)
+ DTU->applyUpdates(
+ { { DominatorTree::Insert, Predecessor, Unreachable },
+ { DominatorTree::Delete, Predecessor, BB } });
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
+ bool Changed = false;
+
+ assert(BB && BB->getParent() && "Block not embedded in function!");
+ assert(BB->getTerminator() && "Degenerate basic block encountered!");
+
+ // Remove basic blocks that have no predecessors (except the entry block)...
+  // or that just have themselves as a predecessor. These are unreachable.
+ if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
+ BB->getSinglePredecessor() == BB) {
+ LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
+ DeleteDeadBlock(BB, DTU);
+ return true;
+ }
+
+ // Check to see if we can constant propagate this terminator instruction
+ // away...
+ Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
+ /*TLI=*/nullptr, DTU);
+
+ // Check for and eliminate duplicate PHI nodes in this block.
+ Changed |= EliminateDuplicatePHINodes(BB);
+
+ // Check for and remove branches that will always cause undefined behavior.
+ if (removeUndefIntroducingPredecessor(BB, DTU))
+ return requestResimplify();
+
+ // Merge basic blocks into their predecessor if there is only one distinct
+ // pred, and if there is only one distinct successor of the predecessor, and
+ // if there are no PHI nodes.
+ if (MergeBlockIntoPredecessor(BB, DTU))
+ return true;
+
+ if (SinkCommon && Options.SinkCommonInsts)
+ if (SinkCommonCodeFromPredecessors(BB, DTU) ||
+ MergeCompatibleInvokes(BB, DTU)) {
+ // SinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
+      // so we may now have duplicate PHI's.
+ // Let's rerun EliminateDuplicatePHINodes() first,
+ // before FoldTwoEntryPHINode() potentially converts them into select's,
+ // after which we'd need a whole EarlyCSE pass run to cleanup them.
+ return true;
+ }
+
+ IRBuilder<> Builder(BB);
+
+ if (Options.FoldTwoEntryPHINode) {
+ // If there is a trivial two-entry PHI node in this basic block, and we can
+ // eliminate it, do so now.
+ if (auto *PN = dyn_cast<PHINode>(BB->begin()))
+ if (PN->getNumIncomingValues() == 2)
+ if (FoldTwoEntryPHINode(PN, TTI, DTU, DL))
+ return true;
+ }
+
+ Instruction *Terminator = BB->getTerminator();
+ Builder.SetInsertPoint(Terminator);
+ switch (Terminator->getOpcode()) {
+ case Instruction::Br:
+ Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
+ break;
+ case Instruction::Resume:
+ Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
+ break;
+ case Instruction::CleanupRet:
+ Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
+ break;
+ case Instruction::Switch:
+ Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
+ break;
+ case Instruction::Unreachable:
+ Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
+ break;
+ case Instruction::IndirectBr:
+ Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
+ break;
+ }
+
+ return Changed;
+}
+
+bool SimplifyCFGOpt::run(BasicBlock *BB) {
+ bool Changed = false;
+
+  // Repeatedly simplify BB as long as resimplification is requested.
+ do {
+ Resimplify = false;
+
+    // Perform one round of simplification. The Resimplify flag will be set if
+ // another iteration is requested.
+ Changed |= simplifyOnce(BB);
+ } while (Resimplify);
+
+ return Changed;
+}
+
+bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
+ DomTreeUpdater *DTU, const SimplifyCFGOptions &Options,
+ ArrayRef<WeakVH> LoopHeaders) {
+ return SimplifyCFGOpt(TTI, DTU, BB->getModule()->getDataLayout(), LoopHeaders,
+ Options)
+ .run(BB);
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/SimplifyIndVar.cpp
new file mode 100644
index 0000000000..4e83d2f6e3
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -0,0 +1,2089 @@
+//===-- SimplifyIndVar.cpp - Induction variable simplification ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements induction variable simplification. It does
+// not define any actual pass or policy, but provides a single function to
+// simplify a loop's induction variables based on ScalarEvolution.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "indvars"
+
+STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
+STATISTIC(NumElimOperand, "Number of IV operands folded into a use");
+STATISTIC(NumFoldedUser, "Number of IV users folded into a constant");
+STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
+STATISTIC(
+ NumSimplifiedSDiv,
+ "Number of IV signed division operations converted to unsigned division");
+STATISTIC(
+ NumSimplifiedSRem,
+ "Number of IV signed remainder operations converted to unsigned remainder");
+STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
+
+namespace {
+ /// This is a utility for simplifying induction variables
+ /// based on ScalarEvolution. It is the primary instrument of the
+ /// IndvarSimplify pass, but it may also be directly invoked to cleanup after
+ /// other loop passes that preserve SCEV.
+ class SimplifyIndvar {
+ Loop *L;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ DominatorTree *DT;
+ const TargetTransformInfo *TTI;
+ SCEVExpander &Rewriter;
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts;
+
+ bool Changed = false;
+
+ public:
+ SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT,
+ LoopInfo *LI, const TargetTransformInfo *TTI,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakTrackingVH> &Dead)
+ : L(Loop), LI(LI), SE(SE), DT(DT), TTI(TTI), Rewriter(Rewriter),
+ DeadInsts(Dead) {
+ assert(LI && "IV simplification requires LoopInfo");
+ }
+
+ bool hasChanged() const { return Changed; }
+
+ /// Iteratively perform simplification on a worklist of users of the
+ /// specified induction variable. This is the top-level driver that applies
+ /// all simplifications to users of an IV.
+ void simplifyUsers(PHINode *CurrIV, IVVisitor *V = nullptr);
+
+ Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand);
+
+ bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand);
+ bool replaceIVUserWithLoopInvariant(Instruction *UseInst);
+ bool replaceFloatIVWithIntegerIV(Instruction *UseInst);
+
+ bool eliminateOverflowIntrinsic(WithOverflowInst *WO);
+ bool eliminateSaturatingIntrinsic(SaturatingInst *SI);
+ bool eliminateTrunc(TruncInst *TI);
+ bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
+ bool makeIVComparisonInvariant(ICmpInst *ICmp, Instruction *IVOperand);
+ void eliminateIVComparison(ICmpInst *ICmp, Instruction *IVOperand);
+ void simplifyIVRemainder(BinaryOperator *Rem, Instruction *IVOperand,
+ bool IsSigned);
+ void replaceRemWithNumerator(BinaryOperator *Rem);
+ void replaceRemWithNumeratorOrZero(BinaryOperator *Rem);
+ void replaceSRemWithURem(BinaryOperator *Rem);
+ bool eliminateSDiv(BinaryOperator *SDiv);
+ bool strengthenOverflowingOperation(BinaryOperator *OBO,
+ Instruction *IVOperand);
+ bool strengthenRightShift(BinaryOperator *BO, Instruction *IVOperand);
+ };
+}
+
+/// Find a point in code which dominates all given instructions. We can safely
+/// assume that, whatever fact we can prove at the found point, this fact is
+/// also true for each of the given instructions.
+static Instruction *findCommonDominator(ArrayRef<Instruction *> Instructions,
+ DominatorTree &DT) {
+ Instruction *CommonDom = nullptr;
+ for (auto *Insn : Instructions)
+ CommonDom =
+ CommonDom ? DT.findNearestCommonDominator(CommonDom, Insn) : Insn;
+ assert(CommonDom && "Common dominator not found?");
+ return CommonDom;
+}
+
+/// Fold an IV operand into its use. This removes increments of an
+/// aligned IV when used by an instruction that ignores the low bits.
+///
+/// IVOperand is guaranteed SCEVable, but UseInst may not be.
+///
+/// Return the operand of IVOperand for this induction variable if IVOperand can
+/// be folded (in case more folding opportunities have been exposed).
+/// Otherwise return null.
+Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) {
+ Value *IVSrc = nullptr;
+ const unsigned OperIdx = 0;
+ const SCEV *FoldedExpr = nullptr;
+ bool MustDropExactFlag = false;
+ switch (UseInst->getOpcode()) {
+ default:
+ return nullptr;
+ case Instruction::UDiv:
+ case Instruction::LShr:
+ // We're only interested in the case where we know something about
+ // the numerator and have a constant denominator.
+ if (IVOperand != UseInst->getOperand(OperIdx) ||
+ !isa<ConstantInt>(UseInst->getOperand(1)))
+ return nullptr;
+
+ // Attempt to fold a binary operator with constant operand.
+ // e.g. ((I + 1) >> 2) => I >> 2
+ if (!isa<BinaryOperator>(IVOperand)
+ || !isa<ConstantInt>(IVOperand->getOperand(1)))
+ return nullptr;
+
+ IVSrc = IVOperand->getOperand(0);
+ // IVSrc must be the (SCEVable) IV, since the other operand is const.
+ assert(SE->isSCEVable(IVSrc->getType()) && "Expect SCEVable IV operand");
+
+ ConstantInt *D = cast<ConstantInt>(UseInst->getOperand(1));
+ if (UseInst->getOpcode() == Instruction::LShr) {
+ // Get a constant for the divisor. See createSCEV.
+ uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth();
+ if (D->getValue().uge(BitWidth))
+ return nullptr;
+
+ D = ConstantInt::get(UseInst->getContext(),
+ APInt::getOneBitSet(BitWidth, D->getZExtValue()));
+ }
+ const auto *LHS = SE->getSCEV(IVSrc);
+ const auto *RHS = SE->getSCEV(D);
+ FoldedExpr = SE->getUDivExpr(LHS, RHS);
+ // We might have 'exact' flag set at this point which will no longer be
+ // correct after we make the replacement.
+ if (UseInst->isExact() && LHS != SE->getMulExpr(FoldedExpr, RHS))
+ MustDropExactFlag = true;
+ }
+  // We have something that might fold its operand. Compare SCEVs.
+ if (!SE->isSCEVable(UseInst->getType()))
+ return nullptr;
+
+ // Bypass the operand if SCEV can prove it has no effect.
+ if (SE->getSCEV(UseInst) != FoldedExpr)
+ return nullptr;
+
+ LLVM_DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand
+ << " -> " << *UseInst << '\n');
+
+ UseInst->setOperand(OperIdx, IVSrc);
+ assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper");
+
+ if (MustDropExactFlag)
+ UseInst->dropPoisonGeneratingFlags();
+
+ ++NumElimOperand;
+ Changed = true;
+ if (IVOperand->use_empty())
+ DeadInsts.emplace_back(IVOperand);
+ return IVSrc;
+}
+
+bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,
+ Instruction *IVOperand) {
+ auto *Preheader = L->getLoopPreheader();
+ if (!Preheader)
+ return false;
+ unsigned IVOperIdx = 0;
+ ICmpInst::Predicate Pred = ICmp->getPredicate();
+ if (IVOperand != ICmp->getOperand(0)) {
+ // Swapped
+ assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
+ IVOperIdx = 1;
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+
+ // Get the SCEVs for the ICmp operands (in the specific context of the
+ // current loop)
+ const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
+ const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop);
+ const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop);
+ auto LIP = SE->getLoopInvariantPredicate(Pred, S, X, L, ICmp);
+ if (!LIP)
+ return false;
+ ICmpInst::Predicate InvariantPredicate = LIP->Pred;
+ const SCEV *InvariantLHS = LIP->LHS;
+ const SCEV *InvariantRHS = LIP->RHS;
+
+ // Do not generate something ridiculous.
+ auto *PHTerm = Preheader->getTerminator();
+ if (Rewriter.isHighCostExpansion({ InvariantLHS, InvariantRHS }, L,
+ 2 * SCEVCheapExpansionBudget, TTI, PHTerm))
+ return false;
+ auto *NewLHS =
+ Rewriter.expandCodeFor(InvariantLHS, IVOperand->getType(), PHTerm);
+ auto *NewRHS =
+ Rewriter.expandCodeFor(InvariantRHS, IVOperand->getType(), PHTerm);
+ LLVM_DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n');
+ ICmp->setPredicate(InvariantPredicate);
+ ICmp->setOperand(0, NewLHS);
+ ICmp->setOperand(1, NewRHS);
+ return true;
+}
+
+/// SimplifyIVUsers helper for eliminating useless
+/// comparisons against an induction variable.
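+///
+/// For example (hypothetical IR), if %iv is known to stay within [0, 100),
+/// then 'icmp ult i32 %iv, 200' is always true in this context and is replaced
+/// by the constant true; a signed compare of two provably non-negative values
+/// is instead canonicalized to its unsigned form.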
+void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp,
+ Instruction *IVOperand) {
+ unsigned IVOperIdx = 0;
+ ICmpInst::Predicate Pred = ICmp->getPredicate();
+ ICmpInst::Predicate OriginalPred = Pred;
+ if (IVOperand != ICmp->getOperand(0)) {
+ // Swapped
+ assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
+ IVOperIdx = 1;
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+
+ // Get the SCEVs for the ICmp operands (in the specific context of the
+ // current loop)
+ const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
+ const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop);
+ const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop);
+
+ // If the condition is always true or always false in the given context,
+ // replace it with a constant value.
+ SmallVector<Instruction *, 4> Users;
+ for (auto *U : ICmp->users())
+ Users.push_back(cast<Instruction>(U));
+ const Instruction *CtxI = findCommonDominator(Users, *DT);
+ if (auto Ev = SE->evaluatePredicateAt(Pred, S, X, CtxI)) {
+ SE->forgetValue(ICmp);
+ ICmp->replaceAllUsesWith(ConstantInt::getBool(ICmp->getContext(), *Ev));
+ DeadInsts.emplace_back(ICmp);
+ LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
+ } else if (makeIVComparisonInvariant(ICmp, IVOperand)) {
+ // fallthrough to end of function
+ } else if (ICmpInst::isSigned(OriginalPred) &&
+ SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) {
+    // If we were unable to make anything above, all we can do is canonicalize
+ // the comparison hoping that it will open the doors for other
+ // optimizations. If we find out that we compare two non-negative values,
+ // we turn the instruction's predicate to its unsigned version. Note that
+ // we cannot rely on Pred here unless we check if we have swapped it.
+ assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?");
+ LLVM_DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp
+ << '\n');
+ ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred));
+ } else
+ return;
+
+ ++NumElimCmp;
+ Changed = true;
+}
+
+bool SimplifyIndvar::eliminateSDiv(BinaryOperator *SDiv) {
+ // Get the SCEVs for the ICmp operands.
+ auto *N = SE->getSCEV(SDiv->getOperand(0));
+ auto *D = SE->getSCEV(SDiv->getOperand(1));
+
+ // Simplify unnecessary loops away.
+ const Loop *L = LI->getLoopFor(SDiv->getParent());
+ N = SE->getSCEVAtScope(N, L);
+ D = SE->getSCEVAtScope(D, L);
+
+ // Replace sdiv by udiv if both of the operands are non-negative
+ if (SE->isKnownNonNegative(N) && SE->isKnownNonNegative(D)) {
+ auto *UDiv = BinaryOperator::Create(
+ BinaryOperator::UDiv, SDiv->getOperand(0), SDiv->getOperand(1),
+ SDiv->getName() + ".udiv", SDiv);
+ UDiv->setIsExact(SDiv->isExact());
+ SDiv->replaceAllUsesWith(UDiv);
+ LLVM_DEBUG(dbgs() << "INDVARS: Simplified sdiv: " << *SDiv << '\n');
+ ++NumSimplifiedSDiv;
+ Changed = true;
+ DeadInsts.push_back(SDiv);
+ return true;
+ }
+
+ return false;
+}
+
+// i %s n -> i %u n if i >= 0 and n >= 0
+void SimplifyIndvar::replaceSRemWithURem(BinaryOperator *Rem) {
+ auto *N = Rem->getOperand(0), *D = Rem->getOperand(1);
+ auto *URem = BinaryOperator::Create(BinaryOperator::URem, N, D,
+ Rem->getName() + ".urem", Rem);
+ Rem->replaceAllUsesWith(URem);
+ LLVM_DEBUG(dbgs() << "INDVARS: Simplified srem: " << *Rem << '\n');
+ ++NumSimplifiedSRem;
+ Changed = true;
+ DeadInsts.emplace_back(Rem);
+}
+
+// i % n --> i if i is in [0,n).
+void SimplifyIndvar::replaceRemWithNumerator(BinaryOperator *Rem) {
+ Rem->replaceAllUsesWith(Rem->getOperand(0));
+ LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
+ ++NumElimRem;
+ Changed = true;
+ DeadInsts.emplace_back(Rem);
+}
+
+// (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n).
+void SimplifyIndvar::replaceRemWithNumeratorOrZero(BinaryOperator *Rem) {
+ auto *T = Rem->getType();
+ auto *N = Rem->getOperand(0), *D = Rem->getOperand(1);
+ ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, N, D);
+ SelectInst *Sel =
+ SelectInst::Create(ICmp, ConstantInt::get(T, 0), N, "iv.rem", Rem);
+ Rem->replaceAllUsesWith(Sel);
+ LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
+ ++NumElimRem;
+ Changed = true;
+ DeadInsts.emplace_back(Rem);
+}
+
+/// SimplifyIVUsers helper for eliminating useless remainder operations
+/// operating on an induction variable or replacing srem by urem.
+void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem,
+ Instruction *IVOperand,
+ bool IsSigned) {
+ auto *NValue = Rem->getOperand(0);
+ auto *DValue = Rem->getOperand(1);
+ // We're only interested in the case where we know something about
+ // the numerator, unless it is a srem, because we want to replace srem by urem
+ // in general.
+ bool UsedAsNumerator = IVOperand == NValue;
+ if (!UsedAsNumerator && !IsSigned)
+ return;
+
+ const SCEV *N = SE->getSCEV(NValue);
+
+ // Simplify unnecessary loops away.
+ const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent());
+ N = SE->getSCEVAtScope(N, ICmpLoop);
+
+ bool IsNumeratorNonNegative = !IsSigned || SE->isKnownNonNegative(N);
+
+ // Do not proceed if the Numerator may be negative
+ if (!IsNumeratorNonNegative)
+ return;
+
+ const SCEV *D = SE->getSCEV(DValue);
+ D = SE->getSCEVAtScope(D, ICmpLoop);
+
+ if (UsedAsNumerator) {
+ auto LT = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ if (SE->isKnownPredicate(LT, N, D)) {
+ replaceRemWithNumerator(Rem);
+ return;
+ }
+
+ auto *T = Rem->getType();
+ const auto *NLessOne = SE->getMinusSCEV(N, SE->getOne(T));
+ if (SE->isKnownPredicate(LT, NLessOne, D)) {
+ replaceRemWithNumeratorOrZero(Rem);
+ return;
+ }
+ }
+
+ // Try to replace SRem with URem, if both N and D are known non-negative.
+  // Since we have already checked N, we only need to check D now.
+ if (!IsSigned || !SE->isKnownNonNegative(D))
+ return;
+
+ replaceSRemWithURem(Rem);
+}
+
+bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst *WO) {
+ const SCEV *LHS = SE->getSCEV(WO->getLHS());
+ const SCEV *RHS = SE->getSCEV(WO->getRHS());
+ if (!SE->willNotOverflow(WO->getBinaryOp(), WO->isSigned(), LHS, RHS))
+ return false;
+
+ // Proved no overflow, nuke the overflow check and, if possible, the overflow
+ // intrinsic as well.
+
+ BinaryOperator *NewResult = BinaryOperator::Create(
+ WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), "", WO);
+
+ if (WO->isSigned())
+ NewResult->setHasNoSignedWrap(true);
+ else
+ NewResult->setHasNoUnsignedWrap(true);
+
+ SmallVector<ExtractValueInst *, 4> ToDelete;
+
+ for (auto *U : WO->users()) {
+ if (auto *EVI = dyn_cast<ExtractValueInst>(U)) {
+ if (EVI->getIndices()[0] == 1)
+ EVI->replaceAllUsesWith(ConstantInt::getFalse(WO->getContext()));
+ else {
+ assert(EVI->getIndices()[0] == 0 && "Only two possibilities!");
+ EVI->replaceAllUsesWith(NewResult);
+ }
+ ToDelete.push_back(EVI);
+ }
+ }
+
+ for (auto *EVI : ToDelete)
+ EVI->eraseFromParent();
+
+ if (WO->use_empty())
+ WO->eraseFromParent();
+
+ Changed = true;
+ return true;
+}
+
+bool SimplifyIndvar::eliminateSaturatingIntrinsic(SaturatingInst *SI) {
+ const SCEV *LHS = SE->getSCEV(SI->getLHS());
+ const SCEV *RHS = SE->getSCEV(SI->getRHS());
+ if (!SE->willNotOverflow(SI->getBinaryOp(), SI->isSigned(), LHS, RHS))
+ return false;
+
+ BinaryOperator *BO = BinaryOperator::Create(
+ SI->getBinaryOp(), SI->getLHS(), SI->getRHS(), SI->getName(), SI);
+ if (SI->isSigned())
+ BO->setHasNoSignedWrap();
+ else
+ BO->setHasNoUnsignedWrap();
+
+ SI->replaceAllUsesWith(BO);
+ DeadInsts.emplace_back(SI);
+ Changed = true;
+ return true;
+}
+
+bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) {
+ // It is always legal to replace
+ // icmp <pred> i32 trunc(iv), n
+ // with
+ // icmp <pred> i64 sext(trunc(iv)), sext(n), if pred is signed predicate.
+ // Or with
+ // icmp <pred> i64 zext(trunc(iv)), zext(n), if pred is unsigned predicate.
+ // Or with either of these if pred is an equality predicate.
+ //
+ // If we can prove that iv == sext(trunc(iv)) or iv == zext(trunc(iv)) for
+ // every comparison which uses trunc, it means that we can replace each of
+ // them with comparison of iv against sext/zext(n). We no longer need trunc
+ // after that.
+ //
+ // TODO: Should we do this if we can widen *some* comparisons, but not all
+ // of them? Sometimes it is enough to enable other optimizations, but the
+ // trunc instruction will stay in the loop.
+ Value *IV = TI->getOperand(0);
+ Type *IVTy = IV->getType();
+ const SCEV *IVSCEV = SE->getSCEV(IV);
+ const SCEV *TISCEV = SE->getSCEV(TI);
+
+ // Check if iv == zext(trunc(iv)) and if iv == sext(trunc(iv)). If so, we can
+ // get rid of trunc
+ bool DoesSExtCollapse = false;
+ bool DoesZExtCollapse = false;
+ if (IVSCEV == SE->getSignExtendExpr(TISCEV, IVTy))
+ DoesSExtCollapse = true;
+ if (IVSCEV == SE->getZeroExtendExpr(TISCEV, IVTy))
+ DoesZExtCollapse = true;
+
+  // If neither sext nor zext collapses, it is not profitable to do any
+ // transform. Bail.
+ if (!DoesSExtCollapse && !DoesZExtCollapse)
+ return false;
+
+ // Collect users of the trunc that look like comparisons against invariants.
+ // Bail if we find something different.
+ SmallVector<ICmpInst *, 4> ICmpUsers;
+ for (auto *U : TI->users()) {
+ // We don't care about users in unreachable blocks.
+ if (isa<Instruction>(U) &&
+ !DT->isReachableFromEntry(cast<Instruction>(U)->getParent()))
+ continue;
+ ICmpInst *ICI = dyn_cast<ICmpInst>(U);
+ if (!ICI) return false;
+ assert(L->contains(ICI->getParent()) && "LCSSA form broken?");
+ if (!(ICI->getOperand(0) == TI && L->isLoopInvariant(ICI->getOperand(1))) &&
+ !(ICI->getOperand(1) == TI && L->isLoopInvariant(ICI->getOperand(0))))
+ return false;
+ // If we cannot get rid of trunc, bail.
+ if (ICI->isSigned() && !DoesSExtCollapse)
+ return false;
+ if (ICI->isUnsigned() && !DoesZExtCollapse)
+ return false;
+ // For equality, either signed or unsigned works.
+ ICmpUsers.push_back(ICI);
+ }
+
+ auto CanUseZExt = [&](ICmpInst *ICI) {
+ // Unsigned comparison can be widened as unsigned.
+ if (ICI->isUnsigned())
+ return true;
+ // Is it profitable to do zext?
+ if (!DoesZExtCollapse)
+ return false;
+ // For equality, we can safely zext both parts.
+ if (ICI->isEquality())
+ return true;
+ // Otherwise we can only use zext when comparing two non-negative or two
+    // negative values. But in practice, we will never pass the DoesZExtCollapse
+    // check for a negative value, because zext(trunc(x)) is non-negative. So
+    // it only makes sense to check for non-negativity here.
+ const SCEV *SCEVOP1 = SE->getSCEV(ICI->getOperand(0));
+ const SCEV *SCEVOP2 = SE->getSCEV(ICI->getOperand(1));
+ return SE->isKnownNonNegative(SCEVOP1) && SE->isKnownNonNegative(SCEVOP2);
+ };
+ // Replace all comparisons against trunc with comparisons against IV.
+ for (auto *ICI : ICmpUsers) {
+ bool IsSwapped = L->isLoopInvariant(ICI->getOperand(0));
+ auto *Op1 = IsSwapped ? ICI->getOperand(0) : ICI->getOperand(1);
+ Instruction *Ext = nullptr;
+ // For signed/unsigned predicate, replace the old comparison with comparison
+ // of immediate IV against sext/zext of the invariant argument. If we can
+ // use either sext or zext (i.e. we are dealing with equality predicate),
+ // then prefer zext as a more canonical form.
+ // TODO: If we see a signed comparison which can be turned into unsigned,
+ // we can do it here for canonicalization purposes.
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ if (IsSwapped) Pred = ICmpInst::getSwappedPredicate(Pred);
+ if (CanUseZExt(ICI)) {
+ assert(DoesZExtCollapse && "Unprofitable zext?");
+ Ext = new ZExtInst(Op1, IVTy, "zext", ICI);
+ Pred = ICmpInst::getUnsignedPredicate(Pred);
+ } else {
+ assert(DoesSExtCollapse && "Unprofitable sext?");
+ Ext = new SExtInst(Op1, IVTy, "sext", ICI);
+ assert(Pred == ICmpInst::getSignedPredicate(Pred) && "Must be signed!");
+ }
+ bool Changed;
+ L->makeLoopInvariant(Ext, Changed);
+ (void)Changed;
+ ICmpInst *NewICI = new ICmpInst(ICI, Pred, IV, Ext);
+ ICI->replaceAllUsesWith(NewICI);
+ DeadInsts.emplace_back(ICI);
+ }
+
+ // Trunc no longer needed.
+ TI->replaceAllUsesWith(PoisonValue::get(TI->getType()));
+ DeadInsts.emplace_back(TI);
+ return true;
+}
+
+/// Eliminate an operation that consumes a simple IV and has no observable
+/// side-effect given the range of IV values. IVOperand is guaranteed SCEVable,
+/// but UseInst may not be.
+bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
+ Instruction *IVOperand) {
+ if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
+ eliminateIVComparison(ICmp, IVOperand);
+ return true;
+ }
+ if (BinaryOperator *Bin = dyn_cast<BinaryOperator>(UseInst)) {
+ bool IsSRem = Bin->getOpcode() == Instruction::SRem;
+ if (IsSRem || Bin->getOpcode() == Instruction::URem) {
+ simplifyIVRemainder(Bin, IVOperand, IsSRem);
+ return true;
+ }
+
+ if (Bin->getOpcode() == Instruction::SDiv)
+ return eliminateSDiv(Bin);
+ }
+
+ if (auto *WO = dyn_cast<WithOverflowInst>(UseInst))
+ if (eliminateOverflowIntrinsic(WO))
+ return true;
+
+ if (auto *SI = dyn_cast<SaturatingInst>(UseInst))
+ if (eliminateSaturatingIntrinsic(SI))
+ return true;
+
+ if (auto *TI = dyn_cast<TruncInst>(UseInst))
+ if (eliminateTrunc(TI))
+ return true;
+
+ if (eliminateIdentitySCEV(UseInst, IVOperand))
+ return true;
+
+ return false;
+}
+
+static Instruction *GetLoopInvariantInsertPosition(Loop *L, Instruction *Hint) {
+ if (auto *BB = L->getLoopPreheader())
+ return BB->getTerminator();
+
+ return Hint;
+}
+
+/// Replace the UseInst with a loop invariant expression if it is safe.
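+///
+/// For instance (hypothetical IR), '%m = mul i32 %iv, 0' has the loop-invariant
+/// SCEV 0, so its uses can be rewritten to that invariant value, expanded at
+/// the loop preheader when necessary.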
+bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) {
+ if (!SE->isSCEVable(I->getType()))
+ return false;
+
+ // Get the symbolic expression for this instruction.
+ const SCEV *S = SE->getSCEV(I);
+
+ if (!SE->isLoopInvariant(S, L))
+ return false;
+
+ // Do not generate something ridiculous even if S is loop invariant.
+ if (Rewriter.isHighCostExpansion(S, L, SCEVCheapExpansionBudget, TTI, I))
+ return false;
+
+ auto *IP = GetLoopInvariantInsertPosition(L, I);
+
+ if (!Rewriter.isSafeToExpandAt(S, IP)) {
+ LLVM_DEBUG(dbgs() << "INDVARS: Can not replace IV user: " << *I
+ << " with non-speculable loop invariant: " << *S << '\n');
+ return false;
+ }
+
+ auto *Invariant = Rewriter.expandCodeFor(S, I->getType(), IP);
+
+ I->replaceAllUsesWith(Invariant);
+ LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *I
+ << " with loop invariant: " << *S << '\n');
+ ++NumFoldedUser;
+ Changed = true;
+ DeadInsts.emplace_back(I);
+ return true;
+}
+
+/// Eliminate redundant type cast between integer and float.
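+///
+/// For example (hypothetical IR), when the range of the i32 IV %iv fits in a
+/// double's mantissa:
+///   %f = sitofp i32 %iv to double
+///   %i = fptosi double %f to i32
+/// the round trip is exact, so uses of %i can be replaced with %iv directly.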
+bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) {
+ if (UseInst->getOpcode() != CastInst::SIToFP &&
+ UseInst->getOpcode() != CastInst::UIToFP)
+ return false;
+
+ Instruction *IVOperand = cast<Instruction>(UseInst->getOperand(0));
+ // Get the symbolic expression for this instruction.
+ const SCEV *IV = SE->getSCEV(IVOperand);
+ unsigned MaskBits;
+ if (UseInst->getOpcode() == CastInst::SIToFP)
+ MaskBits = SE->getSignedRange(IV).getMinSignedBits();
+ else
+ MaskBits = SE->getUnsignedRange(IV).getActiveBits();
+ unsigned DestNumSigBits = UseInst->getType()->getFPMantissaWidth();
+ if (MaskBits <= DestNumSigBits) {
+ for (User *U : UseInst->users()) {
+ // Match for fptosi/fptoui of sitofp and with same type.
+ auto *CI = dyn_cast<CastInst>(U);
+ if (!CI)
+ continue;
+
+ CastInst::CastOps Opcode = CI->getOpcode();
+ if (Opcode != CastInst::FPToSI && Opcode != CastInst::FPToUI)
+ continue;
+
+ Value *Conv = nullptr;
+ if (IVOperand->getType() != CI->getType()) {
+ IRBuilder<> Builder(CI);
+ StringRef Name = IVOperand->getName();
+ // To match InstCombine logic, we only need sext if both fptosi and
+ // sitofp are used. If one of them is unsigned, then we can use zext.
+ if (SE->getTypeSizeInBits(IVOperand->getType()) >
+ SE->getTypeSizeInBits(CI->getType())) {
+ Conv = Builder.CreateTrunc(IVOperand, CI->getType(), Name + ".trunc");
+ } else if (Opcode == CastInst::FPToUI ||
+ UseInst->getOpcode() == CastInst::UIToFP) {
+ Conv = Builder.CreateZExt(IVOperand, CI->getType(), Name + ".zext");
+ } else {
+ Conv = Builder.CreateSExt(IVOperand, CI->getType(), Name + ".sext");
+ }
+ } else
+ Conv = IVOperand;
+
+ CI->replaceAllUsesWith(Conv);
+ DeadInsts.push_back(CI);
+ LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *CI
+ << " with: " << *Conv << '\n');
+
+ ++NumFoldedUser;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+/// Eliminate any operation that SCEV can prove is an identity function.
+bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
+ Instruction *IVOperand) {
+ if (!SE->isSCEVable(UseInst->getType()) ||
+ (UseInst->getType() != IVOperand->getType()) ||
+ (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand)))
+ return false;
+
+ // getSCEV(X) == getSCEV(Y) does not guarantee that X and Y are related in the
+ // dominator tree, even if X is an operand to Y. For instance, in
+ //
+ // %iv = phi i32 {0,+,1}
+ // br %cond, label %left, label %merge
+ //
+ // left:
+ // %X = add i32 %iv, 0
+ // br label %merge
+ //
+ // merge:
+ // %M = phi (%X, %iv)
+ //
+ // getSCEV(%M) == getSCEV(%X) == {0,+,1}, but %X does not dominate %M, and
+ // %M.replaceAllUsesWith(%X) would be incorrect.
+
+ if (isa<PHINode>(UseInst))
+ // If UseInst is not a PHI node then we know that IVOperand dominates
+ // UseInst directly from the legality of SSA.
+ if (!DT || !DT->dominates(IVOperand, UseInst))
+ return false;
+
+ if (!LI->replacementPreservesLCSSAForm(UseInst, IVOperand))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
+
+ SE->forgetValue(UseInst);
+ UseInst->replaceAllUsesWith(IVOperand);
+ ++NumElimIdentity;
+ Changed = true;
+ DeadInsts.emplace_back(UseInst);
+ return true;
+}
+
+/// Annotate BO with nsw / nuw if it provably does not signed-overflow /
+/// unsigned-overflow. Returns true if anything changed, false otherwise.
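+///
+/// For example, if SCEV can prove that the increment '%inc = add i32 %iv, 1'
+/// never signed-overflows for this IV's range, the add is annotated with 'nsw'.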
+bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
+ Instruction *IVOperand) {
+ auto Flags = SE->getStrengthenedNoWrapFlagsFromBinOp(
+ cast<OverflowingBinaryOperator>(BO));
+
+ if (!Flags)
+ return false;
+
+ BO->setHasNoUnsignedWrap(ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) ==
+ SCEV::FlagNUW);
+ BO->setHasNoSignedWrap(ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) ==
+ SCEV::FlagNSW);
+
+ // The getStrengthenedNoWrapFlagsFromBinOp() check inferred additional nowrap
+ // flags on addrecs while performing zero/sign extensions. We could call
+ // forgetValue() here to make sure those flags also propagate to any other
+ // SCEV expressions based on the addrec. However, this can have pathological
+ // compile-time impact, see https://bugs.llvm.org/show_bug.cgi?id=50384.
+ return true;
+}
+
+/// Annotate the Shr in (X << IVOperand) >> C as exact using the
+/// information from the IV's range. Returns true if anything changed, false
+/// otherwise.
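+///
+/// For example (hypothetical IR), if the IV is known to be at least 3, then in
+///   %s = shl i32 %x, %iv
+///   %r = lshr i32 %s, 3
+/// the right shift only discards zero bits and can be marked 'exact'.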
+bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO,
+ Instruction *IVOperand) {
+ using namespace llvm::PatternMatch;
+
+ if (BO->getOpcode() == Instruction::Shl) {
+ bool Changed = false;
+ ConstantRange IVRange = SE->getUnsignedRange(SE->getSCEV(IVOperand));
+ for (auto *U : BO->users()) {
+ const APInt *C;
+ if (match(U,
+ m_AShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C))) ||
+ match(U,
+ m_LShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C)))) {
+ BinaryOperator *Shr = cast<BinaryOperator>(U);
+ if (!Shr->isExact() && IVRange.getUnsignedMin().uge(*C)) {
+ Shr->setIsExact(true);
+ Changed = true;
+ }
+ }
+ }
+ return Changed;
+ }
+
+ return false;
+}
+
+/// Add all uses of Def to the current IV's worklist.
+static void pushIVUsers(
+ Instruction *Def, Loop *L,
+ SmallPtrSet<Instruction*,16> &Simplified,
+ SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) {
+
+ for (User *U : Def->users()) {
+ Instruction *UI = cast<Instruction>(U);
+
+ // Avoid infinite or exponential worklist processing.
+ // Also ensure unique worklist users.
+ // If Def is a LoopPhi, it may not be in the Simplified set, so check for
+ // self edges first.
+ if (UI == Def)
+ continue;
+
+ // Only change the current Loop, do not change the other parts (e.g. other
+ // Loops).
+ if (!L->contains(UI))
+ continue;
+
+ // Do not push the same instruction more than once.
+ if (!Simplified.insert(UI).second)
+ continue;
+
+ SimpleIVUsers.push_back(std::make_pair(UI, Def));
+ }
+}
+
+/// Return true if this instruction generates a simple SCEV
+/// expression in terms of that IV.
+///
+/// This is similar to IVUsers' isInteresting() but processes each instruction
+/// non-recursively when the operand is already known to be a simpleIVUser.
+///
+static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
+ if (!SE->isSCEVable(I->getType()))
+ return false;
+
+ // Get the symbolic expression for this instruction.
+ const SCEV *S = SE->getSCEV(I);
+
+ // Only consider affine recurrences.
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
+ if (AR && AR->getLoop() == L)
+ return true;
+
+ return false;
+}
+
+/// Iteratively perform simplification on a worklist of users
+/// of the specified induction variable. Each successive simplification may push
+/// more users which may themselves be candidates for simplification.
+///
+/// This algorithm does not require IVUsers analysis. Instead, it simplifies
+/// instructions in-place during analysis. Rather than rewriting induction
+/// variables bottom-up from their users, it transforms a chain of IVUsers
+/// top-down, updating the IR only when it encounters a clear optimization
+/// opportunity.
+///
+/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
+///
+void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
+ if (!SE->isSCEVable(CurrIV->getType()))
+ return;
+
+ // Instructions processed by SimplifyIndvar for CurrIV.
+ SmallPtrSet<Instruction*,16> Simplified;
+
+  // Use-def pairs of IV users waiting to be processed for CurrIV.
+ SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers;
+
+ // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
+ // called multiple times for the same LoopPhi. This is the proper thing to
+ // do for loop header phis that use each other.
+ pushIVUsers(CurrIV, L, Simplified, SimpleIVUsers);
+
+ while (!SimpleIVUsers.empty()) {
+ std::pair<Instruction*, Instruction*> UseOper =
+ SimpleIVUsers.pop_back_val();
+ Instruction *UseInst = UseOper.first;
+
+ // If a user of the IndVar is trivially dead, we prefer just to mark it dead
+ // rather than try to do some complex analysis or transformation (such as
+    // widening) based on it.
+ // TODO: Propagate TLI and pass it here to handle more cases.
+ if (isInstructionTriviallyDead(UseInst, /* TLI */ nullptr)) {
+ DeadInsts.emplace_back(UseInst);
+ continue;
+ }
+
+ // Bypass back edges to avoid extra work.
+ if (UseInst == CurrIV) continue;
+
+ // Try to replace UseInst with a loop invariant before any other
+ // simplifications.
+ if (replaceIVUserWithLoopInvariant(UseInst))
+ continue;
+
+ Instruction *IVOperand = UseOper.second;
+ for (unsigned N = 0; IVOperand; ++N) {
+ assert(N <= Simplified.size() && "runaway iteration");
+ (void) N;
+
+ Value *NewOper = foldIVUser(UseInst, IVOperand);
+ if (!NewOper)
+ break; // done folding
+ IVOperand = dyn_cast<Instruction>(NewOper);
+ }
+ if (!IVOperand)
+ continue;
+
+ if (eliminateIVUser(UseInst, IVOperand)) {
+ pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
+ continue;
+ }
+
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseInst)) {
+ if ((isa<OverflowingBinaryOperator>(BO) &&
+ strengthenOverflowingOperation(BO, IVOperand)) ||
+ (isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand))) {
+ // re-queue uses of the now modified binary operator and fall
+ // through to the checks that remain.
+ pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
+ }
+ }
+
+ // Try to use integer induction for FPToSI of float induction directly.
+ if (replaceFloatIVWithIntegerIV(UseInst)) {
+ // Re-queue the potentially new direct uses of IVOperand.
+ pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
+ continue;
+ }
+
+ CastInst *Cast = dyn_cast<CastInst>(UseInst);
+ if (V && Cast) {
+ V->visitCast(Cast);
+ continue;
+ }
+ if (isSimpleIVUser(UseInst, L, SE)) {
+ pushIVUsers(UseInst, L, Simplified, SimpleIVUsers);
+ }
+ }
+}
+
+namespace llvm {
+
+void IVVisitor::anchor() { }
+
+/// Simplify instructions that use this induction variable
+/// by using ScalarEvolution to analyze the IV's recurrence.
+bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,
+ LoopInfo *LI, const TargetTransformInfo *TTI,
+ SmallVectorImpl<WeakTrackingVH> &Dead,
+ SCEVExpander &Rewriter, IVVisitor *V) {
+ SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, TTI,
+ Rewriter, Dead);
+ SIV.simplifyUsers(CurrIV, V);
+ return SIV.hasChanged();
+}
+
+/// Simplify users of induction variables within this
+/// loop. This does not actually change or add IVs.
+bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
+ LoopInfo *LI, const TargetTransformInfo *TTI,
+ SmallVectorImpl<WeakTrackingVH> &Dead) {
+ SCEVExpander Rewriter(*SE, SE->getDataLayout(), "indvars");
+#ifndef NDEBUG
+ Rewriter.setDebugType(DEBUG_TYPE);
+#endif
+ bool Changed = false;
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ Changed |=
+ simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, TTI, Dead, Rewriter);
+ }
+ return Changed;
+}
+
+} // namespace llvm
+
+namespace {
+//===----------------------------------------------------------------------===//
+// Widen Induction Variables - Extend the width of an IV to cover its
+// widest uses.
+//===----------------------------------------------------------------------===//
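+//
+// For example (hypothetical IR), an i32 IV that is sign-extended to i64 inside
+// the loop (say, to index into an array on a 64-bit target) can be replaced by
+// a single i64 IV, after which the sext and the narrow IV become dead.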
+
+class WidenIV {
+ // Parameters
+ PHINode *OrigPhi;
+ Type *WideType;
+
+ // Context
+ LoopInfo *LI;
+ Loop *L;
+ ScalarEvolution *SE;
+ DominatorTree *DT;
+
+ // Does the module have any calls to the llvm.experimental.guard intrinsic
+ // at all? If not we can avoid scanning instructions looking for guards.
+ bool HasGuards;
+
+ bool UsePostIncrementRanges;
+
+ // Statistics
+ unsigned NumElimExt = 0;
+ unsigned NumWidened = 0;
+
+ // Result
+ PHINode *WidePhi = nullptr;
+ Instruction *WideInc = nullptr;
+ const SCEV *WideIncExpr = nullptr;
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts;
+
+ SmallPtrSet<Instruction *,16> Widened;
+
+ enum class ExtendKind { Zero, Sign, Unknown };
+
+ // A map tracking the kind of extension used to widen each narrow IV
+ // and narrow IV user.
+ // Key: pointer to a narrow IV or IV user.
+ // Value: the kind of extension used to widen this Instruction.
+ DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
+
+ using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
+
+ // A map with control-dependent ranges for post increment IV uses. The key is
+ // a pair of IV def and a use of this def denoting the context. The value is
+ // a ConstantRange representing possible values of the def at the given
+ // context.
+ DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
+
+ std::optional<ConstantRange> getPostIncRangeInfo(Value *Def,
+ Instruction *UseI) {
+ DefUserPair Key(Def, UseI);
+ auto It = PostIncRangeInfos.find(Key);
+ return It == PostIncRangeInfos.end()
+ ? std::optional<ConstantRange>(std::nullopt)
+ : std::optional<ConstantRange>(It->second);
+ }
+
+ void calculatePostIncRanges(PHINode *OrigPhi);
+ void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
+
+ void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
+ DefUserPair Key(Def, UseI);
+ auto It = PostIncRangeInfos.find(Key);
+ if (It == PostIncRangeInfos.end())
+ PostIncRangeInfos.insert({Key, R});
+ else
+ It->second = R.intersectWith(It->second);
+ }
+
+public:
+ /// Record a link in the Narrow IV def-use chain along with the WideIV that
+ /// computes the same value as the Narrow IV def. This avoids caching Use*
+ /// pointers.
+ struct NarrowIVDefUse {
+ Instruction *NarrowDef = nullptr;
+ Instruction *NarrowUse = nullptr;
+ Instruction *WideDef = nullptr;
+
+ // True if the narrow def is never negative. Tracking this information lets
+ // us use a sign extension instead of a zero extension or vice versa, when
+ // profitable and legal.
+ bool NeverNegative = false;
+
+ NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
+ bool NeverNegative)
+ : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
+ NeverNegative(NeverNegative) {}
+ };
+
+ WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
+ DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
+ bool HasGuards, bool UsePostIncrementRanges = true);
+
+ PHINode *createWideIV(SCEVExpander &Rewriter);
+
+ unsigned getNumElimExt() { return NumElimExt; };
+ unsigned getNumWidened() { return NumWidened; };
+
+protected:
+ Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
+ Instruction *Use);
+
+ Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
+ Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
+ const SCEVAddRecExpr *WideAR);
+ Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
+
+ ExtendKind getExtendKind(Instruction *I);
+
+ using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
+
+ WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
+
+ WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
+
+ const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
+ unsigned OpCode) const;
+
+ Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
+
+ bool widenLoopCompare(NarrowIVDefUse DU);
+ bool widenWithVariantUse(NarrowIVDefUse DU);
+
+ void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
+
+private:
+ SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
+};
+} // namespace
+
+/// Determine the insertion point for this user. By default, insert immediately
+/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
+/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
+/// common dominator for the incoming blocks. A nullptr can be returned if no
+/// viable location is found: it may happen if User is a PHI and Def only comes
+/// to this PHI from unreachable blocks.
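+///
+/// For example (hypothetical IR), if the user is a PHI that receives Def from
+/// block %bb, the replacement is inserted before the terminator of %bb rather
+/// than before the PHI itself.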
+static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
+ DominatorTree *DT, LoopInfo *LI) {
+ PHINode *PHI = dyn_cast<PHINode>(User);
+ if (!PHI)
+ return User;
+
+ Instruction *InsertPt = nullptr;
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
+ if (PHI->getIncomingValue(i) != Def)
+ continue;
+
+ BasicBlock *InsertBB = PHI->getIncomingBlock(i);
+
+ if (!DT->isReachableFromEntry(InsertBB))
+ continue;
+
+ if (!InsertPt) {
+ InsertPt = InsertBB->getTerminator();
+ continue;
+ }
+ InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
+ InsertPt = InsertBB->getTerminator();
+ }
+
+ // If we have skipped all inputs, it means that Def only comes to Phi from
+ // unreachable blocks.
+ if (!InsertPt)
+ return nullptr;
+
+ auto *DefI = dyn_cast<Instruction>(Def);
+ if (!DefI)
+ return InsertPt;
+
+ assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
+
+ auto *L = LI->getLoopFor(DefI->getParent());
+ assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
+
+ for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
+ if (LI->getLoopFor(DTN->getBlock()) == L)
+ return DTN->getBlock()->getTerminator();
+
+ llvm_unreachable("DefI dominates InsertPt!");
+}
+
+WidenIV::WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
+ DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
+ bool HasGuards, bool UsePostIncrementRanges)
+ : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
+ L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
+ HasGuards(HasGuards), UsePostIncrementRanges(UsePostIncrementRanges),
+ DeadInsts(DI) {
+ assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
+ ExtendKindMap[OrigPhi] = WI.IsSigned ? ExtendKind::Sign : ExtendKind::Zero;
+}
+
+Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
+ bool IsSigned, Instruction *Use) {
+ // Set the debug location and conservative insertion point.
+ IRBuilder<> Builder(Use);
+ // Hoist the insertion point into loop preheaders as far as possible.
+ for (const Loop *L = LI->getLoopFor(Use->getParent());
+ L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
+ L = L->getParentLoop())
+ Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
+
+ return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
+ Builder.CreateZExt(NarrowOper, WideType);
+}
+
+/// Instantiate a wide operation to replace a narrow operation. This only needs
+/// to handle operations that can evaluate to SCEVAddRec. It can safely return
+/// 0 for any operation we decide not to clone.
+Instruction *WidenIV::cloneIVUser(WidenIV::NarrowIVDefUse DU,
+ const SCEVAddRecExpr *WideAR) {
+ unsigned Opcode = DU.NarrowUse->getOpcode();
+ switch (Opcode) {
+ default:
+ return nullptr;
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::UDiv:
+ case Instruction::Sub:
+ return cloneArithmeticIVUser(DU, WideAR);
+
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return cloneBitwiseIVUser(DU);
+ }
+}
+
+Instruction *WidenIV::cloneBitwiseIVUser(WidenIV::NarrowIVDefUse DU) {
+ Instruction *NarrowUse = DU.NarrowUse;
+ Instruction *NarrowDef = DU.NarrowDef;
+ Instruction *WideDef = DU.WideDef;
+
+ LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
+
+ // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
+ // about the narrow operand yet so must insert a [sz]ext. It is probably loop
+ // invariant and will be folded or hoisted. If it actually comes from a
+ // widened IV, it should be removed during a future call to widenIVUse.
+ bool IsSigned = getExtendKind(NarrowDef) == ExtendKind::Sign;
+ Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(0), WideType,
+ IsSigned, NarrowUse);
+ Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(1), WideType,
+ IsSigned, NarrowUse);
+
+ auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
+ auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
+ NarrowBO->getName());
+ IRBuilder<> Builder(NarrowUse);
+ Builder.Insert(WideBO);
+ WideBO->copyIRFlags(NarrowBO);
+ return WideBO;
+}
+
+Instruction *WidenIV::cloneArithmeticIVUser(WidenIV::NarrowIVDefUse DU,
+ const SCEVAddRecExpr *WideAR) {
+ Instruction *NarrowUse = DU.NarrowUse;
+ Instruction *NarrowDef = DU.NarrowDef;
+ Instruction *WideDef = DU.WideDef;
+
+ LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
+
+ unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
+
+ // We're trying to find X such that
+ //
+ // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
+ //
+ // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
+ // and check using SCEV if any of them are correct.
+
+ // Returns true if extending NonIVNarrowDef according to `SignExt` is a
+ // correct solution to X.
+ auto GuessNonIVOperand = [&](bool SignExt) {
+ const SCEV *WideLHS;
+ const SCEV *WideRHS;
+
+ auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
+ if (SignExt)
+ return SE->getSignExtendExpr(S, Ty);
+ return SE->getZeroExtendExpr(S, Ty);
+ };
+
+ if (IVOpIdx == 0) {
+ WideLHS = SE->getSCEV(WideDef);
+ const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
+ WideRHS = GetExtend(NarrowRHS, WideType);
+ } else {
+ const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
+ WideLHS = GetExtend(NarrowLHS, WideType);
+ WideRHS = SE->getSCEV(WideDef);
+ }
+
+ // WideUse is "WideDef `op.wide` X" as described in the comment.
+ const SCEV *WideUse =
+ getSCEVByOpCode(WideLHS, WideRHS, NarrowUse->getOpcode());
+
+ return WideUse == WideAR;
+ };
+
+ bool SignExtend = getExtendKind(NarrowDef) == ExtendKind::Sign;
+ if (!GuessNonIVOperand(SignExtend)) {
+ SignExtend = !SignExtend;
+ if (!GuessNonIVOperand(SignExtend))
+ return nullptr;
+ }
+
+ Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(0), WideType,
+ SignExtend, NarrowUse);
+ Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(1), WideType,
+ SignExtend, NarrowUse);
+
+ auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
+ auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
+ NarrowBO->getName());
+
+ IRBuilder<> Builder(NarrowUse);
+ Builder.Insert(WideBO);
+ WideBO->copyIRFlags(NarrowBO);
+ return WideBO;
+}
+
+WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
+ auto It = ExtendKindMap.find(I);
+ assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
+ return It->second;
+}
+
+const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
+ unsigned OpCode) const {
+ switch (OpCode) {
+ case Instruction::Add:
+ return SE->getAddExpr(LHS, RHS);
+ case Instruction::Sub:
+ return SE->getMinusSCEV(LHS, RHS);
+ case Instruction::Mul:
+ return SE->getMulExpr(LHS, RHS);
+ case Instruction::UDiv:
+ return SE->getUDivExpr(LHS, RHS);
+ default:
+ llvm_unreachable("Unsupported opcode.");
+ };
+}
+
+/// No-wrap operations can transfer sign extension of their result to their
+/// operands. Generate the SCEV value for the widened operation without
+/// actually modifying the IR yet. If the expression after extending the
+/// operands is an AddRec for this loop, return the AddRec and the kind of
+/// extension used.
+WidenIV::WidenedRecTy
+WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) {
+ // Handle the common case of add<nsw/nuw>
+ const unsigned OpCode = DU.NarrowUse->getOpcode();
+  // Only Add/Sub/Mul instructions are supported so far.
+ if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
+ OpCode != Instruction::Mul)
+ return {nullptr, ExtendKind::Unknown};
+
+ // One operand (NarrowDef) has already been extended to WideDef. Now determine
+ // if extending the other will lead to a recurrence.
+ const unsigned ExtendOperIdx =
+ DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
+ assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
+
+ const SCEV *ExtendOperExpr = nullptr;
+ const OverflowingBinaryOperator *OBO =
+ cast<OverflowingBinaryOperator>(DU.NarrowUse);
+ ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
+ if (ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap())
+ ExtendOperExpr = SE->getSignExtendExpr(
+ SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
+ else if (ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap())
+ ExtendOperExpr = SE->getZeroExtendExpr(
+ SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
+ else
+ return {nullptr, ExtendKind::Unknown};
+
+  // When creating this SCEV expr, don't apply the current operation's NSW or NUW
+ // flags. This instruction may be guarded by control flow that the no-wrap
+ // behavior depends on. Non-control-equivalent instructions can be mapped to
+ // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
+ // semantics to those operations.
+ const SCEV *lhs = SE->getSCEV(DU.WideDef);
+ const SCEV *rhs = ExtendOperExpr;
+
+ // Let's swap operands to the initial order for the case of non-commutative
+ // operations, like SUB. See PR21014.
+ if (ExtendOperIdx == 0)
+ std::swap(lhs, rhs);
+ const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
+
+ if (!AddRec || AddRec->getLoop() != L)
+ return {nullptr, ExtendKind::Unknown};
+
+ return {AddRec, ExtKind};
+}
+
+/// Is this instruction potentially interesting for further simplification after
+/// widening its type? In other words, can the extend be safely hoisted out of
+/// the loop with SCEV reducing the value to a recurrence on the same loop? If
+/// so, return the extended recurrence and the kind of extension used. Otherwise
+/// return {nullptr, ExtendKind::Unknown}.
+WidenIV::WidenedRecTy WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU) {
+ if (!DU.NarrowUse->getType()->isIntegerTy())
+ return {nullptr, ExtendKind::Unknown};
+
+ const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
+ if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
+ SE->getTypeSizeInBits(WideType)) {
+ // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
+ // index. So don't follow this use.
+ return {nullptr, ExtendKind::Unknown};
+ }
+
+ const SCEV *WideExpr;
+ ExtendKind ExtKind;
+ if (DU.NeverNegative) {
+ WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
+ if (isa<SCEVAddRecExpr>(WideExpr))
+ ExtKind = ExtendKind::Sign;
+ else {
+ WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
+ ExtKind = ExtendKind::Zero;
+ }
+ } else if (getExtendKind(DU.NarrowDef) == ExtendKind::Sign) {
+ WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
+ ExtKind = ExtendKind::Sign;
+ } else {
+ WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
+ ExtKind = ExtendKind::Zero;
+ }
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
+ if (!AddRec || AddRec->getLoop() != L)
+ return {nullptr, ExtendKind::Unknown};
+ return {AddRec, ExtKind};
+}
+
+/// This IV user cannot be widened. Replace this use of the original narrow IV
+/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
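+///
+/// For example (hypothetical IR), if '%u = urem i32 %iv, %x' cannot be widened,
+/// it keeps its i32 type and instead consumes a truncation of the wide IV:
+///   %iv.trunc = trunc i64 %iv.wide to i32
+///   %u = urem i32 %iv.trunc, %x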
+static void truncateIVUse(WidenIV::NarrowIVDefUse DU, DominatorTree *DT,
+ LoopInfo *LI) {
+ auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
+ if (!InsertPt)
+ return;
+ LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
+ << *DU.NarrowUse << "\n");
+ IRBuilder<> Builder(InsertPt);
+ Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
+ DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
+}
+
+/// If the narrow use is a compare instruction, then widen the compare
+/// (and possibly the other operand). The extend operation is hoisted into the
+/// loop preheader as far as possible.
+bool WidenIV::widenLoopCompare(WidenIV::NarrowIVDefUse DU) {
+ ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
+ if (!Cmp)
+ return false;
+
+ // We can legally widen the comparison in the following two cases:
+ //
+ // - The signedness of the IV extension and comparison match
+ //
+ // - The narrow IV is always positive (and thus its sign extension is equal
+ // to its zero extension). For instance, let's say we're zero extending
+ // %narrow for the following use
+ //
+ // icmp slt i32 %narrow, %val ... (A)
+ //
+ // and %narrow is always positive. Then
+ //
+ // (A) == icmp slt i32 sext(%narrow), sext(%val)
+ // == icmp slt i32 zext(%narrow), sext(%val)
+ bool IsSigned = getExtendKind(DU.NarrowDef) == ExtendKind::Sign;
+ if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
+ return false;
+
+ Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
+ unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
+ unsigned IVWidth = SE->getTypeSizeInBits(WideType);
+ assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
+
+ // Widen the compare instruction.
+ auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
+ if (!InsertPt)
+ return false;
+ IRBuilder<> Builder(InsertPt);
+ DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
+
+ // Widen the other operand of the compare, if necessary.
+ if (CastWidth < IVWidth) {
+ Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
+ DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
+ }
+ return true;
+}
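+
+// As an illustrative sketch (hypothetical IR, assuming a sign-extended 32-bit
+// IV already widened to %iv.wide of type i64):
+//   %cmp = icmp slt i32 %iv, %n
+// becomes
+//   %n.ext = sext i32 %n to i64
+//   %cmp = icmp slt i64 %iv.wide, %n.ext
+// with the extend of %n hoisted out of the loop when possible.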
+
+// widenIVUse avoids generating a trunc by evaluating the use as an AddRec.
+// This will not work when:
+// 1) SCEV traces back to an instruction inside the loop that SCEV cannot
+//    expand, e.g. add %indvar, (load %addr)
+// 2) SCEV finds a loop variant, e.g. add %indvar, %loopvariant
+// When SCEV fails to avoid the trunc, we can still try an instruction
+// combining approach to prove that the trunc is not required. This can be
+// further extended with other instruction combining checks, but for now we
+// handle the following case ("sub" can also be "add" or "mul", and
+// "nsw + sext" can be "nuw + zext"):
+//
+// Src:
+//   %c = sub nsw %b, %indvar
+//   %d = sext %c to i64
+// Dst:
+//   %indvar.ext1 = sext %indvar to i64
+//   %m = sext %b to i64
+//   %d = sub nsw i64 %m, %indvar.ext1
+// Therefore, as long as the result of the add/sub/mul is extended to the wide
+// type, no trunc is required regardless of how %b is generated. This pattern
+// is common when calculating addresses on 64-bit architectures.
+bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
+ Instruction *NarrowUse = DU.NarrowUse;
+ Instruction *NarrowDef = DU.NarrowDef;
+ Instruction *WideDef = DU.WideDef;
+
+ // Handle the common case of add<nsw/nuw>
+ const unsigned OpCode = NarrowUse->getOpcode();
+ // Only Add/Sub/Mul instructions are supported.
+ if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
+ OpCode != Instruction::Mul)
+ return false;
+
+ // The operand that is not defined by NarrowDef of DU. Let's call it the
+ // other operand.
+ assert((NarrowUse->getOperand(0) == NarrowDef ||
+ NarrowUse->getOperand(1) == NarrowDef) &&
+ "bad DU");
+
+ const OverflowingBinaryOperator *OBO =
+ cast<OverflowingBinaryOperator>(NarrowUse);
+ ExtendKind ExtKind = getExtendKind(NarrowDef);
+ bool CanSignExtend = ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap();
+ bool CanZeroExtend = ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap();
+ auto AnotherOpExtKind = ExtKind;
+
+ // Check that all uses are either:
+  //  - narrow def (in case we are widening the IV increment);
+ // - single-input LCSSA Phis;
+ // - comparison of the chosen type;
+ // - extend of the chosen type (raison d'etre).
+ SmallVector<Instruction *, 4> ExtUsers;
+ SmallVector<PHINode *, 4> LCSSAPhiUsers;
+ SmallVector<ICmpInst *, 4> ICmpUsers;
+ for (Use &U : NarrowUse->uses()) {
+ Instruction *User = cast<Instruction>(U.getUser());
+ if (User == NarrowDef)
+ continue;
+ if (!L->contains(User)) {
+ auto *LCSSAPhi = cast<PHINode>(User);
+ // Make sure there is only 1 input, so that we don't have to split
+ // critical edges.
+ if (LCSSAPhi->getNumOperands() != 1)
+ return false;
+ LCSSAPhiUsers.push_back(LCSSAPhi);
+ continue;
+ }
+ if (auto *ICmp = dyn_cast<ICmpInst>(User)) {
+ auto Pred = ICmp->getPredicate();
+      // We have 3 types of predicates: signed, unsigned and equality
+      // predicates. For equality, it's legal to widen the icmp for either a
+      // sign or a zero extend. For a sign extend, we can also do so for
+      // signed predicates; likewise, for a zero extend we can widen the icmp
+      // for unsigned predicates.
+ if (ExtKind == ExtendKind::Zero && ICmpInst::isSigned(Pred))
+ return false;
+ if (ExtKind == ExtendKind::Sign && ICmpInst::isUnsigned(Pred))
+ return false;
+ ICmpUsers.push_back(ICmp);
+ continue;
+ }
+ if (ExtKind == ExtendKind::Sign)
+ User = dyn_cast<SExtInst>(User);
+ else
+ User = dyn_cast<ZExtInst>(User);
+ if (!User || User->getType() != WideType)
+ return false;
+ ExtUsers.push_back(User);
+ }
+ if (ExtUsers.empty()) {
+ DeadInsts.emplace_back(NarrowUse);
+ return true;
+ }
+
+  // We'll prove some facts that should be true in the context of ext users.
+  // If there are no users, we are done now. If there are some, pick their
+  // common dominator as context.
+ const Instruction *CtxI = findCommonDominator(ExtUsers, *DT);
+
+ if (!CanSignExtend && !CanZeroExtend) {
+ // Because InstCombine turns 'sub nuw' to 'add' losing the no-wrap flag, we
+ // will most likely not see it. Let's try to prove it.
+ if (OpCode != Instruction::Add)
+ return false;
+ if (ExtKind != ExtendKind::Zero)
+ return false;
+ const SCEV *LHS = SE->getSCEV(OBO->getOperand(0));
+ const SCEV *RHS = SE->getSCEV(OBO->getOperand(1));
+ // TODO: Support case for NarrowDef = NarrowUse->getOperand(1).
+ if (NarrowUse->getOperand(0) != NarrowDef)
+ return false;
+ if (!SE->isKnownNegative(RHS))
+ return false;
+ bool ProvedSubNUW = SE->isKnownPredicateAt(ICmpInst::ICMP_UGE, LHS,
+ SE->getNegativeSCEV(RHS), CtxI);
+ if (!ProvedSubNUW)
+ return false;
+ // In fact, our 'add' is 'sub nuw'. We will need to widen the 2nd operand as
+ // neg(zext(neg(op))), which is basically sext(op).
+ AnotherOpExtKind = ExtendKind::Sign;
+ }
+
+  // Verify that the defining operand is an AddRec.
+ const SCEV *Op1 = SE->getSCEV(WideDef);
+ const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
+ if (!AddRecOp1 || AddRecOp1->getLoop() != L)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
+
+ // Generating a widening use instruction.
+ Value *LHS =
+ (NarrowUse->getOperand(0) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(0), WideType,
+ AnotherOpExtKind == ExtendKind::Sign, NarrowUse);
+ Value *RHS =
+ (NarrowUse->getOperand(1) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(1), WideType,
+ AnotherOpExtKind == ExtendKind::Sign, NarrowUse);
+
+ auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
+ auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
+ NarrowBO->getName());
+ IRBuilder<> Builder(NarrowUse);
+ Builder.Insert(WideBO);
+ WideBO->copyIRFlags(NarrowBO);
+ ExtendKindMap[NarrowUse] = ExtKind;
+
+ for (Instruction *User : ExtUsers) {
+ assert(User->getType() == WideType && "Checked before!");
+ LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
+ << *WideBO << "\n");
+ ++NumElimExt;
+ User->replaceAllUsesWith(WideBO);
+ DeadInsts.emplace_back(User);
+ }
+
+ for (PHINode *User : LCSSAPhiUsers) {
+ assert(User->getNumOperands() == 1 && "Checked before!");
+ Builder.SetInsertPoint(User);
+ auto *WidePN =
+ Builder.CreatePHI(WideBO->getType(), 1, User->getName() + ".wide");
+ BasicBlock *LoopExitingBlock = User->getParent()->getSinglePredecessor();
+ assert(LoopExitingBlock && L->contains(LoopExitingBlock) &&
+ "Not a LCSSA Phi?");
+ WidePN->addIncoming(WideBO, LoopExitingBlock);
+ Builder.SetInsertPoint(&*User->getParent()->getFirstInsertionPt());
+ auto *TruncPN = Builder.CreateTrunc(WidePN, User->getType());
+ User->replaceAllUsesWith(TruncPN);
+ DeadInsts.emplace_back(User);
+ }
+
+ for (ICmpInst *User : ICmpUsers) {
+ Builder.SetInsertPoint(User);
+ auto ExtendedOp = [&](Value * V)->Value * {
+ if (V == NarrowUse)
+ return WideBO;
+ if (ExtKind == ExtendKind::Zero)
+ return Builder.CreateZExt(V, WideBO->getType());
+ else
+ return Builder.CreateSExt(V, WideBO->getType());
+ };
+ auto Pred = User->getPredicate();
+ auto *LHS = ExtendedOp(User->getOperand(0));
+ auto *RHS = ExtendedOp(User->getOperand(1));
+ auto *WideCmp =
+ Builder.CreateICmp(Pred, LHS, RHS, User->getName() + ".wide");
+ User->replaceAllUsesWith(WideCmp);
+ DeadInsts.emplace_back(User);
+ }
+
+ return true;
+}
+
+/// Determine whether an individual user of the narrow IV can be widened. If so,
+/// return the wide clone of the user.
+Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewriter) {
+ assert(ExtendKindMap.count(DU.NarrowDef) &&
+ "Should already know the kind of extension used to widen NarrowDef");
+
+ // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
+ if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
+ if (LI->getLoopFor(UsePhi->getParent()) != L) {
+ // For LCSSA phis, sink the truncate outside the loop.
+ // After SimplifyCFG most loop exit targets have a single predecessor.
+ // Otherwise fall back to a truncate within the loop.
+ if (UsePhi->getNumOperands() != 1)
+ truncateIVUse(DU, DT, LI);
+ else {
+ // Widening the PHI requires us to insert a trunc. The logical place
+ // for this trunc is in the same BB as the PHI. This is not possible if
+ // the BB is terminated by a catchswitch.
+ if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
+ return nullptr;
+
+ PHINode *WidePhi =
+ PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
+ UsePhi);
+ WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
+ IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
+ Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
+ UsePhi->replaceAllUsesWith(Trunc);
+ DeadInsts.emplace_back(UsePhi);
+ LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
+ << *WidePhi << "\n");
+ }
+ return nullptr;
+ }
+ }
+
+  // This narrow use can be widened by a sext if it's non-negative or its
+  // narrow def was widened by a sext. Same for zext.
+ auto canWidenBySExt = [&]() {
+ return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Sign;
+ };
+ auto canWidenByZExt = [&]() {
+ return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Zero;
+ };
+
+ // Our raison d'etre! Eliminate sign and zero extension.
+ if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
+ (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
+ Value *NewDef = DU.WideDef;
+ if (DU.NarrowUse->getType() != WideType) {
+ unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
+ unsigned IVWidth = SE->getTypeSizeInBits(WideType);
+ if (CastWidth < IVWidth) {
+ // The cast isn't as wide as the IV, so insert a Trunc.
+ IRBuilder<> Builder(DU.NarrowUse);
+ NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
+ }
+ else {
+ // A wider extend was hidden behind a narrower one. This may induce
+ // another round of IV widening in which the intermediate IV becomes
+ // dead. It should be very rare.
+ LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
+ << " not wide enough to subsume " << *DU.NarrowUse
+ << "\n");
+ DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
+ NewDef = DU.NarrowUse;
+ }
+ }
+ if (NewDef != DU.NarrowUse) {
+ LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
+ << " replaced by " << *DU.WideDef << "\n");
+ ++NumElimExt;
+ DU.NarrowUse->replaceAllUsesWith(NewDef);
+ DeadInsts.emplace_back(DU.NarrowUse);
+ }
+    // Now that the extend is gone, we want to expose its uses for potential
+ // further simplification. We don't need to directly inform SimplifyIVUsers
+ // of the new users, because their parent IV will be processed later as a
+ // new loop phi. If we preserved IVUsers analysis, we would also want to
+ // push the uses of WideDef here.
+
+ // No further widening is needed. The deceased [sz]ext had done it for us.
+ return nullptr;
+ }
+
+ // Does this user itself evaluate to a recurrence after widening?
+ WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
+ if (!WideAddRec.first)
+ WideAddRec = getWideRecurrence(DU);
+
+ assert((WideAddRec.first == nullptr) ==
+ (WideAddRec.second == ExtendKind::Unknown));
+ if (!WideAddRec.first) {
+ // If use is a loop condition, try to promote the condition instead of
+ // truncating the IV first.
+ if (widenLoopCompare(DU))
+ return nullptr;
+
+ // We are here about to generate a truncate instruction that may hurt
+ // performance because the scalar evolution expression computed earlier
+ // in WideAddRec.first does not indicate a polynomial induction expression.
+ // In that case, look at the operands of the use instruction to determine
+ // if we can still widen the use instead of truncating its operand.
+ if (widenWithVariantUse(DU))
+ return nullptr;
+
+ // This user does not evaluate to a recurrence after widening, so don't
+ // follow it. Instead insert a Trunc to kill off the original use,
+ // eventually isolating the original narrow IV so it can be removed.
+ truncateIVUse(DU, DT, LI);
+ return nullptr;
+ }
+
+ // Reuse the IV increment that SCEVExpander created as long as it dominates
+ // NarrowUse.
+ Instruction *WideUse = nullptr;
+ if (WideAddRec.first == WideIncExpr &&
+ Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
+ WideUse = WideInc;
+ else {
+ WideUse = cloneIVUser(DU, WideAddRec.first);
+ if (!WideUse)
+ return nullptr;
+ }
+ // Evaluation of WideAddRec ensured that the narrow expression could be
+ // extended outside the loop without overflow. This suggests that the wide use
+ // evaluates to the same expression as the extended narrow use, but doesn't
+ // absolutely guarantee it. Hence the following failsafe check. In rare cases
+ // where it fails, we simply throw away the newly created wide use.
+ if (WideAddRec.first != SE->getSCEV(WideUse)) {
+ LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
+ << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
+ << "\n");
+ DeadInsts.emplace_back(WideUse);
+ return nullptr;
+ }
+
+  // If we reached this point, then we are going to replace
+  // DU.NarrowUse with WideUse. Reattach any DbgValues then.
+ replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
+
+ ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
+ // Returning WideUse pushes it on the worklist.
+ return WideUse;
+}
+
+/// Add eligible users of NarrowDef to NarrowIVUsers.
+void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
+ const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
+ bool NonNegativeDef =
+ SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
+ SE->getZero(NarrowSCEV->getType()));
+ for (User *U : NarrowDef->users()) {
+ Instruction *NarrowUser = cast<Instruction>(U);
+
+ // Handle data flow merges and bizarre phi cycles.
+ if (!Widened.insert(NarrowUser).second)
+ continue;
+
+ bool NonNegativeUse = false;
+ if (!NonNegativeDef) {
+ // We might have a control-dependent range information for this context.
+ if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
+ NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
+ }
+
+ NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
+ NonNegativeDef || NonNegativeUse);
+ }
+}
+
+/// Process a single induction variable. First use the SCEVExpander to create a
+/// wide induction variable that evaluates to the same recurrence as the
+/// original narrow IV. Then use a worklist to forward traverse the narrow IV's
+/// def-use chain. After widenIVUse has processed all interesting IV users, the
+/// narrow IV will be isolated for removal by DeleteDeadPHIs.
+///
+/// It would be simpler to delete uses as they are processed, but we must avoid
+/// invalidating SCEV expressions.
+PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
+ // Is this phi an induction variable?
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
+ if (!AddRec)
+ return nullptr;
+
+ // Widen the induction variable expression.
+ const SCEV *WideIVExpr = getExtendKind(OrigPhi) == ExtendKind::Sign
+ ? SE->getSignExtendExpr(AddRec, WideType)
+ : SE->getZeroExtendExpr(AddRec, WideType);
+
+ assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
+ "Expect the new IV expression to preserve its type");
+
+ // Can the IV be extended outside the loop without overflow?
+ AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
+ if (!AddRec || AddRec->getLoop() != L)
+ return nullptr;
+
+ // An AddRec must have loop-invariant operands. Since this AddRec is
+ // materialized by a loop header phi, the expression cannot have any post-loop
+ // operands, so they must dominate the loop header.
+ assert(
+ SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
+ SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
+ "Loop header phi recurrence inputs do not dominate the loop");
+
+ // Iterate over IV uses (including transitive ones) looking for IV increments
+ // of the form 'add nsw %iv, <const>'. For each increment and each use of
+ // the increment calculate control-dependent range information basing on
+ // dominating conditions inside of the loop (e.g. a range check inside of the
+ // loop). Calculated ranges are stored in PostIncRangeInfos map.
+ //
+ // Control-dependent range information is later used to prove that a narrow
+ // definition is not negative (see pushNarrowIVUsers). It's difficult to do
+ // this on demand because when pushNarrowIVUsers needs this information some
+ // of the dominating conditions might be already widened.
+ if (UsePostIncrementRanges)
+ calculatePostIncRanges(OrigPhi);
+
+ // The rewriter provides a value for the desired IV expression. This may
+ // either find an existing phi or materialize a new one. Either way, we
+ // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
+ // of the phi-SCC dominates the loop entry.
+ Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt();
+ Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt);
+  // If the expanded value is not a phi node, for example a cast node such as
+  // a bitcast, inttoptr or ptrtoint, just skip it for now.
+  if (!(WidePhi = dyn_cast<PHINode>(ExpandInst))) {
+    // If the cast node is an inserted instruction without any user, remove it
+    // to make sure the pass doesn't touch the function, as we cannot widen
+    // the phi.
+ if (ExpandInst->hasNUses(0) &&
+ Rewriter.isInsertedInstruction(cast<Instruction>(ExpandInst)))
+ DeadInsts.emplace_back(ExpandInst);
+ return nullptr;
+ }
+
+ // Remembering the WideIV increment generated by SCEVExpander allows
+ // widenIVUse to reuse it when widening the narrow IV's increment. We don't
+ // employ a general reuse mechanism because the call above is the only call to
+ // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
+ if (BasicBlock *LatchBlock = L->getLoopLatch()) {
+ WideInc =
+ cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
+ WideIncExpr = SE->getSCEV(WideInc);
+ // Propagate the debug location associated with the original loop increment
+ // to the new (widened) increment.
+ auto *OrigInc =
+ cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
+ WideInc->setDebugLoc(OrigInc->getDebugLoc());
+ }
+
+ LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
+ ++NumWidened;
+
+ // Traverse the def-use chain using a worklist starting at the original IV.
+ assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
+
+ Widened.insert(OrigPhi);
+ pushNarrowIVUsers(OrigPhi, WidePhi);
+
+ while (!NarrowIVUsers.empty()) {
+ WidenIV::NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
+
+ // Process a def-use edge. This may replace the use, so don't hold a
+ // use_iterator across it.
+ Instruction *WideUse = widenIVUse(DU, Rewriter);
+
+ // Follow all def-use edges from the previous narrow use.
+ if (WideUse)
+ pushNarrowIVUsers(DU.NarrowUse, WideUse);
+
+ // widenIVUse may have removed the def-use edge.
+ if (DU.NarrowDef->use_empty())
+ DeadInsts.emplace_back(DU.NarrowDef);
+ }
+
+ // Attach any debug information to the new PHI.
+ replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
+
+ return WidePhi;
+}
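+
+// An end-to-end sketch of the widening (hypothetical, simplified IR):
+//   %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
+//   %iv.next = add nsw i32 %iv, 1
+//   %idx = sext i32 %iv to i64
+// createWideIV materializes
+//   %iv.wide = phi i64 [ 0, %preheader ], [ %iv.wide.next, %latch ]
+//   %iv.wide.next = add nsw i64 %iv.wide, 1
+// and the widenIVUse worklist then replaces the sext with %iv.wide, leaving
+// the narrow phi to be cleaned up as dead by the caller.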
+
+/// Calculates control-dependent range for the given def at the given context
+/// by looking at dominating conditions inside of the loop
+void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
+ Instruction *NarrowUser) {
+ using namespace llvm::PatternMatch;
+
+ Value *NarrowDefLHS;
+ const APInt *NarrowDefRHS;
+ if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
+ m_APInt(NarrowDefRHS))) ||
+ !NarrowDefRHS->isNonNegative())
+ return;
+
+ auto UpdateRangeFromCondition = [&] (Value *Condition,
+ bool TrueDest) {
+ CmpInst::Predicate Pred;
+ Value *CmpRHS;
+ if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
+ m_Value(CmpRHS))))
+ return;
+
+ CmpInst::Predicate P =
+ TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
+
+ auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
+ auto CmpConstrainedLHSRange =
+ ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
+ auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
+ *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);
+
+ updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
+ };
+
+ auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
+ if (!HasGuards)
+ return;
+
+ for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
+ Ctx->getParent()->rend())) {
+ Value *C = nullptr;
+ if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
+ UpdateRangeFromCondition(C, /*TrueDest=*/true);
+ }
+ };
+
+ UpdateRangeFromGuards(NarrowUser);
+
+ BasicBlock *NarrowUserBB = NarrowUser->getParent();
+  // If NarrowUserBB is statically unreachable, asking dominator queries may
+  // yield surprising results (e.g. the block may not have a dom tree node).
+ if (!DT->isReachableFromEntry(NarrowUserBB))
+ return;
+
+ for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
+ L->contains(DTB->getBlock());
+ DTB = DTB->getIDom()) {
+ auto *BB = DTB->getBlock();
+ auto *TI = BB->getTerminator();
+ UpdateRangeFromGuards(TI);
+
+ auto *BI = dyn_cast<BranchInst>(TI);
+ if (!BI || !BI->isConditional())
+ continue;
+
+ auto *TrueSuccessor = BI->getSuccessor(0);
+ auto *FalseSuccessor = BI->getSuccessor(1);
+
+ auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
+ return BBE.isSingleEdge() &&
+ DT->dominates(BBE, NarrowUser->getParent());
+ };
+
+ if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
+ UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
+
+ if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
+ UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
+ }
+}
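+
+// Illustrative example (hypothetical source): for a post-increment def
+//   %inc = add nsw i32 %i, 1
+// used under a dominating check such as "if (%i < %len)", the branch
+// condition constrains %i from above, and adding 1 with nsw gives a
+// control-dependent range for %inc at that use, which is what gets recorded
+// via updatePostIncRangeInfo above.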
+
+/// Calculates PostIncRangeInfos map for the given IV
+void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
+ SmallPtrSet<Instruction *, 16> Visited;
+ SmallVector<Instruction *, 6> Worklist;
+ Worklist.push_back(OrigPhi);
+ Visited.insert(OrigPhi);
+
+ while (!Worklist.empty()) {
+ Instruction *NarrowDef = Worklist.pop_back_val();
+
+ for (Use &U : NarrowDef->uses()) {
+ auto *NarrowUser = cast<Instruction>(U.getUser());
+
+ // Don't go looking outside the current loop.
+ auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
+ if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
+ continue;
+
+ if (!Visited.insert(NarrowUser).second)
+ continue;
+
+ Worklist.push_back(NarrowUser);
+
+ calculatePostIncRange(NarrowDef, NarrowUser);
+ }
+ }
+}
+
+PHINode *llvm::createWideIV(const WideIVInfo &WI,
+ LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter,
+ DominatorTree *DT, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
+ unsigned &NumElimExt, unsigned &NumWidened,
+ bool HasGuards, bool UsePostIncrementRanges) {
+ WidenIV Widener(WI, LI, SE, DT, DeadInsts, HasGuards, UsePostIncrementRanges);
+ PHINode *WidePHI = Widener.createWideIV(Rewriter);
+ NumElimExt = Widener.getNumElimExt();
+ NumWidened = Widener.getNumWidened();
+ return WidePHI;
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/SimplifyLibCalls.cpp
new file mode 100644
index 0000000000..20f18322d4
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -0,0 +1,4081 @@
+//===------ SimplifyLibCalls.cpp - Library calls simplifier ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the library calls simplifier. It does not implement
+// any pass, but can be used by other passes to do simplifications.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
+
+#include <cmath>
+
+using namespace llvm;
+using namespace PatternMatch;
+
+static cl::opt<bool>
+ EnableUnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
+ cl::init(false),
+ cl::desc("Enable unsafe double to float "
+ "shrinking for math lib calls"));
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+static bool ignoreCallingConv(LibFunc Func) {
+ return Func == LibFunc_abs || Func == LibFunc_labs ||
+ Func == LibFunc_llabs || Func == LibFunc_strlen;
+}
+
+/// Return true if it is only used in equality comparisons with With.
+static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) {
+ for (User *U : V->users()) {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
+ if (IC->isEquality() && IC->getOperand(1) == With)
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
+
+static bool callHasFloatingPointArgument(const CallInst *CI) {
+ return any_of(CI->operands(), [](const Use &OI) {
+ return OI->getType()->isFloatingPointTy();
+ });
+}
+
+static bool callHasFP128Argument(const CallInst *CI) {
+ return any_of(CI->operands(), [](const Use &OI) {
+ return OI->getType()->isFP128Ty();
+ });
+}
+
+// Convert the entire string Str representing an integer in Base, up to
+// the terminating nul if present, to a constant according to the rules
+// of strtoul[l] or, when AsSigned is set, of strtol[l]. On success
+// return the result, otherwise null.
+// The function assumes the string is encoded in ASCII and carefully
+// avoids converting sequences (including "") that the corresponding
+// library call might fail and set errno for.
+static Value *convertStrToInt(CallInst *CI, StringRef &Str, Value *EndPtr,
+ uint64_t Base, bool AsSigned, IRBuilderBase &B) {
+ if (Base < 2 || Base > 36)
+ if (Base != 0)
+ // Fail for an invalid base (required by POSIX).
+ return nullptr;
+
+ // Current offset into the original string to reflect in EndPtr.
+ size_t Offset = 0;
+ // Strip leading whitespace.
+ for ( ; Offset != Str.size(); ++Offset)
+ if (!isSpace((unsigned char)Str[Offset])) {
+ Str = Str.substr(Offset);
+ break;
+ }
+
+ if (Str.empty())
+ // Fail for empty subject sequences (POSIX allows but doesn't require
+ // strtol[l]/strtoul[l] to fail with EINVAL).
+ return nullptr;
+
+ // Strip but remember the sign.
+ bool Negate = Str[0] == '-';
+ if (Str[0] == '-' || Str[0] == '+') {
+ Str = Str.drop_front();
+ if (Str.empty())
+ // Fail for a sign with nothing after it.
+ return nullptr;
+ ++Offset;
+ }
+
+ // Set Max to the absolute value of the minimum (for signed), or
+ // to the maximum (for unsigned) value representable in the type.
+ Type *RetTy = CI->getType();
+ unsigned NBits = RetTy->getPrimitiveSizeInBits();
+ uint64_t Max = AsSigned && Negate ? 1 : 0;
+ Max += AsSigned ? maxIntN(NBits) : maxUIntN(NBits);
+
+ // Autodetect Base if it's zero and consume the "0x" prefix.
+ if (Str.size() > 1) {
+ if (Str[0] == '0') {
+ if (toUpper((unsigned char)Str[1]) == 'X') {
+ if (Str.size() == 2 || (Base && Base != 16))
+ // Fail if Base doesn't allow the "0x" prefix or for the prefix
+ // alone that implementations like BSD set errno to EINVAL for.
+ return nullptr;
+
+ Str = Str.drop_front(2);
+ Offset += 2;
+ Base = 16;
+ }
+ else if (Base == 0)
+ Base = 8;
+ } else if (Base == 0)
+ Base = 10;
+ }
+ else if (Base == 0)
+ Base = 10;
+
+ // Convert the rest of the subject sequence, not including the sign,
+ // to its uint64_t representation (this assumes the source character
+ // set is ASCII).
+ uint64_t Result = 0;
+ for (unsigned i = 0; i != Str.size(); ++i) {
+ unsigned char DigVal = Str[i];
+ if (isDigit(DigVal))
+ DigVal = DigVal - '0';
+ else {
+ DigVal = toUpper(DigVal);
+ if (isAlpha(DigVal))
+ DigVal = DigVal - 'A' + 10;
+ else
+ return nullptr;
+ }
+
+ if (DigVal >= Base)
+ // Fail if the digit is not valid in the Base.
+ return nullptr;
+
+ // Add the digit and fail if the result is not representable in
+ // the (unsigned form of the) destination type.
+ bool VFlow;
+ Result = SaturatingMultiplyAdd(Result, Base, (uint64_t)DigVal, &VFlow);
+ if (VFlow || Result > Max)
+ return nullptr;
+ }
+
+ if (EndPtr) {
+ // Store the pointer to the end.
+ Value *Off = B.getInt64(Offset + Str.size());
+ Value *StrBeg = CI->getArgOperand(0);
+ Value *StrEnd = B.CreateInBoundsGEP(B.getInt8Ty(), StrBeg, Off, "endptr");
+ B.CreateStore(StrEnd, EndPtr);
+ }
+
+ if (Negate)
+ // Unsigned negation doesn't overflow.
+ Result = -Result;
+
+ return ConstantInt::get(RetTy, Result);
+}
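+
+// A couple of folds this helper enables when the subject string is a known
+// constant (illustrative, not exhaustive):
+//   strtol("  42", endp, 10)  -> constant 42
+//   strtoul("0x1A", endp, 0)  -> constant 26 (base autodetected as 16)
+// Sequences the library call could reject, e.g. strtol("0x", endp, 16) or a
+// value out of range for the result type, are deliberately left alone so
+// errno semantics are preserved.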
+
+static bool isOnlyUsedInComparisonWithZero(Value *V) {
+ for (User *U : V->users()) {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
+ if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
+ if (C->isNullValue())
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
+
+static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len,
+ const DataLayout &DL) {
+ if (!isOnlyUsedInComparisonWithZero(CI))
+ return false;
+
+ if (!isDereferenceableAndAlignedPointer(Str, Align(1), APInt(64, Len), DL))
+ return false;
+
+ if (CI->getFunction()->hasFnAttribute(Attribute::SanitizeMemory))
+ return false;
+
+ return true;
+}
+
+static void annotateDereferenceableBytes(CallInst *CI,
+ ArrayRef<unsigned> ArgNos,
+ uint64_t DereferenceableBytes) {
+ const Function *F = CI->getCaller();
+ if (!F)
+ return;
+ for (unsigned ArgNo : ArgNos) {
+ uint64_t DerefBytes = DereferenceableBytes;
+ unsigned AS = CI->getArgOperand(ArgNo)->getType()->getPointerAddressSpace();
+ if (!llvm::NullPointerIsDefined(F, AS) ||
+ CI->paramHasAttr(ArgNo, Attribute::NonNull))
+ DerefBytes = std::max(CI->getParamDereferenceableOrNullBytes(ArgNo),
+ DereferenceableBytes);
+
+ if (CI->getParamDereferenceableBytes(ArgNo) < DerefBytes) {
+ CI->removeParamAttr(ArgNo, Attribute::Dereferenceable);
+ if (!llvm::NullPointerIsDefined(F, AS) ||
+ CI->paramHasAttr(ArgNo, Attribute::NonNull))
+ CI->removeParamAttr(ArgNo, Attribute::DereferenceableOrNull);
+ CI->addParamAttr(ArgNo, Attribute::getWithDereferenceableBytes(
+ CI->getContext(), DerefBytes));
+ }
+ }
+}
+
+static void annotateNonNullNoUndefBasedOnAccess(CallInst *CI,
+ ArrayRef<unsigned> ArgNos) {
+ Function *F = CI->getCaller();
+ if (!F)
+ return;
+
+ for (unsigned ArgNo : ArgNos) {
+ if (!CI->paramHasAttr(ArgNo, Attribute::NoUndef))
+ CI->addParamAttr(ArgNo, Attribute::NoUndef);
+
+ if (!CI->paramHasAttr(ArgNo, Attribute::NonNull)) {
+ unsigned AS =
+ CI->getArgOperand(ArgNo)->getType()->getPointerAddressSpace();
+ if (llvm::NullPointerIsDefined(F, AS))
+ continue;
+ CI->addParamAttr(ArgNo, Attribute::NonNull);
+ }
+
+ annotateDereferenceableBytes(CI, ArgNo, 1);
+ }
+}
+
+static void annotateNonNullAndDereferenceable(CallInst *CI, ArrayRef<unsigned> ArgNos,
+ Value *Size, const DataLayout &DL) {
+ if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size)) {
+ annotateNonNullNoUndefBasedOnAccess(CI, ArgNos);
+ annotateDereferenceableBytes(CI, ArgNos, LenC->getZExtValue());
+ } else if (isKnownNonZero(Size, DL)) {
+ annotateNonNullNoUndefBasedOnAccess(CI, ArgNos);
+ const APInt *X, *Y;
+ uint64_t DerefMin = 1;
+ if (match(Size, m_Select(m_Value(), m_APInt(X), m_APInt(Y)))) {
+ DerefMin = std::min(X->getZExtValue(), Y->getZExtValue());
+ annotateDereferenceableBytes(CI, ArgNos, DerefMin);
+ }
+ }
+}
+
+// Copy CallInst "flags" like musttail, notail, and tail. Return New param for
+// easier chaining. Calls to emit* and B.createCall should probably be wrapped
+// in this function when New is created to replace Old. Callers should take
+// care to check Old.isMustTailCall() if they aren't replacing Old directly
+// with New.
+static Value *copyFlags(const CallInst &Old, Value *New) {
+ assert(!Old.isMustTailCall() && "do not copy musttail call flags");
+ assert(!Old.isNoTailCall() && "do not copy notail call flags");
+ if (auto *NewCI = dyn_cast_or_null<CallInst>(New))
+ NewCI->setTailCallKind(Old.getTailCallKind());
+ return New;
+}
+
+static Value *mergeAttributesAndFlags(CallInst *NewCI, const CallInst &Old) {
+ NewCI->setAttributes(AttributeList::get(
+ NewCI->getContext(), {NewCI->getAttributes(), Old.getAttributes()}));
+ NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
+ return copyFlags(Old, NewCI);
+}
+
+// Helper to avoid truncating the length if size_t is 32-bits.
+static StringRef substr(StringRef Str, uint64_t Len) {
+ return Len >= Str.size() ? Str : Str.substr(0, Len);
+}
+
+//===----------------------------------------------------------------------===//
+// String and Memory Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilderBase &B) {
+ // Extract some information from the instruction
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+ annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
+
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len)
+ annotateDereferenceableBytes(CI, 1, Len);
+ else
+ return nullptr;
+ --Len; // Unbias length.
+
+ // Handle the simple, do-nothing case: strcat(x, "") -> x
+ if (Len == 0)
+ return Dst;
+
+ return copyFlags(*CI, emitStrLenMemCpy(Src, Dst, Len, B));
+}
+
+Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
+ IRBuilderBase &B) {
+ // We need to find the end of the destination string. That's where the
+ // memory is to be moved to. We just generate a call to strlen.
+ Value *DstLen = emitStrLen(Dst, B, DL, TLI);
+ if (!DstLen)
+ return nullptr;
+
+ // Now that we have the destination's length, we must index into the
+ // destination's pointer to get the actual memcpy destination (end of
+ // the string .. we're concatenating).
+ Value *CpyDst = B.CreateInBoundsGEP(B.getInt8Ty(), Dst, DstLen, "endptr");
+
+ // We have enough information to now generate the memcpy call to do the
+ // concatenation for us. Make a memcpy to copy the nul byte with align = 1.
+ B.CreateMemCpy(
+ CpyDst, Align(1), Src, Align(1),
+ ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1));
+ return Dst;
+}
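+
+// A hedged sketch of the IR this produces for strcat(dst, "abc") (names and
+// alignment details illustrative):
+//   %dstlen = call i64 @strlen(ptr %dst)
+//   %endptr = getelementptr inbounds i8, ptr %dst, i64 %dstlen
+//   call void @llvm.memcpy.p0.p0.i64(ptr %endptr, ptr @.str, i64 4, i1 false)
+// i.e. the three characters plus the terminating nul are appended at the end
+// of the destination string.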
+
+Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilderBase &B) {
+ // Extract some information from the instruction.
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+ Value *Size = CI->getArgOperand(2);
+ uint64_t Len;
+ annotateNonNullNoUndefBasedOnAccess(CI, 0);
+ if (isKnownNonZero(Size, DL))
+ annotateNonNullNoUndefBasedOnAccess(CI, 1);
+
+ // We don't do anything if length is not constant.
+ ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size);
+ if (LengthArg) {
+ Len = LengthArg->getZExtValue();
+ // strncat(x, c, 0) -> x
+ if (!Len)
+ return Dst;
+ } else {
+ return nullptr;
+ }
+
+ // See if we can get the length of the input string.
+ uint64_t SrcLen = GetStringLength(Src);
+ if (SrcLen) {
+ annotateDereferenceableBytes(CI, 1, SrcLen);
+ --SrcLen; // Unbias length.
+ } else {
+ return nullptr;
+ }
+
+ // strncat(x, "", c) -> x
+ if (SrcLen == 0)
+ return Dst;
+
+ // We don't optimize this case.
+ if (Len < SrcLen)
+ return nullptr;
+
+ // strncat(x, s, c) -> strcat(x, s)
+ // s is constant so the strcat can be optimized further.
+ return copyFlags(*CI, emitStrLenMemCpy(Src, Dst, SrcLen, B));
+}
+
+// Helper to transform memchr(S, C, N) == S to N && *S == C and, when
+// NBytes is null, strchr(S, C) to *S == C. A precondition of the function
+// is that either S is dereferenceable or the value of N is nonzero.
+static Value* memChrToCharCompare(CallInst *CI, Value *NBytes,
+ IRBuilderBase &B, const DataLayout &DL)
+{
+ Value *Src = CI->getArgOperand(0);
+ Value *CharVal = CI->getArgOperand(1);
+
+ // Fold memchr(A, C, N) == A to N && *A == C.
+ Type *CharTy = B.getInt8Ty();
+ Value *Char0 = B.CreateLoad(CharTy, Src);
+ CharVal = B.CreateTrunc(CharVal, CharTy);
+ Value *Cmp = B.CreateICmpEQ(Char0, CharVal, "char0cmp");
+
+ if (NBytes) {
+ Value *Zero = ConstantInt::get(NBytes->getType(), 0);
+ Value *And = B.CreateICmpNE(NBytes, Zero);
+ Cmp = B.CreateLogicalAnd(And, Cmp);
+ }
+
+ Value *NullPtr = Constant::getNullValue(CI->getType());
+ return B.CreateSelect(Cmp, Src, NullPtr);
+}
+
+Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) {
+ Value *SrcStr = CI->getArgOperand(0);
+ Value *CharVal = CI->getArgOperand(1);
+ annotateNonNullNoUndefBasedOnAccess(CI, 0);
+
+ if (isOnlyUsedInEqualityComparison(CI, SrcStr))
+ return memChrToCharCompare(CI, nullptr, B, DL);
+
+ // If the second operand is non-constant, see if we can compute the length
+ // of the input string and turn this into memchr.
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal);
+ if (!CharC) {
+ uint64_t Len = GetStringLength(SrcStr);
+ if (Len)
+ annotateDereferenceableBytes(CI, 0, Len);
+ else
+ return nullptr;
+
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ unsigned IntBits = TLI->getIntSize();
+ if (!FT->getParamType(1)->isIntegerTy(IntBits)) // memchr needs 'int'.
+ return nullptr;
+
+ unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule());
+ Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits);
+ return copyFlags(*CI,
+ emitMemChr(SrcStr, CharVal, // include nul.
+ ConstantInt::get(SizeTTy, Len), B,
+ DL, TLI));
+ }
+
+ if (CharC->isZero()) {
+ Value *NullPtr = Constant::getNullValue(CI->getType());
+ if (isOnlyUsedInEqualityComparison(CI, NullPtr))
+ // Pre-empt the transformation to strlen below and fold
+ // strchr(A, '\0') == null to false.
+ return B.CreateIntToPtr(B.getTrue(), CI->getType());
+ }
+
+ // Otherwise, the character is a constant, see if the first argument is
+ // a string literal. If so, we can constant fold.
+ StringRef Str;
+ if (!getConstantStringInfo(SrcStr, Str)) {
+ if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
+ if (Value *StrLen = emitStrLen(SrcStr, B, DL, TLI))
+ return B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, StrLen, "strchr");
+ return nullptr;
+ }
+
+ // Compute the offset, make sure to handle the case when we're searching for
+ // zero (a weird way to spell strlen).
+ size_t I = (0xFF & CharC->getSExtValue()) == 0
+ ? Str.size()
+ : Str.find(CharC->getSExtValue());
+ if (I == StringRef::npos) // Didn't find the char. strchr returns null.
+ return Constant::getNullValue(CI->getType());
+
+ // strchr(s+n,c) -> gep(s+n+i,c)
+ return B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strchr");
+}
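+
+// Illustrative outcomes of the folds above (constant inputs hypothetical):
+//   strchr("hello", 'l') -> gep to offset 2 of the string literal
+//   strchr("hello", 'z') -> null
+//   strchr(s, '\0')      -> s + strlen(s) when s is not a constant string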
+
+Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilderBase &B) {
+ Value *SrcStr = CI->getArgOperand(0);
+ Value *CharVal = CI->getArgOperand(1);
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal);
+ annotateNonNullNoUndefBasedOnAccess(CI, 0);
+
+ StringRef Str;
+ if (!getConstantStringInfo(SrcStr, Str)) {
+ // strrchr(s, 0) -> strchr(s, 0)
+ if (CharC && CharC->isZero())
+ return copyFlags(*CI, emitStrChr(SrcStr, '\0', B, TLI));
+ return nullptr;
+ }
+
+ unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule());
+ Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits);
+
+ // Try to expand strrchr to the memrchr nonstandard extension if it's
+ // available, or simply fail otherwise.
+ uint64_t NBytes = Str.size() + 1; // Include the terminating nul.
+ Value *Size = ConstantInt::get(SizeTTy, NBytes);
+ return copyFlags(*CI, emitMemRChr(SrcStr, CharVal, Size, B, DL, TLI));
+}
+
+Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilderBase &B) {
+ Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
+ if (Str1P == Str2P) // strcmp(x,x) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ StringRef Str1, Str2;
+ bool HasStr1 = getConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = getConstantStringInfo(Str2P, Str2);
+
+ // strcmp(x, y) -> cnst (if both x and y are constant strings)
+ if (HasStr1 && HasStr2)
+ return ConstantInt::get(CI->getType(),
+ std::clamp(Str1.compare(Str2), -1, 1));
+
+ if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x
+ return B.CreateNeg(B.CreateZExt(
+ B.CreateLoad(B.getInt8Ty(), Str2P, "strcmpload"), CI->getType()));
+
+ if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
+ return B.CreateZExt(B.CreateLoad(B.getInt8Ty(), Str1P, "strcmpload"),
+ CI->getType());
+
+ // strcmp(P, "x") -> memcmp(P, "x", 2)
+ uint64_t Len1 = GetStringLength(Str1P);
+ if (Len1)
+ annotateDereferenceableBytes(CI, 0, Len1);
+ uint64_t Len2 = GetStringLength(Str2P);
+ if (Len2)
+ annotateDereferenceableBytes(CI, 1, Len2);
+
+ if (Len1 && Len2) {
+ return copyFlags(
+ *CI, emitMemCmp(Str1P, Str2P,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()),
+ std::min(Len1, Len2)),
+ B, DL, TLI));
+ }
+
+ // strcmp to memcmp
+ if (!HasStr1 && HasStr2) {
+ if (canTransformToMemCmp(CI, Str1P, Len2, DL))
+ return copyFlags(
+ *CI,
+ emitMemCmp(Str1P, Str2P,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2),
+ B, DL, TLI));
+ } else if (HasStr1 && !HasStr2) {
+ if (canTransformToMemCmp(CI, Str2P, Len1, DL))
+ return copyFlags(
+ *CI,
+ emitMemCmp(Str1P, Str2P,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1),
+ B, DL, TLI));
+ }
+
+ annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
+ return nullptr;
+}
+
+// Optimize a memcmp or, when StrNCmp is true, strncmp call CI with constant
+// arrays LHS and RHS and nonconstant Size.
+static Value *optimizeMemCmpVarSize(CallInst *CI, Value *LHS, Value *RHS,
+ Value *Size, bool StrNCmp,
+ IRBuilderBase &B, const DataLayout &DL);
+
+Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilderBase &B) {
+ Value *Str1P = CI->getArgOperand(0);
+ Value *Str2P = CI->getArgOperand(1);
+ Value *Size = CI->getArgOperand(2);
+ if (Str1P == Str2P) // strncmp(x,x,n) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ if (isKnownNonZero(Size, DL))
+ annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
+ // Get the length argument if it is constant.
+ uint64_t Length;
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size))
+ Length = LengthArg->getZExtValue();
+ else
+ return optimizeMemCmpVarSize(CI, Str1P, Str2P, Size, true, B, DL);
+
+ if (Length == 0) // strncmp(x,y,0) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
+ return copyFlags(*CI, emitMemCmp(Str1P, Str2P, Size, B, DL, TLI));
+
+ StringRef Str1, Str2;
+ bool HasStr1 = getConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = getConstantStringInfo(Str2P, Str2);
+
+ // strncmp(x, y) -> cnst (if both x and y are constant strings)
+ if (HasStr1 && HasStr2) {
+ // Avoid truncating the 64-bit Length to 32 bits in ILP32.
+ StringRef SubStr1 = substr(Str1, Length);
+ StringRef SubStr2 = substr(Str2, Length);
+ return ConstantInt::get(CI->getType(),
+ std::clamp(SubStr1.compare(SubStr2), -1, 1));
+ }
+
+ if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x
+ return B.CreateNeg(B.CreateZExt(
+ B.CreateLoad(B.getInt8Ty(), Str2P, "strcmpload"), CI->getType()));
+
+ if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
+ return B.CreateZExt(B.CreateLoad(B.getInt8Ty(), Str1P, "strcmpload"),
+ CI->getType());
+
+ uint64_t Len1 = GetStringLength(Str1P);
+ if (Len1)
+ annotateDereferenceableBytes(CI, 0, Len1);
+ uint64_t Len2 = GetStringLength(Str2P);
+ if (Len2)
+ annotateDereferenceableBytes(CI, 1, Len2);
+
+ // strncmp to memcmp
+ if (!HasStr1 && HasStr2) {
+ Len2 = std::min(Len2, Length);
+ if (canTransformToMemCmp(CI, Str1P, Len2, DL))
+ return copyFlags(
+ *CI,
+ emitMemCmp(Str1P, Str2P,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2),
+ B, DL, TLI));
+ } else if (HasStr1 && !HasStr2) {
+ Len1 = std::min(Len1, Length);
+ if (canTransformToMemCmp(CI, Str2P, Len1, DL))
+ return copyFlags(
+ *CI,
+ emitMemCmp(Str1P, Str2P,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1),
+ B, DL, TLI));
+ }
+
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeStrNDup(CallInst *CI, IRBuilderBase &B) {
+ Value *Src = CI->getArgOperand(0);
+ ConstantInt *Size = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ uint64_t SrcLen = GetStringLength(Src);
+ if (SrcLen && Size) {
+ annotateDereferenceableBytes(CI, 0, SrcLen);
+ if (SrcLen <= Size->getZExtValue() + 1)
+ return copyFlags(*CI, emitStrDup(Src, B, TLI));
+ }
+
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilderBase &B) {
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+ if (Dst == Src) // strcpy(x,x) -> x
+ return Src;
+
+ annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len)
+ annotateDereferenceableBytes(CI, 1, Len);
+ else
+ return nullptr;
+
+ // We have enough information to now generate the memcpy call to do the
+ // copy for us. Make a memcpy to copy the nul byte with align = 1.
+ CallInst *NewCI =
+ B.CreateMemCpy(Dst, Align(1), Src, Align(1),
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len));
+ mergeAttributesAndFlags(NewCI, *CI);
+ return Dst;
+}
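+
+// For example (illustrative): strcpy(d, "abc") becomes a 4-byte memcpy that
+// also copies the terminating nul, roughly
+//   call void @llvm.memcpy.p0.p0.i64(ptr align 1 %d, ptr align 1 @.str, i64 4, i1 false)
+// and the call itself is replaced by %d.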
+
+Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilderBase &B) {
+ Function *Callee = CI->getCalledFunction();
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+
+ // stpcpy(d,s) -> strcpy(d,s) if the result is not used.
+ if (CI->use_empty())
+ return copyFlags(*CI, emitStrCpy(Dst, Src, B, TLI));
+
+ if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
+ Value *StrLen = emitStrLen(Src, B, DL, TLI);
+ return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
+ }
+
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len)
+ annotateDereferenceableBytes(CI, 1, Len);
+ else
+ return nullptr;
+
+ Type *PT = Callee->getFunctionType()->getParamType(0);
+ Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len);
+ Value *DstEnd = B.CreateInBoundsGEP(
+ B.getInt8Ty(), Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1));
+
+ // We have enough information to now generate the memcpy call to do the
+ // copy for us. Make a memcpy to copy the nul byte with align = 1.
+ CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1), LenV);
+ mergeAttributesAndFlags(NewCI, *CI);
+ return DstEnd;
+}
+
+// Optimize a call to size_t strlcpy(char*, const char*, size_t).
+
+Value *LibCallSimplifier::optimizeStrLCpy(CallInst *CI, IRBuilderBase &B) {
+ Value *Size = CI->getArgOperand(2);
+ if (isKnownNonZero(Size, DL))
+ // Like snprintf, the function stores into the destination only when
+ // the size argument is nonzero.
+ annotateNonNullNoUndefBasedOnAccess(CI, 0);
+ // The function reads the source argument regardless of Size (it returns
+ // its length).
+ annotateNonNullNoUndefBasedOnAccess(CI, 1);
+
+ uint64_t NBytes;
+ if (ConstantInt *SizeC = dyn_cast<ConstantInt>(Size))
+ NBytes = SizeC->getZExtValue();
+ else
+ return nullptr;
+
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+ if (NBytes <= 1) {
+ if (NBytes == 1)
+ // For a call to strlcpy(D, S, 1) first store a nul in *D.
+ B.CreateStore(B.getInt8(0), Dst);
+
+ // Transform strlcpy(D, S, 0) to a call to strlen(S).
+ return copyFlags(*CI, emitStrLen(Src, B, DL, TLI));
+ }
+
+ // Try to determine the length of the source, substituting its size
+ // when it's not nul-terminated (as it's required to be) to avoid
+ // reading past its end.
+ StringRef Str;
+ if (!getConstantStringInfo(Src, Str, /*TrimAtNul=*/false))
+ return nullptr;
+
+ uint64_t SrcLen = Str.find('\0');
+ // Set if the terminating nul should be copied by the call to memcpy
+ // below.
+ bool NulTerm = SrcLen < NBytes;
+
+ if (NulTerm)
+ // Overwrite NBytes with the number of bytes to copy, including
+ // the terminating nul.
+ NBytes = SrcLen + 1;
+ else {
+ // Set the length of the source for the function to return to its
+ // size, and cap NBytes at the same.
+ SrcLen = std::min(SrcLen, uint64_t(Str.size()));
+ NBytes = std::min(NBytes - 1, SrcLen);
+ }
+
+ if (SrcLen == 0) {
+    // Transform strlcpy(D, "", N) to (*D = '\0', 0).
+ B.CreateStore(B.getInt8(0), Dst);
+ return ConstantInt::get(CI->getType(), 0);
+ }
+
+ Function *Callee = CI->getCalledFunction();
+ Type *PT = Callee->getFunctionType()->getParamType(0);
+ // Transform strlcpy(D, S, N) to memcpy(D, S, N') where N' is the lower
+ // bound on strlen(S) + 1 and N, optionally followed by a nul store to
+ // D[N' - 1] if necessary.
+ CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1),
+ ConstantInt::get(DL.getIntPtrType(PT), NBytes));
+ mergeAttributesAndFlags(NewCI, *CI);
+
+ if (!NulTerm) {
+ Value *EndOff = ConstantInt::get(CI->getType(), NBytes);
+ Value *EndPtr = B.CreateInBoundsGEP(B.getInt8Ty(), Dst, EndOff);
+ B.CreateStore(B.getInt8(0), EndPtr);
+ }
+
+ // Like snprintf, strlcpy returns the number of nonzero bytes that would
+ // have been copied if the bound had been sufficiently big (which in this
+ // case is strlen(Src)).
+ return ConstantInt::get(CI->getType(), SrcLen);
+}
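+
+// Illustrative folds (constant operands hypothetical):
+//   strlcpy(d, "abc", 8) -> memcpy(d, "abc", 4), result 3
+//   strlcpy(d, "abc", 2) -> memcpy(d, "abc", 1) followed by d[1] = '\0',
+//                           result 3 (the length strlcpy would report)
+//   strlcpy(d, s, 0)     -> strlen(s)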
+
+// Optimize a call CI to either stpncpy when RetEnd is true, or to strncpy
+// otherwise.
+Value *LibCallSimplifier::optimizeStringNCpy(CallInst *CI, bool RetEnd,
+ IRBuilderBase &B) {
+ Function *Callee = CI->getCalledFunction();
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+ Value *Size = CI->getArgOperand(2);
+
+ if (isKnownNonZero(Size, DL)) {
+ // Both st{p,r}ncpy(D, S, N) access the source and destination arrays
+ // only when N is nonzero.
+ annotateNonNullNoUndefBasedOnAccess(CI, 0);
+ annotateNonNullNoUndefBasedOnAccess(CI, 1);
+ }
+
+  // If the "bound" argument is known, set N to it. Otherwise set it to
+  // UINT64_MAX and handle it later.
+ uint64_t N = UINT64_MAX;
+ if (ConstantInt *SizeC = dyn_cast<ConstantInt>(Size))
+ N = SizeC->getZExtValue();
+
+ if (N == 0)
+ // Fold st{p,r}ncpy(D, S, 0) to D.
+ return Dst;
+
+ if (N == 1) {
+ Type *CharTy = B.getInt8Ty();
+ Value *CharVal = B.CreateLoad(CharTy, Src, "stxncpy.char0");
+ B.CreateStore(CharVal, Dst);
+ if (!RetEnd)
+ // Transform strncpy(D, S, 1) to return (*D = *S), D.
+ return Dst;
+
+ // Transform stpncpy(D, S, 1) to return (*D = *S) ? D + 1 : D.
+ Value *ZeroChar = ConstantInt::get(CharTy, 0);
+ Value *Cmp = B.CreateICmpEQ(CharVal, ZeroChar, "stpncpy.char0cmp");
+
+ Value *Off1 = B.getInt32(1);
+ Value *EndPtr = B.CreateInBoundsGEP(CharTy, Dst, Off1, "stpncpy.end");
+ return B.CreateSelect(Cmp, Dst, EndPtr, "stpncpy.sel");
+ }
+
+  // If the length of the input string is known, set SrcLen to it.
+ uint64_t SrcLen = GetStringLength(Src);
+ if (SrcLen)
+ annotateDereferenceableBytes(CI, 1, SrcLen);
+ else
+ return nullptr;
+
+ --SrcLen; // Unbias length.
+
+ if (SrcLen == 0) {
+ // Transform st{p,r}ncpy(D, "", N) to memset(D, '\0', N) for any N.
+ Align MemSetAlign =
+ CI->getAttributes().getParamAttrs(0).getAlignment().valueOrOne();
+ CallInst *NewCI = B.CreateMemSet(Dst, B.getInt8('\0'), Size, MemSetAlign);
+ AttrBuilder ArgAttrs(CI->getContext(), CI->getAttributes().getParamAttrs(0));
+ NewCI->setAttributes(NewCI->getAttributes().addParamAttributes(
+ CI->getContext(), 0, ArgAttrs));
+ copyFlags(*CI, NewCI);
+ return Dst;
+ }
+
+ if (N > SrcLen + 1) {
+ if (N > 128)
+ // Bail if N is large or unknown.
+ return nullptr;
+
+ // st{p,r}ncpy(D, "a", N) -> memcpy(D, "a\0\0\0", N) for N <= 128.
+ StringRef Str;
+ if (!getConstantStringInfo(Src, Str))
+ return nullptr;
+ std::string SrcStr = Str.str();
+ // Create a bigger, nul-padded array with the same length, SrcLen,
+ // as the original string.
+ SrcStr.resize(N, '\0');
+ Src = B.CreateGlobalString(SrcStr, "str");
+ }
+
+ Type *PT = Callee->getFunctionType()->getParamType(0);
+ // st{p,r}ncpy(D, S, N) -> memcpy(align 1 D, align 1 S, N) when both
+ // S and N are constant.
+ CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1),
+ ConstantInt::get(DL.getIntPtrType(PT), N));
+ mergeAttributesAndFlags(NewCI, *CI);
+ if (!RetEnd)
+ return Dst;
+
+ // stpncpy(D, S, N) returns the address of the first null in D if it writes
+ // one, otherwise D + N.
+ Value *Off = B.getInt64(std::min(SrcLen, N));
+ return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, Off, "endptr");
+}
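+
+// Illustrative folds (constant operands hypothetical):
+//   strncpy(d, s, 0)   -> d
+//   stpncpy(d, "", n)  -> memset(d, '\0', n), result d
+//   stpncpy(d, "a", 3) -> memcpy(d, "a\0\0", 3), result d + 1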
+
+Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B,
+ unsigned CharSize,
+ Value *Bound) {
+ Value *Src = CI->getArgOperand(0);
+ Type *CharTy = B.getIntNTy(CharSize);
+
+ if (isOnlyUsedInZeroEqualityComparison(CI) &&
+ (!Bound || isKnownNonZero(Bound, DL))) {
+ // Fold strlen:
+ // strlen(x) != 0 --> *x != 0
+ // strlen(x) == 0 --> *x == 0
+ // and likewise strnlen with constant N > 0:
+ // strnlen(x, N) != 0 --> *x != 0
+ // strnlen(x, N) == 0 --> *x == 0
+ return B.CreateZExt(B.CreateLoad(CharTy, Src, "char0"),
+ CI->getType());
+ }
+
+ if (Bound) {
+ if (ConstantInt *BoundCst = dyn_cast<ConstantInt>(Bound)) {
+ if (BoundCst->isZero())
+ // Fold strnlen(s, 0) -> 0 for any s, constant or otherwise.
+ return ConstantInt::get(CI->getType(), 0);
+
+ if (BoundCst->isOne()) {
+ // Fold strnlen(s, 1) -> *s ? 1 : 0 for any s.
+ Value *CharVal = B.CreateLoad(CharTy, Src, "strnlen.char0");
+ Value *ZeroChar = ConstantInt::get(CharTy, 0);
+ Value *Cmp = B.CreateICmpNE(CharVal, ZeroChar, "strnlen.char0cmp");
+ return B.CreateZExt(Cmp, CI->getType());
+ }
+ }
+ }
+
+ if (uint64_t Len = GetStringLength(Src, CharSize)) {
+ Value *LenC = ConstantInt::get(CI->getType(), Len - 1);
+ // Fold strlen("xyz") -> 3 and strnlen("xyz", 2) -> 2
+ // and strnlen("xyz", Bound) -> min(3, Bound) for nonconstant Bound.
+ if (Bound)
+ return B.CreateBinaryIntrinsic(Intrinsic::umin, LenC, Bound);
+ return LenC;
+ }
+
+ if (Bound)
+ // Punt for strnlen for now.
+ return nullptr;
+
+ // If s is a constant pointer pointing to a string literal, we can fold
+ // strlen(s + x) to strlen(s) - x, when x is known to be in the range
+ // [0, strlen(s)] or the string has a single null terminator '\0' at the end.
+ // We only try to simplify strlen when the pointer s points to an array
+ // of CharSize elements. Otherwise, we would need to scale the offset x before
+ // doing the subtraction. This will make the optimization more complex, and
+ // it's not very useful because calling strlen for a pointer of other types is
+ // very uncommon.
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(Src)) {
+ // TODO: Handle subobjects.
+ if (!isGEPBasedOnPointerToString(GEP, CharSize))
+ return nullptr;
+
+ ConstantDataArraySlice Slice;
+ if (getConstantDataArrayInfo(GEP->getOperand(0), Slice, CharSize)) {
+ uint64_t NullTermIdx;
+ if (Slice.Array == nullptr) {
+ NullTermIdx = 0;
+ } else {
+ NullTermIdx = ~((uint64_t)0);
+ for (uint64_t I = 0, E = Slice.Length; I < E; ++I) {
+ if (Slice.Array->getElementAsInteger(I + Slice.Offset) == 0) {
+ NullTermIdx = I;
+ break;
+ }
+ }
+ // If the string does not have '\0', leave it to strlen to compute
+ // its length.
+ if (NullTermIdx == ~((uint64_t)0))
+ return nullptr;
+ }
+
+ Value *Offset = GEP->getOperand(2);
+ KnownBits Known = computeKnownBits(Offset, DL, 0, nullptr, CI, nullptr);
+ uint64_t ArrSize =
+ cast<ArrayType>(GEP->getSourceElementType())->getNumElements();
+
+ // If Offset is not provably in the range [0, NullTermIdx], we can still
+ // optimize if we can prove that the program has undefined behavior when
+ // Offset is outside that range. That is the case when GEP->getOperand(0)
+ // is a pointer to an object whose memory extent is NullTermIdx+1.
+ if ((Known.isNonNegative() && Known.getMaxValue().ule(NullTermIdx)) ||
+ (isa<GlobalVariable>(GEP->getOperand(0)) &&
+ NullTermIdx == ArrSize - 1)) {
+ Offset = B.CreateSExtOrTrunc(Offset, CI->getType());
+ return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx),
+ Offset);
+ }
+ }
+ }
+
+ // strlen(x?"foo":"bars") --> x ? 3 : 4
+ if (SelectInst *SI = dyn_cast<SelectInst>(Src)) {
+ uint64_t LenTrue = GetStringLength(SI->getTrueValue(), CharSize);
+ uint64_t LenFalse = GetStringLength(SI->getFalseValue(), CharSize);
+ if (LenTrue && LenFalse) {
+ ORE.emit([&]() {
+ return OptimizationRemark("instcombine", "simplify-libcalls", CI)
+ << "folded strlen(select) to select of constants";
+ });
+ return B.CreateSelect(SI->getCondition(),
+ ConstantInt::get(CI->getType(), LenTrue - 1),
+ ConstantInt::get(CI->getType(), LenFalse - 1));
+ }
+ }
+
+ return nullptr;
+}
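+
+// A few concrete instances of the string-length folds above (the string
+// contents and names are hypothetical, shown only for illustration):
+//   strlen("xyz")              -> 3
+//   strnlen(s, 0)              -> 0
+//   strnlen(s, 1)              -> *s ? 1 : 0
+//   strnlen("xyz", n)          -> umin(3, n)
+//   strlen(s + i)              -> 3 - i   (s a global "xyz" whose only nul is
+//                                          the terminator, or i known in [0, 3])
+//   strlen(x ? "foo" : "bars") -> x ? 3 : 4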
+
+Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilderBase &B) {
+ if (Value *V = optimizeStringLength(CI, B, 8))
+ return V;
+ annotateNonNullNoUndefBasedOnAccess(CI, 0);
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeStrNLen(CallInst *CI, IRBuilderBase &B) {
+ Value *Bound = CI->getArgOperand(1);
+ if (Value *V = optimizeStringLength(CI, B, 8, Bound))
+ return V;
+
+ if (isKnownNonZero(Bound, DL))
+ annotateNonNullNoUndefBasedOnAccess(CI, 0);
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilderBase &B) {
+ Module &M = *CI->getModule();
+ unsigned WCharSize = TLI->getWCharSize(M) * 8;
+ // We cannot perform this optimization without wchar_size metadata.
+ if (WCharSize == 0)
+ return nullptr;
+
+ return optimizeStringLength(CI, B, WCharSize);
+}
+
+Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilderBase &B) {
+ StringRef S1, S2;
+ bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strpbrk(s, "") -> nullptr
+ // strpbrk("", s) -> nullptr
+ if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2) {
+ size_t I = S1.find_first_of(S2);
+ if (I == StringRef::npos) // No match.
+ return Constant::getNullValue(CI->getType());
+
+ return B.CreateInBoundsGEP(B.getInt8Ty(), CI->getArgOperand(0),
+ B.getInt64(I), "strpbrk");
+ }
+
+ // strpbrk(s, "a") -> strchr(s, 'a')
+ if (HasS2 && S2.size() == 1)
+ return copyFlags(*CI, emitStrChr(CI->getArgOperand(0), S2[0], B, TLI));
+
+ return nullptr;
+}
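+
+// Concrete examples of the strpbrk folds above (hypothetical strings):
+//   strpbrk("hello", "ol") -> "hello" + 2   ('l' at index 2 is the first match)
+//   strpbrk("hello", "xy") -> null
+//   strpbrk(s, "a")        -> strchr(s, 'a')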
+
+Value *LibCallSimplifier::optimizeStrTo(CallInst *CI, IRBuilderBase &B) {
+ Value *EndPtr = CI->getArgOperand(1);
+ if (isa<ConstantPointerNull>(EndPtr)) {
+ // With a null EndPtr, this function won't capture the main argument.
+ // It would be readonly too, except that it still may write to errno.
+ CI->addParamAttr(0, Attribute::NoCapture);
+ }
+
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeStrSpn(CallInst *CI, IRBuilderBase &B) {
+ StringRef S1, S2;
+ bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strspn(s, "") -> 0
+ // strspn("", s) -> 0
+ if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2) {
+ size_t Pos = S1.find_first_not_of(S2);
+ if (Pos == StringRef::npos)
+ Pos = S1.size();
+ return ConstantInt::get(CI->getType(), Pos);
+ }
+
+ return nullptr;
+}
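+
+// Concrete examples of the strspn folds above (hypothetical strings):
+//   strspn("abcba", "ab") -> 2   (first char not in "ab" is at index 2)
+//   strspn("aaa", "a")    -> 3   (no mismatch, so the full length)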
+
+Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilderBase &B) {
+ StringRef S1, S2;
+ bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strcspn("", s) -> 0
+ if (HasS1 && S1.empty())
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2) {
+ size_t Pos = S1.find_first_of(S2);
+ if (Pos == StringRef::npos)
+ Pos = S1.size();
+ return ConstantInt::get(CI->getType(), Pos);
+ }
+
+ // strcspn(s, "") -> strlen(s)
+ if (HasS2 && S2.empty())
+ return copyFlags(*CI, emitStrLen(CI->getArgOperand(0), B, DL, TLI));
+
+ return nullptr;
+}
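+
+// Concrete examples of the strcspn folds above (hypothetical strings):
+//   strcspn("abcba", "c") -> 2   (first char from "c" appears at index 2)
+//   strcspn("abc", "xy")  -> 3   (no match, so the full length)
+//   strcspn(s, "")        -> strlen(s)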
+
+Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilderBase &B) {
+ // fold strstr(x, x) -> x.
+ if (CI->getArgOperand(0) == CI->getArgOperand(1))
+ return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
+
+ // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
+ if (isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
+ Value *StrLen = emitStrLen(CI->getArgOperand(1), B, DL, TLI);
+ if (!StrLen)
+ return nullptr;
+ Value *StrNCmp = emitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
+ StrLen, B, DL, TLI);
+ if (!StrNCmp)
+ return nullptr;
+ for (User *U : llvm::make_early_inc_range(CI->users())) {
+ ICmpInst *Old = cast<ICmpInst>(U);
+ Value *Cmp =
+ B.CreateICmp(Old->getPredicate(), StrNCmp,
+ ConstantInt::getNullValue(StrNCmp->getType()), "cmp");
+ replaceAllUsesWith(Old, Cmp);
+ }
+ return CI;
+ }
+
+ // See if either input string is a constant string.
+ StringRef SearchStr, ToFindStr;
+ bool HasStr1 = getConstantStringInfo(CI->getArgOperand(0), SearchStr);
+ bool HasStr2 = getConstantStringInfo(CI->getArgOperand(1), ToFindStr);
+
+ // fold strstr(x, "") -> x.
+ if (HasStr2 && ToFindStr.empty())
+ return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
+
+ // If both strings are known, constant fold it.
+ if (HasStr1 && HasStr2) {
+ size_t Offset = SearchStr.find(ToFindStr);
+
+ if (Offset == StringRef::npos) // strstr("foo", "bar") -> null
+ return Constant::getNullValue(CI->getType());
+
+ // strstr("abcd", "bc") -> gep((char*)"abcd", 1)
+ Value *Result = castToCStr(CI->getArgOperand(0), B);
+ Result =
+ B.CreateConstInBoundsGEP1_64(B.getInt8Ty(), Result, Offset, "strstr");
+ return B.CreateBitCast(Result, CI->getType());
+ }
+
+ // fold strstr(x, "y") -> strchr(x, 'y').
+ if (HasStr2 && ToFindStr.size() == 1) {
+ Value *StrChr = emitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI);
+ return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr;
+ }
+
+ annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
+ return nullptr;
+}
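+
+// Concrete examples of the strstr folds above (hypothetical strings):
+//   strstr(x, x)         -> x
+//   strstr(x, "")        -> x
+//   strstr("abcd", "bc") -> "abcd" + 1
+//   strstr("abcd", "xy") -> null
+//   strstr(x, "y")       -> strchr(x, 'y')
+//   strstr(a, b) == a    -> strncmp(a, b, strlen(b)) == 0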
+
+Value *LibCallSimplifier::optimizeMemRChr(CallInst *CI, IRBuilderBase &B) {
+ Value *SrcStr = CI->getArgOperand(0);
+ Value *Size = CI->getArgOperand(2);
+ annotateNonNullAndDereferenceable(CI, 0, Size, DL);
+ Value *CharVal = CI->getArgOperand(1);
+ ConstantInt *LenC = dyn_cast<ConstantInt>(Size);
+ Value *NullPtr = Constant::getNullValue(CI->getType());
+
+ if (LenC) {
+ if (LenC->isZero())
+ // Fold memrchr(x, y, 0) --> null.
+ return NullPtr;
+
+ if (LenC->isOne()) {
+ // Fold memrchr(x, y, 1) --> *x == y ? x : null for any x and y,
+ // constant or otherwise.
+ Value *Val = B.CreateLoad(B.getInt8Ty(), SrcStr, "memrchr.char0");
+ // Slice off the character's high end bits.
+ CharVal = B.CreateTrunc(CharVal, B.getInt8Ty());
+ Value *Cmp = B.CreateICmpEQ(Val, CharVal, "memrchr.char0cmp");
+ return B.CreateSelect(Cmp, SrcStr, NullPtr, "memrchr.sel");
+ }
+ }
+
+ StringRef Str;
+ if (!getConstantStringInfo(SrcStr, Str, /*TrimAtNul=*/false))
+ return nullptr;
+
+ if (Str.size() == 0)
+ // If the array is empty fold memrchr(A, C, N) to null for any value
+ // of C and N on the basis that the only valid value of N is zero
+ // (otherwise the call is undefined).
+ return NullPtr;
+
+ uint64_t EndOff = UINT64_MAX;
+ if (LenC) {
+ EndOff = LenC->getZExtValue();
+ if (Str.size() < EndOff)
+ // Punt out-of-bounds accesses to sanitizers and/or libc.
+ return nullptr;
+ }
+
+ if (ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal)) {
+ // Fold memrchr(S, C, N) for a constant C.
+ size_t Pos = Str.rfind(CharC->getZExtValue(), EndOff);
+ if (Pos == StringRef::npos)
+ // When the character is not in the source array fold the result
+ // to null regardless of Size.
+ return NullPtr;
+
+ if (LenC)
+ // Fold memrchr(s, c, N) --> s + Pos for constant N > Pos.
+ return B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, B.getInt64(Pos));
+
+ if (Str.find(Str[Pos]) == Pos) {
+ // When there is just a single occurrence of C in S, i.e., the one
+ // in Str[Pos], fold
+ // memrchr(s, c, N) --> N <= Pos ? null : s + Pos
+ // for nonconstant N.
+ Value *Cmp = B.CreateICmpULE(Size, ConstantInt::get(Size->getType(), Pos),
+ "memrchr.cmp");
+ Value *SrcPlus = B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr,
+ B.getInt64(Pos), "memrchr.ptr_plus");
+ return B.CreateSelect(Cmp, NullPtr, SrcPlus, "memrchr.sel");
+ }
+ }
+
+ // Truncate the string to search at most EndOff characters.
+ Str = Str.substr(0, EndOff);
+ if (Str.find_first_not_of(Str[0]) != StringRef::npos)
+ return nullptr;
+
+ // If the source array consists of all equal characters, then for any
+ // C and N (whether in bounds or not), fold memrchr(S, C, N) to
+ // N != 0 && *S == C ? S + N - 1 : null
+ Type *SizeTy = Size->getType();
+ Type *Int8Ty = B.getInt8Ty();
+ Value *NNeZ = B.CreateICmpNE(Size, ConstantInt::get(SizeTy, 0));
+ // Slice off the sought character's high end bits.
+ CharVal = B.CreateTrunc(CharVal, Int8Ty);
+ Value *CEqS0 = B.CreateICmpEQ(ConstantInt::get(Int8Ty, Str[0]), CharVal);
+ Value *And = B.CreateLogicalAnd(NNeZ, CEqS0);
+ Value *SizeM1 = B.CreateSub(Size, ConstantInt::get(SizeTy, 1));
+ Value *SrcPlus =
+ B.CreateInBoundsGEP(Int8Ty, SrcStr, SizeM1, "memrchr.ptr_plus");
+ return B.CreateSelect(And, SrcPlus, NullPtr, "memrchr.sel");
+}
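+
+// Concrete examples of the memrchr folds above (hypothetical arrays; A denotes
+// a constant 3-byte array {'a','a','a'} with no terminating nul):
+//   memrchr(s, c, 0)       -> null
+//   memrchr(s, c, 1)       -> *s == (char)c ? s : null
+//   memrchr("abc", 'b', 3) -> "abc" + 1
+//   memrchr("abc", 'b', n) -> n <= 1 ? null : "abc" + 1   (single occurrence)
+//   memrchr(A, c, n)       -> n != 0 && (char)c == 'a' ? A + n - 1 : null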
+
+Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {
+ Value *SrcStr = CI->getArgOperand(0);
+ Value *Size = CI->getArgOperand(2);
+
+ if (isKnownNonZero(Size, DL)) {
+ annotateNonNullNoUndefBasedOnAccess(CI, 0);
+ if (isOnlyUsedInEqualityComparison(CI, SrcStr))
+ return memChrToCharCompare(CI, Size, B, DL);
+ }
+
+ Value *CharVal = CI->getArgOperand(1);
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal);
+ ConstantInt *LenC = dyn_cast<ConstantInt>(Size);
+ Value *NullPtr = Constant::getNullValue(CI->getType());
+
+ // memchr(x, y, 0) -> null
+ if (LenC) {
+ if (LenC->isZero())
+ return NullPtr;
+
+ if (LenC->isOne()) {
+ // Fold memchr(x, y, 1) --> *x == y ? x : null for any x and y,
+ // constant or otherwise.
+ Value *Val = B.CreateLoad(B.getInt8Ty(), SrcStr, "memchr.char0");
+ // Slice off the character's high end bits.
+ CharVal = B.CreateTrunc(CharVal, B.getInt8Ty());
+ Value *Cmp = B.CreateICmpEQ(Val, CharVal, "memchr.char0cmp");
+ return B.CreateSelect(Cmp, SrcStr, NullPtr, "memchr.sel");
+ }
+ }
+
+ StringRef Str;
+ if (!getConstantStringInfo(SrcStr, Str, /*TrimAtNul=*/false))
+ return nullptr;
+
+ if (CharC) {
+ size_t Pos = Str.find(CharC->getZExtValue());
+ if (Pos == StringRef::npos)
+ // When the character is not in the source array fold the result
+ // to null regardless of Size.
+ return NullPtr;
+
+ // Fold memchr(s, c, n) -> n <= Pos ? null : s + Pos
+ // When the constant Size is less than or equal to the character
+ // position also fold the result to null.
+ Value *Cmp = B.CreateICmpULE(Size, ConstantInt::get(Size->getType(), Pos),
+ "memchr.cmp");
+ Value *SrcPlus = B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, B.getInt64(Pos),
+ "memchr.ptr");
+ return B.CreateSelect(Cmp, NullPtr, SrcPlus);
+ }
+
+ if (Str.size() == 0)
+ // If the array is empty fold memchr(A, C, N) to null for any value
+ // of C and N on the basis that the only valid value of N is zero
+ // (otherwise the call is undefined).
+ return NullPtr;
+
+ if (LenC)
+ Str = substr(Str, LenC->getZExtValue());
+
+ size_t Pos = Str.find_first_not_of(Str[0]);
+ if (Pos == StringRef::npos ||
+ Str.find_first_not_of(Str[Pos], Pos) == StringRef::npos) {
+ // If the source array consists of at most two consecutive sequences
+ // of the same characters, then for any C and N (whether in bounds or
+ // not), fold memchr(S, C, N) to
+ // N != 0 && *S == C ? S : null
+ // or for the two sequences to:
+ // N != 0 && *S == C ? S : (N > Pos && S[Pos] == C ? S + Pos : null)
+ // (Sel2 denotes the outer select and Sel1 the inner one in the code below.)
+ // The latter makes it also possible to fold strchr() calls with strings
+ // of the same characters.
+ Type *SizeTy = Size->getType();
+ Type *Int8Ty = B.getInt8Ty();
+
+ // Slice off the sought character's high end bits.
+ CharVal = B.CreateTrunc(CharVal, Int8Ty);
+
+ Value *Sel1 = NullPtr;
+ if (Pos != StringRef::npos) {
+ // Handle two consecutive sequences of the same characters.
+ Value *PosVal = ConstantInt::get(SizeTy, Pos);
+ Value *StrPos = ConstantInt::get(Int8Ty, Str[Pos]);
+ Value *CEqSPos = B.CreateICmpEQ(CharVal, StrPos);
+ Value *NGtPos = B.CreateICmp(ICmpInst::ICMP_UGT, Size, PosVal);
+ Value *And = B.CreateAnd(CEqSPos, NGtPos);
+ Value *SrcPlus = B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, PosVal);
+ Sel1 = B.CreateSelect(And, SrcPlus, NullPtr, "memchr.sel1");
+ }
+
+ Value *Str0 = ConstantInt::get(Int8Ty, Str[0]);
+ Value *CEqS0 = B.CreateICmpEQ(Str0, CharVal);
+ Value *NNeZ = B.CreateICmpNE(Size, ConstantInt::get(SizeTy, 0));
+ Value *And = B.CreateAnd(NNeZ, CEqS0);
+ return B.CreateSelect(And, SrcStr, Sel1, "memchr.sel2");
+ }
+
+ if (!LenC) {
+ if (isOnlyUsedInEqualityComparison(CI, SrcStr))
+ // S is dereferenceable so it's safe to load from it and fold
+ // memchr(S, C, N) == S to N && *S == C for any C and N.
+ // TODO: This is safe even for nonconstant S.
+ return memChrToCharCompare(CI, Size, B, DL);
+
+ // From now on we need a constant length and constant array.
+ return nullptr;
+ }
+
+ // If the char is variable but the input str and length are not we can turn
+ // this memchr call into a simple bit field test. Of course this only works
+ // when the return value is only checked against null.
+ //
+ // It would be really nice to reuse switch lowering here but we can't change
+ // the CFG at this point.
+ //
+ // memchr("\r\n", C, 2) != nullptr -> (1 << C & ((1 << '\r') | (1 << '\n')))
+ // != 0
+ // after bounds check.
+ if (Str.empty() || !isOnlyUsedInZeroEqualityComparison(CI))
+ return nullptr;
+
+ unsigned char Max =
+ *std::max_element(reinterpret_cast<const unsigned char *>(Str.begin()),
+ reinterpret_cast<const unsigned char *>(Str.end()));
+
+ // Make sure the bit field we're about to create fits in a register on the
+ // target.
+ // FIXME: On a 64 bit architecture this prevents us from using the
+ // interesting range of alpha ascii chars. We could do better by emitting
+ // two bitfields or shifting the range by 64 if no lower chars are used.
+ if (!DL.fitsInLegalInteger(Max + 1))
+ return nullptr;
+
+ // For the bit field use a power-of-2 type with at least 8 bits to avoid
+ // creating unnecessary illegal types.
+ unsigned char Width = NextPowerOf2(std::max((unsigned char)7, Max));
+
+ // Now build the bit field.
+ APInt Bitfield(Width, 0);
+ for (char C : Str)
+ Bitfield.setBit((unsigned char)C);
+ Value *BitfieldC = B.getInt(Bitfield);
+
+ // Adjust width of "C" to the bitfield width, then mask off the high bits.
+ Value *C = B.CreateZExtOrTrunc(CharVal, BitfieldC->getType());
+ C = B.CreateAnd(C, B.getIntN(Width, 0xFF));
+
+ // First check that the bit field access is within bounds.
+ Value *Bounds = B.CreateICmp(ICmpInst::ICMP_ULT, C, B.getIntN(Width, Width),
+ "memchr.bounds");
+
+ // Create code that checks if the given bit is set in the field.
+ Value *Shl = B.CreateShl(B.getIntN(Width, 1ULL), C);
+ Value *Bits = B.CreateIsNotNull(B.CreateAnd(Shl, BitfieldC), "memchr.bits");
+
+ // Finally merge both checks and cast to pointer type. The inttoptr
+ // implicitly zexts the i1 to intptr type.
+ return B.CreateIntToPtr(B.CreateLogicalAnd(Bounds, Bits, "memchr"),
+ CI->getType());
+}
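+
+// Concrete examples of the memchr folds above (hypothetical arrays; A denotes
+// a constant 3-byte array {'a','a','b'} with no terminating nul):
+//   memchr(s, c, 0)       -> null
+//   memchr(s, c, 1)       -> *s == (char)c ? s : null
+//   memchr("abc", 'b', n) -> n <= 1 ? null : "abc" + 1
+//   memchr(A, c, n)       -> n != 0 && c == 'a'
+//                              ? A : (n > 2 && c == 'b' ? A + 2 : null)
+//   memchr("\r\n", c, 2) != null
+//                         -> c < 16 && ((1 << c) & ((1 << '\r') | (1 << '\n'))) != 0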
+
+// Optimize a memcmp or, when StrNCmp is true, strncmp call CI with constant
+// arrays LHS and RHS and nonconstant Size.
+static Value *optimizeMemCmpVarSize(CallInst *CI, Value *LHS, Value *RHS,
+ Value *Size, bool StrNCmp,
+ IRBuilderBase &B, const DataLayout &DL) {
+ if (LHS == RHS) // memcmp(s,s,x) -> 0
+ return Constant::getNullValue(CI->getType());
+
+ StringRef LStr, RStr;
+ if (!getConstantStringInfo(LHS, LStr, /*TrimAtNul=*/false) ||
+ !getConstantStringInfo(RHS, RStr, /*TrimAtNul=*/false))
+ return nullptr;
+
+ // If the contents of both constant arrays are known, fold a call to
+ // memcmp(A, B, N) to
+ // N <= Pos ? 0 : (A < B ? -1 : B < A ? +1 : 0)
+ // where Pos is the first mismatch between A and B, determined below.
+
+ uint64_t Pos = 0;
+ Value *Zero = ConstantInt::get(CI->getType(), 0);
+ for (uint64_t MinSize = std::min(LStr.size(), RStr.size()); ; ++Pos) {
+ if (Pos == MinSize ||
+ (StrNCmp && (LStr[Pos] == '\0' && RStr[Pos] == '\0'))) {
+ // One array is a leading part of the other of equal or greater
+ // size, or for strncmp, the arrays are equal strings.
+ // Fold the result to zero. Size is assumed to be in bounds, since
+ // otherwise the call would be undefined.
+ return Zero;
+ }
+
+ if (LStr[Pos] != RStr[Pos])
+ break;
+ }
+
+ // Normalize the result.
+ typedef unsigned char UChar;
+ int IRes = UChar(LStr[Pos]) < UChar(RStr[Pos]) ? -1 : 1;
+ Value *MaxSize = ConstantInt::get(Size->getType(), Pos);
+ Value *Cmp = B.CreateICmp(ICmpInst::ICMP_ULE, Size, MaxSize);
+ Value *Res = ConstantInt::get(CI->getType(), IRes);
+ return B.CreateSelect(Cmp, Zero, Res);
+}
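+
+// Concrete examples of the variable-size folds above (hypothetical arrays;
+// Size is assumed to be in bounds, as noted in the code):
+//   memcmp(s, s, n)        -> 0
+//   memcmp("ab", "ac", n)  -> n <= 1 ? 0 : -1   (first mismatch at index 1)
+//   strncmp("ab", "ab", n) -> 0                 (equal strings)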
+
+// Optimize a memcmp call CI with constant size Len.
+static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS,
+ uint64_t Len, IRBuilderBase &B,
+ const DataLayout &DL) {
+ if (Len == 0) // memcmp(s1,s2,0) -> 0
+ return Constant::getNullValue(CI->getType());
+
+ // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS
+ if (Len == 1) {
+ Value *LHSV =
+ B.CreateZExt(B.CreateLoad(B.getInt8Ty(), castToCStr(LHS, B), "lhsc"),
+ CI->getType(), "lhsv");
+ Value *RHSV =
+ B.CreateZExt(B.CreateLoad(B.getInt8Ty(), castToCStr(RHS, B), "rhsc"),
+ CI->getType(), "rhsv");
+ return B.CreateSub(LHSV, RHSV, "chardiff");
+ }
+
+ // memcmp(S1,S2,N/8)==0 -> (*(intN_t*)S1 != *(intN_t*)S2)==0
+ // TODO: The case where both inputs are constants does not need to be limited
+ // to legal integers or equality comparison. See block below this.
+ if (DL.isLegalInteger(Len * 8) && isOnlyUsedInZeroEqualityComparison(CI)) {
+ IntegerType *IntType = IntegerType::get(CI->getContext(), Len * 8);
+ Align PrefAlignment = DL.getPrefTypeAlign(IntType);
+
+ // First, see if we can fold either argument to a constant.
+ Value *LHSV = nullptr;
+ if (auto *LHSC = dyn_cast<Constant>(LHS)) {
+ LHSC = ConstantExpr::getBitCast(LHSC, IntType->getPointerTo());
+ LHSV = ConstantFoldLoadFromConstPtr(LHSC, IntType, DL);
+ }
+ Value *RHSV = nullptr;
+ if (auto *RHSC = dyn_cast<Constant>(RHS)) {
+ RHSC = ConstantExpr::getBitCast(RHSC, IntType->getPointerTo());
+ RHSV = ConstantFoldLoadFromConstPtr(RHSC, IntType, DL);
+ }
+
+ // Don't generate unaligned loads. If either source is constant data,
+ // alignment doesn't matter for that source because there is no load.
+ if ((LHSV || getKnownAlignment(LHS, DL, CI) >= PrefAlignment) &&
+ (RHSV || getKnownAlignment(RHS, DL, CI) >= PrefAlignment)) {
+ if (!LHSV) {
+ Type *LHSPtrTy =
+ IntType->getPointerTo(LHS->getType()->getPointerAddressSpace());
+ LHSV = B.CreateLoad(IntType, B.CreateBitCast(LHS, LHSPtrTy), "lhsv");
+ }
+ if (!RHSV) {
+ Type *RHSPtrTy =
+ IntType->getPointerTo(RHS->getType()->getPointerAddressSpace());
+ RHSV = B.CreateLoad(IntType, B.CreateBitCast(RHS, RHSPtrTy), "rhsv");
+ }
+ return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp");
+ }
+ }
+
+ return nullptr;
+}
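+
+// Concrete examples of the constant-size folds above (hypothetical pointers,
+// assuming i64 is a legal integer type and both pointers are suitably aligned):
+//   memcmp(a, b, 0)      -> 0
+//   memcmp(a, b, 1)      -> (int)*(unsigned char *)a - (int)*(unsigned char *)b
+//   memcmp(a, b, 8) == 0 -> *(uint64_t *)a == *(uint64_t *)b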
+
+// Most simplifications for memcmp also apply to bcmp.
+Value *LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst *CI,
+ IRBuilderBase &B) {
+ Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
+ Value *Size = CI->getArgOperand(2);
+
+ annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL);
+
+ if (Value *Res = optimizeMemCmpVarSize(CI, LHS, RHS, Size, false, B, DL))
+ return Res;
+
+ // Handle constant Size.
+ ConstantInt *LenC = dyn_cast<ConstantInt>(Size);
+ if (!LenC)
+ return nullptr;
+
+ return optimizeMemCmpConstantSize(CI, LHS, RHS, LenC->getZExtValue(), B, DL);
+}
+
+Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilderBase &B) {
+ Module *M = CI->getModule();
+ if (Value *V = optimizeMemCmpBCmpCommon(CI, B))
+ return V;
+
+ // memcmp(x, y, Len) == 0 -> bcmp(x, y, Len) == 0
+ // bcmp can be more efficient than memcmp because it only has to know that
+ // there is a difference, not how different one is to the other.
+ if (isLibFuncEmittable(M, TLI, LibFunc_bcmp) &&
+ isOnlyUsedInZeroEqualityComparison(CI)) {
+ Value *LHS = CI->getArgOperand(0);
+ Value *RHS = CI->getArgOperand(1);
+ Value *Size = CI->getArgOperand(2);
+ return copyFlags(*CI, emitBCmp(LHS, RHS, Size, B, DL, TLI));
+ }
+
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeBCmp(CallInst *CI, IRBuilderBase &B) {
+ return optimizeMemCmpBCmpCommon(CI, B);
+}
+
+Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilderBase &B) {
+ Value *Size = CI->getArgOperand(2);
+ annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL);
+ if (isa<IntrinsicInst>(CI))
+ return nullptr;
+
+ // memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n)
+ CallInst *NewCI = B.CreateMemCpy(CI->getArgOperand(0), Align(1),
+ CI->getArgOperand(1), Align(1), Size);
+ mergeAttributesAndFlags(NewCI, *CI);
+ return CI->getArgOperand(0);
+}
+
+Value *LibCallSimplifier::optimizeMemCCpy(CallInst *CI, IRBuilderBase &B) {
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+ ConstantInt *StopChar = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ ConstantInt *N = dyn_cast<ConstantInt>(CI->getArgOperand(3));
+ StringRef SrcStr;
+ if (CI->use_empty() && Dst == Src)
+ return Dst;
+ // memccpy(d, s, c, 0) -> nullptr
+ if (N) {
+ if (N->isNullValue())
+ return Constant::getNullValue(CI->getType());
+ if (!getConstantStringInfo(Src, SrcStr, /*TrimAtNul=*/false) ||
+ // TODO: Handle zeroinitializer.
+ !StopChar)
+ return nullptr;
+ } else {
+ return nullptr;
+ }
+
+ // Wrap arg 'c' of type int to char
+ size_t Pos = SrcStr.find(StopChar->getSExtValue() & 0xFF);
+ if (Pos == StringRef::npos) {
+ if (N->getZExtValue() <= SrcStr.size()) {
+ copyFlags(*CI, B.CreateMemCpy(Dst, Align(1), Src, Align(1),
+ CI->getArgOperand(3)));
+ return Constant::getNullValue(CI->getType());
+ }
+ return nullptr;
+ }
+
+ Value *NewN =
+ ConstantInt::get(N->getType(), std::min(uint64_t(Pos + 1), N->getZExtValue()));
+ // memccpy -> llvm.memcpy
+ copyFlags(*CI, B.CreateMemCpy(Dst, Align(1), Src, Align(1), NewN));
+ return Pos + 1 <= N->getZExtValue()
+ ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, NewN)
+ : Constant::getNullValue(CI->getType());
+}
+
+Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilderBase &B) {
+ Value *Dst = CI->getArgOperand(0);
+ Value *N = CI->getArgOperand(2);
+ // mempcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n), x + n
+ CallInst *NewCI =
+ B.CreateMemCpy(Dst, Align(1), CI->getArgOperand(1), Align(1), N);
+ // Propagate attributes, but memcpy has no return value, so make sure that
+ // any return attributes are compliant.
+ // TODO: Attach return value attributes to the 1st operand to preserve them?
+ mergeAttributesAndFlags(NewCI, *CI);
+ return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N);
+}
+
+Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilderBase &B) {
+ Value *Size = CI->getArgOperand(2);
+ annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL);
+ if (isa<IntrinsicInst>(CI))
+ return nullptr;
+
+ // memmove(x, y, n) -> llvm.memmove(align 1 x, align 1 y, n)
+ CallInst *NewCI = B.CreateMemMove(CI->getArgOperand(0), Align(1),
+ CI->getArgOperand(1), Align(1), Size);
+ mergeAttributesAndFlags(NewCI, *CI);
+ return CI->getArgOperand(0);
+}
+
+Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilderBase &B) {
+ Value *Size = CI->getArgOperand(2);
+ annotateNonNullAndDereferenceable(CI, 0, Size, DL);
+ if (isa<IntrinsicInst>(CI))
+ return nullptr;
+
+ // memset(p, v, n) -> llvm.memset(align 1 p, v, n)
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
+ CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val, Size, Align(1));
+ mergeAttributesAndFlags(NewCI, *CI);
+ return CI->getArgOperand(0);
+}
+
+Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilderBase &B) {
+ if (isa<ConstantPointerNull>(CI->getArgOperand(0)))
+ return copyFlags(*CI, emitMalloc(CI->getArgOperand(1), B, DL, TLI));
+
+ return nullptr;
+}
+
+//===----------------------------------------------------------------------===//
+// Math Library Optimizations
+//===----------------------------------------------------------------------===//
+
+// Replace a libcall \p CI with a call to intrinsic \p IID
+static Value *replaceUnaryCall(CallInst *CI, IRBuilderBase &B,
+ Intrinsic::ID IID) {
+ // Propagate fast-math flags from the existing call to the new call.
+ IRBuilderBase::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(CI->getFastMathFlags());
+
+ Module *M = CI->getModule();
+ Value *V = CI->getArgOperand(0);
+ Function *F = Intrinsic::getDeclaration(M, IID, CI->getType());
+ CallInst *NewCall = B.CreateCall(F, V);
+ NewCall->takeName(CI);
+ return copyFlags(*CI, NewCall);
+}
+
+/// Return a variant of Val with float type.
+/// Currently this works in two cases: If Val is an FPExtension of a float
+/// value to something bigger, simply return the operand.
+/// If Val is a ConstantFP but can be converted to a float ConstantFP without
+/// loss of precision do so.
+static Value *valueHasFloatPrecision(Value *Val) {
+ if (FPExtInst *Cast = dyn_cast<FPExtInst>(Val)) {
+ Value *Op = Cast->getOperand(0);
+ if (Op->getType()->isFloatTy())
+ return Op;
+ }
+ if (ConstantFP *Const = dyn_cast<ConstantFP>(Val)) {
+ APFloat F = Const->getValueAPF();
+ bool losesInfo;
+ (void)F.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
+ &losesInfo);
+ if (!losesInfo)
+ return ConstantFP::get(Const->getContext(), F);
+ }
+ return nullptr;
+}
+
+/// Shrink double -> float functions.
+static Value *optimizeDoubleFP(CallInst *CI, IRBuilderBase &B,
+ bool isBinary, const TargetLibraryInfo *TLI,
+ bool isPrecise = false) {
+ Function *CalleeFn = CI->getCalledFunction();
+ if (!CI->getType()->isDoubleTy() || !CalleeFn)
+ return nullptr;
+
+ // If not all the uses of the function are converted to float, then bail out.
+ // This matters if the precision of the result is more important than the
+ // precision of the arguments.
+ if (isPrecise)
+ for (User *U : CI->users()) {
+ FPTruncInst *Cast = dyn_cast<FPTruncInst>(U);
+ if (!Cast || !Cast->getType()->isFloatTy())
+ return nullptr;
+ }
+
+ // If this is something like 'g((double) float)', convert to 'gf(float)'.
+ Value *V[2];
+ V[0] = valueHasFloatPrecision(CI->getArgOperand(0));
+ V[1] = isBinary ? valueHasFloatPrecision(CI->getArgOperand(1)) : nullptr;
+ if (!V[0] || (isBinary && !V[1]))
+ return nullptr;
+
+ // If call isn't an intrinsic, check that it isn't within a function with the
+ // same name as the float version of this call, otherwise the result is an
+ // infinite loop. For example, from MinGW-w64:
+ //
+ // float expf(float val) { return (float) exp((double) val); }
+ StringRef CalleeName = CalleeFn->getName();
+ bool IsIntrinsic = CalleeFn->isIntrinsic();
+ if (!IsIntrinsic) {
+ StringRef CallerName = CI->getFunction()->getName();
+ if (!CallerName.empty() && CallerName.back() == 'f' &&
+ CallerName.size() == (CalleeName.size() + 1) &&
+ CallerName.startswith(CalleeName))
+ return nullptr;
+ }
+
+ // Propagate the math semantics from the current call to the new call.
+ IRBuilderBase::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(CI->getFastMathFlags());
+
+ // g((double) float) -> (double) gf(float)
+ Value *R;
+ if (IsIntrinsic) {
+ Module *M = CI->getModule();
+ Intrinsic::ID IID = CalleeFn->getIntrinsicID();
+ Function *Fn = Intrinsic::getDeclaration(M, IID, B.getFloatTy());
+ R = isBinary ? B.CreateCall(Fn, V) : B.CreateCall(Fn, V[0]);
+ } else {
+ AttributeList CalleeAttrs = CalleeFn->getAttributes();
+ R = isBinary ? emitBinaryFloatFnCall(V[0], V[1], TLI, CalleeName, B,
+ CalleeAttrs)
+ : emitUnaryFloatFnCall(V[0], TLI, CalleeName, B, CalleeAttrs);
+ }
+ return B.CreateFPExt(R, B.getDoubleTy());
+}
+
+/// Shrink double -> float for unary functions.
+static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI,
+ bool isPrecise = false) {
+ return optimizeDoubleFP(CI, B, false, TLI, isPrecise);
+}
+
+/// Shrink double -> float for binary functions.
+static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI,
+ bool isPrecise = false) {
+ return optimizeDoubleFP(CI, B, true, TLI, isPrecise);
+}
+
+// cabs(z) -> sqrt((creal(z)*creal(z)) + (cimag(z)*cimag(z)))
+Value *LibCallSimplifier::optimizeCAbs(CallInst *CI, IRBuilderBase &B) {
+ if (!CI->isFast())
+ return nullptr;
+
+ // Propagate fast-math flags from the existing call to new instructions.
+ IRBuilderBase::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(CI->getFastMathFlags());
+
+ Value *Real, *Imag;
+ if (CI->arg_size() == 1) {
+ Value *Op = CI->getArgOperand(0);
+ assert(Op->getType()->isArrayTy() && "Unexpected signature for cabs!");
+ Real = B.CreateExtractValue(Op, 0, "real");
+ Imag = B.CreateExtractValue(Op, 1, "imag");
+ } else {
+ assert(CI->arg_size() == 2 && "Unexpected signature for cabs!");
+ Real = CI->getArgOperand(0);
+ Imag = CI->getArgOperand(1);
+ }
+
+ Value *RealReal = B.CreateFMul(Real, Real);
+ Value *ImagImag = B.CreateFMul(Imag, Imag);
+
+ Function *FSqrt = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::sqrt,
+ CI->getType());
+ return copyFlags(
+ *CI, B.CreateCall(FSqrt, B.CreateFAdd(RealReal, ImagImag), "cabs"));
+}
+
+static Value *optimizeTrigReflections(CallInst *Call, LibFunc Func,
+ IRBuilderBase &B) {
+ if (!isa<FPMathOperator>(Call))
+ return nullptr;
+
+ IRBuilderBase::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(Call->getFastMathFlags());
+
+ // TODO: Can this be shared to also handle LLVM intrinsics?
+ Value *X;
+ switch (Func) {
+ case LibFunc_sin:
+ case LibFunc_sinf:
+ case LibFunc_sinl:
+ case LibFunc_tan:
+ case LibFunc_tanf:
+ case LibFunc_tanl:
+ // sin(-X) --> -sin(X)
+ // tan(-X) --> -tan(X)
+ if (match(Call->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X)))))
+ return B.CreateFNeg(
+ copyFlags(*Call, B.CreateCall(Call->getCalledFunction(), X)));
+ break;
+ case LibFunc_cos:
+ case LibFunc_cosf:
+ case LibFunc_cosl:
+ // cos(-X) --> cos(X)
+ if (match(Call->getArgOperand(0), m_FNeg(m_Value(X))))
+ return copyFlags(*Call,
+ B.CreateCall(Call->getCalledFunction(), X, "cos"));
+ break;
+ default:
+ break;
+ }
+ return nullptr;
+}
+
+// Return a properly extended integer (DstWidth bits wide) if the operation is
+// an itofp.
+static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B, unsigned DstWidth) {
+ if (isa<SIToFPInst>(I2F) || isa<UIToFPInst>(I2F)) {
+ Value *Op = cast<Instruction>(I2F)->getOperand(0);
+ // Make sure that the exponent fits inside an "int" of size DstWidth,
+ // thus avoiding any range issues that the FP representation does not have.
+ unsigned BitWidth = Op->getType()->getPrimitiveSizeInBits();
+ if (BitWidth < DstWidth ||
+ (BitWidth == DstWidth && isa<SIToFPInst>(I2F)))
+ return isa<SIToFPInst>(I2F) ? B.CreateSExt(Op, B.getIntNTy(DstWidth))
+ : B.CreateZExt(Op, B.getIntNTy(DstWidth));
+ }
+
+ return nullptr;
+}
+
+/// Use exp{,2}(x * y) for pow(exp{,2}(x), y);
+/// ldexp(1.0, x) for pow(2.0, itofp(x)); exp2(n * x) for pow(2.0 ** n, x);
+/// exp10(x) for pow(10.0, x); exp2(log2(n) * x) for pow(n, x).
+Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) {
+ Module *M = Pow->getModule();
+ Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
+ Module *Mod = Pow->getModule();
+ Type *Ty = Pow->getType();
+ bool Ignored;
+
+ // Evaluate special cases related to a nested function as the base.
+
+ // pow(exp(x), y) -> exp(x * y)
+ // pow(exp2(x), y) -> exp2(x * y)
+ // If exp{,2}() is used only once, it is better to fold two transcendental
+ // math functions into one. If used again, exp{,2}() would still have to be
+ // called with the original argument, then keep both original transcendental
+ // functions. However, this transformation is only safe with fully relaxed
+ // math semantics, since, besides rounding differences, it changes overflow
+ // and underflow behavior quite dramatically. For example:
+ // pow(exp(1000), 0.001) = pow(inf, 0.001) = inf
+ // Whereas:
+ // exp(1000 * 0.001) = exp(1)
+ // TODO: Loosen the requirement for fully relaxed math semantics.
+ // TODO: Handle exp10() when more targets have it available.
+ CallInst *BaseFn = dyn_cast<CallInst>(Base);
+ if (BaseFn && BaseFn->hasOneUse() && BaseFn->isFast() && Pow->isFast()) {
+ LibFunc LibFn;
+
+ Function *CalleeFn = BaseFn->getCalledFunction();
+ if (CalleeFn && TLI->getLibFunc(CalleeFn->getName(), LibFn) &&
+ isLibFuncEmittable(M, TLI, LibFn)) {
+ StringRef ExpName;
+ Intrinsic::ID ID;
+ Value *ExpFn;
+ LibFunc LibFnFloat, LibFnDouble, LibFnLongDouble;
+
+ switch (LibFn) {
+ default:
+ return nullptr;
+ case LibFunc_expf:
+ case LibFunc_exp:
+ case LibFunc_expl:
+ ExpName = TLI->getName(LibFunc_exp);
+ ID = Intrinsic::exp;
+ LibFnFloat = LibFunc_expf;
+ LibFnDouble = LibFunc_exp;
+ LibFnLongDouble = LibFunc_expl;
+ break;
+ case LibFunc_exp2f:
+ case LibFunc_exp2:
+ case LibFunc_exp2l:
+ ExpName = TLI->getName(LibFunc_exp2);
+ ID = Intrinsic::exp2;
+ LibFnFloat = LibFunc_exp2f;
+ LibFnDouble = LibFunc_exp2;
+ LibFnLongDouble = LibFunc_exp2l;
+ break;
+ }
+
+ // Create new exp{,2}() with the product as its argument.
+ Value *FMul = B.CreateFMul(BaseFn->getArgOperand(0), Expo, "mul");
+ ExpFn = BaseFn->doesNotAccessMemory()
+ ? B.CreateCall(Intrinsic::getDeclaration(Mod, ID, Ty),
+ FMul, ExpName)
+ : emitUnaryFloatFnCall(FMul, TLI, LibFnDouble, LibFnFloat,
+ LibFnLongDouble, B,
+ BaseFn->getAttributes());
+
+ // Since the new exp{,2}() is different from the original one, dead code
+ // elimination cannot be trusted to remove it, since it may have side
+ // effects (e.g., errno). When the only consumer for the original
+ // exp{,2}() is pow(), then it has to be explicitly erased.
+ substituteInParent(BaseFn, ExpFn);
+ return ExpFn;
+ }
+ }
+
+ // Evaluate special cases related to a constant base.
+
+ const APFloat *BaseF;
+ if (!match(Pow->getArgOperand(0), m_APFloat(BaseF)))
+ return nullptr;
+
+ AttributeList NoAttrs; // Attributes are only meaningful on the original call
+
+ // pow(2.0, itofp(x)) -> ldexp(1.0, x)
+ if (match(Base, m_SpecificFP(2.0)) &&
+ (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo)) &&
+ hasFloatFn(M, TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) {
+ if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize()))
+ return copyFlags(*Pow,
+ emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), ExpoI,
+ TLI, LibFunc_ldexp, LibFunc_ldexpf,
+ LibFunc_ldexpl, B, NoAttrs));
+ }
+
+ // pow(2.0 ** n, x) -> exp2(n * x)
+ if (hasFloatFn(M, TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) {
+ APFloat BaseR = APFloat(1.0);
+ BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored);
+ BaseR = BaseR / *BaseF;
+ bool IsInteger = BaseF->isInteger(), IsReciprocal = BaseR.isInteger();
+ const APFloat *NF = IsReciprocal ? &BaseR : BaseF;
+ APSInt NI(64, false);
+ if ((IsInteger || IsReciprocal) &&
+ NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) ==
+ APFloat::opOK &&
+ NI > 1 && NI.isPowerOf2()) {
+ double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0);
+ Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul");
+ if (Pow->doesNotAccessMemory())
+ return copyFlags(*Pow, B.CreateCall(Intrinsic::getDeclaration(
+ Mod, Intrinsic::exp2, Ty),
+ FMul, "exp2"));
+ else
+ return copyFlags(*Pow, emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2,
+ LibFunc_exp2f,
+ LibFunc_exp2l, B, NoAttrs));
+ }
+ }
+
+ // pow(10.0, x) -> exp10(x)
+ // TODO: There is no exp10() intrinsic yet, but some day there shall be one.
+ if (match(Base, m_SpecificFP(10.0)) &&
+ hasFloatFn(M, TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l))
+ return copyFlags(*Pow, emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10,
+ LibFunc_exp10f, LibFunc_exp10l,
+ B, NoAttrs));
+
+ // pow(x, y) -> exp2(log2(x) * y)
+ if (Pow->hasApproxFunc() && Pow->hasNoNaNs() && BaseF->isFiniteNonZero() &&
+ !BaseF->isNegative()) {
+ // pow(1, inf) is defined to be 1 but exp2(log2(1) * inf) evaluates to NaN.
+ // Luckily optimizePow has already handled the x == 1 case.
+ assert(!match(Base, m_FPOne()) &&
+ "pow(1.0, y) should have been simplified earlier!");
+
+ Value *Log = nullptr;
+ if (Ty->isFloatTy())
+ Log = ConstantFP::get(Ty, std::log2(BaseF->convertToFloat()));
+ else if (Ty->isDoubleTy())
+ Log = ConstantFP::get(Ty, std::log2(BaseF->convertToDouble()));
+
+ if (Log) {
+ Value *FMul = B.CreateFMul(Log, Expo, "mul");
+ if (Pow->doesNotAccessMemory())
+ return copyFlags(*Pow, B.CreateCall(Intrinsic::getDeclaration(
+ Mod, Intrinsic::exp2, Ty),
+ FMul, "exp2"));
+ else if (hasFloatFn(M, TLI, Ty, LibFunc_exp2, LibFunc_exp2f,
+ LibFunc_exp2l))
+ return copyFlags(*Pow, emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2,
+ LibFunc_exp2f,
+ LibFunc_exp2l, B, NoAttrs));
+ }
+ }
+
+ return nullptr;
+}
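+
+// Concrete examples of the pow-to-exp rewrites above (each is guarded by the
+// availability and fast-math checks in the code; names are illustrative):
+//   pow(exp(x), y)     -> exp(x * y)            (exp() call has a single use)
+//   pow(2.0, itofp(x)) -> ldexp(1.0, x)
+//   pow(8.0, x)        -> exp2(3.0 * x)
+//   pow(0.25, x)       -> exp2(-2.0 * x)
+//   pow(10.0, x)       -> exp10(x)              (when exp10 is available)
+//   pow(6.0, x)        -> exp2(log2(6.0) * x)   (afn + nnan)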
+
+static Value *getSqrtCall(Value *V, AttributeList Attrs, bool NoErrno,
+ Module *M, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI) {
+ // If errno is never set, then use the intrinsic for sqrt().
+ if (NoErrno) {
+ Function *SqrtFn =
+ Intrinsic::getDeclaration(M, Intrinsic::sqrt, V->getType());
+ return B.CreateCall(SqrtFn, V, "sqrt");
+ }
+
+ // Otherwise, use the libcall for sqrt().
+ if (hasFloatFn(M, TLI, V->getType(), LibFunc_sqrt, LibFunc_sqrtf,
+ LibFunc_sqrtl))
+ // TODO: We also should check that the target can in fact lower the sqrt()
+ // libcall. We currently have no way to ask this question, so we ask if
+ // the target has a sqrt() libcall, which is not exactly the same.
+ return emitUnaryFloatFnCall(V, TLI, LibFunc_sqrt, LibFunc_sqrtf,
+ LibFunc_sqrtl, B, Attrs);
+
+ return nullptr;
+}
+
+/// Use square root in place of pow(x, +/-0.5).
+Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) {
+ Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
+ Module *Mod = Pow->getModule();
+ Type *Ty = Pow->getType();
+
+ const APFloat *ExpoF;
+ if (!match(Expo, m_APFloat(ExpoF)) ||
+ (!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)))
+ return nullptr;
+
+ // Converting pow(X, -0.5) to 1/sqrt(X) may introduce an extra rounding step,
+ // so that requires fast-math-flags (afn or reassoc).
+ if (ExpoF->isNegative() && (!Pow->hasApproxFunc() && !Pow->hasAllowReassoc()))
+ return nullptr;
+
+ // If we have a pow() library call (accesses memory) and we can't guarantee
+ // that the base is not an infinity, give up:
+ // pow(-Inf, 0.5) is optionally required to have a result of +Inf (not setting
+ // errno), but sqrt(-Inf) is required by various standards to set errno.
+ if (!Pow->doesNotAccessMemory() && !Pow->hasNoInfs() &&
+ !isKnownNeverInfinity(Base, TLI))
+ return nullptr;
+
+ Sqrt = getSqrtCall(Base, AttributeList(), Pow->doesNotAccessMemory(), Mod, B,
+ TLI);
+ if (!Sqrt)
+ return nullptr;
+
+ // Handle signed zero base by expanding to fabs(sqrt(x)).
+ if (!Pow->hasNoSignedZeros()) {
+ Function *FAbsFn = Intrinsic::getDeclaration(Mod, Intrinsic::fabs, Ty);
+ Sqrt = B.CreateCall(FAbsFn, Sqrt, "abs");
+ }
+
+ Sqrt = copyFlags(*Pow, Sqrt);
+
+ // Handle non finite base by expanding to
+ // (x == -infinity ? +infinity : sqrt(x)).
+ if (!Pow->hasNoInfs()) {
+ Value *PosInf = ConstantFP::getInfinity(Ty),
+ *NegInf = ConstantFP::getInfinity(Ty, true);
+ Value *FCmp = B.CreateFCmpOEQ(Base, NegInf, "isinf");
+ Sqrt = B.CreateSelect(FCmp, PosInf, Sqrt);
+ }
+
+ // If the exponent is negative, then get the reciprocal.
+ if (ExpoF->isNegative())
+ Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt, "reciprocal");
+
+ return Sqrt;
+}
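+
+// Concrete shape of the expansion above for pow(x, 0.5) without the nsz/ninf
+// fast-math flags (illustrative pseudo-IR only):
+//   %s   = sqrt(x)
+//   %abs = fabs(%s)                    ; only needed without nsz
+//   %r   = x == -inf ? +inf : %abs     ; only needed without ninf
+// pow(x, -0.5) additionally becomes 1.0 / %r (requires afn or reassoc).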
+
+static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M,
+ IRBuilderBase &B) {
+ Value *Args[] = {Base, Expo};
+ Type *Types[] = {Base->getType(), Expo->getType()};
+ Function *F = Intrinsic::getDeclaration(M, Intrinsic::powi, Types);
+ return B.CreateCall(F, Args);
+}
+
+Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) {
+ Value *Base = Pow->getArgOperand(0);
+ Value *Expo = Pow->getArgOperand(1);
+ Function *Callee = Pow->getCalledFunction();
+ StringRef Name = Callee->getName();
+ Type *Ty = Pow->getType();
+ Module *M = Pow->getModule();
+ bool AllowApprox = Pow->hasApproxFunc();
+ bool Ignored;
+
+ // Propagate the math semantics from the call to any created instructions.
+ IRBuilderBase::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(Pow->getFastMathFlags());
+ // Evaluate special cases related to the base.
+
+ // pow(1.0, x) -> 1.0
+ if (match(Base, m_FPOne()))
+ return Base;
+
+ if (Value *Exp = replacePowWithExp(Pow, B))
+ return Exp;
+
+ // Evaluate special cases related to the exponent.
+
+ // pow(x, -1.0) -> 1.0 / x
+ if (match(Expo, m_SpecificFP(-1.0)))
+ return B.CreateFDiv(ConstantFP::get(Ty, 1.0), Base, "reciprocal");
+
+ // pow(x, +/-0.0) -> 1.0
+ if (match(Expo, m_AnyZeroFP()))
+ return ConstantFP::get(Ty, 1.0);
+
+ // pow(x, 1.0) -> x
+ if (match(Expo, m_FPOne()))
+ return Base;
+
+ // pow(x, 2.0) -> x * x
+ if (match(Expo, m_SpecificFP(2.0)))
+ return B.CreateFMul(Base, Base, "square");
+
+ if (Value *Sqrt = replacePowWithSqrt(Pow, B))
+ return Sqrt;
+
+ // If we can approximate pow:
+ // pow(x, n) -> powi(x, n) * sqrt(x) if n has exactly a 0.5 fraction
+ // pow(x, n) -> powi(x, n) if n is a constant signed integer value
+ const APFloat *ExpoF;
+ if (AllowApprox && match(Expo, m_APFloat(ExpoF)) &&
+ !ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)) {
+ APFloat ExpoA(abs(*ExpoF));
+ APFloat ExpoI(*ExpoF);
+ Value *Sqrt = nullptr;
+ if (!ExpoA.isInteger()) {
+ APFloat Expo2 = ExpoA;
+ // To check if ExpoA is an integer + 0.5, we add it to itself. If there
+ // is no floating point exception and the result is an integer, then
+ // ExpoA == integer + 0.5
+ if (Expo2.add(ExpoA, APFloat::rmNearestTiesToEven) != APFloat::opOK)
+ return nullptr;
+
+ if (!Expo2.isInteger())
+ return nullptr;
+
+ if (ExpoI.roundToIntegral(APFloat::rmTowardNegative) !=
+ APFloat::opInexact)
+ return nullptr;
+ if (!ExpoI.isInteger())
+ return nullptr;
+ ExpoF = &ExpoI;
+
+ Sqrt = getSqrtCall(Base, AttributeList(), Pow->doesNotAccessMemory(), M,
+ B, TLI);
+ if (!Sqrt)
+ return nullptr;
+ }
+
+ // Any 0.5 fraction in the exponent has been handled above; now do
+ // pow -> powi for the remaining integer exponent.
+ APSInt IntExpo(TLI->getIntSize(), /*isUnsigned=*/false);
+ if (ExpoF->isInteger() &&
+ ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) ==
+ APFloat::opOK) {
+ Value *PowI = copyFlags(
+ *Pow,
+ createPowWithIntegerExponent(
+ Base, ConstantInt::get(B.getIntNTy(TLI->getIntSize()), IntExpo),
+ M, B));
+
+ if (PowI && Sqrt)
+ return B.CreateFMul(PowI, Sqrt);
+
+ return PowI;
+ }
+ }
+
+ // powf(x, itofp(y)) -> powi(x, y)
+ if (AllowApprox && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) {
+ if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize()))
+ return copyFlags(*Pow, createPowWithIntegerExponent(Base, ExpoI, M, B));
+ }
+
+ // Shrink pow() to powf() if the arguments are single precision,
+ // unless the result is expected to be double precision.
+ if (UnsafeFPShrink && Name == TLI->getName(LibFunc_pow) &&
+ hasFloatVersion(M, Name)) {
+ if (Value *Shrunk = optimizeBinaryDoubleFP(Pow, B, TLI, true))
+ return Shrunk;
+ }
+
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) {
+ Module *M = CI->getModule();
+ Function *Callee = CI->getCalledFunction();
+ StringRef Name = Callee->getName();
+ Value *Ret = nullptr;
+ if (UnsafeFPShrink && Name == TLI->getName(LibFunc_exp2) &&
+ hasFloatVersion(M, Name))
+ Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
+
+ Type *Ty = CI->getType();
+ Value *Op = CI->getArgOperand(0);
+
+ // exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= IntSize
+ // exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < IntSize
+ if ((isa<SIToFPInst>(Op) || isa<UIToFPInst>(Op)) &&
+ hasFloatFn(M, TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) {
+ if (Value *Exp = getIntToFPVal(Op, B, TLI->getIntSize()))
+ return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), Exp, TLI,
+ LibFunc_ldexp, LibFunc_ldexpf,
+ LibFunc_ldexpl, B, AttributeList());
+ }
+
+ return Ret;
+}
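+
+// Concrete examples of the exp2 rewrite above, assuming a 32-bit "int"
+// (TLI->getIntSize() == 32) and an available ldexp:
+//   exp2(sitofp i16 %x to double) -> ldexp(1.0, sext i16 %x to i32)
+//   exp2(uitofp i16 %x to double) -> ldexp(1.0, zext i16 %x to i32)
+// A 32-bit signed source is still accepted (the sext is then a no-op); a
+// 32-bit unsigned source is not, since its value may not fit in a signed int.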
+
+Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilderBase &B) {
+ Module *M = CI->getModule();
+
+ // If we can shrink the call to a float function rather than a double
+ // function, do that first.
+ Function *Callee = CI->getCalledFunction();
+ StringRef Name = Callee->getName();
+ if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(M, Name))
+ if (Value *Ret = optimizeBinaryDoubleFP(CI, B, TLI))
+ return Ret;
+
+ // The LLVM intrinsics minnum/maxnum correspond to fmin/fmax. Canonicalize to
+ // the intrinsics for improved optimization (for example, vectorization).
+ // No-signed-zeros is implied by the definitions of fmax/fmin themselves.
+ // From the C standard draft WG14/N1256:
+ // "Ideally, fmax would be sensitive to the sign of zero, for example
+ // fmax(-0.0, +0.0) would return +0; however, implementation in software
+ // might be impractical."
+ IRBuilderBase::FastMathFlagGuard Guard(B);
+ FastMathFlags FMF = CI->getFastMathFlags();
+ FMF.setNoSignedZeros();
+ B.setFastMathFlags(FMF);
+
+ Intrinsic::ID IID = Callee->getName().startswith("fmin") ? Intrinsic::minnum
+ : Intrinsic::maxnum;
+ Function *F = Intrinsic::getDeclaration(CI->getModule(), IID, CI->getType());
+ return copyFlags(
+ *CI, B.CreateCall(F, {CI->getArgOperand(0), CI->getArgOperand(1)}));
+}
+
+Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) {
+ Function *LogFn = Log->getCalledFunction();
+ StringRef LogNm = LogFn->getName();
+ Intrinsic::ID LogID = LogFn->getIntrinsicID();
+ Module *Mod = Log->getModule();
+ Type *Ty = Log->getType();
+ Value *Ret = nullptr;
+
+ if (UnsafeFPShrink && hasFloatVersion(Mod, LogNm))
+ Ret = optimizeUnaryDoubleFP(Log, B, TLI, true);
+
+ // The earlier call must also be 'fast' in order to do these transforms.
+ CallInst *Arg = dyn_cast<CallInst>(Log->getArgOperand(0));
+ if (!Log->isFast() || !Arg || !Arg->isFast() || !Arg->hasOneUse())
+ return Ret;
+
+ LibFunc LogLb, ExpLb, Exp2Lb, Exp10Lb, PowLb;
+
+ // This is only applicable to log(), log2(), log10().
+ if (TLI->getLibFunc(LogNm, LogLb))
+ switch (LogLb) {
+ case LibFunc_logf:
+ LogID = Intrinsic::log;
+ ExpLb = LibFunc_expf;
+ Exp2Lb = LibFunc_exp2f;
+ Exp10Lb = LibFunc_exp10f;
+ PowLb = LibFunc_powf;
+ break;
+ case LibFunc_log:
+ LogID = Intrinsic::log;
+ ExpLb = LibFunc_exp;
+ Exp2Lb = LibFunc_exp2;
+ Exp10Lb = LibFunc_exp10;
+ PowLb = LibFunc_pow;
+ break;
+ case LibFunc_logl:
+ LogID = Intrinsic::log;
+ ExpLb = LibFunc_expl;
+ Exp2Lb = LibFunc_exp2l;
+ Exp10Lb = LibFunc_exp10l;
+ PowLb = LibFunc_powl;
+ break;
+ case LibFunc_log2f:
+ LogID = Intrinsic::log2;
+ ExpLb = LibFunc_expf;
+ Exp2Lb = LibFunc_exp2f;
+ Exp10Lb = LibFunc_exp10f;
+ PowLb = LibFunc_powf;
+ break;
+ case LibFunc_log2:
+ LogID = Intrinsic::log2;
+ ExpLb = LibFunc_exp;
+ Exp2Lb = LibFunc_exp2;
+ Exp10Lb = LibFunc_exp10;
+ PowLb = LibFunc_pow;
+ break;
+ case LibFunc_log2l:
+ LogID = Intrinsic::log2;
+ ExpLb = LibFunc_expl;
+ Exp2Lb = LibFunc_exp2l;
+ Exp10Lb = LibFunc_exp10l;
+ PowLb = LibFunc_powl;
+ break;
+ case LibFunc_log10f:
+ LogID = Intrinsic::log10;
+ ExpLb = LibFunc_expf;
+ Exp2Lb = LibFunc_exp2f;
+ Exp10Lb = LibFunc_exp10f;
+ PowLb = LibFunc_powf;
+ break;
+ case LibFunc_log10:
+ LogID = Intrinsic::log10;
+ ExpLb = LibFunc_exp;
+ Exp2Lb = LibFunc_exp2;
+ Exp10Lb = LibFunc_exp10;
+ PowLb = LibFunc_pow;
+ break;
+ case LibFunc_log10l:
+ LogID = Intrinsic::log10;
+ ExpLb = LibFunc_expl;
+ Exp2Lb = LibFunc_exp2l;
+ Exp10Lb = LibFunc_exp10l;
+ PowLb = LibFunc_powl;
+ break;
+ default:
+ return Ret;
+ }
+ else if (LogID == Intrinsic::log || LogID == Intrinsic::log2 ||
+ LogID == Intrinsic::log10) {
+ if (Ty->getScalarType()->isFloatTy()) {
+ ExpLb = LibFunc_expf;
+ Exp2Lb = LibFunc_exp2f;
+ Exp10Lb = LibFunc_exp10f;
+ PowLb = LibFunc_powf;
+ } else if (Ty->getScalarType()->isDoubleTy()) {
+ ExpLb = LibFunc_exp;
+ Exp2Lb = LibFunc_exp2;
+ Exp10Lb = LibFunc_exp10;
+ PowLb = LibFunc_pow;
+ } else
+ return Ret;
+ } else
+ return Ret;
+
+ IRBuilderBase::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(FastMathFlags::getFast());
+
+ Intrinsic::ID ArgID = Arg->getIntrinsicID();
+ LibFunc ArgLb = NotLibFunc;
+ TLI->getLibFunc(*Arg, ArgLb);
+
+ // log(pow(x,y)) -> y*log(x)
+ AttributeList NoAttrs;
+ if (ArgLb == PowLb || ArgID == Intrinsic::pow) {
+ Value *LogX =
+ Log->doesNotAccessMemory()
+ ? B.CreateCall(Intrinsic::getDeclaration(Mod, LogID, Ty),
+ Arg->getOperand(0), "log")
+ : emitUnaryFloatFnCall(Arg->getOperand(0), TLI, LogNm, B, NoAttrs);
+ Value *MulY = B.CreateFMul(Arg->getArgOperand(1), LogX, "mul");
+ // Since pow() may have side effects, e.g. errno,
+ // dead code elimination may not be trusted to remove it.
+ substituteInParent(Arg, MulY);
+ return MulY;
+ }
+
+ // log(exp{,2,10}(y)) -> y*log({e,2,10})
+ // TODO: There is no exp10() intrinsic yet.
+ if (ArgLb == ExpLb || ArgLb == Exp2Lb || ArgLb == Exp10Lb ||
+ ArgID == Intrinsic::exp || ArgID == Intrinsic::exp2) {
+ Constant *Eul;
+ if (ArgLb == ExpLb || ArgID == Intrinsic::exp)
+ // FIXME: Add more precise value of e for long double.
+ Eul = ConstantFP::get(Log->getType(), numbers::e);
+ else if (ArgLb == Exp2Lb || ArgID == Intrinsic::exp2)
+ Eul = ConstantFP::get(Log->getType(), 2.0);
+ else
+ Eul = ConstantFP::get(Log->getType(), 10.0);
+ Value *LogE = Log->doesNotAccessMemory()
+ ? B.CreateCall(Intrinsic::getDeclaration(Mod, LogID, Ty),
+ Eul, "log")
+ : emitUnaryFloatFnCall(Eul, TLI, LogNm, B, NoAttrs);
+ Value *MulY = B.CreateFMul(Arg->getArgOperand(0), LogE, "mul");
+ // Since exp() may have side effects, e.g. errno,
+ // dead code elimination may not be trusted to remove it.
+ substituteInParent(Arg, MulY);
+ return MulY;
+ }
+
+ return Ret;
+}
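+
+// Concrete examples of the log rewrites above (both calls must be fast and the
+// inner call must have a single use; names are illustrative):
+//   log2(pow(x, y)) -> y * log2(x)
+//   log(exp(y))     -> y * log(e)        (e is numbers::e)
+//   log10(exp2(y))  -> y * log10(2.0)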
+
+Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) {
+ Module *M = CI->getModule();
+ Function *Callee = CI->getCalledFunction();
+ Value *Ret = nullptr;
+ // TODO: Once we have a way (other than checking for the existence of the
+ // libcall) to tell whether our target can lower @llvm.sqrt, relax the
+ // condition below.
+ if (isLibFuncEmittable(M, TLI, LibFunc_sqrtf) &&
+ (Callee->getName() == "sqrt" ||
+ Callee->getIntrinsicID() == Intrinsic::sqrt))
+ Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
+
+ if (!CI->isFast())
+ return Ret;
+
+ Instruction *I = dyn_cast<Instruction>(CI->getArgOperand(0));
+ if (!I || I->getOpcode() != Instruction::FMul || !I->isFast())
+ return Ret;
+
+ // We're looking for a repeated factor in a multiplication tree,
+ // so we can do this fold: sqrt(x * x) -> fabs(x);
+ // or this fold: sqrt((x * x) * y) -> fabs(x) * sqrt(y).
+ Value *Op0 = I->getOperand(0);
+ Value *Op1 = I->getOperand(1);
+ Value *RepeatOp = nullptr;
+ Value *OtherOp = nullptr;
+ if (Op0 == Op1) {
+ // Simple match: the operands of the multiply are identical.
+ RepeatOp = Op0;
+ } else {
+ // Look for a more complicated pattern: one of the operands is itself
+ // a multiply, so search for a common factor in that multiply.
+ // Note: We don't bother looking any deeper than this first level or for
+ // variations of this pattern because instcombine's visitFMUL and/or the
+ // reassociation pass should give us this form.
+ Value *OtherMul0, *OtherMul1;
+ if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) {
+ // Pattern: sqrt((x * y) * z)
+ if (OtherMul0 == OtherMul1 && cast<Instruction>(Op0)->isFast()) {
+ // Matched: sqrt((x * x) * z)
+ RepeatOp = OtherMul0;
+ OtherOp = Op1;
+ }
+ }
+ }
+ if (!RepeatOp)
+ return Ret;
+
+ // Fast math flags for any created instructions should match the sqrt
+ // and multiply.
+ IRBuilderBase::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(I->getFastMathFlags());
+
+ // If we found a repeated factor, hoist it out of the square root and
+ // replace it with the fabs of that factor.
+ Type *ArgType = I->getType();
+ Function *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType);
+ Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs");
+ if (OtherOp) {
+ // If we found a non-repeated factor, we still need to get its square
+ // root. We then multiply that by the value that was simplified out
+ // of the square root calculation.
+ Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType);
+ Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt");
+ return copyFlags(*CI, B.CreateFMul(FabsCall, SqrtCall));
+ }
+ return copyFlags(*CI, FabsCall);
+}
+
+// TODO: Generalize to handle any trig function and its inverse.
+Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilderBase &B) {
+ Module *M = CI->getModule();
+ Function *Callee = CI->getCalledFunction();
+ Value *Ret = nullptr;
+ StringRef Name = Callee->getName();
+ if (UnsafeFPShrink && Name == "tan" && hasFloatVersion(M, Name))
+ Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
+
+ Value *Op1 = CI->getArgOperand(0);
+ auto *OpC = dyn_cast<CallInst>(Op1);
+ if (!OpC)
+ return Ret;
+
+ // Both calls must be 'fast' in order to remove them.
+ if (!CI->isFast() || !OpC->isFast())
+ return Ret;
+
+ // tan(atan(x)) -> x
+ // tanf(atanf(x)) -> x
+ // tanl(atanl(x)) -> x
+ LibFunc Func;
+ Function *F = OpC->getCalledFunction();
+ if (F && TLI->getLibFunc(F->getName(), Func) &&
+ isLibFuncEmittable(M, TLI, Func) &&
+ ((Func == LibFunc_atan && Callee->getName() == "tan") ||
+ (Func == LibFunc_atanf && Callee->getName() == "tanf") ||
+ (Func == LibFunc_atanl && Callee->getName() == "tanl")))
+ Ret = OpC->getArgOperand(0);
+ return Ret;
+}
+
+static bool isTrigLibCall(CallInst *CI) {
+ // We can only hope to do anything useful if we can ignore things like errno
+ // and floating-point exceptions.
+ // We already checked the prototype.
+ return CI->doesNotThrow() && CI->doesNotAccessMemory();
+}
+
+static bool insertSinCosCall(IRBuilderBase &B, Function *OrigCallee, Value *Arg,
+ bool UseFloat, Value *&Sin, Value *&Cos,
+ Value *&SinCos, const TargetLibraryInfo *TLI) {
+ Module *M = OrigCallee->getParent();
+ Type *ArgTy = Arg->getType();
+ Type *ResTy;
+ StringRef Name;
+
+ Triple T(OrigCallee->getParent()->getTargetTriple());
+ if (UseFloat) {
+ Name = "__sincospif_stret";
+
+ assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now");
+ // x86_64 can't use {float, float} since that would be returned in both
+ // xmm0 and xmm1, which isn't what a real struct would do.
+ ResTy = T.getArch() == Triple::x86_64
+ ? static_cast<Type *>(FixedVectorType::get(ArgTy, 2))
+ : static_cast<Type *>(StructType::get(ArgTy, ArgTy));
+ } else {
+ Name = "__sincospi_stret";
+ ResTy = StructType::get(ArgTy, ArgTy);
+ }
+
+ if (!isLibFuncEmittable(M, TLI, Name))
+ return false;
+ LibFunc TheLibFunc;
+ TLI->getLibFunc(Name, TheLibFunc);
+ FunctionCallee Callee = getOrInsertLibFunc(
+ M, *TLI, TheLibFunc, OrigCallee->getAttributes(), ResTy, ArgTy);
+
+ if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
+ // If the argument is an instruction, it must dominate all uses so put our
+ // sincos call there.
+ B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator());
+ } else {
+ // Otherwise (e.g. for a constant) the beginning of the function is as
+ // good a place as any.
+ BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock();
+ B.SetInsertPoint(&EntryBB, EntryBB.begin());
+ }
+
+ SinCos = B.CreateCall(Callee, Arg, "sincospi");
+
+ if (SinCos->getType()->isStructTy()) {
+ Sin = B.CreateExtractValue(SinCos, 0, "sinpi");
+ Cos = B.CreateExtractValue(SinCos, 1, "cospi");
+ } else {
+ Sin = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 0),
+ "sinpi");
+ Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1),
+ "cospi");
+ }
+
+ return true;
+}
+
+Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilderBase &B) {
+ // Make sure the prototype is as expected, otherwise the rest of the
+ // function is probably invalid and likely to abort.
+ if (!isTrigLibCall(CI))
+ return nullptr;
+
+ Value *Arg = CI->getArgOperand(0);
+ SmallVector<CallInst *, 1> SinCalls;
+ SmallVector<CallInst *, 1> CosCalls;
+ SmallVector<CallInst *, 1> SinCosCalls;
+
+ bool IsFloat = Arg->getType()->isFloatTy();
+
+ // Look for all compatible sinpi, cospi and sincospi calls with the same
+ // argument. If there are enough (in some sense) we can make the
+ // substitution.
+ Function *F = CI->getFunction();
+ for (User *U : Arg->users())
+ classifyArgUse(U, F, IsFloat, SinCalls, CosCalls, SinCosCalls);
+
+ // It's only worthwhile if both sinpi and cospi are actually used.
+ if (SinCalls.empty() || CosCalls.empty())
+ return nullptr;
+
+ Value *Sin, *Cos, *SinCos;
+ if (!insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos,
+ SinCos, TLI))
+ return nullptr;
+
+ auto replaceTrigInsts = [this](SmallVectorImpl<CallInst *> &Calls,
+ Value *Res) {
+ for (CallInst *C : Calls)
+ replaceAllUsesWith(C, Res);
+ };
+
+ replaceTrigInsts(SinCalls, Sin);
+ replaceTrigInsts(CosCalls, Cos);
+ replaceTrigInsts(SinCosCalls, SinCos);
+
+ return nullptr;
+}
+
+void LibCallSimplifier::classifyArgUse(
+ Value *Val, Function *F, bool IsFloat,
+ SmallVectorImpl<CallInst *> &SinCalls,
+ SmallVectorImpl<CallInst *> &CosCalls,
+ SmallVectorImpl<CallInst *> &SinCosCalls) {
+ auto *CI = dyn_cast<CallInst>(Val);
+ if (!CI || CI->use_empty())
+ return;
+
+ // Don't consider calls in other functions.
+ if (CI->getFunction() != F)
+ return;
+
+ Module *M = CI->getModule();
+ Function *Callee = CI->getCalledFunction();
+ LibFunc Func;
+ if (!Callee || !TLI->getLibFunc(*Callee, Func) ||
+ !isLibFuncEmittable(M, TLI, Func) ||
+ !isTrigLibCall(CI))
+ return;
+
+ if (IsFloat) {
+ if (Func == LibFunc_sinpif)
+ SinCalls.push_back(CI);
+ else if (Func == LibFunc_cospif)
+ CosCalls.push_back(CI);
+ else if (Func == LibFunc_sincospif_stret)
+ SinCosCalls.push_back(CI);
+ } else {
+ if (Func == LibFunc_sinpi)
+ SinCalls.push_back(CI);
+ else if (Func == LibFunc_cospi)
+ CosCalls.push_back(CI);
+ else if (Func == LibFunc_sincospi_stret)
+ SinCosCalls.push_back(CI);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Integer Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilderBase &B) {
+ // All variants of ffs return int which need not be 32 bits wide.
+ // ffs{,l,ll}(x) -> x != 0 ? (int)llvm.cttz(x)+1 : 0
+ Type *RetType = CI->getType();
+ Value *Op = CI->getArgOperand(0);
+ Type *ArgType = Op->getType();
+ Function *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(),
+ Intrinsic::cttz, ArgType);
+ Value *V = B.CreateCall(F, {Op, B.getTrue()}, "cttz");
+ V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
+ V = B.CreateIntCast(V, RetType, false);
+
+ Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType));
+ return B.CreateSelect(Cond, V, ConstantInt::get(RetType, 0));
+}
+
+Value *LibCallSimplifier::optimizeFls(CallInst *CI, IRBuilderBase &B) {
+ // All variants of fls return int which need not be 32 bits wide.
+ // fls{,l,ll}(x) -> (int)(sizeInBits(x) - llvm.ctlz(x, false))
+ Value *Op = CI->getArgOperand(0);
+ Type *ArgType = Op->getType();
+ Function *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(),
+ Intrinsic::ctlz, ArgType);
+ Value *V = B.CreateCall(F, {Op, B.getFalse()}, "ctlz");
+ V = B.CreateSub(ConstantInt::get(V->getType(), ArgType->getIntegerBitWidth()),
+ V);
+ return B.CreateIntCast(V, CI->getType(), false);
+}
+
+Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilderBase &B) {
+ // abs(x) -> x <s 0 ? -x : x
+ // The negation has 'nsw' because abs of INT_MIN is undefined.
+ Value *X = CI->getArgOperand(0);
+ Value *IsNeg = B.CreateIsNeg(X);
+ Value *NegX = B.CreateNSWNeg(X, "neg");
+ return B.CreateSelect(IsNeg, NegX, X);
+}
+
+Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilderBase &B) {
+ // isdigit(c) -> (c-'0') <u 10
+ Value *Op = CI->getArgOperand(0);
+ Type *ArgType = Op->getType();
+ Op = B.CreateSub(Op, ConstantInt::get(ArgType, '0'), "isdigittmp");
+ Op = B.CreateICmpULT(Op, ConstantInt::get(ArgType, 10), "isdigit");
+ return B.CreateZExt(Op, CI->getType());
+}
+
+Value *LibCallSimplifier::optimizeIsAscii(CallInst *CI, IRBuilderBase &B) {
+ // isascii(c) -> c <u 128
+ Value *Op = CI->getArgOperand(0);
+ Type *ArgType = Op->getType();
+ Op = B.CreateICmpULT(Op, ConstantInt::get(ArgType, 128), "isascii");
+ return B.CreateZExt(Op, CI->getType());
+}
+
+Value *LibCallSimplifier::optimizeToAscii(CallInst *CI, IRBuilderBase &B) {
+ // toascii(c) -> c & 0x7f
+ return B.CreateAnd(CI->getArgOperand(0),
+ ConstantInt::get(CI->getType(), 0x7F));
+}
+
+// Fold calls to atoi, atol, and atoll.
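+// e.g. atoi("42") with a constant string argument folds to the constant 42.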
+Value *LibCallSimplifier::optimizeAtoi(CallInst *CI, IRBuilderBase &B) {
+ CI->addParamAttr(0, Attribute::NoCapture);
+
+ StringRef Str;
+ if (!getConstantStringInfo(CI->getArgOperand(0), Str))
+ return nullptr;
+
+ return convertStrToInt(CI, Str, nullptr, 10, /*AsSigned=*/true, B);
+}
+
+// Fold calls to strtol, strtoll, strtoul, and strtoull.
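+// e.g. strtol("42", NULL, 10) with a constant string and constant base folds
+// to the constant 42.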
+Value *LibCallSimplifier::optimizeStrToInt(CallInst *CI, IRBuilderBase &B,
+ bool AsSigned) {
+ Value *EndPtr = CI->getArgOperand(1);
+ if (isa<ConstantPointerNull>(EndPtr)) {
+ // With a null EndPtr, this function won't capture the main argument.
+ // It would be readonly too, except that it still may write to errno.
+ CI->addParamAttr(0, Attribute::NoCapture);
+ EndPtr = nullptr;
+ } else if (!isKnownNonZero(EndPtr, DL))
+ return nullptr;
+
+ StringRef Str;
+ if (!getConstantStringInfo(CI->getArgOperand(0), Str))
+ return nullptr;
+
+ if (ConstantInt *CInt = dyn_cast<ConstantInt>(CI->getArgOperand(2))) {
+ return convertStrToInt(CI, Str, EndPtr, CInt->getSExtValue(), AsSigned, B);
+ }
+
+ return nullptr;
+}
+
+//===----------------------------------------------------------------------===//
+// Formatting and IO Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg);
+
+Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilderBase &B,
+ int StreamArg) {
+ Function *Callee = CI->getCalledFunction();
+ // Error reporting calls should be cold, mark them as such.
+ // This applies even to non-builtin calls: it is only a hint and applies to
+ // functions that the frontend might not understand as builtins.
+
+ // This heuristic was suggested in:
+ // Improving Static Branch Prediction in a Compiler
+ // Brian L. Deitrich, Ben-Chung Cheng, Wen-mei W. Hwu
+ // Proceedings of PACT'98, Oct. 1998, IEEE
+ if (!CI->hasFnAttr(Attribute::Cold) &&
+ isReportingError(Callee, CI, StreamArg)) {
+ CI->addFnAttr(Attribute::Cold);
+ }
+
+ return nullptr;
+}
+
+static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) {
+ if (!Callee || !Callee->isDeclaration())
+ return false;
+
+ if (StreamArg < 0)
+ return true;
+
+ // These functions might be considered cold, but only if their stream
+ // argument is stderr.
+
+ if (StreamArg >= (int)CI->arg_size())
+ return false;
+ LoadInst *LI = dyn_cast<LoadInst>(CI->getArgOperand(StreamArg));
+ if (!LI)
+ return false;
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand());
+ if (!GV || !GV->isDeclaration())
+ return false;
+ return GV->getName() == "stderr";
+}
+
+Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilderBase &B) {
+ // Check for a fixed format string.
+ StringRef FormatStr;
+ if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr))
+ return nullptr;
+
+ // Empty format string -> noop.
+ if (FormatStr.empty()) // Tolerate printf's declared void.
+ return CI->use_empty() ? (Value *)CI : ConstantInt::get(CI->getType(), 0);
+
+ // Do not do any of the following transformations if the printf return value
+ // is used; in general the printf return value is not compatible with either
+ // putchar() or puts().
+ if (!CI->use_empty())
+ return nullptr;
+
+ Type *IntTy = CI->getType();
+ // printf("x") -> putchar('x'), even for "%" and "%%".
+ if (FormatStr.size() == 1 || FormatStr == "%%") {
+ // Convert the character to unsigned char before passing it to putchar
+ // to avoid host-specific sign extension in the IR. Putchar converts
+ // it to unsigned char regardless.
+ Value *IntChar = ConstantInt::get(IntTy, (unsigned char)FormatStr[0]);
+ return copyFlags(*CI, emitPutChar(IntChar, B, TLI));
+ }
+
+ // Try to remove call or emit putchar/puts.
+ if (FormatStr == "%s" && CI->arg_size() > 1) {
+ StringRef OperandStr;
+ if (!getConstantStringInfo(CI->getOperand(1), OperandStr))
+ return nullptr;
+ // printf("%s", "") --> NOP
+ if (OperandStr.empty())
+ return (Value *)CI;
+ // printf("%s", "a") --> putchar('a')
+ if (OperandStr.size() == 1) {
+ // Convert the character to unsigned char before passing it to putchar
+ // to avoid host-specific sign extension in the IR. Putchar converts
+ // it to unsigned char regardless.
+ Value *IntChar = ConstantInt::get(IntTy, (unsigned char)OperandStr[0]);
+ return copyFlags(*CI, emitPutChar(IntChar, B, TLI));
+ }
+ // printf("%s", str"\n") --> puts(str)
+ if (OperandStr.back() == '\n') {
+ OperandStr = OperandStr.drop_back();
+ Value *GV = B.CreateGlobalString(OperandStr, "str");
+ return copyFlags(*CI, emitPutS(GV, B, TLI));
+ }
+ return nullptr;
+ }
+
+ // printf("foo\n") --> puts("foo")
+ if (FormatStr.back() == '\n' &&
+ !FormatStr.contains('%')) { // No format characters.
+ // Create a string literal with no \n on it. We expect the constant merge
+ // pass to be run after this pass, to merge duplicate strings.
+ FormatStr = FormatStr.drop_back();
+ Value *GV = B.CreateGlobalString(FormatStr, "str");
+ return copyFlags(*CI, emitPutS(GV, B, TLI));
+ }
+
+ // Optimize specific format strings.
+ // printf("%c", chr) --> putchar(chr)
+ if (FormatStr == "%c" && CI->arg_size() > 1 &&
+ CI->getArgOperand(1)->getType()->isIntegerTy()) {
+ // Convert the argument to the type expected by putchar, i.e., int, which
+ // need not be 32 bits wide but which is the same as printf's return type.
+ Value *IntChar = B.CreateIntCast(CI->getArgOperand(1), IntTy, false);
+ return copyFlags(*CI, emitPutChar(IntChar, B, TLI));
+ }
+
+ // printf("%s\n", str) --> puts(str)
+ if (FormatStr == "%s\n" && CI->arg_size() > 1 &&
+ CI->getArgOperand(1)->getType()->isPointerTy())
+ return copyFlags(*CI, emitPutS(CI->getArgOperand(1), B, TLI));
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilderBase &B) {
+
+ Module *M = CI->getModule();
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ if (Value *V = optimizePrintFString(CI, B)) {
+ return V;
+ }
+
+ annotateNonNullNoUndefBasedOnAccess(CI, 0);
+
+ // printf(format, ...) -> iprintf(format, ...) if no floating point
+ // arguments.
+ if (isLibFuncEmittable(M, TLI, LibFunc_iprintf) &&
+ !callHasFloatingPointArgument(CI)) {
+ FunctionCallee IPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_iprintf, FT,
+ Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(IPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+
+ // printf(format, ...) -> __small_printf(format, ...) if no 128-bit floating point
+ // arguments.
+ if (isLibFuncEmittable(M, TLI, LibFunc_small_printf) &&
+ !callHasFP128Argument(CI)) {
+ auto SmallPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_small_printf, FT,
+ Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(SmallPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
+ IRBuilderBase &B) {
+ // Check for a fixed format string.
+ StringRef FormatStr;
+ if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
+ return nullptr;
+
+ // If we just have a format string (nothing else crazy) transform it.
+ Value *Dest = CI->getArgOperand(0);
+ if (CI->arg_size() == 2) {
+ // Make sure there's no % in the constant array. We could try to handle
+ // %% -> % in the future if we cared.
+ if (FormatStr.contains('%'))
+ return nullptr; // we found a format specifier, bail out.
+
+ // sprintf(str, fmt) -> llvm.memcpy(align 1 str, align 1 fmt, strlen(fmt)+1)
+ B.CreateMemCpy(
+ Dest, Align(1), CI->getArgOperand(1), Align(1),
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()),
+ FormatStr.size() + 1)); // Copy the null byte.
+ return ConstantInt::get(CI->getType(), FormatStr.size());
+ }
+
+ // The remaining optimizations require the format string to be "%s" or "%c"
+ // and have an extra operand.
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->arg_size() < 3)
+ return nullptr;
+
+ // Decode the second character of the format string.
+ if (FormatStr[1] == 'c') {
+ // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
+ if (!CI->getArgOperand(2)->getType()->isIntegerTy())
+ return nullptr;
+ Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
+ Value *Ptr = castToCStr(Dest, B);
+ B.CreateStore(V, Ptr);
+ Ptr = B.CreateInBoundsGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
+ B.CreateStore(B.getInt8(0), Ptr);
+
+ return ConstantInt::get(CI->getType(), 1);
+ }
+
+ if (FormatStr[1] == 's') {
+ // sprintf(dest, "%s", str) -> llvm.memcpy(align 1 dest, align 1 str,
+ // strlen(str)+1)
+ if (!CI->getArgOperand(2)->getType()->isPointerTy())
+ return nullptr;
+
+ if (CI->use_empty())
+ // sprintf(dest, "%s", str) -> strcpy(dest, str)
+ return copyFlags(*CI, emitStrCpy(Dest, CI->getArgOperand(2), B, TLI));
+
+ uint64_t SrcLen = GetStringLength(CI->getArgOperand(2));
+ if (SrcLen) {
+ B.CreateMemCpy(
+ Dest, Align(1), CI->getArgOperand(2), Align(1),
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), SrcLen));
+ // Returns the total number of characters written, not counting the null character.
+ return ConstantInt::get(CI->getType(), SrcLen - 1);
+ } else if (Value *V = emitStpCpy(Dest, CI->getArgOperand(2), B, TLI)) {
+ // sprintf(dest, "%s", str) -> stpcpy(dest, str) - dest
+ // Handle mismatched pointer types (goes away with typeless pointers?).
+ V = B.CreatePointerCast(V, B.getInt8PtrTy());
+ Dest = B.CreatePointerCast(Dest, B.getInt8PtrTy());
+ Value *PtrDiff = B.CreatePtrDiff(B.getInt8Ty(), V, Dest);
+ return B.CreateIntCast(PtrDiff, CI->getType(), false);
+ }
+
+ bool OptForSize = CI->getFunction()->hasOptSize() ||
+ llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI,
+ PGSOQueryType::IRPass);
+ if (OptForSize)
+ return nullptr;
+
+ Value *Len = emitStrLen(CI->getArgOperand(2), B, DL, TLI);
+ if (!Len)
+ return nullptr;
+ Value *IncLen =
+ B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc");
+ B.CreateMemCpy(Dest, Align(1), CI->getArgOperand(2), Align(1), IncLen);
+
+ // The sprintf result is the unincremented number of bytes in the string.
+ return B.CreateIntCast(Len, CI->getType(), false);
+ }
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilderBase &B) {
+ Module *M = CI->getModule();
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ if (Value *V = optimizeSPrintFString(CI, B)) {
+ return V;
+ }
+
+ annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
+
+ // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
+ // point arguments.
+ if (isLibFuncEmittable(M, TLI, LibFunc_siprintf) &&
+ !callHasFloatingPointArgument(CI)) {
+ FunctionCallee SIPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_siprintf,
+ FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(SIPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+
+ // sprintf(str, format, ...) -> __small_sprintf(str, format, ...) if no 128-bit
+ // floating point arguments.
+ if (isLibFuncEmittable(M, TLI, LibFunc_small_sprintf) &&
+ !callHasFP128Argument(CI)) {
+ auto SmallSPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_small_sprintf, FT,
+ Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(SmallSPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+
+ return nullptr;
+}
+
+// Transform an snprintf call CI with the bound N to format the string Str
+ // either to a call to memcpy, to a single-character store, or to nothing,
+// and fold the result to a constant. A nonnull StrArg refers to the string
+// argument being formatted. Otherwise the call is one with N < 2 and
+// the "%c" directive to format a single character.
+Value *LibCallSimplifier::emitSnPrintfMemCpy(CallInst *CI, Value *StrArg,
+ StringRef Str, uint64_t N,
+ IRBuilderBase &B) {
+ assert(StrArg || (N < 2 && Str.size() == 1));
+
+ unsigned IntBits = TLI->getIntSize();
+ uint64_t IntMax = maxIntN(IntBits);
+ if (Str.size() > IntMax)
+ // Bail if the string is longer than INT_MAX. POSIX requires
+ // implementations to set errno to EOVERFLOW in this case, in
+ // addition to when N is larger than that (checked by the caller).
+ return nullptr;
+
+ Value *StrLen = ConstantInt::get(CI->getType(), Str.size());
+ if (N == 0)
+ return StrLen;
+
+ // Set to the number of bytes to copy from StrArg, which is also
+ // the offset of the terminating nul.
+ uint64_t NCopy;
+ if (N > Str.size())
+ // Copy the full string, including the terminating nul (which must
+ // be present regardless of the bound).
+ NCopy = Str.size() + 1;
+ else
+ NCopy = N - 1;
+
+ Value *DstArg = CI->getArgOperand(0);
+ if (NCopy && StrArg)
+ // Transform the call to llvm.memcpy(dst, fmt, N).
+ copyFlags(
+ *CI,
+ B.CreateMemCpy(
+ DstArg, Align(1), StrArg, Align(1),
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), NCopy)));
+
+ if (N > Str.size())
+ // Return early when the whole format string, including the final nul,
+ // has been copied.
+ return StrLen;
+
+ // Otherwise, when truncating the string, append a terminating nul.
+ Type *Int8Ty = B.getInt8Ty();
+ Value *NulOff = B.getIntN(IntBits, NCopy);
+ Value *DstEnd = B.CreateInBoundsGEP(Int8Ty, DstArg, NulOff, "endptr");
+ B.CreateStore(ConstantInt::get(Int8Ty, 0), DstEnd);
+ return StrLen;
+}
+
+Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI,
+ IRBuilderBase &B) {
+ // Check for a constant size (bound) argument.
+ ConstantInt *Size = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ if (!Size)
+ return nullptr;
+
+ uint64_t N = Size->getZExtValue();
+ uint64_t IntMax = maxIntN(TLI->getIntSize());
+ if (N > IntMax)
+ // Bail if the bound exceeds INT_MAX. POSIX requires implementations
+ // to set errno to EOVERFLOW in this case.
+ return nullptr;
+
+ Value *DstArg = CI->getArgOperand(0);
+ Value *FmtArg = CI->getArgOperand(2);
+
+ // Check for a fixed format string.
+ StringRef FormatStr;
+ if (!getConstantStringInfo(FmtArg, FormatStr))
+ return nullptr;
+
+ // If we just have a format string (nothing else crazy) transform it.
+ if (CI->arg_size() == 3) {
+ if (FormatStr.contains('%'))
+ // Bail if the format string contains a directive and there are
+ // no arguments. We could handle "%%" in the future.
+ return nullptr;
+
+ return emitSnPrintfMemCpy(CI, FmtArg, FormatStr, N, B);
+ }
+
+ // The remaining optimizations require the format string to be "%s" or "%c"
+ // and have an extra operand.
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->arg_size() != 4)
+ return nullptr;
+
+ // Decode the second character of the format string.
+ if (FormatStr[1] == 'c') {
+ if (N <= 1) {
+ // Use an arbitrary string of length 1 to transform the call into
+ // either a nul store (N == 1) or a no-op (N == 0) and fold it
+ // to one.
+ StringRef CharStr("*");
+ return emitSnPrintfMemCpy(CI, nullptr, CharStr, N, B);
+ }
+
+ // snprintf(dst, size, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
+ if (!CI->getArgOperand(3)->getType()->isIntegerTy())
+ return nullptr;
+ Value *V = B.CreateTrunc(CI->getArgOperand(3), B.getInt8Ty(), "char");
+ Value *Ptr = castToCStr(DstArg, B);
+ B.CreateStore(V, Ptr);
+ Ptr = B.CreateInBoundsGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
+ B.CreateStore(B.getInt8(0), Ptr);
+ return ConstantInt::get(CI->getType(), 1);
+ }
+
+ if (FormatStr[1] != 's')
+ return nullptr;
+
+ Value *StrArg = CI->getArgOperand(3);
+ // snprintf(dest, size, "%s", str) to llvm.memcpy(dest, str, len+1, 1)
+ StringRef Str;
+ if (!getConstantStringInfo(StrArg, Str))
+ return nullptr;
+
+ return emitSnPrintfMemCpy(CI, StrArg, Str, N, B);
+}
+
+Value *LibCallSimplifier::optimizeSnPrintF(CallInst *CI, IRBuilderBase &B) {
+ if (Value *V = optimizeSnPrintFString(CI, B)) {
+ return V;
+ }
+
+ if (isKnownNonZero(CI->getOperand(1), DL))
+ annotateNonNullNoUndefBasedOnAccess(CI, 0);
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI,
+ IRBuilderBase &B) {
+ optimizeErrorReporting(CI, B, 0);
+
+ // All the optimizations depend on the format string.
+ StringRef FormatStr;
+ if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
+ return nullptr;
+
+ // Do not do any of the following transformations if the fprintf return
+ // value is used; in general the fprintf return value is not compatible
+ // with fwrite(), fputc() or fputs().
+ if (!CI->use_empty())
+ return nullptr;
+
+ // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
+ if (CI->arg_size() == 2) {
+ // Could handle %% -> % if we cared.
+ if (FormatStr.contains('%'))
+ return nullptr; // We found a format specifier.
+
+ unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule());
+ Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits);
+ return copyFlags(
+ *CI, emitFWrite(CI->getArgOperand(1),
+ ConstantInt::get(SizeTTy, FormatStr.size()),
+ CI->getArgOperand(0), B, DL, TLI));
+ }
+
+ // The remaining optimizations require the format string to be "%s" or "%c"
+ // and have an extra operand.
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->arg_size() < 3)
+ return nullptr;
+
+ // Decode the second character of the format string.
+ if (FormatStr[1] == 'c') {
+ // fprintf(F, "%c", chr) --> fputc((int)chr, F)
+ if (!CI->getArgOperand(2)->getType()->isIntegerTy())
+ return nullptr;
+ Type *IntTy = B.getIntNTy(TLI->getIntSize());
+ Value *V = B.CreateIntCast(CI->getArgOperand(2), IntTy, /*isSigned*/ true,
+ "chari");
+ return copyFlags(*CI, emitFPutC(V, CI->getArgOperand(0), B, TLI));
+ }
+
+ if (FormatStr[1] == 's') {
+ // fprintf(F, "%s", str) --> fputs(str, F)
+ if (!CI->getArgOperand(2)->getType()->isPointerTy())
+ return nullptr;
+ return copyFlags(
+ *CI, emitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI));
+ }
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilderBase &B) {
+ Module *M = CI->getModule();
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ if (Value *V = optimizeFPrintFString(CI, B)) {
+ return V;
+ }
+
+ // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
+ // floating point arguments.
+ if (isLibFuncEmittable(M, TLI, LibFunc_fiprintf) &&
+ !callHasFloatingPointArgument(CI)) {
+ FunctionCallee FIPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_fiprintf,
+ FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(FIPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+
+ // fprintf(stream, format, ...) -> __small_fprintf(stream, format, ...) if no
+ // 128-bit floating point arguments.
+ if (isLibFuncEmittable(M, TLI, LibFunc_small_fprintf) &&
+ !callHasFP128Argument(CI)) {
+ auto SmallFPrintFFn =
+ getOrInsertLibFunc(M, *TLI, LibFunc_small_fprintf, FT,
+ Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(SmallFPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilderBase &B) {
+ optimizeErrorReporting(CI, B, 3);
+
+ // Get the element size and count.
+ ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ if (SizeC && CountC) {
+ uint64_t Bytes = SizeC->getZExtValue() * CountC->getZExtValue();
+
+ // If this is writing zero records, remove the call (it's a noop).
+ if (Bytes == 0)
+ return ConstantInt::get(CI->getType(), 0);
+
+ // If this is writing one byte, turn it into fputc.
+ // This optimisation is only valid if the return value is unused.
+ if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
+ Value *Char = B.CreateLoad(B.getInt8Ty(),
+ castToCStr(CI->getArgOperand(0), B), "char");
+ Type *IntTy = B.getIntNTy(TLI->getIntSize());
+ Value *Cast = B.CreateIntCast(Char, IntTy, /*isSigned*/ true, "chari");
+ Value *NewCI = emitFPutC(Cast, CI->getArgOperand(3), B, TLI);
+ return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr;
+ }
+ }
+
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilderBase &B) {
+ optimizeErrorReporting(CI, B, 1);
+
+ // Don't rewrite fputs to fwrite when optimising for size because fwrite
+ // requires more arguments and thus extra MOVs are required.
+ bool OptForSize = CI->getFunction()->hasOptSize() ||
+ llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI,
+ PGSOQueryType::IRPass);
+ if (OptForSize)
+ return nullptr;
+
+ // We can't optimize if return value is used.
+ if (!CI->use_empty())
+ return nullptr;
+
+ // fputs(s,F) --> fwrite(s,strlen(s),1,F)
+ uint64_t Len = GetStringLength(CI->getArgOperand(0));
+ if (!Len)
+ return nullptr;
+
+ // Known to have no uses (see above).
+ unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule());
+ Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits);
+ return copyFlags(
+ *CI,
+ emitFWrite(CI->getArgOperand(0),
+ ConstantInt::get(SizeTTy, Len - 1),
+ CI->getArgOperand(1), B, DL, TLI));
+}
+
+Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilderBase &B) {
+ annotateNonNullNoUndefBasedOnAccess(CI, 0);
+ if (!CI->use_empty())
+ return nullptr;
+
+ // Check for a constant string.
+ // puts("") -> putchar('\n')
+ StringRef Str;
+ if (getConstantStringInfo(CI->getArgOperand(0), Str) && Str.empty()) {
+ // putchar takes an argument of the same type as puts returns, i.e.,
+ // int, which need not be 32 bits wide.
+ Type *IntTy = CI->getType();
+ return copyFlags(*CI, emitPutChar(ConstantInt::get(IntTy, '\n'), B, TLI));
+ }
+
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeBCopy(CallInst *CI, IRBuilderBase &B) {
+ // bcopy(src, dst, n) -> llvm.memmove(dst, src, n)
+ return copyFlags(*CI, B.CreateMemMove(CI->getArgOperand(1), Align(1),
+ CI->getArgOperand(0), Align(1),
+ CI->getArgOperand(2)));
+}
+
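+// Return true if a float variant of the named libcall (e.g. "sqrtf" for
+// "sqrt") can be emitted for this module.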
+bool LibCallSimplifier::hasFloatVersion(const Module *M, StringRef FuncName) {
+ SmallString<20> FloatFuncName = FuncName;
+ FloatFuncName += 'f';
+ return isLibFuncEmittable(M, TLI, FloatFuncName);
+}
+
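+// Dispatch recognized string/memory library calls (strcpy, strlen, memcmp,
+// memset, ...) to their dedicated simplifiers; returns the replacement value
+// or nullptr if no simplification applies.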
+Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
+ IRBuilderBase &Builder) {
+ Module *M = CI->getModule();
+ LibFunc Func;
+ Function *Callee = CI->getCalledFunction();
+ // Check for string/memory library functions.
+ if (TLI->getLibFunc(*Callee, Func) && isLibFuncEmittable(M, TLI, Func)) {
+ // Make sure we never change the calling convention.
+ assert(
+ (ignoreCallingConv(Func) ||
+ TargetLibraryInfoImpl::isCallingConvCCompatible(CI)) &&
+ "Optimizing string/memory libcall would change the calling convention");
+ switch (Func) {
+ case LibFunc_strcat:
+ return optimizeStrCat(CI, Builder);
+ case LibFunc_strncat:
+ return optimizeStrNCat(CI, Builder);
+ case LibFunc_strchr:
+ return optimizeStrChr(CI, Builder);
+ case LibFunc_strrchr:
+ return optimizeStrRChr(CI, Builder);
+ case LibFunc_strcmp:
+ return optimizeStrCmp(CI, Builder);
+ case LibFunc_strncmp:
+ return optimizeStrNCmp(CI, Builder);
+ case LibFunc_strcpy:
+ return optimizeStrCpy(CI, Builder);
+ case LibFunc_stpcpy:
+ return optimizeStpCpy(CI, Builder);
+ case LibFunc_strlcpy:
+ return optimizeStrLCpy(CI, Builder);
+ case LibFunc_stpncpy:
+ return optimizeStringNCpy(CI, /*RetEnd=*/true, Builder);
+ case LibFunc_strncpy:
+ return optimizeStringNCpy(CI, /*RetEnd=*/false, Builder);
+ case LibFunc_strlen:
+ return optimizeStrLen(CI, Builder);
+ case LibFunc_strnlen:
+ return optimizeStrNLen(CI, Builder);
+ case LibFunc_strpbrk:
+ return optimizeStrPBrk(CI, Builder);
+ case LibFunc_strndup:
+ return optimizeStrNDup(CI, Builder);
+ case LibFunc_strtol:
+ case LibFunc_strtod:
+ case LibFunc_strtof:
+ case LibFunc_strtoul:
+ case LibFunc_strtoll:
+ case LibFunc_strtold:
+ case LibFunc_strtoull:
+ return optimizeStrTo(CI, Builder);
+ case LibFunc_strspn:
+ return optimizeStrSpn(CI, Builder);
+ case LibFunc_strcspn:
+ return optimizeStrCSpn(CI, Builder);
+ case LibFunc_strstr:
+ return optimizeStrStr(CI, Builder);
+ case LibFunc_memchr:
+ return optimizeMemChr(CI, Builder);
+ case LibFunc_memrchr:
+ return optimizeMemRChr(CI, Builder);
+ case LibFunc_bcmp:
+ return optimizeBCmp(CI, Builder);
+ case LibFunc_memcmp:
+ return optimizeMemCmp(CI, Builder);
+ case LibFunc_memcpy:
+ return optimizeMemCpy(CI, Builder);
+ case LibFunc_memccpy:
+ return optimizeMemCCpy(CI, Builder);
+ case LibFunc_mempcpy:
+ return optimizeMemPCpy(CI, Builder);
+ case LibFunc_memmove:
+ return optimizeMemMove(CI, Builder);
+ case LibFunc_memset:
+ return optimizeMemSet(CI, Builder);
+ case LibFunc_realloc:
+ return optimizeRealloc(CI, Builder);
+ case LibFunc_wcslen:
+ return optimizeWcslen(CI, Builder);
+ case LibFunc_bcopy:
+ return optimizeBCopy(CI, Builder);
+ default:
+ break;
+ }
+ }
+ return nullptr;
+}
+
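+// Dispatch recognized floating-point library calls (pow, exp2, sqrt, log*,
+// ...) to their dedicated simplifiers; calls requiring strict FP semantics
+// are left untouched.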
+Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
+ LibFunc Func,
+ IRBuilderBase &Builder) {
+ const Module *M = CI->getModule();
+
+ // Don't optimize calls that require strict floating point semantics.
+ if (CI->isStrictFP())
+ return nullptr;
+
+ if (Value *V = optimizeTrigReflections(CI, Func, Builder))
+ return V;
+
+ switch (Func) {
+ case LibFunc_sinpif:
+ case LibFunc_sinpi:
+ case LibFunc_cospif:
+ case LibFunc_cospi:
+ return optimizeSinCosPi(CI, Builder);
+ case LibFunc_powf:
+ case LibFunc_pow:
+ case LibFunc_powl:
+ return optimizePow(CI, Builder);
+ case LibFunc_exp2l:
+ case LibFunc_exp2:
+ case LibFunc_exp2f:
+ return optimizeExp2(CI, Builder);
+ case LibFunc_fabsf:
+ case LibFunc_fabs:
+ case LibFunc_fabsl:
+ return replaceUnaryCall(CI, Builder, Intrinsic::fabs);
+ case LibFunc_sqrtf:
+ case LibFunc_sqrt:
+ case LibFunc_sqrtl:
+ return optimizeSqrt(CI, Builder);
+ case LibFunc_logf:
+ case LibFunc_log:
+ case LibFunc_logl:
+ case LibFunc_log10f:
+ case LibFunc_log10:
+ case LibFunc_log10l:
+ case LibFunc_log1pf:
+ case LibFunc_log1p:
+ case LibFunc_log1pl:
+ case LibFunc_log2f:
+ case LibFunc_log2:
+ case LibFunc_log2l:
+ case LibFunc_logbf:
+ case LibFunc_logb:
+ case LibFunc_logbl:
+ return optimizeLog(CI, Builder);
+ case LibFunc_tan:
+ case LibFunc_tanf:
+ case LibFunc_tanl:
+ return optimizeTan(CI, Builder);
+ case LibFunc_ceil:
+ return replaceUnaryCall(CI, Builder, Intrinsic::ceil);
+ case LibFunc_floor:
+ return replaceUnaryCall(CI, Builder, Intrinsic::floor);
+ case LibFunc_round:
+ return replaceUnaryCall(CI, Builder, Intrinsic::round);
+ case LibFunc_roundeven:
+ return replaceUnaryCall(CI, Builder, Intrinsic::roundeven);
+ case LibFunc_nearbyint:
+ return replaceUnaryCall(CI, Builder, Intrinsic::nearbyint);
+ case LibFunc_rint:
+ return replaceUnaryCall(CI, Builder, Intrinsic::rint);
+ case LibFunc_trunc:
+ return replaceUnaryCall(CI, Builder, Intrinsic::trunc);
+ case LibFunc_acos:
+ case LibFunc_acosh:
+ case LibFunc_asin:
+ case LibFunc_asinh:
+ case LibFunc_atan:
+ case LibFunc_atanh:
+ case LibFunc_cbrt:
+ case LibFunc_cosh:
+ case LibFunc_exp:
+ case LibFunc_exp10:
+ case LibFunc_expm1:
+ case LibFunc_cos:
+ case LibFunc_sin:
+ case LibFunc_sinh:
+ case LibFunc_tanh:
+ if (UnsafeFPShrink && hasFloatVersion(M, CI->getCalledFunction()->getName()))
+ return optimizeUnaryDoubleFP(CI, Builder, TLI, true);
+ return nullptr;
+ case LibFunc_copysign:
+ if (hasFloatVersion(M, CI->getCalledFunction()->getName()))
+ return optimizeBinaryDoubleFP(CI, Builder, TLI);
+ return nullptr;
+ case LibFunc_fminf:
+ case LibFunc_fmin:
+ case LibFunc_fminl:
+ case LibFunc_fmaxf:
+ case LibFunc_fmax:
+ case LibFunc_fmaxl:
+ return optimizeFMinFMax(CI, Builder);
+ case LibFunc_cabs:
+ case LibFunc_cabsf:
+ case LibFunc_cabsl:
+ return optimizeCAbs(CI, Builder);
+ default:
+ return nullptr;
+ }
+}
+
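+// Top-level entry point: try intrinsic calls first, then fortified (_chk)
+// calls, then known library functions, returning the simplified value or
+// nullptr if nothing applies.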
+Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) {
+ Module *M = CI->getModule();
+ assert(!CI->isMustTailCall() && "These transforms aren't musttail safe.");
+
+ // TODO: Split out the code below that operates on FP calls so that
+ // we can allow non-FP calls with the StrictFP attribute to be
+ // optimized.
+ if (CI->isNoBuiltin())
+ return nullptr;
+
+ LibFunc Func;
+ Function *Callee = CI->getCalledFunction();
+ bool IsCallingConvC = TargetLibraryInfoImpl::isCallingConvCCompatible(CI);
+
+ SmallVector<OperandBundleDef, 2> OpBundles;
+ CI->getOperandBundlesAsDefs(OpBundles);
+
+ IRBuilderBase::OperandBundlesGuard Guard(Builder);
+ Builder.setDefaultOperandBundles(OpBundles);
+
+ // Command-line parameter overrides instruction attribute.
+ // This can't be moved to optimizeFloatingPointLibCall() because it may be
+ // used by the intrinsic optimizations.
+ if (EnableUnsafeFPShrink.getNumOccurrences() > 0)
+ UnsafeFPShrink = EnableUnsafeFPShrink;
+ else if (isa<FPMathOperator>(CI) && CI->isFast())
+ UnsafeFPShrink = true;
+
+ // First, check for intrinsics.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
+ if (!IsCallingConvC)
+ return nullptr;
+ // The FP intrinsics have corresponding constrained versions so we don't
+ // need to check for the StrictFP attribute here.
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::pow:
+ return optimizePow(CI, Builder);
+ case Intrinsic::exp2:
+ return optimizeExp2(CI, Builder);
+ case Intrinsic::log:
+ case Intrinsic::log2:
+ case Intrinsic::log10:
+ return optimizeLog(CI, Builder);
+ case Intrinsic::sqrt:
+ return optimizeSqrt(CI, Builder);
+ case Intrinsic::memset:
+ return optimizeMemSet(CI, Builder);
+ case Intrinsic::memcpy:
+ return optimizeMemCpy(CI, Builder);
+ case Intrinsic::memmove:
+ return optimizeMemMove(CI, Builder);
+ default:
+ return nullptr;
+ }
+ }
+
+ // Also try to simplify calls to fortified library functions.
+ if (Value *SimplifiedFortifiedCI =
+ FortifiedSimplifier.optimizeCall(CI, Builder)) {
+ // Try to further simplify the result.
+ CallInst *SimplifiedCI = dyn_cast<CallInst>(SimplifiedFortifiedCI);
+ if (SimplifiedCI && SimplifiedCI->getCalledFunction()) {
+ // Ensure that SimplifiedCI's uses are complete, since some calls have
+ // their uses analyzed.
+ replaceAllUsesWith(CI, SimplifiedCI);
+
+ // Set insertion point to SimplifiedCI to guarantee we reach all uses
+ // we might replace later on.
+ IRBuilderBase::InsertPointGuard Guard(Builder);
+ Builder.SetInsertPoint(SimplifiedCI);
+ if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, Builder)) {
+ // If we were able to further simplify, remove the now redundant call.
+ substituteInParent(SimplifiedCI, V);
+ return V;
+ }
+ }
+ return SimplifiedFortifiedCI;
+ }
+
+ // Then check for known library functions.
+ if (TLI->getLibFunc(*Callee, Func) && isLibFuncEmittable(M, TLI, Func)) {
+ // We never change the calling convention.
+ if (!ignoreCallingConv(Func) && !IsCallingConvC)
+ return nullptr;
+ if (Value *V = optimizeStringMemoryLibCall(CI, Builder))
+ return V;
+ if (Value *V = optimizeFloatingPointLibCall(CI, Func, Builder))
+ return V;
+ switch (Func) {
+ case LibFunc_ffs:
+ case LibFunc_ffsl:
+ case LibFunc_ffsll:
+ return optimizeFFS(CI, Builder);
+ case LibFunc_fls:
+ case LibFunc_flsl:
+ case LibFunc_flsll:
+ return optimizeFls(CI, Builder);
+ case LibFunc_abs:
+ case LibFunc_labs:
+ case LibFunc_llabs:
+ return optimizeAbs(CI, Builder);
+ case LibFunc_isdigit:
+ return optimizeIsDigit(CI, Builder);
+ case LibFunc_isascii:
+ return optimizeIsAscii(CI, Builder);
+ case LibFunc_toascii:
+ return optimizeToAscii(CI, Builder);
+ case LibFunc_atoi:
+ case LibFunc_atol:
+ case LibFunc_atoll:
+ return optimizeAtoi(CI, Builder);
+ case LibFunc_strtol:
+ case LibFunc_strtoll:
+ return optimizeStrToInt(CI, Builder, /*AsSigned=*/true);
+ case LibFunc_strtoul:
+ case LibFunc_strtoull:
+ return optimizeStrToInt(CI, Builder, /*AsSigned=*/false);
+ case LibFunc_printf:
+ return optimizePrintF(CI, Builder);
+ case LibFunc_sprintf:
+ return optimizeSPrintF(CI, Builder);
+ case LibFunc_snprintf:
+ return optimizeSnPrintF(CI, Builder);
+ case LibFunc_fprintf:
+ return optimizeFPrintF(CI, Builder);
+ case LibFunc_fwrite:
+ return optimizeFWrite(CI, Builder);
+ case LibFunc_fputs:
+ return optimizeFPuts(CI, Builder);
+ case LibFunc_puts:
+ return optimizePuts(CI, Builder);
+ case LibFunc_perror:
+ return optimizeErrorReporting(CI, Builder);
+ case LibFunc_vfprintf:
+ case LibFunc_fiprintf:
+ return optimizeErrorReporting(CI, Builder, 0);
+ default:
+ return nullptr;
+ }
+ }
+ return nullptr;
+}
+
+LibCallSimplifier::LibCallSimplifier(
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
+ OptimizationRemarkEmitter &ORE,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
+ function_ref<void(Instruction *, Value *)> Replacer,
+ function_ref<void(Instruction *)> Eraser)
+ : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE), BFI(BFI), PSI(PSI),
+ Replacer(Replacer), Eraser(Eraser) {}
+
+void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
+ // Indirect through the replacer used in this instance.
+ Replacer(I, With);
+}
+
+void LibCallSimplifier::eraseFromParent(Instruction *I) {
+ Eraser(I);
+}
+
+// TODO:
+// Additional cases that we need to add to this file:
+//
+// cbrt:
+// * cbrt(expN(X)) -> expN(x/3)
+// * cbrt(sqrt(x)) -> pow(x,1/6)
+// * cbrt(cbrt(x)) -> pow(x,1/9)
+//
+// exp, expf, expl:
+// * exp(log(x)) -> x
+//
+// log, logf, logl:
+// * log(exp(x)) -> x
+// * log(exp(y)) -> y*log(e)
+// * log(exp10(y)) -> y*log(10)
+// * log(sqrt(x)) -> 0.5*log(x)
+//
+// pow, powf, powl:
+// * pow(sqrt(x),y) -> pow(x,y*0.5)
+// * pow(pow(x,y),z)-> pow(x,y*z)
+//
+// signbit:
+// * signbit(cnst) -> cnst'
+// * signbit(nncst) -> 0 (if nncst is a non-negative constant)
+//
+// sqrt, sqrtf, sqrtl:
+// * sqrt(expN(x)) -> expN(x*0.5)
+// * sqrt(Nroot(x)) -> pow(x,1/(2*N))
+// * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
+//
+
+//===----------------------------------------------------------------------===//
+// Fortified Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
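+// Return true if the fortified (_chk) call CI may be folded to its unchecked
+// counterpart: ObjSizeOp is the index of the object-size argument, SizeOp and
+// StrOp optionally name a length or string argument to check against it, and
+// FlagOp an optional flag argument that must be zero.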
+bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(
+ CallInst *CI, unsigned ObjSizeOp, std::optional<unsigned> SizeOp,
+ std::optional<unsigned> StrOp, std::optional<unsigned> FlagOp) {
+ // If this function takes a flag argument, the implementation may use it to
+ // perform extra checks. Don't fold into the non-checking variant.
+ if (FlagOp) {
+ ConstantInt *Flag = dyn_cast<ConstantInt>(CI->getArgOperand(*FlagOp));
+ if (!Flag || !Flag->isZero())
+ return false;
+ }
+
+ if (SizeOp && CI->getArgOperand(ObjSizeOp) == CI->getArgOperand(*SizeOp))
+ return true;
+
+ if (ConstantInt *ObjSizeCI =
+ dyn_cast<ConstantInt>(CI->getArgOperand(ObjSizeOp))) {
+ if (ObjSizeCI->isMinusOne())
+ return true;
+ // If the object size wasn't -1 (unknown), bail out if we were asked to.
+ if (OnlyLowerUnknownSize)
+ return false;
+ if (StrOp) {
+ uint64_t Len = GetStringLength(CI->getArgOperand(*StrOp));
+ // If the length is 0 we don't know how long it is and so we can't
+ // remove the check.
+ if (Len)
+ annotateDereferenceableBytes(CI, *StrOp, Len);
+ else
+ return false;
+ return ObjSizeCI->getZExtValue() >= Len;
+ }
+
+ if (SizeOp) {
+ if (ConstantInt *SizeCI =
+ dyn_cast<ConstantInt>(CI->getArgOperand(*SizeOp)))
+ return ObjSizeCI->getZExtValue() >= SizeCI->getZExtValue();
+ }
+ }
+ return false;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
+ IRBuilderBase &B) {
+ if (isFortifiedCallFoldable(CI, 3, 2)) {
+ CallInst *NewCI =
+ B.CreateMemCpy(CI->getArgOperand(0), Align(1), CI->getArgOperand(1),
+ Align(1), CI->getArgOperand(2));
+ mergeAttributesAndFlags(NewCI, *CI);
+ return CI->getArgOperand(0);
+ }
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
+ IRBuilderBase &B) {
+ if (isFortifiedCallFoldable(CI, 3, 2)) {
+ CallInst *NewCI =
+ B.CreateMemMove(CI->getArgOperand(0), Align(1), CI->getArgOperand(1),
+ Align(1), CI->getArgOperand(2));
+ mergeAttributesAndFlags(NewCI, *CI);
+ return CI->getArgOperand(0);
+ }
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
+ IRBuilderBase &B) {
+ if (isFortifiedCallFoldable(CI, 3, 2)) {
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
+ CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val,
+ CI->getArgOperand(2), Align(1));
+ mergeAttributesAndFlags(NewCI, *CI);
+ return CI->getArgOperand(0);
+ }
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeMemPCpyChk(CallInst *CI,
+ IRBuilderBase &B) {
+ const DataLayout &DL = CI->getModule()->getDataLayout();
+ if (isFortifiedCallFoldable(CI, 3, 2))
+ if (Value *Call = emitMemPCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, DL, TLI)) {
+ return mergeAttributesAndFlags(cast<CallInst>(Call), *CI);
+ }
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
+ IRBuilderBase &B,
+ LibFunc Func) {
+ const DataLayout &DL = CI->getModule()->getDataLayout();
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1),
+ *ObjSize = CI->getArgOperand(2);
+
+ // __stpcpy_chk(x,x,...) -> x+strlen(x)
+ if (Func == LibFunc_stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) {
+ Value *StrLen = emitStrLen(Src, B, DL, TLI);
+ return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
+ }
+
+ // If a) we don't have any length information, or b) we know this will
+ // fit, then just lower to a plain st[rp]cpy. Otherwise we'll keep our
+ // st[rp]cpy_chk call, which may fail at runtime if the size is too long.
+ // TODO: It might be nice to get a maximum length out of the possible
+ // string lengths when they vary.
+ if (isFortifiedCallFoldable(CI, 2, std::nullopt, 1)) {
+ if (Func == LibFunc_strcpy_chk)
+ return copyFlags(*CI, emitStrCpy(Dst, Src, B, TLI));
+ else
+ return copyFlags(*CI, emitStpCpy(Dst, Src, B, TLI));
+ }
+
+ if (OnlyLowerUnknownSize)
+ return nullptr;
+
+ // Maybe we can still fold __st[rp]cpy_chk to __memcpy_chk.
+ uint64_t Len = GetStringLength(Src);
+ if (Len)
+ annotateDereferenceableBytes(CI, 1, Len);
+ else
+ return nullptr;
+
+ unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule());
+ Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits);
+ Value *LenV = ConstantInt::get(SizeTTy, Len);
+ Value *Ret = emitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
+ // If the function was an __stpcpy_chk, and we were able to fold it into
+ // a __memcpy_chk, we still need to return the correct end pointer.
+ if (Ret && Func == LibFunc_stpcpy_chk)
+ return B.CreateInBoundsGEP(B.getInt8Ty(), Dst,
+ ConstantInt::get(SizeTTy, Len - 1));
+ return copyFlags(*CI, cast<CallInst>(Ret));
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrLenChk(CallInst *CI,
+ IRBuilderBase &B) {
+ if (isFortifiedCallFoldable(CI, 1, std::nullopt, 0))
+ return copyFlags(*CI, emitStrLen(CI->getArgOperand(0), B,
+ CI->getModule()->getDataLayout(), TLI));
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
+ IRBuilderBase &B,
+ LibFunc Func) {
+ if (isFortifiedCallFoldable(CI, 3, 2)) {
+ if (Func == LibFunc_strncpy_chk)
+ return copyFlags(*CI,
+ emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TLI));
+ else
+ return copyFlags(*CI,
+ emitStpNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TLI));
+ }
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeMemCCpyChk(CallInst *CI,
+ IRBuilderBase &B) {
+ if (isFortifiedCallFoldable(CI, 4, 3))
+ return copyFlags(
+ *CI, emitMemCCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), CI->getArgOperand(3), B, TLI));
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeSNPrintfChk(CallInst *CI,
+ IRBuilderBase &B) {
+ if (isFortifiedCallFoldable(CI, 3, 1, std::nullopt, 2)) {
+ SmallVector<Value *, 8> VariadicArgs(drop_begin(CI->args(), 5));
+ return copyFlags(*CI,
+ emitSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(4), VariadicArgs, B, TLI));
+ }
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeSPrintfChk(CallInst *CI,
+ IRBuilderBase &B) {
+ if (isFortifiedCallFoldable(CI, 2, std::nullopt, std::nullopt, 1)) {
+ SmallVector<Value *, 8> VariadicArgs(drop_begin(CI->args(), 4));
+ return copyFlags(*CI,
+ emitSPrintf(CI->getArgOperand(0), CI->getArgOperand(3),
+ VariadicArgs, B, TLI));
+ }
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrCatChk(CallInst *CI,
+ IRBuilderBase &B) {
+ if (isFortifiedCallFoldable(CI, 2))
+ return copyFlags(
+ *CI, emitStrCat(CI->getArgOperand(0), CI->getArgOperand(1), B, TLI));
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrLCat(CallInst *CI,
+ IRBuilderBase &B) {
+ if (isFortifiedCallFoldable(CI, 3))
+ return copyFlags(*CI,
+ emitStrLCat(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TLI));
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrNCatChk(CallInst *CI,
+ IRBuilderBase &B) {
+ if (isFortifiedCallFoldable(CI, 3))
+ return copyFlags(*CI,
+ emitStrNCat(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TLI));
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrLCpyChk(CallInst *CI,
+ IRBuilderBase &B) {
+ if (isFortifiedCallFoldable(CI, 3))
+ return copyFlags(*CI,
+ emitStrLCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TLI));
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeVSNPrintfChk(CallInst *CI,
+ IRBuilderBase &B) {
+ if (isFortifiedCallFoldable(CI, 3, 1, std::nullopt, 2))
+ return copyFlags(
+ *CI, emitVSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(4), CI->getArgOperand(5), B, TLI));
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeVSPrintfChk(CallInst *CI,
+ IRBuilderBase &B) {
+ if (isFortifiedCallFoldable(CI, 2, std::nullopt, std::nullopt, 1))
+ return copyFlags(*CI,
+ emitVSPrintf(CI->getArgOperand(0), CI->getArgOperand(3),
+ CI->getArgOperand(4), B, TLI));
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI,
+ IRBuilderBase &Builder) {
+ // FIXME: We shouldn't be changing "nobuiltin" or TLI unavailable calls here.
+ // Some clang users checked for _chk libcall availability using:
+ // __has_builtin(__builtin___memcpy_chk)
+ // When compiling with -fno-builtin, this is always true.
+ // When passing -ffreestanding/-mkernel, which both imply -fno-builtin, we
+ // end up with fortified libcalls, which isn't acceptable in a freestanding
+ // environment which only provides their non-fortified counterparts.
+ //
+ // Until we change clang and/or teach external users to check for availability
+ // differently, disregard the "nobuiltin" attribute and TLI::has.
+ //
+ // PR23093.
+
+ LibFunc Func;
+ Function *Callee = CI->getCalledFunction();
+ bool IsCallingConvC = TargetLibraryInfoImpl::isCallingConvCCompatible(CI);
+
+ SmallVector<OperandBundleDef, 2> OpBundles;
+ CI->getOperandBundlesAsDefs(OpBundles);
+
+ IRBuilderBase::OperandBundlesGuard Guard(Builder);
+ Builder.setDefaultOperandBundles(OpBundles);
+
+ // First, check that this is a known library function and that the prototype
+ // is correct.
+ if (!TLI->getLibFunc(*Callee, Func))
+ return nullptr;
+
+ // We never change the calling convention.
+ if (!ignoreCallingConv(Func) && !IsCallingConvC)
+ return nullptr;
+
+ switch (Func) {
+ case LibFunc_memcpy_chk:
+ return optimizeMemCpyChk(CI, Builder);
+ case LibFunc_mempcpy_chk:
+ return optimizeMemPCpyChk(CI, Builder);
+ case LibFunc_memmove_chk:
+ return optimizeMemMoveChk(CI, Builder);
+ case LibFunc_memset_chk:
+ return optimizeMemSetChk(CI, Builder);
+ case LibFunc_stpcpy_chk:
+ case LibFunc_strcpy_chk:
+ return optimizeStrpCpyChk(CI, Builder, Func);
+ case LibFunc_strlen_chk:
+ return optimizeStrLenChk(CI, Builder);
+ case LibFunc_stpncpy_chk:
+ case LibFunc_strncpy_chk:
+ return optimizeStrpNCpyChk(CI, Builder, Func);
+ case LibFunc_memccpy_chk:
+ return optimizeMemCCpyChk(CI, Builder);
+ case LibFunc_snprintf_chk:
+ return optimizeSNPrintfChk(CI, Builder);
+ case LibFunc_sprintf_chk:
+ return optimizeSPrintfChk(CI, Builder);
+ case LibFunc_strcat_chk:
+ return optimizeStrCatChk(CI, Builder);
+ case LibFunc_strlcat_chk:
+ return optimizeStrLCat(CI, Builder);
+ case LibFunc_strncat_chk:
+ return optimizeStrNCatChk(CI, Builder);
+ case LibFunc_strlcpy_chk:
+ return optimizeStrLCpyChk(CI, Builder);
+ case LibFunc_vsnprintf_chk:
+ return optimizeVSNPrintfChk(CI, Builder);
+ case LibFunc_vsprintf_chk:
+ return optimizeVSPrintfChk(CI, Builder);
+ default:
+ break;
+ }
+ return nullptr;
+}
+
+FortifiedLibCallSimplifier::FortifiedLibCallSimplifier(
+ const TargetLibraryInfo *TLI, bool OnlyLowerUnknownSize)
+ : TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/SizeOpts.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/SizeOpts.cpp
new file mode 100644
index 0000000000..1242380f73
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/SizeOpts.cpp
@@ -0,0 +1,111 @@
+//===-- SizeOpts.cpp - code size optimization related code ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains some shared code size optimization related code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SizeOpts.h"
+
+using namespace llvm;
+
+cl::opt<bool> llvm::EnablePGSO(
+ "pgso", cl::Hidden, cl::init(true),
+ cl::desc("Enable the profile guided size optimizations. "));
+
+cl::opt<bool> llvm::PGSOLargeWorkingSetSizeOnly(
+ "pgso-lwss-only", cl::Hidden, cl::init(true),
+ cl::desc("Apply the profile guided size optimizations only "
+ "if the working set size is large (except for cold code.)"));
+
+cl::opt<bool> llvm::PGSOColdCodeOnly(
+ "pgso-cold-code-only", cl::Hidden, cl::init(false),
+ cl::desc("Apply the profile guided size optimizations only "
+ "to cold code."));
+
+cl::opt<bool> llvm::PGSOColdCodeOnlyForInstrPGO(
+ "pgso-cold-code-only-for-instr-pgo", cl::Hidden, cl::init(false),
+ cl::desc("Apply the profile guided size optimizations only "
+ "to cold code under instrumentation PGO."));
+
+cl::opt<bool> llvm::PGSOColdCodeOnlyForSamplePGO(
+ "pgso-cold-code-only-for-sample-pgo", cl::Hidden, cl::init(false),
+ cl::desc("Apply the profile guided size optimizations only "
+ "to cold code under sample PGO."));
+
+cl::opt<bool> llvm::PGSOColdCodeOnlyForPartialSamplePGO(
+ "pgso-cold-code-only-for-partial-sample-pgo", cl::Hidden, cl::init(false),
+ cl::desc("Apply the profile guided size optimizations only "
+ "to cold code under partial-profile sample PGO."));
+
+cl::opt<bool> llvm::ForcePGSO(
+ "force-pgso", cl::Hidden, cl::init(false),
+ cl::desc("Force the (profiled-guided) size optimizations. "));
+
+cl::opt<int> llvm::PgsoCutoffInstrProf(
+ "pgso-cutoff-instr-prof", cl::Hidden, cl::init(950000),
+ cl::desc("The profile guided size optimization profile summary cutoff "
+ "for instrumentation profile."));
+
+cl::opt<int> llvm::PgsoCutoffSampleProf(
+ "pgso-cutoff-sample-prof", cl::Hidden, cl::init(990000),
+ cl::desc("The profile guided size optimization profile summary cutoff "
+ "for sample profile."));
+
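+// Thin adapter forwarding the function/block hotness and coldness queries used
+// by the PGSO helpers below to ProfileSummaryInfo.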
+namespace {
+struct BasicBlockBFIAdapter {
+ static bool isFunctionColdInCallGraph(const Function *F,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo &BFI) {
+ return PSI->isFunctionColdInCallGraph(F, BFI);
+ }
+ static bool isFunctionHotInCallGraphNthPercentile(int CutOff,
+ const Function *F,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo &BFI) {
+ return PSI->isFunctionHotInCallGraphNthPercentile(CutOff, F, BFI);
+ }
+ static bool isFunctionColdInCallGraphNthPercentile(int CutOff,
+ const Function *F,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo &BFI) {
+ return PSI->isFunctionColdInCallGraphNthPercentile(CutOff, F, BFI);
+ }
+ static bool isColdBlock(const BasicBlock *BB,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) {
+ return PSI->isColdBlock(BB, BFI);
+ }
+ static bool isHotBlockNthPercentile(int CutOff,
+ const BasicBlock *BB,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) {
+ return PSI->isHotBlockNthPercentile(CutOff, BB, BFI);
+ }
+ static bool isColdBlockNthPercentile(int CutOff, const BasicBlock *BB,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) {
+ return PSI->isColdBlockNthPercentile(CutOff, BB, BFI);
+ }
+};
+} // end anonymous namespace
+
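+// Return true if the given function should be optimized for size according to
+// the PGSO options above and the available profile information.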
+bool llvm::shouldOptimizeForSize(const Function *F, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI,
+ PGSOQueryType QueryType) {
+ return shouldFuncOptimizeForSizeImpl<BasicBlockBFIAdapter>(F, PSI, BFI,
+ QueryType);
+}
+
+bool llvm::shouldOptimizeForSize(const BasicBlock *BB, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI,
+ PGSOQueryType QueryType) {
+ assert(BB);
+ return shouldOptimizeForSizeImpl<BasicBlockBFIAdapter>(BB, PSI, BFI,
+ QueryType);
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/SplitModule.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/SplitModule.cpp
new file mode 100644
index 0000000000..9c39c26d8b
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/SplitModule.cpp
@@ -0,0 +1,287 @@
+//===- SplitModule.cpp - Split a module into partitions -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the function llvm::SplitModule, which splits a module
+// into multiple linkable partitions. It can be used to implement parallel code
+// generation for link-time optimization.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SplitModule.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Comdat.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <memory>
+#include <queue>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "split-module"
+
+namespace {
+
+using ClusterMapType = EquivalenceClasses<const GlobalValue *>;
+using ComdatMembersType = DenseMap<const Comdat *, const GlobalValue *>;
+using ClusterIDMapType = DenseMap<const GlobalValue *, unsigned>;
+
+} // end anonymous namespace
+
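+// Assign every non-constant user of GV (the enclosing function of an
+// instruction, or another global) to the same cluster as GV.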
+static void addNonConstUser(ClusterMapType &GVtoClusterMap,
+ const GlobalValue *GV, const User *U) {
+ assert((!isa<Constant>(U) || isa<GlobalValue>(U)) && "Bad user");
+
+ if (const Instruction *I = dyn_cast<Instruction>(U)) {
+ const GlobalValue *F = I->getParent()->getParent();
+ GVtoClusterMap.unionSets(GV, F);
+ } else if (const GlobalValue *GVU = dyn_cast<GlobalValue>(U)) {
+ GVtoClusterMap.unionSets(GV, GVU);
+ } else {
+ llvm_unreachable("Underimplemented use case");
+ }
+}
+
+// Adds all GlobalValue users of V to the same cluster as GV.
+static void addAllGlobalValueUsers(ClusterMapType &GVtoClusterMap,
+ const GlobalValue *GV, const Value *V) {
+ for (const auto *U : V->users()) {
+ SmallVector<const User *, 4> Worklist;
+ Worklist.push_back(U);
+ while (!Worklist.empty()) {
+ const User *UU = Worklist.pop_back_val();
+ // For each constant that is not a GV (a pure const) recurse.
+ if (isa<Constant>(UU) && !isa<GlobalValue>(UU)) {
+ Worklist.append(UU->user_begin(), UU->user_end());
+ continue;
+ }
+ addNonConstUser(GVtoClusterMap, GV, UU);
+ }
+ }
+}
+
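+// Return the global object an alias or ifunc ultimately refers to, so it can
+// be kept in the same partition as its aliasee or resolver.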
+static const GlobalObject *getGVPartitioningRoot(const GlobalValue *GV) {
+ const GlobalObject *GO = GV->getAliaseeObject();
+ if (const auto *GI = dyn_cast_or_null<GlobalIFunc>(GO))
+ GO = GI->getResolverFunction();
+ return GO;
+}
+
+// Find partitions for the module such that no locals need to be globalized.
+// Try to pack those partitions into N files in a balanced way, since this
+// roughly equals thread balancing for the backend codegen step.
+static void findPartitions(Module &M, ClusterIDMapType &ClusterIDMap,
+ unsigned N) {
+ // At this point module should have the proper mix of globals and locals.
+ // As we attempt to partition this module, we must not change any
+ // locals to globals.
+ LLVM_DEBUG(dbgs() << "Partition module with (" << M.size() << ") functions\n");
+ ClusterMapType GVtoClusterMap;
+ ComdatMembersType ComdatMembers;
+
+ auto recordGVSet = [&GVtoClusterMap, &ComdatMembers](GlobalValue &GV) {
+ if (GV.isDeclaration())
+ return;
+
+ if (!GV.hasName())
+ GV.setName("__llvmsplit_unnamed");
+
+ // Comdat groups must not be partitioned. For comdat groups that contain
+ // locals, record all their members here so we can keep them together.
+ // Comdat groups that only contain external globals are already handled by
+ // the MD5-based partitioning.
+ if (const Comdat *C = GV.getComdat()) {
+ auto &Member = ComdatMembers[C];
+ if (Member)
+ GVtoClusterMap.unionSets(Member, &GV);
+ else
+ Member = &GV;
+ }
+
+ // Aliases should not be separated from their aliasees and ifuncs should
+ // not be separated from their resolvers regardless of linkage.
+ if (const GlobalObject *Root = getGVPartitioningRoot(&GV))
+ if (&GV != Root)
+ GVtoClusterMap.unionSets(&GV, Root);
+
+ if (const Function *F = dyn_cast<Function>(&GV)) {
+ for (const BasicBlock &BB : *F) {
+ BlockAddress *BA = BlockAddress::lookup(&BB);
+ if (!BA || !BA->isConstantUsed())
+ continue;
+ addAllGlobalValueUsers(GVtoClusterMap, F, BA);
+ }
+ }
+
+ if (GV.hasLocalLinkage())
+ addAllGlobalValueUsers(GVtoClusterMap, &GV, &GV);
+ };
+
+ llvm::for_each(M.functions(), recordGVSet);
+ llvm::for_each(M.globals(), recordGVSet);
+ llvm::for_each(M.aliases(), recordGVSet);
+
+  // Assign all GVs to merged clusters while balancing the number of objects
+  // in each.
+ auto CompareClusters = [](const std::pair<unsigned, unsigned> &a,
+ const std::pair<unsigned, unsigned> &b) {
+ if (a.second || b.second)
+ return a.second > b.second;
+ else
+ return a.first > b.first;
+ };
+
+ std::priority_queue<std::pair<unsigned, unsigned>,
+ std::vector<std::pair<unsigned, unsigned>>,
+ decltype(CompareClusters)>
+ BalancinQueue(CompareClusters);
+ // Pre-populate priority queue with N slot blanks.
+ for (unsigned i = 0; i < N; ++i)
+ BalancinQueue.push(std::make_pair(i, 0));
+
+ using SortType = std::pair<unsigned, ClusterMapType::iterator>;
+
+ SmallVector<SortType, 64> Sets;
+ SmallPtrSet<const GlobalValue *, 32> Visited;
+
+  // To guarantee determinism, we have to sort the clusters according to size.
+  // When sizes are equal, use the leader's name as a tie-breaker.
+ for (ClusterMapType::iterator I = GVtoClusterMap.begin(),
+ E = GVtoClusterMap.end(); I != E; ++I)
+ if (I->isLeader())
+ Sets.push_back(
+ std::make_pair(std::distance(GVtoClusterMap.member_begin(I),
+ GVtoClusterMap.member_end()), I));
+
+ llvm::sort(Sets, [](const SortType &a, const SortType &b) {
+ if (a.first == b.first)
+ return a.second->getData()->getName() > b.second->getData()->getName();
+ else
+ return a.first > b.first;
+ });
+
+ for (auto &I : Sets) {
+ unsigned CurrentClusterID = BalancinQueue.top().first;
+ unsigned CurrentClusterSize = BalancinQueue.top().second;
+ BalancinQueue.pop();
+
+ LLVM_DEBUG(dbgs() << "Root[" << CurrentClusterID << "] cluster_size("
+ << I.first << ") ----> " << I.second->getData()->getName()
+ << "\n");
+
+ for (ClusterMapType::member_iterator MI =
+ GVtoClusterMap.findLeader(I.second);
+ MI != GVtoClusterMap.member_end(); ++MI) {
+ if (!Visited.insert(*MI).second)
+ continue;
+ LLVM_DEBUG(dbgs() << "----> " << (*MI)->getName()
+ << ((*MI)->hasLocalLinkage() ? " l " : " e ") << "\n");
+ ClusterIDMap[*MI] = CurrentClusterID;
+ CurrentClusterSize++;
+ }
+ // Add this set size to the number of entries in this cluster.
+ BalancinQueue.push(std::make_pair(CurrentClusterID, CurrentClusterSize));
+ }
+}
+
+static void externalize(GlobalValue *GV) {
+ if (GV->hasLocalLinkage()) {
+ GV->setLinkage(GlobalValue::ExternalLinkage);
+ GV->setVisibility(GlobalValue::HiddenVisibility);
+ }
+
+ // Unnamed entities must be named consistently between modules. setName will
+ // give a distinct name to each such entity.
+ if (!GV->hasName())
+ GV->setName("__llvmsplit_unnamed");
+}
+
+// Returns whether GV should be in partition (0-based) I of N.
+static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) {
+ if (const GlobalObject *Root = getGVPartitioningRoot(GV))
+ GV = Root;
+
+ StringRef Name;
+ if (const Comdat *C = GV->getComdat())
+ Name = C->getName();
+ else
+ Name = GV->getName();
+
+  // Partition by MD5 hash. We only need a few bits for evenness, as the
+  // number of partitions will generally be small (one or two digits); the
+  // low 16 bits are enough.
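+  // For example (hypothetical hash bytes), with N == 4, R[0] == 0x34 and
+  // R[1] == 0x12, the combined value is 0x1234 and the symbol is placed in
+  // partition 0x1234 % 4 == 0.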
+ MD5 H;
+ MD5::MD5Result R;
+ H.update(Name);
+ H.final(R);
+ return (R[0] | (R[1] << 8)) % N == I;
+}
+
+void llvm::SplitModule(
+ Module &M, unsigned N,
+ function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback,
+ bool PreserveLocals) {
+ if (!PreserveLocals) {
+ for (Function &F : M)
+ externalize(&F);
+ for (GlobalVariable &GV : M.globals())
+ externalize(&GV);
+ for (GlobalAlias &GA : M.aliases())
+ externalize(&GA);
+ for (GlobalIFunc &GIF : M.ifuncs())
+ externalize(&GIF);
+ }
+
+ // This performs splitting without a need for externalization, which might not
+ // always be possible.
+ ClusterIDMapType ClusterIDMap;
+ findPartitions(M, ClusterIDMap, N);
+
+ // FIXME: We should be able to reuse M as the last partition instead of
+ // cloning it. Note that the callers at the moment expect the module to
+ // be preserved, so will need some adjustments as well.
+ for (unsigned I = 0; I < N; ++I) {
+ ValueToValueMapTy VMap;
+ std::unique_ptr<Module> MPart(
+ CloneModule(M, VMap, [&](const GlobalValue *GV) {
+ if (ClusterIDMap.count(GV))
+ return (ClusterIDMap[GV] == I);
+ else
+ return isInPartition(GV, I, N);
+ }));
+ if (I != 0)
+ MPart->setModuleInlineAsm("");
+ ModuleCallback(std::move(MPart));
+ }
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/StripGCRelocates.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/StripGCRelocates.cpp
new file mode 100644
index 0000000000..0ff88e8b46
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/StripGCRelocates.cpp
@@ -0,0 +1,86 @@
+//===- StripGCRelocates.cpp - Remove gc.relocates inserted by RewriteStatePoints===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a little utility pass that removes the gc.relocates inserted by
+// RewriteStatepointsForGC. Note that the resulting IR is incorrect, but this
+// is useful as a standalone pass for analyzing IR without the gc.relocates.
+// The statepoint and gc.result intrinsics remain present.
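+//
+// Sketch (illustrative SSA names): every use of a relocated pointer such as
+//   %p.relocated = call ... @llvm.experimental.gc.relocate(...)
+// is rewritten to the derived pointer it relocates (bitcast to the type of
+// %p.relocated if the types differ), and the gc.relocate call is then erased.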
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/StripGCRelocates.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+static bool stripGCRelocates(Function &F) {
+ // Nothing to do for declarations.
+ if (F.isDeclaration())
+ return false;
+ SmallVector<GCRelocateInst *, 20> GCRelocates;
+ // TODO: We currently do not handle gc.relocates that are in landing pads,
+ // i.e. not bound to a single statepoint token.
+ for (Instruction &I : instructions(F)) {
+ if (auto *GCR = dyn_cast<GCRelocateInst>(&I))
+ if (isa<GCStatepointInst>(GCR->getOperand(0)))
+ GCRelocates.push_back(GCR);
+ }
+ // All gc.relocates are bound to a single statepoint token. The order of
+ // visiting gc.relocates for deletion does not matter.
+ for (GCRelocateInst *GCRel : GCRelocates) {
+ Value *OrigPtr = GCRel->getDerivedPtr();
+ Value *ReplaceGCRel = OrigPtr;
+
+    // All gc_relocates are i8 addrspace(1)* typed, so we need a bitcast from
+    // i8 addrspace(1)* to the type of OrigPtr if the two types differ.
+ if (GCRel->getType() != OrigPtr->getType())
+ ReplaceGCRel = new BitCastInst(OrigPtr, GCRel->getType(), "cast", GCRel);
+
+    // Replace all uses of the gc.relocate and delete it. There may be
+    // unnecessary bitcasts back to the OrigPtr type; an instcombine pass
+    // will clean these up.
+ GCRel->replaceAllUsesWith(ReplaceGCRel);
+ GCRel->eraseFromParent();
+ }
+ return !GCRelocates.empty();
+}
+
+PreservedAnalyses StripGCRelocates::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ if (!stripGCRelocates(F))
+ return PreservedAnalyses::all();
+
+  // Removing gc.relocate preserves the CFG, but most other analyses probably
+  // need to be re-run.
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
+
+namespace {
+struct StripGCRelocatesLegacy : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ StripGCRelocatesLegacy() : FunctionPass(ID) {
+ initializeStripGCRelocatesLegacyPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &Info) const override {}
+
+ bool runOnFunction(Function &F) override { return ::stripGCRelocates(F); }
+};
+char StripGCRelocatesLegacy::ID = 0;
+} // namespace
+
+INITIALIZE_PASS(StripGCRelocatesLegacy, "strip-gc-relocates",
+ "Strip gc.relocates inserted through RewriteStatepointsForGC",
+ true, false)
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
new file mode 100644
index 0000000000..10fda4df51
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
@@ -0,0 +1,51 @@
+//===- StripNonLineTableDebugInfo.cpp -- Strip parts of Debug Info --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/StripNonLineTableDebugInfo.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils.h"
+using namespace llvm;
+
+namespace {
+
+/// This pass strips all debug info that is not related to line tables.
+/// The result will be the same as if the program were compiled with
+/// -gline-tables-only.
+struct StripNonLineTableDebugLegacyPass : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ StripNonLineTableDebugLegacyPass() : ModulePass(ID) {
+ initializeStripNonLineTableDebugLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+
+ bool runOnModule(Module &M) override {
+ return llvm::stripNonLineTableDebugInfo(M);
+ }
+};
+}
+
+char StripNonLineTableDebugLegacyPass::ID = 0;
+INITIALIZE_PASS(StripNonLineTableDebugLegacyPass,
+ "strip-nonlinetable-debuginfo",
+ "Strip all debug info except linetables", false, false)
+
+ModulePass *llvm::createStripNonLineTableDebugLegacyPass() {
+ return new StripNonLineTableDebugLegacyPass();
+}
+
+PreservedAnalyses
+StripNonLineTableDebugInfoPass::run(Module &M, ModuleAnalysisManager &AM) {
+ llvm::stripNonLineTableDebugInfo(M);
+ return PreservedAnalyses::all();
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/SymbolRewriter.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/SymbolRewriter.cpp
new file mode 100644
index 0000000000..4ad16d622e
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -0,0 +1,586 @@
+//===- SymbolRewriter.cpp - Symbol Rewriter -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// SymbolRewriter is an LLVM pass which can rewrite symbols transparently within
+// existing code. It is implemented as a compiler pass and is configured via a
+// YAML configuration file.
+//
+// The YAML configuration file format is as follows:
+//
+// RewriteMapFile := RewriteDescriptors
+// RewriteDescriptors := RewriteDescriptor | RewriteDescriptors
+// RewriteDescriptor := RewriteDescriptorType ':' '{' RewriteDescriptorFields '}'
+// RewriteDescriptorFields := RewriteDescriptorField | RewriteDescriptorFields
+// RewriteDescriptorField := FieldIdentifier ':' FieldValue ','
+// RewriteDescriptorType := Identifier
+// FieldIdentifier := Identifier
+// FieldValue := Identifier
+// Identifier := [0-9a-zA-Z]+
+//
+// Currently, the following descriptor types are supported:
+//
+// - function: (function rewriting)
+// + Source (original name of the function)
+// + Target (explicit transformation)
+// + Transform (pattern transformation)
+// + Naked (boolean, whether the function is undecorated)
+// - global variable: (external linkage global variable rewriting)
+// + Source (original name of externally visible variable)
+// + Target (explicit transformation)
+// + Transform (pattern transformation)
+// - global alias: (global alias rewriting)
+// + Source (original name of the aliased name)
+// + Target (explicit transformation)
+// + Transform (pattern transformation)
+//
+// Note that Source and exactly one of [Target, Transform] must be provided.
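+//
+// For example (hypothetical symbol names), a rewrite map with one explicit
+// rename and one pattern-based rename might look like:
+//
+//   function: { source: foo, target: bar }
+//   function: { source: "legacy_(.*)", transform: "new_\1" }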
+//
+// New rewrite descriptors can be created. Adding a new rewrite descriptor
+// involves:
+//
+// a) extending the rewrite descriptor kind enumeration
+// (<anonymous>::RewriteDescriptor::RewriteDescriptorType)
+// b) implementing the new descriptor
+// (c.f. <anonymous>::ExplicitRewriteFunctionDescriptor)
+// c) extending the rewrite map parser
+// (<anonymous>::RewriteMapParser::parseEntry)
+//
+// To rewrite symbols, enable the pass with the `-rewrite-symbols` option and
+// specify the map file to use for the rewriting via the `-rewrite-map-file`
+// option.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SymbolRewriter.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/IR/Comdat.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/YAMLParser.h"
+#include <memory>
+#include <string>
+#include <vector>
+
+using namespace llvm;
+using namespace SymbolRewriter;
+
+#define DEBUG_TYPE "symbol-rewriter"
+
+static cl::list<std::string> RewriteMapFiles("rewrite-map-file",
+ cl::desc("Symbol Rewrite Map"),
+ cl::value_desc("filename"),
+ cl::Hidden);
+
+static void rewriteComdat(Module &M, GlobalObject *GO,
+ const std::string &Source,
+ const std::string &Target) {
+ if (Comdat *CD = GO->getComdat()) {
+ auto &Comdats = M.getComdatSymbolTable();
+
+ Comdat *C = M.getOrInsertComdat(Target);
+ C->setSelectionKind(CD->getSelectionKind());
+ GO->setComdat(C);
+
+ Comdats.erase(Comdats.find(Source));
+ }
+}
+
+namespace {
+
+template <RewriteDescriptor::Type DT, typename ValueType,
+ ValueType *(Module::*Get)(StringRef) const>
+class ExplicitRewriteDescriptor : public RewriteDescriptor {
+public:
+ const std::string Source;
+ const std::string Target;
+
+ ExplicitRewriteDescriptor(StringRef S, StringRef T, const bool Naked)
+ : RewriteDescriptor(DT),
+ Source(std::string(Naked ? StringRef("\01" + S.str()) : S)),
+ Target(std::string(T)) {}
+
+ bool performOnModule(Module &M) override;
+
+ static bool classof(const RewriteDescriptor *RD) {
+ return RD->getType() == DT;
+ }
+};
+
+} // end anonymous namespace
+
+template <RewriteDescriptor::Type DT, typename ValueType,
+ ValueType *(Module::*Get)(StringRef) const>
+bool ExplicitRewriteDescriptor<DT, ValueType, Get>::performOnModule(Module &M) {
+ bool Changed = false;
+ if (ValueType *S = (M.*Get)(Source)) {
+ if (GlobalObject *GO = dyn_cast<GlobalObject>(S))
+ rewriteComdat(M, GO, Source, Target);
+
+ if (Value *T = (M.*Get)(Target))
+ S->setValueName(T->getValueName());
+ else
+ S->setName(Target);
+
+ Changed = true;
+ }
+ return Changed;
+}
+
+namespace {
+
+template <RewriteDescriptor::Type DT, typename ValueType,
+ ValueType *(Module::*Get)(StringRef) const,
+ iterator_range<typename iplist<ValueType>::iterator>
+ (Module::*Iterator)()>
+class PatternRewriteDescriptor : public RewriteDescriptor {
+public:
+ const std::string Pattern;
+ const std::string Transform;
+
+ PatternRewriteDescriptor(StringRef P, StringRef T)
+ : RewriteDescriptor(DT), Pattern(std::string(P)),
+ Transform(std::string(T)) {}
+
+ bool performOnModule(Module &M) override;
+
+ static bool classof(const RewriteDescriptor *RD) {
+ return RD->getType() == DT;
+ }
+};
+
+} // end anonymous namespace
+
+template <RewriteDescriptor::Type DT, typename ValueType,
+ ValueType *(Module::*Get)(StringRef) const,
+ iterator_range<typename iplist<ValueType>::iterator>
+ (Module::*Iterator)()>
+bool PatternRewriteDescriptor<DT, ValueType, Get, Iterator>::
+performOnModule(Module &M) {
+ bool Changed = false;
+ for (auto &C : (M.*Iterator)()) {
+ std::string Error;
+
+ std::string Name = Regex(Pattern).sub(Transform, C.getName(), &Error);
+ if (!Error.empty())
+ report_fatal_error(Twine("unable to transforn ") + C.getName() + " in " +
+ M.getModuleIdentifier() + ": " + Error);
+
+ if (C.getName() == Name)
+ continue;
+
+ if (GlobalObject *GO = dyn_cast<GlobalObject>(&C))
+ rewriteComdat(M, GO, std::string(C.getName()), Name);
+
+ if (Value *V = (M.*Get)(Name))
+ C.setValueName(V->getValueName());
+ else
+ C.setName(Name);
+
+ Changed = true;
+ }
+ return Changed;
+}
+
+namespace {
+
+/// Represents a rewrite for an explicitly named (function) symbol. Both the
+/// source function name and target function name of the transformation are
+/// explicitly spelt out.
+using ExplicitRewriteFunctionDescriptor =
+ ExplicitRewriteDescriptor<RewriteDescriptor::Type::Function, Function,
+ &Module::getFunction>;
+
+/// Represents a rewrite for an explicitly named (global variable) symbol. Both
+/// the source variable name and target variable name are spelt out. This
+/// applies only to module level variables.
+using ExplicitRewriteGlobalVariableDescriptor =
+ ExplicitRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
+ GlobalVariable, &Module::getGlobalVariable>;
+
+/// Represents a rewrite for an explicitly named global alias. Both the source
+/// and target name are explicitly spelt out.
+using ExplicitRewriteNamedAliasDescriptor =
+ ExplicitRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, GlobalAlias,
+ &Module::getNamedAlias>;
+
+/// Represents a rewrite for a regular expression based pattern for functions.
+/// The rewrite rule is formed from a pattern that matches the function name
+/// and a transformation that determines the target function name.
+using PatternRewriteFunctionDescriptor =
+ PatternRewriteDescriptor<RewriteDescriptor::Type::Function, Function,
+ &Module::getFunction, &Module::functions>;
+
+/// Represents a rewrite for a global variable based upon a matching pattern.
+/// Each global variable matching the provided pattern will be transformed as
+/// described in the transformation pattern for the target. Applies only to
+/// module level variables.
+using PatternRewriteGlobalVariableDescriptor =
+ PatternRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
+ GlobalVariable, &Module::getGlobalVariable,
+ &Module::globals>;
+
+/// PatternRewriteNamedAliasDescriptor - represents a rewrite for global
+/// aliases which match a given pattern. The provided transformation will be
+/// applied to each of the matching names.
+using PatternRewriteNamedAliasDescriptor =
+ PatternRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, GlobalAlias,
+ &Module::getNamedAlias, &Module::aliases>;
+
+} // end anonymous namespace
+
+bool RewriteMapParser::parse(const std::string &MapFile,
+ RewriteDescriptorList *DL) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> Mapping =
+ MemoryBuffer::getFile(MapFile);
+
+ if (!Mapping)
+ report_fatal_error(Twine("unable to read rewrite map '") + MapFile +
+ "': " + Mapping.getError().message());
+
+ if (!parse(*Mapping, DL))
+ report_fatal_error(Twine("unable to parse rewrite map '") + MapFile + "'");
+
+ return true;
+}
+
+bool RewriteMapParser::parse(std::unique_ptr<MemoryBuffer> &MapFile,
+ RewriteDescriptorList *DL) {
+ SourceMgr SM;
+ yaml::Stream YS(MapFile->getBuffer(), SM);
+
+ for (auto &Document : YS) {
+ yaml::MappingNode *DescriptorList;
+
+ // ignore empty documents
+ if (isa<yaml::NullNode>(Document.getRoot()))
+ continue;
+
+ DescriptorList = dyn_cast<yaml::MappingNode>(Document.getRoot());
+ if (!DescriptorList) {
+ YS.printError(Document.getRoot(), "DescriptorList node must be a map");
+ return false;
+ }
+
+ for (auto &Descriptor : *DescriptorList)
+ if (!parseEntry(YS, Descriptor, DL))
+ return false;
+ }
+
+ return true;
+}
+
+bool RewriteMapParser::parseEntry(yaml::Stream &YS, yaml::KeyValueNode &Entry,
+ RewriteDescriptorList *DL) {
+ yaml::ScalarNode *Key;
+ yaml::MappingNode *Value;
+ SmallString<32> KeyStorage;
+ StringRef RewriteType;
+
+ Key = dyn_cast<yaml::ScalarNode>(Entry.getKey());
+ if (!Key) {
+ YS.printError(Entry.getKey(), "rewrite type must be a scalar");
+ return false;
+ }
+
+ Value = dyn_cast<yaml::MappingNode>(Entry.getValue());
+ if (!Value) {
+ YS.printError(Entry.getValue(), "rewrite descriptor must be a map");
+ return false;
+ }
+
+ RewriteType = Key->getValue(KeyStorage);
+ if (RewriteType.equals("function"))
+ return parseRewriteFunctionDescriptor(YS, Key, Value, DL);
+ else if (RewriteType.equals("global variable"))
+ return parseRewriteGlobalVariableDescriptor(YS, Key, Value, DL);
+ else if (RewriteType.equals("global alias"))
+ return parseRewriteGlobalAliasDescriptor(YS, Key, Value, DL);
+
+ YS.printError(Entry.getKey(), "unknown rewrite type");
+ return false;
+}
+
+bool RewriteMapParser::
+parseRewriteFunctionDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
+ yaml::MappingNode *Descriptor,
+ RewriteDescriptorList *DL) {
+ bool Naked = false;
+ std::string Source;
+ std::string Target;
+ std::string Transform;
+
+ for (auto &Field : *Descriptor) {
+ yaml::ScalarNode *Key;
+ yaml::ScalarNode *Value;
+ SmallString<32> KeyStorage;
+ SmallString<32> ValueStorage;
+ StringRef KeyValue;
+
+ Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
+ if (!Key) {
+ YS.printError(Field.getKey(), "descriptor key must be a scalar");
+ return false;
+ }
+
+ Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
+ if (!Value) {
+ YS.printError(Field.getValue(), "descriptor value must be a scalar");
+ return false;
+ }
+
+ KeyValue = Key->getValue(KeyStorage);
+ if (KeyValue.equals("source")) {
+ std::string Error;
+
+ Source = std::string(Value->getValue(ValueStorage));
+ if (!Regex(Source).isValid(Error)) {
+ YS.printError(Field.getKey(), "invalid regex: " + Error);
+ return false;
+ }
+ } else if (KeyValue.equals("target")) {
+ Target = std::string(Value->getValue(ValueStorage));
+ } else if (KeyValue.equals("transform")) {
+ Transform = std::string(Value->getValue(ValueStorage));
+ } else if (KeyValue.equals("naked")) {
+ std::string Undecorated;
+
+ Undecorated = std::string(Value->getValue(ValueStorage));
+ Naked = StringRef(Undecorated).lower() == "true" || Undecorated == "1";
+ } else {
+ YS.printError(Field.getKey(), "unknown key for function");
+ return false;
+ }
+ }
+
+ if (Transform.empty() == Target.empty()) {
+ YS.printError(Descriptor,
+ "exactly one of transform or target must be specified");
+ return false;
+ }
+
+ // TODO see if there is a more elegant solution to selecting the rewrite
+ // descriptor type
+ if (!Target.empty())
+ DL->push_back(std::make_unique<ExplicitRewriteFunctionDescriptor>(
+ Source, Target, Naked));
+ else
+ DL->push_back(
+ std::make_unique<PatternRewriteFunctionDescriptor>(Source, Transform));
+
+ return true;
+}
+
+bool RewriteMapParser::
+parseRewriteGlobalVariableDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
+ yaml::MappingNode *Descriptor,
+ RewriteDescriptorList *DL) {
+ std::string Source;
+ std::string Target;
+ std::string Transform;
+
+ for (auto &Field : *Descriptor) {
+ yaml::ScalarNode *Key;
+ yaml::ScalarNode *Value;
+ SmallString<32> KeyStorage;
+ SmallString<32> ValueStorage;
+ StringRef KeyValue;
+
+ Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
+ if (!Key) {
+ YS.printError(Field.getKey(), "descriptor Key must be a scalar");
+ return false;
+ }
+
+ Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
+ if (!Value) {
+ YS.printError(Field.getValue(), "descriptor value must be a scalar");
+ return false;
+ }
+
+ KeyValue = Key->getValue(KeyStorage);
+ if (KeyValue.equals("source")) {
+ std::string Error;
+
+ Source = std::string(Value->getValue(ValueStorage));
+ if (!Regex(Source).isValid(Error)) {
+ YS.printError(Field.getKey(), "invalid regex: " + Error);
+ return false;
+ }
+ } else if (KeyValue.equals("target")) {
+ Target = std::string(Value->getValue(ValueStorage));
+ } else if (KeyValue.equals("transform")) {
+ Transform = std::string(Value->getValue(ValueStorage));
+ } else {
+ YS.printError(Field.getKey(), "unknown Key for Global Variable");
+ return false;
+ }
+ }
+
+ if (Transform.empty() == Target.empty()) {
+ YS.printError(Descriptor,
+ "exactly one of transform or target must be specified");
+ return false;
+ }
+
+ if (!Target.empty())
+ DL->push_back(std::make_unique<ExplicitRewriteGlobalVariableDescriptor>(
+ Source, Target,
+ /*Naked*/ false));
+ else
+ DL->push_back(std::make_unique<PatternRewriteGlobalVariableDescriptor>(
+ Source, Transform));
+
+ return true;
+}
+
+bool RewriteMapParser::
+parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
+ yaml::MappingNode *Descriptor,
+ RewriteDescriptorList *DL) {
+ std::string Source;
+ std::string Target;
+ std::string Transform;
+
+ for (auto &Field : *Descriptor) {
+ yaml::ScalarNode *Key;
+ yaml::ScalarNode *Value;
+ SmallString<32> KeyStorage;
+ SmallString<32> ValueStorage;
+ StringRef KeyValue;
+
+ Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
+ if (!Key) {
+ YS.printError(Field.getKey(), "descriptor key must be a scalar");
+ return false;
+ }
+
+ Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
+ if (!Value) {
+ YS.printError(Field.getValue(), "descriptor value must be a scalar");
+ return false;
+ }
+
+ KeyValue = Key->getValue(KeyStorage);
+ if (KeyValue.equals("source")) {
+ std::string Error;
+
+ Source = std::string(Value->getValue(ValueStorage));
+ if (!Regex(Source).isValid(Error)) {
+ YS.printError(Field.getKey(), "invalid regex: " + Error);
+ return false;
+ }
+ } else if (KeyValue.equals("target")) {
+ Target = std::string(Value->getValue(ValueStorage));
+ } else if (KeyValue.equals("transform")) {
+ Transform = std::string(Value->getValue(ValueStorage));
+ } else {
+ YS.printError(Field.getKey(), "unknown key for Global Alias");
+ return false;
+ }
+ }
+
+ if (Transform.empty() == Target.empty()) {
+ YS.printError(Descriptor,
+ "exactly one of transform or target must be specified");
+ return false;
+ }
+
+ if (!Target.empty())
+ DL->push_back(std::make_unique<ExplicitRewriteNamedAliasDescriptor>(
+ Source, Target,
+ /*Naked*/ false));
+ else
+ DL->push_back(std::make_unique<PatternRewriteNamedAliasDescriptor>(
+ Source, Transform));
+
+ return true;
+}
+
+namespace {
+
+class RewriteSymbolsLegacyPass : public ModulePass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ RewriteSymbolsLegacyPass();
+ RewriteSymbolsLegacyPass(SymbolRewriter::RewriteDescriptorList &DL);
+
+ bool runOnModule(Module &M) override;
+
+private:
+ RewriteSymbolPass Impl;
+};
+
+} // end anonymous namespace
+
+char RewriteSymbolsLegacyPass::ID = 0;
+
+RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass() : ModulePass(ID) {
+ initializeRewriteSymbolsLegacyPassPass(*PassRegistry::getPassRegistry());
+}
+
+RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass(
+ SymbolRewriter::RewriteDescriptorList &DL)
+ : ModulePass(ID), Impl(DL) {}
+
+bool RewriteSymbolsLegacyPass::runOnModule(Module &M) {
+ return Impl.runImpl(M);
+}
+
+PreservedAnalyses RewriteSymbolPass::run(Module &M, ModuleAnalysisManager &AM) {
+ if (!runImpl(M))
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
+
+bool RewriteSymbolPass::runImpl(Module &M) {
+ bool Changed;
+
+ Changed = false;
+ for (auto &Descriptor : Descriptors)
+ Changed |= Descriptor->performOnModule(M);
+
+ return Changed;
+}
+
+void RewriteSymbolPass::loadAndParseMapFiles() {
+ const std::vector<std::string> MapFiles(RewriteMapFiles);
+ SymbolRewriter::RewriteMapParser Parser;
+
+ for (const auto &MapFile : MapFiles)
+ Parser.parse(MapFile, &Descriptors);
+}
+
+INITIALIZE_PASS(RewriteSymbolsLegacyPass, "rewrite-symbols", "Rewrite Symbols",
+ false, false)
+
+ModulePass *llvm::createRewriteSymbolsPass() {
+ return new RewriteSymbolsLegacyPass();
+}
+
+ModulePass *
+llvm::createRewriteSymbolsPass(SymbolRewriter::RewriteDescriptorList &DL) {
+ return new RewriteSymbolsLegacyPass(DL);
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
new file mode 100644
index 0000000000..2b706858cb
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -0,0 +1,129 @@
+//===- UnifyFunctionExitNodes.cpp - Make all functions have a single exit -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is used to ensure that functions have at most one return and one
+// unreachable instruction in them.
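+//
+// For example (sketch), a function with two returning blocks is rewritten so
+// that both branch to a new "UnifiedReturnBlock":
+//
+//   ret i32 %a   -->   br label %UnifiedReturnBlock
+//   ret i32 %b   -->   br label %UnifiedReturnBlock
+//
+// where %UnifiedReturnBlock contains
+//   %UnifiedRetVal = phi i32 [ %a, ... ], [ %b, ... ]
+//   ret i32 %UnifiedRetVal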
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Transforms/Utils.h"
+using namespace llvm;
+
+char UnifyFunctionExitNodesLegacyPass::ID = 0;
+
+UnifyFunctionExitNodesLegacyPass::UnifyFunctionExitNodesLegacyPass()
+ : FunctionPass(ID) {
+ initializeUnifyFunctionExitNodesLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
+INITIALIZE_PASS(UnifyFunctionExitNodesLegacyPass, "mergereturn",
+ "Unify function exit nodes", false, false)
+
+Pass *llvm::createUnifyFunctionExitNodesPass() {
+ return new UnifyFunctionExitNodesLegacyPass();
+}
+
+void UnifyFunctionExitNodesLegacyPass::getAnalysisUsage(
+ AnalysisUsage &AU) const {
+ // We preserve the non-critical-edgeness property
+ AU.addPreservedID(BreakCriticalEdgesID);
+ // This is a cluster of orthogonal Transforms
+ AU.addPreservedID(LowerSwitchID);
+}
+
+namespace {
+
+bool unifyUnreachableBlocks(Function &F) {
+ std::vector<BasicBlock *> UnreachableBlocks;
+
+ for (BasicBlock &I : F)
+ if (isa<UnreachableInst>(I.getTerminator()))
+ UnreachableBlocks.push_back(&I);
+
+ if (UnreachableBlocks.size() <= 1)
+ return false;
+
+ BasicBlock *UnreachableBlock =
+ BasicBlock::Create(F.getContext(), "UnifiedUnreachableBlock", &F);
+ new UnreachableInst(F.getContext(), UnreachableBlock);
+
+ for (BasicBlock *BB : UnreachableBlocks) {
+ BB->back().eraseFromParent(); // Remove the unreachable inst.
+ BranchInst::Create(UnreachableBlock, BB);
+ }
+
+ return true;
+}
+
+bool unifyReturnBlocks(Function &F) {
+ std::vector<BasicBlock *> ReturningBlocks;
+
+ for (BasicBlock &I : F)
+ if (isa<ReturnInst>(I.getTerminator()))
+ ReturningBlocks.push_back(&I);
+
+ if (ReturningBlocks.size() <= 1)
+ return false;
+
+ // Insert a new basic block into the function, add PHI nodes (if the function
+ // returns values), and convert all of the return instructions into
+ // unconditional branches.
+ BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(),
+ "UnifiedReturnBlock", &F);
+
+ PHINode *PN = nullptr;
+ if (F.getReturnType()->isVoidTy()) {
+ ReturnInst::Create(F.getContext(), nullptr, NewRetBlock);
+ } else {
+ // If the function doesn't return void... add a PHI node to the block...
+ PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(),
+ "UnifiedRetVal");
+ PN->insertInto(NewRetBlock, NewRetBlock->end());
+ ReturnInst::Create(F.getContext(), PN, NewRetBlock);
+ }
+
+ // Loop over all of the blocks, replacing the return instruction with an
+ // unconditional branch.
+ for (BasicBlock *BB : ReturningBlocks) {
+ // Add an incoming element to the PHI node for every return instruction that
+ // is merging into this new block...
+ if (PN)
+ PN->addIncoming(BB->getTerminator()->getOperand(0), BB);
+
+ BB->back().eraseFromParent(); // Remove the return insn
+ BranchInst::Create(NewRetBlock, BB);
+ }
+
+ return true;
+}
+} // namespace
+
+// Unify all exit nodes of the CFG by creating a new BasicBlock, and converting
+// all returns to unconditional branches to this new basic block. Also, unify
+// all unreachable blocks.
+bool UnifyFunctionExitNodesLegacyPass::runOnFunction(Function &F) {
+ bool Changed = false;
+ Changed |= unifyUnreachableBlocks(F);
+ Changed |= unifyReturnBlocks(F);
+ return Changed;
+}
+
+PreservedAnalyses UnifyFunctionExitNodesPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ bool Changed = false;
+ Changed |= unifyUnreachableBlocks(F);
+ Changed |= unifyReturnBlocks(F);
+ return Changed ? PreservedAnalyses() : PreservedAnalyses::all();
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/UnifyLoopExits.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/UnifyLoopExits.cpp
new file mode 100644
index 0000000000..3be96ebc93
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/UnifyLoopExits.cpp
@@ -0,0 +1,254 @@
+//===- UnifyLoopExits.cpp - Redirect exiting edges to one block -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// For each natural loop with multiple exit blocks, this pass creates a new
+// block N such that all exiting blocks now branch to N, and then control flow
+// is redistributed to all the original exit blocks.
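+//
+// Sketch (hypothetical block names): exiting blocks E1 and E2 that used to
+// branch directly to exit blocks X1 and X2 are redirected to a new block
+// "loop.exit", from which guard blocks dispatch control flow back to X1 or X2
+// depending on which exiting block was taken.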
+//
+// Limitation: This assumes that all terminators in the CFG are direct branches
+// (the "br" instruction). The presence of any other control flow
+// such as indirectbr, switch or callbr will cause an assert.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/UnifyLoopExits.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+#define DEBUG_TYPE "unify-loop-exits"
+
+using namespace llvm;
+
+static cl::opt<unsigned> MaxBooleansInControlFlowHub(
+ "max-booleans-in-control-flow-hub", cl::init(32), cl::Hidden,
+ cl::desc("Set the maximum number of outgoing blocks for using a boolean "
+ "value to record the exiting block in CreateControlFlowHub."));
+
+namespace {
+struct UnifyLoopExitsLegacyPass : public FunctionPass {
+ static char ID;
+ UnifyLoopExitsLegacyPass() : FunctionPass(ID) {
+ initializeUnifyLoopExitsLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequiredID(LowerSwitchID);
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreservedID(LowerSwitchID);
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override;
+};
+} // namespace
+
+char UnifyLoopExitsLegacyPass::ID = 0;
+
+FunctionPass *llvm::createUnifyLoopExitsPass() {
+ return new UnifyLoopExitsLegacyPass();
+}
+
+INITIALIZE_PASS_BEGIN(UnifyLoopExitsLegacyPass, "unify-loop-exits",
+ "Fixup each natural loop to have a single exit block",
+ false /* Only looks at CFG */, false /* Analysis Pass */)
+INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(UnifyLoopExitsLegacyPass, "unify-loop-exits",
+ "Fixup each natural loop to have a single exit block",
+ false /* Only looks at CFG */, false /* Analysis Pass */)
+
+// The current transform introduces new control flow paths which may break the
+// SSA requirement that every def must dominate all its uses. For example,
+// consider a value D defined inside the loop that is used by some instruction
+// U outside the loop. It follows that D dominates U, since the original
+// program has valid SSA form. After merging the exits, all paths from D to U
+// now flow through the unified exit block. In addition, there may be other
+// paths that do not pass through D, but now reach the unified exit
+// block. Thus, D no longer dominates U.
+//
+// Restore the dominance by creating a phi for each such D at the new unified
+// loop exit. But when doing this, ignore any uses U that are in the new unified
+// loop exit, since those were introduced specially when the block was created.
+//
+// The use of SSAUpdater seems like overkill for this operation. The location
+// for creating the new PHI is well-known, and also the set of incoming blocks
+// to the new PHI.
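+//
+// Sketch (hypothetical names): if %d is defined in a block that dominates the
+// exiting block E1 but not E2, the unified exit receives
+//   %d.moved = phi i32 [ %d, %E1 ], [ undef, %E2 ]
+// and every external user of %d is rewritten to use %d.moved instead.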
+static void restoreSSA(const DominatorTree &DT, const Loop *L,
+ const SetVector<BasicBlock *> &Incoming,
+ BasicBlock *LoopExitBlock) {
+ using InstVector = SmallVector<Instruction *, 8>;
+ using IIMap = MapVector<Instruction *, InstVector>;
+ IIMap ExternalUsers;
+ for (auto *BB : L->blocks()) {
+ for (auto &I : *BB) {
+ for (auto &U : I.uses()) {
+ auto UserInst = cast<Instruction>(U.getUser());
+ auto UserBlock = UserInst->getParent();
+ if (UserBlock == LoopExitBlock)
+ continue;
+ if (L->contains(UserBlock))
+ continue;
+ LLVM_DEBUG(dbgs() << "added ext use for " << I.getName() << "("
+ << BB->getName() << ")"
+ << ": " << UserInst->getName() << "("
+ << UserBlock->getName() << ")"
+ << "\n");
+ ExternalUsers[&I].push_back(UserInst);
+ }
+ }
+ }
+
+ for (auto II : ExternalUsers) {
+ // For each Def used outside the loop, create NewPhi in
+ // LoopExitBlock. NewPhi receives Def only along exiting blocks that
+ // dominate it, while the remaining values are undefined since those paths
+ // didn't exist in the original CFG.
+ auto Def = II.first;
+ LLVM_DEBUG(dbgs() << "externally used: " << Def->getName() << "\n");
+ auto NewPhi =
+ PHINode::Create(Def->getType(), Incoming.size(),
+ Def->getName() + ".moved", &LoopExitBlock->front());
+ for (auto *In : Incoming) {
+ LLVM_DEBUG(dbgs() << "predecessor " << In->getName() << ": ");
+ if (Def->getParent() == In || DT.dominates(Def, In)) {
+ LLVM_DEBUG(dbgs() << "dominated\n");
+ NewPhi->addIncoming(Def, In);
+ } else {
+ LLVM_DEBUG(dbgs() << "not dominated\n");
+ NewPhi->addIncoming(UndefValue::get(Def->getType()), In);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "external users:");
+ for (auto *U : II.second) {
+ LLVM_DEBUG(dbgs() << " " << U->getName());
+ U->replaceUsesOfWith(Def, NewPhi);
+ }
+ LLVM_DEBUG(dbgs() << "\n");
+ }
+}
+
+static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
+ // To unify the loop exits, we need a list of the exiting blocks as
+ // well as exit blocks. The functions for locating these lists both
+ // traverse the entire loop body. It is more efficient to first
+ // locate the exiting blocks and then examine their successors to
+ // locate the exit blocks.
+ SetVector<BasicBlock *> ExitingBlocks;
+ SetVector<BasicBlock *> Exits;
+
+ // We need SetVectors, but the Loop API takes a vector, so we use a temporary.
+ SmallVector<BasicBlock *, 8> Temp;
+ L->getExitingBlocks(Temp);
+ for (auto *BB : Temp) {
+ ExitingBlocks.insert(BB);
+ for (auto *S : successors(BB)) {
+ auto SL = LI.getLoopFor(S);
+ // A successor is not an exit if it is directly or indirectly in the
+ // current loop.
+ if (SL == L || L->contains(SL))
+ continue;
+ Exits.insert(S);
+ }
+ }
+
+ LLVM_DEBUG(
+ dbgs() << "Found exit blocks:";
+ for (auto Exit : Exits) {
+ dbgs() << " " << Exit->getName();
+ }
+ dbgs() << "\n";
+
+ dbgs() << "Found exiting blocks:";
+ for (auto EB : ExitingBlocks) {
+ dbgs() << " " << EB->getName();
+ }
+ dbgs() << "\n";);
+
+ if (Exits.size() <= 1) {
+ LLVM_DEBUG(dbgs() << "loop does not have multiple exits; nothing to do\n");
+ return false;
+ }
+
+ SmallVector<BasicBlock *, 8> GuardBlocks;
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ auto LoopExitBlock =
+ CreateControlFlowHub(&DTU, GuardBlocks, ExitingBlocks, Exits, "loop.exit",
+ MaxBooleansInControlFlowHub.getValue());
+
+ restoreSSA(DT, L, ExitingBlocks, LoopExitBlock);
+
+#if defined(EXPENSIVE_CHECKS)
+ assert(DT.verify(DominatorTree::VerificationLevel::Full));
+#else
+ assert(DT.verify(DominatorTree::VerificationLevel::Fast));
+#endif // EXPENSIVE_CHECKS
+ L->verifyLoop();
+
+ // The guard blocks were created outside the loop, so they need to become
+ // members of the parent loop.
+ if (auto ParentLoop = L->getParentLoop()) {
+ for (auto *G : GuardBlocks) {
+ ParentLoop->addBasicBlockToLoop(G, LI);
+ }
+ ParentLoop->verifyLoop();
+ }
+
+#if defined(EXPENSIVE_CHECKS)
+ LI.verify(DT);
+#endif // EXPENSIVE_CHECKS
+
+ return true;
+}
+
+static bool runImpl(LoopInfo &LI, DominatorTree &DT) {
+
+ bool Changed = false;
+ auto Loops = LI.getLoopsInPreorder();
+ for (auto *L : Loops) {
+ LLVM_DEBUG(dbgs() << "Loop: " << L->getHeader()->getName() << " (depth: "
+ << LI.getLoopDepth(L->getHeader()) << ")\n");
+ Changed |= unifyLoopExits(DT, LI, L);
+ }
+ return Changed;
+}
+
+bool UnifyLoopExitsLegacyPass::runOnFunction(Function &F) {
+ LLVM_DEBUG(dbgs() << "===== Unifying loop exits in function " << F.getName()
+ << "\n");
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ return runImpl(LI, DT);
+}
+
+namespace llvm {
+
+PreservedAnalyses UnifyLoopExitsPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+
+ if (!runImpl(LI, DT))
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<LoopAnalysis>();
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
+} // namespace llvm
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/Utils.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/Utils.cpp
new file mode 100644
index 0000000000..d002922cfd
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/Utils.cpp
@@ -0,0 +1,65 @@
+//===-- Utils.cpp - TransformUtils Infrastructure -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the common initialization infrastructure for the
+// TransformUtils library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils.h"
+#include "llvm-c/Initialization.h"
+#include "llvm-c/Transforms/Utils.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+
+using namespace llvm;
+
+/// initializeTransformUtils - Initialize all passes in the TransformUtils
+/// library.
+void llvm::initializeTransformUtils(PassRegistry &Registry) {
+ initializeAddDiscriminatorsLegacyPassPass(Registry);
+ initializeAssumeSimplifyPassLegacyPassPass(Registry);
+ initializeAssumeBuilderPassLegacyPassPass(Registry);
+ initializeBreakCriticalEdgesPass(Registry);
+ initializeCanonicalizeFreezeInLoopsPass(Registry);
+ initializeInstNamerPass(Registry);
+ initializeLCSSAWrapperPassPass(Registry);
+ initializeLibCallsShrinkWrapLegacyPassPass(Registry);
+ initializeLoopSimplifyPass(Registry);
+ initializeLowerGlobalDtorsLegacyPassPass(Registry);
+ initializeLowerInvokeLegacyPassPass(Registry);
+ initializeLowerSwitchLegacyPassPass(Registry);
+ initializePromoteLegacyPassPass(Registry);
+ initializeStripNonLineTableDebugLegacyPassPass(Registry);
+ initializeUnifyFunctionExitNodesLegacyPassPass(Registry);
+ initializeMetaRenamerPass(Registry);
+ initializeStripGCRelocatesLegacyPass(Registry);
+ initializePredicateInfoPrinterLegacyPassPass(Registry);
+ initializeInjectTLIMappingsLegacyPass(Registry);
+ initializeFixIrreduciblePass(Registry);
+ initializeUnifyLoopExitsLegacyPassPass(Registry);
+}
+
+/// LLVMInitializeTransformUtils - C binding for initializeTransformUtilsPasses.
+void LLVMInitializeTransformUtils(LLVMPassRegistryRef R) {
+ initializeTransformUtils(*unwrap(R));
+}
+
+void LLVMAddLowerSwitchPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLowerSwitchPass());
+}
+
+void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createPromoteMemoryToRegisterPass());
+}
+
+void LLVMAddAddDiscriminatorsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createAddDiscriminatorsPass());
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/VNCoercion.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/VNCoercion.cpp
new file mode 100644
index 0000000000..f295a7e312
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/VNCoercion.cpp
@@ -0,0 +1,593 @@
+#include "llvm/Transforms/Utils/VNCoercion.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "vncoerce"
+
+namespace llvm {
+namespace VNCoercion {
+
+static bool isFirstClassAggregateOrScalableType(Type *Ty) {
+ return Ty->isStructTy() || Ty->isArrayTy() || isa<ScalableVectorType>(Ty);
+}
+
+/// Return true if coerceAvailableValueToLoadType will succeed.
+bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
+ const DataLayout &DL) {
+ Type *StoredTy = StoredVal->getType();
+
+ if (StoredTy == LoadTy)
+ return true;
+
+ // If the loaded/stored value is a first class array/struct, or scalable type,
+ // don't try to transform them. We need to be able to bitcast to integer.
+ if (isFirstClassAggregateOrScalableType(LoadTy) ||
+ isFirstClassAggregateOrScalableType(StoredTy))
+ return false;
+
+ uint64_t StoreSize = DL.getTypeSizeInBits(StoredTy).getFixedValue();
+
+ // The store size must be byte-aligned to support future type casts.
+ if (llvm::alignTo(StoreSize, 8) != StoreSize)
+ return false;
+
+ // The store has to be at least as big as the load.
+ if (StoreSize < DL.getTypeSizeInBits(LoadTy).getFixedValue())
+ return false;
+
+ bool StoredNI = DL.isNonIntegralPointerType(StoredTy->getScalarType());
+ bool LoadNI = DL.isNonIntegralPointerType(LoadTy->getScalarType());
+ // Don't coerce non-integral pointers to integers or vice versa.
+ if (StoredNI != LoadNI) {
+ // As a special case, allow coercion of memset used to initialize
+ // an array w/null. Despite non-integral pointers not generally having a
+ // specific bit pattern, we do assume null is zero.
+ if (auto *CI = dyn_cast<Constant>(StoredVal))
+ return CI->isNullValue();
+ return false;
+ } else if (StoredNI && LoadNI &&
+ StoredTy->getPointerAddressSpace() !=
+ LoadTy->getPointerAddressSpace()) {
+ return false;
+ }
+
+ // The implementation below uses inttoptr for vectors of unequal size; we
+ // can't allow this for non integral pointers. We could teach it to extract
+ // exact subvectors if desired.
+ if (StoredNI && StoreSize != DL.getTypeSizeInBits(LoadTy).getFixedValue())
+ return false;
+
+ if (StoredTy->isTargetExtTy() || LoadTy->isTargetExtTy())
+ return false;
+
+ return true;
+}
+
+/// If we saw a store of a value to memory, and
+/// then a load from a must-aliased pointer of a different type, try to coerce
+/// the stored value. LoadedTy is the type of the load we want to replace.
+/// IRB is IRBuilder used to insert new instructions.
+///
+/// If we can't do it, return null.
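+///
+/// For example (sketch), a stored i64 feeding an i32 load is truncated to i32;
+/// on big-endian targets it is first shifted right by 32 bits so the truncate
+/// keeps the bytes the load would actually have read.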
+Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
+ IRBuilderBase &Helper,
+ const DataLayout &DL) {
+ assert(canCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) &&
+ "precondition violation - materialization can't fail");
+ if (auto *C = dyn_cast<Constant>(StoredVal))
+ StoredVal = ConstantFoldConstant(C, DL);
+
+ // If this is already the right type, just return it.
+ Type *StoredValTy = StoredVal->getType();
+
+ uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy).getFixedValue();
+ uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy).getFixedValue();
+
+ // If the store and reload are the same size, we can always reuse it.
+ if (StoredValSize == LoadedValSize) {
+ // Pointer to Pointer -> use bitcast.
+ if (StoredValTy->isPtrOrPtrVectorTy() && LoadedTy->isPtrOrPtrVectorTy()) {
+ StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy);
+ } else {
+ // Convert source pointers to integers, which can be bitcast.
+ if (StoredValTy->isPtrOrPtrVectorTy()) {
+ StoredValTy = DL.getIntPtrType(StoredValTy);
+ StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);
+ }
+
+ Type *TypeToCastTo = LoadedTy;
+ if (TypeToCastTo->isPtrOrPtrVectorTy())
+ TypeToCastTo = DL.getIntPtrType(TypeToCastTo);
+
+ if (StoredValTy != TypeToCastTo)
+ StoredVal = Helper.CreateBitCast(StoredVal, TypeToCastTo);
+
+ // Cast to pointer if the load needs a pointer type.
+ if (LoadedTy->isPtrOrPtrVectorTy())
+ StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);
+ }
+
+ if (auto *C = dyn_cast<ConstantExpr>(StoredVal))
+ StoredVal = ConstantFoldConstant(C, DL);
+
+ return StoredVal;
+ }
+ // If the loaded value is smaller than the available value, then we can
+ // extract out a piece from it. If the available value is too small, then we
+ // can't do anything.
+ assert(StoredValSize >= LoadedValSize &&
+ "canCoerceMustAliasedValueToLoad fail");
+
+ // Convert source pointers to integers, which can be manipulated.
+ if (StoredValTy->isPtrOrPtrVectorTy()) {
+ StoredValTy = DL.getIntPtrType(StoredValTy);
+ StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);
+ }
+
+ // Convert vectors and fp to integer, which can be manipulated.
+ if (!StoredValTy->isIntegerTy()) {
+ StoredValTy = IntegerType::get(StoredValTy->getContext(), StoredValSize);
+ StoredVal = Helper.CreateBitCast(StoredVal, StoredValTy);
+ }
+
+ // If this is a big-endian system, we need to shift the value down to the low
+ // bits so that a truncate will work.
+ if (DL.isBigEndian()) {
+ uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy).getFixedValue() -
+ DL.getTypeStoreSizeInBits(LoadedTy).getFixedValue();
+ StoredVal = Helper.CreateLShr(
+ StoredVal, ConstantInt::get(StoredVal->getType(), ShiftAmt));
+ }
+
+ // Truncate the integer to the right size now.
+ Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadedValSize);
+ StoredVal = Helper.CreateTruncOrBitCast(StoredVal, NewIntTy);
+
+ if (LoadedTy != NewIntTy) {
+ // If the result is a pointer, inttoptr.
+ if (LoadedTy->isPtrOrPtrVectorTy())
+ StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);
+ else
+ // Otherwise, bitcast.
+ StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy);
+ }
+
+ if (auto *C = dyn_cast<Constant>(StoredVal))
+ StoredVal = ConstantFoldConstant(C, DL);
+
+ return StoredVal;
+}
+
+/// This function is called when we have a memdep query of a load that ends up
+/// being a clobbering memory write (store, memset, memcpy, memmove). This
+/// means that the write *may* provide bits used by the load but we can't be
+/// sure because the pointers don't must-alias.
+///
+/// Check this case to see if there is anything more we can do before we give
+/// up. This returns -1 if we have to give up, or a byte number in the stored
+/// value of the piece that feeds the load.
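+///
+/// For example (sketch), an 8-byte store at offset 0 from some base pointer
+/// and a 4-byte load at offset 4 from the same base yield 4: the load reads
+/// the last four bytes of the stored value.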
+static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
+ Value *WritePtr,
+ uint64_t WriteSizeInBits,
+ const DataLayout &DL) {
+ // If the loaded/stored value is a first class array/struct, or scalable type,
+ // don't try to transform them. We need to be able to bitcast to integer.
+ if (isFirstClassAggregateOrScalableType(LoadTy))
+ return -1;
+
+ int64_t StoreOffset = 0, LoadOffset = 0;
+ Value *StoreBase =
+ GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL);
+ Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL);
+ if (StoreBase != LoadBase)
+ return -1;
+
+ uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedValue();
+
+ if ((WriteSizeInBits & 7) | (LoadSize & 7))
+ return -1;
+ uint64_t StoreSize = WriteSizeInBits / 8; // Convert to bytes.
+ LoadSize /= 8;
+
+ // If the Load isn't completely contained within the stored bits, we don't
+ // have all the bits to feed it. We could do something crazy in the future
+ // (issue a smaller load then merge the bits in) but this seems unlikely to be
+ // valuable.
+ if (StoreOffset > LoadOffset ||
+ StoreOffset + int64_t(StoreSize) < LoadOffset + int64_t(LoadSize))
+ return -1;
+
+ // Okay, we can do this transformation. Return the number of bytes into the
+ // store that the load is.
+ return LoadOffset - StoreOffset;
+}
+
+/// This function is called when we have a
+/// memdep query of a load that ends up being a clobbering store.
+int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
+ StoreInst *DepSI, const DataLayout &DL) {
+ auto *StoredVal = DepSI->getValueOperand();
+
+ // Cannot handle reading from store of first-class aggregate or scalable type.
+ if (isFirstClassAggregateOrScalableType(StoredVal->getType()))
+ return -1;
+
+ if (!canCoerceMustAliasedValueToLoad(StoredVal, LoadTy, DL))
+ return -1;
+
+ Value *StorePtr = DepSI->getPointerOperand();
+ uint64_t StoreSize =
+ DL.getTypeSizeInBits(DepSI->getValueOperand()->getType()).getFixedValue();
+ return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, StorePtr, StoreSize,
+ DL);
+}
+
+/// Looks at a memory location for a load (specified by MemLocBase, Offs, and
+/// Size) and compares it against a load.
+///
+/// If the specified load could be safely widened to a larger integer load
+/// that is 1) still efficient, 2) safe for the target, and 3) would provide
+/// the specified memory location value, then this function returns the size
+/// in bytes of the load width to use. If not, this returns zero.
+static unsigned getLoadLoadClobberFullWidthSize(const Value *MemLocBase,
+ int64_t MemLocOffs,
+ unsigned MemLocSize,
+ const LoadInst *LI) {
+ // We can only extend simple integer loads.
+ if (!isa<IntegerType>(LI->getType()) || !LI->isSimple())
+ return 0;
+
+ // Load widening is hostile to ThreadSanitizer: it may cause false positives
+ // or make the reports more cryptic (access sizes are wrong).
+ if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
+ return 0;
+
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+
+ // Get the base of this load.
+ int64_t LIOffs = 0;
+ const Value *LIBase =
+ GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, DL);
+
+ // If the two pointers are not based on the same pointer, we can't tell that
+ // they are related.
+ if (LIBase != MemLocBase)
+ return 0;
+
+ // Okay, the two values are based on the same pointer, but returned as
+ // no-alias. This happens when we have things like two byte loads at "P+1"
+ // and "P+3". Check to see if increasing the size of the "LI" load up to its
+ // alignment (or the largest native integer type) will allow us to load all
+ // the bits required by MemLoc.
+
+ // If MemLoc is before LI, then no widening of LI will help us out.
+ if (MemLocOffs < LIOffs)
+ return 0;
+
+ // Get the alignment of the load in bytes. We assume that it is safe to load
+ // any legal integer up to this size without a problem. For example, if we're
+ // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
+ // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it
+ // to i16.
+ unsigned LoadAlign = LI->getAlign().value();
+
+ int64_t MemLocEnd = MemLocOffs + MemLocSize;
+
+ // If no amount of rounding up will let MemLoc fit into LI, then bail out.
+ if (LIOffs + LoadAlign < MemLocEnd)
+ return 0;
+
+ // This is the size of the load to try. Start with the next larger power of
+ // two.
+ unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits() / 8U;
+ NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
+
+ while (true) {
+ // If this load size is bigger than our known alignment or would not fit
+ // into a native integer register, then we fail.
+ if (NewLoadByteSize > LoadAlign ||
+ !DL.fitsInLegalInteger(NewLoadByteSize * 8))
+ return 0;
+
+ if (LIOffs + NewLoadByteSize > MemLocEnd &&
+ (LI->getParent()->getParent()->hasFnAttribute(
+ Attribute::SanitizeAddress) ||
+ LI->getParent()->getParent()->hasFnAttribute(
+ Attribute::SanitizeHWAddress)))
+ // We will be reading past the location accessed by the original program.
+ // While this is safe in a regular build, Address Safety analysis tools
+ // may start reporting false warnings. So, don't do widening.
+ return 0;
+
+ // If a load of this width would include all of MemLoc, then we succeed.
+ if (LIOffs + NewLoadByteSize >= MemLocEnd)
+ return NewLoadByteSize;
+
+ NewLoadByteSize <<= 1;
+ }
+}
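+
+// Illustrative example (editor's sketch, assumptions noted): for an i8 load
+// of %p that is known to be 4-byte aligned, and a clobber location covering
+// the byte at %p+3, the loop above settles on a 4-byte width, so the i8 load
+// may be widened to an i32 load that also provides the clobbered byte.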
+
+/// This function is called when we have a
+/// memdep query of a load that ends up being clobbered by another load. See if
+/// the other load can feed into the second load.
+int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
+ const DataLayout &DL) {
+  // Cannot handle reading from a clobbering load of a first-class aggregate
+  // yet.
+ if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
+ return -1;
+
+ if (!canCoerceMustAliasedValueToLoad(DepLI, LoadTy, DL))
+ return -1;
+
+ Value *DepPtr = DepLI->getPointerOperand();
+ uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType()).getFixedValue();
+ int R = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
+ if (R != -1)
+ return R;
+
+  // If we have a load/load clobber and DepLI can be widened to cover this
+  // load, then we should widen it!
+ int64_t LoadOffs = 0;
+ const Value *LoadBase =
+ GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL);
+ unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedValue();
+
+ unsigned Size =
+ getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI);
+ if (Size == 0)
+ return -1;
+
+ // Check non-obvious conditions enforced by MDA which we rely on for being
+ // able to materialize this potentially available value
+ assert(DepLI->isSimple() && "Cannot widen volatile/atomic load!");
+ assert(DepLI->getType()->isIntegerTy() && "Can't widen non-integer load");
+
+ return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size * 8, DL);
+}
+
+int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
+ MemIntrinsic *MI, const DataLayout &DL) {
+ // If the mem operation is a non-constant size, we can't handle it.
+ ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
+ if (!SizeCst)
+ return -1;
+ uint64_t MemSizeInBits = SizeCst->getZExtValue() * 8;
+
+  // If this is a memset, we just need to see if the offset is valid within the
+  // size of the memset.
+ if (const auto *memset_inst = dyn_cast<MemSetInst>(MI)) {
+ if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
+ auto *CI = dyn_cast<ConstantInt>(memset_inst->getValue());
+ if (!CI || !CI->isZero())
+ return -1;
+ }
+ return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
+ MemSizeInBits, DL);
+ }
+
+ // If we have a memcpy/memmove, the only case we can handle is if this is a
+ // copy from constant memory. In that case, we can read directly from the
+ // constant memory.
+ MemTransferInst *MTI = cast<MemTransferInst>(MI);
+
+ Constant *Src = dyn_cast<Constant>(MTI->getSource());
+ if (!Src)
+ return -1;
+
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(Src));
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+ return -1;
+
+ // See if the access is within the bounds of the transfer.
+ int Offset = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
+ MemSizeInBits, DL);
+ if (Offset == -1)
+ return Offset;
+
+ // Otherwise, see if we can constant fold a load from the constant with the
+ // offset applied as appropriate.
+ unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
+ if (ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset), DL))
+ return Offset;
+ return -1;
+}
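+
+// Illustrative example (editor's sketch): for
+//   call void @llvm.memcpy.p0.p0.i64(ptr %p, ptr @cst, i64 16, i1 false)
+//   %v = load i32, ptr %q       ; %q is a GEP to byte 4 of %p
+// where @cst is a constant global with a definitive initializer, the function
+// returns 4 provided the folder can actually produce a constant for the load
+// at that offset; otherwise it returns -1.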
+
+static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset,
+ Type *LoadTy, IRBuilderBase &Builder,
+ const DataLayout &DL) {
+ LLVMContext &Ctx = SrcVal->getType()->getContext();
+
+ // If two pointers are in the same address space, they have the same size,
+ // so we don't need to do any truncation, etc. This avoids introducing
+ // ptrtoint instructions for pointers that may be non-integral.
+ if (SrcVal->getType()->isPointerTy() && LoadTy->isPointerTy() &&
+ cast<PointerType>(SrcVal->getType())->getAddressSpace() ==
+ cast<PointerType>(LoadTy)->getAddressSpace()) {
+ return SrcVal;
+ }
+
+ uint64_t StoreSize =
+ (DL.getTypeSizeInBits(SrcVal->getType()).getFixedValue() + 7) / 8;
+ uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy).getFixedValue() + 7) / 8;
+ // Compute which bits of the stored value are being used by the load. Convert
+ // to an integer type to start with.
+ if (SrcVal->getType()->isPtrOrPtrVectorTy())
+ SrcVal =
+ Builder.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType()));
+ if (!SrcVal->getType()->isIntegerTy())
+ SrcVal =
+ Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8));
+
+ // Shift the bits to the least significant depending on endianness.
+ unsigned ShiftAmt;
+ if (DL.isLittleEndian())
+ ShiftAmt = Offset * 8;
+ else
+ ShiftAmt = (StoreSize - LoadSize - Offset) * 8;
+ if (ShiftAmt)
+ SrcVal = Builder.CreateLShr(SrcVal,
+ ConstantInt::get(SrcVal->getType(), ShiftAmt));
+
+ if (LoadSize != StoreSize)
+ SrcVal = Builder.CreateTruncOrBitCast(SrcVal,
+ IntegerType::get(Ctx, LoadSize * 8));
+ return SrcVal;
+}
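+
+// Worked example (editor's sketch): an i64 store feeding an i16 load at byte
+// offset 2 gives StoreSize = 8, LoadSize = 2, Offset = 2. On a little-endian
+// target the wanted bytes start at bit 16, so ShiftAmt = 2 * 8 = 16; on a
+// big-endian target they start at bit (8 - 2 - 2) * 8 = 32. After the lshr
+// the value is truncated to i16.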
+
+/// This function is called when we have a memdep query of a load that ends up
+/// being a clobbering store. This means that the store provides bits used by
+/// the load but the pointers don't must-alias. Check this case to see if
+/// there is anything more we can do before we give up.
+Value *getStoreValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy,
+ Instruction *InsertPt, const DataLayout &DL) {
+
+ IRBuilder<> Builder(InsertPt);
+ SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, Builder, DL);
+ return coerceAvailableValueToLoadType(SrcVal, LoadTy, Builder, DL);
+}
+
+Constant *getConstantStoreValueForLoad(Constant *SrcVal, unsigned Offset,
+ Type *LoadTy, const DataLayout &DL) {
+ return ConstantFoldLoadFromConst(SrcVal, LoadTy, APInt(32, Offset), DL);
+}
+
+/// This function is called when we have a memdep query of a load that ends up
+/// being a clobbering load. This means that the load *may* provide bits used
+/// by the load but we can't be sure because the pointers don't must-alias.
+/// Check this case to see if there is anything more we can do before we give
+/// up.
+Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy,
+ Instruction *InsertPt, const DataLayout &DL) {
+ // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
+ // widen SrcVal out to a larger load.
+ unsigned SrcValStoreSize =
+ DL.getTypeStoreSize(SrcVal->getType()).getFixedValue();
+ unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedValue();
+ if (Offset + LoadSize > SrcValStoreSize) {
+ assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
+ assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
+    // If we have a load/load clobber and DepLI can be widened to cover this
+    // load, then we should widen it to the next power-of-2 size that is big
+    // enough!
+ unsigned NewLoadSize = Offset + LoadSize;
+ if (!isPowerOf2_32(NewLoadSize))
+ NewLoadSize = NextPowerOf2(NewLoadSize);
+
+ Value *PtrVal = SrcVal->getPointerOperand();
+ // Insert the new load after the old load. This ensures that subsequent
+ // memdep queries will find the new load. We can't easily remove the old
+ // load completely because it is already in the value numbering table.
+ IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
+ Type *DestTy = IntegerType::get(LoadTy->getContext(), NewLoadSize * 8);
+ Type *DestPTy =
+ PointerType::get(DestTy, PtrVal->getType()->getPointerAddressSpace());
+ Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
+ PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
+ LoadInst *NewLoad = Builder.CreateLoad(DestTy, PtrVal);
+ NewLoad->takeName(SrcVal);
+ NewLoad->setAlignment(SrcVal->getAlign());
+
+ LLVM_DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
+ LLVM_DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
+
+ // Replace uses of the original load with the wider load. On a big endian
+ // system, we need to shift down to get the relevant bits.
+ Value *RV = NewLoad;
+ if (DL.isBigEndian())
+ RV = Builder.CreateLShr(RV, (NewLoadSize - SrcValStoreSize) * 8);
+ RV = Builder.CreateTrunc(RV, SrcVal->getType());
+ SrcVal->replaceAllUsesWith(RV);
+
+ SrcVal = NewLoad;
+ }
+
+ return getStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, DL);
+}
+
+Constant *getConstantLoadValueForLoad(Constant *SrcVal, unsigned Offset,
+ Type *LoadTy, const DataLayout &DL) {
+ unsigned SrcValStoreSize =
+ DL.getTypeStoreSize(SrcVal->getType()).getFixedValue();
+ unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedValue();
+ if (Offset + LoadSize > SrcValStoreSize)
+ return nullptr;
+ return getConstantStoreValueForLoad(SrcVal, Offset, LoadTy, DL);
+}
+
+/// This function is called when we have a
+/// memdep query of a load that ends up being a clobbering mem intrinsic.
+Value *getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
+ Type *LoadTy, Instruction *InsertPt,
+ const DataLayout &DL) {
+ LLVMContext &Ctx = LoadTy->getContext();
+ uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedValue() / 8;
+ IRBuilder<> Builder(InsertPt);
+
+ // We know that this method is only called when the mem transfer fully
+ // provides the bits for the load.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
+    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable and
+    // regardless of what the offset is.
+ Value *Val = MSI->getValue();
+ if (LoadSize != 1)
+ Val =
+ Builder.CreateZExtOrBitCast(Val, IntegerType::get(Ctx, LoadSize * 8));
+ Value *OneElt = Val;
+
+ // Splat the value out to the right number of bits.
+ for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize;) {
+ // If we can double the number of bytes set, do it.
+ if (NumBytesSet * 2 <= LoadSize) {
+ Value *ShVal = Builder.CreateShl(
+ Val, ConstantInt::get(Val->getType(), NumBytesSet * 8));
+ Val = Builder.CreateOr(Val, ShVal);
+ NumBytesSet <<= 1;
+ continue;
+ }
+
+ // Otherwise insert one byte at a time.
+ Value *ShVal =
+ Builder.CreateShl(Val, ConstantInt::get(Val->getType(), 1 * 8));
+ Val = Builder.CreateOr(OneElt, ShVal);
+ ++NumBytesSet;
+ }
+
+ return coerceAvailableValueToLoadType(Val, LoadTy, Builder, DL);
+ }
+
+ // Otherwise, this is a memcpy/memmove from a constant global.
+ MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
+ Constant *Src = cast<Constant>(MTI->getSource());
+ unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
+ return ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset),
+ DL);
+}
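+
+// Illustrative trace (editor's sketch): for a memset with byte value 0xAB
+// feeding an i32 load, Val starts as 0x000000AB; the first doubling step
+// (shl 8, or) conceptually yields 0x0000ABAB and the second (shl 16, or)
+// yields 0xABABABAB, which is then coerced to the load's type.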
+
+Constant *getConstantMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
+ Type *LoadTy, const DataLayout &DL) {
+ LLVMContext &Ctx = LoadTy->getContext();
+ uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedValue() / 8;
+
+ // We know that this method is only called when the mem transfer fully
+ // provides the bits for the load.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
+ auto *Val = dyn_cast<ConstantInt>(MSI->getValue());
+ if (!Val)
+ return nullptr;
+
+ Val = ConstantInt::get(Ctx, APInt::getSplat(LoadSize * 8, Val->getValue()));
+ return ConstantFoldLoadFromConst(Val, LoadTy, DL);
+ }
+
+ // Otherwise, this is a memcpy/memmove from a constant global.
+ MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
+ Constant *Src = cast<Constant>(MTI->getSource());
+ unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
+ return ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset),
+ DL);
+}
+} // namespace VNCoercion
+} // namespace llvm
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/ValueMapper.cpp b/contrib/libs/llvm16/lib/Transforms/Utils/ValueMapper.cpp
new file mode 100644
index 0000000000..a5edbb2acc
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/ValueMapper.cpp
@@ -0,0 +1,1209 @@
+//===- ValueMapper.cpp - Interface shared by lib/Transforms/Utils ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MapValue function, which is shared by various parts of
+// the lib/Transforms/Utils library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalIFunc.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include <cassert>
+#include <limits>
+#include <memory>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "value-mapper"
+
+// Out of line method to get vtable etc for class.
+void ValueMapTypeRemapper::anchor() {}
+void ValueMaterializer::anchor() {}
+
+namespace {
+
+/// A basic block used in a BlockAddress whose function body is not yet
+/// materialized.
+struct DelayedBasicBlock {
+ BasicBlock *OldBB;
+ std::unique_ptr<BasicBlock> TempBB;
+
+ DelayedBasicBlock(const BlockAddress &Old)
+ : OldBB(Old.getBasicBlock()),
+ TempBB(BasicBlock::Create(Old.getContext())) {}
+};
+
+struct WorklistEntry {
+ enum EntryKind {
+ MapGlobalInit,
+ MapAppendingVar,
+ MapAliasOrIFunc,
+ RemapFunction
+ };
+ struct GVInitTy {
+ GlobalVariable *GV;
+ Constant *Init;
+ };
+ struct AppendingGVTy {
+ GlobalVariable *GV;
+ Constant *InitPrefix;
+ };
+ struct AliasOrIFuncTy {
+ GlobalValue *GV;
+ Constant *Target;
+ };
+
+ unsigned Kind : 2;
+ unsigned MCID : 29;
+ unsigned AppendingGVIsOldCtorDtor : 1;
+ unsigned AppendingGVNumNewMembers;
+ union {
+ GVInitTy GVInit;
+ AppendingGVTy AppendingGV;
+ AliasOrIFuncTy AliasOrIFunc;
+ Function *RemapF;
+ } Data;
+};
+
+struct MappingContext {
+ ValueToValueMapTy *VM;
+ ValueMaterializer *Materializer = nullptr;
+
+ /// Construct a MappingContext with a value map and materializer.
+ explicit MappingContext(ValueToValueMapTy &VM,
+ ValueMaterializer *Materializer = nullptr)
+ : VM(&VM), Materializer(Materializer) {}
+};
+
+class Mapper {
+ friend class MDNodeMapper;
+
+#ifndef NDEBUG
+ DenseSet<GlobalValue *> AlreadyScheduled;
+#endif
+
+ RemapFlags Flags;
+ ValueMapTypeRemapper *TypeMapper;
+ unsigned CurrentMCID = 0;
+ SmallVector<MappingContext, 2> MCs;
+ SmallVector<WorklistEntry, 4> Worklist;
+ SmallVector<DelayedBasicBlock, 1> DelayedBBs;
+ SmallVector<Constant *, 16> AppendingInits;
+
+public:
+ Mapper(ValueToValueMapTy &VM, RemapFlags Flags,
+ ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer)
+ : Flags(Flags), TypeMapper(TypeMapper),
+ MCs(1, MappingContext(VM, Materializer)) {}
+
+ /// ValueMapper should explicitly call \a flush() before destruction.
+ ~Mapper() { assert(!hasWorkToDo() && "Expected to be flushed"); }
+
+ bool hasWorkToDo() const { return !Worklist.empty(); }
+
+ unsigned
+ registerAlternateMappingContext(ValueToValueMapTy &VM,
+ ValueMaterializer *Materializer = nullptr) {
+ MCs.push_back(MappingContext(VM, Materializer));
+ return MCs.size() - 1;
+ }
+
+ void addFlags(RemapFlags Flags);
+
+ void remapGlobalObjectMetadata(GlobalObject &GO);
+
+ Value *mapValue(const Value *V);
+ void remapInstruction(Instruction *I);
+ void remapFunction(Function &F);
+
+ Constant *mapConstant(const Constant *C) {
+ return cast_or_null<Constant>(mapValue(C));
+ }
+
+ /// Map metadata.
+ ///
+ /// Find the mapping for MD. Guarantees that the return will be resolved
+ /// (not an MDNode, or MDNode::isResolved() returns true).
+ Metadata *mapMetadata(const Metadata *MD);
+
+ void scheduleMapGlobalInitializer(GlobalVariable &GV, Constant &Init,
+ unsigned MCID);
+ void scheduleMapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix,
+ bool IsOldCtorDtor,
+ ArrayRef<Constant *> NewMembers,
+ unsigned MCID);
+ void scheduleMapAliasOrIFunc(GlobalValue &GV, Constant &Target,
+ unsigned MCID);
+ void scheduleRemapFunction(Function &F, unsigned MCID);
+
+ void flush();
+
+private:
+ void mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix,
+ bool IsOldCtorDtor,
+ ArrayRef<Constant *> NewMembers);
+
+ ValueToValueMapTy &getVM() { return *MCs[CurrentMCID].VM; }
+ ValueMaterializer *getMaterializer() { return MCs[CurrentMCID].Materializer; }
+
+ Value *mapBlockAddress(const BlockAddress &BA);
+
+ /// Map metadata that doesn't require visiting operands.
+ std::optional<Metadata *> mapSimpleMetadata(const Metadata *MD);
+
+ Metadata *mapToMetadata(const Metadata *Key, Metadata *Val);
+ Metadata *mapToSelf(const Metadata *MD);
+};
+
+class MDNodeMapper {
+ Mapper &M;
+
+ /// Data about a node in \a UniquedGraph.
+ struct Data {
+ bool HasChanged = false;
+ unsigned ID = std::numeric_limits<unsigned>::max();
+ TempMDNode Placeholder;
+ };
+
+ /// A graph of uniqued nodes.
+ struct UniquedGraph {
+ SmallDenseMap<const Metadata *, Data, 32> Info; // Node properties.
+ SmallVector<MDNode *, 16> POT; // Post-order traversal.
+
+ /// Propagate changed operands through the post-order traversal.
+ ///
+ /// Iteratively update \a Data::HasChanged for each node based on \a
+ /// Data::HasChanged of its operands, until fixed point.
+ void propagateChanges();
+
+ /// Get a forward reference to a node to use as an operand.
+ Metadata &getFwdReference(MDNode &Op);
+ };
+
+ /// Worklist of distinct nodes whose operands need to be remapped.
+ SmallVector<MDNode *, 16> DistinctWorklist;
+
+ // Storage for a UniquedGraph.
+ SmallDenseMap<const Metadata *, Data, 32> InfoStorage;
+ SmallVector<MDNode *, 16> POTStorage;
+
+public:
+ MDNodeMapper(Mapper &M) : M(M) {}
+
+ /// Map a metadata node (and its transitive operands).
+ ///
+ /// Map all the (unmapped) nodes in the subgraph under \c N. The iterative
+ /// algorithm handles distinct nodes and uniqued node subgraphs using
+ /// different strategies.
+ ///
+ /// Distinct nodes are immediately mapped and added to \a DistinctWorklist
+ /// using \a mapDistinctNode(). Their mapping can always be computed
+ /// immediately without visiting operands, even if their operands change.
+ ///
+ /// The mapping for uniqued nodes depends on whether their operands change.
+ /// \a mapTopLevelUniquedNode() traverses the transitive uniqued subgraph of
+  /// a node to calculate uniqued node mappings in bulk. Distinct leaves are
+ /// added to \a DistinctWorklist with \a mapDistinctNode().
+ ///
+ /// After mapping \c N itself, this function remaps the operands of the
+ /// distinct nodes in \a DistinctWorklist until the entire subgraph under \c
+ /// N has been mapped.
+ Metadata *map(const MDNode &N);
+
+private:
+ /// Map a top-level uniqued node and the uniqued subgraph underneath it.
+ ///
+ /// This builds up a post-order traversal of the (unmapped) uniqued subgraph
+ /// underneath \c FirstN and calculates the nodes' mapping. Each node uses
+ /// the identity mapping (\a Mapper::mapToSelf()) as long as all of its
+  /// operands use the identity mapping.
+ ///
+ /// The algorithm works as follows:
+ ///
+ /// 1. \a createPOT(): traverse the uniqued subgraph under \c FirstN and
+ /// save the post-order traversal in the given \a UniquedGraph, tracking
+ /// nodes' operands change.
+ ///
+ /// 2. \a UniquedGraph::propagateChanges(): propagate changed operands
+ /// through the \a UniquedGraph until fixed point, following the rule
+  ///     that if a node changes, any node that references it must also change.
+ ///
+ /// 3. \a mapNodesInPOT(): map the uniqued nodes, creating new uniqued nodes
+ /// (referencing new operands) where necessary.
+ Metadata *mapTopLevelUniquedNode(const MDNode &FirstN);
+
+ /// Try to map the operand of an \a MDNode.
+ ///
+ /// If \c Op is already mapped, return the mapping. If it's not an \a
+ /// MDNode, compute and return the mapping. If it's a distinct \a MDNode,
+ /// return the result of \a mapDistinctNode().
+ ///
+ /// \return std::nullopt if \c Op is an unmapped uniqued \a MDNode.
+ /// \post getMappedOp(Op) only returns std::nullopt if this returns
+ /// std::nullopt.
+ std::optional<Metadata *> tryToMapOperand(const Metadata *Op);
+
+ /// Map a distinct node.
+ ///
+ /// Return the mapping for the distinct node \c N, saving the result in \a
+ /// DistinctWorklist for later remapping.
+ ///
+ /// \pre \c N is not yet mapped.
+ /// \pre \c N.isDistinct().
+ MDNode *mapDistinctNode(const MDNode &N);
+
+ /// Get a previously mapped node.
+ std::optional<Metadata *> getMappedOp(const Metadata *Op) const;
+
+ /// Create a post-order traversal of an unmapped uniqued node subgraph.
+ ///
+ /// This traverses the metadata graph deeply enough to map \c FirstN. It
+  /// uses \a tryToMapOperand() (via \a Mapper::mapSimpleMetadata()), so any
+ /// metadata that has already been mapped will not be part of the POT.
+ ///
+ /// Each node that has a changed operand from outside the graph (e.g., a
+ /// distinct node, an already-mapped uniqued node, or \a ConstantAsMetadata)
+ /// is marked with \a Data::HasChanged.
+ ///
+ /// \return \c true if any nodes in \c G have \a Data::HasChanged.
+ /// \post \c G.POT is a post-order traversal ending with \c FirstN.
+  /// \post \a Data::HasChanged in \c G.Info indicates whether any node needs
+ /// to change because of operands outside the graph.
+ bool createPOT(UniquedGraph &G, const MDNode &FirstN);
+
+ /// Visit the operands of a uniqued node in the POT.
+ ///
+ /// Visit the operands in the range from \c I to \c E, returning the first
+ /// uniqued node we find that isn't yet in \c G. \c I is always advanced to
+ /// where to continue the loop through the operands.
+ ///
+ /// This sets \c HasChanged if any of the visited operands change.
+ MDNode *visitOperands(UniquedGraph &G, MDNode::op_iterator &I,
+ MDNode::op_iterator E, bool &HasChanged);
+
+ /// Map all the nodes in the given uniqued graph.
+ ///
+ /// This visits all the nodes in \c G in post-order, using the identity
+ /// mapping or creating a new node depending on \a Data::HasChanged.
+ ///
+  /// \pre \a getMappedOp() returns std::nullopt for nodes in \c G, but not
+  /// for any of their operands outside of \c G.
+  /// \pre \a Data::HasChanged is true for a node in \c G iff any of its
+  /// operands have changed.
+  /// \post \a getMappedOp() returns the mapped node for every node in \c G.
+ void mapNodesInPOT(UniquedGraph &G);
+
+ /// Remap a node's operands using the given functor.
+ ///
+ /// Iterate through the operands of \c N and update them in place using \c
+ /// mapOperand.
+ ///
+ /// \pre N.isDistinct() or N.isTemporary().
+ template <class OperandMapper>
+ void remapOperands(MDNode &N, OperandMapper mapOperand);
+};
+
+} // end anonymous namespace
+
+Value *Mapper::mapValue(const Value *V) {
+ ValueToValueMapTy::iterator I = getVM().find(V);
+
+ // If the value already exists in the map, use it.
+ if (I != getVM().end()) {
+ assert(I->second && "Unexpected null mapping");
+ return I->second;
+ }
+
+ // If we have a materializer and it can materialize a value, use that.
+ if (auto *Materializer = getMaterializer()) {
+ if (Value *NewV = Materializer->materialize(const_cast<Value *>(V))) {
+ getVM()[V] = NewV;
+ return NewV;
+ }
+ }
+
+ // Global values do not need to be seeded into the VM if they
+ // are using the identity mapping.
+ if (isa<GlobalValue>(V)) {
+ if (Flags & RF_NullMapMissingGlobalValues)
+ return nullptr;
+ return getVM()[V] = const_cast<Value *>(V);
+ }
+
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
+ // Inline asm may need *type* remapping.
+ FunctionType *NewTy = IA->getFunctionType();
+ if (TypeMapper) {
+ NewTy = cast<FunctionType>(TypeMapper->remapType(NewTy));
+
+ if (NewTy != IA->getFunctionType())
+ V = InlineAsm::get(NewTy, IA->getAsmString(), IA->getConstraintString(),
+ IA->hasSideEffects(), IA->isAlignStack(),
+ IA->getDialect(), IA->canThrow());
+ }
+
+ return getVM()[V] = const_cast<Value *>(V);
+ }
+
+ if (const auto *MDV = dyn_cast<MetadataAsValue>(V)) {
+ const Metadata *MD = MDV->getMetadata();
+
+ if (auto *LAM = dyn_cast<LocalAsMetadata>(MD)) {
+ // Look through to grab the local value.
+ if (Value *LV = mapValue(LAM->getValue())) {
+ if (V == LAM->getValue())
+ return const_cast<Value *>(V);
+ return MetadataAsValue::get(V->getContext(), ValueAsMetadata::get(LV));
+ }
+
+ // FIXME: always return nullptr once Verifier::verifyDominatesUse()
+ // ensures metadata operands only reference defined SSA values.
+ return (Flags & RF_IgnoreMissingLocals)
+ ? nullptr
+ : MetadataAsValue::get(
+ V->getContext(),
+ MDTuple::get(V->getContext(), std::nullopt));
+ }
+ if (auto *AL = dyn_cast<DIArgList>(MD)) {
+ SmallVector<ValueAsMetadata *, 4> MappedArgs;
+ for (auto *VAM : AL->getArgs()) {
+ // Map both Local and Constant VAMs here; they will both ultimately
+ // be mapped via mapValue. The exceptions are constants when we have no
+ // module level changes and locals when they have no existing mapped
+ // value and RF_IgnoreMissingLocals is set; these have identity
+ // mappings.
+ if ((Flags & RF_NoModuleLevelChanges) && isa<ConstantAsMetadata>(VAM)) {
+ MappedArgs.push_back(VAM);
+ } else if (Value *LV = mapValue(VAM->getValue())) {
+ MappedArgs.push_back(
+ LV == VAM->getValue() ? VAM : ValueAsMetadata::get(LV));
+ } else if ((Flags & RF_IgnoreMissingLocals) && isa<LocalAsMetadata>(VAM)) {
+ MappedArgs.push_back(VAM);
+ } else {
+ // If we cannot map the value, set the argument as undef.
+ MappedArgs.push_back(ValueAsMetadata::get(
+ UndefValue::get(VAM->getValue()->getType())));
+ }
+ }
+ return MetadataAsValue::get(V->getContext(),
+ DIArgList::get(V->getContext(), MappedArgs));
+ }
+
+    // If this is module-level metadata and we know that nothing at the module
+    // level is changing, then use an identity mapping.
+ if (Flags & RF_NoModuleLevelChanges)
+ return getVM()[V] = const_cast<Value *>(V);
+
+ // Map the metadata and turn it into a value.
+ auto *MappedMD = mapMetadata(MD);
+ if (MD == MappedMD)
+ return getVM()[V] = const_cast<Value *>(V);
+ return getVM()[V] = MetadataAsValue::get(V->getContext(), MappedMD);
+ }
+
+ // Okay, this either must be a constant (which may or may not be mappable) or
+ // is something that is not in the mapping table.
+ Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V));
+ if (!C)
+ return nullptr;
+
+ if (BlockAddress *BA = dyn_cast<BlockAddress>(C))
+ return mapBlockAddress(*BA);
+
+ if (const auto *E = dyn_cast<DSOLocalEquivalent>(C)) {
+ auto *Val = mapValue(E->getGlobalValue());
+ GlobalValue *GV = dyn_cast<GlobalValue>(Val);
+ if (GV)
+ return getVM()[E] = DSOLocalEquivalent::get(GV);
+
+ auto *Func = cast<Function>(Val->stripPointerCastsAndAliases());
+ Type *NewTy = E->getType();
+ if (TypeMapper)
+ NewTy = TypeMapper->remapType(NewTy);
+ return getVM()[E] = llvm::ConstantExpr::getBitCast(
+ DSOLocalEquivalent::get(Func), NewTy);
+ }
+
+ if (const auto *NC = dyn_cast<NoCFIValue>(C)) {
+ auto *Val = mapValue(NC->getGlobalValue());
+ GlobalValue *GV = cast<GlobalValue>(Val);
+ return getVM()[NC] = NoCFIValue::get(GV);
+ }
+
+ auto mapValueOrNull = [this](Value *V) {
+ auto Mapped = mapValue(V);
+ assert((Mapped || (Flags & RF_NullMapMissingGlobalValues)) &&
+ "Unexpected null mapping for constant operand without "
+ "NullMapMissingGlobalValues flag");
+ return Mapped;
+ };
+
+ // Otherwise, we have some other constant to remap. Start by checking to see
+ // if all operands have an identity remapping.
+ unsigned OpNo = 0, NumOperands = C->getNumOperands();
+ Value *Mapped = nullptr;
+ for (; OpNo != NumOperands; ++OpNo) {
+ Value *Op = C->getOperand(OpNo);
+ Mapped = mapValueOrNull(Op);
+ if (!Mapped)
+ return nullptr;
+ if (Mapped != Op)
+ break;
+ }
+
+ // See if the type mapper wants to remap the type as well.
+ Type *NewTy = C->getType();
+ if (TypeMapper)
+ NewTy = TypeMapper->remapType(NewTy);
+
+ // If the result type and all operands match up, then just insert an identity
+ // mapping.
+ if (OpNo == NumOperands && NewTy == C->getType())
+ return getVM()[V] = C;
+
+ // Okay, we need to create a new constant. We've already processed some or
+ // all of the operands, set them all up now.
+ SmallVector<Constant*, 8> Ops;
+ Ops.reserve(NumOperands);
+ for (unsigned j = 0; j != OpNo; ++j)
+ Ops.push_back(cast<Constant>(C->getOperand(j)));
+
+ // If one of the operands mismatch, push it and the other mapped operands.
+ if (OpNo != NumOperands) {
+ Ops.push_back(cast<Constant>(Mapped));
+
+ // Map the rest of the operands that aren't processed yet.
+ for (++OpNo; OpNo != NumOperands; ++OpNo) {
+ Mapped = mapValueOrNull(C->getOperand(OpNo));
+ if (!Mapped)
+ return nullptr;
+ Ops.push_back(cast<Constant>(Mapped));
+ }
+ }
+ Type *NewSrcTy = nullptr;
+ if (TypeMapper)
+ if (auto *GEPO = dyn_cast<GEPOperator>(C))
+ NewSrcTy = TypeMapper->remapType(GEPO->getSourceElementType());
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ return getVM()[V] = CE->getWithOperands(Ops, NewTy, false, NewSrcTy);
+ if (isa<ConstantArray>(C))
+ return getVM()[V] = ConstantArray::get(cast<ArrayType>(NewTy), Ops);
+ if (isa<ConstantStruct>(C))
+ return getVM()[V] = ConstantStruct::get(cast<StructType>(NewTy), Ops);
+ if (isa<ConstantVector>(C))
+ return getVM()[V] = ConstantVector::get(Ops);
+ // If this is a no-operand constant, it must be because the type was remapped.
+ if (isa<UndefValue>(C))
+ return getVM()[V] = UndefValue::get(NewTy);
+ if (isa<ConstantAggregateZero>(C))
+ return getVM()[V] = ConstantAggregateZero::get(NewTy);
+ assert(isa<ConstantPointerNull>(C));
+ return getVM()[V] = ConstantPointerNull::get(cast<PointerType>(NewTy));
+}
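+
+// Illustrative example (editor's sketch): remapping a constant expression
+// such as
+//   getelementptr inbounds ([4 x i32], ptr @old, i64 0, i64 2)
+// where @old maps to @new first maps each operand; since @old changed, the
+// expression is rebuilt via ConstantExpr::getWithOperands() with @new in place
+// of @old, and the result is memoized in the value map.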
+
+Value *Mapper::mapBlockAddress(const BlockAddress &BA) {
+ Function *F = cast<Function>(mapValue(BA.getFunction()));
+
+ // F may not have materialized its initializer. In that case, create a
+ // dummy basic block for now, and replace it once we've materialized all
+ // the initializers.
+ BasicBlock *BB;
+ if (F->empty()) {
+ DelayedBBs.push_back(DelayedBasicBlock(BA));
+ BB = DelayedBBs.back().TempBB.get();
+ } else {
+ BB = cast_or_null<BasicBlock>(mapValue(BA.getBasicBlock()));
+ }
+
+ return getVM()[&BA] = BlockAddress::get(F, BB ? BB : BA.getBasicBlock());
+}
+
+Metadata *Mapper::mapToMetadata(const Metadata *Key, Metadata *Val) {
+ getVM().MD()[Key].reset(Val);
+ return Val;
+}
+
+Metadata *Mapper::mapToSelf(const Metadata *MD) {
+ return mapToMetadata(MD, const_cast<Metadata *>(MD));
+}
+
+std::optional<Metadata *> MDNodeMapper::tryToMapOperand(const Metadata *Op) {
+ if (!Op)
+ return nullptr;
+
+ if (std::optional<Metadata *> MappedOp = M.mapSimpleMetadata(Op)) {
+#ifndef NDEBUG
+ if (auto *CMD = dyn_cast<ConstantAsMetadata>(Op))
+ assert((!*MappedOp || M.getVM().count(CMD->getValue()) ||
+ M.getVM().getMappedMD(Op)) &&
+ "Expected Value to be memoized");
+ else
+ assert((isa<MDString>(Op) || M.getVM().getMappedMD(Op)) &&
+ "Expected result to be memoized");
+#endif
+ return *MappedOp;
+ }
+
+ const MDNode &N = *cast<MDNode>(Op);
+ if (N.isDistinct())
+ return mapDistinctNode(N);
+ return std::nullopt;
+}
+
+MDNode *MDNodeMapper::mapDistinctNode(const MDNode &N) {
+ assert(N.isDistinct() && "Expected a distinct node");
+ assert(!M.getVM().getMappedMD(&N) && "Expected an unmapped node");
+ Metadata *NewM = nullptr;
+
+ if (M.Flags & RF_ReuseAndMutateDistinctMDs) {
+ NewM = M.mapToSelf(&N);
+ } else {
+ NewM = MDNode::replaceWithDistinct(N.clone());
+ LLVM_DEBUG(dbgs() << "\nMap " << N << "\n"
+ << "To " << *NewM << "\n\n");
+ M.mapToMetadata(&N, NewM);
+ }
+ DistinctWorklist.push_back(cast<MDNode>(NewM));
+
+ return DistinctWorklist.back();
+}
+
+static ConstantAsMetadata *wrapConstantAsMetadata(const ConstantAsMetadata &CMD,
+ Value *MappedV) {
+ if (CMD.getValue() == MappedV)
+ return const_cast<ConstantAsMetadata *>(&CMD);
+ return MappedV ? ConstantAsMetadata::getConstant(MappedV) : nullptr;
+}
+
+std::optional<Metadata *> MDNodeMapper::getMappedOp(const Metadata *Op) const {
+ if (!Op)
+ return nullptr;
+
+ if (std::optional<Metadata *> MappedOp = M.getVM().getMappedMD(Op))
+ return *MappedOp;
+
+ if (isa<MDString>(Op))
+ return const_cast<Metadata *>(Op);
+
+ if (auto *CMD = dyn_cast<ConstantAsMetadata>(Op))
+ return wrapConstantAsMetadata(*CMD, M.getVM().lookup(CMD->getValue()));
+
+ return std::nullopt;
+}
+
+Metadata &MDNodeMapper::UniquedGraph::getFwdReference(MDNode &Op) {
+ auto Where = Info.find(&Op);
+ assert(Where != Info.end() && "Expected a valid reference");
+
+ auto &OpD = Where->second;
+ if (!OpD.HasChanged)
+ return Op;
+
+ // Lazily construct a temporary node.
+ if (!OpD.Placeholder)
+ OpD.Placeholder = Op.clone();
+
+ return *OpD.Placeholder;
+}
+
+template <class OperandMapper>
+void MDNodeMapper::remapOperands(MDNode &N, OperandMapper mapOperand) {
+ assert(!N.isUniqued() && "Expected distinct or temporary nodes");
+ for (unsigned I = 0, E = N.getNumOperands(); I != E; ++I) {
+ Metadata *Old = N.getOperand(I);
+ Metadata *New = mapOperand(Old);
+ if (Old != New)
+ LLVM_DEBUG(dbgs() << "Replacing Op " << Old << " with " << New << " in "
+ << N << "\n");
+
+ if (Old != New)
+ N.replaceOperandWith(I, New);
+ }
+}
+
+namespace {
+
+/// An entry in the worklist for the post-order traversal.
+struct POTWorklistEntry {
+ MDNode *N; ///< Current node.
+ MDNode::op_iterator Op; ///< Current operand of \c N.
+
+ /// Keep a flag of whether operands have changed in the worklist to avoid
+ /// hitting the map in \a UniquedGraph.
+ bool HasChanged = false;
+
+ POTWorklistEntry(MDNode &N) : N(&N), Op(N.op_begin()) {}
+};
+
+} // end anonymous namespace
+
+bool MDNodeMapper::createPOT(UniquedGraph &G, const MDNode &FirstN) {
+ assert(G.Info.empty() && "Expected a fresh traversal");
+ assert(FirstN.isUniqued() && "Expected uniqued node in POT");
+
+ // Construct a post-order traversal of the uniqued subgraph under FirstN.
+ bool AnyChanges = false;
+ SmallVector<POTWorklistEntry, 16> Worklist;
+ Worklist.push_back(POTWorklistEntry(const_cast<MDNode &>(FirstN)));
+ (void)G.Info[&FirstN];
+ while (!Worklist.empty()) {
+    // Start or continue the traversal through this node's operands.
+ auto &WE = Worklist.back();
+ if (MDNode *N = visitOperands(G, WE.Op, WE.N->op_end(), WE.HasChanged)) {
+ // Push a new node to traverse first.
+ Worklist.push_back(POTWorklistEntry(*N));
+ continue;
+ }
+
+ // Push the node onto the POT.
+ assert(WE.N->isUniqued() && "Expected only uniqued nodes");
+ assert(WE.Op == WE.N->op_end() && "Expected to visit all operands");
+ auto &D = G.Info[WE.N];
+ AnyChanges |= D.HasChanged = WE.HasChanged;
+ D.ID = G.POT.size();
+ G.POT.push_back(WE.N);
+
+ // Pop the node off the worklist.
+ Worklist.pop_back();
+ }
+ return AnyChanges;
+}
+
+MDNode *MDNodeMapper::visitOperands(UniquedGraph &G, MDNode::op_iterator &I,
+ MDNode::op_iterator E, bool &HasChanged) {
+ while (I != E) {
+ Metadata *Op = *I++; // Increment even on early return.
+ if (std::optional<Metadata *> MappedOp = tryToMapOperand(Op)) {
+ // Check if the operand changes.
+ HasChanged |= Op != *MappedOp;
+ continue;
+ }
+
+ // A uniqued metadata node.
+ MDNode &OpN = *cast<MDNode>(Op);
+ assert(OpN.isUniqued() &&
+ "Only uniqued operands cannot be mapped immediately");
+ if (G.Info.insert(std::make_pair(&OpN, Data())).second)
+ return &OpN; // This is a new one. Return it.
+ }
+ return nullptr;
+}
+
+void MDNodeMapper::UniquedGraph::propagateChanges() {
+ bool AnyChanges;
+ do {
+ AnyChanges = false;
+ for (MDNode *N : POT) {
+ auto &D = Info[N];
+ if (D.HasChanged)
+ continue;
+
+ if (llvm::none_of(N->operands(), [&](const Metadata *Op) {
+ auto Where = Info.find(Op);
+ return Where != Info.end() && Where->second.HasChanged;
+ }))
+ continue;
+
+ AnyChanges = D.HasChanged = true;
+ }
+ } while (AnyChanges);
+}
+
+void MDNodeMapper::mapNodesInPOT(UniquedGraph &G) {
+ // Construct uniqued nodes, building forward references as necessary.
+ SmallVector<MDNode *, 16> CyclicNodes;
+ for (auto *N : G.POT) {
+ auto &D = G.Info[N];
+ if (!D.HasChanged) {
+ // The node hasn't changed.
+ M.mapToSelf(N);
+ continue;
+ }
+
+ // Remember whether this node had a placeholder.
+ bool HadPlaceholder(D.Placeholder);
+
+ // Clone the uniqued node and remap the operands.
+ TempMDNode ClonedN = D.Placeholder ? std::move(D.Placeholder) : N->clone();
+ remapOperands(*ClonedN, [this, &D, &G](Metadata *Old) {
+ if (std::optional<Metadata *> MappedOp = getMappedOp(Old))
+ return *MappedOp;
+ (void)D;
+ assert(G.Info[Old].ID > D.ID && "Expected a forward reference");
+ return &G.getFwdReference(*cast<MDNode>(Old));
+ });
+
+ auto *NewN = MDNode::replaceWithUniqued(std::move(ClonedN));
+ if (N && NewN && N != NewN) {
+ LLVM_DEBUG(dbgs() << "\nMap " << *N << "\n"
+ << "To " << *NewN << "\n\n");
+ }
+
+ M.mapToMetadata(N, NewN);
+
+ // Nodes that were referenced out of order in the POT are involved in a
+ // uniquing cycle.
+ if (HadPlaceholder)
+ CyclicNodes.push_back(NewN);
+ }
+
+ // Resolve cycles.
+ for (auto *N : CyclicNodes)
+ if (!N->isResolved())
+ N->resolveCycles();
+}
+
+Metadata *MDNodeMapper::map(const MDNode &N) {
+ assert(DistinctWorklist.empty() && "MDNodeMapper::map is not recursive");
+ assert(!(M.Flags & RF_NoModuleLevelChanges) &&
+ "MDNodeMapper::map assumes module-level changes");
+
+ // Require resolved nodes whenever metadata might be remapped.
+ assert(N.isResolved() && "Unexpected unresolved node");
+
+ Metadata *MappedN =
+ N.isUniqued() ? mapTopLevelUniquedNode(N) : mapDistinctNode(N);
+ while (!DistinctWorklist.empty())
+ remapOperands(*DistinctWorklist.pop_back_val(), [this](Metadata *Old) {
+ if (std::optional<Metadata *> MappedOp = tryToMapOperand(Old))
+ return *MappedOp;
+ return mapTopLevelUniquedNode(*cast<MDNode>(Old));
+ });
+ return MappedN;
+}
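+
+// Illustrative example (editor's sketch): with metadata such as
+//   !0 = !{!1}
+//   !1 = !{ptr @g}
+// remapping @g to @g.new changes the ConstantAsMetadata operand of !1, so
+// propagateChanges() marks both !1 and !0 as changed and mapNodesInPOT()
+// re-uniques them with the new operands; had @g mapped to itself, both nodes
+// would simply be mapped to themselves via mapToSelf().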
+
+Metadata *MDNodeMapper::mapTopLevelUniquedNode(const MDNode &FirstN) {
+ assert(FirstN.isUniqued() && "Expected uniqued node");
+
+ // Create a post-order traversal of uniqued nodes under FirstN.
+ UniquedGraph G;
+ if (!createPOT(G, FirstN)) {
+ // Return early if no nodes have changed.
+ for (const MDNode *N : G.POT)
+ M.mapToSelf(N);
+ return &const_cast<MDNode &>(FirstN);
+ }
+
+ // Update graph with all nodes that have changed.
+ G.propagateChanges();
+
+ // Map all the nodes in the graph.
+ mapNodesInPOT(G);
+
+ // Return the original node, remapped.
+ return *getMappedOp(&FirstN);
+}
+
+std::optional<Metadata *> Mapper::mapSimpleMetadata(const Metadata *MD) {
+ // If the value already exists in the map, use it.
+ if (std::optional<Metadata *> NewMD = getVM().getMappedMD(MD))
+ return *NewMD;
+
+ if (isa<MDString>(MD))
+ return const_cast<Metadata *>(MD);
+
+  // This is module-level metadata. If nothing at the module level is
+  // changing, use an identity mapping.
+ if ((Flags & RF_NoModuleLevelChanges))
+ return const_cast<Metadata *>(MD);
+
+ if (auto *CMD = dyn_cast<ConstantAsMetadata>(MD)) {
+ // Don't memoize ConstantAsMetadata. Instead of lasting until the
+ // LLVMContext is destroyed, they can be deleted when the GlobalValue they
+ // reference is destructed. These aren't super common, so the extra
+ // indirection isn't that expensive.
+ return wrapConstantAsMetadata(*CMD, mapValue(CMD->getValue()));
+ }
+
+ assert(isa<MDNode>(MD) && "Expected a metadata node");
+
+ return std::nullopt;
+}
+
+Metadata *Mapper::mapMetadata(const Metadata *MD) {
+ assert(MD && "Expected valid metadata");
+ assert(!isa<LocalAsMetadata>(MD) && "Unexpected local metadata");
+
+ if (std::optional<Metadata *> NewMD = mapSimpleMetadata(MD))
+ return *NewMD;
+
+ return MDNodeMapper(*this).map(*cast<MDNode>(MD));
+}
+
+void Mapper::flush() {
+ // Flush out the worklist of global values.
+ while (!Worklist.empty()) {
+ WorklistEntry E = Worklist.pop_back_val();
+ CurrentMCID = E.MCID;
+ switch (E.Kind) {
+ case WorklistEntry::MapGlobalInit:
+ E.Data.GVInit.GV->setInitializer(mapConstant(E.Data.GVInit.Init));
+ remapGlobalObjectMetadata(*E.Data.GVInit.GV);
+ break;
+ case WorklistEntry::MapAppendingVar: {
+ unsigned PrefixSize = AppendingInits.size() - E.AppendingGVNumNewMembers;
+      // The mapAppendingVariable call can change AppendingInits if the
+      // initializer for the variable depends on another appending global;
+      // because of that, the inits need to be extracted and updated before
+      // the call.
+ SmallVector<Constant *, 8> NewInits(
+ drop_begin(AppendingInits, PrefixSize));
+ AppendingInits.resize(PrefixSize);
+ mapAppendingVariable(*E.Data.AppendingGV.GV,
+ E.Data.AppendingGV.InitPrefix,
+ E.AppendingGVIsOldCtorDtor, ArrayRef(NewInits));
+ break;
+ }
+ case WorklistEntry::MapAliasOrIFunc: {
+ GlobalValue *GV = E.Data.AliasOrIFunc.GV;
+ Constant *Target = mapConstant(E.Data.AliasOrIFunc.Target);
+ if (auto *GA = dyn_cast<GlobalAlias>(GV))
+ GA->setAliasee(Target);
+ else if (auto *GI = dyn_cast<GlobalIFunc>(GV))
+ GI->setResolver(Target);
+ else
+ llvm_unreachable("Not alias or ifunc");
+ break;
+ }
+ case WorklistEntry::RemapFunction:
+ remapFunction(*E.Data.RemapF);
+ break;
+ }
+ }
+ CurrentMCID = 0;
+
+ // Finish logic for block addresses now that all global values have been
+ // handled.
+ while (!DelayedBBs.empty()) {
+ DelayedBasicBlock DBB = DelayedBBs.pop_back_val();
+ BasicBlock *BB = cast_or_null<BasicBlock>(mapValue(DBB.OldBB));
+ DBB.TempBB->replaceAllUsesWith(BB ? BB : DBB.OldBB);
+ }
+}
+
+void Mapper::remapInstruction(Instruction *I) {
+ // Remap operands.
+ for (Use &Op : I->operands()) {
+ Value *V = mapValue(Op);
+ // If we aren't ignoring missing entries, assert that something happened.
+ if (V)
+ Op = V;
+ else
+ assert((Flags & RF_IgnoreMissingLocals) &&
+ "Referenced value not in value map!");
+ }
+
+ // Remap phi nodes' incoming blocks.
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = mapValue(PN->getIncomingBlock(i));
+ // If we aren't ignoring missing entries, assert that something happened.
+ if (V)
+ PN->setIncomingBlock(i, cast<BasicBlock>(V));
+ else
+ assert((Flags & RF_IgnoreMissingLocals) &&
+ "Referenced block not in value map!");
+ }
+ }
+
+ // Remap attached metadata.
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+ I->getAllMetadata(MDs);
+ for (const auto &MI : MDs) {
+ MDNode *Old = MI.second;
+ MDNode *New = cast_or_null<MDNode>(mapMetadata(Old));
+ if (New != Old)
+ I->setMetadata(MI.first, New);
+ }
+
+ if (!TypeMapper)
+ return;
+
+ // If the instruction's type is being remapped, do so now.
+ if (auto *CB = dyn_cast<CallBase>(I)) {
+ SmallVector<Type *, 3> Tys;
+ FunctionType *FTy = CB->getFunctionType();
+ Tys.reserve(FTy->getNumParams());
+ for (Type *Ty : FTy->params())
+ Tys.push_back(TypeMapper->remapType(Ty));
+ CB->mutateFunctionType(FunctionType::get(
+ TypeMapper->remapType(I->getType()), Tys, FTy->isVarArg()));
+
+ LLVMContext &C = CB->getContext();
+ AttributeList Attrs = CB->getAttributes();
+ for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) {
+ for (int AttrIdx = Attribute::FirstTypeAttr;
+ AttrIdx <= Attribute::LastTypeAttr; AttrIdx++) {
+ Attribute::AttrKind TypedAttr = (Attribute::AttrKind)AttrIdx;
+ if (Type *Ty =
+ Attrs.getAttributeAtIndex(i, TypedAttr).getValueAsType()) {
+ Attrs = Attrs.replaceAttributeTypeAtIndex(C, i, TypedAttr,
+ TypeMapper->remapType(Ty));
+ break;
+ }
+ }
+ }
+ CB->setAttributes(Attrs);
+ return;
+ }
+ if (auto *AI = dyn_cast<AllocaInst>(I))
+ AI->setAllocatedType(TypeMapper->remapType(AI->getAllocatedType()));
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ GEP->setSourceElementType(
+ TypeMapper->remapType(GEP->getSourceElementType()));
+ GEP->setResultElementType(
+ TypeMapper->remapType(GEP->getResultElementType()));
+ }
+ I->mutateType(TypeMapper->remapType(I->getType()));
+}
+
+void Mapper::remapGlobalObjectMetadata(GlobalObject &GO) {
+ SmallVector<std::pair<unsigned, MDNode *>, 8> MDs;
+ GO.getAllMetadata(MDs);
+ GO.clearMetadata();
+ for (const auto &I : MDs)
+ GO.addMetadata(I.first, *cast<MDNode>(mapMetadata(I.second)));
+}
+
+void Mapper::remapFunction(Function &F) {
+ // Remap the operands.
+ for (Use &Op : F.operands())
+ if (Op)
+ Op = mapValue(Op);
+
+ // Remap the metadata attachments.
+ remapGlobalObjectMetadata(F);
+
+ // Remap the argument types.
+ if (TypeMapper)
+ for (Argument &A : F.args())
+ A.mutateType(TypeMapper->remapType(A.getType()));
+
+ // Remap the instructions.
+ for (BasicBlock &BB : F)
+ for (Instruction &I : BB)
+ remapInstruction(&I);
+}
+
+void Mapper::mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix,
+ bool IsOldCtorDtor,
+ ArrayRef<Constant *> NewMembers) {
+ SmallVector<Constant *, 16> Elements;
+ if (InitPrefix) {
+ unsigned NumElements =
+ cast<ArrayType>(InitPrefix->getType())->getNumElements();
+ for (unsigned I = 0; I != NumElements; ++I)
+ Elements.push_back(InitPrefix->getAggregateElement(I));
+ }
+
+ PointerType *VoidPtrTy;
+ Type *EltTy;
+ if (IsOldCtorDtor) {
+ // FIXME: This upgrade is done during linking to support the C API. See
+ // also IRLinker::linkAppendingVarProto() in IRMover.cpp.
+ VoidPtrTy = Type::getInt8Ty(GV.getContext())->getPointerTo();
+ auto &ST = *cast<StructType>(NewMembers.front()->getType());
+ Type *Tys[3] = {ST.getElementType(0), ST.getElementType(1), VoidPtrTy};
+ EltTy = StructType::get(GV.getContext(), Tys, false);
+ }
+
+ for (auto *V : NewMembers) {
+ Constant *NewV;
+ if (IsOldCtorDtor) {
+ auto *S = cast<ConstantStruct>(V);
+ auto *E1 = cast<Constant>(mapValue(S->getOperand(0)));
+ auto *E2 = cast<Constant>(mapValue(S->getOperand(1)));
+ Constant *Null = Constant::getNullValue(VoidPtrTy);
+ NewV = ConstantStruct::get(cast<StructType>(EltTy), E1, E2, Null);
+ } else {
+ NewV = cast_or_null<Constant>(mapValue(V));
+ }
+ Elements.push_back(NewV);
+ }
+
+ GV.setInitializer(
+ ConstantArray::get(cast<ArrayType>(GV.getValueType()), Elements));
+}
+
+void Mapper::scheduleMapGlobalInitializer(GlobalVariable &GV, Constant &Init,
+ unsigned MCID) {
+ assert(AlreadyScheduled.insert(&GV).second && "Should not reschedule");
+ assert(MCID < MCs.size() && "Invalid mapping context");
+
+ WorklistEntry WE;
+ WE.Kind = WorklistEntry::MapGlobalInit;
+ WE.MCID = MCID;
+ WE.Data.GVInit.GV = &GV;
+ WE.Data.GVInit.Init = &Init;
+ Worklist.push_back(WE);
+}
+
+void Mapper::scheduleMapAppendingVariable(GlobalVariable &GV,
+ Constant *InitPrefix,
+ bool IsOldCtorDtor,
+ ArrayRef<Constant *> NewMembers,
+ unsigned MCID) {
+ assert(AlreadyScheduled.insert(&GV).second && "Should not reschedule");
+ assert(MCID < MCs.size() && "Invalid mapping context");
+
+ WorklistEntry WE;
+ WE.Kind = WorklistEntry::MapAppendingVar;
+ WE.MCID = MCID;
+ WE.Data.AppendingGV.GV = &GV;
+ WE.Data.AppendingGV.InitPrefix = InitPrefix;
+ WE.AppendingGVIsOldCtorDtor = IsOldCtorDtor;
+ WE.AppendingGVNumNewMembers = NewMembers.size();
+ Worklist.push_back(WE);
+ AppendingInits.append(NewMembers.begin(), NewMembers.end());
+}
+
+void Mapper::scheduleMapAliasOrIFunc(GlobalValue &GV, Constant &Target,
+ unsigned MCID) {
+ assert(AlreadyScheduled.insert(&GV).second && "Should not reschedule");
+ assert((isa<GlobalAlias>(GV) || isa<GlobalIFunc>(GV)) &&
+ "Should be alias or ifunc");
+ assert(MCID < MCs.size() && "Invalid mapping context");
+
+ WorklistEntry WE;
+ WE.Kind = WorklistEntry::MapAliasOrIFunc;
+ WE.MCID = MCID;
+ WE.Data.AliasOrIFunc.GV = &GV;
+ WE.Data.AliasOrIFunc.Target = &Target;
+ Worklist.push_back(WE);
+}
+
+void Mapper::scheduleRemapFunction(Function &F, unsigned MCID) {
+ assert(AlreadyScheduled.insert(&F).second && "Should not reschedule");
+ assert(MCID < MCs.size() && "Invalid mapping context");
+
+ WorklistEntry WE;
+ WE.Kind = WorklistEntry::RemapFunction;
+ WE.MCID = MCID;
+ WE.Data.RemapF = &F;
+ Worklist.push_back(WE);
+}
+
+void Mapper::addFlags(RemapFlags Flags) {
+ assert(!hasWorkToDo() && "Expected to have flushed the worklist");
+ this->Flags = this->Flags | Flags;
+}
+
+static Mapper *getAsMapper(void *pImpl) {
+ return reinterpret_cast<Mapper *>(pImpl);
+}
+
+namespace {
+
+class FlushingMapper {
+ Mapper &M;
+
+public:
+ explicit FlushingMapper(void *pImpl) : M(*getAsMapper(pImpl)) {
+ assert(!M.hasWorkToDo() && "Expected to be flushed");
+ }
+
+ ~FlushingMapper() { M.flush(); }
+
+ Mapper *operator->() const { return &M; }
+};
+
+} // end anonymous namespace
+
+ValueMapper::ValueMapper(ValueToValueMapTy &VM, RemapFlags Flags,
+ ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer)
+ : pImpl(new Mapper(VM, Flags, TypeMapper, Materializer)) {}
+
+ValueMapper::~ValueMapper() { delete getAsMapper(pImpl); }
+
+unsigned
+ValueMapper::registerAlternateMappingContext(ValueToValueMapTy &VM,
+ ValueMaterializer *Materializer) {
+ return getAsMapper(pImpl)->registerAlternateMappingContext(VM, Materializer);
+}
+
+void ValueMapper::addFlags(RemapFlags Flags) {
+ FlushingMapper(pImpl)->addFlags(Flags);
+}
+
+Value *ValueMapper::mapValue(const Value &V) {
+ return FlushingMapper(pImpl)->mapValue(&V);
+}
+
+Constant *ValueMapper::mapConstant(const Constant &C) {
+ return cast_or_null<Constant>(mapValue(C));
+}
+
+Metadata *ValueMapper::mapMetadata(const Metadata &MD) {
+ return FlushingMapper(pImpl)->mapMetadata(&MD);
+}
+
+MDNode *ValueMapper::mapMDNode(const MDNode &N) {
+ return cast_or_null<MDNode>(mapMetadata(N));
+}
+
+void ValueMapper::remapInstruction(Instruction &I) {
+ FlushingMapper(pImpl)->remapInstruction(&I);
+}
+
+void ValueMapper::remapFunction(Function &F) {
+ FlushingMapper(pImpl)->remapFunction(F);
+}
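+
+// Minimal usage sketch (editor's illustration; VMap, OldArg, NewArg and NewF
+// are hypothetical names, not part of this file):
+//   ValueToValueMapTy VMap;
+//   VMap[OldArg] = NewArg;                  // seed the map, e.g. when cloning
+//   ValueMapper Mapper(VMap, RF_IgnoreMissingLocals);
+//   for (BasicBlock &BB : NewF)
+//     for (Instruction &I : BB)
+//       Mapper.remapInstruction(I);         // rewrites operands via mapValue()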
+
+void ValueMapper::scheduleMapGlobalInitializer(GlobalVariable &GV,
+ Constant &Init,
+ unsigned MCID) {
+ getAsMapper(pImpl)->scheduleMapGlobalInitializer(GV, Init, MCID);
+}
+
+void ValueMapper::scheduleMapAppendingVariable(GlobalVariable &GV,
+ Constant *InitPrefix,
+ bool IsOldCtorDtor,
+ ArrayRef<Constant *> NewMembers,
+ unsigned MCID) {
+ getAsMapper(pImpl)->scheduleMapAppendingVariable(
+ GV, InitPrefix, IsOldCtorDtor, NewMembers, MCID);
+}
+
+void ValueMapper::scheduleMapGlobalAlias(GlobalAlias &GA, Constant &Aliasee,
+ unsigned MCID) {
+ getAsMapper(pImpl)->scheduleMapAliasOrIFunc(GA, Aliasee, MCID);
+}
+
+void ValueMapper::scheduleMapGlobalIFunc(GlobalIFunc &GI, Constant &Resolver,
+ unsigned MCID) {
+ getAsMapper(pImpl)->scheduleMapAliasOrIFunc(GI, Resolver, MCID);
+}
+
+void ValueMapper::scheduleRemapFunction(Function &F, unsigned MCID) {
+ getAsMapper(pImpl)->scheduleRemapFunction(F, MCID);
+}
diff --git a/contrib/libs/llvm16/lib/Transforms/Utils/ya.make b/contrib/libs/llvm16/lib/Transforms/Utils/ya.make
new file mode 100644
index 0000000000..42083a238b
--- /dev/null
+++ b/contrib/libs/llvm16/lib/Transforms/Utils/ya.make
@@ -0,0 +1,111 @@
+# Generated by devtools/yamaker.
+
+LIBRARY()
+
+LICENSE(Apache-2.0 WITH LLVM-exception)
+
+LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
+
+PEERDIR(
+ contrib/libs/llvm16
+ contrib/libs/llvm16/include
+ contrib/libs/llvm16/lib/Analysis
+ contrib/libs/llvm16/lib/IR
+ contrib/libs/llvm16/lib/Support
+ contrib/libs/llvm16/lib/TargetParser
+)
+
+ADDINCL(
+ contrib/libs/llvm16/lib/Transforms/Utils
+)
+
+NO_COMPILER_WARNINGS()
+
+NO_UTIL()
+
+SRCS(
+ AMDGPUEmitPrintf.cpp
+ ASanStackFrameLayout.cpp
+ AddDiscriminators.cpp
+ AssumeBundleBuilder.cpp
+ BasicBlockUtils.cpp
+ BreakCriticalEdges.cpp
+ BuildLibCalls.cpp
+ BypassSlowDivision.cpp
+ CallGraphUpdater.cpp
+ CallPromotionUtils.cpp
+ CanonicalizeAliases.cpp
+ CanonicalizeFreezeInLoops.cpp
+ CloneFunction.cpp
+ CloneModule.cpp
+ CodeExtractor.cpp
+ CodeLayout.cpp
+ CodeMoverUtils.cpp
+ CtorUtils.cpp
+ Debugify.cpp
+ DemoteRegToStack.cpp
+ EntryExitInstrumenter.cpp
+ EscapeEnumerator.cpp
+ Evaluator.cpp
+ FixIrreducible.cpp
+ FlattenCFG.cpp
+ FunctionComparator.cpp
+ FunctionImportUtils.cpp
+ GlobalStatus.cpp
+ GuardUtils.cpp
+ HelloWorld.cpp
+ InjectTLIMappings.cpp
+ InlineFunction.cpp
+ InstructionNamer.cpp
+ IntegerDivision.cpp
+ LCSSA.cpp
+ LibCallsShrinkWrap.cpp
+ Local.cpp
+ LoopPeel.cpp
+ LoopRotationUtils.cpp
+ LoopSimplify.cpp
+ LoopUnroll.cpp
+ LoopUnrollAndJam.cpp
+ LoopUnrollRuntime.cpp
+ LoopUtils.cpp
+ LoopVersioning.cpp
+ LowerAtomic.cpp
+ LowerGlobalDtors.cpp
+ LowerIFunc.cpp
+ LowerInvoke.cpp
+ LowerMemIntrinsics.cpp
+ LowerSwitch.cpp
+ MatrixUtils.cpp
+ Mem2Reg.cpp
+ MemoryOpRemark.cpp
+ MemoryTaggingSupport.cpp
+ MetaRenamer.cpp
+ MisExpect.cpp
+ ModuleUtils.cpp
+ NameAnonGlobals.cpp
+ PredicateInfo.cpp
+ PromoteMemoryToRegister.cpp
+ RelLookupTableConverter.cpp
+ SCCPSolver.cpp
+ SSAUpdater.cpp
+ SSAUpdaterBulk.cpp
+ SampleProfileInference.cpp
+ SampleProfileLoaderBaseUtil.cpp
+ SanitizerStats.cpp
+ ScalarEvolutionExpander.cpp
+ SimplifyCFG.cpp
+ SimplifyIndVar.cpp
+ SimplifyLibCalls.cpp
+ SizeOpts.cpp
+ SplitModule.cpp
+ StripGCRelocates.cpp
+ StripNonLineTableDebugInfo.cpp
+ SymbolRewriter.cpp
+ UnifyFunctionExitNodes.cpp
+ UnifyLoopExits.cpp
+ Utils.cpp
+ VNCoercion.cpp
+ ValueMapper.cpp
+)
+
+END()