author     shadchin <shadchin@yandex-team.ru>  2022-02-10 16:44:30 +0300
committer  Daniil Cherednik <dcherednik@yandex-team.ru>  2022-02-10 16:44:30 +0300
commit     2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch)
tree       012bb94d777798f1f56ac1cec429509766d05181 /contrib/libs/llvm12/lib/Transforms/IPO
parent     6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff)
download   ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Transforms/IPO')
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/AlwaysInliner.cpp  96
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/Annotation2Metadata.cpp  212
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/ArgumentPromotion.cpp  48
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/Attributor.cpp  880
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/AttributorAttributes.cpp  2272
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/BlockExtractor.cpp  104
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/ConstantMerge.cpp  4
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/DeadArgumentElimination.cpp  2
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/ForceFunctionAttrs.cpp  72
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/FunctionAttrs.cpp  364
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/FunctionImport.cpp  52
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/GlobalOpt.cpp  28
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/HotColdSplitting.cpp  150
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/IPO.cpp  8
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/IROutliner.cpp  3528
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/Inliner.cpp  64
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/LoopExtractor.cpp  204
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/LowerTypeTests.cpp  52
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/MergeFunctions.cpp  8
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/OpenMPOpt.cpp  2320
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/PartialInlining.cpp  344
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/PassManagerBuilder.cpp  224
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/PruneEH.cpp  66
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/SampleContextTracker.cpp  1170
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/SampleProfile.cpp  1670
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/SampleProfileProbe.cpp  868
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/StripSymbols.cpp  90
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp  6
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/WholeProgramDevirt.cpp  28
-rw-r--r--  contrib/libs/llvm12/lib/Transforms/IPO/ya.make  44
30 files changed, 7489 insertions, 7489 deletions
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/AlwaysInliner.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/AlwaysInliner.cpp
index 532599b42e..29ae893836 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -13,10 +13,10 @@
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
@@ -41,19 +41,19 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
return FAM.getResult<AssumptionAnalysis>(F);
};
- auto &PSI = MAM.getResult<ProfileSummaryAnalysis>(M);
+ auto &PSI = MAM.getResult<ProfileSummaryAnalysis>(M);
SmallSetVector<CallBase *, 16> Calls;
bool Changed = false;
SmallVector<Function *, 16> InlinedFunctions;
- for (Function &F : M) {
- // When a callee coroutine function is inlined into a caller coroutine
- // function before the coro-split pass,
- // the coro-early pass cannot handle this quite well.
- // So we won't inline the coroutine function if it has not been split yet.
- if (F.isPresplitCoroutine())
- continue;
-
+ for (Function &F : M) {
+ // When a callee coroutine function is inlined into a caller coroutine
+ // function before the coro-split pass,
+ // the coro-early pass cannot handle this quite well.
+ // So we won't inline the coroutine function if it has not been split yet.
+ if (F.isPresplitCoroutine())
+ continue;
+
if (!F.isDeclaration() && F.hasFnAttribute(Attribute::AlwaysInline) &&
isInlineViable(F).isSuccess()) {
Calls.clear();
@@ -63,41 +63,41 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
if (CB->getCalledFunction() == &F)
Calls.insert(CB);
- for (CallBase *CB : Calls) {
- Function *Caller = CB->getCaller();
- OptimizationRemarkEmitter ORE(Caller);
- auto OIC = shouldInline(
- *CB,
- [&](CallBase &CB) {
- return InlineCost::getAlways("always inline attribute");
- },
- ORE);
- assert(OIC);
- emitInlinedInto(ORE, CB->getDebugLoc(), CB->getParent(), F, *Caller,
- *OIC, false, DEBUG_TYPE);
-
- InlineFunctionInfo IFI(
- /*cg=*/nullptr, GetAssumptionCache, &PSI,
- &FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())),
- &FAM.getResult<BlockFrequencyAnalysis>(F));
-
- InlineResult Res = InlineFunction(
- *CB, IFI, &FAM.getResult<AAManager>(F), InsertLifetime);
- assert(Res.isSuccess() && "unexpected failure to inline");
- (void)Res;
-
- // Merge the attributes based on the inlining.
- AttributeFuncs::mergeAttributesForInlining(*Caller, F);
-
- Changed = true;
- }
-
+ for (CallBase *CB : Calls) {
+ Function *Caller = CB->getCaller();
+ OptimizationRemarkEmitter ORE(Caller);
+ auto OIC = shouldInline(
+ *CB,
+ [&](CallBase &CB) {
+ return InlineCost::getAlways("always inline attribute");
+ },
+ ORE);
+ assert(OIC);
+ emitInlinedInto(ORE, CB->getDebugLoc(), CB->getParent(), F, *Caller,
+ *OIC, false, DEBUG_TYPE);
+
+ InlineFunctionInfo IFI(
+ /*cg=*/nullptr, GetAssumptionCache, &PSI,
+ &FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())),
+ &FAM.getResult<BlockFrequencyAnalysis>(F));
+
+ InlineResult Res = InlineFunction(
+ *CB, IFI, &FAM.getResult<AAManager>(F), InsertLifetime);
+ assert(Res.isSuccess() && "unexpected failure to inline");
+ (void)Res;
+
+ // Merge the attributes based on the inlining.
+ AttributeFuncs::mergeAttributesForInlining(*Caller, F);
+
+ Changed = true;
+ }
+
// Remember to try and delete this function afterward. This both avoids
// re-walking the rest of the module and avoids dealing with any iterator
// invalidation issues while deleting functions.
InlinedFunctions.push_back(&F);
}
- }
+ }
// Remove any live functions.
erase_if(InlinedFunctions, [&](Function *F) {
@@ -190,13 +190,13 @@ InlineCost AlwaysInlinerLegacyPass::getInlineCost(CallBase &CB) {
if (!Callee)
return InlineCost::getNever("indirect call");
- // When a callee coroutine function is inlined into a caller coroutine
- // function before the coro-split pass,
- // the coro-early pass cannot handle this quite well.
- // So we won't inline the coroutine function if it has not been split yet.
- if (Callee->isPresplitCoroutine())
- return InlineCost::getNever("unsplit coroutine call");
-
+ // When a callee coroutine function is inlined into a caller coroutine
+ // function before the coro-split pass,
+ // the coro-early pass cannot handle this quite well.
+ // So we won't inline the coroutine function if it has not been split yet.
+ if (Callee->isPresplitCoroutine())
+ return InlineCost::getNever("unsplit coroutine call");
+
// FIXME: We shouldn't even get here for declarations.
if (Callee->isDeclaration())
return InlineCost::getNever("no definition");
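For orientation, a minimal source-level sketch (assuming Clang; the function names addOne and caller are illustrative, not from this commit) of the attribute this pass keys on:

    // Functions carrying always_inline are inlined unconditionally, even at
    // -O0, provided isInlineViable() succeeds for the callee.
    __attribute__((always_inline)) inline int addOne(int X) {
      return X + 1;
    }

    int caller(int Y) {
      // AlwaysInlinerPass replaces this call with the body of addOne.
      return addOne(Y);
    }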
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/Annotation2Metadata.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/Annotation2Metadata.cpp
index 5ca4e24df8..f2ad05676f 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/Annotation2Metadata.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/Annotation2Metadata.cpp
@@ -1,106 +1,106 @@
-//===-- Annotation2Metadata.cpp - Add !annotation metadata. ---------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Add !annotation metadata for entries in @llvm.global.annotations, generated
-// using __attribute__((annotate("_name"))) on functions in Clang.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/IPO/Annotation2Metadata.h"
-#include "llvm/Analysis/OptimizationRemarkEmitter.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/IPO.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "annotation2metadata"
-
-static bool convertAnnotation2Metadata(Module &M) {
- // Only add !annotation metadata if the corresponding remarks pass is also
- // enabled.
- if (!OptimizationRemarkEmitter::allowExtraAnalysis(M.getContext(),
- "annotation-remarks"))
- return false;
-
- auto *Annotations = M.getGlobalVariable("llvm.global.annotations");
- auto *C = dyn_cast_or_null<Constant>(Annotations);
- if (!C || C->getNumOperands() != 1)
- return false;
-
- C = cast<Constant>(C->getOperand(0));
-
- // Iterate over all entries in C and attach !annotation metadata to suitable
- // entries.
- for (auto &Op : C->operands()) {
- // Look at the operands to check if we can use the entry to generate
- // !annotation metadata.
- auto *OpC = dyn_cast<ConstantStruct>(&Op);
- if (!OpC || OpC->getNumOperands() != 4)
- continue;
- auto *StrGEP = dyn_cast<ConstantExpr>(OpC->getOperand(1));
- if (!StrGEP || StrGEP->getNumOperands() < 2)
- continue;
- auto *StrC = dyn_cast<GlobalValue>(StrGEP->getOperand(0));
- if (!StrC)
- continue;
- auto *StrData = dyn_cast<ConstantDataSequential>(StrC->getOperand(0));
- if (!StrData)
- continue;
- // Look through bitcast.
- auto *Bitcast = dyn_cast<ConstantExpr>(OpC->getOperand(0));
- if (!Bitcast || Bitcast->getOpcode() != Instruction::BitCast)
- continue;
- auto *Fn = dyn_cast<Function>(Bitcast->getOperand(0));
- if (!Fn)
- continue;
-
- // Add annotation to all instructions in the function.
- for (auto &I : instructions(Fn))
- I.addAnnotationMetadata(StrData->getAsCString());
- }
- return true;
-}
-
-namespace {
-struct Annotation2MetadataLegacy : public ModulePass {
- static char ID;
-
- Annotation2MetadataLegacy() : ModulePass(ID) {
- initializeAnnotation2MetadataLegacyPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override { return convertAnnotation2Metadata(M); }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
-};
-
-} // end anonymous namespace
-
-char Annotation2MetadataLegacy::ID = 0;
-
-INITIALIZE_PASS_BEGIN(Annotation2MetadataLegacy, DEBUG_TYPE,
- "Annotation2Metadata", false, false)
-INITIALIZE_PASS_END(Annotation2MetadataLegacy, DEBUG_TYPE,
- "Annotation2Metadata", false, false)
-
-ModulePass *llvm::createAnnotation2MetadataLegacyPass() {
- return new Annotation2MetadataLegacy();
-}
-
-PreservedAnalyses Annotation2MetadataPass::run(Module &M,
- ModuleAnalysisManager &AM) {
- convertAnnotation2Metadata(M);
- return PreservedAnalyses::all();
-}
+//===-- Annotation2Metadata.cpp - Add !annotation metadata. ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Add !annotation metadata for entries in @llvm.global.annotations, generated
+// using __attribute__((annotate("_name"))) on functions in Clang.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/Annotation2Metadata.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "annotation2metadata"
+
+static bool convertAnnotation2Metadata(Module &M) {
+ // Only add !annotation metadata if the corresponding remarks pass is also
+ // enabled.
+ if (!OptimizationRemarkEmitter::allowExtraAnalysis(M.getContext(),
+ "annotation-remarks"))
+ return false;
+
+ auto *Annotations = M.getGlobalVariable("llvm.global.annotations");
+ auto *C = dyn_cast_or_null<Constant>(Annotations);
+ if (!C || C->getNumOperands() != 1)
+ return false;
+
+ C = cast<Constant>(C->getOperand(0));
+
+ // Iterate over all entries in C and attach !annotation metadata to suitable
+ // entries.
+ for (auto &Op : C->operands()) {
+ // Look at the operands to check if we can use the entry to generate
+ // !annotation metadata.
+ auto *OpC = dyn_cast<ConstantStruct>(&Op);
+ if (!OpC || OpC->getNumOperands() != 4)
+ continue;
+ auto *StrGEP = dyn_cast<ConstantExpr>(OpC->getOperand(1));
+ if (!StrGEP || StrGEP->getNumOperands() < 2)
+ continue;
+ auto *StrC = dyn_cast<GlobalValue>(StrGEP->getOperand(0));
+ if (!StrC)
+ continue;
+ auto *StrData = dyn_cast<ConstantDataSequential>(StrC->getOperand(0));
+ if (!StrData)
+ continue;
+ // Look through bitcast.
+ auto *Bitcast = dyn_cast<ConstantExpr>(OpC->getOperand(0));
+ if (!Bitcast || Bitcast->getOpcode() != Instruction::BitCast)
+ continue;
+ auto *Fn = dyn_cast<Function>(Bitcast->getOperand(0));
+ if (!Fn)
+ continue;
+
+ // Add annotation to all instructions in the function.
+ for (auto &I : instructions(Fn))
+ I.addAnnotationMetadata(StrData->getAsCString());
+ }
+ return true;
+}
+
+namespace {
+struct Annotation2MetadataLegacy : public ModulePass {
+ static char ID;
+
+ Annotation2MetadataLegacy() : ModulePass(ID) {
+ initializeAnnotation2MetadataLegacyPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override { return convertAnnotation2Metadata(M); }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+};
+
+} // end anonymous namespace
+
+char Annotation2MetadataLegacy::ID = 0;
+
+INITIALIZE_PASS_BEGIN(Annotation2MetadataLegacy, DEBUG_TYPE,
+ "Annotation2Metadata", false, false)
+INITIALIZE_PASS_END(Annotation2MetadataLegacy, DEBUG_TYPE,
+ "Annotation2Metadata", false, false)
+
+ModulePass *llvm::createAnnotation2MetadataLegacyPass() {
+ return new Annotation2MetadataLegacy();
+}
+
+PreservedAnalyses Annotation2MetadataPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ convertAnnotation2Metadata(M);
+ return PreservedAnalyses::all();
+}
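A sketch of the Clang input that populates @llvm.global.annotations, which convertAnnotation2Metadata() walks (the annotation string "perf_critical" and the function name hotLoop are hypothetical; the pass only acts when the annotation-remarks analysis is enabled):

    __attribute__((annotate("perf_critical")))
    int hotLoop(int N) {
      int Sum = 0;
      for (int I = 0; I < N; ++I)
        Sum += I;
      // Every instruction of hotLoop receives !annotation "perf_critical".
      return Sum;
    }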
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/ArgumentPromotion.cpp
index 7998a1ae5c..2044b7d37c 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -33,7 +33,7 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -142,7 +142,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// Simple byval argument? Just add all the struct element types.
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
StructType *STy = cast<StructType>(AgTy);
- llvm::append_range(Params, STy->elements());
+ llvm::append_range(Params, STy->elements());
ArgAttrVec.insert(ArgAttrVec.end(), STy->getNumElements(),
AttributeSet());
++NumByValArgsPromoted;
@@ -160,19 +160,19 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// In this table, we will track which indices are loaded from the argument
// (where direct loads are tracked as no indices).
ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
- for (User *U : make_early_inc_range(I->users())) {
+ for (User *U : make_early_inc_range(I->users())) {
Instruction *UI = cast<Instruction>(U);
Type *SrcTy;
if (LoadInst *L = dyn_cast<LoadInst>(UI))
SrcTy = L->getType();
else
SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType();
- // Skip dead GEPs and remove them.
- if (isa<GetElementPtrInst>(UI) && UI->use_empty()) {
- UI->eraseFromParent();
- continue;
- }
-
+ // Skip dead GEPs and remove them.
+ if (isa<GetElementPtrInst>(UI) && UI->use_empty()) {
+ UI->eraseFromParent();
+ continue;
+ }
+
IndicesVector Indices;
Indices.reserve(UI->getNumOperands() - 1);
// Since loads will only have a single operand, and GEPs only a single
@@ -220,11 +220,11 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
Function *NF = Function::Create(NFTy, F->getLinkage(), F->getAddressSpace(),
F->getName());
NF->copyAttributesFrom(F);
- NF->copyMetadata(F, 0);
+ NF->copyMetadata(F, 0);
- // The new function will have the !dbg metadata copied from the original
- // function. The original function may not be deleted, and dbg metadata
- // needs to be unique, so we need to drop it.
+ // The new function will have the !dbg metadata copied from the original
+ // function. The original function may not be deleted, and dbg metadata
+ // needs to be unique, so we need to drop it.
F->setSubprogram(nullptr);
LLVM_DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
@@ -418,11 +418,11 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
continue;
}
- // There potentially are metadata uses for things like llvm.dbg.value.
- // Replace them with undef, after handling the other regular uses.
- auto RauwUndefMetadata = make_scope_exit(
- [&]() { I->replaceAllUsesWith(UndefValue::get(I->getType())); });
-
+ // There potentially are metadata uses for things like llvm.dbg.value.
+ // Replace them with undef, after handling the other regular uses.
+ auto RauwUndefMetadata = make_scope_exit(
+ [&]() { I->replaceAllUsesWith(UndefValue::get(I->getType())); });
+
if (I->use_empty())
continue;
@@ -442,8 +442,8 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
<< "' in function '" << F->getName() << "'\n");
} else {
GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back());
- assert(!GEP->use_empty() &&
- "GEPs without uses should be cleaned up already");
+ assert(!GEP->use_empty() &&
+ "GEPs without uses should be cleaned up already");
IndicesVector Operands;
Operands.reserve(GEP->getNumIndices());
for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
@@ -682,7 +682,7 @@ static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR
if (GEP->use_empty()) {
// Dead GEP's cause trouble later. Just remove them if we run into
// them.
- continue;
+ continue;
}
if (!UpdateBaseTy(GEP->getSourceElementType()))
@@ -822,12 +822,12 @@ static bool canPaddingBeAccessed(Argument *arg) {
// Scan through the uses recursively to make sure the pointer is always used
// sanely.
- SmallVector<Value *, 16> WorkList(arg->users());
+ SmallVector<Value *, 16> WorkList(arg->users());
while (!WorkList.empty()) {
- Value *V = WorkList.pop_back_val();
+ Value *V = WorkList.pop_back_val();
if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) {
if (PtrValues.insert(V).second)
- llvm::append_range(WorkList, V->users());
+ llvm::append_range(WorkList, V->users());
} else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
Stores.push_back(Store);
} else if (!isa<LoadInst>(V)) {
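A source-level analogy of what doPromotion() achieves for a pointer argument that is only loaded from (the pass rewrites IR, not C++; the Pair struct and function names are illustrative assumptions):

    struct Pair { int A, B; };

    // Before: the aggregate is passed by pointer and only read.
    static int sumPair(const Pair *P) { return P->A + P->B; }

    // Conceptually after promotion: the loaded elements become scalar
    // parameters, eliminating the memory traffic for the argument.
    static int sumPairPromoted(int A, int B) { return A + B; }

    int usePair() {
      Pair P{1, 2};
      // After promotion the first call is rewritten into the second form.
      return sumPair(&P) + sumPairPromoted(P.A, P.B);
    }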
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/Attributor.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/Attributor.cpp
index 03ad451350..4a8934bc24 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/Attributor.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/Attributor.cpp
@@ -15,47 +15,47 @@
#include "llvm/Transforms/IPO/Attributor.h"
-#include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/TinyPtrVector.h"
-#include "llvm/Analysis/InlineCost.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LazyValueInfo.h"
-#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/DebugCounter.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/GraphWriter.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugCounter.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
-#include <string>
+#include <string>
using namespace llvm;
#define DEBUG_TYPE "attributor"
-DEBUG_COUNTER(ManifestDBGCounter, "attributor-manifest",
- "Determine what attributes are manifested in the IR");
-
+DEBUG_COUNTER(ManifestDBGCounter, "attributor-manifest",
+ "Determine what attributes are manifested in the IR");
+
STATISTIC(NumFnDeleted, "Number of functions deleted");
STATISTIC(NumFnWithExactDefinition,
"Number of functions with exact definitions");
STATISTIC(NumFnWithoutExactDefinition,
"Number of functions without exact definitions");
-STATISTIC(NumFnShallowWrappersCreated, "Number of shallow wrappers created");
+STATISTIC(NumFnShallowWrappersCreated, "Number of shallow wrappers created");
STATISTIC(NumAttributesTimedOut,
"Number of abstract attributes timed out before fixpoint");
STATISTIC(NumAttributesValidFixpoint,
@@ -77,14 +77,14 @@ static cl::opt<unsigned>
MaxFixpointIterations("attributor-max-iterations", cl::Hidden,
cl::desc("Maximal number of fixpoint iterations."),
cl::init(32));
-
-static cl::opt<unsigned, true> MaxInitializationChainLengthX(
- "attributor-max-initialization-chain-length", cl::Hidden,
- cl::desc(
- "Maximal number of chained initializations (to avoid stack overflows)"),
- cl::location(MaxInitializationChainLength), cl::init(1024));
-unsigned llvm::MaxInitializationChainLength;
-
+
+static cl::opt<unsigned, true> MaxInitializationChainLengthX(
+ "attributor-max-initialization-chain-length", cl::Hidden,
+ cl::desc(
+ "Maximal number of chained initializations (to avoid stack overflows)"),
+ cl::location(MaxInitializationChainLength), cl::init(1024));
+unsigned llvm::MaxInitializationChainLength;
+
static cl::opt<bool> VerifyMaxFixpointIterations(
"attributor-max-iterations-verify", cl::Hidden,
cl::desc("Verify that max-iterations is a tight bound for a fixpoint"),
@@ -103,52 +103,52 @@ static cl::opt<bool>
"wrappers for non-exact definitions."),
cl::init(false));
-static cl::opt<bool>
- AllowDeepWrapper("attributor-allow-deep-wrappers", cl::Hidden,
- cl::desc("Allow the Attributor to use IP information "
- "derived from non-exact functions via cloning"),
- cl::init(false));
-
-// These options can only be used in debug builds.
-#ifndef NDEBUG
+static cl::opt<bool>
+ AllowDeepWrapper("attributor-allow-deep-wrappers", cl::Hidden,
+ cl::desc("Allow the Attributor to use IP information "
+ "derived from non-exact functions via cloning"),
+ cl::init(false));
+
+// These options can only be used in debug builds.
+#ifndef NDEBUG
static cl::list<std::string>
SeedAllowList("attributor-seed-allow-list", cl::Hidden,
- cl::desc("Comma seperated list of attribute names that are "
+ cl::desc("Comma seperated list of attribute names that are "
"allowed to be seeded."),
cl::ZeroOrMore, cl::CommaSeparated);
-static cl::list<std::string> FunctionSeedAllowList(
- "attributor-function-seed-allow-list", cl::Hidden,
- cl::desc("Comma seperated list of function names that are "
- "allowed to be seeded."),
- cl::ZeroOrMore, cl::CommaSeparated);
-#endif
-
-static cl::opt<bool>
- DumpDepGraph("attributor-dump-dep-graph", cl::Hidden,
- cl::desc("Dump the dependency graph to dot files."),
- cl::init(false));
-
-static cl::opt<std::string> DepGraphDotFileNamePrefix(
- "attributor-depgraph-dot-filename-prefix", cl::Hidden,
- cl::desc("The prefix used for the CallGraph dot file names."));
-
-static cl::opt<bool> ViewDepGraph("attributor-view-dep-graph", cl::Hidden,
- cl::desc("View the dependency graph."),
- cl::init(false));
-
-static cl::opt<bool> PrintDependencies("attributor-print-dep", cl::Hidden,
- cl::desc("Print attribute dependencies"),
- cl::init(false));
-
+static cl::list<std::string> FunctionSeedAllowList(
+ "attributor-function-seed-allow-list", cl::Hidden,
+ cl::desc("Comma seperated list of function names that are "
+ "allowed to be seeded."),
+ cl::ZeroOrMore, cl::CommaSeparated);
+#endif
+
+static cl::opt<bool>
+ DumpDepGraph("attributor-dump-dep-graph", cl::Hidden,
+ cl::desc("Dump the dependency graph to dot files."),
+ cl::init(false));
+
+static cl::opt<std::string> DepGraphDotFileNamePrefix(
+ "attributor-depgraph-dot-filename-prefix", cl::Hidden,
+ cl::desc("The prefix used for the CallGraph dot file names."));
+
+static cl::opt<bool> ViewDepGraph("attributor-view-dep-graph", cl::Hidden,
+ cl::desc("View the dependency graph."),
+ cl::init(false));
+
+static cl::opt<bool> PrintDependencies("attributor-print-dep", cl::Hidden,
+ cl::desc("Print attribute dependencies"),
+ cl::init(false));
+
/// Logic operators for the change status enum class.
///
///{
-ChangeStatus llvm::operator|(ChangeStatus L, ChangeStatus R) {
- return L == ChangeStatus::CHANGED ? L : R;
+ChangeStatus llvm::operator|(ChangeStatus L, ChangeStatus R) {
+ return L == ChangeStatus::CHANGED ? L : R;
}
-ChangeStatus llvm::operator&(ChangeStatus L, ChangeStatus R) {
- return L == ChangeStatus::UNCHANGED ? L : R;
+ChangeStatus llvm::operator&(ChangeStatus L, ChangeStatus R) {
+ return L == ChangeStatus::UNCHANGED ? L : R;
}
///}
@@ -201,7 +201,7 @@ Argument *IRPosition::getAssociatedArgument() const {
// Not an Argument and no argument number means this is not a call site
// argument, thus we cannot find a callback argument to return.
- int ArgNo = getCallSiteArgNo();
+ int ArgNo = getCallSiteArgNo();
if (ArgNo < 0)
return nullptr;
@@ -329,13 +329,13 @@ const IRPosition
SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) {
IRPositions.emplace_back(IRP);
- // Helper to determine if operand bundles on a call site are benign or
- // potentially problematic. We handle only llvm.assume for now.
- auto CanIgnoreOperandBundles = [](const CallBase &CB) {
- return (isa<IntrinsicInst>(CB) &&
- cast<IntrinsicInst>(CB).getIntrinsicID() == Intrinsic::assume);
- };
-
+ // Helper to determine if operand bundles on a call site are benign or
+ // potentially problematic. We handle only llvm.assume for now.
+ auto CanIgnoreOperandBundles = [](const CallBase &CB) {
+ return (isa<IntrinsicInst>(CB) &&
+ cast<IntrinsicInst>(CB).getIntrinsicID() == Intrinsic::assume);
+ };
+
const auto *CB = dyn_cast<CallBase>(&IRP.getAnchorValue());
switch (IRP.getPositionKind()) {
case IRPosition::IRP_INVALID:
@@ -350,7 +350,7 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) {
assert(CB && "Expected call site!");
// TODO: We need to look at the operand bundles similar to the redirection
// in CallBase.
- if (!CB->hasOperandBundles() || CanIgnoreOperandBundles(*CB))
+ if (!CB->hasOperandBundles() || CanIgnoreOperandBundles(*CB))
if (const Function *Callee = CB->getCalledFunction())
IRPositions.emplace_back(IRPosition::function(*Callee));
return;
@@ -358,7 +358,7 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) {
assert(CB && "Expected call site!");
// TODO: We need to look at the operand bundles similar to the redirection
// in CallBase.
- if (!CB->hasOperandBundles() || CanIgnoreOperandBundles(*CB)) {
+ if (!CB->hasOperandBundles() || CanIgnoreOperandBundles(*CB)) {
if (const Function *Callee = CB->getCalledFunction()) {
IRPositions.emplace_back(IRPosition::returned(*Callee));
IRPositions.emplace_back(IRPosition::function(*Callee));
@@ -375,16 +375,16 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) {
IRPositions.emplace_back(IRPosition::callsite_function(*CB));
return;
case IRPosition::IRP_CALL_SITE_ARGUMENT: {
- assert(CB && "Expected call site!");
+ assert(CB && "Expected call site!");
// TODO: We need to look at the operand bundles similar to the redirection
// in CallBase.
- if (!CB->hasOperandBundles() || CanIgnoreOperandBundles(*CB)) {
+ if (!CB->hasOperandBundles() || CanIgnoreOperandBundles(*CB)) {
const Function *Callee = CB->getCalledFunction();
- if (Callee) {
- if (Argument *Arg = IRP.getAssociatedArgument())
- IRPositions.emplace_back(IRPosition::argument(*Arg));
+ if (Callee) {
+ if (Argument *Arg = IRP.getAssociatedArgument())
+ IRPositions.emplace_back(IRPosition::argument(*Arg));
IRPositions.emplace_back(IRPosition::function(*Callee));
- }
+ }
}
IRPositions.emplace_back(IRPosition::value(IRP.getAssociatedValue()));
return;
@@ -522,7 +522,7 @@ void IRPosition::verify() {
"Expected call base argument operand for a 'call site argument' "
"position");
assert(cast<CallBase>(U->getUser())->getArgOperandNo(U) ==
- unsigned(getCallSiteArgNo()) &&
+ unsigned(getCallSiteArgNo()) &&
"Argument number mismatch!");
assert(U->get() == &getAssociatedValue() && "Associated value mismatch!");
return;
@@ -561,10 +561,10 @@ Attributor::getAssumedConstant(const Value &V, const AbstractAttribute &AA,
Attributor::~Attributor() {
// The abstract attributes are allocated via the BumpPtrAllocator Allocator,
// thus we cannot delete them. We can, and want to, destruct them though.
- for (auto &DepAA : DG.SyntheticRoot.Deps) {
- AbstractAttribute *AA = cast<AbstractAttribute>(DepAA.getPointer());
+ for (auto &DepAA : DG.SyntheticRoot.Deps) {
+ AbstractAttribute *AA = cast<AbstractAttribute>(DepAA.getPointer());
AA->~AbstractAttribute();
- }
+ }
}
bool Attributor::isAssumedDead(const AbstractAttribute &AA,
@@ -929,15 +929,15 @@ bool Attributor::checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
// TODO: use the function scope once we have call site AAReturnedValues.
const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
- const auto *LivenessAA =
- CheckBBLivenessOnly ? nullptr
- : &(getAAFor<AAIsDead>(QueryingAA, QueryIRP,
- /* TrackDependence */ false));
+ const auto *LivenessAA =
+ CheckBBLivenessOnly ? nullptr
+ : &(getAAFor<AAIsDead>(QueryingAA, QueryIRP,
+ /* TrackDependence */ false));
auto &OpcodeInstMap =
InfoCache.getOpcodeInstMapForFunction(*AssociatedFunction);
if (!checkForAllInstructionsImpl(this, OpcodeInstMap, Pred, &QueryingAA,
- LivenessAA, Opcodes, CheckBBLivenessOnly))
+ LivenessAA, Opcodes, CheckBBLivenessOnly))
return false;
return true;
@@ -970,9 +970,9 @@ bool Attributor::checkForAllReadWriteInstructions(
}
void Attributor::runTillFixpoint() {
- TimeTraceScope TimeScope("Attributor::runTillFixpoint");
+ TimeTraceScope TimeScope("Attributor::runTillFixpoint");
LLVM_DEBUG(dbgs() << "[Attributor] Identified and initialized "
- << DG.SyntheticRoot.Deps.size()
+ << DG.SyntheticRoot.Deps.size()
<< " abstract attributes.\n");
// Now that all abstract attributes are collected and initialized we start
@@ -982,11 +982,11 @@ void Attributor::runTillFixpoint() {
SmallVector<AbstractAttribute *, 32> ChangedAAs;
SetVector<AbstractAttribute *> Worklist, InvalidAAs;
- Worklist.insert(DG.SyntheticRoot.begin(), DG.SyntheticRoot.end());
+ Worklist.insert(DG.SyntheticRoot.begin(), DG.SyntheticRoot.end());
do {
// Remember the size to determine new attributes.
- size_t NumAAs = DG.SyntheticRoot.Deps.size();
+ size_t NumAAs = DG.SyntheticRoot.Deps.size();
LLVM_DEBUG(dbgs() << "\n\n[Attributor] #Iteration: " << IterationCounter
<< ", Worklist size: " << Worklist.size() << "\n");
@@ -1003,7 +1003,7 @@ void Attributor::runTillFixpoint() {
while (!InvalidAA->Deps.empty()) {
const auto &Dep = InvalidAA->Deps.back();
InvalidAA->Deps.pop_back();
- AbstractAttribute *DepAA = cast<AbstractAttribute>(Dep.getPointer());
+ AbstractAttribute *DepAA = cast<AbstractAttribute>(Dep.getPointer());
if (Dep.getInt() == unsigned(DepClassTy::OPTIONAL)) {
Worklist.insert(DepAA);
continue;
@@ -1021,8 +1021,8 @@ void Attributor::runTillFixpoint() {
// changed to the work list.
for (AbstractAttribute *ChangedAA : ChangedAAs)
while (!ChangedAA->Deps.empty()) {
- Worklist.insert(
- cast<AbstractAttribute>(ChangedAA->Deps.back().getPointer()));
+ Worklist.insert(
+ cast<AbstractAttribute>(ChangedAA->Deps.back().getPointer()));
ChangedAA->Deps.pop_back();
}
@@ -1050,8 +1050,8 @@ void Attributor::runTillFixpoint() {
// Add attributes to the changed set if they have been created in the last
// iteration.
- ChangedAAs.append(DG.SyntheticRoot.begin() + NumAAs,
- DG.SyntheticRoot.end());
+ ChangedAAs.append(DG.SyntheticRoot.begin() + NumAAs,
+ DG.SyntheticRoot.end());
// Reset the work list and repopulate with the changed abstract attributes.
// Note that dependent ones are added above.
@@ -1084,8 +1084,8 @@ void Attributor::runTillFixpoint() {
}
while (!ChangedAA->Deps.empty()) {
- ChangedAAs.push_back(
- cast<AbstractAttribute>(ChangedAA->Deps.back().getPointer()));
+ ChangedAAs.push_back(
+ cast<AbstractAttribute>(ChangedAA->Deps.back().getPointer()));
ChangedAA->Deps.pop_back();
}
}
@@ -1107,14 +1107,14 @@ void Attributor::runTillFixpoint() {
}
ChangeStatus Attributor::manifestAttributes() {
- TimeTraceScope TimeScope("Attributor::manifestAttributes");
- size_t NumFinalAAs = DG.SyntheticRoot.Deps.size();
+ TimeTraceScope TimeScope("Attributor::manifestAttributes");
+ size_t NumFinalAAs = DG.SyntheticRoot.Deps.size();
unsigned NumManifested = 0;
unsigned NumAtFixpoint = 0;
ChangeStatus ManifestChange = ChangeStatus::UNCHANGED;
- for (auto &DepAA : DG.SyntheticRoot.Deps) {
- AbstractAttribute *AA = cast<AbstractAttribute>(DepAA.getPointer());
+ for (auto &DepAA : DG.SyntheticRoot.Deps) {
+ AbstractAttribute *AA = cast<AbstractAttribute>(DepAA.getPointer());
AbstractState &State = AA->getState();
// If there is not already a fixpoint reached, we can now take the
@@ -1131,10 +1131,10 @@ ChangeStatus Attributor::manifestAttributes() {
// Skip dead code.
if (isAssumedDead(*AA, nullptr, /* CheckBBLivenessOnly */ true))
continue;
- // Consult the manifest debug counter that allows skipping manifestation
- // of AAs.
- if (!DebugCounter::shouldExecute(ManifestDBGCounter))
- continue;
+ // Consult the manifest debug counter that allows skipping manifestation
+ // of AAs.
+ if (!DebugCounter::shouldExecute(ManifestDBGCounter))
+ continue;
// Manifest the state and record if we changed the IR.
ChangeStatus LocalChange = AA->manifest(*this);
if (LocalChange == ChangeStatus::CHANGED && AreStatisticsEnabled())
@@ -1158,14 +1158,14 @@ ChangeStatus Attributor::manifestAttributes() {
NumAttributesValidFixpoint += NumAtFixpoint;
(void)NumFinalAAs;
- if (NumFinalAAs != DG.SyntheticRoot.Deps.size()) {
- for (unsigned u = NumFinalAAs; u < DG.SyntheticRoot.Deps.size(); ++u)
- errs() << "Unexpected abstract attribute: "
- << cast<AbstractAttribute>(DG.SyntheticRoot.Deps[u].getPointer())
+ if (NumFinalAAs != DG.SyntheticRoot.Deps.size()) {
+ for (unsigned u = NumFinalAAs; u < DG.SyntheticRoot.Deps.size(); ++u)
+ errs() << "Unexpected abstract attribute: "
+ << cast<AbstractAttribute>(DG.SyntheticRoot.Deps[u].getPointer())
<< " :: "
- << cast<AbstractAttribute>(DG.SyntheticRoot.Deps[u].getPointer())
- ->getIRPosition()
- .getAssociatedValue()
+ << cast<AbstractAttribute>(DG.SyntheticRoot.Deps[u].getPointer())
+ ->getIRPosition()
+ .getAssociatedValue()
<< "\n";
llvm_unreachable("Expected the final number of abstract attributes to "
"remain unchanged!");
@@ -1173,50 +1173,50 @@ ChangeStatus Attributor::manifestAttributes() {
return ManifestChange;
}
-void Attributor::identifyDeadInternalFunctions() {
- // Identify dead internal functions and delete them. This happens outside
- // the other fixpoint analysis as we might treat potentially dead functions
- // as live to lower the number of iterations. If they happen to be dead, the
- // below fixpoint loop will identify and eliminate them.
- SmallVector<Function *, 8> InternalFns;
- for (Function *F : Functions)
- if (F->hasLocalLinkage())
- InternalFns.push_back(F);
-
- SmallPtrSet<Function *, 8> LiveInternalFns;
- bool FoundLiveInternal = true;
- while (FoundLiveInternal) {
- FoundLiveInternal = false;
- for (unsigned u = 0, e = InternalFns.size(); u < e; ++u) {
- Function *F = InternalFns[u];
- if (!F)
- continue;
-
- bool AllCallSitesKnown;
- if (checkForAllCallSites(
- [&](AbstractCallSite ACS) {
- Function *Callee = ACS.getInstruction()->getFunction();
- return ToBeDeletedFunctions.count(Callee) ||
- (Functions.count(Callee) && Callee->hasLocalLinkage() &&
- !LiveInternalFns.count(Callee));
- },
- *F, true, nullptr, AllCallSitesKnown)) {
- continue;
- }
-
- LiveInternalFns.insert(F);
- InternalFns[u] = nullptr;
- FoundLiveInternal = true;
- }
- }
-
- for (unsigned u = 0, e = InternalFns.size(); u < e; ++u)
- if (Function *F = InternalFns[u])
- ToBeDeletedFunctions.insert(F);
-}
-
+void Attributor::identifyDeadInternalFunctions() {
+ // Identify dead internal functions and delete them. This happens outside
+ // the other fixpoint analysis as we might treat potentially dead functions
+ // as live to lower the number of iterations. If they happen to be dead, the
+ // below fixpoint loop will identify and eliminate them.
+ SmallVector<Function *, 8> InternalFns;
+ for (Function *F : Functions)
+ if (F->hasLocalLinkage())
+ InternalFns.push_back(F);
+
+ SmallPtrSet<Function *, 8> LiveInternalFns;
+ bool FoundLiveInternal = true;
+ while (FoundLiveInternal) {
+ FoundLiveInternal = false;
+ for (unsigned u = 0, e = InternalFns.size(); u < e; ++u) {
+ Function *F = InternalFns[u];
+ if (!F)
+ continue;
+
+ bool AllCallSitesKnown;
+ if (checkForAllCallSites(
+ [&](AbstractCallSite ACS) {
+ Function *Callee = ACS.getInstruction()->getFunction();
+ return ToBeDeletedFunctions.count(Callee) ||
+ (Functions.count(Callee) && Callee->hasLocalLinkage() &&
+ !LiveInternalFns.count(Callee));
+ },
+ *F, true, nullptr, AllCallSitesKnown)) {
+ continue;
+ }
+
+ LiveInternalFns.insert(F);
+ InternalFns[u] = nullptr;
+ FoundLiveInternal = true;
+ }
+ }
+
+ for (unsigned u = 0, e = InternalFns.size(); u < e; ++u)
+ if (Function *F = InternalFns[u])
+ ToBeDeletedFunctions.insert(F);
+}
+
ChangeStatus Attributor::cleanupIR() {
- TimeTraceScope TimeScope("Attributor::cleanupIR");
+ TimeTraceScope TimeScope("Attributor::cleanupIR");
// Delete stuff at the end to avoid invalid references and a nice order.
LLVM_DEBUG(dbgs() << "\n[Attributor] Delete at least "
<< ToBeDeletedFunctions.size() << " functions and "
@@ -1327,45 +1327,45 @@ ChangeStatus Attributor::cleanupIR() {
DetatchDeadBlocks(ToBeDeletedBBs, nullptr);
}
- identifyDeadInternalFunctions();
+ identifyDeadInternalFunctions();
// Rewrite the functions as requested during manifest.
ChangeStatus ManifestChange = rewriteFunctionSignatures(CGModifiedFunctions);
for (Function *Fn : CGModifiedFunctions)
- if (!ToBeDeletedFunctions.count(Fn))
- CGUpdater.reanalyzeFunction(*Fn);
+ if (!ToBeDeletedFunctions.count(Fn))
+ CGUpdater.reanalyzeFunction(*Fn);
- for (Function *Fn : ToBeDeletedFunctions) {
- if (!Functions.count(Fn))
- continue;
+ for (Function *Fn : ToBeDeletedFunctions) {
+ if (!Functions.count(Fn))
+ continue;
CGUpdater.removeFunction(*Fn);
- }
-
- if (!ToBeChangedUses.empty())
- ManifestChange = ChangeStatus::CHANGED;
-
- if (!ToBeChangedToUnreachableInsts.empty())
- ManifestChange = ChangeStatus::CHANGED;
-
- if (!ToBeDeletedFunctions.empty())
- ManifestChange = ChangeStatus::CHANGED;
-
- if (!ToBeDeletedBlocks.empty())
- ManifestChange = ChangeStatus::CHANGED;
-
- if (!ToBeDeletedInsts.empty())
- ManifestChange = ChangeStatus::CHANGED;
-
- if (!InvokeWithDeadSuccessor.empty())
- ManifestChange = ChangeStatus::CHANGED;
-
- if (!DeadInsts.empty())
- ManifestChange = ChangeStatus::CHANGED;
-
+ }
+
+ if (!ToBeChangedUses.empty())
+ ManifestChange = ChangeStatus::CHANGED;
+
+ if (!ToBeChangedToUnreachableInsts.empty())
+ ManifestChange = ChangeStatus::CHANGED;
+
+ if (!ToBeDeletedFunctions.empty())
+ ManifestChange = ChangeStatus::CHANGED;
+
+ if (!ToBeDeletedBlocks.empty())
+ ManifestChange = ChangeStatus::CHANGED;
+
+ if (!ToBeDeletedInsts.empty())
+ ManifestChange = ChangeStatus::CHANGED;
+
+ if (!InvokeWithDeadSuccessor.empty())
+ ManifestChange = ChangeStatus::CHANGED;
+
+ if (!DeadInsts.empty())
+ ManifestChange = ChangeStatus::CHANGED;
+
NumFnDeleted += ToBeDeletedFunctions.size();
- LLVM_DEBUG(dbgs() << "[Attributor] Deleted " << ToBeDeletedFunctions.size()
+ LLVM_DEBUG(dbgs() << "[Attributor] Deleted " << ToBeDeletedFunctions.size()
<< " functions after manifest.\n");
#ifdef EXPENSIVE_CHECKS
@@ -1380,37 +1380,37 @@ ChangeStatus Attributor::cleanupIR() {
}
ChangeStatus Attributor::run() {
- TimeTraceScope TimeScope("Attributor::run");
-
- Phase = AttributorPhase::UPDATE;
+ TimeTraceScope TimeScope("Attributor::run");
+
+ Phase = AttributorPhase::UPDATE;
runTillFixpoint();
-
- // dump graphs on demand
- if (DumpDepGraph)
- DG.dumpGraph();
-
- if (ViewDepGraph)
- DG.viewGraph();
-
- if (PrintDependencies)
- DG.print();
-
- Phase = AttributorPhase::MANIFEST;
+
+ // dump graphs on demand
+ if (DumpDepGraph)
+ DG.dumpGraph();
+
+ if (ViewDepGraph)
+ DG.viewGraph();
+
+ if (PrintDependencies)
+ DG.print();
+
+ Phase = AttributorPhase::MANIFEST;
ChangeStatus ManifestChange = manifestAttributes();
-
- Phase = AttributorPhase::CLEANUP;
+
+ Phase = AttributorPhase::CLEANUP;
ChangeStatus CleanupChange = cleanupIR();
-
+
return ManifestChange | CleanupChange;
}
ChangeStatus Attributor::updateAA(AbstractAttribute &AA) {
- TimeTraceScope TimeScope(
- AA.getName() + std::to_string(AA.getIRPosition().getPositionKind()) +
- "::updateAA");
- assert(Phase == AttributorPhase::UPDATE &&
- "We can update AA only in the update stage!");
-
+ TimeTraceScope TimeScope(
+ AA.getName() + std::to_string(AA.getIRPosition().getPositionKind()) +
+ "::updateAA");
+ assert(Phase == AttributorPhase::UPDATE &&
+ "We can update AA only in the update stage!");
+
// Use a new dependence vector for this update.
DependenceVector DV;
DependenceStack.push_back(&DV);
@@ -1438,7 +1438,7 @@ ChangeStatus Attributor::updateAA(AbstractAttribute &AA) {
return CS;
}
-void Attributor::createShallowWrapper(Function &F) {
+void Attributor::createShallowWrapper(Function &F) {
assert(!F.isDeclaration() && "Cannot create a wrapper around a declaration!");
Module &M = *F.getParent();
@@ -1471,7 +1471,7 @@ void Attributor::createShallowWrapper(Function &F) {
BasicBlock *EntryBB = BasicBlock::Create(Ctx, "entry", Wrapper);
SmallVector<Value *, 8> Args;
- Argument *FArgIt = F.arg_begin();
+ Argument *FArgIt = F.arg_begin();
for (Argument &Arg : Wrapper->args()) {
Args.push_back(&Arg);
Arg.setName((FArgIt++)->getName());
@@ -1482,59 +1482,59 @@ void Attributor::createShallowWrapper(Function &F) {
CI->addAttribute(AttributeList::FunctionIndex, Attribute::NoInline);
ReturnInst::Create(Ctx, CI->getType()->isVoidTy() ? nullptr : CI, EntryBB);
- NumFnShallowWrappersCreated++;
-}
-
-/// Make another copy of the function \p F such that the copied version has
-/// internal linkage afterwards and can be analysed. Then we replace all uses
-/// of the original function with the copied one.
-///
-/// Only non-exactly defined functions that have `linkonce_odr` or `weak_odr`
-/// linkage can be internalized because these linkages guarantee that other
-/// definitions with the same name have the same semantics as this one
-///
-static Function *internalizeFunction(Function &F) {
- assert(AllowDeepWrapper && "Cannot create a copy if not allowed.");
- assert(!F.isDeclaration() && !F.hasExactDefinition() &&
- !GlobalValue::isInterposableLinkage(F.getLinkage()) &&
- "Trying to internalize function which cannot be internalized.");
-
- Module &M = *F.getParent();
- FunctionType *FnTy = F.getFunctionType();
-
- // create a copy of the current function
- Function *Copied = Function::Create(FnTy, F.getLinkage(), F.getAddressSpace(),
- F.getName() + ".internalized");
- ValueToValueMapTy VMap;
- auto *NewFArgIt = Copied->arg_begin();
- for (auto &Arg : F.args()) {
- auto ArgName = Arg.getName();
- NewFArgIt->setName(ArgName);
- VMap[&Arg] = &(*NewFArgIt++);
- }
- SmallVector<ReturnInst *, 8> Returns;
-
- // Copy the body of the original function to the new one
- CloneFunctionInto(Copied, &F, VMap, /* ModuleLevelChanges */ false, Returns);
-
- // Set the linkage and visibility late, as CloneFunctionInto has some implicit
- // requirements.
- Copied->setVisibility(GlobalValue::DefaultVisibility);
- Copied->setLinkage(GlobalValue::PrivateLinkage);
-
- // Copy metadata
- SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
- F.getAllMetadata(MDs);
- for (auto MDIt : MDs)
- Copied->addMetadata(MDIt.first, *MDIt.second);
-
- M.getFunctionList().insert(F.getIterator(), Copied);
- F.replaceAllUsesWith(Copied);
- Copied->setDSOLocal(true);
-
- return Copied;
+ NumFnShallowWrappersCreated++;
}
+/// Make another copy of the function \p F such that the copied version has
+/// internal linkage afterwards and can be analysed. Then we replace all uses
+/// of the original function with the copied one.
+///
+/// Only non-exactly defined functions that have `linkonce_odr` or `weak_odr`
+/// linkage can be internalized because these linkages guarantee that other
+/// definitions with the same name have the same semantics as this one
+///
+static Function *internalizeFunction(Function &F) {
+ assert(AllowDeepWrapper && "Cannot create a copy if not allowed.");
+ assert(!F.isDeclaration() && !F.hasExactDefinition() &&
+ !GlobalValue::isInterposableLinkage(F.getLinkage()) &&
+ "Trying to internalize function which cannot be internalized.");
+
+ Module &M = *F.getParent();
+ FunctionType *FnTy = F.getFunctionType();
+
+ // create a copy of the current function
+ Function *Copied = Function::Create(FnTy, F.getLinkage(), F.getAddressSpace(),
+ F.getName() + ".internalized");
+ ValueToValueMapTy VMap;
+ auto *NewFArgIt = Copied->arg_begin();
+ for (auto &Arg : F.args()) {
+ auto ArgName = Arg.getName();
+ NewFArgIt->setName(ArgName);
+ VMap[&Arg] = &(*NewFArgIt++);
+ }
+ SmallVector<ReturnInst *, 8> Returns;
+
+ // Copy the body of the original function to the new one
+ CloneFunctionInto(Copied, &F, VMap, /* ModuleLevelChanges */ false, Returns);
+
+ // Set the linkage and visibility late, as CloneFunctionInto has some implicit
+ // requirements.
+ Copied->setVisibility(GlobalValue::DefaultVisibility);
+ Copied->setLinkage(GlobalValue::PrivateLinkage);
+
+ // Copy metadata
+ SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+ F.getAllMetadata(MDs);
+ for (auto MDIt : MDs)
+ Copied->addMetadata(MDIt.first, *MDIt.second);
+
+ M.getFunctionList().insert(F.getIterator(), Copied);
+ F.replaceAllUsesWith(Copied);
+ Copied->setDSOLocal(true);
+
+ return Copied;
+}
+
bool Attributor::isValidFunctionSignatureRewrite(
Argument &Arg, ArrayRef<Type *> ReplacementTypes) {
@@ -1635,17 +1635,17 @@ bool Attributor::registerFunctionSignatureRewrite(
}
bool Attributor::shouldSeedAttribute(AbstractAttribute &AA) {
- bool Result = true;
-#ifndef NDEBUG
- if (SeedAllowList.size() != 0)
- Result =
- std::count(SeedAllowList.begin(), SeedAllowList.end(), AA.getName());
- Function *Fn = AA.getAnchorScope();
- if (FunctionSeedAllowList.size() != 0 && Fn)
- Result &= std::count(FunctionSeedAllowList.begin(),
- FunctionSeedAllowList.end(), Fn->getName());
-#endif
- return Result;
+ bool Result = true;
+#ifndef NDEBUG
+ if (SeedAllowList.size() != 0)
+ Result =
+ std::count(SeedAllowList.begin(), SeedAllowList.end(), AA.getName());
+ Function *Fn = AA.getAnchorScope();
+ if (FunctionSeedAllowList.size() != 0 && Fn)
+ Result &= std::count(FunctionSeedAllowList.begin(),
+ FunctionSeedAllowList.end(), Fn->getName());
+#endif
+ return Result;
}
ChangeStatus Attributor::rewriteFunctionSignatures(
@@ -1656,7 +1656,7 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
Function *OldFn = It.getFirst();
// Deleted functions do not require rewrites.
- if (!Functions.count(OldFn) || ToBeDeletedFunctions.count(OldFn))
+ if (!Functions.count(OldFn) || ToBeDeletedFunctions.count(OldFn))
continue;
const SmallVectorImpl<std::unique_ptr<ArgumentReplacementInfo>> &ARIs =
@@ -1799,8 +1799,8 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
assert(Success && "Assumed call site replacement to succeed!");
// Rewire the arguments.
- Argument *OldFnArgIt = OldFn->arg_begin();
- Argument *NewFnArgIt = NewFn->arg_begin();
+ Argument *OldFnArgIt = OldFn->arg_begin();
+ Argument *NewFnArgIt = NewFn->arg_begin();
for (unsigned OldArgNum = 0; OldArgNum < ARIs.size();
++OldArgNum, ++OldFnArgIt) {
if (const std::unique_ptr<ArgumentReplacementInfo> &ARI =
@@ -1909,10 +1909,10 @@ void InformationCache::initializeInformationCache(const Function &CF,
InlineableFunctions.insert(&F);
}
-AAResults *InformationCache::getAAResultsForFunction(const Function &F) {
- return AG.getAnalysis<AAManager>(F);
-}
-
+AAResults *InformationCache::getAAResultsForFunction(const Function &F) {
+ return AG.getAnalysis<AAManager>(F);
+}
+
InformationCache::FunctionInfo::~FunctionInfo() {
// The instruction vectors are allocated using a BumpPtrAllocator, we need to
// manually destroy them.
@@ -2013,9 +2013,9 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
// Every function might be simplified.
getOrCreateAAFor<AAValueSimplify>(RetPos);
- // Every returned value might be marked noundef.
- getOrCreateAAFor<AANoUndef>(RetPos);
-
+ // Every returned value might be marked noundef.
+ getOrCreateAAFor<AANoUndef>(RetPos);
+
if (ReturnType->isPointerTy()) {
// Every function with pointer return type might be marked align.
@@ -2042,9 +2042,9 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
// Every argument might be dead.
getOrCreateAAFor<AAIsDead>(ArgPos);
- // Every argument might be marked noundef.
- getOrCreateAAFor<AANoUndef>(ArgPos);
-
+ // Every argument might be marked noundef.
+ getOrCreateAAFor<AANoUndef>(ArgPos);
+
if (Arg.getType()->isPointerTy()) {
// Every argument with pointer type might be marked nonnull.
getOrCreateAAFor<AANonNull>(ArgPos);
@@ -2112,9 +2112,9 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
// Call site argument might be simplified.
getOrCreateAAFor<AAValueSimplify>(CBArgPos);
- // Every call site argument might be marked "noundef".
- getOrCreateAAFor<AANoUndef>(CBArgPos);
-
+ // Every call site argument might be marked "noundef".
+ getOrCreateAAFor<AANoUndef>(CBArgPos);
+
if (!CB.getArgOperand(I)->getType()->isPointerTy())
continue;
@@ -2200,8 +2200,8 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, IRPosition::Kind AP) {
raw_ostream &llvm::operator<<(raw_ostream &OS, const IRPosition &Pos) {
const Value &AV = Pos.getAssociatedValue();
return OS << "{" << Pos.getPositionKind() << ":" << AV.getName() << " ["
- << Pos.getAnchorValue().getName() << "@" << Pos.getCallSiteArgNo()
- << "]}";
+ << Pos.getAnchorValue().getName() << "@" << Pos.getCallSiteArgNo()
+ << "]}";
}
raw_ostream &llvm::operator<<(raw_ostream &OS, const IntegerRangeState &S) {
@@ -2223,49 +2223,49 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const AbstractAttribute &AA) {
return OS;
}
-raw_ostream &llvm::operator<<(raw_ostream &OS,
- const PotentialConstantIntValuesState &S) {
- OS << "set-state(< {";
- if (!S.isValidState())
- OS << "full-set";
- else {
- for (auto &it : S.getAssumedSet())
- OS << it << ", ";
- if (S.undefIsContained())
- OS << "undef ";
- }
- OS << "} >)";
-
- return OS;
-}
-
+raw_ostream &llvm::operator<<(raw_ostream &OS,
+ const PotentialConstantIntValuesState &S) {
+ OS << "set-state(< {";
+ if (!S.isValidState())
+ OS << "full-set";
+ else {
+ for (auto &it : S.getAssumedSet())
+ OS << it << ", ";
+ if (S.undefIsContained())
+ OS << "undef ";
+ }
+ OS << "} >)";
+
+ return OS;
+}
+
void AbstractAttribute::print(raw_ostream &OS) const {
- OS << "[";
- OS << getName();
- OS << "] for CtxI ";
-
- if (auto *I = getCtxI()) {
- OS << "'";
- I->print(OS);
- OS << "'";
- } else
- OS << "<<null inst>>";
-
- OS << " at position " << getIRPosition() << " with state " << getAsStr()
- << '\n';
-}
-
-void AbstractAttribute::printWithDeps(raw_ostream &OS) const {
- print(OS);
-
- for (const auto &DepAA : Deps) {
- auto *AA = DepAA.getPointer();
- OS << " updates ";
- AA->print(OS);
- }
-
- OS << '\n';
+ OS << "[";
+ OS << getName();
+ OS << "] for CtxI ";
+
+ if (auto *I = getCtxI()) {
+ OS << "'";
+ I->print(OS);
+ OS << "'";
+ } else
+ OS << "<<null inst>>";
+
+ OS << " at position " << getIRPosition() << " with state " << getAsStr()
+ << '\n';
}
+
+void AbstractAttribute::printWithDeps(raw_ostream &OS) const {
+ print(OS);
+
+ for (const auto &DepAA : Deps) {
+ auto *AA = DepAA.getPointer();
+ OS << " updates ";
+ AA->print(OS);
+ }
+
+ OS << '\n';
+}
///}
/// ----------------------------------------------------------------------------
@@ -2290,32 +2290,32 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache,
if (AllowShallowWrappers)
for (Function *F : Functions)
if (!A.isFunctionIPOAmendable(*F))
- Attributor::createShallowWrapper(*F);
-
- // Internalize non-exact functions
- // TODO: for now we eagerly internalize functions without calculating the
- // cost; we need a cost interface to determine whether internalizing
- // a function is "beneficial"
- if (AllowDeepWrapper) {
- unsigned FunSize = Functions.size();
- for (unsigned u = 0; u < FunSize; u++) {
- Function *F = Functions[u];
- if (!F->isDeclaration() && !F->isDefinitionExact() && F->getNumUses() &&
- !GlobalValue::isInterposableLinkage(F->getLinkage())) {
- Function *NewF = internalizeFunction(*F);
- Functions.insert(NewF);
-
- // Update call graph
- CGUpdater.replaceFunctionWith(*F, *NewF);
- for (const Use &U : NewF->uses())
- if (CallBase *CB = dyn_cast<CallBase>(U.getUser())) {
- auto *CallerF = CB->getCaller();
- CGUpdater.reanalyzeFunction(*CallerF);
- }
- }
- }
- }
-
+ Attributor::createShallowWrapper(*F);
+
+ // Internalize non-exact functions
+ // TODO: for now we eagerly internalize functions without calculating the
+ // cost; we need a cost interface to determine whether internalizing
+ // a function is "beneficial"
+ if (AllowDeepWrapper) {
+ unsigned FunSize = Functions.size();
+ for (unsigned u = 0; u < FunSize; u++) {
+ Function *F = Functions[u];
+ if (!F->isDeclaration() && !F->isDefinitionExact() && F->getNumUses() &&
+ !GlobalValue::isInterposableLinkage(F->getLinkage())) {
+ Function *NewF = internalizeFunction(*F);
+ Functions.insert(NewF);
+
+ // Update call graph
+ CGUpdater.replaceFunctionWith(*F, *NewF);
+ for (const Use &U : NewF->uses())
+ if (CallBase *CB = dyn_cast<CallBase>(U.getUser())) {
+ auto *CallerF = CB->getCaller();
+ CGUpdater.reanalyzeFunction(*CallerF);
+ }
+ }
+ }
+ }
+
for (Function *F : Functions) {
if (F->hasExactDefinition())
NumFnWithExactDefinition++;
@@ -2323,8 +2323,8 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache,
NumFnWithoutExactDefinition++;
// We look at internal functions only on-demand but if any use is not a
- // direct call or is outside the current set of analyzed functions, we have
- // to do it eagerly.
+ // direct call or is outside the current set of analyzed functions, we have
+ // to do it eagerly.
if (F->hasLocalLinkage()) {
if (llvm::all_of(F->uses(), [&Functions](const Use &U) {
const auto *CB = dyn_cast<CallBase>(U.getUser());
@@ -2340,41 +2340,41 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache,
}
ChangeStatus Changed = A.run();
-
+
LLVM_DEBUG(dbgs() << "[Attributor] Done with " << Functions.size()
<< " functions, result: " << Changed << ".\n");
return Changed == ChangeStatus::CHANGED;
}
-void AADepGraph::viewGraph() { llvm::ViewGraph(this, "Dependency Graph"); }
-
-void AADepGraph::dumpGraph() {
- static std::atomic<int> CallTimes;
- std::string Prefix;
-
- if (!DepGraphDotFileNamePrefix.empty())
- Prefix = DepGraphDotFileNamePrefix;
- else
- Prefix = "dep_graph";
- std::string Filename =
- Prefix + "_" + std::to_string(CallTimes.load()) + ".dot";
-
- outs() << "Dependency graph dump to " << Filename << ".\n";
-
- std::error_code EC;
-
- raw_fd_ostream File(Filename, EC, sys::fs::OF_Text);
- if (!EC)
- llvm::WriteGraph(File, this);
-
- CallTimes++;
-}
-
-void AADepGraph::print() {
- for (auto DepAA : SyntheticRoot.Deps)
- cast<AbstractAttribute>(DepAA.getPointer())->printWithDeps(outs());
-}
-
+void AADepGraph::viewGraph() { llvm::ViewGraph(this, "Dependency Graph"); }
+
+void AADepGraph::dumpGraph() {
+ static std::atomic<int> CallTimes;
+ std::string Prefix;
+
+ if (!DepGraphDotFileNamePrefix.empty())
+ Prefix = DepGraphDotFileNamePrefix;
+ else
+ Prefix = "dep_graph";
+ std::string Filename =
+ Prefix + "_" + std::to_string(CallTimes.load()) + ".dot";
+
+ outs() << "Dependency graph dump to " << Filename << ".\n";
+
+ std::error_code EC;
+
+ raw_fd_ostream File(Filename, EC, sys::fs::OF_Text);
+ if (!EC)
+ llvm::WriteGraph(File, this);
+
+ CallTimes++;
+}
+
+void AADepGraph::print() {
+ for (auto DepAA : SyntheticRoot.Deps)
+ cast<AbstractAttribute>(DepAA.getPointer())->printWithDeps(outs());
+}
+
PreservedAnalyses AttributorPass::run(Module &M, ModuleAnalysisManager &AM) {
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
@@ -2416,58 +2416,58 @@ PreservedAnalyses AttributorCGSCCPass::run(LazyCallGraph::SCC &C,
InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ &Functions);
if (runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater)) {
// FIXME: Think about passes we will preserve and add them here.
- PreservedAnalyses PA;
- PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
- return PA;
+ PreservedAnalyses PA;
+ PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+ return PA;
}
return PreservedAnalyses::all();
}
-namespace llvm {
-
-template <> struct GraphTraits<AADepGraphNode *> {
- using NodeRef = AADepGraphNode *;
- using DepTy = PointerIntPair<AADepGraphNode *, 1>;
- using EdgeRef = PointerIntPair<AADepGraphNode *, 1>;
-
- static NodeRef getEntryNode(AADepGraphNode *DGN) { return DGN; }
- static NodeRef DepGetVal(DepTy &DT) { return DT.getPointer(); }
-
- using ChildIteratorType =
- mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>;
- using ChildEdgeIteratorType = TinyPtrVector<DepTy>::iterator;
-
- static ChildIteratorType child_begin(NodeRef N) { return N->child_begin(); }
-
- static ChildIteratorType child_end(NodeRef N) { return N->child_end(); }
-};
-
-template <>
-struct GraphTraits<AADepGraph *> : public GraphTraits<AADepGraphNode *> {
- static NodeRef getEntryNode(AADepGraph *DG) { return DG->GetEntryNode(); }
-
- using nodes_iterator =
- mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>;
-
- static nodes_iterator nodes_begin(AADepGraph *DG) { return DG->begin(); }
-
- static nodes_iterator nodes_end(AADepGraph *DG) { return DG->end(); }
-};
-
-template <> struct DOTGraphTraits<AADepGraph *> : public DefaultDOTGraphTraits {
- DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
-
- static std::string getNodeLabel(const AADepGraphNode *Node,
- const AADepGraph *DG) {
- std::string AAString;
- raw_string_ostream O(AAString);
- Node->print(O);
- return AAString;
- }
-};
-
-} // end namespace llvm
-
+namespace llvm {
+
+template <> struct GraphTraits<AADepGraphNode *> {
+ using NodeRef = AADepGraphNode *;
+ using DepTy = PointerIntPair<AADepGraphNode *, 1>;
+ using EdgeRef = PointerIntPair<AADepGraphNode *, 1>;
+
+ static NodeRef getEntryNode(AADepGraphNode *DGN) { return DGN; }
+ static NodeRef DepGetVal(DepTy &DT) { return DT.getPointer(); }
+
+ using ChildIteratorType =
+ mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>;
+ using ChildEdgeIteratorType = TinyPtrVector<DepTy>::iterator;
+
+ static ChildIteratorType child_begin(NodeRef N) { return N->child_begin(); }
+
+ static ChildIteratorType child_end(NodeRef N) { return N->child_end(); }
+};
+
+template <>
+struct GraphTraits<AADepGraph *> : public GraphTraits<AADepGraphNode *> {
+ static NodeRef getEntryNode(AADepGraph *DG) { return DG->GetEntryNode(); }
+
+ using nodes_iterator =
+ mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>;
+
+ static nodes_iterator nodes_begin(AADepGraph *DG) { return DG->begin(); }
+
+ static nodes_iterator nodes_end(AADepGraph *DG) { return DG->end(); }
+};
+
+template <> struct DOTGraphTraits<AADepGraph *> : public DefaultDOTGraphTraits {
+ DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getNodeLabel(const AADepGraphNode *Node,
+ const AADepGraph *DG) {
+ std::string AAString;
+ raw_string_ostream O(AAString);
+ Node->print(O);
+ return AAString;
+ }
+};
+
+} // end namespace llvm
+
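The two GraphTraits specializations above are what let generic LLVM utilities such as ViewGraph and WriteGraph walk the dependency graph without knowing its concrete node types. The pattern itself is plain C++; a self-contained sketch of the idea (a toy traits class plus a DFS written only against it, not LLVM's actual templates):

#include <cstdio>
#include <set>
#include <vector>

struct Node {
  int Id;
  std::vector<Node *> Succs;
};

// Primary template; each graph type provides its own specialization.
template <typename G> struct Traits;

template <> struct Traits<Node *> {
  using NodeRef = Node *;
  static auto child_begin(NodeRef N) { return N->Succs.begin(); }
  static auto child_end(NodeRef N) { return N->Succs.end(); }
};

// A generic traversal that only speaks the traits interface.
template <typename NodeRef> void dfs(NodeRef N, std::set<NodeRef> &Seen) {
  if (!Seen.insert(N).second)
    return; // already visited
  std::printf("visit %d\n", N->Id);
  for (auto I = Traits<NodeRef>::child_begin(N),
            E = Traits<NodeRef>::child_end(N);
       I != E; ++I)
    dfs(*I, Seen);
}

int main() {
  Node C{2, {}}, B{1, {&C}}, A{0, {&B, &C}};
  std::set<Node *> Seen;
  dfs(&A, Seen); // prints: visit 0, visit 1, visit 2
}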
namespace {
struct AttributorLegacyPass : public ModulePass {
@@ -2520,7 +2520,7 @@ struct AttributorCGSCCLegacyPass : public CallGraphSCCPass {
AnalysisGetter AG;
CallGraph &CG = const_cast<CallGraph &>(SCC.getCallGraph());
- CallGraphUpdater CGUpdater;
+ CallGraphUpdater CGUpdater;
CGUpdater.initialize(CG, SCC);
Module &M = *Functions.back()->getParent();
BumpPtrAllocator Allocator;
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/AttributorAttributes.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/AttributorAttributes.cpp
index d6127a8df6..f8bb9cc5b7 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -13,20 +13,20 @@
#include "llvm/Transforms/IPO/Attributor.h"
-#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
-#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/Support/CommandLine.h"
@@ -47,16 +47,16 @@ static cl::opt<bool> ManifestInternal(
static cl::opt<int> MaxHeapToStackSize("max-heap-to-stack-size", cl::init(128),
cl::Hidden);
-template <>
-unsigned llvm::PotentialConstantIntValuesState::MaxPotentialValues = 0;
-
-static cl::opt<unsigned, true> MaxPotentialValues(
- "attributor-max-potential-values", cl::Hidden,
- cl::desc("Maximum number of potential values to be "
- "tracked for each position."),
- cl::location(llvm::PotentialConstantIntValuesState::MaxPotentialValues),
- cl::init(7));
-
+template <>
+unsigned llvm::PotentialConstantIntValuesState::MaxPotentialValues = 0;
+
+static cl::opt<unsigned, true> MaxPotentialValues(
+ "attributor-max-potential-values", cl::Hidden,
+ cl::desc("Maximum number of potential values to be "
+ "tracked for each position."),
+ cl::location(llvm::PotentialConstantIntValuesState::MaxPotentialValues),
+ cl::init(7));
+
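The cl::opt above uses the less common external-storage form: the second template parameter `true` plus cl::location make the option write through into a variable owned by another component (here PotentialConstantIntValuesState::MaxPotentialValues) rather than storing the value inside the option object. A hedged minimal reproduction of the same pattern, assuming an LLVM build environment (option name and namespace are illustrative):

#include "llvm/Support/CommandLine.h"

namespace demo {
unsigned MaxValues = 0; // storage lives with the consumer, not the option
} // namespace demo

static llvm::cl::opt<unsigned, true> MaxValuesOpt(
    "demo-max-values", llvm::cl::Hidden,
    llvm::cl::desc("Maximum number of values tracked per position."),
    llvm::cl::location(demo::MaxValues), llvm::cl::init(7));

int main(int argc, char **argv) {
  llvm::cl::ParseCommandLineOptions(argc, argv);
  // demo::MaxValues now holds 7 or whatever -demo-max-values=N supplied.
  return demo::MaxValues == 0;
}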
STATISTIC(NumAAs, "Number of abstract attributes created");
// Some helper macros to deal with statistics tracking.
@@ -132,8 +132,8 @@ PIPE_OPERATOR(AAMemoryLocation)
PIPE_OPERATOR(AAValueConstantRange)
PIPE_OPERATOR(AAPrivatizablePtr)
PIPE_OPERATOR(AAUndefinedBehavior)
-PIPE_OPERATOR(AAPotentialValues)
-PIPE_OPERATOR(AANoUndef)
+PIPE_OPERATOR(AAPotentialValues)
+PIPE_OPERATOR(AANoUndef)
#undef PIPE_OPERATOR
} // namespace llvm
@@ -452,7 +452,7 @@ static void clampReturnedValueStates(Attributor &A, const AAType &QueryingAA,
const AAType &AA = A.getAAFor<AAType>(QueryingAA, RVPos);
LLVM_DEBUG(dbgs() << "[Attributor] RV: " << RV << " AA: " << AA.getAsStr()
<< " @ " << RVPos << "\n");
- const StateType &AAS = AA.getState();
+ const StateType &AAS = AA.getState();
if (T.hasValue())
*T &= AAS;
else
@@ -502,7 +502,7 @@ static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA,
Optional<StateType> T;
// The argument number which is also the call site argument number.
- unsigned ArgNo = QueryingAA.getIRPosition().getCallSiteArgNo();
+ unsigned ArgNo = QueryingAA.getIRPosition().getCallSiteArgNo();
auto CallSiteCheck = [&](AbstractCallSite ACS) {
const IRPosition &ACSArgPos = IRPosition::callsite_argument(ACS, ArgNo);
@@ -514,7 +514,7 @@ static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA,
const AAType &AA = A.getAAFor<AAType>(QueryingAA, ACSArgPos);
LLVM_DEBUG(dbgs() << "[Attributor] ACS: " << *ACS.getInstruction()
<< " AA: " << AA.getAsStr() << " @" << ACSArgPos << "\n");
- const StateType &AAS = AA.getState();
+ const StateType &AAS = AA.getState();
if (T.hasValue())
*T &= AAS;
else
@@ -571,7 +571,7 @@ struct AACallSiteReturnedFromReturned : public BaseType {
IRPosition FnPos = IRPosition::returned(*AssociatedFunction);
const AAType &AA = A.getAAFor<AAType>(*this, FnPos);
- return clampStateAndIndicateChange(S, AA.getState());
+ return clampStateAndIndicateChange(S, AA.getState());
}
};
@@ -738,7 +738,7 @@ struct AANoUnwindCallSite final : AANoUnwindImpl {
void initialize(Attributor &A) override {
AANoUnwindImpl::initialize(A);
Function *F = getAssociatedFunction();
- if (!F || F->isDeclaration())
+ if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
@@ -751,7 +751,7 @@ struct AANoUnwindCallSite final : AANoUnwindImpl {
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
auto &FnAA = A.getAAFor<AANoUnwind>(*this, FnPos);
- return clampStateAndIndicateChange(getState(), FnAA.getState());
+ return clampStateAndIndicateChange(getState(), FnAA.getState());
}
/// See AbstractAttribute::trackStatistics()
@@ -797,7 +797,7 @@ public:
ReturnedValues.clear();
Function *F = getAssociatedFunction();
- if (!F || F->isDeclaration()) {
+ if (!F || F->isDeclaration()) {
indicatePessimisticFixpoint();
return;
}
@@ -1066,10 +1066,10 @@ ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
// map, NewRVsMap.
decltype(ReturnedValues) NewRVsMap;
- auto HandleReturnValue = [&](Value *RV,
- SmallSetVector<ReturnInst *, 4> &RIs) {
- LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned value: " << *RV << " by #"
- << RIs.size() << " RIs\n");
+ auto HandleReturnValue = [&](Value *RV,
+ SmallSetVector<ReturnInst *, 4> &RIs) {
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned value: " << *RV << " by #"
+ << RIs.size() << " RIs\n");
CallBase *CB = dyn_cast<CallBase>(RV);
if (!CB || UnresolvedCalls.count(CB))
return;
@@ -1143,13 +1143,13 @@ ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
RVState RVS({NewRVsMap, Unused, RetValAAIt.second});
VisitReturnedValue(*CB->getArgOperand(Arg->getArgNo()), RVS, CB);
continue;
- }
- if (isa<CallBase>(RetVal)) {
+ }
+ if (isa<CallBase>(RetVal)) {
// Call sites are resolved by the callee attribute over time, no need to
// do anything for us.
continue;
- }
- if (isa<Constant>(RetVal)) {
+ }
+ if (isa<Constant>(RetVal)) {
// Constants are valid everywhere, we can simply take them.
NewRVsMap[RetVal].insert(RIs.begin(), RIs.end());
continue;
@@ -1390,7 +1390,7 @@ struct AANoSyncCallSite final : AANoSyncImpl {
void initialize(Attributor &A) override {
AANoSyncImpl::initialize(A);
Function *F = getAssociatedFunction();
- if (!F || F->isDeclaration())
+ if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
@@ -1403,7 +1403,7 @@ struct AANoSyncCallSite final : AANoSyncImpl {
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
auto &FnAA = A.getAAFor<AANoSync>(*this, FnPos);
- return clampStateAndIndicateChange(getState(), FnAA.getState());
+ return clampStateAndIndicateChange(getState(), FnAA.getState());
}
/// See AbstractAttribute::trackStatistics()
@@ -1455,7 +1455,7 @@ struct AANoFreeCallSite final : AANoFreeImpl {
void initialize(Attributor &A) override {
AANoFreeImpl::initialize(A);
Function *F = getAssociatedFunction();
- if (!F || F->isDeclaration())
+ if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
@@ -1468,7 +1468,7 @@ struct AANoFreeCallSite final : AANoFreeImpl {
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
auto &FnAA = A.getAAFor<AANoFree>(*this, FnPos);
- return clampStateAndIndicateChange(getState(), FnAA.getState());
+ return clampStateAndIndicateChange(getState(), FnAA.getState());
}
/// See AbstractAttribute::trackStatistics()
@@ -1550,7 +1550,7 @@ struct AANoFreeCallSiteArgument final : AANoFreeFloating {
return indicatePessimisticFixpoint();
const IRPosition &ArgPos = IRPosition::argument(*Arg);
auto &ArgAA = A.getAAFor<AANoFree>(*this, ArgPos);
- return clampStateAndIndicateChange(getState(), ArgAA.getState());
+ return clampStateAndIndicateChange(getState(), ArgAA.getState());
}
/// See AbstractAttribute::trackStatistics()
@@ -1686,33 +1686,33 @@ struct AANonNullImpl : AANonNull {
Value &V = getAssociatedValue();
if (!NullIsDefined &&
hasAttr({Attribute::NonNull, Attribute::Dereferenceable},
- /* IgnoreSubsumingPositions */ false, &A)) {
+ /* IgnoreSubsumingPositions */ false, &A)) {
indicateOptimisticFixpoint();
- return;
- }
-
- if (isa<ConstantPointerNull>(V)) {
+ return;
+ }
+
+ if (isa<ConstantPointerNull>(V)) {
indicatePessimisticFixpoint();
- return;
- }
-
- AANonNull::initialize(A);
+ return;
+ }
+ AANonNull::initialize(A);
+
bool CanBeNull = true;
- if (V.getPointerDereferenceableBytes(A.getDataLayout(), CanBeNull)) {
- if (!CanBeNull) {
+ if (V.getPointerDereferenceableBytes(A.getDataLayout(), CanBeNull)) {
+ if (!CanBeNull) {
indicateOptimisticFixpoint();
- return;
- }
- }
+ return;
+ }
+ }
- if (isa<GlobalValue>(&getAssociatedValue())) {
- indicatePessimisticFixpoint();
- return;
- }
-
- if (Instruction *CtxI = getCtxI())
- followUsesInMBEC(*this, A, getState(), *CtxI);
+ if (isa<GlobalValue>(&getAssociatedValue())) {
+ indicatePessimisticFixpoint();
+ return;
+ }
+
+ if (Instruction *CtxI = getCtxI())
+ followUsesInMBEC(*this, A, getState(), *CtxI);
}
/// See followUsesInMBEC
@@ -1761,7 +1761,7 @@ struct AANonNullFloating : public AANonNullImpl {
T.indicatePessimisticFixpoint();
} else {
// Use abstract attribute information.
- const AANonNull::StateType &NS = AA.getState();
+ const AANonNull::StateType &NS = AA.getState();
T ^= NS;
}
return T.isValidState();
@@ -1781,15 +1781,15 @@ struct AANonNullFloating : public AANonNullImpl {
/// NonNull attribute for function return value.
struct AANonNullReturned final
- : AAReturnedFromReturnedValues<AANonNull, AANonNull> {
+ : AAReturnedFromReturnedValues<AANonNull, AANonNull> {
AANonNullReturned(const IRPosition &IRP, Attributor &A)
- : AAReturnedFromReturnedValues<AANonNull, AANonNull>(IRP, A) {}
-
- /// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
- return getAssumed() ? "nonnull" : "may-null";
- }
+ : AAReturnedFromReturnedValues<AANonNull, AANonNull>(IRP, A) {}
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ return getAssumed() ? "nonnull" : "may-null";
+ }
+
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(nonnull) }
};
@@ -1902,7 +1902,7 @@ struct AANoRecurseCallSite final : AANoRecurseImpl {
void initialize(Attributor &A) override {
AANoRecurseImpl::initialize(A);
Function *F = getAssociatedFunction();
- if (!F || F->isDeclaration())
+ if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
@@ -1915,7 +1915,7 @@ struct AANoRecurseCallSite final : AANoRecurseImpl {
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
auto &FnAA = A.getAAFor<AANoRecurse>(*this, FnPos);
- return clampStateAndIndicateChange(getState(), FnAA.getState());
+ return clampStateAndIndicateChange(getState(), FnAA.getState());
}
/// See AbstractAttribute::trackStatistics()
@@ -2000,98 +2000,98 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
return true;
};
- auto InspectCallSiteForUB = [&](Instruction &I) {
- // Check whether a callsite always causes UB or not.
-
- // Skip instructions that are already saved.
- if (AssumedNoUBInsts.count(&I) || KnownUBInsts.count(&I))
- return true;
-
- // Check nonnull and noundef argument attribute violation for each
- // callsite.
- CallBase &CB = cast<CallBase>(I);
- Function *Callee = CB.getCalledFunction();
- if (!Callee)
- return true;
- for (unsigned idx = 0; idx < CB.getNumArgOperands(); idx++) {
- // If the current argument is known to be simplified to a null pointer
- // and the corresponding argument position is known to have the nonnull
- // attribute, the argument is poison. Furthermore, if the argument is
- // poison and the position is known to have the noundef attribute, this
- // callsite is considered UB.
- if (idx >= Callee->arg_size())
- break;
- Value *ArgVal = CB.getArgOperand(idx);
- if (!ArgVal)
- continue;
- // Here, we handle three cases.
- // (1) Not having a value means it is dead (we can replace the value
- // with undef).
- // (2) Simplified to undef: the argument violates the noundef attribute.
- // (3) Simplified to a null pointer that is known to be nonnull:
- // the argument is a poison value and violates the noundef attribute.
- IRPosition CalleeArgumentIRP = IRPosition::callsite_argument(CB, idx);
- auto &NoUndefAA = A.getAAFor<AANoUndef>(*this, CalleeArgumentIRP,
- /* TrackDependence */ false);
- if (!NoUndefAA.isKnownNoUndef())
- continue;
- auto &ValueSimplifyAA = A.getAAFor<AAValueSimplify>(
- *this, IRPosition::value(*ArgVal), /* TrackDependence */ false);
- if (!ValueSimplifyAA.isKnown())
- continue;
- Optional<Value *> SimplifiedVal =
- ValueSimplifyAA.getAssumedSimplifiedValue(A);
- if (!SimplifiedVal.hasValue() ||
- isa<UndefValue>(*SimplifiedVal.getValue())) {
- KnownUBInsts.insert(&I);
- continue;
- }
- if (!ArgVal->getType()->isPointerTy() ||
- !isa<ConstantPointerNull>(*SimplifiedVal.getValue()))
- continue;
- auto &NonNullAA = A.getAAFor<AANonNull>(*this, CalleeArgumentIRP,
- /* TrackDependence */ false);
- if (NonNullAA.isKnownNonNull())
- KnownUBInsts.insert(&I);
- }
- return true;
- };
-
- auto InspectReturnInstForUB =
- [&](Value &V, const SmallSetVector<ReturnInst *, 4> RetInsts) {
- // Check if a return instruction always causes UB or not.
- // Note: It is guaranteed that the returned position of the anchor
- // scope has noundef attribute when this is called.
- // We also ensure the return position is not "assumed dead"
- // because the returned value was then potentially simplified to
- // `undef` in AAReturnedValues without removing the `noundef`
- // attribute yet.
-
- // When the returned position has the noundef attribute, UB occurs in the
- // following cases.
- // (1) Returned value is known to be undef.
- // (2) The value is known to be a null pointer and the returned
- // position has nonnull attribute (because the returned value is
- // poison).
- bool FoundUB = false;
- if (isa<UndefValue>(V)) {
- FoundUB = true;
- } else {
- if (isa<ConstantPointerNull>(V)) {
- auto &NonNullAA = A.getAAFor<AANonNull>(
- *this, IRPosition::returned(*getAnchorScope()),
- /* TrackDependence */ false);
- if (NonNullAA.isKnownNonNull())
- FoundUB = true;
- }
- }
-
- if (FoundUB)
- for (ReturnInst *RI : RetInsts)
- KnownUBInsts.insert(RI);
- return true;
- };
-
+ auto InspectCallSiteForUB = [&](Instruction &I) {
+ // Check whether a callsite always causes UB or not.
+
+ // Skip instructions that are already saved.
+ if (AssumedNoUBInsts.count(&I) || KnownUBInsts.count(&I))
+ return true;
+
+ // Check nonnull and noundef argument attribute violation for each
+ // callsite.
+ CallBase &CB = cast<CallBase>(I);
+ Function *Callee = CB.getCalledFunction();
+ if (!Callee)
+ return true;
+ for (unsigned idx = 0; idx < CB.getNumArgOperands(); idx++) {
+ // If the current argument is known to be simplified to a null pointer
+ // and the corresponding argument position is known to have the nonnull
+ // attribute, the argument is poison. Furthermore, if the argument is
+ // poison and the position is known to have the noundef attribute, this
+ // callsite is considered UB.
+ if (idx >= Callee->arg_size())
+ break;
+ Value *ArgVal = CB.getArgOperand(idx);
+ if (!ArgVal)
+ continue;
+ // Here, we handle three cases.
+ // (1) Not having a value means it is dead (we can replace the value
+ // with undef).
+ // (2) Simplified to undef: the argument violates the noundef attribute.
+ // (3) Simplified to a null pointer that is known to be nonnull:
+ // the argument is a poison value and violates the noundef attribute.
+ IRPosition CalleeArgumentIRP = IRPosition::callsite_argument(CB, idx);
+ auto &NoUndefAA = A.getAAFor<AANoUndef>(*this, CalleeArgumentIRP,
+ /* TrackDependence */ false);
+ if (!NoUndefAA.isKnownNoUndef())
+ continue;
+ auto &ValueSimplifyAA = A.getAAFor<AAValueSimplify>(
+ *this, IRPosition::value(*ArgVal), /* TrackDependence */ false);
+ if (!ValueSimplifyAA.isKnown())
+ continue;
+ Optional<Value *> SimplifiedVal =
+ ValueSimplifyAA.getAssumedSimplifiedValue(A);
+ if (!SimplifiedVal.hasValue() ||
+ isa<UndefValue>(*SimplifiedVal.getValue())) {
+ KnownUBInsts.insert(&I);
+ continue;
+ }
+ if (!ArgVal->getType()->isPointerTy() ||
+ !isa<ConstantPointerNull>(*SimplifiedVal.getValue()))
+ continue;
+ auto &NonNullAA = A.getAAFor<AANonNull>(*this, CalleeArgumentIRP,
+ /* TrackDependence */ false);
+ if (NonNullAA.isKnownNonNull())
+ KnownUBInsts.insert(&I);
+ }
+ return true;
+ };
+
+ auto InspectReturnInstForUB =
+ [&](Value &V, const SmallSetVector<ReturnInst *, 4> RetInsts) {
+ // Check if a return instruction always causes UB or not.
+ // Note: It is guaranteed that the returned position of the anchor
+ // scope has noundef attribute when this is called.
+ // We also ensure the return position is not "assumed dead"
+ // because the returned value was then potentially simplified to
+ // `undef` in AAReturnedValues without removing the `noundef`
+ // attribute yet.
+
+ // When the returned position has the noundef attribute, UB occurs in the
+ // following cases.
+ // (1) Returned value is known to be undef.
+ // (2) The value is known to be a null pointer and the returned
+ // position has nonnull attribute (because the returned value is
+ // poison).
+ bool FoundUB = false;
+ if (isa<UndefValue>(V)) {
+ FoundUB = true;
+ } else {
+ if (isa<ConstantPointerNull>(V)) {
+ auto &NonNullAA = A.getAAFor<AANonNull>(
+ *this, IRPosition::returned(*getAnchorScope()),
+ /* TrackDependence */ false);
+ if (NonNullAA.isKnownNonNull())
+ FoundUB = true;
+ }
+ }
+
+ if (FoundUB)
+ for (ReturnInst *RI : RetInsts)
+ KnownUBInsts.insert(RI);
+ return true;
+ };
+
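Concretely, the two lambdas above flag calls and returns where a null or undef value meets a noundef (and possibly nonnull) position. A short C++ illustration of the callsite case they detect; this is an example of the UB being diagnosed, not Attributor code, and the attribute syntax is the GCC/Clang extension:

#include <cstddef>
#include <cstring>

// In LLVM IR this parameter position carries `nonnull` (and, at -O1 and
// above, typically `noundef` too) -- the attributes InspectCallSiteForUB
// queries via AANonNull and AANoUndef.
__attribute__((nonnull(1))) static std::size_t length(const char *S) {
  return std::strlen(S);
}

int main() {
  // A null argument violates nonnull, so the argument is poison; because
  // the position is also noundef, the call itself is undefined behavior,
  // the exact situation recorded into KnownUBInsts above.
  return static_cast<int>(length(nullptr));
}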
A.checkForAllInstructions(InspectMemAccessInstForUB, *this,
{Instruction::Load, Instruction::Store,
Instruction::AtomicCmpXchg,
@@ -2099,22 +2099,22 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
/* CheckBBLivenessOnly */ true);
A.checkForAllInstructions(InspectBrInstForUB, *this, {Instruction::Br},
/* CheckBBLivenessOnly */ true);
- A.checkForAllCallLikeInstructions(InspectCallSiteForUB, *this);
-
- // If the returned position of the anchor scope has the noundef attribute, check
- // all returned instructions.
- if (!getAnchorScope()->getReturnType()->isVoidTy()) {
- const IRPosition &ReturnIRP = IRPosition::returned(*getAnchorScope());
- if (!A.isAssumedDead(ReturnIRP, this, nullptr)) {
- auto &RetPosNoUndefAA =
- A.getAAFor<AANoUndef>(*this, ReturnIRP,
- /* TrackDependence */ false);
- if (RetPosNoUndefAA.isKnownNoUndef())
- A.checkForAllReturnedValuesAndReturnInsts(InspectReturnInstForUB,
- *this);
- }
- }
-
+ A.checkForAllCallLikeInstructions(InspectCallSiteForUB, *this);
+
+ // If the returned position of the anchor scope has the noundef attribute, check
+ // all returned instructions.
+ if (!getAnchorScope()->getReturnType()->isVoidTy()) {
+ const IRPosition &ReturnIRP = IRPosition::returned(*getAnchorScope());
+ if (!A.isAssumedDead(ReturnIRP, this, nullptr)) {
+ auto &RetPosNoUndefAA =
+ A.getAAFor<AANoUndef>(*this, ReturnIRP,
+ /* TrackDependence */ false);
+ if (RetPosNoUndefAA.isKnownNoUndef())
+ A.checkForAllReturnedValuesAndReturnInsts(InspectReturnInstForUB,
+ *this);
+ }
+ }
+
if (NoUBPrevSize != AssumedNoUBInsts.size() ||
UBPrevSize != KnownUBInsts.size())
return ChangeStatus::CHANGED;
@@ -2282,7 +2282,7 @@ struct AAWillReturnImpl : public AAWillReturn {
AAWillReturn::initialize(A);
Function *F = getAnchorScope();
- if (!F || F->isDeclaration() || mayContainUnboundedCycle(*F, A))
+ if (!F || F->isDeclaration() || mayContainUnboundedCycle(*F, A))
indicatePessimisticFixpoint();
}
@@ -2326,9 +2326,9 @@ struct AAWillReturnCallSite final : AAWillReturnImpl {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
- AAWillReturn::initialize(A);
+ AAWillReturn::initialize(A);
Function *F = getAssociatedFunction();
- if (!F || !A.isFunctionIPOAmendable(*F))
+ if (!F || !A.isFunctionIPOAmendable(*F))
indicatePessimisticFixpoint();
}
@@ -2341,7 +2341,7 @@ struct AAWillReturnCallSite final : AAWillReturnImpl {
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
auto &FnAA = A.getAAFor<AAWillReturn>(*this, FnPos);
- return clampStateAndIndicateChange(getState(), FnAA.getState());
+ return clampStateAndIndicateChange(getState(), FnAA.getState());
}
/// See AbstractAttribute::trackStatistics()
@@ -2501,7 +2501,7 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
void initialize(Attributor &A) override {
// See callsite argument attribute and callee argument attribute.
const auto &CB = cast<CallBase>(getAnchorValue());
- if (CB.paramHasAttr(getCallSiteArgNo(), Attribute::NoAlias))
+ if (CB.paramHasAttr(getCallSiteArgNo(), Attribute::NoAlias))
indicateOptimisticFixpoint();
Value &Val = getAssociatedValue();
if (isa<ConstantPointerNull>(Val) &&
@@ -2516,7 +2516,7 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
const AAMemoryBehavior &MemBehaviorAA,
const CallBase &CB, unsigned OtherArgNo) {
// We do not need to worry about aliasing with the underlying IRP.
- if (this->getCalleeArgNo() == (int)OtherArgNo)
+ if (this->getCalleeArgNo() == (int)OtherArgNo)
return false;
// If it is not a pointer or pointer vector we do not alias.
@@ -2578,7 +2578,7 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
A.recordDependence(NoAliasAA, *this, DepClassTy::OPTIONAL);
const IRPosition &VIRP = IRPosition::value(getAssociatedValue());
- const Function *ScopeFn = VIRP.getAnchorScope();
+ const Function *ScopeFn = VIRP.getAnchorScope();
auto &NoCaptureAA =
A.getAAFor<AANoCapture>(*this, VIRP, /* TrackDependence */ false);
// Check whether the value is captured in the scope using AANoCapture.
@@ -2587,18 +2587,18 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
auto UsePred = [&](const Use &U, bool &Follow) -> bool {
Instruction *UserI = cast<Instruction>(U.getUser());
- // If UserI is the current instruction and there is a single potential use
- // of the value in UserI, we allow the use.
- // TODO: We should inspect the operands and allow those that cannot alias
- // with the value.
- if (UserI == getCtxI() && UserI->getNumOperands() == 1)
+ // If UserI is the current instruction and there is a single potential use
+ // of the value in UserI, we allow the use.
+ // TODO: We should inspect the operands and allow those that cannot alias
+ // with the value.
+ if (UserI == getCtxI() && UserI->getNumOperands() == 1)
return true;
if (ScopeFn) {
const auto &ReachabilityAA =
A.getAAFor<AAReachability>(*this, IRPosition::function(*ScopeFn));
- if (!ReachabilityAA.isAssumedReachable(A, *UserI, *getCtxI()))
+ if (!ReachabilityAA.isAssumedReachable(A, *UserI, *getCtxI()))
return true;
if (auto *CB = dyn_cast<CallBase>(UserI)) {
@@ -2684,14 +2684,14 @@ struct AANoAliasReturned final : AANoAliasImpl {
AANoAliasReturned(const IRPosition &IRP, Attributor &A)
: AANoAliasImpl(IRP, A) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AANoAliasImpl::initialize(A);
- Function *F = getAssociatedFunction();
- if (!F || F->isDeclaration())
- indicatePessimisticFixpoint();
- }
-
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoAliasImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F || F->isDeclaration())
+ indicatePessimisticFixpoint();
+ }
+
/// See AbstractAttribute::updateImpl(...).
virtual ChangeStatus updateImpl(Attributor &A) override {
@@ -2733,7 +2733,7 @@ struct AANoAliasCallSiteReturned final : AANoAliasImpl {
void initialize(Attributor &A) override {
AANoAliasImpl::initialize(A);
Function *F = getAssociatedFunction();
- if (!F || F->isDeclaration())
+ if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
@@ -2746,7 +2746,7 @@ struct AANoAliasCallSiteReturned final : AANoAliasImpl {
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::returned(*F);
auto &FnAA = A.getAAFor<AANoAlias>(*this, FnPos);
- return clampStateAndIndicateChange(getState(), FnAA.getState());
+ return clampStateAndIndicateChange(getState(), FnAA.getState());
}
/// See AbstractAttribute::trackStatistics()
@@ -2936,13 +2936,13 @@ struct AAIsDeadCallSiteArgument : public AAIsDeadValueImpl {
return indicatePessimisticFixpoint();
const IRPosition &ArgPos = IRPosition::argument(*Arg);
auto &ArgAA = A.getAAFor<AAIsDead>(*this, ArgPos);
- return clampStateAndIndicateChange(getState(), ArgAA.getState());
+ return clampStateAndIndicateChange(getState(), ArgAA.getState());
}
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
CallBase &CB = cast<CallBase>(getAnchorValue());
- Use &U = CB.getArgOperandUse(getCallSiteArgNo());
+ Use &U = CB.getArgOperandUse(getCallSiteArgNo());
assert(!isa<UndefValue>(U.get()) &&
"Expected undef values to be filtered out!");
UndefValue &UV = *UndefValue::get(U->getType());
@@ -3057,14 +3057,14 @@ struct AAIsDeadFunction : public AAIsDead {
void initialize(Attributor &A) override {
const Function *F = getAnchorScope();
if (F && !F->isDeclaration()) {
- // We only want to compute liveness once. If the function is not part of
- // the SCC, skip it.
- if (A.isRunOn(*const_cast<Function *>(F))) {
- ToBeExploredFrom.insert(&F->getEntryBlock().front());
- assumeLive(A, F->getEntryBlock());
- } else {
- indicatePessimisticFixpoint();
- }
+ // We only want to compute liveness once. If the function is not part of
+ // the SCC, skip it.
+ if (A.isRunOn(*const_cast<Function *>(F))) {
+ ToBeExploredFrom.insert(&F->getEntryBlock().front());
+ assumeLive(A, F->getEntryBlock());
+ } else {
+ indicatePessimisticFixpoint();
+ }
}
}
@@ -3127,10 +3127,10 @@ struct AAIsDeadFunction : public AAIsDead {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override;
- bool isEdgeDead(const BasicBlock *From, const BasicBlock *To) const override {
- return !AssumedLiveEdges.count(std::make_pair(From, To));
- }
-
+ bool isEdgeDead(const BasicBlock *From, const BasicBlock *To) const override {
+ return !AssumedLiveEdges.count(std::make_pair(From, To));
+ }
+
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {}
@@ -3208,9 +3208,9 @@ struct AAIsDeadFunction : public AAIsDead {
/// Collection of instructions that are known to not transfer control.
SmallSetVector<const Instruction *, 8> KnownDeadEnds;
- /// Collection of all assumed live edges
- DenseSet<std::pair<const BasicBlock *, const BasicBlock *>> AssumedLiveEdges;
-
+ /// Collection of all assumed live edges
+ DenseSet<std::pair<const BasicBlock *, const BasicBlock *>> AssumedLiveEdges;
+
/// Collection of all assumed live BasicBlocks.
DenseSet<const BasicBlock *> AssumedLiveBlocks;
};
@@ -3326,23 +3326,23 @@ ChangeStatus AAIsDeadFunction::updateImpl(Attributor &A) {
const Instruction *I = Worklist.pop_back_val();
LLVM_DEBUG(dbgs() << "[AAIsDead] Exploration inst: " << *I << "\n");
- // Fast forward for uninteresting instructions. We could look for UB here
- // though.
- while (!I->isTerminator() && !isa<CallBase>(I)) {
- Change = ChangeStatus::CHANGED;
- I = I->getNextNode();
- }
-
+ // Fast forward for uninteresting instructions. We could look for UB here
+ // though.
+ while (!I->isTerminator() && !isa<CallBase>(I)) {
+ Change = ChangeStatus::CHANGED;
+ I = I->getNextNode();
+ }
+
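The fast-forward above relies on straight-line code having no control effects: only terminators and calls can alter control flow, so the exploration can skip every plain instruction in between without changing any liveness decision. A toy model of that scan over a flat instruction list (illustrative only):

#include <cassert>
#include <cstddef>
#include <vector>

enum class Kind { Plain, Call, Terminator };

// Walk forward from index I to the next instruction that can affect
// control flow, mirroring the while loop in AAIsDeadFunction::updateImpl.
std::size_t fastForward(const std::vector<Kind> &Insts, std::size_t I) {
  while (Insts[I] != Kind::Terminator && Insts[I] != Kind::Call)
    ++I;
  return I;
}

int main() {
  std::vector<Kind> BB{Kind::Plain, Kind::Plain, Kind::Call,
                       Kind::Plain, Kind::Terminator};
  assert(fastForward(BB, 0) == 2); // stops at the call
  assert(fastForward(BB, 3) == 4); // stops at the terminator
}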
AliveSuccessors.clear();
bool UsedAssumedInformation = false;
switch (I->getOpcode()) {
// TODO: look for (assumed) UB to backwards propagate "deadness".
default:
- assert(I->isTerminator() &&
- "Expected non-terminators to be handled already!");
- for (const BasicBlock *SuccBB : successors(I->getParent()))
- AliveSuccessors.push_back(&SuccBB->front());
+ assert(I->isTerminator() &&
+ "Expected non-terminators to be handled already!");
+ for (const BasicBlock *SuccBB : successors(I->getParent()))
+ AliveSuccessors.push_back(&SuccBB->front());
break;
case Instruction::Call:
UsedAssumedInformation = identifyAliveSuccessors(A, cast<CallInst>(*I),
@@ -3381,9 +3381,9 @@ ChangeStatus AAIsDeadFunction::updateImpl(Attributor &A) {
"Non-terminator expected to have a single successor!");
Worklist.push_back(AliveSuccessor);
} else {
- // record the assumed live edge
- AssumedLiveEdges.insert(
- std::make_pair(I->getParent(), AliveSuccessor->getParent()));
+ // record the assumed live edge
+ AssumedLiveEdges.insert(
+ std::make_pair(I->getParent(), AliveSuccessor->getParent()));
if (assumeLive(A, *AliveSuccessor->getParent()))
Worklist.push_back(AliveSuccessor);
}
@@ -3576,7 +3576,7 @@ struct AADereferenceableFloating : AADereferenceableImpl {
DerefBytes = Base->getPointerDereferenceableBytes(DL, CanBeNull);
T.GlobalState.indicatePessimisticFixpoint();
} else {
- const DerefState &DS = AA.getState();
+ const DerefState &DS = AA.getState();
DerefBytes = DS.DerefBytesState.getAssumed();
T.GlobalState &= DS.GlobalState;
}
@@ -3852,27 +3852,27 @@ struct AAAlignFloating : AAAlignImpl {
AAAlign::StateType &T, bool Stripped) -> bool {
const auto &AA = A.getAAFor<AAAlign>(*this, IRPosition::value(V));
if (!Stripped && this == &AA) {
- int64_t Offset;
- unsigned Alignment = 1;
- if (const Value *Base =
- GetPointerBaseWithConstantOffset(&V, Offset, DL)) {
- Align PA = Base->getPointerAlignment(DL);
- // BasePointerAddr + Offset = Alignment * Q for some integer Q.
- // So we can say that the maximum power of two which is a divisor of
- // gcd(Offset, Alignment) is an alignment.
-
- uint32_t gcd = greatestCommonDivisor(uint32_t(abs((int32_t)Offset)),
- uint32_t(PA.value()));
- Alignment = llvm::PowerOf2Floor(gcd);
- } else {
- Alignment = V.getPointerAlignment(DL).value();
- }
+ int64_t Offset;
+ unsigned Alignment = 1;
+ if (const Value *Base =
+ GetPointerBaseWithConstantOffset(&V, Offset, DL)) {
+ Align PA = Base->getPointerAlignment(DL);
+ // BasePointerAddr + Offset = Alignment * Q for some integer Q.
+ // So we can say that the maximum power of two which is a divisor of
+ // gcd(Offset, Alignment) is an alignment.
+
+ uint32_t gcd = greatestCommonDivisor(uint32_t(abs((int32_t)Offset)),
+ uint32_t(PA.value()));
+ Alignment = llvm::PowerOf2Floor(gcd);
+ } else {
+ Alignment = V.getPointerAlignment(DL).value();
+ }
// Use only IR information if we did not strip anything.
- T.takeKnownMaximum(Alignment);
+ T.takeKnownMaximum(Alignment);
T.indicatePessimisticFixpoint();
} else {
// Use abstract attribute information.
- const AAAlign::StateType &DS = AA.getState();
+ const AAAlign::StateType &DS = AA.getState();
T ^= DS;
}
return T.isValidState();
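The gcd reasoning in this hunk is worth spelling out: if the base address is a multiple of PA and the pointer is Base + Offset, the address is a multiple of every power of two dividing gcd(|Offset|, PA), so PowerOf2Floor of that gcd is a sound alignment. A standalone check of the arithmetic in plain C++17, with no LLVM dependencies and PowerOf2Floor re-derived by hand:

#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <numeric>

// If BaseAddr % PA == 0 and Ptr = BaseAddr + Offset, then Ptr is a
// multiple of every power of two that divides gcd(|Offset|, PA).
static uint32_t alignmentFromBaseAndOffset(uint32_t PA, int32_t Offset) {
  uint32_t G = std::gcd(uint32_t(std::abs(Offset)), PA);
  uint32_t Pow2 = 1; // PowerOf2Floor: largest power of two <= G
  while (Pow2 * 2 <= G)
    Pow2 *= 2;
  return Pow2;
}

int main() {
  assert(alignmentFromBaseAndOffset(16, 8) == 8);  // gcd = 8
  assert(alignmentFromBaseAndOffset(16, 12) == 4); // gcd = 4
  assert(alignmentFromBaseAndOffset(16, 0) == 16); // gcd(0, 16) = 16
  assert(alignmentFromBaseAndOffset(8, 6) == 2);   // gcd = 2
}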
@@ -3895,17 +3895,17 @@ struct AAAlignFloating : AAAlignImpl {
/// Align attribute for function return value.
struct AAAlignReturned final
: AAReturnedFromReturnedValues<AAAlign, AAAlignImpl> {
- using Base = AAReturnedFromReturnedValues<AAAlign, AAAlignImpl>;
- AAAlignReturned(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- Base::initialize(A);
- Function *F = getAssociatedFunction();
- if (!F || F->isDeclaration())
- indicatePessimisticFixpoint();
- }
-
+ using Base = AAReturnedFromReturnedValues<AAAlign, AAAlignImpl>;
+ AAAlignReturned(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ Base::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F || F->isDeclaration())
+ indicatePessimisticFixpoint();
+ }
+
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(aligned) }
};
@@ -3978,7 +3978,7 @@ struct AAAlignCallSiteReturned final
void initialize(Attributor &A) override {
Base::initialize(A);
Function *F = getAssociatedFunction();
- if (!F || F->isDeclaration())
+ if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
@@ -3994,7 +3994,7 @@ struct AANoReturnImpl : public AANoReturn {
void initialize(Attributor &A) override {
AANoReturn::initialize(A);
Function *F = getAssociatedFunction();
- if (!F || F->isDeclaration())
+ if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
@@ -4026,17 +4026,17 @@ struct AANoReturnCallSite final : AANoReturnImpl {
AANoReturnCallSite(const IRPosition &IRP, Attributor &A)
: AANoReturnImpl(IRP, A) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AANoReturnImpl::initialize(A);
- if (Function *F = getAssociatedFunction()) {
- const IRPosition &FnPos = IRPosition::function(*F);
- auto &FnAA = A.getAAFor<AANoReturn>(*this, FnPos);
- if (!FnAA.isAssumedNoReturn())
- indicatePessimisticFixpoint();
- }
- }
-
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoReturnImpl::initialize(A);
+ if (Function *F = getAssociatedFunction()) {
+ const IRPosition &FnPos = IRPosition::function(*F);
+ auto &FnAA = A.getAAFor<AANoReturn>(*this, FnPos);
+ if (!FnAA.isAssumedNoReturn())
+ indicatePessimisticFixpoint();
+ }
+ }
+
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
@@ -4046,7 +4046,7 @@ struct AANoReturnCallSite final : AANoReturnImpl {
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
auto &FnAA = A.getAAFor<AANoReturn>(*this, FnPos);
- return clampStateAndIndicateChange(getState(), FnAA.getState());
+ return clampStateAndIndicateChange(getState(), FnAA.getState());
}
/// See AbstractAttribute::trackStatistics()
@@ -4079,8 +4079,8 @@ struct AANoCaptureImpl : public AANoCapture {
return;
}
- const Function *F =
- isArgumentPosition() ? getAssociatedFunction() : AnchorScope;
+ const Function *F =
+ isArgumentPosition() ? getAssociatedFunction() : AnchorScope;
// Check what state the associated function can actually capture.
if (F)
@@ -4099,7 +4099,7 @@ struct AANoCaptureImpl : public AANoCapture {
if (!isAssumedNoCaptureMaybeReturned())
return;
- if (isArgumentPosition()) {
+ if (isArgumentPosition()) {
if (isAssumedNoCapture())
Attrs.emplace_back(Attribute::get(Ctx, Attribute::NoCapture));
else if (ManifestInternal)
@@ -4135,7 +4135,7 @@ struct AANoCaptureImpl : public AANoCapture {
State.addKnownBits(NOT_CAPTURED_IN_RET);
// Check existing "returned" attributes.
- int ArgNo = IRP.getCalleeArgNo();
+ int ArgNo = IRP.getCalleeArgNo();
if (F.doesNotThrow() && ArgNo >= 0) {
for (unsigned u = 0, e = F.arg_size(); u < e; ++u)
if (F.hasParamAttribute(u, Attribute::Returned)) {
@@ -4311,13 +4311,13 @@ private:
ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) {
const IRPosition &IRP = getIRPosition();
- const Value *V = isArgumentPosition() ? IRP.getAssociatedArgument()
- : &IRP.getAssociatedValue();
+ const Value *V = isArgumentPosition() ? IRP.getAssociatedArgument()
+ : &IRP.getAssociatedValue();
if (!V)
return indicatePessimisticFixpoint();
const Function *F =
- isArgumentPosition() ? IRP.getAssociatedFunction() : IRP.getAnchorScope();
+ isArgumentPosition() ? IRP.getAssociatedFunction() : IRP.getAnchorScope();
assert(F && "Expected a function!");
const IRPosition &FnPos = IRPosition::function(*F);
const auto &IsDeadAA =
@@ -4434,7 +4434,7 @@ struct AANoCaptureCallSiteArgument final : AANoCaptureImpl {
return indicatePessimisticFixpoint();
const IRPosition &ArgPos = IRPosition::argument(*Arg);
auto &ArgAA = A.getAAFor<AANoCapture>(*this, ArgPos);
- return clampStateAndIndicateChange(getState(), ArgAA.getState());
+ return clampStateAndIndicateChange(getState(), ArgAA.getState());
}
/// See AbstractAttribute::trackStatistics()
@@ -4550,37 +4550,37 @@ struct AAValueSimplifyImpl : AAValueSimplify {
return true;
}
- /// Returns true if a simplification candidate is found.
- template <typename AAType> bool askSimplifiedValueFor(Attributor &A) {
+ /// Returns true if a simplification candidate is found.
+ template <typename AAType> bool askSimplifiedValueFor(Attributor &A) {
if (!getAssociatedValue().getType()->isIntegerTy())
return false;
- const auto &AA =
- A.getAAFor<AAType>(*this, getIRPosition(), /* TrackDependence */ false);
-
- Optional<ConstantInt *> COpt = AA.getAssumedConstantInt(A);
+ const auto &AA =
+ A.getAAFor<AAType>(*this, getIRPosition(), /* TrackDependence */ false);
- if (!COpt.hasValue()) {
+ Optional<ConstantInt *> COpt = AA.getAssumedConstantInt(A);
+
+ if (!COpt.hasValue()) {
SimplifiedAssociatedValue = llvm::None;
- A.recordDependence(AA, *this, DepClassTy::OPTIONAL);
- return true;
- }
- if (auto *C = COpt.getValue()) {
- SimplifiedAssociatedValue = C;
- A.recordDependence(AA, *this, DepClassTy::OPTIONAL);
- return true;
- }
- return false;
- }
-
- bool askSimplifiedValueForOtherAAs(Attributor &A) {
- if (askSimplifiedValueFor<AAValueConstantRange>(A))
- return true;
- if (askSimplifiedValueFor<AAPotentialValues>(A))
- return true;
- return false;
- }
-
+ A.recordDependence(AA, *this, DepClassTy::OPTIONAL);
+ return true;
+ }
+ if (auto *C = COpt.getValue()) {
+ SimplifiedAssociatedValue = C;
+ A.recordDependence(AA, *this, DepClassTy::OPTIONAL);
+ return true;
+ }
+ return false;
+ }
+
+ bool askSimplifiedValueForOtherAAs(Attributor &A) {
+ if (askSimplifiedValueFor<AAValueConstantRange>(A))
+ return true;
+ if (askSimplifiedValueFor<AAPotentialValues>(A))
+ return true;
+ return false;
+ }
+
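askSimplifiedValueForOtherAAs is a simple fallback chain: ask AAValueConstantRange first, then AAPotentialValues, and accept the first query that yields a verdict. The same shape with stub callables (illustrative only; the real code also records optional dependences and can keep the "unknown yet" state):

#include <cassert>
#include <functional>
#include <optional>
#include <vector>

// Try each source in order; the first one that produces an answer wins.
std::optional<int>
askOtherSources(const std::vector<std::function<std::optional<int>()>> &Srcs) {
  for (const auto &Ask : Srcs)
    if (auto V = Ask())
      return V;
  return std::nullopt;
}

int main() {
  auto None = [] { return std::optional<int>{}; };
  auto Seven = [] { return std::optional<int>{7}; };
  assert(askOtherSources({None, Seven}) == 7);
  assert(!askOtherSources({None, None}).has_value());
}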
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
@@ -4663,7 +4663,7 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
auto PredForCallSite = [&](AbstractCallSite ACS) {
const IRPosition &ACSArgPos =
- IRPosition::callsite_argument(ACS, getCallSiteArgNo());
+ IRPosition::callsite_argument(ACS, getCallSiteArgNo());
 // Check if a corresponding argument was found or if it is not
 // associated (which can happen for callback calls).
if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID)
@@ -4685,7 +4685,7 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
bool AllCallSitesKnown;
if (!A.checkForAllCallSites(PredForCallSite, *this, true,
AllCallSitesKnown))
- if (!askSimplifiedValueForOtherAAs(A))
+ if (!askSimplifiedValueForOtherAAs(A))
return indicatePessimisticFixpoint();
 // If a candidate was found in this update, return CHANGED.
@@ -4713,7 +4713,7 @@ struct AAValueSimplifyReturned : AAValueSimplifyImpl {
};
if (!A.checkForAllReturnedValues(PredForReturned, *this))
- if (!askSimplifiedValueForOtherAAs(A))
+ if (!askSimplifiedValueForOtherAAs(A))
return indicatePessimisticFixpoint();
 // If a candidate was found in this update, return CHANGED.
@@ -4782,76 +4782,76 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
indicatePessimisticFixpoint();
}
- /// Check if \p ICmp is an equality comparison (==/!=) with at least one
- /// nullptr. If so, try to simplify it using AANonNull on the other operand.
- /// Return true if successful, in that case SimplifiedAssociatedValue will be
- /// updated and \p Changed is set appropriately.
- bool checkForNullPtrCompare(Attributor &A, ICmpInst *ICmp,
- ChangeStatus &Changed) {
- if (!ICmp)
- return false;
- if (!ICmp->isEquality())
- return false;
-
- // This is a comparison with == or !=. We check for nullptr now.
- bool Op0IsNull = isa<ConstantPointerNull>(ICmp->getOperand(0));
- bool Op1IsNull = isa<ConstantPointerNull>(ICmp->getOperand(1));
- if (!Op0IsNull && !Op1IsNull)
- return false;
-
- LLVMContext &Ctx = ICmp->getContext();
- // Check for `nullptr ==/!= nullptr` first:
- if (Op0IsNull && Op1IsNull) {
- Value *NewVal = ConstantInt::get(
- Type::getInt1Ty(Ctx), ICmp->getPredicate() == CmpInst::ICMP_EQ);
- assert(!SimplifiedAssociatedValue.hasValue() &&
- "Did not expect non-fixed value for constant comparison");
- SimplifiedAssociatedValue = NewVal;
- indicateOptimisticFixpoint();
- Changed = ChangeStatus::CHANGED;
- return true;
- }
-
- // Left is the nullptr ==/!= non-nullptr case. We'll use AANonNull on the
- // non-nullptr operand and if we assume it's non-null we can conclude the
- // result of the comparison.
- assert((Op0IsNull || Op1IsNull) &&
- "Expected nullptr versus non-nullptr comparison at this point");
-
- // The index is the operand that we assume is not null.
- unsigned PtrIdx = Op0IsNull;
- auto &PtrNonNullAA = A.getAAFor<AANonNull>(
- *this, IRPosition::value(*ICmp->getOperand(PtrIdx)));
- if (!PtrNonNullAA.isAssumedNonNull())
- return false;
-
- // The new value depends on the predicate, true for != and false for ==.
- Value *NewVal = ConstantInt::get(Type::getInt1Ty(Ctx),
- ICmp->getPredicate() == CmpInst::ICMP_NE);
-
- assert((!SimplifiedAssociatedValue.hasValue() ||
- SimplifiedAssociatedValue == NewVal) &&
- "Did not expect to change value for zero-comparison");
-
- bool HasValueBefore = SimplifiedAssociatedValue.hasValue();
- SimplifiedAssociatedValue = NewVal;
-
- if (PtrNonNullAA.isKnownNonNull())
- indicateOptimisticFixpoint();
-
- Changed = HasValueBefore ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
- return true;
- }
-
+ /// Check if \p ICmp is an equality comparison (==/!=) with at least one
+ /// nullptr. If so, try to simplify it using AANonNull on the other operand.
+ /// Return true if successful, in that case SimplifiedAssociatedValue will be
+ /// updated and \p Changed is set appropriately.
+ bool checkForNullPtrCompare(Attributor &A, ICmpInst *ICmp,
+ ChangeStatus &Changed) {
+ if (!ICmp)
+ return false;
+ if (!ICmp->isEquality())
+ return false;
+
+ // This is a comparison with == or !=. We check for nullptr now.
+ bool Op0IsNull = isa<ConstantPointerNull>(ICmp->getOperand(0));
+ bool Op1IsNull = isa<ConstantPointerNull>(ICmp->getOperand(1));
+ if (!Op0IsNull && !Op1IsNull)
+ return false;
+
+ LLVMContext &Ctx = ICmp->getContext();
+ // Check for `nullptr ==/!= nullptr` first:
+ if (Op0IsNull && Op1IsNull) {
+ Value *NewVal = ConstantInt::get(
+ Type::getInt1Ty(Ctx), ICmp->getPredicate() == CmpInst::ICMP_EQ);
+ assert(!SimplifiedAssociatedValue.hasValue() &&
+ "Did not expect non-fixed value for constant comparison");
+ SimplifiedAssociatedValue = NewVal;
+ indicateOptimisticFixpoint();
+ Changed = ChangeStatus::CHANGED;
+ return true;
+ }
+
+ // Left is the nullptr ==/!= non-nullptr case. We'll use AANonNull on the
+ // non-nullptr operand and if we assume it's non-null we can conclude the
+ // result of the comparison.
+ assert((Op0IsNull || Op1IsNull) &&
+ "Expected nullptr versus non-nullptr comparison at this point");
+
+ // The index is the operand that we assume is not null.
+ unsigned PtrIdx = Op0IsNull;
+ auto &PtrNonNullAA = A.getAAFor<AANonNull>(
+ *this, IRPosition::value(*ICmp->getOperand(PtrIdx)));
+ if (!PtrNonNullAA.isAssumedNonNull())
+ return false;
+
+ // The new value depends on the predicate, true for != and false for ==.
+ Value *NewVal = ConstantInt::get(Type::getInt1Ty(Ctx),
+ ICmp->getPredicate() == CmpInst::ICMP_NE);
+
+ assert((!SimplifiedAssociatedValue.hasValue() ||
+ SimplifiedAssociatedValue == NewVal) &&
+ "Did not expect to change value for zero-comparison");
+
+ bool HasValueBefore = SimplifiedAssociatedValue.hasValue();
+ SimplifiedAssociatedValue = NewVal;
+
+ if (PtrNonNullAA.isKnownNonNull())
+ indicateOptimisticFixpoint();
+
+ Changed = HasValueBefore ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
+ return true;
+ }
+
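checkForNullPtrCompare's case analysis is compact enough to model in isolation. A minimal sketch of the same decision table, with booleans standing in for AANonNull's verdict (illustrative only; the real code also threads fixpoint state through Changed):

#include <cassert>
#include <optional>

enum class Pred { EQ, NE };

// Returns the folded i1 result, or nullopt if the compare cannot be
// simplified with the available information.
std::optional<bool> foldNullCompare(bool Op0IsNull, bool Op1IsNull,
                                    bool OtherOpKnownNonNull, Pred P) {
  if (!Op0IsNull && !Op1IsNull)
    return std::nullopt;  // no nullptr operand involved
  if (Op0IsNull && Op1IsNull)
    return P == Pred::EQ; // nullptr == nullptr folds to true
  if (!OtherOpKnownNonNull)
    return std::nullopt;  // cannot rule out a null operand
  return P == Pred::NE;   // nonnull != nullptr folds to true
}

int main() {
  assert(foldNullCompare(true, true, false, Pred::EQ) == true);
  assert(foldNullCompare(true, false, true, Pred::NE) == true);
  assert(!foldNullCompare(true, false, false, Pred::EQ).has_value());
}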
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
bool HasValueBefore = SimplifiedAssociatedValue.hasValue();
- ChangeStatus Changed;
- if (checkForNullPtrCompare(A, dyn_cast<ICmpInst>(&getAnchorValue()),
- Changed))
- return Changed;
-
+ ChangeStatus Changed;
+ if (checkForNullPtrCompare(A, dyn_cast<ICmpInst>(&getAnchorValue()),
+ Changed))
+ return Changed;
+
auto VisitValueCB = [&](Value &V, const Instruction *CtxI, bool &,
bool Stripped) -> bool {
auto &AA = A.getAAFor<AAValueSimplify>(*this, IRPosition::value(V));
@@ -4869,7 +4869,7 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
if (!genericValueTraversal<AAValueSimplify, bool>(
A, getIRPosition(), *this, Dummy, VisitValueCB, getCtxI(),
/* UseValueSimplify */ false))
- if (!askSimplifiedValueForOtherAAs(A))
+ if (!askSimplifiedValueForOtherAAs(A))
return indicatePessimisticFixpoint();
 // If a candidate was found in this update, return CHANGED.
@@ -4944,8 +4944,8 @@ struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating {
? dyn_cast<Constant>(SimplifiedAssociatedValue.getValue())
: UndefValue::get(V.getType());
if (C) {
- Use &U = cast<CallBase>(&getAnchorValue())
- ->getArgOperandUse(getCallSiteArgNo());
+ Use &U = cast<CallBase>(&getAnchorValue())
+ ->getArgOperandUse(getCallSiteArgNo());
// We can replace the AssociatedValue with the constant.
if (&V != C && V.getType() == C->getType()) {
if (A.changeUseAfterManifest(U, *C))
@@ -5264,7 +5264,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
return getAssociatedValue().getType()->getPointerElementType();
Optional<Type *> Ty;
- unsigned ArgNo = getIRPosition().getCallSiteArgNo();
+ unsigned ArgNo = getIRPosition().getCallSiteArgNo();
// Make sure the associated call site argument has the same type at all call
// sites and it is an allocation we know is safe to privatize, for now that
@@ -5527,9 +5527,9 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
new StoreInst(F.getArg(ArgNo + u), Ptr, &IP);
}
} else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) {
- Type *PointeeTy = PrivArrayType->getElementType();
- Type *PointeePtrTy = PointeeTy->getPointerTo();
- uint64_t PointeeTySize = DL.getTypeStoreSize(PointeeTy);
+ Type *PointeeTy = PrivArrayType->getElementType();
+ Type *PointeePtrTy = PointeeTy->getPointerTo();
+ uint64_t PointeeTySize = DL.getTypeStoreSize(PointeeTy);
for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) {
Value *Ptr =
constructPointer(PointeePtrTy, &Base, u * PointeeTySize, IRB, DL);
@@ -5575,7 +5575,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) {
Value *Ptr =
constructPointer(PointeePtrTy, Base, u * PointeeTySize, IRB, DL);
- LoadInst *L = new LoadInst(PointeeTy, Ptr, "", IP);
+ LoadInst *L = new LoadInst(PointeeTy, Ptr, "", IP);
L->setAlignment(Alignment);
ReplacementValues.push_back(L);
}
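The loop above materializes one typed load per array element at Base + u * PointeeTySize. The same addressing scheme in ordinary C++, as a rough model of what the emitted IR computes (illustrative; constructPointer additionally handles bitcasts and GEP folding):

#include <cassert>
#include <cstddef>

// Element u lives at Base + u * sizeof(T); load each one in turn, the way
// the constructPointer/LoadInst loop builds up ReplacementValues.
template <typename T, std::size_t N>
void copyOut(const T (&Priv)[N], T (&Out)[N]) {
  const char *Base = reinterpret_cast<const char *>(Priv);
  for (std::size_t u = 0; u < N; ++u) {
    const T *Ptr = reinterpret_cast<const T *>(Base + u * sizeof(T));
    Out[u] = *Ptr; // the typed load
  }
}

int main() {
  int In[3] = {1, 2, 3}, Out[3] = {};
  copyOut(In, Out);
  assert(Out[0] == 1 && Out[2] == 3);
}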
@@ -5619,14 +5619,14 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
Function &ReplacementFn, Function::arg_iterator ArgIt) {
BasicBlock &EntryBB = ReplacementFn.getEntryBlock();
Instruction *IP = &*EntryBB.getFirstInsertionPt();
- Instruction *AI = new AllocaInst(PrivatizableType.getValue(), 0,
- Arg->getName() + ".priv", IP);
+ Instruction *AI = new AllocaInst(PrivatizableType.getValue(), 0,
+ Arg->getName() + ".priv", IP);
createInitialization(PrivatizableType.getValue(), *AI, ReplacementFn,
ArgIt->getArgNo(), *IP);
-
- if (AI->getType() != Arg->getType())
- AI =
- BitCastInst::CreateBitOrPointerCast(AI, Arg->getType(), "", IP);
+
+ if (AI->getType() != Arg->getType())
+ AI =
+ BitCastInst::CreateBitOrPointerCast(AI, Arg->getType(), "", IP);
Arg->replaceAllUsesWith(AI);
for (CallInst *CI : TailCalls)
@@ -5685,7 +5685,7 @@ struct AAPrivatizablePtrFloating : public AAPrivatizablePtrImpl {
/// See AAPrivatizablePtrImpl::identifyPrivatizableType(...)
Optional<Type *> identifyPrivatizableType(Attributor &A) override {
- Value *Obj = getUnderlyingObject(&getAssociatedValue());
+ Value *Obj = getUnderlyingObject(&getAssociatedValue());
if (!Obj) {
LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] No underlying object found!\n");
return nullptr;
@@ -5805,7 +5805,7 @@ struct AAMemoryBehaviorImpl : public AAMemoryBehavior {
void initialize(Attributor &A) override {
intersectAssumedBits(BEST_STATE);
getKnownStateFromValue(getIRPosition(), getState());
- AAMemoryBehavior::initialize(A);
+ AAMemoryBehavior::initialize(A);
}
/// Return the memory behavior information encoded in the IR for \p IRP.
@@ -5900,7 +5900,7 @@ struct AAMemoryBehaviorFloating : AAMemoryBehaviorImpl {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
AAMemoryBehaviorImpl::initialize(A);
- addUsesOf(A, getAssociatedValue());
+ addUsesOf(A, getAssociatedValue());
}
/// See AbstractAttribute::updateImpl(...).
@@ -5926,14 +5926,14 @@ private:
void analyzeUseIn(Attributor &A, const Use *U, const Instruction *UserI);
protected:
- /// Add the uses of \p V to the `Uses` set we look at during the update step.
- void addUsesOf(Attributor &A, const Value &V);
-
+ /// Add the uses of \p V to the `Uses` set we look at during the update step.
+ void addUsesOf(Attributor &A, const Value &V);
+
/// Container for (transitive) uses of the associated argument.
- SmallVector<const Use *, 8> Uses;
-
- /// Set to remember the uses we already traversed.
- SmallPtrSet<const Use *, 8> Visited;
+ SmallVector<const Use *, 8> Uses;
+
+ /// Set to remember the uses we already traversed.
+ SmallPtrSet<const Use *, 8> Visited;
};
/// Memory behavior attribute for function argument.
@@ -5958,7 +5958,7 @@ struct AAMemoryBehaviorArgument : AAMemoryBehaviorFloating {
if (!Arg || !A.isFunctionIPOAmendable(*(Arg->getParent()))) {
indicatePessimisticFixpoint();
} else {
- addUsesOf(A, *Arg);
+ addUsesOf(A, *Arg);
}
}
@@ -5993,21 +5993,21 @@ struct AAMemoryBehaviorCallSiteArgument final : AAMemoryBehaviorArgument {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
- // If we don't have an associated argument, this is either a variadic call
- // or an indirect call; either way, nothing to do here.
- Argument *Arg = getAssociatedArgument();
- if (!Arg) {
- indicatePessimisticFixpoint();
- return;
- }
- if (Arg->hasByValAttr()) {
- addKnownBits(NO_WRITES);
- removeKnownBits(NO_READS);
- removeAssumedBits(NO_READS);
- }
+ // If we don't have an associated argument, this is either a variadic call
+ // or an indirect call; either way, nothing to do here.
+ Argument *Arg = getAssociatedArgument();
+ if (!Arg) {
+ indicatePessimisticFixpoint();
+ return;
+ }
+ if (Arg->hasByValAttr()) {
+ addKnownBits(NO_WRITES);
+ removeKnownBits(NO_READS);
+ removeAssumedBits(NO_READS);
+ }
AAMemoryBehaviorArgument::initialize(A);
- if (getAssociatedFunction()->isDeclaration())
- indicatePessimisticFixpoint();
+ if (getAssociatedFunction()->isDeclaration())
+ indicatePessimisticFixpoint();
}
/// See AbstractAttribute::updateImpl(...).
@@ -6019,7 +6019,7 @@ struct AAMemoryBehaviorCallSiteArgument final : AAMemoryBehaviorArgument {
Argument *Arg = getAssociatedArgument();
const IRPosition &ArgPos = IRPosition::argument(*Arg);
auto &ArgAA = A.getAAFor<AAMemoryBehavior>(*this, ArgPos);
- return clampStateAndIndicateChange(getState(), ArgAA.getState());
+ return clampStateAndIndicateChange(getState(), ArgAA.getState());
}
/// See AbstractAttribute::trackStatistics()
@@ -6038,14 +6038,14 @@ struct AAMemoryBehaviorCallSiteReturned final : AAMemoryBehaviorFloating {
AAMemoryBehaviorCallSiteReturned(const IRPosition &IRP, Attributor &A)
: AAMemoryBehaviorFloating(IRP, A) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AAMemoryBehaviorImpl::initialize(A);
- Function *F = getAssociatedFunction();
- if (!F || F->isDeclaration())
- indicatePessimisticFixpoint();
- }
-
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AAMemoryBehaviorImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F || F->isDeclaration())
+ indicatePessimisticFixpoint();
+ }
+
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
// We do not annotate returned values.
@@ -6095,7 +6095,7 @@ struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl {
void initialize(Attributor &A) override {
AAMemoryBehaviorImpl::initialize(A);
Function *F = getAssociatedFunction();
- if (!F || F->isDeclaration())
+ if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
@@ -6108,7 +6108,7 @@ struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl {
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
auto &FnAA = A.getAAFor<AAMemoryBehavior>(*this, FnPos);
- return clampStateAndIndicateChange(getState(), FnAA.getState());
+ return clampStateAndIndicateChange(getState(), FnAA.getState());
}
/// See AbstractAttribute::trackStatistics()
@@ -6210,7 +6210,7 @@ ChangeStatus AAMemoryBehaviorFloating::updateImpl(Attributor &A) {
// Check if the users of UserI should also be visited.
if (followUsersOfUseIn(A, U, UserI))
- addUsesOf(A, *UserI);
+ addUsesOf(A, *UserI);
// If UserI might touch memory we analyze the use in detail.
if (UserI->mayReadOrWriteMemory())
@@ -6221,28 +6221,28 @@ ChangeStatus AAMemoryBehaviorFloating::updateImpl(Attributor &A) {
: ChangeStatus::UNCHANGED;
}
-void AAMemoryBehaviorFloating::addUsesOf(Attributor &A, const Value &V) {
- SmallVector<const Use *, 8> WL;
- for (const Use &U : V.uses())
- WL.push_back(&U);
-
- while (!WL.empty()) {
- const Use *U = WL.pop_back_val();
- if (!Visited.insert(U).second)
- continue;
-
- const Instruction *UserI = cast<Instruction>(U->getUser());
- if (UserI->mayReadOrWriteMemory()) {
- Uses.push_back(U);
- continue;
- }
- if (!followUsersOfUseIn(A, U, UserI))
- continue;
- for (const Use &UU : UserI->uses())
- WL.push_back(&UU);
- }
-}
-
+void AAMemoryBehaviorFloating::addUsesOf(Attributor &A, const Value &V) {
+ SmallVector<const Use *, 8> WL;
+ for (const Use &U : V.uses())
+ WL.push_back(&U);
+
+ while (!WL.empty()) {
+ const Use *U = WL.pop_back_val();
+ if (!Visited.insert(U).second)
+ continue;
+
+ const Instruction *UserI = cast<Instruction>(U->getUser());
+ if (UserI->mayReadOrWriteMemory()) {
+ Uses.push_back(U);
+ continue;
+ }
+ if (!followUsersOfUseIn(A, U, UserI))
+ continue;
+ for (const Use &UU : UserI->uses())
+ WL.push_back(&UU);
+ }
+}
+
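The addUsesOf definition re-added just above is a standard iterative worklist walk over the use graph: seed with the direct uses, pop one, skip already-visited entries, keep memory-touching users, and otherwise fan out to their users. A minimal standalone sketch of the same shape, using a hypothetical Node type instead of the LLVM Use/User classes:

#include <unordered_set>
#include <vector>

// Hypothetical use-graph node; stands in for llvm::Use/llvm::User here.
struct Node {
  std::vector<Node *> Users;
  bool TouchesMemory = false;
};

// Collect the transitive users of Root that may touch memory, visiting each
// node at most once -- the same shape as AAMemoryBehaviorFloating::addUsesOf.
std::vector<Node *> collectMemoryUsers(Node &Root) {
  std::vector<Node *> Worklist(Root.Users.begin(), Root.Users.end());
  std::unordered_set<Node *> Visited;
  std::vector<Node *> MemoryUsers; // plays the role of `Uses`
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(N).second)
      continue; // already traversed
    if (N->TouchesMemory) {
      MemoryUsers.push_back(N);
      continue; // analyzed later; no need to follow its users here
    }
    // Otherwise keep following users, as followUsersOfUseIn allows.
    Worklist.insert(Worklist.end(), N->Users.begin(), N->Users.end());
  }
  return MemoryUsers;
}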
bool AAMemoryBehaviorFloating::followUsersOfUseIn(Attributor &A, const Use *U,
const Instruction *UserI) {
// The loaded value is unrelated to the pointer argument, no need to
@@ -6394,7 +6394,7 @@ struct AAMemoryLocationImpl : public AAMemoryLocation {
void initialize(Attributor &A) override {
intersectAssumedBits(BEST_STATE);
getKnownStateFromValue(A, getIRPosition(), getState());
- AAMemoryLocation::initialize(A);
+ AAMemoryLocation::initialize(A);
}
/// Return the memory behavior information encoded in the IR for \p IRP.
@@ -6557,13 +6557,13 @@ protected:
using AccessSet = SmallSet<AccessInfo, 2, AccessInfo>;
AccessSet *AccessKind2Accesses[llvm::CTLog2<VALID_STATE>()];
-  /// Categorize the pointer arguments of \p CB that might access memory in
-  /// \p AccessedLocs and update the state and access map accordingly.
- void
- categorizeArgumentPointerLocations(Attributor &A, CallBase &CB,
- AAMemoryLocation::StateType &AccessedLocs,
- bool &Changed);
-
+  /// Categorize the pointer arguments of \p CB that might access memory in
+  /// \p AccessedLocs and update the state and access map accordingly.
+ void
+ categorizeArgumentPointerLocations(Attributor &A, CallBase &CB,
+ AAMemoryLocation::StateType &AccessedLocs,
+ bool &Changed);
+
/// Return the kind(s) of location that may be accessed by \p V.
AAMemoryLocation::MemoryLocationsKind
categorizeAccessedLocations(Attributor &A, Instruction &I, bool &Changed);
@@ -6629,7 +6629,7 @@ void AAMemoryLocationImpl::categorizePtrValue(
auto VisitValueCB = [&](Value &V, const Instruction *,
AAMemoryLocation::StateType &T,
bool Stripped) -> bool {
- // TODO: recognize the TBAA used for constant accesses.
+ // TODO: recognize the TBAA used for constant accesses.
MemoryLocationsKind MLK = NO_LOCATIONS;
assert(!isa<GEPOperator>(V) && "GEPs should have been stripped.");
if (isa<UndefValue>(V))
@@ -6640,13 +6640,13 @@ void AAMemoryLocationImpl::categorizePtrValue(
else
MLK = NO_ARGUMENT_MEM;
} else if (auto *GV = dyn_cast<GlobalValue>(&V)) {
-      // Reading constant memory is not treated as a read "effect" by the
-      // function attr pass, so we don't either. Constants identified via TBAA
-      // are similar. (We know we do not write it because it is constant.)
- if (auto *GVar = dyn_cast<GlobalVariable>(GV))
- if (GVar->isConstant())
- return true;
-
+      // Reading constant memory is not treated as a read "effect" by the
+      // function attr pass, so we don't either. Constants identified via TBAA
+      // are similar. (We know we do not write it because it is constant.)
+ if (auto *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->isConstant())
+ return true;
+
if (GV->hasLocalLinkage())
MLK = NO_GLOBAL_INTERNAL_MEM;
else
@@ -6693,30 +6693,30 @@ void AAMemoryLocationImpl::categorizePtrValue(
}
}
-void AAMemoryLocationImpl::categorizeArgumentPointerLocations(
- Attributor &A, CallBase &CB, AAMemoryLocation::StateType &AccessedLocs,
- bool &Changed) {
- for (unsigned ArgNo = 0, E = CB.getNumArgOperands(); ArgNo < E; ++ArgNo) {
-
- // Skip non-pointer arguments.
- const Value *ArgOp = CB.getArgOperand(ArgNo);
- if (!ArgOp->getType()->isPtrOrPtrVectorTy())
- continue;
-
- // Skip readnone arguments.
- const IRPosition &ArgOpIRP = IRPosition::callsite_argument(CB, ArgNo);
- const auto &ArgOpMemLocationAA = A.getAAFor<AAMemoryBehavior>(
- *this, ArgOpIRP, /* TrackDependence */ true, DepClassTy::OPTIONAL);
-
- if (ArgOpMemLocationAA.isAssumedReadNone())
- continue;
-
-    // Categorize potentially accessed pointer arguments as if there were an
-    // access instruction using them as the pointer operand.
- categorizePtrValue(A, CB, *ArgOp, AccessedLocs, Changed);
- }
-}
-
+void AAMemoryLocationImpl::categorizeArgumentPointerLocations(
+ Attributor &A, CallBase &CB, AAMemoryLocation::StateType &AccessedLocs,
+ bool &Changed) {
+ for (unsigned ArgNo = 0, E = CB.getNumArgOperands(); ArgNo < E; ++ArgNo) {
+
+ // Skip non-pointer arguments.
+ const Value *ArgOp = CB.getArgOperand(ArgNo);
+ if (!ArgOp->getType()->isPtrOrPtrVectorTy())
+ continue;
+
+ // Skip readnone arguments.
+ const IRPosition &ArgOpIRP = IRPosition::callsite_argument(CB, ArgNo);
+ const auto &ArgOpMemLocationAA = A.getAAFor<AAMemoryBehavior>(
+ *this, ArgOpIRP, /* TrackDependence */ true, DepClassTy::OPTIONAL);
+
+ if (ArgOpMemLocationAA.isAssumedReadNone())
+ continue;
+
+    // Categorize potentially accessed pointer arguments as if there were an
+    // access instruction using them as the pointer operand.
+ categorizePtrValue(A, CB, *ArgOp, AccessedLocs, Changed);
+ }
+}
+
AAMemoryLocation::MemoryLocationsKind
AAMemoryLocationImpl::categorizeAccessedLocations(Attributor &A, Instruction &I,
bool &Changed) {
@@ -6778,8 +6778,8 @@ AAMemoryLocationImpl::categorizeAccessedLocations(Attributor &A, Instruction &I,
// Now handle argument memory if it might be accessed.
bool HasArgAccesses = ((~CBAssumedNotAccessedLocs) & NO_ARGUMENT_MEM);
- if (HasArgAccesses)
- categorizeArgumentPointerLocations(A, *CB, AccessedLocs, Changed);
+ if (HasArgAccesses)
+ categorizeArgumentPointerLocations(A, *CB, AccessedLocs, Changed);
LLVM_DEBUG(
dbgs() << "[AAMemoryLocation] Accessed state after argument handling: "
@@ -6831,9 +6831,9 @@ struct AAMemoryLocationFunction final : public AAMemoryLocationImpl {
LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Accessed locations for " << I
<< ": " << getMemoryLocationsAsStr(MLK) << "\n");
removeAssumedBits(inverseLocation(MLK, false, false));
-      // Stop once only the valid bit is set in the *not assumed location*,
-      // i.e., once we no longer exclude any memory locations in the state.
- return getAssumedNotAccessedLocation() != VALID_STATE;
+      // Stop once only the valid bit is set in the *not assumed location*,
+      // i.e., once we no longer exclude any memory locations in the state.
+ return getAssumedNotAccessedLocation() != VALID_STATE;
};
if (!A.checkForAllReadWriteInstructions(CheckRWInst, *this))
@@ -6865,7 +6865,7 @@ struct AAMemoryLocationCallSite final : AAMemoryLocationImpl {
void initialize(Attributor &A) override {
AAMemoryLocationImpl::initialize(A);
Function *F = getAssociatedFunction();
- if (!F || F->isDeclaration())
+ if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
@@ -7075,13 +7075,13 @@ struct AAValueConstantRangeImpl : AAValueConstantRange {
auto &V = getAssociatedValue();
if (!AssumedConstantRange.isEmptySet() &&
!AssumedConstantRange.isSingleElement()) {
- if (Instruction *I = dyn_cast<Instruction>(&V)) {
- assert(I == getCtxI() && "Should not annotate an instruction which is "
- "not the context instruction");
+ if (Instruction *I = dyn_cast<Instruction>(&V)) {
+ assert(I == getCtxI() && "Should not annotate an instruction which is "
+ "not the context instruction");
if (isa<CallInst>(I) || isa<LoadInst>(I))
if (setRangeMetadataIfisBetterRange(I, AssumedConstantRange))
Changed = ChangeStatus::CHANGED;
- }
+ }
}
return Changed;
@@ -7150,9 +7150,9 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
return;
}
- if (isa<CallBase>(&V))
- return;
-
+ if (isa<CallBase>(&V))
+ return;
+
if (isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<CastInst>(&V))
return;
// If it is a load instruction with range metadata, use it.
@@ -7390,641 +7390,641 @@ struct AAValueConstantRangeCallSiteArgument : AAValueConstantRangeFloating {
AAValueConstantRangeCallSiteArgument(const IRPosition &IRP, Attributor &A)
: AAValueConstantRangeFloating(IRP, A) {}
- /// See AbstractAttribute::manifest()
- ChangeStatus manifest(Attributor &A) override {
- return ChangeStatus::UNCHANGED;
- }
-
+ /// See AbstractAttribute::manifest()
+ ChangeStatus manifest(Attributor &A) override {
+ return ChangeStatus::UNCHANGED;
+ }
+
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_CSARG_ATTR(value_range)
}
};
-
-/// ------------------ Potential Values Attribute -------------------------
-
-struct AAPotentialValuesImpl : AAPotentialValues {
- using StateType = PotentialConstantIntValuesState;
-
- AAPotentialValuesImpl(const IRPosition &IRP, Attributor &A)
- : AAPotentialValues(IRP, A) {}
-
- /// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
- std::string Str;
- llvm::raw_string_ostream OS(Str);
- OS << getState();
- return OS.str();
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- return indicatePessimisticFixpoint();
- }
-};
-
-struct AAPotentialValuesArgument final
- : AAArgumentFromCallSiteArguments<AAPotentialValues, AAPotentialValuesImpl,
- PotentialConstantIntValuesState> {
- using Base =
- AAArgumentFromCallSiteArguments<AAPotentialValues, AAPotentialValuesImpl,
- PotentialConstantIntValuesState>;
- AAPotentialValuesArgument(const IRPosition &IRP, Attributor &A)
- : Base(IRP, A) {}
-
- /// See AbstractAttribute::initialize(..).
- void initialize(Attributor &A) override {
- if (!getAnchorScope() || getAnchorScope()->isDeclaration()) {
- indicatePessimisticFixpoint();
- } else {
- Base::initialize(A);
- }
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECLTRACK_ARG_ATTR(potential_values)
- }
-};
-
-struct AAPotentialValuesReturned
- : AAReturnedFromReturnedValues<AAPotentialValues, AAPotentialValuesImpl> {
- using Base =
- AAReturnedFromReturnedValues<AAPotentialValues, AAPotentialValuesImpl>;
- AAPotentialValuesReturned(const IRPosition &IRP, Attributor &A)
- : Base(IRP, A) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECLTRACK_FNRET_ATTR(potential_values)
- }
-};
-
-struct AAPotentialValuesFloating : AAPotentialValuesImpl {
- AAPotentialValuesFloating(const IRPosition &IRP, Attributor &A)
- : AAPotentialValuesImpl(IRP, A) {}
-
- /// See AbstractAttribute::initialize(..).
- void initialize(Attributor &A) override {
- Value &V = getAssociatedValue();
-
- if (auto *C = dyn_cast<ConstantInt>(&V)) {
- unionAssumed(C->getValue());
- indicateOptimisticFixpoint();
- return;
- }
-
- if (isa<UndefValue>(&V)) {
- unionAssumedWithUndef();
- indicateOptimisticFixpoint();
- return;
- }
-
- if (isa<BinaryOperator>(&V) || isa<ICmpInst>(&V) || isa<CastInst>(&V))
- return;
-
- if (isa<SelectInst>(V) || isa<PHINode>(V))
- return;
-
- indicatePessimisticFixpoint();
-
- LLVM_DEBUG(dbgs() << "[AAPotentialValues] We give up: "
- << getAssociatedValue() << "\n");
- }
-
- static bool calculateICmpInst(const ICmpInst *ICI, const APInt &LHS,
- const APInt &RHS) {
- ICmpInst::Predicate Pred = ICI->getPredicate();
- switch (Pred) {
- case ICmpInst::ICMP_UGT:
- return LHS.ugt(RHS);
- case ICmpInst::ICMP_SGT:
- return LHS.sgt(RHS);
- case ICmpInst::ICMP_EQ:
- return LHS.eq(RHS);
- case ICmpInst::ICMP_UGE:
- return LHS.uge(RHS);
- case ICmpInst::ICMP_SGE:
- return LHS.sge(RHS);
- case ICmpInst::ICMP_ULT:
- return LHS.ult(RHS);
- case ICmpInst::ICMP_SLT:
- return LHS.slt(RHS);
- case ICmpInst::ICMP_NE:
- return LHS.ne(RHS);
- case ICmpInst::ICMP_ULE:
- return LHS.ule(RHS);
- case ICmpInst::ICMP_SLE:
- return LHS.sle(RHS);
- default:
- llvm_unreachable("Invalid ICmp predicate!");
- }
- }
-
- static APInt calculateCastInst(const CastInst *CI, const APInt &Src,
- uint32_t ResultBitWidth) {
- Instruction::CastOps CastOp = CI->getOpcode();
- switch (CastOp) {
- default:
- llvm_unreachable("unsupported or not integer cast");
- case Instruction::Trunc:
- return Src.trunc(ResultBitWidth);
- case Instruction::SExt:
- return Src.sext(ResultBitWidth);
- case Instruction::ZExt:
- return Src.zext(ResultBitWidth);
- case Instruction::BitCast:
- return Src;
- }
- }
-
- static APInt calculateBinaryOperator(const BinaryOperator *BinOp,
- const APInt &LHS, const APInt &RHS,
- bool &SkipOperation, bool &Unsupported) {
- Instruction::BinaryOps BinOpcode = BinOp->getOpcode();
- // Unsupported is set to true when the binary operator is not supported.
-    // SkipOperation is set to true when UB occurs with the given operand
-    // pair (LHS, RHS).
-    // TODO: we should look at the nsw and nuw keywords to handle operations
-    // that create poison or undef values.
- switch (BinOpcode) {
- default:
- Unsupported = true;
- return LHS;
- case Instruction::Add:
- return LHS + RHS;
- case Instruction::Sub:
- return LHS - RHS;
- case Instruction::Mul:
- return LHS * RHS;
- case Instruction::UDiv:
- if (RHS.isNullValue()) {
- SkipOperation = true;
- return LHS;
- }
- return LHS.udiv(RHS);
- case Instruction::SDiv:
- if (RHS.isNullValue()) {
- SkipOperation = true;
- return LHS;
- }
- return LHS.sdiv(RHS);
- case Instruction::URem:
- if (RHS.isNullValue()) {
- SkipOperation = true;
- return LHS;
- }
- return LHS.urem(RHS);
- case Instruction::SRem:
- if (RHS.isNullValue()) {
- SkipOperation = true;
- return LHS;
- }
- return LHS.srem(RHS);
- case Instruction::Shl:
- return LHS.shl(RHS);
- case Instruction::LShr:
- return LHS.lshr(RHS);
- case Instruction::AShr:
- return LHS.ashr(RHS);
- case Instruction::And:
- return LHS & RHS;
- case Instruction::Or:
- return LHS | RHS;
- case Instruction::Xor:
- return LHS ^ RHS;
- }
- }
-
- bool calculateBinaryOperatorAndTakeUnion(const BinaryOperator *BinOp,
- const APInt &LHS, const APInt &RHS) {
- bool SkipOperation = false;
- bool Unsupported = false;
- APInt Result =
- calculateBinaryOperator(BinOp, LHS, RHS, SkipOperation, Unsupported);
- if (Unsupported)
- return false;
- // If SkipOperation is true, we can ignore this operand pair (L, R).
- if (!SkipOperation)
- unionAssumed(Result);
- return isValidState();
- }
-
- ChangeStatus updateWithICmpInst(Attributor &A, ICmpInst *ICI) {
- auto AssumedBefore = getAssumed();
- Value *LHS = ICI->getOperand(0);
- Value *RHS = ICI->getOperand(1);
- if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
- return indicatePessimisticFixpoint();
-
- auto &LHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*LHS));
- if (!LHSAA.isValidState())
- return indicatePessimisticFixpoint();
-
- auto &RHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*RHS));
- if (!RHSAA.isValidState())
- return indicatePessimisticFixpoint();
-
- const DenseSet<APInt> &LHSAAPVS = LHSAA.getAssumedSet();
- const DenseSet<APInt> &RHSAAPVS = RHSAA.getAssumedSet();
-
- // TODO: make use of undef flag to limit potential values aggressively.
- bool MaybeTrue = false, MaybeFalse = false;
- const APInt Zero(RHS->getType()->getIntegerBitWidth(), 0);
- if (LHSAA.undefIsContained() && RHSAA.undefIsContained()) {
- // The result of any comparison between undefs can be soundly replaced
- // with undef.
- unionAssumedWithUndef();
- } else if (LHSAA.undefIsContained()) {
- for (const APInt &R : RHSAAPVS) {
- bool CmpResult = calculateICmpInst(ICI, Zero, R);
- MaybeTrue |= CmpResult;
- MaybeFalse |= !CmpResult;
- if (MaybeTrue & MaybeFalse)
- return indicatePessimisticFixpoint();
- }
- } else if (RHSAA.undefIsContained()) {
- for (const APInt &L : LHSAAPVS) {
- bool CmpResult = calculateICmpInst(ICI, L, Zero);
- MaybeTrue |= CmpResult;
- MaybeFalse |= !CmpResult;
- if (MaybeTrue & MaybeFalse)
- return indicatePessimisticFixpoint();
- }
- } else {
- for (const APInt &L : LHSAAPVS) {
- for (const APInt &R : RHSAAPVS) {
- bool CmpResult = calculateICmpInst(ICI, L, R);
- MaybeTrue |= CmpResult;
- MaybeFalse |= !CmpResult;
- if (MaybeTrue & MaybeFalse)
- return indicatePessimisticFixpoint();
- }
- }
- }
- if (MaybeTrue)
- unionAssumed(APInt(/* numBits */ 1, /* val */ 1));
- if (MaybeFalse)
- unionAssumed(APInt(/* numBits */ 1, /* val */ 0));
- return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
- : ChangeStatus::CHANGED;
- }
-
- ChangeStatus updateWithSelectInst(Attributor &A, SelectInst *SI) {
- auto AssumedBefore = getAssumed();
- Value *LHS = SI->getTrueValue();
- Value *RHS = SI->getFalseValue();
- if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
- return indicatePessimisticFixpoint();
-
- // TODO: Use assumed simplified condition value
- auto &LHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*LHS));
- if (!LHSAA.isValidState())
- return indicatePessimisticFixpoint();
-
- auto &RHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*RHS));
- if (!RHSAA.isValidState())
- return indicatePessimisticFixpoint();
-
- if (LHSAA.undefIsContained() && RHSAA.undefIsContained())
-      // select i1 *, undef, undef => undef
- unionAssumedWithUndef();
- else {
- unionAssumed(LHSAA);
- unionAssumed(RHSAA);
- }
- return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
- : ChangeStatus::CHANGED;
- }
-
- ChangeStatus updateWithCastInst(Attributor &A, CastInst *CI) {
- auto AssumedBefore = getAssumed();
- if (!CI->isIntegerCast())
- return indicatePessimisticFixpoint();
- assert(CI->getNumOperands() == 1 && "Expected cast to be unary!");
- uint32_t ResultBitWidth = CI->getDestTy()->getIntegerBitWidth();
- Value *Src = CI->getOperand(0);
- auto &SrcAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*Src));
- if (!SrcAA.isValidState())
- return indicatePessimisticFixpoint();
- const DenseSet<APInt> &SrcAAPVS = SrcAA.getAssumedSet();
- if (SrcAA.undefIsContained())
- unionAssumedWithUndef();
- else {
- for (const APInt &S : SrcAAPVS) {
- APInt T = calculateCastInst(CI, S, ResultBitWidth);
- unionAssumed(T);
- }
- }
- return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
- : ChangeStatus::CHANGED;
- }
-
- ChangeStatus updateWithBinaryOperator(Attributor &A, BinaryOperator *BinOp) {
- auto AssumedBefore = getAssumed();
- Value *LHS = BinOp->getOperand(0);
- Value *RHS = BinOp->getOperand(1);
- if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
- return indicatePessimisticFixpoint();
-
- auto &LHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*LHS));
- if (!LHSAA.isValidState())
- return indicatePessimisticFixpoint();
-
- auto &RHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*RHS));
- if (!RHSAA.isValidState())
- return indicatePessimisticFixpoint();
-
- const DenseSet<APInt> &LHSAAPVS = LHSAA.getAssumedSet();
- const DenseSet<APInt> &RHSAAPVS = RHSAA.getAssumedSet();
- const APInt Zero = APInt(LHS->getType()->getIntegerBitWidth(), 0);
-
- // TODO: make use of undef flag to limit potential values aggressively.
- if (LHSAA.undefIsContained() && RHSAA.undefIsContained()) {
- if (!calculateBinaryOperatorAndTakeUnion(BinOp, Zero, Zero))
- return indicatePessimisticFixpoint();
- } else if (LHSAA.undefIsContained()) {
- for (const APInt &R : RHSAAPVS) {
- if (!calculateBinaryOperatorAndTakeUnion(BinOp, Zero, R))
- return indicatePessimisticFixpoint();
- }
- } else if (RHSAA.undefIsContained()) {
- for (const APInt &L : LHSAAPVS) {
- if (!calculateBinaryOperatorAndTakeUnion(BinOp, L, Zero))
- return indicatePessimisticFixpoint();
- }
- } else {
- for (const APInt &L : LHSAAPVS) {
- for (const APInt &R : RHSAAPVS) {
- if (!calculateBinaryOperatorAndTakeUnion(BinOp, L, R))
- return indicatePessimisticFixpoint();
- }
- }
- }
- return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
- : ChangeStatus::CHANGED;
- }
-
- ChangeStatus updateWithPHINode(Attributor &A, PHINode *PHI) {
- auto AssumedBefore = getAssumed();
- for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) {
- Value *IncomingValue = PHI->getIncomingValue(u);
- auto &PotentialValuesAA = A.getAAFor<AAPotentialValues>(
- *this, IRPosition::value(*IncomingValue));
- if (!PotentialValuesAA.isValidState())
- return indicatePessimisticFixpoint();
- if (PotentialValuesAA.undefIsContained())
- unionAssumedWithUndef();
- else
- unionAssumed(PotentialValuesAA.getAssumed());
- }
- return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
- : ChangeStatus::CHANGED;
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- Value &V = getAssociatedValue();
- Instruction *I = dyn_cast<Instruction>(&V);
-
- if (auto *ICI = dyn_cast<ICmpInst>(I))
- return updateWithICmpInst(A, ICI);
-
- if (auto *SI = dyn_cast<SelectInst>(I))
- return updateWithSelectInst(A, SI);
-
- if (auto *CI = dyn_cast<CastInst>(I))
- return updateWithCastInst(A, CI);
-
- if (auto *BinOp = dyn_cast<BinaryOperator>(I))
- return updateWithBinaryOperator(A, BinOp);
-
- if (auto *PHI = dyn_cast<PHINode>(I))
- return updateWithPHINode(A, PHI);
-
- return indicatePessimisticFixpoint();
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECLTRACK_FLOATING_ATTR(potential_values)
- }
-};
-
-struct AAPotentialValuesFunction : AAPotentialValuesImpl {
- AAPotentialValuesFunction(const IRPosition &IRP, Attributor &A)
- : AAPotentialValuesImpl(IRP, A) {}
-
-  /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- llvm_unreachable("AAPotentialValues(Function|CallSite)::updateImpl will "
- "not be called");
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECLTRACK_FN_ATTR(potential_values)
- }
-};
-
-struct AAPotentialValuesCallSite : AAPotentialValuesFunction {
- AAPotentialValuesCallSite(const IRPosition &IRP, Attributor &A)
- : AAPotentialValuesFunction(IRP, A) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECLTRACK_CS_ATTR(potential_values)
- }
-};
-
-struct AAPotentialValuesCallSiteReturned
- : AACallSiteReturnedFromReturned<AAPotentialValues, AAPotentialValuesImpl> {
- AAPotentialValuesCallSiteReturned(const IRPosition &IRP, Attributor &A)
- : AACallSiteReturnedFromReturned<AAPotentialValues,
- AAPotentialValuesImpl>(IRP, A) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECLTRACK_CSRET_ATTR(potential_values)
- }
-};
-
-struct AAPotentialValuesCallSiteArgument : AAPotentialValuesFloating {
- AAPotentialValuesCallSiteArgument(const IRPosition &IRP, Attributor &A)
- : AAPotentialValuesFloating(IRP, A) {}
-
- /// See AbstractAttribute::initialize(..).
- void initialize(Attributor &A) override {
- Value &V = getAssociatedValue();
-
- if (auto *C = dyn_cast<ConstantInt>(&V)) {
- unionAssumed(C->getValue());
- indicateOptimisticFixpoint();
- return;
- }
-
- if (isa<UndefValue>(&V)) {
- unionAssumedWithUndef();
- indicateOptimisticFixpoint();
- return;
- }
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- Value &V = getAssociatedValue();
- auto AssumedBefore = getAssumed();
- auto &AA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(V));
- const auto &S = AA.getAssumed();
- unionAssumed(S);
- return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
- : ChangeStatus::CHANGED;
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECLTRACK_CSARG_ATTR(potential_values)
- }
-};
-
-/// ------------------------ NoUndef Attribute ---------------------------------
-struct AANoUndefImpl : AANoUndef {
- AANoUndefImpl(const IRPosition &IRP, Attributor &A) : AANoUndef(IRP, A) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- if (getIRPosition().hasAttr({Attribute::NoUndef})) {
- indicateOptimisticFixpoint();
- return;
- }
- Value &V = getAssociatedValue();
- if (isa<UndefValue>(V))
- indicatePessimisticFixpoint();
- else if (isa<FreezeInst>(V))
- indicateOptimisticFixpoint();
- else if (getPositionKind() != IRPosition::IRP_RETURNED &&
- isGuaranteedNotToBeUndefOrPoison(&V))
- indicateOptimisticFixpoint();
- else
- AANoUndef::initialize(A);
- }
-
- /// See followUsesInMBEC
- bool followUseInMBEC(Attributor &A, const Use *U, const Instruction *I,
- AANoUndef::StateType &State) {
- const Value *UseV = U->get();
- const DominatorTree *DT = nullptr;
- AssumptionCache *AC = nullptr;
- InformationCache &InfoCache = A.getInfoCache();
- if (Function *F = getAnchorScope()) {
- DT = InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*F);
- AC = InfoCache.getAnalysisResultForFunction<AssumptionAnalysis>(*F);
- }
- State.setKnown(isGuaranteedNotToBeUndefOrPoison(UseV, AC, I, DT));
- bool TrackUse = false;
- // Track use for instructions which must produce undef or poison bits when
- // at least one operand contains such bits.
- if (isa<CastInst>(*I) || isa<GetElementPtrInst>(*I))
- TrackUse = true;
- return TrackUse;
- }
-
- /// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
- return getAssumed() ? "noundef" : "may-undef-or-poison";
- }
-
- ChangeStatus manifest(Attributor &A) override {
- // We don't manifest noundef attribute for dead positions because the
- // associated values with dead positions would be replaced with undef
- // values.
- if (A.isAssumedDead(getIRPosition(), nullptr, nullptr))
- return ChangeStatus::UNCHANGED;
-    // A position for which no simplified value exists is considered dead.
-    // We don't manifest noundef in such positions for the same reason as
-    // above.
- auto &ValueSimplifyAA = A.getAAFor<AAValueSimplify>(
- *this, getIRPosition(), /* TrackDependence */ false);
- if (!ValueSimplifyAA.getAssumedSimplifiedValue(A).hasValue())
- return ChangeStatus::UNCHANGED;
- return AANoUndef::manifest(A);
- }
-};
-
-struct AANoUndefFloating : public AANoUndefImpl {
- AANoUndefFloating(const IRPosition &IRP, Attributor &A)
- : AANoUndefImpl(IRP, A) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AANoUndefImpl::initialize(A);
- if (!getState().isAtFixpoint())
- if (Instruction *CtxI = getCtxI())
- followUsesInMBEC(*this, A, getState(), *CtxI);
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- auto VisitValueCB = [&](Value &V, const Instruction *CtxI,
- AANoUndef::StateType &T, bool Stripped) -> bool {
- const auto &AA = A.getAAFor<AANoUndef>(*this, IRPosition::value(V));
- if (!Stripped && this == &AA) {
- T.indicatePessimisticFixpoint();
- } else {
- const AANoUndef::StateType &S =
- static_cast<const AANoUndef::StateType &>(AA.getState());
- T ^= S;
- }
- return T.isValidState();
- };
-
- StateType T;
- if (!genericValueTraversal<AANoUndef, StateType>(
- A, getIRPosition(), *this, T, VisitValueCB, getCtxI()))
- return indicatePessimisticFixpoint();
-
- return clampStateAndIndicateChange(getState(), T);
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(noundef) }
-};
-
-struct AANoUndefReturned final
- : AAReturnedFromReturnedValues<AANoUndef, AANoUndefImpl> {
- AANoUndefReturned(const IRPosition &IRP, Attributor &A)
- : AAReturnedFromReturnedValues<AANoUndef, AANoUndefImpl>(IRP, A) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(noundef) }
-};
-
-struct AANoUndefArgument final
- : AAArgumentFromCallSiteArguments<AANoUndef, AANoUndefImpl> {
- AANoUndefArgument(const IRPosition &IRP, Attributor &A)
- : AAArgumentFromCallSiteArguments<AANoUndef, AANoUndefImpl>(IRP, A) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(noundef) }
-};
-
-struct AANoUndefCallSiteArgument final : AANoUndefFloating {
- AANoUndefCallSiteArgument(const IRPosition &IRP, Attributor &A)
- : AANoUndefFloating(IRP, A) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(noundef) }
-};
-
-struct AANoUndefCallSiteReturned final
- : AACallSiteReturnedFromReturned<AANoUndef, AANoUndefImpl> {
- AANoUndefCallSiteReturned(const IRPosition &IRP, Attributor &A)
- : AACallSiteReturnedFromReturned<AANoUndef, AANoUndefImpl>(IRP, A) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(noundef) }
-};
+
+/// ------------------ Potential Values Attribute -------------------------
+
+struct AAPotentialValuesImpl : AAPotentialValues {
+ using StateType = PotentialConstantIntValuesState;
+
+ AAPotentialValuesImpl(const IRPosition &IRP, Attributor &A)
+ : AAPotentialValues(IRP, A) {}
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ std::string Str;
+ llvm::raw_string_ostream OS(Str);
+ OS << getState();
+ return OS.str();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ return indicatePessimisticFixpoint();
+ }
+};
+
+struct AAPotentialValuesArgument final
+ : AAArgumentFromCallSiteArguments<AAPotentialValues, AAPotentialValuesImpl,
+ PotentialConstantIntValuesState> {
+ using Base =
+ AAArgumentFromCallSiteArguments<AAPotentialValues, AAPotentialValuesImpl,
+ PotentialConstantIntValuesState>;
+ AAPotentialValuesArgument(const IRPosition &IRP, Attributor &A)
+ : Base(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(..).
+ void initialize(Attributor &A) override {
+ if (!getAnchorScope() || getAnchorScope()->isDeclaration()) {
+ indicatePessimisticFixpoint();
+ } else {
+ Base::initialize(A);
+ }
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_ARG_ATTR(potential_values)
+ }
+};
+
+struct AAPotentialValuesReturned
+ : AAReturnedFromReturnedValues<AAPotentialValues, AAPotentialValuesImpl> {
+ using Base =
+ AAReturnedFromReturnedValues<AAPotentialValues, AAPotentialValuesImpl>;
+ AAPotentialValuesReturned(const IRPosition &IRP, Attributor &A)
+ : Base(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FNRET_ATTR(potential_values)
+ }
+};
+
+struct AAPotentialValuesFloating : AAPotentialValuesImpl {
+ AAPotentialValuesFloating(const IRPosition &IRP, Attributor &A)
+ : AAPotentialValuesImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(..).
+ void initialize(Attributor &A) override {
+ Value &V = getAssociatedValue();
+
+ if (auto *C = dyn_cast<ConstantInt>(&V)) {
+ unionAssumed(C->getValue());
+ indicateOptimisticFixpoint();
+ return;
+ }
+
+ if (isa<UndefValue>(&V)) {
+ unionAssumedWithUndef();
+ indicateOptimisticFixpoint();
+ return;
+ }
+
+ if (isa<BinaryOperator>(&V) || isa<ICmpInst>(&V) || isa<CastInst>(&V))
+ return;
+
+ if (isa<SelectInst>(V) || isa<PHINode>(V))
+ return;
+
+ indicatePessimisticFixpoint();
+
+ LLVM_DEBUG(dbgs() << "[AAPotentialValues] We give up: "
+ << getAssociatedValue() << "\n");
+ }
+
+ static bool calculateICmpInst(const ICmpInst *ICI, const APInt &LHS,
+ const APInt &RHS) {
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ switch (Pred) {
+ case ICmpInst::ICMP_UGT:
+ return LHS.ugt(RHS);
+ case ICmpInst::ICMP_SGT:
+ return LHS.sgt(RHS);
+ case ICmpInst::ICMP_EQ:
+ return LHS.eq(RHS);
+ case ICmpInst::ICMP_UGE:
+ return LHS.uge(RHS);
+ case ICmpInst::ICMP_SGE:
+ return LHS.sge(RHS);
+ case ICmpInst::ICMP_ULT:
+ return LHS.ult(RHS);
+ case ICmpInst::ICMP_SLT:
+ return LHS.slt(RHS);
+ case ICmpInst::ICMP_NE:
+ return LHS.ne(RHS);
+ case ICmpInst::ICMP_ULE:
+ return LHS.ule(RHS);
+ case ICmpInst::ICMP_SLE:
+ return LHS.sle(RHS);
+ default:
+ llvm_unreachable("Invalid ICmp predicate!");
+ }
+ }
+
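calculateICmpInst above dispatches each predicate to the corresponding llvm::APInt helper, where signedness lives in the operation rather than in the type. A small sanity check of that distinction, using only APInt calls already exercised by this file:

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  using llvm::APInt;
  APInt A(/*numBits=*/8, 200); // bit pattern 0xC8: 200 unsigned, -56 signed
  APInt B(/*numBits=*/8, 100);
  assert(A.ugt(B));            // 200 > 100 holds unsigned
  assert(!A.sgt(B));           // -56 > 100 fails signed
  assert(A.ne(B) && !A.eq(B)); // equality is signedness-agnostic
  return 0;
}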
+ static APInt calculateCastInst(const CastInst *CI, const APInt &Src,
+ uint32_t ResultBitWidth) {
+ Instruction::CastOps CastOp = CI->getOpcode();
+ switch (CastOp) {
+ default:
+ llvm_unreachable("unsupported or not integer cast");
+ case Instruction::Trunc:
+ return Src.trunc(ResultBitWidth);
+ case Instruction::SExt:
+ return Src.sext(ResultBitWidth);
+ case Instruction::ZExt:
+ return Src.zext(ResultBitWidth);
+ case Instruction::BitCast:
+ return Src;
+ }
+ }
+
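The cast folding above leans on APInt's width-changing operations. How trunc, sext, and zext differ on one bit pattern (again assuming the APInt API as used here):

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  using llvm::APInt;
  APInt V(8, 0xFF); // 255 unsigned, -1 signed
  assert(V.zext(16) == APInt(16, 0x00FF));              // zero-fill: stays 255
  assert(V.sext(16) == APInt(16, 0xFFFF));              // sign-fill: stays -1
  assert(APInt(16, 0x1234).trunc(8) == APInt(8, 0x34)); // high bits dropped
  return 0;
}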
+ static APInt calculateBinaryOperator(const BinaryOperator *BinOp,
+ const APInt &LHS, const APInt &RHS,
+ bool &SkipOperation, bool &Unsupported) {
+ Instruction::BinaryOps BinOpcode = BinOp->getOpcode();
+ // Unsupported is set to true when the binary operator is not supported.
+    // SkipOperation is set to true when UB occurs with the given operand
+    // pair (LHS, RHS).
+    // TODO: we should look at the nsw and nuw keywords to handle operations
+    // that create poison or undef values.
+ switch (BinOpcode) {
+ default:
+ Unsupported = true;
+ return LHS;
+ case Instruction::Add:
+ return LHS + RHS;
+ case Instruction::Sub:
+ return LHS - RHS;
+ case Instruction::Mul:
+ return LHS * RHS;
+ case Instruction::UDiv:
+ if (RHS.isNullValue()) {
+ SkipOperation = true;
+ return LHS;
+ }
+ return LHS.udiv(RHS);
+ case Instruction::SDiv:
+ if (RHS.isNullValue()) {
+ SkipOperation = true;
+ return LHS;
+ }
+ return LHS.sdiv(RHS);
+ case Instruction::URem:
+ if (RHS.isNullValue()) {
+ SkipOperation = true;
+ return LHS;
+ }
+ return LHS.urem(RHS);
+ case Instruction::SRem:
+ if (RHS.isNullValue()) {
+ SkipOperation = true;
+ return LHS;
+ }
+ return LHS.srem(RHS);
+ case Instruction::Shl:
+ return LHS.shl(RHS);
+ case Instruction::LShr:
+ return LHS.lshr(RHS);
+ case Instruction::AShr:
+ return LHS.ashr(RHS);
+ case Instruction::And:
+ return LHS & RHS;
+ case Instruction::Or:
+ return LHS | RHS;
+ case Instruction::Xor:
+ return LHS ^ RHS;
+ }
+ }
+
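For the division-like opcodes, SkipOperation prevents evaluating udiv/sdiv/urem/srem with a zero divisor, where APInt's result is undefined; the offending pair is skipped rather than folded. A sketch of that guard in isolation (tryFoldUDiv is a hypothetical helper, not part of the pass):

#include "llvm/ADT/APInt.h"
#include <cassert>

// Fold L udiv R only when it cannot trap; returning false plays the role
// SkipOperation plays above.
static bool tryFoldUDiv(const llvm::APInt &L, const llvm::APInt &R,
                        llvm::APInt &Out) {
  if (R.isNullValue()) // division by zero would be undefined
    return false;
  Out = L.udiv(R);
  return true;
}

int main() {
  llvm::APInt Out(8, 0);
  assert(tryFoldUDiv(llvm::APInt(8, 10), llvm::APInt(8, 2), Out) && Out == 5);
  assert(!tryFoldUDiv(llvm::APInt(8, 10), llvm::APInt(8, 0), Out)); // skipped
  return 0;
}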
+ bool calculateBinaryOperatorAndTakeUnion(const BinaryOperator *BinOp,
+ const APInt &LHS, const APInt &RHS) {
+ bool SkipOperation = false;
+ bool Unsupported = false;
+ APInt Result =
+ calculateBinaryOperator(BinOp, LHS, RHS, SkipOperation, Unsupported);
+ if (Unsupported)
+ return false;
+ // If SkipOperation is true, we can ignore this operand pair (L, R).
+ if (!SkipOperation)
+ unionAssumed(Result);
+ return isValidState();
+ }
+
+ ChangeStatus updateWithICmpInst(Attributor &A, ICmpInst *ICI) {
+ auto AssumedBefore = getAssumed();
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
+ if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
+ return indicatePessimisticFixpoint();
+
+ auto &LHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*LHS));
+ if (!LHSAA.isValidState())
+ return indicatePessimisticFixpoint();
+
+ auto &RHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*RHS));
+ if (!RHSAA.isValidState())
+ return indicatePessimisticFixpoint();
+
+ const DenseSet<APInt> &LHSAAPVS = LHSAA.getAssumedSet();
+ const DenseSet<APInt> &RHSAAPVS = RHSAA.getAssumedSet();
+
+ // TODO: make use of undef flag to limit potential values aggressively.
+ bool MaybeTrue = false, MaybeFalse = false;
+ const APInt Zero(RHS->getType()->getIntegerBitWidth(), 0);
+ if (LHSAA.undefIsContained() && RHSAA.undefIsContained()) {
+ // The result of any comparison between undefs can be soundly replaced
+ // with undef.
+ unionAssumedWithUndef();
+ } else if (LHSAA.undefIsContained()) {
+ for (const APInt &R : RHSAAPVS) {
+ bool CmpResult = calculateICmpInst(ICI, Zero, R);
+ MaybeTrue |= CmpResult;
+ MaybeFalse |= !CmpResult;
+ if (MaybeTrue & MaybeFalse)
+ return indicatePessimisticFixpoint();
+ }
+ } else if (RHSAA.undefIsContained()) {
+ for (const APInt &L : LHSAAPVS) {
+ bool CmpResult = calculateICmpInst(ICI, L, Zero);
+ MaybeTrue |= CmpResult;
+ MaybeFalse |= !CmpResult;
+ if (MaybeTrue & MaybeFalse)
+ return indicatePessimisticFixpoint();
+ }
+ } else {
+ for (const APInt &L : LHSAAPVS) {
+ for (const APInt &R : RHSAAPVS) {
+ bool CmpResult = calculateICmpInst(ICI, L, R);
+ MaybeTrue |= CmpResult;
+ MaybeFalse |= !CmpResult;
+ if (MaybeTrue & MaybeFalse)
+ return indicatePessimisticFixpoint();
+ }
+ }
+ }
+ if (MaybeTrue)
+ unionAssumed(APInt(/* numBits */ 1, /* val */ 1));
+ if (MaybeFalse)
+ unionAssumed(APInt(/* numBits */ 1, /* val */ 0));
+ return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+ }
+
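The nested loops above enumerate the cross product of both potential-value sets and record whether the comparison can come out true, false, or both; once both outcomes are possible the attribute gives up on folding. The same bookkeeping with plain integers, purely for illustration:

#include <cassert>
#include <set>

int main() {
  // `icmp ult %l, %r` with potential values PV(%l) = {1,2}, PV(%r) = {2,3}.
  std::set<unsigned> LHS = {1, 2};
  std::set<unsigned> RHS = {2, 3};
  bool MaybeTrue = false, MaybeFalse = false;
  for (unsigned L : LHS)
    for (unsigned R : RHS) {
      bool Cmp = L < R; // ult
      MaybeTrue |= Cmp;
      MaybeFalse |= !Cmp;
    }
  // 1<2, 1<3, 2<3 hold but 2<2 does not: both outcomes are possible, so the
  // icmp could not be replaced by a constant.
  assert(MaybeTrue && MaybeFalse);
  return 0;
}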
+ ChangeStatus updateWithSelectInst(Attributor &A, SelectInst *SI) {
+ auto AssumedBefore = getAssumed();
+ Value *LHS = SI->getTrueValue();
+ Value *RHS = SI->getFalseValue();
+ if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
+ return indicatePessimisticFixpoint();
+
+ // TODO: Use assumed simplified condition value
+ auto &LHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*LHS));
+ if (!LHSAA.isValidState())
+ return indicatePessimisticFixpoint();
+
+ auto &RHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*RHS));
+ if (!RHSAA.isValidState())
+ return indicatePessimisticFixpoint();
+
+ if (LHSAA.undefIsContained() && RHSAA.undefIsContained())
+      // select i1 *, undef, undef => undef
+ unionAssumedWithUndef();
+ else {
+ unionAssumed(LHSAA);
+ unionAssumed(RHSAA);
+ }
+ return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+ }
+
+ ChangeStatus updateWithCastInst(Attributor &A, CastInst *CI) {
+ auto AssumedBefore = getAssumed();
+ if (!CI->isIntegerCast())
+ return indicatePessimisticFixpoint();
+ assert(CI->getNumOperands() == 1 && "Expected cast to be unary!");
+ uint32_t ResultBitWidth = CI->getDestTy()->getIntegerBitWidth();
+ Value *Src = CI->getOperand(0);
+ auto &SrcAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*Src));
+ if (!SrcAA.isValidState())
+ return indicatePessimisticFixpoint();
+ const DenseSet<APInt> &SrcAAPVS = SrcAA.getAssumedSet();
+ if (SrcAA.undefIsContained())
+ unionAssumedWithUndef();
+ else {
+ for (const APInt &S : SrcAAPVS) {
+ APInt T = calculateCastInst(CI, S, ResultBitWidth);
+ unionAssumed(T);
+ }
+ }
+ return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+ }
+
+ ChangeStatus updateWithBinaryOperator(Attributor &A, BinaryOperator *BinOp) {
+ auto AssumedBefore = getAssumed();
+ Value *LHS = BinOp->getOperand(0);
+ Value *RHS = BinOp->getOperand(1);
+ if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
+ return indicatePessimisticFixpoint();
+
+ auto &LHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*LHS));
+ if (!LHSAA.isValidState())
+ return indicatePessimisticFixpoint();
+
+ auto &RHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*RHS));
+ if (!RHSAA.isValidState())
+ return indicatePessimisticFixpoint();
+
+ const DenseSet<APInt> &LHSAAPVS = LHSAA.getAssumedSet();
+ const DenseSet<APInt> &RHSAAPVS = RHSAA.getAssumedSet();
+ const APInt Zero = APInt(LHS->getType()->getIntegerBitWidth(), 0);
+
+ // TODO: make use of undef flag to limit potential values aggressively.
+ if (LHSAA.undefIsContained() && RHSAA.undefIsContained()) {
+ if (!calculateBinaryOperatorAndTakeUnion(BinOp, Zero, Zero))
+ return indicatePessimisticFixpoint();
+ } else if (LHSAA.undefIsContained()) {
+ for (const APInt &R : RHSAAPVS) {
+ if (!calculateBinaryOperatorAndTakeUnion(BinOp, Zero, R))
+ return indicatePessimisticFixpoint();
+ }
+ } else if (RHSAA.undefIsContained()) {
+ for (const APInt &L : LHSAAPVS) {
+ if (!calculateBinaryOperatorAndTakeUnion(BinOp, L, Zero))
+ return indicatePessimisticFixpoint();
+ }
+ } else {
+ for (const APInt &L : LHSAAPVS) {
+ for (const APInt &R : RHSAAPVS) {
+ if (!calculateBinaryOperatorAndTakeUnion(BinOp, L, R))
+ return indicatePessimisticFixpoint();
+ }
+ }
+ }
+ return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+ }
+
+ ChangeStatus updateWithPHINode(Attributor &A, PHINode *PHI) {
+ auto AssumedBefore = getAssumed();
+ for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) {
+ Value *IncomingValue = PHI->getIncomingValue(u);
+ auto &PotentialValuesAA = A.getAAFor<AAPotentialValues>(
+ *this, IRPosition::value(*IncomingValue));
+ if (!PotentialValuesAA.isValidState())
+ return indicatePessimisticFixpoint();
+ if (PotentialValuesAA.undefIsContained())
+ unionAssumedWithUndef();
+ else
+ unionAssumed(PotentialValuesAA.getAssumed());
+ }
+ return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+ }
+
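The PHI transfer function above is a plain set union over the incoming values (with undef folded in via unionAssumedWithUndef). The union step with standard containers:

#include <cassert>
#include <set>

int main() {
  // phi i32 [ %a, ... ], [ %b, ... ] with PV(%a) = {0,1} and PV(%b) = {1,4}.
  std::set<int> A = {0, 1};
  std::set<int> B = {1, 4};
  std::set<int> Phi(A);
  Phi.insert(B.begin(), B.end());
  assert((Phi == std::set<int>{0, 1, 4}));
  return 0;
}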
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ Value &V = getAssociatedValue();
+ Instruction *I = dyn_cast<Instruction>(&V);
+
+ if (auto *ICI = dyn_cast<ICmpInst>(I))
+ return updateWithICmpInst(A, ICI);
+
+ if (auto *SI = dyn_cast<SelectInst>(I))
+ return updateWithSelectInst(A, SI);
+
+ if (auto *CI = dyn_cast<CastInst>(I))
+ return updateWithCastInst(A, CI);
+
+ if (auto *BinOp = dyn_cast<BinaryOperator>(I))
+ return updateWithBinaryOperator(A, BinOp);
+
+ if (auto *PHI = dyn_cast<PHINode>(I))
+ return updateWithPHINode(A, PHI);
+
+ return indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FLOATING_ATTR(potential_values)
+ }
+};
+
+struct AAPotentialValuesFunction : AAPotentialValuesImpl {
+ AAPotentialValuesFunction(const IRPosition &IRP, Attributor &A)
+ : AAPotentialValuesImpl(IRP, A) {}
+
+  /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ llvm_unreachable("AAPotentialValues(Function|CallSite)::updateImpl will "
+ "not be called");
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FN_ATTR(potential_values)
+ }
+};
+
+struct AAPotentialValuesCallSite : AAPotentialValuesFunction {
+ AAPotentialValuesCallSite(const IRPosition &IRP, Attributor &A)
+ : AAPotentialValuesFunction(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CS_ATTR(potential_values)
+ }
+};
+
+struct AAPotentialValuesCallSiteReturned
+ : AACallSiteReturnedFromReturned<AAPotentialValues, AAPotentialValuesImpl> {
+ AAPotentialValuesCallSiteReturned(const IRPosition &IRP, Attributor &A)
+ : AACallSiteReturnedFromReturned<AAPotentialValues,
+ AAPotentialValuesImpl>(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSRET_ATTR(potential_values)
+ }
+};
+
+struct AAPotentialValuesCallSiteArgument : AAPotentialValuesFloating {
+ AAPotentialValuesCallSiteArgument(const IRPosition &IRP, Attributor &A)
+ : AAPotentialValuesFloating(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(..).
+ void initialize(Attributor &A) override {
+ Value &V = getAssociatedValue();
+
+ if (auto *C = dyn_cast<ConstantInt>(&V)) {
+ unionAssumed(C->getValue());
+ indicateOptimisticFixpoint();
+ return;
+ }
+
+ if (isa<UndefValue>(&V)) {
+ unionAssumedWithUndef();
+ indicateOptimisticFixpoint();
+ return;
+ }
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ Value &V = getAssociatedValue();
+ auto AssumedBefore = getAssumed();
+ auto &AA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(V));
+ const auto &S = AA.getAssumed();
+ unionAssumed(S);
+ return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSARG_ATTR(potential_values)
+ }
+};
+
+/// ------------------------ NoUndef Attribute ---------------------------------
+struct AANoUndefImpl : AANoUndef {
+ AANoUndefImpl(const IRPosition &IRP, Attributor &A) : AANoUndef(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ if (getIRPosition().hasAttr({Attribute::NoUndef})) {
+ indicateOptimisticFixpoint();
+ return;
+ }
+ Value &V = getAssociatedValue();
+ if (isa<UndefValue>(V))
+ indicatePessimisticFixpoint();
+ else if (isa<FreezeInst>(V))
+ indicateOptimisticFixpoint();
+ else if (getPositionKind() != IRPosition::IRP_RETURNED &&
+ isGuaranteedNotToBeUndefOrPoison(&V))
+ indicateOptimisticFixpoint();
+ else
+ AANoUndef::initialize(A);
+ }
+
+ /// See followUsesInMBEC
+ bool followUseInMBEC(Attributor &A, const Use *U, const Instruction *I,
+ AANoUndef::StateType &State) {
+ const Value *UseV = U->get();
+ const DominatorTree *DT = nullptr;
+ AssumptionCache *AC = nullptr;
+ InformationCache &InfoCache = A.getInfoCache();
+ if (Function *F = getAnchorScope()) {
+ DT = InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*F);
+ AC = InfoCache.getAnalysisResultForFunction<AssumptionAnalysis>(*F);
+ }
+ State.setKnown(isGuaranteedNotToBeUndefOrPoison(UseV, AC, I, DT));
+ bool TrackUse = false;
+ // Track use for instructions which must produce undef or poison bits when
+ // at least one operand contains such bits.
+ if (isa<CastInst>(*I) || isa<GetElementPtrInst>(*I))
+ TrackUse = true;
+ return TrackUse;
+ }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ return getAssumed() ? "noundef" : "may-undef-or-poison";
+ }
+
+ ChangeStatus manifest(Attributor &A) override {
+ // We don't manifest noundef attribute for dead positions because the
+ // associated values with dead positions would be replaced with undef
+ // values.
+ if (A.isAssumedDead(getIRPosition(), nullptr, nullptr))
+ return ChangeStatus::UNCHANGED;
+    // A position for which no simplified value exists is considered dead.
+    // We don't manifest noundef in such positions for the same reason as
+    // above.
+ auto &ValueSimplifyAA = A.getAAFor<AAValueSimplify>(
+ *this, getIRPosition(), /* TrackDependence */ false);
+ if (!ValueSimplifyAA.getAssumedSimplifiedValue(A).hasValue())
+ return ChangeStatus::UNCHANGED;
+ return AANoUndef::manifest(A);
+ }
+};
+
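initialize above treats a freeze as immediate proof of noundef and otherwise falls back to isGuaranteedNotToBeUndefOrPoison. A sketch of that distinction built through the C++ API; it assumes LLVM 12 headers and that ValueTracking recognizes FreezeInst as described:

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include <cassert>

int main() {
  llvm::LLVMContext Ctx;
  llvm::Module M("demo", Ctx);
  auto *I32 = llvm::Type::getInt32Ty(Ctx);
  auto *FT = llvm::FunctionType::get(I32, {I32}, /*isVarArg=*/false);
  auto *F = llvm::Function::Create(FT, llvm::Function::ExternalLinkage, "f", M);
  llvm::IRBuilder<> B(llvm::BasicBlock::Create(Ctx, "entry", F));
  llvm::Value *Arg = F->getArg(0);
  llvm::Value *Frozen = B.CreateFreeze(Arg); // freeze yields a defined value
  B.CreateRet(Frozen);
  // A plain argument may still be undef/poison; the frozen value may not --
  // the distinction AANoUndefImpl::initialize exploits.
  assert(!llvm::isGuaranteedNotToBeUndefOrPoison(Arg));
  assert(llvm::isGuaranteedNotToBeUndefOrPoison(Frozen));
  return 0;
}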
+struct AANoUndefFloating : public AANoUndefImpl {
+ AANoUndefFloating(const IRPosition &IRP, Attributor &A)
+ : AANoUndefImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoUndefImpl::initialize(A);
+ if (!getState().isAtFixpoint())
+ if (Instruction *CtxI = getCtxI())
+ followUsesInMBEC(*this, A, getState(), *CtxI);
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ auto VisitValueCB = [&](Value &V, const Instruction *CtxI,
+ AANoUndef::StateType &T, bool Stripped) -> bool {
+ const auto &AA = A.getAAFor<AANoUndef>(*this, IRPosition::value(V));
+ if (!Stripped && this == &AA) {
+ T.indicatePessimisticFixpoint();
+ } else {
+ const AANoUndef::StateType &S =
+ static_cast<const AANoUndef::StateType &>(AA.getState());
+ T ^= S;
+ }
+ return T.isValidState();
+ };
+
+ StateType T;
+ if (!genericValueTraversal<AANoUndef, StateType>(
+ A, getIRPosition(), *this, T, VisitValueCB, getCtxI()))
+ return indicatePessimisticFixpoint();
+
+ return clampStateAndIndicateChange(getState(), T);
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(noundef) }
+};
+
+struct AANoUndefReturned final
+ : AAReturnedFromReturnedValues<AANoUndef, AANoUndefImpl> {
+ AANoUndefReturned(const IRPosition &IRP, Attributor &A)
+ : AAReturnedFromReturnedValues<AANoUndef, AANoUndefImpl>(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(noundef) }
+};
+
+struct AANoUndefArgument final
+ : AAArgumentFromCallSiteArguments<AANoUndef, AANoUndefImpl> {
+ AANoUndefArgument(const IRPosition &IRP, Attributor &A)
+ : AAArgumentFromCallSiteArguments<AANoUndef, AANoUndefImpl>(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(noundef) }
+};
+
+struct AANoUndefCallSiteArgument final : AANoUndefFloating {
+ AANoUndefCallSiteArgument(const IRPosition &IRP, Attributor &A)
+ : AANoUndefFloating(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(noundef) }
+};
+
+struct AANoUndefCallSiteReturned final
+ : AACallSiteReturnedFromReturned<AANoUndef, AANoUndefImpl> {
+ AANoUndefCallSiteReturned(const IRPosition &IRP, Attributor &A)
+ : AACallSiteReturnedFromReturned<AANoUndef, AANoUndefImpl>(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(noundef) }
+};
} // namespace
const char AAReturnedValues::ID = 0;
@@ -8048,8 +8048,8 @@ const char AAPrivatizablePtr::ID = 0;
const char AAMemoryBehavior::ID = 0;
const char AAMemoryLocation::ID = 0;
const char AAValueConstantRange::ID = 0;
-const char AAPotentialValues::ID = 0;
-const char AANoUndef::ID = 0;
+const char AAPotentialValues::ID = 0;
+const char AANoUndef::ID = 0;
// Macro magic to create the static generator function for attributes that
// follow the naming scheme.
@@ -8159,8 +8159,8 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueConstantRange)
-CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialValues)
-CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialValues)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef)
CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueSimplify)
CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAIsDead)
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/BlockExtractor.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/BlockExtractor.cpp
index c6e222a096..084a7af446 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/BlockExtractor.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/BlockExtractor.cpp
@@ -11,12 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO/BlockExtractor.h"
+#include "llvm/Transforms/IPO/BlockExtractor.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -40,10 +40,10 @@ cl::opt<bool> BlockExtractorEraseFuncs("extract-blocks-erase-funcs",
cl::desc("Erase the existing functions"),
cl::Hidden);
namespace {
-class BlockExtractor {
-public:
- BlockExtractor(bool EraseFunctions) : EraseFunctions(EraseFunctions) {}
- bool runOnModule(Module &M);
+class BlockExtractor {
+public:
+ BlockExtractor(bool EraseFunctions) : EraseFunctions(EraseFunctions) {}
+ bool runOnModule(Module &M);
void init(const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
&GroupsOfBlocksToExtract) {
for (const SmallVectorImpl<BasicBlock *> &GroupOfBlocks :
@@ -56,26 +56,26 @@ public:
loadFile();
}
-private:
- SmallVector<SmallVector<BasicBlock *, 16>, 4> GroupsOfBlocks;
- bool EraseFunctions;
- /// Map a function name to groups of blocks.
- SmallVector<std::pair<std::string, SmallVector<std::string, 4>>, 4>
- BlocksByName;
-
- void loadFile();
- void splitLandingPadPreds(Function &F);
-};
-
-class BlockExtractorLegacyPass : public ModulePass {
- BlockExtractor BE;
- bool runOnModule(Module &M) override;
-
+private:
+ SmallVector<SmallVector<BasicBlock *, 16>, 4> GroupsOfBlocks;
+ bool EraseFunctions;
+ /// Map a function name to groups of blocks.
+ SmallVector<std::pair<std::string, SmallVector<std::string, 4>>, 4>
+ BlocksByName;
+
+ void loadFile();
+ void splitLandingPadPreds(Function &F);
+};
+
+class BlockExtractorLegacyPass : public ModulePass {
+ BlockExtractor BE;
+ bool runOnModule(Module &M) override;
+
public:
static char ID;
- BlockExtractorLegacyPass(const SmallVectorImpl<BasicBlock *> &BlocksToExtract,
- bool EraseFunctions)
- : ModulePass(ID), BE(EraseFunctions) {
+ BlockExtractorLegacyPass(const SmallVectorImpl<BasicBlock *> &BlocksToExtract,
+ bool EraseFunctions)
+ : ModulePass(ID), BE(EraseFunctions) {
// We want one group per element of the input list.
SmallVector<SmallVector<BasicBlock *, 16>, 4> MassagedGroupsOfBlocks;
for (BasicBlock *BB : BlocksToExtract) {
@@ -83,38 +83,38 @@ public:
NewGroup.push_back(BB);
MassagedGroupsOfBlocks.push_back(NewGroup);
}
- BE.init(MassagedGroupsOfBlocks);
+ BE.init(MassagedGroupsOfBlocks);
}
- BlockExtractorLegacyPass(const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
- &GroupsOfBlocksToExtract,
- bool EraseFunctions)
- : ModulePass(ID), BE(EraseFunctions) {
- BE.init(GroupsOfBlocksToExtract);
+ BlockExtractorLegacyPass(const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
+ &GroupsOfBlocksToExtract,
+ bool EraseFunctions)
+ : ModulePass(ID), BE(EraseFunctions) {
+ BE.init(GroupsOfBlocksToExtract);
}
- BlockExtractorLegacyPass()
- : BlockExtractorLegacyPass(SmallVector<BasicBlock *, 0>(), false) {}
-};
+ BlockExtractorLegacyPass()
+ : BlockExtractorLegacyPass(SmallVector<BasicBlock *, 0>(), false) {}
+};
} // end anonymous namespace
-char BlockExtractorLegacyPass::ID = 0;
-INITIALIZE_PASS(BlockExtractorLegacyPass, "extract-blocks",
+char BlockExtractorLegacyPass::ID = 0;
+INITIALIZE_PASS(BlockExtractorLegacyPass, "extract-blocks",
"Extract basic blocks from module", false, false)
-ModulePass *llvm::createBlockExtractorPass() {
- return new BlockExtractorLegacyPass();
-}
+ModulePass *llvm::createBlockExtractorPass() {
+ return new BlockExtractorLegacyPass();
+}
ModulePass *llvm::createBlockExtractorPass(
const SmallVectorImpl<BasicBlock *> &BlocksToExtract, bool EraseFunctions) {
- return new BlockExtractorLegacyPass(BlocksToExtract, EraseFunctions);
+ return new BlockExtractorLegacyPass(BlocksToExtract, EraseFunctions);
}
ModulePass *llvm::createBlockExtractorPass(
const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
&GroupsOfBlocksToExtract,
bool EraseFunctions) {
- return new BlockExtractorLegacyPass(GroupsOfBlocksToExtract, EraseFunctions);
+ return new BlockExtractorLegacyPass(GroupsOfBlocksToExtract, EraseFunctions);
}
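Both factory overloads hand a configured BlockExtractorLegacyPass to the legacy pass manager. A hedged usage sketch, assuming the declarations from llvm/Transforms/IPO.h:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/IPO.h"

// Extract each listed block into its own function, keeping the originals.
void extractBlocks(llvm::Module &M,
                   const llvm::SmallVectorImpl<llvm::BasicBlock *> &Blocks) {
  llvm::legacy::PassManager PM;
  PM.add(llvm::createBlockExtractorPass(Blocks, /*EraseFunctions=*/false));
  PM.run(M);
}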
/// Gets all of the blocks specified in the input file.
@@ -246,15 +246,15 @@ bool BlockExtractor::runOnModule(Module &M) {
return Changed;
}
-
-bool BlockExtractorLegacyPass::runOnModule(Module &M) {
- return BE.runOnModule(M);
-}
-
-PreservedAnalyses BlockExtractorPass::run(Module &M,
- ModuleAnalysisManager &AM) {
- BlockExtractor BE(false);
- BE.init(SmallVector<SmallVector<BasicBlock *, 16>, 0>());
- return BE.runOnModule(M) ? PreservedAnalyses::none()
- : PreservedAnalyses::all();
-}
+
+bool BlockExtractorLegacyPass::runOnModule(Module &M) {
+ return BE.runOnModule(M);
+}
+
+PreservedAnalyses BlockExtractorPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ BlockExtractor BE(false);
+ BE.init(SmallVector<SmallVector<BasicBlock *, 16>, 0>());
+ return BE.runOnModule(M) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
+}
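The new-PM entry point above default-constructs the extractor (no function erasure) and picks up blocks via loadFile through init. An unverified sketch of scheduling it with PassBuilder:

#include "llvm/IR/Module.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/IPO/BlockExtractor.h"

// Run block extraction under the new pass manager; assumes the default-
// constructed BlockExtractorPass shown in run() above.
void extractBlocksNewPM(llvm::Module &M) {
  llvm::PassBuilder PB;
  llvm::ModuleAnalysisManager MAM;
  PB.registerModuleAnalyses(MAM);
  llvm::ModulePassManager MPM;
  MPM.addPass(llvm::BlockExtractorPass());
  MPM.run(M, MAM);
}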
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/ConstantMerge.cpp
index 8e81f4bad4..60e611dab8 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/ConstantMerge.cpp
@@ -95,8 +95,8 @@ isUnmergeableGlobal(GlobalVariable *GV,
// Only process constants with initializers in the default address space.
return !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
- // Don't touch thread-local variables.
- GV->isThreadLocal() ||
+ // Don't touch thread-local variables.
+ GV->isThreadLocal() ||
// Don't touch values marked with attribute(used).
UsedGlobals.count(GV);
}
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 0b763e423f..8eaff1862d 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -289,7 +289,7 @@ bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) {
for (Argument &Arg : Fn.args()) {
if (!Arg.hasSwiftErrorAttr() && Arg.use_empty() &&
- !Arg.hasPassPointeeByValueCopyAttr()) {
+ !Arg.hasPassPointeeByValueCopyAttr()) {
if (Arg.isUsedByMetadata()) {
Arg.replaceAllUsesWith(UndefValue::get(Arg.getType()));
Changed = true;
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/ForceFunctionAttrs.cpp
index 1a8bb225a6..39f643632e 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/ForceFunctionAttrs.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -26,13 +26,13 @@ static cl::list<std::string>
"example -force-attribute=foo:noinline. This "
"option can be specified multiple times."));
-static cl::list<std::string> ForceRemoveAttributes(
- "force-remove-attribute", cl::Hidden,
- cl::desc("Remove an attribute from a function. This should be a "
- "pair of 'function-name:attribute-name', for "
- "example -force-remove-attribute=foo:noinline. This "
- "option can be specified multiple times."));
-
+static cl::list<std::string> ForceRemoveAttributes(
+ "force-remove-attribute", cl::Hidden,
+ cl::desc("Remove an attribute from a function. This should be a "
+ "pair of 'function-name:attribute-name', for "
+ "example -force-remove-attribute=foo:noinline. This "
+ "option can be specified multiple times."));
+
static Attribute::AttrKind parseAttrKind(StringRef Kind) {
return StringSwitch<Attribute::AttrKind>(Kind)
.Case("alwaysinline", Attribute::AlwaysInline)
@@ -77,49 +77,49 @@ static Attribute::AttrKind parseAttrKind(StringRef Kind) {
}
/// If F has any forced attributes given on the command line, add them.
-/// If F has any forced remove attributes given on the command line, remove
-/// them. When both force and force-remove are given to a function, the latter
-/// takes precedence.
-static void forceAttributes(Function &F) {
- auto ParseFunctionAndAttr = [&](StringRef S) {
- auto Kind = Attribute::None;
+/// If F has any forced remove attributes given on the command line, remove
+/// them. When both force and force-remove are given to a function, the latter
+/// takes precedence.
+static void forceAttributes(Function &F) {
+ auto ParseFunctionAndAttr = [&](StringRef S) {
+ auto Kind = Attribute::None;
auto KV = StringRef(S).split(':');
if (KV.first != F.getName())
- return Kind;
- Kind = parseAttrKind(KV.second);
+ return Kind;
+ Kind = parseAttrKind(KV.second);
if (Kind == Attribute::None) {
LLVM_DEBUG(dbgs() << "ForcedAttribute: " << KV.second
<< " unknown or not handled!\n");
}
- return Kind;
- };
-
- for (auto &S : ForceAttributes) {
- auto Kind = ParseFunctionAndAttr(S);
- if (Kind == Attribute::None || F.hasFnAttribute(Kind))
+ return Kind;
+ };
+
+ for (auto &S : ForceAttributes) {
+ auto Kind = ParseFunctionAndAttr(S);
+ if (Kind == Attribute::None || F.hasFnAttribute(Kind))
continue;
F.addFnAttr(Kind);
}
-
- for (auto &S : ForceRemoveAttributes) {
- auto Kind = ParseFunctionAndAttr(S);
- if (Kind == Attribute::None || !F.hasFnAttribute(Kind))
- continue;
- F.removeFnAttr(Kind);
- }
-}
-
-static bool hasForceAttributes() {
- return !ForceAttributes.empty() || !ForceRemoveAttributes.empty();
+
+ for (auto &S : ForceRemoveAttributes) {
+ auto Kind = ParseFunctionAndAttr(S);
+ if (Kind == Attribute::None || !F.hasFnAttribute(Kind))
+ continue;
+ F.removeFnAttr(Kind);
+ }
}
+static bool hasForceAttributes() {
+ return !ForceAttributes.empty() || !ForceRemoveAttributes.empty();
+}
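// A minimal standalone sketch of the forced-attribute logic above, assuming
// nothing beyond the C++ standard library. All names below are hypothetical
// illustrations, not the LLVM API.
#include <set>
#include <string>
#include <vector>

// Split "function-name:attribute-name" and return the attribute when the
// spec targets Func; an empty string plays the role of Attribute::None.
static std::string parseAttrFor(const std::string &Spec,
                                const std::string &Func) {
  std::size_t Colon = Spec.find(':');
  if (Colon == std::string::npos || Spec.substr(0, Colon) != Func)
    return "";
  return Spec.substr(Colon + 1);
}

static void applyForcedAttrs(const std::string &Func,
                             const std::vector<std::string> &ForceList,
                             const std::vector<std::string> &RemoveList,
                             std::set<std::string> &Attrs) {
  for (const std::string &S : ForceList) {
    std::string A = parseAttrFor(S, Func);
    if (!A.empty())
      Attrs.insert(A);
  }
  // Removals run second, so -force-remove-attribute takes precedence.
  for (const std::string &S : RemoveList) {
    std::string A = parseAttrFor(S, Func);
    if (!A.empty())
      Attrs.erase(A);
  }
}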
+
PreservedAnalyses ForceFunctionAttrsPass::run(Module &M,
ModuleAnalysisManager &) {
- if (!hasForceAttributes())
+ if (!hasForceAttributes())
return PreservedAnalyses::all();
for (Function &F : M.functions())
- forceAttributes(F);
+ forceAttributes(F);
// Just conservatively invalidate analyses, this isn't likely to be important.
return PreservedAnalyses::none();
@@ -134,11 +134,11 @@ struct ForceFunctionAttrsLegacyPass : public ModulePass {
}
bool runOnModule(Module &M) override {
- if (!hasForceAttributes())
+ if (!hasForceAttributes())
return false;
for (Function &F : M.functions())
- forceAttributes(F);
+ forceAttributes(F);
// Conservatively assume we changed something.
return true;
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/FunctionAttrs.cpp
index 6730824e86..c8f19378cb 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -13,7 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/FunctionAttrs.h"
-#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -22,7 +22,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
@@ -64,7 +64,7 @@
using namespace llvm;
-#define DEBUG_TYPE "function-attrs"
+#define DEBUG_TYPE "function-attrs"
STATISTIC(NumReadNone, "Number of functions marked readnone");
STATISTIC(NumReadOnly, "Number of functions marked readonly");
@@ -78,7 +78,7 @@ STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
STATISTIC(NumNoUnwind, "Number of functions marked as nounwind");
STATISTIC(NumNoFree, "Number of functions marked as nofree");
-STATISTIC(NumWillReturn, "Number of functions marked as willreturn");
+STATISTIC(NumWillReturn, "Number of functions marked as willreturn");
static cl::opt<bool> EnableNonnullArgPropagation(
"enable-nonnull-arg-prop", cl::init(true), cl::Hidden,
@@ -149,13 +149,13 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
if (isNoModRef(MRI))
continue;
- // A pseudo probe call shouldn't change any function attribute since it
- // doesn't translate to a real instruction. It comes with a memory access
- // tag to prevent it from being removed by optimizations without blocking
- // the optimization of other instructions.
- if (isa<PseudoProbeInst>(I))
- continue;
-
+ // A pseudo probe call shouldn't change any function attribute since it
+ // doesn't translate to a real instruction. It comes with a memory access
+ // tag to prevent it from being removed by optimizations without blocking
+ // the optimization of other instructions.
+ if (isa<PseudoProbeInst>(I))
+ continue;
+
if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) {
// The call could access any memory. If that includes writes, note it.
if (isModSet(MRI))
@@ -175,7 +175,7 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
AAMDNodes AAInfo;
I->getAAMetadata(AAInfo);
- MemoryLocation Loc = MemoryLocation::getBeforeOrAfter(Arg, AAInfo);
+ MemoryLocation Loc = MemoryLocation::getBeforeOrAfter(Arg, AAInfo);
// Skip accesses to local or constant memory as they don't impact the
// externally visible mod/ref behavior.
@@ -290,18 +290,18 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
MadeChange = true;
// Clear out any existing attributes.
- AttrBuilder AttrsToRemove;
- AttrsToRemove.addAttribute(Attribute::ReadOnly);
- AttrsToRemove.addAttribute(Attribute::ReadNone);
- AttrsToRemove.addAttribute(Attribute::WriteOnly);
+ AttrBuilder AttrsToRemove;
+ AttrsToRemove.addAttribute(Attribute::ReadOnly);
+ AttrsToRemove.addAttribute(Attribute::ReadNone);
+ AttrsToRemove.addAttribute(Attribute::WriteOnly);
if (!WritesMemory && !ReadsMemory) {
// Clear out any "access range attributes" if readnone was deduced.
- AttrsToRemove.addAttribute(Attribute::ArgMemOnly);
- AttrsToRemove.addAttribute(Attribute::InaccessibleMemOnly);
- AttrsToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly);
+ AttrsToRemove.addAttribute(Attribute::ArgMemOnly);
+ AttrsToRemove.addAttribute(Attribute::InaccessibleMemOnly);
+ AttrsToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly);
}
- F->removeAttributes(AttributeList::FunctionIndex, AttrsToRemove);
+ F->removeAttributes(AttributeList::FunctionIndex, AttrsToRemove);
// Add in the new attribute.
if (WritesMemory && !ReadsMemory)
@@ -650,7 +650,7 @@ static bool addArgumentAttrsFromCallsites(Function &F) {
if (auto *CB = dyn_cast<CallBase>(&I)) {
if (auto *CalledFunc = CB->getCalledFunction()) {
for (auto &CSArg : CalledFunc->args()) {
- if (!CSArg.hasNonNullAttr(/* AllowUndefOrPoison */ false))
+ if (!CSArg.hasNonNullAttr(/* AllowUndefOrPoison */ false))
continue;
// If the non-null callsite argument operand is an argument to 'F'
@@ -1227,11 +1227,11 @@ bool AttributeInferer::run(const SCCNodeSet &SCCNodes) {
return Changed;
}
-struct SCCNodesResult {
- SCCNodeSet SCCNodes;
- bool HasUnknownCall;
-};
-
+struct SCCNodesResult {
+ SCCNodeSet SCCNodes;
+ bool HasUnknownCall;
+};
+
} // end anonymous namespace
/// Helper for non-Convergent inference predicate InstrBreaksAttribute.
@@ -1253,7 +1253,7 @@ static bool InstrBreaksNonThrowing(Instruction &I, const SCCNodeSet &SCCNodes) {
// I is a may-throw call to a function inside our SCC. This doesn't
// invalidate our current working assumption that the SCC is no-throw; we
// just have to scan that other function.
- if (SCCNodes.contains(Callee))
+ if (SCCNodes.contains(Callee))
return false;
}
}
@@ -1273,16 +1273,16 @@ static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) {
if (Callee->doesNotFreeMemory())
return false;
- if (SCCNodes.contains(Callee))
+ if (SCCNodes.contains(Callee))
return false;
return true;
}
-/// Attempt to remove convergent function attribute when possible.
+/// Attempt to remove convergent function attribute when possible.
///
/// Returns true if any changes to function attributes were made.
-static bool inferConvergent(const SCCNodeSet &SCCNodes) {
+static bool inferConvergent(const SCCNodeSet &SCCNodes) {
AttributeInferer AI;
// Request to remove the convergent attribute from all functions in the SCC
@@ -1304,19 +1304,19 @@ static bool inferConvergent(const SCCNodeSet &SCCNodes) {
F.setNotConvergent();
},
/* RequiresExactDefinition= */ false});
- // Perform all the requested attribute inference actions.
- return AI.run(SCCNodes);
-}
-
-/// Infer attributes from all functions in the SCC by scanning every
-/// instruction for compliance to the attribute assumptions. Currently it
-/// does:
-/// - addition of NoUnwind attribute
-///
-/// Returns true if any changes to function attributes were made.
-static bool inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes) {
- AttributeInferer AI;
-
+ // Perform all the requested attribute inference actions.
+ return AI.run(SCCNodes);
+}
+
+/// Infer attributes from all functions in the SCC by scanning every
+/// instruction for compliance to the attribute assumptions. Currently it
+/// does:
+/// - addition of NoUnwind attribute
+///
+/// Returns true if any changes to function attributes were made.
+static bool inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes) {
+ AttributeInferer AI;
+
if (!DisableNoUnwindInference)
// Request to infer nounwind attribute for all the functions in the SCC if
// every callsite within the SCC is not throwing (except for calls to
@@ -1392,139 +1392,139 @@ static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) {
// Every call was to a non-recursive function other than this function, and
// we have no indirect recursion as the SCC size is one. This function cannot
// recurse.
- F->setDoesNotRecurse();
- ++NumNoRecurse;
- return true;
-}
-
-static bool instructionDoesNotReturn(Instruction &I) {
- if (auto *CB = dyn_cast<CallBase>(&I)) {
- Function *Callee = CB->getCalledFunction();
- return Callee && Callee->doesNotReturn();
- }
- return false;
-}
-
-// A basic block can only return if it terminates with a ReturnInst and does not
-// contain calls to noreturn functions.
-static bool basicBlockCanReturn(BasicBlock &BB) {
- if (!isa<ReturnInst>(BB.getTerminator()))
- return false;
- return none_of(BB, instructionDoesNotReturn);
-}
-
-// Set the noreturn function attribute if possible.
-static bool addNoReturnAttrs(const SCCNodeSet &SCCNodes) {
- bool Changed = false;
-
- for (Function *F : SCCNodes) {
- if (!F || !F->hasExactDefinition() || F->hasFnAttribute(Attribute::Naked) ||
- F->doesNotReturn())
- continue;
-
- // The function can return if any basic block can return.
- // FIXME: this doesn't handle recursion or unreachable blocks.
- if (none_of(*F, basicBlockCanReturn)) {
- F->setDoesNotReturn();
- Changed = true;
- }
- }
-
- return Changed;
-}
-
-static bool functionWillReturn(const Function &F) {
- // Must-progress function without side-effects must return.
- if (F.mustProgress() && F.onlyReadsMemory())
- return true;
-
- // Can only analyze functions with a definition.
- if (F.isDeclaration())
- return false;
-
- // Functions with loops require more sophisticated analysis, as the loop
- // may be infinite. For now, don't try to handle them.
- SmallVector<std::pair<const BasicBlock *, const BasicBlock *>> Backedges;
- FindFunctionBackedges(F, Backedges);
- if (!Backedges.empty())
- return false;
-
- // If there are no loops, then the function is willreturn if all calls in
- // it are willreturn.
- return all_of(instructions(F), [](const Instruction &I) {
- return I.willReturn();
- });
-}
-
-// Set the willreturn function attribute if possible.
-static bool addWillReturn(const SCCNodeSet &SCCNodes) {
- bool Changed = false;
-
- for (Function *F : SCCNodes) {
- if (!F || F->willReturn() || !functionWillReturn(*F))
- continue;
-
- F->setWillReturn();
- NumWillReturn++;
- Changed = true;
- }
-
- return Changed;
-}
-
-static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) {
- SCCNodesResult Res;
- Res.HasUnknownCall = false;
- for (Function *F : Functions) {
- if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked)) {
- // Treat any function we're trying not to optimize as if it were an
- // indirect call and omit it from the node set used below.
- Res.HasUnknownCall = true;
- continue;
- }
- // Track whether any functions in this SCC have an unknown call edge.
- // Note: if this is ever a performance hit, we can common it with
- // subsequent routines which also do scans over the instructions of the
- // function.
- if (!Res.HasUnknownCall) {
- for (Instruction &I : instructions(*F)) {
- if (auto *CB = dyn_cast<CallBase>(&I)) {
- if (!CB->getCalledFunction()) {
- Res.HasUnknownCall = true;
- break;
- }
- }
- }
- }
- Res.SCCNodes.insert(F);
- }
- return Res;
+ F->setDoesNotRecurse();
+ ++NumNoRecurse;
+ return true;
}
+static bool instructionDoesNotReturn(Instruction &I) {
+ if (auto *CB = dyn_cast<CallBase>(&I)) {
+ Function *Callee = CB->getCalledFunction();
+ return Callee && Callee->doesNotReturn();
+ }
+ return false;
+}
+
+// A basic block can only return if it terminates with a ReturnInst and does not
+// contain calls to noreturn functions.
+static bool basicBlockCanReturn(BasicBlock &BB) {
+ if (!isa<ReturnInst>(BB.getTerminator()))
+ return false;
+ return none_of(BB, instructionDoesNotReturn);
+}
+
+// Set the noreturn function attribute if possible.
+static bool addNoReturnAttrs(const SCCNodeSet &SCCNodes) {
+ bool Changed = false;
+
+ for (Function *F : SCCNodes) {
+ if (!F || !F->hasExactDefinition() || F->hasFnAttribute(Attribute::Naked) ||
+ F->doesNotReturn())
+ continue;
+
+ // The function can return if any basic block can return.
+ // FIXME: this doesn't handle recursion or unreachable blocks.
+ if (none_of(*F, basicBlockCanReturn)) {
+ F->setDoesNotReturn();
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
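// A hedged standalone analogue of the noreturn inference above; Inst and
// Block are hypothetical stand-ins for the IR classes, not LLVM types.
#include <algorithm>
#include <vector>

struct Inst { bool IsReturn = false; bool CallsNoReturn = false; };
using Block = std::vector<Inst>;

// A block can return only if its terminator is a return and it contains
// no calls to known-noreturn callees.
static bool blockCanReturn(const Block &BB) {
  if (BB.empty() || !BB.back().IsReturn)
    return false;
  return std::none_of(BB.begin(), BB.end(),
                      [](const Inst &I) { return I.CallsNoReturn; });
}

// Mirrors addNoReturnAttrs: mark noreturn only when no block can return.
static bool functionIsNoReturn(const std::vector<Block> &F) {
  return std::none_of(F.begin(), F.end(), blockCanReturn);
}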
+
+static bool functionWillReturn(const Function &F) {
+ // Must-progress function without side-effects must return.
+ if (F.mustProgress() && F.onlyReadsMemory())
+ return true;
+
+ // Can only analyze functions with a definition.
+ if (F.isDeclaration())
+ return false;
+
+ // Functions with loops require more sophisticated analysis, as the loop
+ // may be infinite. For now, don't try to handle them.
+ SmallVector<std::pair<const BasicBlock *, const BasicBlock *>> Backedges;
+ FindFunctionBackedges(F, Backedges);
+ if (!Backedges.empty())
+ return false;
+
+ // If there are no loops, then the function is willreturn if all calls in
+ // it are willreturn.
+ return all_of(instructions(F), [](const Instruction &I) {
+ return I.willReturn();
+ });
+}
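// The same willreturn rule as a standalone sketch with hypothetical
// stand-in types (the real pass queries IR instructions and backedges).
#include <vector>

struct CallSite { bool WillReturn = false; };
struct Fn {
  bool MustProgress = false, OnlyReadsMemory = false, IsDeclaration = false;
  unsigned NumBackedges = 0;
  std::vector<CallSite> Calls;
};

static bool fnWillReturn(const Fn &F) {
  if (F.MustProgress && F.OnlyReadsMemory)
    return true;     // Side-effect-free must-progress code has to return.
  if (F.IsDeclaration)
    return false;    // No body to analyze.
  if (F.NumBackedges != 0)
    return false;    // A loop might be infinite; give up for now.
  for (const CallSite &C : F.Calls)
    if (!C.WillReturn)
      return false;  // One possibly-non-returning call blocks the attribute.
  return true;
}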
+
+// Set the willreturn function attribute if possible.
+static bool addWillReturn(const SCCNodeSet &SCCNodes) {
+ bool Changed = false;
+
+ for (Function *F : SCCNodes) {
+ if (!F || F->willReturn() || !functionWillReturn(*F))
+ continue;
+
+ F->setWillReturn();
+ NumWillReturn++;
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) {
+ SCCNodesResult Res;
+ Res.HasUnknownCall = false;
+ for (Function *F : Functions) {
+ if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked)) {
+ // Treat any function we're trying not to optimize as if it were an
+ // indirect call and omit it from the node set used below.
+ Res.HasUnknownCall = true;
+ continue;
+ }
+ // Track whether any functions in this SCC have an unknown call edge.
+ // Note: if this is ever a performance hit, we can common it with
+ // subsequent routines which also do scans over the instructions of the
+ // function.
+ if (!Res.HasUnknownCall) {
+ for (Instruction &I : instructions(*F)) {
+ if (auto *CB = dyn_cast<CallBase>(&I)) {
+ if (!CB->getCalledFunction()) {
+ Res.HasUnknownCall = true;
+ break;
+ }
+ }
+ }
+ }
+ Res.SCCNodes.insert(F);
+ }
+ return Res;
+}
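// A standalone sketch of the node-set construction above, with hypothetical
// types; note that the per-instruction scan stops at the first unknown callee.
#include <vector>

struct Call { bool HasKnownCallee = true; };
struct Func { bool OptNone = false; std::vector<Call> Calls; };

struct NodeSet {
  std::vector<const Func *> Nodes;
  bool HasUnknownCall = false;
};

static NodeSet buildNodeSet(const std::vector<Func> &Fns) {
  NodeSet Res;
  for (const Func &F : Fns) {
    // Functions we refuse to optimize behave like indirect calls.
    if (F.OptNone) {
      Res.HasUnknownCall = true;
      continue;
    }
    if (!Res.HasUnknownCall)
      for (const Call &C : F.Calls)
        if (!C.HasKnownCallee) {
          Res.HasUnknownCall = true;
          break;
        }
    Res.Nodes.push_back(&F);
  }
  return Res;
}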
+
template <typename AARGetterT>
-static bool deriveAttrsInPostOrder(ArrayRef<Function *> Functions,
- AARGetterT &&AARGetter) {
- SCCNodesResult Nodes = createSCCNodeSet(Functions);
+static bool deriveAttrsInPostOrder(ArrayRef<Function *> Functions,
+ AARGetterT &&AARGetter) {
+ SCCNodesResult Nodes = createSCCNodeSet(Functions);
bool Changed = false;
// Bail if the SCC only contains optnone functions.
- if (Nodes.SCCNodes.empty())
+ if (Nodes.SCCNodes.empty())
return Changed;
- Changed |= addArgumentReturnedAttrs(Nodes.SCCNodes);
- Changed |= addReadAttrs(Nodes.SCCNodes, AARGetter);
- Changed |= addArgumentAttrs(Nodes.SCCNodes);
- Changed |= inferConvergent(Nodes.SCCNodes);
- Changed |= addNoReturnAttrs(Nodes.SCCNodes);
- Changed |= addWillReturn(Nodes.SCCNodes);
+ Changed |= addArgumentReturnedAttrs(Nodes.SCCNodes);
+ Changed |= addReadAttrs(Nodes.SCCNodes, AARGetter);
+ Changed |= addArgumentAttrs(Nodes.SCCNodes);
+ Changed |= inferConvergent(Nodes.SCCNodes);
+ Changed |= addNoReturnAttrs(Nodes.SCCNodes);
+ Changed |= addWillReturn(Nodes.SCCNodes);
// If we have no external nodes participating in the SCC, we can deduce some
// more precise attributes as well.
- if (!Nodes.HasUnknownCall) {
- Changed |= addNoAliasAttrs(Nodes.SCCNodes);
- Changed |= addNonNullAttrs(Nodes.SCCNodes);
- Changed |= inferAttrsFromFunctionBodies(Nodes.SCCNodes);
- Changed |= addNoRecurseAttrs(Nodes.SCCNodes);
+ if (!Nodes.HasUnknownCall) {
+ Changed |= addNoAliasAttrs(Nodes.SCCNodes);
+ Changed |= addNonNullAttrs(Nodes.SCCNodes);
+ Changed |= inferAttrsFromFunctionBodies(Nodes.SCCNodes);
+ Changed |= addNoRecurseAttrs(Nodes.SCCNodes);
}
return Changed;
@@ -1543,12 +1543,12 @@ PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
return FAM.getResult<AAManager>(F);
};
- SmallVector<Function *, 8> Functions;
+ SmallVector<Function *, 8> Functions;
for (LazyCallGraph::Node &N : C) {
- Functions.push_back(&N.getFunction());
+ Functions.push_back(&N.getFunction());
}
- if (deriveAttrsInPostOrder(Functions, AARGetter))
+ if (deriveAttrsInPostOrder(Functions, AARGetter))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
@@ -1578,11 +1578,11 @@ struct PostOrderFunctionAttrsLegacyPass : public CallGraphSCCPass {
} // end anonymous namespace
char PostOrderFunctionAttrsLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrsLegacyPass, "function-attrs",
+INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrsLegacyPass, "function-attrs",
"Deduce function attributes", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_END(PostOrderFunctionAttrsLegacyPass, "function-attrs",
+INITIALIZE_PASS_END(PostOrderFunctionAttrsLegacyPass, "function-attrs",
"Deduce function attributes", false, false)
Pass *llvm::createPostOrderFunctionAttrsLegacyPass() {
@@ -1591,12 +1591,12 @@ Pass *llvm::createPostOrderFunctionAttrsLegacyPass() {
template <typename AARGetterT>
static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
- SmallVector<Function *, 8> Functions;
+ SmallVector<Function *, 8> Functions;
for (CallGraphNode *I : SCC) {
- Functions.push_back(I->getFunction());
+ Functions.push_back(I->getFunction());
}
- return deriveAttrsInPostOrder(Functions, AARGetter);
+ return deriveAttrsInPostOrder(Functions, AARGetter);
}
bool PostOrderFunctionAttrsLegacyPass::runOnSCC(CallGraphSCC &SCC) {
@@ -1629,13 +1629,13 @@ struct ReversePostOrderFunctionAttrsLegacyPass : public ModulePass {
char ReversePostOrderFunctionAttrsLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(ReversePostOrderFunctionAttrsLegacyPass,
- "rpo-function-attrs", "Deduce function attributes in RPO",
- false, false)
+INITIALIZE_PASS_BEGIN(ReversePostOrderFunctionAttrsLegacyPass,
+ "rpo-function-attrs", "Deduce function attributes in RPO",
+ false, false)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_END(ReversePostOrderFunctionAttrsLegacyPass,
- "rpo-function-attrs", "Deduce function attributes in RPO",
- false, false)
+INITIALIZE_PASS_END(ReversePostOrderFunctionAttrsLegacyPass,
+ "rpo-function-attrs", "Deduce function attributes in RPO",
+ false, false)
Pass *llvm::createReversePostOrderFunctionAttrsPass() {
return new ReversePostOrderFunctionAttrsLegacyPass();
@@ -1667,9 +1667,9 @@ static bool addNoRecurseAttrsTopDown(Function &F) {
if (!CB || !CB->getParent()->getParent()->doesNotRecurse())
return false;
}
- F.setDoesNotRecurse();
- ++NumNoRecurse;
- return true;
+ F.setDoesNotRecurse();
+ ++NumNoRecurse;
+ return true;
}
static bool deduceFunctionAttributeInRPO(Module &M, CallGraph &CG) {
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/FunctionImport.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/FunctionImport.cpp
index 18343030bc..4c5a295f5b 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/FunctionImport.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/FunctionImport.cpp
@@ -124,8 +124,8 @@ static cl::opt<bool> ComputeDead("compute-dead", cl::init(true), cl::Hidden,
cl::desc("Compute dead symbols"));
static cl::opt<bool> EnableImportMetadata(
- "enable-import-metadata", cl::init(false), cl::Hidden,
- cl::desc("Enable import metadata like 'thinlto_src_module'"));
+ "enable-import-metadata", cl::init(false), cl::Hidden,
+ cl::desc("Enable import metadata like 'thinlto_src_module'"));
/// Summary file to use for function importing when using -function-import from
/// the command line.
@@ -255,8 +255,8 @@ selectCallee(const ModuleSummaryIndex &Index,
namespace {
-using EdgeInfo =
- std::tuple<const GlobalValueSummary *, unsigned /* Threshold */>;
+using EdgeInfo =
+ std::tuple<const GlobalValueSummary *, unsigned /* Threshold */>;
} // anonymous namespace
@@ -276,9 +276,9 @@ updateValueInfoForIndirectCalls(const ModuleSummaryIndex &Index, ValueInfo VI) {
}
static void computeImportForReferencedGlobals(
- const GlobalValueSummary &Summary, const ModuleSummaryIndex &Index,
+ const GlobalValueSummary &Summary, const ModuleSummaryIndex &Index,
const GVSummaryMapTy &DefinedGVSummaries,
- SmallVectorImpl<EdgeInfo> &Worklist,
+ SmallVectorImpl<EdgeInfo> &Worklist,
FunctionImporter::ImportMapTy &ImportList,
StringMap<FunctionImporter::ExportSetTy> *ExportLists) {
for (auto &VI : Summary.refs()) {
@@ -316,11 +316,11 @@ static void computeImportForReferencedGlobals(
// which is more efficient than adding them here.
if (ExportLists)
(*ExportLists)[RefSummary->modulePath()].insert(VI);
-
- // If the variable is not writeonly, we attempt to recursively analyze
- // its references in order to import referenced constants.
- if (!Index.isWriteOnly(cast<GlobalVarSummary>(RefSummary.get())))
- Worklist.emplace_back(RefSummary.get(), 0);
+
+ // If variable is not writeonly we attempt to recursively analyze
+ // its references in order to import referenced constants.
+ if (!Index.isWriteOnly(cast<GlobalVarSummary>(RefSummary.get())))
+ Worklist.emplace_back(RefSummary.get(), 0);
break;
}
}
@@ -360,7 +360,7 @@ static void computeImportForFunction(
StringMap<FunctionImporter::ExportSetTy> *ExportLists,
FunctionImporter::ImportThresholdsTy &ImportThresholds) {
computeImportForReferencedGlobals(Summary, Index, DefinedGVSummaries,
- Worklist, ImportList, ExportLists);
+ Worklist, ImportList, ExportLists);
static int ImportCount = 0;
for (auto &Edge : Summary.calls()) {
ValueInfo VI = Edge.first;
@@ -508,7 +508,7 @@ static void computeImportForFunction(
ImportCount++;
// Insert the newly imported function to the worklist.
- Worklist.emplace_back(ResolvedCalleeSummary, AdjThreshold);
+ Worklist.emplace_back(ResolvedCalleeSummary, AdjThreshold);
}
}
@@ -549,17 +549,17 @@ static void ComputeImportForModule(
// Process the newly imported functions and add callees to the worklist.
while (!Worklist.empty()) {
- auto GVInfo = Worklist.pop_back_val();
- auto *Summary = std::get<0>(GVInfo);
- auto Threshold = std::get<1>(GVInfo);
-
- if (auto *FS = dyn_cast<FunctionSummary>(Summary))
- computeImportForFunction(*FS, Index, Threshold, DefinedGVSummaries,
- Worklist, ImportList, ExportLists,
- ImportThresholds);
- else
- computeImportForReferencedGlobals(*Summary, Index, DefinedGVSummaries,
- Worklist, ImportList, ExportLists);
+ auto GVInfo = Worklist.pop_back_val();
+ auto *Summary = std::get<0>(GVInfo);
+ auto Threshold = std::get<1>(GVInfo);
+
+ if (auto *FS = dyn_cast<FunctionSummary>(Summary))
+ computeImportForFunction(*FS, Index, Threshold, DefinedGVSummaries,
+ Worklist, ImportList, ExportLists,
+ ImportThresholds);
+ else
+ computeImportForReferencedGlobals(*Summary, Index, DefinedGVSummaries,
+ Worklist, ImportList, ExportLists);
}
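// The worklist pattern used above, reduced to a standalone sketch. EdgeInfo
// pairs a summary with the threshold it was reached at; the names and the
// halving decay below are illustrative assumptions, not the ThinLTO API.
#include <tuple>
#include <vector>

struct Summary { bool IsFunction = true; std::vector<Summary *> Callees; };
using EdgeInfo = std::tuple<Summary *, unsigned /* Threshold */>;

static void drainWorklist(std::vector<EdgeInfo> &Worklist) {
  while (!Worklist.empty()) {
    auto [S, Threshold] = Worklist.back();
    Worklist.pop_back();
    if (!S->IsFunction)
      continue; // For globals, only their references would be chased.
    for (Summary *Callee : S->Callees)
      if (Threshold > 0) // Recurse with a decayed threshold; 0 stops growth.
        Worklist.emplace_back(Callee, Threshold / 2);
  }
}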
// Print stats about functions considered but rejected for importing
@@ -888,7 +888,7 @@ void llvm::computeDeadSymbols(
while (!Worklist.empty()) {
auto VI = Worklist.pop_back_val();
for (auto &Summary : VI.getSummaryList()) {
- Summary->setLive(true);
+ Summary->setLive(true);
if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) {
// If this is an alias, visit the aliasee VI to ensure that all copies
// are marked live and it is added to the worklist for further
@@ -1314,7 +1314,7 @@ static bool doImportingForModule(Module &M) {
// Next we need to promote to global scope and rename any local values that
// are potentially exported to other modules.
- if (renameModuleForThinLTO(M, *Index, /*ClearDSOLocalOnDeclarations=*/false,
+ if (renameModuleForThinLTO(M, *Index, /*ClearDSOLocalOnDeclarations=*/false,
/*GlobalsToImport=*/nullptr)) {
errs() << "Error renaming module\n";
return false;
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/GlobalOpt.cpp
index 223a05e8ea..b06fc36b72 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/GlobalOpt.cpp
@@ -268,7 +268,7 @@ CleanupPointerRootUsers(GlobalVariable *GV,
I = J;
} while (true);
I->eraseFromParent();
- Changed = true;
+ Changed = true;
}
}
@@ -286,7 +286,7 @@ static bool CleanupConstantGlobalUsers(
// we delete a constant array, we may also be holding pointer to one of its
// elements (or an element of one of its elements if we're dealing with an
// array of arrays) in the worklist.
- SmallVector<WeakTrackingVH, 8> WorkList(V->users());
+ SmallVector<WeakTrackingVH, 8> WorkList(V->users());
while (!WorkList.empty()) {
Value *UV = WorkList.pop_back_val();
if (!UV)
@@ -1880,8 +1880,8 @@ static bool isPointerValueDeadOnEntryToFunction(
// and the number of bits loaded in L is less than or equal to
// the number of bits stored in S.
return DT.dominates(S, L) &&
- DL.getTypeStoreSize(LTy).getFixedSize() <=
- DL.getTypeStoreSize(STy).getFixedSize();
+ DL.getTypeStoreSize(LTy).getFixedSize() <=
+ DL.getTypeStoreSize(STy).getFixedSize();
}))
return false;
}
@@ -1933,7 +1933,7 @@ static void makeAllConstantUsesInstructions(Constant *C) {
SmallVector<Value*,4> UUsers;
for (auto *U : Users) {
UUsers.clear();
- append_range(UUsers, U->users());
+ append_range(UUsers, U->users());
for (auto *UU : UUsers) {
Instruction *UI = cast<Instruction>(UU);
Instruction *NewU = U->getAsInstruction();
@@ -1990,8 +1990,8 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
return true;
}
- bool Changed = false;
-
+ bool Changed = false;
+
// If the global is never loaded (but may be stored to), it is dead.
// Delete it now.
if (!GS.IsLoaded) {
@@ -2022,14 +2022,14 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
// Don't actually mark a global constant if it's atomic because atomic loads
// are implemented by a trivial cmpxchg in some edge-cases and that usually
// requires write access to the variable even if it's not actually changed.
- if (GS.Ordering == AtomicOrdering::NotAtomic) {
- assert(!GV->isConstant() && "Expected a non-constant global");
+ if (GS.Ordering == AtomicOrdering::NotAtomic) {
+ assert(!GV->isConstant() && "Expected a non-constant global");
GV->setConstant(true);
- Changed = true;
- }
+ Changed = true;
+ }
// Clean up any obviously simplifiable users now.
- Changed |= CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
+ Changed |= CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
// If the global is dead now, just nuke it.
if (GV->use_empty()) {
@@ -2089,7 +2089,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
}
}
- return Changed;
+ return Changed;
}
/// Analyze the specified global variable and optimize it if possible. If we
@@ -2224,7 +2224,7 @@ isValidCandidateForColdCC(Function &F,
BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc);
if (!isColdCallSite(CB, CallerBFI))
return false;
- if (!llvm::is_contained(AllCallsCold, CallerFunc))
+ if (!llvm::is_contained(AllCallsCold, CallerFunc))
return false;
}
return true;
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/HotColdSplitting.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/HotColdSplitting.cpp
index aa708ee520..0f91173aab 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -67,9 +67,9 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
-#include <limits>
+#include <limits>
#include <cassert>
-#include <string>
+#include <string>
#define DEBUG_TYPE "hotcoldsplit"
@@ -78,29 +78,29 @@ STATISTIC(NumColdRegionsOutlined, "Number of cold regions outlined.");
using namespace llvm;
-static cl::opt<bool> EnableStaticAnalysis("hot-cold-static-analysis",
- cl::init(true), cl::Hidden);
+static cl::opt<bool> EnableStaticAnalysis("hot-cold-static-analysis",
+ cl::init(true), cl::Hidden);
static cl::opt<int>
SplittingThreshold("hotcoldsplit-threshold", cl::init(2), cl::Hidden,
cl::desc("Base penalty for splitting cold code (as a "
"multiple of TCC_Basic)"));
-static cl::opt<bool> EnableColdSection(
- "enable-cold-section", cl::init(false), cl::Hidden,
- cl::desc("Enable placement of extracted cold functions"
- " into a separate section after hot-cold splitting."));
-
-static cl::opt<std::string>
- ColdSectionName("hotcoldsplit-cold-section-name", cl::init("__llvm_cold"),
- cl::Hidden,
- cl::desc("Name for the section containing cold functions "
- "extracted by hot-cold splitting."));
-
-static cl::opt<int> MaxParametersForSplit(
- "hotcoldsplit-max-params", cl::init(4), cl::Hidden,
- cl::desc("Maximum number of parameters for a split function"));
-
+static cl::opt<bool> EnableColdSection(
+ "enable-cold-section", cl::init(false), cl::Hidden,
+ cl::desc("Enable placement of extracted cold functions"
+ " into a separate section after hot-cold splitting."));
+
+static cl::opt<std::string>
+ ColdSectionName("hotcoldsplit-cold-section-name", cl::init("__llvm_cold"),
+ cl::Hidden,
+ cl::desc("Name for the section containing cold functions "
+ "extracted by hot-cold splitting."));
+
+static cl::opt<int> MaxParametersForSplit(
+ "hotcoldsplit-max-params", cl::init(4), cl::Hidden,
+ cl::desc("Maximum number of parameters for a split function"));
+
namespace {
// Same as blockEndsInUnreachable in CodeGen/BranchFolding.cpp. Do not modify
// this function unless you modify the MBB version as well.
@@ -237,11 +237,11 @@ bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
}
/// Get the benefit score of outlining \p Region.
-static InstructionCost getOutliningBenefit(ArrayRef<BasicBlock *> Region,
- TargetTransformInfo &TTI) {
+static InstructionCost getOutliningBenefit(ArrayRef<BasicBlock *> Region,
+ TargetTransformInfo &TTI) {
// Sum up the code size costs of non-terminator instructions. Tight coupling
// with \ref getOutliningPenalty is needed to model the costs of terminators.
- InstructionCost Benefit = 0;
+ InstructionCost Benefit = 0;
for (BasicBlock *BB : Region)
for (Instruction &I : BB->instructionsWithoutDebug())
if (&I != BB->getTerminator())
@@ -275,55 +275,55 @@ static int getOutliningPenalty(ArrayRef<BasicBlock *> Region,
}
for (BasicBlock *SuccBB : successors(BB)) {
- if (!is_contained(Region, SuccBB)) {
+ if (!is_contained(Region, SuccBB)) {
NoBlocksReturn = false;
SuccsOutsideRegion.insert(SuccBB);
}
}
}
- // Count the number of phis in exit blocks with >= 2 incoming values from the
- // outlining region. These phis are split (\ref severSplitPHINodesOfExits),
- // and new outputs are created to supply the split phis. CodeExtractor can't
- // report these new outputs until extraction begins, but it's important to
- // factor the cost of the outputs into the cost calculation.
- unsigned NumSplitExitPhis = 0;
- for (BasicBlock *ExitBB : SuccsOutsideRegion) {
- for (PHINode &PN : ExitBB->phis()) {
- // Find all incoming values from the outlining region.
- int NumIncomingVals = 0;
- for (unsigned i = 0; i < PN.getNumIncomingValues(); ++i)
- if (find(Region, PN.getIncomingBlock(i)) != Region.end()) {
- ++NumIncomingVals;
- if (NumIncomingVals > 1) {
- ++NumSplitExitPhis;
- break;
- }
- }
- }
- }
-
- // Apply a penalty for calling the split function. Factor in the cost of
- // materializing all of the parameters.
- int NumOutputsAndSplitPhis = NumOutputs + NumSplitExitPhis;
- int NumParams = NumInputs + NumOutputsAndSplitPhis;
- if (NumParams > MaxParametersForSplit) {
- LLVM_DEBUG(dbgs() << NumInputs << " inputs and " << NumOutputsAndSplitPhis
- << " outputs exceeds parameter limit ("
- << MaxParametersForSplit << ")\n");
- return std::numeric_limits<int>::max();
- }
- const int CostForArgMaterialization = 2 * TargetTransformInfo::TCC_Basic;
- LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumParams << " params\n");
- Penalty += CostForArgMaterialization * NumParams;
-
- // Apply the typical code size cost for an output alloca and its associated
- // reload in the caller. Also penalize the associated store in the callee.
- LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumOutputsAndSplitPhis
- << " outputs/split phis\n");
- const int CostForRegionOutput = 3 * TargetTransformInfo::TCC_Basic;
- Penalty += CostForRegionOutput * NumOutputsAndSplitPhis;
-
+ // Count the number of phis in exit blocks with >= 2 incoming values from the
+ // outlining region. These phis are split (\ref severSplitPHINodesOfExits),
+ // and new outputs are created to supply the split phis. CodeExtractor can't
+ // report these new outputs until extraction begins, but it's important to
+ // factor the cost of the outputs into the cost calculation.
+ unsigned NumSplitExitPhis = 0;
+ for (BasicBlock *ExitBB : SuccsOutsideRegion) {
+ for (PHINode &PN : ExitBB->phis()) {
+ // Find all incoming values from the outlining region.
+ int NumIncomingVals = 0;
+ for (unsigned i = 0; i < PN.getNumIncomingValues(); ++i)
+ if (find(Region, PN.getIncomingBlock(i)) != Region.end()) {
+ ++NumIncomingVals;
+ if (NumIncomingVals > 1) {
+ ++NumSplitExitPhis;
+ break;
+ }
+ }
+ }
+ }
+
+ // Apply a penalty for calling the split function. Factor in the cost of
+ // materializing all of the parameters.
+ int NumOutputsAndSplitPhis = NumOutputs + NumSplitExitPhis;
+ int NumParams = NumInputs + NumOutputsAndSplitPhis;
+ if (NumParams > MaxParametersForSplit) {
+ LLVM_DEBUG(dbgs() << NumInputs << " inputs and " << NumOutputsAndSplitPhis
+ << " outputs exceeds parameter limit ("
+ << MaxParametersForSplit << ")\n");
+ return std::numeric_limits<int>::max();
+ }
+ const int CostForArgMaterialization = 2 * TargetTransformInfo::TCC_Basic;
+ LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumParams << " params\n");
+ Penalty += CostForArgMaterialization * NumParams;
+
+ // Apply the typical code size cost for an output alloca and its associated
+ // reload in the caller. Also penalize the associated store in the callee.
+ LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumOutputsAndSplitPhis
+ << " outputs/split phis\n");
+ const int CostForRegionOutput = 3 * TargetTransformInfo::TCC_Basic;
+ Penalty += CostForRegionOutput * NumOutputsAndSplitPhis;
+
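// The parameter penalty above as plain arithmetic. The 2x and 3x factors
// come from the code; treating TCC_Basic as 1 is an assumption made for
// illustration only.
#include <climits>

static int splitPenalty(int NumInputs, int NumOutputsAndSplitPhis,
                        int MaxParams) {
  const int TCCBasic = 1;
  int NumParams = NumInputs + NumOutputsAndSplitPhis;
  if (NumParams > MaxParams)
    return INT_MAX; // Reject the split outright.
  int Penalty = 2 * TCCBasic * NumParams;           // Argument materialization.
  Penalty += 3 * TCCBasic * NumOutputsAndSplitPhis; // Alloca + reload + store.
  return Penalty;
}
// E.g. 3 inputs and 2 outputs/split phis: 2*5 + 3*2 = 16 penalty units.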
// Apply a `noreturn` bonus.
if (NoBlocksReturn) {
LLVM_DEBUG(dbgs() << "Applying bonus for: " << Region.size()
@@ -333,7 +333,7 @@ static int getOutliningPenalty(ArrayRef<BasicBlock *> Region,
// Apply a penalty for having more than one successor outside of the region.
// This penalty accounts for the switch needed in the caller.
- if (SuccsOutsideRegion.size() > 1) {
+ if (SuccsOutsideRegion.size() > 1) {
LLVM_DEBUG(dbgs() << "Applying penalty for: " << SuccsOutsideRegion.size()
<< " non-region successors\n");
Penalty += (SuccsOutsideRegion.size() - 1) * TargetTransformInfo::TCC_Basic;
@@ -358,12 +358,12 @@ Function *HotColdSplitting::extractColdRegion(
// splitting.
SetVector<Value *> Inputs, Outputs, Sinks;
CE.findInputsOutputs(Inputs, Outputs, Sinks);
- InstructionCost OutliningBenefit = getOutliningBenefit(Region, TTI);
+ InstructionCost OutliningBenefit = getOutliningBenefit(Region, TTI);
int OutliningPenalty =
getOutliningPenalty(Region, Inputs.size(), Outputs.size());
LLVM_DEBUG(dbgs() << "Split profitability: benefit = " << OutliningBenefit
<< ", penalty = " << OutliningPenalty << "\n");
- if (!OutliningBenefit.isValid() || OutliningBenefit <= OutliningPenalty)
+ if (!OutliningBenefit.isValid() || OutliningBenefit <= OutliningPenalty)
return nullptr;
Function *OrigF = Region[0]->getParent();
@@ -377,12 +377,12 @@ Function *HotColdSplitting::extractColdRegion(
}
CI->setIsNoInline();
- if (EnableColdSection)
- OutF->setSection(ColdSectionName);
- else {
- if (OrigF->hasSection())
- OutF->setSection(OrigF->getSection());
- }
+ if (EnableColdSection)
+ OutF->setSection(ColdSectionName);
+ else {
+ if (OrigF->hasSection())
+ OutF->setSection(OrigF->getSection());
+ }
markFunctionCold(*OutF, BFI != nullptr);
@@ -625,7 +625,7 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
continue;
bool Cold = (BFI && PSI->isColdBlock(BB, BFI)) ||
- (EnableStaticAnalysis && unlikelyExecuted(*BB));
+ (EnableStaticAnalysis && unlikelyExecuted(*BB));
if (!Cold)
continue;
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/IPO.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/IPO.cpp
index f4c12dd7f4..30a47e3fce 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/IPO.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/IPO.cpp
@@ -25,7 +25,7 @@ using namespace llvm;
void llvm::initializeIPO(PassRegistry &Registry) {
initializeOpenMPOptLegacyPassPass(Registry);
initializeArgPromotionPass(Registry);
- initializeAnnotation2MetadataLegacyPass(Registry);
+ initializeAnnotation2MetadataLegacyPass(Registry);
initializeCalledValuePropagationLegacyPassPass(Registry);
initializeConstantMergeLegacyPassPass(Registry);
initializeCrossDSOCFIPass(Registry);
@@ -36,13 +36,13 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeGlobalOptLegacyPassPass(Registry);
initializeGlobalSplitPass(Registry);
initializeHotColdSplittingLegacyPassPass(Registry);
- initializeIROutlinerLegacyPassPass(Registry);
+ initializeIROutlinerLegacyPassPass(Registry);
initializeAlwaysInlinerLegacyPassPass(Registry);
initializeSimpleInlinerPass(Registry);
initializeInferFunctionAttrsLegacyPassPass(Registry);
initializeInternalizeLegacyPassPass(Registry);
- initializeLoopExtractorLegacyPassPass(Registry);
- initializeBlockExtractorLegacyPassPass(Registry);
+ initializeLoopExtractorLegacyPassPass(Registry);
+ initializeBlockExtractorLegacyPassPass(Registry);
initializeSingleLoopExtractorPass(Registry);
initializeLowerTypeTestsPass(Registry);
initializeMergeFunctionsLegacyPassPass(Registry);
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/IROutliner.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/IROutliner.cpp
index 4b6a4f3d8f..20ab22d119 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/IROutliner.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/IROutliner.cpp
@@ -1,1764 +1,1764 @@
-//===- IROutliner.cpp -- Outline Similar Regions ----------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-// Implementation for the IROutliner which is used by the IROutliner Pass.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/IPO/IROutliner.h"
-#include "llvm/Analysis/IRSimilarityIdentifier.h"
-#include "llvm/Analysis/OptimizationRemarkEmitter.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/PassManager.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Transforms/IPO.h"
-#include <map>
-#include <set>
-#include <vector>
-
-#define DEBUG_TYPE "iroutliner"
-
-using namespace llvm;
-using namespace IRSimilarity;
-
-// Set to true if the user wants the ir outliner to run on linkonceodr linkage
-// functions. This is false by default because the linker can dedupe
-// linkonceodr functions and the outliner is confined to a single module
-// (modulo LTO). It should, however, be the default behavior in LTO.
-static cl::opt<bool> EnableLinkOnceODRIROutlining(
- "enable-linkonceodr-ir-outlining", cl::Hidden,
- cl::desc("Enable the IR outliner on linkonceodr functions"),
- cl::init(false));
-
-// This is a debug option to test small pieces of code to ensure that outlining
-// works correctly.
-static cl::opt<bool> NoCostModel(
- "ir-outlining-no-cost", cl::init(false), cl::ReallyHidden,
- cl::desc("Debug option to outline greedily, without restriction that "
- "calculated benefit outweighs cost"));
-
-/// The OutlinableGroup holds all the overarching information for outlining
-/// a set of regions that are structurally similar to one another, such as the
-/// types of the overall function, the output blocks, the sets of stores needed
-/// and a list of the different regions. This information is used in the
-/// deduplication of extracted regions with the same structure.
-struct OutlinableGroup {
- /// The sections that could be outlined
- std::vector<OutlinableRegion *> Regions;
-
- /// The argument types for the function created as the overall function to
- /// replace the extracted function for each region.
- std::vector<Type *> ArgumentTypes;
- /// The FunctionType for the overall function.
- FunctionType *OutlinedFunctionType = nullptr;
- /// The Function for the collective overall function.
- Function *OutlinedFunction = nullptr;
-
- /// Flag for whether we should not consider this group of OutlinableRegions
- /// for extraction.
- bool IgnoreGroup = false;
-
- /// The return block for the overall function.
- BasicBlock *EndBB = nullptr;
-
- /// A set containing the different GVN store sets needed. Each array contains
- /// a sorted list of the different values that need to be stored into output
- /// registers.
- DenseSet<ArrayRef<unsigned>> OutputGVNCombinations;
-
- /// Flag for whether the \ref ArgumentTypes have been defined after the
- /// extraction of the first region.
- bool InputTypesSet = false;
-
- /// The number of input values in \ref ArgumentTypes. Anything after this
- /// index in ArgumentTypes is an output argument.
- unsigned NumAggregateInputs = 0;
-
- /// The number of instructions that will be outlined by extracting \ref
- /// Regions.
- InstructionCost Benefit = 0;
- /// The number of added instructions needed for the outlining of the \ref
- /// Regions.
- InstructionCost Cost = 0;
-
- /// The argument that needs to be marked with the swifterr attribute. If not
- /// needed, there is no value.
- Optional<unsigned> SwiftErrorArgument;
-
- /// For the \ref Regions, we look at every Value. If it is a constant,
- /// we check whether it is the same in every Region.
- ///
- /// \param [in,out] NotSame contains the global value numbers where the
- /// constant is not always the same, and must be passed in as an argument.
- void findSameConstants(DenseSet<unsigned> &NotSame);
-
- /// For the regions, look at each set of GVN stores needed and account for
- /// each combination. Add an argument to the argument types if there is
- /// more than one combination.
- ///
- /// \param [in] M - The module we are outlining from.
- void collectGVNStoreSets(Module &M);
-};
-
-/// Move the contents of \p SourceBB to before the last instruction of \p
-/// TargetBB.
-/// \param SourceBB - the BasicBlock to pull Instructions from.
-/// \param TargetBB - the BasicBlock to put Instructions into.
-static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
- BasicBlock::iterator BBCurr, BBEnd, BBNext;
- for (BBCurr = SourceBB.begin(), BBEnd = SourceBB.end(); BBCurr != BBEnd;
- BBCurr = BBNext) {
- BBNext = std::next(BBCurr);
- BBCurr->moveBefore(TargetBB, TargetBB.end());
- }
-}
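// A standalone analogue of the loop above using std::list. Grabbing
// std::next before the move matters because splicing relocates the node
// the current iterator points at.
#include <list>

template <typename T>
static void moveContents(std::list<T> &Source, std::list<T> &Target) {
  for (auto It = Source.begin(), Next = It; It != Source.end(); It = Next) {
    Next = std::next(It);
    Target.splice(Target.end(), Source, It); // It now lives in Target.
  }
}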
-
-void OutlinableRegion::splitCandidate() {
- assert(!CandidateSplit && "Candidate already split!");
-
- Instruction *StartInst = (*Candidate->begin()).Inst;
- Instruction *EndInst = (*Candidate->end()).Inst;
- assert(StartInst && EndInst && "Expected a start and end instruction?");
- StartBB = StartInst->getParent();
- PrevBB = StartBB;
-
- // The basic block gets split like so:
- // block: block:
- // inst1 inst1
- // inst2 inst2
- // region1 br block_to_outline
- // region2 block_to_outline:
- // region3 -> region1
- // region4 region2
- // inst3 region3
- // inst4 region4
- // br block_after_outline
- // block_after_outline:
- // inst3
- // inst4
-
- std::string OriginalName = PrevBB->getName().str();
-
- StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline");
-
- // This is the case for the inner block since we do not have to include
- // multiple blocks.
- EndBB = StartBB;
- FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
-
- CandidateSplit = true;
-}
-
-void OutlinableRegion::reattachCandidate() {
- assert(CandidateSplit && "Candidate is not split!");
-
- // The basic block gets reattached like so:
- // block: block:
- // inst1 inst1
- // inst2 inst2
- // br block_to_outline region1
- // block_to_outline: -> region2
- // region1 region3
- // region2 region4
- // region3 inst3
- // region4 inst4
- // br block_after_outline
- // block_after_outline:
- // inst3
- // inst4
- assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
- assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!");
-
- // StartBB should only have one predecessor since we put an unconditional
- // branch at the end of PrevBB when we split the BasicBlock.
- PrevBB = StartBB->getSinglePredecessor();
- assert(PrevBB != nullptr &&
- "No Predecessor for the region start basic block!");
-
- assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
- assert(EndBB->getTerminator() && "Terminator removed from EndBB!");
- PrevBB->getTerminator()->eraseFromParent();
- EndBB->getTerminator()->eraseFromParent();
-
- moveBBContents(*StartBB, *PrevBB);
-
- BasicBlock *PlacementBB = PrevBB;
- if (StartBB != EndBB)
- PlacementBB = EndBB;
- moveBBContents(*FollowBB, *PlacementBB);
-
- PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB);
- PrevBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB);
- StartBB->eraseFromParent();
- FollowBB->eraseFromParent();
-
- // Make sure to save changes back to the StartBB.
- StartBB = PrevBB;
- EndBB = nullptr;
- PrevBB = nullptr;
- FollowBB = nullptr;
-
- CandidateSplit = false;
-}
-
-/// Find whether \p V matches the Constants previously found for the \p GVN.
-///
-/// \param V - The value to check for consistency.
-/// \param GVN - The global value number assigned to \p V.
-/// \param GVNToConstant - The mapping of global value number to Constants.
-/// \returns true if the Value matches the Constant mapped to by V and false if
-/// it \p V is a Constant but does not match.
-/// \returns None if \p V is not a Constant.
-static Optional<bool>
-constantMatches(Value *V, unsigned GVN,
- DenseMap<unsigned, Constant *> &GVNToConstant) {
- // See if we have a constant.
- Constant *CST = dyn_cast<Constant>(V);
- if (!CST)
- return None;
-
- // Holds a mapping from a global value number to a Constant.
- DenseMap<unsigned, Constant *>::iterator GVNToConstantIt;
- bool Inserted;
-
- // If we have a constant, try to make a new entry in the GVNToConstant.
- std::tie(GVNToConstantIt, Inserted) =
- GVNToConstant.insert(std::make_pair(GVN, CST));
- // If an entry already existed but maps to a different constant, the value
- // is not the same. We do not handle this case yet, and report a mismatch.
- if (Inserted || (GVNToConstantIt->second == CST))
- return true;
-
- return false;
-}
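// The tri-state Optional<bool> idiom above, sketched with std::optional and
// std::map in place of the LLVM types; an int stands in for a Constant and
// the names are hypothetical.
#include <map>
#include <optional>

static std::optional<bool>
checkConstant(std::optional<int> MaybeConst, unsigned GVN,
              std::map<unsigned, int> &GVNToConstant) {
  if (!MaybeConst)
    return std::nullopt; // Not a constant at all: "no answer".
  auto [It, Inserted] = GVNToConstant.emplace(GVN, *MaybeConst);
  // First sighting, or the same constant as before: consistent so far.
  return Inserted || It->second == *MaybeConst;
}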
-
-InstructionCost OutlinableRegion::getBenefit(TargetTransformInfo &TTI) {
- InstructionCost Benefit = 0;
-
- // Estimate the benefit of outlining a specific section of the program. We
- // mostly delegate this task to the TargetTransformInfo so that if the target
- // has specific changes, we can have a more accurate estimate.
-
- // However, getInstructionCost delegates the code size calculation for
- // arithmetic instructions to getArithmeticInstrCost in
- // include/Analysis/TargetTransformImpl.h, where it always estimates that the
- // code size for a division and remainder instruction to be equal to 4, and
- // everything else to 1. This is not an accurate representation of the
- // division instruction for targets that have a native division instruction.
- // To be overly conservative, we only add 1 to the number of instructions for
- // each division instruction.
- for (Instruction &I : *StartBB) {
- switch (I.getOpcode()) {
- case Instruction::FDiv:
- case Instruction::FRem:
- case Instruction::SDiv:
- case Instruction::SRem:
- case Instruction::UDiv:
- case Instruction::URem:
- Benefit += 1;
- break;
- default:
- Benefit += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
- break;
- }
- }
-
- return Benefit;
-}
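// A standalone sketch of the conservative costing above. The generic
// estimate of 4 for div/rem and 1 otherwise is taken from the comment;
// charging divisions 1 models a target with a native divide instruction.
#include <vector>

enum class Op { Add, Mul, SDiv, UDiv, SRem, URem, FDiv, FRem, Other };

static bool isDivRem(Op O) {
  switch (O) {
  case Op::SDiv: case Op::UDiv: case Op::SRem:
  case Op::URem: case Op::FDiv: case Op::FRem:
    return true;
  default:
    return false;
  }
}

// Generic code-size estimate per the comment above: 4 for div/rem, 1 else.
static int genericCodeSize(Op O) { return isDivRem(O) ? 4 : 1; }

static int outliningBenefit(const std::vector<Op> &Insts) {
  int Benefit = 0;
  for (Op O : Insts)
    // Charge divisions 1 instead of the generic 4 to stay conservative.
    Benefit += isDivRem(O) ? 1 : genericCodeSize(O);
  return Benefit;
}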
-
-/// Find whether \p Region matches the global value numbering to Constant
-/// mapping found so far.
-///
-/// \param Region - The OutlinableRegion we are checking for constants
-/// \param GVNToConstant - The mapping of global value number to Constants.
-/// \param NotSame - The set of global value numbers that do not have the same
-/// constant in each region.
-/// \returns true if all Constants are the same in every use of a Constant in \p
-/// Region and false if not
-static bool
-collectRegionsConstants(OutlinableRegion &Region,
- DenseMap<unsigned, Constant *> &GVNToConstant,
- DenseSet<unsigned> &NotSame) {
- bool ConstantsTheSame = true;
-
- IRSimilarityCandidate &C = *Region.Candidate;
- for (IRInstructionData &ID : C) {
-
- // Iterate over the operands in an instruction. If the global value number,
- // assigned by the IRSimilarityCandidate, has been seen before, we check if
- // the number has been found not to be the same value in each instance.
- for (Value *V : ID.OperVals) {
- Optional<unsigned> GVNOpt = C.getGVN(V);
- assert(GVNOpt.hasValue() && "Expected a GVN for operand?");
- unsigned GVN = GVNOpt.getValue();
-
- // Check if this global value has been found to not be the same already.
- if (NotSame.contains(GVN)) {
- if (isa<Constant>(V))
- ConstantsTheSame = false;
- continue;
- }
-
- // If it has been the same so far, we check whether the associated Constant
- // value matches the previous instances of the same
- // global value number. If the global value does not map to a Constant,
- // it is considered to not be the same value.
- Optional<bool> ConstantMatches = constantMatches(V, GVN, GVNToConstant);
- if (ConstantMatches.hasValue()) {
- if (ConstantMatches.getValue())
- continue;
- else
- ConstantsTheSame = false;
- }
-
- // While this value is a register here, an earlier instance may have been
- // a constant: make sure we don't already have a constant mapped to this
- // global value number.
- if (GVNToConstant.find(GVN) != GVNToConstant.end())
- ConstantsTheSame = false;
-
- NotSame.insert(GVN);
- }
- }
-
- return ConstantsTheSame;
-}
-
-void OutlinableGroup::findSameConstants(DenseSet<unsigned> &NotSame) {
- DenseMap<unsigned, Constant *> GVNToConstant;
-
- for (OutlinableRegion *Region : Regions)
- collectRegionsConstants(*Region, GVNToConstant, NotSame);
-}
-
-void OutlinableGroup::collectGVNStoreSets(Module &M) {
- for (OutlinableRegion *OS : Regions)
- OutputGVNCombinations.insert(OS->GVNStores);
-
- // We are adding an extracted argument to decide between which output path
- // to use in the basic block. It is used in a switch statement and only
- // needs to be an integer.
- if (OutputGVNCombinations.size() > 1)
- ArgumentTypes.push_back(Type::getInt32Ty(M.getContext()));
-}
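// The selector-argument decision above in a standalone sketch; StoreSet and
// the helper name are hypothetical.
#include <set>
#include <vector>

using StoreSet = std::vector<unsigned>; // Sorted GVNs stored to outputs.

// When the regions disagree on which values reach the outputs, the merged
// function needs one extra integer argument to switch between store sets.
static bool needsSelectorArg(const std::vector<StoreSet> &PerRegionStores) {
  std::set<StoreSet> Unique(PerRegionStores.begin(), PerRegionStores.end());
  return Unique.size() > 1;
}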
-
-Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
- unsigned FunctionNameSuffix) {
- assert(!Group.OutlinedFunction && "Function is already defined!");
-
- Group.OutlinedFunctionType = FunctionType::get(
- Type::getVoidTy(M.getContext()), Group.ArgumentTypes, false);
-
- // These functions will only be called from within the same module, so
- // we can set an internal linkage.
- Group.OutlinedFunction = Function::Create(
- Group.OutlinedFunctionType, GlobalValue::InternalLinkage,
- "outlined_ir_func_" + std::to_string(FunctionNameSuffix), M);
-
- // Transfer the swifterr attribute to the correct function parameter.
- if (Group.SwiftErrorArgument.hasValue())
- Group.OutlinedFunction->addParamAttr(Group.SwiftErrorArgument.getValue(),
- Attribute::SwiftError);
-
- Group.OutlinedFunction->addFnAttr(Attribute::OptimizeForSize);
- Group.OutlinedFunction->addFnAttr(Attribute::MinSize);
-
- return Group.OutlinedFunction;
-}
-
-/// Move each BasicBlock in \p Old to \p New.
-///
-/// \param [in] Old - the function to move the basic blocks from.
-/// \param [in] New - The function to move the basic blocks to.
-/// \returns the first return block for the function in New.
-static BasicBlock *moveFunctionData(Function &Old, Function &New) {
- Function::iterator CurrBB, NextBB, FinalBB;
- BasicBlock *NewEnd = nullptr;
- std::vector<Instruction *> DebugInsts;
- for (CurrBB = Old.begin(), FinalBB = Old.end(); CurrBB != FinalBB;
- CurrBB = NextBB) {
- NextBB = std::next(CurrBB);
- CurrBB->removeFromParent();
- CurrBB->insertInto(&New);
- Instruction *I = CurrBB->getTerminator();
- if (isa<ReturnInst>(I))
- NewEnd = &(*CurrBB);
- }
-
- assert(NewEnd && "No return instruction for new function?");
- return NewEnd;
-}
-
- /// Find the constants that will need to be lifted into arguments
-/// as they are not the same in each instance of the region.
-///
-/// \param [in] C - The IRSimilarityCandidate containing the region we are
-/// analyzing.
-/// \param [in] NotSame - The set of global value numbers that do not have a
-/// single Constant across all OutlinableRegions similar to \p C.
-/// \param [out] Inputs - The list containing the global value numbers of the
-/// arguments needed for the region of code.
-static void findConstants(IRSimilarityCandidate &C, DenseSet<unsigned> &NotSame,
- std::vector<unsigned> &Inputs) {
- DenseSet<unsigned> Seen;
- // Iterate over the instructions, and find what constants will need to be
- // extracted into arguments.
- for (IRInstructionDataList::iterator IDIt = C.begin(), EndIDIt = C.end();
- IDIt != EndIDIt; IDIt++) {
- for (Value *V : (*IDIt).OperVals) {
- // Since these are stored before any outlining, they will be in the
- // global value numbering.
- unsigned GVN = C.getGVN(V).getValue();
- if (isa<Constant>(V))
- if (NotSame.contains(GVN) && !Seen.contains(GVN)) {
- Inputs.push_back(GVN);
- Seen.insert(GVN);
- }
- }
- }
-}
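-
-// A sketch of the effect above (hypothetical IR): given the similar regions
-//   %r = add i32 %x, 5
-// and
-//   %r = add i32 %x, 6
-// the GVN shared by the constants 5 and 6 is in NotSame, so it is recorded
-// once in Inputs and the differing constant is lifted to an argument.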
-
-/// Find the GVN for the inputs that have been found by the CodeExtractor.
-///
-/// \param [in] C - The IRSimilarityCandidate containing the region we are
-/// analyzing.
- /// \param [in] CurrentInputs - The set of inputs found by the
- /// CodeExtractor.
- /// \param [in] OutputMappings - The mapping of values that have been replaced
- /// by a new output value.
- /// \param [out] EndInputNumbers - The global value numbers for the extracted
- /// arguments.
-static void mapInputsToGVNs(IRSimilarityCandidate &C,
- SetVector<Value *> &CurrentInputs,
- const DenseMap<Value *, Value *> &OutputMappings,
- std::vector<unsigned> &EndInputNumbers) {
- // Get the Global Value Number for each input. We check if the Value has been
- // replaced by a different value at output, and use the original value before
- // replacement.
- for (Value *Input : CurrentInputs) {
- assert(Input && "Have a nullptr as an input");
- if (OutputMappings.find(Input) != OutputMappings.end())
- Input = OutputMappings.find(Input)->second;
- assert(C.getGVN(Input).hasValue() &&
- "Could not find a numbering for the given input");
- EndInputNumbers.push_back(C.getGVN(Input).getValue());
- }
-}
-
- /// Find the original value for the \p ArgInputs values if any one of them was
-/// replaced during a previous extraction.
-///
-/// \param [in] ArgInputs - The inputs to be extracted by the code extractor.
-/// \param [in] OutputMappings - The mapping of values that have been replaced
-/// by a new output value.
-/// \param [out] RemappedArgInputs - The remapped values according to
-/// \p OutputMappings that will be extracted.
-static void
-remapExtractedInputs(const ArrayRef<Value *> ArgInputs,
- const DenseMap<Value *, Value *> &OutputMappings,
- SetVector<Value *> &RemappedArgInputs) {
- // Get the global value number for each input that will be extracted as an
- // argument by the code extractor, remapping if needed for reloaded values.
- for (Value *Input : ArgInputs) {
- if (OutputMappings.find(Input) != OutputMappings.end())
- Input = OutputMappings.find(Input)->second;
- RemappedArgInputs.insert(Input);
- }
-}
-
-/// Find the input GVNs and the output values for a region of Instructions.
-/// Using the code extractor, we collect the inputs to the extracted function.
-///
- /// This function may mark \p Region as needing to be ignored; callers should
- /// check the IgnoreRegion flag after the call.
-///
-/// \param [in,out] Region - The region of code to be analyzed.
-/// \param [out] InputGVNs - The global value numbers for the extracted
-/// arguments.
-/// \param [in] NotSame - The global value numbers in the region that do not
-/// have the same constant value in the regions structurally similar to
-/// \p Region.
-/// \param [in] OutputMappings - The mapping of values that have been replaced
-/// by a new output value after extraction.
-/// \param [out] ArgInputs - The values of the inputs to the extracted function.
-/// \param [out] Outputs - The set of values extracted by the CodeExtractor
-/// as outputs.
-static void getCodeExtractorArguments(
- OutlinableRegion &Region, std::vector<unsigned> &InputGVNs,
- DenseSet<unsigned> &NotSame, DenseMap<Value *, Value *> &OutputMappings,
- SetVector<Value *> &ArgInputs, SetVector<Value *> &Outputs) {
- IRSimilarityCandidate &C = *Region.Candidate;
-
- // OverallInputs are the inputs to the region found by the CodeExtractor.
- // SinkCands and HoistCands are used by the CodeExtractor to find sunken
- // allocas of values whose lifetimes are contained completely within the
- // outlined region. PremappedInputs are the arguments found by the
- // CodeExtractor, with conditions such as sunken allocas removed, but they
- // may still need to be remapped where extracted output values replaced the
- // original values. We use DummyOutputs for this first run of finding
- // inputs and outputs since the outputs could change during findAllocas;
- // the correct set of extracted outputs will be in the final Outputs ValueSet.
- SetVector<Value *> OverallInputs, PremappedInputs, SinkCands, HoistCands,
- DummyOutputs;
-
- // Use the code extractor to get the inputs and outputs, without sunken
- // allocas or removing llvm.assumes.
- CodeExtractor *CE = Region.CE;
- CE->findInputsOutputs(OverallInputs, DummyOutputs, SinkCands);
- assert(Region.StartBB && "Region must have a start BasicBlock!");
- Function *OrigF = Region.StartBB->getParent();
- CodeExtractorAnalysisCache CEAC(*OrigF);
- BasicBlock *Dummy = nullptr;
-
- // The region may be ineligible due to VarArgs in the parent function. In this
- // case we ignore the region.
- if (!CE->isEligible()) {
- Region.IgnoreRegion = true;
- return;
- }
-
- // Find whether any values are going to be sunk into the function when extracted.
- CE->findAllocas(CEAC, SinkCands, HoistCands, Dummy);
- CE->findInputsOutputs(PremappedInputs, Outputs, SinkCands);
-
- // TODO: Support regions with sunken allocas: values whose lifetimes are
- // contained completely within the outlined region. These are not guaranteed
- // to be the same in every region, so we must elevate them all to arguments
- // when they appear. If these values are not equal, it means there is some
- // Input in OverallInputs that was removed for ArgInputs.
- if (OverallInputs.size() != PremappedInputs.size()) {
- Region.IgnoreRegion = true;
- return;
- }
-
- findConstants(C, NotSame, InputGVNs);
-
- mapInputsToGVNs(C, OverallInputs, OutputMappings, InputGVNs);
-
- remapExtractedInputs(PremappedInputs.getArrayRef(), OutputMappings,
- ArgInputs);
-
- // Sort the GVNs; since we now have constants included in \ref InputGVNs,
- // we need to make sure they are in a deterministic order.
- stable_sort(InputGVNs);
-}
-
-/// Look over the inputs and map each input argument to an argument in the
-/// overall function for the OutlinableRegions. This creates a way to replace
-/// the arguments of the extracted function with the arguments of the new
-/// overall function.
-///
-/// \param [in,out] Region - The region of code to be analyzed.
- /// \param [in] InputGVNs - The global value numbers of the input values
-/// collected.
-/// \param [in] ArgInputs - The values of the arguments to the extracted
-/// function.
-static void
-findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
- std::vector<unsigned> &InputGVNs,
- SetVector<Value *> &ArgInputs) {
-
- IRSimilarityCandidate &C = *Region.Candidate;
- OutlinableGroup &Group = *Region.Parent;
-
- // This counts the argument number in the overall function.
- unsigned TypeIndex = 0;
-
- // This counts the argument number in the extracted function.
- unsigned OriginalIndex = 0;
-
- // Find the mapping of the extracted arguments to the arguments for the
- // overall function. Since there may be extra arguments in the overall
- // function to account for the extracted constants, we have two different
- // counters as we find extracted arguments, and as we come across overall
- // arguments.
- for (unsigned InputVal : InputGVNs) {
- Optional<Value *> InputOpt = C.fromGVN(InputVal);
- assert(InputOpt.hasValue() && "Global value number not found?");
- Value *Input = InputOpt.getValue();
-
- if (!Group.InputTypesSet) {
- Group.ArgumentTypes.push_back(Input->getType());
- // If the input value has a swifterr attribute, make sure to mark the
- // argument in the overall function.
- if (Input->isSwiftError()) {
- assert(
- !Group.SwiftErrorArgument.hasValue() &&
- "Argument already marked with swifterr for this OutlinableGroup!");
- Group.SwiftErrorArgument = TypeIndex;
- }
- }
-
- // Check if we have a constant. If we do, add it to the region's map from
- // overall argument number to Constant, and continue to the next input.
- if (Constant *CST = dyn_cast<Constant>(Input)) {
- Region.AggArgToConstant.insert(std::make_pair(TypeIndex, CST));
- TypeIndex++;
- continue;
- }
-
- // It is not a constant, so we create the mapping from the extracted
- // argument list to the overall argument list.
- assert(ArgInputs.count(Input) && "Input cannot be found!");
-
- Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, TypeIndex));
- Region.AggArgToExtracted.insert(std::make_pair(TypeIndex, OriginalIndex));
- OriginalIndex++;
- TypeIndex++;
- }
-
- // If the function type definitions for the OutlinableGroup holding the region
- // have not been set, set the length of the inputs here. We should have the
- // same inputs for all of the different regions contained in the
- // OutlinableGroup since they are all structurally similar to one another.
- if (!Group.InputTypesSet) {
- Group.NumAggregateInputs = TypeIndex;
- Group.InputTypesSet = true;
- }
-
- Region.NumExtractedInputs = OriginalIndex;
-}
-
-/// Create a mapping of the output arguments for the \p Region to the output
-/// arguments of the overall outlined function.
-///
-/// \param [in,out] Region - The region of code to be analyzed.
-/// \param [in] Outputs - The values found by the code extractor.
-static void
-findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region,
- ArrayRef<Value *> Outputs) {
- OutlinableGroup &Group = *Region.Parent;
- IRSimilarityCandidate &C = *Region.Candidate;
-
- // This counts the argument number in the extracted function.
- unsigned OriginalIndex = Region.NumExtractedInputs;
-
- // This counts the argument number in the overall function.
- unsigned TypeIndex = Group.NumAggregateInputs;
- bool TypeFound;
- DenseSet<unsigned> AggArgsUsed;
-
- // Iterate over the output types and identify if there is an aggregate pointer
- // type whose base type matches the current output type. If there is, we mark
- // that we will use this output register for this value. If not, we add
- // another type to the overall argument type list. We also store the GVNs
- // used for stores to identify which values will need to be moved into a
- // special block that holds the stores to the output registers.
- for (Value *Output : Outputs) {
- TypeFound = false;
- // We can do this since it is a result value, and will have a number
- // that is necessarily the same. But if, in the future, the instructions
- // do not have to be in the same order, merely functionally the same, we
- // will have to use a different scheme, as one-to-one correspondence is
- // not guaranteed.
- unsigned GlobalValue = C.getGVN(Output).getValue();
- unsigned ArgumentSize = Group.ArgumentTypes.size();
-
- for (unsigned Jdx = TypeIndex; Jdx < ArgumentSize; Jdx++) {
- if (Group.ArgumentTypes[Jdx] != PointerType::getUnqual(Output->getType()))
- continue;
-
- if (AggArgsUsed.contains(Jdx))
- continue;
-
- TypeFound = true;
- AggArgsUsed.insert(Jdx);
- Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, Jdx));
- Region.AggArgToExtracted.insert(std::make_pair(Jdx, OriginalIndex));
- Region.GVNStores.push_back(GlobalValue);
- break;
- }
-
- // We were unable to find an unused type in the output type set that matches
- // the output, so we add a pointer type to the argument types of the overall
- // function to handle this output and create a mapping to it.
- if (!TypeFound) {
- Group.ArgumentTypes.push_back(PointerType::getUnqual(Output->getType()));
- AggArgsUsed.insert(Group.ArgumentTypes.size() - 1);
- Region.ExtractedArgToAgg.insert(
- std::make_pair(OriginalIndex, Group.ArgumentTypes.size() - 1));
- Region.AggArgToExtracted.insert(
- std::make_pair(Group.ArgumentTypes.size() - 1, OriginalIndex));
- Region.GVNStores.push_back(GlobalValue);
- }
-
- stable_sort(Region.GVNStores);
- OriginalIndex++;
- TypeIndex++;
- }
-}
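-
-// For illustration (hypothetical indices): with inputs occupying aggregate
-// arguments 0..2, the first i32 output appends an i32* at index 3; a later
-// region with a matching, still unused i32 output reuses index 3 instead of
-// growing ArgumentTypes again.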
-
-void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region,
- DenseSet<unsigned> &NotSame) {
- std::vector<unsigned> Inputs;
- SetVector<Value *> ArgInputs, Outputs;
-
- getCodeExtractorArguments(Region, Inputs, NotSame, OutputMappings, ArgInputs,
- Outputs);
-
- if (Region.IgnoreRegion)
- return;
-
- // Map the inputs found by the CodeExtractor to the arguments found for
- // the overall function.
- findExtractedInputToOverallInputMapping(Region, Inputs, ArgInputs);
-
- // Map the outputs found by the CodeExtractor to the arguments found for
- // the overall function.
- findExtractedOutputToOverallOutputMapping(Region, Outputs.getArrayRef());
-}
-
-/// Replace the extracted function in the Region with a call to the overall
-/// function constructed from the deduplicated similar regions, replacing and
-/// remapping the values passed to the extracted function as arguments to the
-/// new arguments of the overall function.
-///
-/// \param [in] M - The module to outline from.
- /// \param [in] Region - The region of extracted code to be replaced with a
- /// new function.
-/// \returns a call instruction with the replaced function.
-CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
- std::vector<Value *> NewCallArgs;
- DenseMap<unsigned, unsigned>::iterator ArgPair;
-
- OutlinableGroup &Group = *Region.Parent;
- CallInst *Call = Region.Call;
- assert(Call && "Call to replace is nullptr?");
- Function *AggFunc = Group.OutlinedFunction;
- assert(AggFunc && "Function to replace with is nullptr?");
-
- // If the argument lists are the same size, there are no values that need
- // to be made into arguments and no different output registers to handle.
- // We can simply replace the called function in this case.
- if (AggFunc->arg_size() == Call->arg_size()) {
- LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
- << *AggFunc << " with same number of arguments\n");
- Call->setCalledFunction(AggFunc);
- return Call;
- }
-
- // We have a different number of arguments than the new function, so we
- // need to use our previous mappings of extracted argument to overall
- // function argument, and of constant to overall function argument, to
- // create the new argument list.
- for (unsigned AggArgIdx = 0; AggArgIdx < AggFunc->arg_size(); AggArgIdx++) {
-
- if (AggArgIdx == AggFunc->arg_size() - 1 &&
- Group.OutputGVNCombinations.size() > 1) {
- // If we are on the last argument, and we need to differentiate between
- // output blocks, add an integer to the argument list to determine
- // which block to take.
- LLVM_DEBUG(dbgs() << "Set switch block argument to "
- << Region.OutputBlockNum << "\n");
- NewCallArgs.push_back(ConstantInt::get(Type::getInt32Ty(M.getContext()),
- Region.OutputBlockNum));
- continue;
- }
-
- ArgPair = Region.AggArgToExtracted.find(AggArgIdx);
- if (ArgPair != Region.AggArgToExtracted.end()) {
- Value *ArgumentValue = Call->getArgOperand(ArgPair->second);
- // If we found the mapping from the extracted function to the overall
- // function, we simply add it to the argument list. We use the same
- // value, it just needs to honor the new order of arguments.
- LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
- << *ArgumentValue << "\n");
- NewCallArgs.push_back(ArgumentValue);
- continue;
- }
-
- // If it is a constant, we simply add it to the argument list as a value.
- if (Region.AggArgToConstant.find(AggArgIdx) !=
- Region.AggArgToConstant.end()) {
- Constant *CST = Region.AggArgToConstant.find(AggArgIdx)->second;
- LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
- << *CST << "\n");
- NewCallArgs.push_back(CST);
- continue;
- }
-
- // Add a nullptr value if the argument is not found in the extracted
- // function. If we cannot find a value, it means it is not in use
- // for the region, so we should not pass anything to it.
- LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to nullptr\n");
- NewCallArgs.push_back(ConstantPointerNull::get(
- static_cast<PointerType *>(AggFunc->getArg(AggArgIdx)->getType())));
- }
-
- LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
- << *AggFunc << " with new set of arguments\n");
- // Create the new call instruction and erase the old one.
- Call = CallInst::Create(AggFunc->getFunctionType(), AggFunc, NewCallArgs, "",
- Call);
-
- // It is possible that the call to the outlined function is either the first
- // instruction in the new block, the last instruction, or both. If any of
- // these is the case, we need to make sure that we replace the instruction
- // in the IRInstructionData struct with the new call.
- CallInst *OldCall = Region.Call;
- if (Region.NewFront->Inst == OldCall)
- Region.NewFront->Inst = Call;
- if (Region.NewBack->Inst == OldCall)
- Region.NewBack->Inst = Call;
-
- // Transfer any debug information.
- Call->setDebugLoc(Region.Call->getDebugLoc());
-
- // Remove the old instruction.
- OldCall->eraseFromParent();
- Region.Call = Call;
-
- // Make sure that the correct argument of the new call is marked with the
- // SwiftError attribute.
- if (Group.SwiftErrorArgument.hasValue())
- Call->addParamAttr(Group.SwiftErrorArgument.getValue(),
- Attribute::SwiftError);
-
- return Call;
-}
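-
-// A sketch of the rewrite above (names and values hypothetical): a call
-//   call void @outlined_ir_func_0_extracted(i32 %x, i32* %out)
-// may be rebuilt as
-//   call void @outlined_ir_func_0(i32 %x, i32 5, i32* %out, i32 1)
-// once a lifted constant and the output-block selector are appended.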
-
- /// Within an extracted function, replace the argument uses of the extracted
- /// region with the arguments of the function for an OutlinableGroup.
- ///
-/// \param [in] Region - The region of extracted code to be changed.
-/// \param [in,out] OutputBB - The BasicBlock for the output stores for this
-/// region.
-static void replaceArgumentUses(OutlinableRegion &Region,
- BasicBlock *OutputBB) {
- OutlinableGroup &Group = *Region.Parent;
- assert(Region.ExtractedFunction && "Region has no extracted function?");
-
- for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size();
- ArgIdx++) {
- assert(Region.ExtractedArgToAgg.find(ArgIdx) !=
- Region.ExtractedArgToAgg.end() &&
- "No mapping from extracted to outlined?");
- unsigned AggArgIdx = Region.ExtractedArgToAgg.find(ArgIdx)->second;
- Argument *AggArg = Group.OutlinedFunction->getArg(AggArgIdx);
- Argument *Arg = Region.ExtractedFunction->getArg(ArgIdx);
- // The argument is an input, so we can simply replace it with the overall
- // argument value
- if (ArgIdx < Region.NumExtractedInputs) {
- LLVM_DEBUG(dbgs() << "Replacing uses of input " << *Arg << " in function "
- << *Region.ExtractedFunction << " with " << *AggArg
- << " in function " << *Group.OutlinedFunction << "\n");
- Arg->replaceAllUsesWith(AggArg);
- continue;
- }
-
- // If we are replacing an output, we place the store value in its own
- // block inside the overall function before replacing the use of the output
- // in the function.
- assert(Arg->hasOneUse() && "Output argument can only have one use");
- User *InstAsUser = Arg->user_back();
- assert(InstAsUser && "User is nullptr!");
-
- Instruction *I = cast<Instruction>(InstAsUser);
- I->setDebugLoc(DebugLoc());
- LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
- << *OutputBB << "\n");
-
- I->moveBefore(*OutputBB, OutputBB->end());
-
- LLVM_DEBUG(dbgs() << "Replacing uses of output " << *Arg << " in function "
- << *Region.ExtractedFunction << " with " << *AggArg
- << " in function " << *Group.OutlinedFunction << "\n");
- Arg->replaceAllUsesWith(AggArg);
- }
-}
-
-/// Within an extracted function, replace the constants that need to be lifted
-/// into arguments with the actual argument.
-///
-/// \param Region [in] - The region of extracted code to be changed.
-void replaceConstants(OutlinableRegion &Region) {
- OutlinableGroup &Group = *Region.Parent;
- // Iterate over the constants that need to be elevated into arguments
- for (std::pair<unsigned, Constant *> &Const : Region.AggArgToConstant) {
- unsigned AggArgIdx = Const.first;
- Function *OutlinedFunction = Group.OutlinedFunction;
- assert(OutlinedFunction && "Overall Function is not defined?");
- Constant *CST = Const.second;
- Argument *Arg = Group.OutlinedFunction->getArg(AggArgIdx);
- // Identify the argument it will be elevated to, and replace instances of
- // that constant in the function.
-
- // TODO: If in the future constants do not have one global value number,
- // i.e. a constant 1 could be mapped to several values, this check will
- // have to be more strict. It cannot be using only replaceUsesWithIf.
-
- LLVM_DEBUG(dbgs() << "Replacing uses of constant " << *CST
- << " in function " << *OutlinedFunction << " with "
- << *Arg << "\n");
- CST->replaceUsesWithIf(Arg, [OutlinedFunction](Use &U) {
- if (Instruction *I = dyn_cast<Instruction>(U.getUser()))
- return I->getFunction() == OutlinedFunction;
- return false;
- });
- }
-}
-
- /// For the given function, find all the instructions that are neither debug
- /// nor lifetime instructions, and return them as a vector. Exclude any
- /// blocks in \p ExcludeBlocks.
-///
-/// \param [in] F - The function we collect the instructions from.
-/// \param [in] ExcludeBlocks - BasicBlocks to ignore.
-/// \returns the list of instructions extracted.
-static std::vector<Instruction *>
-collectRelevantInstructions(Function &F,
- DenseSet<BasicBlock *> &ExcludeBlocks) {
- std::vector<Instruction *> RelevantInstructions;
-
- for (BasicBlock &BB : F) {
- if (ExcludeBlocks.contains(&BB))
- continue;
-
- for (Instruction &Inst : BB) {
- if (Inst.isLifetimeStartOrEnd())
- continue;
- if (isa<DbgInfoIntrinsic>(Inst))
- continue;
-
- RelevantInstructions.push_back(&Inst);
- }
- }
-
- return RelevantInstructions;
-}
-
- /// It is possible that there is a basic block that already performs the same
- /// stores. This returns a duplicate block, if it exists.
-///
-/// \param OutputBB [in] the block we are looking for a duplicate of.
-/// \param OutputStoreBBs [in] The existing output blocks.
- /// \returns an optional value with the number of the matching output block.
-Optional<unsigned>
-findDuplicateOutputBlock(BasicBlock *OutputBB,
- ArrayRef<BasicBlock *> OutputStoreBBs) {
-
- bool WrongInst = false;
- bool WrongSize = false;
- unsigned MatchingNum = 0;
- for (BasicBlock *CompBB : OutputStoreBBs) {
- WrongInst = false;
- if (CompBB->size() - 1 != OutputBB->size()) {
- WrongSize = true;
- MatchingNum++;
- continue;
- }
-
- WrongSize = false;
- BasicBlock::iterator NIt = OutputBB->begin();
- for (Instruction &I : *CompBB) {
- if (isa<BranchInst>(&I))
- continue;
-
- if (!I.isIdenticalTo(&(*NIt))) {
- WrongInst = true;
- break;
- }
-
- NIt++;
- }
- if (!WrongInst && !WrongSize)
- return MatchingNum;
-
- MatchingNum++;
- }
-
- return None;
-}
-
- /// For the outlined section, move the needed StoreInsts for the output
-/// registers into their own block. Then, determine if there is a duplicate
-/// output block already created.
-///
-/// \param [in] OG - The OutlinableGroup of regions to be outlined.
-/// \param [in] Region - The OutlinableRegion that is being analyzed.
- /// \param [in,out] OutputBB - the block that the stores for this region
- /// will be placed in.
-/// \param [in] EndBB - the final block of the extracted function.
- /// \param [in] OutputMappings - The mapping of values that have
- /// been replaced by a new output value.
-/// \param [in,out] OutputStoreBBs - The existing output blocks.
-static void
-alignOutputBlockWithAggFunc(OutlinableGroup &OG, OutlinableRegion &Region,
- BasicBlock *OutputBB, BasicBlock *EndBB,
- const DenseMap<Value *, Value *> &OutputMappings,
- std::vector<BasicBlock *> &OutputStoreBBs) {
- DenseSet<unsigned> ValuesToFind(Region.GVNStores.begin(),
- Region.GVNStores.end());
-
- // We iterate over the instructions in the extracted function, and find the
- // global value number of the instructions. If we find a value that should
- // be contained in a store, we replace the uses of the value with the value
- // from the overall function, so that the store is storing the correct
- // value from the overall function.
- DenseSet<BasicBlock *> ExcludeBBs(OutputStoreBBs.begin(),
- OutputStoreBBs.end());
- ExcludeBBs.insert(OutputBB);
- std::vector<Instruction *> ExtractedFunctionInsts =
- collectRelevantInstructions(*(Region.ExtractedFunction), ExcludeBBs);
- std::vector<Instruction *> OverallFunctionInsts =
- collectRelevantInstructions(*OG.OutlinedFunction, ExcludeBBs);
-
- assert(ExtractedFunctionInsts.size() == OverallFunctionInsts.size() &&
- "Number of relevant instructions not equal!");
-
- unsigned NumInstructions = ExtractedFunctionInsts.size();
- for (unsigned Idx = 0; Idx < NumInstructions; Idx++) {
- Value *V = ExtractedFunctionInsts[Idx];
-
- if (OutputMappings.find(V) != OutputMappings.end())
- V = OutputMappings.find(V)->second;
- Optional<unsigned> GVN = Region.Candidate->getGVN(V);
-
- // If we have found one of the stored values for output, replace the value
- // with the corresponding one from the overall function.
- if (GVN.hasValue() && ValuesToFind.erase(GVN.getValue())) {
- V->replaceAllUsesWith(OverallFunctionInsts[Idx]);
- if (ValuesToFind.size() == 0)
- break;
- }
-
- if (ValuesToFind.size() == 0)
- break;
- }
-
- assert(ValuesToFind.size() == 0 && "Not all store values were handled!");
-
- // If the size of the block is 0, then there are no stores, and we do not
- // need to save this block.
- if (OutputBB->size() == 0) {
- Region.OutputBlockNum = -1;
- OutputBB->eraseFromParent();
- return;
- }
-
- // Determine if there is a duplicate block.
- Optional<unsigned> MatchingBB =
- findDuplicateOutputBlock(OutputBB, OutputStoreBBs);
-
- // If there is, we remove the new output block. If there is not, we add it
- // to our list of output blocks.
- if (MatchingBB.hasValue()) {
- LLVM_DEBUG(dbgs() << "Set output block for region in function"
- << Region.ExtractedFunction << " to "
- << MatchingBB.getValue());
-
- Region.OutputBlockNum = MatchingBB.getValue();
- OutputBB->eraseFromParent();
- return;
- }
-
- Region.OutputBlockNum = OutputStoreBBs.size();
-
- LLVM_DEBUG(dbgs() << "Create output block for region in"
- << Region.ExtractedFunction << " to "
- << *OutputBB);
- OutputStoreBBs.push_back(OutputBB);
- BranchInst::Create(EndBB, OutputBB);
-}
-
-/// Create the switch statement for outlined function to differentiate between
-/// all the output blocks.
-///
- /// \param [in] M - The module we are outlining from.
- /// \param [in] OG - The group of regions to be outlined.
-/// \param [in] EndBB - The final block of the extracted function.
-/// \param [in,out] OutputStoreBBs - The existing output blocks.
-void createSwitchStatement(Module &M, OutlinableGroup &OG, BasicBlock *EndBB,
- ArrayRef<BasicBlock *> OutputStoreBBs) {
- // We only need the switch statement if there is more than one store
- // combination.
- if (OG.OutputGVNCombinations.size() > 1) {
- Function *AggFunc = OG.OutlinedFunction;
- // Create a final block
- BasicBlock *ReturnBlock =
- BasicBlock::Create(M.getContext(), "final_block", AggFunc);
- Instruction *Term = EndBB->getTerminator();
- Term->moveBefore(*ReturnBlock, ReturnBlock->end());
- // Put the switch statement in the old end basic block for the function with
- // a fall through to the new return block
- LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
- << OutputStoreBBs.size() << "\n");
- SwitchInst *SwitchI =
- SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1),
- ReturnBlock, OutputStoreBBs.size(), EndBB);
-
- unsigned Idx = 0;
- for (BasicBlock *BB : OutputStoreBBs) {
- SwitchI->addCase(ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx),
- BB);
- Term = BB->getTerminator();
- Term->setSuccessor(0, ReturnBlock);
- Idx++;
- }
- return;
- }
-
- // If there needs to be stores, move them from the output block to the end
- // block to save on branching instructions.
- if (OutputStoreBBs.size() == 1) {
- LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
- << *OG.OutlinedFunction << "\n");
- BasicBlock *OutputBlock = OutputStoreBBs[0];
- Instruction *Term = OutputBlock->getTerminator();
- Term->eraseFromParent();
- Term = EndBB->getTerminator();
- moveBBContents(*OutputBlock, *EndBB);
- Term->moveBefore(*EndBB, EndBB->end());
- OutputBlock->eraseFromParent();
- }
-}
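-
-// Shape of the emitted control flow (illustrative only):
-//   switch i32 %selector, label %final_block [
-//     i32 0, label %output_block_0
-//     i32 1, label %output_block_1
-//   ]
-// where each output block performs one set of stores and branches to
-// final_block, which now holds the original terminator.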
-
-/// Fill the new function that will serve as the replacement function for all of
-/// the extracted regions of a certain structure from the first region in the
-/// list of regions. Replace this first region's extracted function with the
-/// new overall function.
-///
-/// \param [in] M - The module we are outlining from.
-/// \param [in] CurrentGroup - The group of regions to be outlined.
-/// \param [in,out] OutputStoreBBs - The output blocks for each different
-/// set of stores needed for the different functions.
-/// \param [in,out] FuncsToRemove - Extracted functions to erase from module
-/// once outlining is complete.
-static void fillOverallFunction(Module &M, OutlinableGroup &CurrentGroup,
- std::vector<BasicBlock *> &OutputStoreBBs,
- std::vector<Function *> &FuncsToRemove) {
- OutlinableRegion *CurrentOS = CurrentGroup.Regions[0];
-
- // Move the first extracted function's instructions into the new function.
- LLVM_DEBUG(dbgs() << "Move instructions from "
- << *CurrentOS->ExtractedFunction << " to function "
- << *CurrentGroup.OutlinedFunction << "\n");
-
- CurrentGroup.EndBB = moveFunctionData(*CurrentOS->ExtractedFunction,
- *CurrentGroup.OutlinedFunction);
-
- // Transfer the attributes from the function to the new function.
- for (Attribute A :
- CurrentOS->ExtractedFunction->getAttributes().getFnAttributes())
- CurrentGroup.OutlinedFunction->addFnAttr(A);
-
- // Create an output block for the first extracted function.
- BasicBlock *NewBB = BasicBlock::Create(
- M.getContext(), Twine("output_block_") + Twine(static_cast<unsigned>(0)),
- CurrentGroup.OutlinedFunction);
- CurrentOS->OutputBlockNum = 0;
-
- replaceArgumentUses(*CurrentOS, NewBB);
- replaceConstants(*CurrentOS);
-
- // If the new basic block has no new stores, we can erase it from the module.
- // If it does, we create a branch instruction from the new block to the last
- // basic block.
- if (NewBB->size() == 0) {
- CurrentOS->OutputBlockNum = -1;
- NewBB->eraseFromParent();
- } else {
- BranchInst::Create(CurrentGroup.EndBB, NewBB);
- OutputStoreBBs.push_back(NewBB);
- }
-
- // Replace the call to the extracted function with the outlined function.
- CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
-
- // We only delete the extracted functions at the end since we may need to
- // reference instructions contained in them for mapping purposes.
- FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
-}
-
-void IROutliner::deduplicateExtractedSections(
- Module &M, OutlinableGroup &CurrentGroup,
- std::vector<Function *> &FuncsToRemove, unsigned &OutlinedFunctionNum) {
- createFunction(M, CurrentGroup, OutlinedFunctionNum);
-
- std::vector<BasicBlock *> OutputStoreBBs;
-
- OutlinableRegion *CurrentOS;
-
- fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove);
-
- for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) {
- CurrentOS = CurrentGroup.Regions[Idx];
- AttributeFuncs::mergeAttributesForOutlining(*CurrentGroup.OutlinedFunction,
- *CurrentOS->ExtractedFunction);
-
- // Create a new BasicBlock to hold the needed store instructions.
- BasicBlock *NewBB = BasicBlock::Create(
- M.getContext(), "output_block_" + std::to_string(Idx),
- CurrentGroup.OutlinedFunction);
- replaceArgumentUses(*CurrentOS, NewBB);
-
- alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBB,
- CurrentGroup.EndBB, OutputMappings,
- OutputStoreBBs);
-
- CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
- FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
- }
-
- // Create a switch statement to handle the different output schemes.
- createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBB, OutputStoreBBs);
-
- OutlinedFunctionNum++;
-}
-
-void IROutliner::pruneIncompatibleRegions(
- std::vector<IRSimilarityCandidate> &CandidateVec,
- OutlinableGroup &CurrentGroup) {
- bool PreviouslyOutlined;
-
- // Sort from beginning to end, so the IRSimilarityCandidates are in order.
- stable_sort(CandidateVec, [](const IRSimilarityCandidate &LHS,
- const IRSimilarityCandidate &RHS) {
- return LHS.getStartIdx() < RHS.getStartIdx();
- });
-
- unsigned CurrentEndIdx = 0;
- for (IRSimilarityCandidate &IRSC : CandidateVec) {
- PreviouslyOutlined = false;
- unsigned StartIdx = IRSC.getStartIdx();
- unsigned EndIdx = IRSC.getEndIdx();
-
- for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
- if (Outlined.contains(Idx)) {
- PreviouslyOutlined = true;
- break;
- }
-
- if (PreviouslyOutlined)
- continue;
-
- // TODO: If in the future we can outline across BasicBlocks, we will need to
- // check all BasicBlocks contained in the region.
- if (IRSC.getStartBB()->hasAddressTaken())
- continue;
-
- if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() &&
- !OutlineFromLinkODRs)
- continue;
-
- // Greedily prune out any regions that will overlap with already chosen
- // regions.
- if (CurrentEndIdx != 0 && StartIdx <= CurrentEndIdx)
- continue;
-
- bool BadInst = any_of(IRSC, [this](IRInstructionData &ID) {
- // We check if there is a discrepancy between the InstructionDataList
- // and the actual next instruction in the module. If there is, it means
- // that an extra instruction was added, likely by the CodeExtractor.
-
- // Since we do not have any similarity data about this particular
- // instruction, we cannot confidently outline it, and must discard this
- // candidate.
- if (std::next(ID.getIterator())->Inst !=
- ID.Inst->getNextNonDebugInstruction())
- return true;
- return !this->InstructionClassifier.visit(ID.Inst);
- });
-
- if (BadInst)
- continue;
-
- OutlinableRegion *OS = new (RegionAllocator.Allocate())
- OutlinableRegion(IRSC, CurrentGroup);
- CurrentGroup.Regions.push_back(OS);
-
- CurrentEndIdx = EndIdx;
- }
-}
-
-InstructionCost
-IROutliner::findBenefitFromAllRegions(OutlinableGroup &CurrentGroup) {
- InstructionCost RegionBenefit = 0;
- for (OutlinableRegion *Region : CurrentGroup.Regions) {
- TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
- // We add the number of instructions in the region to the benefit as an
- // estimate as to how much will be removed.
- RegionBenefit += Region->getBenefit(TTI);
- LLVM_DEBUG(dbgs() << "Adding: " << RegionBenefit
- << " saved instructions to overfall benefit.\n");
- }
-
- return RegionBenefit;
-}
-
-InstructionCost
-IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) {
- InstructionCost OverallCost = 0;
- for (OutlinableRegion *Region : CurrentGroup.Regions) {
- TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
-
- // Each output incurs a load after the call, so we add that to the cost.
- for (unsigned OutputGVN : Region->GVNStores) {
- Optional<Value *> OV = Region->Candidate->fromGVN(OutputGVN);
- assert(OV.hasValue() && "Could not find value for GVN?");
- Value *V = OV.getValue();
- InstructionCost LoadCost =
- TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
- TargetTransformInfo::TCK_CodeSize);
-
- LLVM_DEBUG(dbgs() << "Adding: " << LoadCost
- << " instructions to cost for output of type "
- << *V->getType() << "\n");
- OverallCost += LoadCost;
- }
- }
-
- return OverallCost;
-}
-
-/// Find the extra instructions needed to handle any output values for the
-/// region.
-///
-/// \param [in] M - The Module to outline from.
-/// \param [in] CurrentGroup - The collection of OutlinableRegions to analyze.
-/// \param [in] TTI - The TargetTransformInfo used to collect information for
-/// new instruction costs.
-/// \returns the additional cost to handle the outputs.
-static InstructionCost findCostForOutputBlocks(Module &M,
- OutlinableGroup &CurrentGroup,
- TargetTransformInfo &TTI) {
- InstructionCost OutputCost = 0;
-
- for (const ArrayRef<unsigned> &OutputUse :
- CurrentGroup.OutputGVNCombinations) {
- IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
- for (unsigned GVN : OutputUse) {
- Optional<Value *> OV = Candidate.fromGVN(GVN);
- assert(OV.hasValue() && "Could not find value for GVN?");
- Value *V = OV.getValue();
- InstructionCost StoreCost =
- TTI.getMemoryOpCost(Instruction::Store, V->getType(), Align(1), 0,
- TargetTransformInfo::TCK_CodeSize);
-
- // An instruction cost is added for each store set that needs to occur for
- // various output combinations inside the function, plus a branch to
- // return to the exit block.
- LLVM_DEBUG(dbgs() << "Adding: " << StoreCost
- << " instructions to cost for output of type "
- << *V->getType() << "\n");
- OutputCost += StoreCost;
- }
-
- InstructionCost BranchCost =
- TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize);
- LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for"
- << " a branch instruction\n");
- OutputCost += BranchCost;
- }
-
- // If there is more than one output scheme, we must have a comparison and
- // branch for each different item in the switch statement.
- if (CurrentGroup.OutputGVNCombinations.size() > 1) {
- InstructionCost ComparisonCost = TTI.getCmpSelInstrCost(
- Instruction::ICmp, Type::getInt32Ty(M.getContext()),
- Type::getInt32Ty(M.getContext()), CmpInst::BAD_ICMP_PREDICATE,
- TargetTransformInfo::TCK_CodeSize);
- InstructionCost BranchCost =
- TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize);
-
- unsigned DifferentBlocks = CurrentGroup.OutputGVNCombinations.size();
- InstructionCost TotalCost = ComparisonCost * BranchCost * DifferentBlocks;
-
- LLVM_DEBUG(dbgs() << "Adding: " << TotalCost
- << " instructions for each switch case for each different"
- << " output path in a function\n");
- OutputCost += TotalCost;
- }
-
- return OutputCost;
-}
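-
-// Worked example with assumed unit costs (store = branch = icmp = 1 at
-// TCK_CodeSize): two GVN combinations of two stores each contribute
-// 2 * (2 + 1) = 6, and the switch term adds 1 * 1 * 2 = 2 more, giving an
-// OutputCost of 8.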
-
-void IROutliner::findCostBenefit(Module &M, OutlinableGroup &CurrentGroup) {
- InstructionCost RegionBenefit = findBenefitFromAllRegions(CurrentGroup);
- CurrentGroup.Benefit += RegionBenefit;
- LLVM_DEBUG(dbgs() << "Current Benefit: " << CurrentGroup.Benefit << "\n");
-
- InstructionCost OutputReloadCost = findCostOutputReloads(CurrentGroup);
- CurrentGroup.Cost += OutputReloadCost;
- LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
-
- InstructionCost AverageRegionBenefit =
- RegionBenefit / CurrentGroup.Regions.size();
- unsigned OverallArgumentNum = CurrentGroup.ArgumentTypes.size();
- unsigned NumRegions = CurrentGroup.Regions.size();
- TargetTransformInfo &TTI =
- getTTI(*CurrentGroup.Regions[0]->Candidate->getFunction());
-
- // We add the average region benefit to the cost once, to account for the
- // instructions kept in the body of the newly created function.
- LLVM_DEBUG(dbgs() << "Adding: " << AverageRegionBenefit
- << " instructions to cost for body of new function.\n");
- CurrentGroup.Cost += AverageRegionBenefit;
- LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
-
- // For each argument, we must add an instruction for loading the argument
- // out of the register and into a value inside of the newly outlined function.
- LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
- << " instructions to cost for each argument in the new"
- << " function.\n");
- CurrentGroup.Cost +=
- OverallArgumentNum * TargetTransformInfo::TCC_Basic;
- LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
-
- // Each argument needs to either be loaded into a register or onto the stack.
- // Some arguments will only be loaded onto the stack once the argument
- // registers are filled.
- LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
- << " instructions to cost for each argument in the new"
- << " function " << NumRegions << " times for the "
- << "needed argument handling at the call site.\n");
- CurrentGroup.Cost +=
- 2 * OverallArgumentNum * TargetTransformInfo::TCC_Basic * NumRegions;
- LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
-
- CurrentGroup.Cost += findCostForOutputBlocks(M, CurrentGroup, TTI);
- LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
-}
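-
-// Illustrative tally (all numbers assumed, TCC_Basic = 1): for two regions
-// with a benefit of 10 instructions each, RegionBenefit = 20 and
-// AverageRegionBenefit = 10. With three overall arguments, the cost grows
-// by 10 (new function body) + 3 (argument loads) + 2 * 3 * 2 = 12 (call-site
-// argument handling), on top of the reload and output-block costs.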
-
-void IROutliner::updateOutputMapping(OutlinableRegion &Region,
- ArrayRef<Value *> Outputs,
- LoadInst *LI) {
- // For the load instruction following the call, get the pointer it loads from.
- Value *Operand = LI->getPointerOperand();
- Optional<unsigned> OutputIdx = None;
- // Find whether the operand is an output register.
- for (unsigned ArgIdx = Region.NumExtractedInputs;
- ArgIdx < Region.Call->arg_size(); ArgIdx++) {
- if (Operand == Region.Call->getArgOperand(ArgIdx)) {
- OutputIdx = ArgIdx - Region.NumExtractedInputs;
- break;
- }
- }
-
- // If we did not find an output register, there is nothing to map. Otherwise,
- // place a mapping of the new value to the original in the mapping.
- if (!OutputIdx.hasValue())
- return;
-
- if (OutputMappings.find(Outputs[OutputIdx.getValue()]) ==
- OutputMappings.end()) {
- LLVM_DEBUG(dbgs() << "Mapping extracted output " << *LI << " to "
- << *Outputs[OutputIdx.getValue()] << "\n");
- OutputMappings.insert(std::make_pair(LI, Outputs[OutputIdx.getValue()]));
- } else {
- Value *Orig = OutputMappings.find(Outputs[OutputIdx.getValue()])->second;
- LLVM_DEBUG(dbgs() << "Mapping extracted output " << *Orig << " to "
- << *Outputs[OutputIdx.getValue()] << "\n");
- OutputMappings.insert(std::make_pair(LI, Orig));
- }
-}
-
-bool IROutliner::extractSection(OutlinableRegion &Region) {
- SetVector<Value *> ArgInputs, Outputs, SinkCands;
- Region.CE->findInputsOutputs(ArgInputs, Outputs, SinkCands);
-
- assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!");
- assert(Region.FollowBB && "FollowBB for the OutlinableRegion is nullptr!");
- Function *OrigF = Region.StartBB->getParent();
- CodeExtractorAnalysisCache CEAC(*OrigF);
- Region.ExtractedFunction = Region.CE->extractCodeRegion(CEAC);
-
- // If the extraction failed, reattach the candidate and give up on this
- // region. Otherwise, find the rewritten BasicBlock and reassign the
- // OutlinableRegion blocks.
- if (!Region.ExtractedFunction) {
- LLVM_DEBUG(dbgs() << "CodeExtractor failed to outline " << Region.StartBB
- << "\n");
- Region.reattachCandidate();
- return false;
- }
-
- BasicBlock *RewrittenBB = Region.FollowBB->getSinglePredecessor();
- Region.StartBB = RewrittenBB;
- Region.EndBB = RewrittenBB;
-
- // The sequence of outlinable regions has now changed. We must fix the
- // IRInstructionDataList for consistency. Although they may not be illegal
- // instructions, they should not be compared with anything else as they
- // should not be outlined in this round. So marking these as illegal is
- // allowed.
- IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
- Instruction *BeginRewritten = &*RewrittenBB->begin();
- Instruction *EndRewritten = &*RewrittenBB->begin();
- Region.NewFront = new (InstDataAllocator.Allocate()) IRInstructionData(
- *BeginRewritten, InstructionClassifier.visit(*BeginRewritten), *IDL);
- Region.NewBack = new (InstDataAllocator.Allocate()) IRInstructionData(
- *EndRewritten, InstructionClassifier.visit(*EndRewritten), *IDL);
-
- // Insert the first IRInstructionData of the new region in front of the
- // first IRInstructionData of the IRSimilarityCandidate.
- IDL->insert(Region.Candidate->begin(), *Region.NewFront);
- // Insert the last IRInstructionData of the new region after the
- // last IRInstructionData of the IRSimilarityCandidate.
- IDL->insert(Region.Candidate->end(), *Region.NewBack);
- // Remove the IRInstructionData from the IRSimilarityCandidate.
- IDL->erase(Region.Candidate->begin(), std::prev(Region.Candidate->end()));
-
- assert(RewrittenBB != nullptr &&
- "Could not find a predecessor after extraction!");
-
- // Iterate over the new set of instructions to find the new call
- // instruction.
- for (Instruction &I : *RewrittenBB)
- if (CallInst *CI = dyn_cast<CallInst>(&I)) {
- if (Region.ExtractedFunction == CI->getCalledFunction())
- Region.Call = CI;
- } else if (LoadInst *LI = dyn_cast<LoadInst>(&I))
- updateOutputMapping(Region, Outputs.getArrayRef(), LI);
- Region.reattachCandidate();
- return true;
-}
-
-unsigned IROutliner::doOutline(Module &M) {
- // Find the possible similarity sections.
- IRSimilarityIdentifier &Identifier = getIRSI(M);
- SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
-
- // Sort them by size of extracted sections
- unsigned OutlinedFunctionNum = 0;
- // If we only have one SimilarityGroup in SimilarityCandidates, we do not have
- // to sort them by the potential number of instructions to be outlined
- if (SimilarityCandidates.size() > 1)
- llvm::stable_sort(SimilarityCandidates,
- [](const std::vector<IRSimilarityCandidate> &LHS,
- const std::vector<IRSimilarityCandidate> &RHS) {
- return LHS[0].getLength() * LHS.size() >
- RHS[0].getLength() * RHS.size();
- });
-
- DenseSet<unsigned> NotSame;
- std::vector<Function *> FuncsToRemove;
- // Iterate over the possible sets of similarity.
- for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
- OutlinableGroup CurrentGroup;
-
- // Remove entries that were previously outlined
- pruneIncompatibleRegions(CandidateVec, CurrentGroup);
-
- // We pruned the number of regions to 0 or 1, meaning that it's not worth
- // trying to outline since there is no compatible similar instance of this
- // code.
- if (CurrentGroup.Regions.size() < 2)
- continue;
-
- // Determine if there are any values that are the same constant throughout
- // each section in the set.
- NotSame.clear();
- CurrentGroup.findSameConstants(NotSame);
-
- if (CurrentGroup.IgnoreGroup)
- continue;
-
- // Create a CodeExtractor for each outlinable region. Identify inputs and
- // outputs for each section using the code extractor and create the argument
- // types for the Aggregate Outlining Function.
- std::vector<OutlinableRegion *> OutlinedRegions;
- for (OutlinableRegion *OS : CurrentGroup.Regions) {
- // Break the outlinable region out of its parent BasicBlock into its own
- // BasicBlocks (see function implementation).
- OS->splitCandidate();
- std::vector<BasicBlock *> BE = {OS->StartBB};
- OS->CE = new (ExtractorAllocator.Allocate())
- CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
- false, "outlined");
- findAddInputsOutputs(M, *OS, NotSame);
- if (!OS->IgnoreRegion)
- OutlinedRegions.push_back(OS);
- else
- OS->reattachCandidate();
- }
-
- CurrentGroup.Regions = std::move(OutlinedRegions);
-
- if (CurrentGroup.Regions.empty())
- continue;
-
- CurrentGroup.collectGVNStoreSets(M);
-
- if (CostModel)
- findCostBenefit(M, CurrentGroup);
-
- // If we are adhering to the cost model and the cost outweighs the benefit,
- // reattach all the candidates and emit a missed-outlining remark.
- if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) {
- for (OutlinableRegion *OS : CurrentGroup.Regions)
- OS->reattachCandidate();
- OptimizationRemarkEmitter &ORE = getORE(
- *CurrentGroup.Regions[0]->Candidate->getFunction());
- ORE.emit([&]() {
- IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
- OptimizationRemarkMissed R(DEBUG_TYPE, "WouldNotDecreaseSize",
- C->frontInstruction());
- R << "did not outline "
- << ore::NV(std::to_string(CurrentGroup.Regions.size()))
- << " regions due to estimated increase of "
- << ore::NV("InstructionIncrease",
- CurrentGroup.Cost - CurrentGroup.Benefit)
- << " instructions at locations ";
- interleave(
- CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
- [&R](OutlinableRegion *Region) {
- R << ore::NV(
- "DebugLoc",
- Region->Candidate->frontInstruction()->getDebugLoc());
- },
- [&R]() { R << " "; });
- return R;
- });
- continue;
- }
-
- LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost
- << " and benefit " << CurrentGroup.Benefit << "\n");
-
- // Create functions out of all the sections, and mark them as outlined.
- OutlinedRegions.clear();
- for (OutlinableRegion *OS : CurrentGroup.Regions) {
- bool FunctionOutlined = extractSection(*OS);
- if (FunctionOutlined) {
- unsigned StartIdx = OS->Candidate->getStartIdx();
- unsigned EndIdx = OS->Candidate->getEndIdx();
- for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
- Outlined.insert(Idx);
-
- OutlinedRegions.push_back(OS);
- }
- }
-
- LLVM_DEBUG(dbgs() << "Outlined " << OutlinedRegions.size()
- << " with benefit " << CurrentGroup.Benefit
- << " and cost " << CurrentGroup.Cost << "\n");
-
- CurrentGroup.Regions = std::move(OutlinedRegions);
-
- if (CurrentGroup.Regions.empty())
- continue;
-
- OptimizationRemarkEmitter &ORE =
- getORE(*CurrentGroup.Regions[0]->Call->getFunction());
- ORE.emit([&]() {
- IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
- OptimizationRemark R(DEBUG_TYPE, "Outlined", C->front()->Inst);
- R << "outlined " << ore::NV(std::to_string(CurrentGroup.Regions.size()))
- << " regions with decrease of "
- << ore::NV("Benefit", CurrentGroup.Benefit - CurrentGroup.Cost)
- << " instructions at locations ";
- interleave(
- CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
- [&R](OutlinableRegion *Region) {
- R << ore::NV("DebugLoc",
- Region->Candidate->frontInstruction()->getDebugLoc());
- },
- [&R]() { R << " "; });
- return R;
- });
-
- deduplicateExtractedSections(M, CurrentGroup, FuncsToRemove,
- OutlinedFunctionNum);
- }
-
- for (Function *F : FuncsToRemove)
- F->eraseFromParent();
-
- return OutlinedFunctionNum;
-}
-
-bool IROutliner::run(Module &M) {
- CostModel = !NoCostModel;
- OutlineFromLinkODRs = EnableLinkOnceODRIROutlining;
-
- return doOutline(M) > 0;
-}
-
-// Pass Manager Boilerplate
-class IROutlinerLegacyPass : public ModulePass {
-public:
- static char ID;
- IROutlinerLegacyPass() : ModulePass(ID) {
- initializeIROutlinerLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addRequired<IRSimilarityIdentifierWrapperPass>();
- }
-
- bool runOnModule(Module &M) override;
-};
-
-bool IROutlinerLegacyPass::runOnModule(Module &M) {
- if (skipModule(M))
- return false;
-
- std::unique_ptr<OptimizationRemarkEmitter> ORE;
- auto GORE = [&ORE](Function &F) -> OptimizationRemarkEmitter & {
- ORE.reset(new OptimizationRemarkEmitter(&F));
- return *ORE.get();
- };
-
- auto GTTI = [this](Function &F) -> TargetTransformInfo & {
- return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- };
-
- auto GIRSI = [this](Module &) -> IRSimilarityIdentifier & {
- return this->getAnalysis<IRSimilarityIdentifierWrapperPass>().getIRSI();
- };
-
- return IROutliner(GTTI, GIRSI, GORE).run(M);
-}
-
-PreservedAnalyses IROutlinerPass::run(Module &M, ModuleAnalysisManager &AM) {
- auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
-
- std::function<TargetTransformInfo &(Function &)> GTTI =
- [&FAM](Function &F) -> TargetTransformInfo & {
- return FAM.getResult<TargetIRAnalysis>(F);
- };
-
- std::function<IRSimilarityIdentifier &(Module &)> GIRSI =
- [&AM](Module &M) -> IRSimilarityIdentifier & {
- return AM.getResult<IRSimilarityAnalysis>(M);
- };
-
- std::unique_ptr<OptimizationRemarkEmitter> ORE;
- std::function<OptimizationRemarkEmitter &(Function &)> GORE =
- [&ORE](Function &F) -> OptimizationRemarkEmitter & {
- ORE.reset(new OptimizationRemarkEmitter(&F));
- return *ORE.get();
- };
-
- if (IROutliner(GTTI, GIRSI, GORE).run(M))
- return PreservedAnalyses::none();
- return PreservedAnalyses::all();
-}
-
-char IROutlinerLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
- false)
-INITIALIZE_PASS_DEPENDENCY(IRSimilarityIdentifierWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
- false)
-
-ModulePass *llvm::createIROutlinerPass() { return new IROutlinerLegacyPass(); }
+//===- IROutliner.cpp -- Outline Similar Regions ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Implementation for the IROutliner which is used by the IROutliner Pass.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/IROutliner.h"
+#include "llvm/Analysis/IRSimilarityIdentifier.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/IPO.h"
+#include <map>
+#include <set>
+#include <vector>
+
+#define DEBUG_TYPE "iroutliner"
+
+using namespace llvm;
+using namespace IRSimilarity;
+
+// Set to true if the user wants the IR outliner to run on linkonceodr linkage
+// functions. This is off by default because the linker can deduplicate
+// linkonceodr functions and the outliner is confined to a single module
+// (modulo LTO). It should, however, be the default behavior in LTO.
+static cl::opt<bool> EnableLinkOnceODRIROutlining(
+ "enable-linkonceodr-ir-outlining", cl::Hidden,
+ cl::desc("Enable the IR outliner on linkonceodr functions"),
+ cl::init(false));
+
+// This is a debug option to test small pieces of code to ensure that outlining
+// works correctly.
+static cl::opt<bool> NoCostModel(
+ "ir-outlining-no-cost", cl::init(false), cl::ReallyHidden,
+ cl::desc("Debug option to outline greedily, without restriction that "
+ "calculated benefit outweighs cost"));
+
+/// The OutlinableGroup holds all the overarching information for outlining
+/// a set of regions that are structurally similar to one another, such as the
+/// types of the overall function, the output blocks, the sets of stores needed
+/// and a list of the different regions. This information is used in the
+/// deduplication of extracted regions with the same structure.
+struct OutlinableGroup {
+ /// The sections that could be outlined.
+ std::vector<OutlinableRegion *> Regions;
+
+ /// The argument types for the function created as the overall function to
+ /// replace the extracted function for each region.
+ std::vector<Type *> ArgumentTypes;
+ /// The FunctionType for the overall function.
+ FunctionType *OutlinedFunctionType = nullptr;
+ /// The Function for the collective overall function.
+ Function *OutlinedFunction = nullptr;
+
+ /// Flag for whether we should not consider this group of OutlinableRegions
+ /// for extraction.
+ bool IgnoreGroup = false;
+
+ /// The return block for the overall function.
+ BasicBlock *EndBB = nullptr;
+
+ /// A set containing the different GVN store sets needed. Each array contains
+ /// a sorted list of the different values that need to be stored into output
+ /// registers.
+ DenseSet<ArrayRef<unsigned>> OutputGVNCombinations;
+
+ /// Flag for whether the \ref ArgumentTypes have been defined after the
+ /// extraction of the first region.
+ bool InputTypesSet = false;
+
+ /// The number of input values in \ref ArgumentTypes. Anything after this
+ /// index in ArgumentTypes is an output argument.
+ unsigned NumAggregateInputs = 0;
+
+ /// The number of instructions that will be outlined by extracting \ref
+ /// Regions.
+ InstructionCost Benefit = 0;
+ /// The number of added instructions needed for the outlining of the \ref
+ /// Regions.
+ InstructionCost Cost = 0;
+
+ /// The argument that needs to be marked with the swifterr attribute. If not
+ /// needed, there is no value.
+ Optional<unsigned> SwiftErrorArgument;
+
+ /// For the \ref Regions, we look at every Value. If it is a constant,
+ /// we check whether it is the same in every Region.
+ ///
+ /// \param [in,out] NotSame contains the global value numbers where the
+ /// constant is not always the same, and must be passed in as an argument.
+ void findSameConstants(DenseSet<unsigned> &NotSame);
+
+ /// For the regions, look at each set of GVN stores needed and account for
+ /// each combination. Add an argument to the argument types if there is
+ /// more than one combination.
+ ///
+ /// \param [in] M - The module we are outlining from.
+ void collectGVNStoreSets(Module &M);
+};
+
+/// Move the contents of \p SourceBB to before the last instruction of \p
+/// TargetBB.
+/// \param SourceBB - the BasicBlock to pull Instructions from.
+/// \param TargetBB - the BasicBlock to put the Instructions into.
+static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
+ BasicBlock::iterator BBCurr, BBEnd, BBNext;
+ for (BBCurr = SourceBB.begin(), BBEnd = SourceBB.end(); BBCurr != BBEnd;
+ BBCurr = BBNext) {
+ BBNext = std::next(BBCurr);
+ BBCurr->moveBefore(TargetBB, TargetBB.end());
+ }
+}
+
+void OutlinableRegion::splitCandidate() {
+ assert(!CandidateSplit && "Candidate already split!");
+
+ Instruction *StartInst = (*Candidate->begin()).Inst;
+ Instruction *EndInst = (*Candidate->end()).Inst;
+ assert(StartInst && EndInst && "Expected a start and end instruction?");
+ StartBB = StartInst->getParent();
+ PrevBB = StartBB;
+
+ // The basic block gets split like so:
+ // block: block:
+ // inst1 inst1
+ // inst2 inst2
+ // region1 br block_to_outline
+ // region2 block_to_outline:
+ // region3 -> region1
+ // region4 region2
+ // inst3 region3
+ // inst4 region4
+ // br block_after_outline
+ // block_after_outline:
+ // inst3
+ // inst4
+
+ std::string OriginalName = PrevBB->getName().str();
+
+ StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline");
+
+  // Since the outlined region is currently confined to a single BasicBlock,
+  // the end block is the same as the start block.
+ EndBB = StartBB;
+ FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
+
+ CandidateSplit = true;
+}
+
+void OutlinableRegion::reattachCandidate() {
+ assert(CandidateSplit && "Candidate is not split!");
+
+ // The basic block gets reattached like so:
+ // block: block:
+ // inst1 inst1
+ // inst2 inst2
+ // br block_to_outline region1
+ // block_to_outline: -> region2
+ // region1 region3
+ // region2 region4
+ // region3 inst3
+ // region4 inst4
+ // br block_after_outline
+ // block_after_outline:
+ // inst3
+ // inst4
+ assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
+  assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!");
+
+ // StartBB should only have one predecessor since we put an unconditional
+ // branch at the end of PrevBB when we split the BasicBlock.
+ PrevBB = StartBB->getSinglePredecessor();
+ assert(PrevBB != nullptr &&
+ "No Predecessor for the region start basic block!");
+
+ assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
+ assert(EndBB->getTerminator() && "Terminator removed from EndBB!");
+ PrevBB->getTerminator()->eraseFromParent();
+ EndBB->getTerminator()->eraseFromParent();
+
+ moveBBContents(*StartBB, *PrevBB);
+
+ BasicBlock *PlacementBB = PrevBB;
+ if (StartBB != EndBB)
+ PlacementBB = EndBB;
+ moveBBContents(*FollowBB, *PlacementBB);
+
+ PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB);
+ PrevBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB);
+ StartBB->eraseFromParent();
+ FollowBB->eraseFromParent();
+
+ // Make sure to save changes back to the StartBB.
+ StartBB = PrevBB;
+ EndBB = nullptr;
+ PrevBB = nullptr;
+ FollowBB = nullptr;
+
+ CandidateSplit = false;
+}
+
+/// Find whether \p V matches the Constants previously found for the \p GVN.
+///
+/// \param V - The value to check for consistency.
+/// \param GVN - The global value number assigned to \p V.
+/// \param GVNToConstant - The mapping of global value number to Constants.
+/// \returns true if \p V matches the Constant previously mapped to \p GVN and
+/// false if \p V is a Constant but does not match.
+/// \returns None if \p V is not a Constant.
+static Optional<bool>
+constantMatches(Value *V, unsigned GVN,
+ DenseMap<unsigned, Constant *> &GVNToConstant) {
+  // See if we have a constant.
+ Constant *CST = dyn_cast<Constant>(V);
+ if (!CST)
+ return None;
+
+  // Iterator into the mapping from global value number to Constant, and a
+  // flag for whether a new entry was inserted.
+  DenseMap<unsigned, Constant *>::iterator GVNToConstantIt;
+  bool Inserted;
+
+ // If we have a constant, try to make a new entry in the GVNToConstant.
+ std::tie(GVNToConstantIt, Inserted) =
+ GVNToConstant.insert(std::make_pair(GVN, CST));
+  // If a new entry was inserted, or the previously recorded Constant is
+  // identical, the Constant is consistent so far; otherwise it is not.
+ if (Inserted || (GVNToConstantIt->second == CST))
+ return true;
+
+ return false;
+}
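+
+// To illustrate the contract above: with a hypothetical GVN 5 first recorded
+// as the constant i32 1, a later i32 1 for GVN 5 yields true, an i32 2 for
+// GVN 5 yields false, and any non-constant Value yields None.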
+
+InstructionCost OutlinableRegion::getBenefit(TargetTransformInfo &TTI) {
+ InstructionCost Benefit = 0;
+
+  // Estimate the benefit of outlining a specific section of the program. We
+  // mostly delegate this task to the TargetTransformInfo so that if the target
+  // has specific changes, we can have a more accurate estimate.
+
+  // However, getInstructionCost delegates the code size calculation for
+  // arithmetic instructions to getArithmeticInstrCost in
+  // include/Analysis/TargetTransformImpl.h, where it always estimates the code
+  // size of a division or remainder instruction to be 4, and everything else
+  // to be 1. This is not an accurate representation of the division
+  // instruction for targets that have a native division instruction. To be
+  // conservative, we only add 1 to the number of instructions for each
+  // division instruction.
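+  //
+  // For example, a hypothetical region of {add, udiv, mul} is credited the
+  // TCK_CodeSize cost of the add and the mul, plus exactly 1 for the udiv,
+  // rather than the default division estimate of 4.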
+ for (Instruction &I : *StartBB) {
+ switch (I.getOpcode()) {
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ case Instruction::UDiv:
+ case Instruction::URem:
+ Benefit += 1;
+ break;
+ default:
+ Benefit += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
+ break;
+ }
+ }
+
+ return Benefit;
+}
+
+/// Find whether \p Region matches the global value numbering to Constant
+/// mapping found so far.
+///
+/// \param Region - The OutlinableRegion we are checking for constants
+/// \param GVNToConstant - The mapping of global value number to Constants.
+/// \param NotSame - The set of global value numbers that do not have the same
+/// constant in each region.
+/// \returns true if all Constants are the same in every use of a Constant in \p
+/// Region and false if not
+static bool
+collectRegionsConstants(OutlinableRegion &Region,
+ DenseMap<unsigned, Constant *> &GVNToConstant,
+ DenseSet<unsigned> &NotSame) {
+ bool ConstantsTheSame = true;
+
+ IRSimilarityCandidate &C = *Region.Candidate;
+ for (IRInstructionData &ID : C) {
+
+    // Iterate over the operands in an instruction. If the global value number,
+    // assigned by the IRSimilarityCandidate, has been seen before, we check
+    // whether the number has already been found not to be the same value in
+    // each instance.
+ for (Value *V : ID.OperVals) {
+ Optional<unsigned> GVNOpt = C.getGVN(V);
+ assert(GVNOpt.hasValue() && "Expected a GVN for operand?");
+ unsigned GVN = GVNOpt.getValue();
+
+ // Check if this global value has been found to not be the same already.
+ if (NotSame.contains(GVN)) {
+ if (isa<Constant>(V))
+ ConstantsTheSame = false;
+ continue;
+ }
+
+      // If it has been the same so far, check whether the associated Constant
+      // value matches the previous instances of the same global value number.
+      // If the global value does not map to a Constant, it is considered not
+      // to be the same value.
+ Optional<bool> ConstantMatches = constantMatches(V, GVN, GVNToConstant);
+ if (ConstantMatches.hasValue()) {
+ if (ConstantMatches.getValue())
+ continue;
+ else
+ ConstantsTheSame = false;
+ }
+
+      // This value is currently a register, but it might not have been in
+      // other regions; make sure we don't already have a constant mapped to
+      // this global value number.
+ if (GVNToConstant.find(GVN) != GVNToConstant.end())
+ ConstantsTheSame = false;
+
+ NotSame.insert(GVN);
+ }
+ }
+
+ return ConstantsTheSame;
+}
+
+void OutlinableGroup::findSameConstants(DenseSet<unsigned> &NotSame) {
+ DenseMap<unsigned, Constant *> GVNToConstant;
+
+ for (OutlinableRegion *Region : Regions)
+ collectRegionsConstants(*Region, GVNToConstant, NotSame);
+}
+
+void OutlinableGroup::collectGVNStoreSets(Module &M) {
+ for (OutlinableRegion *OS : Regions)
+ OutputGVNCombinations.insert(OS->GVNStores);
+
+  // We are adding an extracted argument to decide which output path to use in
+  // the basic block. It is used in a switch statement and only needs to be an
+  // integer.
+ if (OutputGVNCombinations.size() > 1)
+ ArgumentTypes.push_back(Type::getInt32Ty(M.getContext()));
+}
+
+Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
+ unsigned FunctionNameSuffix) {
+ assert(!Group.OutlinedFunction && "Function is already defined!");
+
+ Group.OutlinedFunctionType = FunctionType::get(
+ Type::getVoidTy(M.getContext()), Group.ArgumentTypes, false);
+
+ // These functions will only be called from within the same module, so
+ // we can set an internal linkage.
+ Group.OutlinedFunction = Function::Create(
+ Group.OutlinedFunctionType, GlobalValue::InternalLinkage,
+ "outlined_ir_func_" + std::to_string(FunctionNameSuffix), M);
+
+ // Transfer the swifterr attribute to the correct function parameter.
+ if (Group.SwiftErrorArgument.hasValue())
+ Group.OutlinedFunction->addParamAttr(Group.SwiftErrorArgument.getValue(),
+ Attribute::SwiftError);
+
+ Group.OutlinedFunction->addFnAttr(Attribute::OptimizeForSize);
+ Group.OutlinedFunction->addFnAttr(Attribute::MinSize);
+
+ return Group.OutlinedFunction;
+}
+
+/// Move each BasicBlock in \p Old to \p New.
+///
+/// \param [in] Old - the function to move the basic blocks from.
+/// \param [in] New - The function to move the basic blocks to.
+/// \returns the first return block for the function in New.
+static BasicBlock *moveFunctionData(Function &Old, Function &New) {
+  Function::iterator CurrBB, NextBB, FinalBB;
+  BasicBlock *NewEnd = nullptr;
+ for (CurrBB = Old.begin(), FinalBB = Old.end(); CurrBB != FinalBB;
+ CurrBB = NextBB) {
+ NextBB = std::next(CurrBB);
+ CurrBB->removeFromParent();
+ CurrBB->insertInto(&New);
+ Instruction *I = CurrBB->getTerminator();
+ if (isa<ReturnInst>(I))
+ NewEnd = &(*CurrBB);
+ }
+
+ assert(NewEnd && "No return instruction for new function?");
+ return NewEnd;
+}
+
+/// Find the constants that will need to be lifted into arguments
+/// as they are not the same in each instance of the region.
+///
+/// \param [in] C - The IRSimilarityCandidate containing the region we are
+/// analyzing.
+/// \param [in] NotSame - The set of global value numbers that do not have a
+/// single Constant across all OutlinableRegions similar to \p C.
+/// \param [out] Inputs - The list containing the global value numbers of the
+/// arguments needed for the region of code.
+static void findConstants(IRSimilarityCandidate &C, DenseSet<unsigned> &NotSame,
+ std::vector<unsigned> &Inputs) {
+ DenseSet<unsigned> Seen;
+ // Iterate over the instructions, and find what constants will need to be
+ // extracted into arguments.
+ for (IRInstructionDataList::iterator IDIt = C.begin(), EndIDIt = C.end();
+ IDIt != EndIDIt; IDIt++) {
+ for (Value *V : (*IDIt).OperVals) {
+ // Since these are stored before any outlining, they will be in the
+ // global value numbering.
+ unsigned GVN = C.getGVN(V).getValue();
+ if (isa<Constant>(V))
+ if (NotSame.contains(GVN) && !Seen.contains(GVN)) {
+ Inputs.push_back(GVN);
+ Seen.insert(GVN);
+ }
+ }
+ }
+}
+
+/// Find the GVN for the inputs that have been found by the CodeExtractor.
+///
+/// \param [in] C - The IRSimilarityCandidate containing the region we are
+/// analyzing.
+/// \param [in] CurrentInputs - The set of inputs found by the
+/// CodeExtractor.
+/// \param [in] OutputMappings - The mapping of values that have been replaced
+/// by a new output value.
+/// \param [out] EndInputNumbers - The global value numbers for the extracted
+/// arguments.
+static void mapInputsToGVNs(IRSimilarityCandidate &C,
+ SetVector<Value *> &CurrentInputs,
+ const DenseMap<Value *, Value *> &OutputMappings,
+ std::vector<unsigned> &EndInputNumbers) {
+ // Get the Global Value Number for each input. We check if the Value has been
+ // replaced by a different value at output, and use the original value before
+ // replacement.
+ for (Value *Input : CurrentInputs) {
+ assert(Input && "Have a nullptr as an input");
+ if (OutputMappings.find(Input) != OutputMappings.end())
+ Input = OutputMappings.find(Input)->second;
+ assert(C.getGVN(Input).hasValue() &&
+ "Could not find a numbering for the given input");
+ EndInputNumbers.push_back(C.getGVN(Input).getValue());
+ }
+}
+
+/// Find the original value for the \p ArgInput values if any one of them was
+/// replaced during a previous extraction.
+///
+/// \param [in] ArgInputs - The inputs to be extracted by the code extractor.
+/// \param [in] OutputMappings - The mapping of values that have been replaced
+/// by a new output value.
+/// \param [out] RemappedArgInputs - The remapped values according to
+/// \p OutputMappings that will be extracted.
+static void
+remapExtractedInputs(const ArrayRef<Value *> ArgInputs,
+ const DenseMap<Value *, Value *> &OutputMappings,
+ SetVector<Value *> &RemappedArgInputs) {
+ // Get the global value number for each input that will be extracted as an
+ // argument by the code extractor, remapping if needed for reloaded values.
+ for (Value *Input : ArgInputs) {
+ if (OutputMappings.find(Input) != OutputMappings.end())
+ Input = OutputMappings.find(Input)->second;
+ RemappedArgInputs.insert(Input);
+ }
+}
+
+/// Find the input GVNs and the output values for a region of Instructions.
+/// Using the code extractor, we collect the inputs to the extracted function.
+///
+/// The \p Region can be identified as needing to be ignored in this function.
+/// It should be checked whether it should be ignored after a call to this
+/// function.
+///
+/// \param [in,out] Region - The region of code to be analyzed.
+/// \param [out] InputGVNs - The global value numbers for the extracted
+/// arguments.
+/// \param [in] NotSame - The global value numbers in the region that do not
+/// have the same constant value in the regions structurally similar to
+/// \p Region.
+/// \param [in] OutputMappings - The mapping of values that have been replaced
+/// by a new output value after extraction.
+/// \param [out] ArgInputs - The values of the inputs to the extracted function.
+/// \param [out] Outputs - The set of values extracted by the CodeExtractor
+/// as outputs.
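+///
+/// As a hypothetical example, a region computing %sum = add i32 %a, %b, where
+/// %a and %b are defined outside the region and %sum is used after it, would
+/// yield ArgInputs {%a, %b}, Outputs {%sum}, and the global value numbers of
+/// %a and %b in InputGVNs.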
+static void getCodeExtractorArguments(
+ OutlinableRegion &Region, std::vector<unsigned> &InputGVNs,
+ DenseSet<unsigned> &NotSame, DenseMap<Value *, Value *> &OutputMappings,
+ SetVector<Value *> &ArgInputs, SetVector<Value *> &Outputs) {
+ IRSimilarityCandidate &C = *Region.Candidate;
+
+  // OverallInputs are the inputs to the region found by the CodeExtractor,
+  // SinkCands and HoistCands are used by the CodeExtractor to find sunken
+  // allocas of values whose lifetimes are contained completely within the
+  // outlined region. PremappedInputs are the arguments found by the
+  // CodeExtractor, removing conditions such as sunken allocas, but they
+  // may need to be remapped due to the extracted output values replacing
+  // the original values. We use DummyOutputs for this first run of finding
+  // inputs and outputs since the outputs could change during findAllocas;
+  // the correct set of extracted outputs will be in the final Outputs
+  // ValueSet.
+ SetVector<Value *> OverallInputs, PremappedInputs, SinkCands, HoistCands,
+ DummyOutputs;
+
+ // Use the code extractor to get the inputs and outputs, without sunken
+ // allocas or removing llvm.assumes.
+ CodeExtractor *CE = Region.CE;
+ CE->findInputsOutputs(OverallInputs, DummyOutputs, SinkCands);
+ assert(Region.StartBB && "Region must have a start BasicBlock!");
+ Function *OrigF = Region.StartBB->getParent();
+ CodeExtractorAnalysisCache CEAC(*OrigF);
+ BasicBlock *Dummy = nullptr;
+
+ // The region may be ineligible due to VarArgs in the parent function. In this
+ // case we ignore the region.
+ if (!CE->isEligible()) {
+ Region.IgnoreRegion = true;
+ return;
+ }
+
+ // Find if any values are going to be sunk into the function when extracted
+ CE->findAllocas(CEAC, SinkCands, HoistCands, Dummy);
+ CE->findInputsOutputs(PremappedInputs, Outputs, SinkCands);
+
+  // TODO: Support regions with sunken allocas: values whose lifetimes are
+  // contained completely within the outlined region. These are not guaranteed
+  // to be the same in every region, so we must elevate them all to arguments
+  // when they appear. If the two sizes are not equal, it means some Input in
+  // OverallInputs was removed when computing PremappedInputs.
+ if (OverallInputs.size() != PremappedInputs.size()) {
+ Region.IgnoreRegion = true;
+ return;
+ }
+
+ findConstants(C, NotSame, InputGVNs);
+
+ mapInputsToGVNs(C, OverallInputs, OutputMappings, InputGVNs);
+
+ remapExtractedInputs(PremappedInputs.getArrayRef(), OutputMappings,
+ ArgInputs);
+
+  // Sort the GVNs: since we now have constants included in \ref InputGVNs, we
+  // need to make sure they are in a deterministic order.
+ stable_sort(InputGVNs);
+}
+
+/// Look over the inputs and map each input argument to an argument in the
+/// overall function for the OutlinableRegions. This creates a way to replace
+/// the arguments of the extracted function with the arguments of the new
+/// overall function.
+///
+/// \param [in,out] Region - The region of code to be analyzed.
+/// \param [in] InputGVNs - The global value numbering of the input values
+/// collected.
+/// \param [in] ArgInputs - The values of the arguments to the extracted
+/// function.
+static void
+findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
+ std::vector<unsigned> &InputGVNs,
+ SetVector<Value *> &ArgInputs) {
+
+ IRSimilarityCandidate &C = *Region.Candidate;
+ OutlinableGroup &Group = *Region.Parent;
+
+ // This counts the argument number in the overall function.
+ unsigned TypeIndex = 0;
+
+ // This counts the argument number in the extracted function.
+ unsigned OriginalIndex = 0;
+
+ // Find the mapping of the extracted arguments to the arguments for the
+ // overall function. Since there may be extra arguments in the overall
+ // function to account for the extracted constants, we have two different
+ // counters as we find extracted arguments, and as we come across overall
+ // arguments.
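+  //
+  // For example, with hypothetical sorted input GVNs {g0, g1, g2} where g1
+  // maps to a constant that differs between regions:
+  //   extracted argument 0 -> overall argument 0 (g0)
+  //   constant for g1      -> overall argument 1 (recorded in AggArgToConstant)
+  //   extracted argument 1 -> overall argument 2 (g2)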
+ for (unsigned InputVal : InputGVNs) {
+ Optional<Value *> InputOpt = C.fromGVN(InputVal);
+ assert(InputOpt.hasValue() && "Global value number not found?");
+ Value *Input = InputOpt.getValue();
+
+ if (!Group.InputTypesSet) {
+ Group.ArgumentTypes.push_back(Input->getType());
+ // If the input value has a swifterr attribute, make sure to mark the
+ // argument in the overall function.
+ if (Input->isSwiftError()) {
+ assert(
+ !Group.SwiftErrorArgument.hasValue() &&
+ "Argument already marked with swifterr for this OutlinableGroup!");
+ Group.SwiftErrorArgument = TypeIndex;
+ }
+ }
+
+ // Check if we have a constant. If we do add it to the overall argument
+ // number to Constant map for the region, and continue to the next input.
+ if (Constant *CST = dyn_cast<Constant>(Input)) {
+ Region.AggArgToConstant.insert(std::make_pair(TypeIndex, CST));
+ TypeIndex++;
+ continue;
+ }
+
+ // It is not a constant, we create the mapping from extracted argument list
+ // to the overall argument list.
+ assert(ArgInputs.count(Input) && "Input cannot be found!");
+
+ Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, TypeIndex));
+ Region.AggArgToExtracted.insert(std::make_pair(TypeIndex, OriginalIndex));
+ OriginalIndex++;
+ TypeIndex++;
+ }
+
+ // If the function type definitions for the OutlinableGroup holding the region
+ // have not been set, set the length of the inputs here. We should have the
+ // same inputs for all of the different regions contained in the
+ // OutlinableGroup since they are all structurally similar to one another.
+ if (!Group.InputTypesSet) {
+ Group.NumAggregateInputs = TypeIndex;
+ Group.InputTypesSet = true;
+ }
+
+ Region.NumExtractedInputs = OriginalIndex;
+}
+
+/// Create a mapping of the output arguments for the \p Region to the output
+/// arguments of the overall outlined function.
+///
+/// \param [in,out] Region - The region of code to be analyzed.
+/// \param [in] Outputs - The values found by the code extractor.
+static void
+findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region,
+ ArrayRef<Value *> Outputs) {
+ OutlinableGroup &Group = *Region.Parent;
+ IRSimilarityCandidate &C = *Region.Candidate;
+
+ // This counts the argument number in the extracted function.
+ unsigned OriginalIndex = Region.NumExtractedInputs;
+
+ // This counts the argument number in the overall function.
+ unsigned TypeIndex = Group.NumAggregateInputs;
+ bool TypeFound;
+ DenseSet<unsigned> AggArgsUsed;
+
+  // Iterate over the output types and identify if there is an aggregate
+  // pointer type whose base type matches the current output type. If there is,
+  // we mark that we will use this output register for this value. If not, we
+  // add another type to the overall argument type list. We also store the GVNs
+  // used for stores to identify which values will need to be moved into a
+  // special block that holds the stores to the output registers.
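+  //
+  // For example, if the overall function already has an i32* output argument
+  // that this region has not yet claimed, a new i32 output reuses it instead
+  // of appending another i32* to ArgumentTypes.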
+ for (Value *Output : Outputs) {
+ TypeFound = false;
+ // We can do this since it is a result value, and will have a number
+ // that is necessarily the same. BUT if in the future, the instructions
+ // do not have to be in same order, but are functionally the same, we will
+ // have to use a different scheme, as one-to-one correspondence is not
+ // guaranteed.
+ unsigned GlobalValue = C.getGVN(Output).getValue();
+ unsigned ArgumentSize = Group.ArgumentTypes.size();
+
+ for (unsigned Jdx = TypeIndex; Jdx < ArgumentSize; Jdx++) {
+ if (Group.ArgumentTypes[Jdx] != PointerType::getUnqual(Output->getType()))
+ continue;
+
+ if (AggArgsUsed.contains(Jdx))
+ continue;
+
+ TypeFound = true;
+ AggArgsUsed.insert(Jdx);
+ Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, Jdx));
+ Region.AggArgToExtracted.insert(std::make_pair(Jdx, OriginalIndex));
+ Region.GVNStores.push_back(GlobalValue);
+ break;
+ }
+
+ // We were unable to find an unused type in the output type set that matches
+ // the output, so we add a pointer type to the argument types of the overall
+ // function to handle this output and create a mapping to it.
+ if (!TypeFound) {
+ Group.ArgumentTypes.push_back(PointerType::getUnqual(Output->getType()));
+ AggArgsUsed.insert(Group.ArgumentTypes.size() - 1);
+ Region.ExtractedArgToAgg.insert(
+ std::make_pair(OriginalIndex, Group.ArgumentTypes.size() - 1));
+ Region.AggArgToExtracted.insert(
+ std::make_pair(Group.ArgumentTypes.size() - 1, OriginalIndex));
+ Region.GVNStores.push_back(GlobalValue);
+ }
+
+ stable_sort(Region.GVNStores);
+ OriginalIndex++;
+ TypeIndex++;
+ }
+}
+
+void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region,
+ DenseSet<unsigned> &NotSame) {
+ std::vector<unsigned> Inputs;
+ SetVector<Value *> ArgInputs, Outputs;
+
+ getCodeExtractorArguments(Region, Inputs, NotSame, OutputMappings, ArgInputs,
+ Outputs);
+
+ if (Region.IgnoreRegion)
+ return;
+
+ // Map the inputs found by the CodeExtractor to the arguments found for
+ // the overall function.
+ findExtractedInputToOverallInputMapping(Region, Inputs, ArgInputs);
+
+ // Map the outputs found by the CodeExtractor to the arguments found for
+ // the overall function.
+ findExtractedOutputToOverallOutputMapping(Region, Outputs.getArrayRef());
+}
+
+/// Replace the extracted function in the Region with a call to the overall
+/// function constructed from the deduplicated similar regions, replacing and
+/// remapping the values passed to the extracted function as arguments to the
+/// new arguments of the overall function.
+///
+/// \param [in] M - The module to outline from.
+/// \param [in] Region - The regions of extracted code to be replaced with a new
+/// function.
+/// \returns a call instruction with the replaced function.
+CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
+ std::vector<Value *> NewCallArgs;
+ DenseMap<unsigned, unsigned>::iterator ArgPair;
+
+ OutlinableGroup &Group = *Region.Parent;
+ CallInst *Call = Region.Call;
+ assert(Call && "Call to replace is nullptr?");
+ Function *AggFunc = Group.OutlinedFunction;
+ assert(AggFunc && "Function to replace with is nullptr?");
+
+  // If the argument lists are the same size, there are no values that need to
+  // be lifted into arguments and no different output registers to handle. We
+  // can simply replace the called function in this case.
+ if (AggFunc->arg_size() == Call->arg_size()) {
+ LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
+ << *AggFunc << " with same number of arguments\n");
+ Call->setCalledFunction(AggFunc);
+ return Call;
+ }
+
+  // We have a different number of arguments than the new function, so
+  // we need to use our previously built mappings, from extracted argument to
+  // overall function argument and from constant to overall function argument,
+  // to create the new argument list.
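+  //
+  // As a hypothetical example, an extracted call
+  //   call void @outlined.1(i32 %a, i32* %result.loc)
+  // might be rebuilt against the overall function as
+  //   call void @outlined_ir_func_0(i32 %a, i32 7, i32* %result.loc, i32 1)
+  // where i32 7 is a lifted constant and the trailing i32 1 selects the
+  // output block.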
+ for (unsigned AggArgIdx = 0; AggArgIdx < AggFunc->arg_size(); AggArgIdx++) {
+
+ if (AggArgIdx == AggFunc->arg_size() - 1 &&
+ Group.OutputGVNCombinations.size() > 1) {
+ // If we are on the last argument, and we need to differentiate between
+ // output blocks, add an integer to the argument list to determine
+ // what block to take
+ LLVM_DEBUG(dbgs() << "Set switch block argument to "
+ << Region.OutputBlockNum << "\n");
+ NewCallArgs.push_back(ConstantInt::get(Type::getInt32Ty(M.getContext()),
+ Region.OutputBlockNum));
+ continue;
+ }
+
+ ArgPair = Region.AggArgToExtracted.find(AggArgIdx);
+ if (ArgPair != Region.AggArgToExtracted.end()) {
+ Value *ArgumentValue = Call->getArgOperand(ArgPair->second);
+ // If we found the mapping from the extracted function to the overall
+ // function, we simply add it to the argument list. We use the same
+ // value, it just needs to honor the new order of arguments.
+ LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
+ << *ArgumentValue << "\n");
+ NewCallArgs.push_back(ArgumentValue);
+ continue;
+ }
+
+ // If it is a constant, we simply add it to the argument list as a value.
+ if (Region.AggArgToConstant.find(AggArgIdx) !=
+ Region.AggArgToConstant.end()) {
+ Constant *CST = Region.AggArgToConstant.find(AggArgIdx)->second;
+ LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
+ << *CST << "\n");
+ NewCallArgs.push_back(CST);
+ continue;
+ }
+
+ // Add a nullptr value if the argument is not found in the extracted
+ // function. If we cannot find a value, it means it is not in use
+ // for the region, so we should not pass anything to it.
+ LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to nullptr\n");
+ NewCallArgs.push_back(ConstantPointerNull::get(
+ static_cast<PointerType *>(AggFunc->getArg(AggArgIdx)->getType())));
+ }
+
+ LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
+ << *AggFunc << " with new set of arguments\n");
+ // Create the new call instruction and erase the old one.
+ Call = CallInst::Create(AggFunc->getFunctionType(), AggFunc, NewCallArgs, "",
+ Call);
+
+  // It is possible that the call to the outlined function is either the first
+  // instruction in the new block, the last instruction, or both. If either of
+  // these is the case, we need to make sure that we replace the instruction in
+  // the IRInstructionData struct with the new call.
+ CallInst *OldCall = Region.Call;
+ if (Region.NewFront->Inst == OldCall)
+ Region.NewFront->Inst = Call;
+ if (Region.NewBack->Inst == OldCall)
+ Region.NewBack->Inst = Call;
+
+ // Transfer any debug information.
+ Call->setDebugLoc(Region.Call->getDebugLoc());
+
+ // Remove the old instruction.
+ OldCall->eraseFromParent();
+ Region.Call = Call;
+
+ // Make sure that the argument in the new function has the SwiftError
+ // argument.
+ if (Group.SwiftErrorArgument.hasValue())
+ Call->addParamAttr(Group.SwiftErrorArgument.getValue(),
+ Attribute::SwiftError);
+
+ return Call;
+}
+
+/// Within an extracted function, replace the argument uses of the extracted
+/// region with the arguments of the function for an OutlinableGroup.
+///
+/// \param [in] Region - The region of extracted code to be changed.
+/// \param [in,out] OutputBB - The BasicBlock for the output stores for this
+/// region.
+static void replaceArgumentUses(OutlinableRegion &Region,
+ BasicBlock *OutputBB) {
+ OutlinableGroup &Group = *Region.Parent;
+ assert(Region.ExtractedFunction && "Region has no extracted function?");
+
+ for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size();
+ ArgIdx++) {
+ assert(Region.ExtractedArgToAgg.find(ArgIdx) !=
+ Region.ExtractedArgToAgg.end() &&
+ "No mapping from extracted to outlined?");
+ unsigned AggArgIdx = Region.ExtractedArgToAgg.find(ArgIdx)->second;
+ Argument *AggArg = Group.OutlinedFunction->getArg(AggArgIdx);
+ Argument *Arg = Region.ExtractedFunction->getArg(ArgIdx);
+ // The argument is an input, so we can simply replace it with the overall
+ // argument value
+ if (ArgIdx < Region.NumExtractedInputs) {
+ LLVM_DEBUG(dbgs() << "Replacing uses of input " << *Arg << " in function "
+ << *Region.ExtractedFunction << " with " << *AggArg
+ << " in function " << *Group.OutlinedFunction << "\n");
+ Arg->replaceAllUsesWith(AggArg);
+ continue;
+ }
+
+ // If we are replacing an output, we place the store value in its own
+ // block inside the overall function before replacing the use of the output
+ // in the function.
+ assert(Arg->hasOneUse() && "Output argument can only have one use");
+ User *InstAsUser = Arg->user_back();
+ assert(InstAsUser && "User is nullptr!");
+
+ Instruction *I = cast<Instruction>(InstAsUser);
+ I->setDebugLoc(DebugLoc());
+ LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
+ << *OutputBB << "\n");
+
+ I->moveBefore(*OutputBB, OutputBB->end());
+
+ LLVM_DEBUG(dbgs() << "Replacing uses of output " << *Arg << " in function "
+ << *Region.ExtractedFunction << " with " << *AggArg
+ << " in function " << *Group.OutlinedFunction << "\n");
+ Arg->replaceAllUsesWith(AggArg);
+ }
+}
+
+/// Within an extracted function, replace the constants that need to be lifted
+/// into arguments with the actual argument.
+///
+/// \param [in] Region - The region of extracted code to be changed.
+void replaceConstants(OutlinableRegion &Region) {
+ OutlinableGroup &Group = *Region.Parent;
+ // Iterate over the constants that need to be elevated into arguments
+ for (std::pair<unsigned, Constant *> &Const : Region.AggArgToConstant) {
+ unsigned AggArgIdx = Const.first;
+ Function *OutlinedFunction = Group.OutlinedFunction;
+ assert(OutlinedFunction && "Overall Function is not defined?");
+ Constant *CST = Const.second;
+ Argument *Arg = Group.OutlinedFunction->getArg(AggArgIdx);
+ // Identify the argument it will be elevated to, and replace instances of
+ // that constant in the function.
+
+ // TODO: If in the future constants do not have one global value number,
+ // i.e. a constant 1 could be mapped to several values, this check will
+ // have to be more strict. It cannot be using only replaceUsesWithIf.
+
+ LLVM_DEBUG(dbgs() << "Replacing uses of constant " << *CST
+ << " in function " << *OutlinedFunction << " with "
+ << *Arg << "\n");
+ CST->replaceUsesWithIf(Arg, [OutlinedFunction](Use &U) {
+ if (Instruction *I = dyn_cast<Instruction>(U.getUser()))
+ return I->getFunction() == OutlinedFunction;
+ return false;
+ });
+ }
+}
+
+/// For the given function, find all the non-debug, non-lifetime instructions,
+/// and return them as a vector. Exclude any blocks in \p ExcludeBlocks.
+///
+/// \param [in] F - The function we collect the instructions from.
+/// \param [in] ExcludeBlocks - BasicBlocks to ignore.
+/// \returns the list of instructions extracted.
+static std::vector<Instruction *>
+collectRelevantInstructions(Function &F,
+ DenseSet<BasicBlock *> &ExcludeBlocks) {
+ std::vector<Instruction *> RelevantInstructions;
+
+ for (BasicBlock &BB : F) {
+ if (ExcludeBlocks.contains(&BB))
+ continue;
+
+ for (Instruction &Inst : BB) {
+ if (Inst.isLifetimeStartOrEnd())
+ continue;
+ if (isa<DbgInfoIntrinsic>(Inst))
+ continue;
+
+ RelevantInstructions.push_back(&Inst);
+ }
+ }
+
+ return RelevantInstructions;
+}
+
+/// It is possible that there is a basic block that already performs the same
+/// stores. This returns a duplicate block, if it exists.
+///
+/// \param [in] OutputBB - The block we are looking for a duplicate of.
+/// \param [in] OutputStoreBBs - The existing output blocks.
+/// \returns an optional value with the number of the output block if there is
+/// a match.
+Optional<unsigned>
+findDuplicateOutputBlock(BasicBlock *OutputBB,
+ ArrayRef<BasicBlock *> OutputStoreBBs) {
+
+ bool WrongInst = false;
+ bool WrongSize = false;
+ unsigned MatchingNum = 0;
+ for (BasicBlock *CompBB : OutputStoreBBs) {
+ WrongInst = false;
+ if (CompBB->size() - 1 != OutputBB->size()) {
+ WrongSize = true;
+ MatchingNum++;
+ continue;
+ }
+
+ WrongSize = false;
+ BasicBlock::iterator NIt = OutputBB->begin();
+ for (Instruction &I : *CompBB) {
+ if (isa<BranchInst>(&I))
+ continue;
+
+ if (!I.isIdenticalTo(&(*NIt))) {
+ WrongInst = true;
+ break;
+ }
+
+ NIt++;
+ }
+ if (!WrongInst && !WrongSize)
+ return MatchingNum;
+
+ MatchingNum++;
+ }
+
+ return None;
+}
+
+/// For the outlined section, move the needed StoreInsts for the output
+/// registers into their own block. Then, determine if there is a duplicate
+/// output block already created.
+///
+/// \param [in] OG - The OutlinableGroup of regions to be outlined.
+/// \param [in] Region - The OutlinableRegion that is being analyzed.
+/// \param [in,out] OutputBB - The block that stores for this region will be
+/// placed in.
+/// \param [in] EndBB - The final block of the extracted function.
+/// \param [in] OutputMappings - The mapping of values that have been replaced
+/// by a new output value.
+/// \param [in,out] OutputStoreBBs - The existing output blocks.
+static void
+alignOutputBlockWithAggFunc(OutlinableGroup &OG, OutlinableRegion &Region,
+ BasicBlock *OutputBB, BasicBlock *EndBB,
+ const DenseMap<Value *, Value *> &OutputMappings,
+ std::vector<BasicBlock *> &OutputStoreBBs) {
+ DenseSet<unsigned> ValuesToFind(Region.GVNStores.begin(),
+ Region.GVNStores.end());
+
+ // We iterate over the instructions in the extracted function, and find the
+ // global value number of the instructions. If we find a value that should
+ // be contained in a store, we replace the uses of the value with the value
+ // from the overall function, so that the store is storing the correct
+ // value from the overall function.
+ DenseSet<BasicBlock *> ExcludeBBs(OutputStoreBBs.begin(),
+ OutputStoreBBs.end());
+ ExcludeBBs.insert(OutputBB);
+ std::vector<Instruction *> ExtractedFunctionInsts =
+ collectRelevantInstructions(*(Region.ExtractedFunction), ExcludeBBs);
+ std::vector<Instruction *> OverallFunctionInsts =
+ collectRelevantInstructions(*OG.OutlinedFunction, ExcludeBBs);
+
+ assert(ExtractedFunctionInsts.size() == OverallFunctionInsts.size() &&
+ "Number of relevant instructions not equal!");
+
+ unsigned NumInstructions = ExtractedFunctionInsts.size();
+ for (unsigned Idx = 0; Idx < NumInstructions; Idx++) {
+ Value *V = ExtractedFunctionInsts[Idx];
+
+ if (OutputMappings.find(V) != OutputMappings.end())
+ V = OutputMappings.find(V)->second;
+ Optional<unsigned> GVN = Region.Candidate->getGVN(V);
+
+ // If we have found one of the stored values for output, replace the value
+ // with the corresponding one from the overall function.
+    if (GVN.hasValue() && ValuesToFind.erase(GVN.getValue()))
+      V->replaceAllUsesWith(OverallFunctionInsts[Idx]);
+
+    if (ValuesToFind.empty())
+      break;
+  }
+
+  assert(ValuesToFind.empty() && "Not all store values were handled!");
+
+ // If the size of the block is 0, then there are no stores, and we do not
+ // need to save this block.
+ if (OutputBB->size() == 0) {
+ Region.OutputBlockNum = -1;
+ OutputBB->eraseFromParent();
+ return;
+ }
+
+  // Determine if there is a duplicate block.
+ Optional<unsigned> MatchingBB =
+ findDuplicateOutputBlock(OutputBB, OutputStoreBBs);
+
+  // If there is, we remove the new output block. If not, we add it to our
+  // list of output blocks.
+ if (MatchingBB.hasValue()) {
+ LLVM_DEBUG(dbgs() << "Set output block for region in function"
+ << Region.ExtractedFunction << " to "
+ << MatchingBB.getValue());
+
+ Region.OutputBlockNum = MatchingBB.getValue();
+ OutputBB->eraseFromParent();
+ return;
+ }
+
+ Region.OutputBlockNum = OutputStoreBBs.size();
+
+ LLVM_DEBUG(dbgs() << "Create output block for region in"
+ << Region.ExtractedFunction << " to "
+ << *OutputBB);
+ OutputStoreBBs.push_back(OutputBB);
+ BranchInst::Create(EndBB, OutputBB);
+}
+
+/// Create the switch statement for the outlined function to differentiate
+/// between all the output blocks.
+///
+/// If there is only one set of output stores, the block of stores is instead
+/// merged into the end block to avoid an unneeded switch and branch.
+/// \param [in] M - The module we are outlining from.
+/// \param [in] OG - The group of regions to be outlined.
+/// \param [in] EndBB - The final block of the extracted function.
+/// \param [in,out] OutputStoreBBs - The existing output blocks.
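+///
+/// Schematically, with two hypothetical store combinations the end of the
+/// outlined function becomes:
+///   end_block:
+///     switch i32 %select, label %final_block [ i32 0, label %output_block_0
+///                                              i32 1, label %output_block_1 ]
+///   output_block_0:
+///     ...stores for combination 0...
+///     br label %final_block
+///   final_block:
+///     ret void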
+void createSwitchStatement(Module &M, OutlinableGroup &OG, BasicBlock *EndBB,
+ ArrayRef<BasicBlock *> OutputStoreBBs) {
+ // We only need the switch statement if there is more than one store
+ // combination.
+ if (OG.OutputGVNCombinations.size() > 1) {
+ Function *AggFunc = OG.OutlinedFunction;
+ // Create a final block
+ BasicBlock *ReturnBlock =
+ BasicBlock::Create(M.getContext(), "final_block", AggFunc);
+ Instruction *Term = EndBB->getTerminator();
+ Term->moveBefore(*ReturnBlock, ReturnBlock->end());
+ // Put the switch statement in the old end basic block for the function with
+ // a fall through to the new return block
+ LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
+ << OutputStoreBBs.size() << "\n");
+ SwitchInst *SwitchI =
+ SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1),
+ ReturnBlock, OutputStoreBBs.size(), EndBB);
+
+ unsigned Idx = 0;
+ for (BasicBlock *BB : OutputStoreBBs) {
+ SwitchI->addCase(ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx),
+ BB);
+ Term = BB->getTerminator();
+ Term->setSuccessor(0, ReturnBlock);
+ Idx++;
+ }
+ return;
+ }
+
+  // If stores are needed, move them from the output block to the end block to
+  // save on branching instructions.
+ LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
+ << *OG.OutlinedFunction << "\n");
+ BasicBlock *OutputBlock = OutputStoreBBs[0];
+ Instruction *Term = OutputBlock->getTerminator();
+ Term->eraseFromParent();
+ Term = EndBB->getTerminator();
+ moveBBContents(*OutputBlock, *EndBB);
+ Term->moveBefore(*EndBB, EndBB->end());
+ OutputBlock->eraseFromParent();
+ }
+}
+
+/// Fill the new function, which will serve as the replacement for all of the
+/// extracted regions of a certain structure, from the first region in the
+/// list of regions. Replace this first region's extracted function with the
+/// new overall function.
+///
+/// \param [in] M - The module we are outlining from.
+/// \param [in] CurrentGroup - The group of regions to be outlined.
+/// \param [in,out] OutputStoreBBs - The output blocks for each different
+/// set of stores needed for the different functions.
+/// \param [in,out] FuncsToRemove - Extracted functions to erase from module
+/// once outlining is complete.
+static void fillOverallFunction(Module &M, OutlinableGroup &CurrentGroup,
+ std::vector<BasicBlock *> &OutputStoreBBs,
+ std::vector<Function *> &FuncsToRemove) {
+ OutlinableRegion *CurrentOS = CurrentGroup.Regions[0];
+
+ // Move first extracted function's instructions into new function.
+ LLVM_DEBUG(dbgs() << "Move instructions from "
+ << *CurrentOS->ExtractedFunction << " to instruction "
+ << *CurrentGroup.OutlinedFunction << "\n");
+
+ CurrentGroup.EndBB = moveFunctionData(*CurrentOS->ExtractedFunction,
+ *CurrentGroup.OutlinedFunction);
+
+ // Transfer the attributes from the function to the new function.
+ for (Attribute A :
+ CurrentOS->ExtractedFunction->getAttributes().getFnAttributes())
+ CurrentGroup.OutlinedFunction->addFnAttr(A);
+
+ // Create an output block for the first extracted function.
+ BasicBlock *NewBB = BasicBlock::Create(
+ M.getContext(), Twine("output_block_") + Twine(static_cast<unsigned>(0)),
+ CurrentGroup.OutlinedFunction);
+ CurrentOS->OutputBlockNum = 0;
+
+ replaceArgumentUses(*CurrentOS, NewBB);
+ replaceConstants(*CurrentOS);
+
+  // If the new basic block has no new stores, we can erase it from the module.
+  // If it does, we create a branch instruction to the end basic block from the
+  // new one.
+ if (NewBB->size() == 0) {
+ CurrentOS->OutputBlockNum = -1;
+ NewBB->eraseFromParent();
+ } else {
+ BranchInst::Create(CurrentGroup.EndBB, NewBB);
+ OutputStoreBBs.push_back(NewBB);
+ }
+
+ // Replace the call to the extracted function with the outlined function.
+ CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
+
+ // We only delete the extracted functions at the end since we may need to
+ // reference instructions contained in them for mapping purposes.
+ FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
+}
+
+void IROutliner::deduplicateExtractedSections(
+ Module &M, OutlinableGroup &CurrentGroup,
+ std::vector<Function *> &FuncsToRemove, unsigned &OutlinedFunctionNum) {
+ createFunction(M, CurrentGroup, OutlinedFunctionNum);
+
+ std::vector<BasicBlock *> OutputStoreBBs;
+
+ OutlinableRegion *CurrentOS;
+
+ fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove);
+
+ for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) {
+ CurrentOS = CurrentGroup.Regions[Idx];
+ AttributeFuncs::mergeAttributesForOutlining(*CurrentGroup.OutlinedFunction,
+ *CurrentOS->ExtractedFunction);
+
+ // Create a new BasicBlock to hold the needed store instructions.
+ BasicBlock *NewBB = BasicBlock::Create(
+ M.getContext(), "output_block_" + std::to_string(Idx),
+ CurrentGroup.OutlinedFunction);
+ replaceArgumentUses(*CurrentOS, NewBB);
+
+ alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBB,
+ CurrentGroup.EndBB, OutputMappings,
+ OutputStoreBBs);
+
+ CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
+ FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
+ }
+
+ // Create a switch statement to handle the different output schemes.
+ createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBB, OutputStoreBBs);
+
+ OutlinedFunctionNum++;
+}
+
+void IROutliner::pruneIncompatibleRegions(
+ std::vector<IRSimilarityCandidate> &CandidateVec,
+ OutlinableGroup &CurrentGroup) {
+ bool PreviouslyOutlined;
+
+ // Sort from beginning to end, so the IRSimilarityCandidates are in order.
+ stable_sort(CandidateVec, [](const IRSimilarityCandidate &LHS,
+ const IRSimilarityCandidate &RHS) {
+ return LHS.getStartIdx() < RHS.getStartIdx();
+ });
+
+ unsigned CurrentEndIdx = 0;
+ for (IRSimilarityCandidate &IRSC : CandidateVec) {
+ PreviouslyOutlined = false;
+ unsigned StartIdx = IRSC.getStartIdx();
+ unsigned EndIdx = IRSC.getEndIdx();
+
+ for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
+ if (Outlined.contains(Idx)) {
+ PreviouslyOutlined = true;
+ break;
+ }
+
+ if (PreviouslyOutlined)
+ continue;
+
+ // TODO: If in the future we can outline across BasicBlocks, we will need to
+ // check all BasicBlocks contained in the region.
+ if (IRSC.getStartBB()->hasAddressTaken())
+ continue;
+
+ if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() &&
+ !OutlineFromLinkODRs)
+ continue;
+
+ // Greedily prune out any regions that will overlap with already chosen
+ // regions.
+ if (CurrentEndIdx != 0 && StartIdx <= CurrentEndIdx)
+ continue;
+
+ bool BadInst = any_of(IRSC, [this](IRInstructionData &ID) {
+ // We check if there is a discrepancy between the InstructionDataList
+ // and the actual next instruction in the module. If there is, it means
+ // that an extra instruction was added, likely by the CodeExtractor.
+
+ // Since we do not have any similarity data about this particular
+ // instruction, we cannot confidently outline it, and must discard this
+ // candidate.
+ if (std::next(ID.getIterator())->Inst !=
+ ID.Inst->getNextNonDebugInstruction())
+ return true;
+ return !this->InstructionClassifier.visit(ID.Inst);
+ });
+
+ if (BadInst)
+ continue;
+
+ OutlinableRegion *OS = new (RegionAllocator.Allocate())
+ OutlinableRegion(IRSC, CurrentGroup);
+ CurrentGroup.Regions.push_back(OS);
+
+ CurrentEndIdx = EndIdx;
+ }
+}
+
+InstructionCost
+IROutliner::findBenefitFromAllRegions(OutlinableGroup &CurrentGroup) {
+ InstructionCost RegionBenefit = 0;
+ for (OutlinableRegion *Region : CurrentGroup.Regions) {
+ TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
+ // We add the number of instructions in the region to the benefit as an
+ // estimate as to how much will be removed.
+ RegionBenefit += Region->getBenefit(TTI);
+ LLVM_DEBUG(dbgs() << "Adding: " << RegionBenefit
+ << " saved instructions to overfall benefit.\n");
+ }
+
+ return RegionBenefit;
+}
+
+InstructionCost
+IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) {
+ InstructionCost OverallCost = 0;
+ for (OutlinableRegion *Region : CurrentGroup.Regions) {
+ TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
+
+ // Each output incurs a load after the call, so we add that to the cost.
+ for (unsigned OutputGVN : Region->GVNStores) {
+ Optional<Value *> OV = Region->Candidate->fromGVN(OutputGVN);
+ assert(OV.hasValue() && "Could not find value for GVN?");
+ Value *V = OV.getValue();
+ InstructionCost LoadCost =
+ TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
+ TargetTransformInfo::TCK_CodeSize);
+
+ LLVM_DEBUG(dbgs() << "Adding: " << LoadCost
+ << " instructions to cost for output of type "
+ << *V->getType() << "\n");
+ OverallCost += LoadCost;
+ }
+ }
+
+ return OverallCost;
+}
+
+/// Find the extra instructions needed to handle any output values for the
+/// region.
+///
+/// \param [in] M - The Module to outline from.
+/// \param [in] CurrentGroup - The collection of OutlinableRegions to analyze.
+/// \param [in] TTI - The TargetTransformInfo used to collect information for
+/// new instruction costs.
+/// \returns the additional cost to handle the outputs.
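+///
+/// For example, two hypothetical output combinations of two i32 values each
+/// would add 2 * (2 store costs + 1 branch cost), plus, since there is more
+/// than one combination, the compare-and-branch overhead of the two switch
+/// cases.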
+static InstructionCost findCostForOutputBlocks(Module &M,
+ OutlinableGroup &CurrentGroup,
+ TargetTransformInfo &TTI) {
+ InstructionCost OutputCost = 0;
+
+ for (const ArrayRef<unsigned> &OutputUse :
+ CurrentGroup.OutputGVNCombinations) {
+ IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
+ for (unsigned GVN : OutputUse) {
+ Optional<Value *> OV = Candidate.fromGVN(GVN);
+ assert(OV.hasValue() && "Could not find value for GVN?");
+ Value *V = OV.getValue();
+      InstructionCost StoreCost =
+          TTI.getMemoryOpCost(Instruction::Store, V->getType(), Align(1), 0,
+                              TargetTransformInfo::TCK_CodeSize);
+
+ // An instruction cost is added for each store set that needs to occur for
+ // various output combinations inside the function, plus a branch to
+ // return to the exit block.
+ LLVM_DEBUG(dbgs() << "Adding: " << StoreCost
+ << " instructions to cost for output of type "
+ << *V->getType() << "\n");
+ OutputCost += StoreCost;
+ }
+
+ InstructionCost BranchCost =
+ TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize);
+ LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for"
+ << " a branch instruction\n");
+ OutputCost += BranchCost;
+ }
+
+ // If there is more than one output scheme, we must have a comparison and
+ // branch for each different item in the switch statement.
+ if (CurrentGroup.OutputGVNCombinations.size() > 1) {
+ InstructionCost ComparisonCost = TTI.getCmpSelInstrCost(
+ Instruction::ICmp, Type::getInt32Ty(M.getContext()),
+ Type::getInt32Ty(M.getContext()), CmpInst::BAD_ICMP_PREDICATE,
+ TargetTransformInfo::TCK_CodeSize);
+ InstructionCost BranchCost =
+ TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize);
+
+ unsigned DifferentBlocks = CurrentGroup.OutputGVNCombinations.size();
+    InstructionCost TotalCost =
+        (ComparisonCost + BranchCost) * DifferentBlocks;
+
+ LLVM_DEBUG(dbgs() << "Adding: " << TotalCost
+ << " instructions for each switch case for each different"
+ << " output path in a function\n");
+ OutputCost += TotalCost;
+ }
+
+ return OutputCost;
+}
+
+void IROutliner::findCostBenefit(Module &M, OutlinableGroup &CurrentGroup) {
+ InstructionCost RegionBenefit = findBenefitFromAllRegions(CurrentGroup);
+ CurrentGroup.Benefit += RegionBenefit;
+ LLVM_DEBUG(dbgs() << "Current Benefit: " << CurrentGroup.Benefit << "\n");
+
+ InstructionCost OutputReloadCost = findCostOutputReloads(CurrentGroup);
+ CurrentGroup.Cost += OutputReloadCost;
+ LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
+
+ InstructionCost AverageRegionBenefit =
+ RegionBenefit / CurrentGroup.Regions.size();
+ unsigned OverallArgumentNum = CurrentGroup.ArgumentTypes.size();
+ unsigned NumRegions = CurrentGroup.Regions.size();
+ TargetTransformInfo &TTI =
+ getTTI(*CurrentGroup.Regions[0]->Candidate->getFunction());
+
+  // We add the average region size to the cost once, to account for the
+  // instructions that make up the body of the newly created function.
+ LLVM_DEBUG(dbgs() << "Adding: " << AverageRegionBenefit
+ << " instructions to cost for body of new function.\n");
+ CurrentGroup.Cost += AverageRegionBenefit;
+ LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
+
+ // For each argument, we must add an instruction for loading the argument
+ // out of the register and into a value inside of the newly outlined function.
+ LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
+ << " instructions to cost for each argument in the new"
+ << " function.\n");
+  CurrentGroup.Cost += OverallArgumentNum * TargetTransformInfo::TCC_Basic;
+ LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
+
+  // Each argument needs to either be loaded into a register or onto the stack.
+  // Some arguments will only be loaded onto the stack once the argument
+  // registers are filled.
+ LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
+ << " instructions to cost for each argument in the new"
+ << " function " << NumRegions << " times for the "
+ << "needed argument handling at the call site.\n");
+ CurrentGroup.Cost +=
+ 2 * OverallArgumentNum * TargetTransformInfo::TCC_Basic * NumRegions;
+ LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
+
+ CurrentGroup.Cost += findCostForOutputBlocks(M, CurrentGroup, TTI);
+ LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
+}
+
+void IROutliner::updateOutputMapping(OutlinableRegion &Region,
+ ArrayRef<Value *> Outputs,
+ LoadInst *LI) {
+  // For a load instruction following the call, find whether its pointer
+  // operand is one of the output registers of the call.
+  Value *Operand = LI->getPointerOperand();
+  Optional<unsigned> OutputIdx = None;
+ for (unsigned ArgIdx = Region.NumExtractedInputs;
+ ArgIdx < Region.Call->arg_size(); ArgIdx++) {
+ if (Operand == Region.Call->getArgOperand(ArgIdx)) {
+ OutputIdx = ArgIdx - Region.NumExtractedInputs;
+ break;
+ }
+ }
+
+ // If we found an output register, place a mapping of the new value
+ // to the original in the mapping.
+ if (!OutputIdx.hasValue())
+ return;
+
+ if (OutputMappings.find(Outputs[OutputIdx.getValue()]) ==
+ OutputMappings.end()) {
+ LLVM_DEBUG(dbgs() << "Mapping extracted output " << *LI << " to "
+ << *Outputs[OutputIdx.getValue()] << "\n");
+ OutputMappings.insert(std::make_pair(LI, Outputs[OutputIdx.getValue()]));
+ } else {
+ Value *Orig = OutputMappings.find(Outputs[OutputIdx.getValue()])->second;
+ LLVM_DEBUG(dbgs() << "Mapping extracted output " << *Orig << " to "
+ << *Outputs[OutputIdx.getValue()] << "\n");
+ OutputMappings.insert(std::make_pair(LI, Orig));
+ }
+}
+
+bool IROutliner::extractSection(OutlinableRegion &Region) {
+ SetVector<Value *> ArgInputs, Outputs, SinkCands;
+ Region.CE->findInputsOutputs(ArgInputs, Outputs, SinkCands);
+
+ assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!");
+ assert(Region.FollowBB && "FollowBB for the OutlinableRegion is nullptr!");
+ Function *OrigF = Region.StartBB->getParent();
+ CodeExtractorAnalysisCache CEAC(*OrigF);
+ Region.ExtractedFunction = Region.CE->extractCodeRegion(CEAC);
+
+  // If the extraction failed, reattach the candidate and bail out. Otherwise,
+  // we find the rewritten BasicBlock and reassign the OutlinableRegion blocks
+  // below.
+ if (!Region.ExtractedFunction) {
+ LLVM_DEBUG(dbgs() << "CodeExtractor failed to outline " << Region.StartBB
+ << "\n");
+ Region.reattachCandidate();
+ return false;
+ }
+
+  BasicBlock *RewrittenBB = Region.FollowBB->getSinglePredecessor();
+  assert(RewrittenBB != nullptr &&
+         "Could not find a predecessor after extraction!");
+  Region.StartBB = RewrittenBB;
+  Region.EndBB = RewrittenBB;
+
+  // The sequence of outlinable regions has now changed. We must fix the
+  // IRInstructionDataList for consistency. Although they may not be illegal
+  // instructions, they should not be compared with anything else as they
+  // should not be outlined in this round. So marking these as illegal is
+  // allowed.
+ IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
+ Instruction *BeginRewritten = &*RewrittenBB->begin();
+ Instruction *EndRewritten = &*RewrittenBB->begin();
+ Region.NewFront = new (InstDataAllocator.Allocate()) IRInstructionData(
+ *BeginRewritten, InstructionClassifier.visit(*BeginRewritten), *IDL);
+ Region.NewBack = new (InstDataAllocator.Allocate()) IRInstructionData(
+ *EndRewritten, InstructionClassifier.visit(*EndRewritten), *IDL);
+
+ // Insert the first IRInstructionData of the new region in front of the
+ // first IRInstructionData of the IRSimilarityCandidate.
+ IDL->insert(Region.Candidate->begin(), *Region.NewFront);
+  // Insert the last IRInstructionData of the new region after the
+  // last IRInstructionData of the IRSimilarityCandidate.
+ IDL->insert(Region.Candidate->end(), *Region.NewBack);
+ // Remove the IRInstructionData from the IRSimilarityCandidate.
+ IDL->erase(Region.Candidate->begin(), std::prev(Region.Candidate->end()));
+
+ // Iterate over the new set of instructions to find the new call
+ // instruction.
+ for (Instruction &I : *RewrittenBB)
+ if (CallInst *CI = dyn_cast<CallInst>(&I)) {
+ if (Region.ExtractedFunction == CI->getCalledFunction())
+ Region.Call = CI;
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(&I))
+ updateOutputMapping(Region, Outputs.getArrayRef(), LI);
+ Region.reattachCandidate();
+ return true;
+}
+
+unsigned IROutliner::doOutline(Module &M) {
+ // Find the possible similarity sections.
+ IRSimilarityIdentifier &Identifier = getIRSI(M);
+ SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
+
+  unsigned OutlinedFunctionNum = 0;
+  // Sort the groups by the potential number of instructions to be outlined.
+  // If we only have one SimilarityGroup in SimilarityCandidates, we do not
+  // have to sort.
+  if (SimilarityCandidates.size() > 1)
+ llvm::stable_sort(SimilarityCandidates,
+ [](const std::vector<IRSimilarityCandidate> &LHS,
+ const std::vector<IRSimilarityCandidate> &RHS) {
+ return LHS[0].getLength() * LHS.size() >
+ RHS[0].getLength() * RHS.size();
+ });
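+  // For example, a hypothetical group of 3 candidates of length 10 (30
+  // potentially outlined instructions) sorts ahead of a group of 2 candidates
+  // of length 12 (24 potentially outlined instructions).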
+
+ DenseSet<unsigned> NotSame;
+ std::vector<Function *> FuncsToRemove;
+ // Iterate over the possible sets of similarity.
+ for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
+ OutlinableGroup CurrentGroup;
+
+ // Remove entries that were previously outlined
+ pruneIncompatibleRegions(CandidateVec, CurrentGroup);
+
+    // We pruned the number of regions to 0 or 1, meaning that it's not worth
+    // trying to outline, since there is no compatible similar instance of this
+    // code.
+ if (CurrentGroup.Regions.size() < 2)
+ continue;
+
+ // Determine if there are any values that are the same constant throughout
+ // each section in the set.
+ NotSame.clear();
+ CurrentGroup.findSameConstants(NotSame);
+
+ if (CurrentGroup.IgnoreGroup)
+ continue;
+
+    // Create a CodeExtractor for each outlinable region. Identify the inputs
+    // and outputs for each section using the code extractor, and create the
+    // argument types for the aggregate outlined function.
+ std::vector<OutlinableRegion *> OutlinedRegions;
+ for (OutlinableRegion *OS : CurrentGroup.Regions) {
+ // Break the outlinable region out of its parent BasicBlock into its own
+ // BasicBlocks (see function implementation).
+ OS->splitCandidate();
+ std::vector<BasicBlock *> BE = {OS->StartBB};
+ OS->CE = new (ExtractorAllocator.Allocate())
+ CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
+ false, "outlined");
+ findAddInputsOutputs(M, *OS, NotSame);
+ if (!OS->IgnoreRegion)
+ OutlinedRegions.push_back(OS);
+ else
+ OS->reattachCandidate();
+ }
+
+ CurrentGroup.Regions = std::move(OutlinedRegions);
+
+ if (CurrentGroup.Regions.empty())
+ continue;
+
+ CurrentGroup.collectGVNStoreSets(M);
+
+ if (CostModel)
+ findCostBenefit(M, CurrentGroup);
+
+ // If we are adhering to the cost model, reattach all the candidates
+ if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) {
+ for (OutlinableRegion *OS : CurrentGroup.Regions)
+ OS->reattachCandidate();
+ OptimizationRemarkEmitter &ORE = getORE(
+ *CurrentGroup.Regions[0]->Candidate->getFunction());
+ ORE.emit([&]() {
+ IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
+ OptimizationRemarkMissed R(DEBUG_TYPE, "WouldNotDecreaseSize",
+ C->frontInstruction());
+ R << "did not outline "
+ << ore::NV(std::to_string(CurrentGroup.Regions.size()))
+ << " regions due to estimated increase of "
+ << ore::NV("InstructionIncrease",
+ CurrentGroup.Cost - CurrentGroup.Benefit)
+ << " instructions at locations ";
+ interleave(
+ CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
+ [&R](OutlinableRegion *Region) {
+ R << ore::NV(
+ "DebugLoc",
+ Region->Candidate->frontInstruction()->getDebugLoc());
+ },
+ [&R]() { R << " "; });
+ return R;
+ });
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost
+ << " and benefit " << CurrentGroup.Benefit << "\n");
+
+ // Create functions out of all the sections, and mark them as outlined.
+ OutlinedRegions.clear();
+ for (OutlinableRegion *OS : CurrentGroup.Regions) {
+ bool FunctionOutlined = extractSection(*OS);
+ if (FunctionOutlined) {
+ unsigned StartIdx = OS->Candidate->getStartIdx();
+ unsigned EndIdx = OS->Candidate->getEndIdx();
+ for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
+ Outlined.insert(Idx);
+
+ OutlinedRegions.push_back(OS);
+ }
+ }
+
+    LLVM_DEBUG(dbgs() << "Outlined " << OutlinedRegions.size()
+                      << " regions with benefit " << CurrentGroup.Benefit
+                      << " and cost " << CurrentGroup.Cost << "\n");
+
+ CurrentGroup.Regions = std::move(OutlinedRegions);
+
+ if (CurrentGroup.Regions.empty())
+ continue;
+
+ OptimizationRemarkEmitter &ORE =
+ getORE(*CurrentGroup.Regions[0]->Call->getFunction());
+ ORE.emit([&]() {
+ IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
+ OptimizationRemark R(DEBUG_TYPE, "Outlined", C->front()->Inst);
+ R << "outlined " << ore::NV(std::to_string(CurrentGroup.Regions.size()))
+ << " regions with decrease of "
+ << ore::NV("Benefit", CurrentGroup.Benefit - CurrentGroup.Cost)
+ << " instructions at locations ";
+ interleave(
+ CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
+ [&R](OutlinableRegion *Region) {
+ R << ore::NV("DebugLoc",
+ Region->Candidate->frontInstruction()->getDebugLoc());
+ },
+ [&R]() { R << " "; });
+ return R;
+ });
+
+ deduplicateExtractedSections(M, CurrentGroup, FuncsToRemove,
+ OutlinedFunctionNum);
+ }
+
+ for (Function *F : FuncsToRemove)
+ F->eraseFromParent();
+
+ return OutlinedFunctionNum;
+}
+
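A toy illustration (plain C++, illustration only) of the ordering used by the stable_sort in doOutline above: groups are prioritized by candidate length times occurrence count, i.e. the total number of instructions the group could remove.

#include <algorithm>
#include <vector>

struct Group {
  unsigned Length; // instructions per candidate
  unsigned Count;  // number of similar candidates
};

void sortGroups(std::vector<Group> &Groups) {
  std::stable_sort(Groups.begin(), Groups.end(),
                   [](const Group &L, const Group &R) {
                     return L.Length * L.Count > R.Length * R.Count;
                   });
}
// E.g. {Length = 3, Count = 4} (12 instructions) sorts ahead of
// {Length = 5, Count = 2} (10 instructions).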
+bool IROutliner::run(Module &M) {
+ CostModel = !NoCostModel;
+ OutlineFromLinkODRs = EnableLinkOnceODRIROutlining;
+
+ return doOutline(M) > 0;
+}
+
+// Pass Manager Boilerplate
+class IROutlinerLegacyPass : public ModulePass {
+public:
+ static char ID;
+ IROutlinerLegacyPass() : ModulePass(ID) {
+ initializeIROutlinerLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<IRSimilarityIdentifierWrapperPass>();
+ }
+
+ bool runOnModule(Module &M) override;
+};
+
+bool IROutlinerLegacyPass::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+
+ std::unique_ptr<OptimizationRemarkEmitter> ORE;
+ auto GORE = [&ORE](Function &F) -> OptimizationRemarkEmitter & {
+ ORE.reset(new OptimizationRemarkEmitter(&F));
+ return *ORE.get();
+ };
+
+ auto GTTI = [this](Function &F) -> TargetTransformInfo & {
+ return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ };
+
+ auto GIRSI = [this](Module &) -> IRSimilarityIdentifier & {
+ return this->getAnalysis<IRSimilarityIdentifierWrapperPass>().getIRSI();
+ };
+
+ return IROutliner(GTTI, GIRSI, GORE).run(M);
+}
+
+PreservedAnalyses IROutlinerPass::run(Module &M, ModuleAnalysisManager &AM) {
+ auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+ std::function<TargetTransformInfo &(Function &)> GTTI =
+ [&FAM](Function &F) -> TargetTransformInfo & {
+ return FAM.getResult<TargetIRAnalysis>(F);
+ };
+
+ std::function<IRSimilarityIdentifier &(Module &)> GIRSI =
+ [&AM](Module &M) -> IRSimilarityIdentifier & {
+ return AM.getResult<IRSimilarityAnalysis>(M);
+ };
+
+ std::unique_ptr<OptimizationRemarkEmitter> ORE;
+ std::function<OptimizationRemarkEmitter &(Function &)> GORE =
+ [&ORE](Function &F) -> OptimizationRemarkEmitter & {
+ ORE.reset(new OptimizationRemarkEmitter(&F));
+ return *ORE.get();
+ };
+
+ if (IROutliner(GTTI, GIRSI, GORE).run(M))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
+
+char IROutlinerLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(IRSimilarityIdentifierWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
+ false)
+
+ModulePass *llvm::createIROutlinerPass() { return new IROutlinerLegacyPass(); }
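A minimal sketch of driving the outliner programmatically (illustration only; it assumes createIROutlinerPass is declared in llvm/Transforms/IPO.h, the usual home of these factory functions). With opt and the legacy pass manager, the corresponding flag is -iroutliner, per the INITIALIZE_PASS name above.

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/IPO.h"

void runIROutliner(llvm::Module &M) {
  llvm::legacy::PassManager PM;
  PM.add(llvm::createIROutlinerPass()); // factory defined directly above
  PM.run(M);
}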
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/Inliner.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/Inliner.cpp
index e91b6c9b1d..7dfc611b74 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/Inliner.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/Inliner.cpp
@@ -36,7 +36,7 @@
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
+#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
@@ -90,14 +90,14 @@ static cl::opt<bool>
DisableInlinedAllocaMerging("disable-inlined-alloca-merging",
cl::init(false), cl::Hidden);
-extern cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats;
+extern cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats;
-static cl::opt<std::string> CGSCCInlineReplayFile(
- "cgscc-inline-replay", cl::init(""), cl::value_desc("filename"),
- cl::desc(
- "Optimization remarks file containing inline remarks to be replayed "
- "by inlining from cgscc inline remarks."),
- cl::Hidden);
+static cl::opt<std::string> CGSCCInlineReplayFile(
+ "cgscc-inline-replay", cl::init(""), cl::value_desc("filename"),
+ cl::desc(
+ "Optimization remarks file containing inline remarks to be replayed "
+ "by inlining from cgscc inline remarks."),
+ cl::Hidden);
LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {}
@@ -640,9 +640,9 @@ bool LegacyInlinerBase::removeDeadFunctions(CallGraph &CG,
InlineAdvisor &
InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
FunctionAnalysisManager &FAM, Module &M) {
- if (OwnedAdvisor)
- return *OwnedAdvisor;
-
+ if (OwnedAdvisor)
+ return *OwnedAdvisor;
+
auto *IAA = MAM.getCachedResult<InlineAdvisorAnalysis>(M);
if (!IAA) {
// It should still be possible to run the inliner as a stand-alone SCC pass,
@@ -653,16 +653,16 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
// duration of the inliner pass, and thus the lifetime of the owned advisor.
// The one we would get from the MAM can be invalidated as a result of the
// inliner's activity.
- OwnedAdvisor =
- std::make_unique<DefaultInlineAdvisor>(M, FAM, getInlineParams());
-
- if (!CGSCCInlineReplayFile.empty())
- OwnedAdvisor = std::make_unique<ReplayInlineAdvisor>(
- M, FAM, M.getContext(), std::move(OwnedAdvisor),
- CGSCCInlineReplayFile,
- /*EmitRemarks=*/true);
-
- return *OwnedAdvisor;
+ OwnedAdvisor =
+ std::make_unique<DefaultInlineAdvisor>(M, FAM, getInlineParams());
+
+ if (!CGSCCInlineReplayFile.empty())
+ OwnedAdvisor = std::make_unique<ReplayInlineAdvisor>(
+ M, FAM, M.getContext(), std::move(OwnedAdvisor),
+ CGSCCInlineReplayFile,
+ /*EmitRemarks=*/true);
+
+ return *OwnedAdvisor;
}
assert(IAA->getAdvisor() &&
"Expected a present InlineAdvisorAnalysis also have an "
@@ -696,7 +696,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
//
// Note that this particular order of processing is actually critical to
// avoid very bad behaviors. Consider *highly connected* call graphs where
- // each function contains a small amount of code and a couple of calls to
+ // each function contains a small amount of code and a couple of calls to
// other functions. Because the LLVM inliner is fundamentally a bottom-up
// inliner, it can handle gracefully the fact that these all appear to be
// reasonable inlining candidates as it will flatten things until they become
@@ -746,7 +746,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (Calls.empty())
return PreservedAnalyses::all();
- // Capture updatable variable for the current SCC.
+ // Capture updatable variable for the current SCC.
auto *C = &InitialC;
// When inlining a callee produces new call sites, we want to keep track of
@@ -812,7 +812,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
continue;
}
- auto Advice = Advisor.getAdvice(*CB, OnlyMandatory);
+ auto Advice = Advisor.getAdvice(*CB, OnlyMandatory);
// Check whether we want to inline this callsite.
if (!Advice->isInliningRecommended()) {
Advice->recordUnattemptedInlining();
@@ -826,8 +826,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
&FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())),
&FAM.getResult<BlockFrequencyAnalysis>(Callee));
- InlineResult IR =
- InlineFunction(*CB, IFI, &FAM.getResult<AAManager>(*CB->getCaller()));
+ InlineResult IR =
+ InlineFunction(*CB, IFI, &FAM.getResult<AAManager>(*CB->getCaller()));
if (!IR.isSuccess()) {
Advice->recordUnsuccessfulInlining(IR);
continue;
@@ -882,7 +882,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// Note that after this point, it is an error to do anything other
// than use the callee's address or delete it.
Callee.dropAllReferences();
- assert(!is_contained(DeadFunctions, &Callee) &&
+ assert(!is_contained(DeadFunctions, &Callee) &&
"Cannot put cause a function to become dead twice!");
DeadFunctions.push_back(&Callee);
CalleeWasDeleted = true;
@@ -914,7 +914,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// as we're going to mutate this particular function we want to make sure
// the proxy is in place to forward any invalidation events.
LazyCallGraph::SCC *OldC = C;
- C = &updateCGAndAnalysisManagerForCGSCCPass(CG, *C, N, AM, UR, FAM);
+ C = &updateCGAndAnalysisManagerForCGSCCPass(CG, *C, N, AM, UR, FAM);
LLVM_DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n");
// If this causes an SCC to split apart into multiple smaller SCCs, there
@@ -994,7 +994,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
ModuleInlinerWrapperPass::ModuleInlinerWrapperPass(InlineParams Params,
bool Debugging,
- bool MandatoryFirst,
+ bool MandatoryFirst,
InliningAdvisorMode Mode,
unsigned MaxDevirtIterations)
: Params(Params), Mode(Mode), MaxDevirtIterations(MaxDevirtIterations),
@@ -1004,15 +1004,15 @@ ModuleInlinerWrapperPass::ModuleInlinerWrapperPass(InlineParams Params,
// into the callers so that our optimizations can reflect that.
// For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO
// because it makes profile annotation in the backend inaccurate.
- if (MandatoryFirst)
- PM.addPass(InlinerPass(/*OnlyMandatory*/ true));
+ if (MandatoryFirst)
+ PM.addPass(InlinerPass(/*OnlyMandatory*/ true));
PM.addPass(InlinerPass());
}
PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M,
ModuleAnalysisManager &MAM) {
auto &IAA = MAM.getResult<InlineAdvisorAnalysis>(M);
- if (!IAA.tryCreate(Params, Mode, CGSCCInlineReplayFile)) {
+ if (!IAA.tryCreate(Params, Mode, CGSCCInlineReplayFile)) {
M.getContext().emitError(
"Could not setup Inlining Advisor for the requested "
"mode and/or options");
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/LoopExtractor.cpp
index a497c0390b..cbbc5f8882 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/LoopExtractor.cpp
@@ -13,14 +13,14 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO/LoopExtractor.h"
+#include "llvm/Transforms/IPO/LoopExtractor.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -38,71 +38,71 @@ using namespace llvm;
STATISTIC(NumExtracted, "Number of loops extracted");
namespace {
-struct LoopExtractorLegacyPass : public ModulePass {
- static char ID; // Pass identification, replacement for typeid
-
- unsigned NumLoops;
-
- explicit LoopExtractorLegacyPass(unsigned NumLoops = ~0)
- : ModulePass(ID), NumLoops(NumLoops) {
- initializeLoopExtractorLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequiredID(BreakCriticalEdgesID);
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addUsedIfAvailable<AssumptionCacheTracker>();
- }
-};
-
-struct LoopExtractor {
- explicit LoopExtractor(
- unsigned NumLoops,
- function_ref<DominatorTree &(Function &)> LookupDomTree,
- function_ref<LoopInfo &(Function &)> LookupLoopInfo,
- function_ref<AssumptionCache *(Function &)> LookupAssumptionCache)
- : NumLoops(NumLoops), LookupDomTree(LookupDomTree),
- LookupLoopInfo(LookupLoopInfo),
- LookupAssumptionCache(LookupAssumptionCache) {}
- bool runOnModule(Module &M);
-
-private:
- // The number of natural loops to extract from the program into functions.
- unsigned NumLoops;
-
- function_ref<DominatorTree &(Function &)> LookupDomTree;
- function_ref<LoopInfo &(Function &)> LookupLoopInfo;
- function_ref<AssumptionCache *(Function &)> LookupAssumptionCache;
-
- bool runOnFunction(Function &F);
-
- bool extractLoops(Loop::iterator From, Loop::iterator To, LoopInfo &LI,
- DominatorTree &DT);
- bool extractLoop(Loop *L, LoopInfo &LI, DominatorTree &DT);
-};
-} // namespace
-
-char LoopExtractorLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopExtractorLegacyPass, "loop-extract",
+struct LoopExtractorLegacyPass : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+
+ unsigned NumLoops;
+
+ explicit LoopExtractorLegacyPass(unsigned NumLoops = ~0)
+ : ModulePass(ID), NumLoops(NumLoops) {
+ initializeLoopExtractorLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequiredID(BreakCriticalEdgesID);
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addUsedIfAvailable<AssumptionCacheTracker>();
+ }
+};
+
+struct LoopExtractor {
+ explicit LoopExtractor(
+ unsigned NumLoops,
+ function_ref<DominatorTree &(Function &)> LookupDomTree,
+ function_ref<LoopInfo &(Function &)> LookupLoopInfo,
+ function_ref<AssumptionCache *(Function &)> LookupAssumptionCache)
+ : NumLoops(NumLoops), LookupDomTree(LookupDomTree),
+ LookupLoopInfo(LookupLoopInfo),
+ LookupAssumptionCache(LookupAssumptionCache) {}
+ bool runOnModule(Module &M);
+
+private:
+ // The number of natural loops to extract from the program into functions.
+ unsigned NumLoops;
+
+ function_ref<DominatorTree &(Function &)> LookupDomTree;
+ function_ref<LoopInfo &(Function &)> LookupLoopInfo;
+ function_ref<AssumptionCache *(Function &)> LookupAssumptionCache;
+
+ bool runOnFunction(Function &F);
+
+ bool extractLoops(Loop::iterator From, Loop::iterator To, LoopInfo &LI,
+ DominatorTree &DT);
+ bool extractLoop(Loop *L, LoopInfo &LI, DominatorTree &DT);
+};
+} // namespace
+
+char LoopExtractorLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopExtractorLegacyPass, "loop-extract",
"Extract loops into new functions", false, false)
INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_END(LoopExtractorLegacyPass, "loop-extract",
+INITIALIZE_PASS_END(LoopExtractorLegacyPass, "loop-extract",
"Extract loops into new functions", false, false)
namespace {
/// SingleLoopExtractor - For bugpoint.
-struct SingleLoopExtractor : public LoopExtractorLegacyPass {
- static char ID; // Pass identification, replacement for typeid
- SingleLoopExtractor() : LoopExtractorLegacyPass(1) {}
-};
+struct SingleLoopExtractor : public LoopExtractorLegacyPass {
+ static char ID; // Pass identification, replacement for typeid
+ SingleLoopExtractor() : LoopExtractorLegacyPass(1) {}
+};
} // End anonymous namespace
char SingleLoopExtractor::ID = 0;
@@ -112,30 +112,30 @@ INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single",
// createLoopExtractorPass - This pass extracts all natural loops from the
// program into a function if it can.
//
-Pass *llvm::createLoopExtractorPass() { return new LoopExtractorLegacyPass(); }
+Pass *llvm::createLoopExtractorPass() { return new LoopExtractorLegacyPass(); }
-bool LoopExtractorLegacyPass::runOnModule(Module &M) {
+bool LoopExtractorLegacyPass::runOnModule(Module &M) {
if (skipModule(M))
return false;
- bool Changed = false;
- auto LookupDomTree = [this](Function &F) -> DominatorTree & {
- return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
- };
- auto LookupLoopInfo = [this, &Changed](Function &F) -> LoopInfo & {
- return this->getAnalysis<LoopInfoWrapperPass>(F, &Changed).getLoopInfo();
- };
- auto LookupACT = [this](Function &F) -> AssumptionCache * {
- if (auto *ACT = this->getAnalysisIfAvailable<AssumptionCacheTracker>())
- return ACT->lookupAssumptionCache(F);
- return nullptr;
- };
- return LoopExtractor(NumLoops, LookupDomTree, LookupLoopInfo, LookupACT)
- .runOnModule(M) ||
- Changed;
-}
-
-bool LoopExtractor::runOnModule(Module &M) {
+ bool Changed = false;
+ auto LookupDomTree = [this](Function &F) -> DominatorTree & {
+ return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
+ };
+ auto LookupLoopInfo = [this, &Changed](Function &F) -> LoopInfo & {
+ return this->getAnalysis<LoopInfoWrapperPass>(F, &Changed).getLoopInfo();
+ };
+ auto LookupACT = [this](Function &F) -> AssumptionCache * {
+ if (auto *ACT = this->getAnalysisIfAvailable<AssumptionCacheTracker>())
+ return ACT->lookupAssumptionCache(F);
+ return nullptr;
+ };
+ return LoopExtractor(NumLoops, LookupDomTree, LookupLoopInfo, LookupACT)
+ .runOnModule(M) ||
+ Changed;
+}
+
+bool LoopExtractor::runOnModule(Module &M) {
if (M.empty())
return false;
@@ -172,13 +172,13 @@ bool LoopExtractor::runOnFunction(Function &F) {
return false;
bool Changed = false;
- LoopInfo &LI = LookupLoopInfo(F);
+ LoopInfo &LI = LookupLoopInfo(F);
// If there are no loops in the function.
if (LI.empty())
return Changed;
- DominatorTree &DT = LookupDomTree(F);
+ DominatorTree &DT = LookupDomTree(F);
// If there is more than one top-level loop in this function, extract all of
// the loops.
@@ -244,7 +244,7 @@ bool LoopExtractor::extractLoops(Loop::iterator From, Loop::iterator To,
bool LoopExtractor::extractLoop(Loop *L, LoopInfo &LI, DominatorTree &DT) {
assert(NumLoops != 0);
Function &Func = *L->getHeader()->getParent();
- AssumptionCache *AC = LookupAssumptionCache(Func);
+ AssumptionCache *AC = LookupAssumptionCache(Func);
CodeExtractorAnalysisCache CEAC(Func);
CodeExtractor Extractor(DT, *L, false, nullptr, nullptr, AC);
if (Extractor.extractCodeRegion(CEAC)) {
@@ -262,24 +262,24 @@ bool LoopExtractor::extractLoop(Loop *L, LoopInfo &LI, DominatorTree &DT) {
Pass *llvm::createSingleLoopExtractorPass() {
return new SingleLoopExtractor();
}
-
-PreservedAnalyses LoopExtractorPass::run(Module &M, ModuleAnalysisManager &AM) {
- auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
- auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
- return FAM.getResult<DominatorTreeAnalysis>(F);
- };
- auto LookupLoopInfo = [&FAM](Function &F) -> LoopInfo & {
- return FAM.getResult<LoopAnalysis>(F);
- };
- auto LookupAssumptionCache = [&FAM](Function &F) -> AssumptionCache * {
- return FAM.getCachedResult<AssumptionAnalysis>(F);
- };
- if (!LoopExtractor(NumLoops, LookupDomTree, LookupLoopInfo,
- LookupAssumptionCache)
- .runOnModule(M))
- return PreservedAnalyses::all();
-
- PreservedAnalyses PA;
- PA.preserve<LoopAnalysis>();
- return PA;
-}
+
+PreservedAnalyses LoopExtractorPass::run(Module &M, ModuleAnalysisManager &AM) {
+ auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
+ return FAM.getResult<DominatorTreeAnalysis>(F);
+ };
+ auto LookupLoopInfo = [&FAM](Function &F) -> LoopInfo & {
+ return FAM.getResult<LoopAnalysis>(F);
+ };
+ auto LookupAssumptionCache = [&FAM](Function &F) -> AssumptionCache * {
+ return FAM.getCachedResult<AssumptionAnalysis>(F);
+ };
+ if (!LoopExtractor(NumLoops, LookupDomTree, LookupLoopInfo,
+ LookupAssumptionCache)
+ .runOnModule(M))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserve<LoopAnalysis>();
+ return PA;
+}
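A minimal sketch of running LoopExtractorPass under the new pass manager (illustration only; it assumes the default constructor extracts all loops, matching the legacy pass's NumLoops default of ~0, and uses the standard PassBuilder registration boilerplate).

#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/IPO/LoopExtractor.h"

void extractAllLoops(llvm::Module &M) {
  llvm::PassBuilder PB;
  llvm::LoopAnalysisManager LAM;
  llvm::FunctionAnalysisManager FAM;
  llvm::CGSCCAnalysisManager CGAM;
  llvm::ModuleAnalysisManager MAM;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM); // provides the DominatorTree/Loop analyses
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  llvm::ModulePassManager MPM;
  MPM.addPass(llvm::LoopExtractorPass());
  MPM.run(M, MAM);
}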
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/LowerTypeTests.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/LowerTypeTests.cpp
index 8bd3036f1f..33e232b0b9 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -198,7 +198,7 @@ void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) {
// indices from the old fragment in this fragment do not insert any more
// indices.
std::vector<uint64_t> &OldFragment = Fragments[OldFragmentIndex];
- llvm::append_range(Fragment, OldFragment);
+ llvm::append_range(Fragment, OldFragment);
OldFragment.clear();
}
}
@@ -1205,7 +1205,7 @@ void LowerTypeTestsModule::verifyTypeMDNode(GlobalObject *GO, MDNode *Type) {
static const unsigned kX86JumpTableEntrySize = 8;
static const unsigned kARMJumpTableEntrySize = 4;
-static const unsigned kARMBTIJumpTableEntrySize = 8;
+static const unsigned kARMBTIJumpTableEntrySize = 8;
unsigned LowerTypeTestsModule::getJumpTableEntrySize() {
switch (Arch) {
@@ -1214,12 +1214,12 @@ unsigned LowerTypeTestsModule::getJumpTableEntrySize() {
return kX86JumpTableEntrySize;
case Triple::arm:
case Triple::thumb:
- return kARMJumpTableEntrySize;
+ return kARMJumpTableEntrySize;
case Triple::aarch64:
- if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
- M.getModuleFlag("branch-target-enforcement")))
- if (BTE->getZExtValue())
- return kARMBTIJumpTableEntrySize;
+ if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("branch-target-enforcement")))
+ if (BTE->getZExtValue())
+ return kARMBTIJumpTableEntrySize;
return kARMJumpTableEntrySize;
default:
report_fatal_error("Unsupported architecture for jump tables");
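The AArch64 BTI-aware entry size above is keyed off a module flag. A minimal sketch of setting it (illustration only; the Module::Error behavior kind is an assumption, since the frontend chooses the behavior when it emits the flag):

#include "llvm/IR/Module.h"

void enableBranchTargetEnforcement(llvm::Module &M) {
  // With this flag present and nonzero, getJumpTableEntrySize() returns the
  // 8-byte BTI entry on AArch64.
  M.addModuleFlag(llvm::Module::Error, "branch-target-enforcement", 1);
}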
@@ -1238,14 +1238,14 @@ void LowerTypeTestsModule::createJumpTableEntry(
if (JumpTableArch == Triple::x86 || JumpTableArch == Triple::x86_64) {
AsmOS << "jmp ${" << ArgIndex << ":c}@plt\n";
AsmOS << "int3\nint3\nint3\n";
- } else if (JumpTableArch == Triple::arm) {
- AsmOS << "b $" << ArgIndex << "\n";
- } else if (JumpTableArch == Triple::aarch64) {
- if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
- Dest->getParent()->getModuleFlag("branch-target-enforcement")))
- if (BTE->getZExtValue())
- AsmOS << "bti c\n";
+ } else if (JumpTableArch == Triple::arm) {
AsmOS << "b $" << ArgIndex << "\n";
+ } else if (JumpTableArch == Triple::aarch64) {
+ if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
+ Dest->getParent()->getModuleFlag("branch-target-enforcement")))
+ if (BTE->getZExtValue())
+ AsmOS << "bti c\n";
+ AsmOS << "b $" << ArgIndex << "\n";
} else if (JumpTableArch == Triple::thumb) {
AsmOS << "b.w $" << ArgIndex << "\n";
} else {
@@ -1338,7 +1338,7 @@ void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr(
static bool isThumbFunction(Function *F, Triple::ArchType ModuleArch) {
Attribute TFAttr = F->getFnAttribute("target-features");
- if (TFAttr.isValid()) {
+ if (TFAttr.isValid()) {
SmallVector<StringRef, 6> Features;
TFAttr.getValueAsString().split(Features, ',');
for (StringRef Feature : Features) {
@@ -1406,10 +1406,10 @@ void LowerTypeTestsModule::createJumpTable(
// by Clang for -march=armv7.
F->addFnAttr("target-cpu", "cortex-a8");
}
- if (JumpTableArch == Triple::aarch64) {
- F->addFnAttr("branch-target-enforcement", "false");
- F->addFnAttr("sign-return-address", "none");
- }
+ if (JumpTableArch == Triple::aarch64) {
+ F->addFnAttr("branch-target-enforcement", "false");
+ F->addFnAttr("sign-return-address", "none");
+ }
// Make sure we don't emit .eh_frame for this function.
F->addFnAttr(Attribute::NoUnwind);
@@ -2255,13 +2255,13 @@ bool LowerTypeTestsModule::lower() {
PreservedAnalyses LowerTypeTestsPass::run(Module &M,
ModuleAnalysisManager &AM) {
- bool Changed;
- if (UseCommandLine)
- Changed = LowerTypeTestsModule::runForTesting(M);
- else
- Changed =
- LowerTypeTestsModule(M, ExportSummary, ImportSummary, DropTypeTests)
- .lower();
+ bool Changed;
+ if (UseCommandLine)
+ Changed = LowerTypeTestsModule::runForTesting(M);
+ else
+ Changed =
+ LowerTypeTestsModule(M, ExportSummary, ImportSummary, DropTypeTests)
+ .lower();
if (!Changed)
return PreservedAnalyses::all();
return PreservedAnalyses::none();
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/MergeFunctions.cpp
index ec5d86b72a..aa4de3d122 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/MergeFunctions.cpp
@@ -725,10 +725,10 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
if (MergeFunctionsPDI) {
DISubprogram *DIS = G->getSubprogram();
if (DIS) {
- DebugLoc CIDbgLoc =
- DILocation::get(DIS->getContext(), DIS->getScopeLine(), 0, DIS);
- DebugLoc RIDbgLoc =
- DILocation::get(DIS->getContext(), DIS->getScopeLine(), 0, DIS);
+ DebugLoc CIDbgLoc =
+ DILocation::get(DIS->getContext(), DIS->getScopeLine(), 0, DIS);
+ DebugLoc RIDbgLoc =
+ DILocation::get(DIS->getContext(), DIS->getScopeLine(), 0, DIS);
CI->setDebugLoc(CIDbgLoc);
RI->setDebugLoc(RIDbgLoc);
} else {
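A minimal sketch of the pattern above (illustration only; F and ThunkInst are hypothetical stand-ins): give instructions in a generated thunk an artificial debug location at the original subprogram's scope line, so the debugger has something to attribute them to.

#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"

void setThunkDebugLoc(llvm::Function &F, llvm::Instruction &ThunkInst) {
  if (llvm::DISubprogram *DIS = F.getSubprogram()) {
    llvm::DebugLoc DL =
        llvm::DILocation::get(DIS->getContext(), DIS->getScopeLine(), 0, DIS);
    ThunkInst.setDebugLoc(DL);
  }
}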
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/OpenMPOpt.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/OpenMPOpt.cpp
index a5ba6edb9a..bc15d5e0c0 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -19,16 +19,16 @@
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
-#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/Attributor.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
-#include "llvm/Transforms/Utils/CodeExtractor.h"
+#include "llvm/Transforms/Utils/CodeExtractor.h"
using namespace llvm;
using namespace omp;
@@ -40,22 +40,22 @@ static cl::opt<bool> DisableOpenMPOptimizations(
cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
cl::init(false));
-static cl::opt<bool> EnableParallelRegionMerging(
- "openmp-opt-enable-merging", cl::ZeroOrMore,
- cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden,
- cl::init(false));
-
+static cl::opt<bool> EnableParallelRegionMerging(
+ "openmp-opt-enable-merging", cl::ZeroOrMore,
+ cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden,
+ cl::init(false));
+
static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
cl::Hidden);
static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels",
cl::init(false), cl::Hidden);
-static cl::opt<bool> HideMemoryTransferLatency(
- "openmp-hide-memory-transfer-latency",
- cl::desc("[WIP] Tries to hide the latency of host to device memory"
- " transfers"),
- cl::Hidden, cl::init(false));
-
+static cl::opt<bool> HideMemoryTransferLatency(
+ "openmp-hide-memory-transfer-latency",
+ cl::desc("[WIP] Tries to hide the latency of host to device memory"
+ " transfers"),
+ cl::Hidden, cl::init(false));
+
STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
"Number of OpenMP runtime calls deduplicated");
STATISTIC(NumOpenMPParallelRegionsDeleted,
@@ -69,8 +69,8 @@ STATISTIC(NumOpenMPTargetRegionKernels,
STATISTIC(
NumOpenMPParallelRegionsReplacedInGPUStateMachine,
"Number of OpenMP parallel regions replaced with ID in GPU state machines");
-STATISTIC(NumOpenMPParallelRegionsMerged,
- "Number of OpenMP parallel regions merged");
+STATISTIC(NumOpenMPParallelRegionsMerged,
+ "Number of OpenMP parallel regions merged");
#if !defined(NDEBUG)
static constexpr auto TAG = "[" DEBUG_TYPE "]";
@@ -318,17 +318,17 @@ struct OMPInformationCache : public InformationCache {
return NumUses;
}
- // Helper function to recollect uses of a runtime function.
- void recollectUsesForFunction(RuntimeFunction RTF) {
- auto &RFI = RFIs[RTF];
- RFI.clearUsesMap();
- collectUses(RFI, /*CollectStats*/ false);
- }
-
+ // Helper function to recollect uses of a runtime function.
+ void recollectUsesForFunction(RuntimeFunction RTF) {
+ auto &RFI = RFIs[RTF];
+ RFI.clearUsesMap();
+ collectUses(RFI, /*CollectStats*/ false);
+ }
+
// Helper function to recollect uses of all runtime functions.
void recollectUses() {
- for (int Idx = 0; Idx < RFIs.size(); ++Idx)
- recollectUsesForFunction(static_cast<RuntimeFunction>(Idx));
+ for (int Idx = 0; Idx < RFIs.size(); ++Idx)
+ recollectUsesForFunction(static_cast<RuntimeFunction>(Idx));
}
/// Helper to initialize all runtime function information for those defined
@@ -392,91 +392,91 @@ struct OMPInformationCache : public InformationCache {
SmallPtrSetImpl<Kernel> &Kernels;
};
-/// Used to map the values physically (in the IR) stored in an offload
-/// array, to a vector in memory.
-struct OffloadArray {
- /// Physical array (in the IR).
- AllocaInst *Array = nullptr;
- /// Mapped values.
- SmallVector<Value *, 8> StoredValues;
- /// Last stores made in the offload array.
- SmallVector<StoreInst *, 8> LastAccesses;
-
- OffloadArray() = default;
-
- /// Initializes the OffloadArray with the values stored in \p Array before
- /// instruction \p Before is reached. Returns false if the initialization
- /// fails.
- /// This MUST be used immediately after the construction of the object.
- bool initialize(AllocaInst &Array, Instruction &Before) {
- if (!Array.getAllocatedType()->isArrayTy())
- return false;
-
- if (!getValues(Array, Before))
- return false;
-
- this->Array = &Array;
- return true;
- }
-
- static const unsigned DeviceIDArgNum = 1;
- static const unsigned BasePtrsArgNum = 3;
- static const unsigned PtrsArgNum = 4;
- static const unsigned SizesArgNum = 5;
-
-private:
- /// Traverses the BasicBlock where \p Array is, collecting the stores made to
- /// \p Array, leaving StoredValues with the values stored before the
- /// instruction \p Before is reached.
- bool getValues(AllocaInst &Array, Instruction &Before) {
- // Initialize container.
- const uint64_t NumValues = Array.getAllocatedType()->getArrayNumElements();
- StoredValues.assign(NumValues, nullptr);
- LastAccesses.assign(NumValues, nullptr);
-
- // TODO: This assumes the instruction \p Before is in the same
- // BasicBlock as Array. Make it general, for any control flow graph.
- BasicBlock *BB = Array.getParent();
- if (BB != Before.getParent())
- return false;
-
- const DataLayout &DL = Array.getModule()->getDataLayout();
- const unsigned int PointerSize = DL.getPointerSize();
-
- for (Instruction &I : *BB) {
- if (&I == &Before)
- break;
-
- if (!isa<StoreInst>(&I))
- continue;
-
- auto *S = cast<StoreInst>(&I);
- int64_t Offset = -1;
- auto *Dst =
- GetPointerBaseWithConstantOffset(S->getPointerOperand(), Offset, DL);
- if (Dst == &Array) {
- int64_t Idx = Offset / PointerSize;
- StoredValues[Idx] = getUnderlyingObject(S->getValueOperand());
- LastAccesses[Idx] = S;
- }
- }
-
- return isFilled();
- }
-
- /// Returns true if all values in StoredValues and
- /// LastAccesses are not nullptrs.
- bool isFilled() {
- const unsigned NumValues = StoredValues.size();
- for (unsigned I = 0; I < NumValues; ++I) {
- if (!StoredValues[I] || !LastAccesses[I])
- return false;
- }
-
- return true;
- }
-};
-
+/// Used to map the values physically (in the IR) stored in an offload
+/// array, to a vector in memory.
+struct OffloadArray {
+ /// Physical array (in the IR).
+ AllocaInst *Array = nullptr;
+ /// Mapped values.
+ SmallVector<Value *, 8> StoredValues;
+ /// Last stores made in the offload array.
+ SmallVector<StoreInst *, 8> LastAccesses;
+
+ OffloadArray() = default;
+
+ /// Initializes the OffloadArray with the values stored in \p Array before
+ /// instruction \p Before is reached. Returns false if the initialization
+ /// fails.
+ /// This MUST be used immediately after the construction of the object.
+ bool initialize(AllocaInst &Array, Instruction &Before) {
+ if (!Array.getAllocatedType()->isArrayTy())
+ return false;
+
+ if (!getValues(Array, Before))
+ return false;
+
+ this->Array = &Array;
+ return true;
+ }
+
+ static const unsigned DeviceIDArgNum = 1;
+ static const unsigned BasePtrsArgNum = 3;
+ static const unsigned PtrsArgNum = 4;
+ static const unsigned SizesArgNum = 5;
+
+private:
+ /// Traverses the BasicBlock where \p Array is, collecting the stores made to
+ /// \p Array, leaving StoredValues with the values stored before the
+ /// instruction \p Before is reached.
+ bool getValues(AllocaInst &Array, Instruction &Before) {
+ // Initialize container.
+ const uint64_t NumValues = Array.getAllocatedType()->getArrayNumElements();
+ StoredValues.assign(NumValues, nullptr);
+ LastAccesses.assign(NumValues, nullptr);
+
+ // TODO: This assumes the instruction \p Before is in the same
+ // BasicBlock as Array. Make it general, for any control flow graph.
+ BasicBlock *BB = Array.getParent();
+ if (BB != Before.getParent())
+ return false;
+
+ const DataLayout &DL = Array.getModule()->getDataLayout();
+ const unsigned int PointerSize = DL.getPointerSize();
+
+ for (Instruction &I : *BB) {
+ if (&I == &Before)
+ break;
+
+ if (!isa<StoreInst>(&I))
+ continue;
+
+ auto *S = cast<StoreInst>(&I);
+ int64_t Offset = -1;
+ auto *Dst =
+ GetPointerBaseWithConstantOffset(S->getPointerOperand(), Offset, DL);
+ if (Dst == &Array) {
+ int64_t Idx = Offset / PointerSize;
+ StoredValues[Idx] = getUnderlyingObject(S->getValueOperand());
+ LastAccesses[Idx] = S;
+ }
+ }
+
+ return isFilled();
+ }
+
+ /// Returns true if all values in StoredValues and
+ /// LastAccesses are not nullptrs.
+ bool isFilled() {
+ const unsigned NumValues = StoredValues.size();
+ for (unsigned I = 0; I < NumValues; ++I) {
+ if (!StoredValues[I] || !LastAccesses[I])
+ return false;
+ }
+
+ return true;
+ }
+};
+
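A minimal usage sketch for the struct above (illustration only; the alloca and runtime-call arguments are hypothetical stand-ins, and this file's own includes are assumed): gather the values stored into an offload array before the runtime call is reached, bailing out when initialize cannot prove the contents.

void inspectOffloadArray(llvm::AllocaInst &BasePtrsArray,
                         llvm::Instruction &RuntimeCall) {
  OffloadArray BasePtrs;
  if (!BasePtrs.initialize(BasePtrsArray, RuntimeCall))
    return; // contents unknown; an optimization must give up here
  for (llvm::Value *V : BasePtrs.StoredValues)
    LLVM_DEBUG(llvm::dbgs() << "offload operand: " << *V << "\n");
}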
struct OpenMPOpt {
using OptimizationRemarkGetter =
@@ -488,12 +488,12 @@ struct OpenMPOpt {
: M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater),
OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {}
- /// Check if any remarks are enabled for openmp-opt
- bool remarksEnabled() {
- auto &Ctx = M.getContext();
- return Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(DEBUG_TYPE);
- }
-
+ /// Check if any remarks are enabled for openmp-opt
+ bool remarksEnabled() {
+ auto &Ctx = M.getContext();
+ return Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(DEBUG_TYPE);
+ }
+
/// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
bool run() {
if (SCC.empty())
@@ -517,18 +517,18 @@ struct OpenMPOpt {
// Recollect uses, in case Attributor deleted any.
OMPInfoCache.recollectUses();
- Changed |= deleteParallelRegions();
- if (HideMemoryTransferLatency)
- Changed |= hideMemTransfersLatency();
- if (remarksEnabled())
- analysisGlobalization();
+ Changed |= deleteParallelRegions();
+ if (HideMemoryTransferLatency)
+ Changed |= hideMemTransfersLatency();
+ if (remarksEnabled())
+ analysisGlobalization();
Changed |= deduplicateRuntimeCalls();
- if (EnableParallelRegionMerging) {
- if (mergeParallelRegions()) {
- deduplicateRuntimeCalls();
- Changed = true;
- }
- }
+ if (EnableParallelRegionMerging) {
+ if (mergeParallelRegions()) {
+ deduplicateRuntimeCalls();
+ Changed = true;
+ }
+ }
return Changed;
}
@@ -536,8 +536,8 @@ struct OpenMPOpt {
/// Print initial ICV values for testing.
/// FIXME: This should be done from the Attributor once it is added.
void printICVs() const {
- InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel,
- ICV_proc_bind};
+ InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel,
+ ICV_proc_bind};
for (Function *F : OMPInfoCache.ModuleSlice) {
for (auto ICV : ICVs) {
@@ -593,394 +593,394 @@ struct OpenMPOpt {
}
private:
- /// Merge parallel regions when it is safe.
- bool mergeParallelRegions() {
- const unsigned CallbackCalleeOperand = 2;
- const unsigned CallbackFirstArgOperand = 3;
- using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
-
- // Check if there are any __kmpc_fork_call calls to merge.
- OMPInformationCache::RuntimeFunctionInfo &RFI =
- OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
-
- if (!RFI.Declaration)
- return false;
-
- // Unmergable calls that prevent merging a parallel region.
- OMPInformationCache::RuntimeFunctionInfo UnmergableCallsInfo[] = {
- OMPInfoCache.RFIs[OMPRTL___kmpc_push_proc_bind],
- OMPInfoCache.RFIs[OMPRTL___kmpc_push_num_threads],
- };
-
- bool Changed = false;
- LoopInfo *LI = nullptr;
- DominatorTree *DT = nullptr;
-
- SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap;
-
- BasicBlock *StartBB = nullptr, *EndBB = nullptr;
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
- BasicBlock &ContinuationIP) {
- BasicBlock *CGStartBB = CodeGenIP.getBlock();
- BasicBlock *CGEndBB =
- SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
- assert(StartBB != nullptr && "StartBB should not be null");
- CGStartBB->getTerminator()->setSuccessor(0, StartBB);
- assert(EndBB != nullptr && "EndBB should not be null");
- EndBB->getTerminator()->setSuccessor(0, CGEndBB);
- };
-
- auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
- Value &Inner, Value *&ReplacementValue) -> InsertPointTy {
- ReplacementValue = &Inner;
- return CodeGenIP;
- };
-
- auto FiniCB = [&](InsertPointTy CodeGenIP) {};
-
- /// Create a sequential execution region within a merged parallel region,
- /// encapsulated in a master construct with a barrier for synchronization.
- auto CreateSequentialRegion = [&](Function *OuterFn,
- BasicBlock *OuterPredBB,
- Instruction *SeqStartI,
- Instruction *SeqEndI) {
- // Isolate the instructions of the sequential region to a separate
- // block.
- BasicBlock *ParentBB = SeqStartI->getParent();
- BasicBlock *SeqEndBB =
- SplitBlock(ParentBB, SeqEndI->getNextNode(), DT, LI);
- BasicBlock *SeqAfterBB =
- SplitBlock(SeqEndBB, &*SeqEndBB->getFirstInsertionPt(), DT, LI);
- BasicBlock *SeqStartBB =
- SplitBlock(ParentBB, SeqStartI, DT, LI, nullptr, "seq.par.merged");
-
- assert(ParentBB->getUniqueSuccessor() == SeqStartBB &&
- "Expected a different CFG");
- const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
- ParentBB->getTerminator()->eraseFromParent();
-
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
- BasicBlock &ContinuationIP) {
- BasicBlock *CGStartBB = CodeGenIP.getBlock();
- BasicBlock *CGEndBB =
- SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
- assert(SeqStartBB != nullptr && "SeqStartBB should not be null");
- CGStartBB->getTerminator()->setSuccessor(0, SeqStartBB);
- assert(SeqEndBB != nullptr && "SeqEndBB should not be null");
- SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB);
- };
- auto FiniCB = [&](InsertPointTy CodeGenIP) {};
-
- // Find outputs from the sequential region to outside users and
- // broadcast their values to them.
- for (Instruction &I : *SeqStartBB) {
- SmallPtrSet<Instruction *, 4> OutsideUsers;
- for (User *Usr : I.users()) {
- Instruction &UsrI = *cast<Instruction>(Usr);
- // Ignore outputs to LT intrinsics, code extraction for the merged
- // parallel region will fix them.
- if (UsrI.isLifetimeStartOrEnd())
- continue;
-
- if (UsrI.getParent() != SeqStartBB)
- OutsideUsers.insert(&UsrI);
- }
-
- if (OutsideUsers.empty())
- continue;
-
- // Emit an alloca in the outer region to store the broadcasted
- // value.
- const DataLayout &DL = M.getDataLayout();
- AllocaInst *AllocaI = new AllocaInst(
- I.getType(), DL.getAllocaAddrSpace(), nullptr,
- I.getName() + ".seq.output.alloc", &OuterFn->front().front());
-
- // Emit a store instruction in the sequential BB to update the
- // value.
- new StoreInst(&I, AllocaI, SeqStartBB->getTerminator());
-
- // Emit a load instruction and replace the use of the output value
- // with it.
- for (Instruction *UsrI : OutsideUsers) {
- LoadInst *LoadI = new LoadInst(I.getType(), AllocaI,
- I.getName() + ".seq.output.load", UsrI);
- UsrI->replaceUsesOfWith(&I, LoadI);
- }
- }
-
- OpenMPIRBuilder::LocationDescription Loc(
- InsertPointTy(ParentBB, ParentBB->end()), DL);
- InsertPointTy SeqAfterIP =
- OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB);
-
- OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel);
-
- BranchInst::Create(SeqAfterBB, SeqAfterIP.getBlock());
-
- LLVM_DEBUG(dbgs() << TAG << "After sequential inlining " << *OuterFn
- << "\n");
- };
-
- // Helper to merge the __kmpc_fork_call calls in MergableCIs. They are all
- // contained in BB and only separated by instructions that can be
- // redundantly executed in parallel. The block BB is split before the first
- // call (in MergableCIs) and after the last so the entire region we merge
- // into a single parallel region is contained in a single basic block
- // without any other instructions. We use the OpenMPIRBuilder to outline
- // that block and call the resulting function via __kmpc_fork_call.
- auto Merge = [&](SmallVectorImpl<CallInst *> &MergableCIs, BasicBlock *BB) {
-      // TODO: Change the interface to allow single CIs expanded, e.g., to
- // include an outer loop.
- assert(MergableCIs.size() > 1 && "Assumed multiple mergable CIs");
-
- auto Remark = [&](OptimizationRemark OR) {
- OR << "Parallel region at "
- << ore::NV("OpenMPParallelMergeFront",
- MergableCIs.front()->getDebugLoc())
- << " merged with parallel regions at ";
- for (auto *CI : llvm::drop_begin(MergableCIs)) {
- OR << ore::NV("OpenMPParallelMerge", CI->getDebugLoc());
- if (CI != MergableCIs.back())
- OR << ", ";
- }
- return OR;
- };
-
- emitRemark<OptimizationRemark>(MergableCIs.front(),
- "OpenMPParallelRegionMerging", Remark);
-
- Function *OriginalFn = BB->getParent();
- LLVM_DEBUG(dbgs() << TAG << "Merge " << MergableCIs.size()
- << " parallel regions in " << OriginalFn->getName()
- << "\n");
-
- // Isolate the calls to merge in a separate block.
- EndBB = SplitBlock(BB, MergableCIs.back()->getNextNode(), DT, LI);
- BasicBlock *AfterBB =
- SplitBlock(EndBB, &*EndBB->getFirstInsertionPt(), DT, LI);
- StartBB = SplitBlock(BB, MergableCIs.front(), DT, LI, nullptr,
- "omp.par.merged");
-
- assert(BB->getUniqueSuccessor() == StartBB && "Expected a different CFG");
- const DebugLoc DL = BB->getTerminator()->getDebugLoc();
- BB->getTerminator()->eraseFromParent();
-
- // Create sequential regions for sequential instructions that are
- // in-between mergable parallel regions.
- for (auto *It = MergableCIs.begin(), *End = MergableCIs.end() - 1;
- It != End; ++It) {
- Instruction *ForkCI = *It;
- Instruction *NextForkCI = *(It + 1);
-
-        // Continue if there are no in-between instructions.
- if (ForkCI->getNextNode() == NextForkCI)
- continue;
-
- CreateSequentialRegion(OriginalFn, BB, ForkCI->getNextNode(),
- NextForkCI->getPrevNode());
- }
-
- OpenMPIRBuilder::LocationDescription Loc(InsertPointTy(BB, BB->end()),
- DL);
- IRBuilder<>::InsertPoint AllocaIP(
- &OriginalFn->getEntryBlock(),
- OriginalFn->getEntryBlock().getFirstInsertionPt());
- // Create the merged parallel region with default proc binding, to
- // avoid overriding binding settings, and without explicit cancellation.
- InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel(
- Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr,
- OMP_PROC_BIND_default, /* IsCancellable */ false);
- BranchInst::Create(AfterBB, AfterIP.getBlock());
-
- // Perform the actual outlining.
- OMPInfoCache.OMPBuilder.finalize(/* AllowExtractorSinking */ true);
-
- Function *OutlinedFn = MergableCIs.front()->getCaller();
-
- // Replace the __kmpc_fork_call calls with direct calls to the outlined
- // callbacks.
- SmallVector<Value *, 8> Args;
- for (auto *CI : MergableCIs) {
- Value *Callee =
- CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts();
- FunctionType *FT =
- cast<FunctionType>(Callee->getType()->getPointerElementType());
- Args.clear();
- Args.push_back(OutlinedFn->getArg(0));
- Args.push_back(OutlinedFn->getArg(1));
- for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands();
- U < E; ++U)
- Args.push_back(CI->getArgOperand(U));
-
- CallInst *NewCI = CallInst::Create(FT, Callee, Args, "", CI);
- if (CI->getDebugLoc())
- NewCI->setDebugLoc(CI->getDebugLoc());
-
- // Forward parameter attributes from the callback to the callee.
- for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands();
- U < E; ++U)
- for (const Attribute &A : CI->getAttributes().getParamAttributes(U))
- NewCI->addParamAttr(
- U - (CallbackFirstArgOperand - CallbackCalleeOperand), A);
-
- // Emit an explicit barrier to replace the implicit fork-join barrier.
- if (CI != MergableCIs.back()) {
- // TODO: Remove barrier if the merged parallel region includes the
- // 'nowait' clause.
- OMPInfoCache.OMPBuilder.createBarrier(
- InsertPointTy(NewCI->getParent(),
- NewCI->getNextNode()->getIterator()),
- OMPD_parallel);
- }
-
- auto Remark = [&](OptimizationRemark OR) {
- return OR << "Parallel region at "
- << ore::NV("OpenMPParallelMerge", CI->getDebugLoc())
- << " merged with "
- << ore::NV("OpenMPParallelMergeFront",
- MergableCIs.front()->getDebugLoc());
- };
- if (CI != MergableCIs.front())
- emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionMerging",
- Remark);
-
- CI->eraseFromParent();
- }
-
- assert(OutlinedFn != OriginalFn && "Outlining failed");
- CGUpdater.registerOutlinedFunction(*OriginalFn, *OutlinedFn);
- CGUpdater.reanalyzeFunction(*OriginalFn);
-
- NumOpenMPParallelRegionsMerged += MergableCIs.size();
-
- return true;
- };
-
-    // Helper function that identifies sequences of
- // __kmpc_fork_call uses in a basic block.
- auto DetectPRsCB = [&](Use &U, Function &F) {
- CallInst *CI = getCallIfRegularCall(U, &RFI);
- BB2PRMap[CI->getParent()].insert(CI);
-
- return false;
- };
-
- BB2PRMap.clear();
- RFI.foreachUse(SCC, DetectPRsCB);
- SmallVector<SmallVector<CallInst *, 4>, 4> MergableCIsVector;
- // Find mergable parallel regions within a basic block that are
- // safe to merge, that is any in-between instructions can safely
- // execute in parallel after merging.
- // TODO: support merging across basic-blocks.
- for (auto &It : BB2PRMap) {
- auto &CIs = It.getSecond();
- if (CIs.size() < 2)
- continue;
-
- BasicBlock *BB = It.getFirst();
- SmallVector<CallInst *, 4> MergableCIs;
-
- /// Returns true if the instruction is mergable, false otherwise.
- /// A terminator instruction is unmergable by definition since merging
- /// works within a BB. Instructions before the mergable region are
- /// mergable if they are not calls to OpenMP runtime functions that may
- /// set different execution parameters for subsequent parallel regions.
- /// Instructions in-between parallel regions are mergable if they are not
- /// calls to any non-intrinsic function since that may call a non-mergable
- /// OpenMP runtime function.
- auto IsMergable = [&](Instruction &I, bool IsBeforeMergableRegion) {
- // We do not merge across BBs, hence return false (unmergable) if the
- // instruction is a terminator.
- if (I.isTerminator())
- return false;
-
- if (!isa<CallInst>(&I))
- return true;
-
- CallInst *CI = cast<CallInst>(&I);
- if (IsBeforeMergableRegion) {
- Function *CalledFunction = CI->getCalledFunction();
- if (!CalledFunction)
- return false;
- // Return false (unmergable) if the call before the parallel
- // region calls an explicit affinity (proc_bind) or number of
- // threads (num_threads) compiler-generated function. Those settings
- // may be incompatible with following parallel regions.
- // TODO: ICV tracking to detect compatibility.
- for (const auto &RFI : UnmergableCallsInfo) {
- if (CalledFunction == RFI.Declaration)
- return false;
- }
- } else {
- // Return false (unmergable) if there is a call instruction
- // in-between parallel regions when it is not an intrinsic. It
- // may call an unmergable OpenMP runtime function in its callpath.
- // TODO: Keep track of possible OpenMP calls in the callpath.
- if (!isa<IntrinsicInst>(CI))
- return false;
- }
-
- return true;
- };
- // Find maximal number of parallel region CIs that are safe to merge.
- for (auto It = BB->begin(), End = BB->end(); It != End;) {
- Instruction &I = *It;
- ++It;
-
- if (CIs.count(&I)) {
- MergableCIs.push_back(cast<CallInst>(&I));
- continue;
- }
-
- // Continue expanding if the instruction is mergable.
- if (IsMergable(I, MergableCIs.empty()))
- continue;
-
- // Forward the instruction iterator to skip the next parallel region
- // since there is an unmergable instruction which can affect it.
- for (; It != End; ++It) {
- Instruction &SkipI = *It;
- if (CIs.count(&SkipI)) {
- LLVM_DEBUG(dbgs() << TAG << "Skip parallel region " << SkipI
- << " due to " << I << "\n");
- ++It;
- break;
- }
- }
-
- // Store mergable regions found.
- if (MergableCIs.size() > 1) {
- MergableCIsVector.push_back(MergableCIs);
- LLVM_DEBUG(dbgs() << TAG << "Found " << MergableCIs.size()
- << " parallel regions in block " << BB->getName()
- << " of function " << BB->getParent()->getName()
- << "\n";);
- }
-
- MergableCIs.clear();
- }
-
- if (!MergableCIsVector.empty()) {
- Changed = true;
-
- for (auto &MergableCIs : MergableCIsVector)
- Merge(MergableCIs, BB);
- }
- }
-
- if (Changed) {
- /// Re-collect use for fork calls, emitted barrier calls, and
- /// any emitted master/end_master calls.
- OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_fork_call);
- OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_barrier);
- OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_master);
- OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_end_master);
- }
-
- return Changed;
- }
-
+ /// Merge parallel regions when it is safe.
+ bool mergeParallelRegions() {
+ const unsigned CallbackCalleeOperand = 2;
+ const unsigned CallbackFirstArgOperand = 3;
+ using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+
+ // Check if there are any __kmpc_fork_call calls to merge.
+ OMPInformationCache::RuntimeFunctionInfo &RFI =
+ OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
+
+ if (!RFI.Declaration)
+ return false;
+
+ // Unmergable calls that prevent merging a parallel region.
+ OMPInformationCache::RuntimeFunctionInfo UnmergableCallsInfo[] = {
+ OMPInfoCache.RFIs[OMPRTL___kmpc_push_proc_bind],
+ OMPInfoCache.RFIs[OMPRTL___kmpc_push_num_threads],
+ };
+
+ bool Changed = false;
+ LoopInfo *LI = nullptr;
+ DominatorTree *DT = nullptr;
+
+ SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap;
+
+ BasicBlock *StartBB = nullptr, *EndBB = nullptr;
+ auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ BasicBlock &ContinuationIP) {
+ BasicBlock *CGStartBB = CodeGenIP.getBlock();
+ BasicBlock *CGEndBB =
+ SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
+ assert(StartBB != nullptr && "StartBB should not be null");
+ CGStartBB->getTerminator()->setSuccessor(0, StartBB);
+ assert(EndBB != nullptr && "EndBB should not be null");
+ EndBB->getTerminator()->setSuccessor(0, CGEndBB);
+ };
+
+ auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
+ Value &Inner, Value *&ReplacementValue) -> InsertPointTy {
+ ReplacementValue = &Inner;
+ return CodeGenIP;
+ };
+
+ auto FiniCB = [&](InsertPointTy CodeGenIP) {};
+
+ /// Create a sequential execution region within a merged parallel region,
+ /// encapsulated in a master construct with a barrier for synchronization.
+ auto CreateSequentialRegion = [&](Function *OuterFn,
+ BasicBlock *OuterPredBB,
+ Instruction *SeqStartI,
+ Instruction *SeqEndI) {
+ // Isolate the instructions of the sequential region to a separate
+ // block.
+ BasicBlock *ParentBB = SeqStartI->getParent();
+ BasicBlock *SeqEndBB =
+ SplitBlock(ParentBB, SeqEndI->getNextNode(), DT, LI);
+ BasicBlock *SeqAfterBB =
+ SplitBlock(SeqEndBB, &*SeqEndBB->getFirstInsertionPt(), DT, LI);
+ BasicBlock *SeqStartBB =
+ SplitBlock(ParentBB, SeqStartI, DT, LI, nullptr, "seq.par.merged");
+
+ assert(ParentBB->getUniqueSuccessor() == SeqStartBB &&
+ "Expected a different CFG");
+ const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
+ ParentBB->getTerminator()->eraseFromParent();
+
+ auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ BasicBlock &ContinuationIP) {
+ BasicBlock *CGStartBB = CodeGenIP.getBlock();
+ BasicBlock *CGEndBB =
+ SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
+ assert(SeqStartBB != nullptr && "SeqStartBB should not be null");
+ CGStartBB->getTerminator()->setSuccessor(0, SeqStartBB);
+ assert(SeqEndBB != nullptr && "SeqEndBB should not be null");
+ SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB);
+ };
+ auto FiniCB = [&](InsertPointTy CodeGenIP) {};
+
+ // Find outputs from the sequential region to outside users and
+ // broadcast their values to them.
+ for (Instruction &I : *SeqStartBB) {
+ SmallPtrSet<Instruction *, 4> OutsideUsers;
+ for (User *Usr : I.users()) {
+ Instruction &UsrI = *cast<Instruction>(Usr);
+ // Ignore outputs to LT intrinsics, code extraction for the merged
+ // parallel region will fix them.
+ if (UsrI.isLifetimeStartOrEnd())
+ continue;
+
+ if (UsrI.getParent() != SeqStartBB)
+ OutsideUsers.insert(&UsrI);
+ }
+
+ if (OutsideUsers.empty())
+ continue;
+
+ // Emit an alloca in the outer region to store the broadcasted
+ // value.
+ const DataLayout &DL = M.getDataLayout();
+ AllocaInst *AllocaI = new AllocaInst(
+ I.getType(), DL.getAllocaAddrSpace(), nullptr,
+ I.getName() + ".seq.output.alloc", &OuterFn->front().front());
+
+ // Emit a store instruction in the sequential BB to update the
+ // value.
+ new StoreInst(&I, AllocaI, SeqStartBB->getTerminator());
+
+ // Emit a load instruction and replace the use of the output value
+ // with it.
+ for (Instruction *UsrI : OutsideUsers) {
+ LoadInst *LoadI = new LoadInst(I.getType(), AllocaI,
+ I.getName() + ".seq.output.load", UsrI);
+ UsrI->replaceUsesOfWith(&I, LoadI);
+ }
+ }
+
+ OpenMPIRBuilder::LocationDescription Loc(
+ InsertPointTy(ParentBB, ParentBB->end()), DL);
+ InsertPointTy SeqAfterIP =
+ OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB);
+
+ OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel);
+
+ BranchInst::Create(SeqAfterBB, SeqAfterIP.getBlock());
+
+ LLVM_DEBUG(dbgs() << TAG << "After sequential inlining " << *OuterFn
+ << "\n");
+ };
+
+ // Helper to merge the __kmpc_fork_call calls in MergableCIs. They are all
+ // contained in BB and only separated by instructions that can be
+ // redundantly executed in parallel. The block BB is split before the first
+ // call (in MergableCIs) and after the last so the entire region we merge
+ // into a single parallel region is contained in a single basic block
+ // without any other instructions. We use the OpenMPIRBuilder to outline
+ // that block and call the resulting function via __kmpc_fork_call.
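+  // For example (names illustrative), two adjacent regions
+  //     call void @__kmpc_fork_call(@ident, 1, @fn1, %a)
+  //     call void @__kmpc_fork_call(@ident, 1, @fn2, %b)
+  // become a single fork of a merged outlined function that calls @fn1 and
+  // @fn2 directly, separated by an explicit barrier.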
+ auto Merge = [&](SmallVectorImpl<CallInst *> &MergableCIs, BasicBlock *BB) {
+    // TODO: Change the interface to allow expanding single CIs, e.g., to
+    // include an outer loop.
+ assert(MergableCIs.size() > 1 && "Assumed multiple mergable CIs");
+
+ auto Remark = [&](OptimizationRemark OR) {
+ OR << "Parallel region at "
+ << ore::NV("OpenMPParallelMergeFront",
+ MergableCIs.front()->getDebugLoc())
+ << " merged with parallel regions at ";
+ for (auto *CI : llvm::drop_begin(MergableCIs)) {
+ OR << ore::NV("OpenMPParallelMerge", CI->getDebugLoc());
+ if (CI != MergableCIs.back())
+ OR << ", ";
+ }
+ return OR;
+ };
+
+ emitRemark<OptimizationRemark>(MergableCIs.front(),
+ "OpenMPParallelRegionMerging", Remark);
+
+ Function *OriginalFn = BB->getParent();
+ LLVM_DEBUG(dbgs() << TAG << "Merge " << MergableCIs.size()
+ << " parallel regions in " << OriginalFn->getName()
+ << "\n");
+
+ // Isolate the calls to merge in a separate block.
+ EndBB = SplitBlock(BB, MergableCIs.back()->getNextNode(), DT, LI);
+ BasicBlock *AfterBB =
+ SplitBlock(EndBB, &*EndBB->getFirstInsertionPt(), DT, LI);
+ StartBB = SplitBlock(BB, MergableCIs.front(), DT, LI, nullptr,
+ "omp.par.merged");
+
+ assert(BB->getUniqueSuccessor() == StartBB && "Expected a different CFG");
+ const DebugLoc DL = BB->getTerminator()->getDebugLoc();
+ BB->getTerminator()->eraseFromParent();
+
+ // Create sequential regions for sequential instructions that are
+ // in-between mergable parallel regions.
+ for (auto *It = MergableCIs.begin(), *End = MergableCIs.end() - 1;
+ It != End; ++It) {
+ Instruction *ForkCI = *It;
+ Instruction *NextForkCI = *(It + 1);
+
+      // Continue if there are no in-between instructions.
+ if (ForkCI->getNextNode() == NextForkCI)
+ continue;
+
+ CreateSequentialRegion(OriginalFn, BB, ForkCI->getNextNode(),
+ NextForkCI->getPrevNode());
+ }
+
+ OpenMPIRBuilder::LocationDescription Loc(InsertPointTy(BB, BB->end()),
+ DL);
+ IRBuilder<>::InsertPoint AllocaIP(
+ &OriginalFn->getEntryBlock(),
+ OriginalFn->getEntryBlock().getFirstInsertionPt());
+ // Create the merged parallel region with default proc binding, to
+ // avoid overriding binding settings, and without explicit cancellation.
+ InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel(
+ Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr,
+ OMP_PROC_BIND_default, /* IsCancellable */ false);
+ BranchInst::Create(AfterBB, AfterIP.getBlock());
+
+ // Perform the actual outlining.
+ OMPInfoCache.OMPBuilder.finalize(/* AllowExtractorSinking */ true);
+
+ Function *OutlinedFn = MergableCIs.front()->getCaller();
+
+ // Replace the __kmpc_fork_call calls with direct calls to the outlined
+ // callbacks.
+ SmallVector<Value *, 8> Args;
+ for (auto *CI : MergableCIs) {
+ Value *Callee =
+ CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts();
+ FunctionType *FT =
+ cast<FunctionType>(Callee->getType()->getPointerElementType());
+ Args.clear();
+ Args.push_back(OutlinedFn->getArg(0));
+ Args.push_back(OutlinedFn->getArg(1));
+ for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands();
+ U < E; ++U)
+ Args.push_back(CI->getArgOperand(U));
+
+ CallInst *NewCI = CallInst::Create(FT, Callee, Args, "", CI);
+ if (CI->getDebugLoc())
+ NewCI->setDebugLoc(CI->getDebugLoc());
+
+ // Forward parameter attributes from the callback to the callee.
+ for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands();
+ U < E; ++U)
+ for (const Attribute &A : CI->getAttributes().getParamAttributes(U))
+ NewCI->addParamAttr(
+ U - (CallbackFirstArgOperand - CallbackCalleeOperand), A);
+
+ // Emit an explicit barrier to replace the implicit fork-join barrier.
+ if (CI != MergableCIs.back()) {
+ // TODO: Remove barrier if the merged parallel region includes the
+ // 'nowait' clause.
+ OMPInfoCache.OMPBuilder.createBarrier(
+ InsertPointTy(NewCI->getParent(),
+ NewCI->getNextNode()->getIterator()),
+ OMPD_parallel);
+ }
+
+ auto Remark = [&](OptimizationRemark OR) {
+ return OR << "Parallel region at "
+ << ore::NV("OpenMPParallelMerge", CI->getDebugLoc())
+ << " merged with "
+ << ore::NV("OpenMPParallelMergeFront",
+ MergableCIs.front()->getDebugLoc());
+ };
+ if (CI != MergableCIs.front())
+ emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionMerging",
+ Remark);
+
+ CI->eraseFromParent();
+ }
+
+ assert(OutlinedFn != OriginalFn && "Outlining failed");
+ CGUpdater.registerOutlinedFunction(*OriginalFn, *OutlinedFn);
+ CGUpdater.reanalyzeFunction(*OriginalFn);
+
+ NumOpenMPParallelRegionsMerged += MergableCIs.size();
+
+ return true;
+ };
+
+  // Helper function that identifies sequences of
+ // __kmpc_fork_call uses in a basic block.
+ auto DetectPRsCB = [&](Use &U, Function &F) {
+ CallInst *CI = getCallIfRegularCall(U, &RFI);
+ BB2PRMap[CI->getParent()].insert(CI);
+
+ return false;
+ };
+
+ BB2PRMap.clear();
+ RFI.foreachUse(SCC, DetectPRsCB);
+ SmallVector<SmallVector<CallInst *, 4>, 4> MergableCIsVector;
+ // Find mergable parallel regions within a basic block that are
+  // safe to merge, that is, any in-between instructions can safely
+ // execute in parallel after merging.
+ // TODO: support merging across basic-blocks.
+ for (auto &It : BB2PRMap) {
+ auto &CIs = It.getSecond();
+ if (CIs.size() < 2)
+ continue;
+
+ BasicBlock *BB = It.getFirst();
+ SmallVector<CallInst *, 4> MergableCIs;
+
+ /// Returns true if the instruction is mergable, false otherwise.
+ /// A terminator instruction is unmergable by definition since merging
+ /// works within a BB. Instructions before the mergable region are
+ /// mergable if they are not calls to OpenMP runtime functions that may
+ /// set different execution parameters for subsequent parallel regions.
+ /// Instructions in-between parallel regions are mergable if they are not
+ /// calls to any non-intrinsic function since that may call a non-mergable
+ /// OpenMP runtime function.
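+    /// For example, a call to __kmpc_push_num_threads before a region makes
+    /// the region unmergable, while intrinsic calls in-between regions do not.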
+ auto IsMergable = [&](Instruction &I, bool IsBeforeMergableRegion) {
+ // We do not merge across BBs, hence return false (unmergable) if the
+ // instruction is a terminator.
+ if (I.isTerminator())
+ return false;
+
+ if (!isa<CallInst>(&I))
+ return true;
+
+ CallInst *CI = cast<CallInst>(&I);
+ if (IsBeforeMergableRegion) {
+ Function *CalledFunction = CI->getCalledFunction();
+ if (!CalledFunction)
+ return false;
+        // Return false (unmergable) if the call before the parallel
+        // region is a compiler-generated call that sets an explicit
+        // affinity (proc_bind) or number of threads (num_threads). Those
+        // settings may be incompatible with following parallel regions.
+ // TODO: ICV tracking to detect compatibility.
+ for (const auto &RFI : UnmergableCallsInfo) {
+ if (CalledFunction == RFI.Declaration)
+ return false;
+ }
+ } else {
+ // Return false (unmergable) if there is a call instruction
+ // in-between parallel regions when it is not an intrinsic. It
+ // may call an unmergable OpenMP runtime function in its callpath.
+ // TODO: Keep track of possible OpenMP calls in the callpath.
+ if (!isa<IntrinsicInst>(CI))
+ return false;
+ }
+
+ return true;
+ };
+ // Find maximal number of parallel region CIs that are safe to merge.
+ for (auto It = BB->begin(), End = BB->end(); It != End;) {
+ Instruction &I = *It;
+ ++It;
+
+ if (CIs.count(&I)) {
+ MergableCIs.push_back(cast<CallInst>(&I));
+ continue;
+ }
+
+ // Continue expanding if the instruction is mergable.
+ if (IsMergable(I, MergableCIs.empty()))
+ continue;
+
+ // Forward the instruction iterator to skip the next parallel region
+ // since there is an unmergable instruction which can affect it.
+ for (; It != End; ++It) {
+ Instruction &SkipI = *It;
+ if (CIs.count(&SkipI)) {
+ LLVM_DEBUG(dbgs() << TAG << "Skip parallel region " << SkipI
+ << " due to " << I << "\n");
+ ++It;
+ break;
+ }
+ }
+
+ // Store mergable regions found.
+ if (MergableCIs.size() > 1) {
+ MergableCIsVector.push_back(MergableCIs);
+ LLVM_DEBUG(dbgs() << TAG << "Found " << MergableCIs.size()
+ << " parallel regions in block " << BB->getName()
+ << " of function " << BB->getParent()->getName()
+ << "\n";);
+ }
+
+ MergableCIs.clear();
+ }
+
+ if (!MergableCIsVector.empty()) {
+ Changed = true;
+
+ for (auto &MergableCIs : MergableCIsVector)
+ Merge(MergableCIs, BB);
+ }
+ }
+
+ if (Changed) {
+    /// Re-collect uses for fork calls, emitted barrier calls, and
+ /// any emitted master/end_master calls.
+ OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_fork_call);
+ OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_barrier);
+ OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_master);
+ OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_end_master);
+ }
+
+ return Changed;
+ }
+
/// Try to delete parallel regions if possible.
bool deleteParallelRegions() {
const unsigned CallbackCalleeOperand = 2;
@@ -1058,8 +1058,8 @@ private:
for (Function *F : SCC) {
for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
- Changed |= deduplicateRuntimeCalls(
- *F, OMPInfoCache.RFIs[DeduplicableRuntimeCallID]);
+ Changed |= deduplicateRuntimeCalls(
+ *F, OMPInfoCache.RFIs[DeduplicableRuntimeCallID]);
// __kmpc_global_thread_num is special as we can replace it with an
// argument in enough cases to make it worth trying.
@@ -1076,223 +1076,223 @@ private:
return Changed;
}
- /// Tries to hide the latency of runtime calls that involve host to
- /// device memory transfers by splitting them into their "issue" and "wait"
- /// versions. The "issue" is moved upwards as much as possible. The "wait" is
- /// moved downwards as much as possible. The "issue" issues the memory
- /// transfer asynchronously, returning a handle. The "wait" waits on the
- /// returned handle for the memory transfer to finish.
- bool hideMemTransfersLatency() {
- auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper];
- bool Changed = false;
- auto SplitMemTransfers = [&](Use &U, Function &Decl) {
- auto *RTCall = getCallIfRegularCall(U, &RFI);
- if (!RTCall)
- return false;
-
- OffloadArray OffloadArrays[3];
- if (!getValuesInOffloadArrays(*RTCall, OffloadArrays))
- return false;
-
- LLVM_DEBUG(dumpValuesInOffloadArrays(OffloadArrays));
-
- // TODO: Check if can be moved upwards.
- bool WasSplit = false;
- Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall);
- if (WaitMovementPoint)
- WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint);
-
- Changed |= WasSplit;
- return WasSplit;
- };
- RFI.foreachUse(SCC, SplitMemTransfers);
-
- return Changed;
- }
-
- void analysisGlobalization() {
- RuntimeFunction GlobalizationRuntimeIDs[] = {
- OMPRTL___kmpc_data_sharing_coalesced_push_stack,
- OMPRTL___kmpc_data_sharing_push_stack};
-
- for (const auto GlobalizationCallID : GlobalizationRuntimeIDs) {
- auto &RFI = OMPInfoCache.RFIs[GlobalizationCallID];
-
- auto CheckGlobalization = [&](Use &U, Function &Decl) {
- if (CallInst *CI = getCallIfRegularCall(U, &RFI)) {
- auto Remark = [&](OptimizationRemarkAnalysis ORA) {
- return ORA
- << "Found thread data sharing on the GPU. "
- << "Expect degraded performance due to data globalization.";
- };
- emitRemark<OptimizationRemarkAnalysis>(CI, "OpenMPGlobalization",
- Remark);
- }
-
- return false;
- };
-
- RFI.foreachUse(SCC, CheckGlobalization);
- }
- }
-
- /// Maps the values stored in the offload arrays passed as arguments to
- /// \p RuntimeCall into the offload arrays in \p OAs.
- bool getValuesInOffloadArrays(CallInst &RuntimeCall,
- MutableArrayRef<OffloadArray> OAs) {
- assert(OAs.size() == 3 && "Need space for three offload arrays!");
-
- // A runtime call that involves memory offloading looks something like:
- // call void @__tgt_target_data_begin_mapper(arg0, arg1,
- // i8** %offload_baseptrs, i8** %offload_ptrs, i64* %offload_sizes,
- // ...)
- // So, the idea is to access the allocas that allocate space for these
- // offload arrays, offload_baseptrs, offload_ptrs, offload_sizes.
- // Therefore:
- // i8** %offload_baseptrs.
- Value *BasePtrsArg =
- RuntimeCall.getArgOperand(OffloadArray::BasePtrsArgNum);
- // i8** %offload_ptrs.
- Value *PtrsArg = RuntimeCall.getArgOperand(OffloadArray::PtrsArgNum);
- // i8** %offload_sizes.
- Value *SizesArg = RuntimeCall.getArgOperand(OffloadArray::SizesArgNum);
-
- // Get values stored in **offload_baseptrs.
- auto *V = getUnderlyingObject(BasePtrsArg);
- if (!isa<AllocaInst>(V))
- return false;
- auto *BasePtrsArray = cast<AllocaInst>(V);
- if (!OAs[0].initialize(*BasePtrsArray, RuntimeCall))
- return false;
-
-    // Get values stored in **offload_ptrs.
- V = getUnderlyingObject(PtrsArg);
- if (!isa<AllocaInst>(V))
- return false;
- auto *PtrsArray = cast<AllocaInst>(V);
- if (!OAs[1].initialize(*PtrsArray, RuntimeCall))
- return false;
-
- // Get values stored in **offload_sizes.
- V = getUnderlyingObject(SizesArg);
- // If it's a [constant] global array don't analyze it.
- if (isa<GlobalValue>(V))
- return isa<Constant>(V);
- if (!isa<AllocaInst>(V))
- return false;
-
- auto *SizesArray = cast<AllocaInst>(V);
- if (!OAs[2].initialize(*SizesArray, RuntimeCall))
- return false;
-
- return true;
- }
-
- /// Prints the values in the OffloadArrays \p OAs using LLVM_DEBUG.
- /// For now this is a way to test that the function getValuesInOffloadArrays
- /// is working properly.
- /// TODO: Move this to a unittest when unittests are available for OpenMPOpt.
- void dumpValuesInOffloadArrays(ArrayRef<OffloadArray> OAs) {
- assert(OAs.size() == 3 && "There are three offload arrays to debug!");
-
- LLVM_DEBUG(dbgs() << TAG << " Successfully got offload values:\n");
- std::string ValuesStr;
- raw_string_ostream Printer(ValuesStr);
- std::string Separator = " --- ";
-
- for (auto *BP : OAs[0].StoredValues) {
- BP->print(Printer);
- Printer << Separator;
- }
- LLVM_DEBUG(dbgs() << "\t\toffload_baseptrs: " << Printer.str() << "\n");
- ValuesStr.clear();
-
- for (auto *P : OAs[1].StoredValues) {
- P->print(Printer);
- Printer << Separator;
- }
- LLVM_DEBUG(dbgs() << "\t\toffload_ptrs: " << Printer.str() << "\n");
- ValuesStr.clear();
-
- for (auto *S : OAs[2].StoredValues) {
- S->print(Printer);
- Printer << Separator;
- }
- LLVM_DEBUG(dbgs() << "\t\toffload_sizes: " << Printer.str() << "\n");
- }
-
- /// Returns the instruction to which the "wait" counterpart of \p RuntimeCall
- /// can be moved. Returns nullptr if the movement is not possible or not worth it.
- Instruction *canBeMovedDownwards(CallInst &RuntimeCall) {
- // FIXME: This traverses only the BasicBlock where RuntimeCall is.
- // Make it traverse the CFG.
-
- Instruction *CurrentI = &RuntimeCall;
- bool IsWorthIt = false;
- while ((CurrentI = CurrentI->getNextNode())) {
-
- // TODO: Once we detect the regions to be offloaded we should use the
- // alias analysis manager to check if CurrentI may modify one of
- // the offloaded regions.
- if (CurrentI->mayHaveSideEffects() || CurrentI->mayReadFromMemory()) {
- if (IsWorthIt)
- return CurrentI;
-
- return nullptr;
- }
-
-      // FIXME: For now, moving it over anything without side effects is
-      // considered worth it.
- IsWorthIt = true;
- }
-
- // Return end of BasicBlock.
- return RuntimeCall.getParent()->getTerminator();
- }
-
- /// Splits \p RuntimeCall into its "issue" and "wait" counterparts.
- bool splitTargetDataBeginRTC(CallInst &RuntimeCall,
- Instruction &WaitMovementPoint) {
-    // Create a stack-allocated handle (__tgt_async_info) at the beginning of
-    // the function. It stores information about the async transfer so that it
-    // can be waited on later.
- auto &IRBuilder = OMPInfoCache.OMPBuilder;
- auto *F = RuntimeCall.getCaller();
- Instruction *FirstInst = &(F->getEntryBlock().front());
- AllocaInst *Handle = new AllocaInst(
- IRBuilder.AsyncInfo, F->getAddressSpace(), "handle", FirstInst);
-
- // Add "issue" runtime call declaration:
- // declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32,
- // i8**, i8**, i64*, i64*)
- FunctionCallee IssueDecl = IRBuilder.getOrCreateRuntimeFunction(
- M, OMPRTL___tgt_target_data_begin_mapper_issue);
-
- // Change RuntimeCall call site for its asynchronous version.
- SmallVector<Value *, 16> Args;
- for (auto &Arg : RuntimeCall.args())
- Args.push_back(Arg.get());
- Args.push_back(Handle);
-
- CallInst *IssueCallsite =
- CallInst::Create(IssueDecl, Args, /*NameStr=*/"", &RuntimeCall);
- RuntimeCall.eraseFromParent();
-
- // Add "wait" runtime call declaration:
- // declare void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info)
- FunctionCallee WaitDecl = IRBuilder.getOrCreateRuntimeFunction(
- M, OMPRTL___tgt_target_data_begin_mapper_wait);
-
- Value *WaitParams[2] = {
- IssueCallsite->getArgOperand(
- OffloadArray::DeviceIDArgNum), // device_id.
- Handle // handle to wait on.
- };
- CallInst::Create(WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint);
-
- return true;
- }
-
+ /// Tries to hide the latency of runtime calls that involve host to
+ /// device memory transfers by splitting them into their "issue" and "wait"
+ /// versions. The "issue" is moved upwards as much as possible. The "wait" is
+ /// moved downwards as much as possible. The "issue" issues the memory
+ /// transfer asynchronously, returning a handle. The "wait" waits on the
+ /// returned handle for the memory transfer to finish.
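+ /// Schematically (sketch):
+ ///     call void @__tgt_target_data_begin_mapper(...)
+ /// becomes
+ ///     %handle = alloca %struct.__tgt_async_info
+ ///     call void @__tgt_target_data_begin_mapper_issue(..., %handle)
+ ///     ... instructions without conflicting memory effects ...
+ ///     call void @__tgt_target_data_begin_mapper_wait(%device_id, %handle)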
+ bool hideMemTransfersLatency() {
+ auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper];
+ bool Changed = false;
+ auto SplitMemTransfers = [&](Use &U, Function &Decl) {
+ auto *RTCall = getCallIfRegularCall(U, &RFI);
+ if (!RTCall)
+ return false;
+
+ OffloadArray OffloadArrays[3];
+ if (!getValuesInOffloadArrays(*RTCall, OffloadArrays))
+ return false;
+
+ LLVM_DEBUG(dumpValuesInOffloadArrays(OffloadArrays));
+
+ // TODO: Check if can be moved upwards.
+ bool WasSplit = false;
+ Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall);
+ if (WaitMovementPoint)
+ WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint);
+
+ Changed |= WasSplit;
+ return WasSplit;
+ };
+ RFI.foreachUse(SCC, SplitMemTransfers);
+
+ return Changed;
+ }
+
+ void analysisGlobalization() {
+ RuntimeFunction GlobalizationRuntimeIDs[] = {
+ OMPRTL___kmpc_data_sharing_coalesced_push_stack,
+ OMPRTL___kmpc_data_sharing_push_stack};
+
+ for (const auto GlobalizationCallID : GlobalizationRuntimeIDs) {
+ auto &RFI = OMPInfoCache.RFIs[GlobalizationCallID];
+
+ auto CheckGlobalization = [&](Use &U, Function &Decl) {
+ if (CallInst *CI = getCallIfRegularCall(U, &RFI)) {
+ auto Remark = [&](OptimizationRemarkAnalysis ORA) {
+ return ORA
+ << "Found thread data sharing on the GPU. "
+ << "Expect degraded performance due to data globalization.";
+ };
+ emitRemark<OptimizationRemarkAnalysis>(CI, "OpenMPGlobalization",
+ Remark);
+ }
+
+ return false;
+ };
+
+ RFI.foreachUse(SCC, CheckGlobalization);
+ }
+ }
+
+ /// Maps the values stored in the offload arrays passed as arguments to
+ /// \p RuntimeCall into the offload arrays in \p OAs.
+ bool getValuesInOffloadArrays(CallInst &RuntimeCall,
+ MutableArrayRef<OffloadArray> OAs) {
+ assert(OAs.size() == 3 && "Need space for three offload arrays!");
+
+ // A runtime call that involves memory offloading looks something like:
+ // call void @__tgt_target_data_begin_mapper(arg0, arg1,
+ // i8** %offload_baseptrs, i8** %offload_ptrs, i64* %offload_sizes,
+ // ...)
+ // So, the idea is to access the allocas that allocate space for these
+ // offload arrays, offload_baseptrs, offload_ptrs, offload_sizes.
+ // Therefore:
+ // i8** %offload_baseptrs.
+ Value *BasePtrsArg =
+ RuntimeCall.getArgOperand(OffloadArray::BasePtrsArgNum);
+ // i8** %offload_ptrs.
+ Value *PtrsArg = RuntimeCall.getArgOperand(OffloadArray::PtrsArgNum);
+ // i8** %offload_sizes.
+ Value *SizesArg = RuntimeCall.getArgOperand(OffloadArray::SizesArgNum);
+
+ // Get values stored in **offload_baseptrs.
+ auto *V = getUnderlyingObject(BasePtrsArg);
+ if (!isa<AllocaInst>(V))
+ return false;
+ auto *BasePtrsArray = cast<AllocaInst>(V);
+ if (!OAs[0].initialize(*BasePtrsArray, RuntimeCall))
+ return false;
+
+    // Get values stored in **offload_ptrs.
+ V = getUnderlyingObject(PtrsArg);
+ if (!isa<AllocaInst>(V))
+ return false;
+ auto *PtrsArray = cast<AllocaInst>(V);
+ if (!OAs[1].initialize(*PtrsArray, RuntimeCall))
+ return false;
+
+ // Get values stored in **offload_sizes.
+ V = getUnderlyingObject(SizesArg);
+ // If it's a [constant] global array don't analyze it.
+ if (isa<GlobalValue>(V))
+ return isa<Constant>(V);
+ if (!isa<AllocaInst>(V))
+ return false;
+
+ auto *SizesArray = cast<AllocaInst>(V);
+ if (!OAs[2].initialize(*SizesArray, RuntimeCall))
+ return false;
+
+ return true;
+ }
+
+ /// Prints the values in the OffloadArrays \p OAs using LLVM_DEBUG.
+ /// For now this is a way to test that the function getValuesInOffloadArrays
+ /// is working properly.
+ /// TODO: Move this to a unittest when unittests are available for OpenMPOpt.
+ void dumpValuesInOffloadArrays(ArrayRef<OffloadArray> OAs) {
+ assert(OAs.size() == 3 && "There are three offload arrays to debug!");
+
+ LLVM_DEBUG(dbgs() << TAG << " Successfully got offload values:\n");
+ std::string ValuesStr;
+ raw_string_ostream Printer(ValuesStr);
+ std::string Separator = " --- ";
+
+ for (auto *BP : OAs[0].StoredValues) {
+ BP->print(Printer);
+ Printer << Separator;
+ }
+ LLVM_DEBUG(dbgs() << "\t\toffload_baseptrs: " << Printer.str() << "\n");
+ ValuesStr.clear();
+
+ for (auto *P : OAs[1].StoredValues) {
+ P->print(Printer);
+ Printer << Separator;
+ }
+ LLVM_DEBUG(dbgs() << "\t\toffload_ptrs: " << Printer.str() << "\n");
+ ValuesStr.clear();
+
+ for (auto *S : OAs[2].StoredValues) {
+ S->print(Printer);
+ Printer << Separator;
+ }
+ LLVM_DEBUG(dbgs() << "\t\toffload_sizes: " << Printer.str() << "\n");
+ }
+
+ /// Returns the instruction to which the "wait" counterpart of \p RuntimeCall
+ /// can be moved. Returns nullptr if the movement is not possible or not worth it.
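+ /// The "wait" may be moved past side-effect-free instructions, but must stay
+ /// above the first instruction that may read or write memory.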
+ Instruction *canBeMovedDownwards(CallInst &RuntimeCall) {
+ // FIXME: This traverses only the BasicBlock where RuntimeCall is.
+ // Make it traverse the CFG.
+
+ Instruction *CurrentI = &RuntimeCall;
+ bool IsWorthIt = false;
+ while ((CurrentI = CurrentI->getNextNode())) {
+
+ // TODO: Once we detect the regions to be offloaded we should use the
+ // alias analysis manager to check if CurrentI may modify one of
+ // the offloaded regions.
+ if (CurrentI->mayHaveSideEffects() || CurrentI->mayReadFromMemory()) {
+ if (IsWorthIt)
+ return CurrentI;
+
+ return nullptr;
+ }
+
+      // FIXME: For now, moving it over anything without side effects is
+      // considered worth it.
+ IsWorthIt = true;
+ }
+
+ // Return end of BasicBlock.
+ return RuntimeCall.getParent()->getTerminator();
+ }
+
+ /// Splits \p RuntimeCall into its "issue" and "wait" counterparts.
+ bool splitTargetDataBeginRTC(CallInst &RuntimeCall,
+ Instruction &WaitMovementPoint) {
+    // Create a stack-allocated handle (__tgt_async_info) at the beginning of
+    // the function. It stores information about the async transfer so that it
+    // can be waited on later.
+ auto &IRBuilder = OMPInfoCache.OMPBuilder;
+ auto *F = RuntimeCall.getCaller();
+ Instruction *FirstInst = &(F->getEntryBlock().front());
+ AllocaInst *Handle = new AllocaInst(
+ IRBuilder.AsyncInfo, F->getAddressSpace(), "handle", FirstInst);
+
+ // Add "issue" runtime call declaration:
+ // declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32,
+ // i8**, i8**, i64*, i64*)
+ FunctionCallee IssueDecl = IRBuilder.getOrCreateRuntimeFunction(
+ M, OMPRTL___tgt_target_data_begin_mapper_issue);
+
+ // Change RuntimeCall call site for its asynchronous version.
+ SmallVector<Value *, 16> Args;
+ for (auto &Arg : RuntimeCall.args())
+ Args.push_back(Arg.get());
+ Args.push_back(Handle);
+
+ CallInst *IssueCallsite =
+ CallInst::Create(IssueDecl, Args, /*NameStr=*/"", &RuntimeCall);
+ RuntimeCall.eraseFromParent();
+
+ // Add "wait" runtime call declaration:
+ // declare void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info)
+ FunctionCallee WaitDecl = IRBuilder.getOrCreateRuntimeFunction(
+ M, OMPRTL___tgt_target_data_begin_mapper_wait);
+
+ Value *WaitParams[2] = {
+ IssueCallsite->getArgOperand(
+ OffloadArray::DeviceIDArgNum), // device_id.
+ Handle // handle to wait on.
+ };
+ CallInst::Create(WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint);
+
+ return true;
+ }
+
static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
bool GlobalOnly, bool &SingleChoice) {
if (CurrentIdent == NextIdent)
@@ -1578,28 +1578,28 @@ private:
/// Populate the Attributor with abstract attribute opportunities in the
/// function.
void registerAAs() {
- if (SCC.empty())
- return;
-
- // Create CallSite AA for all Getters.
- for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) {
- auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)];
-
- auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter];
-
- auto CreateAA = [&](Use &U, Function &Caller) {
- CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI);
- if (!CI)
- return false;
-
- auto &CB = cast<CallBase>(*CI);
-
- IRPosition CBPos = IRPosition::callsite_function(CB);
- A.getOrCreateAAFor<AAICVTracker>(CBPos);
- return false;
- };
-
- GetterRFI.foreachUse(SCC, CreateAA);
+ if (SCC.empty())
+ return;
+
+ // Create CallSite AA for all Getters.
+ for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) {
+ auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)];
+
+ auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter];
+
+ auto CreateAA = [&](Use &U, Function &Caller) {
+ CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI);
+ if (!CI)
+ return false;
+
+ auto &CB = cast<CallBase>(*CI);
+
+ IRPosition CBPos = IRPosition::callsite_function(CB);
+ A.getOrCreateAAFor<AAICVTracker>(CBPos);
+ return false;
+ };
+
+ GetterRFI.foreachUse(SCC, CreateAA);
}
}
};
@@ -1623,16 +1623,16 @@ Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
}
CachedKernel = nullptr;
- if (!F.hasLocalLinkage()) {
-
- // See https://openmp.llvm.org/remarks/OptimizationRemarks.html
- auto Remark = [&](OptimizationRemark OR) {
- return OR << "[OMP100] Potentially unknown OpenMP target region caller";
- };
- emitRemarkOnFunction(&F, "OMP100", Remark);
-
+ if (!F.hasLocalLinkage()) {
+
+ // See https://openmp.llvm.org/remarks/OptimizationRemarks.html
+ auto Remark = [&](OptimizationRemark OR) {
+ return OR << "[OMP100] Potentially unknown OpenMP target region caller";
+ };
+ emitRemarkOnFunction(&F, "OMP100", Remark);
+
return nullptr;
- }
+ }
}
auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel {
@@ -1658,7 +1658,7 @@ Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
// TODO: In the future we want to track more than just a unique kernel.
SmallPtrSet<Kernel, 2> PotentialKernels;
- OMPInformationCache::foreachUse(F, [&](const Use &U) {
+ OMPInformationCache::foreachUse(F, [&](const Use &U) {
PotentialKernels.insert(GetUniqueKernelForUse(U));
});
@@ -1689,7 +1689,7 @@ bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
unsigned NumDirectCalls = 0;
SmallVector<Use *, 2> ToBeReplacedStateMachineUses;
- OMPInformationCache::foreachUse(*F, [&](Use &U) {
+ OMPInformationCache::foreachUse(*F, [&](Use &U) {
if (auto *CB = dyn_cast<CallBase>(U.getUser()))
if (CB->isCallee(&U)) {
++NumDirectCalls;
@@ -1809,12 +1809,12 @@ struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> {
using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
- void initialize(Attributor &A) override {
- Function *F = getAnchorScope();
- if (!F || !A.isFunctionIPOAmendable(*F))
- indicatePessimisticFixpoint();
- }
-
+ void initialize(Attributor &A) override {
+ Function *F = getAnchorScope();
+ if (!F || !A.isFunctionIPOAmendable(*F))
+ indicatePessimisticFixpoint();
+ }
+
/// Returns true if value is assumed to be tracked.
bool isAssumedTracked() const { return getAssumed(); }
@@ -1825,22 +1825,22 @@ struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> {
static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A);
/// Return the value with which \p I can be replaced for specific \p ICV.
- virtual Optional<Value *> getReplacementValue(InternalControlVar ICV,
- const Instruction *I,
- Attributor &A) const {
- return None;
- }
-
- /// Return an assumed unique ICV value if a single candidate is found. If
- /// there cannot be one, return nullptr. If it is not yet known, return None.
- virtual Optional<Value *>
- getUniqueReplacementValue(InternalControlVar ICV) const = 0;
-
- // Currently only nthreads is being tracked.
- // This array will only grow with time.
- InternalControlVar TrackableICVs[1] = {ICV_nthreads};
-
+ virtual Optional<Value *> getReplacementValue(InternalControlVar ICV,
+ const Instruction *I,
+ Attributor &A) const {
+ return None;
+ }
+
+ /// Return an assumed unique ICV value if a single candidate is found. If
+ /// there cannot be one, return nullptr. If it is not yet known, return None.
+ virtual Optional<Value *>
+ getUniqueReplacementValue(InternalControlVar ICV) const = 0;
+
+ // Currently only nthreads is being tracked.
+ // This array will only grow with time.
+ InternalControlVar TrackableICVs[1] = {ICV_nthreads};
+
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAICVTracker"; }
@@ -1860,20 +1860,20 @@ struct AAICVTrackerFunction : public AAICVTracker {
: AAICVTracker(IRP, A) {}
// FIXME: come up with better string.
- const std::string getAsStr() const override { return "ICVTrackerFunction"; }
+ const std::string getAsStr() const override { return "ICVTrackerFunction"; }
// FIXME: come up with some stats.
void trackStatistics() const override {}
- /// We don't manifest anything for this AA.
+ /// We don't manifest anything for this AA.
ChangeStatus manifest(Attributor &A) override {
- return ChangeStatus::UNCHANGED;
+ return ChangeStatus::UNCHANGED;
}
// Map of ICV to their values at specific program point.
- EnumeratedArray<DenseMap<Instruction *, Value *>, InternalControlVar,
+ EnumeratedArray<DenseMap<Instruction *, Value *>, InternalControlVar,
InternalControlVar::ICV___last>
- ICVReplacementValuesMap;
+ ICVReplacementValuesMap;
ChangeStatus updateImpl(Attributor &A) override {
ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
@@ -1885,7 +1885,7 @@ struct AAICVTrackerFunction : public AAICVTracker {
for (InternalControlVar ICV : TrackableICVs) {
auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
- auto &ValuesMap = ICVReplacementValuesMap[ICV];
+ auto &ValuesMap = ICVReplacementValuesMap[ICV];
auto TrackValues = [&](Use &U, Function &) {
CallInst *CI = OpenMPOpt::getCallIfRegularCall(U);
if (!CI)
@@ -1893,344 +1893,344 @@ struct AAICVTrackerFunction : public AAICVTracker {
       // FIXME: handle setters with more than one argument.
/// Track new value.
- if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second)
+ if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second)
HasChanged = ChangeStatus::CHANGED;
return false;
};
- auto CallCheck = [&](Instruction &I) {
- Optional<Value *> ReplVal = getValueForCall(A, &I, ICV);
- if (ReplVal.hasValue() &&
- ValuesMap.insert(std::make_pair(&I, *ReplVal)).second)
- HasChanged = ChangeStatus::CHANGED;
-
- return true;
- };
-
- // Track all changes of an ICV.
+ auto CallCheck = [&](Instruction &I) {
+ Optional<Value *> ReplVal = getValueForCall(A, &I, ICV);
+ if (ReplVal.hasValue() &&
+ ValuesMap.insert(std::make_pair(&I, *ReplVal)).second)
+ HasChanged = ChangeStatus::CHANGED;
+
+ return true;
+ };
+
+ // Track all changes of an ICV.
SetterRFI.foreachUse(TrackValues, F);
-
- A.checkForAllInstructions(CallCheck, *this, {Instruction::Call},
- /* CheckBBLivenessOnly */ true);
-
- /// TODO: Figure out a way to avoid adding entry in
- /// ICVReplacementValuesMap
- Instruction *Entry = &F->getEntryBlock().front();
- if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry))
- ValuesMap.insert(std::make_pair(Entry, nullptr));
+
+ A.checkForAllInstructions(CallCheck, *this, {Instruction::Call},
+ /* CheckBBLivenessOnly */ true);
+
+ /// TODO: Figure out a way to avoid adding entry in
+ /// ICVReplacementValuesMap
+ Instruction *Entry = &F->getEntryBlock().front();
+ if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry))
+ ValuesMap.insert(std::make_pair(Entry, nullptr));
}
return HasChanged;
}
- /// Helper to check if \p I is a call and get the value for it if it is
- /// unique.
- Optional<Value *> getValueForCall(Attributor &A, const Instruction *I,
- InternalControlVar &ICV) const {
-
- const auto *CB = dyn_cast<CallBase>(I);
- if (!CB || CB->hasFnAttr("no_openmp") ||
- CB->hasFnAttr("no_openmp_routines"))
- return None;
+ /// Helper to check if \p I is a call and get the value for it if it is
+ /// unique.
+ Optional<Value *> getValueForCall(Attributor &A, const Instruction *I,
+ InternalControlVar &ICV) const {
+ const auto *CB = dyn_cast<CallBase>(I);
+ if (!CB || CB->hasFnAttr("no_openmp") ||
+ CB->hasFnAttr("no_openmp_routines"))
+ return None;
+
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter];
- auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
- Function *CalledFunction = CB->getCalledFunction();
-
- // Indirect call, assume ICV changes.
- if (CalledFunction == nullptr)
- return nullptr;
- if (CalledFunction == GetterRFI.Declaration)
- return None;
- if (CalledFunction == SetterRFI.Declaration) {
- if (ICVReplacementValuesMap[ICV].count(I))
- return ICVReplacementValuesMap[ICV].lookup(I);
-
- return nullptr;
- }
-
- // Since we don't know, assume it changes the ICV.
- if (CalledFunction->isDeclaration())
- return nullptr;
-
- const auto &ICVTrackingAA =
- A.getAAFor<AAICVTracker>(*this, IRPosition::callsite_returned(*CB));
-
- if (ICVTrackingAA.isAssumedTracked())
- return ICVTrackingAA.getUniqueReplacementValue(ICV);
-
- // If we don't know, assume it changes.
- return nullptr;
- }
-
- // We don't check unique value for a function, so return None.
- Optional<Value *>
- getUniqueReplacementValue(InternalControlVar ICV) const override {
- return None;
- }
-
- /// Return the value with which \p I can be replaced for specific \p ICV.
- Optional<Value *> getReplacementValue(InternalControlVar ICV,
- const Instruction *I,
- Attributor &A) const override {
- const auto &ValuesMap = ICVReplacementValuesMap[ICV];
- if (ValuesMap.count(I))
- return ValuesMap.lookup(I);
-
- SmallVector<const Instruction *, 16> Worklist;
- SmallPtrSet<const Instruction *, 16> Visited;
- Worklist.push_back(I);
-
- Optional<Value *> ReplVal;
-
- while (!Worklist.empty()) {
- const Instruction *CurrInst = Worklist.pop_back_val();
- if (!Visited.insert(CurrInst).second)
- continue;
-
- const BasicBlock *CurrBB = CurrInst->getParent();
-
- // Go up and look for all potential setters/calls that might change the
- // ICV.
- while ((CurrInst = CurrInst->getPrevNode())) {
- if (ValuesMap.count(CurrInst)) {
- Optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst);
- // Unknown value, track new.
- if (!ReplVal.hasValue()) {
- ReplVal = NewReplVal;
- break;
- }
-
-          // If we found a new value, we can't know the ICV value anymore.
- if (NewReplVal.hasValue())
- if (ReplVal != NewReplVal)
+ auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
+ Function *CalledFunction = CB->getCalledFunction();
+
+ // Indirect call, assume ICV changes.
+ if (CalledFunction == nullptr)
+ return nullptr;
+ if (CalledFunction == GetterRFI.Declaration)
+ return None;
+ if (CalledFunction == SetterRFI.Declaration) {
+ if (ICVReplacementValuesMap[ICV].count(I))
+ return ICVReplacementValuesMap[ICV].lookup(I);
+
+ return nullptr;
+ }
+
+ // Since we don't know, assume it changes the ICV.
+ if (CalledFunction->isDeclaration())
+ return nullptr;
+
+ const auto &ICVTrackingAA =
+ A.getAAFor<AAICVTracker>(*this, IRPosition::callsite_returned(*CB));
+
+ if (ICVTrackingAA.isAssumedTracked())
+ return ICVTrackingAA.getUniqueReplacementValue(ICV);
+
+ // If we don't know, assume it changes.
+ return nullptr;
+ }
+
+ // We don't check unique value for a function, so return None.
+ Optional<Value *>
+ getUniqueReplacementValue(InternalControlVar ICV) const override {
+ return None;
+ }
+
+ /// Return the value with which \p I can be replaced for specific \p ICV.
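+  /// This walks backwards from \p I through the CFG; the closest preceding
+  /// setter (or call that may change the ICV) on each path provides a
+  /// candidate, and conflicting candidates across paths yield nullptr.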
+ Optional<Value *> getReplacementValue(InternalControlVar ICV,
+ const Instruction *I,
+ Attributor &A) const override {
+ const auto &ValuesMap = ICVReplacementValuesMap[ICV];
+ if (ValuesMap.count(I))
+ return ValuesMap.lookup(I);
+
+ SmallVector<const Instruction *, 16> Worklist;
+ SmallPtrSet<const Instruction *, 16> Visited;
+ Worklist.push_back(I);
+
+ Optional<Value *> ReplVal;
+
+ while (!Worklist.empty()) {
+ const Instruction *CurrInst = Worklist.pop_back_val();
+ if (!Visited.insert(CurrInst).second)
+ continue;
+
+ const BasicBlock *CurrBB = CurrInst->getParent();
+
+ // Go up and look for all potential setters/calls that might change the
+ // ICV.
+ while ((CurrInst = CurrInst->getPrevNode())) {
+ if (ValuesMap.count(CurrInst)) {
+ Optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst);
+ // Unknown value, track new.
+ if (!ReplVal.hasValue()) {
+ ReplVal = NewReplVal;
+ break;
+ }
+
+          // If we found a new value, we can't know the ICV value anymore.
+ if (NewReplVal.hasValue())
+ if (ReplVal != NewReplVal)
return nullptr;
- break;
+ break;
}
- Optional<Value *> NewReplVal = getValueForCall(A, CurrInst, ICV);
- if (!NewReplVal.hasValue())
- continue;
-
- // Unknown value, track new.
- if (!ReplVal.hasValue()) {
- ReplVal = NewReplVal;
- break;
- }
-
-        // We found a new value, so we can't know the ICV value anymore.
- if (ReplVal != NewReplVal)
- return nullptr;
+ Optional<Value *> NewReplVal = getValueForCall(A, CurrInst, ICV);
+ if (!NewReplVal.hasValue())
+ continue;
+
+ // Unknown value, track new.
+ if (!ReplVal.hasValue()) {
+ ReplVal = NewReplVal;
+ break;
+ }
+
+        // We found a new value, so we can't know the ICV value anymore.
+ if (ReplVal != NewReplVal)
+ return nullptr;
}
-
- // If we are in the same BB and we have a value, we are done.
- if (CurrBB == I->getParent() && ReplVal.hasValue())
- return ReplVal;
-
- // Go through all predecessors and add terminators for analysis.
- for (const BasicBlock *Pred : predecessors(CurrBB))
- if (const Instruction *Terminator = Pred->getTerminator())
- Worklist.push_back(Terminator);
+
+ // If we are in the same BB and we have a value, we are done.
+ if (CurrBB == I->getParent() && ReplVal.hasValue())
+ return ReplVal;
+
+ // Go through all predecessors and add terminators for analysis.
+ for (const BasicBlock *Pred : predecessors(CurrBB))
+ if (const Instruction *Terminator = Pred->getTerminator())
+ Worklist.push_back(Terminator);
}
- return ReplVal;
- }
-};
-
-struct AAICVTrackerFunctionReturned : AAICVTracker {
- AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A)
- : AAICVTracker(IRP, A) {}
-
- // FIXME: come up with better string.
- const std::string getAsStr() const override {
- return "ICVTrackerFunctionReturned";
- }
-
- // FIXME: come up with some stats.
- void trackStatistics() const override {}
-
- /// We don't manifest anything for this AA.
- ChangeStatus manifest(Attributor &A) override {
- return ChangeStatus::UNCHANGED;
- }
-
- // Map of ICV to their values at specific program point.
- EnumeratedArray<Optional<Value *>, InternalControlVar,
- InternalControlVar::ICV___last>
- ICVReplacementValuesMap;
-
- /// Return the value with which \p I can be replaced for specific \p ICV.
- Optional<Value *>
- getUniqueReplacementValue(InternalControlVar ICV) const override {
- return ICVReplacementValuesMap[ICV];
- }
-
- ChangeStatus updateImpl(Attributor &A) override {
- ChangeStatus Changed = ChangeStatus::UNCHANGED;
- const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
- *this, IRPosition::function(*getAnchorScope()));
-
- if (!ICVTrackingAA.isAssumedTracked())
- return indicatePessimisticFixpoint();
-
- for (InternalControlVar ICV : TrackableICVs) {
- Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
- Optional<Value *> UniqueICVValue;
-
- auto CheckReturnInst = [&](Instruction &I) {
- Optional<Value *> NewReplVal =
- ICVTrackingAA.getReplacementValue(ICV, &I, A);
-
- // If we found a second ICV value there is no unique returned value.
- if (UniqueICVValue.hasValue() && UniqueICVValue != NewReplVal)
- return false;
-
- UniqueICVValue = NewReplVal;
-
- return true;
- };
-
- if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret},
- /* CheckBBLivenessOnly */ true))
- UniqueICVValue = nullptr;
-
- if (UniqueICVValue == ReplVal)
- continue;
-
- ReplVal = UniqueICVValue;
- Changed = ChangeStatus::CHANGED;
- }
-
- return Changed;
- }
-};
-
-struct AAICVTrackerCallSite : AAICVTracker {
- AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A)
- : AAICVTracker(IRP, A) {}
-
- void initialize(Attributor &A) override {
- Function *F = getAnchorScope();
- if (!F || !A.isFunctionIPOAmendable(*F))
- indicatePessimisticFixpoint();
-
- // We only initialize this AA for getters, so we need to know which ICV it
- // gets.
- auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
- for (InternalControlVar ICV : TrackableICVs) {
- auto ICVInfo = OMPInfoCache.ICVs[ICV];
- auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter];
- if (Getter.Declaration == getAssociatedFunction()) {
- AssociatedICV = ICVInfo.Kind;
- return;
- }
- }
-
- /// Unknown ICV.
- indicatePessimisticFixpoint();
- }
-
- ChangeStatus manifest(Attributor &A) override {
- if (!ReplVal.hasValue() || !ReplVal.getValue())
- return ChangeStatus::UNCHANGED;
-
- A.changeValueAfterManifest(*getCtxI(), **ReplVal);
- A.deleteAfterManifest(*getCtxI());
-
- return ChangeStatus::CHANGED;
- }
-
- // FIXME: come up with better string.
- const std::string getAsStr() const override { return "ICVTrackerCallSite"; }
-
- // FIXME: come up with some stats.
- void trackStatistics() const override {}
-
- InternalControlVar AssociatedICV;
- Optional<Value *> ReplVal;
-
- ChangeStatus updateImpl(Attributor &A) override {
- const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
- *this, IRPosition::function(*getAnchorScope()));
-
- // We don't have any information, so we assume it changes the ICV.
- if (!ICVTrackingAA.isAssumedTracked())
- return indicatePessimisticFixpoint();
-
- Optional<Value *> NewReplVal =
- ICVTrackingAA.getReplacementValue(AssociatedICV, getCtxI(), A);
-
- if (ReplVal == NewReplVal)
- return ChangeStatus::UNCHANGED;
-
- ReplVal = NewReplVal;
- return ChangeStatus::CHANGED;
- }
-
- // Return the value with which associated value can be replaced for specific
- // \p ICV.
- Optional<Value *>
- getUniqueReplacementValue(InternalControlVar ICV) const override {
- return ReplVal;
- }
-};
-
-struct AAICVTrackerCallSiteReturned : AAICVTracker {
- AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A)
- : AAICVTracker(IRP, A) {}
-
- // FIXME: come up with better string.
- const std::string getAsStr() const override {
- return "ICVTrackerCallSiteReturned";
- }
-
- // FIXME: come up with some stats.
- void trackStatistics() const override {}
-
- /// We don't manifest anything for this AA.
- ChangeStatus manifest(Attributor &A) override {
- return ChangeStatus::UNCHANGED;
- }
-
- // Map of ICV to their values at specific program point.
- EnumeratedArray<Optional<Value *>, InternalControlVar,
- InternalControlVar::ICV___last>
- ICVReplacementValuesMap;
-
- /// Return the value with which associated value can be replaced for specific
- /// \p ICV.
- Optional<Value *>
- getUniqueReplacementValue(InternalControlVar ICV) const override {
- return ICVReplacementValuesMap[ICV];
- }
-
- ChangeStatus updateImpl(Attributor &A) override {
- ChangeStatus Changed = ChangeStatus::UNCHANGED;
- const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
- *this, IRPosition::returned(*getAssociatedFunction()));
-
- // We don't have any information, so we assume it changes the ICV.
- if (!ICVTrackingAA.isAssumedTracked())
- return indicatePessimisticFixpoint();
-
- for (InternalControlVar ICV : TrackableICVs) {
- Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
- Optional<Value *> NewReplVal =
- ICVTrackingAA.getUniqueReplacementValue(ICV);
-
- if (ReplVal == NewReplVal)
- continue;
-
- ReplVal = NewReplVal;
- Changed = ChangeStatus::CHANGED;
- }
- return Changed;
+ return ReplVal;
}
};
+
+struct AAICVTrackerFunctionReturned : AAICVTracker {
+ AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A)
+ : AAICVTracker(IRP, A) {}
+
+ // FIXME: come up with better string.
+ const std::string getAsStr() const override {
+ return "ICVTrackerFunctionReturned";
+ }
+
+ // FIXME: come up with some stats.
+ void trackStatistics() const override {}
+
+ /// We don't manifest anything for this AA.
+ ChangeStatus manifest(Attributor &A) override {
+ return ChangeStatus::UNCHANGED;
+ }
+
+ // Map of ICV to their values at specific program point.
+ EnumeratedArray<Optional<Value *>, InternalControlVar,
+ InternalControlVar::ICV___last>
+ ICVReplacementValuesMap;
+
+ /// Return the value with which \p I can be replaced for specific \p ICV.
+ Optional<Value *>
+ getUniqueReplacementValue(InternalControlVar ICV) const override {
+ return ICVReplacementValuesMap[ICV];
+ }
+
+ ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+ const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
+ *this, IRPosition::function(*getAnchorScope()));
+
+ if (!ICVTrackingAA.isAssumedTracked())
+ return indicatePessimisticFixpoint();
+
+ for (InternalControlVar ICV : TrackableICVs) {
+ Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
+ Optional<Value *> UniqueICVValue;
+
+ auto CheckReturnInst = [&](Instruction &I) {
+ Optional<Value *> NewReplVal =
+ ICVTrackingAA.getReplacementValue(ICV, &I, A);
+
+ // If we found a second ICV value there is no unique returned value.
+ if (UniqueICVValue.hasValue() && UniqueICVValue != NewReplVal)
+ return false;
+
+ UniqueICVValue = NewReplVal;
+
+ return true;
+ };
+
+ if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret},
+ /* CheckBBLivenessOnly */ true))
+ UniqueICVValue = nullptr;
+
+ if (UniqueICVValue == ReplVal)
+ continue;
+
+ ReplVal = UniqueICVValue;
+ Changed = ChangeStatus::CHANGED;
+ }
+
+ return Changed;
+ }
+};
+
+struct AAICVTrackerCallSite : AAICVTracker {
+ AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A)
+ : AAICVTracker(IRP, A) {}
+
+ void initialize(Attributor &A) override {
+ Function *F = getAnchorScope();
+ if (!F || !A.isFunctionIPOAmendable(*F))
+ indicatePessimisticFixpoint();
+
+ // We only initialize this AA for getters, so we need to know which ICV it
+ // gets.
+ auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
+ for (InternalControlVar ICV : TrackableICVs) {
+ auto ICVInfo = OMPInfoCache.ICVs[ICV];
+ auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter];
+ if (Getter.Declaration == getAssociatedFunction()) {
+ AssociatedICV = ICVInfo.Kind;
+ return;
+ }
+ }
+
+ /// Unknown ICV.
+ indicatePessimisticFixpoint();
+ }
+
+ ChangeStatus manifest(Attributor &A) override {
+ if (!ReplVal.hasValue() || !ReplVal.getValue())
+ return ChangeStatus::UNCHANGED;
+
+ A.changeValueAfterManifest(*getCtxI(), **ReplVal);
+ A.deleteAfterManifest(*getCtxI());
+
+ return ChangeStatus::CHANGED;
+ }
+
+ // FIXME: come up with better string.
+ const std::string getAsStr() const override { return "ICVTrackerCallSite"; }
+
+ // FIXME: come up with some stats.
+ void trackStatistics() const override {}
+
+ InternalControlVar AssociatedICV;
+ Optional<Value *> ReplVal;
+
+ ChangeStatus updateImpl(Attributor &A) override {
+ const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
+ *this, IRPosition::function(*getAnchorScope()));
+
+ // We don't have any information, so we assume it changes the ICV.
+ if (!ICVTrackingAA.isAssumedTracked())
+ return indicatePessimisticFixpoint();
+
+ Optional<Value *> NewReplVal =
+ ICVTrackingAA.getReplacementValue(AssociatedICV, getCtxI(), A);
+
+ if (ReplVal == NewReplVal)
+ return ChangeStatus::UNCHANGED;
+
+ ReplVal = NewReplVal;
+ return ChangeStatus::CHANGED;
+ }
+
+ // Return the value with which associated value can be replaced for specific
+ // \p ICV.
+ Optional<Value *>
+ getUniqueReplacementValue(InternalControlVar ICV) const override {
+ return ReplVal;
+ }
+};
+
+struct AAICVTrackerCallSiteReturned : AAICVTracker {
+ AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A)
+ : AAICVTracker(IRP, A) {}
+
+ // FIXME: come up with better string.
+ const std::string getAsStr() const override {
+ return "ICVTrackerCallSiteReturned";
+ }
+
+ // FIXME: come up with some stats.
+ void trackStatistics() const override {}
+
+ /// We don't manifest anything for this AA.
+ ChangeStatus manifest(Attributor &A) override {
+ return ChangeStatus::UNCHANGED;
+ }
+
+ // Map of ICV to their values at specific program point.
+ EnumeratedArray<Optional<Value *>, InternalControlVar,
+ InternalControlVar::ICV___last>
+ ICVReplacementValuesMap;
+
+ /// Return the value with which associated value can be replaced for specific
+ /// \p ICV.
+ Optional<Value *>
+ getUniqueReplacementValue(InternalControlVar ICV) const override {
+ return ICVReplacementValuesMap[ICV];
+ }
+
+ ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+ const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
+ *this, IRPosition::returned(*getAssociatedFunction()));
+
+ // We don't have any information, so we assume it changes the ICV.
+ if (!ICVTrackingAA.isAssumedTracked())
+ return indicatePessimisticFixpoint();
+
+ for (InternalControlVar ICV : TrackableICVs) {
+ Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
+ Optional<Value *> NewReplVal =
+ ICVTrackingAA.getUniqueReplacementValue(ICV);
+
+ if (ReplVal == NewReplVal)
+ continue;
+
+ ReplVal = NewReplVal;
+ Changed = ChangeStatus::CHANGED;
+ }
+ return Changed;
+ }
+};
} // namespace
const char AAICVTracker::ID = 0;
@@ -2242,17 +2242,17 @@ AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP,
case IRPosition::IRP_INVALID:
case IRPosition::IRP_FLOAT:
case IRPosition::IRP_ARGUMENT:
- case IRPosition::IRP_CALL_SITE_ARGUMENT:
- llvm_unreachable("ICVTracker can only be created for function position!");
+ case IRPosition::IRP_CALL_SITE_ARGUMENT:
+ llvm_unreachable("ICVTracker can only be created for function position!");
case IRPosition::IRP_RETURNED:
- AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A);
- break;
+ AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A);
+ break;
case IRPosition::IRP_CALL_SITE_RETURNED:
- AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A);
- break;
+ AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A);
+ break;
case IRPosition::IRP_CALL_SITE:
- AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A);
- break;
+ AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A);
+ break;
case IRPosition::IRP_FUNCTION:
AA = new (A.Allocator) AAICVTrackerFunction(IRP, A);
break;
@@ -2271,21 +2271,21 @@ PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C,
return PreservedAnalyses::all();
SmallVector<Function *, 16> SCC;
- // If there are kernels in the module, we have to run on all SCC's.
- // If there are kernels in the module, we have to run on all SCCs.
- for (LazyCallGraph::Node &N : C) {
- Function *Fn = &N.getFunction();
- SCC.push_back(Fn);
-
- // Do we already know that the SCC contains kernels,
- // or that OpenMP functions are called from this SCC?
- if (SCCIsInteresting)
- continue;
- // If not, let's check that.
- SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn);
- }
-
- if (!SCCIsInteresting || SCC.empty())
+ // If there are kernels in the module, we have to run on all SCC's.
+  // If there are kernels in the module, we have to run on all SCCs.
+ for (LazyCallGraph::Node &N : C) {
+ Function *Fn = &N.getFunction();
+ SCC.push_back(Fn);
+
+ // Do we already know that the SCC contains kernels,
+ // or that OpenMP functions are called from this SCC?
+ if (SCCIsInteresting)
+ continue;
+ // If not, let's check that.
+ SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn);
+ }
+
+ if (!SCCIsInteresting || SCC.empty())
return PreservedAnalyses::all();
FunctionAnalysisManager &FAM =
@@ -2343,23 +2343,23 @@ struct OpenMPOptLegacyPass : public CallGraphSCCPass {
return false;
SmallVector<Function *, 16> SCC;
- // If there are kernels in the module, we have to run on all SCCs.
- bool SCCIsInteresting = !OMPInModule.getKernels().empty();
- for (CallGraphNode *CGN : CGSCC) {
- Function *Fn = CGN->getFunction();
- if (!Fn || Fn->isDeclaration())
- continue;
- SCC.push_back(Fn);
-
- // Do we already know that the SCC contains kernels,
- // or that OpenMP functions are called from this SCC?
- if (SCCIsInteresting)
- continue;
- // If not, let's check that.
- SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn);
- }
-
- if (!SCCIsInteresting || SCC.empty())
+  // If there are kernels in the module, we have to run on all SCCs.
+ bool SCCIsInteresting = !OMPInModule.getKernels().empty();
+ for (CallGraphNode *CGN : CGSCC) {
+ Function *Fn = CGN->getFunction();
+ if (!Fn || Fn->isDeclaration())
+ continue;
+ SCC.push_back(Fn);
+
+ // Do we already know that the SCC contains kernels,
+ // or that OpenMP functions are called from this SCC?
+ if (SCCIsInteresting)
+ continue;
+ // If not, let's check that.
+ SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn);
+ }
+
+ if (!SCCIsInteresting || SCC.empty())
return false;
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
@@ -2420,18 +2420,18 @@ bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) {
if (OMPInModule.isKnown())
return OMPInModule;
- auto RecordFunctionsContainingUsesOf = [&](Function *F) {
- for (User *U : F->users())
- if (auto *I = dyn_cast<Instruction>(U))
- OMPInModule.FuncsWithOMPRuntimeCalls.insert(I->getFunction());
- };
-
+ auto RecordFunctionsContainingUsesOf = [&](Function *F) {
+ for (User *U : F->users())
+ if (auto *I = dyn_cast<Instruction>(U))
+ OMPInModule.FuncsWithOMPRuntimeCalls.insert(I->getFunction());
+ };
+
// MSVC doesn't like long if-else chains for some reason and instead just
// issues an error. Work around it.
do {
#define OMP_RTL(_Enum, _Name, ...) \
- if (Function *F = M.getFunction(_Name)) { \
- RecordFunctionsContainingUsesOf(F); \
+ if (Function *F = M.getFunction(_Name)) { \
+ RecordFunctionsContainingUsesOf(F); \
OMPInModule = true; \
}
#include "llvm/Frontend/OpenMP/OMPKinds.def"
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/PartialInlining.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/PartialInlining.cpp
index 2bbf4bf110..2c93760385 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/PartialInlining.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/PartialInlining.cpp
@@ -152,7 +152,7 @@ struct FunctionOutliningInfo {
// Returns the number of blocks to be inlined including all blocks
// in Entries and one return block.
- unsigned getNumInlinedBlocks() const { return Entries.size() + 1; }
+ unsigned getNumInlinedBlocks() const { return Entries.size() + 1; }
// A set of blocks including the function entry that guard
// the region to be outlined.
@@ -208,7 +208,7 @@ struct PartialInlinerImpl {
// function (only if we partially inlined early returns) as there is a
// possibility to further "peel" early return statements that were left in the
  // outlined function due to code size.
- std::pair<bool, Function *> unswitchFunction(Function &F);
+ std::pair<bool, Function *> unswitchFunction(Function &F);
// This class speculatively clones the function to be partial inlined.
// At the end of partial inlining, the remaining callsites to the cloned
@@ -219,19 +219,19 @@ struct PartialInlinerImpl {
// multi-region outlining.
FunctionCloner(Function *F, FunctionOutliningInfo *OI,
OptimizationRemarkEmitter &ORE,
- function_ref<AssumptionCache *(Function &)> LookupAC,
- function_ref<TargetTransformInfo &(Function &)> GetTTI);
+ function_ref<AssumptionCache *(Function &)> LookupAC,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI);
FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
OptimizationRemarkEmitter &ORE,
- function_ref<AssumptionCache *(Function &)> LookupAC,
- function_ref<TargetTransformInfo &(Function &)> GetTTI);
-
+ function_ref<AssumptionCache *(Function &)> LookupAC,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI);
+
~FunctionCloner();
// Prepare for function outlining: making sure there is only
// one incoming edge from the extracted/outlined region to
// the return block.
- void normalizeReturnBlock() const;
+ void normalizeReturnBlock() const;
// Do function outlining for cold regions.
bool doMultiRegionFunctionOutlining();
@@ -262,7 +262,7 @@ struct PartialInlinerImpl {
std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
OptimizationRemarkEmitter &ORE;
function_ref<AssumptionCache *(Function &)> LookupAC;
- function_ref<TargetTransformInfo &(Function &)> GetTTI;
+ function_ref<TargetTransformInfo &(Function &)> GetTTI;
};
private:
@@ -278,14 +278,14 @@ private:
// The result is no larger than 1 and is represented using BP.
// (Note that the outlined region's 'head' block can only have incoming
// edges from the guarding entry blocks).
- BranchProbability
- getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) const;
+ BranchProbability
+ getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) const;
// Return true if the callee of CB should be partially inlined with
// profit.
bool shouldPartialInline(CallBase &CB, FunctionCloner &Cloner,
BlockFrequency WeightedOutliningRcost,
- OptimizationRemarkEmitter &ORE) const;
+ OptimizationRemarkEmitter &ORE) const;
  // Try to inline DuplicateFunction (cloned from F with a call to
  // the OutlinedFunction) into its callers. Return true
@@ -294,11 +294,11 @@ private:
  // Compute the mapping from each use site of DuplicateFunction to the enclosing
// BB's profile count.
- void
- computeCallsiteToProfCountMap(Function *DuplicateFunction,
- DenseMap<User *, uint64_t> &SiteCountMap) const;
+ void
+ computeCallsiteToProfCountMap(Function *DuplicateFunction,
+ DenseMap<User *, uint64_t> &SiteCountMap) const;
- bool isLimitReached() const {
+ bool isLimitReached() const {
return (MaxNumPartialInlining != -1 &&
NumPartialInlining >= MaxNumPartialInlining);
}
@@ -310,12 +310,12 @@ private:
return nullptr;
}
- static CallBase *getOneCallSiteTo(Function &F) {
- User *User = *F.user_begin();
+ static CallBase *getOneCallSiteTo(Function &F) {
+ User *User = *F.user_begin();
return getSupportedCallBase(User);
}
- std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function &F) const {
+ std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function &F) const {
CallBase *CB = getOneCallSiteTo(F);
DebugLoc DLoc = CB->getDebugLoc();
BasicBlock *Block = CB->getParent();
@@ -328,19 +328,19 @@ private:
// outlined function itself;
// - The second value is the estimated size of the new call sequence in
// basic block Cloner.OutliningCallBB;
- std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner) const;
+ std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner) const;
// Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
// approximate both the size and runtime cost (Note that in the current
// inline cost analysis, there is no clear distinction there either).
- static int computeBBInlineCost(BasicBlock *BB, TargetTransformInfo *TTI);
-
- std::unique_ptr<FunctionOutliningInfo>
- computeOutliningInfo(Function &F) const;
+ static int computeBBInlineCost(BasicBlock *BB, TargetTransformInfo *TTI);
+ std::unique_ptr<FunctionOutliningInfo>
+ computeOutliningInfo(Function &F) const;
+
std::unique_ptr<FunctionOutliningMultiRegionInfo>
- computeOutliningColdRegionsInfo(Function &F,
- OptimizationRemarkEmitter &ORE) const;
+ computeOutliningColdRegionsInfo(Function &F,
+ OptimizationRemarkEmitter &ORE) const;
};
struct PartialInlinerLegacyPass : public ModulePass {
@@ -392,20 +392,20 @@ struct PartialInlinerLegacyPass : public ModulePass {
} // end anonymous namespace
std::unique_ptr<FunctionOutliningMultiRegionInfo>
-PartialInlinerImpl::computeOutliningColdRegionsInfo(
- Function &F, OptimizationRemarkEmitter &ORE) const {
- BasicBlock *EntryBlock = &F.front();
+PartialInlinerImpl::computeOutliningColdRegionsInfo(
+ Function &F, OptimizationRemarkEmitter &ORE) const {
+ BasicBlock *EntryBlock = &F.front();
- DominatorTree DT(F);
+ DominatorTree DT(F);
LoopInfo LI(DT);
- BranchProbabilityInfo BPI(F, LI);
+ BranchProbabilityInfo BPI(F, LI);
std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
BlockFrequencyInfo *BFI;
if (!GetBFI) {
- ScopedBFI.reset(new BlockFrequencyInfo(F, BPI, LI));
+ ScopedBFI.reset(new BlockFrequencyInfo(F, BPI, LI));
BFI = ScopedBFI.get();
} else
- BFI = &(GetBFI(F));
+ BFI = &(GetBFI(F));
// Return if we don't have profiling information.
if (!PSI.hasInstrumentationProfile())
@@ -429,9 +429,9 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(
<< " has more than one region exit edge.";
});
return nullptr;
- }
-
- ExitBlock = Block;
+ }
+
+ ExitBlock = Block;
}
}
}
@@ -446,14 +446,14 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(
// Use the same computeBBInlineCost function to compute the cost savings of
  // outlining the candidate region.
- TargetTransformInfo *FTTI = &GetTTI(F);
+ TargetTransformInfo *FTTI = &GetTTI(F);
int OverallFunctionCost = 0;
- for (auto &BB : F)
- OverallFunctionCost += computeBBInlineCost(&BB, FTTI);
-
- LLVM_DEBUG(dbgs() << "OverallFunctionCost = " << OverallFunctionCost
- << "\n";);
+ for (auto &BB : F)
+ OverallFunctionCost += computeBBInlineCost(&BB, FTTI);
+ LLVM_DEBUG(dbgs() << "OverallFunctionCost = " << OverallFunctionCost
+ << "\n";);
+
int MinOutlineRegionCost =
static_cast<int>(OverallFunctionCost * MinRegionSizeRatio);
BranchProbability MinBranchProbability(
@@ -465,7 +465,7 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(
DenseMap<BasicBlock *, bool> VisitedMap;
DFS.push_back(CurrEntry);
VisitedMap[CurrEntry] = true;
-
+
// Use Depth First Search on the basic blocks to find CFG edges that are
// considered cold.
// Cold regions considered must also have its inline cost compared to the
@@ -473,98 +473,98 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(
// if it reduced the inline cost of the function by 'MinOutlineRegionCost' or
// more.
while (!DFS.empty()) {
- auto *ThisBB = DFS.back();
+ auto *ThisBB = DFS.back();
DFS.pop_back();
// Only consider regions with predecessor blocks that are considered
// not-cold (default: part of the top 99.99% of all block counters)
// AND greater than our minimum block execution count (default: 100).
- if (PSI.isColdBlock(ThisBB, BFI) ||
- BBProfileCount(ThisBB) < MinBlockCounterExecution)
+ if (PSI.isColdBlock(ThisBB, BFI) ||
+ BBProfileCount(ThisBB) < MinBlockCounterExecution)
continue;
- for (auto SI = succ_begin(ThisBB); SI != succ_end(ThisBB); ++SI) {
+ for (auto SI = succ_begin(ThisBB); SI != succ_end(ThisBB); ++SI) {
if (VisitedMap[*SI])
continue;
VisitedMap[*SI] = true;
DFS.push_back(*SI);
// If branch isn't cold, we skip to the next one.
- BranchProbability SuccProb = BPI.getEdgeProbability(ThisBB, *SI);
+ BranchProbability SuccProb = BPI.getEdgeProbability(ThisBB, *SI);
if (SuccProb > MinBranchProbability)
continue;
-
- LLVM_DEBUG(dbgs() << "Found cold edge: " << ThisBB->getName() << "->"
- << SI->getName()
- << "\nBranch Probability = " << SuccProb << "\n";);
-
+
+ LLVM_DEBUG(dbgs() << "Found cold edge: " << ThisBB->getName() << "->"
+ << SI->getName()
+ << "\nBranch Probability = " << SuccProb << "\n";);
+
SmallVector<BasicBlock *, 8> DominateVector;
DT.getDescendants(*SI, DominateVector);
- assert(!DominateVector.empty() &&
- "SI should be reachable and have at least itself as descendant");
-
+ assert(!DominateVector.empty() &&
+ "SI should be reachable and have at least itself as descendant");
+
// We can only outline single entry regions (for now).
- if (!DominateVector.front()->hasNPredecessors(1)) {
- LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
- << " doesn't have a single predecessor in the "
- "dominator tree\n";);
+ if (!DominateVector.front()->hasNPredecessors(1)) {
+ LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
+ << " doesn't have a single predecessor in the "
+ "dominator tree\n";);
continue;
- }
-
+ }
+
BasicBlock *ExitBlock = nullptr;
// We can only outline single exit regions (for now).
- if (!(ExitBlock = IsSingleExit(DominateVector))) {
- LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
- << " doesn't have a unique successor\n";);
+ if (!(ExitBlock = IsSingleExit(DominateVector))) {
+ LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
+ << " doesn't have a unique successor\n";);
continue;
- }
-
+ }
+
int OutlineRegionCost = 0;
for (auto *BB : DominateVector)
- OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
+ OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
- LLVM_DEBUG(dbgs() << "OutlineRegionCost = " << OutlineRegionCost
- << "\n";);
+ LLVM_DEBUG(dbgs() << "OutlineRegionCost = " << OutlineRegionCost
+ << "\n";);
- if (!SkipCostAnalysis && OutlineRegionCost < MinOutlineRegionCost) {
+ if (!SkipCostAnalysis && OutlineRegionCost < MinOutlineRegionCost) {
ORE.emit([&]() {
return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
&SI->front())
- << ore::NV("Callee", &F)
- << " inline cost-savings smaller than "
+ << ore::NV("Callee", &F)
+ << " inline cost-savings smaller than "
<< ore::NV("Cost", MinOutlineRegionCost);
});
-
- LLVM_DEBUG(dbgs() << "ABORT: Outline region cost is smaller than "
- << MinOutlineRegionCost << "\n";);
+
+ LLVM_DEBUG(dbgs() << "ABORT: Outline region cost is smaller than "
+ << MinOutlineRegionCost << "\n";);
continue;
}
-
+
// For now, ignore blocks that belong to a SISE region that is a
// candidate for outlining. In the future, we may want to look
// at inner regions because the outer region may have live-exit
// variables.
for (auto *BB : DominateVector)
VisitedMap[BB] = true;
-
+
// ReturnBlock here means the block after the outline call
BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor();
FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
OutliningInfo->ORI.push_back(RegInfo);
- LLVM_DEBUG(dbgs() << "Found Cold Candidate starting at block: "
- << DominateVector.front()->getName() << "\n";);
+ LLVM_DEBUG(dbgs() << "Found Cold Candidate starting at block: "
+ << DominateVector.front()->getName() << "\n";);
ColdCandidateFound = true;
NumColdRegionsFound++;
}
}
-
+
if (ColdCandidateFound)
return OutliningInfo;
-
- return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
+
+ return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
}
std::unique_ptr<FunctionOutliningInfo>
-PartialInlinerImpl::computeOutliningInfo(Function &F) const {
- BasicBlock *EntryBlock = &F.front();
+PartialInlinerImpl::computeOutliningInfo(Function &F) const {
+ BasicBlock *EntryBlock = &F.front();
BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
if (!BR || BR->isUnconditional())
return std::unique_ptr<FunctionOutliningInfo>();
@@ -607,7 +607,7 @@ PartialInlinerImpl::computeOutliningInfo(Function &F) const {
// The number of blocks to be inlined has already reached
// the limit. When MaxNumInlineBlocks is set to 0 or 1, this
// disables partial inlining for the function.
- if (OutliningInfo->getNumInlinedBlocks() >= MaxNumInlineBlocks)
+ if (OutliningInfo->getNumInlinedBlocks() >= MaxNumInlineBlocks)
break;
if (succ_size(CurrEntry) != 2)
@@ -627,7 +627,7 @@ PartialInlinerImpl::computeOutliningInfo(Function &F) const {
break;
}
- BasicBlock *CommSucc, *OtherSucc;
+ BasicBlock *CommSucc, *OtherSucc;
std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
if (!CommSucc)
@@ -643,7 +643,7 @@ PartialInlinerImpl::computeOutliningInfo(Function &F) const {
  // Do a sanity check of the entries: there should not
// be any successors (not in the entry set) other than
// {ReturnBlock, NonReturnBlock}
- assert(OutliningInfo->Entries[0] == &F.front() &&
+ assert(OutliningInfo->Entries[0] == &F.front() &&
"Function Entry must be the first in Entries vector");
DenseSet<BasicBlock *> Entries;
for (BasicBlock *E : OutliningInfo->Entries)
@@ -652,7 +652,7 @@ PartialInlinerImpl::computeOutliningInfo(Function &F) const {
  // Returns true if BB has a predecessor which is not
  // in the Entries set.
auto HasNonEntryPred = [Entries](BasicBlock *BB) {
- for (auto *Pred : predecessors(BB)) {
+ for (auto *Pred : predecessors(BB)) {
if (!Entries.count(Pred))
return true;
}
@@ -661,7 +661,7 @@ PartialInlinerImpl::computeOutliningInfo(Function &F) const {
auto CheckAndNormalizeCandidate =
[Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
for (BasicBlock *E : OutliningInfo->Entries) {
- for (auto *Succ : successors(E)) {
+ for (auto *Succ : successors(E)) {
if (Entries.count(Succ))
continue;
if (Succ == OutliningInfo->ReturnBlock)
@@ -681,7 +681,7 @@ PartialInlinerImpl::computeOutliningInfo(Function &F) const {
// Now further growing the candidate's inlining region by
// peeling off dominating blocks from the outlining region:
- while (OutliningInfo->getNumInlinedBlocks() < MaxNumInlineBlocks) {
+ while (OutliningInfo->getNumInlinedBlocks() < MaxNumInlineBlocks) {
BasicBlock *Cand = OutliningInfo->NonReturnBlock;
if (succ_size(Cand) != 2)
break;
@@ -711,11 +711,11 @@ PartialInlinerImpl::computeOutliningInfo(Function &F) const {
}
// Check if there is PGO data or user annotated branch data:
-static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI) {
- if (F.hasProfileData())
+static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI) {
+ if (F.hasProfileData())
return true;
// Now check if any of the entry block has MD_prof data:
- for (auto *E : OI.Entries) {
+ for (auto *E : OI.Entries) {
BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
if (!BR || BR->isUnconditional())
continue;
@@ -726,8 +726,8 @@ static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI) {
return false;
}
-BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
- FunctionCloner &Cloner) const {
+BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
+ FunctionCloner &Cloner) const {
BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
auto EntryFreq =
Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
@@ -736,13 +736,13 @@ BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
// FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE
// we outlined any regions, so we may encounter situations where the
// OutliningCallFreq is *slightly* bigger than the EntryFreq.
- if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency())
+ if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency())
OutliningCallFreq = EntryFreq;
-
+
auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
- if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI.get()))
+ if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI.get()))
return OutlineRegionRelFreq;
// When profile data is not available, we need to be conservative in
@@ -768,7 +768,7 @@ BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
bool PartialInlinerImpl::shouldPartialInline(
CallBase &CB, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
- OptimizationRemarkEmitter &ORE) const {
+ OptimizationRemarkEmitter &ORE) const {
using namespace ore;
Function *Callee = CB.getCalledFunction();
@@ -851,8 +851,8 @@ bool PartialInlinerImpl::shouldPartialInline(
// TODO: Ideally we should share Inliner's InlineCost Analysis code.
// For now use a simplified version. The returned 'InlineCost' will be used
  // to estimate the size cost as well as the runtime cost of the BB.
-int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
- TargetTransformInfo *TTI) {
+int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
+ TargetTransformInfo *TTI) {
int InlineCost = 0;
const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
for (Instruction &I : BB->instructionsWithoutDebug()) {
@@ -875,21 +875,21 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
if (I.isLifetimeStartOrEnd())
continue;
- if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
- Intrinsic::ID IID = II->getIntrinsicID();
- SmallVector<Type *, 4> Tys;
- FastMathFlags FMF;
- for (Value *Val : II->args())
- Tys.push_back(Val->getType());
-
- if (auto *FPMO = dyn_cast<FPMathOperator>(II))
- FMF = FPMO->getFastMathFlags();
-
- IntrinsicCostAttributes ICA(IID, II->getType(), Tys, FMF);
- InlineCost += TTI->getIntrinsicInstrCost(ICA, TTI::TCK_SizeAndLatency);
- continue;
- }
-
+ if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ Intrinsic::ID IID = II->getIntrinsicID();
+ SmallVector<Type *, 4> Tys;
+ FastMathFlags FMF;
+ for (Value *Val : II->args())
+ Tys.push_back(Val->getType());
+
+ if (auto *FPMO = dyn_cast<FPMathOperator>(II))
+ FMF = FPMO->getFastMathFlags();
+
+ IntrinsicCostAttributes ICA(IID, II->getType(), Tys, FMF);
+ InlineCost += TTI->getIntrinsicInstrCost(ICA, TTI::TCK_SizeAndLatency);
+ continue;
+ }
+
if (CallInst *CI = dyn_cast<CallInst>(&I)) {
InlineCost += getCallsiteCost(*CI, DL);
continue;
@@ -910,20 +910,20 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
}
std::tuple<int, int>
-PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) const {
+PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) const {
int OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
for (auto FuncBBPair : Cloner.OutlinedFunctions) {
Function *OutlinedFunc = FuncBBPair.first;
BasicBlock* OutliningCallBB = FuncBBPair.second;
// Now compute the cost of the call sequence to the outlined function
// 'OutlinedFunction' in BB 'OutliningCallBB':
- auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
- OutliningFuncCallCost +=
- computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI);
+ auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
+ OutliningFuncCallCost +=
+ computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI);
// Now compute the cost of the extracted/outlined function itself:
for (BasicBlock &BB : *OutlinedFunc)
- OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
+ OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
}
assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
"Outlined function cost should be no less than the outlined region");
@@ -947,7 +947,7 @@ PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) const {
// after the function is partially inlined into the callsite.
void PartialInlinerImpl::computeCallsiteToProfCountMap(
Function *DuplicateFunction,
- DenseMap<User *, uint64_t> &CallSiteToProfCountMap) const {
+ DenseMap<User *, uint64_t> &CallSiteToProfCountMap) const {
std::vector<User *> Users(DuplicateFunction->user_begin(),
DuplicateFunction->user_end());
Function *CurrentCaller = nullptr;
@@ -988,9 +988,9 @@ void PartialInlinerImpl::computeCallsiteToProfCountMap(
PartialInlinerImpl::FunctionCloner::FunctionCloner(
Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
- function_ref<AssumptionCache *(Function &)> LookupAC,
- function_ref<TargetTransformInfo &(Function &)> GetTTI)
- : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
+ function_ref<AssumptionCache *(Function &)> LookupAC,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI)
+ : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
ClonedOI = std::make_unique<FunctionOutliningInfo>();
// Clone the function, so that we can hack away on it.
@@ -999,9 +999,9 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner(
ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
- for (BasicBlock *BB : OI->Entries)
+ for (BasicBlock *BB : OI->Entries)
ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
-
+
for (BasicBlock *E : OI->ReturnBlockPreds) {
BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
ClonedOI->ReturnBlockPreds.push_back(NewE);
@@ -1014,9 +1014,9 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner(
PartialInlinerImpl::FunctionCloner::FunctionCloner(
Function *F, FunctionOutliningMultiRegionInfo *OI,
OptimizationRemarkEmitter &ORE,
- function_ref<AssumptionCache *(Function &)> LookupAC,
- function_ref<TargetTransformInfo &(Function &)> GetTTI)
- : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
+ function_ref<AssumptionCache *(Function &)> LookupAC,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI)
+ : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
// Clone the function, so that we can hack away on it.
@@ -1028,9 +1028,9 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner(
for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
OI->ORI) {
SmallVector<BasicBlock *, 8> Region;
- for (BasicBlock *BB : RegionInfo.Region)
+ for (BasicBlock *BB : RegionInfo.Region)
Region.push_back(cast<BasicBlock>(VMap[BB]));
-
+
BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
BasicBlock *NewReturnBlock = nullptr;
@@ -1045,8 +1045,8 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner(
F->replaceAllUsesWith(ClonedFunc);
}
-void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock() const {
- auto GetFirstPHI = [](BasicBlock *BB) {
+void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock() const {
+ auto GetFirstPHI = [](BasicBlock *BB) {
BasicBlock::iterator I = BB->begin();
PHINode *FirstPhi = nullptr;
while (I != BB->end()) {
@@ -1072,7 +1072,7 @@ void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock() const {
// of which will go outside.
BasicBlock *PreReturn = ClonedOI->ReturnBlock;
// only split block when necessary:
- PHINode *FirstPhi = GetFirstPHI(PreReturn);
+ PHINode *FirstPhi = GetFirstPHI(PreReturn);
unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
@@ -1120,16 +1120,16 @@ void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock() const {
for (auto *DP : DeadPhis)
DP->eraseFromParent();
- for (auto *E : ClonedOI->ReturnBlockPreds)
+ for (auto *E : ClonedOI->ReturnBlockPreds)
E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
}
bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
- auto ComputeRegionCost = [&](SmallVectorImpl<BasicBlock *> &Region) {
+ auto ComputeRegionCost = [&](SmallVectorImpl<BasicBlock *> &Region) {
int Cost = 0;
for (BasicBlock* BB : Region)
- Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
+ Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
return Cost;
};
@@ -1162,21 +1162,21 @@ bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
CE.findInputsOutputs(Inputs, Outputs, Sinks);
- LLVM_DEBUG({
+ LLVM_DEBUG({
dbgs() << "inputs: " << Inputs.size() << "\n";
dbgs() << "outputs: " << Outputs.size() << "\n";
for (Value *value : Inputs)
dbgs() << "value used in func: " << *value << "\n";
for (Value *output : Outputs)
dbgs() << "instr used in func: " << *output << "\n";
- });
-
+ });
+
// Do not extract regions that have live exit variables.
if (Outputs.size() > 0 && !ForceLiveExit)
continue;
- if (Function *OutlinedFunc = CE.extractCodeRegion(CEAC)) {
- CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc);
+ if (Function *OutlinedFunc = CE.extractCodeRegion(CEAC)) {
+ CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc);
BasicBlock *OutliningCallBB = OCS->getParent();
assert(OutliningCallBB->getParent() == ClonedFunc);
OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
@@ -1205,7 +1205,7 @@ PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
// (i.e. not to be extracted to the out of line function)
auto ToBeInlined = [&, this](BasicBlock *BB) {
return BB == ClonedOI->ReturnBlock ||
- llvm::is_contained(ClonedOI->Entries, BB);
+ llvm::is_contained(ClonedOI->Entries, BB);
};
assert(ClonedOI && "Expecting OutlineInfo for single region outline");
@@ -1220,10 +1220,10 @@ PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
// Gather up the blocks that we're going to extract.
std::vector<BasicBlock *> ToExtract;
- auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
+ auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
ToExtract.push_back(ClonedOI->NonReturnBlock);
- OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
- ClonedOI->NonReturnBlock, ClonedFuncTTI);
+ OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
+ ClonedOI->NonReturnBlock, ClonedFuncTTI);
for (BasicBlock &BB : *ClonedFunc)
if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
ToExtract.push_back(&BB);
@@ -1231,7 +1231,7 @@ PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
// into the outlined function which may make the outlining
// overhead (the difference of the outlined function cost
// and OutliningRegionCost) look larger.
- OutlinedRegionCost += computeBBInlineCost(&BB, ClonedFuncTTI);
+ OutlinedRegionCost += computeBBInlineCost(&BB, ClonedFuncTTI);
}
// Extract the body of the if.
@@ -1244,7 +1244,7 @@ PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
if (OutlinedFunc) {
BasicBlock *OutliningCallBB =
- PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc)->getParent();
+ PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc)->getParent();
assert(OutliningCallBB->getParent() == ClonedFunc);
OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
} else
@@ -1273,48 +1273,48 @@ PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
}
}
-std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function &F) {
- if (F.hasAddressTaken())
+std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function &F) {
+ if (F.hasAddressTaken())
return {false, nullptr};
// Let inliner handle it
- if (F.hasFnAttribute(Attribute::AlwaysInline))
+ if (F.hasFnAttribute(Attribute::AlwaysInline))
return {false, nullptr};
- if (F.hasFnAttribute(Attribute::NoInline))
+ if (F.hasFnAttribute(Attribute::NoInline))
return {false, nullptr};
- if (PSI.isFunctionEntryCold(&F))
+ if (PSI.isFunctionEntryCold(&F))
return {false, nullptr};
- if (F.users().empty())
+ if (F.users().empty())
return {false, nullptr};
- OptimizationRemarkEmitter ORE(&F);
+ OptimizationRemarkEmitter ORE(&F);
// Only try to outline cold regions if we have a profile summary, which
// implies we have profiling information.
- if (PSI.hasProfileSummary() && F.hasProfileData() &&
+ if (PSI.hasProfileSummary() && F.hasProfileData() &&
!DisableMultiRegionPartialInline) {
std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
computeOutliningColdRegionsInfo(F, ORE);
if (OMRI) {
- FunctionCloner Cloner(&F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);
+ FunctionCloner Cloner(&F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);
- LLVM_DEBUG({
+ LLVM_DEBUG({
dbgs() << "HotCountThreshold = " << PSI.getHotCountThreshold() << "\n";
dbgs() << "ColdCountThreshold = " << PSI.getColdCountThreshold()
<< "\n";
- });
-
+ });
+
bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
if (DidOutline) {
- LLVM_DEBUG({
+ LLVM_DEBUG({
dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n";
Cloner.ClonedFunc->print(dbgs());
dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n";
- });
+ });
if (tryPartialInline(Cloner))
return {true, nullptr};
@@ -1329,15 +1329,15 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function &F) {
if (!OI)
return {false, nullptr};
- FunctionCloner Cloner(&F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
- Cloner.normalizeReturnBlock();
+ FunctionCloner Cloner(&F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
+ Cloner.normalizeReturnBlock();
Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
if (!OutlinedFunction)
return {false, nullptr};
- if (tryPartialInline(Cloner))
+ if (tryPartialInline(Cloner))
return {true, OutlinedFunction};
return {false, nullptr};
@@ -1355,9 +1355,9 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
// Only calculate RelativeToEntryFreq when we are doing single region
// outlining.
BranchProbability RelativeToEntryFreq;
- if (Cloner.ClonedOI)
+ if (Cloner.ClonedOI)
RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
- else
+ else
// RelativeToEntryFreq doesn't make sense when we have more than one
// outlined call because each call will have a different relative frequency
// to the entry block. We can consider using the average, but the
@@ -1375,7 +1375,7 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
DebugLoc DLoc;
BasicBlock *Block;
- std::tie(DLoc, Block) = getOneDebugLoc(*Cloner.ClonedFunc);
+ std::tie(DLoc, Block) = getOneDebugLoc(*Cloner.ClonedFunc);
OrigFuncORE.emit([&]() {
return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
DLoc, Block)
@@ -1406,7 +1406,7 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
for (User *User : Users) {
CallBase *CB = getSupportedCallBase(User);
- if (isLimitReached())
+ if (isLimitReached())
continue;
OptimizationRemarkEmitter CallerORE(CB->getCaller());
@@ -1488,7 +1488,7 @@ bool PartialInlinerImpl::run(Module &M) {
if (Recursive)
continue;
- std::pair<bool, Function *> Result = unswitchFunction(*CurrFunc);
+ std::pair<bool, Function *> Result = unswitchFunction(*CurrFunc);
if (Result.second)
Worklist.push_back(Result.second);
Changed |= Result.first;
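A recurring pattern in the PartialInlining hunks above is cost gating:
computeBBInlineCost sums per-instruction costs for one block, and
computeOutliningColdRegionsInfo rejects candidate regions whose summed
cost stays below a fraction of the whole function's cost. A toy model of
that check follows; the Block type and the 5% ratio are assumptions for
illustration, not LLVM's actual API or default.

#include <cstdio>
#include <vector>

struct Block { int InstCost; }; // stand-in for a BasicBlock's summed cost

static int regionCost(const std::vector<Block> &Blocks) {
  int Cost = 0;
  for (const Block &B : Blocks)
    Cost += B.InstCost; // analogous to one computeBBInlineCost call per BB
  return Cost;
}

int main() {
  std::vector<Block> Func = {{10}, {4}, {4}, {2}};
  const double MinRegionSizeRatio = 0.05; // assumed ratio
  int MinOutlineRegionCost =
      static_cast<int>(regionCost(Func) * MinRegionSizeRatio);

  std::vector<Block> Candidate = {{4}, {2}};
  if (regionCost(Candidate) < MinOutlineRegionCost)
    std::puts("ABORT: region too cheap to be worth outlining");
  else
    std::puts("region accepted as a cold outlining candidate");
  return 0;
}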
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/PassManagerBuilder.cpp
index 068328391d..2d8f1e0a20 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -51,16 +51,16 @@
using namespace llvm;
-cl::opt<bool> RunPartialInlining("enable-partial-inlining", cl::init(false),
- cl::Hidden, cl::ZeroOrMore,
-                                 cl::desc("Run Partial inlining pass"));
+cl::opt<bool> RunPartialInlining("enable-partial-inlining", cl::init(false),
+ cl::Hidden, cl::ZeroOrMore,
+                                 cl::desc("Run Partial inlining pass"));
static cl::opt<bool>
UseGVNAfterVectorization("use-gvn-after-vectorization",
cl::init(false), cl::Hidden,
cl::desc("Run GVN instead of Early CSE after vectorization passes"));
-cl::opt<bool> ExtraVectorizerPasses(
+cl::opt<bool> ExtraVectorizerPasses(
"extra-vectorizer-passes", cl::init(false), cl::Hidden,
cl::desc("Run cleanup optimization passes after vectorization."));
@@ -68,34 +68,34 @@ static cl::opt<bool>
RunLoopRerolling("reroll-loops", cl::Hidden,
cl::desc("Run the loop rerolling pass"));
-cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
- cl::desc("Run the NewGVN pass"));
+cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
+ cl::desc("Run the NewGVN pass"));
// Experimental option to use CFL-AA
enum class CFLAAType { None, Steensgaard, Andersen, Both };
-static cl::opt<::CFLAAType>
- UseCFLAA("use-cfl-aa", cl::init(::CFLAAType::None), cl::Hidden,
+static cl::opt<::CFLAAType>
+ UseCFLAA("use-cfl-aa", cl::init(::CFLAAType::None), cl::Hidden,
cl::desc("Enable the new, experimental CFL alias analysis"),
- cl::values(clEnumValN(::CFLAAType::None, "none", "Disable CFL-AA"),
- clEnumValN(::CFLAAType::Steensgaard, "steens",
+ cl::values(clEnumValN(::CFLAAType::None, "none", "Disable CFL-AA"),
+ clEnumValN(::CFLAAType::Steensgaard, "steens",
"Enable unification-based CFL-AA"),
- clEnumValN(::CFLAAType::Andersen, "anders",
+ clEnumValN(::CFLAAType::Andersen, "anders",
"Enable inclusion-based CFL-AA"),
- clEnumValN(::CFLAAType::Both, "both",
+ clEnumValN(::CFLAAType::Both, "both",
"Enable both variants of CFL-AA")));
static cl::opt<bool> EnableLoopInterchange(
"enable-loopinterchange", cl::init(false), cl::Hidden,
cl::desc("Enable the new, experimental LoopInterchange Pass"));
-cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false),
- cl::Hidden,
- cl::desc("Enable Unroll And Jam Pass"));
-
-cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
- cl::Hidden,
- cl::desc("Enable the LoopFlatten Pass"));
+cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false),
+ cl::Hidden,
+ cl::desc("Enable Unroll And Jam Pass"));
+cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
+ cl::Hidden,
+ cl::desc("Enable the LoopFlatten Pass"));
+
static cl::opt<bool>
EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden,
cl::desc("Enable preparation for ThinLTO."));
@@ -107,25 +107,25 @@ static cl::opt<bool>
cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::init(false),
cl::ZeroOrMore, cl::desc("Enable hot-cold splitting pass"));
-cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden,
- cl::desc("Enable ir outliner pass"));
-
+cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden,
+ cl::desc("Enable ir outliner pass"));
+
static cl::opt<bool> UseLoopVersioningLICM(
"enable-loop-versioning-licm", cl::init(false), cl::Hidden,
cl::desc("Enable the experimental Loop Versioning LICM pass"));
-cl::opt<bool>
+cl::opt<bool>
DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
cl::desc("Disable pre-instrumentation inliner"));
-cl::opt<int> PreInlineThreshold(
+cl::opt<int> PreInlineThreshold(
"preinline-threshold", cl::Hidden, cl::init(75), cl::ZeroOrMore,
cl::desc("Control the amount of inlining in pre-instrumentation inliner "
"(default = 75)"));
-cl::opt<bool>
- EnableGVNHoist("enable-gvn-hoist", cl::init(false), cl::ZeroOrMore,
- cl::desc("Enable the GVN hoisting pass (default = off)"));
+cl::opt<bool>
+ EnableGVNHoist("enable-gvn-hoist", cl::init(false), cl::ZeroOrMore,
+ cl::desc("Enable the GVN hoisting pass (default = off)"));
static cl::opt<bool>
DisableLibCallsShrinkWrap("disable-libcalls-shrinkwrap", cl::init(false),
@@ -137,13 +137,13 @@ static cl::opt<bool> EnableSimpleLoopUnswitch(
cl::desc("Enable the simple loop unswitch pass. Also enables independent "
"cleanup passes integrated into the loop pass manager pipeline."));
-cl::opt<bool>
- EnableGVNSink("enable-gvn-sink", cl::init(false), cl::ZeroOrMore,
- cl::desc("Enable the GVN sinking pass (default = off)"));
+cl::opt<bool>
+ EnableGVNSink("enable-gvn-sink", cl::init(false), cl::ZeroOrMore,
+ cl::desc("Enable the GVN sinking pass (default = off)"));
 // This option is used to simplify testing of SampleFDO optimizations for
// profile loading.
-cl::opt<bool>
+cl::opt<bool>
EnableCHR("enable-chr", cl::init(true), cl::Hidden,
cl::desc("Enable control height reduction optimization (CHR)"));
@@ -156,15 +156,15 @@ cl::opt<bool> EnableOrderFileInstrumentation(
"enable-order-file-instrumentation", cl::init(false), cl::Hidden,
cl::desc("Enable order file instrumentation (default = off)"));
-cl::opt<bool> EnableMatrix(
- "enable-matrix", cl::init(false), cl::Hidden,
- cl::desc("Enable lowering of the matrix intrinsics"));
-
-cl::opt<bool> EnableConstraintElimination(
- "enable-constraint-elimination", cl::init(false), cl::Hidden,
- cl::desc(
- "Enable pass to eliminate conditions based on linear constraints."));
+cl::opt<bool> EnableMatrix(
+ "enable-matrix", cl::init(false), cl::Hidden,
+ cl::desc("Enable lowering of the matrix intrinsics"));
+cl::opt<bool> EnableConstraintElimination(
+ "enable-constraint-elimination", cl::init(false), cl::Hidden,
+ cl::desc(
+ "Enable pass to eliminate conditions based on linear constraints."));
+
cl::opt<AttributorRunOption> AttributorRun(
"attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
cl::desc("Enable the attributor inter-procedural deduction pass."),
@@ -276,13 +276,13 @@ void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
void PassManagerBuilder::addInitialAliasAnalysisPasses(
legacy::PassManagerBase &PM) const {
switch (UseCFLAA) {
- case ::CFLAAType::Steensgaard:
+ case ::CFLAAType::Steensgaard:
PM.add(createCFLSteensAAWrapperPass());
break;
- case ::CFLAAType::Andersen:
+ case ::CFLAAType::Andersen:
PM.add(createCFLAndersAAWrapperPass());
break;
- case ::CFLAAType::Both:
+ case ::CFLAAType::Both:
PM.add(createCFLSteensAAWrapperPass());
PM.add(createCFLAndersAAWrapperPass());
break;
@@ -306,13 +306,13 @@ void PassManagerBuilder::populateFunctionPassManager(
if (LibraryInfo)
FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
- // The backends do not handle matrix intrinsics currently.
- // Make sure they are also lowered in O0.
- // FIXME: A lightweight version of the pass should run in the backend
- // pipeline on demand.
- if (EnableMatrix && OptLevel == 0)
- FPM.add(createLowerMatrixIntrinsicsMinimalPass());
-
+ // The backends do not handle matrix intrinsics currently.
+ // Make sure they are also lowered in O0.
+ // FIXME: A lightweight version of the pass should run in the backend
+ // pipeline on demand.
+ if (EnableMatrix && OptLevel == 0)
+ FPM.add(createLowerMatrixIntrinsicsMinimalPass());
+
if (OptLevel == 0) return;
addInitialAliasAnalysisPasses(FPM);
@@ -334,20 +334,20 @@ void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM,
// Perform the preinline and cleanup passes for O1 and above.
// We will not do this inline for context sensitive PGO (when IsCS is true).
- if (OptLevel > 0 && !DisablePreInliner && PGOSampleUse.empty() && !IsCS) {
+ if (OptLevel > 0 && !DisablePreInliner && PGOSampleUse.empty() && !IsCS) {
// Create preinline pass. We construct an InlineParams object and specify
// the threshold here to avoid the command line options of the regular
// inliner to influence pre-inlining. The only fields of InlineParams we
// care about are DefaultThreshold and HintThreshold.
InlineParams IP;
IP.DefaultThreshold = PreInlineThreshold;
- // FIXME: The hint threshold has the same value used by the regular inliner
-    // when not optimizing for size. This should probably be lowered after
- // performance testing.
- // Use PreInlineThreshold for both -Os and -Oz. Not running preinliner makes
- // the instrumented binary unusably large. Even if PreInlineThreshold is not
-    // the correct threshold for -Oz, it is better than not running the preinliner.
- IP.HintThreshold = SizeLevel > 0 ? PreInlineThreshold : 325;
+ // FIXME: The hint threshold has the same value used by the regular inliner
+    // when not optimizing for size. This should probably be lowered after
+ // performance testing.
+ // Use PreInlineThreshold for both -Os and -Oz. Not running preinliner makes
+ // the instrumented binary unusably large. Even if PreInlineThreshold is not
+    // the correct threshold for -Oz, it is better than not running the preinliner.
+ IP.HintThreshold = SizeLevel > 0 ? PreInlineThreshold : 325;
MPM.add(createFunctionInliningPass(IP));
MPM.add(createSROAPass());
@@ -395,9 +395,9 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
}
}
- if (EnableConstraintElimination)
- MPM.add(createConstraintEliminationPass());
-
+ if (EnableConstraintElimination)
+ MPM.add(createConstraintEliminationPass());
+
if (OptLevel > 1) {
// Speculative execution if the target has divergent branches; otherwise nop.
MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass());
@@ -433,7 +433,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createLoopSimplifyCFGPass());
}
// Rotate Loop - disable header duplication at -Oz
- MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO));
+ MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO));
// TODO: Investigate promotion cap for O1.
MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
if (EnableSimpleLoopUnswitch)
@@ -446,11 +446,11 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createCFGSimplificationPass());
MPM.add(createInstructionCombiningPass());
// We resume loop passes creating a second loop pipeline here.
- if (EnableLoopFlatten) {
- MPM.add(createLoopFlattenPass()); // Flatten loops
- MPM.add(createLoopSimplifyCFGPass());
- }
- MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
+ if (EnableLoopFlatten) {
+ MPM.add(createLoopFlattenPass()); // Flatten loops
+ MPM.add(createLoopSimplifyCFGPass());
+ }
+ MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
addExtensionsToPM(EP_LateLoopOptimizations, MPM);
MPM.add(createLoopDeletionPass()); // Delete dead loops
@@ -458,15 +458,15 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
if (EnableLoopInterchange)
MPM.add(createLoopInterchangePass()); // Interchange loops
- // Unroll small loops and perform peeling.
+ // Unroll small loops and perform peeling.
MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
ForgetAllSCEVInLoopUnroll));
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
// This ends the loop pass pipelines.
- // Break up allocas that may now be splittable after loop unrolling.
- MPM.add(createSROAPass());
-
+ // Break up allocas that may now be splittable after loop unrolling.
+ MPM.add(createSROAPass());
+
if (OptLevel > 1) {
MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
MPM.add(NewGVN ? createNewGVNPass()
@@ -475,9 +475,9 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
MPM.add(createSCCPPass()); // Constant prop with SCCP
- if (EnableConstraintElimination)
- MPM.add(createConstraintEliminationPass());
-
+ if (EnableConstraintElimination)
+ MPM.add(createConstraintEliminationPass());
+
// Delete dead bit computations (instcombine runs after to fold away the dead
// computations, and then ADCE will run later to exploit any new DCE
// opportunities that creates).
@@ -490,11 +490,11 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
if (OptLevel > 1) {
MPM.add(createJumpThreadingPass()); // Thread jumps
MPM.add(createCorrelatedValuePropagationPass());
- }
- MPM.add(createAggressiveDCEPass()); // Delete dead instructions
-
- // TODO: Investigate if this is too expensive at O1.
- if (OptLevel > 1) {
+ }
+ MPM.add(createAggressiveDCEPass()); // Delete dead instructions
+
+ // TODO: Investigate if this is too expensive at O1.
+ if (OptLevel > 1) {
MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
}
@@ -520,8 +520,8 @@ void PassManagerBuilder::populateModulePassManager(
// is handled separately, so just check this is not the ThinLTO post-link.
bool DefaultOrPreLinkPipeline = !PerformThinLTO;
- MPM.add(createAnnotation2MetadataLegacyPass());
-
+ MPM.add(createAnnotation2MetadataLegacyPass());
+
if (!PGOSampleUse.empty()) {
MPM.add(createPruneEHPass());
// In ThinLTO mode, when flattened profile is used, all the available
@@ -572,8 +572,8 @@ void PassManagerBuilder::populateModulePassManager(
// new unnamed globals.
MPM.add(createNameAnonGlobalPass());
}
-
- MPM.add(createAnnotationRemarksLegacyPass());
+
+ MPM.add(createAnnotationRemarksLegacyPass());
return;
}
@@ -777,7 +777,7 @@ void PassManagerBuilder::populateModulePassManager(
  // Re-rotate loops in all our loop nests. These may have fallen out of
// rotated form due to GVN or other transformations, and the vectorizer relies
// on the rotated form. Disable header duplication at -Oz.
- MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO));
+ MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO));
// Distribute loops to allow partial vectorization. I.e. isolate dependences
  // into a separate loop that would otherwise inhibit vectorization. This is
@@ -818,14 +818,14 @@ void PassManagerBuilder::populateModulePassManager(
// convert to more optimized IR using more aggressive simplify CFG options.
// The extra sinking transform can create larger basic blocks, so do this
// before SLP vectorization.
- // FIXME: study whether hoisting and/or sinking of common instructions should
- // be delayed until after SLP vectorizer.
- MPM.add(createCFGSimplificationPass(SimplifyCFGOptions()
- .forwardSwitchCondToPhi(true)
- .convertSwitchToLookupTable(true)
- .needCanonicalLoops(false)
- .hoistCommonInsts(true)
- .sinkCommonInsts(true)));
+ // FIXME: study whether hoisting and/or sinking of common instructions should
+ // be delayed until after SLP vectorizer.
+ MPM.add(createCFGSimplificationPass(SimplifyCFGOptions()
+ .forwardSwitchCondToPhi(true)
+ .convertSwitchToLookupTable(true)
+ .needCanonicalLoops(false)
+ .hoistCommonInsts(true)
+ .sinkCommonInsts(true)));
if (SLPVectorize) {
MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
@@ -883,9 +883,9 @@ void PassManagerBuilder::populateModulePassManager(
if (EnableHotColdSplit && !(PrepareForLTO || PrepareForThinLTO))
MPM.add(createHotColdSplittingPass());
- if (EnableIROutliner)
- MPM.add(createIROutlinerPass());
-
+ if (EnableIROutliner)
+ MPM.add(createIROutlinerPass());
+
if (MergeFunctions)
MPM.add(createMergeFunctionsPass());
@@ -917,8 +917,8 @@ void PassManagerBuilder::populateModulePassManager(
// Rename anon globals to be able to handle them in the summary
MPM.add(createNameAnonGlobalPass());
}
-
- MPM.add(createAnnotationRemarksLegacyPass());
+
+ MPM.add(createAnnotationRemarksLegacyPass());
}
void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
@@ -1037,7 +1037,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// The IPO passes may leave cruft around. Clean up after them.
PM.add(createInstructionCombiningPass());
addExtensionsToPM(EP_Peephole, PM);
- PM.add(createJumpThreadingPass(/*FreezeSelectCond*/ true));
+ PM.add(createJumpThreadingPass(/*FreezeSelectCond*/ true));
// Break up allocas
PM.add(createSROAPass());
@@ -1059,23 +1059,23 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// Nuke dead stores.
PM.add(createDeadStoreEliminationPass());
- PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds.
+ PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds.
// More loops are countable; try to optimize them.
- if (EnableLoopFlatten)
- PM.add(createLoopFlattenPass());
+ if (EnableLoopFlatten)
+ PM.add(createLoopFlattenPass());
PM.add(createIndVarSimplifyPass());
PM.add(createLoopDeletionPass());
if (EnableLoopInterchange)
PM.add(createLoopInterchangePass());
- if (EnableConstraintElimination)
- PM.add(createConstraintEliminationPass());
-
- // Unroll small loops and perform peeling.
+ if (EnableConstraintElimination)
+ PM.add(createConstraintEliminationPass());
+
+ // Unroll small loops and perform peeling.
PM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
ForgetAllSCEVInLoopUnroll));
- PM.add(createLoopDistributePass());
+ PM.add(createLoopDistributePass());
PM.add(createLoopVectorizePass(true, !LoopVectorize));
// The vectorizer may have significantly shortened a loop body; unroll again.
PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
@@ -1087,8 +1087,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// we may have exposed more scalar opportunities. Run parts of the scalar
// optimizer again at this point.
PM.add(createInstructionCombiningPass()); // Initial cleanup
- PM.add(createCFGSimplificationPass(SimplifyCFGOptions() // if-convert
- .hoistCommonInsts(true)));
+ PM.add(createCFGSimplificationPass(SimplifyCFGOptions() // if-convert
+ .hoistCommonInsts(true)));
PM.add(createSCCPPass()); // Propagate exposed constants
PM.add(createInstructionCombiningPass()); // Clean up again
PM.add(createBitTrackingDCEPass());
@@ -1107,7 +1107,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createInstructionCombiningPass());
addExtensionsToPM(EP_Peephole, PM);
- PM.add(createJumpThreadingPass(/*FreezeSelectCond*/ true));
+ PM.add(createJumpThreadingPass(/*FreezeSelectCond*/ true));
}
void PassManagerBuilder::addLateLTOOptimizationPasses(
@@ -1118,8 +1118,8 @@ void PassManagerBuilder::addLateLTOOptimizationPasses(
PM.add(createHotColdSplittingPass());
// Delete basic blocks, which optimization passes may have killed.
- PM.add(
- createCFGSimplificationPass(SimplifyCFGOptions().hoistCommonInsts(true)));
+ PM.add(
+ createCFGSimplificationPass(SimplifyCFGOptions().hoistCommonInsts(true)));
// Drop bodies of available externally objects to improve GlobalDCE.
PM.add(createEliminateAvailableExternallyPass());
@@ -1201,8 +1201,8 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
addExtensionsToPM(EP_FullLinkTimeOptimizationLast, PM);
- PM.add(createAnnotationRemarksLegacyPass());
-
+ PM.add(createAnnotationRemarksLegacyPass());
+
if (VerifyOutput)
PM.add(createVerifierPass());
}
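Nearly every PassManagerBuilder hunk above follows one shape: a cl::opt
boolean decides whether an optional pass (LoopFlatten, ConstraintElimination,
IROutliner) is appended to the pipeline. A self-contained sketch of that
shape follows; the PassManager struct and string pass names below are
stand-ins, not the legacy pass manager API.

#include <iostream>
#include <string>
#include <vector>

struct PassManager {
  std::vector<std::string> Pipeline;
  void add(const std::string &PassName) { Pipeline.push_back(PassName); }
};

int main() {
  bool EnableLoopFlatten = false;   // mirrors -enable-loop-flatten
  bool EnableConstraintElim = true; // mirrors -enable-constraint-elimination

  PassManager PM;
  PM.add("loop-rotate");
  if (EnableLoopFlatten) {
    PM.add("loop-flatten");
    PM.add("loop-simplifycfg"); // cleanup after flattening
  }
  PM.add("loop-idiom");
  if (EnableConstraintElim)
    PM.add("constraint-elimination");

  for (const std::string &P : PM.Pipeline)
    std::cout << P << '\n';
  return 0;
}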
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/PruneEH.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/PruneEH.cpp
index 3f3b18771c..3143f3abfc 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/PruneEH.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/PruneEH.cpp
@@ -13,7 +13,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
@@ -28,10 +28,10 @@
#include "llvm/InitializePasses.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/Utils/CallGraphUpdater.h"
+#include "llvm/Transforms/Utils/CallGraphUpdater.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
-
+
using namespace llvm;
#define DEBUG_TYPE "prune-eh"
@@ -50,8 +50,8 @@ namespace {
bool runOnSCC(CallGraphSCC &SCC) override;
};
}
-static bool SimplifyFunction(Function *F, CallGraphUpdater &CGU);
-static void DeleteBasicBlock(BasicBlock *BB, CallGraphUpdater &CGU);
+static bool SimplifyFunction(Function *F, CallGraphUpdater &CGU);
+static void DeleteBasicBlock(BasicBlock *BB, CallGraphUpdater &CGU);
char PruneEH::ID = 0;
INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh",
@@ -62,17 +62,17 @@ INITIALIZE_PASS_END(PruneEH, "prune-eh",
Pass *llvm::createPruneEHPass() { return new PruneEH(); }
-static bool runImpl(CallGraphUpdater &CGU, SetVector<Function *> &Functions) {
-#ifndef NDEBUG
- for (auto *F : Functions)
- assert(F && "null Function");
-#endif
+static bool runImpl(CallGraphUpdater &CGU, SetVector<Function *> &Functions) {
+#ifndef NDEBUG
+ for (auto *F : Functions)
+ assert(F && "null Function");
+#endif
bool MadeChange = false;
// First pass, scan all of the functions in the SCC, simplifying them
// according to what we know.
- for (Function *F : Functions)
- MadeChange |= SimplifyFunction(F, CGU);
+ for (Function *F : Functions)
+ MadeChange |= SimplifyFunction(F, CGU);
// Next, check to see if any callees might throw or if there are any external
// functions in this SCC: if so, we cannot prune any functions in this SCC.
@@ -82,8 +82,8 @@ static bool runImpl(CallGraphUpdater &CGU, SetVector<Function *> &Functions) {
// obviously the SCC might throw.
//
bool SCCMightUnwind = false, SCCMightReturn = false;
- for (Function *F : Functions) {
- if (!F->hasExactDefinition()) {
+ for (Function *F : Functions) {
+ if (!F->hasExactDefinition()) {
SCCMightUnwind |= !F->doesNotThrow();
SCCMightReturn |= !F->doesNotReturn();
} else {
@@ -121,7 +121,7 @@ static bool runImpl(CallGraphUpdater &CGU, SetVector<Function *> &Functions) {
if (Function *Callee = CI->getCalledFunction()) {
// If the callee is outside our current SCC then we may throw
// because it might. If it is inside, do nothing.
- if (Functions.contains(Callee))
+ if (Functions.contains(Callee))
InstMightUnwind = false;
}
}
@@ -133,7 +133,7 @@ static bool runImpl(CallGraphUpdater &CGU, SetVector<Function *> &Functions) {
if (IA->hasSideEffects())
SCCMightReturn = true;
}
- }
+ }
if (SCCMightUnwind && SCCMightReturn)
break;
}
@@ -141,7 +141,7 @@ static bool runImpl(CallGraphUpdater &CGU, SetVector<Function *> &Functions) {
// If the SCC doesn't unwind or doesn't throw, note this fact.
if (!SCCMightUnwind || !SCCMightReturn)
- for (Function *F : Functions) {
+ for (Function *F : Functions) {
if (!SCCMightUnwind && !F->hasFnAttribute(Attribute::NoUnwind)) {
F->addFnAttr(Attribute::NoUnwind);
MadeChange = true;
@@ -153,11 +153,11 @@ static bool runImpl(CallGraphUpdater &CGU, SetVector<Function *> &Functions) {
}
}
- for (Function *F : Functions) {
+ for (Function *F : Functions) {
// Convert any invoke instructions to non-throwing functions in this node
// into call instructions with a branch. This makes the exception blocks
// dead.
- MadeChange |= SimplifyFunction(F, CGU);
+ MadeChange |= SimplifyFunction(F, CGU);
}
return MadeChange;
@@ -166,22 +166,22 @@ static bool runImpl(CallGraphUpdater &CGU, SetVector<Function *> &Functions) {
bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
if (skipSCC(SCC))
return false;
- SetVector<Function *> Functions;
- for (auto &N : SCC) {
- if (auto *F = N->getFunction())
- Functions.insert(F);
- }
+ SetVector<Function *> Functions;
+ for (auto &N : SCC) {
+ if (auto *F = N->getFunction())
+ Functions.insert(F);
+ }
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
- CallGraphUpdater CGU;
- CGU.initialize(CG, SCC);
- return runImpl(CGU, Functions);
+ CallGraphUpdater CGU;
+ CGU.initialize(CG, SCC);
+ return runImpl(CGU, Functions);
}
// SimplifyFunction - Given information about callees, simplify the specified
// function if we have invokes to non-unwinding functions or code after calls to
// no-return functions.
-static bool SimplifyFunction(Function *F, CallGraphUpdater &CGU) {
+static bool SimplifyFunction(Function *F, CallGraphUpdater &CGU) {
bool MadeChange = false;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
@@ -191,7 +191,7 @@ static bool SimplifyFunction(Function *F, CallGraphUpdater &CGU) {
// If the unwind block is now dead, nuke it.
if (pred_empty(UnwindBlock))
- DeleteBasicBlock(UnwindBlock, CGU); // Delete the new BB.
+ DeleteBasicBlock(UnwindBlock, CGU); // Delete the new BB.
++NumRemoved;
MadeChange = true;
@@ -211,7 +211,7 @@ static bool SimplifyFunction(Function *F, CallGraphUpdater &CGU) {
BB->getInstList().pop_back();
new UnreachableInst(BB->getContext(), &*BB);
- DeleteBasicBlock(New, CGU); // Delete the new BB.
+ DeleteBasicBlock(New, CGU); // Delete the new BB.
MadeChange = true;
++NumUnreach;
break;
@@ -224,7 +224,7 @@ static bool SimplifyFunction(Function *F, CallGraphUpdater &CGU) {
/// DeleteBasicBlock - remove the specified basic block from the program,
/// updating the callgraph to reflect any now-obsolete edges due to calls that
/// exist in the BB.
-static void DeleteBasicBlock(BasicBlock *BB, CallGraphUpdater &CGU) {
+static void DeleteBasicBlock(BasicBlock *BB, CallGraphUpdater &CGU) {
assert(pred_empty(BB) && "BB is not dead!");
Instruction *TokenInst = nullptr;
@@ -240,9 +240,9 @@ static void DeleteBasicBlock(BasicBlock *BB, CallGraphUpdater &CGU) {
if (auto *Call = dyn_cast<CallBase>(&*I)) {
const Function *Callee = Call->getCalledFunction();
if (!Callee || !Intrinsic::isLeaf(Callee->getIntrinsicID()))
- CGU.removeCallSite(*Call);
+ CGU.removeCallSite(*Call);
else if (!Callee->isIntrinsic())
- CGU.removeCallSite(*Call);
+ CGU.removeCallSite(*Call);
}
if (!I->use_empty())
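The runImpl logic in the PruneEH hunk above reasons over a whole SCC at
once: only when no member can unwind (or return) may every member be
marked nounwind (or noreturn). A simplified, self-contained model of that
propagation; Function here is a stand-in for llvm::Function, and members
without exact definitions are pessimistically treated as able to both
throw and return.

#include <iostream>
#include <vector>

struct Function {
  const char *Name;
  bool HasExactDefinition;
  bool BodyMayThrow;  // derived from scanning the body's instructions
  bool BodyMayReturn; // ditto
  bool NoUnwind = false;
  bool NoReturn = false;
};

int main() {
  std::vector<Function> SCC = {
      {"f", true, /*throw*/ false, /*return*/ true},
      {"g", true, /*throw*/ false, /*return*/ true},
  };

  bool SCCMightUnwind = false, SCCMightReturn = false;
  for (const Function &F : SCC) {
    if (!F.HasExactDefinition) {
      SCCMightUnwind = true; // must assume the worst for open definitions
      SCCMightReturn = true;
    } else {
      SCCMightUnwind |= F.BodyMayThrow;
      SCCMightReturn |= F.BodyMayReturn;
    }
  }

  for (Function &F : SCC) {
    if (!SCCMightUnwind) F.NoUnwind = true; // whole-SCC facts, per-member attrs
    if (!SCCMightReturn) F.NoReturn = true;
    std::cout << F.Name << ": nounwind=" << F.NoUnwind
              << " noreturn=" << F.NoReturn << '\n';
  }
  return 0;
}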
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/SampleContextTracker.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/SampleContextTracker.cpp
index 158fa0771c..37dcc0feae 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/SampleContextTracker.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/SampleContextTracker.cpp
@@ -1,585 +1,585 @@
-//===- SampleContextTracker.cpp - Context-sensitive Profile Tracker -------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the SampleContextTracker used by CSSPGO.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/IPO/SampleContextTracker.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/ProfileData/SampleProf.h"
-#include <map>
-#include <queue>
-#include <vector>
-
-using namespace llvm;
-using namespace sampleprof;
-
-#define DEBUG_TYPE "sample-context-tracker"
-
-namespace llvm {
-
-ContextTrieNode *ContextTrieNode::getChildContext(const LineLocation &CallSite,
- StringRef CalleeName) {
- if (CalleeName.empty())
- return getHottestChildContext(CallSite);
-
- uint32_t Hash = nodeHash(CalleeName, CallSite);
- auto It = AllChildContext.find(Hash);
- if (It != AllChildContext.end())
- return &It->second;
- return nullptr;
-}
-
-ContextTrieNode *
-ContextTrieNode::getHottestChildContext(const LineLocation &CallSite) {
- // CSFDO-TODO: This could be slow; change AllChildContext so we can
- // do a point lookup for a child node by call site alone.
- // Retrieve the child node with the max count for an indirect call.
- ContextTrieNode *ChildNodeRet = nullptr;
- uint64_t MaxCalleeSamples = 0;
- for (auto &It : AllChildContext) {
- ContextTrieNode &ChildNode = It.second;
- if (ChildNode.CallSiteLoc != CallSite)
- continue;
- FunctionSamples *Samples = ChildNode.getFunctionSamples();
- if (!Samples)
- continue;
- if (Samples->getTotalSamples() > MaxCalleeSamples) {
- ChildNodeRet = &ChildNode;
- MaxCalleeSamples = Samples->getTotalSamples();
- }
- }
-
- return ChildNodeRet;
-}
-
-ContextTrieNode &ContextTrieNode::moveToChildContext(
- const LineLocation &CallSite, ContextTrieNode &&NodeToMove,
- StringRef ContextStrToRemove, bool DeleteNode) {
- uint32_t Hash = nodeHash(NodeToMove.getFuncName(), CallSite);
- assert(!AllChildContext.count(Hash) && "Node to move must not exist");
- LineLocation OldCallSite = NodeToMove.CallSiteLoc;
- ContextTrieNode &OldParentContext = *NodeToMove.getParentContext();
- AllChildContext[Hash] = NodeToMove;
- ContextTrieNode &NewNode = AllChildContext[Hash];
- NewNode.CallSiteLoc = CallSite;
-
- // Walk through nodes in the moved subtree, and update the
- // FunctionSamples' contexts to reflect the context promotion.
- // We also need to set the new parent link for all children.
- std::queue<ContextTrieNode *> NodeToUpdate;
- NewNode.setParentContext(this);
- NodeToUpdate.push(&NewNode);
-
- while (!NodeToUpdate.empty()) {
- ContextTrieNode *Node = NodeToUpdate.front();
- NodeToUpdate.pop();
- FunctionSamples *FSamples = Node->getFunctionSamples();
-
- if (FSamples) {
- FSamples->getContext().promoteOnPath(ContextStrToRemove);
- FSamples->getContext().setState(SyntheticContext);
- LLVM_DEBUG(dbgs() << " Context promoted to: " << FSamples->getContext()
- << "\n");
- }
-
- for (auto &It : Node->getAllChildContext()) {
- ContextTrieNode *ChildNode = &It.second;
- ChildNode->setParentContext(Node);
- NodeToUpdate.push(ChildNode);
- }
- }
-
- // Original context no longer needed, destroy if requested.
- if (DeleteNode)
- OldParentContext.removeChildContext(OldCallSite, NewNode.getFuncName());
-
- return NewNode;
-}
-
-void ContextTrieNode::removeChildContext(const LineLocation &CallSite,
- StringRef CalleeName) {
- uint32_t Hash = nodeHash(CalleeName, CallSite);
- // Note this essentially calls dtor and destroys that child context
- AllChildContext.erase(Hash);
-}
-
-std::map<uint32_t, ContextTrieNode> &ContextTrieNode::getAllChildContext() {
- return AllChildContext;
-}
-
-const StringRef ContextTrieNode::getFuncName() const { return FuncName; }
-
-FunctionSamples *ContextTrieNode::getFunctionSamples() const {
- return FuncSamples;
-}
-
-void ContextTrieNode::setFunctionSamples(FunctionSamples *FSamples) {
- FuncSamples = FSamples;
-}
-
-LineLocation ContextTrieNode::getCallSiteLoc() const { return CallSiteLoc; }
-
-ContextTrieNode *ContextTrieNode::getParentContext() const {
- return ParentContext;
-}
-
-void ContextTrieNode::setParentContext(ContextTrieNode *Parent) {
- ParentContext = Parent;
-}
-
-void ContextTrieNode::dump() {
- dbgs() << "Node: " << FuncName << "\n"
- << " Callsite: " << CallSiteLoc << "\n"
- << " Children:\n";
-
- for (auto &It : AllChildContext) {
- dbgs() << " Node: " << It.second.getFuncName() << "\n";
- }
-}
-
-uint32_t ContextTrieNode::nodeHash(StringRef ChildName,
- const LineLocation &Callsite) {
- // We still use the child's name for the child hash because,
- // for children of the root node, we don't have different
- // line/discriminator values, and we'll rely on the name
- // to differentiate children.
- uint32_t NameHash = std::hash<std::string>{}(ChildName.str());
- uint32_t LocId = (Callsite.LineOffset << 16) | Callsite.Discriminator;
- return NameHash + (LocId << 5) + LocId;
-}
-
-ContextTrieNode *ContextTrieNode::getOrCreateChildContext(
- const LineLocation &CallSite, StringRef CalleeName, bool AllowCreate) {
- uint32_t Hash = nodeHash(CalleeName, CallSite);
- auto It = AllChildContext.find(Hash);
- if (It != AllChildContext.end()) {
- assert(It->second.getFuncName() == CalleeName &&
- "Hash collision for child context node");
- return &It->second;
- }
-
- if (!AllowCreate)
- return nullptr;
-
- AllChildContext[Hash] = ContextTrieNode(this, CalleeName, nullptr, CallSite);
- return &AllChildContext[Hash];
-}
-
-// Profile tracker that manages profiles and their associated contexts
-SampleContextTracker::SampleContextTracker(
- StringMap<FunctionSamples> &Profiles) {
- for (auto &FuncSample : Profiles) {
- FunctionSamples *FSamples = &FuncSample.second;
- SampleContext Context(FuncSample.first(), RawContext);
- LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context << "\n");
- if (!Context.isBaseContext())
- FuncToCtxtProfileSet[Context.getNameWithoutContext()].insert(FSamples);
- ContextTrieNode *NewNode = getOrCreateContextPath(Context, true);
- assert(!NewNode->getFunctionSamples() &&
- "New node can't have sample profile");
- NewNode->setFunctionSamples(FSamples);
- }
-}
-
-FunctionSamples *
-SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst,
- StringRef CalleeName) {
- LLVM_DEBUG(dbgs() << "Getting callee context for instr: " << Inst << "\n");
- DILocation *DIL = Inst.getDebugLoc();
- if (!DIL)
- return nullptr;
-
- // For indirect call, CalleeName will be empty, in which case the context
- // profile for callee with largest total samples will be returned.
- ContextTrieNode *CalleeContext = getCalleeContextFor(DIL, CalleeName);
- if (CalleeContext) {
- FunctionSamples *FSamples = CalleeContext->getFunctionSamples();
- LLVM_DEBUG(if (FSamples) {
- dbgs() << " Callee context found: " << FSamples->getContext() << "\n";
- });
- return FSamples;
- }
-
- return nullptr;
-}
-
-std::vector<const FunctionSamples *>
-SampleContextTracker::getIndirectCalleeContextSamplesFor(
- const DILocation *DIL) {
- std::vector<const FunctionSamples *> R;
- if (!DIL)
- return R;
-
- ContextTrieNode *CallerNode = getContextFor(DIL);
- LineLocation CallSite = FunctionSamples::getCallSiteIdentifier(DIL);
- for (auto &It : CallerNode->getAllChildContext()) {
- ContextTrieNode &ChildNode = It.second;
- if (ChildNode.getCallSiteLoc() != CallSite)
- continue;
- if (FunctionSamples *CalleeSamples = ChildNode.getFunctionSamples())
- R.push_back(CalleeSamples);
- }
-
- return R;
-}
-
-FunctionSamples *
-SampleContextTracker::getContextSamplesFor(const DILocation *DIL) {
- assert(DIL && "Expect non-null location");
-
- ContextTrieNode *ContextNode = getContextFor(DIL);
- if (!ContextNode)
- return nullptr;
-
- // We may have inlined callees during pre-LTO compilation, in which case
- // we need to rely on the inline stack from !dbg to mark context profile
- // as inlined, instead of `MarkContextSamplesInlined` during inlining.
- // The sample profile loader walks through all instructions to get their
- // profile, calling this function in the process. So once that walk is done,
- // all previously inlined context profiles should be marked properly.
- FunctionSamples *Samples = ContextNode->getFunctionSamples();
- if (Samples && ContextNode->getParentContext() != &RootContext)
- Samples->getContext().setState(InlinedContext);
-
- return Samples;
-}
-
-FunctionSamples *
-SampleContextTracker::getContextSamplesFor(const SampleContext &Context) {
- ContextTrieNode *Node = getContextFor(Context);
- if (!Node)
- return nullptr;
-
- return Node->getFunctionSamples();
-}
-
-SampleContextTracker::ContextSamplesTy &
-SampleContextTracker::getAllContextSamplesFor(const Function &Func) {
- StringRef CanonName = FunctionSamples::getCanonicalFnName(Func);
- return FuncToCtxtProfileSet[CanonName];
-}
-
-SampleContextTracker::ContextSamplesTy &
-SampleContextTracker::getAllContextSamplesFor(StringRef Name) {
- return FuncToCtxtProfileSet[Name];
-}
-
-FunctionSamples *SampleContextTracker::getBaseSamplesFor(const Function &Func,
- bool MergeContext) {
- StringRef CanonName = FunctionSamples::getCanonicalFnName(Func);
- return getBaseSamplesFor(CanonName, MergeContext);
-}
-
-FunctionSamples *SampleContextTracker::getBaseSamplesFor(StringRef Name,
- bool MergeContext) {
- LLVM_DEBUG(dbgs() << "Getting base profile for function: " << Name << "\n");
- // The base profile is the top-level node (a child of the root node), so try
- // to retrieve an existing top-level node for the given function first. If it
- // exists, it could be that we've merged the base profile before, or that
- // there's actually a context-less profile in the input (e.g. due to
- // unreliable stack walking).
- ContextTrieNode *Node = getTopLevelContextNode(Name);
- if (MergeContext) {
- LLVM_DEBUG(dbgs() << " Merging context profile into base profile: " << Name
- << "\n");
-
- // We have profiles for the function under different contexts;
- // create a synthetic base profile and merge the context
- // profiles into it.
- for (auto *CSamples : FuncToCtxtProfileSet[Name]) {
- SampleContext &Context = CSamples->getContext();
- ContextTrieNode *FromNode = getContextFor(Context);
- if (FromNode == Node)
- continue;
-
- // Skip inlined context profile and also don't re-merge any context
- if (Context.hasState(InlinedContext) || Context.hasState(MergedContext))
- continue;
-
- ContextTrieNode &ToNode = promoteMergeContextSamplesTree(*FromNode);
- assert((!Node || Node == &ToNode) && "Expect only one base profile");
- Node = &ToNode;
- }
- }
-
- // Still no profile even after merge/promotion (if allowed)
- if (!Node)
- return nullptr;
-
- return Node->getFunctionSamples();
-}
-
-void SampleContextTracker::markContextSamplesInlined(
- const FunctionSamples *InlinedSamples) {
- assert(InlinedSamples && "Expect non-null inlined samples");
- LLVM_DEBUG(dbgs() << "Marking context profile as inlined: "
- << InlinedSamples->getContext() << "\n");
- InlinedSamples->getContext().setState(InlinedContext);
-}
-
-void SampleContextTracker::promoteMergeContextSamplesTree(
- const Instruction &Inst, StringRef CalleeName) {
- LLVM_DEBUG(dbgs() << "Promoting and merging context tree for instr: \n"
- << Inst << "\n");
- // Get the caller context for the call instruction; we don't use the callee
- // name from the call because there can be contexts from indirect calls too.
- DILocation *DIL = Inst.getDebugLoc();
- ContextTrieNode *CallerNode = getContextFor(DIL);
- if (!CallerNode)
- return;
-
- // Get the context that needs to be promoted
- LineLocation CallSite = FunctionSamples::getCallSiteIdentifier(DIL);
- // For indirect call, CalleeName will be empty, in which case we need to
- // promote all non-inlined child context profiles.
- if (CalleeName.empty()) {
- for (auto &It : CallerNode->getAllChildContext()) {
- ContextTrieNode *NodeToPromo = &It.second;
- if (CallSite != NodeToPromo->getCallSiteLoc())
- continue;
- FunctionSamples *FromSamples = NodeToPromo->getFunctionSamples();
- if (FromSamples && FromSamples->getContext().hasState(InlinedContext))
- continue;
- promoteMergeContextSamplesTree(*NodeToPromo);
- }
- return;
- }
-
- // Get the context for the given callee that needs to be promoted
- ContextTrieNode *NodeToPromo =
- CallerNode->getChildContext(CallSite, CalleeName);
- if (!NodeToPromo)
- return;
-
- promoteMergeContextSamplesTree(*NodeToPromo);
-}
-
-ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
- ContextTrieNode &NodeToPromo) {
- // Promote the input node to be directly under the root. This can happen
- // when we decide not to inline a function under the context represented
- // by the input node. The promotion and merge are then needed to reflect
- // the context profile in the base (context-less) profile.
- FunctionSamples *FromSamples = NodeToPromo.getFunctionSamples();
- assert(FromSamples && "Shouldn't promote a context without profile");
- LLVM_DEBUG(dbgs() << " Found context tree root to promote: "
- << FromSamples->getContext() << "\n");
-
- assert(!FromSamples->getContext().hasState(InlinedContext) &&
- "Shouldn't promote inlined context profile");
- StringRef ContextStrToRemove = FromSamples->getContext().getCallingContext();
- return promoteMergeContextSamplesTree(NodeToPromo, RootContext,
- ContextStrToRemove);
-}
-
-void SampleContextTracker::dump() {
- dbgs() << "Context Profile Tree:\n";
- std::queue<ContextTrieNode *> NodeQueue;
- NodeQueue.push(&RootContext);
-
- while (!NodeQueue.empty()) {
- ContextTrieNode *Node = NodeQueue.front();
- NodeQueue.pop();
- Node->dump();
-
- for (auto &It : Node->getAllChildContext()) {
- ContextTrieNode *ChildNode = &It.second;
- NodeQueue.push(ChildNode);
- }
- }
-}
-
-ContextTrieNode *
-SampleContextTracker::getContextFor(const SampleContext &Context) {
- return getOrCreateContextPath(Context, false);
-}
-
-ContextTrieNode *
-SampleContextTracker::getCalleeContextFor(const DILocation *DIL,
- StringRef CalleeName) {
- assert(DIL && "Expect non-null location");
-
- ContextTrieNode *CallContext = getContextFor(DIL);
- if (!CallContext)
- return nullptr;
-
- // When CalleeName is empty, the child context profile with max
- // total samples will be returned.
- return CallContext->getChildContext(
- FunctionSamples::getCallSiteIdentifier(DIL), CalleeName);
-}
-
-ContextTrieNode *SampleContextTracker::getContextFor(const DILocation *DIL) {
- assert(DIL && "Expect non-null location");
- SmallVector<std::pair<LineLocation, StringRef>, 10> S;
-
- // Use C++ linkage name if possible.
- const DILocation *PrevDIL = DIL;
- for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) {
- StringRef Name = PrevDIL->getScope()->getSubprogram()->getLinkageName();
- if (Name.empty())
- Name = PrevDIL->getScope()->getSubprogram()->getName();
- S.push_back(
- std::make_pair(FunctionSamples::getCallSiteIdentifier(DIL), Name));
- PrevDIL = DIL;
- }
-
- // Push the root node; note that a root node like main may only
- // have a name, but not a linkage name.
- StringRef RootName = PrevDIL->getScope()->getSubprogram()->getLinkageName();
- if (RootName.empty())
- RootName = PrevDIL->getScope()->getSubprogram()->getName();
- S.push_back(std::make_pair(LineLocation(0, 0), RootName));
-
- ContextTrieNode *ContextNode = &RootContext;
- int I = S.size();
- while (--I >= 0 && ContextNode) {
- LineLocation &CallSite = S[I].first;
- StringRef &CalleeName = S[I].second;
- ContextNode = ContextNode->getChildContext(CallSite, CalleeName);
- }
-
- if (I < 0)
- return ContextNode;
-
- return nullptr;
-}
-
-ContextTrieNode *
-SampleContextTracker::getOrCreateContextPath(const SampleContext &Context,
- bool AllowCreate) {
- ContextTrieNode *ContextNode = &RootContext;
- StringRef ContextRemain = Context;
- StringRef ChildContext;
- StringRef CalleeName;
- LineLocation CallSiteLoc(0, 0);
-
- while (ContextNode && !ContextRemain.empty()) {
- auto ContextSplit = SampleContext::splitContextString(ContextRemain);
- ChildContext = ContextSplit.first;
- ContextRemain = ContextSplit.second;
- LineLocation NextCallSiteLoc(0, 0);
- SampleContext::decodeContextString(ChildContext, CalleeName,
- NextCallSiteLoc);
-
- // Create child node at parent line/disc location
- if (AllowCreate) {
- ContextNode =
- ContextNode->getOrCreateChildContext(CallSiteLoc, CalleeName);
- } else {
- ContextNode = ContextNode->getChildContext(CallSiteLoc, CalleeName);
- }
- CallSiteLoc = NextCallSiteLoc;
- }
-
- assert((!AllowCreate || ContextNode) &&
- "Node must exist if creation is allowed");
- return ContextNode;
-}
-
-ContextTrieNode *SampleContextTracker::getTopLevelContextNode(StringRef FName) {
- return RootContext.getChildContext(LineLocation(0, 0), FName);
-}
-
-ContextTrieNode &SampleContextTracker::addTopLevelContextNode(StringRef FName) {
- assert(!getTopLevelContextNode(FName) && "Node to add must not exist");
- return *RootContext.getOrCreateChildContext(LineLocation(0, 0), FName);
-}
-
-void SampleContextTracker::mergeContextNode(ContextTrieNode &FromNode,
- ContextTrieNode &ToNode,
- StringRef ContextStrToRemove) {
- FunctionSamples *FromSamples = FromNode.getFunctionSamples();
- FunctionSamples *ToSamples = ToNode.getFunctionSamples();
- if (FromSamples && ToSamples) {
- // Merge/duplicate FromSamples into ToSamples
- ToSamples->merge(*FromSamples);
- ToSamples->getContext().setState(SyntheticContext);
- FromSamples->getContext().setState(MergedContext);
- } else if (FromSamples) {
- // Transfer FromSamples from FromNode to ToNode
- ToNode.setFunctionSamples(FromSamples);
- FromSamples->getContext().setState(SyntheticContext);
- FromSamples->getContext().promoteOnPath(ContextStrToRemove);
- FromNode.setFunctionSamples(nullptr);
- }
-}
-
-ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
- ContextTrieNode &FromNode, ContextTrieNode &ToNodeParent,
- StringRef ContextStrToRemove) {
- assert(!ContextStrToRemove.empty() && "Context to remove can't be empty");
-
- // Ignore call site location if destination is top level under root
- LineLocation NewCallSiteLoc = LineLocation(0, 0);
- LineLocation OldCallSiteLoc = FromNode.getCallSiteLoc();
- ContextTrieNode &FromNodeParent = *FromNode.getParentContext();
- ContextTrieNode *ToNode = nullptr;
- bool MoveToRoot = (&ToNodeParent == &RootContext);
- if (!MoveToRoot) {
- NewCallSiteLoc = OldCallSiteLoc;
- }
-
- // Locate destination node, create/move if not existing
- ToNode = ToNodeParent.getChildContext(NewCallSiteLoc, FromNode.getFuncName());
- if (!ToNode) {
- // Do not delete node to move from its parent here because
- // caller is iterating over children of that parent node.
- ToNode = &ToNodeParent.moveToChildContext(
- NewCallSiteLoc, std::move(FromNode), ContextStrToRemove, false);
- } else {
- // Destination node exists, merge samples for the context tree
- mergeContextNode(FromNode, *ToNode, ContextStrToRemove);
- LLVM_DEBUG(dbgs() << " Context promoted and merged to: "
- << ToNode->getFunctionSamples()->getContext() << "\n");
-
- // Recursively promote and merge children
- for (auto &It : FromNode.getAllChildContext()) {
- ContextTrieNode &FromChildNode = It.second;
- promoteMergeContextSamplesTree(FromChildNode, *ToNode,
- ContextStrToRemove);
- }
-
- // Remove children once they're all merged
- FromNode.getAllChildContext().clear();
- }
-
- // For root of subtree, remove itself from old parent too
- if (MoveToRoot)
- FromNodeParent.removeChildContext(OldCallSiteLoc, ToNode->getFuncName());
-
- return *ToNode;
-}
-
-// Replace call graph edges with dynamic call edges from the profile.
-void SampleContextTracker::addCallGraphEdges(CallGraph &CG,
- StringMap<Function *> &SymbolMap) {
- // Add profile call edges to the call graph.
- std::queue<ContextTrieNode *> NodeQueue;
- NodeQueue.push(&RootContext);
- while (!NodeQueue.empty()) {
- ContextTrieNode *Node = NodeQueue.front();
- NodeQueue.pop();
- Function *F = SymbolMap.lookup(Node->getFuncName());
- for (auto &I : Node->getAllChildContext()) {
- ContextTrieNode *ChildNode = &I.second;
- NodeQueue.push(ChildNode);
- if (F && !F->isDeclaration()) {
- Function *Callee = SymbolMap.lookup(ChildNode->getFuncName());
- if (Callee && !Callee->isDeclaration())
- CG[F]->addCalledFunction(nullptr, CG[Callee]);
- }
- }
- }
-}
-} // namespace llvm
+//===- SampleContextTracker.cpp - Context-sensitive Profile Tracker -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SampleContextTracker used by CSSPGO.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/SampleContextTracker.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/ProfileData/SampleProf.h"
+#include <map>
+#include <queue>
+#include <vector>
+
+using namespace llvm;
+using namespace sampleprof;
+
+#define DEBUG_TYPE "sample-context-tracker"
+
+namespace llvm {
+
+ContextTrieNode *ContextTrieNode::getChildContext(const LineLocation &CallSite,
+ StringRef CalleeName) {
+ if (CalleeName.empty())
+ return getHottestChildContext(CallSite);
+
+ uint32_t Hash = nodeHash(CalleeName, CallSite);
+ auto It = AllChildContext.find(Hash);
+ if (It != AllChildContext.end())
+ return &It->second;
+ return nullptr;
+}
+
+ContextTrieNode *
+ContextTrieNode::getHottestChildContext(const LineLocation &CallSite) {
+ // CSFDO-TODO: This could be slow; change AllChildContext so we can
+ // do a point lookup for a child node by call site alone.
+ // Retrieve the child node with the max count for an indirect call.
+ ContextTrieNode *ChildNodeRet = nullptr;
+ uint64_t MaxCalleeSamples = 0;
+ for (auto &It : AllChildContext) {
+ ContextTrieNode &ChildNode = It.second;
+ if (ChildNode.CallSiteLoc != CallSite)
+ continue;
+ FunctionSamples *Samples = ChildNode.getFunctionSamples();
+ if (!Samples)
+ continue;
+ if (Samples->getTotalSamples() > MaxCalleeSamples) {
+ ChildNodeRet = &ChildNode;
+ MaxCalleeSamples = Samples->getTotalSamples();
+ }
+ }
+
+ return ChildNodeRet;
+}
+
+ContextTrieNode &ContextTrieNode::moveToChildContext(
+ const LineLocation &CallSite, ContextTrieNode &&NodeToMove,
+ StringRef ContextStrToRemove, bool DeleteNode) {
+ uint32_t Hash = nodeHash(NodeToMove.getFuncName(), CallSite);
+ assert(!AllChildContext.count(Hash) && "Node to move must not exist");
+ LineLocation OldCallSite = NodeToMove.CallSiteLoc;
+ ContextTrieNode &OldParentContext = *NodeToMove.getParentContext();
+ AllChildContext[Hash] = NodeToMove;
+ ContextTrieNode &NewNode = AllChildContext[Hash];
+ NewNode.CallSiteLoc = CallSite;
+
+ // Walk through nodes in the moved subtree, and update the
+ // FunctionSamples' contexts to reflect the context promotion.
+ // We also need to set the new parent link for all children.
+ std::queue<ContextTrieNode *> NodeToUpdate;
+ NewNode.setParentContext(this);
+ NodeToUpdate.push(&NewNode);
+
+ while (!NodeToUpdate.empty()) {
+ ContextTrieNode *Node = NodeToUpdate.front();
+ NodeToUpdate.pop();
+ FunctionSamples *FSamples = Node->getFunctionSamples();
+
+ if (FSamples) {
+ FSamples->getContext().promoteOnPath(ContextStrToRemove);
+ FSamples->getContext().setState(SyntheticContext);
+ LLVM_DEBUG(dbgs() << " Context promoted to: " << FSamples->getContext()
+ << "\n");
+ }
+
+ for (auto &It : Node->getAllChildContext()) {
+ ContextTrieNode *ChildNode = &It.second;
+ ChildNode->setParentContext(Node);
+ NodeToUpdate.push(ChildNode);
+ }
+ }
+
+ // Original context no longer needed, destroy if requested.
+ if (DeleteNode)
+ OldParentContext.removeChildContext(OldCallSite, NewNode.getFuncName());
+
+ return NewNode;
+}
+
+void ContextTrieNode::removeChildContext(const LineLocation &CallSite,
+ StringRef CalleeName) {
+ uint32_t Hash = nodeHash(CalleeName, CallSite);
+ // Note this essentially calls dtor and destroys that child context
+ AllChildContext.erase(Hash);
+}
+
+std::map<uint32_t, ContextTrieNode> &ContextTrieNode::getAllChildContext() {
+ return AllChildContext;
+}
+
+const StringRef ContextTrieNode::getFuncName() const { return FuncName; }
+
+FunctionSamples *ContextTrieNode::getFunctionSamples() const {
+ return FuncSamples;
+}
+
+void ContextTrieNode::setFunctionSamples(FunctionSamples *FSamples) {
+ FuncSamples = FSamples;
+}
+
+LineLocation ContextTrieNode::getCallSiteLoc() const { return CallSiteLoc; }
+
+ContextTrieNode *ContextTrieNode::getParentContext() const {
+ return ParentContext;
+}
+
+void ContextTrieNode::setParentContext(ContextTrieNode *Parent) {
+ ParentContext = Parent;
+}
+
+void ContextTrieNode::dump() {
+ dbgs() << "Node: " << FuncName << "\n"
+ << " Callsite: " << CallSiteLoc << "\n"
+ << " Children:\n";
+
+ for (auto &It : AllChildContext) {
+ dbgs() << " Node: " << It.second.getFuncName() << "\n";
+ }
+}
+
+uint32_t ContextTrieNode::nodeHash(StringRef ChildName,
+ const LineLocation &Callsite) {
+ // We still use the child's name for the child hash because,
+ // for children of the root node, we don't have different
+ // line/discriminator values, and we'll rely on the name
+ // to differentiate children.
+ uint32_t NameHash = std::hash<std::string>{}(ChildName.str());
+ uint32_t LocId = (Callsite.LineOffset << 16) | Callsite.Discriminator;
+ return NameHash + (LocId << 5) + LocId;
+}
+
+ContextTrieNode *ContextTrieNode::getOrCreateChildContext(
+ const LineLocation &CallSite, StringRef CalleeName, bool AllowCreate) {
+ uint32_t Hash = nodeHash(CalleeName, CallSite);
+ auto It = AllChildContext.find(Hash);
+ if (It != AllChildContext.end()) {
+ assert(It->second.getFuncName() == CalleeName &&
+ "Hash collision for child context node");
+ return &It->second;
+ }
+
+ if (!AllowCreate)
+ return nullptr;
+
+ AllChildContext[Hash] = ContextTrieNode(this, CalleeName, nullptr, CallSite);
+ return &AllChildContext[Hash];
+}
+
+// Profile tracker that manages profiles and their associated contexts
+SampleContextTracker::SampleContextTracker(
+ StringMap<FunctionSamples> &Profiles) {
+ for (auto &FuncSample : Profiles) {
+ FunctionSamples *FSamples = &FuncSample.second;
+ SampleContext Context(FuncSample.first(), RawContext);
+ LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context << "\n");
+ if (!Context.isBaseContext())
+ FuncToCtxtProfileSet[Context.getNameWithoutContext()].insert(FSamples);
+ ContextTrieNode *NewNode = getOrCreateContextPath(Context, true);
+ assert(!NewNode->getFunctionSamples() &&
+ "New node can't have sample profile");
+ NewNode->setFunctionSamples(FSamples);
+ }
+}
+
+FunctionSamples *
+SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst,
+ StringRef CalleeName) {
+ LLVM_DEBUG(dbgs() << "Getting callee context for instr: " << Inst << "\n");
+ DILocation *DIL = Inst.getDebugLoc();
+ if (!DIL)
+ return nullptr;
+
+ // For indirect call, CalleeName will be empty, in which case the context
+ // profile for callee with largest total samples will be returned.
+ ContextTrieNode *CalleeContext = getCalleeContextFor(DIL, CalleeName);
+ if (CalleeContext) {
+ FunctionSamples *FSamples = CalleeContext->getFunctionSamples();
+ LLVM_DEBUG(if (FSamples) {
+ dbgs() << " Callee context found: " << FSamples->getContext() << "\n";
+ });
+ return FSamples;
+ }
+
+ return nullptr;
+}
+
+std::vector<const FunctionSamples *>
+SampleContextTracker::getIndirectCalleeContextSamplesFor(
+ const DILocation *DIL) {
+ std::vector<const FunctionSamples *> R;
+ if (!DIL)
+ return R;
+
+ ContextTrieNode *CallerNode = getContextFor(DIL);
+ LineLocation CallSite = FunctionSamples::getCallSiteIdentifier(DIL);
+ for (auto &It : CallerNode->getAllChildContext()) {
+ ContextTrieNode &ChildNode = It.second;
+ if (ChildNode.getCallSiteLoc() != CallSite)
+ continue;
+ if (FunctionSamples *CalleeSamples = ChildNode.getFunctionSamples())
+ R.push_back(CalleeSamples);
+ }
+
+ return R;
+}
+
+FunctionSamples *
+SampleContextTracker::getContextSamplesFor(const DILocation *DIL) {
+ assert(DIL && "Expect non-null location");
+
+ ContextTrieNode *ContextNode = getContextFor(DIL);
+ if (!ContextNode)
+ return nullptr;
+
+ // We may have inlined callees during pre-LTO compilation, in which case
+ // we need to rely on the inline stack from !dbg to mark context profile
+ // as inlined, instead of `MarkContextSamplesInlined` during inlining.
+ // The sample profile loader walks through all instructions to get their
+ // profile, calling this function in the process. So once that walk is done,
+ // all previously inlined context profiles should be marked properly.
+ FunctionSamples *Samples = ContextNode->getFunctionSamples();
+ if (Samples && ContextNode->getParentContext() != &RootContext)
+ Samples->getContext().setState(InlinedContext);
+
+ return Samples;
+}
+
+FunctionSamples *
+SampleContextTracker::getContextSamplesFor(const SampleContext &Context) {
+ ContextTrieNode *Node = getContextFor(Context);
+ if (!Node)
+ return nullptr;
+
+ return Node->getFunctionSamples();
+}
+
+SampleContextTracker::ContextSamplesTy &
+SampleContextTracker::getAllContextSamplesFor(const Function &Func) {
+ StringRef CanonName = FunctionSamples::getCanonicalFnName(Func);
+ return FuncToCtxtProfileSet[CanonName];
+}
+
+SampleContextTracker::ContextSamplesTy &
+SampleContextTracker::getAllContextSamplesFor(StringRef Name) {
+ return FuncToCtxtProfileSet[Name];
+}
+
+FunctionSamples *SampleContextTracker::getBaseSamplesFor(const Function &Func,
+ bool MergeContext) {
+ StringRef CanonName = FunctionSamples::getCanonicalFnName(Func);
+ return getBaseSamplesFor(CanonName, MergeContext);
+}
+
+FunctionSamples *SampleContextTracker::getBaseSamplesFor(StringRef Name,
+ bool MergeContext) {
+ LLVM_DEBUG(dbgs() << "Getting base profile for function: " << Name << "\n");
+ // The base profile is the top-level node (a child of the root node), so try
+ // to retrieve an existing top-level node for the given function first. If it
+ // exists, it could be that we've merged the base profile before, or that
+ // there's actually a context-less profile in the input (e.g. due to
+ // unreliable stack walking).
+ ContextTrieNode *Node = getTopLevelContextNode(Name);
+ if (MergeContext) {
+ LLVM_DEBUG(dbgs() << " Merging context profile into base profile: " << Name
+ << "\n");
+
+ // We have profiles for the function under different contexts;
+ // create a synthetic base profile and merge the context
+ // profiles into it.
+ for (auto *CSamples : FuncToCtxtProfileSet[Name]) {
+ SampleContext &Context = CSamples->getContext();
+ ContextTrieNode *FromNode = getContextFor(Context);
+ if (FromNode == Node)
+ continue;
+
+ // Skip inlined context profile and also don't re-merge any context
+ if (Context.hasState(InlinedContext) || Context.hasState(MergedContext))
+ continue;
+
+ ContextTrieNode &ToNode = promoteMergeContextSamplesTree(*FromNode);
+ assert((!Node || Node == &ToNode) && "Expect only one base profile");
+ Node = &ToNode;
+ }
+ }
+
+ // Still no profile even after merge/promotion (if allowed)
+ if (!Node)
+ return nullptr;
+
+ return Node->getFunctionSamples();
+}
+
+void SampleContextTracker::markContextSamplesInlined(
+ const FunctionSamples *InlinedSamples) {
+ assert(InlinedSamples && "Expect non-null inlined samples");
+ LLVM_DEBUG(dbgs() << "Marking context profile as inlined: "
+ << InlinedSamples->getContext() << "\n");
+ InlinedSamples->getContext().setState(InlinedContext);
+}
+
+void SampleContextTracker::promoteMergeContextSamplesTree(
+ const Instruction &Inst, StringRef CalleeName) {
+ LLVM_DEBUG(dbgs() << "Promoting and merging context tree for instr: \n"
+ << Inst << "\n");
+ // Get the caller context for the call instruction; we don't use the callee
+ // name from the call because there can be contexts from indirect calls too.
+ DILocation *DIL = Inst.getDebugLoc();
+ ContextTrieNode *CallerNode = getContextFor(DIL);
+ if (!CallerNode)
+ return;
+
+ // Get the context that needs to be promoted
+ LineLocation CallSite = FunctionSamples::getCallSiteIdentifier(DIL);
+ // For indirect call, CalleeName will be empty, in which case we need to
+ // promote all non-inlined child context profiles.
+ if (CalleeName.empty()) {
+ for (auto &It : CallerNode->getAllChildContext()) {
+ ContextTrieNode *NodeToPromo = &It.second;
+ if (CallSite != NodeToPromo->getCallSiteLoc())
+ continue;
+ FunctionSamples *FromSamples = NodeToPromo->getFunctionSamples();
+ if (FromSamples && FromSamples->getContext().hasState(InlinedContext))
+ continue;
+ promoteMergeContextSamplesTree(*NodeToPromo);
+ }
+ return;
+ }
+
+ // Get the context for the given callee that needs to be promoted
+ ContextTrieNode *NodeToPromo =
+ CallerNode->getChildContext(CallSite, CalleeName);
+ if (!NodeToPromo)
+ return;
+
+ promoteMergeContextSamplesTree(*NodeToPromo);
+}
+
+ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
+ ContextTrieNode &NodeToPromo) {
+ // Promote the input node to be directly under the root. This can happen
+ // when we decide not to inline a function under the context represented
+ // by the input node. The promotion and merge are then needed to reflect
+ // the context profile in the base (context-less) profile.
+ FunctionSamples *FromSamples = NodeToPromo.getFunctionSamples();
+ assert(FromSamples && "Shouldn't promote a context without profile");
+ LLVM_DEBUG(dbgs() << " Found context tree root to promote: "
+ << FromSamples->getContext() << "\n");
+
+ assert(!FromSamples->getContext().hasState(InlinedContext) &&
+ "Shouldn't promote inlined context profile");
+ StringRef ContextStrToRemove = FromSamples->getContext().getCallingContext();
+ return promoteMergeContextSamplesTree(NodeToPromo, RootContext,
+ ContextStrToRemove);
+}
+
+void SampleContextTracker::dump() {
+ dbgs() << "Context Profile Tree:\n";
+ std::queue<ContextTrieNode *> NodeQueue;
+ NodeQueue.push(&RootContext);
+
+ while (!NodeQueue.empty()) {
+ ContextTrieNode *Node = NodeQueue.front();
+ NodeQueue.pop();
+ Node->dump();
+
+ for (auto &It : Node->getAllChildContext()) {
+ ContextTrieNode *ChildNode = &It.second;
+ NodeQueue.push(ChildNode);
+ }
+ }
+}
+
+ContextTrieNode *
+SampleContextTracker::getContextFor(const SampleContext &Context) {
+ return getOrCreateContextPath(Context, false);
+}
+
+ContextTrieNode *
+SampleContextTracker::getCalleeContextFor(const DILocation *DIL,
+ StringRef CalleeName) {
+ assert(DIL && "Expect non-null location");
+
+ ContextTrieNode *CallContext = getContextFor(DIL);
+ if (!CallContext)
+ return nullptr;
+
+ // When CalleeName is empty, the child context profile with max
+ // total samples will be returned.
+ return CallContext->getChildContext(
+ FunctionSamples::getCallSiteIdentifier(DIL), CalleeName);
+}
+
+ContextTrieNode *SampleContextTracker::getContextFor(const DILocation *DIL) {
+ assert(DIL && "Expect non-null location");
+ SmallVector<std::pair<LineLocation, StringRef>, 10> S;
+
+ // Use C++ linkage name if possible.
+ const DILocation *PrevDIL = DIL;
+ for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) {
+ StringRef Name = PrevDIL->getScope()->getSubprogram()->getLinkageName();
+ if (Name.empty())
+ Name = PrevDIL->getScope()->getSubprogram()->getName();
+ S.push_back(
+ std::make_pair(FunctionSamples::getCallSiteIdentifier(DIL), Name));
+ PrevDIL = DIL;
+ }
+
+ // Push the root node; note that a root node like main may only
+ // have a name, but not a linkage name.
+ StringRef RootName = PrevDIL->getScope()->getSubprogram()->getLinkageName();
+ if (RootName.empty())
+ RootName = PrevDIL->getScope()->getSubprogram()->getName();
+ S.push_back(std::make_pair(LineLocation(0, 0), RootName));
+
+ ContextTrieNode *ContextNode = &RootContext;
+ int I = S.size();
+ while (--I >= 0 && ContextNode) {
+ LineLocation &CallSite = S[I].first;
+ StringRef &CalleeName = S[I].second;
+ ContextNode = ContextNode->getChildContext(CallSite, CalleeName);
+ }
+
+ if (I < 0)
+ return ContextNode;
+
+ return nullptr;
+}
+
+ContextTrieNode *
+SampleContextTracker::getOrCreateContextPath(const SampleContext &Context,
+ bool AllowCreate) {
+ ContextTrieNode *ContextNode = &RootContext;
+ StringRef ContextRemain = Context;
+ StringRef ChildContext;
+ StringRef CalleeName;
+ LineLocation CallSiteLoc(0, 0);
+
+ while (ContextNode && !ContextRemain.empty()) {
+ auto ContextSplit = SampleContext::splitContextString(ContextRemain);
+ ChildContext = ContextSplit.first;
+ ContextRemain = ContextSplit.second;
+ LineLocation NextCallSiteLoc(0, 0);
+ SampleContext::decodeContextString(ChildContext, CalleeName,
+ NextCallSiteLoc);
+
+ // Create child node at parent line/disc location
+ if (AllowCreate) {
+ ContextNode =
+ ContextNode->getOrCreateChildContext(CallSiteLoc, CalleeName);
+ } else {
+ ContextNode = ContextNode->getChildContext(CallSiteLoc, CalleeName);
+ }
+ CallSiteLoc = NextCallSiteLoc;
+ }
+
+ assert((!AllowCreate || ContextNode) &&
+ "Node must exist if creation is allowed");
+ return ContextNode;
+}
+
+ContextTrieNode *SampleContextTracker::getTopLevelContextNode(StringRef FName) {
+ return RootContext.getChildContext(LineLocation(0, 0), FName);
+}
+
+ContextTrieNode &SampleContextTracker::addTopLevelContextNode(StringRef FName) {
+ assert(!getTopLevelContextNode(FName) && "Node to add must not exist");
+ return *RootContext.getOrCreateChildContext(LineLocation(0, 0), FName);
+}
+
+void SampleContextTracker::mergeContextNode(ContextTrieNode &FromNode,
+ ContextTrieNode &ToNode,
+ StringRef ContextStrToRemove) {
+ FunctionSamples *FromSamples = FromNode.getFunctionSamples();
+ FunctionSamples *ToSamples = ToNode.getFunctionSamples();
+ if (FromSamples && ToSamples) {
+ // Merge/duplicate FromSamples into ToSamples
+ ToSamples->merge(*FromSamples);
+ ToSamples->getContext().setState(SyntheticContext);
+ FromSamples->getContext().setState(MergedContext);
+ } else if (FromSamples) {
+ // Transfer FromSamples from FromNode to ToNode
+ ToNode.setFunctionSamples(FromSamples);
+ FromSamples->getContext().setState(SyntheticContext);
+ FromSamples->getContext().promoteOnPath(ContextStrToRemove);
+ FromNode.setFunctionSamples(nullptr);
+ }
+}
+
+ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
+ ContextTrieNode &FromNode, ContextTrieNode &ToNodeParent,
+ StringRef ContextStrToRemove) {
+ assert(!ContextStrToRemove.empty() && "Context to remove can't be empty");
+
+ // Ignore call site location if destination is top level under root
+ LineLocation NewCallSiteLoc = LineLocation(0, 0);
+ LineLocation OldCallSiteLoc = FromNode.getCallSiteLoc();
+ ContextTrieNode &FromNodeParent = *FromNode.getParentContext();
+ ContextTrieNode *ToNode = nullptr;
+ bool MoveToRoot = (&ToNodeParent == &RootContext);
+ if (!MoveToRoot) {
+ NewCallSiteLoc = OldCallSiteLoc;
+ }
+
+ // Locate destination node, create/move if not existing
+ ToNode = ToNodeParent.getChildContext(NewCallSiteLoc, FromNode.getFuncName());
+ if (!ToNode) {
+ // Do not delete node to move from its parent here because
+ // caller is iterating over children of that parent node.
+ ToNode = &ToNodeParent.moveToChildContext(
+ NewCallSiteLoc, std::move(FromNode), ContextStrToRemove, false);
+ } else {
+ // Destination node exists, merge samples for the context tree
+ mergeContextNode(FromNode, *ToNode, ContextStrToRemove);
+ LLVM_DEBUG(dbgs() << " Context promoted and merged to: "
+ << ToNode->getFunctionSamples()->getContext() << "\n");
+
+ // Recursively promote and merge children
+ for (auto &It : FromNode.getAllChildContext()) {
+ ContextTrieNode &FromChildNode = It.second;
+ promoteMergeContextSamplesTree(FromChildNode, *ToNode,
+ ContextStrToRemove);
+ }
+
+ // Remove children once they're all merged
+ FromNode.getAllChildContext().clear();
+ }
+
+ // For root of subtree, remove itself from old parent too
+ if (MoveToRoot)
+ FromNodeParent.removeChildContext(OldCallSiteLoc, ToNode->getFuncName());
+
+ return *ToNode;
+}
+
+// Replace call graph edges with dynamic call edges from the profile.
+void SampleContextTracker::addCallGraphEdges(CallGraph &CG,
+ StringMap<Function *> &SymbolMap) {
+ // Add profile call edges to the call graph.
+ std::queue<ContextTrieNode *> NodeQueue;
+ NodeQueue.push(&RootContext);
+ while (!NodeQueue.empty()) {
+ ContextTrieNode *Node = NodeQueue.front();
+ NodeQueue.pop();
+ Function *F = SymbolMap.lookup(Node->getFuncName());
+ for (auto &I : Node->getAllChildContext()) {
+ ContextTrieNode *ChildNode = &I.second;
+ NodeQueue.push(ChildNode);
+ if (F && !F->isDeclaration()) {
+ Function *Callee = SymbolMap.lookup(ChildNode->getFuncName());
+ if (Callee && !Callee->isDeclaration())
+ CG[F]->addCalledFunction(nullptr, CG[Callee]);
+ }
+ }
+ }
+}
+} // namespace llvm
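To make the trie keying above concrete: children of a ContextTrieNode live in a std::map keyed by nodeHash(callee name, call site), so a point lookup needs both pieces. Here is a standalone illustrative sketch (plain C++, not LLVM code; the names are invented for the example) that reproduces the hash scheme and the child-map lookup used by getChildContext.

#include <cstdint>
#include <functional>
#include <iostream>
#include <map>
#include <string>

// Stand-in for the line/discriminator pair carried by LineLocation.
struct LineLoc {
  uint32_t LineOffset;
  uint32_t Discriminator;
};

// Mirrors ContextTrieNode::nodeHash: name hash combined with a packed
// line/discriminator id.
static uint32_t nodeHash(const std::string &ChildName, const LineLoc &CallSite) {
  uint32_t NameHash = std::hash<std::string>{}(ChildName);
  uint32_t LocId = (CallSite.LineOffset << 16) | CallSite.Discriminator;
  return NameHash + (LocId << 5) + LocId;
}

int main() {
  std::map<uint32_t, std::string> Children; // stand-in for AllChildContext
  Children[nodeHash("foo", {4, 0})] = "foo";
  Children[nodeHash("bar", {9, 1})] = "bar";
  // Point lookup by (callee name, call site), as getChildContext does.
  auto It = Children.find(nodeHash("foo", {4, 0}));
  std::cout << (It != Children.end() ? It->second : "<none>") << "\n";
  return 0;
}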
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/SampleProfile.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/SampleProfile.cpp
index a6a419bfe7..1dbaaa2be7 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/SampleProfile.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/SampleProfile.cpp
@@ -26,7 +26,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/None.h"
-#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -44,7 +44,7 @@
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
-#include "llvm/Analysis/ReplayInlineAdvisor.h"
+#include "llvm/Analysis/ReplayInlineAdvisor.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
@@ -77,8 +77,8 @@
#include "llvm/Support/GenericDomTree.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/IPO/SampleContextTracker.h"
-#include "llvm/Transforms/IPO/SampleProfileProbe.h"
+#include "llvm/Transforms/IPO/SampleContextTracker.h"
+#include "llvm/Transforms/IPO/SampleProfileProbe.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -105,20 +105,20 @@ STATISTIC(NumCSInlined,
"Number of functions inlined with context sensitive profile");
STATISTIC(NumCSNotInlined,
"Number of functions not inlined with context sensitive profile");
-STATISTIC(NumMismatchedProfile,
- "Number of functions with CFG mismatched profile");
-STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile");
-STATISTIC(NumDuplicatedInlinesite,
- "Number of inlined callsites with a partial distribution factor");
-
-STATISTIC(NumCSInlinedHitMinLimit,
- "Number of functions with FDO inline stopped due to min size limit");
-STATISTIC(NumCSInlinedHitMaxLimit,
- "Number of functions with FDO inline stopped due to max size limit");
-STATISTIC(
- NumCSInlinedHitGrowthLimit,
- "Number of functions with FDO inline stopped due to growth size limit");
-
+STATISTIC(NumMismatchedProfile,
+ "Number of functions with CFG mismatched profile");
+STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile");
+STATISTIC(NumDuplicatedInlinesite,
+ "Number of inlined callsites with a partial distribution factor");
+
+STATISTIC(NumCSInlinedHitMinLimit,
+ "Number of functions with FDO inline stopped due to min size limit");
+STATISTIC(NumCSInlinedHitMaxLimit,
+ "Number of functions with FDO inline stopped due to max size limit");
+STATISTIC(
+ NumCSInlinedHitGrowthLimit,
+ "Number of functions with FDO inline stopped due to growth size limit");
+
// Command line option to specify the file to read samples from. This is
// mainly used for debugging.
static cl::opt<std::string> SampleProfileFile(
@@ -177,64 +177,64 @@ static cl::opt<bool> ProfileTopDownLoad(
"order of call graph during sample profile loading. It only "
"works for new pass manager. "));
-static cl::opt<bool> UseProfileIndirectCallEdges(
- "use-profile-indirect-call-edges", cl::init(true), cl::Hidden,
- cl::desc("Considering indirect call samples from profile when top-down "
- "processing functions. Only CSSPGO is supported."));
-
-static cl::opt<bool> UseProfileTopDownOrder(
- "use-profile-top-down-order", cl::init(false), cl::Hidden,
- cl::desc("Process functions in one SCC in a top-down order "
- "based on the input profile."));
-
+static cl::opt<bool> UseProfileIndirectCallEdges(
+ "use-profile-indirect-call-edges", cl::init(true), cl::Hidden,
+ cl::desc("Considering indirect call samples from profile when top-down "
+ "processing functions. Only CSSPGO is supported."));
+
+static cl::opt<bool> UseProfileTopDownOrder(
+ "use-profile-top-down-order", cl::init(false), cl::Hidden,
+ cl::desc("Process functions in one SCC in a top-down order "
+ "based on the input profile."));
+
static cl::opt<bool> ProfileSizeInline(
"sample-profile-inline-size", cl::Hidden, cl::init(false),
cl::desc("Inline cold call sites in profile loader if it's beneficial "
"for code size."));
-static cl::opt<int> ProfileInlineGrowthLimit(
- "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12),
- cl::desc("The size growth ratio limit for proirity-based sample profile "
- "loader inlining."));
-
-static cl::opt<int> ProfileInlineLimitMin(
- "sample-profile-inline-limit-min", cl::Hidden, cl::init(100),
- cl::desc("The lower bound of size growth limit for "
- "proirity-based sample profile loader inlining."));
-
-static cl::opt<int> ProfileInlineLimitMax(
- "sample-profile-inline-limit-max", cl::Hidden, cl::init(10000),
- cl::desc("The upper bound of size growth limit for "
- "proirity-based sample profile loader inlining."));
-
-static cl::opt<int> ProfileICPThreshold(
- "sample-profile-icp-threshold", cl::Hidden, cl::init(5),
- cl::desc(
- "Relative hotness threshold for indirect "
- "call promotion in proirity-based sample profile loader inlining."));
-
-static cl::opt<int> SampleHotCallSiteThreshold(
- "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000),
- cl::desc("Hot callsite threshold for proirity-based sample profile loader "
- "inlining."));
-
-static cl::opt<bool> CallsitePrioritizedInline(
- "sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore,
- cl::init(false),
- cl::desc("Use call site prioritized inlining for sample profile loader."
- "Currently only CSSPGO is supported."));
-
+static cl::opt<int> ProfileInlineGrowthLimit(
+ "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12),
+ cl::desc("The size growth ratio limit for proirity-based sample profile "
+ "loader inlining."));
+
+static cl::opt<int> ProfileInlineLimitMin(
+ "sample-profile-inline-limit-min", cl::Hidden, cl::init(100),
+ cl::desc("The lower bound of size growth limit for "
+ "proirity-based sample profile loader inlining."));
+
+static cl::opt<int> ProfileInlineLimitMax(
+ "sample-profile-inline-limit-max", cl::Hidden, cl::init(10000),
+ cl::desc("The upper bound of size growth limit for "
+ "proirity-based sample profile loader inlining."));
+
+static cl::opt<int> ProfileICPThreshold(
+ "sample-profile-icp-threshold", cl::Hidden, cl::init(5),
+ cl::desc(
+ "Relative hotness threshold for indirect "
+ "call promotion in proirity-based sample profile loader inlining."));
+
+static cl::opt<int> SampleHotCallSiteThreshold(
+ "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000),
+ cl::desc("Hot callsite threshold for proirity-based sample profile loader "
+ "inlining."));
+
+static cl::opt<bool> CallsitePrioritizedInline(
+ "sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore,
+ cl::init(false),
+ cl::desc("Use call site prioritized inlining for sample profile loader."
+ "Currently only CSSPGO is supported."));
+
static cl::opt<int> SampleColdCallSiteThreshold(
"sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
cl::desc("Threshold for inlining cold callsites"));
-static cl::opt<std::string> ProfileInlineReplayFile(
- "sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"),
- cl::desc(
- "Optimization remarks file containing inline remarks to be replayed "
- "by inlining from sample profile loader."),
- cl::Hidden);
-
+static cl::opt<std::string> ProfileInlineReplayFile(
+ "sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"),
+ cl::desc(
+ "Optimization remarks file containing inline remarks to be replayed "
+ "by inlining from sample profile loader."),
+ cl::Hidden);
+
namespace {
using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
@@ -366,38 +366,38 @@ private:
DenseMap<uint64_t, StringRef> &CurrentGUIDToFuncNameMap;
};
-// Inline candidate used by iterative callsite prioritized inliner
-struct InlineCandidate {
- CallBase *CallInstr;
- const FunctionSamples *CalleeSamples;
- // Prorated callsite count, which will be used to guide inlining. For example,
- // if a callsite is duplicated in LTO prelink, then in LTO postlink the two
- // copies will get their own distribution factors and their prorated counts
- // will be used to decide if they should be inlined independently.
- uint64_t CallsiteCount;
- // Call site distribution factor to prorate the profile samples for a
- // duplicated callsite. Default value is 1.0.
- float CallsiteDistribution;
-};
-
-// Inline candidate comparer using call site weight
-struct CandidateComparer {
- bool operator()(const InlineCandidate &LHS, const InlineCandidate &RHS) {
- if (LHS.CallsiteCount != RHS.CallsiteCount)
- return LHS.CallsiteCount < RHS.CallsiteCount;
-
- // Tie breaker using GUID so we have stable/deterministic inlining order
- assert(LHS.CalleeSamples && RHS.CalleeSamples &&
- "Expect non-null FunctionSamples");
- return LHS.CalleeSamples->getGUID(LHS.CalleeSamples->getName()) <
- RHS.CalleeSamples->getGUID(RHS.CalleeSamples->getName());
- }
-};
-
-using CandidateQueue =
- PriorityQueue<InlineCandidate, std::vector<InlineCandidate>,
- CandidateComparer>;
-
+// Inline candidate used by iterative callsite prioritized inliner
+struct InlineCandidate {
+ CallBase *CallInstr;
+ const FunctionSamples *CalleeSamples;
+ // Prorated callsite count, which will be used to guide inlining. For example,
+ // if a callsite is duplicated in LTO prelink, then in LTO postlink the two
+ // copies will get their own distribution factors and their prorated counts
+ // will be used to decide if they should be inlined independently.
+ uint64_t CallsiteCount;
+ // Call site distribution factor to prorate the profile samples for a
+ // duplicated callsite. Default value is 1.0.
+ float CallsiteDistribution;
+};
+
+// Inline candidate comparer using call site weight
+struct CandidateComparer {
+ bool operator()(const InlineCandidate &LHS, const InlineCandidate &RHS) {
+ if (LHS.CallsiteCount != RHS.CallsiteCount)
+ return LHS.CallsiteCount < RHS.CallsiteCount;
+
+ // Tie breaker using GUID so we have stable/deterministic inlining order
+ assert(LHS.CalleeSamples && RHS.CalleeSamples &&
+ "Expect non-null FunctionSamples");
+ return LHS.CalleeSamples->getGUID(LHS.CalleeSamples->getName()) <
+ RHS.CalleeSamples->getGUID(RHS.CalleeSamples->getName());
+ }
+};
+
+using CandidateQueue =
+ PriorityQueue<InlineCandidate, std::vector<InlineCandidate>,
+ CandidateComparer>;
+
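The comparer above yields a max-heap keyed on the prorated call site count, with the callee GUID as a deterministic tie breaker. A simplified stand-in showing the resulting pop order (plain C++; PriorityQueue is LLVM's adapter over std::priority_queue, and GUID here substitutes for CalleeSamples->getGUID(...)):

#include <cstdint>
#include <iostream>
#include <queue>
#include <vector>

struct Candidate {
  uint64_t CallsiteCount; // prorated call site count
  uint64_t GUID;          // stand-in for CalleeSamples->getGUID(...)
};

// Same shape as CandidateComparer: hotter call sites come out first,
// ties broken by GUID so the inlining order is stable.
struct Comparer {
  bool operator()(const Candidate &L, const Candidate &R) const {
    if (L.CallsiteCount != R.CallsiteCount)
      return L.CallsiteCount < R.CallsiteCount;
    return L.GUID < R.GUID;
  }
};

int main() {
  std::priority_queue<Candidate, std::vector<Candidate>, Comparer> Q;
  Q.push({100, 7});
  Q.push({300, 3});
  Q.push({100, 9});
  while (!Q.empty()) {
    // Pops 300 first; of the two 100-count ties, GUID 9 precedes GUID 7.
    std::cout << Q.top().CallsiteCount << " (GUID " << Q.top().GUID << ")\n";
    Q.pop();
  }
  return 0;
}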
/// Sample profile pass.
///
/// This pass reads profile data from the file specified by
@@ -406,16 +406,16 @@ using CandidateQueue =
class SampleProfileLoader {
public:
SampleProfileLoader(
- StringRef Name, StringRef RemapName, ThinOrFullLTOPhase LTOPhase,
+ StringRef Name, StringRef RemapName, ThinOrFullLTOPhase LTOPhase,
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
std::function<const TargetLibraryInfo &(Function &)> GetTLI)
: GetAC(std::move(GetAssumptionCache)),
GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
CoverageTracker(*this), Filename(std::string(Name)),
- RemappingFilename(std::string(RemapName)), LTOPhase(LTOPhase) {}
+ RemappingFilename(std::string(RemapName)), LTOPhase(LTOPhase) {}
- bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
+ bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
bool runOnModule(Module &M, ModuleAnalysisManager *AM,
ProfileSummaryInfo *_PSI, CallGraph *CG);
@@ -428,28 +428,28 @@ protected:
unsigned getFunctionLoc(Function &F);
bool emitAnnotations(Function &F);
ErrorOr<uint64_t> getInstWeight(const Instruction &I);
- ErrorOr<uint64_t> getProbeWeight(const Instruction &I);
+ ErrorOr<uint64_t> getProbeWeight(const Instruction &I);
ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB);
const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const;
std::vector<const FunctionSamples *>
findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
mutable DenseMap<const DILocation *, const FunctionSamples *> DILocation2SampleMap;
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
- // Attempt to promote indirect call and also inline the promoted call
- bool tryPromoteAndInlineCandidate(
- Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
- uint64_t &Sum, DenseSet<Instruction *> &PromotedInsns,
- SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
+ // Attempt to promote indirect call and also inline the promoted call
+ bool tryPromoteAndInlineCandidate(
+ Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
+ uint64_t &Sum, DenseSet<Instruction *> &PromotedInsns,
+ SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
bool inlineHotFunctions(Function &F,
DenseSet<GlobalValue::GUID> &InlinedGUIDs);
- InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
- bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
- bool
- tryInlineCandidate(InlineCandidate &Candidate,
- SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
- bool
- inlineHotFunctionsWithPriority(Function &F,
- DenseSet<GlobalValue::GUID> &InlinedGUIDs);
+ InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
+ bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
+ bool
+ tryInlineCandidate(InlineCandidate &Candidate,
+ SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
+ bool
+ inlineHotFunctionsWithPriority(Function &F,
+ DenseSet<GlobalValue::GUID> &InlinedGUIDs);
// Inline cold/small functions in addition to hot ones
bool shouldInlineColdCallee(CallBase &CallInst);
void emitOptimizationRemarksForInlineCandidates(
@@ -468,8 +468,8 @@ protected:
uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
void buildEdges(Function &F);
std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG);
- void addCallGraphEdges(CallGraph &CG, const FunctionSamples &Samples);
- void replaceCallGraphEdges(CallGraph &CG, StringMap<Function *> &SymbolMap);
+ void addCallGraphEdges(CallGraph &CG, const FunctionSamples &Samples);
+ void replaceCallGraphEdges(CallGraph &CG, StringMap<Function *> &SymbolMap);
bool propagateThroughEdges(Function &F, bool UpdateBlockCount);
void computeDominanceAndLoopInfo(Function &F);
void clearFunctionData();
@@ -528,9 +528,9 @@ protected:
/// Profile reader object.
std::unique_ptr<SampleProfileReader> Reader;
- /// Profile tracker for different context.
- std::unique_ptr<SampleContextTracker> ContextTracker;
-
+ /// Profile tracker for different context.
+ std::unique_ptr<SampleContextTracker> ContextTracker;
+
/// Samples collected for the body of this function.
FunctionSamples *Samples = nullptr;
@@ -543,15 +543,15 @@ protected:
/// Flag indicating whether the profile input loaded successfully.
bool ProfileIsValid = false;
- /// Flag indicating whether input profile is context-sensitive
- bool ProfileIsCS = false;
-
- /// Flag indicating which LTO/ThinLTO phase the pass is invoked in.
+ /// Flag indicating whether input profile is context-sensitive
+ bool ProfileIsCS = false;
+
+ /// Flag indicating which LTO/ThinLTO phase the pass is invoked in.
///
- /// We need to know the LTO phase because, for example, in the ThinLTOPrelink
- /// phase, during annotation we should not promote indirect calls. Instead,
- /// we will mark the GUIDs that need to be annotated on the function.
- ThinOrFullLTOPhase LTOPhase;
+ /// We need to know the LTO phase because, for example, in the ThinLTOPrelink
+ /// phase, during annotation we should not promote indirect calls. Instead,
+ /// we will mark the GUIDs that need to be annotated on the function.
+ ThinOrFullLTOPhase LTOPhase;
/// Profile Summary Info computed from sample profile.
ProfileSummaryInfo *PSI = nullptr;
@@ -591,12 +591,12 @@ protected:
// overridden by -profile-sample-accurate or profile-sample-accurate
// attribute.
bool ProfAccForSymsInList;
-
- // External inline advisor used to replay inline decision from remarks.
- std::unique_ptr<ReplayInlineAdvisor> ExternalInlineAdvisor;
-
- // A pseudo probe helper to correlate the imported sample counts.
- std::unique_ptr<PseudoProbeManager> ProbeManager;
+
+ // External inline advisor used to replay inline decision from remarks.
+ std::unique_ptr<ReplayInlineAdvisor> ExternalInlineAdvisor;
+
+ // A pseudo probe helper to correlate the imported sample counts.
+ std::unique_ptr<PseudoProbeManager> ProbeManager;
};
class SampleProfileLoaderLegacyPass : public ModulePass {
@@ -604,11 +604,11 @@ public:
// Class identification, replacement for typeinfo
static char ID;
- SampleProfileLoaderLegacyPass(
- StringRef Name = SampleProfileFile,
- ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None)
+ SampleProfileLoaderLegacyPass(
+ StringRef Name = SampleProfileFile,
+ ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None)
: ModulePass(ID), SampleLoader(
- Name, SampleProfileRemappingFile, LTOPhase,
+ Name, SampleProfileRemappingFile, LTOPhase,
[&](Function &F) -> AssumptionCache & {
return ACT->getAssumptionCache(F);
},
@@ -830,9 +830,9 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS,
///
/// \returns the weight of \p Inst.
ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
- if (FunctionSamples::ProfileIsProbeBased)
- return getProbeWeight(Inst);
-
+ if (FunctionSamples::ProfileIsProbeBased)
+ return getProbeWeight(Inst);
+
const DebugLoc &DLoc = Inst.getDebugLoc();
if (!DLoc)
return std::error_code();
@@ -851,10 +851,10 @@ ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
 // (findCalleeFunctionSamples returns a non-empty result), but not inlined
 // here, it means that the inlined callsite has no sample, thus the call
 // instruction should have a count of 0.
- if (!ProfileIsCS)
- if (const auto *CB = dyn_cast<CallBase>(&Inst))
- if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
- return 0;
+ if (!ProfileIsCS)
+ if (const auto *CB = dyn_cast<CallBase>(&Inst))
+ if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
+ return 0;
const DILocation *DIL = DLoc;
uint32_t LineOffset = FunctionSamples::getOffset(DIL);
@@ -886,51 +886,51 @@ ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
return R;
}
-ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
- assert(FunctionSamples::ProfileIsProbeBased &&
- "Profile is not pseudo probe based");
- Optional<PseudoProbe> Probe = extractProbe(Inst);
- if (!Probe)
- return std::error_code();
-
- const FunctionSamples *FS = findFunctionSamples(Inst);
- if (!FS)
- return std::error_code();
-
- // If a direct call/invoke instruction is inlined in the profile
- // (findCalleeFunctionSamples returns a non-empty result), but not inlined
- // here, it means that the inlined callsite has no sample, thus the call
- // instruction should have a count of 0.
- if (const auto *CB = dyn_cast<CallBase>(&Inst))
- if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
- return 0;
-
- const ErrorOr<uint64_t> &R = FS->findSamplesAt(Probe->Id, 0);
- if (R) {
- uint64_t Samples = R.get() * Probe->Factor;
- bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
- if (FirstMark) {
- ORE->emit([&]() {
- OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
- Remark << "Applied " << ore::NV("NumSamples", Samples);
- Remark << " samples from profile (ProbeId=";
- Remark << ore::NV("ProbeId", Probe->Id);
- Remark << ", Factor=";
- Remark << ore::NV("Factor", Probe->Factor);
- Remark << ", OriginalSamples=";
- Remark << ore::NV("OriginalSamples", R.get());
- Remark << ")";
- return Remark;
- });
- }
- LLVM_DEBUG(dbgs() << " " << Probe->Id << ":" << Inst
- << " - weight: " << R.get() << " - factor: "
- << format("%0.2f", Probe->Factor) << ")\n");
- return Samples;
- }
- return R;
-}
-
+ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
+ assert(FunctionSamples::ProfileIsProbeBased &&
+ "Profile is not pseudo probe based");
+ Optional<PseudoProbe> Probe = extractProbe(Inst);
+ if (!Probe)
+ return std::error_code();
+
+ const FunctionSamples *FS = findFunctionSamples(Inst);
+ if (!FS)
+ return std::error_code();
+
+ // If a direct call/invoke instruction is inlined in the profile
+ // (findCalleeFunctionSamples returns a non-empty result), but not inlined
+ // here, it means that the inlined callsite has no sample, thus the call
+ // instruction should have a count of 0.
+ if (const auto *CB = dyn_cast<CallBase>(&Inst))
+ if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
+ return 0;
+
+ const ErrorOr<uint64_t> &R = FS->findSamplesAt(Probe->Id, 0);
+ if (R) {
+ uint64_t Samples = R.get() * Probe->Factor;
+ bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
+ if (FirstMark) {
+ ORE->emit([&]() {
+ OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
+ Remark << "Applied " << ore::NV("NumSamples", Samples);
+ Remark << " samples from profile (ProbeId=";
+ Remark << ore::NV("ProbeId", Probe->Id);
+ Remark << ", Factor=";
+ Remark << ore::NV("Factor", Probe->Factor);
+ Remark << ", OriginalSamples=";
+ Remark << ore::NV("OriginalSamples", R.get());
+ Remark << ")";
+ return Remark;
+ });
+ }
+ LLVM_DEBUG(dbgs() << " " << Probe->Id << ":" << Inst
+ << " - weight: " << R.get() << " - factor: "
+ << format("%0.2f", Probe->Factor) << ")\n");
+ return Samples;
+ }
+ return R;
+}
+
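For readers unfamiliar with probe-based counts, here is a minimal, self-contained sketch of the proration performed above; `PseudoProbe` is a stand-in struct rather than LLVM's type, and only the `RawSamples * Factor` scaling mirrors getProbeWeight:

    #include <cstdint>
    #include <iostream>

    // Stand-in for the pseudo-probe payload: an ID plus a distribution factor
    // in [0, 1] describing what fraction of the original callsite this copy of
    // the probe represents after duplication.
    struct PseudoProbe {
      unsigned Id;
      float Factor;
    };

    // Mirrors the `R.get() * Probe->Factor` computation in getProbeWeight.
    uint64_t scaledSamples(uint64_t RawSamples, const PseudoProbe &Probe) {
      return static_cast<uint64_t>(RawSamples * Probe.Factor);
    }

    int main() {
      PseudoProbe P{/*Id=*/7, /*Factor=*/0.25f};
      // A probe duplicated into four equal copies keeps a quarter of the samples.
      std::cout << scaledSamples(1000, P) << "\n"; // prints 250
    }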
/// Compute the weight of a basic block.
///
/// The weight of basic block \p BB is the maximum weight of all the
@@ -994,18 +994,18 @@ SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const {
}
StringRef CalleeName;
- if (Function *Callee = Inst.getCalledFunction())
- CalleeName = FunctionSamples::getCanonicalFnName(*Callee);
-
- if (ProfileIsCS)
- return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
+ if (Function *Callee = Inst.getCalledFunction())
+ CalleeName = FunctionSamples::getCanonicalFnName(*Callee);
+ if (ProfileIsCS)
+ return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
+
const FunctionSamples *FS = findFunctionSamples(Inst);
if (FS == nullptr)
return nullptr;
- return FS->findFunctionSamplesAt(FunctionSamples::getCallSiteIdentifier(DIL),
- CalleeName, Reader->getRemapper());
+ return FS->findFunctionSamplesAt(FunctionSamples::getCallSiteIdentifier(DIL),
+ CalleeName, Reader->getRemapper());
}
/// Returns a vector of FunctionSamples that are the indirect call targets
@@ -1021,49 +1021,49 @@ SampleProfileLoader::findIndirectCallFunctionSamples(
return R;
}
- auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) {
- assert(L && R && "Expect non-null FunctionSamples");
- if (L->getEntrySamples() != R->getEntrySamples())
- return L->getEntrySamples() > R->getEntrySamples();
- return FunctionSamples::getGUID(L->getName()) <
- FunctionSamples::getGUID(R->getName());
- };
-
- if (ProfileIsCS) {
- auto CalleeSamples =
- ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
- if (CalleeSamples.empty())
- return R;
-
- // For CSSPGO, we only use the target context profile's entry count,
- // as that already includes both inlined callees and non-inlined ones.
- Sum = 0;
- for (const auto *const FS : CalleeSamples) {
- Sum += FS->getEntrySamples();
- R.push_back(FS);
- }
- llvm::sort(R, FSCompare);
- return R;
- }
-
+ auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) {
+ assert(L && R && "Expect non-null FunctionSamples");
+ if (L->getEntrySamples() != R->getEntrySamples())
+ return L->getEntrySamples() > R->getEntrySamples();
+ return FunctionSamples::getGUID(L->getName()) <
+ FunctionSamples::getGUID(R->getName());
+ };
+
+ if (ProfileIsCS) {
+ auto CalleeSamples =
+ ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
+ if (CalleeSamples.empty())
+ return R;
+
+ // For CSSPGO, we only use the target context profile's entry count,
+ // as that already includes both inlined callees and non-inlined ones.
+ Sum = 0;
+ for (const auto *const FS : CalleeSamples) {
+ Sum += FS->getEntrySamples();
+ R.push_back(FS);
+ }
+ llvm::sort(R, FSCompare);
+ return R;
+ }
+
const FunctionSamples *FS = findFunctionSamples(Inst);
if (FS == nullptr)
return R;
- auto CallSite = FunctionSamples::getCallSiteIdentifier(DIL);
- auto T = FS->findCallTargetMapAt(CallSite);
+ auto CallSite = FunctionSamples::getCallSiteIdentifier(DIL);
+ auto T = FS->findCallTargetMapAt(CallSite);
Sum = 0;
if (T)
for (const auto &T_C : T.get())
Sum += T_C.second;
- if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) {
+ if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) {
if (M->empty())
return R;
for (const auto &NameFS : *M) {
Sum += NameFS.second.getEntrySamples();
R.push_back(&NameFS.second);
}
- llvm::sort(R, FSCompare);
+ llvm::sort(R, FSCompare);
}
return R;
}
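The FSCompare lambda above orders candidate profiles hottest-first with a deterministic tie-break. A self-contained sketch of the same ordering, with a plain struct standing in for FunctionSamples and a name comparison standing in for the GUID tie-break:

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <vector>

    // Stand-in for FunctionSamples: just the fields the comparator needs.
    struct Samples {
      std::string Name;
      uint64_t EntrySamples;
    };

    int main() {
      Samples A{"foo", 500}, B{"bar", 900}, C{"baz", 500};
      std::vector<const Samples *> R = {&A, &B, &C};

      // Hotter targets first; ties broken deterministically so the promotion
      // order is stable across runs (the real code compares GUIDs).
      std::sort(R.begin(), R.end(), [](const Samples *L, const Samples *Rhs) {
        if (L->EntrySamples != Rhs->EntrySamples)
          return L->EntrySamples > Rhs->EntrySamples;
        return L->Name < Rhs->Name;
      });

      for (const Samples *S : R)
        std::cout << S->Name << " " << S->EntrySamples << "\n";
      // bar 900, baz 500, foo 500
    }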
@@ -1079,85 +1079,85 @@ SampleProfileLoader::findIndirectCallFunctionSamples(
/// \returns the FunctionSamples pointer to the inlined instance.
const FunctionSamples *
SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
- if (FunctionSamples::ProfileIsProbeBased) {
- Optional<PseudoProbe> Probe = extractProbe(Inst);
- if (!Probe)
- return nullptr;
- }
-
+ if (FunctionSamples::ProfileIsProbeBased) {
+ Optional<PseudoProbe> Probe = extractProbe(Inst);
+ if (!Probe)
+ return nullptr;
+ }
+
const DILocation *DIL = Inst.getDebugLoc();
if (!DIL)
return Samples;
auto it = DILocation2SampleMap.try_emplace(DIL,nullptr);
- if (it.second) {
- if (ProfileIsCS)
- it.first->second = ContextTracker->getContextSamplesFor(DIL);
- else
- it.first->second =
- Samples->findFunctionSamples(DIL, Reader->getRemapper());
- }
+ if (it.second) {
+ if (ProfileIsCS)
+ it.first->second = ContextTracker->getContextSamplesFor(DIL);
+ else
+ it.first->second =
+ Samples->findFunctionSamples(DIL, Reader->getRemapper());
+ }
return it.first->second;
}
-/// Attempt to promote indirect call and also inline the promoted call.
-///
-/// \param F Caller function.
-/// \param Candidate ICP and inline candidate.
-/// \param SumOrigin Original sum of target counts for the indirect call.
-/// \param Sum Sum of target counts for the indirect call.
-/// \param PromotedInsns Map to keep track of indirect calls already processed.
-/// \param InlinedCallSite Output vector for new call sites exposed after
-/// inlining.
-bool SampleProfileLoader::tryPromoteAndInlineCandidate(
- Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
- DenseSet<Instruction *> &PromotedInsns,
- SmallVector<CallBase *, 8> *InlinedCallSite) {
- const char *Reason = "Callee function not available";
- // R->getValue() != &F is to prevent promoting a recursive call.
- // If it is a recursive call, we do not inline it as it could bloat
- // the code exponentially. There is a better way to handle this, e.g.
- // clone the caller first, and inline the cloned caller if it is
- // recursive. As LLVM does not inline recursive calls, we will
- // simply ignore it instead of handling it explicitly.
- auto R = SymbolMap.find(Candidate.CalleeSamples->getFuncName());
- if (R != SymbolMap.end() && R->getValue() &&
- !R->getValue()->isDeclaration() && R->getValue()->getSubprogram() &&
- R->getValue()->hasFnAttribute("use-sample-profile") &&
- R->getValue() != &F &&
- isLegalToPromote(*Candidate.CallInstr, R->getValue(), &Reason)) {
- auto *DI =
- &pgo::promoteIndirectCall(*Candidate.CallInstr, R->getValue(),
- Candidate.CallsiteCount, Sum, false, ORE);
- if (DI) {
- Sum -= Candidate.CallsiteCount;
- // Prorate the indirect callsite distribution.
- // Do not update the promoted direct callsite distribution at this
- // point since the original distribution combined with the callee
- // profile will be used to prorate callsites from the callee if
- // inlined. If not inlined, the direct callsite distribution should
- // be prorated so that it will reflect the real callsite counts.
- setProbeDistributionFactor(*Candidate.CallInstr,
- Candidate.CallsiteDistribution * Sum /
- SumOrigin);
- PromotedInsns.insert(Candidate.CallInstr);
- Candidate.CallInstr = DI;
- if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
- bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
- if (!Inlined) {
- // Prorate the direct callsite distribution so that it reflects real
- // callsite counts.
- setProbeDistributionFactor(*DI, Candidate.CallsiteDistribution *
- Candidate.CallsiteCount /
- SumOrigin);
- }
- return Inlined;
- }
- }
- } else {
- LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
- << Candidate.CalleeSamples->getFuncName() << " because "
- << Reason << "\n");
+/// Attempt to promote indirect call and also inline the promoted call.
+///
+/// \param F Caller function.
+/// \param Candidate ICP and inline candidate.
+/// \param SumOrigin Original sum of target counts for the indirect call.
+/// \param Sum Sum of target counts for the indirect call.
+/// \param PromotedInsns Map to keep track of indirect calls already processed.
+/// \param InlinedCallSite Output vector for new call sites exposed after
+/// inlining.
+bool SampleProfileLoader::tryPromoteAndInlineCandidate(
+ Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
+ DenseSet<Instruction *> &PromotedInsns,
+ SmallVector<CallBase *, 8> *InlinedCallSite) {
+ const char *Reason = "Callee function not available";
+ // R->getValue() != &F is to prevent promoting a recursive call.
+ // If it is a recursive call, we do not inline it as it could bloat
+ // the code exponentially. There is a better way to handle this, e.g.
+ // clone the caller first, and inline the cloned caller if it is
+ // recursive. As LLVM does not inline recursive calls, we will
+ // simply ignore it instead of handling it explicitly.
+ auto R = SymbolMap.find(Candidate.CalleeSamples->getFuncName());
+ if (R != SymbolMap.end() && R->getValue() &&
+ !R->getValue()->isDeclaration() && R->getValue()->getSubprogram() &&
+ R->getValue()->hasFnAttribute("use-sample-profile") &&
+ R->getValue() != &F &&
+ isLegalToPromote(*Candidate.CallInstr, R->getValue(), &Reason)) {
+ auto *DI =
+ &pgo::promoteIndirectCall(*Candidate.CallInstr, R->getValue(),
+ Candidate.CallsiteCount, Sum, false, ORE);
+ if (DI) {
+ Sum -= Candidate.CallsiteCount;
+ // Prorate the indirect callsite distribution.
+ // Do not update the promoted direct callsite distribution at this
+ // point since the original distribution combined with the callee
+ // profile will be used to prorate callsites from the callee if
+ // inlined. If not inlined, the direct callsite distribution should
+ // be prorated so that it will reflect the real callsite counts.
+ setProbeDistributionFactor(*Candidate.CallInstr,
+ Candidate.CallsiteDistribution * Sum /
+ SumOrigin);
+ PromotedInsns.insert(Candidate.CallInstr);
+ Candidate.CallInstr = DI;
+ if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
+ bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
+ if (!Inlined) {
+ // Prorate the direct callsite distribution so that it reflects real
+ // callsite counts.
+ setProbeDistributionFactor(*DI, Candidate.CallsiteDistribution *
+ Candidate.CallsiteCount /
+ SumOrigin);
+ }
+ return Inlined;
+ }
+ }
+ } else {
+ LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
+ << Candidate.CalleeSamples->getFuncName() << " because "
+ << Reason << "\n");
}
return false;
}
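The factor bookkeeping above is easier to see with concrete numbers. A small sketch (plain arithmetic with assumed values) of how one promotion splits a callsite's distribution factor between the residual indirect call and a promoted-but-not-inlined direct call:

    #include <cstdint>
    #include <iostream>

    int main() {
      // All indirect-call targets before promotion summed to SumOrigin; the
      // promoted target accounted for Count of them.
      const uint64_t SumOrigin = 1000;
      const uint64_t Count = 600;
      const uint64_t Sum = SumOrigin - Count; // remaining indirect samples
      const double Factor = 1.0;              // original distribution factor

      // The residual indirect callsite keeps the un-promoted share...
      double IndirectFactor = Factor * Sum / SumOrigin; // 0.4
      // ...and a promoted direct call that ends up not inlined gets its own.
      double DirectFactor = Factor * Count / SumOrigin; // 0.6

      std::cout << IndirectFactor << " " << DirectFactor << "\n";
    }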
@@ -1173,12 +1173,12 @@ bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) {
InlineCost Cost = getInlineCost(CallInst, getInlineParams(), GetTTI(*Callee),
GetAC, GetTLI);
- if (Cost.isNever())
- return false;
-
- if (Cost.isAlways())
- return true;
-
+ if (Cost.isNever())
+ return false;
+
+ if (Cost.isAlways())
+ return true;
+
return Cost.getCost() <= SampleColdCallSiteThreshold;
}
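A minimal sketch of that decision ladder with a toy cost type (not llvm::InlineCost): hard never/always verdicts from the analyzer take precedence, and only then is the numeric cost compared against the cold-callsite threshold:

    #include <cassert>

    // Toy stand-in for llvm::InlineCost.
    struct Cost {
      int Value;
      bool Never;
      bool Always;
    };

    bool shouldInlineCold(const Cost &C, int ColdThreshold) {
      if (C.Never)
        return false; // illegal or explicitly forbidden: nothing overrides this
      if (C.Always)
        return true;  // e.g. always_inline: the threshold is irrelevant
      return C.Value <= ColdThreshold;
    }

    int main() {
      assert(!shouldInlineCold({10, /*Never=*/true, false}, 45));
      assert(shouldInlineCold({10, false, false}, 45));
      assert(!shouldInlineCold({100, false, false}, 45));
    }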
@@ -1223,11 +1223,11 @@ bool SampleProfileLoader::inlineHotFunctions(
"ProfAccForSymsInList should be false when profile-sample-accurate "
"is enabled");
- DenseMap<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
+ DenseMap<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
bool Changed = false;
- bool LocalChanged = true;
- while (LocalChanged) {
- LocalChanged = false;
+ bool LocalChanged = true;
+ while (LocalChanged) {
+ LocalChanged = false;
SmallVector<CallBase *, 10> CIS;
for (auto &BB : F) {
bool Hot = false;
@@ -1237,11 +1237,11 @@ bool SampleProfileLoader::inlineHotFunctions(
const FunctionSamples *FS = nullptr;
if (auto *CB = dyn_cast<CallBase>(&I)) {
if (!isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(*CB))) {
- assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
- "GUIDToFuncNameMap has to be populated");
+ assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
+ "GUIDToFuncNameMap has to be populated");
AllCandidates.push_back(CB);
- if (FS->getEntrySamples() > 0 || ProfileIsCS)
- LocalNotInlinedCallSites.try_emplace(CB, FS);
+ if (FS->getEntrySamples() > 0 || ProfileIsCS)
+ LocalNotInlinedCallSites.try_emplace(CB, FS);
if (callsiteIsHot(FS, PSI))
Hot = true;
else if (shouldInlineColdCallee(*CB))
@@ -1249,7 +1249,7 @@ bool SampleProfileLoader::inlineHotFunctions(
}
}
}
- if (Hot || ExternalInlineAdvisor) {
+ if (Hot || ExternalInlineAdvisor) {
CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());
emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true);
} else {
@@ -1259,11 +1259,11 @@ bool SampleProfileLoader::inlineHotFunctions(
}
for (CallBase *I : CIS) {
Function *CalledFunction = I->getCalledFunction();
- InlineCandidate Candidate = {
- I,
- LocalNotInlinedCallSites.count(I) ? LocalNotInlinedCallSites[I]
- : nullptr,
- 0 /* dummy count */, 1.0 /* dummy distribution factor */};
+ InlineCandidate Candidate = {
+ I,
+ LocalNotInlinedCallSites.count(I) ? LocalNotInlinedCallSites[I]
+ : nullptr,
+ 0 /* dummy count */, 1.0 /* dummy distribution factor */};
// Do not inline recursive calls.
if (CalledFunction == &F)
continue;
@@ -1272,8 +1272,8 @@ bool SampleProfileLoader::inlineHotFunctions(
continue;
uint64_t Sum;
for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
- uint64_t SumOrigin = Sum;
- if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
+ uint64_t SumOrigin = Sum;
+ if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
PSI->getOrCompHotCountThreshold());
continue;
@@ -1281,34 +1281,34 @@ bool SampleProfileLoader::inlineHotFunctions(
if (!callsiteIsHot(FS, PSI))
continue;
- Candidate = {I, FS, FS->getEntrySamples(), 1.0};
- if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
- PromotedInsns)) {
- LocalNotInlinedCallSites.erase(I);
- LocalChanged = true;
+ Candidate = {I, FS, FS->getEntrySamples(), 1.0};
+ if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
+ PromotedInsns)) {
+ LocalNotInlinedCallSites.erase(I);
+ LocalChanged = true;
}
}
} else if (CalledFunction && CalledFunction->getSubprogram() &&
!CalledFunction->isDeclaration()) {
- if (tryInlineCandidate(Candidate)) {
- LocalNotInlinedCallSites.erase(I);
+ if (tryInlineCandidate(Candidate)) {
+ LocalNotInlinedCallSites.erase(I);
LocalChanged = true;
}
- } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
+ } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
findCalleeFunctionSamples(*I)->findInlinedFunctions(
InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
}
}
- Changed |= LocalChanged;
+ Changed |= LocalChanged;
}
- // For CS profiles, the profile for a not-inlined context will be merged
- // when the base profile is retrieved.
- if (ProfileIsCS)
- return Changed;
-
+ // For CS profiles, the profile for a not-inlined context will be merged
+ // when the base profile is retrieved.
+ if (ProfileIsCS)
+ return Changed;
+
// Accumulate not inlined callsite information into notInlinedSamples
- for (const auto &Pair : LocalNotInlinedCallSites) {
+ for (const auto &Pair : LocalNotInlinedCallSites) {
CallBase *I = Pair.getFirst();
Function *Callee = I->getCalledFunction();
if (!Callee || Callee->isDeclaration())
@@ -1327,23 +1327,23 @@ bool SampleProfileLoader::inlineHotFunctions(
}
if (ProfileMergeInlinee) {
- // A function call can be replicated by optimizations like callsite
- // splitting or jump threading, and the replicas end up sharing the
- // same nested callee profile instead of slicing the original inlinee's
- // profile. We want to do the merge exactly once by filtering out callee
- // profiles with a non-zero head sample count.
- if (FS->getHeadSamples() == 0) {
- // Use entry samples as head samples during the merge, as inlinees
- // don't have head samples.
- const_cast<FunctionSamples *>(FS)->addHeadSamples(
- FS->getEntrySamples());
-
- // Note that we have to do the merge right after processing function.
- // This allows OutlineFS's profile to be used for annotation during
- // top-down processing of functions' annotation.
- FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee);
- OutlineFS->merge(*FS);
- }
+ // A function call can be replicated by optimizations like callsite
+ // splitting or jump threading, and the replicas end up sharing the
+ // same nested callee profile instead of slicing the original inlinee's
+ // profile. We want to do the merge exactly once by filtering out callee
+ // profiles with a non-zero head sample count.
+ if (FS->getHeadSamples() == 0) {
+ // Use entry samples as head samples during the merge, as inlinees
+ // don't have head samples.
+ const_cast<FunctionSamples *>(FS)->addHeadSamples(
+ FS->getEntrySamples());
+
+ // Note that we have to do the merge right after processing function.
+ // This allows OutlineFS's profile to be used for annotation during
+ // top-down processing of functions' annotation.
+ FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee);
+ OutlineFS->merge(*FS);
+ }
} else {
auto pair =
notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
@@ -1353,266 +1353,266 @@ bool SampleProfileLoader::inlineHotFunctions(
return Changed;
}
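The outer while (LocalChanged) loop above is a fixed-point iteration: each round of inlining can expose new call sites, so scanning repeats until a full pass makes no change. The same shape, extracted into a generic sketch:

    #include <iostream>

    // Repeatedly apply ScanOnce until a full pass reports no change; returns
    // whether any pass changed anything, like inlineHotFunctions' Changed flag.
    template <typename Scan> bool iterateToFixedPoint(Scan ScanOnce) {
      bool Changed = false;
      bool LocalChanged = true;
      while (LocalChanged) {
        LocalChanged = ScanOnce();
        Changed |= LocalChanged;
      }
      return Changed;
    }

    int main() {
      int Rounds = 3; // pretend progress is possible for two rounds
      bool Changed = iterateToFixedPoint([&] { return --Rounds > 0; });
      std::cout << Changed << "\n"; // 1: at least one round made progress
    }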
-bool SampleProfileLoader::tryInlineCandidate(
- InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) {
-
- CallBase &CB = *Candidate.CallInstr;
- Function *CalledFunction = CB.getCalledFunction();
- assert(CalledFunction && "Expect a callee with definition");
- DebugLoc DLoc = CB.getDebugLoc();
- BasicBlock *BB = CB.getParent();
-
- InlineCost Cost = shouldInlineCandidate(Candidate);
- if (Cost.isNever()) {
- ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB)
- << "incompatible inlining");
- return false;
- }
-
- if (!Cost)
- return false;
-
- InlineFunctionInfo IFI(nullptr, GetAC);
- if (InlineFunction(CB, IFI).isSuccess()) {
- // The call to InlineFunction erases CB, so we can't pass it here.
- emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost,
- true, CSINLINE_DEBUG);
-
- // Now populate the list of newly exposed call sites.
- if (InlinedCallSites) {
- InlinedCallSites->clear();
- for (auto &I : IFI.InlinedCallSites)
- InlinedCallSites->push_back(I);
- }
-
- if (ProfileIsCS)
- ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
- ++NumCSInlined;
-
- // Prorate inlined probes for a duplicated inlining callsite which probably
- // has a distribution less than 100%. Samples for an inlinee should be
- // distributed among the copies of the original callsite based on each
- // callsite's distribution factor for counts accuracy. Note that an inlined
- // probe may come with its own distribution factor if it has been duplicated
- // in the inlinee body. The two factors are multiplied to reflect the
- // aggregation of duplication.
- if (Candidate.CallsiteDistribution < 1) {
- for (auto &I : IFI.InlinedCallSites) {
- if (Optional<PseudoProbe> Probe = extractProbe(*I))
- setProbeDistributionFactor(*I, Probe->Factor *
- Candidate.CallsiteDistribution);
- }
- NumDuplicatedInlinesite++;
- }
-
- return true;
- }
- return false;
-}
-
-bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
- CallBase *CB) {
- assert(CB && "Expect non-null call instruction");
-
- if (isa<IntrinsicInst>(CB))
- return false;
-
- // Find the callee's profile. For an indirect call, find the hottest target profile.
- const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
- if (!CalleeSamples)
- return false;
-
- float Factor = 1.0;
- if (Optional<PseudoProbe> Probe = extractProbe(*CB))
- Factor = Probe->Factor;
-
- uint64_t CallsiteCount = 0;
- ErrorOr<uint64_t> Weight = getBlockWeight(CB->getParent());
- if (Weight)
- CallsiteCount = Weight.get();
- if (CalleeSamples)
- CallsiteCount = std::max(
- CallsiteCount, uint64_t(CalleeSamples->getEntrySamples() * Factor));
-
- *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
- return true;
-}
-
-InlineCost
-SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
- std::unique_ptr<InlineAdvice> Advice = nullptr;
- if (ExternalInlineAdvisor) {
- Advice = ExternalInlineAdvisor->getAdvice(*Candidate.CallInstr);
- if (!Advice->isInliningRecommended()) {
- Advice->recordUnattemptedInlining();
- return InlineCost::getNever("not previously inlined");
- }
- Advice->recordInlining();
- return InlineCost::getAlways("previously inlined");
- }
-
- // Adjust threshold based on call site hotness, only do this for callsite
- // prioritized inliner because otherwise cost-benefit check is done earlier.
- int SampleThreshold = SampleColdCallSiteThreshold;
- if (CallsitePrioritizedInline) {
- if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
- SampleThreshold = SampleHotCallSiteThreshold;
- else if (!ProfileSizeInline)
- return InlineCost::getNever("cold callsite");
- }
-
- Function *Callee = Candidate.CallInstr->getCalledFunction();
- assert(Callee && "Expect a definition for inline candidate of direct call");
-
- InlineParams Params = getInlineParams();
- Params.ComputeFullInlineCost = true;
- // Checks if there is anything in the reachable portion of the callee at
- // this callsite that makes this inlining potentially illegal. Need to
- // set ComputeFullInlineCost, otherwise getInlineCost may return early
- // when cost exceeds threshold without checking all IRs in the callee.
- // The actual cost does not matter because we only check isNever() to
- // see if it is legal to inline the callsite.
- InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params,
- GetTTI(*Callee), GetAC, GetTLI);
-
- // Honor always inline and never inline from call analyzer
- if (Cost.isNever() || Cost.isAlways())
- return Cost;
-
- // For old FDO inliner, we inline the call site as long as cost is not
- // "Never". The cost-benefit check is done earlier.
- if (!CallsitePrioritizedInline) {
- return InlineCost::get(Cost.getCost(), INT_MAX);
- }
-
- // Otherwise only use the cost from the call analyzer, but overwrite the
- // threshold with the Sample PGO threshold.
- return InlineCost::get(Cost.getCost(), SampleThreshold);
-}
-
-bool SampleProfileLoader::inlineHotFunctionsWithPriority(
- Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
- DenseSet<Instruction *> PromotedInsns;
- assert(ProfileIsCS && "Priority based inliner only works with CSSPGO now");
-
- // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
- // Profile symbol list is ignored when profile-sample-accurate is on.
- assert((!ProfAccForSymsInList ||
- (!ProfileSampleAccurate &&
- !F.hasFnAttribute("profile-sample-accurate"))) &&
- "ProfAccForSymsInList should be false when profile-sample-accurate "
- "is enabled");
-
- // Populate the worklist with initial call sites from the root inliner,
- // along with call site weights.
- CandidateQueue CQueue;
- InlineCandidate NewCandidate;
- for (auto &BB : F) {
- for (auto &I : BB.getInstList()) {
- auto *CB = dyn_cast<CallBase>(&I);
- if (!CB)
- continue;
- if (getInlineCandidate(&NewCandidate, CB))
- CQueue.push(NewCandidate);
- }
- }
-
- // Cap the size growth from profile-guided inlining. This is needed even
- // though the cost of each inline candidate already accounts for callee size,
- // because with top-down inlining, we can grow the inlined size significantly
- // with a large number of smaller inlinees that each pass the cost check.
- assert(ProfileInlineLimitMax >= ProfileInlineLimitMin &&
- "Max inline size limit should not be smaller than min inline size "
- "limit.");
- unsigned SizeLimit = F.getInstructionCount() * ProfileInlineGrowthLimit;
- SizeLimit = std::min(SizeLimit, (unsigned)ProfileInlineLimitMax);
- SizeLimit = std::max(SizeLimit, (unsigned)ProfileInlineLimitMin);
- if (ExternalInlineAdvisor)
- SizeLimit = std::numeric_limits<unsigned>::max();
-
- // Perform iterative BFS call site prioritized inlining
- bool Changed = false;
- while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) {
- InlineCandidate Candidate = CQueue.top();
- CQueue.pop();
- CallBase *I = Candidate.CallInstr;
- Function *CalledFunction = I->getCalledFunction();
-
- if (CalledFunction == &F)
- continue;
- if (I->isIndirectCall()) {
- if (PromotedInsns.count(I))
- continue;
- uint64_t Sum;
- auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
- uint64_t SumOrigin = Sum;
- Sum *= Candidate.CallsiteDistribution;
- for (const auto *FS : CalleeSamples) {
- // TODO: Consider disabling pre-LTO ICP for MonoLTO as well
- if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
- FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
- PSI->getOrCompHotCountThreshold());
- continue;
- }
- uint64_t EntryCountDistributed =
- FS->getEntrySamples() * Candidate.CallsiteDistribution;
- // In addition to the regular inline cost check, we also need to make sure
- // ICP isn't introducing excessive speculative checks even if an individual
- // target looks beneficial to promote and inline. That means we should
- // only do ICP when there's a small number of dominant targets.
- if (EntryCountDistributed < SumOrigin / ProfileICPThreshold)
- break;
- // TODO: Fix CallAnalyzer to handle all indirect calls.
- // For indirect call, we don't run CallAnalyzer to get InlineCost
- // before actual inlining. This is because we could see two different
- // types from the same definition, which makes CallAnalyzer choke as
- // it's expecting matching parameter type on both caller and callee
- // side. See example from PR18962 for the triggering cases (the bug was
- // fixed, but we generate different types).
- if (!PSI->isHotCount(EntryCountDistributed))
- break;
- SmallVector<CallBase *, 8> InlinedCallSites;
- // Attach function profile for promoted indirect callee, and update
- // call site count for the promoted inline candidate too.
- Candidate = {I, FS, EntryCountDistributed,
- Candidate.CallsiteDistribution};
- if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
- PromotedInsns, &InlinedCallSites)) {
- for (auto *CB : InlinedCallSites) {
- if (getInlineCandidate(&NewCandidate, CB))
- CQueue.emplace(NewCandidate);
- }
- Changed = true;
- }
- }
- } else if (CalledFunction && CalledFunction->getSubprogram() &&
- !CalledFunction->isDeclaration()) {
- SmallVector<CallBase *, 8> InlinedCallSites;
- if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
- for (auto *CB : InlinedCallSites) {
- if (getInlineCandidate(&NewCandidate, CB))
- CQueue.emplace(NewCandidate);
- }
- Changed = true;
- }
- } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
- findCalleeFunctionSamples(*I)->findInlinedFunctions(
- InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
- }
- }
-
- if (!CQueue.empty()) {
- if (SizeLimit == (unsigned)ProfileInlineLimitMax)
- ++NumCSInlinedHitMaxLimit;
- else if (SizeLimit == (unsigned)ProfileInlineLimitMin)
- ++NumCSInlinedHitMinLimit;
- else
- ++NumCSInlinedHitGrowthLimit;
- }
-
- return Changed;
-}
-
+bool SampleProfileLoader::tryInlineCandidate(
+ InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) {
+
+ CallBase &CB = *Candidate.CallInstr;
+ Function *CalledFunction = CB.getCalledFunction();
+ assert(CalledFunction && "Expect a callee with definition");
+ DebugLoc DLoc = CB.getDebugLoc();
+ BasicBlock *BB = CB.getParent();
+
+ InlineCost Cost = shouldInlineCandidate(Candidate);
+ if (Cost.isNever()) {
+ ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB)
+ << "incompatible inlining");
+ return false;
+ }
+
+ if (!Cost)
+ return false;
+
+ InlineFunctionInfo IFI(nullptr, GetAC);
+ if (InlineFunction(CB, IFI).isSuccess()) {
+ // The call to InlineFunction erases CB, so we can't pass it here.
+ emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost,
+ true, CSINLINE_DEBUG);
+
+ // Now populate the list of newly exposed call sites.
+ if (InlinedCallSites) {
+ InlinedCallSites->clear();
+ for (auto &I : IFI.InlinedCallSites)
+ InlinedCallSites->push_back(I);
+ }
+
+ if (ProfileIsCS)
+ ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
+ ++NumCSInlined;
+
+ // Prorate inlined probes for a duplicated inlining callsite which probably
+ // has a distribution less than 100%. Samples for an inlinee should be
+ // distributed among the copies of the original callsite based on each
+ // callsite's distribution factor for counts accuracy. Note that an inlined
+ // probe may come with its own distribution factor if it has been duplicated
+ // in the inlinee body. The two factors are multiplied to reflect the
+ // aggregation of duplication.
+ if (Candidate.CallsiteDistribution < 1) {
+ for (auto &I : IFI.InlinedCallSites) {
+ if (Optional<PseudoProbe> Probe = extractProbe(*I))
+ setProbeDistributionFactor(*I, Probe->Factor *
+ Candidate.CallsiteDistribution);
+ }
+ NumDuplicatedInlinesite++;
+ }
+
+ return true;
+ }
+ return false;
+}
+
+bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
+ CallBase *CB) {
+ assert(CB && "Expect non-null call instruction");
+
+ if (isa<IntrinsicInst>(CB))
+ return false;
+
+ // Find the callee's profile. For an indirect call, find the hottest target profile.
+ const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
+ if (!CalleeSamples)
+ return false;
+
+ float Factor = 1.0;
+ if (Optional<PseudoProbe> Probe = extractProbe(*CB))
+ Factor = Probe->Factor;
+
+ uint64_t CallsiteCount = 0;
+ ErrorOr<uint64_t> Weight = getBlockWeight(CB->getParent());
+ if (Weight)
+ CallsiteCount = Weight.get();
+ if (CalleeSamples)
+ CallsiteCount = std::max(
+ CallsiteCount, uint64_t(CalleeSamples->getEntrySamples() * Factor));
+
+ *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
+ return true;
+}
+
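The count estimate above takes the larger of two signals. A hedged sketch with standard types (std::optional standing in for ErrorOr):

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <optional>

    // Prefer whichever of the enclosing block's weight and the callee's
    // factor-scaled entry count is larger, so a hot callee is not hidden by a
    // missing or low block weight.
    uint64_t callsiteCount(std::optional<uint64_t> BlockWeight,
                           uint64_t CalleeEntrySamples, float Factor) {
      uint64_t Count = BlockWeight.value_or(0);
      return std::max(Count, static_cast<uint64_t>(CalleeEntrySamples * Factor));
    }

    int main() {
      std::cout << callsiteCount(120, 400, 0.5f) << "\n";          // 200
      std::cout << callsiteCount(std::nullopt, 400, 0.25f) << "\n"; // 100
    }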
+InlineCost
+SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
+ std::unique_ptr<InlineAdvice> Advice = nullptr;
+ if (ExternalInlineAdvisor) {
+ Advice = ExternalInlineAdvisor->getAdvice(*Candidate.CallInstr);
+ if (!Advice->isInliningRecommended()) {
+ Advice->recordUnattemptedInlining();
+ return InlineCost::getNever("not previously inlined");
+ }
+ Advice->recordInlining();
+ return InlineCost::getAlways("previously inlined");
+ }
+
+ // Adjust threshold based on call site hotness, only do this for callsite
+ // prioritized inliner because otherwise cost-benefit check is done earlier.
+ int SampleThreshold = SampleColdCallSiteThreshold;
+ if (CallsitePrioritizedInline) {
+ if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
+ SampleThreshold = SampleHotCallSiteThreshold;
+ else if (!ProfileSizeInline)
+ return InlineCost::getNever("cold callsite");
+ }
+
+ Function *Callee = Candidate.CallInstr->getCalledFunction();
+ assert(Callee && "Expect a definition for inline candidate of direct call");
+
+ InlineParams Params = getInlineParams();
+ Params.ComputeFullInlineCost = true;
+ // Checks if there is anything in the reachable portion of the callee at
+ // this callsite that makes this inlining potentially illegal. Need to
+ // set ComputeFullInlineCost, otherwise getInlineCost may return early
+ // when cost exceeds threshold without checking all IRs in the callee.
+ // The actual cost does not matter because we only check isNever() to
+ // see if it is legal to inline the callsite.
+ InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params,
+ GetTTI(*Callee), GetAC, GetTLI);
+
+ // Honor always inline and never inline from call analyzer
+ if (Cost.isNever() || Cost.isAlways())
+ return Cost;
+
+ // For old FDO inliner, we inline the call site as long as cost is not
+ // "Never". The cost-benefit check is done earlier.
+ if (!CallsitePrioritizedInline) {
+ return InlineCost::get(Cost.getCost(), INT_MAX);
+ }
+
+ // Otherwise only use the cost from the call analyzer, but overwrite the
+ // threshold with the Sample PGO threshold.
+ return InlineCost::get(Cost.getCost(), SampleThreshold);
+}
+
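A toy sketch of the threshold selection above (illustrative values, not LLVM's defaults): the non-prioritized inliner neutralizes the threshold entirely, while the prioritized inliner picks the hot or cold threshold from callsite hotness:

    #include <climits>
    #include <cstdint>
    #include <optional>

    // Returns the threshold to pair with the analyzer's cost, or nullopt when
    // the callsite should not be inlined at all (cold, size inlining off).
    std::optional<int> pickThreshold(bool Prioritized, uint64_t CallsiteCount,
                                     uint64_t HotCountThreshold, bool SizeInline,
                                     int HotThreshold, int ColdThreshold) {
      if (!Prioritized)
        return INT_MAX; // old FDO inliner: cost-benefit was checked earlier
      if (CallsiteCount > HotCountThreshold)
        return HotThreshold;
      if (!SizeInline)
        return std::nullopt; // "cold callsite": never inline
      return ColdThreshold;
    }

    int main() {
      auto T = pickThreshold(true, 5000, 1000, false, 3000, 45);
      return T ? 0 : 1; // hot callsite: threshold 3000
    }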
+bool SampleProfileLoader::inlineHotFunctionsWithPriority(
+ Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
+ DenseSet<Instruction *> PromotedInsns;
+ assert(ProfileIsCS && "Priority based inliner only works with CSSPGO now");
+
+ // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
+ // Profile symbol list is ignored when profile-sample-accurate is on.
+ assert((!ProfAccForSymsInList ||
+ (!ProfileSampleAccurate &&
+ !F.hasFnAttribute("profile-sample-accurate"))) &&
+ "ProfAccForSymsInList should be false when profile-sample-accurate "
+ "is enabled");
+
+ // Populate the worklist with initial call sites from the root inliner,
+ // along with call site weights.
+ CandidateQueue CQueue;
+ InlineCandidate NewCandidate;
+ for (auto &BB : F) {
+ for (auto &I : BB.getInstList()) {
+ auto *CB = dyn_cast<CallBase>(&I);
+ if (!CB)
+ continue;
+ if (getInlineCandidate(&NewCandidate, CB))
+ CQueue.push(NewCandidate);
+ }
+ }
+
+ // Cap the size growth from profile-guided inlining. This is needed even
+ // though the cost of each inline candidate already accounts for callee size,
+ // because with top-down inlining, we can grow the inlined size significantly
+ // with a large number of smaller inlinees that each pass the cost check.
+ assert(ProfileInlineLimitMax >= ProfileInlineLimitMin &&
+ "Max inline size limit should not be smaller than min inline size "
+ "limit.");
+ unsigned SizeLimit = F.getInstructionCount() * ProfileInlineGrowthLimit;
+ SizeLimit = std::min(SizeLimit, (unsigned)ProfileInlineLimitMax);
+ SizeLimit = std::max(SizeLimit, (unsigned)ProfileInlineLimitMin);
+ if (ExternalInlineAdvisor)
+ SizeLimit = std::numeric_limits<unsigned>::max();
+
+ // Perform iterative BFS call site prioritized inlining
+ bool Changed = false;
+ while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) {
+ InlineCandidate Candidate = CQueue.top();
+ CQueue.pop();
+ CallBase *I = Candidate.CallInstr;
+ Function *CalledFunction = I->getCalledFunction();
+
+ if (CalledFunction == &F)
+ continue;
+ if (I->isIndirectCall()) {
+ if (PromotedInsns.count(I))
+ continue;
+ uint64_t Sum;
+ auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
+ uint64_t SumOrigin = Sum;
+ Sum *= Candidate.CallsiteDistribution;
+ for (const auto *FS : CalleeSamples) {
+ // TODO: Consider disabling pre-LTO ICP for MonoLTO as well
+ if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
+ FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
+ PSI->getOrCompHotCountThreshold());
+ continue;
+ }
+ uint64_t EntryCountDistributed =
+ FS->getEntrySamples() * Candidate.CallsiteDistribution;
+ // In addition to the regular inline cost check, we also need to make sure
+ // ICP isn't introducing excessive speculative checks even if an individual
+ // target looks beneficial to promote and inline. That means we should
+ // only do ICP when there's a small number of dominant targets.
+ if (EntryCountDistributed < SumOrigin / ProfileICPThreshold)
+ break;
+ // TODO: Fix CallAnalyzer to handle all indirect calls.
+ // For indirect call, we don't run CallAnalyzer to get InlineCost
+ // before actual inlining. This is because we could see two different
+ // types from the same definition, which makes CallAnalyzer choke as
+ // it's expecting matching parameter type on both caller and callee
+ // side. See example from PR18962 for the triggering cases (the bug was
+ // fixed, but we generate different types).
+ if (!PSI->isHotCount(EntryCountDistributed))
+ break;
+ SmallVector<CallBase *, 8> InlinedCallSites;
+ // Attach function profile for promoted indirect callee, and update
+ // call site count for the promoted inline candidate too.
+ Candidate = {I, FS, EntryCountDistributed,
+ Candidate.CallsiteDistribution};
+ if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
+ PromotedInsns, &InlinedCallSites)) {
+ for (auto *CB : InlinedCallSites) {
+ if (getInlineCandidate(&NewCandidate, CB))
+ CQueue.emplace(NewCandidate);
+ }
+ Changed = true;
+ }
+ }
+ } else if (CalledFunction && CalledFunction->getSubprogram() &&
+ !CalledFunction->isDeclaration()) {
+ SmallVector<CallBase *, 8> InlinedCallSites;
+ if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
+ for (auto *CB : InlinedCallSites) {
+ if (getInlineCandidate(&NewCandidate, CB))
+ CQueue.emplace(NewCandidate);
+ }
+ Changed = true;
+ }
+ } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
+ findCalleeFunctionSamples(*I)->findInlinedFunctions(
+ InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
+ }
+ }
+
+ if (!CQueue.empty()) {
+ if (SizeLimit == (unsigned)ProfileInlineLimitMax)
+ ++NumCSInlinedHitMaxLimit;
+ else if (SizeLimit == (unsigned)ProfileInlineLimitMin)
+ ++NumCSInlinedHitMinLimit;
+ else
+ ++NumCSInlinedHitGrowthLimit;
+ }
+
+ return Changed;
+}
+
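A self-contained sketch of the two mechanisms that drive the loop above: a max-heap worklist that pops the hottest candidate first, and a size budget clamped to [LimitMin, LimitMax] (the numeric values here are assumptions, not LLVM's defaults):

    #include <algorithm>
    #include <cstdint>
    #include <queue>
    #include <vector>

    struct Candidate {
      uint64_t Count; // callsite hotness drives the priority
    };

    struct ByCount {
      bool operator()(const Candidate &L, const Candidate &R) const {
        return L.Count < R.Count; // max-heap: hottest candidate on top
      }
    };

    int main() {
      // Clamp the growth budget the way inlineHotFunctionsWithPriority does:
      // InstructionCount * GrowthLimit, bounded by [LimitMin, LimitMax].
      unsigned InstCount = 400, GrowthLimit = 12;
      unsigned LimitMin = 100, LimitMax = 10000;
      unsigned SizeLimit =
          std::max(LimitMin, std::min(InstCount * GrowthLimit, LimitMax));

      std::priority_queue<Candidate, std::vector<Candidate>, ByCount> Q;
      Q.push({100});
      Q.push({900});
      Q.push({300});

      unsigned Size = InstCount;
      while (!Q.empty() && Size < SizeLimit) {
        Candidate C = Q.top(); // hottest first: 900, then 300, then 100
        Q.pop();
        Size += 50; // pretend each inline grows the caller a bit
        (void)C;
      }
      return 0;
    }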
/// Find equivalence classes for the given block.
///
/// This finds all the blocks that are guaranteed to execute the same
@@ -2031,18 +2031,18 @@ void SampleProfileLoader::propagateWeights(Function &F) {
const FunctionSamples *FS = findFunctionSamples(I);
if (!FS)
continue;
- auto CallSite = FunctionSamples::getCallSiteIdentifier(DIL);
- auto T = FS->findCallTargetMapAt(CallSite);
+ auto CallSite = FunctionSamples::getCallSiteIdentifier(DIL);
+ auto T = FS->findCallTargetMapAt(CallSite);
if (!T || T.get().empty())
continue;
- // Prorate the callsite counts to reflect what is already done to the
- // callsite, such as ICP or callsite cloning.
- if (FunctionSamples::ProfileIsProbeBased) {
- if (Optional<PseudoProbe> Probe = extractProbe(I)) {
- if (Probe->Factor < 1)
- T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor);
- }
- }
+ // Prorate the callsite counts to reflect what is already done to the
+ // callsite, such as ICP or callsite cloning.
+ if (FunctionSamples::ProfileIsProbeBased) {
+ if (Optional<PseudoProbe> Probe = extractProbe(I)) {
+ if (Probe->Factor < 1)
+ T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor);
+ }
+ }
SmallVector<InstrProfValueData, 2> SortedCallTargets =
GetSortedValueDataFromCallTargets(T.get());
uint64_t Sum;
@@ -2204,28 +2204,28 @@ void SampleProfileLoader::computeDominanceAndLoopInfo(Function &F) {
bool SampleProfileLoader::emitAnnotations(Function &F) {
bool Changed = false;
- if (FunctionSamples::ProfileIsProbeBased) {
- if (!ProbeManager->profileIsValid(F, *Samples)) {
- LLVM_DEBUG(
- dbgs() << "Profile is invalid due to CFG mismatch for Function "
- << F.getName());
- ++NumMismatchedProfile;
- return false;
- }
- ++NumMatchedProfile;
- } else {
- if (getFunctionLoc(F) == 0)
- return false;
-
- LLVM_DEBUG(dbgs() << "Line number for the first instruction in "
- << F.getName() << ": " << getFunctionLoc(F) << "\n");
- }
+ if (FunctionSamples::ProfileIsProbeBased) {
+ if (!ProbeManager->profileIsValid(F, *Samples)) {
+ LLVM_DEBUG(
+ dbgs() << "Profile is invalid due to CFG mismatch for Function "
+ << F.getName());
+ ++NumMismatchedProfile;
+ return false;
+ }
+ ++NumMatchedProfile;
+ } else {
+ if (getFunctionLoc(F) == 0)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Line number for the first instruction in "
+ << F.getName() << ": " << getFunctionLoc(F) << "\n");
+ }
DenseSet<GlobalValue::GUID> InlinedGUIDs;
- if (ProfileIsCS && CallsitePrioritizedInline)
- Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs);
- else
- Changed |= inlineHotFunctions(F, InlinedGUIDs);
+ if (ProfileIsCS && CallsitePrioritizedInline)
+ Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs);
+ else
+ Changed |= inlineHotFunctions(F, InlinedGUIDs);
// Compute basic block weights.
Changed |= computeBlockWeights(F);
@@ -2290,45 +2290,45 @@ INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
"Sample Profile loader", false, false)
-// Add inlined profile call edges to the call graph.
-void SampleProfileLoader::addCallGraphEdges(CallGraph &CG,
- const FunctionSamples &Samples) {
- Function *Caller = SymbolMap.lookup(Samples.getFuncName());
- if (!Caller || Caller->isDeclaration())
- return;
-
- // Skip non-inlined call edges, which are not important since top-down
- // inlining for non-CS profiles is meant to get more precise profile matching,
- // not to enable more inlining.
-
- for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
- for (const auto &InlinedSamples : CallsiteSamples.second) {
- Function *Callee = SymbolMap.lookup(InlinedSamples.first);
- if (Callee && !Callee->isDeclaration())
- CG[Caller]->addCalledFunction(nullptr, CG[Callee]);
- addCallGraphEdges(CG, InlinedSamples.second);
- }
- }
-}
-
-// Replace call graph edges with dynamic call edges from the profile.
-void SampleProfileLoader::replaceCallGraphEdges(
- CallGraph &CG, StringMap<Function *> &SymbolMap) {
- // Remove static call edges from the call graph except for the ones from the
- // root which make the call graph connected.
- for (const auto &Node : CG)
- if (Node.second.get() != CG.getExternalCallingNode())
- Node.second->removeAllCalledFunctions();
-
- // Add profile call edges to the call graph.
- if (ProfileIsCS) {
- ContextTracker->addCallGraphEdges(CG, SymbolMap);
- } else {
- for (const auto &Samples : Reader->getProfiles())
- addCallGraphEdges(CG, Samples.second);
- }
-}
-
+// Add inlined profile call edges to the call graph.
+void SampleProfileLoader::addCallGraphEdges(CallGraph &CG,
+ const FunctionSamples &Samples) {
+ Function *Caller = SymbolMap.lookup(Samples.getFuncName());
+ if (!Caller || Caller->isDeclaration())
+ return;
+
+ // Skip non-inlined call edges, which are not important since top-down
+ // inlining for non-CS profiles is meant to get more precise profile matching,
+ // not to enable more inlining.
+
+ for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
+ for (const auto &InlinedSamples : CallsiteSamples.second) {
+ Function *Callee = SymbolMap.lookup(InlinedSamples.first);
+ if (Callee && !Callee->isDeclaration())
+ CG[Caller]->addCalledFunction(nullptr, CG[Callee]);
+ addCallGraphEdges(CG, InlinedSamples.second);
+ }
+ }
+}
+
+// Replace call graph edges with dynamic call edges from the profile.
+void SampleProfileLoader::replaceCallGraphEdges(
+ CallGraph &CG, StringMap<Function *> &SymbolMap) {
+ // Remove static call edges from the call graph except for the ones from the
+ // root which make the call graph connected.
+ for (const auto &Node : CG)
+ if (Node.second.get() != CG.getExternalCallingNode())
+ Node.second->removeAllCalledFunctions();
+
+ // Add profile call edges to the call graph.
+ if (ProfileIsCS) {
+ ContextTracker->addCallGraphEdges(CG, SymbolMap);
+ } else {
+ for (const auto &Samples : Reader->getProfiles())
+ addCallGraphEdges(CG, Samples.second);
+ }
+}
+
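A toy model of replaceCallGraphEdges on a name-keyed adjacency map (the real code mutates llvm::CallGraph nodes): wipe every static edge except the root's, then add the dynamic edges observed in the profile:

    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    // Toy call graph keyed by function name.
    using Graph = std::map<std::string, std::set<std::string>>;

    // Drop all static edges except those from the synthetic root (which keep
    // the graph connected), then add the dynamic edges seen in the profile.
    void replaceEdges(Graph &G, const std::string &Root,
                      const std::vector<std::pair<std::string, std::string>>
                          &ProfileEdges) {
      for (auto &Node : G)
        if (Node.first != Root)
          Node.second.clear();
      for (const auto &E : ProfileEdges)
        G[E.first].insert(E.second);
    }

    int main() {
      Graph G = {{"root", {"a", "b"}}, {"a", {"b"}}, {"b", {}}};
      replaceEdges(G, "root", {{"a", "b"}, {"b", "a"}});
      // root keeps {a, b}; a -> {b} and b -> {a} now come from the profile.
    }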
std::vector<Function *>
SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
std::vector<Function *> FunctionOrderList;
@@ -2351,103 +2351,103 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
}
assert(&CG->getModule() == &M);
-
- // Add indirect call edges from profile to augment the static call graph.
- // Functions will be processed in a top-down order defined by the static call
- // graph. Adjusting the order by considering indirect call edges from the
- // profile (which don't exist in the static call graph) can enable the
- // inlining of indirect call targets by processing the caller before them.
- // TODO: enable this for non-CS profiles and fix the count-returning logic to
- // have full support for indirect calls.
- if (UseProfileIndirectCallEdges && ProfileIsCS) {
- for (auto &Entry : *CG) {
- const auto *F = Entry.first;
- if (!F || F->isDeclaration() || !F->hasFnAttribute("use-sample-profile"))
- continue;
- auto &AllContexts = ContextTracker->getAllContextSamplesFor(F->getName());
- if (AllContexts.empty())
- continue;
-
- for (const auto &BB : *F) {
- for (const auto &I : BB.getInstList()) {
- const auto *CB = dyn_cast<CallBase>(&I);
- if (!CB || !CB->isIndirectCall())
- continue;
- const DebugLoc &DLoc = I.getDebugLoc();
- if (!DLoc)
- continue;
- auto CallSite = FunctionSamples::getCallSiteIdentifier(DLoc);
- for (FunctionSamples *Samples : AllContexts) {
- if (auto CallTargets = Samples->findCallTargetMapAt(CallSite)) {
- for (const auto &Target : CallTargets.get()) {
- Function *Callee = SymbolMap.lookup(Target.first());
- if (Callee && !Callee->isDeclaration())
- Entry.second->addCalledFunction(nullptr, (*CG)[Callee]);
- }
- }
- }
- }
- }
- }
- }
-
- // Compute a top-down order from the profile, which is used to sort functions in
- // one SCC later. The static processing order computed for an SCC may not
- // reflect the call contexts in the context-sensitive profile, thus may cause
- // potential inlining to be overlooked. The function order in one SCC is being
- // adjusted to a top-down order based on the profile to favor more inlining.
- DenseMap<Function *, uint64_t> ProfileOrderMap;
- if (UseProfileTopDownOrder ||
- (ProfileIsCS && !UseProfileTopDownOrder.getNumOccurrences())) {
- // Create a static call graph. The call edges are not important since they
- // will be replaced by dynamic edges from the profile.
- CallGraph ProfileCG(M);
- replaceCallGraphEdges(ProfileCG, SymbolMap);
- scc_iterator<CallGraph *> CGI = scc_begin(&ProfileCG);
- uint64_t I = 0;
- while (!CGI.isAtEnd()) {
- for (CallGraphNode *Node : *CGI) {
- if (auto *F = Node->getFunction())
- ProfileOrderMap[F] = ++I;
- }
- ++CGI;
- }
- }
-
+
+ // Add indirect call edges from profile to augment the static call graph.
+ // Functions will be processed in a top-down order defined by the static call
+ // graph. Adjusting the order by considering indirect call edges from the
+ // profile (which don't exist in the static call graph) can enable the
+ // inlining of indirect call targets by processing the caller before them.
+ // TODO: enable this for non-CS profiles and fix the count-returning logic to
+ // have full support for indirect calls.
+ if (UseProfileIndirectCallEdges && ProfileIsCS) {
+ for (auto &Entry : *CG) {
+ const auto *F = Entry.first;
+ if (!F || F->isDeclaration() || !F->hasFnAttribute("use-sample-profile"))
+ continue;
+ auto &AllContexts = ContextTracker->getAllContextSamplesFor(F->getName());
+ if (AllContexts.empty())
+ continue;
+
+ for (const auto &BB : *F) {
+ for (const auto &I : BB.getInstList()) {
+ const auto *CB = dyn_cast<CallBase>(&I);
+ if (!CB || !CB->isIndirectCall())
+ continue;
+ const DebugLoc &DLoc = I.getDebugLoc();
+ if (!DLoc)
+ continue;
+ auto CallSite = FunctionSamples::getCallSiteIdentifier(DLoc);
+ for (FunctionSamples *Samples : AllContexts) {
+ if (auto CallTargets = Samples->findCallTargetMapAt(CallSite)) {
+ for (const auto &Target : CallTargets.get()) {
+ Function *Callee = SymbolMap.lookup(Target.first());
+ if (Callee && !Callee->isDeclaration())
+ Entry.second->addCalledFunction(nullptr, (*CG)[Callee]);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Compute a top-down order from the profile, which is used to sort functions in
+ // one SCC later. The static processing order computed for an SCC may not
+ // reflect the call contexts in the context-sensitive profile, thus may cause
+ // potential inlining to be overlooked. The function order in one SCC is being
+ // adjusted to a top-down order based on the profile to favor more inlining.
+ DenseMap<Function *, uint64_t> ProfileOrderMap;
+ if (UseProfileTopDownOrder ||
+ (ProfileIsCS && !UseProfileTopDownOrder.getNumOccurrences())) {
+ // Create a static call graph. The call edges are not important since they
+ // will be replaced by dynamic edges from the profile.
+ CallGraph ProfileCG(M);
+ replaceCallGraphEdges(ProfileCG, SymbolMap);
+ scc_iterator<CallGraph *> CGI = scc_begin(&ProfileCG);
+ uint64_t I = 0;
+ while (!CGI.isAtEnd()) {
+ for (CallGraphNode *Node : *CGI) {
+ if (auto *F = Node->getFunction())
+ ProfileOrderMap[F] = ++I;
+ }
+ ++CGI;
+ }
+ }
+
scc_iterator<CallGraph *> CGI = scc_begin(CG);
while (!CGI.isAtEnd()) {
- uint64_t Start = FunctionOrderList.size();
- for (CallGraphNode *Node : *CGI) {
- auto *F = Node->getFunction();
+ uint64_t Start = FunctionOrderList.size();
+ for (CallGraphNode *Node : *CGI) {
+ auto *F = Node->getFunction();
if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
FunctionOrderList.push_back(F);
}
-
- // Sort nodes in SCC based on the profile top-down order.
- if (!ProfileOrderMap.empty()) {
- std::stable_sort(FunctionOrderList.begin() + Start,
- FunctionOrderList.end(),
- [&ProfileOrderMap](Function *Left, Function *Right) {
- return ProfileOrderMap[Left] < ProfileOrderMap[Right];
- });
- }
-
+
+ // Sort nodes in SCC based on the profile top-down order.
+ if (!ProfileOrderMap.empty()) {
+ std::stable_sort(FunctionOrderList.begin() + Start,
+ FunctionOrderList.end(),
+ [&ProfileOrderMap](Function *Left, Function *Right) {
+ return ProfileOrderMap[Left] < ProfileOrderMap[Right];
+ });
+ }
+
++CGI;
}
- LLVM_DEBUG({
- dbgs() << "Function processing order:\n";
- for (auto F : reverse(FunctionOrderList)) {
- dbgs() << F->getName() << "\n";
- }
- });
-
+ LLVM_DEBUG({
+ dbgs() << "Function processing order:\n";
+ for (auto F : reverse(FunctionOrderList)) {
+ dbgs() << F->getName() << "\n";
+ }
+ });
+
std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
return FunctionOrderList;
}
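Within one SCC, the list is stable-sorted by a precomputed top-down rank, so functions without a rank keep their static order. A self-contained sketch of that sort, using function names in place of Function pointers and a std::map in place of the DenseMap:

    #include <algorithm>
    #include <cstdint>
    #include <map>
    #include <string>
    #include <vector>

    int main() {
      std::vector<std::string> SCC = {"c", "a", "b", "d"};
      // Ranks computed from the profile call graph; a lower rank means earlier
      // in the top-down order. "d" is absent and defaults to rank 0, so the
      // stable sort keeps unranked functions in their original relative order,
      // just as operator[] on the real ProfileOrderMap would.
      std::map<std::string, uint64_t> Order = {{"a", 1}, {"b", 2}, {"c", 3}};

      std::stable_sort(SCC.begin(), SCC.end(),
                       [&Order](const std::string &L, const std::string &R) {
                         return Order[L] < Order[R];
                       });
      // SCC is now: d (rank 0), a, b, c.
      return 0;
    }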
-bool SampleProfileLoader::doInitialization(Module &M,
- FunctionAnalysisManager *FAM) {
+bool SampleProfileLoader::doInitialization(Module &M,
+ FunctionAnalysisManager *FAM) {
auto &Ctx = M.getContext();
auto ReaderOrErr =
@@ -2458,14 +2458,14 @@ bool SampleProfileLoader::doInitialization(Module &M,
return false;
}
Reader = std::move(ReaderOrErr.get());
- Reader->setSkipFlatProf(LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink);
+ Reader->setSkipFlatProf(LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink);
Reader->collectFuncsFrom(M);
- if (std::error_code EC = Reader->read()) {
- std::string Msg = "profile reading failed: " + EC.message();
- Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
- return false;
- }
-
+ if (std::error_code EC = Reader->read()) {
+ std::string Msg = "profile reading failed: " + EC.message();
+ Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
+ return false;
+ }
+
PSL = Reader->getProfileSymbolList();
// While profile-sample-accurate is on, ignore symbol list.
@@ -2477,41 +2477,41 @@ bool SampleProfileLoader::doInitialization(Module &M,
NamesInProfile.insert(NameTable->begin(), NameTable->end());
}
- if (FAM && !ProfileInlineReplayFile.empty()) {
- ExternalInlineAdvisor = std::make_unique<ReplayInlineAdvisor>(
- M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr, ProfileInlineReplayFile,
- /*EmitRemarks=*/false);
- if (!ExternalInlineAdvisor->areReplayRemarksLoaded())
- ExternalInlineAdvisor.reset();
- }
-
- // Apply tweaks if context-sensitive profile is available.
- if (Reader->profileIsCS()) {
- ProfileIsCS = true;
- FunctionSamples::ProfileIsCS = true;
-
- // Enable the priority-based inliner and size inlining by default for CSSPGO.
- if (!ProfileSizeInline.getNumOccurrences())
- ProfileSizeInline = true;
- if (!CallsitePrioritizedInline.getNumOccurrences())
- CallsitePrioritizedInline = true;
-
- // Tracker for profiles under different contexts
- ContextTracker =
- std::make_unique<SampleContextTracker>(Reader->getProfiles());
- }
-
- // Load pseudo probe descriptors for probe-based function samples.
- if (Reader->profileIsProbeBased()) {
- ProbeManager = std::make_unique<PseudoProbeManager>(M);
- if (!ProbeManager->moduleIsProbed(M)) {
- const char *Msg =
- "Pseudo-probe-based profile requires SampleProfileProbePass";
- Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
- return false;
- }
- }
-
+ if (FAM && !ProfileInlineReplayFile.empty()) {
+ ExternalInlineAdvisor = std::make_unique<ReplayInlineAdvisor>(
+ M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr, ProfileInlineReplayFile,
+ /*EmitRemarks=*/false);
+ if (!ExternalInlineAdvisor->areReplayRemarksLoaded())
+ ExternalInlineAdvisor.reset();
+ }
+
+ // Apply tweaks if context-sensitive profile is available.
+ if (Reader->profileIsCS()) {
+ ProfileIsCS = true;
+ FunctionSamples::ProfileIsCS = true;
+
+ // Enable the priority-based inliner and size inlining by default for CSSPGO.
+ if (!ProfileSizeInline.getNumOccurrences())
+ ProfileSizeInline = true;
+ if (!CallsitePrioritizedInline.getNumOccurrences())
+ CallsitePrioritizedInline = true;
+
+ // Tracker for profiles under different contexts
+ ContextTracker =
+ std::make_unique<SampleContextTracker>(Reader->getProfiles());
+ }
+
+ // Load pseudo probe descriptors for probe-based function samples.
+ if (Reader->profileIsProbeBased()) {
+ ProbeManager = std::make_unique<PseudoProbeManager>(M);
+ if (!ProbeManager->moduleIsProbed(M)) {
+ const char *Msg =
+ "Pseudo-probe-based profile requires SampleProfileProbePass";
+ Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
+ return false;
+ }
+ }
+
return true;
}
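
The two getNumOccurrences() checks above use a common cl::opt idiom: a flag's default is flipped programmatically only when the user has not set it on the command line, so an explicit user choice (even an explicit false) is never overridden. A minimal sketch of that idiom, using a hypothetical flag name in place of ProfileSizeInline:

```cpp
#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Hypothetical switch; the real options above are ProfileSizeInline and
// CallsitePrioritizedInline.
static cl::opt<bool> SizeInline("my-size-inline", cl::init(false),
                                cl::desc("Hypothetical size-inline switch"));

void applyCSSPGODefaults() {
  // getNumOccurrences() is zero only if the flag never appeared on the
  // command line, so this leaves any explicit user setting untouched.
  if (!SizeInline.getNumOccurrences())
    SizeInline = true;
}
```
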
@@ -2537,7 +2537,7 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
for (const auto &I : Reader->getProfiles())
TotalCollectedSamples += I.second.getTotalSamples();
- auto Remapper = Reader->getRemapper();
+ auto Remapper = Reader->getRemapper();
// Populate the symbol map.
for (const auto &N_F : M.getValueSymbolTable()) {
StringRef OrigName = N_F.getKey();
@@ -2555,16 +2555,16 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
// to nullptr to avoid confusion.
if (!r.second)
r.first->second = nullptr;
- OrigName = NewName;
- }
- // Insert the remapped names into SymbolMap.
- if (Remapper) {
- if (auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
- if (*MapName == OrigName)
- continue;
- SymbolMap.insert(std::make_pair(*MapName, F));
- }
+ OrigName = NewName;
}
+ // Insert the remapped names into SymbolMap.
+ if (Remapper) {
+ if (auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
+ if (*MapName == OrigName)
+ continue;
+ SymbolMap.insert(std::make_pair(*MapName, F));
+ }
+ }
}
bool retval = false;
@@ -2575,10 +2575,10 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
}
  // Account for cold calls that were not inlined.
- if (!ProfileIsCS)
- for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
- notInlinedCallInfo)
- updateProfileCallee(pair.first, pair.second.entryCount);
+ if (!ProfileIsCS)
+ for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
+ notInlinedCallInfo)
+ updateProfileCallee(pair.first, pair.second.entryCount);
return retval;
}
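
The SymbolMap population above encodes a deliberate collision rule: if two functions end up with the same (possibly suffix-stripped) name, the entry is set to nullptr so later profile lookups treat the name as ambiguous rather than attribute samples to the wrong function. A standalone sketch of that rule, with a hypothetical Fn stand-in for llvm::Function:

```cpp
#include <map>
#include <string>

struct Fn {}; // hypothetical stand-in for llvm::Function

// Mirror of the insertion rule above: the first writer wins, and a second
// writer with the same name poisons the entry with nullptr to mark it
// ambiguous for later lookups.
void insertSymbol(std::map<std::string, Fn *> &SymbolMap,
                  const std::string &Name, Fn *F) {
  auto R = SymbolMap.insert({Name, F});
  if (!R.second)
    R.first->second = nullptr;
}
```
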
@@ -2593,7 +2593,7 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
}
bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
- LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n");
+ LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n");
DILocation2SampleMap.clear();
// By default the entry count is initialized to -1, which will be treated
// conservatively by getEntryCount as the same as unknown (None). This is
@@ -2635,10 +2635,10 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM)
initialEntryCount = -1;
}
- // Initialize entry count when the function has no existing entry
- // count value.
- if (!F.getEntryCount().hasValue())
- F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
+ // Initialize entry count when the function has no existing entry
+ // count value.
+ if (!F.getEntryCount().hasValue())
+ F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
if (AM) {
auto &FAM =
@@ -2649,12 +2649,12 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM)
OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F);
ORE = OwnedORE.get();
}
-
- if (ProfileIsCS)
- Samples = ContextTracker->getBaseSamplesFor(F);
- else
- Samples = Reader->getSamplesFor(F);
-
+
+ if (ProfileIsCS)
+ Samples = ContextTracker->getBaseSamplesFor(F);
+ else
+ Samples = Reader->getSamplesFor(F);
+
if (Samples && !Samples->empty())
return emitAnnotations(F);
return false;
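
The entry-count initialization above writes a count only when the function does not already carry one, so a value attached by earlier instrumentation or a previous run is preserved; -1 is the conservative stand-in for unknown mentioned in the comment. A minimal sketch of the same guard, assuming the LLVM 12 Function::ProfileCount API used above:

```cpp
#include "llvm/IR/Function.h"
using namespace llvm;

// Seed the entry count only if none is attached yet. The default of -1
// is treated by getEntryCount() as unknown rather than as a real count.
void seedEntryCount(Function &F, uint64_t Count = -1) {
  if (!F.getEntryCount().hasValue())
    F.setEntryCount(Function::ProfileCount(Count, Function::PCT_Real));
}
```
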
@@ -2679,9 +2679,9 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
: ProfileRemappingFileName,
- LTOPhase, GetAssumptionCache, GetTTI, GetTLI);
+ LTOPhase, GetAssumptionCache, GetTTI, GetTLI);
- if (!SampleLoader.doInitialization(M, &FAM))
+ if (!SampleLoader.doInitialization(M, &FAM))
return PreservedAnalyses::all();
ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/SampleProfileProbe.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/SampleProfileProbe.cpp
index a885c3ee4d..0e7aec676b 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -1,434 +1,434 @@
-//===- SampleProfileProbe.cpp - Pseudo probe Instrumentation -------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the SampleProfileProber transformation.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/IPO/SampleProfileProbe.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/MDBuilder.h"
-#include "llvm/ProfileData/SampleProf.h"
-#include "llvm/Support/CRC.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Transforms/Instrumentation.h"
-#include "llvm/Transforms/Utils/ModuleUtils.h"
-#include <unordered_set>
-#include <vector>
-
-using namespace llvm;
-#define DEBUG_TYPE "sample-profile-probe"
-
-STATISTIC(ArtificialDbgLine,
- "Number of probes that have an artificial debug line");
-
-static cl::opt<bool>
- VerifyPseudoProbe("verify-pseudo-probe", cl::init(false), cl::Hidden,
- cl::desc("Do pseudo probe verification"));
-
-static cl::list<std::string> VerifyPseudoProbeFuncList(
- "verify-pseudo-probe-funcs", cl::Hidden,
- cl::desc("The option to specify the name of the functions to verify."));
-
-static cl::opt<bool>
- UpdatePseudoProbe("update-pseudo-probe", cl::init(true), cl::Hidden,
- cl::desc("Update pseudo probe distribution factor"));
-
-bool PseudoProbeVerifier::shouldVerifyFunction(const Function *F) {
-  // Skip function declarations.
- if (F->isDeclaration())
- return false;
-  // Skip functions that will not be emitted into the object file. The
-  // prevailing definition will be verified instead.
- if (F->hasAvailableExternallyLinkage())
- return false;
-  // Do name matching.
- static std::unordered_set<std::string> VerifyFuncNames(
- VerifyPseudoProbeFuncList.begin(), VerifyPseudoProbeFuncList.end());
- return VerifyFuncNames.empty() || VerifyFuncNames.count(F->getName().str());
-}
-
-void PseudoProbeVerifier::registerCallbacks(PassInstrumentationCallbacks &PIC) {
- if (VerifyPseudoProbe) {
- PIC.registerAfterPassCallback(
- [this](StringRef P, Any IR, const PreservedAnalyses &) {
- this->runAfterPass(P, IR);
- });
- }
-}
-
-// Callback to run after each transformation for the new pass manager.
-void PseudoProbeVerifier::runAfterPass(StringRef PassID, Any IR) {
- std::string Banner =
- "\n*** Pseudo Probe Verification After " + PassID.str() + " ***\n";
- dbgs() << Banner;
- if (any_isa<const Module *>(IR))
- runAfterPass(any_cast<const Module *>(IR));
- else if (any_isa<const Function *>(IR))
- runAfterPass(any_cast<const Function *>(IR));
- else if (any_isa<const LazyCallGraph::SCC *>(IR))
- runAfterPass(any_cast<const LazyCallGraph::SCC *>(IR));
- else if (any_isa<const Loop *>(IR))
- runAfterPass(any_cast<const Loop *>(IR));
- else
- llvm_unreachable("Unknown IR unit");
-}
-
-void PseudoProbeVerifier::runAfterPass(const Module *M) {
- for (const Function &F : *M)
- runAfterPass(&F);
-}
-
-void PseudoProbeVerifier::runAfterPass(const LazyCallGraph::SCC *C) {
- for (const LazyCallGraph::Node &N : *C)
- runAfterPass(&N.getFunction());
-}
-
-void PseudoProbeVerifier::runAfterPass(const Function *F) {
- if (!shouldVerifyFunction(F))
- return;
- ProbeFactorMap ProbeFactors;
- for (const auto &BB : *F)
- collectProbeFactors(&BB, ProbeFactors);
- verifyProbeFactors(F, ProbeFactors);
-}
-
-void PseudoProbeVerifier::runAfterPass(const Loop *L) {
- const Function *F = L->getHeader()->getParent();
- runAfterPass(F);
-}
-
-void PseudoProbeVerifier::collectProbeFactors(const BasicBlock *Block,
- ProbeFactorMap &ProbeFactors) {
- for (const auto &I : *Block) {
- if (Optional<PseudoProbe> Probe = extractProbe(I))
- ProbeFactors[Probe->Id] += Probe->Factor;
- }
-}
-
-void PseudoProbeVerifier::verifyProbeFactors(
- const Function *F, const ProbeFactorMap &ProbeFactors) {
- bool BannerPrinted = false;
- auto &PrevProbeFactors = FunctionProbeFactors[F->getName()];
- for (const auto &I : ProbeFactors) {
- float CurProbeFactor = I.second;
- if (PrevProbeFactors.count(I.first)) {
- float PrevProbeFactor = PrevProbeFactors[I.first];
- if (std::abs(CurProbeFactor - PrevProbeFactor) >
- DistributionFactorVariance) {
- if (!BannerPrinted) {
- dbgs() << "Function " << F->getName() << ":\n";
- BannerPrinted = true;
- }
- dbgs() << "Probe " << I.first << "\tprevious factor "
- << format("%0.2f", PrevProbeFactor) << "\tcurrent factor "
- << format("%0.2f", CurProbeFactor) << "\n";
- }
- }
-
- // Update
- PrevProbeFactors[I.first] = I.second;
- }
-}
-
-PseudoProbeManager::PseudoProbeManager(const Module &M) {
- if (NamedMDNode *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName)) {
- for (const auto *Operand : FuncInfo->operands()) {
- const auto *MD = cast<MDNode>(Operand);
- auto GUID =
- mdconst::dyn_extract<ConstantInt>(MD->getOperand(0))->getZExtValue();
- auto Hash =
- mdconst::dyn_extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
- GUIDToProbeDescMap.try_emplace(GUID, PseudoProbeDescriptor(GUID, Hash));
- }
- }
-}
-
-const PseudoProbeDescriptor *
-PseudoProbeManager::getDesc(const Function &F) const {
- auto I = GUIDToProbeDescMap.find(
- Function::getGUID(FunctionSamples::getCanonicalFnName(F)));
- return I == GUIDToProbeDescMap.end() ? nullptr : &I->second;
-}
-
-bool PseudoProbeManager::moduleIsProbed(const Module &M) const {
- return M.getNamedMetadata(PseudoProbeDescMetadataName);
-}
-
-bool PseudoProbeManager::profileIsValid(const Function &F,
- const FunctionSamples &Samples) const {
- const auto *Desc = getDesc(F);
- if (!Desc) {
- LLVM_DEBUG(dbgs() << "Probe descriptor missing for Function " << F.getName()
- << "\n");
- return false;
- } else {
- if (Desc->getFunctionHash() != Samples.getFunctionHash()) {
- LLVM_DEBUG(dbgs() << "Hash mismatch for Function " << F.getName()
- << "\n");
- return false;
- }
- }
- return true;
-}
-
-SampleProfileProber::SampleProfileProber(Function &Func,
- const std::string &CurModuleUniqueId)
- : F(&Func), CurModuleUniqueId(CurModuleUniqueId) {
- BlockProbeIds.clear();
- CallProbeIds.clear();
- LastProbeId = (uint32_t)PseudoProbeReservedId::Last;
- computeProbeIdForBlocks();
- computeProbeIdForCallsites();
- computeCFGHash();
-}
-
-// Compute a hash value for the CFG: the lower 32 bits are the CRC32 of the
-// index value of each BB in the CFG. The higher 32 bits record the number of
-// edges, preceded by the number of call site probes.
-// This is derived from FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash().
-void SampleProfileProber::computeCFGHash() {
- std::vector<uint8_t> Indexes;
- JamCRC JC;
- for (auto &BB : *F) {
- auto *TI = BB.getTerminator();
- for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
- auto *Succ = TI->getSuccessor(I);
- auto Index = getBlockId(Succ);
- for (int J = 0; J < 4; J++)
- Indexes.push_back((uint8_t)(Index >> (J * 8)));
- }
- }
-
- JC.update(Indexes);
-
- FunctionHash = (uint64_t)CallProbeIds.size() << 48 |
- (uint64_t)Indexes.size() << 32 | JC.getCRC();
-  // Reserve bits 60-63 for other information purposes.
- FunctionHash &= 0x0FFFFFFFFFFFFFFF;
- assert(FunctionHash && "Function checksum should not be zero");
- LLVM_DEBUG(dbgs() << "\nFunction Hash Computation for " << F->getName()
- << ":\n"
- << " CRC = " << JC.getCRC() << ", Edges = "
- << Indexes.size() << ", ICSites = " << CallProbeIds.size()
- << ", Hash = " << FunctionHash << "\n");
-}
-
-void SampleProfileProber::computeProbeIdForBlocks() {
- for (auto &BB : *F) {
- BlockProbeIds[&BB] = ++LastProbeId;
- }
-}
-
-void SampleProfileProber::computeProbeIdForCallsites() {
- for (auto &BB : *F) {
- for (auto &I : BB) {
- if (!isa<CallBase>(I))
- continue;
- if (isa<IntrinsicInst>(&I))
- continue;
- CallProbeIds[&I] = ++LastProbeId;
- }
- }
-}
-
-uint32_t SampleProfileProber::getBlockId(const BasicBlock *BB) const {
- auto I = BlockProbeIds.find(const_cast<BasicBlock *>(BB));
- return I == BlockProbeIds.end() ? 0 : I->second;
-}
-
-uint32_t SampleProfileProber::getCallsiteId(const Instruction *Call) const {
- auto Iter = CallProbeIds.find(const_cast<Instruction *>(Call));
- return Iter == CallProbeIds.end() ? 0 : Iter->second;
-}
-
-void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) {
- Module *M = F.getParent();
- MDBuilder MDB(F.getContext());
- // Compute a GUID without considering the function's linkage type. This is
-  // fine since the function name is the only key in the profile database.
- uint64_t Guid = Function::getGUID(F.getName());
-
- // Assign an artificial debug line to a probe that doesn't come with a real
- // line. A probe not having a debug line will get an incomplete inline
- // context. This will cause samples collected on the probe to be counted
- // into the base profile instead of a context profile. The line number
- // itself is not important though.
- auto AssignDebugLoc = [&](Instruction *I) {
- assert((isa<PseudoProbeInst>(I) || isa<CallBase>(I)) &&
- "Expecting pseudo probe or call instructions");
- if (!I->getDebugLoc()) {
- if (auto *SP = F.getSubprogram()) {
- auto DIL = DILocation::get(SP->getContext(), 0, 0, SP);
- I->setDebugLoc(DIL);
- ArtificialDbgLine++;
- LLVM_DEBUG({
- dbgs() << "\nIn Function " << F.getName()
- << " Probe gets an artificial debug line\n";
- I->dump();
- });
- }
- }
- };
-
- // Probe basic blocks.
- for (auto &I : BlockProbeIds) {
- BasicBlock *BB = I.first;
- uint32_t Index = I.second;
- // Insert a probe before an instruction with a valid debug line number which
- // will be assigned to the probe. The line number will be used later to
- // model the inline context when the probe is inlined into other functions.
-    // Debug instructions, phi nodes and lifetime markers do not have a valid
- // line number. Real instructions generated by optimizations may not come
- // with a line number either.
- auto HasValidDbgLine = [](Instruction *J) {
- return !isa<PHINode>(J) && !isa<DbgInfoIntrinsic>(J) &&
- !J->isLifetimeStartOrEnd() && J->getDebugLoc();
- };
-
- Instruction *J = &*BB->getFirstInsertionPt();
- while (J != BB->getTerminator() && !HasValidDbgLine(J)) {
- J = J->getNextNode();
- }
-
- IRBuilder<> Builder(J);
- assert(Builder.GetInsertPoint() != BB->end() &&
- "Cannot get the probing point");
- Function *ProbeFn =
- llvm::Intrinsic::getDeclaration(M, Intrinsic::pseudoprobe);
- Value *Args[] = {Builder.getInt64(Guid), Builder.getInt64(Index),
- Builder.getInt32(0),
- Builder.getInt64(PseudoProbeFullDistributionFactor)};
- auto *Probe = Builder.CreateCall(ProbeFn, Args);
- AssignDebugLoc(Probe);
- }
-
- // Probe both direct calls and indirect calls. Direct calls are probed so that
-  // their probe ID can be used as a call site identifier to represent a
- // calling context.
- for (auto &I : CallProbeIds) {
- auto *Call = I.first;
- uint32_t Index = I.second;
- uint32_t Type = cast<CallBase>(Call)->getCalledFunction()
- ? (uint32_t)PseudoProbeType::DirectCall
- : (uint32_t)PseudoProbeType::IndirectCall;
- AssignDebugLoc(Call);
-    // Leverage the 32-bit discriminator field of debug data to store the ID and
-    // type of a callsite probe. This gets rid of the dependency on plumbing
- // customized metadata through the codegen pipeline.
- uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(
- Index, Type, 0, PseudoProbeDwarfDiscriminator::FullDistributionFactor);
- if (auto DIL = Call->getDebugLoc()) {
- DIL = DIL->cloneWithDiscriminator(V);
- Call->setDebugLoc(DIL);
- }
- }
-
- // Create module-level metadata that contains function info necessary to
- // synthesize probe-based sample counts, which are
- // - FunctionGUID
- // - FunctionHash.
- // - FunctionName
- auto Hash = getFunctionHash();
- auto *MD = MDB.createPseudoProbeDesc(Guid, Hash, &F);
- auto *NMD = M->getNamedMetadata(PseudoProbeDescMetadataName);
- assert(NMD && "llvm.pseudo_probe_desc should be pre-created");
- NMD->addOperand(MD);
-
-  // Preserve a comdat group to hold all probes materialized later. This
-  // ensures that when the function is considered dead and removed, the
-  // materialized probes are disposed of too.
-  // Imported functions are defined in another module. They do not need
-  // the following handling since the same care will be taken for them in
-  // their original module. The pseudo probes inserted into an imported
-  // function above will naturally not be emitted since the imported
-  // function is free from object emission. However, they will be emitted
-  // together with the functions that the imported function is inlined into.
-  // We are not creating a comdat group for an imported function since it's
-  // useless anyway.
- if (!F.isDeclarationForLinker()) {
- if (TM) {
- auto Triple = TM->getTargetTriple();
- if (Triple.supportsCOMDAT() && TM->getFunctionSections()) {
- GetOrCreateFunctionComdat(F, Triple, CurModuleUniqueId);
- }
- }
- }
-}
-
-PreservedAnalyses SampleProfileProbePass::run(Module &M,
- ModuleAnalysisManager &AM) {
- auto ModuleId = getUniqueModuleId(&M);
- // Create the pseudo probe desc metadata beforehand.
- // Note that modules with only data but no functions will require this to
- // be set up so that they will be known as probed later.
- M.getOrInsertNamedMetadata(PseudoProbeDescMetadataName);
-
- for (auto &F : M) {
- if (F.isDeclaration())
- continue;
- SampleProfileProber ProbeManager(F, ModuleId);
- ProbeManager.instrumentOneFunc(F, TM);
- }
-
- return PreservedAnalyses::none();
-}
-
-void PseudoProbeUpdatePass::runOnFunction(Function &F,
- FunctionAnalysisManager &FAM) {
- BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
- auto BBProfileCount = [&BFI](BasicBlock *BB) {
- return BFI.getBlockProfileCount(BB)
- ? BFI.getBlockProfileCount(BB).getValue()
- : 0;
- };
-
- // Collect the sum of execution weight for each probe.
- ProbeFactorMap ProbeFactors;
- for (auto &Block : F) {
- for (auto &I : Block) {
- if (Optional<PseudoProbe> Probe = extractProbe(I))
- ProbeFactors[Probe->Id] += BBProfileCount(&Block);
- }
- }
-
- // Fix up over-counted probes.
- for (auto &Block : F) {
- for (auto &I : Block) {
- if (Optional<PseudoProbe> Probe = extractProbe(I)) {
- float Sum = ProbeFactors[Probe->Id];
- if (Sum != 0)
- setProbeDistributionFactor(I, BBProfileCount(&Block) / Sum);
- }
- }
- }
-}
-
-PreservedAnalyses PseudoProbeUpdatePass::run(Module &M,
- ModuleAnalysisManager &AM) {
- if (UpdatePseudoProbe) {
- for (auto &F : M) {
- if (F.isDeclaration())
- continue;
- FunctionAnalysisManager &FAM =
- AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
- runOnFunction(F, FAM);
- }
- }
- return PreservedAnalyses::none();
-}
+//===- SampleProfileProbe.cpp - Pseudo probe Instrumentation -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SampleProfileProber transformation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/SampleProfileProbe.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/ProfileData/SampleProf.h"
+#include "llvm/Support/CRC.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <unordered_set>
+#include <vector>
+
+using namespace llvm;
+#define DEBUG_TYPE "sample-profile-probe"
+
+STATISTIC(ArtificialDbgLine,
+ "Number of probes that have an artificial debug line");
+
+static cl::opt<bool>
+ VerifyPseudoProbe("verify-pseudo-probe", cl::init(false), cl::Hidden,
+ cl::desc("Do pseudo probe verification"));
+
+static cl::list<std::string> VerifyPseudoProbeFuncList(
+ "verify-pseudo-probe-funcs", cl::Hidden,
+ cl::desc("The option to specify the name of the functions to verify."));
+
+static cl::opt<bool>
+ UpdatePseudoProbe("update-pseudo-probe", cl::init(true), cl::Hidden,
+ cl::desc("Update pseudo probe distribution factor"));
+
+bool PseudoProbeVerifier::shouldVerifyFunction(const Function *F) {
+  // Skip function declarations.
+ if (F->isDeclaration())
+ return false;
+  // Skip functions that will not be emitted into the object file. The
+  // prevailing definition will be verified instead.
+ if (F->hasAvailableExternallyLinkage())
+ return false;
+  // Do name matching.
+ static std::unordered_set<std::string> VerifyFuncNames(
+ VerifyPseudoProbeFuncList.begin(), VerifyPseudoProbeFuncList.end());
+ return VerifyFuncNames.empty() || VerifyFuncNames.count(F->getName().str());
+}
+
+void PseudoProbeVerifier::registerCallbacks(PassInstrumentationCallbacks &PIC) {
+ if (VerifyPseudoProbe) {
+ PIC.registerAfterPassCallback(
+ [this](StringRef P, Any IR, const PreservedAnalyses &) {
+ this->runAfterPass(P, IR);
+ });
+ }
+}
+
+// Callback to run after each transformation for the new pass manager.
+void PseudoProbeVerifier::runAfterPass(StringRef PassID, Any IR) {
+ std::string Banner =
+ "\n*** Pseudo Probe Verification After " + PassID.str() + " ***\n";
+ dbgs() << Banner;
+ if (any_isa<const Module *>(IR))
+ runAfterPass(any_cast<const Module *>(IR));
+ else if (any_isa<const Function *>(IR))
+ runAfterPass(any_cast<const Function *>(IR));
+ else if (any_isa<const LazyCallGraph::SCC *>(IR))
+ runAfterPass(any_cast<const LazyCallGraph::SCC *>(IR));
+ else if (any_isa<const Loop *>(IR))
+ runAfterPass(any_cast<const Loop *>(IR));
+ else
+ llvm_unreachable("Unknown IR unit");
+}
+
+void PseudoProbeVerifier::runAfterPass(const Module *M) {
+ for (const Function &F : *M)
+ runAfterPass(&F);
+}
+
+void PseudoProbeVerifier::runAfterPass(const LazyCallGraph::SCC *C) {
+ for (const LazyCallGraph::Node &N : *C)
+ runAfterPass(&N.getFunction());
+}
+
+void PseudoProbeVerifier::runAfterPass(const Function *F) {
+ if (!shouldVerifyFunction(F))
+ return;
+ ProbeFactorMap ProbeFactors;
+ for (const auto &BB : *F)
+ collectProbeFactors(&BB, ProbeFactors);
+ verifyProbeFactors(F, ProbeFactors);
+}
+
+void PseudoProbeVerifier::runAfterPass(const Loop *L) {
+ const Function *F = L->getHeader()->getParent();
+ runAfterPass(F);
+}
+
+void PseudoProbeVerifier::collectProbeFactors(const BasicBlock *Block,
+ ProbeFactorMap &ProbeFactors) {
+ for (const auto &I : *Block) {
+ if (Optional<PseudoProbe> Probe = extractProbe(I))
+ ProbeFactors[Probe->Id] += Probe->Factor;
+ }
+}
+
+void PseudoProbeVerifier::verifyProbeFactors(
+ const Function *F, const ProbeFactorMap &ProbeFactors) {
+ bool BannerPrinted = false;
+ auto &PrevProbeFactors = FunctionProbeFactors[F->getName()];
+ for (const auto &I : ProbeFactors) {
+ float CurProbeFactor = I.second;
+ if (PrevProbeFactors.count(I.first)) {
+ float PrevProbeFactor = PrevProbeFactors[I.first];
+ if (std::abs(CurProbeFactor - PrevProbeFactor) >
+ DistributionFactorVariance) {
+ if (!BannerPrinted) {
+ dbgs() << "Function " << F->getName() << ":\n";
+ BannerPrinted = true;
+ }
+ dbgs() << "Probe " << I.first << "\tprevious factor "
+ << format("%0.2f", PrevProbeFactor) << "\tcurrent factor "
+ << format("%0.2f", CurProbeFactor) << "\n";
+ }
+ }
+
+ // Update
+ PrevProbeFactors[I.first] = I.second;
+ }
+}
+
+PseudoProbeManager::PseudoProbeManager(const Module &M) {
+ if (NamedMDNode *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName)) {
+ for (const auto *Operand : FuncInfo->operands()) {
+ const auto *MD = cast<MDNode>(Operand);
+ auto GUID =
+ mdconst::dyn_extract<ConstantInt>(MD->getOperand(0))->getZExtValue();
+ auto Hash =
+ mdconst::dyn_extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
+ GUIDToProbeDescMap.try_emplace(GUID, PseudoProbeDescriptor(GUID, Hash));
+ }
+ }
+}
+
+const PseudoProbeDescriptor *
+PseudoProbeManager::getDesc(const Function &F) const {
+ auto I = GUIDToProbeDescMap.find(
+ Function::getGUID(FunctionSamples::getCanonicalFnName(F)));
+ return I == GUIDToProbeDescMap.end() ? nullptr : &I->second;
+}
+
+bool PseudoProbeManager::moduleIsProbed(const Module &M) const {
+ return M.getNamedMetadata(PseudoProbeDescMetadataName);
+}
+
+bool PseudoProbeManager::profileIsValid(const Function &F,
+ const FunctionSamples &Samples) const {
+ const auto *Desc = getDesc(F);
+ if (!Desc) {
+ LLVM_DEBUG(dbgs() << "Probe descriptor missing for Function " << F.getName()
+ << "\n");
+ return false;
+ } else {
+ if (Desc->getFunctionHash() != Samples.getFunctionHash()) {
+ LLVM_DEBUG(dbgs() << "Hash mismatch for Function " << F.getName()
+ << "\n");
+ return false;
+ }
+ }
+ return true;
+}
+
+SampleProfileProber::SampleProfileProber(Function &Func,
+ const std::string &CurModuleUniqueId)
+ : F(&Func), CurModuleUniqueId(CurModuleUniqueId) {
+ BlockProbeIds.clear();
+ CallProbeIds.clear();
+ LastProbeId = (uint32_t)PseudoProbeReservedId::Last;
+ computeProbeIdForBlocks();
+ computeProbeIdForCallsites();
+ computeCFGHash();
+}
+
+// Compute a hash value for the CFG: the lower 32 bits are the CRC32 of the
+// index value of each BB in the CFG. The higher 32 bits record the number of
+// edges, preceded by the number of call site probes.
+// This is derived from FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash().
+void SampleProfileProber::computeCFGHash() {
+ std::vector<uint8_t> Indexes;
+ JamCRC JC;
+ for (auto &BB : *F) {
+ auto *TI = BB.getTerminator();
+ for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
+ auto *Succ = TI->getSuccessor(I);
+ auto Index = getBlockId(Succ);
+ for (int J = 0; J < 4; J++)
+ Indexes.push_back((uint8_t)(Index >> (J * 8)));
+ }
+ }
+
+ JC.update(Indexes);
+
+ FunctionHash = (uint64_t)CallProbeIds.size() << 48 |
+ (uint64_t)Indexes.size() << 32 | JC.getCRC();
+  // Reserve bits 60-63 for other information purposes.
+ FunctionHash &= 0x0FFFFFFFFFFFFFFF;
+ assert(FunctionHash && "Function checksum should not be zero");
+ LLVM_DEBUG(dbgs() << "\nFunction Hash Computation for " << F->getName()
+ << ":\n"
+ << " CRC = " << JC.getCRC() << ", Edges = "
+ << Indexes.size() << ", ICSites = " << CallProbeIds.size()
+ << ", Hash = " << FunctionHash << "\n");
+}
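
Spelled out, the packing above puts the call-probe count in bits 48-59, the size of the successor-index byte vector (four bytes per CFG edge) in bits 32-47, and the CRC32 in the low 32 bits, then masks off the reserved top nibble. A minimal standalone sketch of that packing:

```cpp
#include <cstdint>

// Mirror of the FunctionHash packing in computeCFGHash above.
// NumIndexBytes corresponds to Indexes.size(), i.e. 4 bytes per edge.
uint64_t packFunctionHash(uint64_t NumCallProbes, uint64_t NumIndexBytes,
                          uint32_t CRC) {
  uint64_t Hash = NumCallProbes << 48 | NumIndexBytes << 32 | CRC;
  // Bits 60-63 are reserved for other information.
  return Hash & 0x0FFFFFFFFFFFFFFF;
}
```
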
+
+void SampleProfileProber::computeProbeIdForBlocks() {
+ for (auto &BB : *F) {
+ BlockProbeIds[&BB] = ++LastProbeId;
+ }
+}
+
+void SampleProfileProber::computeProbeIdForCallsites() {
+ for (auto &BB : *F) {
+ for (auto &I : BB) {
+ if (!isa<CallBase>(I))
+ continue;
+ if (isa<IntrinsicInst>(&I))
+ continue;
+ CallProbeIds[&I] = ++LastProbeId;
+ }
+ }
+}
+
+uint32_t SampleProfileProber::getBlockId(const BasicBlock *BB) const {
+ auto I = BlockProbeIds.find(const_cast<BasicBlock *>(BB));
+ return I == BlockProbeIds.end() ? 0 : I->second;
+}
+
+uint32_t SampleProfileProber::getCallsiteId(const Instruction *Call) const {
+ auto Iter = CallProbeIds.find(const_cast<Instruction *>(Call));
+ return Iter == CallProbeIds.end() ? 0 : Iter->second;
+}
+
+void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) {
+ Module *M = F.getParent();
+ MDBuilder MDB(F.getContext());
+ // Compute a GUID without considering the function's linkage type. This is
+  // fine since the function name is the only key in the profile database.
+ uint64_t Guid = Function::getGUID(F.getName());
+
+ // Assign an artificial debug line to a probe that doesn't come with a real
+ // line. A probe not having a debug line will get an incomplete inline
+ // context. This will cause samples collected on the probe to be counted
+ // into the base profile instead of a context profile. The line number
+ // itself is not important though.
+ auto AssignDebugLoc = [&](Instruction *I) {
+ assert((isa<PseudoProbeInst>(I) || isa<CallBase>(I)) &&
+ "Expecting pseudo probe or call instructions");
+ if (!I->getDebugLoc()) {
+ if (auto *SP = F.getSubprogram()) {
+ auto DIL = DILocation::get(SP->getContext(), 0, 0, SP);
+ I->setDebugLoc(DIL);
+ ArtificialDbgLine++;
+ LLVM_DEBUG({
+ dbgs() << "\nIn Function " << F.getName()
+ << " Probe gets an artificial debug line\n";
+ I->dump();
+ });
+ }
+ }
+ };
+
+ // Probe basic blocks.
+ for (auto &I : BlockProbeIds) {
+ BasicBlock *BB = I.first;
+ uint32_t Index = I.second;
+ // Insert a probe before an instruction with a valid debug line number which
+ // will be assigned to the probe. The line number will be used later to
+ // model the inline context when the probe is inlined into other functions.
+    // Debug instructions, phi nodes and lifetime markers do not have a valid
+ // line number. Real instructions generated by optimizations may not come
+ // with a line number either.
+ auto HasValidDbgLine = [](Instruction *J) {
+ return !isa<PHINode>(J) && !isa<DbgInfoIntrinsic>(J) &&
+ !J->isLifetimeStartOrEnd() && J->getDebugLoc();
+ };
+
+ Instruction *J = &*BB->getFirstInsertionPt();
+ while (J != BB->getTerminator() && !HasValidDbgLine(J)) {
+ J = J->getNextNode();
+ }
+
+ IRBuilder<> Builder(J);
+ assert(Builder.GetInsertPoint() != BB->end() &&
+ "Cannot get the probing point");
+ Function *ProbeFn =
+ llvm::Intrinsic::getDeclaration(M, Intrinsic::pseudoprobe);
+ Value *Args[] = {Builder.getInt64(Guid), Builder.getInt64(Index),
+ Builder.getInt32(0),
+ Builder.getInt64(PseudoProbeFullDistributionFactor)};
+ auto *Probe = Builder.CreateCall(ProbeFn, Args);
+ AssignDebugLoc(Probe);
+ }
+
+ // Probe both direct calls and indirect calls. Direct calls are probed so that
+  // their probe ID can be used as a call site identifier to represent a
+ // calling context.
+ for (auto &I : CallProbeIds) {
+ auto *Call = I.first;
+ uint32_t Index = I.second;
+ uint32_t Type = cast<CallBase>(Call)->getCalledFunction()
+ ? (uint32_t)PseudoProbeType::DirectCall
+ : (uint32_t)PseudoProbeType::IndirectCall;
+ AssignDebugLoc(Call);
+    // Leverage the 32-bit discriminator field of debug data to store the ID and
+    // type of a callsite probe. This gets rid of the dependency on plumbing
+ // customized metadata through the codegen pipeline.
+ uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(
+ Index, Type, 0, PseudoProbeDwarfDiscriminator::FullDistributionFactor);
+ if (auto DIL = Call->getDebugLoc()) {
+ DIL = DIL->cloneWithDiscriminator(V);
+ Call->setDebugLoc(DIL);
+ }
+ }
+
+ // Create module-level metadata that contains function info necessary to
+ // synthesize probe-based sample counts, which are
+ // - FunctionGUID
+ // - FunctionHash.
+ // - FunctionName
+ auto Hash = getFunctionHash();
+ auto *MD = MDB.createPseudoProbeDesc(Guid, Hash, &F);
+ auto *NMD = M->getNamedMetadata(PseudoProbeDescMetadataName);
+ assert(NMD && "llvm.pseudo_probe_desc should be pre-created");
+ NMD->addOperand(MD);
+
+  // Preserve a comdat group to hold all probes materialized later. This
+  // ensures that when the function is considered dead and removed, the
+  // materialized probes are disposed of too.
+  // Imported functions are defined in another module. They do not need
+  // the following handling since the same care will be taken for them in
+  // their original module. The pseudo probes inserted into an imported
+  // function above will naturally not be emitted since the imported
+  // function is free from object emission. However, they will be emitted
+  // together with the functions that the imported function is inlined into.
+  // We are not creating a comdat group for an imported function since it's
+  // useless anyway.
+ if (!F.isDeclarationForLinker()) {
+ if (TM) {
+ auto Triple = TM->getTargetTriple();
+ if (Triple.supportsCOMDAT() && TM->getFunctionSections()) {
+ GetOrCreateFunctionComdat(F, Triple, CurModuleUniqueId);
+ }
+ }
+ }
+}
+
+PreservedAnalyses SampleProfileProbePass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ auto ModuleId = getUniqueModuleId(&M);
+ // Create the pseudo probe desc metadata beforehand.
+ // Note that modules with only data but no functions will require this to
+ // be set up so that they will be known as probed later.
+ M.getOrInsertNamedMetadata(PseudoProbeDescMetadataName);
+
+ for (auto &F : M) {
+ if (F.isDeclaration())
+ continue;
+ SampleProfileProber ProbeManager(F, ModuleId);
+ ProbeManager.instrumentOneFunc(F, TM);
+ }
+
+ return PreservedAnalyses::none();
+}
+
+void PseudoProbeUpdatePass::runOnFunction(Function &F,
+ FunctionAnalysisManager &FAM) {
+ BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+ auto BBProfileCount = [&BFI](BasicBlock *BB) {
+ return BFI.getBlockProfileCount(BB)
+ ? BFI.getBlockProfileCount(BB).getValue()
+ : 0;
+ };
+
+ // Collect the sum of execution weight for each probe.
+ ProbeFactorMap ProbeFactors;
+ for (auto &Block : F) {
+ for (auto &I : Block) {
+ if (Optional<PseudoProbe> Probe = extractProbe(I))
+ ProbeFactors[Probe->Id] += BBProfileCount(&Block);
+ }
+ }
+
+ // Fix up over-counted probes.
+ for (auto &Block : F) {
+ for (auto &I : Block) {
+ if (Optional<PseudoProbe> Probe = extractProbe(I)) {
+ float Sum = ProbeFactors[Probe->Id];
+ if (Sum != 0)
+ setProbeDistributionFactor(I, BBProfileCount(&Block) / Sum);
+ }
+ }
+ }
+}
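
For intuition about the fix-up above: if an earlier transformation duplicates probe 1 into two blocks with profile counts 30 and 70, the first pass collects a sum of 100 and the second pass sets distribution factors of 0.3 and 0.7, so the probe's total weight still adds up to one. A self-contained model of the two-pass computation, using hypothetical count data:

```cpp
#include <cstdint>
#include <cstdio>
#include <map>
#include <utility>
#include <vector>

int main() {
  // (probe ID, owning block's profile count); e.g. probe 1 duplicated
  // into two blocks by an earlier transformation.
  std::vector<std::pair<uint32_t, uint64_t>> Probes = {{1, 30}, {1, 70}};

  // Pass 1: sum the execution weight per probe ID.
  std::map<uint32_t, float> Sum;
  for (auto &P : Probes)
    Sum[P.first] += P.second;

  // Pass 2: scale each copy by its block's share of the sum.
  for (auto &P : Probes) {
    float S = Sum[P.first];
    float Factor = S != 0 ? P.second / S : 0.0f;
    std::printf("probe %u factor %.2f\n", P.first, Factor); // 0.30, 0.70
  }
  return 0;
}
```
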
+
+PreservedAnalyses PseudoProbeUpdatePass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ if (UpdatePseudoProbe) {
+ for (auto &F : M) {
+ if (F.isDeclaration())
+ continue;
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ runOnFunction(F, FAM);
+ }
+ }
+ return PreservedAnalyses::none();
+}
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/StripSymbols.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/StripSymbols.cpp
index 4fc71847a0..7fc7ab71cb 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/StripSymbols.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/StripSymbols.cpp
@@ -19,21 +19,21 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO/StripSymbols.h"
+#include "llvm/Transforms/IPO/StripSymbols.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/TypeFinder.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Local.h"
-
+
using namespace llvm;
namespace {
@@ -252,7 +252,7 @@ bool StripNonDebugSymbols::runOnModule(Module &M) {
return StripSymbolNames(M, true);
}
-static bool stripDebugDeclareImpl(Module &M) {
+static bool stripDebugDeclareImpl(Module &M) {
Function *Declare = M.getFunction("llvm.dbg.declare");
std::vector<Constant*> DeadConstants;
@@ -290,13 +290,13 @@ static bool stripDebugDeclareImpl(Module &M) {
return true;
}
-bool StripDebugDeclare::runOnModule(Module &M) {
+bool StripDebugDeclare::runOnModule(Module &M) {
if (skipModule(M))
return false;
- return stripDebugDeclareImpl(M);
-}
+ return stripDebugDeclareImpl(M);
+}
-static bool stripDeadDebugInfoImpl(Module &M) {
+static bool stripDeadDebugInfoImpl(Module &M) {
bool Changed = false;
LLVMContext &C = M.getContext();
@@ -377,40 +377,40 @@ static bool stripDeadDebugInfoImpl(Module &M) {
return Changed;
}
-
-/// Remove any debug info for global variables/functions in the given module for
-/// which said global variable/function no longer exists (i.e. is null).
-///
-/// Debugging information is encoded in LLVM IR using metadata. This is
-/// designed in such a way that debug info for symbols is preserved even if
-/// the symbols are optimized away by the optimizer. This special pass
-/// removes debug info for such symbols.
-bool StripDeadDebugInfo::runOnModule(Module &M) {
- if (skipModule(M))
- return false;
- return stripDeadDebugInfoImpl(M);
-}
-
-PreservedAnalyses StripSymbolsPass::run(Module &M, ModuleAnalysisManager &AM) {
- StripDebugInfo(M);
- StripSymbolNames(M, false);
- return PreservedAnalyses::all();
-}
-
-PreservedAnalyses StripNonDebugSymbolsPass::run(Module &M,
- ModuleAnalysisManager &AM) {
- StripSymbolNames(M, true);
- return PreservedAnalyses::all();
-}
-
-PreservedAnalyses StripDebugDeclarePass::run(Module &M,
- ModuleAnalysisManager &AM) {
- stripDebugDeclareImpl(M);
- return PreservedAnalyses::all();
-}
-
-PreservedAnalyses StripDeadDebugInfoPass::run(Module &M,
- ModuleAnalysisManager &AM) {
- stripDeadDebugInfoImpl(M);
- return PreservedAnalyses::all();
-}
+
+/// Remove any debug info for global variables/functions in the given module for
+/// which said global variable/function no longer exists (i.e. is null).
+///
+/// Debugging information is encoded in LLVM IR using metadata. This is
+/// designed in such a way that debug info for symbols is preserved even if
+/// the symbols are optimized away by the optimizer. This special pass
+/// removes debug info for such symbols.
+bool StripDeadDebugInfo::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+ return stripDeadDebugInfoImpl(M);
+}
+
+PreservedAnalyses StripSymbolsPass::run(Module &M, ModuleAnalysisManager &AM) {
+ StripDebugInfo(M);
+ StripSymbolNames(M, false);
+ return PreservedAnalyses::all();
+}
+
+PreservedAnalyses StripNonDebugSymbolsPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ StripSymbolNames(M, true);
+ return PreservedAnalyses::all();
+}
+
+PreservedAnalyses StripDebugDeclarePass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ stripDebugDeclareImpl(M);
+ return PreservedAnalyses::all();
+}
+
+PreservedAnalyses StripDeadDebugInfoPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ stripDeadDebugInfoImpl(M);
+ return PreservedAnalyses::all();
+}
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 225b4fe95f..82de762f23 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -14,7 +14,7 @@
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
@@ -261,7 +261,7 @@ void splitAndWriteThinLTOBitcode(
if (!RT || RT->getBitWidth() > 64 || F->arg_empty() ||
!F->arg_begin()->use_empty())
return;
- for (auto &Arg : drop_begin(F->args())) {
+ for (auto &Arg : drop_begin(F->args())) {
auto *ArgT = dyn_cast<IntegerType>(Arg.getType());
if (!ArgT || ArgT->getBitWidth() > 64)
return;
@@ -334,7 +334,7 @@ void splitAndWriteThinLTOBitcode(
Linkage = CFL_Declaration;
Elts.push_back(ConstantAsMetadata::get(
llvm::ConstantInt::get(Type::getInt8Ty(Ctx), Linkage)));
- append_range(Elts, Types);
+ append_range(Elts, Types);
CfiFunctionMDs.push_back(MDTuple::get(Ctx, Elts));
}
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/WholeProgramDevirt.cpp b/contrib/libs/llvm12/lib/Transforms/IPO/WholeProgramDevirt.cpp
index cf1ff405c4..1c851975bb 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -59,7 +59,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator_range.h"
-#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
@@ -470,7 +470,7 @@ CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallBase &CB) {
auto *CBType = dyn_cast<IntegerType>(CB.getType());
if (!CBType || CBType->getBitWidth() > 64 || CB.arg_empty())
return CSInfo;
- for (auto &&Arg : drop_begin(CB.args())) {
+ for (auto &&Arg : drop_begin(CB.args())) {
auto *CI = dyn_cast<ConstantInt>(Arg);
if (!CI || CI->getBitWidth() > 64)
return CSInfo;
@@ -753,11 +753,11 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
return FAM.getResult<DominatorTreeAnalysis>(F);
};
- if (UseCommandLine) {
- if (DevirtModule::runForTesting(M, AARGetter, OREGetter, LookupDomTree))
- return PreservedAnalyses::all();
- return PreservedAnalyses::none();
- }
+ if (UseCommandLine) {
+ if (DevirtModule::runForTesting(M, AARGetter, OREGetter, LookupDomTree))
+ return PreservedAnalyses::all();
+ return PreservedAnalyses::none();
+ }
if (!DevirtModule(M, AARGetter, OREGetter, LookupDomTree, ExportSummary,
ImportSummary)
.run())
@@ -1030,10 +1030,10 @@ bool DevirtIndex::tryFindVirtualCallTargets(
void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
Constant *TheFn, bool &IsExported) {
-  // Don't devirtualize the function if we're told to skip it
- // in -wholeprogramdevirt-skip.
- if (FunctionsToSkip.match(TheFn->stripPointerCasts()->getName()))
- return;
+  // Don't devirtualize the function if we're told to skip it
+ // in -wholeprogramdevirt-skip.
+ if (FunctionsToSkip.match(TheFn->stripPointerCasts()->getName()))
+ return;
auto Apply = [&](CallSiteInfo &CSInfo) {
for (auto &&VCallSite : CSInfo.CallSites) {
if (RemarksEnabled)
@@ -1267,7 +1267,7 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
// Jump tables are only profitable if the retpoline mitigation is enabled.
Attribute FSAttr = CB.getCaller()->getFnAttribute("target-features");
- if (!FSAttr.isValid() ||
+ if (!FSAttr.isValid() ||
!FSAttr.getValueAsString().contains("+retpoline"))
continue;
@@ -1279,7 +1279,7 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
// x86_64.
std::vector<Type *> NewArgs;
NewArgs.push_back(Int8PtrTy);
- append_range(NewArgs, CB.getFunctionType()->params());
+ append_range(NewArgs, CB.getFunctionType()->params());
FunctionType *NewFT =
FunctionType::get(CB.getFunctionType()->getReturnType(), NewArgs,
CB.getFunctionType()->isVarArg());
@@ -1288,7 +1288,7 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
IRBuilder<> IRB(&CB);
std::vector<Value *> Args;
Args.push_back(IRB.CreateBitCast(VCallSite.VTable, Int8PtrTy));
- llvm::append_range(Args, CB.args());
+ llvm::append_range(Args, CB.args());
CallBase *NewCS = nullptr;
if (isa<CallInst>(CB))
diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/ya.make b/contrib/libs/llvm12/lib/Transforms/IPO/ya.make
index 5b078050fe..ab6721253b 100644
--- a/contrib/libs/llvm12/lib/Transforms/IPO/ya.make
+++ b/contrib/libs/llvm12/lib/Transforms/IPO/ya.make
@@ -12,24 +12,24 @@ LICENSE(Apache-2.0 WITH LLVM-exception)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
- contrib/libs/llvm12
- contrib/libs/llvm12/include
- contrib/libs/llvm12/lib/Analysis
- contrib/libs/llvm12/lib/Bitcode/Reader
- contrib/libs/llvm12/lib/Bitcode/Writer
- contrib/libs/llvm12/lib/Frontend/OpenMP
- contrib/libs/llvm12/lib/IR
- contrib/libs/llvm12/lib/IRReader
- contrib/libs/llvm12/lib/Linker
- contrib/libs/llvm12/lib/Object
- contrib/libs/llvm12/lib/ProfileData
- contrib/libs/llvm12/lib/Support
- contrib/libs/llvm12/lib/Transforms/AggressiveInstCombine
- contrib/libs/llvm12/lib/Transforms/InstCombine
- contrib/libs/llvm12/lib/Transforms/Instrumentation
- contrib/libs/llvm12/lib/Transforms/Scalar
- contrib/libs/llvm12/lib/Transforms/Utils
- contrib/libs/llvm12/lib/Transforms/Vectorize
+ contrib/libs/llvm12
+ contrib/libs/llvm12/include
+ contrib/libs/llvm12/lib/Analysis
+ contrib/libs/llvm12/lib/Bitcode/Reader
+ contrib/libs/llvm12/lib/Bitcode/Writer
+ contrib/libs/llvm12/lib/Frontend/OpenMP
+ contrib/libs/llvm12/lib/IR
+ contrib/libs/llvm12/lib/IRReader
+ contrib/libs/llvm12/lib/Linker
+ contrib/libs/llvm12/lib/Object
+ contrib/libs/llvm12/lib/ProfileData
+ contrib/libs/llvm12/lib/Support
+ contrib/libs/llvm12/lib/Transforms/AggressiveInstCombine
+ contrib/libs/llvm12/lib/Transforms/InstCombine
+ contrib/libs/llvm12/lib/Transforms/Instrumentation
+ contrib/libs/llvm12/lib/Transforms/Scalar
+ contrib/libs/llvm12/lib/Transforms/Utils
+ contrib/libs/llvm12/lib/Transforms/Vectorize
)
ADDINCL(
@@ -42,7 +42,7 @@ NO_UTIL()
SRCS(
AlwaysInliner.cpp
- Annotation2Metadata.cpp
+ Annotation2Metadata.cpp
ArgumentPromotion.cpp
Attributor.cpp
AttributorAttributes.cpp
@@ -62,7 +62,7 @@ SRCS(
GlobalSplit.cpp
HotColdSplitting.cpp
IPO.cpp
- IROutliner.cpp
+ IROutliner.cpp
InferFunctionAttrs.cpp
InlineSimple.cpp
Inliner.cpp
@@ -75,9 +75,9 @@ SRCS(
PassManagerBuilder.cpp
PruneEH.cpp
SCCP.cpp
- SampleContextTracker.cpp
+ SampleContextTracker.cpp
SampleProfile.cpp
- SampleProfileProbe.cpp
+ SampleProfileProbe.cpp
StripDeadPrototypes.cpp
StripSymbols.cpp
SyntheticCountsPropagation.cpp