| author | shadchin <shadchin@yandex-team.ru> | 2022-02-10 16:44:39 +0300 |
|---|---|---|
| committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:39 +0300 |
| commit | e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (patch) | |
| tree | 64175d5cadab313b3e7039ebaa06c5bc3295e274 /contrib/libs/llvm12/lib/Transforms/Scalar/MemCpyOptimizer.cpp | |
| parent | 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (diff) | |
| download | ydb-e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0.tar.gz | |
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Transforms/Scalar/MemCpyOptimizer.cpp')
| -rw-r--r-- | contrib/libs/llvm12/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 1194 |
|---|---|---|
1 file changed, 597 insertions(+), 597 deletions(-)
diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/libs/llvm12/lib/Transforms/Scalar/MemCpyOptimizer.cpp index c5ef74e869..a4e695497f 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -21,11 +21,11 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" -#include "llvm/Analysis/MemorySSA.h" -#include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" @@ -67,15 +67,15 @@ using namespace llvm; #define DEBUG_TYPE "memcpyopt" -static cl::opt<bool> - EnableMemorySSA("enable-memcpyopt-memoryssa", cl::init(false), cl::Hidden, - cl::desc("Use MemorySSA-backed MemCpyOpt.")); - +static cl::opt<bool> + EnableMemorySSA("enable-memcpyopt-memoryssa", cl::init(false), cl::Hidden, + cl::desc("Use MemorySSA-backed MemCpyOpt.")); + STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted"); STATISTIC(NumMemSetInfer, "Number of memsets inferred"); STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy"); STATISTIC(NumCpyToSet, "Number of memcpys converted to memset"); -STATISTIC(NumCallSlot, "Number of call slot optimizations performed"); +STATISTIC(NumCallSlot, "Number of call slot optimizations performed"); namespace { @@ -279,17 +279,17 @@ private: AU.setPreservesCFG(); AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<DominatorTreeWrapperPass>(); - AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); - if (!EnableMemorySSA) - AU.addRequired<MemoryDependenceWrapperPass>(); + if (!EnableMemorySSA) + AU.addRequired<MemoryDependenceWrapperPass>(); AU.addPreserved<MemoryDependenceWrapperPass>(); - AU.addRequired<AAResultsWrapperPass>(); - AU.addPreserved<AAResultsWrapperPass>(); - if (EnableMemorySSA) - AU.addRequired<MemorySSAWrapperPass>(); - AU.addPreserved<MemorySSAWrapperPass>(); + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + if (EnableMemorySSA) + AU.addRequired<MemorySSAWrapperPass>(); + AU.addPreserved<MemorySSAWrapperPass>(); } }; @@ -311,56 +311,56 @@ INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization", false, false) -// Check that V is either not accessible by the caller, or unwinding cannot -// occur between Start and End. 
-static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start, - Instruction *End) { - assert(Start->getParent() == End->getParent() && "Must be in same block"); - if (!Start->getFunction()->doesNotThrow() && - !isa<AllocaInst>(getUnderlyingObject(V))) { - for (const Instruction &I : - make_range(Start->getIterator(), End->getIterator())) { - if (I.mayThrow()) - return true; - } - } - return false; -} - -void MemCpyOptPass::eraseInstruction(Instruction *I) { - if (MSSAU) - MSSAU->removeMemoryAccess(I); - if (MD) - MD->removeInstruction(I); - I->eraseFromParent(); -} - -// Check for mod or ref of Loc between Start and End, excluding both boundaries. -// Start and End must be in the same block -static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc, - const MemoryUseOrDef *Start, - const MemoryUseOrDef *End) { - assert(Start->getBlock() == End->getBlock() && "Only local supported"); - for (const MemoryAccess &MA : - make_range(++Start->getIterator(), End->getIterator())) { - if (isModOrRefSet(AA.getModRefInfo(cast<MemoryUseOrDef>(MA).getMemoryInst(), - Loc))) - return true; - } - return false; -} - -// Check for mod of Loc between Start and End, excluding both boundaries. -// Start and End can be in different blocks. -static bool writtenBetween(MemorySSA *MSSA, MemoryLocation Loc, - const MemoryUseOrDef *Start, - const MemoryUseOrDef *End) { - // TODO: Only walk until we hit Start. - MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( - End->getDefiningAccess(), Loc); - return !MSSA->dominates(Clobber, Start); -} - +// Check that V is either not accessible by the caller, or unwinding cannot +// occur between Start and End. +static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start, + Instruction *End) { + assert(Start->getParent() == End->getParent() && "Must be in same block"); + if (!Start->getFunction()->doesNotThrow() && + !isa<AllocaInst>(getUnderlyingObject(V))) { + for (const Instruction &I : + make_range(Start->getIterator(), End->getIterator())) { + if (I.mayThrow()) + return true; + } + } + return false; +} + +void MemCpyOptPass::eraseInstruction(Instruction *I) { + if (MSSAU) + MSSAU->removeMemoryAccess(I); + if (MD) + MD->removeInstruction(I); + I->eraseFromParent(); +} + +// Check for mod or ref of Loc between Start and End, excluding both boundaries. +// Start and End must be in the same block +static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc, + const MemoryUseOrDef *Start, + const MemoryUseOrDef *End) { + assert(Start->getBlock() == End->getBlock() && "Only local supported"); + for (const MemoryAccess &MA : + make_range(++Start->getIterator(), End->getIterator())) { + if (isModOrRefSet(AA.getModRefInfo(cast<MemoryUseOrDef>(MA).getMemoryInst(), + Loc))) + return true; + } + return false; +} + +// Check for mod of Loc between Start and End, excluding both boundaries. +// Start and End can be in different blocks. +static bool writtenBetween(MemorySSA *MSSA, MemoryLocation Loc, + const MemoryUseOrDef *Start, + const MemoryUseOrDef *End) { + // TODO: Only walk until we hit Start. + MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( + End->getDefiningAccess(), Loc); + return !MSSA->dominates(Clobber, Start); +} + /// When scanning forward over instructions, we look for some other patterns to /// fold away. In particular, this looks for stores to neighboring locations of /// memory. 
If it sees enough consecutive ones, it attempts to merge them @@ -377,27 +377,27 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, MemsetRanges Ranges(DL); BasicBlock::iterator BI(StartInst); - - // Keeps track of the last memory use or def before the insertion point for - // the new memset. The new MemoryDef for the inserted memsets will be inserted - // after MemInsertPoint. It points to either LastMemDef or to the last user - // before the insertion point of the memset, if there are any such users. - MemoryUseOrDef *MemInsertPoint = nullptr; - // Keeps track of the last MemoryDef between StartInst and the insertion point - // for the new memset. This will become the defining access of the inserted - // memsets. - MemoryDef *LastMemDef = nullptr; + + // Keeps track of the last memory use or def before the insertion point for + // the new memset. The new MemoryDef for the inserted memsets will be inserted + // after MemInsertPoint. It points to either LastMemDef or to the last user + // before the insertion point of the memset, if there are any such users. + MemoryUseOrDef *MemInsertPoint = nullptr; + // Keeps track of the last MemoryDef between StartInst and the insertion point + // for the new memset. This will become the defining access of the inserted + // memsets. + MemoryDef *LastMemDef = nullptr; for (++BI; !BI->isTerminator(); ++BI) { - if (MSSAU) { - auto *CurrentAcc = cast_or_null<MemoryUseOrDef>( - MSSAU->getMemorySSA()->getMemoryAccess(&*BI)); - if (CurrentAcc) { - MemInsertPoint = CurrentAcc; - if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc)) - LastMemDef = CurrentDef; - } - } - + if (MSSAU) { + auto *CurrentAcc = cast_or_null<MemoryUseOrDef>( + MSSAU->getMemorySSA()->getMemoryAccess(&*BI)); + if (CurrentAcc) { + MemInsertPoint = CurrentAcc; + if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc)) + LastMemDef = CurrentDef; + } + } + if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) { // If the instruction is readnone, ignore it, otherwise bail out. We // don't even allow readonly here because we don't want something like: @@ -411,15 +411,15 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, // If this is a store, see if we can merge it in. if (!NextStore->isSimple()) break; - Value *StoredVal = NextStore->getValueOperand(); - - // Don't convert stores of non-integral pointer types to memsets (which - // stores integers). - if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType())) - break; - + Value *StoredVal = NextStore->getValueOperand(); + + // Don't convert stores of non-integral pointer types to memsets (which + // stores integers). + if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType())) + break; + // Check to see if this stored value is of the same byte-splattable value. - Value *StoredByte = isBytewiseValue(StoredVal, DL); + Value *StoredByte = isBytewiseValue(StoredVal, DL); if (isa<UndefValue>(ByteVal) && StoredByte) ByteVal = StoredByte; if (ByteVal != StoredByte) @@ -486,24 +486,24 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, if (!Range.TheStores.empty()) AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc()); - if (MSSAU) { - assert(LastMemDef && MemInsertPoint && - "Both LastMemDef and MemInsertPoint need to be set"); - auto *NewDef = - cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI - ? 
MSSAU->createMemoryAccessBefore( - AMemSet, LastMemDef, MemInsertPoint) - : MSSAU->createMemoryAccessAfter( - AMemSet, LastMemDef, MemInsertPoint)); - MSSAU->insertDef(NewDef, /*RenameUses=*/true); - LastMemDef = NewDef; - MemInsertPoint = NewDef; - } - + if (MSSAU) { + assert(LastMemDef && MemInsertPoint && + "Both LastMemDef and MemInsertPoint need to be set"); + auto *NewDef = + cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI + ? MSSAU->createMemoryAccessBefore( + AMemSet, LastMemDef, MemInsertPoint) + : MSSAU->createMemoryAccessAfter( + AMemSet, LastMemDef, MemInsertPoint)); + MSSAU->insertDef(NewDef, /*RenameUses=*/true); + LastMemDef = NewDef; + MemInsertPoint = NewDef; + } + // Zap all the stores. - for (Instruction *SI : Range.TheStores) - eraseInstruction(SI); - + for (Instruction *SI : Range.TheStores) + eraseInstruction(SI); + ++NumMemSetInfer; } @@ -514,10 +514,10 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, // It will lift the store and its argument + that anything that // may alias with these. // The method returns true if it was successful. -bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) { +bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) { // If the store alias this position, early bail out. MemoryLocation StoreLoc = MemoryLocation::get(SI); - if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc))) + if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc))) return false; // Keep track of the arguments of all instruction we plan to lift @@ -528,7 +528,7 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) { Args.insert(Ptr); // Instruction to lift before P. - SmallVector<Instruction *, 8> ToLift{SI}; + SmallVector<Instruction *, 8> ToLift{SI}; // Memory locations of lifted instructions. SmallVector<MemoryLocation, 8> MemLocs{StoreLoc}; @@ -541,24 +541,24 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) { for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) { auto *C = &*I; - // Make sure hoisting does not perform a store that was not guaranteed to - // happen. - if (!isGuaranteedToTransferExecutionToSuccessor(C)) - return false; + // Make sure hoisting does not perform a store that was not guaranteed to + // happen. + if (!isGuaranteedToTransferExecutionToSuccessor(C)) + return false; + + bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, None)); - bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, None)); - bool NeedLift = false; if (Args.erase(C)) NeedLift = true; else if (MayAlias) { - NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) { - return isModOrRefSet(AA->getModRefInfo(C, ML)); + NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) { + return isModOrRefSet(AA->getModRefInfo(C, ML)); }); if (!NeedLift) - NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) { - return isModOrRefSet(AA->getModRefInfo(C, Call)); + NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) { + return isModOrRefSet(AA->getModRefInfo(C, Call)); }); } @@ -568,18 +568,18 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) { if (MayAlias) { // Since LI is implicitly moved downwards past the lifted instructions, // none of them may modify its source. 
- if (isModSet(AA->getModRefInfo(C, LoadLoc))) + if (isModSet(AA->getModRefInfo(C, LoadLoc))) return false; else if (const auto *Call = dyn_cast<CallBase>(C)) { // If we can't lift this before P, it's game over. - if (isModOrRefSet(AA->getModRefInfo(P, Call))) + if (isModOrRefSet(AA->getModRefInfo(P, Call))) return false; Calls.push_back(Call); } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) { // If we can't lift this before P, it's game over. auto ML = MemoryLocation::get(C); - if (isModOrRefSet(AA->getModRefInfo(P, ML))) + if (isModOrRefSet(AA->getModRefInfo(P, ML))) return false; MemLocs.push_back(ML); @@ -599,40 +599,40 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) { } } - // Find MSSA insertion point. Normally P will always have a corresponding - // memory access before which we can insert. However, with non-standard AA - // pipelines, there may be a mismatch between AA and MSSA, in which case we - // will scan for a memory access before P. In either case, we know for sure - // that at least the load will have a memory access. - // TODO: Simplify this once P will be determined by MSSA, in which case the - // discrepancy can no longer occur. - MemoryUseOrDef *MemInsertPoint = nullptr; - if (MSSAU) { - if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) { - MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator()); - } else { - const Instruction *ConstP = P; - for (const Instruction &I : make_range(++ConstP->getReverseIterator(), - ++LI->getReverseIterator())) { - if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) { - MemInsertPoint = MA; - break; - } - } - } - } - - // We made it, we need to lift. + // Find MSSA insertion point. Normally P will always have a corresponding + // memory access before which we can insert. However, with non-standard AA + // pipelines, there may be a mismatch between AA and MSSA, in which case we + // will scan for a memory access before P. In either case, we know for sure + // that at least the load will have a memory access. + // TODO: Simplify this once P will be determined by MSSA, in which case the + // discrepancy can no longer occur. + MemoryUseOrDef *MemInsertPoint = nullptr; + if (MSSAU) { + if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) { + MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator()); + } else { + const Instruction *ConstP = P; + for (const Instruction &I : make_range(++ConstP->getReverseIterator(), + ++LI->getReverseIterator())) { + if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) { + MemInsertPoint = MA; + break; + } + } + } + } + + // We made it, we need to lift. 
for (auto *I : llvm::reverse(ToLift)) { LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n"); I->moveBefore(P); - if (MSSAU) { - assert(MemInsertPoint && "Must have found insert point"); - if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) { - MSSAU->moveAfter(MA, MemInsertPoint); - MemInsertPoint = MA; - } - } + if (MSSAU) { + assert(MemInsertPoint && "Must have found insert point"); + if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) { + MSSAU->moveAfter(MA, MemInsertPoint); + MemInsertPoint = MA; + } + } } return true; @@ -652,15 +652,15 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { const DataLayout &DL = SI->getModule()->getDataLayout(); - Value *StoredVal = SI->getValueOperand(); - - // Not all the transforms below are correct for non-integral pointers, bail - // until we've audited the individual pieces. - if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType())) - return false; - + Value *StoredVal = SI->getValueOperand(); + + // Not all the transforms below are correct for non-integral pointers, bail + // until we've audited the individual pieces. + if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType())) + return false; + // Load to store forwarding can be interpreted as memcpy. - if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) { + if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) { if (LI->isSimple() && LI->hasOneUse() && LI->getParent() == SI->getParent()) { @@ -672,10 +672,10 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // the memory we load from in between the load and the store. If // such an instruction is found, we try to promote there instead // of at the store position. - // TODO: Can use MSSA for this. + // TODO: Can use MSSA for this. Instruction *P = SI; for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) { - if (isModSet(AA->getModRefInfo(&I, LoadLoc))) { + if (isModSet(AA->getModRefInfo(&I, LoadLoc))) { P = &I; break; } @@ -686,7 +686,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // position if nothing alias the store memory after this and the store // destination is not in the range. if (P && P != SI) { - if (!moveUp(SI, P, LI)) + if (!moveUp(SI, P, LI)) P = nullptr; } @@ -697,7 +697,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // memmove must be used to preserve semantic. If not, memcpy can // be used. bool UseMemMove = false; - if (!AA->isNoAlias(MemoryLocation::get(SI), LoadLoc)) + if (!AA->isNoAlias(MemoryLocation::get(SI), LoadLoc)) UseMemMove = true; uint64_t Size = DL.getTypeStoreSize(T); @@ -716,16 +716,16 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M << "\n"); - if (MSSAU) { - auto *LastDef = - cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI)); - auto *NewAccess = - MSSAU->createMemoryAccessAfter(M, LastDef, LastDef); - MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); - } - - eraseInstruction(SI); - eraseInstruction(LI); + if (MSSAU) { + auto *LastDef = + cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI)); + auto *NewAccess = + MSSAU->createMemoryAccessAfter(M, LastDef, LastDef); + MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); + } + + eraseInstruction(SI); + eraseInstruction(LI); ++NumMemCpyInstr; // Make sure we do not invalidate the iterator. 
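The hunk above preserves the load/store forwarding rewrite: a simple load feeding a store is promoted to a single llvm.memcpy, or to llvm.memmove when alias analysis cannot rule out overlap. A minimal sketch of the source-level shape involved (the struct and function names here are hypothetical, for illustration only):

```cpp
// Hypothetical example: an aggregate copy that reaches MemCpyOpt as a
// load of *src feeding a store to *dst. processStore() promotes the pair
// to llvm.memcpy, falling back to llvm.memmove if AA cannot prove the
// two locations are disjoint.
struct Blob { char bytes[256]; };

void copyBlob(Blob *dst, const Blob *src) {
  *dst = *src;
}
```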
@@ -738,49 +738,49 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // happen to be using a load-store pair to implement it, rather than // a memcpy. CallInst *C = nullptr; - if (EnableMemorySSA) { - if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>( - MSSA->getWalker()->getClobberingMemoryAccess(LI))) { - // The load most post-dom the call. Limit to the same block for now. - // TODO: Support non-local call-slot optimization? - if (LoadClobber->getBlock() == SI->getParent()) - C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst()); - } - } else { - MemDepResult ldep = MD->getDependency(LI); - if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst())) - C = dyn_cast<CallInst>(ldep.getInst()); - } + if (EnableMemorySSA) { + if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>( + MSSA->getWalker()->getClobberingMemoryAccess(LI))) { + // The load most post-dom the call. Limit to the same block for now. + // TODO: Support non-local call-slot optimization? + if (LoadClobber->getBlock() == SI->getParent()) + C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst()); + } + } else { + MemDepResult ldep = MD->getDependency(LI); + if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst())) + C = dyn_cast<CallInst>(ldep.getInst()); + } if (C) { // Check that nothing touches the dest of the "copy" between // the call and the store. MemoryLocation StoreLoc = MemoryLocation::get(SI); - if (EnableMemorySSA) { - if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C), - MSSA->getMemoryAccess(SI))) + if (EnableMemorySSA) { + if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C), + MSSA->getMemoryAccess(SI))) C = nullptr; - } else { - for (BasicBlock::iterator I = --SI->getIterator(), - E = C->getIterator(); - I != E; --I) { - if (isModOrRefSet(AA->getModRefInfo(&*I, StoreLoc))) { - C = nullptr; - break; - } + } else { + for (BasicBlock::iterator I = --SI->getIterator(), + E = C->getIterator(); + I != E; --I) { + if (isModOrRefSet(AA->getModRefInfo(&*I, StoreLoc))) { + C = nullptr; + break; + } } } } if (C) { bool changed = performCallSlotOptzn( - LI, SI, SI->getPointerOperand()->stripPointerCasts(), + LI, SI, SI->getPointerOperand()->stripPointerCasts(), LI->getPointerOperand()->stripPointerCasts(), DL.getTypeStoreSize(SI->getOperand(0)->getType()), commonAlignment(SI->getAlign(), LI->getAlign()), C); if (changed) { - eraseInstruction(SI); - eraseInstruction(LI); + eraseInstruction(SI); + eraseInstruction(LI); ++NumMemCpyInstr; return true; } @@ -814,15 +814,15 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n"); - if (MSSAU) { - assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI))); - auto *LastDef = - cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI)); - auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef); - MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); - } - - eraseInstruction(SI); + if (MSSAU) { + assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI))); + auto *LastDef = + cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI)); + auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef); + MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); + } + + eraseInstruction(SI); NumMemSetInfer++; // Make sure we do not invalidate the iterator. 
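processStore() also turns stores of byte-splattable values into llvm.memset, both for a single store (counted by NumMemSetInfer above) and for runs of neighboring stores via tryMergingIntoMemset(). A small sketch, again with hypothetical names:

```cpp
// Hypothetical example: consecutive byte-splat stores. isBytewiseValue()
// recognizes that each stored value is the same repeated byte, so the
// run can be folded into one llvm.memset call.
void zeroHeader(unsigned char *p) {
  p[0] = 0;
  p[1] = 0;
  p[2] = 0;
  p[3] = 0; // the four stores become memset(p, 0, 4)
}
```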
@@ -849,8 +849,8 @@ bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) { /// Takes a memcpy and a call that it depends on, /// and checks for the possibility of a call slot optimization by having /// the call write its result directly into the destination of the memcpy. -bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, - Instruction *cpyStore, Value *cpyDest, +bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, + Instruction *cpyStore, Value *cpyDest, Value *cpySrc, uint64_t cpyLen, Align cpyAlign, CallInst *C) { // The general transformation to keep in mind is @@ -881,7 +881,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, if (!srcArraySize) return false; - const DataLayout &DL = cpyLoad->getModule()->getDataLayout(); + const DataLayout &DL = cpyLoad->getModule()->getDataLayout(); uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) * srcArraySize->getZExtValue(); @@ -891,25 +891,25 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, // Check that accessing the first srcSize bytes of dest will not cause a // trap. Otherwise the transform is invalid since it might cause a trap // to occur earlier than it otherwise would. - if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpyLen), - DL, C, DT)) - return false; - - // Make sure that nothing can observe cpyDest being written early. There are - // a number of cases to consider: - // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of - // the transform. - // 2. C itself may not access cpyDest (prior to the transform). This is - // checked further below. - // 3. If cpyDest is accessible to the caller of this function (potentially - // captured and not based on an alloca), we need to ensure that we cannot - // unwind between C and cpyStore. This is checked here. - // 4. If cpyDest is potentially captured, there may be accesses to it from - // another thread. In this case, we need to check that cpyStore is - // guaranteed to be executed if C is. As it is a non-atomic access, it - // renders accesses from other threads undefined. - // TODO: This is currently not checked. - if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) + if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpyLen), + DL, C, DT)) + return false; + + // Make sure that nothing can observe cpyDest being written early. There are + // a number of cases to consider: + // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of + // the transform. + // 2. C itself may not access cpyDest (prior to the transform). This is + // checked further below. + // 3. If cpyDest is accessible to the caller of this function (potentially + // captured and not based on an alloca), we need to ensure that we cannot + // unwind between C and cpyStore. This is checked here. + // 4. If cpyDest is potentially captured, there may be accesses to it from + // another thread. In this case, we need to check that cpyStore is + // guaranteed to be executed if C is. As it is a non-atomic access, it + // renders accesses from other threads undefined. + // TODO: This is currently not checked. + if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) return false; // Check that dest points to memory that is at least as aligned as src. 
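performCallSlotOptzn() targets the pattern where a call fills a temporary that is then copied into the real destination. A sketch of that shape (names are hypothetical):

```cpp
// Hypothetical example of the call slot pattern. When the preconditions
// above hold (dest is dereferenceable for the copied size, nothing can
// observe dest being written early, the temporary has no other users),
// the call can be redirected to write into dest directly and the
// trailing copy deleted.
struct Blob { char bytes[256]; };
void produce(Blob *out);

void consume(Blob *dest) {
  Blob tmp;
  produce(&tmp); // the call writes into the temporary
  *dest = tmp;   // copy-out that the transform makes redundant
}
```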
@@ -924,26 +924,26 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, // guarantees that it holds only undefined values when passed in (so the final // memcpy can be dropped), that it is not read or written between the call and // the memcpy, and that writing beyond the end of it is undefined. - SmallVector<User *, 8> srcUseList(srcAlloca->users()); + SmallVector<User *, 8> srcUseList(srcAlloca->users()); while (!srcUseList.empty()) { User *U = srcUseList.pop_back_val(); if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) { - append_range(srcUseList, U->users()); + append_range(srcUseList, U->users()); continue; } if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) { if (!G->hasAllZeroIndices()) return false; - append_range(srcUseList, U->users()); + append_range(srcUseList, U->users()); continue; } if (const IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U)) if (IT->isLifetimeStartOrEnd()) continue; - if (U != C && U != cpyLoad) + if (U != C && U != cpyLoad) return false; } @@ -955,24 +955,24 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, // Since we're changing the parameter to the callsite, we need to make sure // that what would be the new parameter dominates the callsite. - if (!DT->dominates(cpyDest, C)) { - // Support moving a constant index GEP before the call. - auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest); - if (GEP && GEP->hasAllConstantIndices() && - DT->dominates(GEP->getPointerOperand(), C)) - GEP->moveBefore(C); - else + if (!DT->dominates(cpyDest, C)) { + // Support moving a constant index GEP before the call. + auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest); + if (GEP && GEP->hasAllConstantIndices() && + DT->dominates(GEP->getPointerOperand(), C)) + GEP->moveBefore(C); + else return false; - } + } // In addition to knowing that the call does not access src in some // unexpected manner, for example via a global, which we deduce from // the use analysis, we also need to know that it does not sneakily // access dest. We rely on AA to figure this out for us. - ModRefInfo MR = AA->getModRefInfo(C, cpyDest, LocationSize::precise(srcSize)); + ModRefInfo MR = AA->getModRefInfo(C, cpyDest, LocationSize::precise(srcSize)); // If necessary, perform additional analysis. if (isModOrRefSet(MR)) - MR = AA->callCapturesBefore(C, cpyDest, LocationSize::precise(srcSize), DT); + MR = AA->callCapturesBefore(C, cpyDest, LocationSize::precise(srcSize), DT); if (isModOrRefSet(MR)) return false; @@ -1014,8 +1014,8 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, // Drop any cached information about the call, because we may have changed // its dependence information by changing its parameter. - if (MD) - MD->removeInstruction(C); + if (MD) + MD->removeInstruction(C); // Update AA metadata // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be @@ -1024,9 +1024,9 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, LLVMContext::MD_noalias, LLVMContext::MD_invariant_group, LLVMContext::MD_access_group}; - combineMetadata(C, cpyLoad, KnownIDs, true); + combineMetadata(C, cpyLoad, KnownIDs, true); - ++NumCallSlot; + ++NumCallSlot; return true; } @@ -1063,28 +1063,28 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M, // // TODO: If the code between M and MDep is transparent to the destination "c", // then we could still perform the xform by moving M up to the first memcpy. 
- if (EnableMemorySSA) { - // TODO: It would be sufficient to check the MDep source up to the memcpy - // size of M, rather than MDep. - if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep), - MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M))) - return false; - } else { - // NOTE: This is conservative, it will stop on any read from the source loc, - // not just the defining memcpy. - MemDepResult SourceDep = - MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false, - M->getIterator(), M->getParent()); - if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) - return false; - } + if (EnableMemorySSA) { + // TODO: It would be sufficient to check the MDep source up to the memcpy + // size of M, rather than MDep. + if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep), + MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M))) + return false; + } else { + // NOTE: This is conservative, it will stop on any read from the source loc, + // not just the defining memcpy. + MemDepResult SourceDep = + MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false, + M->getIterator(), M->getParent()); + if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) + return false; + } // If the dest of the second might alias the source of the first, then the // source and dest might overlap. We still want to eliminate the intermediate // value, but we have to generate a memmove instead of memcpy. bool UseMemMove = false; - if (!AA->isNoAlias(MemoryLocation::getForDest(M), - MemoryLocation::getForSource(MDep))) + if (!AA->isNoAlias(MemoryLocation::getForDest(M), + MemoryLocation::getForSource(MDep))) UseMemMove = true; // If all checks passed, then we can transform M. @@ -1094,25 +1094,25 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M, // TODO: Is this worth it if we're creating a less aligned memcpy? For // example we could be moving from movaps -> movq on x86. IRBuilder<> Builder(M); - Instruction *NewM; + Instruction *NewM; if (UseMemMove) - NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(), - MDep->getRawSource(), MDep->getSourceAlign(), - M->getLength(), M->isVolatile()); + NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(), + MDep->getRawSource(), MDep->getSourceAlign(), + M->getLength(), M->isVolatile()); else - NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(), - MDep->getRawSource(), MDep->getSourceAlign(), - M->getLength(), M->isVolatile()); - - if (MSSAU) { - assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M))); - auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)); - auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); - MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); - } - + NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(), + MDep->getRawSource(), MDep->getSourceAlign(), + M->getLength(), M->isVolatile()); + + if (MSSAU) { + assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M))); + auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)); + auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); + MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); + } + // Remove the instruction we're replacing. 
- eraseInstruction(M); + eraseInstruction(M); ++NumMemCpyInstr; return true; } @@ -1137,41 +1137,41 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy, if (MemSet->getDest() != MemCpy->getDest()) return false; - // Check that src and dst of the memcpy aren't the same. While memcpy - // operands cannot partially overlap, exact equality is allowed. - if (!AA->isNoAlias(MemoryLocation(MemCpy->getSource(), - LocationSize::precise(1)), - MemoryLocation(MemCpy->getDest(), - LocationSize::precise(1)))) + // Check that src and dst of the memcpy aren't the same. While memcpy + // operands cannot partially overlap, exact equality is allowed. + if (!AA->isNoAlias(MemoryLocation(MemCpy->getSource(), + LocationSize::precise(1)), + MemoryLocation(MemCpy->getDest(), + LocationSize::precise(1)))) return false; - if (EnableMemorySSA) { - // We know that dst up to src_size is not written. We now need to make sure - // that dst up to dst_size is not accessed. (If we did not move the memset, - // checking for reads would be sufficient.) - if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet), - MSSA->getMemoryAccess(MemSet), - MSSA->getMemoryAccess(MemCpy))) { - return false; - } - } else { - // We have already checked that dst up to src_size is not accessed. We - // need to make sure that there are no accesses up to dst_size either. - MemDepResult DstDepInfo = MD->getPointerDependencyFrom( - MemoryLocation::getForDest(MemSet), false, MemCpy->getIterator(), - MemCpy->getParent()); - if (DstDepInfo.getInst() != MemSet) - return false; - } - + if (EnableMemorySSA) { + // We know that dst up to src_size is not written. We now need to make sure + // that dst up to dst_size is not accessed. (If we did not move the memset, + // checking for reads would be sufficient.) + if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet), + MSSA->getMemoryAccess(MemSet), + MSSA->getMemoryAccess(MemCpy))) { + return false; + } + } else { + // We have already checked that dst up to src_size is not accessed. We + // need to make sure that there are no accesses up to dst_size either. + MemDepResult DstDepInfo = MD->getPointerDependencyFrom( + MemoryLocation::getForDest(MemSet), false, MemCpy->getIterator(), + MemCpy->getParent()); + if (DstDepInfo.getInst() != MemSet) + return false; + } + // Use the same i8* dest as the memcpy, killing the memset dest if different. Value *Dest = MemCpy->getRawDest(); Value *DestSize = MemSet->getLength(); Value *SrcSize = MemCpy->getLength(); - if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy)) - return false; - + if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy)) + return false; + // By default, create an unaligned memset. 
unsigned Align = 1; // If Dest is aligned, and SrcSize is constant, use the minimum alignment @@ -1197,25 +1197,25 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy, Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize); Value *MemsetLen = Builder.CreateSelect( Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff); - Instruction *NewMemSet = Builder.CreateMemSet( + Instruction *NewMemSet = Builder.CreateMemSet( Builder.CreateGEP(Dest->getType()->getPointerElementType(), Dest, SrcSize), MemSet->getOperand(1), MemsetLen, MaybeAlign(Align)); - if (MSSAU) { - assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) && - "MemCpy must be a MemoryDef"); - // The new memset is inserted after the memcpy, but it is known that its - // defining access is the memset about to be removed which immediately - // precedes the memcpy. - auto *LastDef = - cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)); - auto *NewAccess = MSSAU->createMemoryAccessBefore( - NewMemSet, LastDef->getDefiningAccess(), LastDef); - MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); - } - - eraseInstruction(MemSet); + if (MSSAU) { + assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) && + "MemCpy must be a MemoryDef"); + // The new memset is inserted after the memcpy, but it is known that its + // defining access is the memset about to be removed which immediately + // precedes the memcpy. + auto *LastDef = + cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)); + auto *NewAccess = MSSAU->createMemoryAccessBefore( + NewMemSet, LastDef->getDefiningAccess(), LastDef); + MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); + } + + eraseInstruction(MemSet); return true; } @@ -1234,24 +1234,24 @@ static bool hasUndefContents(Instruction *I, ConstantInt *Size) { return false; } -static bool hasUndefContentsMSSA(MemorySSA *MSSA, AliasAnalysis *AA, Value *V, - MemoryDef *Def, ConstantInt *Size) { - if (MSSA->isLiveOnEntryDef(Def)) - return isa<AllocaInst>(getUnderlyingObject(V)); - - if (IntrinsicInst *II = - dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) { - if (II->getIntrinsicID() == Intrinsic::lifetime_start) { - ConstantInt *LTSize = cast<ConstantInt>(II->getArgOperand(0)); - if (AA->isMustAlias(V, II->getArgOperand(1)) && - LTSize->getZExtValue() >= Size->getZExtValue()) - return true; - } - } - - return false; -} - +static bool hasUndefContentsMSSA(MemorySSA *MSSA, AliasAnalysis *AA, Value *V, + MemoryDef *Def, ConstantInt *Size) { + if (MSSA->isLiveOnEntryDef(Def)) + return isa<AllocaInst>(getUnderlyingObject(V)); + + if (IntrinsicInst *II = + dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start) { + ConstantInt *LTSize = cast<ConstantInt>(II->getArgOperand(0)); + if (AA->isMustAlias(V, II->getArgOperand(1)) && + LTSize->getZExtValue() >= Size->getZExtValue()) + return true; + } + } + + return false; +} + /// Transform memcpy to memset when its source was just memset. /// In other words, turn: /// \code @@ -1270,7 +1270,7 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, MemSetInst *MemSet) { // Make sure that memcpy(..., memset(...), ...), that is we are memsetting and // memcpying from the same address. Otherwise it is hard to reason about. 
- if (!AA->isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource())) + if (!AA->isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource())) return false; // A known memset size is required. @@ -1287,37 +1287,37 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, // interested in the bytes from MemSetSize..CopySize here, but as we can't // easily represent this location, we use the full 0..CopySize range. MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy); - bool CanReduceSize = false; - if (EnableMemorySSA) { - MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet); - MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( - MemSetAccess->getDefiningAccess(), MemCpyLoc); - if (auto *MD = dyn_cast<MemoryDef>(Clobber)) - if (hasUndefContentsMSSA(MSSA, AA, MemCpy->getSource(), MD, CopySize)) - CanReduceSize = true; - } else { - MemDepResult DepInfo = MD->getPointerDependencyFrom( - MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent()); - if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize)) - CanReduceSize = true; - } - - if (!CanReduceSize) + bool CanReduceSize = false; + if (EnableMemorySSA) { + MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet); + MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( + MemSetAccess->getDefiningAccess(), MemCpyLoc); + if (auto *MD = dyn_cast<MemoryDef>(Clobber)) + if (hasUndefContentsMSSA(MSSA, AA, MemCpy->getSource(), MD, CopySize)) + CanReduceSize = true; + } else { + MemDepResult DepInfo = MD->getPointerDependencyFrom( + MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent()); + if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize)) + CanReduceSize = true; + } + + if (!CanReduceSize) return false; - CopySize = MemSetSize; + CopySize = MemSetSize; } IRBuilder<> Builder(MemCpy); - Instruction *NewM = - Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1), - CopySize, MaybeAlign(MemCpy->getDestAlignment())); - if (MSSAU) { - auto *LastDef = - cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)); - auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); - MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); - } - + Instruction *NewM = + Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1), + CopySize, MaybeAlign(MemCpy->getDestAlignment())); + if (MSSAU) { + auto *LastDef = + cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)); + auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); + MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); + } + return true; } @@ -1333,7 +1333,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { // If the source and destination of the memcpy are the same, then zap it. 
if (M->getSource() == M->getDest()) { ++BBI; - eraseInstruction(M); + eraseInstruction(M); return true; } @@ -1343,157 +1343,157 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { if (Value *ByteVal = isBytewiseValue(GV->getInitializer(), M->getModule()->getDataLayout())) { IRBuilder<> Builder(M); - Instruction *NewM = - Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(), - MaybeAlign(M->getDestAlignment()), false); - if (MSSAU) { - auto *LastDef = - cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)); - auto *NewAccess = - MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); - MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); - } - - eraseInstruction(M); + Instruction *NewM = + Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(), + MaybeAlign(M->getDestAlignment()), false); + if (MSSAU) { + auto *LastDef = + cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)); + auto *NewAccess = + MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); + MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); + } + + eraseInstruction(M); ++NumCpyToSet; return true; } - if (EnableMemorySSA) { - MemoryUseOrDef *MA = MSSA->getMemoryAccess(M); - MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA); - MemoryLocation DestLoc = MemoryLocation::getForDest(M); - const MemoryAccess *DestClobber = - MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc); - - // Try to turn a partially redundant memset + memcpy into - // memcpy + smaller memset. We don't need the memcpy size for this. - // The memcpy most post-dom the memset, so limit this to the same basic - // block. A non-local generalization is likely not worthwhile. - if (auto *MD = dyn_cast<MemoryDef>(DestClobber)) - if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst())) - if (DestClobber->getBlock() == M->getParent()) - if (processMemSetMemCpyDependence(M, MDep)) - return true; - - // The optimizations after this point require the memcpy size. - ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength()); - if (!CopySize) return false; - - MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess( - AnyClobber, MemoryLocation::getForSource(M)); - - // There are four possible optimizations we can do for memcpy: - // a) memcpy-memcpy xform which exposes redundance for DSE. - // b) call-memcpy xform for return slot optimization. - // c) memcpy from freshly alloca'd space or space that has just started - // its lifetime copies undefined data, and we can therefore eliminate - // the memcpy in favor of the data that was already at the destination. - // d) memcpy from a just-memset'd source can be turned into memset. - if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) { - if (Instruction *MI = MD->getMemoryInst()) { - if (auto *C = dyn_cast<CallInst>(MI)) { - // The memcpy must post-dom the call. Limit to the same block for now. - // Additionally, we need to ensure that there are no accesses to dest - // between the call and the memcpy. Accesses to src will be checked - // by performCallSlotOptzn(). - // TODO: Support non-local call-slot optimization? - if (C->getParent() == M->getParent() && - !accessedBetween(*AA, DestLoc, MD, MA)) { - // FIXME: Can we pass in either of dest/src alignment here instead - // of conservatively taking the minimum? 
- Align Alignment = std::min(M->getDestAlign().valueOrOne(), - M->getSourceAlign().valueOrOne()); - if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(), - CopySize->getZExtValue(), Alignment, C)) { - LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n" - << " call: " << *C << "\n" - << " memcpy: " << *M << "\n"); - eraseInstruction(M); - ++NumMemCpyInstr; - return true; - } - } - } - if (auto *MDep = dyn_cast<MemCpyInst>(MI)) - return processMemCpyMemCpyDependence(M, MDep); - if (auto *MDep = dyn_cast<MemSetInst>(MI)) { - if (performMemCpyToMemSetOptzn(M, MDep)) { - LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n"); - eraseInstruction(M); - ++NumCpyToSet; - return true; - } - } - } - - if (hasUndefContentsMSSA(MSSA, AA, M->getSource(), MD, CopySize)) { - LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n"); - eraseInstruction(M); - ++NumMemCpyInstr; + if (EnableMemorySSA) { + MemoryUseOrDef *MA = MSSA->getMemoryAccess(M); + MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA); + MemoryLocation DestLoc = MemoryLocation::getForDest(M); + const MemoryAccess *DestClobber = + MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc); + + // Try to turn a partially redundant memset + memcpy into + // memcpy + smaller memset. We don't need the memcpy size for this. + // The memcpy most post-dom the memset, so limit this to the same basic + // block. A non-local generalization is likely not worthwhile. + if (auto *MD = dyn_cast<MemoryDef>(DestClobber)) + if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst())) + if (DestClobber->getBlock() == M->getParent()) + if (processMemSetMemCpyDependence(M, MDep)) + return true; + + // The optimizations after this point require the memcpy size. + ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength()); + if (!CopySize) return false; + + MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess( + AnyClobber, MemoryLocation::getForSource(M)); + + // There are four possible optimizations we can do for memcpy: + // a) memcpy-memcpy xform which exposes redundance for DSE. + // b) call-memcpy xform for return slot optimization. + // c) memcpy from freshly alloca'd space or space that has just started + // its lifetime copies undefined data, and we can therefore eliminate + // the memcpy in favor of the data that was already at the destination. + // d) memcpy from a just-memset'd source can be turned into memset. + if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) { + if (Instruction *MI = MD->getMemoryInst()) { + if (auto *C = dyn_cast<CallInst>(MI)) { + // The memcpy must post-dom the call. Limit to the same block for now. + // Additionally, we need to ensure that there are no accesses to dest + // between the call and the memcpy. Accesses to src will be checked + // by performCallSlotOptzn(). + // TODO: Support non-local call-slot optimization? + if (C->getParent() == M->getParent() && + !accessedBetween(*AA, DestLoc, MD, MA)) { + // FIXME: Can we pass in either of dest/src alignment here instead + // of conservatively taking the minimum? 
+ Align Alignment = std::min(M->getDestAlign().valueOrOne(), + M->getSourceAlign().valueOrOne()); + if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(), + CopySize->getZExtValue(), Alignment, C)) { + LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n" + << " call: " << *C << "\n" + << " memcpy: " << *M << "\n"); + eraseInstruction(M); + ++NumMemCpyInstr; + return true; + } + } + } + if (auto *MDep = dyn_cast<MemCpyInst>(MI)) + return processMemCpyMemCpyDependence(M, MDep); + if (auto *MDep = dyn_cast<MemSetInst>(MI)) { + if (performMemCpyToMemSetOptzn(M, MDep)) { + LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n"); + eraseInstruction(M); + ++NumCpyToSet; + return true; + } + } + } + + if (hasUndefContentsMSSA(MSSA, AA, M->getSource(), MD, CopySize)) { + LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n"); + eraseInstruction(M); + ++NumMemCpyInstr; return true; } } - } else { - MemDepResult DepInfo = MD->getDependency(M); - - // Try to turn a partially redundant memset + memcpy into - // memcpy + smaller memset. We don't need the memcpy size for this. - if (DepInfo.isClobber()) - if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst())) - if (processMemSetMemCpyDependence(M, MDep)) - return true; - - // The optimizations after this point require the memcpy size. - ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength()); - if (!CopySize) return false; - - // There are four possible optimizations we can do for memcpy: - // a) memcpy-memcpy xform which exposes redundance for DSE. - // b) call-memcpy xform for return slot optimization. - // c) memcpy from freshly alloca'd space or space that has just started - // its lifetime copies undefined data, and we can therefore eliminate - // the memcpy in favor of the data that was already at the destination. - // d) memcpy from a just-memset'd source can be turned into memset. - if (DepInfo.isClobber()) { - if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) { - // FIXME: Can we pass in either of dest/src alignment here instead - // of conservatively taking the minimum? - Align Alignment = std::min(M->getDestAlign().valueOrOne(), - M->getSourceAlign().valueOrOne()); - if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(), - CopySize->getZExtValue(), Alignment, C)) { - eraseInstruction(M); - ++NumMemCpyInstr; - return true; - } - } + } else { + MemDepResult DepInfo = MD->getDependency(M); + + // Try to turn a partially redundant memset + memcpy into + // memcpy + smaller memset. We don't need the memcpy size for this. + if (DepInfo.isClobber()) + if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst())) + if (processMemSetMemCpyDependence(M, MDep)) + return true; + + // The optimizations after this point require the memcpy size. + ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength()); + if (!CopySize) return false; + + // There are four possible optimizations we can do for memcpy: + // a) memcpy-memcpy xform which exposes redundance for DSE. + // b) call-memcpy xform for return slot optimization. + // c) memcpy from freshly alloca'd space or space that has just started + // its lifetime copies undefined data, and we can therefore eliminate + // the memcpy in favor of the data that was already at the destination. + // d) memcpy from a just-memset'd source can be turned into memset. + if (DepInfo.isClobber()) { + if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) { + // FIXME: Can we pass in either of dest/src alignment here instead + // of conservatively taking the minimum? 
+ Align Alignment = std::min(M->getDestAlign().valueOrOne(), + M->getSourceAlign().valueOrOne()); + if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(), + CopySize->getZExtValue(), Alignment, C)) { + eraseInstruction(M); + ++NumMemCpyInstr; + return true; + } + } } - MemoryLocation SrcLoc = MemoryLocation::getForSource(M); - MemDepResult SrcDepInfo = MD->getPointerDependencyFrom( - SrcLoc, true, M->getIterator(), M->getParent()); - - if (SrcDepInfo.isClobber()) { - if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst())) - return processMemCpyMemCpyDependence(M, MDep); - } else if (SrcDepInfo.isDef()) { - if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) { - eraseInstruction(M); - ++NumMemCpyInstr; + MemoryLocation SrcLoc = MemoryLocation::getForSource(M); + MemDepResult SrcDepInfo = MD->getPointerDependencyFrom( + SrcLoc, true, M->getIterator(), M->getParent()); + + if (SrcDepInfo.isClobber()) { + if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst())) + return processMemCpyMemCpyDependence(M, MDep); + } else if (SrcDepInfo.isDef()) { + if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) { + eraseInstruction(M); + ++NumMemCpyInstr; return true; } - } - - if (SrcDepInfo.isClobber()) - if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst())) - if (performMemCpyToMemSetOptzn(M, MDep)) { - eraseInstruction(M); - ++NumCpyToSet; - return true; - } - } - + } + + if (SrcDepInfo.isClobber()) + if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst())) + if (performMemCpyToMemSetOptzn(M, MDep)) { + eraseInstruction(M); + ++NumCpyToSet; + return true; + } + } + return false; } @@ -1504,8 +1504,8 @@ bool MemCpyOptPass::processMemMove(MemMoveInst *M) { return false; // See if the pointers alias. - if (!AA->isNoAlias(MemoryLocation::getForDest(M), - MemoryLocation::getForSource(M))) + if (!AA->isNoAlias(MemoryLocation::getForDest(M), + MemoryLocation::getForSource(M))) return false; LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M @@ -1518,13 +1518,13 @@ bool MemCpyOptPass::processMemMove(MemMoveInst *M) { M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(), Intrinsic::memcpy, ArgTys)); - // For MemorySSA nothing really changes (except that memcpy may imply stricter - // aliasing guarantees). - + // For MemorySSA nothing really changes (except that memcpy may imply stricter + // aliasing guarantees). + // MemDep may have over conservative information about this instruction, just // conservatively flush it from the cache. 
- if (MD) - MD->removeInstruction(M); + if (MD) + MD->removeInstruction(M); ++NumMoveToCpy; return true; @@ -1537,21 +1537,21 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) { Value *ByValArg = CB.getArgOperand(ArgNo); Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType(); uint64_t ByValSize = DL.getTypeAllocSize(ByValTy); - MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize)); - MemCpyInst *MDep = nullptr; - if (EnableMemorySSA) { - MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB); - MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( - CallAccess->getDefiningAccess(), Loc); - if (auto *MD = dyn_cast<MemoryDef>(Clobber)) - MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst()); - } else { - MemDepResult DepInfo = MD->getPointerDependencyFrom( - Loc, true, CB.getIterator(), CB.getParent()); - if (!DepInfo.isClobber()) - return false; - MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()); - } + MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize)); + MemCpyInst *MDep = nullptr; + if (EnableMemorySSA) { + MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB); + MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( + CallAccess->getDefiningAccess(), Loc); + if (auto *MD = dyn_cast<MemoryDef>(Clobber)) + MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst()); + } else { + MemDepResult DepInfo = MD->getPointerDependencyFrom( + Loc, true, CB.getIterator(), CB.getParent()); + if (!DepInfo.isClobber()) + return false; + MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()); + } // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by // a memcpy, see if we can byval from the source of the memcpy instead of the @@ -1574,8 +1574,8 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) { // source of the memcpy to the alignment we need. If we fail, we bail out. MaybeAlign MemDepAlign = MDep->getSourceAlign(); if ((!MemDepAlign || *MemDepAlign < *ByValAlign) && - getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC, - DT) < *ByValAlign) + getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC, + DT) < *ByValAlign) return false; // The address space of the memcpy source must match the byval argument @@ -1589,19 +1589,19 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) { // *b = 42; // foo(*a) // It would be invalid to transform the second memcpy into foo(*b). - if (EnableMemorySSA) { - if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep), - MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB))) - return false; - } else { - // NOTE: This is conservative, it will stop on any read from the source loc, - // not just the defining memcpy. - MemDepResult SourceDep = MD->getPointerDependencyFrom( - MemoryLocation::getForSource(MDep), false, - CB.getIterator(), MDep->getParent()); - if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) - return false; - } + if (EnableMemorySSA) { + if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep), + MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB))) + return false; + } else { + // NOTE: This is conservative, it will stop on any read from the source loc, + // not just the defining memcpy. 
+ MemDepResult SourceDep = MD->getPointerDependencyFrom( + MemoryLocation::getForSource(MDep), false, + CB.getIterator(), MDep->getParent()); + if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) + return false; + } Value *TmpCast = MDep->getSource(); if (MDep->getSource()->getType() != ByValArg->getType()) { @@ -1632,7 +1632,7 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) { // instruction in a BB can't be dominated by a later instruction in the // same BB (which is a scenario that can happen for an unreachable BB that // has itself as a predecessor). - if (!DT->isReachableFromEntry(&BB)) + if (!DT->isReachableFromEntry(&BB)) continue; for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) { @@ -1668,43 +1668,43 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) { } PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) { - auto *MD = !EnableMemorySSA ? &AM.getResult<MemoryDependenceAnalysis>(F) - : AM.getCachedResult<MemoryDependenceAnalysis>(F); + auto *MD = !EnableMemorySSA ? &AM.getResult<MemoryDependenceAnalysis>(F) + : AM.getCachedResult<MemoryDependenceAnalysis>(F); auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); - auto *AA = &AM.getResult<AAManager>(F); - auto *AC = &AM.getResult<AssumptionAnalysis>(F); - auto *DT = &AM.getResult<DominatorTreeAnalysis>(F); - auto *MSSA = EnableMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F) - : AM.getCachedResult<MemorySSAAnalysis>(F); - - bool MadeChange = - runImpl(F, MD, &TLI, AA, AC, DT, MSSA ? &MSSA->getMSSA() : nullptr); + auto *AA = &AM.getResult<AAManager>(F); + auto *AC = &AM.getResult<AssumptionAnalysis>(F); + auto *DT = &AM.getResult<DominatorTreeAnalysis>(F); + auto *MSSA = EnableMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F) + : AM.getCachedResult<MemorySSAAnalysis>(F); + + bool MadeChange = + runImpl(F, MD, &TLI, AA, AC, DT, MSSA ? &MSSA->getMSSA() : nullptr); if (!MadeChange) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserveSet<CFGAnalyses>(); PA.preserve<GlobalsAA>(); - if (MD) - PA.preserve<MemoryDependenceAnalysis>(); - if (MSSA) - PA.preserve<MemorySSAAnalysis>(); + if (MD) + PA.preserve<MemoryDependenceAnalysis>(); + if (MSSA) + PA.preserve<MemorySSAAnalysis>(); return PA; } -bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_, - TargetLibraryInfo *TLI_, AliasAnalysis *AA_, - AssumptionCache *AC_, DominatorTree *DT_, - MemorySSA *MSSA_) { +bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_, + TargetLibraryInfo *TLI_, AliasAnalysis *AA_, + AssumptionCache *AC_, DominatorTree *DT_, + MemorySSA *MSSA_) { bool MadeChange = false; MD = MD_; TLI = TLI_; - AA = AA_; - AC = AC_; - DT = DT_; - MSSA = MSSA_; - MemorySSAUpdater MSSAU_(MSSA_); - MSSAU = MSSA_ ? &MSSAU_ : nullptr; + AA = AA_; + AC = AC_; + DT = DT_; + MSSA = MSSA_; + MemorySSAUpdater MSSAU_(MSSA_); + MSSAU = MSSA_ ? &MSSAU_ : nullptr; // If we don't have at least memset and memcpy, there is little point of doing // anything here. These are required by a freestanding implementation, so if // even they are disabled, there is no point in trying hard. @@ -1717,9 +1717,9 @@ bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_, MadeChange = true; } - if (MSSA_ && VerifyMemorySSA) - MSSA_->verifyMemorySSA(); - + if (MSSA_ && VerifyMemorySSA) + MSSA_->verifyMemorySSA(); + MD = nullptr; return MadeChange; } @@ -1729,17 +1729,17 @@ bool MemCpyOptLegacyPass::runOnFunction(Function &F) { if (skipFunction(F)) return false; - auto *MDWP = !EnableMemorySSA - ? 
&getAnalysis<MemoryDependenceWrapperPass>() - : getAnalysisIfAvailable<MemoryDependenceWrapperPass>(); + auto *MDWP = !EnableMemorySSA + ? &getAnalysis<MemoryDependenceWrapperPass>() + : getAnalysisIfAvailable<MemoryDependenceWrapperPass>(); auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); - auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); - auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - auto *MSSAWP = EnableMemorySSA - ? &getAnalysis<MemorySSAWrapperPass>() - : getAnalysisIfAvailable<MemorySSAWrapperPass>(); - - return Impl.runImpl(F, MDWP ? & MDWP->getMemDep() : nullptr, TLI, AA, AC, DT, - MSSAWP ? &MSSAWP->getMSSA() : nullptr); + auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto *MSSAWP = EnableMemorySSA + ? &getAnalysis<MemorySSAWrapperPass>() + : getAnalysisIfAvailable<MemorySSAWrapperPass>(); + + return Impl.runImpl(F, MDWP ? & MDWP->getMemDep() : nullptr, TLI, AA, AC, DT, + MSSAWP ? &MSSAWP->getMSSA() : nullptr); } |
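The patch likewise carries MemorySSA-gated versions of the memset-forwarding rewrites (processMemSetMemCpyDependence and performMemCpyToMemSetOptzn). A final sketch of the pattern the latter folds, with hypothetical names:

```cpp
// Hypothetical example for performMemCpyToMemSetOptzn(): copying out of a
// freshly memset buffer only transfers the splat byte, so the memcpy can
// itself be rewritten as a memset of the destination.
#include <cstring>

void splatCopy(char *dst, char *scratch) {
  std::memset(scratch, 0, 128);   // source bytes are a known splat
  std::memcpy(dst, scratch, 128); // becomes memset(dst, 0, 128)
}
```

All of these rewrites run on both pipelines; the MemorySSA-backed one is opt-in through the hidden -enable-memcpyopt-memoryssa flag declared at the top of the file, which defaults to false in this revision.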