author     shadchin <shadchin@yandex-team.ru>            2022-02-10 16:44:39 +0300
committer  Daniil Cherednik <dcherednik@yandex-team.ru>  2022-02-10 16:44:39 +0300
commit     e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (patch)
tree       64175d5cadab313b3e7039ebaa06c5bc3295e274 /contrib/libs/llvm12/lib/Transforms/Utils
parent     2598ef1d0aee359b4b6d5fdd1758916d5907d04f (diff)
download   ydb-e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0.tar.gz
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Transforms/Utils')
52 files changed, 6878 insertions, 6878 deletions
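Note that the insertion and deletion counts match exactly. Since this commit only restores authorship annotation (see the commit message above), every changed line differs from its replacement only in whitespace. As a sketch of how to verify this, assuming a local clone of the ydb repository: git's -w (--ignore-all-space) flag ignores whitespace when comparing lines, so re-running the diff between the parent and this commit is expected to print nothing:

  # Whitespace-only change: with -w this diff should be empty.
  git diff -w 2598ef1d0aee359b4b6d5fdd1758916d5907d04f e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 \
      -- contrib/libs/llvm12/lib/Transforms/Utils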
diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/AssumeBundleBuilder.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/AssumeBundleBuilder.cpp
index c1f42de423..3daff3b443 100644
--- a/contrib/libs/llvm12/lib/Transforms/Utils/AssumeBundleBuilder.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Utils/AssumeBundleBuilder.cpp
@@ -52,7 +52,7 @@ namespace {
 bool isUsefullToPreserve(Attribute::AttrKind Kind) {
   switch (Kind) {
   case Attribute::NonNull:
-  case Attribute::NoUndef:
+  case Attribute::NoUndef:
   case Attribute::Alignment:
   case Attribute::Dereferenceable:
   case Attribute::DereferenceableOrNull:
@@ -70,7 +70,7 @@ RetainedKnowledge canonicalizedKnowledge(RetainedKnowledge RK, Module *M) {
   default:
     return RK;
   case Attribute::NonNull:
-    RK.WasOn = getUnderlyingObject(RK.WasOn);
+    RK.WasOn = getUnderlyingObject(RK.WasOn);
     return RK;
   case Attribute::Alignment: {
     Value *V = RK.WasOn->stripInBoundsOffsets([&](const Value *Strip) {
@@ -146,7 +146,7 @@ struct AssumeBuilderState {
     if (!RK.WasOn)
       return true;
     if (RK.WasOn->getType()->isPointerTy()) {
-      Value *UnderlyingPtr = getUnderlyingObject(RK.WasOn);
+      Value *UnderlyingPtr = getUnderlyingObject(RK.WasOn);
       if (isa<AllocaInst>(UnderlyingPtr) || isa<GlobalValue>(UnderlyingPtr))
         return false;
     }
diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/BasicBlockUtils.cpp
index faf7a757c3..6bcd42c4c6 100644
--- a/contrib/libs/llvm12/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -105,7 +105,7 @@ void llvm::DeleteDeadBlocks(ArrayRef <BasicBlock *> BBs, DomTreeUpdater *DTU,
   DetatchDeadBlocks(BBs, DTU ? &Updates : nullptr, KeepOneInputPHIs);
 
   if (DTU)
-    DTU->applyUpdates(Updates);
+    DTU->applyUpdates(Updates);
 
   for (BasicBlock *BB : BBs)
     if (DTU)
@@ -136,10 +136,10 @@ bool llvm::EliminateUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
   return !DeadBlocks.empty();
 }
 
-bool llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
+bool llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
                                    MemoryDependenceResults *MemDep) {
-  if (!isa<PHINode>(BB->begin()))
-    return false;
+  if (!isa<PHINode>(BB->begin()))
+    return false;
 
   while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
     if (PN->getIncomingValue(0) != PN)
@@ -152,7 +152,7 @@ bool llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
     PN->eraseFromParent();
   }
 
-  return true;
+  return true;
 }
 
 bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI,
@@ -230,21 +230,21 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
   // These dominator edges will be redirected from Pred.
   std::vector<DominatorTree::UpdateType> Updates;
   if (DTU) {
-    SmallSetVector<BasicBlock *, 2> UniqueSuccessors(succ_begin(BB),
-                                                     succ_end(BB));
-    Updates.reserve(1 + (2 * UniqueSuccessors.size()));
+    SmallSetVector<BasicBlock *, 2> UniqueSuccessors(succ_begin(BB),
+                                                     succ_end(BB));
+    Updates.reserve(1 + (2 * UniqueSuccessors.size()));
     // Add insert edges first. Experimentally, for the particular case of two
     // blocks that can be merged, with a single successor and single predecessor
    // respectively, it is beneficial to have all insert updates first. Deleting
     // edges first may lead to unreachable blocks, followed by inserting edges
     // making the blocks reachable again. Such DT updates lead to high compile
     // times. We add inserts before deletes here to reduce compile time.
-    for (BasicBlock *UniqueSuccessor : UniqueSuccessors)
+    for (BasicBlock *UniqueSuccessor : UniqueSuccessors)
       // This successor of BB may already have PredBB as a predecessor.
-      if (!llvm::is_contained(successors(PredBB), UniqueSuccessor))
-        Updates.push_back({DominatorTree::Insert, PredBB, UniqueSuccessor});
-    for (BasicBlock *UniqueSuccessor : UniqueSuccessors)
-      Updates.push_back({DominatorTree::Delete, BB, UniqueSuccessor});
+      if (!llvm::is_contained(successors(PredBB), UniqueSuccessor))
+        Updates.push_back({DominatorTree::Insert, PredBB, UniqueSuccessor});
+    for (BasicBlock *UniqueSuccessor : UniqueSuccessors)
+      Updates.push_back({DominatorTree::Delete, BB, UniqueSuccessor});
     Updates.push_back({DominatorTree::Delete, PredBB, BB});
   }
@@ -305,7 +305,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
            isa<UnreachableInst>(BB->getTerminator()) &&
            "The successor list of BB isn't empty before "
            "applying corresponding DTU updates.");
-    DTU->applyUpdates(Updates);
+    DTU->applyUpdates(Updates);
     DTU->deleteBB(BB);
   } else {
     BB->eraseFromParent(); // Nuke BB if DTU is nullptr.
   }
@@ -497,16 +497,16 @@ void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
 }
 
 BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
-                            LoopInfo *LI, MemorySSAUpdater *MSSAU,
-                            const Twine &BBName) {
+                            LoopInfo *LI, MemorySSAUpdater *MSSAU,
+                            const Twine &BBName) {
   unsigned SuccNum = GetSuccessorNumber(BB, Succ);
 
   // If this is a critical edge, let SplitCriticalEdge do it.
   Instruction *LatchTerm = BB->getTerminator();
   if (SplitCriticalEdge(
           LatchTerm, SuccNum,
-          CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA(),
-          BBName))
+          CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA(),
+          BBName))
     return LatchTerm->getSuccessor(SuccNum);
 
   // If the edge isn't critical, then BB has a single successor or Succ has a
@@ -516,15 +516,15 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
     // block.
     assert(SP == BB && "CFG broken");
     SP = nullptr;
-    return SplitBlock(Succ, &Succ->front(), DT, LI, MSSAU, BBName,
-                      /*Before=*/true);
+    return SplitBlock(Succ, &Succ->front(), DT, LI, MSSAU, BBName,
+                      /*Before=*/true);
   }
 
   // Otherwise, if BB has a single successor, split it at the bottom of the
   // block.
   assert(BB->getTerminator()->getNumSuccessors() == 1 &&
          "Should have a single succ!");
-  return SplitBlock(BB, BB->getTerminator(), DT, LI, MSSAU, BBName);
+  return SplitBlock(BB, BB->getTerminator(), DT, LI, MSSAU, BBName);
 }
 
 unsigned
@@ -542,16 +542,16 @@ llvm::SplitAllCriticalEdges(Function &F,
   return NumBroken;
 }
 
-static BasicBlock *SplitBlockImpl(BasicBlock *Old, Instruction *SplitPt,
-                                  DomTreeUpdater *DTU, DominatorTree *DT,
-                                  LoopInfo *LI, MemorySSAUpdater *MSSAU,
-                                  const Twine &BBName, bool Before) {
-  if (Before) {
-    DomTreeUpdater LocalDTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
-    return splitBlockBefore(Old, SplitPt,
-                            DTU ? DTU : (DT ? &LocalDTU : nullptr), LI, MSSAU,
-                            BBName);
-  }
+static BasicBlock *SplitBlockImpl(BasicBlock *Old, Instruction *SplitPt,
+                                  DomTreeUpdater *DTU, DominatorTree *DT,
+                                  LoopInfo *LI, MemorySSAUpdater *MSSAU,
+                                  const Twine &BBName, bool Before) {
+  if (Before) {
+    DomTreeUpdater LocalDTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+    return splitBlockBefore(Old, SplitPt,
+                            DTU ? DTU : (DT ? &LocalDTU : nullptr), LI, MSSAU,
+                            BBName);
+  }
   BasicBlock::iterator SplitIt = SplitPt->getIterator();
   while (isa<PHINode>(SplitIt) || SplitIt->isEHPad())
     ++SplitIt;
@@ -565,21 +565,21 @@ static BasicBlock *SplitBlockImpl(BasicBlock *Old, Instruction *SplitPt,
     if (Loop *L = LI->getLoopFor(Old))
       L->addBasicBlockToLoop(New, *LI);
 
-  if (DTU) {
-    SmallVector<DominatorTree::UpdateType, 8> Updates;
+  if (DTU) {
+    SmallVector<DominatorTree::UpdateType, 8> Updates;
+    // Old dominates New. New node dominates all other nodes dominated by Old.
+    SmallSetVector<BasicBlock *, 8> UniqueSuccessorsOfOld(succ_begin(New),
+                                                          succ_end(New));
+    Updates.push_back({DominatorTree::Insert, Old, New});
+    Updates.reserve(Updates.size() + 2 * UniqueSuccessorsOfOld.size());
+    for (BasicBlock *UniqueSuccessorOfOld : UniqueSuccessorsOfOld) {
+      Updates.push_back({DominatorTree::Insert, New, UniqueSuccessorOfOld});
+      Updates.push_back({DominatorTree::Delete, Old, UniqueSuccessorOfOld});
+    }
+
+    DTU->applyUpdates(Updates);
+  } else if (DT)
     // Old dominates New. New node dominates all other nodes dominated by Old.
-    SmallSetVector<BasicBlock *, 8> UniqueSuccessorsOfOld(succ_begin(New),
-                                                          succ_end(New));
-    Updates.push_back({DominatorTree::Insert, Old, New});
-    Updates.reserve(Updates.size() + 2 * UniqueSuccessorsOfOld.size());
-    for (BasicBlock *UniqueSuccessorOfOld : UniqueSuccessorsOfOld) {
-      Updates.push_back({DominatorTree::Insert, New, UniqueSuccessorOfOld});
-      Updates.push_back({DominatorTree::Delete, Old, UniqueSuccessorOfOld});
-    }
-
-    DTU->applyUpdates(Updates);
-  } else if (DT)
-    // Old dominates New. New node dominates all other nodes dominated by Old.
     if (DomTreeNode *OldNode = DT->getNode(Old)) {
       std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());
@@ -596,94 +596,94 @@ static BasicBlock *SplitBlockImpl(BasicBlock *Old, Instruction *SplitPt,
   return New;
 }
 
-BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
-                             DominatorTree *DT, LoopInfo *LI,
-                             MemorySSAUpdater *MSSAU, const Twine &BBName,
-                             bool Before) {
-  return SplitBlockImpl(Old, SplitPt, /*DTU=*/nullptr, DT, LI, MSSAU, BBName,
-                        Before);
-}
-BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
-                             DomTreeUpdater *DTU, LoopInfo *LI,
-                             MemorySSAUpdater *MSSAU, const Twine &BBName,
-                             bool Before) {
-  return SplitBlockImpl(Old, SplitPt, DTU, /*DT=*/nullptr, LI, MSSAU, BBName,
-                        Before);
-}
-
-BasicBlock *llvm::splitBlockBefore(BasicBlock *Old, Instruction *SplitPt,
-                                   DomTreeUpdater *DTU, LoopInfo *LI,
-                                   MemorySSAUpdater *MSSAU,
-                                   const Twine &BBName) {
-
-  BasicBlock::iterator SplitIt = SplitPt->getIterator();
-  while (isa<PHINode>(SplitIt) || SplitIt->isEHPad())
-    ++SplitIt;
-  std::string Name = BBName.str();
-  BasicBlock *New = Old->splitBasicBlock(
-      SplitIt, Name.empty() ? Old->getName() + ".split" : Name,
-      /* Before=*/true);
-
-  // The new block lives in whichever loop the old one did. This preserves
-  // LCSSA as well, because we force the split point to be after any PHI nodes.
-  if (LI)
-    if (Loop *L = LI->getLoopFor(Old))
-      L->addBasicBlockToLoop(New, *LI);
-
-  if (DTU) {
-    SmallVector<DominatorTree::UpdateType, 8> DTUpdates;
-    // New dominates Old. The predecessor nodes of the Old node dominate
-    // New node.
-    SmallSetVector<BasicBlock *, 8> UniquePredecessorsOfOld(pred_begin(New),
-                                                            pred_end(New));
-    DTUpdates.push_back({DominatorTree::Insert, New, Old});
-    DTUpdates.reserve(DTUpdates.size() + 2 * UniquePredecessorsOfOld.size());
-    for (BasicBlock *UniquePredecessorOfOld : UniquePredecessorsOfOld) {
-      DTUpdates.push_back({DominatorTree::Insert, UniquePredecessorOfOld, New});
-      DTUpdates.push_back({DominatorTree::Delete, UniquePredecessorOfOld, Old});
-    }
-
-    DTU->applyUpdates(DTUpdates);
-
-    // Move MemoryAccesses still tracked in Old, but part of New now.
-    // Update accesses in successor blocks accordingly.
-    if (MSSAU) {
-      MSSAU->applyUpdates(DTUpdates, DTU->getDomTree());
-      if (VerifyMemorySSA)
-        MSSAU->getMemorySSA()->verifyMemorySSA();
-    }
-  }
-  return New;
-}
-
+BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
+                             DominatorTree *DT, LoopInfo *LI,
+                             MemorySSAUpdater *MSSAU, const Twine &BBName,
+                             bool Before) {
+  return SplitBlockImpl(Old, SplitPt, /*DTU=*/nullptr, DT, LI, MSSAU, BBName,
+                        Before);
+}
+BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
+                             DomTreeUpdater *DTU, LoopInfo *LI,
+                             MemorySSAUpdater *MSSAU, const Twine &BBName,
+                             bool Before) {
+  return SplitBlockImpl(Old, SplitPt, DTU, /*DT=*/nullptr, LI, MSSAU, BBName,
+                        Before);
+}
+
+BasicBlock *llvm::splitBlockBefore(BasicBlock *Old, Instruction *SplitPt,
+                                   DomTreeUpdater *DTU, LoopInfo *LI,
+                                   MemorySSAUpdater *MSSAU,
+                                   const Twine &BBName) {
+
+  BasicBlock::iterator SplitIt = SplitPt->getIterator();
+  while (isa<PHINode>(SplitIt) || SplitIt->isEHPad())
+    ++SplitIt;
+  std::string Name = BBName.str();
+  BasicBlock *New = Old->splitBasicBlock(
+      SplitIt, Name.empty() ? Old->getName() + ".split" : Name,
+      /* Before=*/true);
+
+  // The new block lives in whichever loop the old one did. This preserves
+  // LCSSA as well, because we force the split point to be after any PHI nodes.
+  if (LI)
+    if (Loop *L = LI->getLoopFor(Old))
+      L->addBasicBlockToLoop(New, *LI);
+
+  if (DTU) {
+    SmallVector<DominatorTree::UpdateType, 8> DTUpdates;
+    // New dominates Old. The predecessor nodes of the Old node dominate
+    // New node.
+    SmallSetVector<BasicBlock *, 8> UniquePredecessorsOfOld(pred_begin(New),
+                                                            pred_end(New));
+    DTUpdates.push_back({DominatorTree::Insert, New, Old});
+    DTUpdates.reserve(DTUpdates.size() + 2 * UniquePredecessorsOfOld.size());
+    for (BasicBlock *UniquePredecessorOfOld : UniquePredecessorsOfOld) {
+      DTUpdates.push_back({DominatorTree::Insert, UniquePredecessorOfOld, New});
+      DTUpdates.push_back({DominatorTree::Delete, UniquePredecessorOfOld, Old});
+    }
+
+    DTU->applyUpdates(DTUpdates);
+
+    // Move MemoryAccesses still tracked in Old, but part of New now.
+    // Update accesses in successor blocks accordingly.
+    if (MSSAU) {
+      MSSAU->applyUpdates(DTUpdates, DTU->getDomTree());
+      if (VerifyMemorySSA)
+        MSSAU->getMemorySSA()->verifyMemorySSA();
+    }
+  }
+  return New;
+}
+
 /// Update DominatorTree, LoopInfo, and LCCSA analysis information.
 static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
                                       ArrayRef<BasicBlock *> Preds,
-                                      DomTreeUpdater *DTU, DominatorTree *DT,
-                                      LoopInfo *LI, MemorySSAUpdater *MSSAU,
+                                      DomTreeUpdater *DTU, DominatorTree *DT,
+                                      LoopInfo *LI, MemorySSAUpdater *MSSAU,
                                       bool PreserveLCSSA, bool &HasLoopExit) {
   // Update dominator tree if available.
-  if (DTU) {
-    // Recalculation of DomTree is needed when updating a forward DomTree and
-    // the Entry BB is replaced.
-    if (NewBB == &NewBB->getParent()->getEntryBlock() && DTU->hasDomTree()) {
-      // The entry block was removed and there is no external interface for
-      // the dominator tree to be notified of this change. In this corner-case
-      // we recalculate the entire tree.
-      DTU->recalculate(*NewBB->getParent());
-    } else {
-      // Split block expects NewBB to have a non-empty set of predecessors.
-      SmallVector<DominatorTree::UpdateType, 8> Updates;
-      SmallSetVector<BasicBlock *, 8> UniquePreds(Preds.begin(), Preds.end());
-      Updates.push_back({DominatorTree::Insert, NewBB, OldBB});
-      Updates.reserve(Updates.size() + 2 * UniquePreds.size());
-      for (auto *UniquePred : UniquePreds) {
-        Updates.push_back({DominatorTree::Insert, UniquePred, NewBB});
-        Updates.push_back({DominatorTree::Delete, UniquePred, OldBB});
-      }
-      DTU->applyUpdates(Updates);
-    }
-  } else if (DT) {
+  if (DTU) {
+    // Recalculation of DomTree is needed when updating a forward DomTree and
+    // the Entry BB is replaced.
+    if (NewBB == &NewBB->getParent()->getEntryBlock() && DTU->hasDomTree()) {
+      // The entry block was removed and there is no external interface for
+      // the dominator tree to be notified of this change. In this corner-case
+      // we recalculate the entire tree.
+      DTU->recalculate(*NewBB->getParent());
+    } else {
+      // Split block expects NewBB to have a non-empty set of predecessors.
+      SmallVector<DominatorTree::UpdateType, 8> Updates;
+      SmallSetVector<BasicBlock *, 8> UniquePreds(Preds.begin(), Preds.end());
+      Updates.push_back({DominatorTree::Insert, NewBB, OldBB});
+      Updates.reserve(Updates.size() + 2 * UniquePreds.size());
+      for (auto *UniquePred : UniquePreds) {
+        Updates.push_back({DominatorTree::Insert, UniquePred, NewBB});
+        Updates.push_back({DominatorTree::Delete, UniquePred, OldBB});
+      }
      DTU->applyUpdates(Updates);
+    }
+  } else if (DT) {
     if (OldBB == DT->getRootNode()->getBlock()) {
       assert(NewBB == &NewBB->getParent()->getEntryBlock());
       DT->setNewRoot(NewBB);
@@ -701,8 +701,8 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
   if (!LI)
     return;
 
-  if (DTU && DTU->hasDomTree())
-    DT = &DTU->getDomTree();
+  if (DTU && DTU->hasDomTree())
+    DT = &DTU->getDomTree();
   assert(DT && "DT should be available to update LoopInfo!");
   Loop *L = LI->getLoopFor(OldBB);
 
@@ -836,17 +836,17 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
   }
 }
 
-static void SplitLandingPadPredecessorsImpl(
-    BasicBlock *OrigBB, ArrayRef<BasicBlock *> Preds, const char *Suffix1,
-    const char *Suffix2, SmallVectorImpl<BasicBlock *> &NewBBs,
-    DomTreeUpdater *DTU, DominatorTree *DT, LoopInfo *LI,
-    MemorySSAUpdater *MSSAU, bool PreserveLCSSA);
-
-static BasicBlock *
-SplitBlockPredecessorsImpl(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
-                           const char *Suffix, DomTreeUpdater *DTU,
-                           DominatorTree *DT, LoopInfo *LI,
-                           MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
+static void SplitLandingPadPredecessorsImpl(
+    BasicBlock *OrigBB, ArrayRef<BasicBlock *> Preds, const char *Suffix1,
+    const char *Suffix2, SmallVectorImpl<BasicBlock *> &NewBBs,
+    DomTreeUpdater *DTU, DominatorTree *DT, LoopInfo *LI,
+    MemorySSAUpdater *MSSAU, bool PreserveLCSSA);
+
+static BasicBlock *
+SplitBlockPredecessorsImpl(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
+                           const char *Suffix, DomTreeUpdater *DTU,
+                           DominatorTree *DT, LoopInfo *LI,
+                           MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
   // Do not attempt to split that which cannot be split.
   if (!BB->canSplitPredecessors())
     return nullptr;
@@ -857,8 +857,8 @@ SplitBlockPredecessorsImpl(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
     SmallVector<BasicBlock*, 2> NewBBs;
     std::string NewName = std::string(Suffix) + ".split-lp";
-    SplitLandingPadPredecessorsImpl(BB, Preds, Suffix, NewName.c_str(), NewBBs,
-                                    DTU, DT, LI, MSSAU, PreserveLCSSA);
+    SplitLandingPadPredecessorsImpl(BB, Preds, Suffix, NewName.c_str(), NewBBs,
+                                    DTU, DT, LI, MSSAU, PreserveLCSSA);
     return NewBBs[0];
   }
@@ -868,22 +868,22 @@ SplitBlockPredecessorsImpl(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
 
   // The new block unconditionally branches to the old block.
   BranchInst *BI = BranchInst::Create(BB, NewBB);
-
-  Loop *L = nullptr;
-  BasicBlock *OldLatch = nullptr;
+
+  Loop *L = nullptr;
+  BasicBlock *OldLatch = nullptr;
   // Splitting the predecessors of a loop header creates a preheader block.
-  if (LI && LI->isLoopHeader(BB)) {
-    L = LI->getLoopFor(BB);
+  if (LI && LI->isLoopHeader(BB)) {
+    L = LI->getLoopFor(BB);
     // Using the loop start line number prevents debuggers stepping into the
     // loop body for this instruction.
-    BI->setDebugLoc(L->getStartLoc());
-
-    // If BB is the header of the Loop, it is possible that the loop is
-    // modified, such that the current latch does not remain the latch of the
-    // loop. If that is the case, the loop metadata from the current latch needs
-    // to be applied to the new latch.
-    OldLatch = L->getLoopLatch();
-  } else
+    BI->setDebugLoc(L->getStartLoc());
+
+    // If BB is the header of the Loop, it is possible that the loop is
+    // modified, such that the current latch does not remain the latch of the
+    // loop. If that is the case, the loop metadata from the current latch needs
+    // to be applied to the new latch.
+    OldLatch = L->getLoopLatch();
+  } else
     BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc());
 
   // Move the edges from Preds to point to NewBB instead of BB.
@@ -910,7 +910,7 @@ SplitBlockPredecessorsImpl(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
   // Update DominatorTree, LoopInfo, and LCCSA analysis information.
   bool HasLoopExit = false;
-  UpdateAnalysisInformation(BB, NewBB, Preds, DTU, DT, LI, MSSAU, PreserveLCSSA,
+  UpdateAnalysisInformation(BB, NewBB, Preds, DTU, DT, LI, MSSAU, PreserveLCSSA,
                             HasLoopExit);
 
   if (!Preds.empty()) {
@@ -918,41 +918,41 @@ SplitBlockPredecessorsImpl(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
     UpdatePHINodes(BB, NewBB, Preds, BI, HasLoopExit);
   }
 
-  if (OldLatch) {
-    BasicBlock *NewLatch = L->getLoopLatch();
-    if (NewLatch != OldLatch) {
-      MDNode *MD = OldLatch->getTerminator()->getMetadata("llvm.loop");
-      NewLatch->getTerminator()->setMetadata("llvm.loop", MD);
-      OldLatch->getTerminator()->setMetadata("llvm.loop", nullptr);
-    }
-  }
-
+  if (OldLatch) {
+    BasicBlock *NewLatch = L->getLoopLatch();
+    if (NewLatch != OldLatch) {
+      MDNode *MD = OldLatch->getTerminator()->getMetadata("llvm.loop");
+      NewLatch->getTerminator()->setMetadata("llvm.loop", MD);
+      OldLatch->getTerminator()->setMetadata("llvm.loop", nullptr);
+    }
+  }
+
   return NewBB;
 }
 
-BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
-                                         ArrayRef<BasicBlock *> Preds,
-                                         const char *Suffix, DominatorTree *DT,
-                                         LoopInfo *LI, MemorySSAUpdater *MSSAU,
-                                         bool PreserveLCSSA) {
-  return SplitBlockPredecessorsImpl(BB, Preds, Suffix, /*DTU=*/nullptr, DT, LI,
-                                    MSSAU, PreserveLCSSA);
-}
-BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
-                                         ArrayRef<BasicBlock *> Preds,
-                                         const char *Suffix,
-                                         DomTreeUpdater *DTU, LoopInfo *LI,
-                                         MemorySSAUpdater *MSSAU,
-                                         bool PreserveLCSSA) {
-  return SplitBlockPredecessorsImpl(BB, Preds, Suffix, DTU,
-                                    /*DT=*/nullptr, LI, MSSAU, PreserveLCSSA);
-}
-
-static void SplitLandingPadPredecessorsImpl(
-    BasicBlock *OrigBB, ArrayRef<BasicBlock *> Preds, const char *Suffix1,
-    const char *Suffix2, SmallVectorImpl<BasicBlock *> &NewBBs,
-    DomTreeUpdater *DTU, DominatorTree *DT, LoopInfo *LI,
-    MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
+BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
+                                         ArrayRef<BasicBlock *> Preds,
+                                         const char *Suffix, DominatorTree *DT,
+                                         LoopInfo *LI, MemorySSAUpdater *MSSAU,
+                                         bool PreserveLCSSA) {
+  return SplitBlockPredecessorsImpl(BB, Preds, Suffix, /*DTU=*/nullptr, DT, LI,
+                                    MSSAU, PreserveLCSSA);
+}
+BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
+                                         ArrayRef<BasicBlock *> Preds,
+                                         const char *Suffix,
+                                         DomTreeUpdater *DTU, LoopInfo *LI,
+                                         MemorySSAUpdater *MSSAU,
+                                         bool PreserveLCSSA) {
+  return SplitBlockPredecessorsImpl(BB, Preds, Suffix, DTU,
+                                    /*DT=*/nullptr, LI, MSSAU, PreserveLCSSA);
+}
+
+static void SplitLandingPadPredecessorsImpl(
+    BasicBlock *OrigBB, ArrayRef<BasicBlock *> Preds, const char *Suffix1,
+    const char *Suffix2, SmallVectorImpl<BasicBlock *> &NewBBs,
+    DomTreeUpdater *DTU, DominatorTree *DT, LoopInfo *LI,
+    MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
   assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!");
 
   // Create a new basic block for OrigBB's predecessors listed in Preds. Insert
@@ -977,8 +977,8 @@ static void SplitLandingPadPredecessorsImpl(
   }
 
   bool HasLoopExit = false;
-  UpdateAnalysisInformation(OrigBB, NewBB1, Preds, DTU, DT, LI, MSSAU,
-                            PreserveLCSSA, HasLoopExit);
+  UpdateAnalysisInformation(OrigBB, NewBB1, Preds, DTU, DT, LI, MSSAU,
+                            PreserveLCSSA, HasLoopExit);
 
   // Update the PHI nodes in OrigBB with the values coming from NewBB1.
   UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, HasLoopExit);
@@ -1013,7 +1013,7 @@ static void SplitLandingPadPredecessorsImpl(
   // Update DominatorTree, LoopInfo, and LCCSA analysis information.
   HasLoopExit = false;
-  UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, DTU, DT, LI, MSSAU,
+  UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, DTU, DT, LI, MSSAU,
                             PreserveLCSSA, HasLoopExit);
 
   // Update the PHI nodes in OrigBB with the values coming from NewBB2.
@@ -1050,29 +1050,29 @@ static void SplitLandingPadPredecessorsImpl(
   }
 }
 
-void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
-                                       ArrayRef<BasicBlock *> Preds,
-                                       const char *Suffix1, const char *Suffix2,
-                                       SmallVectorImpl<BasicBlock *> &NewBBs,
-                                       DominatorTree *DT, LoopInfo *LI,
-                                       MemorySSAUpdater *MSSAU,
-                                       bool PreserveLCSSA) {
-  return SplitLandingPadPredecessorsImpl(
-      OrigBB, Preds, Suffix1, Suffix2, NewBBs,
-      /*DTU=*/nullptr, DT, LI, MSSAU, PreserveLCSSA);
-}
-void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
-                                       ArrayRef<BasicBlock *> Preds,
-                                       const char *Suffix1, const char *Suffix2,
-                                       SmallVectorImpl<BasicBlock *> &NewBBs,
-                                       DomTreeUpdater *DTU, LoopInfo *LI,
-                                       MemorySSAUpdater *MSSAU,
-                                       bool PreserveLCSSA) {
-  return SplitLandingPadPredecessorsImpl(OrigBB, Preds, Suffix1, Suffix2,
-                                         NewBBs, DTU, /*DT=*/nullptr, LI, MSSAU,
-                                         PreserveLCSSA);
-}
-
+void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
+                                       ArrayRef<BasicBlock *> Preds,
+                                       const char *Suffix1, const char *Suffix2,
+                                       SmallVectorImpl<BasicBlock *> &NewBBs,
+                                       DominatorTree *DT, LoopInfo *LI,
+                                       MemorySSAUpdater *MSSAU,
+                                       bool PreserveLCSSA) {
+  return SplitLandingPadPredecessorsImpl(
+      OrigBB, Preds, Suffix1, Suffix2, NewBBs,
+      /*DTU=*/nullptr, DT, LI, MSSAU, PreserveLCSSA);
+}
+void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
+                                       ArrayRef<BasicBlock *> Preds,
+                                       const char *Suffix1, const char *Suffix2,
+                                       SmallVectorImpl<BasicBlock *> &NewBBs,
+                                       DomTreeUpdater *DTU, LoopInfo *LI,
+                                       MemorySSAUpdater *MSSAU,
+                                       bool PreserveLCSSA) {
+  return SplitLandingPadPredecessorsImpl(OrigBB, Preds, Suffix1, Suffix2,
+                                         NewBBs, DTU, /*DT=*/nullptr, LI, MSSAU,
+                                         PreserveLCSSA);
+}
+
 ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
                                              BasicBlock *Pred,
                                              DomTreeUpdater *DTU) {
@@ -1132,24 +1132,24 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
   return cast<ReturnInst>(NewRet);
 }
 
-static Instruction *
-SplitBlockAndInsertIfThenImpl(Value *Cond, Instruction *SplitBefore,
-                              bool Unreachable, MDNode *BranchWeights,
-                              DomTreeUpdater *DTU, DominatorTree *DT,
-                              LoopInfo *LI, BasicBlock *ThenBlock) {
-  SmallVector<DominatorTree::UpdateType, 8> Updates;
+static Instruction *
+SplitBlockAndInsertIfThenImpl(Value *Cond, Instruction *SplitBefore,
+                              bool Unreachable, MDNode *BranchWeights,
+                              DomTreeUpdater *DTU, DominatorTree *DT,
+                              LoopInfo *LI, BasicBlock *ThenBlock) {
+  SmallVector<DominatorTree::UpdateType, 8> Updates;
   BasicBlock *Head = SplitBefore->getParent();
   BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
-  if (DTU) {
-    SmallSetVector<BasicBlock *, 8> UniqueSuccessorsOfHead(succ_begin(Tail),
-                                                           succ_end(Tail));
-    Updates.push_back({DominatorTree::Insert, Head, Tail});
-    Updates.reserve(Updates.size() + 2 * UniqueSuccessorsOfHead.size());
-    for (BasicBlock *UniqueSuccessorOfHead : UniqueSuccessorsOfHead) {
-      Updates.push_back({DominatorTree::Insert, Tail, UniqueSuccessorOfHead});
-      Updates.push_back({DominatorTree::Delete, Head, UniqueSuccessorOfHead});
-    }
-  }
+  if (DTU) {
+    SmallSetVector<BasicBlock *, 8> UniqueSuccessorsOfHead(succ_begin(Tail),
+                                                           succ_end(Tail));
+    Updates.push_back({DominatorTree::Insert, Head, Tail});
+    Updates.reserve(Updates.size() + 2 * UniqueSuccessorsOfHead.size());
+    for (BasicBlock *UniqueSuccessorOfHead : UniqueSuccessorsOfHead) {
+      Updates.push_back({DominatorTree::Insert, Tail, UniqueSuccessorOfHead});
+      Updates.push_back({DominatorTree::Delete, Head, UniqueSuccessorOfHead});
+    }
+  }
   Instruction *HeadOldTerm = Head->getTerminator();
   LLVMContext &C = Head->getContext();
   Instruction *CheckTerm;
@@ -1158,24 +1158,24 @@ SplitBlockAndInsertIfThenImpl(Value *Cond, Instruction *SplitBefore,
     ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
     if (Unreachable)
       CheckTerm = new UnreachableInst(C, ThenBlock);
-    else {
+    else {
      CheckTerm = BranchInst::Create(Tail, ThenBlock);
-      if (DTU)
-        Updates.push_back({DominatorTree::Insert, ThenBlock, Tail});
-    }
+      if (DTU)
+        Updates.push_back({DominatorTree::Insert, ThenBlock, Tail});
+    }
     CheckTerm->setDebugLoc(SplitBefore->getDebugLoc());
   } else
     CheckTerm = ThenBlock->getTerminator();
   BranchInst *HeadNewTerm =
-      BranchInst::Create(/*ifTrue*/ ThenBlock, /*ifFalse*/ Tail, Cond);
-  if (DTU)
-    Updates.push_back({DominatorTree::Insert, Head, ThenBlock});
+      BranchInst::Create(/*ifTrue*/ ThenBlock, /*ifFalse*/ Tail, Cond);
+  if (DTU)
+    Updates.push_back({DominatorTree::Insert, Head, ThenBlock});
   HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
   ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
 
-  if (DTU)
-    DTU->applyUpdates(Updates);
-  else if (DT) {
+  if (DTU)
+    DTU->applyUpdates(Updates);
+  else if (DT) {
     if (DomTreeNode *OldNode = DT->getNode(Head)) {
       std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());
@@ -1201,27 +1201,27 @@ SplitBlockAndInsertIfThenImpl(Value *Cond, Instruction *SplitBefore,
   return CheckTerm;
 }
 
-Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
-                                             Instruction *SplitBefore,
-                                             bool Unreachable,
-                                             MDNode *BranchWeights,
-                                             DominatorTree *DT, LoopInfo *LI,
-                                             BasicBlock *ThenBlock) {
-  return SplitBlockAndInsertIfThenImpl(Cond, SplitBefore, Unreachable,
-                                       BranchWeights,
-                                       /*DTU=*/nullptr, DT, LI, ThenBlock);
-}
-Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
-                                             Instruction *SplitBefore,
-                                             bool Unreachable,
-                                             MDNode *BranchWeights,
-                                             DomTreeUpdater *DTU, LoopInfo *LI,
-                                             BasicBlock *ThenBlock) {
-  return SplitBlockAndInsertIfThenImpl(Cond, SplitBefore, Unreachable,
-                                       BranchWeights, DTU, /*DT=*/nullptr, LI,
-                                       ThenBlock);
-}
-
+Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
+                                             Instruction *SplitBefore,
+                                             bool Unreachable,
+                                             MDNode *BranchWeights,
+                                             DominatorTree *DT, LoopInfo *LI,
+                                             BasicBlock *ThenBlock) {
+  return SplitBlockAndInsertIfThenImpl(Cond, SplitBefore, Unreachable,
+                                       BranchWeights,
+                                       /*DTU=*/nullptr, DT, LI, ThenBlock);
+}
+Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
+                                             Instruction *SplitBefore,
+                                             bool Unreachable,
+                                             MDNode *BranchWeights,
+                                             DomTreeUpdater *DTU, LoopInfo *LI,
+                                             BasicBlock *ThenBlock) {
+  return SplitBlockAndInsertIfThenImpl(Cond, SplitBefore, Unreachable,
+                                       BranchWeights, DTU, /*DT=*/nullptr, LI,
+                                       ThenBlock);
+}
+
 void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
                                          Instruction **ThenTerm,
                                          Instruction **ElseTerm,
@@ -1532,7 +1532,7 @@ BasicBlock *llvm::CreateControlFlowHub(
   SmallVector<DominatorTree::UpdateType, 16> Updates;
   if (DTU) {
     for (auto In : Incoming) {
-      Updates.push_back({DominatorTree::Insert, In, FirstGuardBlock});
+      Updates.push_back({DominatorTree::Insert, In, FirstGuardBlock});
       for (auto Succ : successors(In)) {
         if (Outgoing.count(Succ))
           Updates.push_back({DominatorTree::Delete, In, Succ});
diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/BreakCriticalEdges.cpp
index df6a710afe..939a1a3a86 100644
--- a/contrib/libs/llvm12/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -134,9 +134,9 @@ static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
   }
 }
 
-BasicBlock *llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
-                                    const CriticalEdgeSplittingOptions &Options,
-                                    const Twine &BBName) {
+BasicBlock *llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
+                                    const CriticalEdgeSplittingOptions &Options,
+                                    const Twine &BBName) {
   if (!isCriticalEdge(TI, SuccNum, Options.MergeIdenticalEdges))
     return nullptr;
 
@@ -158,21 +158,21 @@ BasicBlock *llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
   SmallVector<BasicBlock *, 4> LoopPreds;
   // Check if extra modifications will be required to preserve loop-simplify
   // form after splitting. If it would require splitting blocks with IndirectBr
-  // or CallBr terminators, bail out if preserving loop-simplify form is
-  // requested.
+  // or CallBr terminators, bail out if preserving loop-simplify form is
+  // requested.
   if (LI) {
     if (Loop *TIL = LI->getLoopFor(TIBB)) {
-      // The only way that we can break LoopSimplify form by splitting a
-      // critical edge is if after the split there exists some edge from TIL to
-      // DestBB *and* the only edge into DestBB from outside of TIL is that of
+      // The only way that we can break LoopSimplify form by splitting a
+      // critical edge is if after the split there exists some edge from TIL to
+      // DestBB *and* the only edge into DestBB from outside of TIL is that of
       // NewBB. If the first isn't true, then LoopSimplify still holds, NewBB
       // is the new exit block and it has no non-loop predecessors. If the
       // second isn't true, then DestBB was not in LoopSimplify form prior to
       // the split as it had a non-loop predecessor. In both of these cases,
       // the predecessor must be directly in TIL, not in a subloop, or again
       // LoopSimplify doesn't hold.
-      for (BasicBlock *P : predecessors(DestBB)) {
+      for (BasicBlock *P : predecessors(DestBB)) {
        if (P == TIBB)
          continue; // The new block is known.
        if (LI->getLoopFor(P) != TIL) {
@@ -185,10 +185,10 @@ BasicBlock *llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
       // Loop-simplify form can be preserved, if we can split all in-loop
       // predecessors.
       if (any_of(LoopPreds, [](BasicBlock *Pred) {
-            const Instruction *T = Pred->getTerminator();
-            if (const auto *CBR = dyn_cast<CallBrInst>(T))
-              return CBR->getDefaultDest() != Pred;
-            return isa<IndirectBrInst>(T);
+            const Instruction *T = Pred->getTerminator();
+            if (const auto *CBR = dyn_cast<CallBrInst>(T))
+              return CBR->getDefaultDest() != Pred;
+            return isa<IndirectBrInst>(T);
           })) {
         if (Options.PreserveLoopSimplify)
           return nullptr;
@@ -198,13 +198,13 @@ BasicBlock *llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
     }
   }
 
   // Create a new basic block, linking it into the CFG.
-  BasicBlock *NewBB = nullptr;
-  if (BBName.str() != "")
-    NewBB = BasicBlock::Create(TI->getContext(), BBName);
-  else
-    NewBB = BasicBlock::Create(TI->getContext(), TIBB->getName() + "." +
-                                                     DestBB->getName() +
-                                                     "_crit_edge");
+  BasicBlock *NewBB = nullptr;
+  if (BBName.str() != "")
+    NewBB = BasicBlock::Create(TI->getContext(), BBName);
+  else
+    NewBB = BasicBlock::Create(TI->getContext(), TIBB->getName() + "." +
+                                                     DestBB->getName() +
+                                                     "_crit_edge");
   // Create our unconditional branch.
   BranchInst *NewBI = BranchInst::Create(DestBB, NewBB);
   NewBI->setDebugLoc(TI->getDebugLoc());
@@ -277,7 +277,7 @@ BasicBlock *llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
     SmallVector<DominatorTree::UpdateType, 3> Updates;
     Updates.push_back({DominatorTree::Insert, TIBB, NewBB});
     Updates.push_back({DominatorTree::Insert, NewBB, DestBB});
-    if (!llvm::is_contained(successors(TIBB), DestBB))
+    if (!llvm::is_contained(successors(TIBB), DestBB))
       Updates.push_back({DominatorTree::Delete, TIBB, DestBB});
 
     if (DT)
diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/BuildLibCalls.cpp
index a35a9f6613..dba5403f27 100644
--- a/contrib/libs/llvm12/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -31,22 +31,22 @@ using namespace llvm;
 //- Infer Attributes ---------------------------------------------------------//
 
 STATISTIC(NumReadNone, "Number of functions inferred as readnone");
-STATISTIC(NumInaccessibleMemOnly,
-          "Number of functions inferred as inaccessiblememonly");
+STATISTIC(NumInaccessibleMemOnly,
+          "Number of functions inferred as inaccessiblememonly");
 STATISTIC(NumReadOnly, "Number of functions inferred as readonly");
 STATISTIC(NumArgMemOnly, "Number of functions inferred as argmemonly");
-STATISTIC(NumInaccessibleMemOrArgMemOnly,
-          "Number of functions inferred as inaccessiblemem_or_argmemonly");
+STATISTIC(NumInaccessibleMemOrArgMemOnly,
+          "Number of functions inferred as inaccessiblemem_or_argmemonly");
 STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind");
 STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture");
-STATISTIC(NumWriteOnlyArg, "Number of arguments inferred as writeonly");
-STATISTIC(NumSExtArg, "Number of arguments inferred as signext");
+STATISTIC(NumWriteOnlyArg, "Number of arguments inferred as writeonly");
+STATISTIC(NumSExtArg, "Number of arguments inferred as signext");
 STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly");
 STATISTIC(NumNoAlias, "Number of function returns inferred as noalias");
-STATISTIC(NumNoUndef, "Number of function returns inferred as noundef returns");
+STATISTIC(NumNoUndef, "Number of function returns inferred as noundef returns");
 STATISTIC(NumNonNull, "Number of function returns inferred as nonnull returns");
 STATISTIC(NumReturnedArg, "Number of arguments inferred as returned");
-STATISTIC(NumWillReturn, "Number of functions inferred as willreturn");
+STATISTIC(NumWillReturn, "Number of functions inferred as willreturn");
 
 static bool setDoesNotAccessMemory(Function &F) {
   if (F.doesNotAccessMemory())
@@ -56,14 +56,14 @@ static bool setDoesNotAccessMemory(Function &F) {
   return true;
 }
 
-static bool setOnlyAccessesInaccessibleMemory(Function &F) {
-  if (F.onlyAccessesInaccessibleMemory())
-    return false;
-  F.setOnlyAccessesInaccessibleMemory();
-  ++NumInaccessibleMemOnly;
-  return true;
-}
-
+static bool setOnlyAccessesInaccessibleMemory(Function &F) {
+  if (F.onlyAccessesInaccessibleMemory())
+    return false;
+  F.setOnlyAccessesInaccessibleMemory();
+  ++NumInaccessibleMemOnly;
+  return true;
+}
+
 static bool setOnlyReadsMemory(Function &F) {
   if (F.onlyReadsMemory())
     return false;
@@ -80,14 +80,14 @@ static bool setOnlyAccessesArgMemory(Function &F) {
   return true;
 }
 
-static bool setOnlyAccessesInaccessibleMemOrArgMem(Function &F) {
-  if (F.onlyAccessesInaccessibleMemOrArgMem())
-    return false;
-  F.setOnlyAccessesInaccessibleMemOrArgMem();
-  ++NumInaccessibleMemOrArgMemOnly;
-  return true;
-}
-
+static bool setOnlyAccessesInaccessibleMemOrArgMem(Function &F) {
+  if (F.onlyAccessesInaccessibleMemOrArgMem())
+    return false;
+  F.setOnlyAccessesInaccessibleMemOrArgMem();
+  ++NumInaccessibleMemOrArgMemOnly;
+  return true;
+}
+
 static bool setDoesNotThrow(Function &F) {
   if (F.doesNotThrow())
     return false;
@@ -128,48 +128,48 @@ static bool setOnlyReadsMemory(Function &F, unsigned ArgNo) {
   return true;
 }
 
-static bool setOnlyWritesMemory(Function &F, unsigned ArgNo) {
-  if (F.hasParamAttribute(ArgNo, Attribute::WriteOnly))
+static bool setOnlyWritesMemory(Function &F, unsigned ArgNo) {
+  if (F.hasParamAttribute(ArgNo, Attribute::WriteOnly))
     return false;
-  F.addParamAttr(ArgNo, Attribute::WriteOnly);
-  ++NumWriteOnlyArg;
+  F.addParamAttr(ArgNo, Attribute::WriteOnly);
+  ++NumWriteOnlyArg;
   return true;
 }
 
-static bool setSignExtendedArg(Function &F, unsigned ArgNo) {
-  if (F.hasParamAttribute(ArgNo, Attribute::SExt))
-    return false;
-  F.addParamAttr(ArgNo, Attribute::SExt);
-  ++NumSExtArg;
-  return true;
-}
-
-static bool setRetNoUndef(Function &F) {
-  if (!F.getReturnType()->isVoidTy() &&
-      !F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoUndef)) {
-    F.addAttribute(AttributeList::ReturnIndex, Attribute::NoUndef);
-    ++NumNoUndef;
-    return true;
-  }
-  return false;
-}
-
-static bool setArgsNoUndef(Function &F) {
-  bool Changed = false;
-  for (unsigned ArgNo = 0; ArgNo < F.arg_size(); ++ArgNo) {
-    if (!F.hasParamAttribute(ArgNo, Attribute::NoUndef)) {
-      F.addParamAttr(ArgNo, Attribute::NoUndef);
-      ++NumNoUndef;
-      Changed = true;
-    }
-  }
-  return Changed;
-}
-
-static bool setRetAndArgsNoUndef(Function &F) {
-  return setRetNoUndef(F) | setArgsNoUndef(F);
-}
-
+static bool setSignExtendedArg(Function &F, unsigned ArgNo) {
+  if (F.hasParamAttribute(ArgNo, Attribute::SExt))
+    return false;
+  F.addParamAttr(ArgNo, Attribute::SExt);
+  ++NumSExtArg;
+  return true;
+}
+
+static bool setRetNoUndef(Function &F) {
+  if (!F.getReturnType()->isVoidTy() &&
+      !F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoUndef)) {
+    F.addAttribute(AttributeList::ReturnIndex, Attribute::NoUndef);
+    ++NumNoUndef;
+    return true;
+  }
+  return false;
+}
+
+static bool setArgsNoUndef(Function &F) {
+  bool Changed = false;
+  for (unsigned ArgNo = 0; ArgNo < F.arg_size(); ++ArgNo) {
+    if (!F.hasParamAttribute(ArgNo, Attribute::NoUndef)) {
+      F.addParamAttr(ArgNo, Attribute::NoUndef);
+      ++NumNoUndef;
+      Changed = true;
+    }
+  }
+  return Changed;
+}
+
+static bool setRetAndArgsNoUndef(Function &F) {
+  return setRetNoUndef(F) | setArgsNoUndef(F);
+}
+
 static bool setReturnedArg(Function &F, unsigned ArgNo) {
   if (F.hasParamAttribute(ArgNo, Attribute::Returned))
     return false;
@@ -192,14 +192,14 @@ static bool setDoesNotFreeMemory(Function &F) {
   return true;
 }
 
-static bool setWillReturn(Function &F) {
-  if (F.hasFnAttribute(Attribute::WillReturn))
-    return false;
-  F.addFnAttr(Attribute::WillReturn);
-  ++NumWillReturn;
-  return true;
-}
-
+static bool setWillReturn(Function &F) {
+  if (F.hasFnAttribute(Attribute::WillReturn))
+    return false;
+  F.addFnAttr(Attribute::WillReturn);
+  ++NumWillReturn;
+  return true;
+}
+
 bool llvm::inferLibFuncAttributes(Module *M, StringRef Name,
                                   const TargetLibraryInfo &TLI) {
   Function *F = M->getFunction(Name);
@@ -227,15 +227,15 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     Changed |= setOnlyReadsMemory(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setOnlyAccessesArgMemory(F);
-    Changed |= setWillReturn(F);
+    Changed |= setWillReturn(F);
     Changed |= setDoesNotCapture(F, 0);
     return Changed;
   case LibFunc_strchr:
   case LibFunc_strrchr:
-    Changed |= setOnlyAccessesArgMemory(F);
+    Changed |= setOnlyAccessesArgMemory(F);
     Changed |= setOnlyReadsMemory(F);
     Changed |= setDoesNotThrow(F);
-    Changed |= setWillReturn(F);
+    Changed |= setWillReturn(F);
     return Changed;
   case LibFunc_strtol:
   case LibFunc_strtod:
@@ -245,7 +245,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
   case LibFunc_strtold:
   case LibFunc_strtoull:
     Changed |= setDoesNotThrow(F);
-    Changed |= setWillReturn(F);
+    Changed |= setWillReturn(F);
     Changed |= setDoesNotCapture(F, 1);
     Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
@@ -253,23 +253,23 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
   case LibFunc_strncpy:
   case LibFunc_strcat:
   case LibFunc_strncat:
-    Changed |= setWillReturn(F);
+    Changed |= setWillReturn(F);
     Changed |= setReturnedArg(F, 0);
     LLVM_FALLTHROUGH;
   case LibFunc_stpcpy:
   case LibFunc_stpncpy:
-    Changed |= setOnlyAccessesArgMemory(F);
+    Changed |= setOnlyAccessesArgMemory(F);
     Changed |= setDoesNotThrow(F);
-    Changed |= setWillReturn(F);
+    Changed |= setWillReturn(F);
     Changed |= setDoesNotCapture(F, 1);
-    Changed |= setOnlyWritesMemory(F, 0);
+    Changed |= setOnlyWritesMemory(F, 0);
     Changed |= setOnlyReadsMemory(F, 1);
-    Changed |= setDoesNotAlias(F, 0);
-    Changed |= setDoesNotAlias(F, 1);
+    Changed |= setDoesNotAlias(F, 0);
+    Changed |= setDoesNotAlias(F, 1);
     return Changed;
   case LibFunc_strxfrm:
     Changed |= setDoesNotThrow(F);
-    Changed |= setWillReturn(F);
+    Changed |= setWillReturn(F);
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
     Changed |= setOnlyReadsMemory(F, 1);
@@ -278,70 +278,70 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
   case LibFunc_strspn:  // 0,1
   case LibFunc_strncmp: // 0,1
   case LibFunc_strcspn: // 0,1
-    Changed |= setDoesNotThrow(F);
-    Changed |= setOnlyAccessesArgMemory(F);
-    Changed |= setWillReturn(F);
-    Changed |= setOnlyReadsMemory(F);
-    Changed |= setDoesNotCapture(F, 0);
-    Changed |= setDoesNotCapture(F, 1);
-    return Changed;
-  case LibFunc_strcoll:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setOnlyAccessesArgMemory(F);
+    Changed |= setWillReturn(F);
+    Changed |= setOnlyReadsMemory(F);
+    Changed |= setDoesNotCapture(F, 0);
+    Changed |= setDoesNotCapture(F, 1);
+    return Changed;
+  case LibFunc_strcoll:
   case LibFunc_strcasecmp:  // 0,1
   case LibFunc_strncasecmp: //
-    // Those functions may depend on the locale, which may be accessed through
-    // global memory.
+    // Those functions may depend on the locale, which may be accessed through
+    // global memory.
     Changed |= setOnlyReadsMemory(F);
     Changed |= setDoesNotThrow(F);
-    Changed |= setWillReturn(F);
+    Changed |= setWillReturn(F);
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
     return Changed;
   case LibFunc_strstr:
   case LibFunc_strpbrk:
-    Changed |= setOnlyAccessesArgMemory(F);
+    Changed |= setOnlyAccessesArgMemory(F);
     Changed |= setOnlyReadsMemory(F);
     Changed |= setDoesNotThrow(F);
-    Changed |= setWillReturn(F);
+    Changed |= setWillReturn(F);
     Changed |= setDoesNotCapture(F, 1);
     return Changed;
   case LibFunc_strtok:
   case LibFunc_strtok_r:
     Changed |= setDoesNotThrow(F);
-    Changed |= setWillReturn(F);
+    Changed |= setWillReturn(F);
     Changed |= setDoesNotCapture(F, 1);
     Changed |= setOnlyReadsMemory(F, 1);
     return Changed;
   case LibFunc_scanf:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_setbuf:
   case LibFunc_setvbuf:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     return Changed;
   case LibFunc_strdup:
   case LibFunc_strndup:
-    Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
+    Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setRetDoesNotAlias(F);
-    Changed |= setWillReturn(F);
+    Changed |= setWillReturn(F);
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_stat:
   case LibFunc_statvfs:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
     Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_sscanf:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
@@ -349,94 +349,94 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     Changed |= setOnlyReadsMemory(F, 1);
     return Changed;
   case LibFunc_sprintf:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotAlias(F, 0);
-    Changed |= setOnlyWritesMemory(F, 0);
+    Changed |= setOnlyWritesMemory(F, 0);
     Changed |= setDoesNotCapture(F, 1);
     Changed |= setOnlyReadsMemory(F, 1);
     return Changed;
   case LibFunc_snprintf:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotAlias(F, 0);
-    Changed |= setOnlyWritesMemory(F, 0);
+    Changed |= setOnlyWritesMemory(F, 0);
     Changed |= setDoesNotCapture(F, 2);
     Changed |= setOnlyReadsMemory(F, 2);
     return Changed;
   case LibFunc_setitimer:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
-    Changed |= setWillReturn(F);
+    Changed |= setWillReturn(F);
     Changed |= setDoesNotCapture(F, 1);
     Changed |= setDoesNotCapture(F, 2);
     Changed |= setOnlyReadsMemory(F, 1);
     return Changed;
   case LibFunc_system:
     // May throw; "system" is a valid pthread cancellation point.
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_malloc:
-  case LibFunc_vec_malloc:
-    Changed |= setOnlyAccessesInaccessibleMemory(F);
-    Changed |= setRetNoUndef(F);
+  case LibFunc_vec_malloc:
+    Changed |= setOnlyAccessesInaccessibleMemory(F);
+    Changed |= setRetNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setRetDoesNotAlias(F);
-    Changed |= setWillReturn(F);
+    Changed |= setWillReturn(F);
     return Changed;
   case LibFunc_memcmp:
-    Changed |= setOnlyAccessesArgMemory(F);
+    Changed |= setOnlyAccessesArgMemory(F);
     Changed |= setOnlyReadsMemory(F);
     Changed |= setDoesNotThrow(F);
-    Changed |= setWillReturn(F);
+    Changed |= setWillReturn(F);
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
     return Changed;
   case LibFunc_memchr:
   case LibFunc_memrchr:
-    Changed |= setDoesNotThrow(F);
-    Changed |= setOnlyAccessesArgMemory(F);
+    Changed |= setDoesNotThrow(F);
+    Changed |= setOnlyAccessesArgMemory(F);
     Changed |= setOnlyReadsMemory(F);
-    Changed |= setWillReturn(F);
+    Changed |= setWillReturn(F);
     return Changed;
   case LibFunc_modf:
   case LibFunc_modff:
   case LibFunc_modfl:
     Changed |= setDoesNotThrow(F);
-    Changed |= setWillReturn(F);
+    Changed |= setWillReturn(F);
     Changed |= setDoesNotCapture(F, 1);
     return Changed;
   case LibFunc_memcpy:
-    Changed |= setDoesNotThrow(F);
-    Changed |= setOnlyAccessesArgMemory(F);
-    Changed |= setWillReturn(F);
+    Changed |= setDoesNotThrow(F);
+    Changed |= setOnlyAccessesArgMemory(F);
+    Changed |= setWillReturn(F);
     Changed |= setDoesNotAlias(F, 0);
-    Changed |= setReturnedArg(F, 0);
-    Changed |= setOnlyWritesMemory(F, 0);
+    Changed |= setReturnedArg(F, 0);
+    Changed |= setOnlyWritesMemory(F, 0);
     Changed |= setDoesNotAlias(F, 1);
     Changed |= setDoesNotCapture(F, 1);
     Changed |= setOnlyReadsMemory(F, 1);
     return Changed;
   case LibFunc_memmove:
-    Changed |= setDoesNotThrow(F);
-    Changed |= setOnlyAccessesArgMemory(F);
-    Changed |= setWillReturn(F);
+    Changed |= setDoesNotThrow(F);
+    Changed |= setOnlyAccessesArgMemory(F);
+    Changed |= setWillReturn(F);
     Changed |= setReturnedArg(F, 0);
-    Changed |= setOnlyWritesMemory(F, 0);
+    Changed |= setOnlyWritesMemory(F, 0);
     Changed |= setDoesNotCapture(F, 1);
     Changed |= setOnlyReadsMemory(F, 1);
     return Changed;
   case LibFunc_mempcpy:
   case LibFunc_memccpy:
-    Changed |= setDoesNotThrow(F);
-    Changed |= setOnlyAccessesArgMemory(F);
-    Changed |= setWillReturn(F);
+    Changed |= setDoesNotThrow(F);
+    Changed |= setOnlyAccessesArgMemory(F);
+    Changed |= setWillReturn(F);
     Changed |= setDoesNotAlias(F, 0);
-    Changed |= setOnlyWritesMemory(F, 0);
+    Changed |= setOnlyWritesMemory(F, 0);
     Changed |= setDoesNotAlias(F, 1);
     Changed |= setDoesNotCapture(F, 1);
     Changed |= setOnlyReadsMemory(F, 1);
@@ -445,57 +445,57 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     Changed |= setDoesNotThrow(F);
     return Changed;
   case LibFunc_memalign:
-    Changed |= setOnlyAccessesInaccessibleMemory(F);
-    Changed |= setRetNoUndef(F);
-    Changed |= setDoesNotThrow(F);
+    Changed |= setOnlyAccessesInaccessibleMemory(F);
+    Changed |= setRetNoUndef(F);
+    Changed |= setDoesNotThrow(F);
     Changed |= setRetDoesNotAlias(F);
-    Changed |= setWillReturn(F);
+    Changed |= setWillReturn(F);
     return Changed;
   case LibFunc_mkdir:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_mktime:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
-    Changed |= setWillReturn(F);
+    Changed |= setWillReturn(F);
     Changed |= setDoesNotCapture(F, 0);
     return Changed;
   case LibFunc_realloc:
-  case LibFunc_vec_realloc:
-    Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
-    Changed |= setRetNoUndef(F);
+  case LibFunc_vec_realloc:
+    Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
+    Changed |= setRetNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setRetDoesNotAlias(F);
-    Changed |= setWillReturn(F);
+    Changed |= setWillReturn(F);
     Changed |= setDoesNotCapture(F, 0);
     return Changed;
-  case LibFunc_reallocf:
-    Changed |= setRetNoUndef(F);
-    Changed |= setWillReturn(F);
-    return Changed;
+  case LibFunc_reallocf:
+    Changed |= setRetNoUndef(F);
+    Changed |= setWillReturn(F);
+    return Changed;
   case LibFunc_read:
     // May throw; "read" is a valid pthread cancellation point.
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotCapture(F, 1);
     return Changed;
   case LibFunc_rewind:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     return Changed;
   case LibFunc_rmdir:
   case LibFunc_remove:
   case LibFunc_realpath:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_rename:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
@@ -503,7 +503,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     Changed |= setOnlyReadsMemory(F, 1);
     return Changed;
   case LibFunc_readlink:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
@@ -511,52 +511,52 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     return Changed;
   case LibFunc_write:
     // May throw; "write" is a valid pthread cancellation point.
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotCapture(F, 1);
     Changed |= setOnlyReadsMemory(F, 1);
     return Changed;
   case LibFunc_aligned_alloc:
-    Changed |= setOnlyAccessesInaccessibleMemory(F);
-    Changed |= setRetNoUndef(F);
+    Changed |= setOnlyAccessesInaccessibleMemory(F);
+    Changed |= setRetNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setRetDoesNotAlias(F);
-    Changed |= setWillReturn(F);
+    Changed |= setWillReturn(F);
     return Changed;
   case LibFunc_bcopy:
     Changed |= setDoesNotThrow(F);
-    Changed |= setOnlyAccessesArgMemory(F);
-    Changed |= setWillReturn(F);
+    Changed |= setOnlyAccessesArgMemory(F);
+    Changed |= setWillReturn(F);
     Changed |= setDoesNotCapture(F, 0);
-    Changed |= setOnlyReadsMemory(F, 0);
-    Changed |= setOnlyWritesMemory(F, 1);
+    Changed |= setOnlyReadsMemory(F, 0);
+    Changed |= setOnlyWritesMemory(F, 1);
     Changed |= setDoesNotCapture(F, 1);
     return Changed;
   case LibFunc_bcmp:
     Changed |= setDoesNotThrow(F);
-    Changed |= setOnlyAccessesArgMemory(F);
+    Changed |= setOnlyAccessesArgMemory(F);
     Changed |= setOnlyReadsMemory(F);
-    Changed |= setWillReturn(F);
+    Changed |= setWillReturn(F);
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
     return Changed;
   case LibFunc_bzero:
     Changed |= setDoesNotThrow(F);
-    Changed |= setOnlyAccessesArgMemory(F);
-    Changed |= setWillReturn(F);
+    Changed |= setOnlyAccessesArgMemory(F);
+    Changed |= setWillReturn(F);
     Changed |= setDoesNotCapture(F, 0);
-    Changed |= setOnlyWritesMemory(F, 0);
+    Changed |= setOnlyWritesMemory(F, 0);
     return Changed;
   case LibFunc_calloc:
-  case LibFunc_vec_calloc:
-    Changed |= setOnlyAccessesInaccessibleMemory(F);
-    Changed |= setRetNoUndef(F);
+  case LibFunc_vec_calloc:
+    Changed |= setOnlyAccessesInaccessibleMemory(F);
+    Changed |= setRetNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setRetDoesNotAlias(F);
-    Changed |= setWillReturn(F);
+    Changed |= setWillReturn(F);
     return Changed;
   case LibFunc_chmod:
   case LibFunc_chown:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setOnlyReadsMemory(F, 0);
@@ -564,7 +564,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
   case LibFunc_ctermid:
   case LibFunc_clearerr:
   case LibFunc_closedir:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     return Changed;
@@ -574,17 +574,17 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
   case LibFunc_atoll:
     Changed |= setDoesNotThrow(F);
     Changed |= setOnlyReadsMemory(F);
-    Changed |= setWillReturn(F);
+    Changed |= setWillReturn(F);
     Changed |= setDoesNotCapture(F, 0);
     return Changed;
   case LibFunc_access:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_fopen:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setRetDoesNotAlias(F);
     Changed |= setDoesNotCapture(F, 0);
@@ -593,25 +593,25 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     Changed |= setOnlyReadsMemory(F, 1);
     return Changed;
   case LibFunc_fdopen:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setRetDoesNotAlias(F);
     Changed |= setDoesNotCapture(F, 1);
     Changed |= setOnlyReadsMemory(F, 1);
     return Changed;
   case LibFunc_feof:
-    Changed |= setRetAndArgsNoUndef(F);
-    Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 0);
-    return Changed;
+    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 0);
+    return Changed;
   case LibFunc_free:
-  case LibFunc_vec_free:
-    Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
-    Changed |= setArgsNoUndef(F);
-    Changed |= setDoesNotThrow(F);
-    Changed |= setWillReturn(F);
-    Changed |= setDoesNotCapture(F, 0);
-    return Changed;
+  case LibFunc_vec_free:
+    Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
+    Changed |= setArgsNoUndef(F);
+    Changed |= setDoesNotThrow(F);
+    Changed |= setWillReturn(F);
+    Changed |= setDoesNotCapture(F, 0);
+    return Changed;
   case LibFunc_fseek:
   case LibFunc_ftell:
   case LibFunc_fgetc:
@@ -625,12 +625,12 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
   case LibFunc_flockfile:
   case LibFunc_funlockfile:
   case LibFunc_ftrylockfile:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     return Changed;
   case LibFunc_ferror:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setOnlyReadsMemory(F);
@@ -638,38 +638,38 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
   case LibFunc_fputc:
   case LibFunc_fputc_unlocked:
   case LibFunc_fstat:
-    Changed |= setRetAndArgsNoUndef(F);
-    Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
-    return Changed;
+    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    return Changed;
   case LibFunc_frexp:
   case LibFunc_frexpf:
   case LibFunc_frexpl:
-    Changed |= setDoesNotThrow(F);
-    Changed |= setWillReturn(F);
-    Changed |= setDoesNotCapture(F, 1);
-    return Changed;
+    Changed |= setDoesNotThrow(F);
+    Changed |= setWillReturn(F);
+    Changed |= setDoesNotCapture(F, 1);
+    return Changed;
   case LibFunc_fstatvfs:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 1);
     return Changed;
   case LibFunc_fgets:
   case LibFunc_fgets_unlocked:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 2);
     return Changed;
   case LibFunc_fread:
   case LibFunc_fread_unlocked:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 3);
     return Changed;
   case LibFunc_fwrite:
   case LibFunc_fwrite_unlocked:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 3);
@@ -677,7 +677,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     return Changed;
   case LibFunc_fputs:
   case LibFunc_fputs_unlocked:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
@@ -685,35 +685,35 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     return Changed;
   case LibFunc_fscanf:
   case LibFunc_fprintf:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
     Changed |= setOnlyReadsMemory(F, 1);
     return Changed;
   case LibFunc_fgetpos:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
     return Changed;
   case LibFunc_getc:
-    Changed |= setRetAndArgsNoUndef(F);
-    Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 0);
-    return Changed;
+    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 0);
+    return Changed;
   case LibFunc_getlogin_r:
-    Changed |= setRetAndArgsNoUndef(F);
-    Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 0);
-    return Changed;
+    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 0);
+    return Changed;
   case LibFunc_getc_unlocked:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     return Changed;
   case LibFunc_getenv:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setOnlyReadsMemory(F);
     Changed |= setDoesNotCapture(F, 0);
@@ -721,45 +721,45 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
   case LibFunc_gets:
   case LibFunc_getchar:
   case LibFunc_getchar_unlocked:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     return Changed;
   case LibFunc_getitimer:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 1);
     return Changed;
   case LibFunc_getpwnam:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_ungetc:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 1);
     return Changed;
   case LibFunc_uname:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     return Changed;
   case LibFunc_unlink:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_unsetenv:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_utime:
   case LibFunc_utimes:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
@@ -768,36 +768,36 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     return Changed;
   case LibFunc_putc:
   case LibFunc_putc_unlocked:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 1);
     return Changed;
   case LibFunc_puts:
   case LibFunc_printf:
   case LibFunc_perror:
-    Changed |= setRetAndArgsNoUndef(F);
+    Changed
|= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_pread: // May throw; "pread" is a valid pthread cancellation point. - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_pwrite: // May throw; "pwrite" is a valid pthread cancellation point. - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_putchar: case LibFunc_putchar_unlocked: - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); return Changed; case LibFunc_popen: - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); Changed |= setDoesNotCapture(F, 0); @@ -806,18 +806,18 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_pclose: - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_vscanf: - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_vsscanf: - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); @@ -825,35 +825,35 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_vfscanf: - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_valloc: - Changed |= setOnlyAccessesInaccessibleMemory(F); - Changed |= setRetNoUndef(F); + Changed |= setOnlyAccessesInaccessibleMemory(F); + Changed |= setRetNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); - Changed |= setWillReturn(F); + Changed |= setWillReturn(F); return Changed; case LibFunc_vprintf: - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_vfprintf: case LibFunc_vsprintf: - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_vsnprintf: - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 2); @@ -861,24 +861,24 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; case LibFunc_open: // May throw; "open" is a valid pthread cancellation point. 
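The cancellation-point comments are why open, pread and pwrite conspicuously do not get setDoesNotThrow here: on implementations where thread cancellation unwinds the stack (glibc's NPTL raises a forced-unwind exception), a call blocked at a cancellation point can effectively throw. A small POSIX sketch of the situation, assuming that style of runtime (compile with -pthread):

#include <pthread.h>
#include <unistd.h>

static void *worker(void *) {
  char Buf[64];
  // read() is a cancellation point: a pthread_cancel() from another
  // thread may terminate this call and unwind the stack through it,
  // which is why the optimizer must not mark such libcalls nounwind.
  (void)read(0, Buf, sizeof Buf);
  return nullptr;
}

int main() {
  pthread_t T;
  pthread_create(&T, nullptr, worker, nullptr);
  pthread_cancel(T); // may fire while the thread is blocked in read()
  pthread_join(T, nullptr);
  return 0;
}
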
- Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_opendir: - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_tmpfile: - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); return Changed; case LibFunc_times: - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); return Changed; @@ -890,29 +890,29 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setDoesNotAccessMemory(F); return Changed; case LibFunc_lstat: - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_lchown: - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_qsort: // May throw; places call through function pointer. - // Cannot give undef pointer/size - Changed |= setRetAndArgsNoUndef(F); + // Cannot give undef pointer/size + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 3); return Changed; case LibFunc_dunder_strdup: case LibFunc_dunder_strndup: Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); - Changed |= setWillReturn(F); + Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; @@ -922,17 +922,17 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_under_IO_getc: - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_under_IO_putc: - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_dunder_isoc99_scanf: - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); @@ -940,14 +940,14 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_stat64: case LibFunc_lstat64: case LibFunc_statvfs64: - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_dunder_isoc99_sscanf: - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); @@ -955,7 +955,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_fopen64: - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); 
Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); Changed |= setDoesNotCapture(F, 0); @@ -965,24 +965,24 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; case LibFunc_fseeko64: case LibFunc_ftello64: - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_tmpfile64: - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); return Changed; case LibFunc_fstat64: case LibFunc_fstatvfs64: - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_open64: // May throw; "open" is a valid pthread cancellation point. - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; @@ -990,7 +990,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { // Currently some platforms have the restrict keyword on the arguments to // gettimeofday. To be conservative, do not add noalias to gettimeofday's // arguments. - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); @@ -1001,155 +1001,155 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_memset_pattern16: Changed |= setOnlyAccessesArgMemory(F); Changed |= setDoesNotCapture(F, 0); - Changed |= setOnlyWritesMemory(F, 0); + Changed |= setOnlyWritesMemory(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); return Changed; - case LibFunc_memset: - Changed |= setOnlyAccessesArgMemory(F); - Changed |= setWillReturn(F); - Changed |= setDoesNotThrow(F); - Changed |= setOnlyWritesMemory(F, 0); - return Changed; + case LibFunc_memset: + Changed |= setOnlyAccessesArgMemory(F); + Changed |= setWillReturn(F); + Changed |= setDoesNotThrow(F); + Changed |= setOnlyWritesMemory(F, 0); + return Changed; // int __nvvm_reflect(const char *) case LibFunc_nvvm_reflect: - Changed |= setRetAndArgsNoUndef(F); + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotAccessMemory(F); Changed |= setDoesNotThrow(F); return Changed; - case LibFunc_ldexp: - case LibFunc_ldexpf: - case LibFunc_ldexpl: - Changed |= setSignExtendedArg(F, 1); - Changed |= setWillReturn(F); - return Changed; - case LibFunc_abs: - case LibFunc_acos: - case LibFunc_acosf: - case LibFunc_acosh: - case LibFunc_acoshf: - case LibFunc_acoshl: - case LibFunc_acosl: - case LibFunc_asin: - case LibFunc_asinf: - case LibFunc_asinh: - case LibFunc_asinhf: - case LibFunc_asinhl: - case LibFunc_asinl: - case LibFunc_atan: - case LibFunc_atan2: - case LibFunc_atan2f: - case LibFunc_atan2l: - case LibFunc_atanf: - case LibFunc_atanh: - case LibFunc_atanhf: - case LibFunc_atanhl: - case LibFunc_atanl: - case LibFunc_cbrt: - case LibFunc_cbrtf: - case LibFunc_cbrtl: - case LibFunc_ceil: - case LibFunc_ceilf: - case LibFunc_ceill: - case LibFunc_copysign: - case LibFunc_copysignf: - case LibFunc_copysignl: - case LibFunc_cos: - case LibFunc_cosh: - case LibFunc_coshf: - case LibFunc_coshl: - case LibFunc_cosf: - case LibFunc_cosl: - case LibFunc_cospi: - case LibFunc_cospif: - case LibFunc_exp: - case LibFunc_expf: - case 
LibFunc_expl: - case LibFunc_exp2: - case LibFunc_exp2f: - case LibFunc_exp2l: - case LibFunc_expm1: - case LibFunc_expm1f: - case LibFunc_expm1l: - case LibFunc_fabs: - case LibFunc_fabsf: - case LibFunc_fabsl: - case LibFunc_ffs: - case LibFunc_ffsl: - case LibFunc_ffsll: - case LibFunc_floor: - case LibFunc_floorf: - case LibFunc_floorl: - case LibFunc_fls: - case LibFunc_flsl: - case LibFunc_flsll: - case LibFunc_fmax: - case LibFunc_fmaxf: - case LibFunc_fmaxl: - case LibFunc_fmin: - case LibFunc_fminf: - case LibFunc_fminl: - case LibFunc_fmod: - case LibFunc_fmodf: - case LibFunc_fmodl: - case LibFunc_isascii: - case LibFunc_isdigit: - case LibFunc_labs: - case LibFunc_llabs: - case LibFunc_log: - case LibFunc_log10: - case LibFunc_log10f: - case LibFunc_log10l: - case LibFunc_log1p: - case LibFunc_log1pf: - case LibFunc_log1pl: - case LibFunc_log2: - case LibFunc_log2f: - case LibFunc_log2l: - case LibFunc_logb: - case LibFunc_logbf: - case LibFunc_logbl: - case LibFunc_logf: - case LibFunc_logl: - case LibFunc_nearbyint: - case LibFunc_nearbyintf: - case LibFunc_nearbyintl: - case LibFunc_pow: - case LibFunc_powf: - case LibFunc_powl: - case LibFunc_rint: - case LibFunc_rintf: - case LibFunc_rintl: - case LibFunc_round: - case LibFunc_roundf: - case LibFunc_roundl: - case LibFunc_sin: - case LibFunc_sincospif_stret: - case LibFunc_sinf: - case LibFunc_sinh: - case LibFunc_sinhf: - case LibFunc_sinhl: - case LibFunc_sinl: - case LibFunc_sinpi: - case LibFunc_sinpif: - case LibFunc_sqrt: - case LibFunc_sqrtf: - case LibFunc_sqrtl: - case LibFunc_strnlen: - case LibFunc_tan: - case LibFunc_tanf: - case LibFunc_tanh: - case LibFunc_tanhf: - case LibFunc_tanhl: - case LibFunc_tanl: - case LibFunc_toascii: - case LibFunc_trunc: - case LibFunc_truncf: - case LibFunc_truncl: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotFreeMemory(F); - Changed |= setWillReturn(F); - return Changed; + case LibFunc_ldexp: + case LibFunc_ldexpf: + case LibFunc_ldexpl: + Changed |= setSignExtendedArg(F, 1); + Changed |= setWillReturn(F); + return Changed; + case LibFunc_abs: + case LibFunc_acos: + case LibFunc_acosf: + case LibFunc_acosh: + case LibFunc_acoshf: + case LibFunc_acoshl: + case LibFunc_acosl: + case LibFunc_asin: + case LibFunc_asinf: + case LibFunc_asinh: + case LibFunc_asinhf: + case LibFunc_asinhl: + case LibFunc_asinl: + case LibFunc_atan: + case LibFunc_atan2: + case LibFunc_atan2f: + case LibFunc_atan2l: + case LibFunc_atanf: + case LibFunc_atanh: + case LibFunc_atanhf: + case LibFunc_atanhl: + case LibFunc_atanl: + case LibFunc_cbrt: + case LibFunc_cbrtf: + case LibFunc_cbrtl: + case LibFunc_ceil: + case LibFunc_ceilf: + case LibFunc_ceill: + case LibFunc_copysign: + case LibFunc_copysignf: + case LibFunc_copysignl: + case LibFunc_cos: + case LibFunc_cosh: + case LibFunc_coshf: + case LibFunc_coshl: + case LibFunc_cosf: + case LibFunc_cosl: + case LibFunc_cospi: + case LibFunc_cospif: + case LibFunc_exp: + case LibFunc_expf: + case LibFunc_expl: + case LibFunc_exp2: + case LibFunc_exp2f: + case LibFunc_exp2l: + case LibFunc_expm1: + case LibFunc_expm1f: + case LibFunc_expm1l: + case LibFunc_fabs: + case LibFunc_fabsf: + case LibFunc_fabsl: + case LibFunc_ffs: + case LibFunc_ffsl: + case LibFunc_ffsll: + case LibFunc_floor: + case LibFunc_floorf: + case LibFunc_floorl: + case LibFunc_fls: + case LibFunc_flsl: + case LibFunc_flsll: + case LibFunc_fmax: + case LibFunc_fmaxf: + case LibFunc_fmaxl: + case LibFunc_fmin: + case LibFunc_fminf: + case LibFunc_fminl: + case LibFunc_fmod: 
+ case LibFunc_fmodf: + case LibFunc_fmodl: + case LibFunc_isascii: + case LibFunc_isdigit: + case LibFunc_labs: + case LibFunc_llabs: + case LibFunc_log: + case LibFunc_log10: + case LibFunc_log10f: + case LibFunc_log10l: + case LibFunc_log1p: + case LibFunc_log1pf: + case LibFunc_log1pl: + case LibFunc_log2: + case LibFunc_log2f: + case LibFunc_log2l: + case LibFunc_logb: + case LibFunc_logbf: + case LibFunc_logbl: + case LibFunc_logf: + case LibFunc_logl: + case LibFunc_nearbyint: + case LibFunc_nearbyintf: + case LibFunc_nearbyintl: + case LibFunc_pow: + case LibFunc_powf: + case LibFunc_powl: + case LibFunc_rint: + case LibFunc_rintf: + case LibFunc_rintl: + case LibFunc_round: + case LibFunc_roundf: + case LibFunc_roundl: + case LibFunc_sin: + case LibFunc_sincospif_stret: + case LibFunc_sinf: + case LibFunc_sinh: + case LibFunc_sinhf: + case LibFunc_sinhl: + case LibFunc_sinl: + case LibFunc_sinpi: + case LibFunc_sinpif: + case LibFunc_sqrt: + case LibFunc_sqrtf: + case LibFunc_sqrtl: + case LibFunc_strnlen: + case LibFunc_tan: + case LibFunc_tanf: + case LibFunc_tanh: + case LibFunc_tanhf: + case LibFunc_tanhl: + case LibFunc_tanl: + case LibFunc_toascii: + case LibFunc_trunc: + case LibFunc_truncf: + case LibFunc_truncl: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotFreeMemory(F); + Changed |= setWillReturn(F); + return Changed; default: // FIXME: It'd be really nice to cover all the library functions we're // aware of here. @@ -1298,15 +1298,15 @@ Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, return CI; } -Value *llvm::emitMemPCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B, - const DataLayout &DL, const TargetLibraryInfo *TLI) { - LLVMContext &Context = B.GetInsertBlock()->getContext(); - return emitLibCall( - LibFunc_mempcpy, B.getInt8PtrTy(), - {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)}, - {Dst, Src, Len}, B, TLI); -} - +Value *llvm::emitMemPCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B, + const DataLayout &DL, const TargetLibraryInfo *TLI) { + LLVMContext &Context = B.GetInsertBlock()->getContext(); + return emitLibCall( + LibFunc_mempcpy, B.getInt8PtrTy(), + {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)}, + {Dst, Src, Len}, B, TLI); +} + Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { LLVMContext &Context = B.GetInsertBlock()->getContext(); @@ -1346,7 +1346,7 @@ Value *llvm::emitSNPrintf(Value *Dest, Value *Size, Value *Fmt, ArrayRef<Value *> VariadicArgs, IRBuilderBase &B, const TargetLibraryInfo *TLI) { SmallVector<Value *, 8> Args{castToCStr(Dest, B), Size, castToCStr(Fmt, B)}; - llvm::append_range(Args, VariadicArgs); + llvm::append_range(Args, VariadicArgs); return emitLibCall(LibFunc_snprintf, B.getInt32Ty(), {B.getInt8PtrTy(), Size->getType(), B.getInt8PtrTy()}, Args, B, TLI, /*IsVaArgs=*/true); @@ -1356,7 +1356,7 @@ Value *llvm::emitSPrintf(Value *Dest, Value *Fmt, ArrayRef<Value *> VariadicArgs, IRBuilderBase &B, const TargetLibraryInfo *TLI) { SmallVector<Value *, 8> Args{castToCStr(Dest, B), castToCStr(Fmt, B)}; - llvm::append_range(Args, VariadicArgs); + llvm::append_range(Args, VariadicArgs); return emitLibCall(LibFunc_sprintf, B.getInt32Ty(), {B.getInt8PtrTy(), B.getInt8PtrTy()}, Args, B, TLI, /*IsVaArgs=*/true); @@ -1464,15 +1464,15 @@ Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI, static Value *emitBinaryFloatFnCallHelper(Value *Op1, Value *Op2, 
StringRef Name, IRBuilderBase &B, - const AttributeList &Attrs, - const TargetLibraryInfo *TLI = nullptr) { + const AttributeList &Attrs, + const TargetLibraryInfo *TLI = nullptr) { assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall"); Module *M = B.GetInsertBlock()->getModule(); FunctionCallee Callee = M->getOrInsertFunction(Name, Op1->getType(), Op1->getType(), Op2->getType()); - if (TLI != nullptr) - inferLibFuncAttributes(M, Name, *TLI); + if (TLI != nullptr) + inferLibFuncAttributes(M, Name, *TLI); CallInst *CI = B.CreateCall(Callee, { Op1, Op2 }, Name); // The incoming attribute set may have come from a speculatable intrinsic, but @@ -1508,7 +1508,7 @@ Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name = getFloatFnName(TLI, Op1->getType(), DoubleFn, FloatFn, LongDoubleFn); - return emitBinaryFloatFnCallHelper(Op1, Op2, Name, B, Attrs, TLI); + return emitBinaryFloatFnCallHelper(Op1, Op2, Name, B, Attrs, TLI); } Value *llvm::emitPutChar(Value *Char, IRBuilderBase &B, diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/CallGraphUpdater.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/CallGraphUpdater.cpp index 7835063f46..b2763900e1 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/CallGraphUpdater.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/CallGraphUpdater.cpp @@ -96,12 +96,12 @@ void CallGraphUpdater::reanalyzeFunction(Function &Fn) { } } -void CallGraphUpdater::registerOutlinedFunction(Function &OriginalFn, - Function &NewFn) { +void CallGraphUpdater::registerOutlinedFunction(Function &OriginalFn, + Function &NewFn) { if (CG) CG->addToCallGraph(&NewFn); else if (LCG) - LCG->addSplitFunction(OriginalFn, NewFn); + LCG->addSplitFunction(OriginalFn, NewFn); } void CallGraphUpdater::removeFunction(Function &DeadFn) { diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/CallPromotionUtils.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/CallPromotionUtils.cpp index 0a182983c7..bf08bf2747 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/CallPromotionUtils.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/CallPromotionUtils.cpp @@ -112,7 +112,7 @@ static void createRetPHINode(Instruction *OrigInst, Instruction *NewInst, Builder.SetInsertPoint(&MergeBlock->front()); PHINode *Phi = Builder.CreatePHI(OrigInst->getType(), 0); - SmallVector<User *, 16> UsersToUpdate(OrigInst->users()); + SmallVector<User *, 16> UsersToUpdate(OrigInst->users()); for (User *U : UsersToUpdate) U->replaceUsesOfWith(OrigInst, Phi); Phi->addIncoming(OrigInst, OrigInst->getParent()); @@ -163,7 +163,7 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) { // Save the users of the calling instruction. These uses will be changed to // use the bitcast after we create it. - SmallVector<User *, 16> UsersToUpdate(CB.users()); + SmallVector<User *, 16> UsersToUpdate(CB.users()); // Determine an appropriate location to create the bitcast for the return // value. The location depends on if we have a call or invoke instruction. @@ -426,11 +426,11 @@ bool llvm::isLegalToPromote(const CallBase &CB, Function *Callee, } } for (; I < NumArgs; I++) { - // Vararg functions can have more arguments than parameters. + // Vararg functions can have more arguments than parameters. 
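This tail of the loop handles call-site arguments beyond the callee's declared parameters, which only a varargs callee can legally absorb; the one thing that cannot be forwarded through the variadic tail is an sret argument, hence the bailout that follows. The C++ analogue of the shape being validated, with made-up names:

#include <cstdarg>
#include <cstdio>

// One declared parameter, any number of trailing arguments.
static int sum(int N, ...) {
  va_list Ap;
  va_start(Ap, N);
  int S = 0;
  for (int I = 0; I < N; ++I)
    S += va_arg(Ap, int); // extra arguments are recovered here, untyped
  va_end(Ap);
  return S;
}

int main() {
  // Four call-site arguments map onto one parameter: legal only
  // because sum() is variadic, which is exactly what the assert checks.
  printf("%d\n", sum(3, 1, 2, 3));
  return 0;
}
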
assert(Callee->isVarArg()); if (CB.paramHasAttr(I, Attribute::StructRet)) { - if (FailureReason) - *FailureReason = "SRet arg to vararg function"; + if (FailureReason) + *FailureReason = "SRet arg to vararg function"; return false; } } diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp index 06dfd56a44..1f649fe6c7 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp @@ -109,7 +109,7 @@ void CanonicalizeFreezeInLoopsImpl::InsertFreezeAndForgetFromSCEV(Use &U) { auto *ValueToFr = U.get(); assert(L->contains(UserI->getParent()) && "Should not process an instruction that isn't inside the loop"); - if (isGuaranteedNotToBeUndefOrPoison(ValueToFr, nullptr, UserI, &DT)) + if (isGuaranteedNotToBeUndefOrPoison(ValueToFr, nullptr, UserI, &DT)) return; LLVM_DEBUG(dbgs() << "canonfr: inserting freeze:\n"); @@ -176,7 +176,7 @@ bool CanonicalizeFreezeInLoopsImpl::run() { assert(StepI && "Step instruction should have been found"); // Drop flags from the step instruction. - if (!isGuaranteedNotToBeUndefOrPoison(StepI, nullptr, StepI, &DT)) { + if (!isGuaranteedNotToBeUndefOrPoison(StepI, nullptr, StepI, &DT)) { LLVM_DEBUG(dbgs() << "canonfr: drop flags: " << *StepI << "\n"); StepI->dropPoisonGeneratingFlags(); SE.forgetValue(StepI); diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/CloneFunction.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/CloneFunction.cpp index cb36aa5521..6ab061510a 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/CloneFunction.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/CloneFunction.cpp @@ -27,7 +27,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -37,8 +37,8 @@ #include <map> using namespace llvm; -#define DEBUG_TYPE "clone-function" - +#define DEBUG_TYPE "clone-function" + /// See comments in Cloning.h. BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix, Function *F, @@ -140,10 +140,10 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, MD[SP].reset(SP); } - // Everything else beyond this point deals with function instructions, - // so if we are dealing with a function declaration, we're done. - if (OldFunc->isDeclaration()) - return; + // Everything else beyond this point deals with function instructions, + // so if we are dealing with a function declaration, we're done. + if (OldFunc->isDeclaration()) + return; // When we remap instructions, we want to avoid duplicating inlined // DISubprograms, so record all subprograms we find as we duplicate @@ -193,19 +193,19 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, for (DIType *Type : DIFinder.types()) VMap.MD()[Type].reset(Type); - // Duplicate the metadata that is attached to the cloned function. - // Subprograms/CUs/types that were already mapped to themselves won't be - // duplicated. - SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; - OldFunc->getAllMetadata(MDs); - for (auto MD : MDs) { - NewFunc->addMetadata( - MD.first, - *MapMetadata(MD.second, VMap, - ModuleLevelChanges ? 
RF_None : RF_NoModuleLevelChanges, - TypeMapper, Materializer)); - } - + // Duplicate the metadata that is attached to the cloned function. + // Subprograms/CUs/types that were already mapped to themselves won't be + // duplicated. + SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; + OldFunc->getAllMetadata(MDs); + for (auto MD : MDs) { + NewFunc->addMetadata( + MD.first, + *MapMetadata(MD.second, VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, + TypeMapper, Materializer)); + } + // Loop over all of the instructions in the function, fixing up operand // references as we go. This uses VMap to do all the hard work. for (Function::iterator BB = @@ -436,7 +436,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, CodeInfo->OperandBundleCallSites.push_back(NewInst); // Recursively clone any reachable successor blocks. - append_range(ToClone, successors(BB->getTerminator())); + append_range(ToClone, successors(BB->getTerminator())); } if (CodeInfo) { @@ -676,7 +676,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // Check if this block has become dead during inlining or other // simplifications. Note that the first block will appear dead, as it has // not yet been wired up properly. - if (I != Begin && (pred_empty(&*I) || I->getSinglePredecessor() == &*I)) { + if (I != Begin && (pred_empty(&*I) || I->getSinglePredecessor() == &*I)) { BasicBlock *DeadBB = &*I++; DeleteDeadBlock(DeadBB); continue; @@ -884,116 +884,116 @@ BasicBlock *llvm::DuplicateInstructionsInSplitBetween( return NewBB; } - -void llvm::cloneNoAliasScopes( - ArrayRef<MDNode *> NoAliasDeclScopes, - DenseMap<MDNode *, MDNode *> &ClonedScopes, - StringRef Ext, LLVMContext &Context) { - MDBuilder MDB(Context); - - for (auto *ScopeList : NoAliasDeclScopes) { - for (auto &MDOperand : ScopeList->operands()) { - if (MDNode *MD = dyn_cast<MDNode>(MDOperand)) { - AliasScopeNode SNANode(MD); - - std::string Name; - auto ScopeName = SNANode.getName(); - if (!ScopeName.empty()) - Name = (Twine(ScopeName) + ":" + Ext).str(); - else - Name = std::string(Ext); - - MDNode *NewScope = MDB.createAnonymousAliasScope( - const_cast<MDNode *>(SNANode.getDomain()), Name); - ClonedScopes.insert(std::make_pair(MD, NewScope)); - } - } - } -} - -void llvm::adaptNoAliasScopes( - Instruction *I, const DenseMap<MDNode *, MDNode *> &ClonedScopes, - LLVMContext &Context) { - auto CloneScopeList = [&](const MDNode *ScopeList) -> MDNode * { - bool NeedsReplacement = false; - SmallVector<Metadata *, 8> NewScopeList; - for (auto &MDOp : ScopeList->operands()) { - if (MDNode *MD = dyn_cast<MDNode>(MDOp)) { - if (auto *NewMD = ClonedScopes.lookup(MD)) { - NewScopeList.push_back(NewMD); - NeedsReplacement = true; - continue; - } - NewScopeList.push_back(MD); - } - } - if (NeedsReplacement) - return MDNode::get(Context, NewScopeList); - return nullptr; - }; - - if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(I)) - if (auto *NewScopeList = CloneScopeList(Decl->getScopeList())) - Decl->setScopeList(NewScopeList); - - auto replaceWhenNeeded = [&](unsigned MD_ID) { - if (const MDNode *CSNoAlias = I->getMetadata(MD_ID)) - if (auto *NewScopeList = CloneScopeList(CSNoAlias)) - I->setMetadata(MD_ID, NewScopeList); - }; - replaceWhenNeeded(LLVMContext::MD_noalias); - replaceWhenNeeded(LLVMContext::MD_alias_scope); -} - -void llvm::cloneAndAdaptNoAliasScopes( - ArrayRef<MDNode *> NoAliasDeclScopes, - ArrayRef<BasicBlock *> NewBlocks, LLVMContext &Context, StringRef Ext) { - if (NoAliasDeclScopes.empty()) - return; 
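Taken together, the helpers moved in this hunk give transforms that duplicate IR a two-step recipe: collect the scope lists declared in the original region, then clone them and rewrite the metadata in the copies. A hypothetical caller, assuming the declarations are picked up from llvm/Transforms/Utils/Cloning.h as in this tree:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Transforms/Utils/Cloning.h"

using namespace llvm;

// OrigBlocks were copied 1:1 into NewBlocks by some transform
// (unrolling, peeling, ...); give the copies fresh alias scopes so the
// two copies cannot spuriously claim noalias against each other.
static void rescopeClonedBlocks(ArrayRef<BasicBlock *> OrigBlocks,
                                ArrayRef<BasicBlock *> NewBlocks) {
  SmallVector<MDNode *, 8> NoAliasDeclScopes;
  // Step 1: find the scope lists declared inside the duplicated region.
  identifyNoAliasScopesToClone(OrigBlocks, NoAliasDeclScopes);
  // Step 2: clone each scope and rewrite !alias.scope / !noalias
  // metadata in the new blocks to use the clones.
  cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, NewBlocks,
                             OrigBlocks.front()->getContext(), "cloned");
}
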
- - DenseMap<MDNode *, MDNode *> ClonedScopes; - LLVM_DEBUG(dbgs() << "cloneAndAdaptNoAliasScopes: cloning " - << NoAliasDeclScopes.size() << " node(s)\n"); - - cloneNoAliasScopes(NoAliasDeclScopes, ClonedScopes, Ext, Context); - // Identify instructions using metadata that needs adaptation - for (BasicBlock *NewBlock : NewBlocks) - for (Instruction &I : *NewBlock) - adaptNoAliasScopes(&I, ClonedScopes, Context); -} - -void llvm::cloneAndAdaptNoAliasScopes( - ArrayRef<MDNode *> NoAliasDeclScopes, Instruction *IStart, - Instruction *IEnd, LLVMContext &Context, StringRef Ext) { - if (NoAliasDeclScopes.empty()) - return; - - DenseMap<MDNode *, MDNode *> ClonedScopes; - LLVM_DEBUG(dbgs() << "cloneAndAdaptNoAliasScopes: cloning " - << NoAliasDeclScopes.size() << " node(s)\n"); - - cloneNoAliasScopes(NoAliasDeclScopes, ClonedScopes, Ext, Context); - // Identify instructions using metadata that needs adaptation - assert(IStart->getParent() == IEnd->getParent() && "different basic block ?"); - auto ItStart = IStart->getIterator(); - auto ItEnd = IEnd->getIterator(); - ++ItEnd; // IEnd is included, increment ItEnd to get the end of the range - for (auto &I : llvm::make_range(ItStart, ItEnd)) - adaptNoAliasScopes(&I, ClonedScopes, Context); -} - -void llvm::identifyNoAliasScopesToClone( - ArrayRef<BasicBlock *> BBs, SmallVectorImpl<MDNode *> &NoAliasDeclScopes) { - for (BasicBlock *BB : BBs) - for (Instruction &I : *BB) - if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) - NoAliasDeclScopes.push_back(Decl->getScopeList()); -} - -void llvm::identifyNoAliasScopesToClone( - BasicBlock::iterator Start, BasicBlock::iterator End, - SmallVectorImpl<MDNode *> &NoAliasDeclScopes) { - for (Instruction &I : make_range(Start, End)) - if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) - NoAliasDeclScopes.push_back(Decl->getScopeList()); -} + +void llvm::cloneNoAliasScopes( + ArrayRef<MDNode *> NoAliasDeclScopes, + DenseMap<MDNode *, MDNode *> &ClonedScopes, + StringRef Ext, LLVMContext &Context) { + MDBuilder MDB(Context); + + for (auto *ScopeList : NoAliasDeclScopes) { + for (auto &MDOperand : ScopeList->operands()) { + if (MDNode *MD = dyn_cast<MDNode>(MDOperand)) { + AliasScopeNode SNANode(MD); + + std::string Name; + auto ScopeName = SNANode.getName(); + if (!ScopeName.empty()) + Name = (Twine(ScopeName) + ":" + Ext).str(); + else + Name = std::string(Ext); + + MDNode *NewScope = MDB.createAnonymousAliasScope( + const_cast<MDNode *>(SNANode.getDomain()), Name); + ClonedScopes.insert(std::make_pair(MD, NewScope)); + } + } + } +} + +void llvm::adaptNoAliasScopes( + Instruction *I, const DenseMap<MDNode *, MDNode *> &ClonedScopes, + LLVMContext &Context) { + auto CloneScopeList = [&](const MDNode *ScopeList) -> MDNode * { + bool NeedsReplacement = false; + SmallVector<Metadata *, 8> NewScopeList; + for (auto &MDOp : ScopeList->operands()) { + if (MDNode *MD = dyn_cast<MDNode>(MDOp)) { + if (auto *NewMD = ClonedScopes.lookup(MD)) { + NewScopeList.push_back(NewMD); + NeedsReplacement = true; + continue; + } + NewScopeList.push_back(MD); + } + } + if (NeedsReplacement) + return MDNode::get(Context, NewScopeList); + return nullptr; + }; + + if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(I)) + if (auto *NewScopeList = CloneScopeList(Decl->getScopeList())) + Decl->setScopeList(NewScopeList); + + auto replaceWhenNeeded = [&](unsigned MD_ID) { + if (const MDNode *CSNoAlias = I->getMetadata(MD_ID)) + if (auto *NewScopeList = CloneScopeList(CSNoAlias)) + I->setMetadata(MD_ID, NewScopeList); + }; + 
replaceWhenNeeded(LLVMContext::MD_noalias); + replaceWhenNeeded(LLVMContext::MD_alias_scope); +} + +void llvm::cloneAndAdaptNoAliasScopes( + ArrayRef<MDNode *> NoAliasDeclScopes, + ArrayRef<BasicBlock *> NewBlocks, LLVMContext &Context, StringRef Ext) { + if (NoAliasDeclScopes.empty()) + return; + + DenseMap<MDNode *, MDNode *> ClonedScopes; + LLVM_DEBUG(dbgs() << "cloneAndAdaptNoAliasScopes: cloning " + << NoAliasDeclScopes.size() << " node(s)\n"); + + cloneNoAliasScopes(NoAliasDeclScopes, ClonedScopes, Ext, Context); + // Identify instructions using metadata that needs adaptation + for (BasicBlock *NewBlock : NewBlocks) + for (Instruction &I : *NewBlock) + adaptNoAliasScopes(&I, ClonedScopes, Context); +} + +void llvm::cloneAndAdaptNoAliasScopes( + ArrayRef<MDNode *> NoAliasDeclScopes, Instruction *IStart, + Instruction *IEnd, LLVMContext &Context, StringRef Ext) { + if (NoAliasDeclScopes.empty()) + return; + + DenseMap<MDNode *, MDNode *> ClonedScopes; + LLVM_DEBUG(dbgs() << "cloneAndAdaptNoAliasScopes: cloning " + << NoAliasDeclScopes.size() << " node(s)\n"); + + cloneNoAliasScopes(NoAliasDeclScopes, ClonedScopes, Ext, Context); + // Identify instructions using metadata that needs adaptation + assert(IStart->getParent() == IEnd->getParent() && "different basic block ?"); + auto ItStart = IStart->getIterator(); + auto ItEnd = IEnd->getIterator(); + ++ItEnd; // IEnd is included, increment ItEnd to get the end of the range + for (auto &I : llvm::make_range(ItStart, ItEnd)) + adaptNoAliasScopes(&I, ClonedScopes, Context); +} + +void llvm::identifyNoAliasScopesToClone( + ArrayRef<BasicBlock *> BBs, SmallVectorImpl<MDNode *> &NoAliasDeclScopes) { + for (BasicBlock *BB : BBs) + for (Instruction &I : *BB) + if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) + NoAliasDeclScopes.push_back(Decl->getScopeList()); +} + +void llvm::identifyNoAliasScopesToClone( + BasicBlock::iterator Start, BasicBlock::iterator End, + SmallVectorImpl<MDNode *> &NoAliasDeclScopes) { + for (Instruction &I : make_range(Start, End)) + if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) + NoAliasDeclScopes.push_back(Decl->getScopeList()); +} diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/CloneModule.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/CloneModule.cpp index 43e6c4b542..a6327bbf21 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/CloneModule.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/CloneModule.cpp @@ -117,14 +117,14 @@ std::unique_ptr<Module> llvm::CloneModule( // for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { - GlobalVariable *GV = cast<GlobalVariable>(VMap[&*I]); - - SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; - I->getAllMetadata(MDs); - for (auto MD : MDs) - GV->addMetadata(MD.first, - *MapMetadata(MD.second, VMap, RF_MoveDistinctMDs)); - + GlobalVariable *GV = cast<GlobalVariable>(VMap[&*I]); + + SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; + I->getAllMetadata(MDs); + for (auto MD : MDs) + GV->addMetadata(MD.first, + *MapMetadata(MD.second, VMap, RF_MoveDistinctMDs)); + if (I->isDeclaration()) continue; diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/CodeExtractor.cpp index 56dcc38ca5..390925a03b 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/CodeExtractor.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/CodeExtractor.cpp @@ -535,46 +535,46 @@ void CodeExtractor::findAllocas(const CodeExtractorAnalysisCache &CEAC, continue; } - // Find bitcasts in the 
outlined region that have lifetime marker users - // outside that region. Replace the lifetime marker use with an - // outside region bitcast to avoid unnecessary alloca/reload instructions - // and extra lifetime markers. - SmallVector<Instruction *, 2> LifetimeBitcastUsers; - for (User *U : AI->users()) { - if (!definedInRegion(Blocks, U)) - continue; - - if (U->stripInBoundsConstantOffsets() != AI) - continue; - - Instruction *Bitcast = cast<Instruction>(U); - for (User *BU : Bitcast->users()) { - IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(BU); - if (!IntrInst) - continue; - - if (!IntrInst->isLifetimeStartOrEnd()) - continue; - - if (definedInRegion(Blocks, IntrInst)) - continue; - - LLVM_DEBUG(dbgs() << "Replace use of extracted region bitcast" - << *Bitcast << " in out-of-region lifetime marker " - << *IntrInst << "\n"); - LifetimeBitcastUsers.push_back(IntrInst); - } - } - - for (Instruction *I : LifetimeBitcastUsers) { - Module *M = AIFunc->getParent(); - LLVMContext &Ctx = M->getContext(); - auto *Int8PtrTy = Type::getInt8PtrTy(Ctx); - CastInst *CastI = - CastInst::CreatePointerCast(AI, Int8PtrTy, "lt.cast", I); - I->replaceUsesOfWith(I->getOperand(1), CastI); - } - + // Find bitcasts in the outlined region that have lifetime marker users + // outside that region. Replace the lifetime marker use with an + // outside region bitcast to avoid unnecessary alloca/reload instructions + // and extra lifetime markers. + SmallVector<Instruction *, 2> LifetimeBitcastUsers; + for (User *U : AI->users()) { + if (!definedInRegion(Blocks, U)) + continue; + + if (U->stripInBoundsConstantOffsets() != AI) + continue; + + Instruction *Bitcast = cast<Instruction>(U); + for (User *BU : Bitcast->users()) { + IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(BU); + if (!IntrInst) + continue; + + if (!IntrInst->isLifetimeStartOrEnd()) + continue; + + if (definedInRegion(Blocks, IntrInst)) + continue; + + LLVM_DEBUG(dbgs() << "Replace use of extracted region bitcast" + << *Bitcast << " in out-of-region lifetime marker " + << *IntrInst << "\n"); + LifetimeBitcastUsers.push_back(IntrInst); + } + } + + for (Instruction *I : LifetimeBitcastUsers) { + Module *M = AIFunc->getParent(); + LLVMContext &Ctx = M->getContext(); + auto *Int8PtrTy = Type::getInt8PtrTy(Ctx); + CastInst *CastI = + CastInst::CreatePointerCast(AI, Int8PtrTy, "lt.cast", I); + I->replaceUsesOfWith(I->getOperand(1), CastI); + } + // Follow any bitcasts. SmallVector<Instruction *, 2> Bitcasts; SmallVector<LifetimeMarkerInfo, 2> BitcastLifetimeInfo; @@ -768,7 +768,7 @@ void CodeExtractor::severSplitPHINodesOfExits( NewBB = BasicBlock::Create(ExitBB->getContext(), ExitBB->getName() + ".split", ExitBB->getParent(), ExitBB); - SmallVector<BasicBlock *, 4> Preds(predecessors(ExitBB)); + SmallVector<BasicBlock *, 4> Preds(predecessors(ExitBB)); for (BasicBlock *PredBB : Preds) if (Blocks.count(PredBB)) PredBB->getTerminator()->replaceUsesOfWith(ExitBB, NewBB); @@ -934,7 +934,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::WriteOnly: case Attribute::ZExt: case Attribute::ImmArg: - case Attribute::ByRef: + case Attribute::ByRef: case Attribute::EndAttrKinds: case Attribute::EmptyKey: case Attribute::TombstoneKey: @@ -942,11 +942,11 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, // Those attributes should be safe to propagate to the extracted function. 
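The long switch that follows implements an allow/deny split over every attribute kind: hints that keep their meaning on the outlined function fall through to a shared break (keep), while attributes tied to the original signature are filtered out first. A standalone sketch of the fall-through bucketing idiom, with a made-up attribute enum:

enum class Attr { AlwaysInline, Cold, NoRecurse, ZExt, ByVal };

static bool safeToPropagate(Attr A) {
  switch (A) {
  case Attr::AlwaysInline: // function-level hints keep their meaning
  case Attr::Cold:         // on the extracted function...
  case Attr::NoRecurse:
    return true;
  case Attr::ZExt:         // ...while parameter/return attributes make
  case Attr::ByVal:        // no sense copied onto a new signature
    return false;
  }
  return false; // unreachable; keeps -Wreturn-type quiet
}
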
case Attribute::AlwaysInline: case Attribute::Cold: - case Attribute::Hot: + case Attribute::Hot: case Attribute::NoRecurse: case Attribute::InlineHint: case Attribute::MinSize: - case Attribute::NoCallback: + case Attribute::NoCallback: case Attribute::NoDuplicate: case Attribute::NoFree: case Attribute::NoImplicitFloat: @@ -972,8 +972,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::StrictFP: case Attribute::UWTable: case Attribute::NoCfCheck: - case Attribute::MustProgress: - case Attribute::NoProfile: + case Attribute::MustProgress: + case Attribute::NoProfile: break; } @@ -1478,7 +1478,7 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc, // function arguments, as the parameters don't correspond to anything at the // source level. assert(OldSP->getUnit() && "Missing compile unit for subprogram"); - DIBuilder DIB(*OldFunc.getParent(), /*AllowUnresolved=*/false, + DIBuilder DIB(*OldFunc.getParent(), /*AllowUnresolved=*/false, OldSP->getUnit()); auto SPType = DIB.createSubroutineType(DIB.getOrCreateTypeArray(None)); DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagDefinition | @@ -1549,7 +1549,7 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc, // function. for (Instruction &I : instructions(NewFunc)) { if (const DebugLoc &DL = I.getDebugLoc()) - I.setDebugLoc(DILocation::get(Ctx, DL.getLine(), DL.getCol(), NewSP)); + I.setDebugLoc(DILocation::get(Ctx, DL.getLine(), DL.getCol(), NewSP)); // Loop info metadata may contain line locations. Fix them up. auto updateLoopInfoLoc = [&Ctx, @@ -1560,7 +1560,7 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc, updateLoopMetadataDebugLocations(I, updateLoopInfoLoc); } if (!TheCall.getDebugLoc()) - TheCall.setDebugLoc(DILocation::get(Ctx, 0, 0, OldSP)); + TheCall.setDebugLoc(DILocation::get(Ctx, 0, 0, OldSP)); eraseDebugIntrinsicsWithNonLocalRefs(NewFunc); } @@ -1783,7 +1783,7 @@ bool CodeExtractor::verifyAssumptionCache(const Function &OldFunc, const Function &NewFunc, AssumptionCache *AC) { for (auto AssumeVH : AC->assumptions()) { - auto *I = dyn_cast_or_null<CallInst>(AssumeVH); + auto *I = dyn_cast_or_null<CallInst>(AssumeVH); if (!I) continue; @@ -1795,12 +1795,12 @@ bool CodeExtractor::verifyAssumptionCache(const Function &OldFunc, // that were previously in the old function, but that have now been moved // to the new function. for (auto AffectedValVH : AC->assumptionsFor(I->getOperand(0))) { - auto *AffectedCI = dyn_cast_or_null<CallInst>(AffectedValVH); + auto *AffectedCI = dyn_cast_or_null<CallInst>(AffectedValVH); if (!AffectedCI) continue; if (AffectedCI->getFunction() != &OldFunc) return true; - auto *AssumedInst = cast<Instruction>(AffectedCI->getOperand(0)); + auto *AssumedInst = cast<Instruction>(AffectedCI->getOperand(0)); if (AssumedInst->getFunction() != &OldFunc) return true; } diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/CodeMoverUtils.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/CodeMoverUtils.cpp index c341aadbd1..ce982c7403 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/CodeMoverUtils.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/CodeMoverUtils.cpp @@ -355,32 +355,32 @@ bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint, // Check if there exists instructions which may throw, may synchonize, or may // never return, from I to InsertPoint. 
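The guard below scans every instruction between I and InsertPoint and refuses the motion if any of them may throw, may never return, or may synchronize with another thread. A simplified standalone model of that range scan (the real code additionally exempts I when it is speculatable, and derives the three facts from Instruction::mayThrow() and the willreturn/nosync call attributes):

#include <vector>

// Hypothetical stand-in for the facts read off each instruction.
struct Inst {
  bool MayThrow = false;
  bool WillReturn = true;
  bool NoSync = true;
};

// True if any instruction between the move source and the insertion
// point forbids reordering code across it.
static bool rangeBlocksMotion(const std::vector<const Inst *> &InstsToCheck) {
  for (const Inst *I : InstsToCheck) {
    if (I->MayThrow)
      return true; // unwinding could skip or repeat the moved code
    if (!I->WillReturn)
      return true; // the moved code might never be reached at all
    if (!I->NoSync)
      return true; // another thread could observe the reordering
  }
  return false;
}
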
if (!isSafeToSpeculativelyExecute(&I)) - if (llvm::any_of(InstsToCheck, [](Instruction *I) { - if (I->mayThrow()) - return true; - - const CallBase *CB = dyn_cast<CallBase>(I); - if (!CB) - return false; - if (!CB->hasFnAttr(Attribute::WillReturn)) - return true; - if (!CB->hasFnAttr(Attribute::NoSync)) - return true; - - return false; - })) { + if (llvm::any_of(InstsToCheck, [](Instruction *I) { + if (I->mayThrow()) + return true; + + const CallBase *CB = dyn_cast<CallBase>(I); + if (!CB) + return false; + if (!CB->hasFnAttr(Attribute::WillReturn)) + return true; + if (!CB->hasFnAttr(Attribute::NoSync)) + return true; + + return false; + })) { return reportInvalidCandidate(I, MayThrowException); } // Check if I has any output/flow/anti dependences with instructions from \p // StartInst to \p EndInst. - if (llvm::any_of(InstsToCheck, [&DI, &I](Instruction *CurInst) { - auto DepResult = DI->depends(&I, CurInst, true); - if (DepResult && (DepResult->isOutput() || DepResult->isFlow() || - DepResult->isAnti())) - return true; - return false; - })) + if (llvm::any_of(InstsToCheck, [&DI, &I](Instruction *CurInst) { + auto DepResult = DI->depends(&I, CurInst, true); + if (DepResult && (DepResult->isOutput() || DepResult->isFlow() || + DepResult->isAnti())) + return true; + return false; + })) return reportInvalidCandidate(I, HasDependences); return true; diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/Debugify.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/Debugify.cpp index 816b849506..3e4d53c10d 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/Debugify.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/Debugify.cpp @@ -20,7 +20,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" -#include "llvm/IR/PassInstrumentation.h" +#include "llvm/IR/PassInstrumentation.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" @@ -199,18 +199,18 @@ bool llvm::applyDebugifyMetadata( return true; } -static bool applyDebugify(Function &F) { - Module &M = *F.getParent(); - auto FuncIt = F.getIterator(); - return applyDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)), - "FunctionDebugify: ", /*ApplyToMF=*/nullptr); -} - -static bool applyDebugify(Module &M) { - return applyDebugifyMetadata(M, M.functions(), - "ModuleDebugify: ", /*ApplyToMF=*/nullptr); -} - +static bool applyDebugify(Function &F) { + Module &M = *F.getParent(); + auto FuncIt = F.getIterator(); + return applyDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)), + "FunctionDebugify: ", /*ApplyToMF=*/nullptr); +} + +static bool applyDebugify(Module &M) { + return applyDebugifyMetadata(M, M.functions(), + "ModuleDebugify: ", /*ApplyToMF=*/nullptr); +} + bool llvm::stripDebugifyMetadata(Module &M) { bool Changed = false; @@ -239,7 +239,7 @@ bool llvm::stripDebugifyMetadata(Module &M) { NamedMDNode *NMD = M.getModuleFlagsMetadata(); if (!NMD) return Changed; - SmallVector<MDNode *, 4> Flags(NMD->operands()); + SmallVector<MDNode *, 4> Flags(NMD->operands()); NMD->clearOperands(); for (MDNode *Flag : Flags) { MDString *Key = dyn_cast_or_null<MDString>(Flag->getOperand(1)); @@ -394,7 +394,7 @@ bool checkDebugifyMetadata(Module &M, /// ModulePass for attaching synthetic debug info to everything, used with the /// legacy module pass manager. 
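For context on how these debugify pieces are meant to be driven: a pass under test is sandwiched between a pass that attaches synthetic debug info and one that checks (and optionally strips) it afterwards, with losses accumulated in a DebugifyStatsMap. A hypothetical legacy-PM driver built from the factory functions this file exports, writing stats to a made-up CSV path:

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/Debugify.h"

using namespace llvm;

static void auditDebugInfoLoss(Module &M, Pass *PassUnderTest) {
  DebugifyStatsMap Stats;
  legacy::PassManager PM;
  PM.add(createDebugifyModulePass()); // attach synthetic locations/values
  PM.add(PassUnderTest);              // the transform being audited
  PM.add(createCheckDebugifyModulePass(/*Strip=*/true, "PassUnderTest",
                                       &Stats)); // verify, then clean up
  PM.run(M);
  exportDebugifyStats("debugify-stats.csv", Stats); // CSV format as above
}
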
struct DebugifyModulePass : public ModulePass { - bool runOnModule(Module &M) override { return applyDebugify(M); } + bool runOnModule(Module &M) override { return applyDebugify(M); } DebugifyModulePass() : ModulePass(ID) {} @@ -408,7 +408,7 @@ struct DebugifyModulePass : public ModulePass { /// FunctionPass for attaching synthetic debug info to instructions within a /// single function, used with the legacy module pass manager. struct DebugifyFunctionPass : public FunctionPass { - bool runOnFunction(Function &F) override { return applyDebugify(F); } + bool runOnFunction(Function &F) override { return applyDebugify(F); } DebugifyFunctionPass() : FunctionPass(ID) {} @@ -475,32 +475,32 @@ private: } // end anonymous namespace -void llvm::exportDebugifyStats(StringRef Path, const DebugifyStatsMap &Map) { - std::error_code EC; - raw_fd_ostream OS{Path, EC}; - if (EC) { - errs() << "Could not open file: " << EC.message() << ", " << Path << '\n'; - return; - } - - OS << "Pass Name" << ',' << "# of missing debug values" << ',' - << "# of missing locations" << ',' << "Missing/Expected value ratio" << ',' - << "Missing/Expected location ratio" << '\n'; - for (const auto &Entry : Map) { - StringRef Pass = Entry.first; - DebugifyStatistics Stats = Entry.second; - - OS << Pass << ',' << Stats.NumDbgValuesMissing << ',' - << Stats.NumDbgLocsMissing << ',' << Stats.getMissingValueRatio() << ',' - << Stats.getEmptyLocationRatio() << '\n'; - } -} - -ModulePass *llvm::createDebugifyModulePass() { - return new DebugifyModulePass(); -} - -FunctionPass *llvm::createDebugifyFunctionPass() { +void llvm::exportDebugifyStats(StringRef Path, const DebugifyStatsMap &Map) { + std::error_code EC; + raw_fd_ostream OS{Path, EC}; + if (EC) { + errs() << "Could not open file: " << EC.message() << ", " << Path << '\n'; + return; + } + + OS << "Pass Name" << ',' << "# of missing debug values" << ',' + << "# of missing locations" << ',' << "Missing/Expected value ratio" << ',' + << "Missing/Expected location ratio" << '\n'; + for (const auto &Entry : Map) { + StringRef Pass = Entry.first; + DebugifyStatistics Stats = Entry.second; + + OS << Pass << ',' << Stats.NumDbgValuesMissing << ',' + << Stats.NumDbgLocsMissing << ',' << Stats.getMissingValueRatio() << ',' + << Stats.getEmptyLocationRatio() << '\n'; + } +} + +ModulePass *llvm::createDebugifyModulePass() { + return new DebugifyModulePass(); +} + +FunctionPass *llvm::createDebugifyFunctionPass() { return new DebugifyFunctionPass(); } @@ -510,15 +510,15 @@ PreservedAnalyses NewPMDebugifyPass::run(Module &M, ModuleAnalysisManager &) { return PreservedAnalyses::all(); } -ModulePass *llvm::createCheckDebugifyModulePass(bool Strip, - StringRef NameOfWrappedPass, - DebugifyStatsMap *StatsMap) { +ModulePass *llvm::createCheckDebugifyModulePass(bool Strip, + StringRef NameOfWrappedPass, + DebugifyStatsMap *StatsMap) { return new CheckDebugifyModulePass(Strip, NameOfWrappedPass, StatsMap); } -FunctionPass * -llvm::createCheckDebugifyFunctionPass(bool Strip, StringRef NameOfWrappedPass, - DebugifyStatsMap *StatsMap) { +FunctionPass * +llvm::createCheckDebugifyFunctionPass(bool Strip, StringRef NameOfWrappedPass, + DebugifyStatsMap *StatsMap) { return new CheckDebugifyFunctionPass(Strip, NameOfWrappedPass, StatsMap); } @@ -529,41 +529,41 @@ PreservedAnalyses NewPMCheckDebugifyPass::run(Module &M, return PreservedAnalyses::all(); } -static bool isIgnoredPass(StringRef PassID) { - return isSpecialPass(PassID, {"PassManager", "PassAdaptor", - "AnalysisManagerProxy", 
"PrintFunctionPass", - "PrintModulePass", "BitcodeWriterPass", - "ThinLTOBitcodeWriterPass", "VerifierPass"}); -} - -void DebugifyEachInstrumentation::registerCallbacks( - PassInstrumentationCallbacks &PIC) { - PIC.registerBeforeNonSkippedPassCallback([](StringRef P, Any IR) { - if (isIgnoredPass(P)) - return; - if (any_isa<const Function *>(IR)) - applyDebugify(*const_cast<Function *>(any_cast<const Function *>(IR))); - else if (any_isa<const Module *>(IR)) - applyDebugify(*const_cast<Module *>(any_cast<const Module *>(IR))); - }); - PIC.registerAfterPassCallback([this](StringRef P, Any IR, - const PreservedAnalyses &PassPA) { - if (isIgnoredPass(P)) - return; - if (any_isa<const Function *>(IR)) { - auto &F = *const_cast<Function *>(any_cast<const Function *>(IR)); - Module &M = *F.getParent(); - auto It = F.getIterator(); - checkDebugifyMetadata(M, make_range(It, std::next(It)), P, - "CheckFunctionDebugify", /*Strip=*/true, &StatsMap); - } else if (any_isa<const Module *>(IR)) { - auto &M = *const_cast<Module *>(any_cast<const Module *>(IR)); - checkDebugifyMetadata(M, M.functions(), P, "CheckModuleDebugify", - /*Strip=*/true, &StatsMap); - } - }); -} - +static bool isIgnoredPass(StringRef PassID) { + return isSpecialPass(PassID, {"PassManager", "PassAdaptor", + "AnalysisManagerProxy", "PrintFunctionPass", + "PrintModulePass", "BitcodeWriterPass", + "ThinLTOBitcodeWriterPass", "VerifierPass"}); +} + +void DebugifyEachInstrumentation::registerCallbacks( + PassInstrumentationCallbacks &PIC) { + PIC.registerBeforeNonSkippedPassCallback([](StringRef P, Any IR) { + if (isIgnoredPass(P)) + return; + if (any_isa<const Function *>(IR)) + applyDebugify(*const_cast<Function *>(any_cast<const Function *>(IR))); + else if (any_isa<const Module *>(IR)) + applyDebugify(*const_cast<Module *>(any_cast<const Module *>(IR))); + }); + PIC.registerAfterPassCallback([this](StringRef P, Any IR, + const PreservedAnalyses &PassPA) { + if (isIgnoredPass(P)) + return; + if (any_isa<const Function *>(IR)) { + auto &F = *const_cast<Function *>(any_cast<const Function *>(IR)); + Module &M = *F.getParent(); + auto It = F.getIterator(); + checkDebugifyMetadata(M, make_range(It, std::next(It)), P, + "CheckFunctionDebugify", /*Strip=*/true, &StatsMap); + } else if (any_isa<const Module *>(IR)) { + auto &M = *const_cast<Module *>(any_cast<const Module *>(IR)); + checkDebugifyMetadata(M, M.functions(), P, "CheckModuleDebugify", + /*Strip=*/true, &StatsMap); + } + }); +} + char DebugifyModulePass::ID = 0; static RegisterPass<DebugifyModulePass> DM("debugify", "Attach debug info to everything"); diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/EntryExitInstrumenter.cpp index a68c090a9a..26f8e21952 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/EntryExitInstrumenter.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/EntryExitInstrumenter.cpp @@ -83,7 +83,7 @@ static bool runOnFunction(Function &F, bool PostInlining) { if (!EntryFunc.empty()) { DebugLoc DL; if (auto SP = F.getSubprogram()) - DL = DILocation::get(SP->getContext(), SP->getScopeLine(), 0, SP); + DL = DILocation::get(SP->getContext(), SP->getScopeLine(), 0, SP); insertCall(F, EntryFunc, &*F.begin()->getFirstInsertionPt(), DL); Changed = true; @@ -97,14 +97,14 @@ static bool runOnFunction(Function &F, bool PostInlining) { continue; // If T is preceded by a musttail call, that's the real terminator. 
- if (CallInst *CI = BB.getTerminatingMustTailCall()) - T = CI; + if (CallInst *CI = BB.getTerminatingMustTailCall()) + T = CI; DebugLoc DL; if (DebugLoc TerminatorDL = T->getDebugLoc()) DL = TerminatorDL; else if (auto SP = F.getSubprogram()) - DL = DILocation::get(SP->getContext(), 0, 0, SP); + DL = DILocation::get(SP->getContext(), 0, 0, SP); insertCall(F, ExitFunc, T, DL); Changed = true; diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/EscapeEnumerator.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/EscapeEnumerator.cpp index 9646f29d8f..accedd5b4e 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/EscapeEnumerator.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/EscapeEnumerator.cpp @@ -41,8 +41,8 @@ IRBuilder<> *EscapeEnumerator::Next() { if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI)) continue; - if (CallInst *CI = CurBB->getTerminatingMustTailCall()) - TI = CI; + if (CallInst *CI = CurBB->getTerminatingMustTailCall()) + TI = CI; Builder.SetInsertPoint(TI); return &Builder; } @@ -56,12 +56,12 @@ IRBuilder<> *EscapeEnumerator::Next() { return nullptr; // Find all 'call' instructions that may throw. - // We cannot tranform calls with musttail tag. + // We cannot tranform calls with musttail tag. SmallVector<Instruction *, 16> Calls; for (BasicBlock &BB : F) for (Instruction &II : BB) if (CallInst *CI = dyn_cast<CallInst>(&II)) - if (!CI->doesNotThrow() && !CI->isMustTailCall()) + if (!CI->doesNotThrow() && !CI->isMustTailCall()) Calls.push_back(CI); if (Calls.empty()) diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/Evaluator.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/Evaluator.cpp index f21a60673a..732b00635e 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/Evaluator.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/Evaluator.cpp @@ -183,11 +183,11 @@ evaluateBitcastFromPtr(Constant *Ptr, const DataLayout &DL, std::function<Constant *(Constant *)> Func) { Constant *Val; while (!(Val = Func(Ptr))) { - // If Ty is a non-opaque struct, we can convert the pointer to the struct + // If Ty is a non-opaque struct, we can convert the pointer to the struct // into a pointer to its first member. // FIXME: This could be extended to support arrays as well. Type *Ty = cast<PointerType>(Ptr->getType())->getElementType(); - if (!isa<StructType>(Ty) || cast<StructType>(Ty)->isOpaque()) + if (!isa<StructType>(Ty) || cast<StructType>(Ty)->isOpaque()) break; IntegerType *IdxTy = IntegerType::get(Ty->getContext(), 32); @@ -210,7 +210,7 @@ static Constant *getInitializer(Constant *C) { Constant *Evaluator::ComputeLoadResult(Constant *P) { // If this memory location has been recently stored, use the stored value: it // is the most up-to-date. - auto findMemLoc = [this](Constant *Ptr) { return MutatedMemory.lookup(Ptr); }; + auto findMemLoc = [this](Constant *Ptr) { return MutatedMemory.lookup(Ptr); }; if (Constant *Val = findMemLoc(P)) return Val; @@ -547,10 +547,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, LLVM_DEBUG(dbgs() << "Skipping sideeffect intrinsic.\n"); ++CurInst; continue; - } else if (II->getIntrinsicID() == Intrinsic::pseudoprobe) { - LLVM_DEBUG(dbgs() << "Skipping pseudoprobe intrinsic.\n"); - ++CurInst; - continue; + } else if (II->getIntrinsicID() == Intrinsic::pseudoprobe) { + LLVM_DEBUG(dbgs() << "Skipping pseudoprobe intrinsic.\n"); + ++CurInst; + continue; } LLVM_DEBUG(dbgs() << "Unknown intrinsic. 
Can not evaluate.\n"); diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/FixIrreducible.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/FixIrreducible.cpp index e9f1bf6b6b..44af95eef6 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/FixIrreducible.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/FixIrreducible.cpp @@ -66,7 +66,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/FixIrreducible.h" +#include "llvm/Transforms/Utils/FixIrreducible.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/InitializePasses.h" @@ -105,7 +105,7 @@ FunctionPass *llvm::createFixIrreduciblePass() { return new FixIrreducible(); } INITIALIZE_PASS_BEGIN(FixIrreducible, "fix-irreducible", "Convert irreducible control-flow into natural loops", false /* Only looks at CFG */, false /* Analysis Pass */) -INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass) +INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(FixIrreducible, "fix-irreducible", @@ -305,7 +305,7 @@ static bool makeReducible(LoopInfo &LI, DominatorTree &DT, Graph &&G) { return Changed; } -static bool FixIrreducibleImpl(Function &F, LoopInfo &LI, DominatorTree &DT) { +static bool FixIrreducibleImpl(Function &F, LoopInfo &LI, DominatorTree &DT) { LLVM_DEBUG(dbgs() << "===== Fix irreducible control-flow in function: " << F.getName() << "\n"); @@ -317,10 +317,10 @@ static bool FixIrreducibleImpl(Function &F, LoopInfo &LI, DominatorTree &DT) { // Any SCCs reduced are now already in the list of top-level loops, so simply // add them all to the worklist. - append_range(WorkList, LI); + append_range(WorkList, LI); while (!WorkList.empty()) { - auto L = WorkList.pop_back_val(); + auto L = WorkList.pop_back_val(); LLVM_DEBUG(dbgs() << "visiting loop with header " << L->getHeader()->getName() << "\n"); Changed |= makeReducible(LI, DT, *L); @@ -331,21 +331,21 @@ static bool FixIrreducibleImpl(Function &F, LoopInfo &LI, DominatorTree &DT) { return Changed; } - -bool FixIrreducible::runOnFunction(Function &F) { - auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - return FixIrreducibleImpl(F, LI, DT); -} - -PreservedAnalyses FixIrreduciblePass::run(Function &F, - FunctionAnalysisManager &AM) { - auto &LI = AM.getResult<LoopAnalysis>(F); - auto &DT = AM.getResult<DominatorTreeAnalysis>(F); - if (!FixIrreducibleImpl(F, LI, DT)) - return PreservedAnalyses::all(); - PreservedAnalyses PA; - PA.preserve<LoopAnalysis>(); - PA.preserve<DominatorTreeAnalysis>(); - return PA; -} + +bool FixIrreducible::runOnFunction(Function &F) { + auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + return FixIrreducibleImpl(F, LI, DT); +} + +PreservedAnalyses FixIrreduciblePass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &LI = AM.getResult<LoopAnalysis>(F); + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + if (!FixIrreducibleImpl(F, LI, DT)) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve<LoopAnalysis>(); + PA.preserve<DominatorTreeAnalysis>(); + return PA; +} diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/FunctionComparator.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/FunctionComparator.cpp index b4d11bd2f4..2696557a71 100644 --- 
a/contrib/libs/llvm12/lib/Transforms/Utils/FunctionComparator.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/FunctionComparator.cpp @@ -124,17 +124,17 @@ int FunctionComparator::cmpAttrs(const AttributeList L, Type *TyL = LA.getValueAsType(); Type *TyR = RA.getValueAsType(); - if (TyL && TyR) { - if (int Res = cmpTypes(TyL, TyR)) - return Res; - continue; - } + if (TyL && TyR) { + if (int Res = cmpTypes(TyL, TyR)) + return Res; + continue; + } // Two pointers, at least one null, so the comparison result is // independent of the value of a real pointer. - if (int Res = cmpNumbers((uint64_t)TyL, (uint64_t)TyR)) - return Res; - continue; + if (int Res = cmpNumbers((uint64_t)TyL, (uint64_t)TyR)) + return Res; + continue; } if (LA < RA) return -1; @@ -291,7 +291,7 @@ int FunctionComparator::cmpConstants(const Constant *L, switch (L->getValueID()) { case Value::UndefValueVal: - case Value::PoisonValueVal: + case Value::PoisonValueVal: case Value::ConstantTokenNoneVal: return TypesRes; case Value::ConstantIntVal: { @@ -494,13 +494,13 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const { case Type::ScalableVectorTyID: { auto *STyL = cast<VectorType>(TyL); auto *STyR = cast<VectorType>(TyR); - if (STyL->getElementCount().isScalable() != - STyR->getElementCount().isScalable()) - return cmpNumbers(STyL->getElementCount().isScalable(), - STyR->getElementCount().isScalable()); - if (STyL->getElementCount() != STyR->getElementCount()) - return cmpNumbers(STyL->getElementCount().getKnownMinValue(), - STyR->getElementCount().getKnownMinValue()); + if (STyL->getElementCount().isScalable() != + STyR->getElementCount().isScalable()) + return cmpNumbers(STyL->getElementCount().isScalable(), + STyR->getElementCount().isScalable()); + if (STyL->getElementCount() != STyR->getElementCount()) + return cmpNumbers(STyL->getElementCount().getKnownMinValue(), + STyR->getElementCount().getKnownMinValue()); return cmpTypes(STyL->getElementType(), STyR->getElementType()); } } diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/GlobalStatus.cpp index fbc0b4b45a..f782396be7 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/GlobalStatus.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/GlobalStatus.cpp @@ -136,8 +136,8 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, GS.StoredType = GlobalStatus::Stored; } } - } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I) || - isa<AddrSpaceCastInst>(I)) { + } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I) || + isa<AddrSpaceCastInst>(I)) { // Skip over bitcasts and GEPs; we don't care about the type or offset // of the pointer. 
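Editor's note on the FunctionComparator hunk above: the restored cmpTypes logic orders vector types by scalability first and known-minimum lane count second, which keeps the comparator a total order across fixed and scalable vectors. A minimal standalone model of that ordering, with stand-in types rather than the LLVM ElementCount API:

#include <cassert>

// Stand-in for llvm::ElementCount: a minimum lane count plus a scalable flag.
struct ElementCount {
  unsigned MinCount;
  bool Scalable;
};

static int cmpNumbers(unsigned long long L, unsigned long long R) {
  return L < R ? -1 : (L > R ? 1 : 0);
}

static int cmpElementCount(ElementCount L, ElementCount R) {
  // Discriminate on scalability first, exactly like the restored code.
  if (L.Scalable != R.Scalable)
    return cmpNumbers(L.Scalable, R.Scalable);
  // Same scalability: the known-minimum lane count decides.
  if (L.MinCount != R.MinCount)
    return cmpNumbers(L.MinCount, R.MinCount);
  return 0; // Equal so far; the real code then compares element types.
}

int main() {
  assert(cmpElementCount({4, false}, {4, true}) < 0); // fixed before scalable
  assert(cmpElementCount({2, true}, {4, true}) < 0);  // then by min lane count
}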
if (analyzeGlobalAux(I, GS, VisitedUsers)) diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/GuardUtils.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/GuardUtils.cpp index 2d18d45e43..4dbcbf80d3 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/GuardUtils.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/GuardUtils.cpp @@ -30,7 +30,7 @@ static cl::opt<uint32_t> PredicatePassBranchWeight( void llvm::makeGuardControlFlowExplicit(Function *DeoptIntrinsic, CallInst *Guard, bool UseWC) { OperandBundleDef DeoptOB(*Guard->getOperandBundle(LLVMContext::OB_deopt)); - SmallVector<Value *, 4> Args(drop_begin(Guard->args())); + SmallVector<Value *, 4> Args(drop_begin(Guard->args())); auto *CheckBB = Guard->getParent(); auto *DeoptBlockTerm = diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/InjectTLIMappings.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/InjectTLIMappings.cpp index dbaf00eb6d..a2b72e4e7f 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/InjectTLIMappings.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/InjectTLIMappings.cpp @@ -16,7 +16,7 @@ #include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" @@ -78,8 +78,8 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) { if (CI.isNoBuiltin() || !CI.getCalledFunction()) return; - StringRef ScalarName = CI.getCalledFunction()->getName(); - + StringRef ScalarName = CI.getCalledFunction()->getName(); + // Nothing to be done if the TLI thinks the function is not // vectorizable. if (!TLI.isFunctionVectorizable(ScalarName)) diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/InlineFunction.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/InlineFunction.cpp index 9237e9a513..fb271a2118 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/InlineFunction.cpp @@ -79,12 +79,12 @@ EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true), cl::Hidden, cl::desc("Convert noalias attributes to metadata during inlining.")); -static cl::opt<bool> - UseNoAliasIntrinsic("use-noalias-intrinsic-during-inlining", cl::Hidden, - cl::ZeroOrMore, cl::init(true), - cl::desc("Use the llvm.experimental.noalias.scope.decl " - "intrinsic during inlining.")); - +static cl::opt<bool> + UseNoAliasIntrinsic("use-noalias-intrinsic-during-inlining", cl::Hidden, + cl::ZeroOrMore, cl::init(true), + cl::desc("Use the llvm.experimental.noalias.scope.decl " + "intrinsic during inlining.")); + // Disabled by default, because the added alignment assumptions may increase // compile-time and block optimizations. This option is not suitable for use // with frontends that emit comprehensive parameter alignment annotations. @@ -777,150 +777,150 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, UnwindDest->removePredecessor(InvokeBB); } -/// When inlining a call site that has !llvm.mem.parallel_loop_access, -/// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should -/// be propagated to all memory-accessing cloned instructions. 
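On the InjectTLIMappings hunk above: mappings are only attached when the TLI reports the called scalar name as vectorizable, so the common case exits early. A standalone sketch of that lookup; the table contents, the variant name, and this isFunctionVectorizable are illustrative stand-ins, not the real TargetLibraryInfo data:

#include <iostream>
#include <map>
#include <string>
#include <vector>

// Hypothetical stand-in for the TLI query: scalar libm name -> the vector
// variants known for it. The entries here are made up for illustration.
static const std::map<std::string, std::vector<std::string>> VecVariants = {
    {"sinf", {"_ZGVbN4v_sinf"}}, // hypothetical 4-lane variant name
};

static bool isFunctionVectorizable(const std::string &scalarName) {
  return VecVariants.count(scalarName) != 0;
}

int main() {
  for (const std::string &callee : {"sinf", "printf"}) {
    if (!isFunctionVectorizable(callee)) {
      std::cout << callee << ": nothing to be done\n"; // early-return path
      continue;
    }
    for (const std::string &variant : VecVariants.at(callee))
      std::cout << callee << " -> " << variant << "\n";
  }
}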
-static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart, - Function::iterator FEnd) { - MDNode *MemParallelLoopAccess = - CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access); - MDNode *AccessGroup = CB.getMetadata(LLVMContext::MD_access_group); - MDNode *AliasScope = CB.getMetadata(LLVMContext::MD_alias_scope); - MDNode *NoAlias = CB.getMetadata(LLVMContext::MD_noalias); - if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias) +/// When inlining a call site that has !llvm.mem.parallel_loop_access, +/// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should +/// be propagated to all memory-accessing cloned instructions. +static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart, + Function::iterator FEnd) { + MDNode *MemParallelLoopAccess = + CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access); + MDNode *AccessGroup = CB.getMetadata(LLVMContext::MD_access_group); + MDNode *AliasScope = CB.getMetadata(LLVMContext::MD_alias_scope); + MDNode *NoAlias = CB.getMetadata(LLVMContext::MD_noalias); + if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias) return; - for (BasicBlock &BB : make_range(FStart, FEnd)) { - for (Instruction &I : BB) { - // This metadata is only relevant for instructions that access memory. - if (!I.mayReadOrWriteMemory()) - continue; - - if (MemParallelLoopAccess) { - // TODO: This probably should not overwrite MemParalleLoopAccess. - MemParallelLoopAccess = MDNode::concatenate( - I.getMetadata(LLVMContext::MD_mem_parallel_loop_access), - MemParallelLoopAccess); - I.setMetadata(LLVMContext::MD_mem_parallel_loop_access, - MemParallelLoopAccess); - } - - if (AccessGroup) - I.setMetadata(LLVMContext::MD_access_group, uniteAccessGroups( - I.getMetadata(LLVMContext::MD_access_group), AccessGroup)); - - if (AliasScope) - I.setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate( - I.getMetadata(LLVMContext::MD_alias_scope), AliasScope)); - - if (NoAlias) - I.setMetadata(LLVMContext::MD_noalias, MDNode::concatenate( - I.getMetadata(LLVMContext::MD_noalias), NoAlias)); - } + for (BasicBlock &BB : make_range(FStart, FEnd)) { + for (Instruction &I : BB) { + // This metadata is only relevant for instructions that access memory. + if (!I.mayReadOrWriteMemory()) + continue; + + if (MemParallelLoopAccess) { + // TODO: This probably should not overwrite MemParalleLoopAccess. + MemParallelLoopAccess = MDNode::concatenate( + I.getMetadata(LLVMContext::MD_mem_parallel_loop_access), + MemParallelLoopAccess); + I.setMetadata(LLVMContext::MD_mem_parallel_loop_access, + MemParallelLoopAccess); + } + + if (AccessGroup) + I.setMetadata(LLVMContext::MD_access_group, uniteAccessGroups( + I.getMetadata(LLVMContext::MD_access_group), AccessGroup)); + + if (AliasScope) + I.setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate( + I.getMetadata(LLVMContext::MD_alias_scope), AliasScope)); + + if (NoAlias) + I.setMetadata(LLVMContext::MD_noalias, MDNode::concatenate( + I.getMetadata(LLVMContext::MD_noalias), NoAlias)); + } } } -/// Utility for cloning !noalias and !alias.scope metadata. When a code region -/// using scoped alias metadata is inlined, the aliasing relationships may not -/// hold between the two version. It is necessary to create a deep clone of the -/// metadata, putting the two versions in separate scope domains. 
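The PropagateCallSiteMetadata body restored above concatenates the call site's metadata onto every cloned instruction that may read or write memory, and leaves all other instructions alone. A standalone model of that propagation, with string tags standing in for MDNodes:

#include <iostream>
#include <string>
#include <vector>

// Model of PropagateCallSiteMetadata: tags present on the call site are
// appended to every cloned instruction that touches memory.
struct Inst {
  bool accessesMemory;
  std::vector<std::string> noaliasTags;
};

static void propagate(const std::vector<std::string> &callSiteTags,
                      std::vector<Inst> &clonedBody) {
  if (callSiteTags.empty())
    return; // nothing to do, mirroring the early return in the hunk
  for (Inst &i : clonedBody) {
    if (!i.accessesMemory)
      continue; // this metadata is only relevant for memory accesses
    // MDNode::concatenate analogue: append the call-site tags.
    i.noaliasTags.insert(i.noaliasTags.end(), callSiteTags.begin(),
                         callSiteTags.end());
  }
}

int main() {
  std::vector<Inst> body = {{true, {"scopeA"}}, {false, {}}};
  propagate({"callerScope"}, body);
  std::cout << body[0].noaliasTags.size() << " "   // 2: concatenated
            << body[1].noaliasTags.size() << "\n"; // 0: not a memory op
}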
-class ScopedAliasMetadataDeepCloner { - using MetadataMap = DenseMap<const MDNode *, TrackingMDNodeRef>; +/// Utility for cloning !noalias and !alias.scope metadata. When a code region +/// using scoped alias metadata is inlined, the aliasing relationships may not +/// hold between the two version. It is necessary to create a deep clone of the +/// metadata, putting the two versions in separate scope domains. +class ScopedAliasMetadataDeepCloner { + using MetadataMap = DenseMap<const MDNode *, TrackingMDNodeRef>; SetVector<const MDNode *> MD; - MetadataMap MDMap; - void addRecursiveMetadataUses(); - -public: - ScopedAliasMetadataDeepCloner(const Function *F); - - /// Create a new clone of the scoped alias metadata, which will be used by - /// subsequent remap() calls. - void clone(); - - /// Remap instructions in the given range from the original to the cloned - /// metadata. - void remap(Function::iterator FStart, Function::iterator FEnd); -}; - -ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner( - const Function *F) { - for (const BasicBlock &BB : *F) { - for (const Instruction &I : BB) { - if (const MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope)) + MetadataMap MDMap; + void addRecursiveMetadataUses(); + +public: + ScopedAliasMetadataDeepCloner(const Function *F); + + /// Create a new clone of the scoped alias metadata, which will be used by + /// subsequent remap() calls. + void clone(); + + /// Remap instructions in the given range from the original to the cloned + /// metadata. + void remap(Function::iterator FStart, Function::iterator FEnd); +}; + +ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner( + const Function *F) { + for (const BasicBlock &BB : *F) { + for (const Instruction &I : BB) { + if (const MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope)) MD.insert(M); - if (const MDNode *M = I.getMetadata(LLVMContext::MD_noalias)) + if (const MDNode *M = I.getMetadata(LLVMContext::MD_noalias)) MD.insert(M); - - // We also need to clone the metadata in noalias intrinsics. - if (const auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) - MD.insert(Decl->getScopeList()); + + // We also need to clone the metadata in noalias intrinsics. 
+ if (const auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) + MD.insert(Decl->getScopeList()); } - } - addRecursiveMetadataUses(); -} + } + addRecursiveMetadataUses(); +} -void ScopedAliasMetadataDeepCloner::addRecursiveMetadataUses() { +void ScopedAliasMetadataDeepCloner::addRecursiveMetadataUses() { SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end()); while (!Queue.empty()) { const MDNode *M = cast<MDNode>(Queue.pop_back_val()); - for (const Metadata *Op : M->operands()) - if (const MDNode *OpMD = dyn_cast<MDNode>(Op)) - if (MD.insert(OpMD)) - Queue.push_back(OpMD); + for (const Metadata *Op : M->operands()) + if (const MDNode *OpMD = dyn_cast<MDNode>(Op)) + if (MD.insert(OpMD)) + Queue.push_back(OpMD); } -} +} + +void ScopedAliasMetadataDeepCloner::clone() { + assert(MDMap.empty() && "clone() already called ?"); -void ScopedAliasMetadataDeepCloner::clone() { - assert(MDMap.empty() && "clone() already called ?"); - SmallVector<TempMDTuple, 16> DummyNodes; for (const MDNode *I : MD) { - DummyNodes.push_back(MDTuple::getTemporary(I->getContext(), None)); + DummyNodes.push_back(MDTuple::getTemporary(I->getContext(), None)); MDMap[I].reset(DummyNodes.back().get()); } // Create new metadata nodes to replace the dummy nodes, replacing old // metadata references with either a dummy node or an already-created new // node. - SmallVector<Metadata *, 4> NewOps; + SmallVector<Metadata *, 4> NewOps; for (const MDNode *I : MD) { - for (const Metadata *Op : I->operands()) { - if (const MDNode *M = dyn_cast<MDNode>(Op)) + for (const Metadata *Op : I->operands()) { + if (const MDNode *M = dyn_cast<MDNode>(Op)) NewOps.push_back(MDMap[M]); else - NewOps.push_back(const_cast<Metadata *>(Op)); + NewOps.push_back(const_cast<Metadata *>(Op)); } - MDNode *NewM = MDNode::get(I->getContext(), NewOps); + MDNode *NewM = MDNode::get(I->getContext(), NewOps); MDTuple *TempM = cast<MDTuple>(MDMap[I]); assert(TempM->isTemporary() && "Expected temporary node"); TempM->replaceAllUsesWith(NewM); - NewOps.clear(); + NewOps.clear(); } -} - -void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart, - Function::iterator FEnd) { - if (MDMap.empty()) - return; // Nothing to do. - - for (BasicBlock &BB : make_range(FStart, FEnd)) { - for (Instruction &I : BB) { - // TODO: The null checks for the MDMap.lookup() results should no longer - // be necessary. - if (MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope)) - if (MDNode *MNew = MDMap.lookup(M)) - I.setMetadata(LLVMContext::MD_alias_scope, MNew); - - if (MDNode *M = I.getMetadata(LLVMContext::MD_noalias)) - if (MDNode *MNew = MDMap.lookup(M)) - I.setMetadata(LLVMContext::MD_noalias, MNew); - - if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) - if (MDNode *MNew = MDMap.lookup(Decl->getScopeList())) - Decl->setScopeList(MNew); - } +} + +void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart, + Function::iterator FEnd) { + if (MDMap.empty()) + return; // Nothing to do. + + for (BasicBlock &BB : make_range(FStart, FEnd)) { + for (Instruction &I : BB) { + // TODO: The null checks for the MDMap.lookup() results should no longer + // be necessary. 
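The clone() restored above handles self-referential and mutually referential scope lists by allocating temporary placeholder nodes first and wiring up operands afterwards (the real code uses temporary MDTuples and replaceAllUsesWith). A standalone sketch of the same two-phase deep clone over a possibly cyclic node graph, with plain structs instead of Metadata:

#include <iostream>
#include <map>
#include <memory>
#include <vector>

// Two-phase deep clone: phase 1 creates a placeholder for every reachable
// node, phase 2 fills in operands through the old->new map, so cycles and
// forward references resolve without special casing.
struct Node {
  int id;
  std::vector<Node *> operands;
};

static std::map<const Node *, Node *>
deepClone(const std::vector<Node *> &roots,
          std::vector<std::unique_ptr<Node>> &storage) {
  std::map<const Node *, Node *> oldToNew;
  // Phase 1: collect every reachable node and allocate its placeholder.
  std::vector<const Node *> worklist(roots.begin(), roots.end());
  while (!worklist.empty()) {
    const Node *n = worklist.back();
    worklist.pop_back();
    if (oldToNew.count(n))
      continue;
    storage.push_back(std::make_unique<Node>(Node{n->id + 100, {}}));
    oldToNew[n] = storage.back().get();
    for (Node *op : n->operands)
      worklist.push_back(op);
  }
  // Phase 2: wire up operands through the map; cycles "just work".
  for (auto &[oldN, newN] : oldToNew)
    for (Node *op : oldN->operands)
      newN->operands.push_back(oldToNew.at(op));
  return oldToNew;
}

int main() {
  std::vector<std::unique_ptr<Node>> storage;
  Node a{1, {}}, b{2, {}};
  a.operands.push_back(&b);
  b.operands.push_back(&a); // a cycle, like self-referential scope domains
  auto map = deepClone({&a}, storage);
  std::cout << map.at(&a)->operands[0]->id << "\n"; // 102: the cloned b
}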
+ if (MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope)) + if (MDNode *MNew = MDMap.lookup(M)) + I.setMetadata(LLVMContext::MD_alias_scope, MNew); + + if (MDNode *M = I.getMetadata(LLVMContext::MD_noalias)) + if (MDNode *MNew = MDMap.lookup(M)) + I.setMetadata(LLVMContext::MD_noalias, MNew); + + if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) + if (MDNode *MNew = MDMap.lookup(Decl->getScopeList())) + Decl->setScopeList(MNew); + } } } @@ -977,17 +977,17 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap, // property of the callee, but also all control dependencies in the caller. MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); NewScopes.insert(std::make_pair(A, NewScope)); - - if (UseNoAliasIntrinsic) { - // Introduce a llvm.experimental.noalias.scope.decl for the noalias - // argument. - MDNode *AScopeList = MDNode::get(CalledFunc->getContext(), NewScope); - auto *NoAliasDecl = - IRBuilder<>(&CB).CreateNoAliasScopeDeclaration(AScopeList); - // Ignore the result for now. The result will be used when the - // llvm.noalias intrinsic is introduced. - (void)NoAliasDecl; - } + + if (UseNoAliasIntrinsic) { + // Introduce a llvm.experimental.noalias.scope.decl for the noalias + // argument. + MDNode *AScopeList = MDNode::get(CalledFunc->getContext(), NewScope); + auto *NoAliasDecl = + IRBuilder<>(&CB).CreateNoAliasScopeDeclaration(AScopeList); + // Ignore the result for now. The result will be used when the + // llvm.noalias intrinsic is introduced. + (void)NoAliasDecl; + } } // Iterate over all new instructions in the map; for all memory-access @@ -1058,7 +1058,7 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap, SmallSetVector<const Argument *, 4> NAPtrArgs; for (const Value *V : PtrArgs) { SmallVector<const Value *, 4> Objects; - getUnderlyingObjects(V, Objects, /* LI = */ nullptr); + getUnderlyingObjects(V, Objects, /* LI = */ nullptr); for (const Value *O : Objects) ObjSet.insert(O); @@ -1266,7 +1266,7 @@ static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) { Function *CalledFunc = CB.getCalledFunction(); for (Argument &Arg : CalledFunc->args()) { unsigned Align = Arg.getType()->isPointerTy() ? Arg.getParamAlignment() : 0; - if (Align && !Arg.hasPassPointeeByValueCopyAttr() && !Arg.hasNUses(0)) { + if (Align && !Arg.hasPassPointeeByValueCopyAttr() && !Arg.hasNUses(0)) { if (!DTCalculated) { DT.recalculate(*CB.getCaller()); DTCalculated = true; @@ -1469,8 +1469,8 @@ static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt, LLVMContext &Ctx, DenseMap<const MDNode *, MDNode *> &IANodes) { auto IA = DebugLoc::appendInlinedAt(OrigDL, InlinedAt, Ctx, IANodes); - return DILocation::get(Ctx, OrigDL.getLine(), OrigDL.getCol(), - OrigDL.getScope(), IA); + return DILocation::get(Ctx, OrigDL.getLine(), OrigDL.getCol(), + OrigDL.getScope(), IA); } /// Update inlined instructions' line numbers to @@ -1594,7 +1594,7 @@ static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, return; auto CallSiteCount = PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None; int64_t CallCount = - std::min(CallSiteCount.getValueOr(0), CalleeEntryCount.getCount()); + std::min(CallSiteCount.getValueOr(0), CalleeEntryCount.getCount()); updateProfileCallee(Callee, -CallCount, &VMap); } @@ -1785,14 +1785,14 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // Keep a list of pair (dst, src) to emit byval initializations. 
SmallVector<std::pair<Value*, Value*>, 4> ByValInit; - // When inlining a function that contains noalias scope metadata, - // this metadata needs to be cloned so that the inlined blocks - // have different "unique scopes" at every call site. - // Track the metadata that must be cloned. Do this before other changes to - // the function, so that we do not get in trouble when inlining caller == - // callee. - ScopedAliasMetadataDeepCloner SAMetadataCloner(CB.getCalledFunction()); - + // When inlining a function that contains noalias scope metadata, + // this metadata needs to be cloned so that the inlined blocks + // have different "unique scopes" at every call site. + // Track the metadata that must be cloned. Do this before other changes to + // the function, so that we do not get in trouble when inlining caller == + // callee. + ScopedAliasMetadataDeepCloner SAMetadataCloner(CB.getCalledFunction()); + auto &DL = Caller->getParent()->getDataLayout(); // Calculate the vector of arguments to pass into the function cloner, which @@ -1883,8 +1883,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, MergedDeoptArgs.reserve(ParentDeopt->Inputs.size() + ChildOB.Inputs.size()); - llvm::append_range(MergedDeoptArgs, ParentDeopt->Inputs); - llvm::append_range(MergedDeoptArgs, ChildOB.Inputs); + llvm::append_range(MergedDeoptArgs, ParentDeopt->Inputs); + llvm::append_range(MergedDeoptArgs, ChildOB.Inputs); OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs)); } @@ -1910,9 +1910,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, fixupLineNumbers(Caller, FirstNewBlock, &CB, CalledFunc->getSubprogram() != nullptr); - // Now clone the inlined noalias scope metadata. - SAMetadataCloner.clone(); - SAMetadataCloner.remap(FirstNewBlock, Caller->end()); + // Now clone the inlined noalias scope metadata. + SAMetadataCloner.clone(); + SAMetadataCloner.remap(FirstNewBlock, Caller->end()); // Add noalias metadata if necessary. AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR); @@ -1921,8 +1921,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // function which feed into its return value. AddReturnAttributes(CB, VMap); - // Propagate metadata on the callsite if necessary. - PropagateCallSiteMetadata(CB, FirstNewBlock, Caller->end()); + // Propagate metadata on the callsite if necessary. + PropagateCallSiteMetadata(CB, FirstNewBlock, Caller->end()); // Register any cloned assumptions. if (IFI.GetAssumptionCache) @@ -2087,7 +2087,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, dyn_cast<ConstantInt>(AI->getArraySize())) { auto &DL = Caller->getParent()->getDataLayout(); Type *AllocaType = AI->getAllocatedType(); - TypeSize AllocaTypeSize = DL.getTypeAllocSize(AllocaType); + TypeSize AllocaTypeSize = DL.getTypeAllocSize(AllocaType); uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); // Don't add markers for zero-sized allocas. @@ -2096,10 +2096,10 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // Check that array size doesn't saturate uint64_t and doesn't // overflow when it's multiplied by type size. 
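The guard restored in the code just below only materializes the alloca's byte size when the array count is not the saturated getLimitedValue() sentinel and the multiply cannot overflow uint64_t, and it refuses scalable types whose byte size is unknown at compile time. A standalone version of that check, assuming sizes are already in bytes:

#include <cassert>
#include <cstdint>
#include <limits>
#include <optional>

// Overflow-safe model of computing the size of "alloca T, N" in bytes.
static std::optional<uint64_t>
allocaSizeInBytes(uint64_t arraySize, uint64_t typeSizeBytes, bool scalable) {
  if (scalable)
    return std::nullopt; // byte size not known at compile time
  if (arraySize == std::numeric_limits<uint64_t>::max())
    return std::nullopt; // saturated getLimitedValue() sentinel
  if (arraySize != 0 &&
      std::numeric_limits<uint64_t>::max() / arraySize < typeSizeBytes)
    return std::nullopt; // arraySize * typeSizeBytes would overflow
  return arraySize * typeSizeBytes;
}

int main() {
  assert(*allocaSizeInBytes(4, 8, false) == 32);
  assert(!allocaSizeInBytes(1ull << 62, 8, false)); // would overflow
  assert(!allocaSizeInBytes(4, 8, true));           // scalable: unknown
}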
- if (!AllocaTypeSize.isScalable() && - AllocaArraySize != std::numeric_limits<uint64_t>::max() && + if (!AllocaTypeSize.isScalable() && + AllocaArraySize != std::numeric_limits<uint64_t>::max() && std::numeric_limits<uint64_t>::max() / AllocaArraySize >= - AllocaTypeSize.getFixedSize()) { + AllocaTypeSize.getFixedSize()) { AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), AllocaArraySize * AllocaTypeSize); } @@ -2225,7 +2225,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // match the callee's return type, we also need to change the return type of // the intrinsic. if (Caller->getReturnType() == CB.getType()) { - llvm::erase_if(Returns, [](ReturnInst *RI) { + llvm::erase_if(Returns, [](ReturnInst *RI) { return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr; }); } else { @@ -2251,7 +2251,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, auto *CurBB = RI->getParent(); RI->eraseFromParent(); - SmallVector<Value *, 4> CallArgs(DeoptCall->args()); + SmallVector<Value *, 4> CallArgs(DeoptCall->args()); SmallVector<OperandBundleDef, 1> OpBundles; DeoptCall->getOperandBundlesAsDefs(OpBundles); @@ -2488,7 +2488,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // If we inlined any musttail calls and the original return is now // unreachable, delete it. It can only contain a bitcast and ret. - if (InlinedMustTailCalls && pred_empty(AfterCallBB)) + if (InlinedMustTailCalls && pred_empty(AfterCallBB)) AfterCallBB->eraseFromParent(); // We should always be able to fold the entry block of the function into the diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/InstructionNamer.cpp index b86b5c5b12..f3499c9c8a 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/InstructionNamer.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/InstructionNamer.cpp @@ -13,52 +13,52 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/InstructionNamer.h" +#include "llvm/Transforms/Utils/InstructionNamer.h" #include "llvm/IR/Function.h" -#include "llvm/IR/PassManager.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Transforms/Utils.h" - + using namespace llvm; namespace { -void nameInstructions(Function &F) { - for (auto &Arg : F.args()) { - if (!Arg.hasName()) - Arg.setName("arg"); - } +void nameInstructions(Function &F) { + for (auto &Arg : F.args()) { + if (!Arg.hasName()) + Arg.setName("arg"); + } + + for (BasicBlock &BB : F) { + if (!BB.hasName()) + BB.setName("bb"); - for (BasicBlock &BB : F) { - if (!BB.hasName()) - BB.setName("bb"); - - for (Instruction &I : BB) { - if (!I.hasName() && !I.getType()->isVoidTy()) - I.setName("i"); + for (Instruction &I : BB) { + if (!I.hasName() && !I.getType()->isVoidTy()) + I.setName("i"); } - } -} + } +} -struct InstNamer : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - InstNamer() : FunctionPass(ID) { - initializeInstNamerPass(*PassRegistry::getPassRegistry()); - } +struct InstNamer : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + InstNamer() : FunctionPass(ID) { + initializeInstNamerPass(*PassRegistry::getPassRegistry()); + } - void getAnalysisUsage(AnalysisUsage &Info) const override { - Info.setPreservesAll(); - } + void 
getAnalysisUsage(AnalysisUsage &Info) const override { + Info.setPreservesAll(); + } - bool runOnFunction(Function &F) override { - nameInstructions(F); - return true; - } -}; + bool runOnFunction(Function &F) override { + nameInstructions(F); + return true; + } +}; char InstNamer::ID = 0; - } // namespace + } // namespace INITIALIZE_PASS(InstNamer, "instnamer", "Assign names to anonymous instructions", false, false) @@ -70,9 +70,9 @@ char &llvm::InstructionNamerID = InstNamer::ID; FunctionPass *llvm::createInstructionNamerPass() { return new InstNamer(); } - -PreservedAnalyses InstructionNamerPass::run(Function &F, - FunctionAnalysisManager &FAM) { - nameInstructions(F); - return PreservedAnalyses::all(); -} + +PreservedAnalyses InstructionNamerPass::run(Function &F, + FunctionAnalysisManager &FAM) { + nameInstructions(F); + return PreservedAnalyses::all(); +} diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/LCSSA.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/LCSSA.cpp index aad469a909..7437701f53 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/LCSSA.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/LCSSA.cpp @@ -40,7 +40,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PredIteratorCache.h" @@ -78,15 +78,15 @@ static bool isExitBlock(BasicBlock *BB, /// rewrite the uses. bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, const DominatorTree &DT, const LoopInfo &LI, - ScalarEvolution *SE, IRBuilderBase &Builder, - SmallVectorImpl<PHINode *> *PHIsToRemove) { + ScalarEvolution *SE, IRBuilderBase &Builder, + SmallVectorImpl<PHINode *> *PHIsToRemove) { SmallVector<Use *, 16> UsesToRewrite; - SmallSetVector<PHINode *, 16> LocalPHIsToRemove; + SmallSetVector<PHINode *, 16> LocalPHIsToRemove; PredIteratorCache PredCache; bool Changed = false; - IRBuilderBase::InsertPointGuard InsertPtGuard(Builder); - + IRBuilderBase::InsertPointGuard InsertPtGuard(Builder); + // Cache the Loop ExitBlocks across this loop. We expect to get a lot of // instructions within the same loops, computing the exit blocks is // expensive, and we're not mutating the loop structure. @@ -111,10 +111,10 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, for (Use &U : I->uses()) { Instruction *User = cast<Instruction>(U.getUser()); BasicBlock *UserBB = User->getParent(); - - // For practical purposes, we consider that the use in a PHI - // occurs in the respective predecessor block. For more info, - // see the `phi` doc in LangRef and the LCSSA doc. + + // For practical purposes, we consider that the use in a PHI + // occurs in the respective predecessor block. For more info, + // see the `phi` doc in LangRef and the LCSSA doc. if (auto *PN = dyn_cast<PHINode>(User)) UserBB = PN->getIncomingBlock(U); @@ -159,17 +159,17 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, // If we already inserted something for this BB, don't reprocess it. if (SSAUpdate.HasValueForBlock(ExitBB)) continue; - Builder.SetInsertPoint(&ExitBB->front()); - PHINode *PN = Builder.CreatePHI(I->getType(), PredCache.size(ExitBB), - I->getName() + ".lcssa"); + Builder.SetInsertPoint(&ExitBB->front()); + PHINode *PN = Builder.CreatePHI(I->getType(), PredCache.size(ExitBB), + I->getName() + ".lcssa"); // Get the debug location from the original instruction. 
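The LCSSA hunks around here create one PHI per loop exit block, named with the .lcssa suffix and fed the in-loop definition once per predecessor, then redirect uses outside the loop to that PHI. A standalone sketch that just prints the PHIs such a rewrite would build for a hypothetical loop, as a way of visualizing the shape of the transform:

#include <iostream>
#include <string>
#include <vector>

// Each exit block gets a single-value PHI: one incoming entry per
// predecessor, all carrying the definition from inside the loop.
struct ExitBlock {
  std::string name;
  std::vector<std::string> preds; // all inside the loop by definition
};

int main() {
  std::string loopDef = "%v";
  std::vector<ExitBlock> exits = {{"exit1", {"latch"}},
                                  {"exit2", {"latch", "body"}}};
  for (const ExitBlock &e : exits) {
    std::cout << e.name << ": " << loopDef << ".lcssa = phi ";
    for (std::size_t i = 0; i < e.preds.size(); ++i)
      std::cout << (i ? ", " : "") << "[ " << loopDef << ", %" << e.preds[i]
                << " ]";
    std::cout << "\n"; // outside uses now read %v.lcssa instead of %v
  }
}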
PN->setDebugLoc(I->getDebugLoc()); - - // Add inputs from inside the loop for this PHI. This is valid - // because `I` dominates `ExitBB` (checked above). This implies - // that every incoming block/edge is dominated by `I` as well, - // i.e. we can add uses of `I` to those incoming edges/append to the incoming - // blocks without violating the SSA dominance property. + + // Add inputs from inside the loop for this PHI. This is valid + // because `I` dominates `ExitBB` (checked above). This implies + // that every incoming block/edge is dominated by `I` as well, + // i.e. we can add uses of `I` to those incoming edges/append to the incoming + // blocks without violating the SSA dominance property. for (BasicBlock *Pred : PredCache.get(ExitBB)) { PN->addIncoming(I, Pred); @@ -205,17 +205,17 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, for (Use *UseToRewrite : UsesToRewrite) { Instruction *User = cast<Instruction>(UseToRewrite->getUser()); BasicBlock *UserBB = User->getParent(); - - // For practical purposes, we consider that the use in a PHI - // occurs in the respective predecessor block. For more info, - // see the `phi` doc in LangRef and the LCSSA doc. + + // For practical purposes, we consider that the use in a PHI + // occurs in the respective predecessor block. For more info, + // see the `phi` doc in LangRef and the LCSSA doc. if (auto *PN = dyn_cast<PHINode>(User)) UserBB = PN->getIncomingBlock(*UseToRewrite); - // If this use is in an exit block, rewrite to use the newly inserted PHI. - // This is required for correctness because SSAUpdate doesn't handle uses - // in the same block. It assumes the PHI we inserted is at the end of the - // block. + // If this use is in an exit block, rewrite to use the newly inserted PHI. + // This is required for correctness because SSAUpdate doesn't handle uses + // in the same block. It assumes the PHI we inserted is at the end of the + // block. if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) { UseToRewrite->set(&UserBB->front()); continue; @@ -265,29 +265,29 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, Worklist.push_back(PostProcessPN); // Keep track of PHI nodes that we want to remove because they did not have - // any uses rewritten. + // any uses rewritten. for (PHINode *PN : AddedPHIs) if (PN->use_empty()) - LocalPHIsToRemove.insert(PN); - + LocalPHIsToRemove.insert(PN); + Changed = true; } - - // Remove PHI nodes that did not have any uses rewritten or add them to - // PHIsToRemove, so the caller can remove them after some additional cleanup. - // We need to redo the use_empty() check here, because even if the PHI node - // wasn't used when added to LocalPHIsToRemove, later added PHI nodes can be - // using it. This cleanup is not guaranteed to handle trees/cycles of PHI - // nodes that only are used by each other. Such situations has only been - // noticed when the input IR contains unreachable code, and leaving some extra - // redundant PHI nodes in such situations is considered a minor problem. - if (PHIsToRemove) { - PHIsToRemove->append(LocalPHIsToRemove.begin(), LocalPHIsToRemove.end()); - } else { - for (PHINode *PN : LocalPHIsToRemove) - if (PN->use_empty()) - PN->eraseFromParent(); - } + + // Remove PHI nodes that did not have any uses rewritten or add them to + // PHIsToRemove, so the caller can remove them after some additional cleanup. 
+ // We need to redo the use_empty() check here, because even if the PHI node + // wasn't used when added to LocalPHIsToRemove, later added PHI nodes can be + // using it. This cleanup is not guaranteed to handle trees/cycles of PHI + // nodes that only are used by each other. Such situations has only been + // noticed when the input IR contains unreachable code, and leaving some extra + // redundant PHI nodes in such situations is considered a minor problem. + if (PHIsToRemove) { + PHIsToRemove->append(LocalPHIsToRemove.begin(), LocalPHIsToRemove.end()); + } else { + for (PHINode *PN : LocalPHIsToRemove) + if (PN->use_empty()) + PN->eraseFromParent(); + } return Changed; } @@ -297,7 +297,7 @@ static void computeBlocksDominatingExits( SmallSetVector<BasicBlock *, 8> &BlocksDominatingExits) { // We start from the exit blocks, as every block trivially dominates itself // (not strictly). - SmallVector<BasicBlock *, 8> BBWorklist(ExitBlocks); + SmallVector<BasicBlock *, 8> BBWorklist(ExitBlocks); while (!BBWorklist.empty()) { BasicBlock *BB = BBWorklist.pop_back_val(); @@ -386,9 +386,9 @@ bool llvm::formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, } } - IRBuilder<> Builder(L.getHeader()->getContext()); - Changed = formLCSSAForInstructions(Worklist, DT, *LI, SE, Builder); - + IRBuilder<> Builder(L.getHeader()->getContext()); + Changed = formLCSSAForInstructions(Worklist, DT, *LI, SE, Builder); + // If we modified the code, remove any caches about the loop from SCEV to // avoid dangling entries. // FIXME: This is a big hammer, can we clear the cache more selectively? diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/Local.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/Local.cpp index 5d8d638169..ae26058c21 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/Local.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/Local.cpp @@ -91,25 +91,25 @@ using namespace llvm::PatternMatch; #define DEBUG_TYPE "local" STATISTIC(NumRemoved, "Number of unreachable basic blocks removed"); -STATISTIC(NumPHICSEs, "Number of PHI's that got CSE'd"); - -static cl::opt<bool> PHICSEDebugHash( - "phicse-debug-hash", -#ifdef EXPENSIVE_CHECKS - cl::init(true), -#else - cl::init(false), -#endif - cl::Hidden, - cl::desc("Perform extra assertion checking to verify that PHINodes's hash " - "function is well-behaved w.r.t. its isEqual predicate")); - -static cl::opt<unsigned> PHICSENumPHISmallSize( - "phicse-num-phi-smallsize", cl::init(32), cl::Hidden, - cl::desc( - "When the basic block contains not more than this number of PHI nodes, " - "perform a (faster!) exhaustive search instead of set-driven one.")); - +STATISTIC(NumPHICSEs, "Number of PHI's that got CSE'd"); + +static cl::opt<bool> PHICSEDebugHash( + "phicse-debug-hash", +#ifdef EXPENSIVE_CHECKS + cl::init(true), +#else + cl::init(false), +#endif + cl::Hidden, + cl::desc("Perform extra assertion checking to verify that PHINodes's hash " + "function is well-behaved w.r.t. its isEqual predicate")); + +static cl::opt<unsigned> PHICSENumPHISmallSize( + "phicse-num-phi-smallsize", cl::init(32), cl::Hidden, + cl::desc( + "When the basic block contains not more than this number of PHI nodes, " + "perform a (faster!) 
exhaustive search instead of set-driven one.")); + // Max recursion depth for collectBitParts used when detecting bswap and // bitreverse idioms static const unsigned BitPartRecursionMaxDepth = 64; @@ -134,7 +134,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, // Branch - See if we are conditional jumping on constant if (auto *BI = dyn_cast<BranchInst>(T)) { if (BI->isUnconditional()) return false; // Can't optimize uncond branch - + BasicBlock *Dest1 = BI->getSuccessor(0); BasicBlock *Dest2 = BI->getSuccessor(1); @@ -155,25 +155,25 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); return true; } - - if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) { - // Are we branching on constant? - // YES. Change to unconditional branch... - BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2; - BasicBlock *OldDest = Cond->getZExtValue() ? Dest2 : Dest1; - - // Let the basic block know that we are letting go of it. Based on this, - // it will adjust it's PHI nodes. - OldDest->removePredecessor(BB); - - // Replace the conditional branch with an unconditional one. - Builder.CreateBr(Destination); - BI->eraseFromParent(); - if (DTU) - DTU->applyUpdates({{DominatorTree::Delete, BB, OldDest}}); - return true; - } - + + if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) { + // Are we branching on constant? + // YES. Change to unconditional branch... + BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2; + BasicBlock *OldDest = Cond->getZExtValue() ? Dest2 : Dest1; + + // Let the basic block know that we are letting go of it. Based on this, + // it will adjust it's PHI nodes. + OldDest->removePredecessor(BB); + + // Replace the conditional branch with an unconditional one. + Builder.CreateBr(Destination); + BI->eraseFromParent(); + if (DTU) + DTU->applyUpdates({{DominatorTree::Delete, BB, OldDest}}); + return true; + } + return false; } @@ -190,8 +190,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, TheOnlyDest = SI->case_begin()->getCaseSuccessor(); } - bool Changed = false; - + bool Changed = false; + // Figure out which case it goes to. for (auto i = SI->case_begin(), e = SI->case_end(); i != e;) { // Found case matching a constant operand? @@ -230,7 +230,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, DefaultDest->removePredecessor(ParentBB); i = SI->removeCase(i); e = SI->case_end(); - Changed = true; + Changed = true; continue; } @@ -257,16 +257,16 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, Builder.CreateBr(TheOnlyDest); BasicBlock *BB = SI->getParent(); - SmallSetVector<BasicBlock *, 8> RemovedSuccessors; - + SmallSetVector<BasicBlock *, 8> RemovedSuccessors; + // Remove entries from PHI nodes which we no longer branch to... - BasicBlock *SuccToKeep = TheOnlyDest; + BasicBlock *SuccToKeep = TheOnlyDest; for (BasicBlock *Succ : successors(SI)) { - if (DTU && Succ != TheOnlyDest) - RemovedSuccessors.insert(Succ); + if (DTU && Succ != TheOnlyDest) + RemovedSuccessors.insert(Succ); // Found case matching a constant operand? 
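Earlier in this chunk, ConstantFoldTerminator's restored branch-on-constant case rewrites a conditional branch whose condition is a known constant into an unconditional branch and drops the edge to the untaken successor. A standalone model of that fold, with string block names standing in for BasicBlocks:

#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

// A block just tracks its predecessors here; that is the only state the
// fold has to repair (removePredecessor() in the real code).
struct Block {
  std::vector<std::string> preds;
};

static std::string foldConstBranch(bool cond, const std::string &self,
                                   Block &dest1, Block &dest2,
                                   const std::string &n1,
                                   const std::string &n2) {
  Block &oldDest = cond ? dest2 : dest1; // the successor we abandon
  // removePredecessor() analogue: the untaken block forgets this edge.
  oldDest.preds.erase(
      std::remove(oldDest.preds.begin(), oldDest.preds.end(), self),
      oldDest.preds.end());
  return cond ? n1 : n2; // the new unconditional target
}

int main() {
  Block t{{"entry"}}, f{{"entry"}};
  assert(foldConstBranch(true, "entry", t, f, "then", "else") == "then");
  assert(f.preds.empty()); // the dead edge to "else" is gone
}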
- if (Succ == SuccToKeep) { - SuccToKeep = nullptr; // Don't modify the first branch to TheOnlyDest + if (Succ == SuccToKeep) { + SuccToKeep = nullptr; // Don't modify the first branch to TheOnlyDest } else { Succ->removePredecessor(BB); } @@ -277,13 +277,13 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, SI->eraseFromParent(); if (DeleteDeadConditions) RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); - if (DTU) { - std::vector<DominatorTree::UpdateType> Updates; - Updates.reserve(RemovedSuccessors.size()); - for (auto *RemovedSuccessor : RemovedSuccessors) - Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor}); - DTU->applyUpdates(Updates); - } + if (DTU) { + std::vector<DominatorTree::UpdateType> Updates; + Updates.reserve(RemovedSuccessors.size()); + for (auto *RemovedSuccessor : RemovedSuccessors) + Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor}); + DTU->applyUpdates(Updates); + } return true; } @@ -321,7 +321,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, SI->eraseFromParent(); return true; } - return Changed; + return Changed; } if (auto *IBI = dyn_cast<IndirectBrInst>(T)) { @@ -329,20 +329,20 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, if (auto *BA = dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) { BasicBlock *TheOnlyDest = BA->getBasicBlock(); - SmallSetVector<BasicBlock *, 8> RemovedSuccessors; + SmallSetVector<BasicBlock *, 8> RemovedSuccessors; // Insert the new branch. Builder.CreateBr(TheOnlyDest); - BasicBlock *SuccToKeep = TheOnlyDest; + BasicBlock *SuccToKeep = TheOnlyDest; for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { - BasicBlock *DestBB = IBI->getDestination(i); - if (DTU && DestBB != TheOnlyDest) - RemovedSuccessors.insert(DestBB); - if (IBI->getDestination(i) == SuccToKeep) { - SuccToKeep = nullptr; + BasicBlock *DestBB = IBI->getDestination(i); + if (DTU && DestBB != TheOnlyDest) + RemovedSuccessors.insert(DestBB); + if (IBI->getDestination(i) == SuccToKeep) { + SuccToKeep = nullptr; } else { - DestBB->removePredecessor(BB); + DestBB->removePredecessor(BB); } } Value *Address = IBI->getAddress(); @@ -359,18 +359,18 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, // If we didn't find our destination in the IBI successor list, then we // have undefined behavior. Replace the unconditional branch with an // 'unreachable' instruction. - if (SuccToKeep) { + if (SuccToKeep) { BB->getTerminator()->eraseFromParent(); new UnreachableInst(BB->getContext(), BB); } - if (DTU) { - std::vector<DominatorTree::UpdateType> Updates; - Updates.reserve(RemovedSuccessors.size()); - for (auto *RemovedSuccessor : RemovedSuccessors) - Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor}); - DTU->applyUpdates(Updates); - } + if (DTU) { + std::vector<DominatorTree::UpdateType> Updates; + Updates.reserve(RemovedSuccessors.size()); + for (auto *RemovedSuccessor : RemovedSuccessors) + Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor}); + DTU->applyUpdates(Updates); + } return true; } } @@ -420,9 +420,9 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, return true; } - if (!I->willReturn()) - return false; - + if (!I->willReturn()) + return false; + if (!I->mayHaveSideEffects()) return true; @@ -484,24 +484,24 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, /// trivially dead, delete them too, recursively. 
Return true if any /// instructions were deleted. bool llvm::RecursivelyDeleteTriviallyDeadInstructions( - Value *V, const TargetLibraryInfo *TLI, MemorySSAUpdater *MSSAU, - std::function<void(Value *)> AboutToDeleteCallback) { + Value *V, const TargetLibraryInfo *TLI, MemorySSAUpdater *MSSAU, + std::function<void(Value *)> AboutToDeleteCallback) { Instruction *I = dyn_cast<Instruction>(V); if (!I || !isInstructionTriviallyDead(I, TLI)) return false; SmallVector<WeakTrackingVH, 16> DeadInsts; DeadInsts.push_back(I); - RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU, - AboutToDeleteCallback); + RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU, + AboutToDeleteCallback); return true; } bool llvm::RecursivelyDeleteTriviallyDeadInstructionsPermissive( SmallVectorImpl<WeakTrackingVH> &DeadInsts, const TargetLibraryInfo *TLI, - MemorySSAUpdater *MSSAU, - std::function<void(Value *)> AboutToDeleteCallback) { + MemorySSAUpdater *MSSAU, + std::function<void(Value *)> AboutToDeleteCallback) { unsigned S = 0, E = DeadInsts.size(), Alive = 0; for (; S != E; ++S) { auto *I = cast<Instruction>(DeadInsts[S]); @@ -512,15 +512,15 @@ bool llvm::RecursivelyDeleteTriviallyDeadInstructionsPermissive( } if (Alive == E) return false; - RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU, - AboutToDeleteCallback); + RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU, + AboutToDeleteCallback); return true; } void llvm::RecursivelyDeleteTriviallyDeadInstructions( SmallVectorImpl<WeakTrackingVH> &DeadInsts, const TargetLibraryInfo *TLI, - MemorySSAUpdater *MSSAU, - std::function<void(Value *)> AboutToDeleteCallback) { + MemorySSAUpdater *MSSAU, + std::function<void(Value *)> AboutToDeleteCallback) { // Process the dead instruction list until empty. while (!DeadInsts.empty()) { Value *V = DeadInsts.pop_back_val(); @@ -534,9 +534,9 @@ void llvm::RecursivelyDeleteTriviallyDeadInstructions( // Don't lose the debug info while deleting the instructions. salvageDebugInfo(*I); - if (AboutToDeleteCallback) - AboutToDeleteCallback(I); - + if (AboutToDeleteCallback) + AboutToDeleteCallback(I); + // Null out all of the instruction's operands to see if any operand becomes // dead as we go. for (Use &OpU : I->operands()) { @@ -740,11 +740,11 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, if (DTU) { for (auto I = pred_begin(PredBB), E = pred_end(PredBB); I != E; ++I) { // This predecessor of PredBB may already have DestBB as a successor. - if (!llvm::is_contained(successors(*I), DestBB)) + if (!llvm::is_contained(successors(*I), DestBB)) Updates.push_back({DominatorTree::Insert, *I, DestBB}); - Updates.push_back({DominatorTree::Delete, *I, PredBB}); + Updates.push_back({DominatorTree::Delete, *I, PredBB}); } - Updates.push_back({DominatorTree::Delete, PredBB, DestBB}); + Updates.push_back({DominatorTree::Delete, PredBB, DestBB}); } // Zap anything that took the address of DestBB. Not doing this will give the @@ -918,7 +918,7 @@ static void gatherIncomingValuesToPhi(PHINode *PN, /// \param IncomingValues A map from block to value. 
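The hunks in this stretch, from the switch and indirectbr folds above through TryToSimplifyUncondBranchFromEmptyBlock below, all follow one DomTreeUpdater pattern: successors or predecessors are deduplicated before updates are queued, since a block may appear on several CFG edges while the dominator tree wants each edge reported once. A standalone sketch of that dedup-then-queue step, with std::set standing in for SmallSetVector:

#include <iostream>
#include <set>
#include <string>
#include <utility>
#include <vector>

enum class Kind { Insert, Delete };
using Update = std::pair<Kind, std::string>;

int main() {
  // "bb1" appears twice: two CFG edges, but only one dominator-tree edge.
  std::vector<std::string> successors = {"bb1", "bb2", "bb1"};
  std::set<std::string> unique(successors.begin(), successors.end());
  std::vector<Update> updates;
  updates.reserve(2 * unique.size()); // mirrors the reserve() in the hunks
  for (const std::string &s : unique)
    updates.push_back({Kind::Insert, s});
  for (const std::string &s : unique)
    updates.push_back({Kind::Delete, s});
  std::cout << updates.size() << " updates for " << successors.size()
            << " raw edges\n"; // 4, not 6
}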
static void replaceUndefValuesInPhi(PHINode *PN, const IncomingValueMap &IncomingValues) { - SmallVector<unsigned> TrueUndefOps; + SmallVector<unsigned> TrueUndefOps; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *V = PN->getIncomingValue(i); @@ -927,30 +927,30 @@ static void replaceUndefValuesInPhi(PHINode *PN, BasicBlock *BB = PN->getIncomingBlock(i); IncomingValueMap::const_iterator It = IncomingValues.find(BB); - // Keep track of undef/poison incoming values. Those must match, so we fix - // them up below if needed. - // Note: this is conservatively correct, but we could try harder and group - // the undef values per incoming basic block. - if (It == IncomingValues.end()) { - TrueUndefOps.push_back(i); - continue; - } - - // There is a defined value for this incoming block, so map this undef - // incoming value to the defined value. + // Keep track of undef/poison incoming values. Those must match, so we fix + // them up below if needed. + // Note: this is conservatively correct, but we could try harder and group + // the undef values per incoming basic block. + if (It == IncomingValues.end()) { + TrueUndefOps.push_back(i); + continue; + } + + // There is a defined value for this incoming block, so map this undef + // incoming value to the defined value. PN->setIncomingValue(i, It->second); } - - // If there are both undef and poison values incoming, then convert those - // values to undef. It is invalid to have different values for the same - // incoming block. - unsigned PoisonCount = count_if(TrueUndefOps, [&](unsigned i) { - return isa<PoisonValue>(PN->getIncomingValue(i)); - }); - if (PoisonCount != 0 && PoisonCount != TrueUndefOps.size()) { - for (unsigned i : TrueUndefOps) - PN->setIncomingValue(i, UndefValue::get(PN->getType())); - } + + // If there are both undef and poison values incoming, then convert those + // values to undef. It is invalid to have different values for the same + // incoming block. + unsigned PoisonCount = count_if(TrueUndefOps, [&](unsigned i) { + return isa<PoisonValue>(PN->getIncomingValue(i)); + }); + if (PoisonCount != 0 && PoisonCount != TrueUndefOps.size()) { + for (unsigned i : TrueUndefOps) + PN->setIncomingValue(i, UndefValue::get(PN->getType())); + } } /// Replace a value flowing from a block to a phi with @@ -1072,15 +1072,15 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, SmallVector<DominatorTree::UpdateType, 32> Updates; if (DTU) { // All predecessors of BB will be moved to Succ. - SmallSetVector<BasicBlock *, 8> Predecessors(pred_begin(BB), pred_end(BB)); - Updates.reserve(Updates.size() + 2 * Predecessors.size()); - for (auto *Predecessor : Predecessors) { + SmallSetVector<BasicBlock *, 8> Predecessors(pred_begin(BB), pred_end(BB)); + Updates.reserve(Updates.size() + 2 * Predecessors.size()); + for (auto *Predecessor : Predecessors) { // This predecessor of BB may already have Succ as a successor. 
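replaceUndefValuesInPhi, restored above, tracks the incoming slots that never received a defined value and, when plain undef and poison are mixed among them, weakens the poison entries to undef, because a block that appears on several incoming edges must supply the same value on each. A standalone model of that normalization:

#include <cassert>
#include <cstddef>
#include <vector>

enum class V { Def, Undef, Poison };

// If undef-like incoming values mix poison with plain undef, demote the
// poison entries so all undef-like slots agree.
static void normalize(std::vector<V> &incoming) {
  std::size_t undefLike = 0, poison = 0;
  for (V v : incoming)
    if (v != V::Def) {
      ++undefLike;
      if (v == V::Poison)
        ++poison;
    }
  if (poison != 0 && poison != undefLike) // mixed: weaken poison to undef
    for (V &v : incoming)
      if (v == V::Poison)
        v = V::Undef;
}

int main() {
  std::vector<V> phi = {V::Def, V::Undef, V::Poison};
  normalize(phi);
  assert(phi[2] == V::Undef); // poison was demoted to match the undef
}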
- if (!llvm::is_contained(successors(Predecessor), Succ)) - Updates.push_back({DominatorTree::Insert, Predecessor, Succ}); - Updates.push_back({DominatorTree::Delete, Predecessor, BB}); + if (!llvm::is_contained(successors(Predecessor), Succ)) + Updates.push_back({DominatorTree::Insert, Predecessor, Succ}); + Updates.push_back({DominatorTree::Delete, Predecessor, BB}); } - Updates.push_back({DominatorTree::Delete, BB, Succ}); + Updates.push_back({DominatorTree::Delete, BB, Succ}); } if (isa<PHINode>(Succ->begin())) { @@ -1136,7 +1136,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, "applying corresponding DTU updates."); if (DTU) { - DTU->applyUpdates(Updates); + DTU->applyUpdates(Updates); DTU->deleteBB(BB); } else { BB->eraseFromParent(); // Delete the old basic block. @@ -1144,43 +1144,43 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, return true; } -static bool EliminateDuplicatePHINodesNaiveImpl(BasicBlock *BB) { +static bool EliminateDuplicatePHINodesNaiveImpl(BasicBlock *BB) { + // This implementation doesn't currently consider undef operands + // specially. Theoretically, two phis which are identical except for + // one having an undef where the other doesn't could be collapsed. + + bool Changed = false; + + // Examine each PHI. + // Note that increment of I must *NOT* be in the iteration_expression, since + // we don't want to immediately advance when we restart from the beginning. + for (auto I = BB->begin(); PHINode *PN = dyn_cast<PHINode>(I);) { + ++I; + // Is there an identical PHI node in this basic block? + // Note that we only look in the upper square's triangle, + // we already checked that the lower triangle PHI's aren't identical. + for (auto J = I; PHINode *DuplicatePN = dyn_cast<PHINode>(J); ++J) { + if (!DuplicatePN->isIdenticalToWhenDefined(PN)) + continue; + // A duplicate. Replace this PHI with the base PHI. + ++NumPHICSEs; + DuplicatePN->replaceAllUsesWith(PN); + DuplicatePN->eraseFromParent(); + Changed = true; + + // The RAUW can change PHIs that we already visited. + I = BB->begin(); + break; // Start over from the beginning. + } + } + return Changed; +} + +static bool EliminateDuplicatePHINodesSetBasedImpl(BasicBlock *BB) { // This implementation doesn't currently consider undef operands // specially. Theoretically, two phis which are identical except for // one having an undef where the other doesn't could be collapsed. - bool Changed = false; - - // Examine each PHI. - // Note that increment of I must *NOT* be in the iteration_expression, since - // we don't want to immediately advance when we restart from the beginning. - for (auto I = BB->begin(); PHINode *PN = dyn_cast<PHINode>(I);) { - ++I; - // Is there an identical PHI node in this basic block? - // Note that we only look in the upper square's triangle, - // we already checked that the lower triangle PHI's aren't identical. - for (auto J = I; PHINode *DuplicatePN = dyn_cast<PHINode>(J); ++J) { - if (!DuplicatePN->isIdenticalToWhenDefined(PN)) - continue; - // A duplicate. Replace this PHI with the base PHI. - ++NumPHICSEs; - DuplicatePN->replaceAllUsesWith(PN); - DuplicatePN->eraseFromParent(); - Changed = true; - - // The RAUW can change PHIs that we already visited. - I = BB->begin(); - break; // Start over from the beginning. - } - } - return Changed; -} - -static bool EliminateDuplicatePHINodesSetBasedImpl(BasicBlock *BB) { - // This implementation doesn't currently consider undef operands - // specially. 
Theoretically, two phis which are identical except for - // one having an undef where the other doesn't could be collapsed. - struct PHIDenseMapInfo { static PHINode *getEmptyKey() { return DenseMapInfo<PHINode *>::getEmptyKey(); @@ -1190,13 +1190,13 @@ static bool EliminateDuplicatePHINodesSetBasedImpl(BasicBlock *BB) { return DenseMapInfo<PHINode *>::getTombstoneKey(); } - static bool isSentinel(PHINode *PN) { - return PN == getEmptyKey() || PN == getTombstoneKey(); - } - - // WARNING: this logic must be kept in sync with - // Instruction::isIdenticalToWhenDefined()! - static unsigned getHashValueImpl(PHINode *PN) { + static bool isSentinel(PHINode *PN) { + return PN == getEmptyKey() || PN == getTombstoneKey(); + } + + // WARNING: this logic must be kept in sync with + // Instruction::isIdenticalToWhenDefined()! + static unsigned getHashValueImpl(PHINode *PN) { // Compute a hash value on the operands. Instcombine will likely have // sorted them, which helps expose duplicates, but we have to check all // the operands to be safe in case instcombine hasn't run. @@ -1205,37 +1205,37 @@ static bool EliminateDuplicatePHINodesSetBasedImpl(BasicBlock *BB) { hash_combine_range(PN->block_begin(), PN->block_end()))); } - static unsigned getHashValue(PHINode *PN) { -#ifndef NDEBUG - // If -phicse-debug-hash was specified, return a constant -- this - // will force all hashing to collide, so we'll exhaustively search - // the table for a match, and the assertion in isEqual will fire if - // there's a bug causing equal keys to hash differently. - if (PHICSEDebugHash) - return 0; -#endif - return getHashValueImpl(PN); - } - - static bool isEqualImpl(PHINode *LHS, PHINode *RHS) { - if (isSentinel(LHS) || isSentinel(RHS)) + static unsigned getHashValue(PHINode *PN) { +#ifndef NDEBUG + // If -phicse-debug-hash was specified, return a constant -- this + // will force all hashing to collide, so we'll exhaustively search + // the table for a match, and the assertion in isEqual will fire if + // there's a bug causing equal keys to hash differently. + if (PHICSEDebugHash) + return 0; +#endif + return getHashValueImpl(PN); + } + + static bool isEqualImpl(PHINode *LHS, PHINode *RHS) { + if (isSentinel(LHS) || isSentinel(RHS)) return LHS == RHS; return LHS->isIdenticalTo(RHS); } - - static bool isEqual(PHINode *LHS, PHINode *RHS) { - // These comparisons are nontrivial, so assert that equality implies - // hash equality (DenseMap demands this as an invariant). - bool Result = isEqualImpl(LHS, RHS); - assert(!Result || (isSentinel(LHS) && LHS == RHS) || - getHashValueImpl(LHS) == getHashValueImpl(RHS)); - return Result; - } + + static bool isEqual(PHINode *LHS, PHINode *RHS) { + // These comparisons are nontrivial, so assert that equality implies + // hash equality (DenseMap demands this as an invariant). + bool Result = isEqualImpl(LHS, RHS); + assert(!Result || (isSentinel(LHS) && LHS == RHS) || + getHashValueImpl(LHS) == getHashValueImpl(RHS)); + return Result; + } }; // Set of unique PHINodes. DenseSet<PHINode *, PHIDenseMapInfo> PHISet; - PHISet.reserve(4 * PHICSENumPHISmallSize); + PHISet.reserve(4 * PHICSENumPHISmallSize); // Examine each PHI. bool Changed = false; @@ -1243,7 +1243,7 @@ static bool EliminateDuplicatePHINodesSetBasedImpl(BasicBlock *BB) { auto Inserted = PHISet.insert(PN); if (!Inserted.second) { // A duplicate. Replace this PHI with its duplicate. 
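The set-based EliminateDuplicatePHINodes above hashes each PHI's operands, and the -phicse-debug-hash mode forces every hash to a constant so that lookups degrade to exhaustive isEqual() comparisons and the "equal implies equal hash" invariant is exercised on every pair. A standalone sketch of the same idea with std::unordered_set; Key is a stand-in for a PHI's operand list:

#include <cstddef>
#include <iostream>
#include <unordered_set>
#include <vector>

static bool DebugHash = false;

struct Key {
  std::vector<int> ops; // stand-in for a PHI's (value, block) operands
  bool operator==(const Key &o) const { return ops == o.ops; }
};

static std::size_t hashImpl(const Key &k) {
  std::size_t h = 0;
  for (int v : k.ops)
    h = h * 31 + static_cast<std::size_t>(v);
  return h;
}

struct KeyHash {
  std::size_t operator()(const Key &k) const {
    return DebugHash ? 0 : hashImpl(k); // forced collisions in debug mode
  }
};

int main() {
  DebugHash = true; // like the EXPENSIVE_CHECKS default in the hunk
  std::unordered_set<Key, KeyHash> seen;
  std::vector<Key> phis = {{{1, 2}}, {{3, 4}}, {{1, 2}}};
  int removed = 0;
  for (const Key &k : phis)
    if (!seen.insert(k).second)
      ++removed; // a duplicate: the real code RAUWs and erases the PHI
  std::cout << removed << " duplicate PHI(s)\n"; // 1
}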
- ++NumPHICSEs; + ++NumPHICSEs; PN->replaceAllUsesWith(*Inserted.first); PN->eraseFromParent(); Changed = true; @@ -1258,63 +1258,63 @@ static bool EliminateDuplicatePHINodesSetBasedImpl(BasicBlock *BB) { return Changed; } -bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { - if ( -#ifndef NDEBUG - !PHICSEDebugHash && -#endif - hasNItemsOrLess(BB->phis(), PHICSENumPHISmallSize)) - return EliminateDuplicatePHINodesNaiveImpl(BB); - return EliminateDuplicatePHINodesSetBasedImpl(BB); -} - -/// If the specified pointer points to an object that we control, try to modify -/// the object's alignment to PrefAlign. Returns a minimum known alignment of -/// the value after the operation, which may be lower than PrefAlign. -/// -/// Increasing value alignment isn't often possible though. If alignment is -/// important, a more reliable approach is to simply align all global variables -/// and allocation instructions to their preferred alignment from the beginning. -static Align tryEnforceAlignment(Value *V, Align PrefAlign, - const DataLayout &DL) { +bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { + if ( +#ifndef NDEBUG + !PHICSEDebugHash && +#endif + hasNItemsOrLess(BB->phis(), PHICSENumPHISmallSize)) + return EliminateDuplicatePHINodesNaiveImpl(BB); + return EliminateDuplicatePHINodesSetBasedImpl(BB); +} + +/// If the specified pointer points to an object that we control, try to modify +/// the object's alignment to PrefAlign. Returns a minimum known alignment of +/// the value after the operation, which may be lower than PrefAlign. +/// +/// Increasing value alignment isn't often possible though. If alignment is +/// important, a more reliable approach is to simply align all global variables +/// and allocation instructions to their preferred alignment from the beginning. +static Align tryEnforceAlignment(Value *V, Align PrefAlign, + const DataLayout &DL) { V = V->stripPointerCasts(); if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { - // TODO: Ideally, this function would not be called if PrefAlign is smaller - // than the current alignment, as the known bits calculation should have - // already taken it into account. However, this is not always the case, - // as computeKnownBits() has a depth limit, while stripPointerCasts() - // doesn't. - Align CurrentAlign = AI->getAlign(); - if (PrefAlign <= CurrentAlign) - return CurrentAlign; + // TODO: Ideally, this function would not be called if PrefAlign is smaller + // than the current alignment, as the known bits calculation should have + // already taken it into account. However, this is not always the case, + // as computeKnownBits() has a depth limit, while stripPointerCasts() + // doesn't. + Align CurrentAlign = AI->getAlign(); + if (PrefAlign <= CurrentAlign) + return CurrentAlign; // If the preferred alignment is greater than the natural stack alignment // then don't round up. This avoids dynamic stack realignment. if (DL.exceedsNaturalStackAlignment(PrefAlign)) - return CurrentAlign; + return CurrentAlign; AI->setAlignment(PrefAlign); return PrefAlign; } if (auto *GO = dyn_cast<GlobalObject>(V)) { // TODO: as above, this shouldn't be necessary. - Align CurrentAlign = GO->getPointerAlignment(DL); - if (PrefAlign <= CurrentAlign) - return CurrentAlign; + Align CurrentAlign = GO->getPointerAlignment(DL); + if (PrefAlign <= CurrentAlign) + return CurrentAlign; // If there is a large requested alignment and we can, bump up the alignment // of the global.
If the memory we set aside for the global may not be the // memory used by the final program then it is impossible for us to reliably // enforce the preferred alignment. if (!GO->canIncreaseAlignment()) - return CurrentAlign; + return CurrentAlign; GO->setAlignment(PrefAlign); return PrefAlign; } - return Align(1); + return Align(1); } Align llvm::getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, @@ -1336,7 +1336,7 @@ Align llvm::getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, Align Alignment = Align(1ull << std::min(Known.getBitWidth() - 1, TrailZ)); if (PrefAlign && *PrefAlign > Alignment) - Alignment = std::max(Alignment, tryEnforceAlignment(V, *PrefAlign, DL)); + Alignment = std::max(Alignment, tryEnforceAlignment(V, *PrefAlign, DL)); // We don't need to make any adjustment. return Alignment; @@ -1374,22 +1374,22 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar, /// least n bits. static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) { const DataLayout &DL = DII->getModule()->getDataLayout(); - TypeSize ValueSize = DL.getTypeAllocSizeInBits(ValTy); - if (Optional<uint64_t> FragmentSize = DII->getFragmentSizeInBits()) { - assert(!ValueSize.isScalable() && - "Fragments don't work on scalable types."); - return ValueSize.getFixedSize() >= *FragmentSize; - } + TypeSize ValueSize = DL.getTypeAllocSizeInBits(ValTy); + if (Optional<uint64_t> FragmentSize = DII->getFragmentSizeInBits()) { + assert(!ValueSize.isScalable() && + "Fragments don't work on scalable types."); + return ValueSize.getFixedSize() >= *FragmentSize; + } // We can't always calculate the size of the DI variable (e.g. if it is a // VLA). Try to use the size of the alloca that the dbg intrinsic describes // instead. if (DII->isAddressOfVariable()) if (auto *AI = dyn_cast_or_null<AllocaInst>(DII->getVariableLocation())) - if (Optional<TypeSize> FragmentSize = AI->getAllocationSizeInBits(DL)) { - assert(ValueSize.isScalable() == FragmentSize->isScalable() && - "Both sizes should agree on the scalable flag."); - return TypeSize::isKnownGE(ValueSize, *FragmentSize); - } + if (Optional<TypeSize> FragmentSize = AI->getAllocationSizeInBits(DL)) { + assert(ValueSize.isScalable() == FragmentSize->isScalable() && + "Both sizes should agree on the scalable flag."); + return TypeSize::isKnownGE(ValueSize, *FragmentSize); + } // Could not determine size of variable. Conservatively return false. return false; } @@ -1404,7 +1404,7 @@ static DebugLoc getDebugValueLoc(DbgVariableIntrinsic *DII, Instruction *Src) { MDNode *Scope = DeclareLoc.getScope(); DILocation *InlinedAt = DeclareLoc.getInlinedAt(); // Produce an unknown location with the correct scope / inlinedAt fields. - return DILocation::get(DII->getContext(), 0, 0, Scope, InlinedAt); + return DILocation::get(DII->getContext(), 0, 0, Scope, InlinedAt); } /// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value @@ -2021,10 +2021,10 @@ bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To, return false; } -std::pair<unsigned, unsigned> -llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) { +std::pair<unsigned, unsigned> +llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) { unsigned NumDeadInst = 0; - unsigned NumDeadDbgInst = 0; + unsigned NumDeadDbgInst = 0; // Delete the instructions backwards, as it has a reduced likelihood of // having to update as many def-use and use-def chains. Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
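// [Editor's note] A hedged usage sketch for the helper above; only the
// signature of removeAllNonTerminatorAndEHPadInstructions is taken from this
// file, the surrounding statistics counters are hypothetical:
//   std::pair<unsigned, unsigned> Removed =
//       llvm::removeAllNonTerminatorAndEHPadInstructions(BB);
//   NumDeadInstsStat += Removed.first;     // non-debug instructions deleted
//   NumDeadDbgInstsStat += Removed.second; // llvm.dbg.* intrinsics deleted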
@@ -2037,13 +2037,13 @@ llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) { EndInst = Inst; continue; } - if (isa<DbgInfoIntrinsic>(Inst)) - ++NumDeadDbgInst; - else + if (isa<DbgInfoIntrinsic>(Inst)) + ++NumDeadDbgInst; + else ++NumDeadInst; Inst->eraseFromParent(); } - return {NumDeadInst, NumDeadDbgInst}; + return {NumDeadInst, NumDeadDbgInst}; } unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap, @@ -2054,14 +2054,14 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap, if (MSSAU) MSSAU->changeToUnreachable(I); - SmallSetVector<BasicBlock *, 8> UniqueSuccessors; - + SmallSetVector<BasicBlock *, 8> UniqueSuccessors; + // Loop over all of the successors, removing BB's entry from any PHI // nodes. for (BasicBlock *Successor : successors(BB)) { Successor->removePredecessor(BB, PreserveLCSSA); if (DTU) - UniqueSuccessors.insert(Successor); + UniqueSuccessors.insert(Successor); } // Insert a call to llvm.trap right before this. This turns the undefined // behavior into a hard fail instead of falling through into random code. @@ -2083,18 +2083,18 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap, BB->getInstList().erase(BBI++); ++NumInstrsRemoved; } - if (DTU) { - SmallVector<DominatorTree::UpdateType, 8> Updates; - Updates.reserve(UniqueSuccessors.size()); - for (BasicBlock *UniqueSuccessor : UniqueSuccessors) - Updates.push_back({DominatorTree::Delete, BB, UniqueSuccessor}); - DTU->applyUpdates(Updates); - } + if (DTU) { + SmallVector<DominatorTree::UpdateType, 8> Updates; + Updates.reserve(UniqueSuccessors.size()); + for (BasicBlock *UniqueSuccessor : UniqueSuccessors) + Updates.push_back({DominatorTree::Delete, BB, UniqueSuccessor}); + DTU->applyUpdates(Updates); + } return NumInstrsRemoved; } CallInst *llvm::createCallMatchingInvoke(InvokeInst *II) { - SmallVector<Value *, 8> Args(II->args()); + SmallVector<Value *, 8> Args(II->args()); SmallVector<OperandBundleDef, 1> OpBundles; II->getOperandBundlesAsDefs(OpBundles); CallInst *NewCall = CallInst::Create(II->getFunctionType(), @@ -2135,7 +2135,7 @@ void llvm::changeToCall(InvokeInst *II, DomTreeUpdater *DTU) { UnwindDestBB->removePredecessor(BB); II->eraseFromParent(); if (DTU) - DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDestBB}}); + DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDestBB}}); } BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, @@ -2151,7 +2151,7 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, BB->getInstList().pop_back(); // Create the new invoke instruction. - SmallVector<Value *, 8> InvokeArgs(CI->args()); + SmallVector<Value *, 8> InvokeArgs(CI->args()); SmallVector<OperandBundleDef, 1> OpBundles; CI->getOperandBundlesAsDefs(OpBundles); @@ -2282,7 +2282,7 @@ static bool markAliveBlocks(Function &F, UnwindDestBB->removePredecessor(II->getParent()); II->eraseFromParent(); if (DTU) - DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDestBB}}); + DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDestBB}}); } else changeToCall(II, DTU); Changed = true; @@ -2311,7 +2311,7 @@ static bool markAliveBlocks(Function &F, } }; - SmallMapVector<BasicBlock *, int, 8> NumPerSuccessorCases; + SmallMapVector<BasicBlock *, int, 8> NumPerSuccessorCases; // Set of unique CatchPads. 
SmallDenseMap<CatchPadInst *, detail::DenseSetEmpty, 4, CatchPadDenseMapInfo, detail::DenseSetPair<CatchPadInst *>> @@ -2321,22 +2321,22 @@ static bool markAliveBlocks(Function &F, E = CatchSwitch->handler_end(); I != E; ++I) { BasicBlock *HandlerBB = *I; - ++NumPerSuccessorCases[HandlerBB]; + ++NumPerSuccessorCases[HandlerBB]; auto *CatchPad = cast<CatchPadInst>(HandlerBB->getFirstNonPHI()); if (!HandlerSet.insert({CatchPad, Empty}).second) { - --NumPerSuccessorCases[HandlerBB]; + --NumPerSuccessorCases[HandlerBB]; CatchSwitch->removeHandler(I); --I; --E; Changed = true; } } - std::vector<DominatorTree::UpdateType> Updates; - for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases) - if (I.second == 0) - Updates.push_back({DominatorTree::Delete, BB, I.first}); - if (DTU) - DTU->applyUpdates(Updates); + std::vector<DominatorTree::UpdateType> Updates; + for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases) + if (I.second == 0) + Updates.push_back({DominatorTree::Delete, BB, I.first}); + if (DTU) + DTU->applyUpdates(Updates); } Changed |= ConstantFoldTerminator(BB, true, nullptr, DTU); @@ -2380,7 +2380,7 @@ void llvm::removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU) { TI->replaceAllUsesWith(NewTI); TI->eraseFromParent(); if (DTU) - DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDest}}); + DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDest}}); } /// removeUnreachableBlocks - Remove blocks that are not reachable, even @@ -2397,38 +2397,38 @@ bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU, assert(Reachable.size() < F.size()); - // Are there any blocks left to actually delete? - SmallSetVector<BasicBlock *, 8> BlocksToRemove; + // Are there any blocks left to actually delete? + SmallSetVector<BasicBlock *, 8> BlocksToRemove; for (BasicBlock &BB : F) { // Skip reachable basic blocks if (Reachable.count(&BB)) continue; - // Skip already-deleted blocks - if (DTU && DTU->isBBPendingDeletion(&BB)) - continue; - BlocksToRemove.insert(&BB); - } - - if (BlocksToRemove.empty()) - return Changed; - - Changed = true; - NumRemoved += BlocksToRemove.size(); - + // Skip already-deleted blocks + if (DTU && DTU->isBBPendingDeletion(&BB)) + continue; + BlocksToRemove.insert(&BB); + } + + if (BlocksToRemove.empty()) + return Changed; + + Changed = true; + NumRemoved += BlocksToRemove.size(); + if (MSSAU) - MSSAU->removeBlocks(BlocksToRemove); + MSSAU->removeBlocks(BlocksToRemove); - // Loop over all of the basic blocks that are up for removal, dropping all of + // Loop over all of the basic blocks that are up for removal, dropping all of // their internal references. Update DTU if available. std::vector<DominatorTree::UpdateType> Updates; - for (auto *BB : BlocksToRemove) { - SmallSetVector<BasicBlock *, 8> UniqueSuccessors; + for (auto *BB : BlocksToRemove) { + SmallSetVector<BasicBlock *, 8> UniqueSuccessors; for (BasicBlock *Successor : successors(BB)) { - // Only remove references to BB in reachable successors of BB. - if (Reachable.count(Successor)) + // Only remove references to BB in reachable successors of BB. 
+ if (Reachable.count(Successor)) Successor->removePredecessor(BB); if (DTU) - UniqueSuccessors.insert(Successor); + UniqueSuccessors.insert(Successor); } BB->dropAllReferences(); if (DTU) { @@ -2442,22 +2442,22 @@ bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU, new UnreachableInst(BB->getContext(), BB); assert(succ_empty(BB) && "The successor list of BB isn't empty before " "applying corresponding DTU updates."); - Updates.reserve(Updates.size() + UniqueSuccessors.size()); - for (auto *UniqueSuccessor : UniqueSuccessors) - Updates.push_back({DominatorTree::Delete, BB, UniqueSuccessor}); + Updates.reserve(Updates.size() + UniqueSuccessors.size()); + for (auto *UniqueSuccessor : UniqueSuccessors) + Updates.push_back({DominatorTree::Delete, BB, UniqueSuccessor}); } } if (DTU) { - DTU->applyUpdates(Updates); - for (auto *BB : BlocksToRemove) + DTU->applyUpdates(Updates); + for (auto *BB : BlocksToRemove) DTU->deleteBB(BB); } else { - for (auto *BB : BlocksToRemove) + for (auto *BB : BlocksToRemove) BB->eraseFromParent(); } - return Changed; + return Changed; } void llvm::combineMetadata(Instruction *K, const Instruction *J, @@ -2702,13 +2702,13 @@ bool llvm::callsGCLeafFunction(const CallBase *Call, if (F->hasFnAttribute("gc-leaf-function")) return true; - if (auto IID = F->getIntrinsicID()) { + if (auto IID = F->getIntrinsicID()) { // Most LLVM intrinsics do not take safepoints. return IID != Intrinsic::experimental_gc_statepoint && - IID != Intrinsic::experimental_deoptimize && - IID != Intrinsic::memcpy_element_unordered_atomic && - IID != Intrinsic::memmove_element_unordered_atomic; - } + IID != Intrinsic::experimental_deoptimize && + IID != Intrinsic::memcpy_element_unordered_atomic && + IID != Intrinsic::memmove_element_unordered_atomic; + } } // Lib calls can be materialized by some passes, and won't be @@ -2836,7 +2836,7 @@ struct BitPart { /// Analyze the specified subexpression and see if it is capable of providing /// pieces of a bswap or bitreverse. The subexpression provides a potential -/// piece of a bswap or bitreverse if it can be proved that each non-zero bit in +/// piece of a bswap or bitreverse if it can be proved that each non-zero bit in /// the output of the expression came from a corresponding bit in some other /// value. This function is recursive, and the end result is a mapping of /// bitnumber to bitnumber. It is the caller's responsibility to validate that @@ -2848,10 +2848,10 @@ struct BitPart { /// BitPart is returned with Provider set to %X and Provenance[24-31] set to /// [0-7]. /// -/// For vector types, all analysis is performed at the per-element level. No -/// cross-element analysis is supported (shuffle/insertion/reduction), and all -/// constant masks must be splatted across all elements. -/// +/// For vector types, all analysis is performed at the per-element level. No +/// cross-element analysis is supported (shuffle/insertion/reduction), and all +/// constant masks must be splatted across all elements. +/// /// To avoid revisiting values, the BitPart results are memoized into the /// provided map. To avoid unnecessary copying of BitParts, BitParts are /// constructed in-place in the \c BPS map. 
Because of this \c BPS needs to @@ -2869,7 +2869,7 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, return I->second; auto &Result = BPS[V] = None; - auto BitWidth = V->getType()->getScalarSizeInBits(); + auto BitWidth = V->getType()->getScalarSizeInBits(); // Prevent stack overflow by limiting the recursion depth if (Depth == BitPartRecursionMaxDepth) { @@ -2877,16 +2877,16 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, return Result; } - if (auto *I = dyn_cast<Instruction>(V)) { - Value *X, *Y; - const APInt *C; - + if (auto *I = dyn_cast<Instruction>(V)) { + Value *X, *Y; + const APInt *C; + // If this is an or instruction, it may be an inner node of the bswap. - if (match(V, m_Or(m_Value(X), m_Value(Y)))) { - const auto &A = - collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); - const auto &B = - collectBitParts(Y, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + if (match(V, m_Or(m_Value(X), m_Value(Y)))) { + const auto &A = + collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + const auto &B = + collectBitParts(Y, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); if (!A || !B) return Result; @@ -2895,31 +2895,31 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, return Result; Result = BitPart(A->Provider, BitWidth); - for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx) { - if (A->Provenance[BitIdx] != BitPart::Unset && - B->Provenance[BitIdx] != BitPart::Unset && - A->Provenance[BitIdx] != B->Provenance[BitIdx]) + for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx) { + if (A->Provenance[BitIdx] != BitPart::Unset && + B->Provenance[BitIdx] != BitPart::Unset && + A->Provenance[BitIdx] != B->Provenance[BitIdx]) return Result = None; - if (A->Provenance[BitIdx] == BitPart::Unset) - Result->Provenance[BitIdx] = B->Provenance[BitIdx]; + if (A->Provenance[BitIdx] == BitPart::Unset) + Result->Provenance[BitIdx] = B->Provenance[BitIdx]; else - Result->Provenance[BitIdx] = A->Provenance[BitIdx]; + Result->Provenance[BitIdx] = A->Provenance[BitIdx]; } return Result; } // If this is a logical shift by a constant, recurse then shift the result. - if (match(V, m_LogicalShift(m_Value(X), m_APInt(C)))) { - const APInt &BitShift = *C; - + if (match(V, m_LogicalShift(m_Value(X), m_APInt(C)))) { + const APInt &BitShift = *C; + // Ensure the shift amount is defined. - if (BitShift.uge(BitWidth)) + if (BitShift.uge(BitWidth)) return Result; - const auto &Res = - collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + const auto &Res = + collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); if (!Res) return Result; Result = Res; @@ -2927,11 +2927,11 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, // Perform the "shift" on BitProvenance. 
auto &P = Result->Provenance; if (I->getOpcode() == Instruction::Shl) { - P.erase(std::prev(P.end(), BitShift.getZExtValue()), P.end()); - P.insert(P.begin(), BitShift.getZExtValue(), BitPart::Unset); + P.erase(std::prev(P.end(), BitShift.getZExtValue()), P.end()); + P.insert(P.begin(), BitShift.getZExtValue(), BitPart::Unset); } else { - P.erase(P.begin(), std::next(P.begin(), BitShift.getZExtValue())); - P.insert(P.end(), BitShift.getZExtValue(), BitPart::Unset); + P.erase(P.begin(), std::next(P.begin(), BitShift.getZExtValue())); + P.insert(P.end(), BitShift.getZExtValue(), BitPart::Unset); } return Result; @@ -2939,111 +2939,111 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, // If this is a logical 'and' with a mask that clears bits, recurse then // unset the appropriate bits. - if (match(V, m_And(m_Value(X), m_APInt(C)))) { - const APInt &AndMask = *C; + if (match(V, m_And(m_Value(X), m_APInt(C)))) { + const APInt &AndMask = *C; // Check that the mask allows a multiple of 8 bits for a bswap, for an // early exit. unsigned NumMaskedBits = AndMask.countPopulation(); - if (!MatchBitReversals && (NumMaskedBits % 8) != 0) + if (!MatchBitReversals && (NumMaskedBits % 8) != 0) return Result; - const auto &Res = - collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + const auto &Res = + collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); if (!Res) return Result; Result = Res; - for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx) + for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx) // If the AndMask is zero for this bit, clear the bit. - if (AndMask[BitIdx] == 0) - Result->Provenance[BitIdx] = BitPart::Unset; + if (AndMask[BitIdx] == 0) + Result->Provenance[BitIdx] = BitPart::Unset; return Result; } // If this is a zext instruction zero extend the result. - if (match(V, m_ZExt(m_Value(X)))) { - const auto &Res = - collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + if (match(V, m_ZExt(m_Value(X)))) { + const auto &Res = + collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + if (!Res) + return Result; + + Result = BitPart(Res->Provider, BitWidth); + auto NarrowBitWidth = X->getType()->getScalarSizeInBits(); + for (unsigned BitIdx = 0; BitIdx < NarrowBitWidth; ++BitIdx) + Result->Provenance[BitIdx] = Res->Provenance[BitIdx]; + for (unsigned BitIdx = NarrowBitWidth; BitIdx < BitWidth; ++BitIdx) + Result->Provenance[BitIdx] = BitPart::Unset; + return Result; + } + + // BITREVERSE - most likely due to us previous matching a partial + // bitreverse. + if (match(V, m_BitReverse(m_Value(X)))) { + const auto &Res = + collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); if (!Res) return Result; Result = BitPart(Res->Provider, BitWidth); - auto NarrowBitWidth = X->getType()->getScalarSizeInBits(); - for (unsigned BitIdx = 0; BitIdx < NarrowBitWidth; ++BitIdx) - Result->Provenance[BitIdx] = Res->Provenance[BitIdx]; - for (unsigned BitIdx = NarrowBitWidth; BitIdx < BitWidth; ++BitIdx) - Result->Provenance[BitIdx] = BitPart::Unset; + for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx) + Result->Provenance[(BitWidth - 1) - BitIdx] = Res->Provenance[BitIdx]; + return Result; + } + + // BSWAP - most likely due to us previous matching a partial bswap. 
+ if (match(V, m_BSwap(m_Value(X)))) { + const auto &Res = + collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + if (!Res) + return Result; + + unsigned ByteWidth = BitWidth / 8; + Result = BitPart(Res->Provider, BitWidth); + for (unsigned ByteIdx = 0; ByteIdx < ByteWidth; ++ByteIdx) { + unsigned ByteBitOfs = ByteIdx * 8; + for (unsigned BitIdx = 0; BitIdx < 8; ++BitIdx) + Result->Provenance[(BitWidth - 8 - ByteBitOfs) + BitIdx] = + Res->Provenance[ByteBitOfs + BitIdx]; + } + return Result; + } + + // Funnel 'double' shifts take 3 operands, 2 inputs and the shift + // amount (modulo). + // fshl(X,Y,Z): (X << (Z % BW)) | (Y >> (BW - (Z % BW))) + // fshr(X,Y,Z): (X << (BW - (Z % BW))) | (Y >> (Z % BW)) + if (match(V, m_FShl(m_Value(X), m_Value(Y), m_APInt(C))) || + match(V, m_FShr(m_Value(X), m_Value(Y), m_APInt(C)))) { + // We can treat fshr as a fshl by flipping the modulo amount. + unsigned ModAmt = C->urem(BitWidth); + if (cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::fshr) + ModAmt = BitWidth - ModAmt; + + const auto &LHS = + collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + const auto &RHS = + collectBitParts(Y, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + + // Check we have both sources and they are from the same provider. + if (!LHS || !RHS || !LHS->Provider || LHS->Provider != RHS->Provider) + return Result; + + unsigned StartBitRHS = BitWidth - ModAmt; + Result = BitPart(LHS->Provider, BitWidth); + for (unsigned BitIdx = 0; BitIdx < StartBitRHS; ++BitIdx) + Result->Provenance[BitIdx + ModAmt] = LHS->Provenance[BitIdx]; + for (unsigned BitIdx = 0; BitIdx < ModAmt; ++BitIdx) + Result->Provenance[BitIdx] = RHS->Provenance[BitIdx + StartBitRHS]; return Result; } - - // BITREVERSE - most likely due to us previous matching a partial - // bitreverse. - if (match(V, m_BitReverse(m_Value(X)))) { - const auto &Res = - collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); - if (!Res) - return Result; - - Result = BitPart(Res->Provider, BitWidth); - for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx) - Result->Provenance[(BitWidth - 1) - BitIdx] = Res->Provenance[BitIdx]; - return Result; - } - - // BSWAP - most likely due to us previous matching a partial bswap. - if (match(V, m_BSwap(m_Value(X)))) { - const auto &Res = - collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); - if (!Res) - return Result; - - unsigned ByteWidth = BitWidth / 8; - Result = BitPart(Res->Provider, BitWidth); - for (unsigned ByteIdx = 0; ByteIdx < ByteWidth; ++ByteIdx) { - unsigned ByteBitOfs = ByteIdx * 8; - for (unsigned BitIdx = 0; BitIdx < 8; ++BitIdx) - Result->Provenance[(BitWidth - 8 - ByteBitOfs) + BitIdx] = - Res->Provenance[ByteBitOfs + BitIdx]; - } - return Result; - } - - // Funnel 'double' shifts take 3 operands, 2 inputs and the shift - // amount (modulo). - // fshl(X,Y,Z): (X << (Z % BW)) | (Y >> (BW - (Z % BW))) - // fshr(X,Y,Z): (X << (BW - (Z % BW))) | (Y >> (Z % BW)) - if (match(V, m_FShl(m_Value(X), m_Value(Y), m_APInt(C))) || - match(V, m_FShr(m_Value(X), m_Value(Y), m_APInt(C)))) { - // We can treat fshr as a fshl by flipping the modulo amount. 
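// [Editor's note] A worked instance of the funnel-shift mapping above, with
// illustrative numbers: for fshl(X, Y, 8) on i32, ModAmt = 8 and
// StartBitRHS = 24, so result bits [8..31] take their provenance from X bits
// [0..23] (the "X << 8" half) and result bits [0..7] take theirs from Y bits
// [24..31] (the "Y >> 24" half). fshr(X, Y, 8) is handled by first flipping
// ModAmt to 24, exactly as this code does.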
- unsigned ModAmt = C->urem(BitWidth); - if (cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::fshr) - ModAmt = BitWidth - ModAmt; - - const auto &LHS = - collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); - const auto &RHS = - collectBitParts(Y, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); - - // Check we have both sources and they are from the same provider. - if (!LHS || !RHS || !LHS->Provider || LHS->Provider != RHS->Provider) - return Result; - - unsigned StartBitRHS = BitWidth - ModAmt; - Result = BitPart(LHS->Provider, BitWidth); - for (unsigned BitIdx = 0; BitIdx < StartBitRHS; ++BitIdx) - Result->Provenance[BitIdx + ModAmt] = LHS->Provenance[BitIdx]; - for (unsigned BitIdx = 0; BitIdx < ModAmt; ++BitIdx) - Result->Provenance[BitIdx] = RHS->Provenance[BitIdx + StartBitRHS]; - return Result; - } } // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be // the input value to the bswap/bitreverse. Result = BitPart(V, BitWidth); - for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx) - Result->Provenance[BitIdx] = BitIdx; + for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx) + Result->Provenance[BitIdx] = BitIdx; return Result; } @@ -3070,89 +3070,89 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( return false; if (!MatchBSwaps && !MatchBitReversals) return false; - Type *ITy = I->getType(); - if (!ITy->isIntOrIntVectorTy() || ITy->getScalarSizeInBits() > 128) - return false; // Can't do integer/elements > 128 bits. + Type *ITy = I->getType(); + if (!ITy->isIntOrIntVectorTy() || ITy->getScalarSizeInBits() > 128) + return false; // Can't do integer/elements > 128 bits. - Type *DemandedTy = ITy; - if (I->hasOneUse()) - if (auto *Trunc = dyn_cast<TruncInst>(I->user_back())) - DemandedTy = Trunc->getType(); + Type *DemandedTy = ITy; + if (I->hasOneUse()) + if (auto *Trunc = dyn_cast<TruncInst>(I->user_back())) + DemandedTy = Trunc->getType(); // Try to find all the pieces corresponding to the bswap. std::map<Value *, Optional<BitPart>> BPS; auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS, 0); if (!Res) return false; - ArrayRef<int8_t> BitProvenance = Res->Provenance; - assert(all_of(BitProvenance, - [](int8_t I) { return I == BitPart::Unset || 0 <= I; }) && - "Illegal bit provenance index"); - - // If the upper bits are zero, then attempt to perform as a truncated op. - if (BitProvenance.back() == BitPart::Unset) { - while (!BitProvenance.empty() && BitProvenance.back() == BitPart::Unset) - BitProvenance = BitProvenance.drop_back(); - if (BitProvenance.empty()) - return false; // TODO - handle null value? - DemandedTy = Type::getIntNTy(I->getContext(), BitProvenance.size()); - if (auto *IVecTy = dyn_cast<VectorType>(ITy)) - DemandedTy = VectorType::get(DemandedTy, IVecTy); - } - - // Check BitProvenance hasn't found a source larger than the result type. - unsigned DemandedBW = DemandedTy->getScalarSizeInBits(); - if (DemandedBW > ITy->getScalarSizeInBits()) - return false; - + ArrayRef<int8_t> BitProvenance = Res->Provenance; + assert(all_of(BitProvenance, + [](int8_t I) { return I == BitPart::Unset || 0 <= I; }) && + "Illegal bit provenance index"); + + // If the upper bits are zero, then attempt to perform as a truncated op. + if (BitProvenance.back() == BitPart::Unset) { + while (!BitProvenance.empty() && BitProvenance.back() == BitPart::Unset) + BitProvenance = BitProvenance.drop_back(); + if (BitProvenance.empty()) + return false; // TODO - handle null value? 
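// [Editor's note] A worked example of the provenance data being checked here,
// for a plain 32-bit bswap idiom (illustrative, not from this file):
//   %r = (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24)
// collectBitParts reports a single provider x with
//   Provenance[0..7] = 24..31, Provenance[8..15] = 16..23,
//   Provenance[16..23] = 8..15, Provenance[24..31] = 0..7,
// i.e. result byte k comes from source byte 3-k. That mapping satisfies
// bitTransformIsCorrectForBSwap for every bit, so the whole expression
// collapses into a single llvm.bswap.i32 call.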
+ DemandedTy = Type::getIntNTy(I->getContext(), BitProvenance.size()); + if (auto *IVecTy = dyn_cast<VectorType>(ITy)) + DemandedTy = VectorType::get(DemandedTy, IVecTy); + } + + // Check BitProvenance hasn't found a source larger than the result type. + unsigned DemandedBW = DemandedTy->getScalarSizeInBits(); + if (DemandedBW > ITy->getScalarSizeInBits()) + return false; + // Now, is the bit permutation correct for a bswap or a bitreverse? We can // only byteswap values with an even number of bytes. - APInt DemandedMask = APInt::getAllOnesValue(DemandedBW); - bool OKForBSwap = MatchBSwaps && (DemandedBW % 16) == 0; - bool OKForBitReverse = MatchBitReversals; - for (unsigned BitIdx = 0; - (BitIdx < DemandedBW) && (OKForBSwap || OKForBitReverse); ++BitIdx) { - if (BitProvenance[BitIdx] == BitPart::Unset) { - DemandedMask.clearBit(BitIdx); - continue; - } - OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[BitIdx], BitIdx, - DemandedBW); - OKForBitReverse &= bitTransformIsCorrectForBitReverse(BitProvenance[BitIdx], - BitIdx, DemandedBW); + APInt DemandedMask = APInt::getAllOnesValue(DemandedBW); + bool OKForBSwap = MatchBSwaps && (DemandedBW % 16) == 0; + bool OKForBitReverse = MatchBitReversals; + for (unsigned BitIdx = 0; + (BitIdx < DemandedBW) && (OKForBSwap || OKForBitReverse); ++BitIdx) { + if (BitProvenance[BitIdx] == BitPart::Unset) { + DemandedMask.clearBit(BitIdx); + continue; + } + OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[BitIdx], BitIdx, + DemandedBW); + OKForBitReverse &= bitTransformIsCorrectForBitReverse(BitProvenance[BitIdx], + BitIdx, DemandedBW); } Intrinsic::ID Intrin; - if (OKForBSwap) + if (OKForBSwap) Intrin = Intrinsic::bswap; - else if (OKForBitReverse) + else if (OKForBitReverse) Intrin = Intrinsic::bitreverse; else return false; - Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, DemandedTy); - Value *Provider = Res->Provider; - - // We may need to truncate the provider. - if (DemandedTy != Provider->getType()) { - auto *Trunc = - CastInst::CreateIntegerCast(Provider, DemandedTy, false, "trunc", I); - InsertedInsts.push_back(Trunc); - Provider = Trunc; - } - - Instruction *Result = CallInst::Create(F, Provider, "rev", I); - InsertedInsts.push_back(Result); - - if (!DemandedMask.isAllOnesValue()) { - auto *Mask = ConstantInt::get(DemandedTy, DemandedMask); - Result = BinaryOperator::Create(Instruction::And, Result, Mask, "mask", I); - InsertedInsts.push_back(Result); - } - - // We may need to zeroextend back to the result type. - if (ITy != Result->getType()) { - auto *ExtInst = CastInst::CreateIntegerCast(Result, ITy, false, "zext", I); + Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, DemandedTy); + Value *Provider = Res->Provider; + + // We may need to truncate the provider. + if (DemandedTy != Provider->getType()) { + auto *Trunc = + CastInst::CreateIntegerCast(Provider, DemandedTy, false, "trunc", I); + InsertedInsts.push_back(Trunc); + Provider = Trunc; + } + + Instruction *Result = CallInst::Create(F, Provider, "rev", I); + InsertedInsts.push_back(Result); + + if (!DemandedMask.isAllOnesValue()) { + auto *Mask = ConstantInt::get(DemandedTy, DemandedMask); + Result = BinaryOperator::Create(Instruction::And, Result, Mask, "mask", I); + InsertedInsts.push_back(Result); + } + + // We may need to zeroextend back to the result type. 
+ if (ITy != Result->getType()) { + auto *ExtInst = CastInst::CreateIntegerCast(Result, ITy, false, "zext", I); InsertedInsts.push_back(ExtInst); } diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/LoopPeel.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/LoopPeel.cpp index 10ffb140a7..befacb5917 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/LoopPeel.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/LoopPeel.cpp @@ -1,862 +1,862 @@ -//===- LoopPeel.cpp -------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Loop Peeling Utilities. -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/LoopPeel.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopIterator.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/MDBuilder.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/PatternMatch.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/LoopSimplify.h" -#include "llvm/Transforms/Utils/LoopUtils.h" -#include "llvm/Transforms/Utils/UnrollLoop.h" -#include "llvm/Transforms/Utils/ValueMapper.h" -#include <algorithm> -#include <cassert> -#include <cstdint> -#include <limits> - -using namespace llvm; -using namespace llvm::PatternMatch; - -#define DEBUG_TYPE "loop-peel" - -STATISTIC(NumPeeled, "Number of loops peeled"); - -static cl::opt<unsigned> UnrollPeelCount( - "unroll-peel-count", cl::Hidden, - cl::desc("Set the unroll peeling count, for testing purposes")); - -static cl::opt<bool> - UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, - cl::desc("Allows loops to be peeled when the dynamic " - "trip count is known to be low.")); - -static cl::opt<bool> - UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling", - cl::init(false), cl::Hidden, - cl::desc("Allows loop nests to be peeled.")); - -static cl::opt<unsigned> UnrollPeelMaxCount( - "unroll-peel-max-count", cl::init(7), cl::Hidden, - cl::desc("Max average trip count which will cause loop peeling.")); - -static cl::opt<unsigned> UnrollForcePeelCount( - "unroll-force-peel-count", cl::init(0), cl::Hidden, - cl::desc("Force a peel count regardless of profiling information.")); - -static cl::opt<bool> UnrollPeelMultiDeoptExit( - "unroll-peel-multi-deopt-exit", cl::init(true), cl::Hidden, - cl::desc("Allow peeling of loops with multiple deopt exits.")); - -static const char *PeeledCountMetaData = "llvm.loop.peeled.count"; - -// Designates that a Phi is estimated to become invariant after an "infinite" -// number of loop iterations (i.e. 
it may only become invariant if the loop is -// fully unrolled). -static const unsigned InfiniteIterationsToInvariance = - std::numeric_limits<unsigned>::max(); - -// Check whether we are capable of peeling this loop. -bool llvm::canPeel(Loop *L) { - // Make sure the loop is in simplified form - if (!L->isLoopSimplifyForm()) - return false; - - if (UnrollPeelMultiDeoptExit) { - SmallVector<BasicBlock *, 4> Exits; - L->getUniqueNonLatchExitBlocks(Exits); - - if (!Exits.empty()) { - // Latch's terminator is a conditional branch, Latch is exiting and - // all non-Latch exits end up with deoptimize. - const BasicBlock *Latch = L->getLoopLatch(); - const BranchInst *T = dyn_cast<BranchInst>(Latch->getTerminator()); - return T && T->isConditional() && L->isLoopExiting(Latch) && - all_of(Exits, [](const BasicBlock *BB) { - return BB->getTerminatingDeoptimizeCall(); - }); - } - } - - // Only peel loops that contain a single exit - if (!L->getExitingBlock() || !L->getUniqueExitBlock()) - return false; - - // Don't try to peel loops where the latch is not the exiting block. - // This can be an indication of two different things: - // 1) The loop is not rotated. - // 2) The loop contains irreducible control flow that involves the latch. - const BasicBlock *Latch = L->getLoopLatch(); - if (Latch != L->getExitingBlock()) - return false; - - // Peeling is only supported if the latch is a branch. - if (!isa<BranchInst>(Latch->getTerminator())) - return false; - - return true; -} - -// This function calculates the number of iterations after which the given Phi -// becomes an invariant. The pre-calculated values are memoized in the map. The -// function (I for short) is calculated according to the following definition: -// Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge]. -// If %y is a loop invariant, then I(%x) = 1. -// If %y is a Phi from the loop header, I(%x) = I(%y) + 1. -// Otherwise, I(%x) is infinite. -// TODO: Actually if %y is an expression that depends only on Phi %z and some -// loop invariants, we can estimate I(%x) = I(%z) + 1. The example -// looks like: -// %x = phi(0, %a), <-- becomes invariant starting from 3rd iteration. -// %y = phi(0, 5), -// %a = %y + 1. -static unsigned calculateIterationsToInvariance( - PHINode *Phi, Loop *L, BasicBlock *BackEdge, - SmallDenseMap<PHINode *, unsigned> &IterationsToInvariance) { - assert(Phi->getParent() == L->getHeader() && - "Non-loop Phi should not be checked for turning into invariant."); - assert(BackEdge == L->getLoopLatch() && "Wrong latch?"); - // If we already know the answer, take it from the map. - auto I = IterationsToInvariance.find(Phi); - if (I != IterationsToInvariance.end()) - return I->second; - - // Otherwise we need to analyze the input from the back edge. - Value *Input = Phi->getIncomingValueForBlock(BackEdge); - // Place infinity in the map to avoid infinite recursion for cyclic Phis. Such - // cycles can never become invariant. - IterationsToInvariance[Phi] = InfiniteIterationsToInvariance; - unsigned ToInvariance = InfiniteIterationsToInvariance; - - if (L->isLoopInvariant(Input)) - ToInvariance = 1u; - else if (PHINode *IncPhi = dyn_cast<PHINode>(Input)) { - // Only consider Phis in header block. - if (IncPhi->getParent() != L->getHeader()) - return InfiniteIterationsToInvariance; - // If the input becomes an invariant after X iterations, then our Phi - // becomes an invariant after X + 1 iterations.
- unsigned InputToInvariance = calculateIterationsToInvariance( - IncPhi, L, BackEdge, IterationsToInvariance); - if (InputToInvariance != InfiniteIterationsToInvariance) - ToInvariance = InputToInvariance + 1u; - } - - // If we found that this Phi lies in an invariant chain, update the map. - if (ToInvariance != InfiniteIterationsToInvariance) - IterationsToInvariance[Phi] = ToInvariance; - return ToInvariance; -} - -// Return the number of iterations to peel off that make conditions in the -// body true/false. For example, if we peel 2 iterations off the loop below, -// the condition i < 2 can be evaluated at compile time. -// for (i = 0; i < n; i++) -// if (i < 2) -// .. -// else -// .. -// } -static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, - ScalarEvolution &SE) { - assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form"); - unsigned DesiredPeelCount = 0; - - for (auto *BB : L.blocks()) { - auto *BI = dyn_cast<BranchInst>(BB->getTerminator()); - if (!BI || BI->isUnconditional()) - continue; - - // Ignore loop exit condition. - if (L.getLoopLatch() == BB) - continue; - - Value *Condition = BI->getCondition(); - Value *LeftVal, *RightVal; - CmpInst::Predicate Pred; - if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal)))) - continue; - - const SCEV *LeftSCEV = SE.getSCEV(LeftVal); - const SCEV *RightSCEV = SE.getSCEV(RightVal); - - // Do not consider predicates that are known to be true or false - // independently of the loop iteration. - if (SE.isKnownPredicate(Pred, LeftSCEV, RightSCEV) || - SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), LeftSCEV, - RightSCEV)) - continue; - - // Check if we have a condition with one AddRec and one non-AddRec - // expression. Normalize LeftSCEV to be the AddRec. - if (!isa<SCEVAddRecExpr>(LeftSCEV)) { - if (isa<SCEVAddRecExpr>(RightSCEV)) { - std::swap(LeftSCEV, RightSCEV); - Pred = ICmpInst::getSwappedPredicate(Pred); - } else - continue; - } - - const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV); - - // Avoid huge SCEV computations in the loop below; make sure we only - // consider AddRecs of the loop we are trying to peel. - if (!LeftAR->isAffine() || LeftAR->getLoop() != &L) - continue; - if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) && - !SE.getMonotonicPredicateType(LeftAR, Pred)) - continue; - - // Check if extending the current DesiredPeelCount lets us evaluate Pred - // or !Pred in the loop body statically. - unsigned NewPeelCount = DesiredPeelCount; - - const SCEV *IterVal = LeftAR->evaluateAtIteration( - SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE); - - // If the original condition is not known, get the negated predicate - // (which holds on the else branch) and check if it is known. This allows - // us to peel off iterations that make the original condition false.
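// [Editor's note] A short trace of the peeling search below on the example
// from the countToEliminateCompares comment (induction {0,+,1}, condition
// i < 2, so RightSCEV = 2):
//   IterVal(0) = 0: 0 < 2 known true -> peel, NewPeelCount = 1
//   IterVal(1) = 1: 1 < 2 known true -> peel, NewPeelCount = 2
//   IterVal(2) = 2: 2 < 2 no longer provable, !Pred (i >= 2) is known -> stop
// Two peeled iterations therefore make the compare statically decidable in
// the remaining loop body.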
- if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV)) - Pred = ICmpInst::getInversePredicate(Pred); - - const SCEV *Step = LeftAR->getStepRecurrence(SE); - const SCEV *NextIterVal = SE.getAddExpr(IterVal, Step); - auto PeelOneMoreIteration = [&IterVal, &NextIterVal, &SE, Step, - &NewPeelCount]() { - IterVal = NextIterVal; - NextIterVal = SE.getAddExpr(IterVal, Step); - NewPeelCount++; - }; - - auto CanPeelOneMoreIteration = [&NewPeelCount, &MaxPeelCount]() { - return NewPeelCount < MaxPeelCount; - }; - - while (CanPeelOneMoreIteration() && - SE.isKnownPredicate(Pred, IterVal, RightSCEV)) - PeelOneMoreIteration(); - - // With *that* peel count, does the predicate !Pred become known in the - // first iteration of the loop body after peeling? - if (!SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal, - RightSCEV)) - continue; // If not, give up. - - // However, for equality comparisons, that isn't always sufficient to - // eliminate the comparison in the loop body; we may need to peel one more - // iteration. See if that makes !Pred become unknown again. - if (ICmpInst::isEquality(Pred) && - !SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), NextIterVal, - RightSCEV) && - !SE.isKnownPredicate(Pred, IterVal, RightSCEV) && - SE.isKnownPredicate(Pred, NextIterVal, RightSCEV)) { - if (!CanPeelOneMoreIteration()) - continue; // Need to peel one more iteration, but can't. Give up. - PeelOneMoreIteration(); // Great! - } - - DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount); - } - - return DesiredPeelCount; -} - -// Return the number of iterations we want to peel off. -void llvm::computePeelCount(Loop *L, unsigned LoopSize, - TargetTransformInfo::PeelingPreferences &PP, - unsigned &TripCount, ScalarEvolution &SE, - unsigned Threshold) { - assert(LoopSize > 0 && "Zero loop size is not allowed!"); - // Save the PP.PeelCount value set by the target in - // TTI.getPeelingPreferences or by the flag -unroll-peel-count. - unsigned TargetPeelCount = PP.PeelCount; - PP.PeelCount = 0; - if (!canPeel(L)) - return; - - // Only try to peel innermost loops by default. - // The constraint can be relaxed by the target in TTI.getUnrollingPreferences - // or by the flag -unroll-allow-loop-nests-peeling. - if (!PP.AllowLoopNestsPeeling && !L->isInnermost()) - return; - - // If the user provided a peel count, use that. - bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0; - if (UserPeelCount) { - LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount - << " iterations.\n"); - PP.PeelCount = UnrollForcePeelCount; - PP.PeelProfiledIterations = true; - return; - } - - // Skip peeling if it's disabled. - if (!PP.AllowPeeling) - return; - - unsigned AlreadyPeeled = 0; - if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData)) - AlreadyPeeled = *Peeled; - // Stop if we already peeled off the maximum number of iterations. - if (AlreadyPeeled >= UnrollPeelMaxCount) - return; - - // Here we try to get rid of Phis which become invariants after 1, 2, ..., N - // iterations of the loop. For this we compute the number of iterations after - // which every Phi is guaranteed to become an invariant, and try to peel the - // maximum number of iterations among these values, thus turning all those - // Phis into invariants. - // First, check that we can peel at least one iteration. - if (2 * LoopSize <= Threshold && UnrollPeelMaxCount > 0) { - // Store the pre-calculated values here.
- SmallDenseMap<PHINode *, unsigned> IterationsToInvariance; - // Now go through all Phis to calculate the number of iterations they - // need to become invariants. - // Start the max computation with the PP.PeelCount value set by the target - // in TTI.getPeelingPreferences or by the flag -unroll-peel-count. - unsigned DesiredPeelCount = TargetPeelCount; - BasicBlock *BackEdge = L->getLoopLatch(); - assert(BackEdge && "Loop is not in simplified form?"); - for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) { - PHINode *Phi = cast<PHINode>(&*BI); - unsigned ToInvariance = calculateIterationsToInvariance( - Phi, L, BackEdge, IterationsToInvariance); - if (ToInvariance != InfiniteIterationsToInvariance) - DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance); - } - - // Respect the limitations implied by the loop size and the max peel count. - unsigned MaxPeelCount = UnrollPeelMaxCount; - MaxPeelCount = std::min(MaxPeelCount, Threshold / LoopSize - 1); - - DesiredPeelCount = std::max(DesiredPeelCount, - countToEliminateCompares(*L, MaxPeelCount, SE)); - - if (DesiredPeelCount > 0) { - DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount); - // Consider max peel count limitation. - assert(DesiredPeelCount > 0 && "Wrong loop size estimation?"); - if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) { - LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount - << " iteration(s) to turn" - << " some Phis into invariants.\n"); - PP.PeelCount = DesiredPeelCount; - PP.PeelProfiledIterations = false; - return; - } - } - } - - // Bail if we know the statically calculated trip count. - // In this case we prefer partial unrolling. - if (TripCount) - return; - - // Do not apply profile-based peeling if it is disabled. - if (!PP.PeelProfiledIterations) - return; - // If we don't know the trip count, but have reason to believe the average - // trip count is low, peeling should be beneficial, since we will usually - // hit the peeled section. - // We only do this in the presence of profile information, since otherwise - // our estimates of the trip count are not reliable enough. - if (L->getHeader()->getParent()->hasProfileData()) { - Optional<unsigned> PeelCount = getLoopEstimatedTripCount(L); - if (!PeelCount) - return; - - LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount - << "\n"); - - if (*PeelCount) { - if ((*PeelCount + AlreadyPeeled <= UnrollPeelMaxCount) && - (LoopSize * (*PeelCount + 1) <= Threshold)) { - LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount - << " iterations.\n"); - PP.PeelCount = *PeelCount; - return; - } - LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n"); - LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n"); - LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n"); - LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1) - << "\n"); - LLVM_DEBUG(dbgs() << "Max peel cost: " << Threshold << "\n"); - } - } -} - -/// Update the branch weights of the latch of a peeled-off loop -/// iteration. -/// This sets the branch weights for the latch of the recently peeled off loop -/// iteration correctly. -/// Let F be the weight of the edge from latch to header. -/// Let E be the weight of the edge from latch to exit. -/// F/(F+E) is the probability of staying in the loop and E/(F+E) is the -/// probability of exiting. -/// Then, Estimated TripCount = F / E. -/// For the I-th (counting from 0) peeled-off iteration we set the weights for -/// the peeled latch to (TC - I, 1).
This gives a reasonable distribution: -/// the probability of exiting, 1/(TC-I), increases. At the same time -/// the estimated trip count of the remaining loop is reduced by I. -/// To avoid dealing with division rounding we can just multiply both parts -/// of the weights by E and use the weights (F - I * E, E); see the editor's -/// worked example below, before peelLoop. -/// -/// \param Header The copy of the header block that belongs to the next iteration. -/// \param LatchBR The copy of the latch branch that belongs to this iteration. -/// \param[in,out] FallThroughWeight The weight of the edge from latch to -/// header before peeling (in) and after peeling off one iteration (out). -static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR, - uint64_t ExitWeight, - uint64_t &FallThroughWeight) { - // A FallThroughWeight of 0 means that there are no branch weights on the - // original latch block or the estimated trip count is zero. - if (!FallThroughWeight) - return; - - unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1); - MDBuilder MDB(LatchBR->getContext()); - MDNode *WeightNode = - HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight) - : MDB.createBranchWeights(FallThroughWeight, ExitWeight); - LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); - FallThroughWeight = - FallThroughWeight > ExitWeight ? FallThroughWeight - ExitWeight : 1; -} - -/// Initialize the weights. -/// -/// \param Header The header block. -/// \param LatchBR The latch branch. -/// \param[out] ExitWeight The weight of the edge from Latch to Exit. -/// \param[out] FallThroughWeight The weight of the edge from Latch to Header. -static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR, - uint64_t &ExitWeight, - uint64_t &FallThroughWeight) { - uint64_t TrueWeight, FalseWeight; - if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) - return; - unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1; - ExitWeight = HeaderIdx ? TrueWeight : FalseWeight; - FallThroughWeight = HeaderIdx ? FalseWeight : TrueWeight; -} - -/// Update the weights of the original Latch block after peeling off all iterations. -/// -/// \param Header The header block. -/// \param LatchBR The latch branch. -/// \param ExitWeight The weight of the edge from Latch to Exit. -/// \param FallThroughWeight The weight of the edge from Latch to Header. -static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR, - uint64_t ExitWeight, - uint64_t FallThroughWeight) { - // A FallThroughWeight of 0 means that there are no branch weights on the - // original latch block or the estimated trip count is zero. - if (!FallThroughWeight) - return; - - // Sets the branch weights on the loop exit. - MDBuilder MDB(LatchBR->getContext()); - unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1; - MDNode *WeightNode = - HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight) - : MDB.createBranchWeights(FallThroughWeight, ExitWeight); - LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); -} - -/// Clones the body of the loop L, putting it between \p InsertTop and \p -/// InsertBot. -/// \param IterNumber The serial number of the iteration currently being -/// peeled off. -/// \param ExitEdges The exit edges of the original loop. -/// \param[out] NewBlocks A list of the blocks in the newly created clone -/// \param[out] VMap The value map between the loop and the new clone. -/// \param LoopBlocks A helper for DFS-traversal of the loop.
-/// \param LVMap A value-map that maps instructions from the original loop to -/// instructions in the last peeled-off iteration. -static void cloneLoopBlocks( - Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot, - SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges, - SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, - ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, - LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes) { - BasicBlock *Header = L->getHeader(); - BasicBlock *Latch = L->getLoopLatch(); - BasicBlock *PreHeader = L->getLoopPreheader(); - - Function *F = Header->getParent(); - LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); - LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); - Loop *ParentLoop = L->getParentLoop(); - - // For each block in the original loop, create a new copy, - // and update the value map with the newly created values. - for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { - BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F); - NewBlocks.push_back(NewBB); - - // If an original block is an immediate child of the loop L, its copy - // is a child of a ParentLoop after peeling. If a block is a child of - // a nested loop, it is handled in the cloneLoop() call below. - if (ParentLoop && LI->getLoopFor(*BB) == L) - ParentLoop->addBasicBlockToLoop(NewBB, *LI); - - VMap[*BB] = NewBB; - - // If dominator tree is available, insert nodes to represent cloned blocks. - if (DT) { - if (Header == *BB) - DT->addNewBlock(NewBB, InsertTop); - else { - DomTreeNode *IDom = DT->getNode(*BB)->getIDom(); - // VMap must contain entry for IDom, as the iteration order is RPO. - DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()])); - } - } - } - - { - // Identify what other metadata depends on the cloned version. After - // cloning, replace the metadata with the corrected version for both - // memory instructions and noalias intrinsics. - std::string Ext = (Twine("Peel") + Twine(IterNumber)).str(); - cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks, - Header->getContext(), Ext); - } - - // Recursively create the new Loop objects for nested loops, if any, - // to preserve LoopInfo. - for (Loop *ChildLoop : *L) { - cloneLoop(ChildLoop, ParentLoop, VMap, LI, nullptr); - } - - // Hook-up the control flow for the newly inserted blocks. - // The new header is hooked up directly to the "top", which is either - // the original loop preheader (for the first iteration) or the previous - // iteration's exiting block (for every other iteration) - InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header])); - - // Similarly, for the latch: - // The original exiting edge is still hooked up to the loop exit. - // The backedge now goes to the "bottom", which is either the loop's real - // header (for the last peeled iteration) or the copied header of the next - // iteration (for every other iteration) - BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]); - BranchInst *LatchBR = cast<BranchInst>(NewLatch->getTerminator()); - for (unsigned idx = 0, e = LatchBR->getNumSuccessors(); idx < e; ++idx) - if (LatchBR->getSuccessor(idx) == Header) { - LatchBR->setSuccessor(idx, InsertBot); - break; - } - if (DT) - DT->changeImmediateDominator(InsertBot, NewLatch); - - // The new copy of the loop body starts with a bunch of PHI nodes - // that pick an incoming value from either the preheader, or the previous - // loop iteration. 
Since this copy is no longer part of the loop, we - // resolve this statically: - // For the first iteration, we use the value from the preheader directly. - // For any other iteration, we replace the phi with the value generated by - // the immediately preceding clone of the loop body (which represents - // the previous iteration). - for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { - PHINode *NewPHI = cast<PHINode>(VMap[&*I]); - if (IterNumber == 0) { - VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader); - } else { - Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch); - Instruction *LatchInst = dyn_cast<Instruction>(LatchVal); - if (LatchInst && L->contains(LatchInst)) - VMap[&*I] = LVMap[LatchInst]; - else - VMap[&*I] = LatchVal; - } - cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI); - } - - // Fix up the outgoing values - we need to add a value for the iteration - // we've just created. Note that this must happen *after* the incoming - // values are adjusted, since the value going out of the latch may also be - // a value coming into the header. - for (auto Edge : ExitEdges) - for (PHINode &PHI : Edge.second->phis()) { - Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first); - Instruction *LatchInst = dyn_cast<Instruction>(LatchVal); - if (LatchInst && L->contains(LatchInst)) - LatchVal = VMap[LatchVal]; - PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first])); - } - - // LastValueMap is updated with the values for the current loop - // which are used the next time this function is called. - for (auto KV : VMap) - LVMap[KV.first] = KV.second; -} - -TargetTransformInfo::PeelingPreferences llvm::gatherPeelingPreferences( - Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, - Optional<bool> UserAllowPeeling, - Optional<bool> UserAllowProfileBasedPeeling, bool UnrollingSpecficValues) { - TargetTransformInfo::PeelingPreferences PP; - - // Set the default values. - PP.PeelCount = 0; - PP.AllowPeeling = true; - PP.AllowLoopNestsPeeling = false; - PP.PeelProfiledIterations = true; - - // Get the target specific values. - TTI.getPeelingPreferences(L, SE, PP); - - // User specified values using cl::opt. - if (UnrollingSpecficValues) { - if (UnrollPeelCount.getNumOccurrences() > 0) - PP.PeelCount = UnrollPeelCount; - if (UnrollAllowPeeling.getNumOccurrences() > 0) - PP.AllowPeeling = UnrollAllowPeeling; - if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0) - PP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling; - } - - // User-specified values provided by argument. - if (UserAllowPeeling.hasValue()) - PP.AllowPeeling = *UserAllowPeeling; - if (UserAllowProfileBasedPeeling.hasValue()) - PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling; - - return PP; -} - -/// Peel off the first \p PeelCount iterations of loop \p L. -/// -/// Note that this does not peel them off as a single straight-line block. -/// Rather, each iteration is peeled off separately, and needs to check the -/// exit condition. -/// For loops that dynamically execute \p PeelCount iterations or fewer -/// this provides a benefit, since the peeled-off iterations, which account -/// for the bulk of dynamic execution, can be further simplified by scalar -/// optimizations.
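// [Editor's note] Worked numbers for the branch-weight scheme documented at
// updateBranchWeights above (the profile values are illustrative): with
// F = 90 (latch -> header) and E = 10 (latch -> exit), the estimated trip
// count is F / E = 9. The peeled latches then receive
//   iteration I = 0: weights (90, 10), exit probability 1/10
//   iteration I = 1: weights (80, 10), exit probability 1/9
//   iteration I = 2: weights (70, 10), exit probability 1/8
// because updateBranchWeights subtracts ExitWeight from FallThroughWeight
// after each peeled-off iteration, clamping at 1.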
-bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, - ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, - bool PreserveLCSSA) { - assert(PeelCount > 0 && "Attempt to peel out zero iterations?"); - assert(canPeel(L) && "Attempt to peel a loop which is not peelable?"); - - LoopBlocksDFS LoopBlocks(L); - LoopBlocks.perform(LI); - - BasicBlock *Header = L->getHeader(); - BasicBlock *PreHeader = L->getLoopPreheader(); - BasicBlock *Latch = L->getLoopLatch(); - SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitEdges; - L->getExitEdges(ExitEdges); - - DenseMap<BasicBlock *, BasicBlock *> ExitIDom; - if (DT) { - // We'd like to determine the idom of exit block after peeling one - // iteration. - // Let Exit is exit block. - // Let ExitingSet - is a set of predecessors of Exit block. They are exiting - // blocks. - // Let Latch' and ExitingSet' are copies after a peeling. - // We'd like to find an idom'(Exit) - idom of Exit after peeling. - // It is an evident that idom'(Exit) will be the nearest common dominator - // of ExitingSet and ExitingSet'. - // idom(Exit) is a nearest common dominator of ExitingSet. - // idom(Exit)' is a nearest common dominator of ExitingSet'. - // Taking into account that we have a single Latch, Latch' will dominate - // Header and idom(Exit). - // So the idom'(Exit) is nearest common dominator of idom(Exit)' and Latch'. - // All these basic blocks are in the same loop, so what we find is - // (nearest common dominator of idom(Exit) and Latch)'. - // In the loop below we remember nearest common dominator of idom(Exit) and - // Latch to update idom of Exit later. - assert(L->hasDedicatedExits() && "No dedicated exits?"); - for (auto Edge : ExitEdges) { - if (ExitIDom.count(Edge.second)) - continue; - BasicBlock *BB = DT->findNearestCommonDominator( - DT->getNode(Edge.second)->getIDom()->getBlock(), Latch); - assert(L->contains(BB) && "IDom is not in a loop"); - ExitIDom[Edge.second] = BB; - } - } - - Function *F = Header->getParent(); - - // Set up all the necessary basic blocks. It is convenient to split the - // preheader into 3 parts - two blocks to anchor the peeled copy of the loop - // body, and a new preheader for the "real" loop. - - // Peeling the first iteration transforms. - // - // PreHeader: - // ... - // Header: - // LoopBody - // If (cond) goto Header - // Exit: - // - // into - // - // InsertTop: - // LoopBody - // If (!cond) goto Exit - // InsertBot: - // NewPreHeader: - // ... - // Header: - // LoopBody - // If (cond) goto Header - // Exit: - // - // Each following iteration will split the current bottom anchor in two, - // and put the new copy of the loop body between these two blocks. That is, - // after peeling another iteration from the example above, we'll split - // InsertBot, and get: - // - // InsertTop: - // LoopBody - // If (!cond) goto Exit - // InsertBot: - // LoopBody - // If (!cond) goto Exit - // InsertBot.next: - // NewPreHeader: - // ... 
- // Header: - // LoopBody - // If (cond) goto Header - // Exit: - - BasicBlock *InsertTop = SplitEdge(PreHeader, Header, DT, LI); - BasicBlock *InsertBot = - SplitBlock(InsertTop, InsertTop->getTerminator(), DT, LI); - BasicBlock *NewPreHeader = - SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI); - - InsertTop->setName(Header->getName() + ".peel.begin"); - InsertBot->setName(Header->getName() + ".peel.next"); - NewPreHeader->setName(PreHeader->getName() + ".peel.newph"); - - ValueToValueMapTy LVMap; - - // If we have branch weight information, we'll want to update it for the - // newly created branches. - BranchInst *LatchBR = - cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator()); - uint64_t ExitWeight = 0, FallThroughWeight = 0; - initBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight); - - // Identify what noalias metadata is inside the loop: if it is inside the - // loop, the associated metadata must be cloned for each iteration. - SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes; - identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes); - - // For each peeled-off iteration, make a copy of the loop. - for (unsigned Iter = 0; Iter < PeelCount; ++Iter) { - SmallVector<BasicBlock *, 8> NewBlocks; - ValueToValueMapTy VMap; - - cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks, - LoopBlocks, VMap, LVMap, DT, LI, - LoopLocalNoAliasDeclScopes); - - // Remap to use values from the current iteration instead of the - // previous one. - remapInstructionsInBlocks(NewBlocks, VMap); - - if (DT) { - // Latches of the cloned loops dominate over the loop exit, so idom of the - // latter is the first cloned loop body, as original PreHeader dominates - // the original loop body. - if (Iter == 0) - for (auto Exit : ExitIDom) - DT->changeImmediateDominator(Exit.first, - cast<BasicBlock>(LVMap[Exit.second])); -#ifdef EXPENSIVE_CHECKS - assert(DT->verify(DominatorTree::VerificationLevel::Fast)); -#endif - } - - auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]); - updateBranchWeights(InsertBot, LatchBRCopy, ExitWeight, FallThroughWeight); - // Remove Loop metadata from the latch branch instruction - // because it is not the Loop's latch branch anymore. - LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr); - - InsertTop = InsertBot; - InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI); - InsertBot->setName(Header->getName() + ".peel.next"); - - F->getBasicBlockList().splice(InsertTop->getIterator(), - F->getBasicBlockList(), - NewBlocks[0]->getIterator(), F->end()); - } - - // Now adjust the phi nodes in the loop header to get their initial values - // from the last peeled-off iteration instead of the preheader. - for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { - PHINode *PHI = cast<PHINode>(I); - Value *NewVal = PHI->getIncomingValueForBlock(Latch); - Instruction *LatchInst = dyn_cast<Instruction>(NewVal); - if (LatchInst && L->contains(LatchInst)) - NewVal = LVMap[LatchInst]; - - PHI->setIncomingValueForBlock(NewPreHeader, NewVal); - } - - fixupBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight); - - // Update Metadata for count of peeled off iterations. - unsigned AlreadyPeeled = 0; - if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData)) - AlreadyPeeled = *Peeled; - addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount); - - if (Loop *ParentLoop = L->getParentLoop()) - L = ParentLoop; - - // We modified the loop, update SE. 
- SE->forgetTopmostLoop(L); - - // Finally DomtTree must be correct. - assert(DT->verify(DominatorTree::VerificationLevel::Fast)); - - // FIXME: Incrementally update loop-simplify - simplifyLoop(L, DT, LI, SE, AC, nullptr, PreserveLCSSA); - - NumPeeled++; - - return true; -} +//===- LoopPeel.cpp -------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Loop Peeling Utilities. +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LoopPeel.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/UnrollLoop.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <limits> + +using namespace llvm; +using namespace llvm::PatternMatch; + +#define DEBUG_TYPE "loop-peel" + +STATISTIC(NumPeeled, "Number of loops peeled"); + +static cl::opt<unsigned> UnrollPeelCount( + "unroll-peel-count", cl::Hidden, + cl::desc("Set the unroll peeling count, for testing purposes")); + +static cl::opt<bool> + UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, + cl::desc("Allows loops to be peeled when the dynamic " + "trip count is known to be low.")); + +static cl::opt<bool> + UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling", + cl::init(false), cl::Hidden, + cl::desc("Allows loop nests to be peeled.")); + +static cl::opt<unsigned> UnrollPeelMaxCount( + "unroll-peel-max-count", cl::init(7), cl::Hidden, + cl::desc("Max average trip count which will cause loop peeling.")); + +static cl::opt<unsigned> UnrollForcePeelCount( + "unroll-force-peel-count", cl::init(0), cl::Hidden, + cl::desc("Force a peel count regardless of profiling information.")); + +static cl::opt<bool> UnrollPeelMultiDeoptExit( + "unroll-peel-multi-deopt-exit", cl::init(true), cl::Hidden, + cl::desc("Allow peeling of loops with multiple deopt exits.")); + +static const char *PeeledCountMetaData = "llvm.loop.peeled.count"; + +// Designates that a Phi is estimated to become invariant after an "infinite" +// number of loop iterations (i.e. only may become an invariant if the loop is +// fully unrolled). 
+static const unsigned InfiniteIterationsToInvariance =
+    std::numeric_limits<unsigned>::max();
+
+// Check whether we are capable of peeling this loop.
+bool llvm::canPeel(Loop *L) {
+  // Make sure the loop is in simplified form
+  if (!L->isLoopSimplifyForm())
+    return false;
+
+  if (UnrollPeelMultiDeoptExit) {
+    SmallVector<BasicBlock *, 4> Exits;
+    L->getUniqueNonLatchExitBlocks(Exits);
+
+    if (!Exits.empty()) {
+      // The Latch's terminator is a conditional branch, the Latch is exiting,
+      // and all non-Latch exits end up in a deoptimize call.
+      const BasicBlock *Latch = L->getLoopLatch();
+      const BranchInst *T = dyn_cast<BranchInst>(Latch->getTerminator());
+      return T && T->isConditional() && L->isLoopExiting(Latch) &&
+             all_of(Exits, [](const BasicBlock *BB) {
+               return BB->getTerminatingDeoptimizeCall();
+             });
+    }
+  }
+
+  // Only peel loops that contain a single exit
+  if (!L->getExitingBlock() || !L->getUniqueExitBlock())
+    return false;
+
+  // Don't try to peel loops where the latch is not the exiting block.
+  // This can be an indication of two different things:
+  // 1) The loop is not rotated.
+  // 2) The loop contains irreducible control flow that involves the latch.
+  const BasicBlock *Latch = L->getLoopLatch();
+  if (Latch != L->getExitingBlock())
+    return false;
+
+  // Peeling is only supported if the latch is a branch.
+  if (!isa<BranchInst>(Latch->getTerminator()))
+    return false;
+
+  return true;
+}
+
+// This function calculates the number of iterations after which the given Phi
+// becomes an invariant. The pre-calculated values are memoized in the map.
+// The function (call it I) is defined as follows:
+// Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
+// If %y is a loop invariant, then I(%x) = 1.
+// If %y is a Phi from the loop header, I(%x) = I(%y) + 1.
+// Otherwise, I(%x) is infinite.
+// TODO: Actually if %y is an expression that depends only on Phi %z and some
+//       loop invariants, we can estimate I(%x) = I(%z) + 1. The example
+//       looks like:
+//       %x = phi(0, %a),  <-- becomes invariant starting from 3rd iteration.
+//       %y = phi(0, 5),
+//       %a = %y + 1.
+static unsigned calculateIterationsToInvariance(
+    PHINode *Phi, Loop *L, BasicBlock *BackEdge,
+    SmallDenseMap<PHINode *, unsigned> &IterationsToInvariance) {
+  assert(Phi->getParent() == L->getHeader() &&
+         "Non-loop Phi should not be checked for turning into invariant.");
+  assert(BackEdge == L->getLoopLatch() && "Wrong latch?");
+  // If we already know the answer, take it from the map.
+  auto I = IterationsToInvariance.find(Phi);
+  if (I != IterationsToInvariance.end())
+    return I->second;
+
+  // Otherwise we need to analyze the input from the back edge.
+  Value *Input = Phi->getIncomingValueForBlock(BackEdge);
+  // Place infinity into the map to avoid infinite recursion for cyclic Phis.
+  // Such cycles can never stop on an invariant.
+  IterationsToInvariance[Phi] = InfiniteIterationsToInvariance;
+  unsigned ToInvariance = InfiniteIterationsToInvariance;
+
+  if (L->isLoopInvariant(Input))
+    ToInvariance = 1u;
+  else if (PHINode *IncPhi = dyn_cast<PHINode>(Input)) {
+    // Only consider Phis in the header block.
+    if (IncPhi->getParent() != L->getHeader())
+      return InfiniteIterationsToInvariance;
+    // If the input becomes an invariant after X iterations, then our Phi
+    // becomes an invariant after X + 1 iterations.
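+    // For example (hypothetical IR, not from this pass), given a chain of
+    // header Phis whose last backedge input %inv is loop-invariant:
+    //   %a = phi [ 0, %ph ], [ %b, %latch ]    ; I(%a) = I(%b) + 1 = 3
+    //   %b = phi [ 0, %ph ], [ %c, %latch ]    ; I(%b) = I(%c) + 1 = 2
+    //   %c = phi [ 0, %ph ], [ %inv, %latch ]  ; I(%c) = 1
+    // so peeling 3 iterations would turn all three Phis into invariants.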
+    unsigned InputToInvariance = calculateIterationsToInvariance(
+        IncPhi, L, BackEdge, IterationsToInvariance);
+    if (InputToInvariance != InfiniteIterationsToInvariance)
+      ToInvariance = InputToInvariance + 1u;
+  }
+
+  // If we found that this Phi lies in an invariant chain, update the map.
+  if (ToInvariance != InfiniteIterationsToInvariance)
+    IterationsToInvariance[Phi] = ToInvariance;
+  return ToInvariance;
+}
+
+// Return the number of iterations to peel off that make conditions in the
+// body true/false. For example, if we peel 2 iterations off the loop below,
+// the condition i < 2 can be evaluated at compile time.
+// for (i = 0; i < n; i++) {
+//   if (i < 2)
+//     ..
+//   else
+//     ..
+// }
+static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
+                                         ScalarEvolution &SE) {
+  assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
+  unsigned DesiredPeelCount = 0;
+
+  for (auto *BB : L.blocks()) {
+    auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
+    if (!BI || BI->isUnconditional())
+      continue;
+
+    // Ignore the loop exit condition.
+    if (L.getLoopLatch() == BB)
+      continue;
+
+    Value *Condition = BI->getCondition();
+    Value *LeftVal, *RightVal;
+    CmpInst::Predicate Pred;
+    if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
+      continue;
+
+    const SCEV *LeftSCEV = SE.getSCEV(LeftVal);
+    const SCEV *RightSCEV = SE.getSCEV(RightVal);
+
+    // Do not consider predicates that are known to be true or false
+    // independently of the loop iteration.
+    if (SE.isKnownPredicate(Pred, LeftSCEV, RightSCEV) ||
+        SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), LeftSCEV,
+                            RightSCEV))
+      continue;
+
+    // Check if we have a condition with one AddRec and one non-AddRec
+    // expression. Normalize LeftSCEV to be the AddRec.
+    if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
+      if (isa<SCEVAddRecExpr>(RightSCEV)) {
+        std::swap(LeftSCEV, RightSCEV);
+        Pred = ICmpInst::getSwappedPredicate(Pred);
+      } else
+        continue;
+    }
+
+    const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);
+
+    // Avoid huge SCEV computations in the loop below; make sure we only
+    // consider AddRecs of the loop we are trying to peel.
+    if (!LeftAR->isAffine() || LeftAR->getLoop() != &L)
+      continue;
+    if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) &&
+        !SE.getMonotonicPredicateType(LeftAR, Pred))
+      continue;
+
+    // Check if extending the current DesiredPeelCount lets us evaluate Pred
+    // or !Pred in the loop body statically.
+    unsigned NewPeelCount = DesiredPeelCount;
+
+    const SCEV *IterVal = LeftAR->evaluateAtIteration(
+        SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE);
+
+    // If the original condition is not known, get the negated predicate
+    // (which holds on the else branch) and check if it is known. This allows
+    // us to peel off iterations that make the original condition false.
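+    // For example (hypothetical loop), for a guard i >= 2 with i starting at
+    // 0 and stepping by 1: i >= 2 is not known at iteration 0, but the
+    // negated predicate i < 2 is known for iterations 0 and 1, so peeling 2
+    // iterations makes i >= 2 known true in the remaining loop body.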
+    if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV))
+      Pred = ICmpInst::getInversePredicate(Pred);
+
+    const SCEV *Step = LeftAR->getStepRecurrence(SE);
+    const SCEV *NextIterVal = SE.getAddExpr(IterVal, Step);
+    auto PeelOneMoreIteration = [&IterVal, &NextIterVal, &SE, Step,
+                                 &NewPeelCount]() {
+      IterVal = NextIterVal;
+      NextIterVal = SE.getAddExpr(IterVal, Step);
+      NewPeelCount++;
+    };
+
+    auto CanPeelOneMoreIteration = [&NewPeelCount, &MaxPeelCount]() {
+      return NewPeelCount < MaxPeelCount;
+    };
+
+    while (CanPeelOneMoreIteration() &&
+           SE.isKnownPredicate(Pred, IterVal, RightSCEV))
+      PeelOneMoreIteration();
+
+    // With *that* peel count, does the predicate !Pred become known in the
+    // first iteration of the loop body after peeling?
+    if (!SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
+                             RightSCEV))
+      continue; // If not, give up.
+
+    // However, for equality comparisons, that isn't always sufficient to
+    // eliminate the comparison in the loop body; we may need to peel one more
+    // iteration. See if that makes !Pred become unknown again.
+    if (ICmpInst::isEquality(Pred) &&
+        !SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), NextIterVal,
+                             RightSCEV) &&
+        !SE.isKnownPredicate(Pred, IterVal, RightSCEV) &&
+        SE.isKnownPredicate(Pred, NextIterVal, RightSCEV)) {
+      if (!CanPeelOneMoreIteration())
+        continue; // Need to peel one more iteration, but can't. Give up.
+      PeelOneMoreIteration(); // Great!
+    }
+
+    DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount);
+  }
+
+  return DesiredPeelCount;
+}
+
+// Compute the number of iterations we want to peel off.
+void llvm::computePeelCount(Loop *L, unsigned LoopSize,
+                            TargetTransformInfo::PeelingPreferences &PP,
+                            unsigned &TripCount, ScalarEvolution &SE,
+                            unsigned Threshold) {
+  assert(LoopSize > 0 && "Zero loop size is not allowed!");
+  // Save the PP.PeelCount value set by the target in
+  // TTI.getPeelingPreferences or by the flag -unroll-peel-count.
+  unsigned TargetPeelCount = PP.PeelCount;
+  PP.PeelCount = 0;
+  if (!canPeel(L))
+    return;
+
+  // Only try to peel innermost loops by default.
+  // The constraint can be relaxed by the target in TTI.getPeelingPreferences
+  // or by the flag -unroll-allow-loop-nests-peeling.
+  if (!PP.AllowLoopNestsPeeling && !L->isInnermost())
+    return;
+
+  // If the user provided a peel count, use that.
+  bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
+  if (UserPeelCount) {
+    LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
+                      << " iterations.\n");
+    PP.PeelCount = UnrollForcePeelCount;
+    PP.PeelProfiledIterations = true;
+    return;
+  }
+
+  // Skip peeling if it's disabled.
+  if (!PP.AllowPeeling)
+    return;
+
+  unsigned AlreadyPeeled = 0;
+  if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
+    AlreadyPeeled = *Peeled;
+  // Stop if we already peeled off the maximum number of iterations.
+  if (AlreadyPeeled >= UnrollPeelMaxCount)
+    return;
+
+  // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
+  // iterations of the loop. For this we compute the number of iterations after
+  // which every Phi is guaranteed to become an invariant, and try to peel the
+  // maximum number of iterations among these values, thus turning all those
+  // Phis into invariants.
+  // First, check that we can peel at least one iteration.
+  if (2 * LoopSize <= Threshold && UnrollPeelMaxCount > 0) {
+    // Store the pre-calculated values here.
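+    // (For example, with hypothetical Phis where %a's backedge input is %b
+    // and %b's is %c, computing I(%a) recursively memoizes I(%b) and I(%c),
+    // so later queries are constant-time map lookups.)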
+    SmallDenseMap<PHINode *, unsigned> IterationsToInvariance;
+    // Now go through all Phis to calculate the number of iterations they
+    // need to become invariants.
+    // Start the max computation with the PP.PeelCount value set by the target
+    // in TTI.getPeelingPreferences or by the flag -unroll-peel-count.
+    unsigned DesiredPeelCount = TargetPeelCount;
+    BasicBlock *BackEdge = L->getLoopLatch();
+    assert(BackEdge && "Loop is not in simplified form?");
+    for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) {
+      PHINode *Phi = cast<PHINode>(&*BI);
+      unsigned ToInvariance = calculateIterationsToInvariance(
+          Phi, L, BackEdge, IterationsToInvariance);
+      if (ToInvariance != InfiniteIterationsToInvariance)
+        DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance);
+    }
+
+    // Respect the limits implied by the loop size and the max peel count.
+    unsigned MaxPeelCount = UnrollPeelMaxCount;
+    MaxPeelCount = std::min(MaxPeelCount, Threshold / LoopSize - 1);
+
+    DesiredPeelCount = std::max(DesiredPeelCount,
+                                countToEliminateCompares(*L, MaxPeelCount, SE));
+
+    if (DesiredPeelCount > 0) {
+      // Consider the max peel count limitation.
+      DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
+      assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
+      if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
+        LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
+                          << " iteration(s) to turn"
+                          << " some Phis into invariants.\n");
+        PP.PeelCount = DesiredPeelCount;
+        PP.PeelProfiledIterations = false;
+        return;
+      }
+    }
+  }
+
+  // Bail if we know the statically calculated trip count; in this case we
+  // prefer partial unrolling instead.
+  if (TripCount)
+    return;
+
+  // Do not apply profile-based peeling if it is disabled.
+  if (!PP.PeelProfiledIterations)
+    return;
+  // If we don't know the trip count, but have reason to believe the average
+  // trip count is low, peeling should be beneficial, since we will usually
+  // hit the peeled section.
+  // We only do this in the presence of profile information, since otherwise
+  // our estimates of the trip count are not reliable enough.
+  if (L->getHeader()->getParent()->hasProfileData()) {
+    Optional<unsigned> PeelCount = getLoopEstimatedTripCount(L);
+    if (!PeelCount)
+      return;
+
+    LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount
+                      << "\n");
+
+    if (*PeelCount) {
+      if ((*PeelCount + AlreadyPeeled <= UnrollPeelMaxCount) &&
+          (LoopSize * (*PeelCount + 1) <= Threshold)) {
+        LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount
+                          << " iterations.\n");
+        PP.PeelCount = *PeelCount;
+        return;
+      }
+      LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");
+      LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n");
+      LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
+      LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1)
+                        << "\n");
+      LLVM_DEBUG(dbgs() << "Max peel cost: " << Threshold << "\n");
+    }
+  }
+}
+
+/// Update the branch weights for the latch of a recently peeled-off loop
+/// iteration.
+/// Let F be the weight of the edge from the latch to the header and E the
+/// weight of the edge from the latch to the exit. Then F/(F+E) is the
+/// probability of staying in the loop and E/(F+E) is the probability of
+/// exiting, so the estimated TripCount = F / E.
+/// For the I-th (counting from 0) peeled-off iteration we set the weights for
+/// the peeled latch to (TC - I, 1). This gives a reasonable distribution:
+/// the probability of exiting, 1/(TC - I), increases, while at the same time
+/// the estimated trip count of the remaining loop decreases by I.
+/// To avoid dealing with division rounding we can just multiply both parts
+/// of the weights by E and use the weights (F - I * E, E). For example, with
+/// F = 30 and E = 10 (so TC = 3), the peeled latches get the weights
+/// (30, 10), (20, 10), and (10, 10).
+///
+/// \param Header The copy of the header block that belongs to the next
+/// iteration.
+/// \param LatchBR The copy of the latch branch that belongs to this iteration.
+/// \param[in,out] FallThroughWeight The weight of the edge from the latch to
+/// the header before peeling (in) and after peeling off one iteration (out).
+static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
+                                uint64_t ExitWeight,
+                                uint64_t &FallThroughWeight) {
+  // A FallThroughWeight of 0 means that there are no branch weights on the
+  // original latch block, or that the estimated trip count is zero.
+  if (!FallThroughWeight)
+    return;
+
+  unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
+  MDBuilder MDB(LatchBR->getContext());
+  MDNode *WeightNode =
+      HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight)
+                : MDB.createBranchWeights(FallThroughWeight, ExitWeight);
+  LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
+  FallThroughWeight =
+      FallThroughWeight > ExitWeight ? FallThroughWeight - ExitWeight : 1;
+}
+
+/// Initialize the weights.
+///
+/// \param Header The header block.
+/// \param LatchBR The latch branch.
+/// \param[out] ExitWeight The weight of the edge from Latch to Exit.
+/// \param[out] FallThroughWeight The weight of the edge from Latch to Header.
+static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
+                              uint64_t &ExitWeight,
+                              uint64_t &FallThroughWeight) {
+  uint64_t TrueWeight, FalseWeight;
+  if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight))
+    return;
+  unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
+  ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
+  FallThroughWeight = HeaderIdx ? FalseWeight : TrueWeight;
+}
+
+/// Update the weights of the original Latch block after peeling off all
+/// iterations.
+///
+/// \param Header The header block.
+/// \param LatchBR The latch branch.
+/// \param ExitWeight The weight of the edge from Latch to Exit.
+/// \param FallThroughWeight The weight of the edge from Latch to Header.
+static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
+                               uint64_t ExitWeight,
+                               uint64_t FallThroughWeight) {
+  // A FallThroughWeight of 0 means that there are no branch weights on the
+  // original latch block, or that the estimated trip count is zero.
+  if (!FallThroughWeight)
+    return;
+
+  // Set the branch weights on the loop exit.
+  MDBuilder MDB(LatchBR->getContext());
+  unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
+  MDNode *WeightNode =
+      HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight)
+                : MDB.createBranchWeights(FallThroughWeight, ExitWeight);
+  LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
+}
+
+/// Clones the body of the loop L, putting it between \p InsertTop and \p
+/// InsertBot.
+/// \param IterNumber The serial number of the iteration currently being
+/// peeled off.
+/// \param ExitEdges The exit edges of the original loop.
+/// \param[out] NewBlocks A list of the blocks in the newly created clone.
+/// \param[out] VMap The value map between the loop and the new clone.
+/// \param LoopBlocks A helper for DFS-traversal of the loop.
+/// \param LVMap A value-map that maps instructions from the original loop to
+/// instructions in the last peeled-off iteration.
+static void cloneLoopBlocks(
+    Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot,
+    SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
+    SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
+    ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
+    LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes) {
+  BasicBlock *Header = L->getHeader();
+  BasicBlock *Latch = L->getLoopLatch();
+  BasicBlock *PreHeader = L->getLoopPreheader();
+
+  Function *F = Header->getParent();
+  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
+  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
+  Loop *ParentLoop = L->getParentLoop();
+
+  // For each block in the original loop, create a new copy,
+  // and update the value map with the newly created values.
+  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
+    BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F);
+    NewBlocks.push_back(NewBB);
+
+    // If an original block is an immediate child of the loop L, its copy
+    // is a child of a ParentLoop after peeling. If a block is a child of
+    // a nested loop, it is handled in the cloneLoop() call below.
+    if (ParentLoop && LI->getLoopFor(*BB) == L)
+      ParentLoop->addBasicBlockToLoop(NewBB, *LI);
+
+    VMap[*BB] = NewBB;
+
+    // If the dominator tree is available, insert nodes to represent the
+    // cloned blocks.
+    if (DT) {
+      if (Header == *BB)
+        DT->addNewBlock(NewBB, InsertTop);
+      else {
+        DomTreeNode *IDom = DT->getNode(*BB)->getIDom();
+        // VMap must contain an entry for IDom, as the iteration order is RPO.
+        DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
+      }
+    }
+  }
+
+  {
+    // Identify what other metadata depends on the cloned version. After
+    // cloning, replace the metadata with the corrected version for both
+    // memory instructions and noalias intrinsics.
+    std::string Ext = (Twine("Peel") + Twine(IterNumber)).str();
+    cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
+                               Header->getContext(), Ext);
+  }
+
+  // Recursively create the new Loop objects for nested loops, if any,
+  // to preserve LoopInfo.
+  for (Loop *ChildLoop : *L) {
+    cloneLoop(ChildLoop, ParentLoop, VMap, LI, nullptr);
+  }
+
+  // Hook up the control flow for the newly inserted blocks.
+  // The new header is hooked up directly to the "top", which is either
+  // the original loop preheader (for the first iteration) or the previous
+  // iteration's exiting block (for every other iteration).
+  InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header]));
+
+  // Similarly, for the latch:
+  // The original exiting edge is still hooked up to the loop exit.
+  // The backedge now goes to the "bottom", which is either the loop's real
+  // header (for the last peeled iteration) or the copied header of the next
+  // iteration (for every other iteration).
+  BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
+  BranchInst *LatchBR = cast<BranchInst>(NewLatch->getTerminator());
+  for (unsigned idx = 0, e = LatchBR->getNumSuccessors(); idx < e; ++idx)
+    if (LatchBR->getSuccessor(idx) == Header) {
+      LatchBR->setSuccessor(idx, InsertBot);
+      break;
+    }
+  if (DT)
+    DT->changeImmediateDominator(InsertBot, NewLatch);
+
+  // The new copy of the loop body starts with a bunch of PHI nodes
+  // that pick an incoming value from either the preheader, or the previous
+  // loop iteration. Since this copy is no longer part of the loop, we
+  // resolve this statically:
+  // For the first iteration, we use the value from the preheader directly.
+  // For any other iteration, we replace the phi with the value generated by
+  // the immediately preceding clone of the loop body (which represents
+  // the previous iteration).
+  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+    PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
+    if (IterNumber == 0) {
+      VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader);
+    } else {
+      Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch);
+      Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
+      if (LatchInst && L->contains(LatchInst))
+        VMap[&*I] = LVMap[LatchInst];
+      else
+        VMap[&*I] = LatchVal;
+    }
+    cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
+  }
+
+  // Fix up the outgoing values - we need to add a value for the iteration
+  // we've just created. Note that this must happen *after* the incoming
+  // values are adjusted, since the value going out of the latch may also be
+  // a value coming into the header.
+  for (auto Edge : ExitEdges)
+    for (PHINode &PHI : Edge.second->phis()) {
+      Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first);
+      Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
+      if (LatchInst && L->contains(LatchInst))
+        LatchVal = VMap[LatchVal];
+      PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
+    }
+
+  // LastValueMap is updated with the values for the current loop
+  // which are used the next time this function is called.
+  for (auto KV : VMap)
+    LVMap[KV.first] = KV.second;
+}
+
+TargetTransformInfo::PeelingPreferences llvm::gatherPeelingPreferences(
+    Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
+    Optional<bool> UserAllowPeeling,
+    Optional<bool> UserAllowProfileBasedPeeling, bool UnrollingSpecficValues) {
+  TargetTransformInfo::PeelingPreferences PP;
+
+  // Set the default values.
+  PP.PeelCount = 0;
+  PP.AllowPeeling = true;
+  PP.AllowLoopNestsPeeling = false;
+  PP.PeelProfiledIterations = true;
+
+  // Get the target-specific values.
+  TTI.getPeelingPreferences(L, SE, PP);
+
+  // User-specified values using cl::opt.
+  if (UnrollingSpecficValues) {
+    if (UnrollPeelCount.getNumOccurrences() > 0)
+      PP.PeelCount = UnrollPeelCount;
+    if (UnrollAllowPeeling.getNumOccurrences() > 0)
+      PP.AllowPeeling = UnrollAllowPeeling;
+    if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0)
+      PP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling;
+  }
+
+  // User-specified values provided by argument.
+  if (UserAllowPeeling.hasValue())
+    PP.AllowPeeling = *UserAllowPeeling;
+  if (UserAllowProfileBasedPeeling.hasValue())
+    PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
+
+  return PP;
+}
+
+/// Peel off the first \p PeelCount iterations of loop \p L.
+///
+/// Note that this does not peel them off as a single straight-line block.
+/// Rather, each iteration is peeled off separately, and needs to check the
+/// exit condition.
+/// For loops that dynamically execute \p PeelCount iterations or less
+/// this provides a benefit, since the peeled-off iterations, which account
+/// for the bulk of dynamic execution, can be further simplified by scalar
+/// optimizations.
+bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
+                    ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
+                    bool PreserveLCSSA) {
+  assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
+  assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
+
+  LoopBlocksDFS LoopBlocks(L);
+  LoopBlocks.perform(LI);
+
+  BasicBlock *Header = L->getHeader();
+  BasicBlock *PreHeader = L->getLoopPreheader();
+  BasicBlock *Latch = L->getLoopLatch();
+  SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitEdges;
+  L->getExitEdges(ExitEdges);
+
+  DenseMap<BasicBlock *, BasicBlock *> ExitIDom;
+  if (DT) {
+    // We'd like to determine the idom of an exit block after peeling one
+    // iteration.
+    // Let Exit be an exit block, and let ExitingSet be the set of
+    // predecessors of Exit; these are the exiting blocks.
+    // Let Latch' and ExitingSet' be their copies after peeling.
+    // We'd like to find idom'(Exit), the idom of Exit after peeling.
+    // It is evident that idom'(Exit) will be the nearest common dominator
+    // of ExitingSet and ExitingSet'.
+    // idom(Exit) is the nearest common dominator of ExitingSet.
+    // idom(Exit)' is the nearest common dominator of ExitingSet'.
+    // Taking into account that we have a single Latch, Latch' will dominate
+    // Header and idom(Exit).
+    // So idom'(Exit) is the nearest common dominator of idom(Exit)' and
+    // Latch'.
+    // All these basic blocks are in the same loop, so what we find is
+    // (nearest common dominator of idom(Exit) and Latch)'.
+    // In the loop below we remember the nearest common dominator of
+    // idom(Exit) and Latch to update the idom of Exit later.
+    assert(L->hasDedicatedExits() && "No dedicated exits?");
+    for (auto Edge : ExitEdges) {
+      if (ExitIDom.count(Edge.second))
+        continue;
+      BasicBlock *BB = DT->findNearestCommonDominator(
+          DT->getNode(Edge.second)->getIDom()->getBlock(), Latch);
+      assert(L->contains(BB) && "IDom is not in a loop");
+      ExitIDom[Edge.second] = BB;
+    }
+  }
+
+  Function *F = Header->getParent();
+
+  // Set up all the necessary basic blocks. It is convenient to split the
+  // preheader into 3 parts - two blocks to anchor the peeled copy of the loop
+  // body, and a new preheader for the "real" loop.
+
+  // Peeling the first iteration transforms
+  //
+  // PreHeader:
+  // ...
+  // Header:
+  //   LoopBody
+  //   If (cond) goto Header
+  // Exit:
+  //
+  // into
+  //
+  // InsertTop:
+  //   LoopBody
+  //   If (!cond) goto Exit
+  // InsertBot:
+  // NewPreHeader:
+  // ...
+  // Header:
+  //   LoopBody
+  //   If (cond) goto Header
+  // Exit:
+  //
+  // Each following iteration will split the current bottom anchor in two,
+  // and put the new copy of the loop body between these two blocks. That is,
+  // after peeling another iteration from the example above, we'll split
+  // InsertBot, and get:
+  //
+  // InsertTop:
+  //   LoopBody
+  //   If (!cond) goto Exit
+  // InsertBot:
+  //   LoopBody
+  //   If (!cond) goto Exit
+  // InsertBot.next:
+  // NewPreHeader:
+  // ...
+  // Header:
+  //   LoopBody
+  //   If (cond) goto Header
+  // Exit:
+
+  BasicBlock *InsertTop = SplitEdge(PreHeader, Header, DT, LI);
+  BasicBlock *InsertBot =
+      SplitBlock(InsertTop, InsertTop->getTerminator(), DT, LI);
+  BasicBlock *NewPreHeader =
+      SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
+
+  InsertTop->setName(Header->getName() + ".peel.begin");
+  InsertBot->setName(Header->getName() + ".peel.next");
+  NewPreHeader->setName(PreHeader->getName() + ".peel.newph");
+
+  ValueToValueMapTy LVMap;
+
+  // If we have branch weight information, we'll want to update it for the
+  // newly created branches.
+  BranchInst *LatchBR =
+      cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator());
+  uint64_t ExitWeight = 0, FallThroughWeight = 0;
+  initBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);
+
+  // Identify what noalias metadata is inside the loop: if it is inside the
+  // loop, the associated metadata must be cloned for each iteration.
+  SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
+  identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
+
+  // For each peeled-off iteration, make a copy of the loop.
+  for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
+    SmallVector<BasicBlock *, 8> NewBlocks;
+    ValueToValueMapTy VMap;
+
+    cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
+                    LoopBlocks, VMap, LVMap, DT, LI,
+                    LoopLocalNoAliasDeclScopes);
+
+    // Remap to use values from the current iteration instead of the
+    // previous one.
+    remapInstructionsInBlocks(NewBlocks, VMap);
+
+    if (DT) {
+      // Latches of the cloned loops dominate the loop exit, so the idom of
+      // the latter is the first cloned loop body, as the original PreHeader
+      // dominates the original loop body.
+      if (Iter == 0)
+        for (auto Exit : ExitIDom)
+          DT->changeImmediateDominator(Exit.first,
+                                       cast<BasicBlock>(LVMap[Exit.second]));
+#ifdef EXPENSIVE_CHECKS
+      assert(DT->verify(DominatorTree::VerificationLevel::Fast));
+#endif
+    }
+
+    auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]);
+    updateBranchWeights(InsertBot, LatchBRCopy, ExitWeight, FallThroughWeight);
+    // Remove Loop metadata from the latch branch instruction
+    // because it is not the Loop's latch branch anymore.
+    LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr);
+
+    InsertTop = InsertBot;
+    InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
+    InsertBot->setName(Header->getName() + ".peel.next");
+
+    F->getBasicBlockList().splice(InsertTop->getIterator(),
+                                  F->getBasicBlockList(),
+                                  NewBlocks[0]->getIterator(), F->end());
+  }
+
+  // Now adjust the phi nodes in the loop header to get their initial values
+  // from the last peeled-off iteration instead of the preheader.
+  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+    PHINode *PHI = cast<PHINode>(I);
+    Value *NewVal = PHI->getIncomingValueForBlock(Latch);
+    Instruction *LatchInst = dyn_cast<Instruction>(NewVal);
+    if (LatchInst && L->contains(LatchInst))
+      NewVal = LVMap[LatchInst];
+
+    PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
+  }
+
+  fixupBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);
+
+  // Update metadata for the count of peeled-off iterations.
+  unsigned AlreadyPeeled = 0;
+  if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
+    AlreadyPeeled = *Peeled;
+  addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount);
+
+  if (Loop *ParentLoop = L->getParentLoop())
+    L = ParentLoop;
+
+  // We modified the loop, update SE.
+  SE->forgetTopmostLoop(L);
+
+  // Finally, the DomTree must be correct.
+  assert(DT->verify(DominatorTree::VerificationLevel::Fast));
+
+  // FIXME: Incrementally update loop-simplify
+  simplifyLoop(L, DT, LI, SE, AC, nullptr, PreserveLCSSA);
+
+  NumPeeled++;
+
+  return true;
+}
diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/LoopRotationUtils.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/LoopRotationUtils.cpp
index bf1f8bea39..b678efdc8d 100644
--- a/contrib/libs/llvm12/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -35,7 +35,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -44,8 +44,8 @@ using namespace llvm;
 
 #define DEBUG_TYPE "loop-rotate"
 
-STATISTIC(NumNotRotatedDueToHeaderSize,
-          "Number of loops not rotated due to the header size");
+STATISTIC(NumNotRotatedDueToHeaderSize,
+          "Number of loops not rotated due to the header size");
 STATISTIC(NumRotated, "Number of loops rotated");
 
 static cl::opt<bool>
@@ -66,17 +66,17 @@ class LoopRotate {
   const SimplifyQuery &SQ;
   bool RotationOnly;
   bool IsUtilMode;
-  bool PrepareForLTO;
+  bool PrepareForLTO;
 
 public:
   LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
             const TargetTransformInfo *TTI, AssumptionCache *AC,
             DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
-            const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode,
-            bool PrepareForLTO)
+            const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode,
+            bool PrepareForLTO)
       : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE),
         MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly),
-        IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO) {}
+        IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO) {}
   bool processLoop(Loop *L);
 
 private:
@@ -304,7 +304,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
     CodeMetrics::collectEphemeralValues(L, AC, EphValues);
 
     CodeMetrics Metrics;
-    Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues, PrepareForLTO);
+    Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues, PrepareForLTO);
     if (Metrics.notDuplicatable) {
       LLVM_DEBUG(
           dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
@@ -324,14 +324,14 @@
                  << " instructions, which is more than the threshold ("
                  << MaxHeaderSize << " instructions): ";
          L->dump());
-      ++NumNotRotatedDueToHeaderSize;
+      ++NumNotRotatedDueToHeaderSize;
       return Rotated;
    }
-
-    // When preparing for LTO, avoid rotating loops with calls that could be
-    // inlined during the LTO stage.
-    if (PrepareForLTO && Metrics.NumInlineCandidates > 0)
-      return Rotated;
+
+    // When preparing for LTO, avoid rotating loops with calls that could be
+    // inlined during the LTO stage.
+    if (PrepareForLTO && Metrics.NumInlineCandidates > 0)
+      return Rotated;
   }
 
   // Now, this loop is suitable for rotation.
@@ -401,14 +401,14 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
       break;
     }
 
-    // Remember the local noalias scope declarations in the header. After the
-    // rotation, they must be duplicated and the scope must be cloned. This
-    // avoids unwanted interaction across iterations.
- SmallVector<NoAliasScopeDeclInst *, 6> NoAliasDeclInstructions; - for (Instruction &I : *OrigHeader) - if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) - NoAliasDeclInstructions.push_back(Decl); - + // Remember the local noalias scope declarations in the header. After the + // rotation, they must be duplicated and the scope must be cloned. This + // avoids unwanted interaction across iterations. + SmallVector<NoAliasScopeDeclInst *, 6> NoAliasDeclInstructions; + for (Instruction &I : *OrigHeader) + if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) + NoAliasDeclInstructions.push_back(Decl); + while (I != E) { Instruction *Inst = &*I++; @@ -469,69 +469,69 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { } } - if (!NoAliasDeclInstructions.empty()) { - // There are noalias scope declarations: - // (general): - // Original: OrigPre { OrigHeader NewHeader ... Latch } - // after: (OrigPre+OrigHeader') { NewHeader ... Latch OrigHeader } - // - // with D: llvm.experimental.noalias.scope.decl, - // U: !noalias or !alias.scope depending on D - // ... { D U1 U2 } can transform into: - // (0) : ... { D U1 U2 } // no relevant rotation for this part - // (1) : ... D' { U1 U2 D } // D is part of OrigHeader - // (2) : ... D' U1' { U2 D U1 } // D, U1 are part of OrigHeader - // - // We now want to transform: - // (1) -> : ... D' { D U1 U2 D'' } - // (2) -> : ... D' U1' { D U2 D'' U1'' } - // D: original llvm.experimental.noalias.scope.decl - // D', U1': duplicate with replaced scopes - // D'', U1'': different duplicate with replaced scopes - // This ensures a safe fallback to 'may_alias' introduced by the rotate, - // as U1'' and U1' scopes will not be compatible wrt to the local restrict - - // Clone the llvm.experimental.noalias.decl again for the NewHeader. - Instruction *NewHeaderInsertionPoint = &(*NewHeader->getFirstNonPHI()); - for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions) { - LLVM_DEBUG(dbgs() << " Cloning llvm.experimental.noalias.scope.decl:" - << *NAD << "\n"); - Instruction *NewNAD = NAD->clone(); - NewNAD->insertBefore(NewHeaderInsertionPoint); - } - - // Scopes must now be duplicated, once for OrigHeader and once for - // OrigPreHeader'. - { - auto &Context = NewHeader->getContext(); - - SmallVector<MDNode *, 8> NoAliasDeclScopes; - for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions) - NoAliasDeclScopes.push_back(NAD->getScopeList()); - - LLVM_DEBUG(dbgs() << " Updating OrigHeader scopes\n"); - cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, {OrigHeader}, Context, - "h.rot"); - LLVM_DEBUG(OrigHeader->dump()); - - // Keep the compile time impact low by only adapting the inserted block - // of instructions in the OrigPreHeader. This might result in slightly - // more aliasing between these instructions and those that were already - // present, but it will be much faster when the original PreHeader is - // large. - LLVM_DEBUG(dbgs() << " Updating part of OrigPreheader scopes\n"); - auto *FirstDecl = - cast<Instruction>(ValueMap[*NoAliasDeclInstructions.begin()]); - auto *LastInst = &OrigPreheader->back(); - cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, FirstDecl, LastInst, - Context, "pre.rot"); - LLVM_DEBUG(OrigPreheader->dump()); - - LLVM_DEBUG(dbgs() << " Updated NewHeader:\n"); - LLVM_DEBUG(NewHeader->dump()); - } - } - + if (!NoAliasDeclInstructions.empty()) { + // There are noalias scope declarations: + // (general): + // Original: OrigPre { OrigHeader NewHeader ... Latch } + // after: (OrigPre+OrigHeader') { NewHeader ... 
Latch OrigHeader } + // + // with D: llvm.experimental.noalias.scope.decl, + // U: !noalias or !alias.scope depending on D + // ... { D U1 U2 } can transform into: + // (0) : ... { D U1 U2 } // no relevant rotation for this part + // (1) : ... D' { U1 U2 D } // D is part of OrigHeader + // (2) : ... D' U1' { U2 D U1 } // D, U1 are part of OrigHeader + // + // We now want to transform: + // (1) -> : ... D' { D U1 U2 D'' } + // (2) -> : ... D' U1' { D U2 D'' U1'' } + // D: original llvm.experimental.noalias.scope.decl + // D', U1': duplicate with replaced scopes + // D'', U1'': different duplicate with replaced scopes + // This ensures a safe fallback to 'may_alias' introduced by the rotate, + // as U1'' and U1' scopes will not be compatible wrt to the local restrict + + // Clone the llvm.experimental.noalias.decl again for the NewHeader. + Instruction *NewHeaderInsertionPoint = &(*NewHeader->getFirstNonPHI()); + for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions) { + LLVM_DEBUG(dbgs() << " Cloning llvm.experimental.noalias.scope.decl:" + << *NAD << "\n"); + Instruction *NewNAD = NAD->clone(); + NewNAD->insertBefore(NewHeaderInsertionPoint); + } + + // Scopes must now be duplicated, once for OrigHeader and once for + // OrigPreHeader'. + { + auto &Context = NewHeader->getContext(); + + SmallVector<MDNode *, 8> NoAliasDeclScopes; + for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions) + NoAliasDeclScopes.push_back(NAD->getScopeList()); + + LLVM_DEBUG(dbgs() << " Updating OrigHeader scopes\n"); + cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, {OrigHeader}, Context, + "h.rot"); + LLVM_DEBUG(OrigHeader->dump()); + + // Keep the compile time impact low by only adapting the inserted block + // of instructions in the OrigPreHeader. This might result in slightly + // more aliasing between these instructions and those that were already + // present, but it will be much faster when the original PreHeader is + // large. + LLVM_DEBUG(dbgs() << " Updating part of OrigPreheader scopes\n"); + auto *FirstDecl = + cast<Instruction>(ValueMap[*NoAliasDeclInstructions.begin()]); + auto *LastInst = &OrigPreheader->back(); + cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, FirstDecl, LastInst, + Context, "pre.rot"); + LLVM_DEBUG(OrigPreheader->dump()); + + LLVM_DEBUG(dbgs() << " Updated NewHeader:\n"); + LLVM_DEBUG(NewHeader->dump()); + } + } + // Along with all the other instructions, we just cloned OrigHeader's // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's // successors by duplicating their incoming values for OrigHeader. @@ -579,11 +579,11 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { Updates.push_back({DominatorTree::Delete, OrigPreheader, OrigHeader}); if (MSSAU) { - MSSAU->applyUpdates(Updates, *DT, /*UpdateDT=*/true); + MSSAU->applyUpdates(Updates, *DT, /*UpdateDT=*/true); if (VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); - } else { - DT->applyUpdates(Updates); + } else { + DT->applyUpdates(Updates); } } @@ -657,10 +657,10 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // connected by an unconditional branch. This is just a cleanup so the // emitted code isn't too gross in this common case. 
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); - BasicBlock *PredBB = OrigHeader->getUniquePredecessor(); - bool DidMerge = MergeBlockIntoPredecessor(OrigHeader, &DTU, LI, MSSAU); - if (DidMerge) - RemoveRedundantDbgInstrs(PredBB); + BasicBlock *PredBB = OrigHeader->getUniquePredecessor(); + bool DidMerge = MergeBlockIntoPredecessor(OrigHeader, &DTU, LI, MSSAU); + if (DidMerge) + RemoveRedundantDbgInstrs(PredBB); if (MSSAU && VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); @@ -824,8 +824,8 @@ bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, const SimplifyQuery &SQ, bool RotationOnly = true, unsigned Threshold = unsigned(-1), - bool IsUtilMode = true, bool PrepareForLTO) { + bool IsUtilMode = true, bool PrepareForLTO) { LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly, - IsUtilMode, PrepareForLTO); + IsUtilMode, PrepareForLTO); return LR.processLoop(L); } diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/LoopSimplify.cpp index 0b6f3de0e0..2e104334ad 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/LoopSimplify.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/LoopSimplify.cpp @@ -163,7 +163,7 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT, /// if it's not already in there. Stop predecessor traversal when we reach /// StopBlock. static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock, - SmallPtrSetImpl<BasicBlock *> &Blocks) { + SmallPtrSetImpl<BasicBlock *> &Blocks) { SmallVector<BasicBlock *, 8> Worklist; Worklist.push_back(InputBB); do { @@ -171,7 +171,7 @@ static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock, if (Blocks.insert(BB).second && BB != StopBlock) // If BB is not already processed and it is not a stop block then // insert its predecessor in the work list - append_range(Worklist, predecessors(BB)); + append_range(Worklist, predecessors(BB)); } while (!Worklist.empty()); } @@ -305,8 +305,8 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, // Determine which blocks should stay in L and which should be moved out to // the Outer loop now. - SmallPtrSet<BasicBlock *, 4> BlocksInL; - for (BasicBlock *P : predecessors(Header)) { + SmallPtrSet<BasicBlock *, 4> BlocksInL; + for (BasicBlock *P : predecessors(Header)) { if (DT->dominates(Header, P)) addBlockAndPredsToSet(P, Header, BlocksInL); } @@ -679,7 +679,7 @@ ReprocessLoop: // The block has now been cleared of all instructions except for // a comparison and a conditional branch. SimplifyCFG may be able // to fold it now. - if (!FoldBranchToCommonDest(BI, /*DTU=*/nullptr, MSSAU)) + if (!FoldBranchToCommonDest(BI, /*DTU=*/nullptr, MSSAU)) continue; // Success. The block is now dead, so remove it from the loop, @@ -687,7 +687,7 @@ ReprocessLoop: LLVM_DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block " << ExitingBlock->getName() << "\n"); - assert(pred_empty(ExitingBlock)); + assert(pred_empty(ExitingBlock)); Changed = true; LI->removeBlock(ExitingBlock); @@ -832,8 +832,8 @@ bool LoopSimplify::runOnFunction(Function &F) { bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); // Simplify each loop nest in the function. 
- for (auto *L : *LI) - Changed |= simplifyLoop(L, DT, LI, SE, AC, MSSAU.get(), PreserveLCSSA); + for (auto *L : *LI) + Changed |= simplifyLoop(L, DT, LI, SE, AC, MSSAU.get(), PreserveLCSSA); #ifndef NDEBUG if (PreserveLCSSA) { @@ -862,9 +862,9 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F, // Note that we don't preserve LCSSA in the new PM, if you need it run LCSSA // after simplifying the loops. MemorySSA is preserved if it exists. - for (auto *L : *LI) + for (auto *L : *LI) Changed |= - simplifyLoop(L, DT, LI, SE, AC, MSSAU.get(), /*PreserveLCSSA*/ false); + simplifyLoop(L, DT, LI, SE, AC, MSSAU.get(), /*PreserveLCSSA*/ false); if (!Changed) return PreservedAnalyses::all(); diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/LoopUnroll.cpp index 6a80eba7a1..d4cd574052 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/LoopUnroll.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/LoopUnroll.cpp @@ -59,7 +59,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/LoopPeel.h" +#include "llvm/Transforms/Utils/LoopPeel.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" @@ -109,15 +109,15 @@ UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden, /// insert a phi-node, otherwise LCSSA will be broken. /// The function is just a helper function for llvm::UnrollLoop that returns /// true if this situation occurs, indicating that LCSSA needs to be fixed. -static bool needToInsertPhisForLCSSA(Loop *L, - const std::vector<BasicBlock *> &Blocks, +static bool needToInsertPhisForLCSSA(Loop *L, + const std::vector<BasicBlock *> &Blocks, LoopInfo *LI) { for (BasicBlock *BB : Blocks) { if (LI->getLoopFor(BB) == L) continue; for (Instruction &I : *BB) { for (Use &U : I.operands()) { - if (const auto *Def = dyn_cast<Instruction>(U)) { + if (const auto *Def = dyn_cast<Instruction>(U)) { Loop *DefLoop = LI->getLoopFor(Def->getParent()); if (!DefLoop) continue; @@ -288,12 +288,12 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop) { - if (!L->getLoopPreheader()) { + if (!L->getLoopPreheader()) { LLVM_DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); return LoopUnrollResult::Unmodified; } - if (!L->getLoopLatch()) { + if (!L->getLoopLatch()) { LLVM_DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n"); return LoopUnrollResult::Unmodified; } @@ -304,7 +304,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, return LoopUnrollResult::Unmodified; } - if (L->getHeader()->hasAddressTaken()) { + if (L->getHeader()->hasAddressTaken()) { // The loop-rotate pass can be helpful to avoid this in many cases. LLVM_DEBUG( dbgs() << " Won't unroll loop: address of header block is taken.\n"); @@ -362,58 +362,58 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, } } - // All these values should be taken only after peeling because they might have - // changed. 
- BasicBlock *Preheader = L->getLoopPreheader(); - BasicBlock *Header = L->getHeader(); - BasicBlock *LatchBlock = L->getLoopLatch(); - SmallVector<BasicBlock *, 4> ExitBlocks; - L->getExitBlocks(ExitBlocks); - std::vector<BasicBlock *> OriginalLoopBlocks = L->getBlocks(); - - // Go through all exits of L and see if there are any phi-nodes there. We just - // conservatively assume that they're inserted to preserve LCSSA form, which - // means that complete unrolling might break this form. We need to either fix - // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For - // now we just recompute LCSSA for the outer loop, but it should be possible - // to fix it in-place. - bool NeedToFixLCSSA = - PreserveLCSSA && CompletelyUnroll && - any_of(ExitBlocks, - [](const BasicBlock *BB) { return isa<PHINode>(BB->begin()); }); - - // The current loop unroll pass can unroll loops that have - // (1) single latch; and - // (2a) latch is unconditional; or - // (2b) latch is conditional and is an exiting block - // FIXME: The implementation can be extended to work with more complicated - // cases, e.g. loops with multiple latches. - BranchInst *LatchBI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); - - // A conditional branch which exits the loop, which can be optimized to an - // unconditional branch in the unrolled loop in some cases. - BranchInst *ExitingBI = nullptr; - bool LatchIsExiting = L->isLoopExiting(LatchBlock); - if (LatchIsExiting) - ExitingBI = LatchBI; - else if (BasicBlock *ExitingBlock = L->getExitingBlock()) - ExitingBI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); - if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) { - // If the peeling guard is changed this assert may be relaxed or even - // deleted. - assert(!Peeled && "Peeling guard changed!"); - LLVM_DEBUG( - dbgs() << "Can't unroll; a conditional latch must exit the loop"); - return LoopUnrollResult::Unmodified; - } - LLVM_DEBUG({ - if (ExitingBI) - dbgs() << " Exiting Block = " << ExitingBI->getParent()->getName() - << "\n"; - else - dbgs() << " No single exiting block\n"; - }); - + // All these values should be taken only after peeling because they might have + // changed. + BasicBlock *Preheader = L->getLoopPreheader(); + BasicBlock *Header = L->getHeader(); + BasicBlock *LatchBlock = L->getLoopLatch(); + SmallVector<BasicBlock *, 4> ExitBlocks; + L->getExitBlocks(ExitBlocks); + std::vector<BasicBlock *> OriginalLoopBlocks = L->getBlocks(); + + // Go through all exits of L and see if there are any phi-nodes there. We just + // conservatively assume that they're inserted to preserve LCSSA form, which + // means that complete unrolling might break this form. We need to either fix + // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For + // now we just recompute LCSSA for the outer loop, but it should be possible + // to fix it in-place. + bool NeedToFixLCSSA = + PreserveLCSSA && CompletelyUnroll && + any_of(ExitBlocks, + [](const BasicBlock *BB) { return isa<PHINode>(BB->begin()); }); + + // The current loop unroll pass can unroll loops that have + // (1) single latch; and + // (2a) latch is unconditional; or + // (2b) latch is conditional and is an exiting block + // FIXME: The implementation can be extended to work with more complicated + // cases, e.g. loops with multiple latches. 
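+  // For illustration (hypothetical IR), a conditional exiting latch that
+  // satisfies case (2b) above looks like:
+  //   latch:
+  //     %iv.next = add nuw i32 %iv, 1
+  //     %c = icmp ult i32 %iv.next, %n
+  //     br i1 %c, label %header, label %exit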
+ BranchInst *LatchBI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); + + // A conditional branch which exits the loop, which can be optimized to an + // unconditional branch in the unrolled loop in some cases. + BranchInst *ExitingBI = nullptr; + bool LatchIsExiting = L->isLoopExiting(LatchBlock); + if (LatchIsExiting) + ExitingBI = LatchBI; + else if (BasicBlock *ExitingBlock = L->getExitingBlock()) + ExitingBI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); + if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) { + // If the peeling guard is changed this assert may be relaxed or even + // deleted. + assert(!Peeled && "Peeling guard changed!"); + LLVM_DEBUG( + dbgs() << "Can't unroll; a conditional latch must exit the loop"); + return LoopUnrollResult::Unmodified; + } + LLVM_DEBUG({ + if (ExitingBI) + dbgs() << " Exiting Block = " << ExitingBI->getParent()->getName() + << "\n"; + else + dbgs() << " No single exiting block\n"; + }); + // Loops containing convergent instructions must have a count that divides // their TripMultiple. LLVM_DEBUG( @@ -590,11 +590,11 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, << DIL->getFilename() << " Line: " << DIL->getLine()); } - // Identify what noalias metadata is inside the loop: if it is inside the - // loop, the associated metadata must be cloned for each iteration. - SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes; - identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes); - + // Identify what noalias metadata is inside the loop: if it is inside the + // loop, the associated metadata must be cloned for each iteration. + SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes; + identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes); + for (unsigned It = 1; It != ULO.Count; ++It) { SmallVector<BasicBlock *, 8> NewBlocks; SmallDenseMap<const Loop *, Loop *, 4> NewLoops; @@ -688,15 +688,15 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, AC->registerAssumption(II); } } - - { - // Identify what other metadata depends on the cloned version. After - // cloning, replace the metadata with the corrected version for both - // memory instructions and noalias intrinsics. - std::string ext = (Twine("It") + Twine(It)).str(); - cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks, - Header->getContext(), ext); - } + + { + // Identify what other metadata depends on the cloned version. After + // cloning, replace the metadata with the corrected version for both + // memory instructions and noalias intrinsics. + std::string ext = (Twine("It") + Twine(It)).str(); + cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks, + Header->getContext(), ext); + } } // Loop over the PHI nodes in the original block, setting incoming values. @@ -884,7 +884,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) { // Dest has been folded into Fold. Update our worklists accordingly. 
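The cloned noalias scopes above are kept apart per unrolled iteration by a suffix built from the iteration number; a toy illustration of exactly that construction:

    #include "llvm/ADT/Twine.h"
    #include <string>

    // Scope names cloned for iteration It get tagged "It1", "It2", ... so that
    // memory accesses from different unrolled bodies stay distinguishable.
    static std::string iterationSuffix(unsigned It) {
      return (llvm::Twine("It") + llvm::Twine(It)).str();
    }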
std::replace(Latches.begin(), Latches.end(), Dest, Fold); - llvm::erase_value(UnrolledLoopBlocks, Dest); + llvm::erase_value(UnrolledLoopBlocks, Dest); } } } diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/LoopUnrollAndJam.cpp index daa298e0f7..6e32a2b865 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/LoopUnrollAndJam.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/LoopUnrollAndJam.cpp @@ -148,7 +148,7 @@ static bool processHeaderPhiOperands(BasicBlock *Header, BasicBlock *Latch, } while (!Worklist.empty()) { - Instruction *I = Worklist.pop_back_val(); + Instruction *I = Worklist.pop_back_val(); if (!Visit(I)) return false; @@ -516,10 +516,10 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, cast<BranchInst>(SubLoopBlocksLast.back()->getTerminator()); SubTerm->setSuccessor(!SubLoopContinueOnTrue, SubLoopBlocksFirst[0]); SubTerm->setSuccessor(SubLoopContinueOnTrue, AftBlocksFirst[0]); - SubLoopBlocksFirst[0]->replacePhiUsesWith(ForeBlocksLast[0], - ForeBlocksLast.back()); - SubLoopBlocksFirst[0]->replacePhiUsesWith(SubLoopBlocksLast[0], - SubLoopBlocksLast.back()); + SubLoopBlocksFirst[0]->replacePhiUsesWith(ForeBlocksLast[0], + ForeBlocksLast.back()); + SubLoopBlocksFirst[0]->replacePhiUsesWith(SubLoopBlocksLast[0], + SubLoopBlocksLast.back()); for (unsigned It = 1; It != Count; It++) { // Replace the conditional branch of the previous iteration subloop with an @@ -529,10 +529,10 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, BranchInst::Create(SubLoopBlocksFirst[It], SubTerm); SubTerm->eraseFromParent(); - SubLoopBlocksFirst[It]->replacePhiUsesWith(ForeBlocksLast[It], - ForeBlocksLast.back()); - SubLoopBlocksFirst[It]->replacePhiUsesWith(SubLoopBlocksLast[It], - SubLoopBlocksLast.back()); + SubLoopBlocksFirst[It]->replacePhiUsesWith(ForeBlocksLast[It], + ForeBlocksLast.back()); + SubLoopBlocksFirst[It]->replacePhiUsesWith(SubLoopBlocksLast[It], + SubLoopBlocksLast.back()); movePHIs(SubLoopBlocksFirst[It], SubLoopBlocksFirst[0]); } @@ -546,8 +546,8 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, assert(AftTerm->getSuccessor(ContinueOnTrue) == LoopExit && "Expecting the ContinueOnTrue successor of AftTerm to be LoopExit"); } - AftBlocksFirst[0]->replacePhiUsesWith(SubLoopBlocksLast[0], - SubLoopBlocksLast.back()); + AftBlocksFirst[0]->replacePhiUsesWith(SubLoopBlocksLast[0], + SubLoopBlocksLast.back()); for (unsigned It = 1; It != Count; It++) { // Replace the conditional branch of the previous iteration subloop with an @@ -557,8 +557,8 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, BranchInst::Create(AftBlocksFirst[It], AftTerm); AftTerm->eraseFromParent(); - AftBlocksFirst[It]->replacePhiUsesWith(SubLoopBlocksLast[It], - SubLoopBlocksLast.back()); + AftBlocksFirst[It]->replacePhiUsesWith(SubLoopBlocksLast[It], + SubLoopBlocksLast.back()); movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]); } diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 97a9bedcd1..0abf62be15 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -26,7 +26,7 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" 
#include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" @@ -505,32 +505,32 @@ static bool canProfitablyUnrollMultiExitLoop( // know of kinds of multiexit loops that would benefit from unrolling. } -// Assign the maximum possible trip count as the back edge weight for the -// remainder loop if the original loop comes with a branch weight. -static void updateLatchBranchWeightsForRemainderLoop(Loop *OrigLoop, - Loop *RemainderLoop, - uint64_t UnrollFactor) { - uint64_t TrueWeight, FalseWeight; - BranchInst *LatchBR = - cast<BranchInst>(OrigLoop->getLoopLatch()->getTerminator()); - if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) { - uint64_t ExitWeight = LatchBR->getSuccessor(0) == OrigLoop->getHeader() - ? FalseWeight - : TrueWeight; - assert(UnrollFactor > 1); - uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight; - BasicBlock *Header = RemainderLoop->getHeader(); - BasicBlock *Latch = RemainderLoop->getLoopLatch(); - auto *RemainderLatchBR = cast<BranchInst>(Latch->getTerminator()); - unsigned HeaderIdx = (RemainderLatchBR->getSuccessor(0) == Header ? 0 : 1); - MDBuilder MDB(RemainderLatchBR->getContext()); - MDNode *WeightNode = - HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight) - : MDB.createBranchWeights(BackEdgeWeight, ExitWeight); - RemainderLatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); - } -} - +// Assign the maximum possible trip count as the back edge weight for the +// remainder loop if the original loop comes with a branch weight. +static void updateLatchBranchWeightsForRemainderLoop(Loop *OrigLoop, + Loop *RemainderLoop, + uint64_t UnrollFactor) { + uint64_t TrueWeight, FalseWeight; + BranchInst *LatchBR = + cast<BranchInst>(OrigLoop->getLoopLatch()->getTerminator()); + if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) { + uint64_t ExitWeight = LatchBR->getSuccessor(0) == OrigLoop->getHeader() + ? FalseWeight + : TrueWeight; + assert(UnrollFactor > 1); + uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight; + BasicBlock *Header = RemainderLoop->getHeader(); + BasicBlock *Latch = RemainderLoop->getLoopLatch(); + auto *RemainderLatchBR = cast<BranchInst>(Latch->getTerminator()); + unsigned HeaderIdx = (RemainderLatchBR->getSuccessor(0) == Header ? 0 : 1); + MDBuilder MDB(RemainderLatchBR->getContext()); + MDNode *WeightNode = + HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight) + : MDB.createBranchWeights(BackEdgeWeight, ExitWeight); + RemainderLatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); + } +} + /// Insert code in the prolog/epilog code when unrolling a loop with a /// run-time trip-count. /// @@ -814,11 +814,11 @@ bool llvm::UnrollRuntimeLoopRemainder( InsertTop, InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); - // Assign the maximum possible trip count as the back edge weight for the - // remainder loop if the original loop comes with a branch weight. - if (remainderLoop && !UnrollRemainder) - updateLatchBranchWeightsForRemainderLoop(L, remainderLoop, Count); - + // Assign the maximum possible trip count as the back edge weight for the + // remainder loop if the original loop comes with a branch weight. + if (remainderLoop && !UnrollRemainder) + updateLatchBranchWeightsForRemainderLoop(L, remainderLoop, Count); + // Insert the cloned blocks into the function. 
F->getBasicBlockList().splice(InsertBot->getIterator(), F->getBasicBlockList(), diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/LoopUtils.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/LoopUtils.cpp index 3aeb7d1e02..f0f423e981 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/LoopUtils.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/LoopUtils.cpp @@ -63,7 +63,7 @@ static cl::opt<bool> ForceReductionIntrinsic( static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced"; static const char *LLVMLoopDisableLICM = "llvm.licm.disable"; -static const char *LLVMLoopMustProgress = "llvm.loop.mustprogress"; +static const char *LLVMLoopMustProgress = "llvm.loop.mustprogress"; bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, @@ -298,24 +298,24 @@ static Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop, llvm_unreachable("unexpected number of options"); } -bool llvm::getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) { +bool llvm::getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) { return getOptionalBoolLoopAttribute(TheLoop, Name).getValueOr(false); } -Optional<ElementCount> -llvm::getOptionalElementCountLoopAttribute(Loop *TheLoop) { - Optional<int> Width = - getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width"); - - if (Width.hasValue()) { - Optional<int> IsScalable = getOptionalIntLoopAttribute( - TheLoop, "llvm.loop.vectorize.scalable.enable"); - return ElementCount::get(*Width, IsScalable.getValueOr(false)); - } - - return None; -} - +Optional<ElementCount> +llvm::getOptionalElementCountLoopAttribute(Loop *TheLoop) { + Optional<int> Width = + getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width"); + + if (Width.hasValue()) { + Optional<int> IsScalable = getOptionalIntLoopAttribute( + TheLoop, "llvm.loop.vectorize.scalable.enable"); + return ElementCount::get(*Width, IsScalable.getValueOr(false)); + } + + return None; +} + llvm::Optional<int> llvm::getOptionalIntLoopAttribute(Loop *TheLoop, StringRef Name) { const MDOperand *AttrMD = @@ -349,7 +349,7 @@ Optional<MDNode *> llvm::makeFollowupLoopID( bool Changed = false; if (InheritAllAttrs || InheritSomeAttrs) { - for (const MDOperand &Existing : drop_begin(OrigLoopID->operands())) { + for (const MDOperand &Existing : drop_begin(OrigLoopID->operands())) { MDNode *Op = cast<MDNode>(Existing.get()); auto InheritThisAttribute = [InheritSomeAttrs, @@ -386,7 +386,7 @@ Optional<MDNode *> llvm::makeFollowupLoopID( continue; HasAnyFollowup = true; - for (const MDOperand &Option : drop_begin(FollowupNode->operands())) { + for (const MDOperand &Option : drop_begin(FollowupNode->operands())) { MDs.push_back(Option.get()); Changed = true; } @@ -419,10 +419,10 @@ bool llvm::hasDisableLICMTransformsHint(const Loop *L) { return getBooleanLoopAttribute(L, LLVMLoopDisableLICM); } -bool llvm::hasMustProgress(const Loop *L) { - return getBooleanLoopAttribute(L, LLVMLoopMustProgress); -} - +bool llvm::hasMustProgress(const Loop *L) { + return getBooleanLoopAttribute(L, LLVMLoopMustProgress); +} + TransformationMode llvm::hasUnrollTransformation(Loop *L) { if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable")) return TM_SuppressedByUser; @@ -469,15 +469,15 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) { if (Enable == false) return TM_SuppressedByUser; - Optional<ElementCount> VectorizeWidth = - getOptionalElementCountLoopAttribute(L); + Optional<ElementCount> VectorizeWidth = + 
getOptionalElementCountLoopAttribute(L); Optional<int> InterleaveCount = getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count"); // 'Forcing' vector width and interleave count to one effectively disables // this tranformation. - if (Enable == true && VectorizeWidth && VectorizeWidth->isScalar() && - InterleaveCount == 1) + if (Enable == true && VectorizeWidth && VectorizeWidth->isScalar() && + InterleaveCount == 1) return TM_SuppressedByUser; if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized")) @@ -486,10 +486,10 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) { if (Enable == true) return TM_ForcedByUser; - if ((VectorizeWidth && VectorizeWidth->isScalar()) && InterleaveCount == 1) + if ((VectorizeWidth && VectorizeWidth->isScalar()) && InterleaveCount == 1) return TM_Disable; - if ((VectorizeWidth && VectorizeWidth->isVector()) || InterleaveCount > 1) + if ((VectorizeWidth && VectorizeWidth->isVector()) || InterleaveCount > 1) return TM_Enable; if (hasDisableAllTransformsHint(L)) @@ -592,61 +592,61 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, // non-loop, it will be deleted in a future iteration of loop deletion pass. IRBuilder<> Builder(OldBr); - auto *ExitBlock = L->getUniqueExitBlock(); - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); - if (ExitBlock) { - assert(ExitBlock && "Should have a unique exit block!"); - assert(L->hasDedicatedExits() && "Loop should have dedicated exits!"); - - Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock); - // Remove the old branch. The conditional branch becomes a new terminator. - OldBr->eraseFromParent(); - - // Rewrite phis in the exit block to get their inputs from the Preheader - // instead of the exiting block. - for (PHINode &P : ExitBlock->phis()) { - // Set the zero'th element of Phi to be from the preheader and remove all - // other incoming values. Given the loop has dedicated exits, all other - // incoming values must be from the exiting blocks. - int PredIndex = 0; - P.setIncomingBlock(PredIndex, Preheader); - // Removes all incoming values from all other exiting blocks (including - // duplicate values from an exiting block). - // Nuke all entries except the zero'th entry which is the preheader entry. - // NOTE! We need to remove Incoming Values in the reverse order as done - // below, to keep the indices valid for deletion (removeIncomingValues - // updates getNumIncomingValues and shifts all values down into the - // operand being deleted). - for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i) - P.removeIncomingValue(e - i, false); - - assert((P.getNumIncomingValues() == 1 && - P.getIncomingBlock(PredIndex) == Preheader) && - "Should have exactly one value and that's from the preheader!"); + auto *ExitBlock = L->getUniqueExitBlock(); + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); + if (ExitBlock) { + assert(ExitBlock && "Should have a unique exit block!"); + assert(L->hasDedicatedExits() && "Loop should have dedicated exits!"); + + Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock); + // Remove the old branch. The conditional branch becomes a new terminator. + OldBr->eraseFromParent(); + + // Rewrite phis in the exit block to get their inputs from the Preheader + // instead of the exiting block. + for (PHINode &P : ExitBlock->phis()) { + // Set the zero'th element of Phi to be from the preheader and remove all + // other incoming values. 
Given the loop has dedicated exits, all other + // incoming values must be from the exiting blocks. + int PredIndex = 0; + P.setIncomingBlock(PredIndex, Preheader); + // Removes all incoming values from all other exiting blocks (including + // duplicate values from an exiting block). + // Nuke all entries except the zero'th entry which is the preheader entry. + // NOTE! We need to remove Incoming Values in the reverse order as done + // below, to keep the indices valid for deletion (removeIncomingValues + // updates getNumIncomingValues and shifts all values down into the + // operand being deleted). + for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i) + P.removeIncomingValue(e - i, false); + + assert((P.getNumIncomingValues() == 1 && + P.getIncomingBlock(PredIndex) == Preheader) && + "Should have exactly one value and that's from the preheader!"); + } + + if (DT) { + DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}}); + if (MSSA) { + MSSAU->applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}}, + *DT); + if (VerifyMemorySSA) + MSSA->verifyMemorySSA(); + } } - - if (DT) { - DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}}); - if (MSSA) { - MSSAU->applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}}, - *DT); - if (VerifyMemorySSA) - MSSA->verifyMemorySSA(); - } - } - - // Disconnect the loop body by branching directly to its exit. - Builder.SetInsertPoint(Preheader->getTerminator()); - Builder.CreateBr(ExitBlock); - // Remove the old branch. - Preheader->getTerminator()->eraseFromParent(); - } else { - assert(L->hasNoExitBlocks() && - "Loop should have either zero or one exit blocks."); - - Builder.SetInsertPoint(OldBr); - Builder.CreateUnreachable(); - Preheader->getTerminator()->eraseFromParent(); + + // Disconnect the loop body by branching directly to its exit. + Builder.SetInsertPoint(Preheader->getTerminator()); + Builder.CreateBr(ExitBlock); + // Remove the old branch. + Preheader->getTerminator()->eraseFromParent(); + } else { + assert(L->hasNoExitBlocks() && + "Loop should have either zero or one exit blocks."); + + Builder.SetInsertPoint(OldBr); + Builder.CreateUnreachable(); + Preheader->getTerminator()->eraseFromParent(); } if (DT) { @@ -666,58 +666,58 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, llvm::SmallDenseSet<std::pair<DIVariable *, DIExpression *>, 4> DeadDebugSet; llvm::SmallVector<DbgVariableIntrinsic *, 4> DeadDebugInst; - if (ExitBlock) { - // Given LCSSA form is satisfied, we should not have users of instructions - // within the dead loop outside of the loop. However, LCSSA doesn't take - // unreachable uses into account. We handle them here. - // We could do it after drop all references (in this case all users in the - // loop will be already eliminated and we have less work to do but according - // to API doc of User::dropAllReferences only valid operation after dropping - // references, is deletion. So let's substitute all usages of - // instruction from the loop with undef value of corresponding type first. - for (auto *Block : L->blocks()) - for (Instruction &I : *Block) { - auto *Undef = UndefValue::get(I.getType()); - for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); - UI != E;) { - Use &U = *UI; - ++UI; - if (auto *Usr = dyn_cast<Instruction>(U.getUser())) - if (L->contains(Usr->getParent())) - continue; - // If we have a DT then we can check that uses outside a loop only in - // unreachable block. 
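The NOTE in the phi rewrite above is load-bearing: removeIncomingValue shifts later entries down, so deleting upward from index 1 would skip every other entry. The safe pattern, isolated:

    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Drop all incoming values of P except entry 0, walking from the back so
    // each removal leaves the indices still to visit untouched.
    static void keepOnlyFirstIncoming(PHINode &P) {
      for (unsigned i = P.getNumIncomingValues() - 1; i > 0; --i)
        P.removeIncomingValue(i, /*DeletePHIIfEmpty=*/false);
    }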
- if (DT) - assert(!DT->isReachableFromEntry(U) && - "Unexpected user in reachable block"); - U.set(Undef); - } - auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I); - if (!DVI) - continue; - auto Key = - DeadDebugSet.find({DVI->getVariable(), DVI->getExpression()}); - if (Key != DeadDebugSet.end()) - continue; - DeadDebugSet.insert({DVI->getVariable(), DVI->getExpression()}); - DeadDebugInst.push_back(DVI); + if (ExitBlock) { + // Given LCSSA form is satisfied, we should not have users of instructions + // within the dead loop outside of the loop. However, LCSSA doesn't take + // unreachable uses into account. We handle them here. + // We could do it after drop all references (in this case all users in the + // loop will be already eliminated and we have less work to do but according + // to API doc of User::dropAllReferences only valid operation after dropping + // references, is deletion. So let's substitute all usages of + // instruction from the loop with undef value of corresponding type first. + for (auto *Block : L->blocks()) + for (Instruction &I : *Block) { + auto *Undef = UndefValue::get(I.getType()); + for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); + UI != E;) { + Use &U = *UI; + ++UI; + if (auto *Usr = dyn_cast<Instruction>(U.getUser())) + if (L->contains(Usr->getParent())) + continue; + // If we have a DT then we can check that uses outside a loop only in + // unreachable block. + if (DT) + assert(!DT->isReachableFromEntry(U) && + "Unexpected user in reachable block"); + U.set(Undef); + } + auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I); + if (!DVI) + continue; + auto Key = + DeadDebugSet.find({DVI->getVariable(), DVI->getExpression()}); + if (Key != DeadDebugSet.end()) + continue; + DeadDebugSet.insert({DVI->getVariable(), DVI->getExpression()}); + DeadDebugInst.push_back(DVI); } - // After the loop has been deleted all the values defined and modified - // inside the loop are going to be unavailable. - // Since debug values in the loop have been deleted, inserting an undef - // dbg.value truncates the range of any dbg.value before the loop where the - // loop used to be. This is particularly important for constant values. - DIBuilder DIB(*ExitBlock->getModule()); - Instruction *InsertDbgValueBefore = ExitBlock->getFirstNonPHI(); - assert(InsertDbgValueBefore && - "There should be a non-PHI instruction in exit block, else these " - "instructions will have no parent."); - for (auto *DVI : DeadDebugInst) - DIB.insertDbgValueIntrinsic(UndefValue::get(Builder.getInt32Ty()), - DVI->getVariable(), DVI->getExpression(), - DVI->getDebugLoc(), InsertDbgValueBefore); - } + // After the loop has been deleted all the values defined and modified + // inside the loop are going to be unavailable. + // Since debug values in the loop have been deleted, inserting an undef + // dbg.value truncates the range of any dbg.value before the loop where the + // loop used to be. This is particularly important for constant values. + DIBuilder DIB(*ExitBlock->getModule()); + Instruction *InsertDbgValueBefore = ExitBlock->getFirstNonPHI(); + assert(InsertDbgValueBefore && + "There should be a non-PHI instruction in exit block, else these " + "instructions will have no parent."); + for (auto *DVI : DeadDebugInst) + DIB.insertDbgValueIntrinsic(UndefValue::get(Builder.getInt32Ty()), + DVI->getVariable(), DVI->getExpression(), + DVI->getDebugLoc(), InsertDbgValueBefore); + } // Remove the block from the reference counting scheme, so that we can // delete it freely later. 
@@ -761,51 +761,51 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, } } -static Loop *getOutermostLoop(Loop *L) { - while (Loop *Parent = L->getParentLoop()) - L = Parent; - return L; -} - -void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, - LoopInfo &LI, MemorySSA *MSSA) { - auto *Latch = L->getLoopLatch(); - assert(Latch && "multiple latches not yet supported"); - auto *Header = L->getHeader(); - Loop *OutermostLoop = getOutermostLoop(L); - - SE.forgetLoop(L); - - // Note: By splitting the backedge, and then explicitly making it unreachable - // we gracefully handle corner cases such as non-bottom tested loops and the - // like. We also have the benefit of being able to reuse existing well tested - // code. It might be worth special casing the common bottom tested case at - // some point to avoid code churn. - - std::unique_ptr<MemorySSAUpdater> MSSAU; - if (MSSA) - MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); - - auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get()); - - DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager); - (void)changeToUnreachable(BackedgeBB->getTerminator(), /*UseTrap*/false, - /*PreserveLCSSA*/true, &DTU, MSSAU.get()); - - // Erase (and destroy) this loop instance. Handles relinking sub-loops - // and blocks within the loop as needed. - LI.erase(L); - - // If the loop we broke had a parent, then changeToUnreachable might have - // caused a block to be removed from the parent loop (see loop_nest_lcssa - // test case in zero-btc.ll for an example), thus changing the parent's - // exit blocks. If that happened, we need to rebuild LCSSA on the outermost - // loop which might have a had a block removed. - if (OutermostLoop != L) - formLCSSARecursively(*OutermostLoop, DT, &LI, &SE); -} - - +static Loop *getOutermostLoop(Loop *L) { + while (Loop *Parent = L->getParentLoop()) + L = Parent; + return L; +} + +void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, + LoopInfo &LI, MemorySSA *MSSA) { + auto *Latch = L->getLoopLatch(); + assert(Latch && "multiple latches not yet supported"); + auto *Header = L->getHeader(); + Loop *OutermostLoop = getOutermostLoop(L); + + SE.forgetLoop(L); + + // Note: By splitting the backedge, and then explicitly making it unreachable + // we gracefully handle corner cases such as non-bottom tested loops and the + // like. We also have the benefit of being able to reuse existing well tested + // code. It might be worth special casing the common bottom tested case at + // some point to avoid code churn. + + std::unique_ptr<MemorySSAUpdater> MSSAU; + if (MSSA) + MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); + + auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get()); + + DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager); + (void)changeToUnreachable(BackedgeBB->getTerminator(), /*UseTrap*/false, + /*PreserveLCSSA*/true, &DTU, MSSAU.get()); + + // Erase (and destroy) this loop instance. Handles relinking sub-loops + // and blocks within the loop as needed. + LI.erase(L); + + // If the loop we broke had a parent, then changeToUnreachable might have + // caused a block to be removed from the parent loop (see loop_nest_lcssa + // test case in zero-btc.ll for an example), thus changing the parent's + // exit blocks. If that happened, we need to rebuild LCSSA on the outermost + // loop which might have a had a block removed. 
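breakLoopBackedge's "split, then kill" idiom, boiled down to the two utility calls it leans on (a sketch minus the MemorySSA plumbing):

    #include "llvm/Analysis/DomTreeUpdater.h"
    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/Transforms/Utils/BasicBlockUtils.h"
    #include "llvm/Transforms/Utils/Local.h"

    using namespace llvm;

    // Isolate the backedge on its own block, then make that block unreachable;
    // both steps route all CFG/DT bookkeeping through well-tested utilities.
    static void breakBackedgeSketch(Loop *L, DominatorTree &DT, LoopInfo &LI) {
      BasicBlock *BackedgeBB =
          SplitEdge(L->getLoopLatch(), L->getHeader(), &DT, &LI);
      DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
      (void)changeToUnreachable(BackedgeBB->getTerminator(), /*UseLLVMTrap=*/false,
                                /*PreserveLCSSA=*/true, &DTU);
    }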
+ if (OutermostLoop != L) + formLCSSARecursively(*OutermostLoop, DT, &LI, &SE); +} + + /// Checks if \p L has single exit through latch block except possibly /// "deoptimizing" exits. Returns branch instruction terminating the loop /// latch if above check is successful, nullptr otherwise. @@ -918,29 +918,29 @@ bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop, return true; } -Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, - Value *Right) { - CmpInst::Predicate Pred; +Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, + Value *Right) { + CmpInst::Predicate Pred; switch (RK) { default: llvm_unreachable("Unknown min/max recurrence kind"); - case RecurKind::UMin: - Pred = CmpInst::ICMP_ULT; + case RecurKind::UMin: + Pred = CmpInst::ICMP_ULT; break; - case RecurKind::UMax: - Pred = CmpInst::ICMP_UGT; + case RecurKind::UMax: + Pred = CmpInst::ICMP_UGT; break; - case RecurKind::SMin: - Pred = CmpInst::ICMP_SLT; + case RecurKind::SMin: + Pred = CmpInst::ICMP_SLT; break; - case RecurKind::SMax: - Pred = CmpInst::ICMP_SGT; + case RecurKind::SMax: + Pred = CmpInst::ICMP_SGT; break; - case RecurKind::FMin: - Pred = CmpInst::FCMP_OLT; + case RecurKind::FMin: + Pred = CmpInst::FCMP_OLT; break; - case RecurKind::FMax: - Pred = CmpInst::FCMP_OGT; + case RecurKind::FMax: + Pred = CmpInst::FCMP_OGT; break; } @@ -950,15 +950,15 @@ Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, FastMathFlags FMF; FMF.setFast(); Builder.setFastMathFlags(FMF); - Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp"); + Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp"); Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select"); return Select; } // Helper to generate an ordered reduction. -Value *llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, - unsigned Op, RecurKind RdxKind, - ArrayRef<Value *> RedOps) { +Value *llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, + unsigned Op, RecurKind RdxKind, + ArrayRef<Value *> RedOps) { unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements(); // Extract and apply reduction ops in ascending order: @@ -972,9 +972,9 @@ Value *llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext, "bin.rdx"); } else { - assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) && + assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) && "Invalid min/max"); - Result = createMinMaxOp(Builder, RdxKind, Result, Ext); + Result = createMinMaxOp(Builder, RdxKind, Result, Ext); } if (!RedOps.empty()) @@ -985,9 +985,9 @@ Value *llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, } // Helper to generate a log2 shuffle reduction. -Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src, - unsigned Op, RecurKind RdxKind, - ArrayRef<Value *> RedOps) { +Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src, + unsigned Op, RecurKind RdxKind, + ArrayRef<Value *> RedOps) { unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements(); // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles // and vector ops, reducing the set of values being computed by half each @@ -1004,16 +1004,16 @@ Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src, // Fill the rest of the mask with undef. 
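To make the log2 shuffle reduction concrete: for VF = 8 the loop runs with i = 8, 4, 2, i.e. three shuffle-plus-op rounds, after which lane 0 holds the full result, which is why the routine finishes by extracting element 0. A scalar model of the same schedule:

    #include <array>
    #include <cstddef>

    // Each round folds lane j + i/2 into lane j, halving the live lanes until
    // only lane 0 remains (an addition reduction, for illustration).
    template <std::size_t VF>
    int shuffleReduceAdd(std::array<int, VF> Lanes) {
      static_assert((VF & (VF - 1)) == 0, "VF must be a power of two");
      for (std::size_t i = VF; i != 1; i >>= 1)
        for (std::size_t j = 0; j < i / 2; ++j)
          Lanes[j] += Lanes[j + i / 2]; // the "bin.rdx" of a lane pair
      return Lanes[0];                  // matches CreateExtractElement(..., 0)
    }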
std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1); - Value *Shuf = Builder.CreateShuffleVector(TmpVec, ShuffleMask, "rdx.shuf"); + Value *Shuf = Builder.CreateShuffleVector(TmpVec, ShuffleMask, "rdx.shuf"); if (Op != Instruction::ICmp && Op != Instruction::FCmp) { // The builder propagates its fast-math-flags setting. TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf, "bin.rdx"); } else { - assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) && + assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) && "Invalid min/max"); - TmpVec = createMinMaxOp(Builder, RdxKind, TmpVec, Shuf); + TmpVec = createMinMaxOp(Builder, RdxKind, TmpVec, Shuf); } if (!RedOps.empty()) propagateIRFlags(TmpVec, RedOps); @@ -1027,48 +1027,48 @@ Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src, return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); } -Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, - const TargetTransformInfo *TTI, - Value *Src, RecurKind RdxKind, - ArrayRef<Value *> RedOps) { - unsigned Opcode = RecurrenceDescriptor::getOpcode(RdxKind); - TargetTransformInfo::ReductionFlags RdxFlags; - RdxFlags.IsMaxOp = RdxKind == RecurKind::SMax || RdxKind == RecurKind::UMax || - RdxKind == RecurKind::FMax; - RdxFlags.IsSigned = RdxKind == RecurKind::SMax || RdxKind == RecurKind::SMin; - if (!ForceReductionIntrinsic && - !TTI->useReductionIntrinsic(Opcode, Src->getType(), RdxFlags)) - return getShuffleReduction(Builder, Src, Opcode, RdxKind, RedOps); - - auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType(); - switch (RdxKind) { - case RecurKind::Add: - return Builder.CreateAddReduce(Src); - case RecurKind::Mul: - return Builder.CreateMulReduce(Src); - case RecurKind::And: - return Builder.CreateAndReduce(Src); - case RecurKind::Or: - return Builder.CreateOrReduce(Src); - case RecurKind::Xor: - return Builder.CreateXorReduce(Src); - case RecurKind::FAdd: - return Builder.CreateFAddReduce(ConstantFP::getNegativeZero(SrcVecEltTy), - Src); - case RecurKind::FMul: - return Builder.CreateFMulReduce(ConstantFP::get(SrcVecEltTy, 1.0), Src); - case RecurKind::SMax: - return Builder.CreateIntMaxReduce(Src, true); - case RecurKind::SMin: - return Builder.CreateIntMinReduce(Src, true); - case RecurKind::UMax: - return Builder.CreateIntMaxReduce(Src, false); - case RecurKind::UMin: - return Builder.CreateIntMinReduce(Src, false); - case RecurKind::FMax: - return Builder.CreateFPMaxReduce(Src); - case RecurKind::FMin: - return Builder.CreateFPMinReduce(Src); +Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, + const TargetTransformInfo *TTI, + Value *Src, RecurKind RdxKind, + ArrayRef<Value *> RedOps) { + unsigned Opcode = RecurrenceDescriptor::getOpcode(RdxKind); + TargetTransformInfo::ReductionFlags RdxFlags; + RdxFlags.IsMaxOp = RdxKind == RecurKind::SMax || RdxKind == RecurKind::UMax || + RdxKind == RecurKind::FMax; + RdxFlags.IsSigned = RdxKind == RecurKind::SMax || RdxKind == RecurKind::SMin; + if (!ForceReductionIntrinsic && + !TTI->useReductionIntrinsic(Opcode, Src->getType(), RdxFlags)) + return getShuffleReduction(Builder, Src, Opcode, RdxKind, RedOps); + + auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType(); + switch (RdxKind) { + case RecurKind::Add: + return Builder.CreateAddReduce(Src); + case RecurKind::Mul: + return Builder.CreateMulReduce(Src); + case RecurKind::And: + return Builder.CreateAndReduce(Src); + case RecurKind::Or: + return Builder.CreateOrReduce(Src); + case 
RecurKind::Xor: + return Builder.CreateXorReduce(Src); + case RecurKind::FAdd: + return Builder.CreateFAddReduce(ConstantFP::getNegativeZero(SrcVecEltTy), + Src); + case RecurKind::FMul: + return Builder.CreateFMulReduce(ConstantFP::get(SrcVecEltTy, 1.0), Src); + case RecurKind::SMax: + return Builder.CreateIntMaxReduce(Src, true); + case RecurKind::SMin: + return Builder.CreateIntMinReduce(Src, true); + case RecurKind::UMax: + return Builder.CreateIntMaxReduce(Src, false); + case RecurKind::UMin: + return Builder.CreateIntMinReduce(Src, false); + case RecurKind::FMax: + return Builder.CreateFPMaxReduce(Src); + case RecurKind::FMin: + return Builder.CreateFPMinReduce(Src); default: llvm_unreachable("Unhandled opcode"); } @@ -1076,13 +1076,13 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, Value *llvm::createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI, - RecurrenceDescriptor &Desc, Value *Src) { + RecurrenceDescriptor &Desc, Value *Src) { // TODO: Support in-order reductions based on the recurrence descriptor. // All ops in the reduction inherit fast-math-flags from the recurrence // descriptor. IRBuilderBase::FastMathFlagGuard FMFGuard(B); B.setFastMathFlags(Desc.getFastMathFlags()); - return createSimpleTargetReduction(B, TTI, Src, Desc.getRecurrenceKind()); + return createSimpleTargetReduction(B, TTI, Src, Desc.getRecurrenceKind()); } void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) { @@ -1158,7 +1158,7 @@ static bool isValidRewrite(ScalarEvolution *SE, Value *FromVal, Value *ToVal) { // producing an expression involving multiple pointers. Until then, we must // bail out here. // - // Retrieve the pointer operand of the GEP. Don't use getUnderlyingObject + // Retrieve the pointer operand of the GEP. Don't use getUnderlyingObject // because it understands lcssa phis while SCEV does not. Value *FromPtr = FromVal; Value *ToPtr = ToVal; @@ -1175,7 +1175,7 @@ static bool isValidRewrite(ScalarEvolution *SE, Value *FromVal, Value *ToVal) { // SCEV may have rewritten an expression that produces the GEP's pointer // operand. That's ok as long as the pointer operand has the same base - // pointer. Unlike getUnderlyingObject(), getPointerBase() will find the + // pointer. Unlike getUnderlyingObject(), getPointerBase() will find the // base of a recurrence. This handles the case in which SCEV expansion // converts a pointer type recurrence into a nonrecurrent pointer base // indexed by an integer recurrence. 
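Before the next file, a usage sketch for the reduction helpers restored above (caller-supplied builder, TTI, and vector value; the wrapper name is illustrative):

    #include "llvm/Analysis/IVDescriptors.h"
    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/Transforms/Utils/LoopUtils.h"

    using namespace llvm;

    // Reduce a vector of integers to their scalar sum. On the intrinsic path
    // this becomes llvm.vector.reduce.add; otherwise the helper falls back to
    // the log2 shuffle sequence shown earlier.
    static Value *emitVectorSum(IRBuilderBase &B, const TargetTransformInfo *TTI,
                                Value *Vec) {
      return createSimpleTargetReduction(B, TTI, Vec, RecurKind::Add);
    }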
diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/LoopVersioning.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/LoopVersioning.cpp index b46592f2d7..599bd1feb2 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/LoopVersioning.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/LoopVersioning.cpp @@ -16,12 +16,12 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/MemorySSA.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/MDBuilder.h" -#include "llvm/IR/PassManager.h" +#include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -36,22 +36,22 @@ static cl::opt<bool> cl::desc("Add no-alias annotation for instructions that " "are disambiguated by memchecks")); -LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, - ArrayRef<RuntimePointerCheck> Checks, Loop *L, - LoopInfo *LI, DominatorTree *DT, - ScalarEvolution *SE) - : VersionedLoop(L), NonVersionedLoop(nullptr), - AliasChecks(Checks.begin(), Checks.end()), - Preds(LAI.getPSE().getUnionPredicate()), LAI(LAI), LI(LI), DT(DT), +LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, + ArrayRef<RuntimePointerCheck> Checks, Loop *L, + LoopInfo *LI, DominatorTree *DT, + ScalarEvolution *SE) + : VersionedLoop(L), NonVersionedLoop(nullptr), + AliasChecks(Checks.begin(), Checks.end()), + Preds(LAI.getPSE().getUnionPredicate()), LAI(LAI), LI(LI), DT(DT), SE(SE) { - assert(L->getUniqueExitBlock() && "No single exit block"); + assert(L->getUniqueExitBlock() && "No single exit block"); } void LoopVersioning::versionLoop( const SmallVectorImpl<Instruction *> &DefsUsedOutside) { - assert(VersionedLoop->isLoopSimplifyForm() && - "Loop is not in loop-simplify form"); - + assert(VersionedLoop->isLoopSimplifyForm() && + "Loop is not in loop-simplify form"); + Instruction *FirstCheckInst; Instruction *MemRuntimeCheck; Value *SCEVRuntimeCheck; @@ -67,7 +67,7 @@ void LoopVersioning::versionLoop( SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(), "scev.check"); SCEVRuntimeCheck = - Exp.expandCodeForPredicate(&Preds, RuntimeCheckBB->getTerminator()); + Exp.expandCodeForPredicate(&Preds, RuntimeCheckBB->getTerminator()); auto *CI = dyn_cast<ConstantInt>(SCEVRuntimeCheck); // Discard the SCEV runtime check if it is always true. @@ -118,11 +118,11 @@ void LoopVersioning::versionLoop( // Adds the necessary PHI nodes for the versioned loops based on the // loop-defined values used outside of the loop. 
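How the pieces above are meant to be driven, condensed from the runImpl that follows later in this file (names as in the restored code):

    #include "llvm/Analysis/LoopAccessAnalysis.h"
    #include "llvm/Transforms/Utils/LoopVersioning.h"

    using namespace llvm;

    // Version L under its runtime memchecks: versionLoop() emits
    //   if (checks fail) -> original loop, else -> versioned loop
    // and the noalias annotation is only valid on the versioned copy.
    static void versionForMemchecks(const LoopAccessInfo &LAI, Loop *L,
                                    LoopInfo *LI, DominatorTree *DT,
                                    ScalarEvolution *SE) {
      LoopVersioning LVer(LAI, LAI.getRuntimePointerChecking()->getChecks(), L,
                          LI, DT, SE);
      LVer.versionLoop();
      LVer.annotateLoopWithNoAlias();
    }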
addPHINodes(DefsUsedOutside); - formDedicatedExitBlocks(NonVersionedLoop, DT, LI, nullptr, true); - formDedicatedExitBlocks(VersionedLoop, DT, LI, nullptr, true); - assert(NonVersionedLoop->isLoopSimplifyForm() && - VersionedLoop->isLoopSimplifyForm() && - "The versioned loops should be in simplify form."); + formDedicatedExitBlocks(NonVersionedLoop, DT, LI, nullptr, true); + formDedicatedExitBlocks(VersionedLoop, DT, LI, nullptr, true); + assert(NonVersionedLoop->isLoopSimplifyForm() && + VersionedLoop->isLoopSimplifyForm() && + "The versioned loops should be in simplify form."); } void LoopVersioning::addPHINodes( @@ -254,59 +254,59 @@ void LoopVersioning::annotateInstWithNoAlias(Instruction *VersionedInst, } namespace { -bool runImpl(LoopInfo *LI, function_ref<const LoopAccessInfo &(Loop &)> GetLAA, - DominatorTree *DT, ScalarEvolution *SE) { - // Build up a worklist of inner-loops to version. This is necessary as the - // act of versioning a loop creates new loops and can invalidate iterators - // across the loops. - SmallVector<Loop *, 8> Worklist; - - for (Loop *TopLevelLoop : *LI) - for (Loop *L : depth_first(TopLevelLoop)) - // We only handle inner-most loops. - if (L->isInnermost()) - Worklist.push_back(L); - - // Now walk the identified inner loops. - bool Changed = false; - for (Loop *L : Worklist) { - if (!L->isLoopSimplifyForm() || !L->isRotatedForm() || - !L->getExitingBlock()) - continue; - const LoopAccessInfo &LAI = GetLAA(*L); - if (!LAI.hasConvergentOp() && - (LAI.getNumRuntimePointerChecks() || - !LAI.getPSE().getUnionPredicate().isAlwaysTrue())) { - LoopVersioning LVer(LAI, LAI.getRuntimePointerChecking()->getChecks(), L, - LI, DT, SE); - LVer.versionLoop(); - LVer.annotateLoopWithNoAlias(); - Changed = true; - } - } - - return Changed; -} - +bool runImpl(LoopInfo *LI, function_ref<const LoopAccessInfo &(Loop &)> GetLAA, + DominatorTree *DT, ScalarEvolution *SE) { + // Build up a worklist of inner-loops to version. This is necessary as the + // act of versioning a loop creates new loops and can invalidate iterators + // across the loops. + SmallVector<Loop *, 8> Worklist; + + for (Loop *TopLevelLoop : *LI) + for (Loop *L : depth_first(TopLevelLoop)) + // We only handle inner-most loops. + if (L->isInnermost()) + Worklist.push_back(L); + + // Now walk the identified inner loops. + bool Changed = false; + for (Loop *L : Worklist) { + if (!L->isLoopSimplifyForm() || !L->isRotatedForm() || + !L->getExitingBlock()) + continue; + const LoopAccessInfo &LAI = GetLAA(*L); + if (!LAI.hasConvergentOp() && + (LAI.getNumRuntimePointerChecks() || + !LAI.getPSE().getUnionPredicate().isAlwaysTrue())) { + LoopVersioning LVer(LAI, LAI.getRuntimePointerChecking()->getChecks(), L, + LI, DT, SE); + LVer.versionLoop(); + LVer.annotateLoopWithNoAlias(); + Changed = true; + } + } + + return Changed; +} + /// Also expose this is a pass. Currently this is only used for /// unit-testing. It adds all memchecks necessary to remove all may-aliasing /// array accesses from the loop. 
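The worklist discipline in runImpl above deserves a note: versioning creates new loops, so the pass snapshots the inner-most loops first instead of iterating LoopInfo while mutating it. That collection step in isolation:

    #include "llvm/ADT/DepthFirstIterator.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Analysis/LoopInfo.h"

    using namespace llvm;

    // Snapshot all inner-most loops before transforming anything, since the
    // transformation invalidates iterators over the loop forest.
    static SmallVector<Loop *, 8> collectInnermostLoops(LoopInfo &LI) {
      SmallVector<Loop *, 8> Worklist;
      for (Loop *TopLevelLoop : LI)
        for (Loop *L : depth_first(TopLevelLoop))
          if (L->isInnermost())
            Worklist.push_back(L);
      return Worklist;
    }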
-class LoopVersioningLegacyPass : public FunctionPass { +class LoopVersioningLegacyPass : public FunctionPass { public: - LoopVersioningLegacyPass() : FunctionPass(ID) { - initializeLoopVersioningLegacyPassPass(*PassRegistry::getPassRegistry()); + LoopVersioningLegacyPass() : FunctionPass(ID) { + initializeLoopVersioningLegacyPassPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override { auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - auto GetLAA = [&](Loop &L) -> const LoopAccessInfo & { - return getAnalysis<LoopAccessLegacyAnalysis>().getInfo(&L); - }; - + auto GetLAA = [&](Loop &L) -> const LoopAccessInfo & { + return getAnalysis<LoopAccessLegacyAnalysis>().getInfo(&L); + }; + auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); - return runImpl(LI, GetLAA, DT, SE); + return runImpl(LI, GetLAA, DT, SE); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -325,45 +325,45 @@ public: #define LVER_OPTION "loop-versioning" #define DEBUG_TYPE LVER_OPTION -char LoopVersioningLegacyPass::ID; +char LoopVersioningLegacyPass::ID; static const char LVer_name[] = "Loop Versioning"; -INITIALIZE_PASS_BEGIN(LoopVersioningLegacyPass, LVER_OPTION, LVer_name, false, - false) +INITIALIZE_PASS_BEGIN(LoopVersioningLegacyPass, LVER_OPTION, LVer_name, false, + false) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) -INITIALIZE_PASS_END(LoopVersioningLegacyPass, LVER_OPTION, LVer_name, false, - false) +INITIALIZE_PASS_END(LoopVersioningLegacyPass, LVER_OPTION, LVer_name, false, + false) namespace llvm { -FunctionPass *createLoopVersioningLegacyPass() { - return new LoopVersioningLegacyPass(); +FunctionPass *createLoopVersioningLegacyPass() { + return new LoopVersioningLegacyPass(); } - -PreservedAnalyses LoopVersioningPass::run(Function &F, - FunctionAnalysisManager &AM) { - auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F); - auto &LI = AM.getResult<LoopAnalysis>(F); - auto &TTI = AM.getResult<TargetIRAnalysis>(F); - auto &DT = AM.getResult<DominatorTreeAnalysis>(F); - auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); - auto &AA = AM.getResult<AAManager>(F); - auto &AC = AM.getResult<AssumptionAnalysis>(F); - MemorySSA *MSSA = EnableMSSALoopDependency - ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() - : nullptr; - - auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager(); - auto GetLAA = [&](Loop &L) -> const LoopAccessInfo & { - LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, - TLI, TTI, nullptr, MSSA}; - return LAM.getResult<LoopAccessAnalysis>(L, AR); - }; - - if (runImpl(&LI, GetLAA, &DT, &SE)) - return PreservedAnalyses::none(); - return PreservedAnalyses::all(); + +PreservedAnalyses LoopVersioningPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F); + auto &LI = AM.getResult<LoopAnalysis>(F); + auto &TTI = AM.getResult<TargetIRAnalysis>(F); + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); + auto &AA = AM.getResult<AAManager>(F); + auto &AC = AM.getResult<AssumptionAnalysis>(F); + MemorySSA *MSSA = EnableMSSALoopDependency + ? 
&AM.getResult<MemorySSAAnalysis>(F).getMSSA() + : nullptr; + + auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager(); + auto GetLAA = [&](Loop &L) -> const LoopAccessInfo & { + LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, + TLI, TTI, nullptr, MSSA}; + return LAM.getResult<LoopAccessAnalysis>(L, AR); + }; + + if (runImpl(&LI, GetLAA, &DT, &SE)) + return PreservedAnalyses::none(); + return PreservedAnalyses::all(); } -} // namespace llvm +} // namespace llvm diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/LowerInvoke.cpp index 9f85a3ab9c..fe0ff5899d 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/LowerInvoke.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/LowerInvoke.cpp @@ -48,7 +48,7 @@ static bool runImpl(Function &F) { bool Changed = false; for (BasicBlock &BB : F) if (InvokeInst *II = dyn_cast<InvokeInst>(BB.getTerminator())) { - SmallVector<Value *, 16> CallArgs(II->args()); + SmallVector<Value *, 16> CallArgs(II->args()); SmallVector<OperandBundleDef, 1> OpBundles; II->getOperandBundlesAsDefs(OpBundles); // Insert a normal call instruction... diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/LowerSwitch.cpp index 6f72afc90c..ec8d7a7074 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/LowerSwitch.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/LowerSwitch.cpp @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/LowerSwitch.h" +#include "llvm/Transforms/Utils/LowerSwitch.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -27,7 +27,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/PassManager.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -57,9 +57,9 @@ namespace { } // end anonymous namespace -namespace { +namespace { // Return true iff R is covered by Ranges. -bool IsInRanges(const IntRange &R, const std::vector<IntRange> &Ranges) { +bool IsInRanges(const IntRange &R, const std::vector<IntRange> &Ranges) { // Note: Ranges must be sorted, non-overlapping and non-adjacent. // Find the first range whose High field is >= R.High, @@ -70,34 +70,34 @@ bool IsInRanges(const IntRange &R, const std::vector<IntRange> &Ranges) { return I != Ranges.end() && I->Low <= R.Low; } -struct CaseRange { - ConstantInt *Low; - ConstantInt *High; - BasicBlock *BB; - - CaseRange(ConstantInt *low, ConstantInt *high, BasicBlock *bb) - : Low(low), High(high), BB(bb) {} -}; - -using CaseVector = std::vector<CaseRange>; -using CaseItr = std::vector<CaseRange>::iterator; - -/// The comparison function for sorting the switch case values in the vector. -/// WARNING: Case ranges should be disjoint! 
-struct CaseCmp { - bool operator()(const CaseRange &C1, const CaseRange &C2) { - const ConstantInt *CI1 = cast<const ConstantInt>(C1.Low); - const ConstantInt *CI2 = cast<const ConstantInt>(C2.High); - return CI1->getValue().slt(CI2->getValue()); +struct CaseRange { + ConstantInt *Low; + ConstantInt *High; + BasicBlock *BB; + + CaseRange(ConstantInt *low, ConstantInt *high, BasicBlock *bb) + : Low(low), High(high), BB(bb) {} +}; + +using CaseVector = std::vector<CaseRange>; +using CaseItr = std::vector<CaseRange>::iterator; + +/// The comparison function for sorting the switch case values in the vector. +/// WARNING: Case ranges should be disjoint! +struct CaseCmp { + bool operator()(const CaseRange &C1, const CaseRange &C2) { + const ConstantInt *CI1 = cast<const ConstantInt>(C1.Low); + const ConstantInt *CI2 = cast<const ConstantInt>(C2.High); + return CI1->getValue().slt(CI2->getValue()); } -}; +}; /// Used for debugging purposes. LLVM_ATTRIBUTE_USED -raw_ostream &operator<<(raw_ostream &O, const CaseVector &C) { +raw_ostream &operator<<(raw_ostream &O, const CaseVector &C) { O << "["; - for (CaseVector::const_iterator B = C.begin(), E = C.end(); B != E;) { + for (CaseVector::const_iterator B = C.begin(), E = C.end(); B != E;) { O << "[" << B->Low->getValue() << ", " << B->High->getValue() << "]"; if (++B != E) O << ", "; @@ -116,9 +116,9 @@ raw_ostream &operator<<(raw_ostream &O, const CaseVector &C) { /// 2) Removed if subsequent incoming values now share the same case, i.e., /// multiple outcome edges are condensed into one. This is necessary to keep the /// number of phi values equal to the number of branches to SuccBB. -void FixPhis( - BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB, - const unsigned NumMergedCases = std::numeric_limits<unsigned>::max()) { +void FixPhis( + BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB, + const unsigned NumMergedCases = std::numeric_limits<unsigned>::max()) { for (BasicBlock::iterator I = SuccBB->begin(), IE = SuccBB->getFirstNonPHI()->getIterator(); I != IE; ++I) { @@ -149,80 +149,80 @@ void FixPhis( } } -/// Create a new leaf block for the binary lookup tree. It checks if the -/// switch's value == the case's value. If not, then it jumps to the default -/// branch. At this point in the tree, the value can't be another valid case -/// value, so the jump to the "default" branch is warranted. -BasicBlock *NewLeafBlock(CaseRange &Leaf, Value *Val, ConstantInt *LowerBound, - ConstantInt *UpperBound, BasicBlock *OrigBlock, - BasicBlock *Default) { - Function *F = OrigBlock->getParent(); - BasicBlock *NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock"); - F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewLeaf); - - // Emit comparison - ICmpInst *Comp = nullptr; - if (Leaf.Low == Leaf.High) { - // Make the seteq instruction... 
- Comp = - new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val, Leaf.Low, "SwitchLeaf"); - } else { - // Make range comparison - if (Leaf.Low == LowerBound) { - // Val >= Min && Val <= Hi --> Val <= Hi - Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High, - "SwitchLeaf"); - } else if (Leaf.High == UpperBound) { - // Val <= Max && Val >= Lo --> Val >= Lo - Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SGE, Val, Leaf.Low, - "SwitchLeaf"); - } else if (Leaf.Low->isZero()) { - // Val >= 0 && Val <= Hi --> Val <=u Hi - Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High, - "SwitchLeaf"); - } else { - // Emit V-Lo <=u Hi-Lo - Constant *NegLo = ConstantExpr::getNeg(Leaf.Low); - Instruction *Add = BinaryOperator::CreateAdd( - Val, NegLo, Val->getName() + ".off", NewLeaf); - Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High); - Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound, - "SwitchLeaf"); - } - } - - // Make the conditional branch... - BasicBlock *Succ = Leaf.BB; - BranchInst::Create(Succ, Default, Comp, NewLeaf); - - // If there were any PHI nodes in this successor, rewrite one entry - // from OrigBlock to come from NewLeaf. - for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { - PHINode *PN = cast<PHINode>(I); - // Remove all but one incoming entries from the cluster - uint64_t Range = Leaf.High->getSExtValue() - Leaf.Low->getSExtValue(); - for (uint64_t j = 0; j < Range; ++j) { - PN->removeIncomingValue(OrigBlock); - } - - int BlockIdx = PN->getBasicBlockIndex(OrigBlock); - assert(BlockIdx != -1 && "Switch didn't go to this successor??"); - PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf); - } - - return NewLeaf; -} - +/// Create a new leaf block for the binary lookup tree. It checks if the +/// switch's value == the case's value. If not, then it jumps to the default +/// branch. At this point in the tree, the value can't be another valid case +/// value, so the jump to the "default" branch is warranted. +BasicBlock *NewLeafBlock(CaseRange &Leaf, Value *Val, ConstantInt *LowerBound, + ConstantInt *UpperBound, BasicBlock *OrigBlock, + BasicBlock *Default) { + Function *F = OrigBlock->getParent(); + BasicBlock *NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock"); + F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewLeaf); + + // Emit comparison + ICmpInst *Comp = nullptr; + if (Leaf.Low == Leaf.High) { + // Make the seteq instruction... + Comp = + new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val, Leaf.Low, "SwitchLeaf"); + } else { + // Make range comparison + if (Leaf.Low == LowerBound) { + // Val >= Min && Val <= Hi --> Val <= Hi + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High, + "SwitchLeaf"); + } else if (Leaf.High == UpperBound) { + // Val <= Max && Val >= Lo --> Val >= Lo + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SGE, Val, Leaf.Low, + "SwitchLeaf"); + } else if (Leaf.Low->isZero()) { + // Val >= 0 && Val <= Hi --> Val <=u Hi + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High, + "SwitchLeaf"); + } else { + // Emit V-Lo <=u Hi-Lo + Constant *NegLo = ConstantExpr::getNeg(Leaf.Low); + Instruction *Add = BinaryOperator::CreateAdd( + Val, NegLo, Val->getName() + ".off", NewLeaf); + Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High); + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound, + "SwitchLeaf"); + } + } + + // Make the conditional branch... 
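The last arm of the comparison emission above uses a classic trick: when Lo <= Hi is known, the two-sided test Lo <= V && V <= Hi collapses into the single unsigned compare (V - Lo) <=u (Hi - Lo), because any V below Lo wraps to a huge unsigned value. A concrete check of the identity:

    #include <cassert>
    #include <cstdint>

    // (V - Lo) <=u (Hi - Lo)  is equivalent to  Lo <= V && V <= Hi
    // (all arithmetic done modulo 2^32, matching the emitted IR).
    static bool inRangeViaSub(int32_t V, int32_t Lo, int32_t Hi) {
      return uint32_t(V) - uint32_t(Lo) <= uint32_t(Hi) - uint32_t(Lo);
    }

    int main() {
      assert(inRangeViaSub(5, 3, 9));   // inside the range
      assert(!inRangeViaSub(2, 3, 9));  // below: 2 - 3 wraps to 0xFFFFFFFF
      assert(!inRangeViaSub(10, 3, 9)); // above the range
      return 0;
    }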
+ BasicBlock *Succ = Leaf.BB; + BranchInst::Create(Succ, Default, Comp, NewLeaf); + + // If there were any PHI nodes in this successor, rewrite one entry + // from OrigBlock to come from NewLeaf. + for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + // Remove all but one incoming entries from the cluster + uint64_t Range = Leaf.High->getSExtValue() - Leaf.Low->getSExtValue(); + for (uint64_t j = 0; j < Range; ++j) { + PN->removeIncomingValue(OrigBlock); + } + + int BlockIdx = PN->getBasicBlockIndex(OrigBlock); + assert(BlockIdx != -1 && "Switch didn't go to this successor??"); + PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf); + } + + return NewLeaf; +} + /// Convert the switch statement into a binary lookup of the case values. /// The function recursively builds this tree. LowerBound and UpperBound are /// used to keep track of the bounds for Val that have already been checked by /// a block emitted by one of the previous calls to switchConvert in the call /// stack. -BasicBlock *SwitchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, - ConstantInt *UpperBound, Value *Val, - BasicBlock *Predecessor, BasicBlock *OrigBlock, - BasicBlock *Default, - const std::vector<IntRange> &UnreachableRanges) { +BasicBlock *SwitchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, + ConstantInt *UpperBound, Value *Val, + BasicBlock *Predecessor, BasicBlock *OrigBlock, + BasicBlock *Default, + const std::vector<IntRange> &UnreachableRanges) { assert(LowerBound && UpperBound && "Bounds must be initialized"); unsigned Size = End - Begin; @@ -234,10 +234,10 @@ BasicBlock *SwitchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, if (Begin->Low == LowerBound && Begin->High == UpperBound) { unsigned NumMergedCases = 0; NumMergedCases = UpperBound->getSExtValue() - LowerBound->getSExtValue(); - FixPhis(Begin->BB, OrigBlock, Predecessor, NumMergedCases); + FixPhis(Begin->BB, OrigBlock, Predecessor, NumMergedCases); return Begin->BB; } - return NewLeafBlock(*Begin, Val, LowerBound, UpperBound, OrigBlock, + return NewLeafBlock(*Begin, Val, LowerBound, UpperBound, OrigBlock, Default); } @@ -284,12 +284,12 @@ BasicBlock *SwitchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT, Val, Pivot.Low, "Pivot"); - BasicBlock *LBranch = - SwitchConvert(LHS.begin(), LHS.end(), LowerBound, NewUpperBound, Val, - NewNode, OrigBlock, Default, UnreachableRanges); - BasicBlock *RBranch = - SwitchConvert(RHS.begin(), RHS.end(), NewLowerBound, UpperBound, Val, - NewNode, OrigBlock, Default, UnreachableRanges); + BasicBlock *LBranch = + SwitchConvert(LHS.begin(), LHS.end(), LowerBound, NewUpperBound, Val, + NewNode, OrigBlock, Default, UnreachableRanges); + BasicBlock *RBranch = + SwitchConvert(RHS.begin(), RHS.end(), NewLowerBound, UpperBound, Val, + NewNode, OrigBlock, Default, UnreachableRanges); F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewNode); NewNode->getInstList().push_back(Comp); @@ -301,7 +301,7 @@ BasicBlock *SwitchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, /// Transform simple list of \p SI's cases into list of CaseRange's \p Cases. /// \post \p Cases wouldn't contain references to \p SI's default BB. /// \returns Number of \p SI's cases that do not reference \p SI's default BB. 
-unsigned Clusterify(CaseVector &Cases, SwitchInst *SI) { +unsigned Clusterify(CaseVector &Cases, SwitchInst *SI) { unsigned NumSimpleCases = 0; // Start with "simple" cases @@ -342,9 +342,9 @@ unsigned Clusterify(CaseVector &Cases, SwitchInst *SI) { /// Replace the specified switch instruction with a sequence of chained if-then /// insts in a balanced binary search. -void ProcessSwitchInst(SwitchInst *SI, - SmallPtrSetImpl<BasicBlock *> &DeleteList, - AssumptionCache *AC, LazyValueInfo *LVI) { +void ProcessSwitchInst(SwitchInst *SI, + SmallPtrSetImpl<BasicBlock *> &DeleteList, + AssumptionCache *AC, LazyValueInfo *LVI) { BasicBlock *OrigBlock = SI->getParent(); Function *F = OrigBlock->getParent(); Value *Val = SI->getCondition(); // The value we are switching on... @@ -369,7 +369,7 @@ void ProcessSwitchInst(SwitchInst *SI, if (Cases.empty()) { BranchInst::Create(Default, OrigBlock); // Remove all the references from Default's PHIs to OrigBlock, but one. - FixPhis(Default, OrigBlock, OrigBlock); + FixPhis(Default, OrigBlock, OrigBlock); SI->eraseFromParent(); return; } @@ -400,7 +400,7 @@ void ProcessSwitchInst(SwitchInst *SI, // TODO Shouldn't this create a signed range? ConstantRange KnownBitsRange = ConstantRange::fromKnownBits(Known, /*IsSigned=*/false); - const ConstantRange LVIRange = LVI->getConstantRange(Val, SI); + const ConstantRange LVIRange = LVI->getConstantRange(Val, SI); ConstantRange ValRange = KnownBitsRange.intersectWith(LVIRange); // We delegate removal of unreachable non-default cases to other passes. In // the unlikely event that some of them survived, we just conservatively @@ -474,8 +474,8 @@ void ProcessSwitchInst(SwitchInst *SI, // cases. assert(MaxPop > 0 && PopSucc); Default = PopSucc; - llvm::erase_if(Cases, - [PopSucc](const CaseRange &R) { return R.BB == PopSucc; }); + llvm::erase_if(Cases, + [PopSucc](const CaseRange &R) { return R.BB == PopSucc; }); // If there are no cases left, just branch. if (Cases.empty()) { @@ -501,12 +501,12 @@ void ProcessSwitchInst(SwitchInst *SI, BranchInst::Create(Default, NewDefault); BasicBlock *SwitchBlock = - SwitchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val, + SwitchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val, OrigBlock, OrigBlock, NewDefault, UnreachableRanges); // If there are entries in any PHI nodes for the default edge, make sure // to update them as well. - FixPhis(Default, OrigBlock, NewDefault); + FixPhis(Default, OrigBlock, NewDefault); // Branch to our shiny new if-then stuff... BranchInst::Create(SwitchBlock, OrigBlock); @@ -516,84 +516,84 @@ void ProcessSwitchInst(SwitchInst *SI, OrigBlock->getInstList().erase(SI); // If the Default block has no more predecessors just add it to DeleteList. - if (pred_empty(OldDefault)) + if (pred_empty(OldDefault)) DeleteList.insert(OldDefault); } - -bool LowerSwitch(Function &F, LazyValueInfo *LVI, AssumptionCache *AC) { - bool Changed = false; - SmallPtrSet<BasicBlock *, 8> DeleteList; - - for (Function::iterator I = F.begin(), E = F.end(); I != E;) { - BasicBlock *Cur = - &*I++; // Advance over block so we don't traverse new blocks - - // If the block is a dead Default block that will be deleted later, don't - // waste time processing it. 
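// The LowerSwitch driver loop nearby advances the iterator *before*
// processing the block (&*I++) because ProcessSwitchInst splices new blocks
// in right after the current one, and those must not be traversed. The same
// early-increment idiom in miniature (plain C++; std::list keeps iterators
// to untouched elements valid across insertions):
#include <cassert>
#include <list>

int main() {
  std::list<int> blocks = {1, 2, 3};
  for (auto it = blocks.begin(), end = blocks.end(); it != end;) {
    int &cur = *it++;          // advance first, like &*I++
    if (cur == 2)
      blocks.insert(it, 99);   // "new block" after cur; never re-visited
  }
  assert((blocks == std::list<int>{1, 2, 99, 3}));
  return 0;
}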
- if (DeleteList.count(Cur)) - continue; - - if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) { - Changed = true; - ProcessSwitchInst(SI, DeleteList, AC, LVI); - } - } - - for (BasicBlock *BB : DeleteList) { - LVI->eraseBlock(BB); - DeleteDeadBlock(BB); - } - - return Changed; -} - -/// Replace all SwitchInst instructions with chained branch instructions. -class LowerSwitchLegacyPass : public FunctionPass { -public: - // Pass identification, replacement for typeid - static char ID; - - LowerSwitchLegacyPass() : FunctionPass(ID) { - initializeLowerSwitchLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<LazyValueInfoWrapperPass>(); - } -}; - -} // end anonymous namespace - -char LowerSwitchLegacyPass::ID = 0; - -// Publicly exposed interface to pass... -char &llvm::LowerSwitchID = LowerSwitchLegacyPass::ID; - -INITIALIZE_PASS_BEGIN(LowerSwitchLegacyPass, "lowerswitch", - "Lower SwitchInst's to branches", false, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass) -INITIALIZE_PASS_END(LowerSwitchLegacyPass, "lowerswitch", - "Lower SwitchInst's to branches", false, false) - -// createLowerSwitchPass - Interface to this file... -FunctionPass *llvm::createLowerSwitchPass() { - return new LowerSwitchLegacyPass(); -} - -bool LowerSwitchLegacyPass::runOnFunction(Function &F) { - LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI(); - auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>(); - AssumptionCache *AC = ACT ? &ACT->getAssumptionCache(F) : nullptr; - return LowerSwitch(F, LVI, AC); -} - -PreservedAnalyses LowerSwitchPass::run(Function &F, - FunctionAnalysisManager &AM) { - LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F); - AssumptionCache *AC = AM.getCachedResult<AssumptionAnalysis>(F); - return LowerSwitch(F, LVI, AC) ? PreservedAnalyses::none() - : PreservedAnalyses::all(); -} + +bool LowerSwitch(Function &F, LazyValueInfo *LVI, AssumptionCache *AC) { + bool Changed = false; + SmallPtrSet<BasicBlock *, 8> DeleteList; + + for (Function::iterator I = F.begin(), E = F.end(); I != E;) { + BasicBlock *Cur = + &*I++; // Advance over block so we don't traverse new blocks + + // If the block is a dead Default block that will be deleted later, don't + // waste time processing it. + if (DeleteList.count(Cur)) + continue; + + if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) { + Changed = true; + ProcessSwitchInst(SI, DeleteList, AC, LVI); + } + } + + for (BasicBlock *BB : DeleteList) { + LVI->eraseBlock(BB); + DeleteDeadBlock(BB); + } + + return Changed; +} + +/// Replace all SwitchInst instructions with chained branch instructions. +class LowerSwitchLegacyPass : public FunctionPass { +public: + // Pass identification, replacement for typeid + static char ID; + + LowerSwitchLegacyPass() : FunctionPass(ID) { + initializeLowerSwitchLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<LazyValueInfoWrapperPass>(); + } +}; + +} // end anonymous namespace + +char LowerSwitchLegacyPass::ID = 0; + +// Publicly exposed interface to pass... 
+char &llvm::LowerSwitchID = LowerSwitchLegacyPass::ID; + +INITIALIZE_PASS_BEGIN(LowerSwitchLegacyPass, "lowerswitch", + "Lower SwitchInst's to branches", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass) +INITIALIZE_PASS_END(LowerSwitchLegacyPass, "lowerswitch", + "Lower SwitchInst's to branches", false, false) + +// createLowerSwitchPass - Interface to this file... +FunctionPass *llvm::createLowerSwitchPass() { + return new LowerSwitchLegacyPass(); +} + +bool LowerSwitchLegacyPass::runOnFunction(Function &F) { + LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI(); + auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>(); + AssumptionCache *AC = ACT ? &ACT->getAssumptionCache(F) : nullptr; + return LowerSwitch(F, LVI, AC); +} + +PreservedAnalyses LowerSwitchPass::run(Function &F, + FunctionAnalysisManager &AM) { + LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F); + AssumptionCache *AC = AM.getCachedResult<AssumptionAnalysis>(F); + return LowerSwitch(F, LVI, AC) ? PreservedAnalyses::none() + : PreservedAnalyses::all(); +} diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/MatrixUtils.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/MatrixUtils.cpp index 7dea93aaa7..6a137630de 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/MatrixUtils.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/MatrixUtils.cpp @@ -1,104 +1,104 @@ -//===- MatrixUtils.cpp - Utilities to lower matrix intrinsics ---*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Utilities for generating tiled loops for matrix operations. 
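// MatrixUtils (diff below) emits IR for a tiled loop nest around a matrix
// multiply. The shape of the generated nest, written directly in C++ for
// orientation (a sketch only; since CreateLoop uses an icmp ne latch
// condition, TileSize evenly dividing each bound is assumed here):
void tiledMatMulSkeleton(int NumColumns, int NumRows, int NumInner,
                         int TileSize) {
  for (int C = 0; C < NumColumns; C += TileSize)    // "cols" loop
    for (int R = 0; R < NumRows; R += TileSize)     // "rows" loop
      for (int K = 0; K < NumInner; K += TileSize) {
        // body: multiply-accumulate the TileSize x TileSize tiles at
        // (R, K) and (K, C) into the accumulator tile at (R, C)
      }
}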
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/MatrixUtils.h" -#include "llvm/Analysis/DomTreeUpdater.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Type.h" - -using namespace llvm; - -BasicBlock *TileInfo::CreateLoop(BasicBlock *Preheader, BasicBlock *Exit, - Value *Bound, Value *Step, StringRef Name, - IRBuilderBase &B, DomTreeUpdater &DTU, Loop *L, - LoopInfo &LI) { - LLVMContext &Ctx = Preheader->getContext(); - BasicBlock *Header = BasicBlock::Create( - Preheader->getContext(), Name + ".header", Preheader->getParent(), Exit); - BasicBlock *Body = BasicBlock::Create(Header->getContext(), Name + ".body", - Header->getParent(), Exit); - BasicBlock *Latch = BasicBlock::Create(Header->getContext(), Name + ".latch", - Header->getParent(), Exit); - - Type *I32Ty = Type::getInt64Ty(Ctx); - BranchInst::Create(Body, Header); - BranchInst::Create(Latch, Body); - PHINode *IV = - PHINode::Create(I32Ty, 2, Name + ".iv", Header->getTerminator()); - IV->addIncoming(ConstantInt::get(I32Ty, 0), Preheader); - - B.SetInsertPoint(Latch); - Value *Inc = B.CreateAdd(IV, Step, Name + ".step"); - Value *Cond = B.CreateICmpNE(Inc, Bound, Name + ".cond"); - BranchInst::Create(Header, Exit, Cond, Latch); - IV->addIncoming(Inc, Latch); - - BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator()); - BasicBlock *Tmp = PreheaderBr->getSuccessor(0); - PreheaderBr->setSuccessor(0, Header); - DTU.applyUpdatesPermissive({ - {DominatorTree::Delete, Preheader, Tmp}, - {DominatorTree::Insert, Header, Body}, - {DominatorTree::Insert, Body, Latch}, - {DominatorTree::Insert, Latch, Header}, - {DominatorTree::Insert, Latch, Exit}, - {DominatorTree::Insert, Preheader, Header}, - }); - - L->addBasicBlockToLoop(Header, LI); - L->addBasicBlockToLoop(Body, LI); - L->addBasicBlockToLoop(Latch, LI); - return Body; -} - -// Creates the following loop nest skeleton: -// for C = 0; C < NumColumns; C += TileSize -// for R = 0; R < NumRows; R += TileSize -// for K = 0; K < Inner ; K += TileSize -BasicBlock *TileInfo::CreateTiledLoops(BasicBlock *Start, BasicBlock *End, - IRBuilderBase &B, DomTreeUpdater &DTU, - LoopInfo &LI) { - Loop *ColLoop = LI.AllocateLoop(); - Loop *RowLoop = LI.AllocateLoop(); - Loop *InnerLoop = LI.AllocateLoop(); - RowLoop->addChildLoop(InnerLoop); - ColLoop->addChildLoop(RowLoop); - if (Loop *ParentL = LI.getLoopFor(Start)) - ParentL->addChildLoop(ColLoop); - else - LI.addTopLevelLoop(ColLoop); - - BasicBlock *ColBody = - CreateLoop(Start, End, B.getInt64(NumColumns), B.getInt64(TileSize), - "cols", B, DTU, ColLoop, LI); - BasicBlock *ColLatch = ColBody->getSingleSuccessor(); - BasicBlock *RowBody = - CreateLoop(ColBody, ColLatch, B.getInt64(NumRows), B.getInt64(TileSize), - "rows", B, DTU, RowLoop, LI); - RowLoopLatch = RowBody->getSingleSuccessor(); - - BasicBlock *InnerBody = - CreateLoop(RowBody, RowLoopLatch, B.getInt64(NumInner), - B.getInt64(TileSize), "inner", B, DTU, InnerLoop, LI); - InnerLoopLatch = InnerBody->getSingleSuccessor(); - ColumnLoopHeader = ColBody->getSinglePredecessor(); - RowLoopHeader = RowBody->getSinglePredecessor(); - InnerLoopHeader = InnerBody->getSinglePredecessor(); - CurrentRow = &*RowLoopHeader->begin(); - CurrentCol = &*ColumnLoopHeader->begin(); - CurrentK = &*InnerLoopHeader->begin(); - - return InnerBody; -} +//===- MatrixUtils.cpp - Utilities to lower matrix intrinsics ---*- C++ 
-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utilities for generating tiled loops for matrix operations. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/MatrixUtils.h" +#include "llvm/Analysis/DomTreeUpdater.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Type.h" + +using namespace llvm; + +BasicBlock *TileInfo::CreateLoop(BasicBlock *Preheader, BasicBlock *Exit, + Value *Bound, Value *Step, StringRef Name, + IRBuilderBase &B, DomTreeUpdater &DTU, Loop *L, + LoopInfo &LI) { + LLVMContext &Ctx = Preheader->getContext(); + BasicBlock *Header = BasicBlock::Create( + Preheader->getContext(), Name + ".header", Preheader->getParent(), Exit); + BasicBlock *Body = BasicBlock::Create(Header->getContext(), Name + ".body", + Header->getParent(), Exit); + BasicBlock *Latch = BasicBlock::Create(Header->getContext(), Name + ".latch", + Header->getParent(), Exit); + + Type *I32Ty = Type::getInt64Ty(Ctx); + BranchInst::Create(Body, Header); + BranchInst::Create(Latch, Body); + PHINode *IV = + PHINode::Create(I32Ty, 2, Name + ".iv", Header->getTerminator()); + IV->addIncoming(ConstantInt::get(I32Ty, 0), Preheader); + + B.SetInsertPoint(Latch); + Value *Inc = B.CreateAdd(IV, Step, Name + ".step"); + Value *Cond = B.CreateICmpNE(Inc, Bound, Name + ".cond"); + BranchInst::Create(Header, Exit, Cond, Latch); + IV->addIncoming(Inc, Latch); + + BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator()); + BasicBlock *Tmp = PreheaderBr->getSuccessor(0); + PreheaderBr->setSuccessor(0, Header); + DTU.applyUpdatesPermissive({ + {DominatorTree::Delete, Preheader, Tmp}, + {DominatorTree::Insert, Header, Body}, + {DominatorTree::Insert, Body, Latch}, + {DominatorTree::Insert, Latch, Header}, + {DominatorTree::Insert, Latch, Exit}, + {DominatorTree::Insert, Preheader, Header}, + }); + + L->addBasicBlockToLoop(Header, LI); + L->addBasicBlockToLoop(Body, LI); + L->addBasicBlockToLoop(Latch, LI); + return Body; +} + +// Creates the following loop nest skeleton: +// for C = 0; C < NumColumns; C += TileSize +// for R = 0; R < NumRows; R += TileSize +// for K = 0; K < Inner ; K += TileSize +BasicBlock *TileInfo::CreateTiledLoops(BasicBlock *Start, BasicBlock *End, + IRBuilderBase &B, DomTreeUpdater &DTU, + LoopInfo &LI) { + Loop *ColLoop = LI.AllocateLoop(); + Loop *RowLoop = LI.AllocateLoop(); + Loop *InnerLoop = LI.AllocateLoop(); + RowLoop->addChildLoop(InnerLoop); + ColLoop->addChildLoop(RowLoop); + if (Loop *ParentL = LI.getLoopFor(Start)) + ParentL->addChildLoop(ColLoop); + else + LI.addTopLevelLoop(ColLoop); + + BasicBlock *ColBody = + CreateLoop(Start, End, B.getInt64(NumColumns), B.getInt64(TileSize), + "cols", B, DTU, ColLoop, LI); + BasicBlock *ColLatch = ColBody->getSingleSuccessor(); + BasicBlock *RowBody = + CreateLoop(ColBody, ColLatch, B.getInt64(NumRows), B.getInt64(TileSize), + "rows", B, DTU, RowLoop, LI); + RowLoopLatch = RowBody->getSingleSuccessor(); + + BasicBlock *InnerBody = + CreateLoop(RowBody, RowLoopLatch, B.getInt64(NumInner), + B.getInt64(TileSize), "inner", B, DTU, InnerLoop, LI); + InnerLoopLatch = InnerBody->getSingleSuccessor(); + 
ColumnLoopHeader = ColBody->getSinglePredecessor(); + RowLoopHeader = RowBody->getSinglePredecessor(); + InnerLoopHeader = InnerBody->getSinglePredecessor(); + CurrentRow = &*RowLoopHeader->begin(); + CurrentCol = &*ColumnLoopHeader->begin(); + CurrentK = &*InnerLoopHeader->begin(); + + return InnerBody; +} diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/MetaRenamer.cpp index 4a1be618ed..e350320e75 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/MetaRenamer.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/MetaRenamer.cpp @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/MetaRenamer.h" +#include "llvm/Transforms/Utils/MetaRenamer.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" @@ -26,7 +26,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Module.h" -#include "llvm/IR/PassManager.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/TypeFinder.h" #include "llvm/InitializePasses.h" @@ -42,125 +42,125 @@ static const char *const metaNames[] = { }; namespace { -// This PRNG is from the ISO C spec. It is intentionally simple and -// unsuitable for cryptographic use. We're just looking for enough -// variety to surprise and delight users. -struct PRNG { - unsigned long next; - - void srand(unsigned int seed) { next = seed; } - - int rand() { - next = next * 1103515245 + 12345; - return (unsigned int)(next / 65536) % 32768; - } -}; - -struct Renamer { - Renamer(unsigned int seed) { prng.srand(seed); } - - const char *newName() { - return metaNames[prng.rand() % array_lengthof(metaNames)]; - } - - PRNG prng; -}; - -void MetaRename(Function &F) { - for (auto AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) - if (!AI->getType()->isVoidTy()) - AI->setName("arg"); - - for (auto &BB : F) { - BB.setName("bb"); - - for (auto &I : BB) - if (!I.getType()->isVoidTy()) - I.setName("tmp"); - } -} - -void MetaRename(Module &M, - function_ref<TargetLibraryInfo &(Function &)> GetTLI) { - // Seed our PRNG with simple additive sum of ModuleID. We're looking to - // simply avoid always having the same function names, and we need to - // remain deterministic. - unsigned int randSeed = 0; - for (auto C : M.getModuleIdentifier()) - randSeed += C; - - Renamer renamer(randSeed); - - // Rename all aliases - for (auto AI = M.alias_begin(), AE = M.alias_end(); AI != AE; ++AI) { - StringRef Name = AI->getName(); - if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) - continue; - - AI->setName("alias"); - } - - // Rename all global variables - for (auto GI = M.global_begin(), GE = M.global_end(); GI != GE; ++GI) { - StringRef Name = GI->getName(); - if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) - continue; - - GI->setName("global"); - } - - // Rename all struct types - TypeFinder StructTypes; - StructTypes.run(M, true); - for (StructType *STy : StructTypes) { - if (STy->isLiteral() || STy->getName().empty()) - continue; - - SmallString<128> NameStorage; - STy->setName( - (Twine("struct.") + renamer.newName()).toStringRef(NameStorage)); - } - - // Rename all functions - for (auto &F : M) { - StringRef Name = F.getName(); - LibFunc Tmp; - // Leave library functions alone because their presence or absence could - // affect the behavior of other passes. 
- if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) || - GetTLI(F).getLibFunc(F, Tmp)) - continue; - - // Leave @main alone. The output of -metarenamer might be passed to - // lli for execution and the latter needs a main entry point. - if (Name != "main") - F.setName(renamer.newName()); - - MetaRename(F); - } -} - -struct MetaRenamer : public ModulePass { - // Pass identification, replacement for typeid - static char ID; - - MetaRenamer() : ModulePass(ID) { - initializeMetaRenamerPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<TargetLibraryInfoWrapperPass>(); - AU.setPreservesAll(); - } - - bool runOnModule(Module &M) override { - auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { - return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); - }; - MetaRename(M, GetTLI); - return true; - } -}; +// This PRNG is from the ISO C spec. It is intentionally simple and +// unsuitable for cryptographic use. We're just looking for enough +// variety to surprise and delight users. +struct PRNG { + unsigned long next; + + void srand(unsigned int seed) { next = seed; } + + int rand() { + next = next * 1103515245 + 12345; + return (unsigned int)(next / 65536) % 32768; + } +}; + +struct Renamer { + Renamer(unsigned int seed) { prng.srand(seed); } + + const char *newName() { + return metaNames[prng.rand() % array_lengthof(metaNames)]; + } + + PRNG prng; +}; + +void MetaRename(Function &F) { + for (auto AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) + if (!AI->getType()->isVoidTy()) + AI->setName("arg"); + + for (auto &BB : F) { + BB.setName("bb"); + + for (auto &I : BB) + if (!I.getType()->isVoidTy()) + I.setName("tmp"); + } +} + +void MetaRename(Module &M, + function_ref<TargetLibraryInfo &(Function &)> GetTLI) { + // Seed our PRNG with simple additive sum of ModuleID. We're looking to + // simply avoid always having the same function names, and we need to + // remain deterministic. + unsigned int randSeed = 0; + for (auto C : M.getModuleIdentifier()) + randSeed += C; + + Renamer renamer(randSeed); + + // Rename all aliases + for (auto AI = M.alias_begin(), AE = M.alias_end(); AI != AE; ++AI) { + StringRef Name = AI->getName(); + if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) + continue; + + AI->setName("alias"); + } + + // Rename all global variables + for (auto GI = M.global_begin(), GE = M.global_end(); GI != GE; ++GI) { + StringRef Name = GI->getName(); + if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) + continue; + + GI->setName("global"); + } + + // Rename all struct types + TypeFinder StructTypes; + StructTypes.run(M, true); + for (StructType *STy : StructTypes) { + if (STy->isLiteral() || STy->getName().empty()) + continue; + + SmallString<128> NameStorage; + STy->setName( + (Twine("struct.") + renamer.newName()).toStringRef(NameStorage)); + } + + // Rename all functions + for (auto &F : M) { + StringRef Name = F.getName(); + LibFunc Tmp; + // Leave library functions alone because their presence or absence could + // affect the behavior of other passes. + if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) || + GetTLI(F).getLibFunc(F, Tmp)) + continue; + + // Leave @main alone. The output of -metarenamer might be passed to + // lli for execution and the latter needs a main entry point. 
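// The Renamer above draws from a fixed name pool using the ISO C example
// LCG, seeded with the byte sum of the module identifier, so renaming is
// deterministic per module. The whole pipeline in miniature (plain C++;
// pickName and the tiny pool are illustrative, not the pass's API):
#include <cassert>
#include <string>

static const char *const pool[] = {"air", "bar", "cat", "dog"};

struct LCG {
  unsigned long next;
  int rand() {
    next = next * 1103515245 + 12345;             // ISO C example generator
    return (unsigned int)(next / 65536) % 32768;
  }
};

static const char *pickName(const std::string &moduleId) {
  unsigned int seed = 0;
  for (char c : moduleId)
    seed += c;                        // additive sum, as in MetaRename(Module&)
  LCG prng{seed};
  return pool[prng.rand() % (sizeof(pool) / sizeof(pool[0]))];
}

int main() {
  assert(pickName("a.ll") == pickName("a.ll"));   // stable across runs
  return 0;
}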
+ if (Name != "main") + F.setName(renamer.newName()); + + MetaRename(F); + } +} + +struct MetaRenamer : public ModulePass { + // Pass identification, replacement for typeid + static char ID; + + MetaRenamer() : ModulePass(ID) { + initializeMetaRenamerPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.setPreservesAll(); + } + + bool runOnModule(Module &M) override { + auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + }; + MetaRename(M, GetTLI); + return true; + } +}; } // end anonymous namespace @@ -179,14 +179,14 @@ INITIALIZE_PASS_END(MetaRenamer, "metarenamer", ModulePass *llvm::createMetaRenamerPass() { return new MetaRenamer(); } - -PreservedAnalyses MetaRenamerPass::run(Module &M, ModuleAnalysisManager &AM) { - FunctionAnalysisManager &FAM = - AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); - auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { - return FAM.getResult<TargetLibraryAnalysis>(F); - }; - MetaRename(M, GetTLI); - - return PreservedAnalyses::all(); -} + +PreservedAnalyses MetaRenamerPass::run(Module &M, ModuleAnalysisManager &AM) { + FunctionAnalysisManager &FAM = + AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult<TargetLibraryAnalysis>(F); + }; + MetaRename(M, GetTLI); + + return PreservedAnalyses::all(); +} diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/PredicateInfo.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/PredicateInfo.cpp index b53eab4c19..3312a6f945 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/PredicateInfo.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/PredicateInfo.cpp @@ -53,10 +53,10 @@ static cl::opt<bool> VerifyPredicateInfo( DEBUG_COUNTER(RenameCounter, "predicateinfo-rename", "Controls which variables are renamed with predicateinfo"); -// Maximum number of conditions considered for renaming for each branch/assume. -// This limits renaming of deep and/or chains. -static const unsigned MaxCondsPerBranch = 8; - +// Maximum number of conditions considered for renaming for each branch/assume. +// This limits renaming of deep and/or chains. +static const unsigned MaxCondsPerBranch = 8; + namespace { // Given a predicate info that is a type of branching terminator, get the // branching block. @@ -371,13 +371,13 @@ void PredicateInfoBuilder::convertUsesToDFSOrdered( } } -bool shouldRename(Value *V) { - // Only want real values, not constants. Additionally, operands with one use - // are only being used in the comparison, which means they will not be useful - // for us to consider for predicateinfo. - return (isa<Instruction>(V) || isa<Argument>(V)) && !V->hasOneUse(); -} - +bool shouldRename(Value *V) { + // Only want real values, not constants. Additionally, operands with one use + // are only being used in the comparison, which means they will not be useful + // for us to consider for predicateinfo. + return (isa<Instruction>(V) || isa<Argument>(V)) && !V->hasOneUse(); +} + // Collect relevant operations from Comparison that we may want to insert copies // for. 
void collectCmpOps(CmpInst *Comparison, SmallVectorImpl<Value *> &CmpOperands) { @@ -385,9 +385,9 @@ void collectCmpOps(CmpInst *Comparison, SmallVectorImpl<Value *> &CmpOperands) { auto *Op1 = Comparison->getOperand(1); if (Op0 == Op1) return; - - CmpOperands.push_back(Op0); - CmpOperands.push_back(Op1); + + CmpOperands.push_back(Op0); + CmpOperands.push_back(Op1); } // Add Op, PB to the list of value infos for Op, and mark Op to be renamed. @@ -405,31 +405,31 @@ void PredicateInfoBuilder::addInfoFor(SmallVectorImpl<Value *> &OpsToRename, void PredicateInfoBuilder::processAssume( IntrinsicInst *II, BasicBlock *AssumeBB, SmallVectorImpl<Value *> &OpsToRename) { - SmallVector<Value *, 4> Worklist; - SmallPtrSet<Value *, 4> Visited; - Worklist.push_back(II->getOperand(0)); - while (!Worklist.empty()) { - Value *Cond = Worklist.pop_back_val(); - if (!Visited.insert(Cond).second) - continue; - if (Visited.size() > MaxCondsPerBranch) - break; - - Value *Op0, *Op1; - if (match(Cond, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) { - Worklist.push_back(Op1); - Worklist.push_back(Op0); - } - - SmallVector<Value *, 4> Values; - Values.push_back(Cond); - if (auto *Cmp = dyn_cast<CmpInst>(Cond)) - collectCmpOps(Cmp, Values); - - for (Value *V : Values) { - if (shouldRename(V)) { - auto *PA = new PredicateAssume(V, II, Cond); - addInfoFor(OpsToRename, V, PA); + SmallVector<Value *, 4> Worklist; + SmallPtrSet<Value *, 4> Visited; + Worklist.push_back(II->getOperand(0)); + while (!Worklist.empty()) { + Value *Cond = Worklist.pop_back_val(); + if (!Visited.insert(Cond).second) + continue; + if (Visited.size() > MaxCondsPerBranch) + break; + + Value *Op0, *Op1; + if (match(Cond, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) { + Worklist.push_back(Op1); + Worklist.push_back(Op0); + } + + SmallVector<Value *, 4> Values; + Values.push_back(Cond); + if (auto *Cmp = dyn_cast<CmpInst>(Cond)) + collectCmpOps(Cmp, Values); + + for (Value *V : Values) { + if (shouldRename(V)) { + auto *PA = new PredicateAssume(V, II, Cond); + addInfoFor(OpsToRename, V, PA); } } } @@ -443,44 +443,44 @@ void PredicateInfoBuilder::processBranch( BasicBlock *FirstBB = BI->getSuccessor(0); BasicBlock *SecondBB = BI->getSuccessor(1); - for (BasicBlock *Succ : {FirstBB, SecondBB}) { - bool TakenEdge = Succ == FirstBB; - // Don't try to insert on a self-edge. This is mainly because we will - // eliminate during renaming anyway. - if (Succ == BranchBB) - continue; - - SmallVector<Value *, 4> Worklist; - SmallPtrSet<Value *, 4> Visited; - Worklist.push_back(BI->getCondition()); - while (!Worklist.empty()) { - Value *Cond = Worklist.pop_back_val(); - if (!Visited.insert(Cond).second) + for (BasicBlock *Succ : {FirstBB, SecondBB}) { + bool TakenEdge = Succ == FirstBB; + // Don't try to insert on a self-edge. This is mainly because we will + // eliminate during renaming anyway. + if (Succ == BranchBB) + continue; + + SmallVector<Value *, 4> Worklist; + SmallPtrSet<Value *, 4> Visited; + Worklist.push_back(BI->getCondition()); + while (!Worklist.empty()) { + Value *Cond = Worklist.pop_back_val(); + if (!Visited.insert(Cond).second) continue; - if (Visited.size() > MaxCondsPerBranch) - break; - - Value *Op0, *Op1; - if (TakenEdge ? 
match(Cond, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) - : match(Cond, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) { - Worklist.push_back(Op1); - Worklist.push_back(Op0); - } - - SmallVector<Value *, 4> Values; - Values.push_back(Cond); - if (auto *Cmp = dyn_cast<CmpInst>(Cond)) - collectCmpOps(Cmp, Values); - - for (Value *V : Values) { - if (shouldRename(V)) { - PredicateBase *PB = - new PredicateBranch(V, BranchBB, Succ, Cond, TakenEdge); - addInfoFor(OpsToRename, V, PB); - if (!Succ->getSinglePredecessor()) - EdgeUsesOnly.insert({BranchBB, Succ}); - } - } + if (Visited.size() > MaxCondsPerBranch) + break; + + Value *Op0, *Op1; + if (TakenEdge ? match(Cond, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) + : match(Cond, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) { + Worklist.push_back(Op1); + Worklist.push_back(Op0); + } + + SmallVector<Value *, 4> Values; + Values.push_back(Cond); + if (auto *Cmp = dyn_cast<CmpInst>(Cond)) + collectCmpOps(Cmp, Values); + + for (Value *V : Values) { + if (shouldRename(V)) { + PredicateBase *PB = + new PredicateBranch(V, BranchBB, Succ, Cond, TakenEdge); + addInfoFor(OpsToRename, V, PB); + if (!Succ->getSinglePredecessor()) + EdgeUsesOnly.insert({BranchBB, Succ}); + } + } } } } @@ -799,56 +799,56 @@ PredicateInfo::~PredicateInfo() { } } -Optional<PredicateConstraint> PredicateBase::getConstraint() const { - switch (Type) { - case PT_Assume: - case PT_Branch: { - bool TrueEdge = true; - if (auto *PBranch = dyn_cast<PredicateBranch>(this)) - TrueEdge = PBranch->TrueEdge; - - if (Condition == RenamedOp) { - return {{CmpInst::ICMP_EQ, - TrueEdge ? ConstantInt::getTrue(Condition->getType()) - : ConstantInt::getFalse(Condition->getType())}}; - } - - CmpInst *Cmp = dyn_cast<CmpInst>(Condition); - if (!Cmp) { - // TODO: Make this an assertion once RenamedOp is fully accurate. - return None; - } - - CmpInst::Predicate Pred; - Value *OtherOp; - if (Cmp->getOperand(0) == RenamedOp) { - Pred = Cmp->getPredicate(); - OtherOp = Cmp->getOperand(1); - } else if (Cmp->getOperand(1) == RenamedOp) { - Pred = Cmp->getSwappedPredicate(); - OtherOp = Cmp->getOperand(0); - } else { - // TODO: Make this an assertion once RenamedOp is fully accurate. - return None; - } - - // Invert predicate along false edge. - if (!TrueEdge) - Pred = CmpInst::getInversePredicate(Pred); - - return {{Pred, OtherOp}}; - } - case PT_Switch: - if (Condition != RenamedOp) { - // TODO: Make this an assertion once RenamedOp is fully accurate. - return None; - } - - return {{CmpInst::ICMP_EQ, cast<PredicateSwitch>(this)->CaseValue}}; - } - llvm_unreachable("Unknown predicate type"); -} - +Optional<PredicateConstraint> PredicateBase::getConstraint() const { + switch (Type) { + case PT_Assume: + case PT_Branch: { + bool TrueEdge = true; + if (auto *PBranch = dyn_cast<PredicateBranch>(this)) + TrueEdge = PBranch->TrueEdge; + + if (Condition == RenamedOp) { + return {{CmpInst::ICMP_EQ, + TrueEdge ? ConstantInt::getTrue(Condition->getType()) + : ConstantInt::getFalse(Condition->getType())}}; + } + + CmpInst *Cmp = dyn_cast<CmpInst>(Condition); + if (!Cmp) { + // TODO: Make this an assertion once RenamedOp is fully accurate. + return None; + } + + CmpInst::Predicate Pred; + Value *OtherOp; + if (Cmp->getOperand(0) == RenamedOp) { + Pred = Cmp->getPredicate(); + OtherOp = Cmp->getOperand(1); + } else if (Cmp->getOperand(1) == RenamedOp) { + Pred = Cmp->getSwappedPredicate(); + OtherOp = Cmp->getOperand(0); + } else { + // TODO: Make this an assertion once RenamedOp is fully accurate. 
+ return None; + } + + // Invert predicate along false edge. + if (!TrueEdge) + Pred = CmpInst::getInversePredicate(Pred); + + return {{Pred, OtherOp}}; + } + case PT_Switch: + if (Condition != RenamedOp) { + // TODO: Make this an assertion once RenamedOp is fully accurate. + return None; + } + + return {{CmpInst::ICMP_EQ, cast<PredicateSwitch>(this)->CaseValue}}; + } + llvm_unreachable("Unknown predicate type"); +} + void PredicateInfo::verifyPredicateInfo() const {} char PredicateInfoPrinterLegacyPass::ID = 0; diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 014a9db12f..86bbb6a889 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -77,19 +77,19 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { if (SI->isVolatile()) return false; } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { - if (!II->isLifetimeStartOrEnd() && !II->isDroppable()) + if (!II->isLifetimeStartOrEnd() && !II->isDroppable()) return false; } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { - if (!onlyUsedByLifetimeMarkersOrDroppableInsts(BCI)) + if (!onlyUsedByLifetimeMarkersOrDroppableInsts(BCI)) return false; } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { if (!GEPI->hasAllZeroIndices()) return false; - if (!onlyUsedByLifetimeMarkersOrDroppableInsts(GEPI)) + if (!onlyUsedByLifetimeMarkersOrDroppableInsts(GEPI)) + return false; + } else if (const AddrSpaceCastInst *ASCI = dyn_cast<AddrSpaceCastInst>(U)) { + if (!onlyUsedByLifetimeMarkers(ASCI)) return false; - } else if (const AddrSpaceCastInst *ASCI = dyn_cast<AddrSpaceCastInst>(U)) { - if (!onlyUsedByLifetimeMarkers(ASCI)) - return false; } else { return false; } @@ -101,8 +101,8 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { namespace { struct AllocaInfo { - using DbgUserVec = SmallVector<DbgVariableIntrinsic *, 1>; - + using DbgUserVec = SmallVector<DbgVariableIntrinsic *, 1>; + SmallVector<BasicBlock *, 32> DefiningBlocks; SmallVector<BasicBlock *, 32> UsingBlocks; @@ -110,7 +110,7 @@ struct AllocaInfo { BasicBlock *OnlyBlock; bool OnlyUsedInOneBlock; - DbgUserVec DbgUsers; + DbgUserVec DbgUsers; void clear() { DefiningBlocks.clear(); @@ -118,7 +118,7 @@ struct AllocaInfo { OnlyStore = nullptr; OnlyBlock = nullptr; OnlyUsedInOneBlock = true; - DbgUsers.clear(); + DbgUsers.clear(); } /// Scan the uses of the specified alloca, filling in the AllocaInfo used @@ -129,8 +129,8 @@ struct AllocaInfo { // As we scan the uses of the alloca instruction, keep track of stores, // and decide whether all of the loads and stores to the alloca are within // the same basic block. - for (User *U : AI->users()) { - Instruction *User = cast<Instruction>(U); + for (User *U : AI->users()) { + Instruction *User = cast<Instruction>(U); if (StoreInst *SI = dyn_cast<StoreInst>(User)) { // Remember the basic blocks which define new values for the alloca @@ -151,7 +151,7 @@ struct AllocaInfo { } } - findDbgUsers(DbgUsers, AI); + findDbgUsers(DbgUsers, AI); } }; @@ -249,7 +249,7 @@ struct PromoteMem2Reg { /// For each alloca, we keep track of the dbg.declare intrinsic that /// describes it, if any, so that we can convert it to a dbg.value /// intrinsic if the alloca gets promoted. 
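// isAllocaPromotable (hunk above) admits an alloca only when every user is a
// non-volatile load or store of the whole slot, a lifetime/droppable
// intrinsic, or a zero-index GEP / bitcast / addrspacecast that only feeds
// such intrinsics. A minimal query against a hand-built function (sketch
// assuming an LLVM 12 build; demoPromotable is an illustrative name):
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
using namespace llvm;

bool demoPromotable() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  auto *FT = FunctionType::get(Type::getInt32Ty(Ctx), /*isVarArg=*/false);
  Function *F = Function::Create(FT, Function::ExternalLinkage, "f", M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  IRBuilder<> B(BB);
  AllocaInst *A = B.CreateAlloca(B.getInt32Ty(), nullptr, "x");
  B.CreateStore(B.getInt32(42), A);                // simple store: fine
  Value *V = B.CreateLoad(B.getInt32Ty(), A, "v"); // simple load: fine
  B.CreateRet(V);
  return isAllocaPromotable(A); // true: no escaping or volatile uses
}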
- SmallVector<AllocaInfo::DbgUserVec, 8> AllocaDbgUsers; + SmallVector<AllocaInfo::DbgUserVec, 8> AllocaDbgUsers; /// The set of basic blocks the renamer has already visited. SmallPtrSet<BasicBlock *, 16> Visited; @@ -309,37 +309,37 @@ static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) { AC->registerAssumption(CI); } -static void removeIntrinsicUsers(AllocaInst *AI) { +static void removeIntrinsicUsers(AllocaInst *AI) { // Knowing that this alloca is promotable, we know that it's safe to kill all // instructions except for load and store. - for (auto UI = AI->use_begin(), UE = AI->use_end(); UI != UE;) { - Instruction *I = cast<Instruction>(UI->getUser()); - Use &U = *UI; + for (auto UI = AI->use_begin(), UE = AI->use_end(); UI != UE;) { + Instruction *I = cast<Instruction>(UI->getUser()); + Use &U = *UI; ++UI; if (isa<LoadInst>(I) || isa<StoreInst>(I)) continue; - // Drop the use of AI in droppable instructions. - if (I->isDroppable()) { - I->dropDroppableUse(U); - continue; - } - + // Drop the use of AI in droppable instructions. + if (I->isDroppable()) { + I->dropDroppableUse(U); + continue; + } + if (!I->getType()->isVoidTy()) { // The only users of this bitcast/GEP instruction are lifetime intrinsics. // Follow the use/def chain to erase them now instead of leaving it for // dead code elimination later. - for (auto UUI = I->use_begin(), UUE = I->use_end(); UUI != UUE;) { - Instruction *Inst = cast<Instruction>(UUI->getUser()); - Use &UU = *UUI; + for (auto UUI = I->use_begin(), UUE = I->use_end(); UUI != UUE;) { + Instruction *Inst = cast<Instruction>(UUI->getUser()); + Use &UU = *UUI; ++UUI; - - // Drop the use of I in droppable instructions. - if (Inst->isDroppable()) { - Inst->dropDroppableUse(UU); - continue; - } + + // Drop the use of I in droppable instructions. + if (Inst->isDroppable()) { + Inst->dropDroppableUse(UU); + continue; + } Inst->eraseFromParent(); } } @@ -366,8 +366,8 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, // Clear out UsingBlocks. We will reconstruct it here if needed. Info.UsingBlocks.clear(); - for (User *U : make_early_inc_range(AI->users())) { - Instruction *UserInst = cast<Instruction>(U); + for (User *U : make_early_inc_range(AI->users())) { + Instruction *UserInst = cast<Instruction>(U); if (UserInst == OnlyStore) continue; LoadInst *LI = cast<LoadInst>(UserInst); @@ -423,14 +423,14 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, // Record debuginfo for the store and remove the declaration's // debuginfo. - for (DbgVariableIntrinsic *DII : Info.DbgUsers) { - if (DII->isAddressOfVariable()) { - DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); - ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB); - DII->eraseFromParent(); - } else if (DII->getExpression()->startsWithDeref()) { - DII->eraseFromParent(); - } + for (DbgVariableIntrinsic *DII : Info.DbgUsers) { + if (DII->isAddressOfVariable()) { + DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); + ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB); + DII->eraseFromParent(); + } else if (DII->getExpression()->startsWithDeref()) { + DII->eraseFromParent(); + } } // Remove the (now dead) store and alloca. Info.OnlyStore->eraseFromParent(); @@ -480,8 +480,8 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, // Walk all of the loads from this alloca, replacing them with the nearest // store above them, if any. 
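// promoteSingleBlockAlloca's walk below resolves each load to the closest
// preceding store in the same block (the real code uses LargeBlockInfo
// indices rather than a linear scan, and falls back to undef for a load
// before any store). The core idea on a toy instruction stream (plain C++;
// Op and forward are illustrative types, not LLVM's):
#include <cassert>
#include <optional>
#include <vector>

struct Op {
  enum Kind { Store, Load } kind;
  int value = 0;          // stored value, or the forwarded result for loads
};

static void forward(std::vector<Op> &block) {
  std::optional<int> lastStored;
  for (Op &op : block) {
    if (op.kind == Op::Store)
      lastStored = op.value;     // nearest store so far
    else if (lastStored)
      op.value = *lastStored;    // forward it into the load
  }
}

int main() {
  std::vector<Op> bb = {{Op::Store, 1}, {Op::Load}, {Op::Store, 2}, {Op::Load}};
  forward(bb);
  assert(bb[1].value == 1 && bb[3].value == 2);
  return 0;
}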
- for (User *U : make_early_inc_range(AI->users())) { - LoadInst *LI = dyn_cast<LoadInst>(U); + for (User *U : make_early_inc_range(AI->users())) { + LoadInst *LI = dyn_cast<LoadInst>(U); if (!LI) continue; @@ -525,11 +525,11 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, while (!AI->use_empty()) { StoreInst *SI = cast<StoreInst>(AI->user_back()); // Record debuginfo for the store before removing it. - for (DbgVariableIntrinsic *DII : Info.DbgUsers) { - if (DII->isAddressOfVariable()) { - DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); - ConvertDebugDeclareToDebugValue(DII, SI, DIB); - } + for (DbgVariableIntrinsic *DII : Info.DbgUsers) { + if (DII->isAddressOfVariable()) { + DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); + ConvertDebugDeclareToDebugValue(DII, SI, DIB); + } } SI->eraseFromParent(); LBI.deleteValue(SI); @@ -538,9 +538,9 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, AI->eraseFromParent(); // The alloca's debuginfo can be removed as well. - for (DbgVariableIntrinsic *DII : Info.DbgUsers) - if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref()) - DII->eraseFromParent(); + for (DbgVariableIntrinsic *DII : Info.DbgUsers) + if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref()) + DII->eraseFromParent(); ++NumLocalPromoted; return true; @@ -549,7 +549,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, void PromoteMem2Reg::run() { Function &F = *DT.getRoot()->getParent(); - AllocaDbgUsers.resize(Allocas.size()); + AllocaDbgUsers.resize(Allocas.size()); AllocaInfo Info; LargeBlockInfo LBI; @@ -562,7 +562,7 @@ void PromoteMem2Reg::run() { assert(AI->getParent()->getParent() == &F && "All allocas should be in the same function, which is same as DF!"); - removeIntrinsicUsers(AI); + removeIntrinsicUsers(AI); if (AI->use_empty()) { // If there are no uses of the alloca, just delete it now. @@ -607,8 +607,8 @@ void PromoteMem2Reg::run() { } // Remember the dbg.declare intrinsic describing this alloca, if any. - if (!Info.DbgUsers.empty()) - AllocaDbgUsers[AllocaNum] = Info.DbgUsers; + if (!Info.DbgUsers.empty()) + AllocaDbgUsers[AllocaNum] = Info.DbgUsers; // Keep the reverse mapping of the 'Allocas' array for the rename pass. AllocaLookup[Allocas[AllocaNum]] = AllocaNum; @@ -681,11 +681,11 @@ void PromoteMem2Reg::run() { } // Remove alloca's dbg.declare instrinsics from the function. - for (auto &DbgUsers : AllocaDbgUsers) { - for (auto *DII : DbgUsers) - if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref()) - DII->eraseFromParent(); - } + for (auto &DbgUsers : AllocaDbgUsers) { + for (auto *DII : DbgUsers) + if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref()) + DII->eraseFromParent(); + } // Loop over all of the PHI nodes and see if there are any that we can get // rid of because they merge all of the same incoming values. This can @@ -740,7 +740,7 @@ void PromoteMem2Reg::run() { continue; // Get the preds for BB. - SmallVector<BasicBlock *, 16> Preds(predecessors(BB)); + SmallVector<BasicBlock *, 16> Preds(predecessors(BB)); // Ok, now we know that all of the PHI nodes are missing entries for some // basic blocks. Start by sorting the incoming predecessors for efficient @@ -907,7 +907,7 @@ NextIteration: // operands so far. Remember this count. 
unsigned NewPHINumOperands = APN->getNumOperands(); - unsigned NumEdges = llvm::count(successors(Pred), BB); + unsigned NumEdges = llvm::count(successors(Pred), BB); assert(NumEdges && "Must be at least one edge from Pred to BB!"); // Add entries for all the phis. @@ -925,9 +925,9 @@ NextIteration: // The currently active variable for this block is now the PHI. IncomingVals[AllocaNo] = APN; - for (DbgVariableIntrinsic *DII : AllocaDbgUsers[AllocaNo]) - if (DII->isAddressOfVariable()) - ConvertDebugDeclareToDebugValue(DII, APN, DIB); + for (DbgVariableIntrinsic *DII : AllocaDbgUsers[AllocaNo]) + if (DII->isAddressOfVariable()) + ConvertDebugDeclareToDebugValue(DII, APN, DIB); // Get the next phi node. ++PNI; @@ -986,9 +986,9 @@ NextIteration: // Record debuginfo for the store before removing it. IncomingLocs[AllocaNo] = SI->getDebugLoc(); - for (DbgVariableIntrinsic *DII : AllocaDbgUsers[ai->second]) - if (DII->isAddressOfVariable()) - ConvertDebugDeclareToDebugValue(DII, SI, DIB); + for (DbgVariableIntrinsic *DII : AllocaDbgUsers[ai->second]) + if (DII->isAddressOfVariable()) + ConvertDebugDeclareToDebugValue(DII, SI, DIB); BB->getInstList().erase(SI); } } diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/SSAUpdater.cpp index ba9c371085..c210d1c460 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/SSAUpdater.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/SSAUpdater.cpp @@ -64,7 +64,7 @@ bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const { } Value *SSAUpdater::FindValueForBlock(BasicBlock *BB) const { - return getAvailableVals(AV).lookup(BB); + return getAvailableVals(AV).lookup(BB); } void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { @@ -253,10 +253,10 @@ public: // We can get our predecessor info by walking the pred_iterator list, // but it is relatively slow. If we already have PHI nodes in this // block, walk one of them to get the predecessor list instead. - if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) - append_range(*Preds, SomePhi->blocks()); - else - append_range(*Preds, predecessors(BB)); + if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) + append_range(*Preds, SomePhi->blocks()); + else + append_range(*Preds, predecessors(BB)); } /// GetUndefVal - Get an undefined value of the same type as the value diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index 75ccdd81b0..6dbfb0b61f 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -27,7 +27,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; @@ -39,7 +39,7 @@ cl::opt<unsigned> llvm::SCEVCheapExpansionBudget( using namespace PatternMatch; /// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP, -/// reusing an existing cast if a suitable one (= dominating IP) exists, or +/// reusing an existing cast if a suitable one (= dominating IP) exists, or /// creating a new one. Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, Instruction::CastOps Op, @@ -58,27 +58,27 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, Instruction *Ret = nullptr; // Check to see if there is already a cast! 
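// The scan below ("is there already a cast?") may only reuse a cast that the
// builder's insertion point can legally see: same basic block, at or before
// the insertion point. That is an intra-block ordering query, which
// Instruction::comesBefore answers directly. Restated as a helper (assumes
// LLVM 12; reusableAt is an illustrative name, and the real code also
// excludes the builder's own insert position BIP):
#include "llvm/IR/Instruction.h"
using namespace llvm;

static bool reusableAt(Instruction *Candidate, Instruction *IP) {
  // comesBefore compares the positions of two instructions in one block.
  return Candidate->getParent() == IP->getParent() &&
         (Candidate == IP || Candidate->comesBefore(IP));
}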
- for (User *U : V->users()) { - if (U->getType() != Ty) - continue; - CastInst *CI = dyn_cast<CastInst>(U); - if (!CI || CI->getOpcode() != Op) - continue; - - // Found a suitable cast that is at IP or comes before IP. Use it. Note that - // the cast must also properly dominate the Builder's insertion point. - if (IP->getParent() == CI->getParent() && &*BIP != CI && - (&*IP == CI || CI->comesBefore(&*IP))) { - Ret = CI; - break; - } - } - + for (User *U : V->users()) { + if (U->getType() != Ty) + continue; + CastInst *CI = dyn_cast<CastInst>(U); + if (!CI || CI->getOpcode() != Op) + continue; + + // Found a suitable cast that is at IP or comes before IP. Use it. Note that + // the cast must also properly dominate the Builder's insertion point. + if (IP->getParent() == CI->getParent() && &*BIP != CI && + (&*IP == CI || CI->comesBefore(&*IP))) { + Ret = CI; + break; + } + } + // Create a new cast. - if (!Ret) { + if (!Ret) { Ret = CastInst::Create(Op, V, Ty, V->getName(), &*IP); - rememberInstruction(Ret); - } + rememberInstruction(Ret); + } // We assert at the end of the function since IP might point to an // instruction with different dominance properties than a cast @@ -88,8 +88,8 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, return Ret; } -BasicBlock::iterator -SCEVExpander::findInsertPointAfter(Instruction *I, Instruction *MustDominate) { +BasicBlock::iterator +SCEVExpander::findInsertPointAfter(Instruction *I, Instruction *MustDominate) { BasicBlock::iterator IP = ++I->getIterator(); if (auto *II = dyn_cast<InvokeInst>(I)) IP = II->getNormalDest()->begin(); @@ -100,17 +100,17 @@ SCEVExpander::findInsertPointAfter(Instruction *I, Instruction *MustDominate) { if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) { ++IP; } else if (isa<CatchSwitchInst>(IP)) { - IP = MustDominate->getParent()->getFirstInsertionPt(); + IP = MustDominate->getParent()->getFirstInsertionPt(); } else { assert(!IP->isEHPad() && "unexpected eh pad!"); } - // Adjust insert point to be after instructions inserted by the expander, so - // we can re-use already inserted instructions. Avoid skipping past the - // original \p MustDominate, in case it is an inserted instruction. - while (isInsertedInstruction(&*IP) && &*IP != MustDominate) - ++IP; - + // Adjust insert point to be after instructions inserted by the expander, so + // we can re-use already inserted instructions. Avoid skipping past the + // original \p MustDominate, in case it is an inserted instruction. + while (isInsertedInstruction(&*IP) && &*IP != MustDominate) + ++IP; + return IP; } @@ -126,22 +126,22 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) { assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) && "InsertNoopCastOfTo cannot change sizes!"); - // inttoptr only works for integral pointers. For non-integral pointers, we - // can create a GEP on i8* null with the integral value as index. Note that - // it is safe to use GEP of null instead of inttoptr here, because only - // expressions already based on a GEP of null should be converted to pointers - // during expansion. 
- if (Op == Instruction::IntToPtr) { - auto *PtrTy = cast<PointerType>(Ty); - if (DL.isNonIntegralPointerType(PtrTy)) { - auto *Int8PtrTy = Builder.getInt8PtrTy(PtrTy->getAddressSpace()); - assert(DL.getTypeAllocSize(Int8PtrTy->getElementType()) == 1 && - "alloc size of i8 must by 1 byte for the GEP to be correct"); - auto *GEP = Builder.CreateGEP( - Builder.getInt8Ty(), Constant::getNullValue(Int8PtrTy), V, "uglygep"); - return Builder.CreateBitCast(GEP, Ty); - } - } + // inttoptr only works for integral pointers. For non-integral pointers, we + // can create a GEP on i8* null with the integral value as index. Note that + // it is safe to use GEP of null instead of inttoptr here, because only + // expressions already based on a GEP of null should be converted to pointers + // during expansion. + if (Op == Instruction::IntToPtr) { + auto *PtrTy = cast<PointerType>(Ty); + if (DL.isNonIntegralPointerType(PtrTy)) { + auto *Int8PtrTy = Builder.getInt8PtrTy(PtrTy->getAddressSpace()); + assert(DL.getTypeAllocSize(Int8PtrTy->getElementType()) == 1 && + "alloc size of i8 must by 1 byte for the GEP to be correct"); + auto *GEP = Builder.CreateGEP( + Builder.getInt8Ty(), Constant::getNullValue(Int8PtrTy), V, "uglygep"); + return Builder.CreateBitCast(GEP, Ty); + } + } // Short-circuit unnecessary bitcasts. if (Op == Instruction::BitCast) { if (V->getType() == Ty) @@ -186,7 +186,7 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) { // Cast the instruction immediately after the instruction. Instruction *I = cast<Instruction>(V); - BasicBlock::iterator IP = findInsertPointAfter(I, &*Builder.GetInsertPoint()); + BasicBlock::iterator IP = findInsertPointAfter(I, &*Builder.GetInsertPoint()); return ReuseOrCreateCast(I, Ty, Op, IP); } @@ -309,7 +309,7 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder, if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0))) if (!C->getAPInt().srem(FC->getAPInt())) { - SmallVector<const SCEV *, 4> NewMulOps(M->operands()); + SmallVector<const SCEV *, 4> NewMulOps(M->operands()); NewMulOps[0] = SE.getConstant(C->getAPInt().sdiv(FC->getAPInt())); S = SE.getMulExpr(NewMulOps); return true; @@ -481,10 +481,10 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // we didn't find any operands that could be factored, tentatively // assume that element zero was selected (since the zero offset // would obviously be folded away). - Value *Scaled = - ScaledOps.empty() - ? Constant::getNullValue(Ty) - : expandCodeForImpl(SE.getAddExpr(ScaledOps), Ty, false); + Value *Scaled = + ScaledOps.empty() + ? Constant::getNullValue(Ty) + : expandCodeForImpl(SE.getAddExpr(ScaledOps), Ty, false); GepIndices.push_back(Scaled); // Collect struct field index operands. @@ -543,7 +543,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint())); // Expand the operands for a plain byte offset. - Value *Idx = expandCodeForImpl(SE.getAddExpr(Ops), Ty, false); + Value *Idx = expandCodeForImpl(SE.getAddExpr(Ops), Ty, false); // Fold a GEP with constant operands. if (Constant *CLHS = dyn_cast<Constant>(V)) @@ -584,7 +584,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, } // Emit a GEP. 
- return Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep"); + return Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep"); } { @@ -764,14 +764,14 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op)); } else if (Op->isNonConstantNegative()) { // Instead of doing a negate and add, just do a subtract. - Value *W = expandCodeForImpl(SE.getNegativeSCEV(Op), Ty, false); + Value *W = expandCodeForImpl(SE.getNegativeSCEV(Op), Ty, false); Sum = InsertNoopCastOfTo(Sum, Ty); Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true); ++I; } else { // A simple add. - Value *W = expandCodeForImpl(Op, Ty, false); + Value *W = expandCodeForImpl(Op, Ty, false); Sum = InsertNoopCastOfTo(Sum, Ty); // Canonicalize a constant to the RHS. if (isa<Constant>(Sum)) std::swap(Sum, W); @@ -823,7 +823,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { // Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them // that are needed into the result. - Value *P = expandCodeForImpl(I->second, Ty, false); + Value *P = expandCodeForImpl(I->second, Ty, false); Value *Result = nullptr; if (Exponent & 1) Result = P; @@ -882,7 +882,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { Type *Ty = SE.getEffectiveSCEVType(S->getType()); - Value *LHS = expandCodeForImpl(S->getLHS(), Ty, false); + Value *LHS = expandCodeForImpl(S->getLHS(), Ty, false); if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) { const APInt &RHS = SC->getAPInt(); if (RHS.isPowerOf2()) @@ -891,7 +891,7 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true); } - Value *RHS = expandCodeForImpl(S->getRHS(), Ty, false); + Value *RHS = expandCodeForImpl(S->getRHS(), Ty, false); return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap, /*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS())); } @@ -911,7 +911,7 @@ static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest, } if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) { Base = A->getOperand(A->getNumOperands()-1); - SmallVector<const SCEV *, 8> NewAddOps(A->operands()); + SmallVector<const SCEV *, 8> NewAddOps(A->operands()); NewAddOps.back() = Rest; Rest = SE.getAddExpr(NewAddOps); ExposePointerBase(Base, Rest, SE); @@ -1089,7 +1089,7 @@ Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L, GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()), GEPPtrTy->getAddressSpace()); IncV = expandAddToGEP(SE.getSCEV(StepV), GEPPtrTy, IntTy, PN); - if (IncV->getType() != PN->getType()) + if (IncV->getType() != PN->getType()) IncV = Builder.CreateBitCast(IncV, PN->getType()); } else { IncV = useSubtract ? @@ -1206,14 +1206,14 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, if (!SE.isSCEVable(PN.getType())) continue; - // We should not look for a incomplete PHI. Getting SCEV for a incomplete - // PHI has no meaning at all. - if (!PN.isComplete()) { - DEBUG_WITH_TYPE( - DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n"); - continue; - } - + // We should not look for a incomplete PHI. Getting SCEV for a incomplete + // PHI has no meaning at all. 
+ if (!PN.isComplete()) { + DEBUG_WITH_TYPE( + DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n"); + continue; + } + const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN)); if (!PhiSCEV) continue; @@ -1274,9 +1274,9 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, InsertedValues.insert(AddRecPhiMatch); // Remember the increment. rememberInstruction(IncV); - // Those values were not actually inserted but re-used. - ReusedValues.insert(AddRecPhiMatch); - ReusedValues.insert(IncV); + // Those values were not actually inserted but re-used. + ReusedValues.insert(AddRecPhiMatch); + ReusedValues.insert(IncV); return AddRecPhiMatch; } } @@ -1297,9 +1297,9 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // Expand code for the start value into the loop preheader. assert(L->getLoopPreheader() && "Can't expand add recurrences without a loop preheader!"); - Value *StartV = - expandCodeForImpl(Normalized->getStart(), ExpandTy, - L->getLoopPreheader()->getTerminator(), false); + Value *StartV = + expandCodeForImpl(Normalized->getStart(), ExpandTy, + L->getLoopPreheader()->getTerminator(), false); // StartV must have been be inserted into L's preheader to dominate the new // phi. @@ -1317,8 +1317,8 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, if (useSubtract) Step = SE.getNegativeSCEV(Step); // Expand the step somewhere that dominates the loop header. - Value *StepV = expandCodeForImpl( - Step, IntTy, &*L->getHeader()->getFirstInsertionPt(), false); + Value *StepV = expandCodeForImpl( + Step, IntTy, &*L->getHeader()->getFirstInsertionPt(), false); // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if // we actually do emit an addition. It does not apply if we emit a @@ -1440,17 +1440,17 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { assert(LatchBlock && "PostInc mode requires a unique loop latch!"); Result = PN->getIncomingValueForBlock(LatchBlock); - // We might be introducing a new use of the post-inc IV that is not poison - // safe, in which case we should drop poison generating flags. Only keep - // those flags for which SCEV has proven that they always hold. - if (isa<OverflowingBinaryOperator>(Result)) { - auto *I = cast<Instruction>(Result); - if (!S->hasNoUnsignedWrap()) - I->setHasNoUnsignedWrap(false); - if (!S->hasNoSignedWrap()) - I->setHasNoSignedWrap(false); - } - + // We might be introducing a new use of the post-inc IV that is not poison + // safe, in which case we should drop poison generating flags. Only keep + // those flags for which SCEV has proven that they always hold. + if (isa<OverflowingBinaryOperator>(Result)) { + auto *I = cast<Instruction>(Result); + if (!S->hasNoUnsignedWrap()) + I->setHasNoUnsignedWrap(false); + if (!S->hasNoSignedWrap()) + I->setHasNoSignedWrap(false); + } + // For an expansion to use the postinc form, the client must call // expandCodeFor with an InsertPoint that is either outside the PostIncLoop // or dominated by IVIncInsertPos. @@ -1474,8 +1474,8 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { { // Expand the step somewhere that dominates the loop header. 
SCEVInsertPointGuard Guard(Builder, this); - StepV = expandCodeForImpl( - Step, IntTy, &*L->getHeader()->getFirstInsertionPt(), false); + StepV = expandCodeForImpl( + Step, IntTy, &*L->getHeader()->getFirstInsertionPt(), false); } Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); } @@ -1489,13 +1489,13 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { if (ResTy != SE.getEffectiveSCEVType(ResTy)) Result = InsertNoopCastOfTo(Result, SE.getEffectiveSCEVType(ResTy)); // Truncate the result. - if (TruncTy != Result->getType()) + if (TruncTy != Result->getType()) Result = Builder.CreateTrunc(Result, TruncTy); - + // Invert the result. - if (InvertStep) - Result = Builder.CreateSub( - expandCodeForImpl(Normalized->getStart(), TruncTy, false), Result); + if (InvertStep) + Result = Builder.CreateSub( + expandCodeForImpl(Normalized->getStart(), TruncTy, false), Result); } // Re-apply any non-loop-dominating scale. @@ -1503,22 +1503,22 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { assert(S->isAffine() && "Can't linearly scale non-affine recurrences."); Result = InsertNoopCastOfTo(Result, IntTy); Result = Builder.CreateMul(Result, - expandCodeForImpl(PostLoopScale, IntTy, false)); + expandCodeForImpl(PostLoopScale, IntTy, false)); } // Re-apply any non-loop-dominating offset. if (PostLoopOffset) { if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) { if (Result->getType()->isIntegerTy()) { - Value *Base = expandCodeForImpl(PostLoopOffset, ExpandTy, false); + Value *Base = expandCodeForImpl(PostLoopOffset, ExpandTy, false); Result = expandAddToGEP(SE.getUnknown(Result), PTy, IntTy, Base); } else { Result = expandAddToGEP(PostLoopOffset, PTy, IntTy, Result); } } else { Result = InsertNoopCastOfTo(Result, IntTy); - Result = Builder.CreateAdd( - Result, expandCodeForImpl(PostLoopOffset, IntTy, false)); + Result = Builder.CreateAdd( + Result, expandCodeForImpl(PostLoopOffset, IntTy, false)); } } @@ -1559,15 +1559,15 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(), S->getNoWrapFlags(SCEV::FlagNW))); BasicBlock::iterator NewInsertPt = - findInsertPointAfter(cast<Instruction>(V), &*Builder.GetInsertPoint()); - V = expandCodeForImpl(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr, - &*NewInsertPt, false); + findInsertPointAfter(cast<Instruction>(V), &*Builder.GetInsertPoint()); + V = expandCodeForImpl(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr, + &*NewInsertPt, false); return V; } // {X,+,F} --> X + {0,+,F} if (!S->getStart()->isZero()) { - SmallVector<const SCEV *, 4> NewOps(S->operands()); + SmallVector<const SCEV *, 4> NewOps(S->operands()); NewOps[0] = SE.getConstant(Ty, 0); const SCEV *Rest = SE.getAddRecExpr(NewOps, L, S->getNoWrapFlags(SCEV::FlagNW)); @@ -1674,34 +1674,34 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { return expand(T); } -Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) { - Value *V = - expandCodeForImpl(S->getOperand(), S->getOperand()->getType(), false); - return Builder.CreatePtrToInt(V, S->getType()); -} - +Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) { + Value *V = + expandCodeForImpl(S->getOperand(), S->getOperand()->getType(), false); + return Builder.CreatePtrToInt(V, S->getType()); +} + Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) { Type *Ty = SE.getEffectiveSCEVType(S->getType()); - Value *V = expandCodeForImpl( - S->getOperand(), 
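// Editor's sketch: the "{X,+,F} --> X + {0,+,F}" rewrite in visitAddRecExpr
// peels a non-zero start off the recurrence so the remainder starts at zero
// (useful, e.g., for folding the start into a GEP base). For an affine
// recurrence the identity is just linearity; a toy evaluator, assuming a
// constant integer step:

// Value of the affine recurrence {Start,+,Step} at iteration Iter.
long evalAffineAddRec(long Start, long Step, unsigned long Iter) {
  return Start + (long)Iter * Step;
}
// For every Iter: evalAffineAddRec(X, F, Iter)
//              == X + evalAffineAddRec(0, F, Iter).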
SE.getEffectiveSCEVType(S->getOperand()->getType()), - false); - return Builder.CreateTrunc(V, Ty); + Value *V = expandCodeForImpl( + S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()), + false); + return Builder.CreateTrunc(V, Ty); } Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) { Type *Ty = SE.getEffectiveSCEVType(S->getType()); - Value *V = expandCodeForImpl( - S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()), - false); - return Builder.CreateZExt(V, Ty); + Value *V = expandCodeForImpl( + S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()), + false); + return Builder.CreateZExt(V, Ty); } Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) { Type *Ty = SE.getEffectiveSCEVType(S->getType()); - Value *V = expandCodeForImpl( - S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()), - false); - return Builder.CreateSExt(V, Ty); + Value *V = expandCodeForImpl( + S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()), + false); + return Builder.CreateSExt(V, Ty); } Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { @@ -1715,7 +1715,7 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { Ty = SE.getEffectiveSCEVType(Ty); LHS = InsertNoopCastOfTo(LHS, Ty); } - Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); + Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); Value *ICmp = Builder.CreateICmpSGT(LHS, RHS); Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax"); LHS = Sel; @@ -1738,7 +1738,7 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { Ty = SE.getEffectiveSCEVType(Ty); LHS = InsertNoopCastOfTo(LHS, Ty); } - Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); + Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); Value *ICmp = Builder.CreateICmpUGT(LHS, RHS); Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax"); LHS = Sel; @@ -1761,7 +1761,7 @@ Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) { Ty = SE.getEffectiveSCEVType(Ty); LHS = InsertNoopCastOfTo(LHS, Ty); } - Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); + Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); Value *ICmp = Builder.CreateICmpSLT(LHS, RHS); Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin"); LHS = Sel; @@ -1784,7 +1784,7 @@ Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) { Ty = SE.getEffectiveSCEVType(Ty); LHS = InsertNoopCastOfTo(LHS, Ty); } - Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); + Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); Value *ICmp = Builder.CreateICmpULT(LHS, RHS); Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin"); LHS = Sel; @@ -1796,45 +1796,45 @@ Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) { return LHS; } -Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, - Instruction *IP, bool Root) { +Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, + Instruction *IP, bool Root) { setInsertPoint(IP); - Value *V = expandCodeForImpl(SH, Ty, Root); - return V; + Value *V = expandCodeForImpl(SH, Ty, Root); + return V; } -Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, bool Root) { +Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, bool Root) { // Expand the code for this SCEV. 
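// Editor's sketch: the visitSMaxExpr/visitUMaxExpr/visitSMinExpr/visitUMinExpr
// hunks above all lower an n-ary min/max to a chain of compare+select pairs
// (icmp sgt/ugt/slt/ult feeding a select named "smax", "umax", ...). The
// scalar equivalent of the emitted chain for the signed-max case; the
// expander walks operands in a different order, but max is associative and
// commutative, so the fold is the same:
#include <cstdint>
#include <vector>

int64_t expandSMax(const std::vector<int64_t> &Ops) { // assumes Ops non-empty
  int64_t LHS = Ops[0];
  for (size_t i = 1; i < Ops.size(); ++i) {
    int64_t RHS = Ops[i];
    LHS = LHS > RHS ? LHS : RHS;   // one "icmp sgt" + "select" per operand
  }
  return LHS;
}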
Value *V = expand(SH); - - if (PreserveLCSSA) { - if (auto *Inst = dyn_cast<Instruction>(V)) { - // Create a temporary instruction to at the current insertion point, so we - // can hand it off to the helper to create LCSSA PHIs if required for the - // new use. - // FIXME: Ideally formLCSSAForInstructions (used in fixupLCSSAFormFor) - // would accept a insertion point and return an LCSSA phi for that - // insertion point, so there is no need to insert & remove the temporary - // instruction. - Instruction *Tmp; - if (Inst->getType()->isIntegerTy()) - Tmp = - cast<Instruction>(Builder.CreateAdd(Inst, Inst, "tmp.lcssa.user")); - else { - assert(Inst->getType()->isPointerTy()); - Tmp = cast<Instruction>( - Builder.CreateGEP(Inst, Builder.getInt32(1), "tmp.lcssa.user")); - } - V = fixupLCSSAFormFor(Tmp, 0); - - // Clean up temporary instruction. - InsertedValues.erase(Tmp); - InsertedPostIncValues.erase(Tmp); - Tmp->eraseFromParent(); - } - } - - InsertedExpressions[std::make_pair(SH, &*Builder.GetInsertPoint())] = V; + + if (PreserveLCSSA) { + if (auto *Inst = dyn_cast<Instruction>(V)) { + // Create a temporary instruction to at the current insertion point, so we + // can hand it off to the helper to create LCSSA PHIs if required for the + // new use. + // FIXME: Ideally formLCSSAForInstructions (used in fixupLCSSAFormFor) + // would accept a insertion point and return an LCSSA phi for that + // insertion point, so there is no need to insert & remove the temporary + // instruction. + Instruction *Tmp; + if (Inst->getType()->isIntegerTy()) + Tmp = + cast<Instruction>(Builder.CreateAdd(Inst, Inst, "tmp.lcssa.user")); + else { + assert(Inst->getType()->isPointerTy()); + Tmp = cast<Instruction>( + Builder.CreateGEP(Inst, Builder.getInt32(1), "tmp.lcssa.user")); + } + V = fixupLCSSAFormFor(Tmp, 0); + + // Clean up temporary instruction. + InsertedValues.erase(Tmp); + InsertedPostIncValues.erase(Tmp); + Tmp->eraseFromParent(); + } + } + + InsertedExpressions[std::make_pair(SH, &*Builder.GetInsertPoint())] = V; if (Ty) { assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) && "non-trivial casts should be done with the SCEVs directly!"); @@ -1918,12 +1918,12 @@ Value *SCEVExpander::expand(const SCEV *S) { // there) so that it is guaranteed to dominate any user inside the loop. if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) InsertPt = &*L->getHeader()->getFirstInsertionPt(); - + while (InsertPt->getIterator() != Builder.GetInsertPoint() && (isInsertedInstruction(InsertPt) || - isa<DbgInfoIntrinsic>(InsertPt))) { + isa<DbgInfoIntrinsic>(InsertPt))) { InsertPt = &*std::next(InsertPt->getIterator()); - } + } break; } } @@ -1976,25 +1976,25 @@ Value *SCEVExpander::expand(const SCEV *S) { } void SCEVExpander::rememberInstruction(Value *I) { - auto DoInsert = [this](Value *V) { - if (!PostIncLoops.empty()) - InsertedPostIncValues.insert(V); - else - InsertedValues.insert(V); - }; - DoInsert(I); - - if (!PreserveLCSSA) - return; - - if (auto *Inst = dyn_cast<Instruction>(I)) { - // A new instruction has been added, which might introduce new uses outside - // a defining loop. Fix LCSSA from for each operand of the new instruction, - // if required. 
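// Editor's sketch: expandCodeForImpl above finishes by recording the result
// in InsertedExpressions keyed by the (SCEV, insertion point) pair, and
// expand() consults the same map, so re-expanding an expression at the same
// spot reuses the previously emitted value instead of duplicating IR. A
// minimal standalone analogue of that cache (opaque pointers stand in for
// SCEV* and Instruction*; int stands in for the expanded Value*):
#include <map>
#include <utility>

struct ExprCache {
  std::map<std::pair<const void *, const void *>, int> Memo;

  // DoExpand is only invoked on a cache miss; hits return the cached value.
  int expand(const void *Expr, const void *InsertPt,
             int (*DoExpand)(const void *)) {
    auto Key = std::make_pair(Expr, InsertPt);
    auto It = Memo.find(Key);
    if (It != Memo.end())
      return It->second;
    return Memo[Key] = DoExpand(Expr);
  }
};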
- for (unsigned OpIdx = 0, OpEnd = Inst->getNumOperands(); OpIdx != OpEnd; - OpIdx++) - fixupLCSSAFormFor(Inst, OpIdx); - } + auto DoInsert = [this](Value *V) { + if (!PostIncLoops.empty()) + InsertedPostIncValues.insert(V); + else + InsertedValues.insert(V); + }; + DoInsert(I); + + if (!PreserveLCSSA) + return; + + if (auto *Inst = dyn_cast<Instruction>(I)) { + // A new instruction has been added, which might introduce new uses outside + // a defining loop. Fix LCSSA from for each operand of the new instruction, + // if required. + for (unsigned OpIdx = 0, OpEnd = Inst->getNumOperands(); OpIdx != OpEnd; + OpIdx++) + fixupLCSSAFormFor(Inst, OpIdx); + } } /// replaceCongruentIVs - Check for congruent phis in this loop header and @@ -2017,8 +2017,8 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, // Put pointers at the back and make sure pointer < pointer = false. if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy()) return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy(); - return RHS->getType()->getPrimitiveSizeInBits().getFixedSize() < - LHS->getType()->getPrimitiveSizeInBits().getFixedSize(); + return RHS->getType()->getPrimitiveSizeInBits().getFixedSize() < + LHS->getType()->getPrimitiveSizeInBits().getFixedSize(); }); unsigned NumElim = 0; @@ -2126,8 +2126,8 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, } DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi << '\n'); - DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Original iv: " - << *OrigPhiRef << '\n'); + DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Original iv: " + << *OrigPhiRef << '\n'); ++NumElim; Value *NewIV = OrigPhiRef; if (OrigPhiRef->getType() != Phi->getType()) { @@ -2179,156 +2179,156 @@ SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At, return None; } -template<typename T> static int costAndCollectOperands( - const SCEVOperand &WorkItem, const TargetTransformInfo &TTI, - TargetTransformInfo::TargetCostKind CostKind, - SmallVectorImpl<SCEVOperand> &Worklist) { - - const T *S = cast<T>(WorkItem.S); - int Cost = 0; - // Object to help map SCEV operands to expanded IR instructions. - struct OperationIndices { - OperationIndices(unsigned Opc, size_t min, size_t max) : - Opcode(Opc), MinIdx(min), MaxIdx(max) { } - unsigned Opcode; - size_t MinIdx; - size_t MaxIdx; - }; - - // Collect the operations of all the instructions that will be needed to - // expand the SCEVExpr. This is so that when we come to cost the operands, - // we know what the generated user(s) will be. 
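// Editor's sketch: replaceCongruentIVs, continued above, keeps a single
// representative induction variable among header PHIs that compute the same
// recurrence and rewrites the rest in terms of it (truncating a wider
// survivor where needed). The core deduplication restated over a simplified
// (start, step) key; real congruence also has to match the loop, type, and
// increment position:
#include <map>
#include <utility>
#include <vector>

struct IV { long Start, Step; int Id; };

// For each IV, the Id of the surviving representative it is rewritten to.
std::vector<int> replaceCongruent(const std::vector<IV> &IVs) {
  std::map<std::pair<long, long>, int> Canon;
  std::vector<int> Repl(IVs.size());
  for (size_t i = 0; i < IVs.size(); ++i) {
    auto Key = std::make_pair(IVs[i].Start, IVs[i].Step);
    Repl[i] = Canon.emplace(Key, IVs[i].Id).first->second; // first IV wins
  }
  return Repl;
}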
- SmallVector<OperationIndices, 2> Operations; - - auto CastCost = [&](unsigned Opcode) { - Operations.emplace_back(Opcode, 0, 0); - return TTI.getCastInstrCost(Opcode, S->getType(), - S->getOperand(0)->getType(), - TTI::CastContextHint::None, CostKind); - }; - - auto ArithCost = [&](unsigned Opcode, unsigned NumRequired, - unsigned MinIdx = 0, unsigned MaxIdx = 1) { - Operations.emplace_back(Opcode, MinIdx, MaxIdx); - return NumRequired * - TTI.getArithmeticInstrCost(Opcode, S->getType(), CostKind); - }; - - auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, - unsigned MinIdx, unsigned MaxIdx) { - Operations.emplace_back(Opcode, MinIdx, MaxIdx); - Type *OpType = S->getOperand(0)->getType(); - return NumRequired * TTI.getCmpSelInstrCost( - Opcode, OpType, CmpInst::makeCmpResultType(OpType), - CmpInst::BAD_ICMP_PREDICATE, CostKind); - }; - - switch (S->getSCEVType()) { - case scCouldNotCompute: - llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - case scUnknown: - case scConstant: - return 0; - case scPtrToInt: - Cost = CastCost(Instruction::PtrToInt); - break; - case scTruncate: - Cost = CastCost(Instruction::Trunc); - break; - case scZeroExtend: - Cost = CastCost(Instruction::ZExt); - break; - case scSignExtend: - Cost = CastCost(Instruction::SExt); - break; - case scUDivExpr: { - unsigned Opcode = Instruction::UDiv; - if (auto *SC = dyn_cast<SCEVConstant>(S->getOperand(1))) - if (SC->getAPInt().isPowerOf2()) - Opcode = Instruction::LShr; - Cost = ArithCost(Opcode, 1); - break; - } - case scAddExpr: - Cost = ArithCost(Instruction::Add, S->getNumOperands() - 1); - break; - case scMulExpr: - // TODO: this is a very pessimistic cost modelling for Mul, - // because of Bin Pow algorithm actually used by the expander, - // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN(). - Cost = ArithCost(Instruction::Mul, S->getNumOperands() - 1); - break; - case scSMaxExpr: - case scUMaxExpr: - case scSMinExpr: - case scUMinExpr: { - Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1); - Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2); - break; - } - case scAddRecExpr: { - // In this polynominal, we may have some zero operands, and we shouldn't - // really charge for those. So how many non-zero coeffients are there? - int NumTerms = llvm::count_if(S->operands(), [](const SCEV *Op) { - return !Op->isZero(); - }); - - assert(NumTerms >= 1 && "Polynominal should have at least one term."); - assert(!(*std::prev(S->operands().end()))->isZero() && - "Last operand should not be zero"); - - // Ignoring constant term (operand 0), how many of the coeffients are u> 1? - int NumNonZeroDegreeNonOneTerms = - llvm::count_if(S->operands(), [](const SCEV *Op) { - auto *SConst = dyn_cast<SCEVConstant>(Op); - return !SConst || SConst->getAPInt().ugt(1); - }); - - // Much like with normal add expr, the polynominal will require - // one less addition than the number of it's terms. - int AddCost = ArithCost(Instruction::Add, NumTerms - 1, - /*MinIdx*/1, /*MaxIdx*/1); - // Here, *each* one of those will require a multiplication. - int MulCost = ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms); - Cost = AddCost + MulCost; - - // What is the degree of this polynominal? - int PolyDegree = S->getNumOperands() - 1; - assert(PolyDegree >= 1 && "Should be at least affine."); - - // The final term will be: - // Op_{PolyDegree} * x ^ {PolyDegree} - // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations. 
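// Editor's sketch: costAndCollectOperands above prices each SCEV node
// through three small lambdas (CastCost, ArithCost, CmpSelCost) that do
// double duty: they return the TTI cost of the IR the node will expand to
// and record an (opcode, operand-index range) entry so operands pushed onto
// the worklist know what their future user instruction will be. The min/max
// case, for instance, charges N-1 compares plus N-1 selects for an N-operand
// expression; restated as arithmetic, with unit costs as illustrative
// stand-ins for the TTI queries:
unsigned minMaxExpansionCost(unsigned NumOperands, unsigned CmpCost,
                             unsigned SelCost) {
  return (NumOperands - 1) * (CmpCost + SelCost); // chain of icmp + select
}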
- // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for - // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free. - // FIXME: this is conservatively correct, but might be overly pessimistic. - Cost += MulCost * (PolyDegree - 1); - break; - } - } - - for (auto &CostOp : Operations) { - for (auto SCEVOp : enumerate(S->operands())) { - // Clamp the index to account for multiple IR operations being chained. - size_t MinIdx = std::max(SCEVOp.index(), CostOp.MinIdx); - size_t OpIdx = std::min(MinIdx, CostOp.MaxIdx); - Worklist.emplace_back(CostOp.Opcode, OpIdx, SCEVOp.value()); - } - } - return Cost; -} - +template<typename T> static int costAndCollectOperands( + const SCEVOperand &WorkItem, const TargetTransformInfo &TTI, + TargetTransformInfo::TargetCostKind CostKind, + SmallVectorImpl<SCEVOperand> &Worklist) { + + const T *S = cast<T>(WorkItem.S); + int Cost = 0; + // Object to help map SCEV operands to expanded IR instructions. + struct OperationIndices { + OperationIndices(unsigned Opc, size_t min, size_t max) : + Opcode(Opc), MinIdx(min), MaxIdx(max) { } + unsigned Opcode; + size_t MinIdx; + size_t MaxIdx; + }; + + // Collect the operations of all the instructions that will be needed to + // expand the SCEVExpr. This is so that when we come to cost the operands, + // we know what the generated user(s) will be. + SmallVector<OperationIndices, 2> Operations; + + auto CastCost = [&](unsigned Opcode) { + Operations.emplace_back(Opcode, 0, 0); + return TTI.getCastInstrCost(Opcode, S->getType(), + S->getOperand(0)->getType(), + TTI::CastContextHint::None, CostKind); + }; + + auto ArithCost = [&](unsigned Opcode, unsigned NumRequired, + unsigned MinIdx = 0, unsigned MaxIdx = 1) { + Operations.emplace_back(Opcode, MinIdx, MaxIdx); + return NumRequired * + TTI.getArithmeticInstrCost(Opcode, S->getType(), CostKind); + }; + + auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, + unsigned MinIdx, unsigned MaxIdx) { + Operations.emplace_back(Opcode, MinIdx, MaxIdx); + Type *OpType = S->getOperand(0)->getType(); + return NumRequired * TTI.getCmpSelInstrCost( + Opcode, OpType, CmpInst::makeCmpResultType(OpType), + CmpInst::BAD_ICMP_PREDICATE, CostKind); + }; + + switch (S->getSCEVType()) { + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + case scUnknown: + case scConstant: + return 0; + case scPtrToInt: + Cost = CastCost(Instruction::PtrToInt); + break; + case scTruncate: + Cost = CastCost(Instruction::Trunc); + break; + case scZeroExtend: + Cost = CastCost(Instruction::ZExt); + break; + case scSignExtend: + Cost = CastCost(Instruction::SExt); + break; + case scUDivExpr: { + unsigned Opcode = Instruction::UDiv; + if (auto *SC = dyn_cast<SCEVConstant>(S->getOperand(1))) + if (SC->getAPInt().isPowerOf2()) + Opcode = Instruction::LShr; + Cost = ArithCost(Opcode, 1); + break; + } + case scAddExpr: + Cost = ArithCost(Instruction::Add, S->getNumOperands() - 1); + break; + case scMulExpr: + // TODO: this is a very pessimistic cost modelling for Mul, + // because of Bin Pow algorithm actually used by the expander, + // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN(). 
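// Editor's sketch: the scAddRecExpr case above charges for evaluating the
// polynomial c0 + c1*x + ... + cd*x^d directly: one add per non-zero term
// beyond the first, one multiply per coefficient that is not 0 or 1, and
// d-1 further multiplies to build the powers x^2..x^d (each x^k reuses
// x^(k-1)). A loose standalone restatement, folding the unsigned "u> 1"
// coefficient test down to plain integers:
#include <vector>

// Precondition (asserted upstream): Degree >= 1 and at least one term and
// the leading coefficient are non-zero.
unsigned addRecExpansionCost(const std::vector<long> &Coef, // Coef[i] is ci
                             unsigned AddCost, unsigned MulCost) {
  unsigned Degree = Coef.size() - 1;
  unsigned NumTerms = 0, NumNonTrivialMuls = 0;
  for (size_t i = 0; i < Coef.size(); ++i) {
    if (Coef[i] != 0)
      ++NumTerms;
    if (i > 0 && Coef[i] != 0 && Coef[i] != 1)
      ++NumNonTrivialMuls;          // ci * x^i needs a real multiply
  }
  return (NumTerms - 1) * AddCost       // chain the non-zero terms
       + NumNonTrivialMuls * MulCost    // scale terms by their coefficients
       + (Degree - 1) * MulCost;        // build x^2 .. x^Degree
}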
+ Cost = ArithCost(Instruction::Mul, S->getNumOperands() - 1); + break; + case scSMaxExpr: + case scUMaxExpr: + case scSMinExpr: + case scUMinExpr: { + Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1); + Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2); + break; + } + case scAddRecExpr: { + // In this polynominal, we may have some zero operands, and we shouldn't + // really charge for those. So how many non-zero coeffients are there? + int NumTerms = llvm::count_if(S->operands(), [](const SCEV *Op) { + return !Op->isZero(); + }); + + assert(NumTerms >= 1 && "Polynominal should have at least one term."); + assert(!(*std::prev(S->operands().end()))->isZero() && + "Last operand should not be zero"); + + // Ignoring constant term (operand 0), how many of the coeffients are u> 1? + int NumNonZeroDegreeNonOneTerms = + llvm::count_if(S->operands(), [](const SCEV *Op) { + auto *SConst = dyn_cast<SCEVConstant>(Op); + return !SConst || SConst->getAPInt().ugt(1); + }); + + // Much like with normal add expr, the polynominal will require + // one less addition than the number of it's terms. + int AddCost = ArithCost(Instruction::Add, NumTerms - 1, + /*MinIdx*/1, /*MaxIdx*/1); + // Here, *each* one of those will require a multiplication. + int MulCost = ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms); + Cost = AddCost + MulCost; + + // What is the degree of this polynominal? + int PolyDegree = S->getNumOperands() - 1; + assert(PolyDegree >= 1 && "Should be at least affine."); + + // The final term will be: + // Op_{PolyDegree} * x ^ {PolyDegree} + // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations. + // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for + // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free. + // FIXME: this is conservatively correct, but might be overly pessimistic. + Cost += MulCost * (PolyDegree - 1); + break; + } + } + + for (auto &CostOp : Operations) { + for (auto SCEVOp : enumerate(S->operands())) { + // Clamp the index to account for multiple IR operations being chained. + size_t MinIdx = std::max(SCEVOp.index(), CostOp.MinIdx); + size_t OpIdx = std::min(MinIdx, CostOp.MaxIdx); + Worklist.emplace_back(CostOp.Opcode, OpIdx, SCEVOp.value()); + } + } + return Cost; +} + bool SCEVExpander::isHighCostExpansionHelper( - const SCEVOperand &WorkItem, Loop *L, const Instruction &At, - int &BudgetRemaining, const TargetTransformInfo &TTI, - SmallPtrSetImpl<const SCEV *> &Processed, - SmallVectorImpl<SCEVOperand> &Worklist) { + const SCEVOperand &WorkItem, Loop *L, const Instruction &At, + int &BudgetRemaining, const TargetTransformInfo &TTI, + SmallPtrSetImpl<const SCEV *> &Processed, + SmallVectorImpl<SCEVOperand> &Worklist) { if (BudgetRemaining < 0) return true; // Already run out of budget, give up. - const SCEV *S = WorkItem.S; + const SCEV *S = WorkItem.S; // Was the cost of expansion of this expression already accounted for? - if (!isa<SCEVConstant>(S) && !Processed.insert(S).second) + if (!isa<SCEVConstant>(S) && !Processed.insert(S).second) return false; // We have already accounted for this expression. // If we can find an existing value for this scev available at the point "At" @@ -2336,37 +2336,37 @@ bool SCEVExpander::isHighCostExpansionHelper( if (getRelatedExistingExpansion(S, &At, L)) return false; // Consider the expression to be free. - TargetTransformInfo::TargetCostKind CostKind = - L->getHeader()->getParent()->hasMinSize() - ? 
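// Editor's sketch: isHighCostExpansionHelper, whose signature lands above,
// is driven by a signed budget: every node visited subtracts its expansion
// cost from BudgetRemaining, and the caller keeps pulling work items until
// either the list empties (cheap enough) or the budget dips below zero. The
// driver discipline, in miniature:
#include <vector>

struct WorkItem { int Cost; };

bool isHighCostExpansion(std::vector<WorkItem> Worklist, int Budget) {
  while (!Worklist.empty()) {
    WorkItem WI = Worklist.back();
    Worklist.pop_back();
    Budget -= WI.Cost;        // account for this node's expansion
    if (Budget < 0)
      return true;            // ran out of budget: deemed high-cost
    // The real code may push WI's operands here for later accounting.
  }
  return false;
}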
TargetTransformInfo::TCK_CodeSize - : TargetTransformInfo::TCK_RecipThroughput; - + TargetTransformInfo::TargetCostKind CostKind = + L->getHeader()->getParent()->hasMinSize() + ? TargetTransformInfo::TCK_CodeSize + : TargetTransformInfo::TCK_RecipThroughput; + switch (S->getSCEVType()) { - case scCouldNotCompute: - llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); case scUnknown: - // Assume to be zero-cost. - return false; - case scConstant: { - // Only evalulate the costs of constants when optimizing for size. - if (CostKind != TargetTransformInfo::TCK_CodeSize) - return 0; - const APInt &Imm = cast<SCEVConstant>(S)->getAPInt(); - Type *Ty = S->getType(); - BudgetRemaining -= TTI.getIntImmCostInst( - WorkItem.ParentOpcode, WorkItem.OperandIdx, Imm, Ty, CostKind); - return BudgetRemaining < 0; - } - case scTruncate: - case scPtrToInt: - case scZeroExtend: - case scSignExtend: { - int Cost = - costAndCollectOperands<SCEVCastExpr>(WorkItem, TTI, CostKind, Worklist); - BudgetRemaining -= Cost; + // Assume to be zero-cost. + return false; + case scConstant: { + // Only evalulate the costs of constants when optimizing for size. + if (CostKind != TargetTransformInfo::TCK_CodeSize) + return 0; + const APInt &Imm = cast<SCEVConstant>(S)->getAPInt(); + Type *Ty = S->getType(); + BudgetRemaining -= TTI.getIntImmCostInst( + WorkItem.ParentOpcode, WorkItem.OperandIdx, Imm, Ty, CostKind); + return BudgetRemaining < 0; + } + case scTruncate: + case scPtrToInt: + case scZeroExtend: + case scSignExtend: { + int Cost = + costAndCollectOperands<SCEVCastExpr>(WorkItem, TTI, CostKind, Worklist); + BudgetRemaining -= Cost; return false; // Will answer upon next entry into this function. } - case scUDivExpr: { + case scUDivExpr: { // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or // HowManyLessThans produced to compute a precise expression, rather than a // UDiv from the user's code. If we can't find a UDiv in the code with some @@ -2379,36 +2379,36 @@ bool SCEVExpander::isHighCostExpansionHelper( SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L)) return false; // Consider it to be free. - int Cost = - costAndCollectOperands<SCEVUDivExpr>(WorkItem, TTI, CostKind, Worklist); + int Cost = + costAndCollectOperands<SCEVUDivExpr>(WorkItem, TTI, CostKind, Worklist); // Need to count the cost of this UDiv. - BudgetRemaining -= Cost; + BudgetRemaining -= Cost; return false; // Will answer upon next entry into this function. } - case scAddExpr: - case scMulExpr: - case scUMaxExpr: - case scSMaxExpr: - case scUMinExpr: - case scSMinExpr: { - assert(cast<SCEVNAryExpr>(S)->getNumOperands() > 1 && + case scAddExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: + case scUMinExpr: + case scSMinExpr: { + assert(cast<SCEVNAryExpr>(S)->getNumOperands() > 1 && "Nary expr should have more than 1 operand."); // The simple nary expr will require one less op (or pair of ops) // than the number of it's terms. 
- int Cost = - costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind, Worklist); - BudgetRemaining -= Cost; - return BudgetRemaining < 0; - } - case scAddRecExpr: { - assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= 2 && - "Polynomial should be at least linear"); - BudgetRemaining -= costAndCollectOperands<SCEVAddRecExpr>( - WorkItem, TTI, CostKind, Worklist); - return BudgetRemaining < 0; - } - } - llvm_unreachable("Unknown SCEV kind!"); + int Cost = + costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind, Worklist); + BudgetRemaining -= Cost; + return BudgetRemaining < 0; + } + case scAddRecExpr: { + assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= 2 && + "Polynomial should be at least linear"); + BudgetRemaining -= costAndCollectOperands<SCEVAddRecExpr>( + WorkItem, TTI, CostKind, Worklist); + return BudgetRemaining < 0; + } + } + llvm_unreachable("Unknown SCEV kind!"); } Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred, @@ -2429,10 +2429,10 @@ Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred, Value *SCEVExpander::expandEqualPredicate(const SCEVEqualPredicate *Pred, Instruction *IP) { - Value *Expr0 = - expandCodeForImpl(Pred->getLHS(), Pred->getLHS()->getType(), IP, false); - Value *Expr1 = - expandCodeForImpl(Pred->getRHS(), Pred->getRHS()->getType(), IP, false); + Value *Expr0 = + expandCodeForImpl(Pred->getLHS(), Pred->getLHS()->getType(), IP, false); + Value *Expr1 = + expandCodeForImpl(Pred->getRHS(), Pred->getRHS()->getType(), IP, false); Builder.SetInsertPoint(IP); auto *I = Builder.CreateICmpNE(Expr0, Expr1, "ident.check"); @@ -2448,7 +2448,7 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR, const SCEV *ExitCount = SE.getPredicatedBackedgeTakenCount(AR->getLoop(), Pred); - assert(!isa<SCEVCouldNotCompute>(ExitCount) && "Invalid loop count"); + assert(!isa<SCEVCouldNotCompute>(ExitCount) && "Invalid loop count"); const SCEV *Step = AR->getStepRecurrence(SE); const SCEV *Start = AR->getStart(); @@ -2464,16 +2464,16 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR, IntegerType *CountTy = IntegerType::get(Loc->getContext(), SrcBits); Builder.SetInsertPoint(Loc); - Value *TripCountVal = expandCodeForImpl(ExitCount, CountTy, Loc, false); + Value *TripCountVal = expandCodeForImpl(ExitCount, CountTy, Loc, false); IntegerType *Ty = IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy)); Type *ARExpandTy = DL.isNonIntegralPointerType(ARTy) ? 
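// Editor's sketch: expandEqualPredicate, above, materializes a
// SCEVEqualPredicate as a single runtime guard: expand both sides and emit
// an "icmp ne" named "ident.check", so a *true* result means the assumption
// behind the versioned code does not hold. Scalar equivalent:

// True when the versioning assumption LHS == RHS is violated at runtime.
bool identCheck(long LHS, long RHS) {
  return LHS != RHS;
}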
ARTy : Ty; - Value *StepValue = expandCodeForImpl(Step, Ty, Loc, false); - Value *NegStepValue = - expandCodeForImpl(SE.getNegativeSCEV(Step), Ty, Loc, false); - Value *StartValue = expandCodeForImpl(Start, ARExpandTy, Loc, false); + Value *StepValue = expandCodeForImpl(Step, Ty, Loc, false); + Value *NegStepValue = + expandCodeForImpl(SE.getNegativeSCEV(Step), Ty, Loc, false); + Value *StartValue = expandCodeForImpl(Start, ARExpandTy, Loc, false); ConstantInt *Zero = ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits)); @@ -2533,7 +2533,7 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR, EndCheck = Builder.CreateOr(EndCheck, BackedgeCheck); } - return Builder.CreateOr(EndCheck, OfMul); + return Builder.CreateOr(EndCheck, OfMul); } Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred, @@ -2576,34 +2576,34 @@ Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union, return Check; } -Value *SCEVExpander::fixupLCSSAFormFor(Instruction *User, unsigned OpIdx) { - assert(PreserveLCSSA); - SmallVector<Instruction *, 1> ToUpdate; - - auto *OpV = User->getOperand(OpIdx); - auto *OpI = dyn_cast<Instruction>(OpV); - if (!OpI) - return OpV; - - Loop *DefLoop = SE.LI.getLoopFor(OpI->getParent()); - Loop *UseLoop = SE.LI.getLoopFor(User->getParent()); - if (!DefLoop || UseLoop == DefLoop || DefLoop->contains(UseLoop)) - return OpV; - - ToUpdate.push_back(OpI); - SmallVector<PHINode *, 16> PHIsToRemove; - formLCSSAForInstructions(ToUpdate, SE.DT, SE.LI, &SE, Builder, &PHIsToRemove); - for (PHINode *PN : PHIsToRemove) { - if (!PN->use_empty()) - continue; - InsertedValues.erase(PN); - InsertedPostIncValues.erase(PN); - PN->eraseFromParent(); - } - - return User->getOperand(OpIdx); -} - +Value *SCEVExpander::fixupLCSSAFormFor(Instruction *User, unsigned OpIdx) { + assert(PreserveLCSSA); + SmallVector<Instruction *, 1> ToUpdate; + + auto *OpV = User->getOperand(OpIdx); + auto *OpI = dyn_cast<Instruction>(OpV); + if (!OpI) + return OpV; + + Loop *DefLoop = SE.LI.getLoopFor(OpI->getParent()); + Loop *UseLoop = SE.LI.getLoopFor(User->getParent()); + if (!DefLoop || UseLoop == DefLoop || DefLoop->contains(UseLoop)) + return OpV; + + ToUpdate.push_back(OpI); + SmallVector<PHINode *, 16> PHIsToRemove; + formLCSSAForInstructions(ToUpdate, SE.DT, SE.LI, &SE, Builder, &PHIsToRemove); + for (PHINode *PN : PHIsToRemove) { + if (!PN->use_empty()) + continue; + InsertedValues.erase(PN); + InsertedPostIncValues.erase(PN); + PN->eraseFromParent(); + } + + return User->getOperand(OpIdx); +} + namespace { // Search for a SCEV subexpression that is not safe to expand. Any expression // that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely @@ -2681,40 +2681,40 @@ bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint, } return false; } - -SCEVExpanderCleaner::~SCEVExpanderCleaner() { - // Result is used, nothing to remove. - if (ResultUsed) - return; - - auto InsertedInstructions = Expander.getAllInsertedInstructions(); -#ifndef NDEBUG - SmallPtrSet<Instruction *, 8> InsertedSet(InsertedInstructions.begin(), - InsertedInstructions.end()); - (void)InsertedSet; -#endif - // Remove sets with value handles. - Expander.clear(); - - // Sort so that earlier instructions do not dominate later instructions. - stable_sort(InsertedInstructions, [this](Instruction *A, Instruction *B) { - return DT.dominates(B, A); - }); - // Remove all inserted instructions. 
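// Editor's sketch: generateOverflowCheck, completed above, tests at runtime
// whether the add-recurrence Start + i*Step can wrap for any i below the
// trip count: the Step*TripCount multiply is checked for overflow and the
// resulting end value is compared against Start, with all failure bits OR'ed
// together (EndCheck | OfMul). A scalar sketch of the unsigned, increasing
// case using the GCC/Clang overflow builtin; the real code also handles
// negative steps and a backedge count wider than the IV:
#include <cstdint>

bool addRecMightWrap(uint64_t Start, uint64_t Step, uint64_t TripCount) {
  uint64_t Span;
  // OfMul: does Step * TripCount itself overflow?
  bool OfMul = __builtin_mul_overflow(Step, TripCount, &Span);
  // EndCheck: does Start + Span wrap past the top of the range?
  bool EndCheck = Start + Span < Start;
  return OfMul || EndCheck;
}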
- for (Instruction *I : InsertedInstructions) { - -#ifndef NDEBUG - assert(all_of(I->users(), - [&InsertedSet](Value *U) { - return InsertedSet.contains(cast<Instruction>(U)); - }) && - "removed instruction should only be used by instructions inserted " - "during expansion"); -#endif - assert(!I->getType()->isVoidTy() && - "inserted instruction should have non-void types"); - I->replaceAllUsesWith(UndefValue::get(I->getType())); - I->eraseFromParent(); - } + +SCEVExpanderCleaner::~SCEVExpanderCleaner() { + // Result is used, nothing to remove. + if (ResultUsed) + return; + + auto InsertedInstructions = Expander.getAllInsertedInstructions(); +#ifndef NDEBUG + SmallPtrSet<Instruction *, 8> InsertedSet(InsertedInstructions.begin(), + InsertedInstructions.end()); + (void)InsertedSet; +#endif + // Remove sets with value handles. + Expander.clear(); + + // Sort so that earlier instructions do not dominate later instructions. + stable_sort(InsertedInstructions, [this](Instruction *A, Instruction *B) { + return DT.dominates(B, A); + }); + // Remove all inserted instructions. + for (Instruction *I : InsertedInstructions) { + +#ifndef NDEBUG + assert(all_of(I->users(), + [&InsertedSet](Value *U) { + return InsertedSet.contains(cast<Instruction>(U)); + }) && + "removed instruction should only be used by instructions inserted " + "during expansion"); +#endif + assert(!I->getType()->isVoidTy() && + "inserted instruction should have non-void types"); + I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->eraseFromParent(); + } +} } -} diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/SimplifyCFG.cpp index a543253d5f..de9560df97 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/SimplifyCFG.cpp @@ -13,11 +13,11 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/ScopeExit.h" -#include "llvm/ADT/Sequence.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/Sequence.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -61,7 +61,7 @@ #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" -#include "llvm/IR/ValueHandle.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -71,7 +71,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> #include <cassert> @@ -90,12 +90,12 @@ using namespace PatternMatch; #define DEBUG_TYPE "simplifycfg" -cl::opt<bool> llvm::RequireAndPreserveDomTree( - "simplifycfg-require-and-preserve-domtree", cl::Hidden, cl::ZeroOrMore, - cl::init(false), - cl::desc("Temorary development switch used to gradually uplift SimplifyCFG " - "into preserving DomTree,")); - +cl::opt<bool> llvm::RequireAndPreserveDomTree( + "simplifycfg-require-and-preserve-domtree", cl::Hidden, cl::ZeroOrMore, + cl::init(false), + cl::desc("Temorary development switch used to gradually uplift SimplifyCFG " + "into preserving DomTree,")); + // Chosen as 2 so as to be cheap, but still to have 
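// Editor's sketch: ~SCEVExpanderCleaner, above, undoes a speculative
// expansion whose result went unused: it clears the expander's bookkeeping,
// stable-sorts the inserted instructions so that no instruction comes before
// one that dominates it (users are erased before the definitions they
// consume), then replaces remaining uses with undef and erases each one.
// With straight-line emission order standing in for dominance, the required
// deletion order is simply newest-first:
#include <algorithm>
#include <vector>

// Sort emission indices so later (dominated) instructions are erased first.
void orderForErasure(std::vector<int> &EmitOrder) {
  std::stable_sort(EmitOrder.begin(), EmitOrder.end(),
                   [](int A, int B) { return B < A; });
}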
enough power to fold // a select, so the "clamp" idiom (of a min followed by a max) will be caught. // To catch this, we need to fold a compare and a select, hence '2' being the @@ -116,10 +116,10 @@ static cl::opt<bool> DupRet( cl::desc("Duplicate return instructions into unconditional branches")); static cl::opt<bool> - HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), - cl::desc("Hoist common instructions up to the parent block")); - -static cl::opt<bool> + HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), + cl::desc("Hoist common instructions up to the parent block")); + +static cl::opt<bool> SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block")); @@ -153,13 +153,13 @@ MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through")); -// Two is chosen to allow one negation and a logical combine. -static cl::opt<unsigned> - BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, - cl::init(2), - cl::desc("Maximum cost of combining conditions when " - "folding branches")); - +// Two is chosen to allow one negation and a logical combine. +static cl::opt<unsigned> + BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, + cl::init(2), + cl::desc("Maximum cost of combining conditions when " + "folding branches")); + STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping"); @@ -169,22 +169,22 @@ STATISTIC( NumLookupTablesHoles, "Number of switch instructions turned into lookup tables (holes checked)"); STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares"); -STATISTIC(NumFoldValueComparisonIntoPredecessors, - "Number of value comparisons folded into predecessor basic blocks"); -STATISTIC(NumFoldBranchToCommonDest, - "Number of branches folded into predecessor basic block"); -STATISTIC( - NumHoistCommonCode, - "Number of common instruction 'blocks' hoisted up to the begin block"); -STATISTIC(NumHoistCommonInstrs, - "Number of common instructions hoisted up to the begin block"); -STATISTIC(NumSinkCommonCode, - "Number of common instruction 'blocks' sunk down to the end block"); -STATISTIC(NumSinkCommonInstrs, +STATISTIC(NumFoldValueComparisonIntoPredecessors, + "Number of value comparisons folded into predecessor basic blocks"); +STATISTIC(NumFoldBranchToCommonDest, + "Number of branches folded into predecessor basic block"); +STATISTIC( + NumHoistCommonCode, + "Number of common instruction 'blocks' hoisted up to the begin block"); +STATISTIC(NumHoistCommonInstrs, + "Number of common instructions hoisted up to the begin block"); +STATISTIC(NumSinkCommonCode, + "Number of common instruction 'blocks' sunk down to the end block"); +STATISTIC(NumSinkCommonInstrs, "Number of common instructions sunk down to the end block"); STATISTIC(NumSpeculations, "Number of speculative executed instructions"); -STATISTIC(NumInvokes, - "Number of invokes with empty resume blocks simplified into calls"); +STATISTIC(NumInvokes, + "Number of invokes with empty resume blocks simplified into calls"); namespace { @@ -217,9 +217,9 @@ struct ValueEqualityComparisonCase { class SimplifyCFGOpt { const TargetTransformInfo &TTI; - DomTreeUpdater *DTU; + DomTreeUpdater *DTU; const DataLayout &DL; - ArrayRef<WeakVH> LoopHeaders; + ArrayRef<WeakVH> LoopHeaders; 
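// Editor's sketch: the threshold comment above ("Chosen as 2 ... so the
// 'clamp' idiom ... will be caught") refers to folding one compare and one
// select per branch: with a budget of two, both halves of
// clamp(x, lo, hi) = max(lo, min(x, hi)) can be flattened into selects.
// The idiom in scalar form:
int clamp(int X, int Lo, int Hi) {
  int T = X < Hi ? X : Hi;   // min: first compare + select
  return T > Lo ? T : Lo;    // max: second compare + select
}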
const SimplifyCFGOptions &Options; bool Resimplify; @@ -229,9 +229,9 @@ class SimplifyCFGOpt { bool SimplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder); - bool PerformValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV, - Instruction *PTI, - IRBuilder<> &Builder); + bool PerformValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV, + Instruction *PTI, + IRBuilder<> &Builder); bool FoldValueComparisonIntoPredecessors(Instruction *TI, IRBuilder<> &Builder); @@ -264,17 +264,17 @@ class SimplifyCFGOpt { bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder); public: - SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU, - const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders, + SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU, + const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders, const SimplifyCFGOptions &Opts) - : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) { - assert((!DTU || !DTU->hasPostDomTree()) && - "SimplifyCFG is not yet capable of maintaining validity of a " - "PostDomTree, so don't ask for it."); - } - - bool simplifyOnce(BasicBlock *BB); - bool simplifyOnceImpl(BasicBlock *BB); + : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) { + assert((!DTU || !DTU->hasPostDomTree()) && + "SimplifyCFG is not yet capable of maintaining validity of a " + "PostDomTree, so don't ask for it."); + } + + bool simplifyOnce(BasicBlock *BB); + bool simplifyOnceImpl(BasicBlock *BB); bool run(BasicBlock *BB); // Helper to set Resimplify and return change indication. @@ -655,7 +655,7 @@ private: /// vector. /// One "Extra" case is allowed to differ from the other. void gather(Value *V) { - bool isEQ = match(V, m_LogicalOr(m_Value(), m_Value())); + bool isEQ = match(V, m_LogicalOr(m_Value(), m_Value())); // Keep a stack (SmallVector for efficiency) for depth-first traversal SmallVector<Value *, 8> DFT; @@ -670,14 +670,14 @@ private: if (Instruction *I = dyn_cast<Instruction>(V)) { // If it is a || (or && depending on isEQ), process the operands. - Value *Op0, *Op1; - if (isEQ ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1))) - : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) { - if (Visited.insert(Op1).second) - DFT.push_back(Op1); - if (Visited.insert(Op0).second) - DFT.push_back(Op0); - + Value *Op0, *Op1; + if (isEQ ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1))) + : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) { + if (Visited.insert(Op1).second) + DFT.push_back(Op1); + if (Visited.insert(Op0).second) + DFT.push_back(Op0); + continue; } @@ -772,7 +772,7 @@ BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases( static void EliminateBlockCases(BasicBlock *BB, std::vector<ValueEqualityComparisonCase> &Cases) { - llvm::erase_value(Cases, BB); + llvm::erase_value(Cases, BB); } /// Return true if there are any keys in C1 that exist in C2 as well. @@ -882,18 +882,18 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( (void)NI; // Remove PHI node entries for the dead edge. 
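// Editor's sketch: gather(), above, flattens a tree of || operations (or &&,
// for the equality form chosen by the initial m_LogicalOr match) into its
// leaf comparisons with an explicit depth-first stack and a Visited set to
// cope with shared subexpressions. A standalone skeleton over a generic
// binary expression tree (type and field names are illustrative):
#include <unordered_set>
#include <vector>

struct Expr {
  bool IsOr = false;                    // matching connective to descend through
  Expr *Op0 = nullptr, *Op1 = nullptr;  // both null for a leaf comparison
};

void gatherLeaves(Expr *Root, std::vector<Expr *> &Leaves) {
  std::unordered_set<Expr *> Visited{Root};
  std::vector<Expr *> DFT{Root};
  while (!DFT.empty()) {
    Expr *E = DFT.back();
    DFT.pop_back();
    if (E->IsOr && E->Op0) {            // interior node: push operands once each
      if (Visited.insert(E->Op1).second)
        DFT.push_back(E->Op1);
      if (Visited.insert(E->Op0).second)
        DFT.push_back(E->Op0);
      continue;
    }
    Leaves.push_back(E);                // leaf: record the comparison
  }
}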
- ThisCases[0].Dest->removePredecessor(PredDef); + ThisCases[0].Dest->removePredecessor(PredDef); LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); EraseTerminatorAndDCECond(TI); - - if (DTU) - DTU->applyUpdates( - {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}}); - + + if (DTU) + DTU->applyUpdates( + {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}}); + return true; } @@ -906,25 +906,25 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() << "Through successor TI: " << *TI); - SmallMapVector<BasicBlock *, int, 8> NumPerSuccessorCases; + SmallMapVector<BasicBlock *, int, 8> NumPerSuccessorCases; for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) { --i; - auto *Successor = i->getCaseSuccessor(); - ++NumPerSuccessorCases[Successor]; + auto *Successor = i->getCaseSuccessor(); + ++NumPerSuccessorCases[Successor]; if (DeadCases.count(i->getCaseValue())) { - Successor->removePredecessor(PredDef); + Successor->removePredecessor(PredDef); SI.removeCase(i); - --NumPerSuccessorCases[Successor]; + --NumPerSuccessorCases[Successor]; } } - - std::vector<DominatorTree::UpdateType> Updates; - for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases) - if (I.second == 0) - Updates.push_back({DominatorTree::Delete, PredDef, I.first}); - if (DTU) - DTU->applyUpdates(Updates); - + + std::vector<DominatorTree::UpdateType> Updates; + for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases) + if (I.second == 0) + Updates.push_back({DominatorTree::Delete, PredDef, I.first}); + if (DTU) + DTU->applyUpdates(Updates); + LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n"); return true; } @@ -954,16 +954,16 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( if (!TheRealDest) TheRealDest = ThisDef; - SmallSetVector<BasicBlock *, 2> RemovedSuccs; - + SmallSetVector<BasicBlock *, 2> RemovedSuccs; + // Remove PHI node entries for dead edges. BasicBlock *CheckEdge = TheRealDest; for (BasicBlock *Succ : successors(TIBB)) - if (Succ != CheckEdge) { - if (Succ != TheRealDest) - RemovedSuccs.insert(Succ); + if (Succ != CheckEdge) { + if (Succ != TheRealDest) + RemovedSuccs.insert(Succ); Succ->removePredecessor(TIBB); - } else + } else CheckEdge = nullptr; // Insert the new branch. @@ -975,13 +975,13 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( << "\n"); EraseTerminatorAndDCECond(TI); - if (DTU) { - SmallVector<DominatorTree::UpdateType, 2> Updates; - Updates.reserve(RemovedSuccs.size()); - for (auto *RemovedSucc : RemovedSuccs) - Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc}); - DTU->applyUpdates(Updates); - } + if (DTU) { + SmallVector<DominatorTree::UpdateType, 2> Updates; + Updates.reserve(RemovedSuccs.size()); + for (auto *RemovedSucc : RemovedSuccs) + Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc}); + DTU->applyUpdates(Updates); + } return true; } @@ -1049,300 +1049,300 @@ static void FitWeights(MutableArrayRef<uint64_t> Weights) { } } -static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses( - BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) { - Instruction *PTI = PredBlock->getTerminator(); - - // If we have bonus instructions, clone them into the predecessor block. - // Note that there may be multiple predecessor blocks, so we cannot move - // bonus instructions to a predecessor block. 
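// Editor's sketch: when dead switch cases are pruned above, the dominator-
// tree edge Pred->Succ may only be deleted once *no* surviving case still
// targets Succ; the hunk therefore counts cases per successor, decrements on
// removal, and emits one Delete update per successor whose count reaches
// zero. The counting restated over integer block ids:
#include <map>
#include <vector>

std::vector<int> edgesToDelete(const std::vector<int> &CaseSucc,
                               const std::vector<bool> &CaseIsDead) {
  std::map<int, int> NumPerSucc;
  for (size_t i = 0; i < CaseSucc.size(); ++i) {
    ++NumPerSucc[CaseSucc[i]];
    if (CaseIsDead[i])
      --NumPerSucc[CaseSucc[i]];
  }
  std::vector<int> Deleted;
  for (const auto &KV : NumPerSucc)
    if (KV.second == 0)
      Deleted.push_back(KV.first);   // no live case left: CFG edge goes away
  return Deleted;
}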
- for (Instruction &BonusInst : *BB) { - if (isa<DbgInfoIntrinsic>(BonusInst) || BonusInst.isTerminator()) - continue; - - Instruction *NewBonusInst = BonusInst.clone(); - - if (PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) { - // Unless the instruction has the same !dbg location as the original - // branch, drop it. When we fold the bonus instructions we want to make - // sure we reset their debug locations in order to avoid stepping on - // dead code caused by folding dead branches. - NewBonusInst->setDebugLoc(DebugLoc()); - } - - RemapInstruction(NewBonusInst, VMap, - RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); - VMap[&BonusInst] = NewBonusInst; - - // If we moved a load, we cannot any longer claim any knowledge about - // its potential value. The previous information might have been valid - // only given the branch precondition. - // For an analogous reason, we must also drop all the metadata whose - // semantics we don't understand. We *can* preserve !annotation, because - // it is tied to the instruction itself, not the value or position. - NewBonusInst->dropUnknownNonDebugMetadata(LLVMContext::MD_annotation); - - PredBlock->getInstList().insert(PTI->getIterator(), NewBonusInst); - NewBonusInst->takeName(&BonusInst); - BonusInst.setName(NewBonusInst->getName() + ".old"); - - // Update (liveout) uses of bonus instructions, - // now that the bonus instruction has been cloned into predecessor. - SSAUpdater SSAUpdate; - SSAUpdate.Initialize(BonusInst.getType(), - (NewBonusInst->getName() + ".merge").str()); - SSAUpdate.AddAvailableValue(BB, &BonusInst); - SSAUpdate.AddAvailableValue(PredBlock, NewBonusInst); - for (Use &U : make_early_inc_range(BonusInst.uses())) - SSAUpdate.RewriteUseAfterInsertions(U); - } -} - -bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding( - Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) { - BasicBlock *BB = TI->getParent(); - BasicBlock *Pred = PTI->getParent(); - - std::vector<DominatorTree::UpdateType> Updates; - - // Figure out which 'cases' to copy from SI to PSI. - std::vector<ValueEqualityComparisonCase> BBCases; - BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases); - - std::vector<ValueEqualityComparisonCase> PredCases; - BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases); - - // Based on whether the default edge from PTI goes to BB or not, fill in - // PredCases and PredDefault with the new switch cases we would like to - // build. - SmallMapVector<BasicBlock *, int, 8> NewSuccessors; - - // Update the branch weight metadata along the way - SmallVector<uint64_t, 8> Weights; - bool PredHasWeights = HasBranchWeights(PTI); - bool SuccHasWeights = HasBranchWeights(TI); - - if (PredHasWeights) { - GetBranchWeights(PTI, Weights); - // branch-weight metadata is inconsistent here. - if (Weights.size() != 1 + PredCases.size()) - PredHasWeights = SuccHasWeights = false; - } else if (SuccHasWeights) - // If there are no predecessor weights but there are successor weights, - // populate Weights with 1, which will later be scaled to the sum of - // successor's weights - Weights.assign(1 + PredCases.size(), 1); - - SmallVector<uint64_t, 8> SuccWeights; - if (SuccHasWeights) { - GetBranchWeights(TI, SuccWeights); - // branch-weight metadata is inconsistent here. 
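// Editor's sketch: CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses,
// above, leaves each bonus instruction with two definitions (the original in
// BB and the clone in the predecessor) and lets SSAUpdater rewrite every
// live-out use to whichever definition reaches it, inserting PHIs at merge
// points. A toy stand-in for the "available value per block" half of that
// machinery; the real updater also walks dominators and synthesizes PHIs:
#include <map>

// Block id -> value id of the definition available at the end of that block.
using AvailMap = std::map<int, int>;

int valueReachingUse(const AvailMap &Avail, int UseBlock, int FallbackPHI) {
  auto It = Avail.find(UseBlock);
  return It != Avail.end() ? It->second : FallbackPHI;
}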
- if (SuccWeights.size() != 1 + BBCases.size()) - PredHasWeights = SuccHasWeights = false; - } else if (PredHasWeights) - SuccWeights.assign(1 + BBCases.size(), 1); - - if (PredDefault == BB) { - // If this is the default destination from PTI, only the edges in TI - // that don't occur in PTI, or that branch to BB will be activated. - std::set<ConstantInt *, ConstantIntOrdering> PTIHandled; - for (unsigned i = 0, e = PredCases.size(); i != e; ++i) - if (PredCases[i].Dest != BB) - PTIHandled.insert(PredCases[i].Value); - else { - // The default destination is BB, we don't need explicit targets. - std::swap(PredCases[i], PredCases.back()); - - if (PredHasWeights || SuccHasWeights) { - // Increase weight for the default case. - Weights[0] += Weights[i + 1]; - std::swap(Weights[i + 1], Weights.back()); - Weights.pop_back(); +static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses( + BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) { + Instruction *PTI = PredBlock->getTerminator(); + + // If we have bonus instructions, clone them into the predecessor block. + // Note that there may be multiple predecessor blocks, so we cannot move + // bonus instructions to a predecessor block. + for (Instruction &BonusInst : *BB) { + if (isa<DbgInfoIntrinsic>(BonusInst) || BonusInst.isTerminator()) + continue; + + Instruction *NewBonusInst = BonusInst.clone(); + + if (PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) { + // Unless the instruction has the same !dbg location as the original + // branch, drop it. When we fold the bonus instructions we want to make + // sure we reset their debug locations in order to avoid stepping on + // dead code caused by folding dead branches. + NewBonusInst->setDebugLoc(DebugLoc()); + } + + RemapInstruction(NewBonusInst, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + VMap[&BonusInst] = NewBonusInst; + + // If we moved a load, we cannot any longer claim any knowledge about + // its potential value. The previous information might have been valid + // only given the branch precondition. + // For an analogous reason, we must also drop all the metadata whose + // semantics we don't understand. We *can* preserve !annotation, because + // it is tied to the instruction itself, not the value or position. + NewBonusInst->dropUnknownNonDebugMetadata(LLVMContext::MD_annotation); + + PredBlock->getInstList().insert(PTI->getIterator(), NewBonusInst); + NewBonusInst->takeName(&BonusInst); + BonusInst.setName(NewBonusInst->getName() + ".old"); + + // Update (liveout) uses of bonus instructions, + // now that the bonus instruction has been cloned into predecessor. + SSAUpdater SSAUpdate; + SSAUpdate.Initialize(BonusInst.getType(), + (NewBonusInst->getName() + ".merge").str()); + SSAUpdate.AddAvailableValue(BB, &BonusInst); + SSAUpdate.AddAvailableValue(PredBlock, NewBonusInst); + for (Use &U : make_early_inc_range(BonusInst.uses())) + SSAUpdate.RewriteUseAfterInsertions(U); + } +} + +bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding( + Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) { + BasicBlock *BB = TI->getParent(); + BasicBlock *Pred = PTI->getParent(); + + std::vector<DominatorTree::UpdateType> Updates; + + // Figure out which 'cases' to copy from SI to PSI. 
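// Editor's sketch: in the PredDefault == BB path above, any predecessor case
// that already targets BB is folded into the default: its branch weight is
// absorbed into the default's slot (index 0) and the case is removed with a
// swap-and-pop, keeping weight i+1 aligned with case i. Restated:
#include <cstdint>
#include <utility>
#include <vector>

// Precondition: CaseIdx + 1 < Weights.size().
void foldCaseIntoDefault(std::vector<uint64_t> &Weights, size_t CaseIdx) {
  Weights[0] += Weights[CaseIdx + 1];       // default absorbs the case weight
  std::swap(Weights[CaseIdx + 1], Weights.back());
  Weights.pop_back();                       // mirrors the PredCases swap+pop
}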
+ std::vector<ValueEqualityComparisonCase> BBCases; + BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases); + + std::vector<ValueEqualityComparisonCase> PredCases; + BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases); + + // Based on whether the default edge from PTI goes to BB or not, fill in + // PredCases and PredDefault with the new switch cases we would like to + // build. + SmallMapVector<BasicBlock *, int, 8> NewSuccessors; + + // Update the branch weight metadata along the way + SmallVector<uint64_t, 8> Weights; + bool PredHasWeights = HasBranchWeights(PTI); + bool SuccHasWeights = HasBranchWeights(TI); + + if (PredHasWeights) { + GetBranchWeights(PTI, Weights); + // branch-weight metadata is inconsistent here. + if (Weights.size() != 1 + PredCases.size()) + PredHasWeights = SuccHasWeights = false; + } else if (SuccHasWeights) + // If there are no predecessor weights but there are successor weights, + // populate Weights with 1, which will later be scaled to the sum of + // successor's weights + Weights.assign(1 + PredCases.size(), 1); + + SmallVector<uint64_t, 8> SuccWeights; + if (SuccHasWeights) { + GetBranchWeights(TI, SuccWeights); + // branch-weight metadata is inconsistent here. + if (SuccWeights.size() != 1 + BBCases.size()) + PredHasWeights = SuccHasWeights = false; + } else if (PredHasWeights) + SuccWeights.assign(1 + BBCases.size(), 1); + + if (PredDefault == BB) { + // If this is the default destination from PTI, only the edges in TI + // that don't occur in PTI, or that branch to BB will be activated. + std::set<ConstantInt *, ConstantIntOrdering> PTIHandled; + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + if (PredCases[i].Dest != BB) + PTIHandled.insert(PredCases[i].Value); + else { + // The default destination is BB, we don't need explicit targets. + std::swap(PredCases[i], PredCases.back()); + + if (PredHasWeights || SuccHasWeights) { + // Increase weight for the default case. + Weights[0] += Weights[i + 1]; + std::swap(Weights[i + 1], Weights.back()); + Weights.pop_back(); } - PredCases.pop_back(); - --i; - --e; - } - - // Reconstruct the new switch statement we will be building. - if (PredDefault != BBDefault) { - PredDefault->removePredecessor(Pred); - if (PredDefault != BB) - Updates.push_back({DominatorTree::Delete, Pred, PredDefault}); - PredDefault = BBDefault; - ++NewSuccessors[BBDefault]; - } - - unsigned CasesFromPred = Weights.size(); - uint64_t ValidTotalSuccWeight = 0; - for (unsigned i = 0, e = BBCases.size(); i != e; ++i) - if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) { - PredCases.push_back(BBCases[i]); - ++NewSuccessors[BBCases[i].Dest]; + PredCases.pop_back(); + --i; + --e; + } + + // Reconstruct the new switch statement we will be building. + if (PredDefault != BBDefault) { + PredDefault->removePredecessor(Pred); + if (PredDefault != BB) + Updates.push_back({DominatorTree::Delete, Pred, PredDefault}); + PredDefault = BBDefault; + ++NewSuccessors[BBDefault]; + } + + unsigned CasesFromPred = Weights.size(); + uint64_t ValidTotalSuccWeight = 0; + for (unsigned i = 0, e = BBCases.size(); i != e; ++i) + if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) { + PredCases.push_back(BBCases[i]); + ++NewSuccessors[BBCases[i].Dest]; if (SuccHasWeights || PredHasWeights) { - // The default weight is at index 0, so weight for the ith case - // should be at index i+1. Scale the cases from successor by - // PredDefaultWeight (Weights[0]). 
- Weights.push_back(Weights[0] * SuccWeights[i + 1]); - ValidTotalSuccWeight += SuccWeights[i + 1]; + // The default weight is at index 0, so weight for the ith case + // should be at index i+1. Scale the cases from successor by + // PredDefaultWeight (Weights[0]). + Weights.push_back(Weights[0] * SuccWeights[i + 1]); + ValidTotalSuccWeight += SuccWeights[i + 1]; + } + } + + if (SuccHasWeights || PredHasWeights) { + ValidTotalSuccWeight += SuccWeights[0]; + // Scale the cases from predecessor by ValidTotalSuccWeight. + for (unsigned i = 1; i < CasesFromPred; ++i) + Weights[i] *= ValidTotalSuccWeight; + // Scale the default weight by SuccDefaultWeight (SuccWeights[0]). + Weights[0] *= SuccWeights[0]; + } + } else { + // If this is not the default destination from PSI, only the edges + // in SI that occur in PSI with a destination of BB will be + // activated. + std::set<ConstantInt *, ConstantIntOrdering> PTIHandled; + std::map<ConstantInt *, uint64_t> WeightsForHandled; + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + if (PredCases[i].Dest == BB) { + PTIHandled.insert(PredCases[i].Value); + + if (PredHasWeights || SuccHasWeights) { + WeightsForHandled[PredCases[i].Value] = Weights[i + 1]; + std::swap(Weights[i + 1], Weights.back()); + Weights.pop_back(); } - } - - if (SuccHasWeights || PredHasWeights) { - ValidTotalSuccWeight += SuccWeights[0]; - // Scale the cases from predecessor by ValidTotalSuccWeight. - for (unsigned i = 1; i < CasesFromPred; ++i) - Weights[i] *= ValidTotalSuccWeight; - // Scale the default weight by SuccDefaultWeight (SuccWeights[0]). - Weights[0] *= SuccWeights[0]; - } - } else { - // If this is not the default destination from PSI, only the edges - // in SI that occur in PSI with a destination of BB will be - // activated. - std::set<ConstantInt *, ConstantIntOrdering> PTIHandled; - std::map<ConstantInt *, uint64_t> WeightsForHandled; - for (unsigned i = 0, e = PredCases.size(); i != e; ++i) - if (PredCases[i].Dest == BB) { - PTIHandled.insert(PredCases[i].Value); - - if (PredHasWeights || SuccHasWeights) { - WeightsForHandled[PredCases[i].Value] = Weights[i + 1]; - std::swap(Weights[i + 1], Weights.back()); - Weights.pop_back(); - } - - std::swap(PredCases[i], PredCases.back()); - PredCases.pop_back(); - --i; - --e; - } - - // Okay, now we know which constants were sent to BB from the - // predecessor. Figure out where they will all go now. - for (unsigned i = 0, e = BBCases.size(); i != e; ++i) - if (PTIHandled.count(BBCases[i].Value)) { - // If this is one we are capable of getting... - if (PredHasWeights || SuccHasWeights) - Weights.push_back(WeightsForHandled[BBCases[i].Value]); - PredCases.push_back(BBCases[i]); - ++NewSuccessors[BBCases[i].Dest]; - PTIHandled.erase(BBCases[i].Value); // This constant is taken care of + + std::swap(PredCases[i], PredCases.back()); + PredCases.pop_back(); + --i; + --e; } - // If there are any constants vectored to BB that TI doesn't handle, - // they must go to the default destination of TI. - for (ConstantInt *I : PTIHandled) { - if (PredHasWeights || SuccHasWeights) - Weights.push_back(WeightsForHandled[I]); - PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault)); - ++NewSuccessors[BBDefault]; - } - } - - // Okay, at this point, we know which new successor Pred will get. Make - // sure we update the number of entries in the PHI nodes for these - // successors. 
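// Editor's sketch: the scaling above puts both weight vectors on a common
// denominator before merging: cases inherited from the successor switch are
// multiplied by the predecessor's default weight (Weights[0]), the
// predecessor's own cases by the total successor weight actually observed,
// and the new default by the successor's default weight. An assumed
// simplification covering the latter two steps, with the totals
// pre-collected:
#include <cstdint>
#include <vector>

void rescaleMergedWeights(std::vector<uint64_t> &Weights,
                          size_t CasesFromPred,
                          const std::vector<uint64_t> &SuccWeights,
                          uint64_t ValidTotalSuccWeight) {
  for (size_t i = 1; i < CasesFromPred; ++i)
    Weights[i] *= ValidTotalSuccWeight;  // predecessor cases: scale by total
  Weights[0] *= SuccWeights[0];          // default: scale by succ default
}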
- for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor : - NewSuccessors) { - for (auto I : seq(0, NewSuccessor.second)) { - (void)I; - AddPredecessorToBlock(NewSuccessor.first, Pred, BB); - } - if (!is_contained(successors(Pred), NewSuccessor.first)) - Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first}); - } - - Builder.SetInsertPoint(PTI); - // Convert pointer to int before we switch. - if (CV->getType()->isPointerTy()) { - CV = - Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr"); - } - - // Now that the successors are updated, create the new Switch instruction. - SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size()); - NewSI->setDebugLoc(PTI->getDebugLoc()); - for (ValueEqualityComparisonCase &V : PredCases) - NewSI->addCase(V.Value, V.Dest); - - if (PredHasWeights || SuccHasWeights) { - // Halve the weights if any of them cannot fit in an uint32_t - FitWeights(Weights); - - SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); - - setBranchWeights(NewSI, MDWeights); - } - - EraseTerminatorAndDCECond(PTI); - - // Okay, last check. If BB is still a successor of PSI, then we must - // have an infinite loop case. If so, add an infinitely looping block - // to handle the case to preserve the behavior of the code. - BasicBlock *InfLoopBlock = nullptr; - for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i) - if (NewSI->getSuccessor(i) == BB) { - if (!InfLoopBlock) { - // Insert it at the end of the function, because it's either code, - // or it won't matter if it's hot. :) - InfLoopBlock = - BasicBlock::Create(BB->getContext(), "infloop", BB->getParent()); - BranchInst::Create(InfLoopBlock, InfLoopBlock); - Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock}); + // Okay, now we know which constants were sent to BB from the + // predecessor. Figure out where they will all go now. + for (unsigned i = 0, e = BBCases.size(); i != e; ++i) + if (PTIHandled.count(BBCases[i].Value)) { + // If this is one we are capable of getting... + if (PredHasWeights || SuccHasWeights) + Weights.push_back(WeightsForHandled[BBCases[i].Value]); + PredCases.push_back(BBCases[i]); + ++NewSuccessors[BBCases[i].Dest]; + PTIHandled.erase(BBCases[i].Value); // This constant is taken care of } - NewSI->setSuccessor(i, InfLoopBlock); - } - - if (InfLoopBlock) - Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock}); - - Updates.push_back({DominatorTree::Delete, Pred, BB}); - - if (DTU) - DTU->applyUpdates(Updates); - - ++NumFoldValueComparisonIntoPredecessors; - return true; -} - -/// The specified terminator is a value equality comparison instruction -/// (either a switch or a branch on "X == c"). -/// See if any of the predecessors of the terminator block are value comparisons -/// on the same value. If so, and if safe to do so, fold them together. -bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI, - IRBuilder<> &Builder) { - BasicBlock *BB = TI->getParent(); - Value *CV = isValueEqualityComparison(TI); // CondVal - assert(CV && "Not a comparison?"); - - bool Changed = false; - - SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); - while (!Preds.empty()) { - BasicBlock *Pred = Preds.pop_back_val(); - Instruction *PTI = Pred->getTerminator(); - - // Don't try to fold into itself. - if (Pred == BB) - continue; - - // See if the predecessor is a comparison with the same value. 
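// Two shapes qualify, e.g. (hypothetical IR):
//   switch i32 %x, label %default [ i32 1, label %bb1 ]
//   br i1 (icmp eq i32 %x, 2), label %bb2, label %bb3
// isValueEqualityComparison returns %x for both, making PTI a fold
// candidate whenever it tests the same value as TI.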
- Value *PCV = isValueEqualityComparison(PTI); // PredCondVal - if (PCV != CV) - continue; - - SmallSetVector<BasicBlock *, 4> FailBlocks; - if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) { - for (auto *Succ : FailBlocks) { - if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU)) - return false; - } + + // If there are any constants vectored to BB that TI doesn't handle, + // they must go to the default destination of TI. + for (ConstantInt *I : PTIHandled) { + if (PredHasWeights || SuccHasWeights) + Weights.push_back(WeightsForHandled[I]); + PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault)); + ++NewSuccessors[BBDefault]; } - - PerformValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder); - Changed = true; + } + + // Okay, at this point, we know which new successor Pred will get. Make + // sure we update the number of entries in the PHI nodes for these + // successors. + for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor : + NewSuccessors) { + for (auto I : seq(0, NewSuccessor.second)) { + (void)I; + AddPredecessorToBlock(NewSuccessor.first, Pred, BB); + } + if (!is_contained(successors(Pred), NewSuccessor.first)) + Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first}); + } + + Builder.SetInsertPoint(PTI); + // Convert pointer to int before we switch. + if (CV->getType()->isPointerTy()) { + CV = + Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr"); + } + + // Now that the successors are updated, create the new Switch instruction. + SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size()); + NewSI->setDebugLoc(PTI->getDebugLoc()); + for (ValueEqualityComparisonCase &V : PredCases) + NewSI->addCase(V.Value, V.Dest); + + if (PredHasWeights || SuccHasWeights) { + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(Weights); + + SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); + + setBranchWeights(NewSI, MDWeights); + } + + EraseTerminatorAndDCECond(PTI); + + // Okay, last check. If BB is still a successor of PSI, then we must + // have an infinite loop case. If so, add an infinitely looping block + // to handle the case to preserve the behavior of the code. + BasicBlock *InfLoopBlock = nullptr; + for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i) + if (NewSI->getSuccessor(i) == BB) { + if (!InfLoopBlock) { + // Insert it at the end of the function, because it's either code, + // or it won't matter if it's hot. :) + InfLoopBlock = + BasicBlock::Create(BB->getContext(), "infloop", BB->getParent()); + BranchInst::Create(InfLoopBlock, InfLoopBlock); + Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock}); + } + NewSI->setSuccessor(i, InfLoopBlock); + } + + if (InfLoopBlock) + Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock}); + + Updates.push_back({DominatorTree::Delete, Pred, BB}); + + if (DTU) + DTU->applyUpdates(Updates); + + ++NumFoldValueComparisonIntoPredecessors; + return true; +} + +/// The specified terminator is a value equality comparison instruction +/// (either a switch or a branch on "X == c"). +/// See if any of the predecessors of the terminator block are value comparisons +/// on the same value. If so, and if safe to do so, fold them together. 
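/// Sketch on hypothetical IR: a predecessor ending in
///   br i1 (icmp eq i32 %x, 0), label %BB, label %exit
/// where %BB itself switches on %x can be folded into a single switch
/// on %x in the predecessor that covers both sets of cases.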
+bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI, + IRBuilder<> &Builder) { + BasicBlock *BB = TI->getParent(); + Value *CV = isValueEqualityComparison(TI); // CondVal + assert(CV && "Not a comparison?"); + + bool Changed = false; + + SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); + while (!Preds.empty()) { + BasicBlock *Pred = Preds.pop_back_val(); + Instruction *PTI = Pred->getTerminator(); + + // Don't try to fold into itself. + if (Pred == BB) + continue; + + // See if the predecessor is a comparison with the same value. + Value *PCV = isValueEqualityComparison(PTI); // PredCondVal + if (PCV != CV) + continue; + + SmallSetVector<BasicBlock *, 4> FailBlocks; + if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) { + for (auto *Succ : FailBlocks) { + if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU)) + return false; + } + } + + PerformValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder); + Changed = true; } return Changed; } @@ -1364,7 +1364,7 @@ static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, return true; } -static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false); +static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false); /// Given a conditional branch that goes to BB1 and BB2, hoist any common code /// in the two blocks up into the branch block. The caller of this function @@ -1401,12 +1401,12 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, BasicBlock *BIParent = BI->getParent(); bool Changed = false; - - auto _ = make_scope_exit([&]() { - if (Changed) - ++NumHoistCommonCode; - }); - + + auto _ = make_scope_exit([&]() { + if (Changed) + ++NumHoistCommonCode; + }); + do { // If we are hoisting the terminator instruction, don't move one (making a // broken BB), instead clone it, and remove BI. @@ -1475,7 +1475,7 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, I2->eraseFromParent(); Changed = true; } - ++NumHoistCommonInstrs; + ++NumHoistCommonInstrs; I1 = &*BB1_Itr++; I2 = &*BB2_Itr++; @@ -1530,8 +1530,8 @@ HoistTerminator: I2->replaceAllUsesWith(NT); NT->takeName(I1); } - Changed = true; - ++NumHoistCommonInstrs; + Changed = true; + ++NumHoistCommonInstrs; // Ensure terminator gets a debug location, even an unknown one, in case // it involves inlinable calls. @@ -1573,20 +1573,20 @@ HoistTerminator: } } - SmallVector<DominatorTree::UpdateType, 4> Updates; - + SmallVector<DominatorTree::UpdateType, 4> Updates; + // Update any PHI nodes in our new successors. - for (BasicBlock *Succ : successors(BB1)) { + for (BasicBlock *Succ : successors(BB1)) { AddPredecessorToBlock(Succ, BIParent, BB1); - Updates.push_back({DominatorTree::Insert, BIParent, Succ}); - } - for (BasicBlock *Succ : successors(BI)) - Updates.push_back({DominatorTree::Delete, BIParent, Succ}); + Updates.push_back({DominatorTree::Insert, BIParent, Succ}); + } + for (BasicBlock *Succ : successors(BI)) + Updates.push_back({DominatorTree::Delete, BIParent, Succ}); EraseTerminatorAndDCECond(BI); - if (DTU) - DTU->applyUpdates(Updates); - return Changed; + if (DTU) + DTU->applyUpdates(Updates); + return Changed; } // Check lifetime markers. @@ -1628,11 +1628,11 @@ static bool canSinkInstructions( I->getType()->isTokenTy()) return false; - // Do not try to sink an instruction in an infinite loop - it can cause - // this algorithm to infinite loop. 
- if (I->getParent()->getSingleSuccessor() == I->getParent()) - return false; - + // Do not try to sink an instruction in an infinite loop - it can cause + // this algorithm to infinite loop. + if (I->getParent()->getSingleSuccessor() == I->getParent()) + return false; + // Conservatively return false if I is an inline-asm instruction. Sinking // and merging inline-asm instructions can potentially create arguments // that cannot satisfy the inline-asm constraints. @@ -1719,13 +1719,13 @@ static bool canSinkInstructions( return true; } -// Assuming canSinkInstructions(Blocks) has returned true, sink the last +// Assuming canSinkInstructions(Blocks) has returned true, sink the last // instruction of every block in Blocks to their common successor, commoning // into one instruction. static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) { auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0); - // canSinkInstructions returning true guarantees that every block has at + // canSinkInstructions returning true guarantees that every block has at // least one non-terminator instruction. SmallVector<Instruction*,4> Insts; for (auto *BB : Blocks) { @@ -1738,9 +1738,9 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) { } // The only checking we need to do now is that all users of all instructions - // are the same PHI node. canSinkInstructions should have checked this but - // it is slightly over-aggressive - it gets confused by commutative - // instructions so double-check it here. + // are the same PHI node. canSinkInstructions should have checked this but + // it is slightly over-aggressive - it gets confused by commutative + // instructions so double-check it here. Instruction *I0 = Insts.front(); if (!I0->user_empty()) { auto *PNUse = dyn_cast<PHINode>(*I0->user_begin()); @@ -1751,11 +1751,11 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) { return false; } - // We don't need to do any more checking here; canSinkInstructions should + // We don't need to do any more checking here; canSinkInstructions should // have done it all for us. SmallVector<Value*, 4> NewOperands; for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) { - // This check is different to that in canSinkInstructions. There, we + // This check is different to that in canSinkInstructions. There, we // cared about the global view once simplifycfg (and instcombine) have // completed - it takes into account PHIs that become trivially // simplifiable. However here we need a more local view; if an operand @@ -1882,8 +1882,8 @@ namespace { /// true, sink any common code from the predecessors to BB. /// We also allow one predecessor to end with conditional branch (but no more /// than one). -static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, - DomTreeUpdater *DTU) { +static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, + DomTreeUpdater *DTU) { // We support two situations: // (1) all incoming arcs are unconditional // (2) one incoming arc is conditional @@ -1958,12 +1958,12 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, --LRI; } - // If no instructions can be sunk, early-return. - if (ScanIdx == 0) - return false; - - bool Changed = false; - + // If no instructions can be sunk, early-return. 
+ if (ScanIdx == 0) + return false; + + bool Changed = false; + auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) { unsigned NumPHIdValues = 0; for (auto *I : *LRI) @@ -1978,7 +1978,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, return NumPHIInsts <= 1; }; - if (Cond) { + if (Cond) { // Check if we would actually sink anything first! This mutates the CFG and // adds an extra block. The goal in doing this is to allow instructions that // couldn't be sunk before to be sunk - obviously, speculatable instructions @@ -2001,7 +2001,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n"); // We have a conditional edge and we're going to sink some instructions. // Insert a new block postdominating all blocks we're going to sink from. - if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU)) + if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU)) // Edges couldn't be split. return false; Changed = true; @@ -2019,8 +2019,8 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, // sink presuming a later value will also be sunk, but stop half way through // and never actually sink it which means we produce more PHIs than intended. // This is unlikely in practice though. - unsigned SinkIdx = 0; - for (; SinkIdx != ScanIdx; ++SinkIdx) { + unsigned SinkIdx = 0; + for (; SinkIdx != ScanIdx; ++SinkIdx) { LLVM_DEBUG(dbgs() << "SINK: Sink: " << *UnconditionalPreds[0]->getTerminator()->getPrevNode() << "\n"); @@ -2035,18 +2035,18 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, break; } - if (!sinkLastInstruction(UnconditionalPreds)) { - LLVM_DEBUG( - dbgs() - << "SINK: stopping here, failed to actually sink instruction!\n"); - break; - } - - NumSinkCommonInstrs++; + if (!sinkLastInstruction(UnconditionalPreds)) { + LLVM_DEBUG( + dbgs() + << "SINK: stopping here, failed to actually sink instruction!\n"); + break; + } + + NumSinkCommonInstrs++; Changed = true; } - if (SinkIdx != 0) - ++NumSinkCommonCode; + if (SinkIdx != 0) + ++NumSinkCommonCode; return Changed; } @@ -2090,9 +2090,9 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, // Look for a store to the same pointer in BrBB. unsigned MaxNumInstToLookAt = 9; - // Skip pseudo probe intrinsic calls which are not really killing any memory - // accesses. - for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) { + // Skip pseudo probe intrinsic calls which are not really killing any memory + // accesses. + for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) { if (!MaxNumInstToLookAt) break; --MaxNumInstToLookAt; @@ -2113,65 +2113,65 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, return nullptr; } -/// Estimate the cost of the insertion(s) and check that the PHI nodes can be -/// converted to selects. -static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, - BasicBlock *EndBB, - unsigned &SpeculatedInstructions, - int &BudgetRemaining, - const TargetTransformInfo &TTI) { - TargetTransformInfo::TargetCostKind CostKind = - BB->getParent()->hasMinSize() - ? TargetTransformInfo::TCK_CodeSize - : TargetTransformInfo::TCK_SizeAndLatency; - - bool HaveRewritablePHIs = false; - for (PHINode &PN : EndBB->phis()) { - Value *OrigV = PN.getIncomingValueForBlock(BB); - Value *ThenV = PN.getIncomingValueForBlock(ThenBB); - - // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf. 
- // Skip PHIs which are trivial. - if (ThenV == OrigV) - continue; - - BudgetRemaining -= - TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr, - CmpInst::BAD_ICMP_PREDICATE, CostKind); - - // Don't convert to selects if we could remove undefined behavior instead. - if (passingValueIsAlwaysUndefined(OrigV, &PN) || - passingValueIsAlwaysUndefined(ThenV, &PN)) - return false; - - HaveRewritablePHIs = true; - ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV); - ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV); - if (!OrigCE && !ThenCE) - continue; // Known safe and cheap. - - if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) || - (OrigCE && !isSafeToSpeculativelyExecute(OrigCE))) - return false; - unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0; - unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0; - unsigned MaxCost = - 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; - if (OrigCost + ThenCost > MaxCost) - return false; - - // Account for the cost of an unfolded ConstantExpr which could end up - // getting expanded into Instructions. - // FIXME: This doesn't account for how many operations are combined in the - // constant expression. - ++SpeculatedInstructions; - if (SpeculatedInstructions > 1) - return false; - } - - return HaveRewritablePHIs; -} - +/// Estimate the cost of the insertion(s) and check that the PHI nodes can be +/// converted to selects. +static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, + BasicBlock *EndBB, + unsigned &SpeculatedInstructions, + int &BudgetRemaining, + const TargetTransformInfo &TTI) { + TargetTransformInfo::TargetCostKind CostKind = + BB->getParent()->hasMinSize() + ? TargetTransformInfo::TCK_CodeSize + : TargetTransformInfo::TCK_SizeAndLatency; + + bool HaveRewritablePHIs = false; + for (PHINode &PN : EndBB->phis()) { + Value *OrigV = PN.getIncomingValueForBlock(BB); + Value *ThenV = PN.getIncomingValueForBlock(ThenBB); + + // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf. + // Skip PHIs which are trivial. + if (ThenV == OrigV) + continue; + + BudgetRemaining -= + TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr, + CmpInst::BAD_ICMP_PREDICATE, CostKind); + + // Don't convert to selects if we could remove undefined behavior instead. + if (passingValueIsAlwaysUndefined(OrigV, &PN) || + passingValueIsAlwaysUndefined(ThenV, &PN)) + return false; + + HaveRewritablePHIs = true; + ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV); + ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV); + if (!OrigCE && !ThenCE) + continue; // Known safe and cheap. + + if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) || + (OrigCE && !isSafeToSpeculativelyExecute(OrigCE))) + return false; + unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0; + unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0; + unsigned MaxCost = + 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; + if (OrigCost + ThenCost > MaxCost) + return false; + + // Account for the cost of an unfolded ConstantExpr which could end up + // getting expanded into Instructions. + // FIXME: This doesn't account for how many operations are combined in the + // constant expression. + ++SpeculatedInstructions; + if (SpeculatedInstructions > 1) + return false; + } + + return HaveRewritablePHIs; +} + /// Speculate a conditional basic block flattening the CFG. /// /// Note that this is a very risky transform currently. 
Speculating @@ -2218,8 +2218,8 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, BasicBlock *BB = BI->getParent(); BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0); - int BudgetRemaining = - PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; + int BudgetRemaining = + PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; // If ThenBB is actually on the false edge of the conditional branch, remember // to swap the select operands later. @@ -2252,14 +2252,14 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, continue; } - // Skip pseudo probes. The consequence is we lose track of the branch - // probability for ThenBB, which is fine since the optimization here takes - // place regardless of the branch probability. - if (isa<PseudoProbeInst>(I)) { - SpeculatedDbgIntrinsics.push_back(I); - continue; - } - + // Skip pseudo probes. The consequence is we lose track of the branch + // probability for ThenBB, which is fine since the optimization here takes + // place regardless of the branch probability. + if (isa<PseudoProbeInst>(I)) { + SpeculatedDbgIntrinsics.push_back(I); + continue; + } + // Only speculatively execute a single instruction (not counting the // terminator) for now. ++SpeculatedInstructions; @@ -2305,13 +2305,13 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, return false; } - // Check that we can insert the selects and that it's not too expensive to do - // so. - bool Convert = SpeculatedStore != nullptr; - Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB, - SpeculatedInstructions, - BudgetRemaining, TTI); - if (!Convert || BudgetRemaining < 0) + // Check that we can insert the selects and that it's not too expensive to do + // so. + bool Convert = SpeculatedStore != nullptr; + Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB, + SpeculatedInstructions, + BudgetRemaining, TTI); + if (!Convert || BudgetRemaining < 0) return false; // If we get here, we can hoist the instruction and if-convert. @@ -2385,12 +2385,12 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { for (Instruction &I : BB->instructionsWithoutDebug()) { if (Size > MaxSmallBlockSize) return false; // Don't clone large BB's. - - // Can't fold blocks that contain noduplicate or convergent calls. - if (CallInst *CI = dyn_cast<CallInst>(&I)) - if (CI->cannotDuplicate() || CI->isConvergent()) - return false; - + + // Can't fold blocks that contain noduplicate or convergent calls. + if (CallInst *CI = dyn_cast<CallInst>(&I)) + if (CI->cannotDuplicate() || CI->isConvergent()) + return false; + // We will delete Phis while threading, so Phis should not be accounted in // block's size if (!isa<PHINode>(I)) @@ -2413,8 +2413,8 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { /// If we have a conditional branch on a PHI node value that is defined in the /// same block as the branch and if any PHI entries are constants, thread edges /// corresponding to that entry to be branches to their ultimate destination. 
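/// For example (hypothetical IR), given
///   %c = phi i1 [ true, %pred1 ], [ %x, %pred2 ]
///   br i1 %c, label %t, label %f
/// the edge from %pred1 always reaches %t, so it is rerouted through a
/// new ".critedge" block that branches directly to %t.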
-static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU, - const DataLayout &DL, AssumptionCache *AC) { +static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU, + const DataLayout &DL, AssumptionCache *AC) { BasicBlock *BB = BI->getParent(); PHINode *PN = dyn_cast<PHINode>(BI->getCondition()); // NOTE: we currently cannot transform this case if the PHI node is used @@ -2450,8 +2450,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU, if (isa<IndirectBrInst>(PredBB->getTerminator())) continue; - SmallVector<DominatorTree::UpdateType, 3> Updates; - + SmallVector<DominatorTree::UpdateType, 3> Updates; + // The dest block might have PHI nodes, other predecessors and other // difficult cases. Instead of being smart about this, just insert a new // block that jumps to the destination block, effectively splitting @@ -2460,7 +2460,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU, BasicBlock::Create(BB->getContext(), RealDest->getName() + ".critedge", RealDest->getParent(), RealDest); BranchInst *CritEdgeBranch = BranchInst::Create(RealDest, EdgeBB); - Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest}); + Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest}); CritEdgeBranch->setDebugLoc(BI->getDebugLoc()); // Update PHI nodes. @@ -2519,14 +2519,14 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU, PredBBTI->setSuccessor(i, EdgeBB); } - Updates.push_back({DominatorTree::Insert, PredBB, EdgeBB}); - Updates.push_back({DominatorTree::Delete, PredBB, BB}); - - if (DTU) - DTU->applyUpdates(Updates); - + Updates.push_back({DominatorTree::Insert, PredBB, EdgeBB}); + Updates.push_back({DominatorTree::Delete, PredBB, BB}); + + if (DTU) + DTU->applyUpdates(Updates); + // Recurse, simplifying any other constants. - return FoldCondBranchOnPHI(BI, DTU, DL, AC) || true; + return FoldCondBranchOnPHI(BI, DTU, DL, AC) || true; } return false; @@ -2535,7 +2535,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU, /// Given a BB that starts with the specified two-entry PHI node, /// see if we can eliminate it. static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, - DomTreeUpdater *DTU, const DataLayout &DL) { + DomTreeUpdater *DTU, const DataLayout &DL) { // Ok, this is a two entry PHI node. Check to see if this is a simple "if // statement", which has a very simple dominance structure. Basically, we // are trying to find the condition that is being branched on, which @@ -2568,13 +2568,13 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, int BudgetRemaining = TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; - bool Changed = false; + bool Changed = false; for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) { PHINode *PN = cast<PHINode>(II++); if (Value *V = SimplifyInstruction(PN, {DL, PN})) { PN->replaceAllUsesWith(V); PN->eraseFromParent(); - Changed = true; + Changed = true; continue; } @@ -2582,7 +2582,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, BudgetRemaining, TTI) || !DominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts, BudgetRemaining, TTI)) - return Changed; + return Changed; } // If we folded the first phi, PN dangles at this point. Refresh it. 
If @@ -2609,7 +2609,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, isa<BinaryOperator>(IfCond)) && !CanHoistNotFromBothValues(PN->getIncomingValue(0), PN->getIncomingValue(1))) - return Changed; + return Changed; // If all PHI nodes are promotable, check to make sure that all instructions // in the predecessor blocks can be promoted as well. If not, we won't be able @@ -2623,12 +2623,12 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, } else { DomBlock = *pred_begin(IfBlock1); for (BasicBlock::iterator I = IfBlock1->begin(); !I->isTerminator(); ++I) - if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I) && - !isa<PseudoProbeInst>(I)) { + if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I) && + !isa<PseudoProbeInst>(I)) { // This is not an aggressive instruction that we can promote. // Because of this, we won't be able to get rid of the control flow, so // the xform is not worth it. - return Changed; + return Changed; } } @@ -2637,12 +2637,12 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, } else { DomBlock = *pred_begin(IfBlock2); for (BasicBlock::iterator I = IfBlock2->begin(); !I->isTerminator(); ++I) - if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I) && - !isa<PseudoProbeInst>(I)) { + if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I) && + !isa<PseudoProbeInst>(I)) { // This is not an aggressive instruction that we can promote. // Because of this, we won't be able to get rid of the control flow, so // the xform is not worth it. - return Changed; + return Changed; } } assert(DomBlock && "Failed to find root DomBlock"); @@ -2685,18 +2685,18 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, Instruction *OldTI = DomBlock->getTerminator(); Builder.SetInsertPoint(OldTI); Builder.CreateBr(BB); - - SmallVector<DominatorTree::UpdateType, 3> Updates; - if (DTU) { - Updates.push_back({DominatorTree::Insert, DomBlock, BB}); - for (auto *Successor : successors(DomBlock)) - Updates.push_back({DominatorTree::Delete, DomBlock, Successor}); - } - + + SmallVector<DominatorTree::UpdateType, 3> Updates; + if (DTU) { + Updates.push_back({DominatorTree::Insert, DomBlock, BB}); + for (auto *Successor : successors(DomBlock)) + Updates.push_back({DominatorTree::Delete, DomBlock, Successor}); + } + OldTI->eraseFromParent(); - if (DTU) - DTU->applyUpdates(Updates); - + if (DTU) + DTU->applyUpdates(Updates); + return true; } @@ -2705,11 +2705,11 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, /// introducing a select if the return values disagree. bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI, IRBuilder<> &Builder) { - auto *BB = BI->getParent(); + auto *BB = BI->getParent(); assert(BI->isConditional() && "Must be a conditional branch"); BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); - // NOTE: destinations may match, this could be degenerate uncond branch. + // NOTE: destinations may match, this could be degenerate uncond branch. ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator()); ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator()); @@ -2726,17 +2726,17 @@ bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI, // there is no return value for this function, just change the // branch into a return. 
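// E.g. (hypothetical IR) "br i1 %c, label %t, label %f" where %t and
// %f both contain only "ret void" collapses to a single "ret void"
// emitted in place of the branch below.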
if (FalseRet->getNumOperands() == 0) { - TrueSucc->removePredecessor(BB); - FalseSucc->removePredecessor(BB); + TrueSucc->removePredecessor(BB); + FalseSucc->removePredecessor(BB); Builder.CreateRetVoid(); EraseTerminatorAndDCECond(BI); - if (DTU) { - SmallVector<DominatorTree::UpdateType, 2> Updates; - Updates.push_back({DominatorTree::Delete, BB, TrueSucc}); - if (TrueSucc != FalseSucc) - Updates.push_back({DominatorTree::Delete, BB, FalseSucc}); - DTU->applyUpdates(Updates); - } + if (DTU) { + SmallVector<DominatorTree::UpdateType, 2> Updates; + Updates.push_back({DominatorTree::Delete, BB, TrueSucc}); + if (TrueSucc != FalseSucc) + Updates.push_back({DominatorTree::Delete, BB, FalseSucc}); + DTU->applyUpdates(Updates); + } return true; } @@ -2748,10 +2748,10 @@ bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI, // Unwrap any PHI nodes in the return blocks. if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue)) if (TVPN->getParent() == TrueSucc) - TrueValue = TVPN->getIncomingValueForBlock(BB); + TrueValue = TVPN->getIncomingValueForBlock(BB); if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue)) if (FVPN->getParent() == FalseSucc) - FalseValue = FVPN->getIncomingValueForBlock(BB); + FalseValue = FVPN->getIncomingValueForBlock(BB); // In order for this transformation to be safe, we must be able to // unconditionally execute both operands to the return. This is @@ -2767,8 +2767,8 @@ bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI, // Okay, we collected all the mapped values and checked them for sanity, and // defined to really do this transformation. First, update the CFG. - TrueSucc->removePredecessor(BB); - FalseSucc->removePredecessor(BB); + TrueSucc->removePredecessor(BB); + FalseSucc->removePredecessor(BB); // Insert select instructions where needed. Value *BrCond = BI->getCondition(); @@ -2793,13 +2793,13 @@ bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI, << *TrueSucc << "\nFALSEBLOCK: " << *FalseSucc); EraseTerminatorAndDCECond(BI); - if (DTU) { - SmallVector<DominatorTree::UpdateType, 2> Updates; - Updates.push_back({DominatorTree::Delete, BB, TrueSucc}); - if (TrueSucc != FalseSucc) - Updates.push_back({DominatorTree::Delete, BB, FalseSucc}); - DTU->applyUpdates(Updates); - } + if (DTU) { + SmallVector<DominatorTree::UpdateType, 2> Updates; + Updates.push_back({DominatorTree::Delete, BB, TrueSucc}); + if (TrueSucc != FalseSucc) + Updates.push_back({DominatorTree::Delete, BB, FalseSucc}); + DTU->applyUpdates(Updates); + } return true; } @@ -2827,169 +2827,169 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, } } -// Determine if the two branches share a common destination, -// and deduce a glue that we need to use to join branch's conditions -// to arrive at the common destination. 
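// The four recognized shapes map to (binary op, invert PBI's cond):
//   PBI true  == BI true   -> Or,  no inversion
//   PBI false == BI false  -> And, no inversion
//   PBI true  == BI false  -> And, inversion
//   PBI false == BI true   -> Or,  inversion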
-static Optional<std::pair<Instruction::BinaryOps, bool>> -CheckIfCondBranchesShareCommonDestination(BranchInst *BI, BranchInst *PBI) { - assert(BI && PBI && BI->isConditional() && PBI->isConditional() && - "Both blocks must end with a conditional branches."); - assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) && - "PredBB must be a predecessor of BB."); - - if (PBI->getSuccessor(0) == BI->getSuccessor(0)) - return {{Instruction::Or, false}}; - else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) - return {{Instruction::And, false}}; - else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) - return {{Instruction::And, true}}; - else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) - return {{Instruction::Or, true}}; - return None; -} - -static bool PerformBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, - DomTreeUpdater *DTU, - MemorySSAUpdater *MSSAU) { - BasicBlock *BB = BI->getParent(); - BasicBlock *PredBlock = PBI->getParent(); - - // Determine if the two branches share a common destination. - Instruction::BinaryOps Opc; - bool InvertPredCond; - std::tie(Opc, InvertPredCond) = - *CheckIfCondBranchesShareCommonDestination(BI, PBI); - - LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); - - IRBuilder<> Builder(PBI); - // The builder is used to create instructions to eliminate the branch in BB. - // If BB's terminator has !annotation metadata, add it to the new - // instructions. - Builder.CollectMetadataToCopy(BB->getTerminator(), - {LLVMContext::MD_annotation}); - - // If we need to invert the condition in the pred block to match, do so now. - if (InvertPredCond) { - Value *NewCond = PBI->getCondition(); - if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) { - CmpInst *CI = cast<CmpInst>(NewCond); - CI->setPredicate(CI->getInversePredicate()); - } else { - NewCond = - Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not"); - } - - PBI->setCondition(NewCond); - PBI->swapSuccessors(); - } - - BasicBlock *UniqueSucc = - PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1); - - // Before cloning instructions, notify the successor basic block that it - // is about to have a new predecessor. This will update PHI nodes, - // which will allow us to update live-out uses of bonus instructions. - AddPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU); - - // Try to update branch weights. - uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; - if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight, - SuccTrueWeight, SuccFalseWeight)) { - SmallVector<uint64_t, 8> NewWeights; - - if (PBI->getSuccessor(0) == BB) { - // PBI: br i1 %x, BB, FalseDest - // BI: br i1 %y, UniqueSucc, FalseDest - // TrueWeight is TrueWeight for PBI * TrueWeight for BI. - NewWeights.push_back(PredTrueWeight * SuccTrueWeight); - // FalseWeight is FalseWeight for PBI * TotalWeight for BI + - // TrueWeight for PBI * FalseWeight for BI. - // We assume that total weights of a BranchInst can fit into 32 bits. - // Therefore, we will not have overflow using 64-bit arithmetic. - NewWeights.push_back(PredFalseWeight * - (SuccFalseWeight + SuccTrueWeight) + - PredTrueWeight * SuccFalseWeight); - } else { - // PBI: br i1 %x, TrueDest, BB - // BI: br i1 %y, TrueDest, UniqueSucc - // TrueWeight is TrueWeight for PBI * TotalWeight for BI + - // FalseWeight for PBI * TrueWeight for BI. 
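// Worked example with hypothetical weights: PBI {true: 3, false: 1}
// and BI {true: 5, false: 2} give TrueWeight = 3 * (5 + 2) + 1 * 5 = 26
// and FalseWeight = 1 * 2 = 2, matching the combined edge
// probabilities of the original two-branch chain.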
- NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) + - PredFalseWeight * SuccTrueWeight); - // FalseWeight is FalseWeight for PBI * FalseWeight for BI. - NewWeights.push_back(PredFalseWeight * SuccFalseWeight); - } - - // Halve the weights if any of them cannot fit in an uint32_t - FitWeights(NewWeights); - - SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end()); - setBranchWeights(PBI, MDWeights[0], MDWeights[1]); - - // TODO: If BB is reachable from all paths through PredBlock, then we - // could replace PBI's branch probabilities with BI's. - } else - PBI->setMetadata(LLVMContext::MD_prof, nullptr); - - // Now, update the CFG. - PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc); - - if (DTU) - DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc}, - {DominatorTree::Delete, PredBlock, BB}}); - - // If BI was a loop latch, it may have had associated loop metadata. - // We need to copy it to the new latch, that is, PBI. - if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop)) - PBI->setMetadata(LLVMContext::MD_loop, LoopMD); - - ValueToValueMapTy VMap; // maps original values to cloned values - CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap); - - // Now that the Cond was cloned into the predecessor basic block, - // or/and the two conditions together. - Instruction *NewCond = cast<Instruction>(Builder.CreateBinOp( - Opc, PBI->getCondition(), VMap[BI->getCondition()], "or.cond")); - PBI->setCondition(NewCond); - - // Copy any debug value intrinsics into the end of PredBlock. - for (Instruction &I : *BB) { - if (isa<DbgInfoIntrinsic>(I)) { - Instruction *NewI = I.clone(); - RemapInstruction(NewI, VMap, - RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); - NewI->insertBefore(PBI); - } - } - - ++NumFoldBranchToCommonDest; - return true; -} - +// Determine if the two branches share a common destination, +// and deduce a glue that we need to use to join branch's conditions +// to arrive at the common destination. +static Optional<std::pair<Instruction::BinaryOps, bool>> +CheckIfCondBranchesShareCommonDestination(BranchInst *BI, BranchInst *PBI) { + assert(BI && PBI && BI->isConditional() && PBI->isConditional() && + "Both blocks must end with a conditional branches."); + assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) && + "PredBB must be a predecessor of BB."); + + if (PBI->getSuccessor(0) == BI->getSuccessor(0)) + return {{Instruction::Or, false}}; + else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) + return {{Instruction::And, false}}; + else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) + return {{Instruction::And, true}}; + else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) + return {{Instruction::Or, true}}; + return None; +} + +static bool PerformBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, + DomTreeUpdater *DTU, + MemorySSAUpdater *MSSAU) { + BasicBlock *BB = BI->getParent(); + BasicBlock *PredBlock = PBI->getParent(); + + // Determine if the two branches share a common destination. + Instruction::BinaryOps Opc; + bool InvertPredCond; + std::tie(Opc, InvertPredCond) = + *CheckIfCondBranchesShareCommonDestination(BI, PBI); + + LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); + + IRBuilder<> Builder(PBI); + // The builder is used to create instructions to eliminate the branch in BB. + // If BB's terminator has !annotation metadata, add it to the new + // instructions. 
+ Builder.CollectMetadataToCopy(BB->getTerminator(), + {LLVMContext::MD_annotation}); + + // If we need to invert the condition in the pred block to match, do so now. + if (InvertPredCond) { + Value *NewCond = PBI->getCondition(); + if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) { + CmpInst *CI = cast<CmpInst>(NewCond); + CI->setPredicate(CI->getInversePredicate()); + } else { + NewCond = + Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not"); + } + + PBI->setCondition(NewCond); + PBI->swapSuccessors(); + } + + BasicBlock *UniqueSucc = + PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1); + + // Before cloning instructions, notify the successor basic block that it + // is about to have a new predecessor. This will update PHI nodes, + // which will allow us to update live-out uses of bonus instructions. + AddPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU); + + // Try to update branch weights. + uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; + if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight, + SuccTrueWeight, SuccFalseWeight)) { + SmallVector<uint64_t, 8> NewWeights; + + if (PBI->getSuccessor(0) == BB) { + // PBI: br i1 %x, BB, FalseDest + // BI: br i1 %y, UniqueSucc, FalseDest + // TrueWeight is TrueWeight for PBI * TrueWeight for BI. + NewWeights.push_back(PredTrueWeight * SuccTrueWeight); + // FalseWeight is FalseWeight for PBI * TotalWeight for BI + + // TrueWeight for PBI * FalseWeight for BI. + // We assume that total weights of a BranchInst can fit into 32 bits. + // Therefore, we will not have overflow using 64-bit arithmetic. + NewWeights.push_back(PredFalseWeight * + (SuccFalseWeight + SuccTrueWeight) + + PredTrueWeight * SuccFalseWeight); + } else { + // PBI: br i1 %x, TrueDest, BB + // BI: br i1 %y, TrueDest, UniqueSucc + // TrueWeight is TrueWeight for PBI * TotalWeight for BI + + // FalseWeight for PBI * TrueWeight for BI. + NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) + + PredFalseWeight * SuccTrueWeight); + // FalseWeight is FalseWeight for PBI * FalseWeight for BI. + NewWeights.push_back(PredFalseWeight * SuccFalseWeight); + } + + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(NewWeights); + + SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end()); + setBranchWeights(PBI, MDWeights[0], MDWeights[1]); + + // TODO: If BB is reachable from all paths through PredBlock, then we + // could replace PBI's branch probabilities with BI's. + } else + PBI->setMetadata(LLVMContext::MD_prof, nullptr); + + // Now, update the CFG. + PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc); + + if (DTU) + DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc}, + {DominatorTree::Delete, PredBlock, BB}}); + + // If BI was a loop latch, it may have had associated loop metadata. + // We need to copy it to the new latch, that is, PBI. + if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop)) + PBI->setMetadata(LLVMContext::MD_loop, LoopMD); + + ValueToValueMapTy VMap; // maps original values to cloned values + CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap); + + // Now that the Cond was cloned into the predecessor basic block, + // or/and the two conditions together. + Instruction *NewCond = cast<Instruction>(Builder.CreateBinOp( + Opc, PBI->getCondition(), VMap[BI->getCondition()], "or.cond")); + PBI->setCondition(NewCond); + + // Copy any debug value intrinsics into the end of PredBlock. 
+ for (Instruction &I : *BB) { + if (isa<DbgInfoIntrinsic>(I)) { + Instruction *NewI = I.clone(); + RemapInstruction(NewI, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + NewI->insertBefore(PBI); + } + } + + ++NumFoldBranchToCommonDest; + return true; +} + /// If this basic block is simple enough, and if a predecessor branches to us /// and one of our successors, fold the block into the predecessor and use /// logical operations to pick the right destination. -bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU, - MemorySSAUpdater *MSSAU, - const TargetTransformInfo *TTI, +bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU, + MemorySSAUpdater *MSSAU, + const TargetTransformInfo *TTI, unsigned BonusInstThreshold) { - // If this block ends with an unconditional branch, - // let SpeculativelyExecuteBB() deal with it. - if (!BI->isConditional()) - return false; - + // If this block ends with an unconditional branch, + // let SpeculativelyExecuteBB() deal with it. + if (!BI->isConditional()) + return false; + BasicBlock *BB = BI->getParent(); const unsigned PredCount = pred_size(BB); bool Changed = false; - TargetTransformInfo::TargetCostKind CostKind = - BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize - : TargetTransformInfo::TCK_SizeAndLatency; + TargetTransformInfo::TargetCostKind CostKind = + BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize + : TargetTransformInfo::TCK_SizeAndLatency; - Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); + Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); if (!Cond || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) || Cond->getParent() != BB || !Cond->hasOneUse()) @@ -3002,15 +3002,15 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU, // number of the bonus instructions we'll need to create when cloning into // each predecessor does not exceed a certain threshold. unsigned NumBonusInsts = 0; - for (Instruction &I : *BB) { - // Don't check the branch condition comparison itself. - if (&I == Cond) + for (Instruction &I : *BB) { + // Don't check the branch condition comparison itself. + if (&I == Cond) + continue; + // Ignore dbg intrinsics, and the terminator. + if (isa<DbgInfoIntrinsic>(I) || isa<BranchInst>(I)) continue; - // Ignore dbg intrinsics, and the terminator. - if (isa<DbgInfoIntrinsic>(I) || isa<BranchInst>(I)) - continue; - // I must be safe to execute unconditionally. - if (!isSafeToSpeculativelyExecute(&I)) + // I must be safe to execute unconditionally. + if (!isSafeToSpeculativelyExecute(&I)) return Changed; // Account for the cost of duplicating this instruction into each @@ -3031,7 +3031,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU, return Changed; // Finally, don't infinitely unroll conditional loops. - if (is_contained(successors(BB), BB)) + if (is_contained(successors(BB), BB)) return Changed; for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { @@ -3041,31 +3041,31 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU, // Check that we have two conditional branches. If there is a PHI node in // the common successor, verify that the same value flows in from both // blocks. - if (!PBI || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI)) + if (!PBI || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI)) continue; // Determine if the two branches share a common destination. 
- Instruction::BinaryOps Opc; - bool InvertPredCond; - if (auto Recepie = CheckIfCondBranchesShareCommonDestination(BI, PBI)) - std::tie(Opc, InvertPredCond) = *Recepie; - else - continue; - - // Check the cost of inserting the necessary logic before performing the - // transformation. - if (TTI) { - Type *Ty = BI->getCondition()->getType(); - unsigned Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind); - if (InvertPredCond && (!PBI->getCondition()->hasOneUse() || - !isa<CmpInst>(PBI->getCondition()))) - Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind); - - if (Cost > BranchFoldThreshold) + Instruction::BinaryOps Opc; + bool InvertPredCond; + if (auto Recepie = CheckIfCondBranchesShareCommonDestination(BI, PBI)) + std::tie(Opc, InvertPredCond) = *Recepie; + else + continue; + + // Check the cost of inserting the necessary logic before performing the + // transformation. + if (TTI) { + Type *Ty = BI->getCondition()->getType(); + unsigned Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind); + if (InvertPredCond && (!PBI->getCondition()->hasOneUse() || + !isa<CmpInst>(PBI->getCondition()))) + Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind); + + if (Cost > BranchFoldThreshold) continue; } - return PerformBranchToCommonDestFolding(BI, PBI, DTU, MSSAU); + return PerformBranchToCommonDestFolding(BI, PBI, DTU, MSSAU); } return Changed; } @@ -3138,10 +3138,10 @@ static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, return PHI; } -static bool mergeConditionalStoreToAddress( - BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, - BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, - DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { +static bool mergeConditionalStoreToAddress( + BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, + BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, + DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { // For every pointer, there must be exactly two stores, one coming from // PTB or PFB, and the other from QTB or QFB. We don't support more than one // store (to any address) in PTB,PFB or QTB,QFB. @@ -3216,7 +3216,7 @@ static bool mergeConditionalStoreToAddress( return true; }; - const std::array<StoreInst *, 2> FreeStores = {PStore, QStore}; + const std::array<StoreInst *, 2> FreeStores = {PStore, QStore}; if (!MergeCondStoresAggressively && (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) || !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores))) @@ -3230,8 +3230,8 @@ static bool mergeConditionalStoreToAddress( // If QTB does not exist, then QFB's only predecessor has a conditional // branch to QFB and PostBB. BasicBlock *TruePred = QTB ? 
QTB : QFB->getSinglePredecessor(); - BasicBlock *NewBB = - SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU); + BasicBlock *NewBB = + SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU); if (!NewBB) return false; PostBB = NewBB; @@ -3260,9 +3260,9 @@ static bool mergeConditionalStoreToAddress( QPred = QB.CreateNot(QPred); Value *CombinedPred = QB.CreateOr(PPred, QPred); - auto *T = SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(), - /*Unreachable=*/false, - /*BranchWeights=*/nullptr, DTU); + auto *T = SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(), + /*Unreachable=*/false, + /*BranchWeights=*/nullptr, DTU); QB.SetInsertPoint(T); StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address)); AAMDNodes AAMD; @@ -3282,7 +3282,7 @@ static bool mergeConditionalStoreToAddress( } static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, - DomTreeUpdater *DTU, const DataLayout &DL, + DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { // The intention here is to find diamonds or triangles (see below) where each // conditional block contains a store to the same address. Both of these @@ -3384,17 +3384,17 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, bool Changed = false; for (auto *Address : CommonAddresses) - Changed |= - mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address, - InvertPCond, InvertQCond, DTU, DL, TTI); + Changed |= + mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address, + InvertPCond, InvertQCond, DTU, DL, TTI); return Changed; } /// If the previous block ended with a widenable branch, determine if reusing /// the target block is profitable and legal. This will have the effect of /// "widening" PBI, but doesn't require us to reason about hosting safety. -static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, - DomTreeUpdater *DTU) { +static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, + DomTreeUpdater *DTU) { // TODO: This can be generalized in two important ways: // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input // values from the PBI edge. 
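// Shape targeted here, sketched on hypothetical IR: PBI ends in a
// widenable branch to IfFalseBB, BI's other successor merely calls
// @llvm.experimental.deoptimize, and BI's block has no side effects;
// retargeting BI at IfFalseBB then widens the guard without having to
// reason about hoisting any code.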
@@ -3417,25 +3417,25 @@ static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, if (BI->getSuccessor(1) != IfFalseBB && // no inf looping BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability NoSideEffects(*BI->getParent())) { - auto *OldSuccessor = BI->getSuccessor(1); - OldSuccessor->removePredecessor(BI->getParent()); + auto *OldSuccessor = BI->getSuccessor(1); + OldSuccessor->removePredecessor(BI->getParent()); BI->setSuccessor(1, IfFalseBB); - if (DTU) - DTU->applyUpdates( - {{DominatorTree::Insert, BI->getParent(), IfFalseBB}, - {DominatorTree::Delete, BI->getParent(), OldSuccessor}}); + if (DTU) + DTU->applyUpdates( + {{DominatorTree::Insert, BI->getParent(), IfFalseBB}, + {DominatorTree::Delete, BI->getParent(), OldSuccessor}}); return true; } if (BI->getSuccessor(0) != IfFalseBB && // no inf looping BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability NoSideEffects(*BI->getParent())) { - auto *OldSuccessor = BI->getSuccessor(0); - OldSuccessor->removePredecessor(BI->getParent()); + auto *OldSuccessor = BI->getSuccessor(0); + OldSuccessor->removePredecessor(BI->getParent()); BI->setSuccessor(0, IfFalseBB); - if (DTU) - DTU->applyUpdates( - {{DominatorTree::Insert, BI->getParent(), IfFalseBB}, - {DominatorTree::Delete, BI->getParent(), OldSuccessor}}); + if (DTU) + DTU->applyUpdates( + {{DominatorTree::Insert, BI->getParent(), IfFalseBB}, + {DominatorTree::Delete, BI->getParent(), OldSuccessor}}); return true; } return false; @@ -3446,7 +3446,7 @@ static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, /// that PBI and BI are both conditional branches, and BI is in one of the /// successor blocks of PBI - PBI branches to BI. static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, - DomTreeUpdater *DTU, + DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { assert(PBI->isConditional() && BI->isConditional()); @@ -3500,7 +3500,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, // If the previous block ended with a widenable branch, determine if reusing // the target block is profitable and legal. This will have the effect of // "widening" PBI, but doesn't require us to reason about hosting safety. - if (tryWidenCondBranchToCondBranch(PBI, BI, DTU)) + if (tryWidenCondBranchToCondBranch(PBI, BI, DTU)) return true; if (auto *CE = dyn_cast<ConstantExpr>(BI->getCondition())) @@ -3510,7 +3510,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, // If both branches are conditional and both contain stores to the same // address, remove the stores from the conditionals and create a conditional // merged store at the end. - if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI)) + if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI)) return true; // If this is a conditional branch in an empty block, and if any @@ -3553,7 +3553,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, // case, it would be unsafe to hoist the operation into a select instruction. 
BasicBlock *CommonDest = PBI->getSuccessor(PBIOp); - BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1); + BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1); unsigned NumPhis = 0; for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II); ++II, ++NumPhis) { @@ -3579,8 +3579,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent() << "AND: " << *BI->getParent()); - SmallVector<DominatorTree::UpdateType, 5> Updates; - + SmallVector<DominatorTree::UpdateType, 5> Updates; + // If OtherDest *is* BB, then BB is a basic block with a single conditional // branch in it, where one edge (OtherDest) goes back to itself but the other // exits. We don't *know* that the program avoids the infinite loop @@ -3594,7 +3594,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, BasicBlock *InfLoopBlock = BasicBlock::Create(BB->getContext(), "infloop", BB->getParent()); BranchInst::Create(InfLoopBlock, InfLoopBlock); - Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock}); + Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock}); OtherDest = InfLoopBlock; } @@ -3621,12 +3621,12 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, PBI->setSuccessor(0, CommonDest); PBI->setSuccessor(1, OtherDest); - Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest}); - Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest}); - - if (DTU) - DTU->applyUpdates(Updates); - + Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest}); + Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest}); + + if (DTU) + DTU->applyUpdates(Updates); + // Update branch weight for PBI. uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; uint64_t PredCommon, PredOther, SuccCommon, SuccOther; @@ -3706,7 +3706,7 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm, BasicBlock *FalseBB, uint32_t TrueWeight, uint32_t FalseWeight) { - auto *BB = OldTerm->getParent(); + auto *BB = OldTerm->getParent(); // Remove any superfluous successor edges from the CFG. // First, figure out which successors to preserve. // If TrueBB and FalseBB are equal, only try to preserve one copy of that @@ -3714,8 +3714,8 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm, BasicBlock *KeepEdge1 = TrueBB; BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr; - SmallSetVector<BasicBlock *, 2> RemovedSuccessors; - + SmallSetVector<BasicBlock *, 2> RemovedSuccessors; + // Then remove the rest. for (BasicBlock *Succ : successors(OldTerm)) { // Make sure only to keep exactly one copy of each edge. @@ -3723,13 +3723,13 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm, KeepEdge1 = nullptr; else if (Succ == KeepEdge2) KeepEdge2 = nullptr; - else { - Succ->removePredecessor(BB, + else { + Succ->removePredecessor(BB, /*KeepOneInputPHIs=*/true); - - if (Succ != TrueBB && Succ != FalseBB) - RemovedSuccessors.insert(Succ); - } + + if (Succ != TrueBB && Succ != FalseBB) + RemovedSuccessors.insert(Succ); + } } IRBuilder<> Builder(OldTerm); @@ -3737,11 +3737,11 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm, // Insert an appropriate new terminator. if (!KeepEdge1 && !KeepEdge2) { - if (TrueBB == FalseBB) { + if (TrueBB == FalseBB) { // We were only looking for one successor, and it was present. 
// Create an unconditional branch to it. Builder.CreateBr(TrueBB); - } else { + } else { // We found both of the successors we were looking for. // Create a conditional branch sharing the condition of the select. BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB); @@ -3756,25 +3756,25 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm, // One of the selected values was a successor, but the other wasn't. // Insert an unconditional branch to the one that was found; // the edge to the one that wasn't must be unreachable. - if (!KeepEdge1) { + if (!KeepEdge1) { // Only TrueBB was found. Builder.CreateBr(TrueBB); - } else { + } else { // Only FalseBB was found. Builder.CreateBr(FalseBB); - } + } } EraseTerminatorAndDCECond(OldTerm); - - if (DTU) { - SmallVector<DominatorTree::UpdateType, 2> Updates; - Updates.reserve(RemovedSuccessors.size()); - for (auto *RemovedSuccessor : RemovedSuccessors) - Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor}); - DTU->applyUpdates(Updates); - } - + + if (DTU) { + SmallVector<DominatorTree::UpdateType, 2> Updates; + Updates.reserve(RemovedSuccessors.size()); + for (auto *RemovedSuccessor : RemovedSuccessors) + Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor}); + DTU->applyUpdates(Updates); + } + return true; } @@ -3929,8 +3929,8 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt( ICI->replaceAllUsesWith(DefaultCst); ICI->eraseFromParent(); - SmallVector<DominatorTree::UpdateType, 2> Updates; - + SmallVector<DominatorTree::UpdateType, 2> Updates; + // Okay, the switch goes to this block on a default value. Add an edge from // the switch to the merge point on the compared value. BasicBlock *NewBB = @@ -3944,17 +3944,17 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt( SIW.setSuccessorWeight(0, *NewW); } SIW.addCase(Cst, NewBB, NewW); - Updates.push_back({DominatorTree::Insert, Pred, NewBB}); + Updates.push_back({DominatorTree::Insert, Pred, NewBB}); } // NewBB branches to the phi block, add the uncond branch and the phi entry. Builder.SetInsertPoint(NewBB); Builder.SetCurrentDebugLocation(SI->getDebugLoc()); Builder.CreateBr(SuccBlock); - Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock}); + Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock}); PHIUse->addIncoming(NewCst, NewBB); - if (DTU) - DTU->applyUpdates(Updates); + if (DTU) + DTU->applyUpdates(Updates); return true; } @@ -3988,7 +3988,7 @@ bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI, if (UsedICmps <= 1) return false; - bool TrueWhenEqual = match(Cond, m_LogicalOr(m_Value(), m_Value())); + bool TrueWhenEqual = match(Cond, m_LogicalOr(m_Value(), m_Value())); // There might be duplicate constants in the list, which the switch // instruction can't handle, remove them now. @@ -4020,15 +4020,15 @@ bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI, << " cases into SWITCH. BB is:\n" << *BB); - SmallVector<DominatorTree::UpdateType, 2> Updates; - + SmallVector<DominatorTree::UpdateType, 2> Updates; + // If there are any extra values that couldn't be folded into the switch // then we evaluate them with an explicit branch first. Split the block // right before the condbr to handle it. if (ExtraCase) { - BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr, - /*MSSAU=*/nullptr, "switch.early.test"); - + BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr, + /*MSSAU=*/nullptr, "switch.early.test"); + // Remove the uncond branch added to the old block. 
Instruction *OldTI = BB->getTerminator(); Builder.SetInsertPoint(OldTI); @@ -4040,8 +4040,8 @@ bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI, OldTI->eraseFromParent(); - Updates.push_back({DominatorTree::Insert, BB, EdgeBB}); - + Updates.push_back({DominatorTree::Insert, BB, EdgeBB}); + // If there are PHI nodes in EdgeBB, then we need to add a new entry to them // for the edge we just added. AddPredecessorToBlock(EdgeBB, BB, NewBB); @@ -4077,8 +4077,8 @@ bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI, // Erase the old branch instruction. EraseTerminatorAndDCECond(BI); - if (DTU) - DTU->applyUpdates(Updates); + if (DTU) + DTU->applyUpdates(Updates); LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n'); return true; @@ -4095,36 +4095,36 @@ bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { return false; } -// Check if cleanup block is empty -static bool isCleanupBlockEmpty(iterator_range<BasicBlock::iterator> R) { - for (Instruction &I : R) { - auto *II = dyn_cast<IntrinsicInst>(&I); - if (!II) - return false; - - Intrinsic::ID IntrinsicID = II->getIntrinsicID(); - switch (IntrinsicID) { - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - case Intrinsic::dbg_label: - case Intrinsic::lifetime_end: - break; - default: - return false; - } - } - return true; -} - +// Check if cleanup block is empty +static bool isCleanupBlockEmpty(iterator_range<BasicBlock::iterator> R) { + for (Instruction &I : R) { + auto *II = dyn_cast<IntrinsicInst>(&I); + if (!II) + return false; + + Intrinsic::ID IntrinsicID = II->getIntrinsicID(); + switch (IntrinsicID) { + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::dbg_label: + case Intrinsic::lifetime_end: + break; + default: + return false; + } + } + return true; +} + // Simplify resume that is shared by several landing pads (phi of landing pad). bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) { BasicBlock *BB = RI->getParent(); - // Check that there are no other instructions except for debug and lifetime - // intrinsics between the phi's and resume instruction. - if (!isCleanupBlockEmpty( - make_range(RI->getParent()->getFirstNonPHI(), BB->getTerminator()))) - return false; + // Check that there are no other instructions except for debug and lifetime + // intrinsics between the phi's and resume instruction. + if (!isCleanupBlockEmpty( + make_range(RI->getParent()->getFirstNonPHI(), BB->getTerminator()))) + return false; SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks; auto *PhiLPInst = cast<PHINode>(RI->getValue()); @@ -4145,8 +4145,8 @@ bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) { if (IncomingValue != LandingPad) continue; - if (isCleanupBlockEmpty( - make_range(LandingPad->getNextNode(), IncomingBB->getTerminator()))) + if (isCleanupBlockEmpty( + make_range(LandingPad->getNextNode(), IncomingBB->getTerminator()))) TrivialUnwindBlocks.insert(IncomingBB); } @@ -4165,8 +4165,8 @@ bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) { for (pred_iterator PI = pred_begin(TrivialBB), PE = pred_end(TrivialBB); PI != PE;) { BasicBlock *Pred = *PI++; - removeUnwindEdge(Pred, DTU); - ++NumInvokes; + removeUnwindEdge(Pred, DTU); + ++NumInvokes; } // In each SimplifyCFG run, only the current processed block can be erased. @@ -4176,17 +4176,17 @@ bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) { // predecessors. 
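// isCleanupBlockEmpty, defined earlier in this hunk, deems a range "empty"
// when it contains nothing but dbg.declare/dbg.value/dbg.label and
// lifetime.end intrinsics; lifetime.start is not on that list, so a block
// holding one does not qualify. A minimal usage sketch, mirroring the
// landing-pad loop above (LandingPad and IncomingBB as in that loop):
if (isCleanupBlockEmpty(
        make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
  TrivialUnwindBlocks.insert(IncomingBB); // only debug/lifetime markers left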
TrivialBB->getTerminator()->eraseFromParent(); new UnreachableInst(RI->getContext(), TrivialBB); - if (DTU) - DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}}); + if (DTU) + DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}}); } // Delete the resume block if all its predecessors have been removed. - if (pred_empty(BB)) { - if (DTU) - DTU->deleteBB(BB); - else - BB->eraseFromParent(); - } + if (pred_empty(BB)) { + if (DTU) + DTU->deleteBB(BB); + else + BB->eraseFromParent(); + } return !TrivialUnwindBlocks.empty(); } @@ -4199,26 +4199,26 @@ bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) { "Resume must unwind the exception that caused control to here"); // Check that there are no other instructions except for debug intrinsics. - if (!isCleanupBlockEmpty( - make_range<Instruction *>(LPInst->getNextNode(), RI))) + if (!isCleanupBlockEmpty( + make_range<Instruction *>(LPInst->getNextNode(), RI))) return false; // Turn all invokes that unwind here into calls and delete the basic block. for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) { BasicBlock *Pred = *PI++; - removeUnwindEdge(Pred, DTU); - ++NumInvokes; + removeUnwindEdge(Pred, DTU); + ++NumInvokes; } // The landingpad is now unreachable. Zap it. - if (DTU) - DTU->deleteBB(BB); - else - BB->eraseFromParent(); + if (DTU) + DTU->deleteBB(BB); + else + BB->eraseFromParent(); return true; } -static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) { +static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) { // If this is a trivial cleanup pad that executes no instructions, it can be // eliminated. If the cleanup pad continues to the caller, any predecessor // that is an EH pad will be updated to continue to the caller and any @@ -4239,8 +4239,8 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) { return false; // Check that there are no other instructions except for benign intrinsics. - if (!isCleanupBlockEmpty( - make_range<Instruction *>(CPInst->getNextNode(), RI))) + if (!isCleanupBlockEmpty( + make_range<Instruction *>(CPInst->getNextNode(), RI))) return false; // If the cleanup return we are simplifying unwinds to the caller, this will @@ -4325,32 +4325,32 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) { } } - std::vector<DominatorTree::UpdateType> Updates; - + std::vector<DominatorTree::UpdateType> Updates; + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) { // The iterator must be updated here because we are removing this pred. BasicBlock *PredBB = *PI++; if (UnwindDest == nullptr) { - if (DTU) - DTU->applyUpdates(Updates); - Updates.clear(); - removeUnwindEdge(PredBB, DTU); - ++NumInvokes; + if (DTU) + DTU->applyUpdates(Updates); + Updates.clear(); + removeUnwindEdge(PredBB, DTU); + ++NumInvokes; } else { Instruction *TI = PredBB->getTerminator(); TI->replaceUsesOfWith(BB, UnwindDest); - Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest}); - Updates.push_back({DominatorTree::Delete, PredBB, BB}); + Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest}); + Updates.push_back({DominatorTree::Delete, PredBB, BB}); } } - if (DTU) { - DTU->applyUpdates(Updates); - DTU->deleteBB(BB); - } else - // The cleanup pad is now unreachable. Zap it. - BB->eraseFromParent(); - + if (DTU) { + DTU->applyUpdates(Updates); + DTU->deleteBB(BB); + } else + // The cleanup pad is now unreachable. Zap it. 
+ BB->eraseFromParent(); + return true; } @@ -4397,7 +4397,7 @@ bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) { if (mergeCleanupPad(RI)) return true; - if (removeEmptyCleanup(RI, DTU)) + if (removeEmptyCleanup(RI, DTU)) return true; return false; @@ -4428,16 +4428,16 @@ bool SimplifyCFGOpt::simplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { BasicBlock *Pred = UncondBranchPreds.pop_back_val(); LLVM_DEBUG(dbgs() << "FOLDING: " << *BB << "INTO UNCOND BRANCH PRED: " << *Pred); - (void)FoldReturnIntoUncondBranch(RI, BB, Pred, DTU); + (void)FoldReturnIntoUncondBranch(RI, BB, Pred, DTU); } // If we eliminated all predecessors of the block, delete the block now. if (pred_empty(BB)) { // We know there are no successors, so just nuke the block. - if (DTU) - DTU->deleteBB(BB); - else - BB->eraseFromParent(); + if (DTU) + DTU->deleteBB(BB); + else + BB->eraseFromParent(); } return true; @@ -4517,26 +4517,26 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { if (&BB->front() != UI) return Changed; - std::vector<DominatorTree::UpdateType> Updates; - - SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB)); + std::vector<DominatorTree::UpdateType> Updates; + + SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB)); for (unsigned i = 0, e = Preds.size(); i != e; ++i) { - auto *Predecessor = Preds[i]; - Instruction *TI = Predecessor->getTerminator(); + auto *Predecessor = Preds[i]; + Instruction *TI = Predecessor->getTerminator(); IRBuilder<> Builder(TI); if (auto *BI = dyn_cast<BranchInst>(TI)) { - // We could either have a proper unconditional branch, - // or a degenerate conditional branch with matching destinations. - if (all_of(BI->successors(), - [BB](auto *Successor) { return Successor == BB; })) { + // We could either have a proper unconditional branch, + // or a degenerate conditional branch with matching destinations. + if (all_of(BI->successors(), + [BB](auto *Successor) { return Successor == BB; })) { new UnreachableInst(TI->getContext(), TI); TI->eraseFromParent(); Changed = true; } else { - assert(BI->isConditional() && "Can't get here with an uncond branch."); + assert(BI->isConditional() && "Can't get here with an uncond branch."); Value* Cond = BI->getCondition(); - assert(BI->getSuccessor(0) != BI->getSuccessor(1) && - "The destinations are guaranteed to be different here."); + assert(BI->getSuccessor(0) != BI->getSuccessor(1) && + "The destinations are guaranteed to be different here."); if (BI->getSuccessor(0) == BB) { Builder.CreateAssumption(Builder.CreateNot(Cond)); Builder.CreateBr(BI->getSuccessor(1)); @@ -4548,7 +4548,7 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { EraseTerminatorAndDCECond(BI); Changed = true; } - Updates.push_back({DominatorTree::Delete, Predecessor, BB}); + Updates.push_back({DominatorTree::Delete, Predecessor, BB}); } else if (auto *SI = dyn_cast<SwitchInst>(TI)) { SwitchInstProfUpdateWrapper SU(*SI); for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) { @@ -4561,23 +4561,23 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { e = SU->case_end(); Changed = true; } - // Note that the default destination can't be removed! - if (SI->getDefaultDest() != BB) - Updates.push_back({DominatorTree::Delete, Predecessor, BB}); + // Note that the default destination can't be removed! 
+ if (SI->getDefaultDest() != BB) + Updates.push_back({DominatorTree::Delete, Predecessor, BB}); } else if (auto *II = dyn_cast<InvokeInst>(TI)) { if (II->getUnwindDest() == BB) { - if (DTU) - DTU->applyUpdates(Updates); - Updates.clear(); - removeUnwindEdge(TI->getParent(), DTU); + if (DTU) + DTU->applyUpdates(Updates); + Updates.clear(); + removeUnwindEdge(TI->getParent(), DTU); Changed = true; } } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) { if (CSI->getUnwindDest() == BB) { - if (DTU) - DTU->applyUpdates(Updates); - Updates.clear(); - removeUnwindEdge(TI->getParent(), DTU); + if (DTU) + DTU->applyUpdates(Updates); + Updates.clear(); + removeUnwindEdge(TI->getParent(), DTU); Changed = true; continue; } @@ -4592,53 +4592,53 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { Changed = true; } } - Updates.push_back({DominatorTree::Delete, Predecessor, BB}); + Updates.push_back({DominatorTree::Delete, Predecessor, BB}); if (CSI->getNumHandlers() == 0) { if (CSI->hasUnwindDest()) { - // Redirect all predecessors of the block containing CatchSwitchInst - // to instead branch to the CatchSwitchInst's unwind destination. - for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) { - Updates.push_back({DominatorTree::Insert, PredecessorOfPredecessor, - CSI->getUnwindDest()}); - Updates.push_back( - {DominatorTree::Delete, PredecessorOfPredecessor, Predecessor}); - } - Predecessor->replaceAllUsesWith(CSI->getUnwindDest()); + // Redirect all predecessors of the block containing CatchSwitchInst + // to instead branch to the CatchSwitchInst's unwind destination. + for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) { + Updates.push_back({DominatorTree::Insert, PredecessorOfPredecessor, + CSI->getUnwindDest()}); + Updates.push_back( + {DominatorTree::Delete, PredecessorOfPredecessor, Predecessor}); + } + Predecessor->replaceAllUsesWith(CSI->getUnwindDest()); } else { // Rewrite all preds to unwind to caller (or from invoke to call). - if (DTU) - DTU->applyUpdates(Updates); - Updates.clear(); - SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor)); + if (DTU) + DTU->applyUpdates(Updates); + Updates.clear(); + SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor)); for (BasicBlock *EHPred : EHPreds) - removeUnwindEdge(EHPred, DTU); + removeUnwindEdge(EHPred, DTU); } // The catchswitch is no longer reachable. new UnreachableInst(CSI->getContext(), CSI); CSI->eraseFromParent(); Changed = true; } - } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) { - (void)CRI; - assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB && - "Expected to always have an unwind to BB."); - Updates.push_back({DominatorTree::Delete, Predecessor, BB}); + } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) { + (void)CRI; + assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB && + "Expected to always have an unwind to BB."); + Updates.push_back({DominatorTree::Delete, Predecessor, BB}); new UnreachableInst(TI->getContext(), TI); TI->eraseFromParent(); Changed = true; } } - if (DTU) - DTU->applyUpdates(Updates); - + if (DTU) + DTU->applyUpdates(Updates); + // If this block is now dead, remove it. if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) { // We know there are no successors, so just nuke the block. 
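// Both unwind-edge cases above (InvokeInst and CatchSwitchInst) follow the
// same discipline: flush the queued updates and clear the vector *before*
// calling removeUnwindEdge, since removeUnwindEdge mutates the CFG and
// applies its own updates through the same DomTreeUpdater. A condensed
// restatement of the pattern (variable names as in the surrounding code):
if (DTU)
  DTU->applyUpdates(Updates); // make the DomTree consistent first
Updates.clear();              // these edges are now accounted for
removeUnwindEdge(TI->getParent(), DTU); // may delete or retarget edges itself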
- if (DTU) - DTU->deleteBB(BB); - else - BB->eraseFromParent(); + if (DTU) + DTU->deleteBB(BB); + else + BB->eraseFromParent(); return true; } @@ -4656,26 +4656,26 @@ static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) { return true; } -static void createUnreachableSwitchDefault(SwitchInst *Switch, - DomTreeUpdater *DTU) { +static void createUnreachableSwitchDefault(SwitchInst *Switch, + DomTreeUpdater *DTU) { LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); - auto *BB = Switch->getParent(); - BasicBlock *NewDefaultBlock = SplitBlockPredecessors( - Switch->getDefaultDest(), Switch->getParent(), "", DTU); - auto *OrigDefaultBlock = Switch->getDefaultDest(); + auto *BB = Switch->getParent(); + BasicBlock *NewDefaultBlock = SplitBlockPredecessors( + Switch->getDefaultDest(), Switch->getParent(), "", DTU); + auto *OrigDefaultBlock = Switch->getDefaultDest(); Switch->setDefaultDest(&*NewDefaultBlock); - if (DTU) - DTU->applyUpdates({{DominatorTree::Insert, BB, &*NewDefaultBlock}, - {DominatorTree::Delete, BB, OrigDefaultBlock}}); - SplitBlock(&*NewDefaultBlock, &NewDefaultBlock->front(), DTU); - SmallVector<DominatorTree::UpdateType, 2> Updates; - for (auto *Successor : successors(NewDefaultBlock)) - Updates.push_back({DominatorTree::Delete, NewDefaultBlock, Successor}); + if (DTU) + DTU->applyUpdates({{DominatorTree::Insert, BB, &*NewDefaultBlock}, + {DominatorTree::Delete, BB, OrigDefaultBlock}}); + SplitBlock(&*NewDefaultBlock, &NewDefaultBlock->front(), DTU); + SmallVector<DominatorTree::UpdateType, 2> Updates; + for (auto *Successor : successors(NewDefaultBlock)) + Updates.push_back({DominatorTree::Delete, NewDefaultBlock, Successor}); auto *NewTerminator = NewDefaultBlock->getTerminator(); new UnreachableInst(Switch->getContext(), NewTerminator); EraseTerminatorAndDCECond(NewTerminator); - if (DTU) - DTU->applyUpdates(Updates); + if (DTU) + DTU->applyUpdates(Updates); } /// Turn a switch with two reachable destinations into an integer range @@ -4687,8 +4687,8 @@ bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI, bool HasDefault = !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()); - auto *BB = SI->getParent(); - + auto *BB = SI->getParent(); + // Partition the cases into two sets with different destinations. BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr; BasicBlock *DestB = nullptr; @@ -4792,23 +4792,23 @@ bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI, // Clean up the default block - it may have phis or other instructions before // the unreachable terminator. if (!HasDefault) - createUnreachableSwitchDefault(SI, DTU); + createUnreachableSwitchDefault(SI, DTU); + + auto *UnreachableDefault = SI->getDefaultDest(); - auto *UnreachableDefault = SI->getDefaultDest(); - // Drop the switch. SI->eraseFromParent(); - if (!HasDefault && DTU) - DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}}); - + if (!HasDefault && DTU) + DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}}); + return true; } /// Compute masked bits for the condition of a switch /// and use it to remove dead cases. 
-static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, - AssumptionCache *AC, +static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, + AssumptionCache *AC, const DataLayout &DL) { Value *Cond = SI->getCondition(); unsigned Bits = Cond->getType()->getIntegerBitWidth(); @@ -4822,15 +4822,15 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, // Gather dead cases. SmallVector<ConstantInt *, 8> DeadCases; - SmallMapVector<BasicBlock *, int, 8> NumPerSuccessorCases; + SmallMapVector<BasicBlock *, int, 8> NumPerSuccessorCases; for (auto &Case : SI->cases()) { - auto *Successor = Case.getCaseSuccessor(); - ++NumPerSuccessorCases[Successor]; + auto *Successor = Case.getCaseSuccessor(); + ++NumPerSuccessorCases[Successor]; const APInt &CaseVal = Case.getCaseValue()->getValue(); if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) || (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) { DeadCases.push_back(Case.getCaseValue()); - --NumPerSuccessorCases[Successor]; + --NumPerSuccessorCases[Successor]; LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal << " is dead.\n"); } @@ -4848,7 +4848,7 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, if (HasDefault && DeadCases.empty() && NumUnknownBits < 64 /* avoid overflow */ && SI->getNumCases() == (1ULL << NumUnknownBits)) { - createUnreachableSwitchDefault(SI, DTU); + createUnreachableSwitchDefault(SI, DTU); return true; } @@ -4865,13 +4865,13 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, SIW.removeCase(CaseI); } - std::vector<DominatorTree::UpdateType> Updates; - for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases) - if (I.second == 0) - Updates.push_back({DominatorTree::Delete, SI->getParent(), I.first}); - if (DTU) - DTU->applyUpdates(Updates); - + std::vector<DominatorTree::UpdateType> Updates; + for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases) + if (I.second == 0) + Updates.push_back({DominatorTree::Delete, SI->getParent(), I.first}); + if (DTU) + DTU->applyUpdates(Updates); + return true; } @@ -5227,19 +5227,19 @@ static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector, // a select, fixing up PHI nodes and basic blocks. static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI, Value *SelectValue, - IRBuilder<> &Builder, - DomTreeUpdater *DTU) { - std::vector<DominatorTree::UpdateType> Updates; - + IRBuilder<> &Builder, + DomTreeUpdater *DTU) { + std::vector<DominatorTree::UpdateType> Updates; + BasicBlock *SelectBB = SI->getParent(); - BasicBlock *DestBB = PHI->getParent(); - - if (!is_contained(predecessors(DestBB), SelectBB)) - Updates.push_back({DominatorTree::Insert, SelectBB, DestBB}); - Builder.CreateBr(DestBB); - - // Remove the switch. - + BasicBlock *DestBB = PHI->getParent(); + + if (!is_contained(predecessors(DestBB), SelectBB)) + Updates.push_back({DominatorTree::Insert, SelectBB, DestBB}); + Builder.CreateBr(DestBB); + + // Remove the switch. 
+ while (PHI->getBasicBlockIndex(SelectBB) >= 0) PHI->removeIncomingValue(SelectBB); PHI->addIncoming(SelectValue, SelectBB); @@ -5247,21 +5247,21 @@ static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI, for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) { BasicBlock *Succ = SI->getSuccessor(i); - if (Succ == DestBB) + if (Succ == DestBB) continue; Succ->removePredecessor(SelectBB); - Updates.push_back({DominatorTree::Delete, SelectBB, Succ}); + Updates.push_back({DominatorTree::Delete, SelectBB, Succ}); } SI->eraseFromParent(); - if (DTU) - DTU->applyUpdates(Updates); + if (DTU) + DTU->applyUpdates(Updates); } /// If the switch is only used to initialize one or more /// phi nodes in a common successor block with only two different /// constant values, replace the switch with select. static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder, - DomTreeUpdater *DTU, const DataLayout &DL, + DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { Value *const Cond = SI->getCondition(); PHINode *PHI = nullptr; @@ -5281,7 +5281,7 @@ static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder, Value *SelectValue = ConvertTwoCaseSwitch(UniqueResults, DefaultResult, Cond, Builder); if (SelectValue) { - RemoveSwitchAfterSelectConversion(SI, PHI, SelectValue, Builder, DTU); + RemoveSwitchAfterSelectConversion(SI, PHI, SelectValue, Builder, DTU); return true; } // The switch couldn't be converted into a select. @@ -5666,12 +5666,12 @@ static void reuseTableCompare( /// successor block with different constant values, replace the switch with /// lookup tables. static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, - DomTreeUpdater *DTU, const DataLayout &DL, + DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); - BasicBlock *BB = SI->getParent(); - Function *Fn = BB->getParent(); + BasicBlock *BB = SI->getParent(); + Function *Fn = BB->getParent(); // Only build lookup table when we have a target that supports it or the // attribute is not set. if (!TTI.shouldBuildLookupTables() || @@ -5765,8 +5765,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes)) return false; - std::vector<DominatorTree::UpdateType> Updates; - + std::vector<DominatorTree::UpdateType> Updates; + // Create the BB that does the lookups. Module &Mod = *CommonDest->getParent()->getParent(); BasicBlock *LookupBB = BasicBlock::Create( @@ -5799,7 +5799,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, if (!DefaultIsReachable || GeneratingCoveredLookupTable) { Builder.CreateBr(LookupBB); - Updates.push_back({DominatorTree::Insert, BB, LookupBB}); + Updates.push_back({DominatorTree::Insert, BB, LookupBB}); // Note: We call removeProdecessor later since we need to be able to get the // PHI value for the default case in case we're using a bit mask. } else { @@ -5807,7 +5807,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize)); RangeCheckBranch = Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); - Updates.push_back({DominatorTree::Insert, BB, LookupBB}); + Updates.push_back({DominatorTree::Insert, BB, LookupBB}); } // Populate the BB that does the lookups. 
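// In the lookup-table conversion above, when the default destination stays
// reachable the original switch collapses to a single bounds check: only
// indices below TableSize reach the lookup block, everything else falls
// back to the old default. The lines below restate that guard; all names
// (TableIndex, TableSize, MinCaseVal, LookupBB, Updates) come from the
// hunk above.
Value *Cmp = Builder.CreateICmpULT(
    TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
RangeCheckBranch = Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
Updates.push_back({DominatorTree::Insert, BB, LookupBB});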
@@ -5845,18 +5845,18 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, Value *LoBit = Builder.CreateTrunc( Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit"); Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest()); - Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB}); - Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()}); + Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB}); + Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()}); Builder.SetInsertPoint(LookupBB); - AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB); + AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB); } if (!DefaultIsReachable || GeneratingCoveredLookupTable) { // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later, // do not delete PHINodes here. - SI->getDefaultDest()->removePredecessor(BB, + SI->getDefaultDest()->removePredecessor(BB, /*KeepOneInputPHIs=*/true); - Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()}); + Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()}); } bool ReturnedEarly = false; @@ -5893,29 +5893,29 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, PHI->addIncoming(Result, LookupBB); } - if (!ReturnedEarly) { + if (!ReturnedEarly) { Builder.CreateBr(CommonDest); - Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest}); - } + Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest}); + } // Remove the switch. - SmallSetVector<BasicBlock *, 8> RemovedSuccessors; + SmallSetVector<BasicBlock *, 8> RemovedSuccessors; for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) { BasicBlock *Succ = SI->getSuccessor(i); if (Succ == SI->getDefaultDest()) continue; - Succ->removePredecessor(BB); - RemovedSuccessors.insert(Succ); + Succ->removePredecessor(BB); + RemovedSuccessors.insert(Succ); } SI->eraseFromParent(); - if (DTU) { - for (BasicBlock *RemovedSuccessor : RemovedSuccessors) - Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor}); - DTU->applyUpdates(Updates); - } - + if (DTU) { + for (BasicBlock *RemovedSuccessor : RemovedSuccessors) + Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor}); + DTU->applyUpdates(Updates); + } + ++NumLookupTables; if (NeedMask) ++NumLookupTablesHoles; @@ -6051,10 +6051,10 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { return requestResimplify(); // Remove unreachable cases. - if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL)) + if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL)) return requestResimplify(); - if (switchToSelect(SI, Builder, DTU, DL, TTI)) + if (switchToSelect(SI, Builder, DTU, DL, TTI)) return requestResimplify(); if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI)) @@ -6066,7 +6066,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { // CVP. Therefore, only apply this transformation during late stages of the // optimisation pipeline. if (Options.ConvertSwitchToLookupTable && - SwitchToLookupTable(SI, Builder, DTU, DL, TTI)) + SwitchToLookupTable(SI, Builder, DTU, DL, TTI)) return requestResimplify(); if (ReduceSwitchRange(SI, Builder, DL, TTI)) @@ -6081,12 +6081,12 @@ bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) { // Eliminate redundant destinations. 
SmallPtrSet<Value *, 8> Succs; - SmallSetVector<BasicBlock *, 8> RemovedSuccs; + SmallSetVector<BasicBlock *, 8> RemovedSuccs; for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { BasicBlock *Dest = IBI->getDestination(i); if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) { - if (!Dest->hasAddressTaken()) - RemovedSuccs.insert(Dest); + if (!Dest->hasAddressTaken()) + RemovedSuccs.insert(Dest); Dest->removePredecessor(BB); IBI->removeDestination(i); --i; @@ -6095,14 +6095,14 @@ bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) { } } - if (DTU) { - std::vector<DominatorTree::UpdateType> Updates; - Updates.reserve(RemovedSuccs.size()); - for (auto *RemovedSucc : RemovedSuccs) - Updates.push_back({DominatorTree::Delete, BB, RemovedSucc}); - DTU->applyUpdates(Updates); - } - + if (DTU) { + std::vector<DominatorTree::UpdateType> Updates; + Updates.reserve(RemovedSuccs.size()); + for (auto *RemovedSucc : RemovedSuccs) + Updates.push_back({DominatorTree::Delete, BB, RemovedSucc}); + DTU->applyUpdates(Updates); + } + if (IBI->getNumDestinations() == 0) { // If the indirectbr has no successors, change it to unreachable. new UnreachableInst(IBI->getContext(), IBI); @@ -6146,7 +6146,7 @@ bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) { /// block when the inputs in the phi are the same for the two blocks being /// merged. In some cases, this could result in removal of the PHI entirely. static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, - BasicBlock *BB, DomTreeUpdater *DTU) { + BasicBlock *BB, DomTreeUpdater *DTU) { auto Succ = BB->getUniqueSuccessor(); assert(Succ); // If there's a phi in the successor block, we'd likely have to introduce @@ -6167,8 +6167,8 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, if (!BI2 || !BI2->isIdenticalTo(BI)) continue; - std::vector<DominatorTree::UpdateType> Updates; - + std::vector<DominatorTree::UpdateType> Updates; + // We've found an identical block. Update our predecessors to take that // path instead and make ourselves dead. SmallPtrSet<BasicBlock *, 16> Preds; @@ -6178,8 +6178,8 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, assert(II->getNormalDest() != BB && II->getUnwindDest() == BB && "unexpected successor"); II->setUnwindDest(OtherPred); - Updates.push_back({DominatorTree::Insert, Pred, OtherPred}); - Updates.push_back({DominatorTree::Delete, Pred, BB}); + Updates.push_back({DominatorTree::Insert, Pred, OtherPred}); + Updates.push_back({DominatorTree::Delete, Pred, BB}); } // The debug info in OtherPred doesn't cover the merged control flow that @@ -6195,14 +6195,14 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, Succs.insert(succ_begin(BB), succ_end(BB)); for (BasicBlock *Succ : Succs) { Succ->removePredecessor(BB); - Updates.push_back({DominatorTree::Delete, BB, Succ}); + Updates.push_back({DominatorTree::Delete, BB, Succ}); } IRBuilder<> Builder(BI); Builder.CreateUnreachable(); BI->eraseFromParent(); - if (DTU) - DTU->applyUpdates(Updates); + if (DTU) + DTU->applyUpdates(Updates); return true; } return false; @@ -6227,11 +6227,11 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI, // backedge, so we can eliminate BB. 
bool NeedCanonicalLoop = Options.NeedCanonicalLoop && - (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) && - (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ))); + (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) && + (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ))); BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && - !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU)) + !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU)) return true; // If the only instruction in the block is a seteq/setne comparison against a @@ -6250,7 +6250,7 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI, if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) { for (++I; isa<DbgInfoIntrinsic>(I); ++I) ; - if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB, DTU)) + if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB, DTU)) return true; } @@ -6258,8 +6258,8 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI, // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value // for PHI nodes in common successor. - if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, - Options.BonusInstThreshold)) + if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, + Options.BonusInstThreshold)) return requestResimplify(); return false; } @@ -6322,8 +6322,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. - if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, - Options.BonusInstThreshold)) + if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, + Options.BonusInstThreshold)) return requestResimplify(); // We have a conditional branch to two blocks that are only reachable @@ -6332,9 +6332,9 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // can hoist it up to the branching block. if (BI->getSuccessor(0)->getSinglePredecessor()) { if (BI->getSuccessor(1)->getSinglePredecessor()) { - if (HoistCommon && Options.HoistCommonInsts) - if (HoistThenElseCodeToIf(BI, TTI)) - return requestResimplify(); + if (HoistCommon && Options.HoistCommonInsts) + if (HoistThenElseCodeToIf(BI, TTI)) + return requestResimplify(); } else { // If Successor #1 has multiple preds, we may be able to conditionally // execute Successor #0 if it branches to Successor #1. @@ -6358,14 +6358,14 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // through this block if any PHI node entries are constants. if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition())) if (PN->getParent() == BI->getParent()) - if (FoldCondBranchOnPHI(BI, DTU, DL, Options.AC)) + if (FoldCondBranchOnPHI(BI, DTU, DL, Options.AC)) return requestResimplify(); // Scan predecessor blocks for conditional branches. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) if (PBI != BI && PBI->isConditional()) - if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI)) + if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI)) return requestResimplify(); // Look for diamond patterns. 
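// The hoisting step above (HoistThenElseCodeToIf) is gated on a structural
// precondition: both successors must be reachable only through this branch,
// i.e. BI sits at the top of a triangle or diamond it fully owns. A hedged,
// flattened restatement of that guard (BI, TTI, Options as in the
// surrounding code; the real control flow nests the checks):
if (HoistCommon && Options.HoistCommonInsts &&
    BI->getSuccessor(0)->getSinglePredecessor() &&
    BI->getSuccessor(1)->getSinglePredecessor())
  if (HoistThenElseCodeToIf(BI, TTI))
    return requestResimplify();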
@@ -6373,14 +6373,14 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB)) if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator())) if (PBI != BI && PBI->isConditional()) - if (mergeConditionalStores(PBI, BI, DTU, DL, TTI)) + if (mergeConditionalStores(PBI, BI, DTU, DL, TTI)) return requestResimplify(); return false; } /// Check if passing a value to an instruction will cause undefined behavior. -static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) { +static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) { Constant *C = dyn_cast<Constant>(V); if (!C) return false; @@ -6403,15 +6403,15 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu // Look through GEPs. A load from a GEP derived from NULL is still undefined if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use)) - if (GEP->getPointerOperand() == I) { - if (!GEP->isInBounds() || !GEP->hasAllZeroIndices()) - PtrValueMayBeModified = true; - return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified); - } + if (GEP->getPointerOperand() == I) { + if (!GEP->isInBounds() || !GEP->hasAllZeroIndices()) + PtrValueMayBeModified = true; + return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified); + } // Look through bitcasts. if (BitCastInst *BC = dyn_cast<BitCastInst>(Use)) - return passingValueIsAlwaysUndefined(V, BC, PtrValueMayBeModified); + return passingValueIsAlwaysUndefined(V, BC, PtrValueMayBeModified); // Load from null is undefined. if (LoadInst *LI = dyn_cast<LoadInst>(Use)) @@ -6426,51 +6426,51 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu SI->getPointerAddressSpace())) && SI->getPointerOperand() == I; - if (auto *CB = dyn_cast<CallBase>(Use)) { - if (C->isNullValue() && NullPointerIsDefined(CB->getFunction())) - return false; - // A call to null is undefined. - if (CB->getCalledOperand() == I) - return true; - - if (C->isNullValue()) { - for (const llvm::Use &Arg : CB->args()) - if (Arg == I) { - unsigned ArgIdx = CB->getArgOperandNo(&Arg); - if (CB->paramHasAttr(ArgIdx, Attribute::NonNull) && - CB->paramHasAttr(ArgIdx, Attribute::NoUndef)) { - // Passing null to a nonnnull+noundef argument is undefined. - return !PtrValueMayBeModified; - } - } - } else if (isa<UndefValue>(C)) { - // Passing undef to a noundef argument is undefined. - for (const llvm::Use &Arg : CB->args()) - if (Arg == I) { - unsigned ArgIdx = CB->getArgOperandNo(&Arg); - if (CB->paramHasAttr(ArgIdx, Attribute::NoUndef)) { - // Passing undef to a noundef argument is undefined. - return true; - } - } - } - } + if (auto *CB = dyn_cast<CallBase>(Use)) { + if (C->isNullValue() && NullPointerIsDefined(CB->getFunction())) + return false; + // A call to null is undefined. + if (CB->getCalledOperand() == I) + return true; + + if (C->isNullValue()) { + for (const llvm::Use &Arg : CB->args()) + if (Arg == I) { + unsigned ArgIdx = CB->getArgOperandNo(&Arg); + if (CB->paramHasAttr(ArgIdx, Attribute::NonNull) && + CB->paramHasAttr(ArgIdx, Attribute::NoUndef)) { + // Passing null to a nonnnull+noundef argument is undefined. + return !PtrValueMayBeModified; + } + } + } else if (isa<UndefValue>(C)) { + // Passing undef to a noundef argument is undefined. 
+ for (const llvm::Use &Arg : CB->args()) + if (Arg == I) { + unsigned ArgIdx = CB->getArgOperandNo(&Arg); + if (CB->paramHasAttr(ArgIdx, Attribute::NoUndef)) { + // Passing undef to a noundef argument is undefined. + return true; + } + } + } + } } return false; } /// If BB has an incoming value that will always trigger undefined behavior /// (eg. null pointer dereference), remove the branch leading here. -static bool removeUndefIntroducingPredecessor(BasicBlock *BB, - DomTreeUpdater *DTU) { +static bool removeUndefIntroducingPredecessor(BasicBlock *BB, + DomTreeUpdater *DTU) { for (PHINode &PHI : BB->phis()) for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i) if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) { - BasicBlock *Predecessor = PHI.getIncomingBlock(i); - Instruction *T = Predecessor->getTerminator(); + BasicBlock *Predecessor = PHI.getIncomingBlock(i); + Instruction *T = Predecessor->getTerminator(); IRBuilder<> Builder(T); if (BranchInst *BI = dyn_cast<BranchInst>(T)) { - BB->removePredecessor(Predecessor); + BB->removePredecessor(Predecessor); // Turn uncoditional branches into unreachables and remove the dead // destination from conditional branches. if (BI->isUnconditional()) @@ -6479,8 +6479,8 @@ static bool removeUndefIntroducingPredecessor(BasicBlock *BB, Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1) : BI->getSuccessor(0)); BI->eraseFromParent(); - if (DTU) - DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}}); + if (DTU) + DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}}); return true; } // TODO: SwitchInst. @@ -6489,7 +6489,7 @@ static bool removeUndefIntroducingPredecessor(BasicBlock *BB, return false; } -bool SimplifyCFGOpt::simplifyOnceImpl(BasicBlock *BB) { +bool SimplifyCFGOpt::simplifyOnceImpl(BasicBlock *BB) { bool Changed = false; assert(BB && BB->getParent() && "Block not embedded in function!"); @@ -6500,29 +6500,29 @@ bool SimplifyCFGOpt::simplifyOnceImpl(BasicBlock *BB) { if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) || BB->getSinglePredecessor() == BB) { LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB); - DeleteDeadBlock(BB, DTU); + DeleteDeadBlock(BB, DTU); return true; } // Check to see if we can constant propagate this terminator instruction // away... - Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true, - /*TLI=*/nullptr, DTU); + Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true, + /*TLI=*/nullptr, DTU); // Check for and eliminate duplicate PHI nodes in this block. Changed |= EliminateDuplicatePHINodes(BB); // Check for and remove branches that will always cause undefined behavior. - Changed |= removeUndefIntroducingPredecessor(BB, DTU); + Changed |= removeUndefIntroducingPredecessor(BB, DTU); // Merge basic blocks into their predecessor if there is only one distinct // pred, and if there is only one distinct successor of the predecessor, and // if there are no PHI nodes. - if (MergeBlockIntoPredecessor(BB, DTU)) + if (MergeBlockIntoPredecessor(BB, DTU)) return true; if (SinkCommon && Options.SinkCommonInsts) - Changed |= SinkCommonCodeFromPredecessors(BB, DTU); + Changed |= SinkCommonCodeFromPredecessors(BB, DTU); IRBuilder<> Builder(BB); @@ -6531,7 +6531,7 @@ bool SimplifyCFGOpt::simplifyOnceImpl(BasicBlock *BB) { // eliminate it, do so now. 
if (auto *PN = dyn_cast<PHINode>(BB->begin())) if (PN->getNumIncomingValues() == 2) - Changed |= FoldTwoEntryPHINode(PN, TTI, DTU, DL); + Changed |= FoldTwoEntryPHINode(PN, TTI, DTU, DL); } Instruction *Terminator = BB->getTerminator(); @@ -6563,23 +6563,23 @@ bool SimplifyCFGOpt::simplifyOnceImpl(BasicBlock *BB) { return Changed; } -bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) { - bool Changed = simplifyOnceImpl(BB); - - assert((!RequireAndPreserveDomTree || - (DTU && - DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full))) && - "Failed to maintain validity of domtree!"); - - return Changed; -} - +bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) { + bool Changed = simplifyOnceImpl(BB); + + assert((!RequireAndPreserveDomTree || + (DTU && + DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full))) && + "Failed to maintain validity of domtree!"); + + return Changed; +} + bool SimplifyCFGOpt::run(BasicBlock *BB) { - assert((!RequireAndPreserveDomTree || - (DTU && - DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full))) && - "Original domtree is invalid?"); - + assert((!RequireAndPreserveDomTree || + (DTU && + DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full))) && + "Original domtree is invalid?"); + bool Changed = false; // Repeated simplify BB as long as resimplification is requested. @@ -6595,9 +6595,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { } bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, - DomTreeUpdater *DTU, const SimplifyCFGOptions &Options, - ArrayRef<WeakVH> LoopHeaders) { - return SimplifyCFGOpt(TTI, RequireAndPreserveDomTree ? DTU : nullptr, - BB->getModule()->getDataLayout(), LoopHeaders, Options) + DomTreeUpdater *DTU, const SimplifyCFGOptions &Options, + ArrayRef<WeakVH> LoopHeaders) { + return SimplifyCFGOpt(TTI, RequireAndPreserveDomTree ? DTU : nullptr, + BB->getModule()->getDataLayout(), LoopHeaders, Options) .run(BB); } diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/SimplifyIndVar.cpp index 40a1e09818..290c04a7ad 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -194,12 +194,12 @@ bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp, auto *PN = dyn_cast<PHINode>(IVOperand); if (!PN) return false; - auto LIP = SE->getLoopInvariantPredicate(Pred, S, X, L); - if (!LIP) + auto LIP = SE->getLoopInvariantPredicate(Pred, S, X, L); + if (!LIP) return false; - ICmpInst::Predicate InvariantPredicate = LIP->Pred; - const SCEV *InvariantLHS = LIP->LHS; - const SCEV *InvariantRHS = LIP->RHS; + ICmpInst::Predicate InvariantPredicate = LIP->Pred; + const SCEV *InvariantLHS = LIP->LHS; + const SCEV *InvariantRHS = LIP->RHS; // Rewrite the comparison to a loop invariant comparison if it can be done // cheaply, where cheaply means "we don't need to emit any new @@ -477,7 +477,7 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst *WO) { if (WO->use_empty()) WO->eraseFromParent(); - Changed = true; + Changed = true; return true; } @@ -968,1122 +968,1122 @@ bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, } } // namespace llvm - -//===----------------------------------------------------------------------===// -// Widen Induction Variables - Extend the width of an IV to cover its -// widest uses. 
-//===----------------------------------------------------------------------===// - -class WidenIV { - // Parameters - PHINode *OrigPhi; - Type *WideType; - - // Context - LoopInfo *LI; - Loop *L; - ScalarEvolution *SE; - DominatorTree *DT; - - // Does the module have any calls to the llvm.experimental.guard intrinsic - // at all? If not we can avoid scanning instructions looking for guards. - bool HasGuards; - - bool UsePostIncrementRanges; - - // Statistics - unsigned NumElimExt = 0; - unsigned NumWidened = 0; - - // Result - PHINode *WidePhi = nullptr; - Instruction *WideInc = nullptr; - const SCEV *WideIncExpr = nullptr; - SmallVectorImpl<WeakTrackingVH> &DeadInsts; - - SmallPtrSet<Instruction *,16> Widened; - - enum ExtendKind { ZeroExtended, SignExtended, Unknown }; - - // A map tracking the kind of extension used to widen each narrow IV - // and narrow IV user. - // Key: pointer to a narrow IV or IV user. - // Value: the kind of extension used to widen this Instruction. - DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap; - - using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>; - - // A map with control-dependent ranges for post increment IV uses. The key is - // a pair of IV def and a use of this def denoting the context. The value is - // a ConstantRange representing possible values of the def at the given - // context. - DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos; - - Optional<ConstantRange> getPostIncRangeInfo(Value *Def, - Instruction *UseI) { - DefUserPair Key(Def, UseI); - auto It = PostIncRangeInfos.find(Key); - return It == PostIncRangeInfos.end() - ? Optional<ConstantRange>(None) - : Optional<ConstantRange>(It->second); - } - - void calculatePostIncRanges(PHINode *OrigPhi); - void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser); - - void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) { - DefUserPair Key(Def, UseI); - auto It = PostIncRangeInfos.find(Key); - if (It == PostIncRangeInfos.end()) - PostIncRangeInfos.insert({Key, R}); - else - It->second = R.intersectWith(It->second); - } - -public: - /// Record a link in the Narrow IV def-use chain along with the WideIV that - /// computes the same value as the Narrow IV def. This avoids caching Use* - /// pointers. - struct NarrowIVDefUse { - Instruction *NarrowDef = nullptr; - Instruction *NarrowUse = nullptr; - Instruction *WideDef = nullptr; - - // True if the narrow def is never negative. Tracking this information lets - // us use a sign extension instead of a zero extension or vice versa, when - // profitable and legal. 
- bool NeverNegative = false; - - NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD, - bool NeverNegative) - : NarrowDef(ND), NarrowUse(NU), WideDef(WD), - NeverNegative(NeverNegative) {} - }; - - WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, - DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI, - bool HasGuards, bool UsePostIncrementRanges = true); - - PHINode *createWideIV(SCEVExpander &Rewriter); - - unsigned getNumElimExt() { return NumElimExt; }; - unsigned getNumWidened() { return NumWidened; }; - -protected: - Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned, - Instruction *Use); - - Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR); - Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU, - const SCEVAddRecExpr *WideAR); - Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU); - - ExtendKind getExtendKind(Instruction *I); - - using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>; - - WidenedRecTy getWideRecurrence(NarrowIVDefUse DU); - - WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU); - - const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, - unsigned OpCode) const; - - Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter); - - bool widenLoopCompare(NarrowIVDefUse DU); - bool widenWithVariantUse(NarrowIVDefUse DU); - - void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef); - -private: - SmallVector<NarrowIVDefUse, 8> NarrowIVUsers; -}; - - -/// Determine the insertion point for this user. By default, insert immediately -/// before the user. SCEVExpander or LICM will hoist loop invariants out of the -/// loop. For PHI nodes, there may be multiple uses, so compute the nearest -/// common dominator for the incoming blocks. A nullptr can be returned if no -/// viable location is found: it may happen if User is a PHI and Def only comes -/// to this PHI from unreachable blocks. -static Instruction *getInsertPointForUses(Instruction *User, Value *Def, - DominatorTree *DT, LoopInfo *LI) { - PHINode *PHI = dyn_cast<PHINode>(User); - if (!PHI) - return User; - - Instruction *InsertPt = nullptr; - for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { - if (PHI->getIncomingValue(i) != Def) - continue; - - BasicBlock *InsertBB = PHI->getIncomingBlock(i); - - if (!DT->isReachableFromEntry(InsertBB)) - continue; - - if (!InsertPt) { - InsertPt = InsertBB->getTerminator(); - continue; - } - InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB); - InsertPt = InsertBB->getTerminator(); - } - - // If we have skipped all inputs, it means that Def only comes to Phi from - // unreachable blocks. 
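// getInsertPointForUses, shown above, places the replacement for a PHI use
// at the nearest common dominator of every reachable incoming block that
// carries Def: a PHI use is logically at the end of its incoming block, so
// the def must be materialized somewhere dominating all of them. The loop
// condenses to (DT, PHI, Def as in the code above):
Instruction *InsertPt = nullptr;
for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
  if (PHI->getIncomingValue(i) != Def)
    continue;
  BasicBlock *InsertBB = PHI->getIncomingBlock(i);
  if (!DT->isReachableFromEntry(InsertBB))
    continue; // Def flows in only through dead code; ignore this input.
  if (InsertPt)
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
  InsertPt = InsertBB->getTerminator();
}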
- if (!InsertPt) - return nullptr; - - auto *DefI = dyn_cast<Instruction>(Def); - if (!DefI) - return InsertPt; - - assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses"); - - auto *L = LI->getLoopFor(DefI->getParent()); - assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent()))); - - for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom()) - if (LI->getLoopFor(DTN->getBlock()) == L) - return DTN->getBlock()->getTerminator(); - - llvm_unreachable("DefI dominates InsertPt!"); -} - -WidenIV::WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, - DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI, - bool HasGuards, bool UsePostIncrementRanges) - : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo), - L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), - HasGuards(HasGuards), UsePostIncrementRanges(UsePostIncrementRanges), - DeadInsts(DI) { - assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); - ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended; -} - -Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType, - bool IsSigned, Instruction *Use) { - // Set the debug location and conservative insertion point. - IRBuilder<> Builder(Use); - // Hoist the insertion point into loop preheaders as far as possible. - for (const Loop *L = LI->getLoopFor(Use->getParent()); - L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper); - L = L->getParentLoop()) - Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator()); - - return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) : - Builder.CreateZExt(NarrowOper, WideType); -} - -/// Instantiate a wide operation to replace a narrow operation. This only needs -/// to handle operations that can evaluation to SCEVAddRec. It can safely return -/// 0 for any operation we decide not to clone. -Instruction *WidenIV::cloneIVUser(WidenIV::NarrowIVDefUse DU, - const SCEVAddRecExpr *WideAR) { - unsigned Opcode = DU.NarrowUse->getOpcode(); - switch (Opcode) { - default: - return nullptr; - case Instruction::Add: - case Instruction::Mul: - case Instruction::UDiv: - case Instruction::Sub: - return cloneArithmeticIVUser(DU, WideAR); - - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - return cloneBitwiseIVUser(DU); - } -} - -Instruction *WidenIV::cloneBitwiseIVUser(WidenIV::NarrowIVDefUse DU) { - Instruction *NarrowUse = DU.NarrowUse; - Instruction *NarrowDef = DU.NarrowDef; - Instruction *WideDef = DU.WideDef; - - LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n"); - - // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything - // about the narrow operand yet so must insert a [sz]ext. It is probably loop - // invariant and will be folded or hoisted. If it actually comes from a - // widened IV, it should be removed during a future call to widenIVUse. - bool IsSigned = getExtendKind(NarrowDef) == SignExtended; - Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) - ? WideDef - : createExtendInst(NarrowUse->getOperand(0), WideType, - IsSigned, NarrowUse); - Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) - ? 
WideDef - : createExtendInst(NarrowUse->getOperand(1), WideType, - IsSigned, NarrowUse); - - auto *NarrowBO = cast<BinaryOperator>(NarrowUse); - auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, - NarrowBO->getName()); - IRBuilder<> Builder(NarrowUse); - Builder.Insert(WideBO); - WideBO->copyIRFlags(NarrowBO); - return WideBO; -} - -Instruction *WidenIV::cloneArithmeticIVUser(WidenIV::NarrowIVDefUse DU, - const SCEVAddRecExpr *WideAR) { - Instruction *NarrowUse = DU.NarrowUse; - Instruction *NarrowDef = DU.NarrowDef; - Instruction *WideDef = DU.WideDef; - - LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); - - unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1; - - // We're trying to find X such that - // - // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X - // - // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef), - // and check using SCEV if any of them are correct. - - // Returns true if extending NonIVNarrowDef according to `SignExt` is a - // correct solution to X. - auto GuessNonIVOperand = [&](bool SignExt) { - const SCEV *WideLHS; - const SCEV *WideRHS; - - auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) { - if (SignExt) - return SE->getSignExtendExpr(S, Ty); - return SE->getZeroExtendExpr(S, Ty); - }; - - if (IVOpIdx == 0) { - WideLHS = SE->getSCEV(WideDef); - const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1)); - WideRHS = GetExtend(NarrowRHS, WideType); - } else { - const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0)); - WideLHS = GetExtend(NarrowLHS, WideType); - WideRHS = SE->getSCEV(WideDef); - } - - // WideUse is "WideDef `op.wide` X" as described in the comment. - const SCEV *WideUse = - getSCEVByOpCode(WideLHS, WideRHS, NarrowUse->getOpcode()); - - return WideUse == WideAR; - }; - - bool SignExtend = getExtendKind(NarrowDef) == SignExtended; - if (!GuessNonIVOperand(SignExtend)) { - SignExtend = !SignExtend; - if (!GuessNonIVOperand(SignExtend)) - return nullptr; - } - - Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) - ? WideDef - : createExtendInst(NarrowUse->getOperand(0), WideType, - SignExtend, NarrowUse); - Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) - ? WideDef - : createExtendInst(NarrowUse->getOperand(1), WideType, - SignExtend, NarrowUse); - - auto *NarrowBO = cast<BinaryOperator>(NarrowUse); - auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, - NarrowBO->getName()); - - IRBuilder<> Builder(NarrowUse); - Builder.Insert(WideBO); - WideBO->copyIRFlags(NarrowBO); - return WideBO; -} - -WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) { - auto It = ExtendKindMap.find(I); - assert(It != ExtendKindMap.end() && "Instruction not yet extended!"); - return It->second; -} - -const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, - unsigned OpCode) const { - switch (OpCode) { - case Instruction::Add: - return SE->getAddExpr(LHS, RHS); - case Instruction::Sub: - return SE->getMinusSCEV(LHS, RHS); - case Instruction::Mul: - return SE->getMulExpr(LHS, RHS); - case Instruction::UDiv: - return SE->getUDivExpr(LHS, RHS); - default: - llvm_unreachable("Unsupported opcode."); - }; -} - -/// No-wrap operations can transfer sign extension of their result to their -/// operands. Generate the SCEV value for the widened operation without -/// actually modifying the IR yet. 
If the expression after extending the -/// operands is an AddRec for this loop, return the AddRec and the kind of -/// extension used. -WidenIV::WidenedRecTy -WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) { - // Handle the common case of add<nsw/nuw> - const unsigned OpCode = DU.NarrowUse->getOpcode(); - // Only Add/Sub/Mul instructions supported yet. - if (OpCode != Instruction::Add && OpCode != Instruction::Sub && - OpCode != Instruction::Mul) - return {nullptr, Unknown}; - - // One operand (NarrowDef) has already been extended to WideDef. Now determine - // if extending the other will lead to a recurrence. - const unsigned ExtendOperIdx = - DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0; - assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU"); - - const SCEV *ExtendOperExpr = nullptr; - const OverflowingBinaryOperator *OBO = - cast<OverflowingBinaryOperator>(DU.NarrowUse); - ExtendKind ExtKind = getExtendKind(DU.NarrowDef); - if (ExtKind == SignExtended && OBO->hasNoSignedWrap()) - ExtendOperExpr = SE->getSignExtendExpr( - SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); - else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap()) - ExtendOperExpr = SE->getZeroExtendExpr( - SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); - else - return {nullptr, Unknown}; - - // When creating this SCEV expr, don't apply the current operations NSW or NUW - // flags. This instruction may be guarded by control flow that the no-wrap - // behavior depends on. Non-control-equivalent instructions can be mapped to - // the same SCEV expression, and it would be incorrect to transfer NSW/NUW - // semantics to those operations. - const SCEV *lhs = SE->getSCEV(DU.WideDef); - const SCEV *rhs = ExtendOperExpr; - - // Let's swap operands to the initial order for the case of non-commutative - // operations, like SUB. See PR21014. - if (ExtendOperIdx == 0) - std::swap(lhs, rhs); - const SCEVAddRecExpr *AddRec = - dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode)); - - if (!AddRec || AddRec->getLoop() != L) - return {nullptr, Unknown}; - - return {AddRec, ExtKind}; -} - -/// Is this instruction potentially interesting for further simplification after -/// widening it's type? In other words, can the extend be safely hoisted out of -/// the loop with SCEV reducing the value to a recurrence on the same loop. If -/// so, return the extended recurrence and the kind of extension used. Otherwise -/// return {nullptr, Unknown}. -WidenIV::WidenedRecTy WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU) { - if (!SE->isSCEVable(DU.NarrowUse->getType())) - return {nullptr, Unknown}; - - const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse); - if (SE->getTypeSizeInBits(NarrowExpr->getType()) >= - SE->getTypeSizeInBits(WideType)) { - // NarrowUse implicitly widens its operand. e.g. a gep with a narrow - // index. So don't follow this use. 
- return {nullptr, Unknown}; - } - - const SCEV *WideExpr; - ExtendKind ExtKind; - if (DU.NeverNegative) { - WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); - if (isa<SCEVAddRecExpr>(WideExpr)) - ExtKind = SignExtended; - else { - WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); - ExtKind = ZeroExtended; - } - } else if (getExtendKind(DU.NarrowDef) == SignExtended) { - WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); - ExtKind = SignExtended; - } else { - WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); - ExtKind = ZeroExtended; - } - const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr); - if (!AddRec || AddRec->getLoop() != L) - return {nullptr, Unknown}; - return {AddRec, ExtKind}; -} - -/// This IV user cannot be widened. Replace this use of the original narrow IV -/// with a truncation of the new wide IV to isolate and eliminate the narrow IV. -static void truncateIVUse(WidenIV::NarrowIVDefUse DU, DominatorTree *DT, - LoopInfo *LI) { - auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI); - if (!InsertPt) - return; - LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user " - << *DU.NarrowUse << "\n"); - IRBuilder<> Builder(InsertPt); - Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType()); - DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc); -} - -/// If the narrow use is a compare instruction, then widen the compare -// (and possibly the other operand). The extend operation is hoisted into the -// loop preheader as far as possible. -bool WidenIV::widenLoopCompare(WidenIV::NarrowIVDefUse DU) { - ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse); - if (!Cmp) - return false; - - // We can legally widen the comparison in the following two cases: - // - // - The signedness of the IV extension and comparison match - // - // - The narrow IV is always positive (and thus its sign extension is equal - // to its zero extension). For instance, let's say we're zero extending - // %narrow for the following use - // - // icmp slt i32 %narrow, %val ... (A) - // - // and %narrow is always positive. Then - // - // (A) == icmp slt i32 sext(%narrow), sext(%val) - // == icmp slt i32 zext(%narrow), sext(%val) - bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended; - if (!(DU.NeverNegative || IsSigned == Cmp->isSigned())) - return false; - - Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0); - unsigned CastWidth = SE->getTypeSizeInBits(Op->getType()); - unsigned IVWidth = SE->getTypeSizeInBits(WideType); - assert(CastWidth <= IVWidth && "Unexpected width while widening compare."); - - // Widen the compare instruction. - auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI); - if (!InsertPt) - return false; - IRBuilder<> Builder(InsertPt); - DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); - - // Widen the other operand of the compare, if necessary. - if (CastWidth < IVWidth) { - Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp); - DU.NarrowUse->replaceUsesOfWith(Op, ExtOp); - } - return true; -} - -// The widenIVUse avoids generating trunc by evaluating the use as AddRec, this -// will not work when: -// 1) SCEV traces back to an instruction inside the loop that SCEV can not -// expand, eg. add %indvar, (load %addr) -// 2) SCEV finds a loop variant, eg. add %indvar, %loopvariant -// While SCEV fails to avoid trunc, we can still try to use instruction -// combining approach to prove trunc is not required. 
This can be further -// extended with other instruction combining checks, but for now we handle the -// following case (sub can be "add" and "mul", "nsw + sext" can be "nus + zext") -// -// Src: -// %c = sub nsw %b, %indvar -// %d = sext %c to i64 -// Dst: -// %indvar.ext1 = sext %indvar to i64 -// %m = sext %b to i64 -// %d = sub nsw i64 %m, %indvar.ext1 -// Therefore, as long as the result of add/sub/mul is extended to wide type, no -// trunc is required regardless of how %b is generated. This pattern is common -// when calculating address in 64 bit architecture -bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { - Instruction *NarrowUse = DU.NarrowUse; - Instruction *NarrowDef = DU.NarrowDef; - Instruction *WideDef = DU.WideDef; - - // Handle the common case of add<nsw/nuw> - const unsigned OpCode = NarrowUse->getOpcode(); - // Only Add/Sub/Mul instructions are supported. - if (OpCode != Instruction::Add && OpCode != Instruction::Sub && - OpCode != Instruction::Mul) - return false; - - // The operand that is not defined by NarrowDef of DU. Let's call it the - // other operand. - assert((NarrowUse->getOperand(0) == NarrowDef || - NarrowUse->getOperand(1) == NarrowDef) && - "bad DU"); - - const OverflowingBinaryOperator *OBO = - cast<OverflowingBinaryOperator>(NarrowUse); - ExtendKind ExtKind = getExtendKind(NarrowDef); - bool CanSignExtend = ExtKind == SignExtended && OBO->hasNoSignedWrap(); - bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap(); - auto AnotherOpExtKind = ExtKind; - - // Check that all uses are either: - // - narrow def (in case of we are widening the IV increment); - // - single-input LCSSA Phis; - // - comparison of the chosen type; - // - extend of the chosen type (raison d'etre). - SmallVector<Instruction *, 4> ExtUsers; - SmallVector<PHINode *, 4> LCSSAPhiUsers; - SmallVector<ICmpInst *, 4> ICmpUsers; - for (Use &U : NarrowUse->uses()) { - Instruction *User = cast<Instruction>(U.getUser()); - if (User == NarrowDef) - continue; - if (!L->contains(User)) { - auto *LCSSAPhi = cast<PHINode>(User); - // Make sure there is only 1 input, so that we don't have to split - // critical edges. - if (LCSSAPhi->getNumOperands() != 1) - return false; - LCSSAPhiUsers.push_back(LCSSAPhi); - continue; - } - if (auto *ICmp = dyn_cast<ICmpInst>(User)) { - auto Pred = ICmp->getPredicate(); - // We have 3 types of predicates: signed, unsigned and equality - // predicates. For equality, it's legal to widen icmp for either sign and - // zero extend. For sign extend, we can also do so for signed predicates, - // likeweise for zero extend we can widen icmp for unsigned predicates. - if (ExtKind == ZeroExtended && ICmpInst::isSigned(Pred)) - return false; - if (ExtKind == SignExtended && ICmpInst::isUnsigned(Pred)) - return false; - ICmpUsers.push_back(ICmp); - continue; - } - if (ExtKind == SignExtended) - User = dyn_cast<SExtInst>(User); - else - User = dyn_cast<ZExtInst>(User); - if (!User || User->getType() != WideType) - return false; - ExtUsers.push_back(User); - } - if (ExtUsers.empty()) { - DeadInsts.emplace_back(NarrowUse); - return true; - } - - // We'll prove some facts that should be true in the context of ext users. If - // there is no users, we are done now. If there are some, pick their common - // dominator as context. 
- Instruction *Context = nullptr; - for (auto *Ext : ExtUsers) { - if (!Context || DT->dominates(Ext, Context)) - Context = Ext; - else if (!DT->dominates(Context, Ext)) - // For users that don't have dominance relation, use common dominator. - Context = - DT->findNearestCommonDominator(Context->getParent(), Ext->getParent()) - ->getTerminator(); - } - assert(Context && "Context not found?"); - - if (!CanSignExtend && !CanZeroExtend) { - // Because InstCombine turns 'sub nuw' to 'add' losing the no-wrap flag, we - // will most likely not see it. Let's try to prove it. - if (OpCode != Instruction::Add) - return false; - if (ExtKind != ZeroExtended) - return false; - const SCEV *LHS = SE->getSCEV(OBO->getOperand(0)); - const SCEV *RHS = SE->getSCEV(OBO->getOperand(1)); - // TODO: Support case for NarrowDef = NarrowUse->getOperand(1). - if (NarrowUse->getOperand(0) != NarrowDef) - return false; - if (!SE->isKnownNegative(RHS)) - return false; - bool ProvedSubNUW = SE->isKnownPredicateAt( - ICmpInst::ICMP_UGE, LHS, SE->getNegativeSCEV(RHS), Context); - if (!ProvedSubNUW) - return false; - // In fact, our 'add' is 'sub nuw'. We will need to widen the 2nd operand as - // neg(zext(neg(op))), which is basically sext(op). - AnotherOpExtKind = SignExtended; - } - - // Verifying that Defining operand is an AddRec - const SCEV *Op1 = SE->getSCEV(WideDef); - const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1); - if (!AddRecOp1 || AddRecOp1->getLoop() != L) - return false; - - LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); - - // Generating a widening use instruction. - Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) - ? WideDef - : createExtendInst(NarrowUse->getOperand(0), WideType, - AnotherOpExtKind, NarrowUse); - Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) - ? 
WideDef - : createExtendInst(NarrowUse->getOperand(1), WideType, - AnotherOpExtKind, NarrowUse); - - auto *NarrowBO = cast<BinaryOperator>(NarrowUse); - auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, - NarrowBO->getName()); - IRBuilder<> Builder(NarrowUse); - Builder.Insert(WideBO); - WideBO->copyIRFlags(NarrowBO); - ExtendKindMap[NarrowUse] = ExtKind; - - for (Instruction *User : ExtUsers) { - assert(User->getType() == WideType && "Checked before!"); - LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " - << *WideBO << "\n"); - ++NumElimExt; - User->replaceAllUsesWith(WideBO); - DeadInsts.emplace_back(User); - } - - for (PHINode *User : LCSSAPhiUsers) { - assert(User->getNumOperands() == 1 && "Checked before!"); - Builder.SetInsertPoint(User); - auto *WidePN = - Builder.CreatePHI(WideBO->getType(), 1, User->getName() + ".wide"); - BasicBlock *LoopExitingBlock = User->getParent()->getSinglePredecessor(); - assert(LoopExitingBlock && L->contains(LoopExitingBlock) && - "Not a LCSSA Phi?"); - WidePN->addIncoming(WideBO, LoopExitingBlock); - Builder.SetInsertPoint(&*User->getParent()->getFirstInsertionPt()); - auto *TruncPN = Builder.CreateTrunc(WidePN, User->getType()); - User->replaceAllUsesWith(TruncPN); - DeadInsts.emplace_back(User); - } - - for (ICmpInst *User : ICmpUsers) { - Builder.SetInsertPoint(User); - auto ExtendedOp = [&](Value * V)->Value * { - if (V == NarrowUse) - return WideBO; - if (ExtKind == ZeroExtended) - return Builder.CreateZExt(V, WideBO->getType()); - else - return Builder.CreateSExt(V, WideBO->getType()); - }; - auto Pred = User->getPredicate(); - auto *LHS = ExtendedOp(User->getOperand(0)); - auto *RHS = ExtendedOp(User->getOperand(1)); - auto *WideCmp = - Builder.CreateICmp(Pred, LHS, RHS, User->getName() + ".wide"); - User->replaceAllUsesWith(WideCmp); - DeadInsts.emplace_back(User); - } - - return true; -} - -/// Determine whether an individual user of the narrow IV can be widened. If so, -/// return the wide clone of the user. -Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewriter) { - assert(ExtendKindMap.count(DU.NarrowDef) && - "Should already know the kind of extension used to widen NarrowDef"); - - // Stop traversing the def-use chain at inner-loop phis or post-loop phis. - if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) { - if (LI->getLoopFor(UsePhi->getParent()) != L) { - // For LCSSA phis, sink the truncate outside the loop. - // After SimplifyCFG most loop exit targets have a single predecessor. - // Otherwise fall back to a truncate within the loop. - if (UsePhi->getNumOperands() != 1) - truncateIVUse(DU, DT, LI); - else { - // Widening the PHI requires us to insert a trunc. The logical place - // for this trunc is in the same BB as the PHI. This is not possible if - // the BB is terminated by a catchswitch. 
- if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator())) - return nullptr; - - PHINode *WidePhi = - PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide", - UsePhi); - WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0)); - IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt()); - Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType()); - UsePhi->replaceAllUsesWith(Trunc); - DeadInsts.emplace_back(UsePhi); - LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to " - << *WidePhi << "\n"); - } - return nullptr; - } - } - - // This narrow use can be widened by a sext if it's non-negative or its narrow - // def was widended by a sext. Same for zext. - auto canWidenBySExt = [&]() { - return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended; - }; - auto canWidenByZExt = [&]() { - return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended; - }; - - // Our raison d'etre! Eliminate sign and zero extension. - if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) || - (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) { - Value *NewDef = DU.WideDef; - if (DU.NarrowUse->getType() != WideType) { - unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType()); - unsigned IVWidth = SE->getTypeSizeInBits(WideType); - if (CastWidth < IVWidth) { - // The cast isn't as wide as the IV, so insert a Trunc. - IRBuilder<> Builder(DU.NarrowUse); - NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType()); - } - else { - // A wider extend was hidden behind a narrower one. This may induce - // another round of IV widening in which the intermediate IV becomes - // dead. It should be very rare. - LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi - << " not wide enough to subsume " << *DU.NarrowUse - << "\n"); - DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); - NewDef = DU.NarrowUse; - } - } - if (NewDef != DU.NarrowUse) { - LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse - << " replaced by " << *DU.WideDef << "\n"); - ++NumElimExt; - DU.NarrowUse->replaceAllUsesWith(NewDef); - DeadInsts.emplace_back(DU.NarrowUse); - } - // Now that the extend is gone, we want to expose it's uses for potential - // further simplification. We don't need to directly inform SimplifyIVUsers - // of the new users, because their parent IV will be processed later as a - // new loop phi. If we preserved IVUsers analysis, we would also want to - // push the uses of WideDef here. - - // No further widening is needed. The deceased [sz]ext had done it for us. - return nullptr; - } - - // Does this user itself evaluate to a recurrence after widening? - WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU); - if (!WideAddRec.first) - WideAddRec = getWideRecurrence(DU); - - assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown)); - if (!WideAddRec.first) { - // If use is a loop condition, try to promote the condition instead of - // truncating the IV first. - if (widenLoopCompare(DU)) - return nullptr; - - // We are here about to generate a truncate instruction that may hurt - // performance because the scalar evolution expression computed earlier - // in WideAddRec.first does not indicate a polynomial induction expression. - // In that case, look at the operands of the use instruction to determine - // if we can still widen the use instead of truncating its operand. 
- if (widenWithVariantUse(DU)) - return nullptr; - - // This user does not evaluate to a recurrence after widening, so don't - // follow it. Instead insert a Trunc to kill off the original use, - // eventually isolating the original narrow IV so it can be removed. - truncateIVUse(DU, DT, LI); - return nullptr; - } - // Assume block terminators cannot evaluate to a recurrence. We can't to - // insert a Trunc after a terminator if there happens to be a critical edge. - assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() && - "SCEV is not expected to evaluate a block terminator"); - - // Reuse the IV increment that SCEVExpander created as long as it dominates - // NarrowUse. - Instruction *WideUse = nullptr; - if (WideAddRec.first == WideIncExpr && - Rewriter.hoistIVInc(WideInc, DU.NarrowUse)) - WideUse = WideInc; - else { - WideUse = cloneIVUser(DU, WideAddRec.first); - if (!WideUse) - return nullptr; - } - // Evaluation of WideAddRec ensured that the narrow expression could be - // extended outside the loop without overflow. This suggests that the wide use - // evaluates to the same expression as the extended narrow use, but doesn't - // absolutely guarantee it. Hence the following failsafe check. In rare cases - // where it fails, we simply throw away the newly created wide use. - if (WideAddRec.first != SE->getSCEV(WideUse)) { - LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " - << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first - << "\n"); - DeadInsts.emplace_back(WideUse); - return nullptr; - } - - // if we reached this point then we are going to replace - // DU.NarrowUse with WideUse. Reattach DbgValue then. - replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT); - - ExtendKindMap[DU.NarrowUse] = WideAddRec.second; - // Returning WideUse pushes it on the worklist. - return WideUse; -} - -/// Add eligible users of NarrowDef to NarrowIVUsers. -void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { - const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef); - bool NonNegativeDef = - SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV, - SE->getZero(NarrowSCEV->getType())); - for (User *U : NarrowDef->users()) { - Instruction *NarrowUser = cast<Instruction>(U); - - // Handle data flow merges and bizarre phi cycles. - if (!Widened.insert(NarrowUser).second) - continue; - - bool NonNegativeUse = false; - if (!NonNegativeDef) { - // We might have a control-dependent range information for this context. - if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser)) - NonNegativeUse = RangeInfo->getSignedMin().isNonNegative(); - } - - NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, - NonNegativeDef || NonNegativeUse); - } -} - -/// Process a single induction variable. First use the SCEVExpander to create a -/// wide induction variable that evaluates to the same recurrence as the -/// original narrow IV. Then use a worklist to forward traverse the narrow IV's -/// def-use chain. After widenIVUse has processed all interesting IV users, the -/// narrow IV will be isolated for removal by DeleteDeadPHIs. -/// -/// It would be simpler to delete uses as they are processed, but we must avoid -/// invalidating SCEV expressions. -PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { - // Is this phi an induction variable? - const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi)); - if (!AddRec) - return nullptr; - - // Widen the induction variable expression. 
- const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended - ? SE->getSignExtendExpr(AddRec, WideType) - : SE->getZeroExtendExpr(AddRec, WideType); - - assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType && - "Expect the new IV expression to preserve its type"); - - // Can the IV be extended outside the loop without overflow? - AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr); - if (!AddRec || AddRec->getLoop() != L) - return nullptr; - - // An AddRec must have loop-invariant operands. Since this AddRec is - // materialized by a loop header phi, the expression cannot have any post-loop - // operands, so they must dominate the loop header. - assert( - SE->properlyDominates(AddRec->getStart(), L->getHeader()) && - SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) && - "Loop header phi recurrence inputs do not dominate the loop"); - - // Iterate over IV uses (including transitive ones) looking for IV increments - // of the form 'add nsw %iv, <const>'. For each increment and each use of - // the increment calculate control-dependent range information basing on - // dominating conditions inside of the loop (e.g. a range check inside of the - // loop). Calculated ranges are stored in PostIncRangeInfos map. - // - // Control-dependent range information is later used to prove that a narrow - // definition is not negative (see pushNarrowIVUsers). It's difficult to do - // this on demand because when pushNarrowIVUsers needs this information some - // of the dominating conditions might be already widened. - if (UsePostIncrementRanges) - calculatePostIncRanges(OrigPhi); - - // The rewriter provides a value for the desired IV expression. This may - // either find an existing phi or materialize a new one. Either way, we - // expect a well-formed cyclic phi-with-increments. i.e. any operand not part - // of the phi-SCC dominates the loop entry. - Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt(); - Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt); - // If the wide phi is not a phi node, for example a cast node, like bitcast, - // inttoptr, ptrtoint, just skip for now. - if (!(WidePhi = dyn_cast<PHINode>(ExpandInst))) { - // if the cast node is an inserted instruction without any user, we should - // remove it to make sure the pass don't touch the function as we can not - // wide the phi. - if (ExpandInst->hasNUses(0) && - Rewriter.isInsertedInstruction(cast<Instruction>(ExpandInst))) - DeadInsts.emplace_back(ExpandInst); - return nullptr; - } - - // Remembering the WideIV increment generated by SCEVExpander allows - // widenIVUse to reuse it when widening the narrow IV's increment. We don't - // employ a general reuse mechanism because the call above is the only call to - // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. - if (BasicBlock *LatchBlock = L->getLoopLatch()) { - WideInc = - cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock)); - WideIncExpr = SE->getSCEV(WideInc); - // Propagate the debug location associated with the original loop increment - // to the new (widened) increment. - auto *OrigInc = - cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); - WideInc->setDebugLoc(OrigInc->getDebugLoc()); - } - - LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); - ++NumWidened; - - // Traverse the def-use chain using a worklist starting at the original IV. 
- assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); - - Widened.insert(OrigPhi); - pushNarrowIVUsers(OrigPhi, WidePhi); - - while (!NarrowIVUsers.empty()) { - WidenIV::NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); - - // Process a def-use edge. This may replace the use, so don't hold a - // use_iterator across it. - Instruction *WideUse = widenIVUse(DU, Rewriter); - - // Follow all def-use edges from the previous narrow use. - if (WideUse) - pushNarrowIVUsers(DU.NarrowUse, WideUse); - - // widenIVUse may have removed the def-use edge. - if (DU.NarrowDef->use_empty()) - DeadInsts.emplace_back(DU.NarrowDef); - } - - // Attach any debug information to the new PHI. - replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT); - - return WidePhi; -} - -/// Calculates control-dependent range for the given def at the given context -/// by looking at dominating conditions inside of the loop -void WidenIV::calculatePostIncRange(Instruction *NarrowDef, - Instruction *NarrowUser) { - using namespace llvm::PatternMatch; - - Value *NarrowDefLHS; - const APInt *NarrowDefRHS; - if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), - m_APInt(NarrowDefRHS))) || - !NarrowDefRHS->isNonNegative()) - return; - - auto UpdateRangeFromCondition = [&] (Value *Condition, - bool TrueDest) { - CmpInst::Predicate Pred; - Value *CmpRHS; - if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), - m_Value(CmpRHS)))) - return; - - CmpInst::Predicate P = - TrueDest ? Pred : CmpInst::getInversePredicate(Pred); - - auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); - auto CmpConstrainedLHSRange = - ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); - auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap( - *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap); - - updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); - }; - - auto UpdateRangeFromGuards = [&](Instruction *Ctx) { - if (!HasGuards) - return; - - for (Instruction &I : make_range(Ctx->getIterator().getReverse(), - Ctx->getParent()->rend())) { - Value *C = nullptr; - if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C)))) - UpdateRangeFromCondition(C, /*TrueDest=*/true); - } - }; - - UpdateRangeFromGuards(NarrowUser); - - BasicBlock *NarrowUserBB = NarrowUser->getParent(); - // If NarrowUserBB is statically unreachable asking dominator queries may - // yield surprising results. (e.g. 
the block may not have a dom tree node) - if (!DT->isReachableFromEntry(NarrowUserBB)) - return; - - for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); - L->contains(DTB->getBlock()); - DTB = DTB->getIDom()) { - auto *BB = DTB->getBlock(); - auto *TI = BB->getTerminator(); - UpdateRangeFromGuards(TI); - - auto *BI = dyn_cast<BranchInst>(TI); - if (!BI || !BI->isConditional()) - continue; - - auto *TrueSuccessor = BI->getSuccessor(0); - auto *FalseSuccessor = BI->getSuccessor(1); - - auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { - return BBE.isSingleEdge() && - DT->dominates(BBE, NarrowUser->getParent()); - }; - - if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) - UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); - - if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) - UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); - } -} - -/// Calculates PostIncRangeInfos map for the given IV -void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { - SmallPtrSet<Instruction *, 16> Visited; - SmallVector<Instruction *, 6> Worklist; - Worklist.push_back(OrigPhi); - Visited.insert(OrigPhi); - - while (!Worklist.empty()) { - Instruction *NarrowDef = Worklist.pop_back_val(); - - for (Use &U : NarrowDef->uses()) { - auto *NarrowUser = cast<Instruction>(U.getUser()); - - // Don't go looking outside the current loop. - auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; - if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) - continue; - - if (!Visited.insert(NarrowUser).second) - continue; - - Worklist.push_back(NarrowUser); - - calculatePostIncRange(NarrowDef, NarrowUser); - } - } -} - -PHINode *llvm::createWideIV(const WideIVInfo &WI, - LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, - DominatorTree *DT, SmallVectorImpl<WeakTrackingVH> &DeadInsts, - unsigned &NumElimExt, unsigned &NumWidened, - bool HasGuards, bool UsePostIncrementRanges) { - WidenIV Widener(WI, LI, SE, DT, DeadInsts, HasGuards, UsePostIncrementRanges); - PHINode *WidePHI = Widener.createWideIV(Rewriter); - NumElimExt = Widener.getNumElimExt(); - NumWidened = Widener.getNumWidened(); - return WidePHI; -} + +//===----------------------------------------------------------------------===// +// Widen Induction Variables - Extend the width of an IV to cover its +// widest uses. +//===----------------------------------------------------------------------===// + +class WidenIV { + // Parameters + PHINode *OrigPhi; + Type *WideType; + + // Context + LoopInfo *LI; + Loop *L; + ScalarEvolution *SE; + DominatorTree *DT; + + // Does the module have any calls to the llvm.experimental.guard intrinsic + // at all? If not we can avoid scanning instructions looking for guards. + bool HasGuards; + + bool UsePostIncrementRanges; + + // Statistics + unsigned NumElimExt = 0; + unsigned NumWidened = 0; + + // Result + PHINode *WidePhi = nullptr; + Instruction *WideInc = nullptr; + const SCEV *WideIncExpr = nullptr; + SmallVectorImpl<WeakTrackingVH> &DeadInsts; + + SmallPtrSet<Instruction *,16> Widened; + + enum ExtendKind { ZeroExtended, SignExtended, Unknown }; + + // A map tracking the kind of extension used to widen each narrow IV + // and narrow IV user. + // Key: pointer to a narrow IV or IV user. + // Value: the kind of extension used to widen this Instruction. 
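+  // (Illustrative note, ours, not from the original source: after the
+  // constructor below runs, ExtendKindMap[OrigPhi] is SignExtended for an IV
+  // widened via sext, so a later query such as
+  //   getExtendKind(OrigPhi) == SignExtended
+  // tells users which extension reproduces the wide value.)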
+ DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap; + + using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>; + + // A map with control-dependent ranges for post increment IV uses. The key is + // a pair of IV def and a use of this def denoting the context. The value is + // a ConstantRange representing possible values of the def at the given + // context. + DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos; + + Optional<ConstantRange> getPostIncRangeInfo(Value *Def, + Instruction *UseI) { + DefUserPair Key(Def, UseI); + auto It = PostIncRangeInfos.find(Key); + return It == PostIncRangeInfos.end() + ? Optional<ConstantRange>(None) + : Optional<ConstantRange>(It->second); + } + + void calculatePostIncRanges(PHINode *OrigPhi); + void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser); + + void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) { + DefUserPair Key(Def, UseI); + auto It = PostIncRangeInfos.find(Key); + if (It == PostIncRangeInfos.end()) + PostIncRangeInfos.insert({Key, R}); + else + It->second = R.intersectWith(It->second); + } + +public: + /// Record a link in the Narrow IV def-use chain along with the WideIV that + /// computes the same value as the Narrow IV def. This avoids caching Use* + /// pointers. + struct NarrowIVDefUse { + Instruction *NarrowDef = nullptr; + Instruction *NarrowUse = nullptr; + Instruction *WideDef = nullptr; + + // True if the narrow def is never negative. Tracking this information lets + // us use a sign extension instead of a zero extension or vice versa, when + // profitable and legal. + bool NeverNegative = false; + + NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD, + bool NeverNegative) + : NarrowDef(ND), NarrowUse(NU), WideDef(WD), + NeverNegative(NeverNegative) {} + }; + + WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, + DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI, + bool HasGuards, bool UsePostIncrementRanges = true); + + PHINode *createWideIV(SCEVExpander &Rewriter); + + unsigned getNumElimExt() { return NumElimExt; }; + unsigned getNumWidened() { return NumWidened; }; + +protected: + Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned, + Instruction *Use); + + Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR); + Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU, + const SCEVAddRecExpr *WideAR); + Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU); + + ExtendKind getExtendKind(Instruction *I); + + using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>; + + WidenedRecTy getWideRecurrence(NarrowIVDefUse DU); + + WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU); + + const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, + unsigned OpCode) const; + + Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter); + + bool widenLoopCompare(NarrowIVDefUse DU); + bool widenWithVariantUse(NarrowIVDefUse DU); + + void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef); + +private: + SmallVector<NarrowIVDefUse, 8> NarrowIVUsers; +}; + + +/// Determine the insertion point for this user. By default, insert immediately +/// before the user. SCEVExpander or LICM will hoist loop invariants out of the +/// loop. For PHI nodes, there may be multiple uses, so compute the nearest +/// common dominator for the incoming blocks. 
A nullptr can be returned if no
+/// viable location is found: it may happen if User is a PHI and Def only comes
+/// to this PHI from unreachable blocks.
+static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
+                                          DominatorTree *DT, LoopInfo *LI) {
+  PHINode *PHI = dyn_cast<PHINode>(User);
+  if (!PHI)
+    return User;
+
+  Instruction *InsertPt = nullptr;
+  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
+    if (PHI->getIncomingValue(i) != Def)
+      continue;
+
+    BasicBlock *InsertBB = PHI->getIncomingBlock(i);
+
+    if (!DT->isReachableFromEntry(InsertBB))
+      continue;
+
+    if (!InsertPt) {
+      InsertPt = InsertBB->getTerminator();
+      continue;
+    }
+    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
+    InsertPt = InsertBB->getTerminator();
+  }
+
+  // If we have skipped all inputs, it means that Def only comes to Phi from
+  // unreachable blocks.
+  if (!InsertPt)
+    return nullptr;
+
+  auto *DefI = dyn_cast<Instruction>(Def);
+  if (!DefI)
+    return InsertPt;
+
+  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
+
+  auto *L = LI->getLoopFor(DefI->getParent());
+  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
+
+  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
+    if (LI->getLoopFor(DTN->getBlock()) == L)
+      return DTN->getBlock()->getTerminator();
+
+  llvm_unreachable("DefI dominates InsertPt!");
+}
+
+WidenIV::WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
+                 DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
+                 bool HasGuards, bool UsePostIncrementRanges)
+    : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
+      L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
+      HasGuards(HasGuards), UsePostIncrementRanges(UsePostIncrementRanges),
+      DeadInsts(DI) {
+  assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
+  ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
+}
+
+Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
+                                 bool IsSigned, Instruction *Use) {
+  // Set the debug location and conservative insertion point.
+  IRBuilder<> Builder(Use);
+  // Hoist the insertion point into loop preheaders as far as possible.
+  for (const Loop *L = LI->getLoopFor(Use->getParent());
+       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
+       L = L->getParentLoop())
+    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
+
+  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
+                    Builder.CreateZExt(NarrowOper, WideType);
+}
+
+/// Instantiate a wide operation to replace a narrow operation. This only needs
+/// to handle operations that can evaluate to SCEVAddRec. It can safely return
+/// nullptr for any operation we decide not to clone.
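+/// (Example, ours for illustration: a narrow user "%u = add nsw i32 %iv, %a"
+/// of a sign-extended IV is cloned as "%u.wide = add nsw i64 %iv.wide,
+/// %a.wide", where %a.wide is a freshly inserted sext of %a chosen to match
+/// the widening kind.)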
+Instruction *WidenIV::cloneIVUser(WidenIV::NarrowIVDefUse DU, + const SCEVAddRecExpr *WideAR) { + unsigned Opcode = DU.NarrowUse->getOpcode(); + switch (Opcode) { + default: + return nullptr; + case Instruction::Add: + case Instruction::Mul: + case Instruction::UDiv: + case Instruction::Sub: + return cloneArithmeticIVUser(DU, WideAR); + + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + return cloneBitwiseIVUser(DU); + } +} + +Instruction *WidenIV::cloneBitwiseIVUser(WidenIV::NarrowIVDefUse DU) { + Instruction *NarrowUse = DU.NarrowUse; + Instruction *NarrowDef = DU.NarrowDef; + Instruction *WideDef = DU.WideDef; + + LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n"); + + // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything + // about the narrow operand yet so must insert a [sz]ext. It is probably loop + // invariant and will be folded or hoisted. If it actually comes from a + // widened IV, it should be removed during a future call to widenIVUse. + bool IsSigned = getExtendKind(NarrowDef) == SignExtended; + Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(0), WideType, + IsSigned, NarrowUse); + Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(1), WideType, + IsSigned, NarrowUse); + + auto *NarrowBO = cast<BinaryOperator>(NarrowUse); + auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, + NarrowBO->getName()); + IRBuilder<> Builder(NarrowUse); + Builder.Insert(WideBO); + WideBO->copyIRFlags(NarrowBO); + return WideBO; +} + +Instruction *WidenIV::cloneArithmeticIVUser(WidenIV::NarrowIVDefUse DU, + const SCEVAddRecExpr *WideAR) { + Instruction *NarrowUse = DU.NarrowUse; + Instruction *NarrowDef = DU.NarrowDef; + Instruction *WideDef = DU.WideDef; + + LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); + + unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1; + + // We're trying to find X such that + // + // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X + // + // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef), + // and check using SCEV if any of them are correct. + + // Returns true if extending NonIVNarrowDef according to `SignExt` is a + // correct solution to X. + auto GuessNonIVOperand = [&](bool SignExt) { + const SCEV *WideLHS; + const SCEV *WideRHS; + + auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) { + if (SignExt) + return SE->getSignExtendExpr(S, Ty); + return SE->getZeroExtendExpr(S, Ty); + }; + + if (IVOpIdx == 0) { + WideLHS = SE->getSCEV(WideDef); + const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1)); + WideRHS = GetExtend(NarrowRHS, WideType); + } else { + const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0)); + WideLHS = GetExtend(NarrowLHS, WideType); + WideRHS = SE->getSCEV(WideDef); + } + + // WideUse is "WideDef `op.wide` X" as described in the comment. + const SCEV *WideUse = + getSCEVByOpCode(WideLHS, WideRHS, NarrowUse->getOpcode()); + + return WideUse == WideAR; + }; + + bool SignExtend = getExtendKind(NarrowDef) == SignExtended; + if (!GuessNonIVOperand(SignExtend)) { + SignExtend = !SignExtend; + if (!GuessNonIVOperand(SignExtend)) + return nullptr; + } + + Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) + ? 
WideDef
+                   : createExtendInst(NarrowUse->getOperand(0), WideType,
+                                      SignExtend, NarrowUse);
+  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
+                   ? WideDef
+                   : createExtendInst(NarrowUse->getOperand(1), WideType,
+                                      SignExtend, NarrowUse);
+
+  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
+  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
+                                        NarrowBO->getName());
+
+  IRBuilder<> Builder(NarrowUse);
+  Builder.Insert(WideBO);
+  WideBO->copyIRFlags(NarrowBO);
+  return WideBO;
+}
+
+WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
+  auto It = ExtendKindMap.find(I);
+  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
+  return It->second;
+}
+
+const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
+                                     unsigned OpCode) const {
+  switch (OpCode) {
+  case Instruction::Add:
+    return SE->getAddExpr(LHS, RHS);
+  case Instruction::Sub:
+    return SE->getMinusSCEV(LHS, RHS);
+  case Instruction::Mul:
+    return SE->getMulExpr(LHS, RHS);
+  case Instruction::UDiv:
+    return SE->getUDivExpr(LHS, RHS);
+  default:
+    llvm_unreachable("Unsupported opcode.");
+  }
+}
+
+/// No-wrap operations can transfer sign extension of their result to their
+/// operands. Generate the SCEV value for the widened operation without
+/// actually modifying the IR yet. If the expression after extending the
+/// operands is an AddRec for this loop, return the AddRec and the kind of
+/// extension used.
+WidenIV::WidenedRecTy
+WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) {
+  // Handle the common case of add<nsw/nuw>.
+  const unsigned OpCode = DU.NarrowUse->getOpcode();
+  // Only Add/Sub/Mul instructions are supported yet.
+  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
+      OpCode != Instruction::Mul)
+    return {nullptr, Unknown};
+
+  // One operand (NarrowDef) has already been extended to WideDef. Now determine
+  // if extending the other will lead to a recurrence.
+  const unsigned ExtendOperIdx =
+      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
+  assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
+         "bad DU");
+
+  const SCEV *ExtendOperExpr = nullptr;
+  const OverflowingBinaryOperator *OBO =
+      cast<OverflowingBinaryOperator>(DU.NarrowUse);
+  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
+  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
+    ExtendOperExpr = SE->getSignExtendExpr(
+        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
+  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
+    ExtendOperExpr = SE->getZeroExtendExpr(
+        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
+  else
+    return {nullptr, Unknown};
+
+  // When creating this SCEV expr, don't apply the current operation's NSW or
+  // NUW flags. This instruction may be guarded by control flow that the
+  // no-wrap behavior depends on. Non-control-equivalent instructions can be
+  // mapped to the same SCEV expression, and it would be incorrect to transfer
+  // NSW/NUW semantics to those operations.
+  const SCEV *lhs = SE->getSCEV(DU.WideDef);
+  const SCEV *rhs = ExtendOperExpr;
+
+  // Let's swap operands to the initial order for the case of non-commutative
+  // operations, like SUB. See PR21014.
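+  // (Worked example, ours: for a narrow use "%u = sub nsw i32 %other, %iv",
+  // NarrowDef is operand 1, so ExtendOperIdx == 0 and the swap below restores
+  // the original order, giving sext(%other) - wide(%iv) rather than the
+  // reversed subtraction.)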
+  if (ExtendOperIdx == 0)
+    std::swap(lhs, rhs);
+  const SCEVAddRecExpr *AddRec =
+      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
+
+  if (!AddRec || AddRec->getLoop() != L)
+    return {nullptr, Unknown};
+
+  return {AddRec, ExtKind};
+}
+
+/// Is this instruction potentially interesting for further simplification
+/// after widening its type? In other words, can the extend be safely hoisted
+/// out of the loop with SCEV reducing the value to a recurrence on the same
+/// loop? If so, return the extended recurrence and the kind of extension used.
+/// Otherwise return {nullptr, Unknown}.
+WidenIV::WidenedRecTy WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU) {
+  if (!SE->isSCEVable(DU.NarrowUse->getType()))
+    return {nullptr, Unknown};
+
+  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
+  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
+      SE->getTypeSizeInBits(WideType)) {
+    // NarrowUse implicitly widens its operand, e.g. a gep with a narrow
+    // index. So don't follow this use.
+    return {nullptr, Unknown};
+  }
+
+  const SCEV *WideExpr;
+  ExtendKind ExtKind;
+  if (DU.NeverNegative) {
+    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
+    if (isa<SCEVAddRecExpr>(WideExpr))
+      ExtKind = SignExtended;
+    else {
+      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
+      ExtKind = ZeroExtended;
+    }
+  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
+    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
+    ExtKind = SignExtended;
+  } else {
+    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
+    ExtKind = ZeroExtended;
+  }
+  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
+  if (!AddRec || AddRec->getLoop() != L)
+    return {nullptr, Unknown};
+  return {AddRec, ExtKind};
+}
+
+/// This IV user cannot be widened. Replace this use of the original narrow IV
+/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
+static void truncateIVUse(WidenIV::NarrowIVDefUse DU, DominatorTree *DT,
+                          LoopInfo *LI) {
+  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
+  if (!InsertPt)
+    return;
+  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
+                    << *DU.NarrowUse << "\n");
+  IRBuilder<> Builder(InsertPt);
+  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
+  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
+}
+
+/// If the narrow use is a compare instruction, then widen the compare
+/// (and possibly the other operand). The extend operation is hoisted into the
+/// loop preheader as far as possible.
+bool WidenIV::widenLoopCompare(WidenIV::NarrowIVDefUse DU) {
+  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
+  if (!Cmp)
+    return false;
+
+  // We can legally widen the comparison in the following two cases:
+  //
+  // - The signedness of the IV extension and comparison match
+  //
+  // - The narrow IV is always positive (and thus its sign extension is equal
+  //   to its zero extension). For instance, let's say we're zero extending
+  //   %narrow for the following use
+  //
+  //     icmp slt i32 %narrow, %val   ... (A)
+  //
+  //   and %narrow is always positive. Then
+  //
+  //     (A) == icmp slt i32 sext(%narrow), sext(%val)
+  //         == icmp slt i32 zext(%narrow), sext(%val)
+  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
+  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
+    return false;
+
+  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
+  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
+  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
+  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
+
+  // Widen the compare instruction.
+  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
+  if (!InsertPt)
+    return false;
+  IRBuilder<> Builder(InsertPt);
+  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
+
+  // Widen the other operand of the compare, if necessary.
+  if (CastWidth < IVWidth) {
+    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
+    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
+  }
+  return true;
+}
+
+// widenIVUse avoids generating a trunc by evaluating the use as an AddRec;
+// this will not work when:
+//  1) SCEV traces back to an instruction inside the loop that SCEV cannot
+//     expand, e.g. add %indvar, (load %addr)
+//  2) SCEV finds a loop variant, e.g. add %indvar, %loopvariant
+// When SCEV fails to avoid the trunc, we can still try an instruction-
+// combining approach to prove that the trunc is not required. This can be
+// further extended with other instruction-combining checks, but for now we
+// handle the following case (sub can be "add" and "mul", "nsw + sext" can be
+// "nuw + zext"):
+//
+// Src:
+//   %c = sub nsw %b, %indvar
+//   %d = sext %c to i64
+// Dst:
+//   %indvar.ext1 = sext %indvar to i64
+//   %m = sext %b to i64
+//   %d = sub nsw i64 %m, %indvar.ext1
+// Therefore, as long as the result of add/sub/mul is extended to the wide
+// type, no trunc is required regardless of how %b is generated. This pattern
+// is common when calculating addresses on 64-bit architectures.
+bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
+  Instruction *NarrowUse = DU.NarrowUse;
+  Instruction *NarrowDef = DU.NarrowDef;
+  Instruction *WideDef = DU.WideDef;
+
+  // Handle the common case of add<nsw/nuw>.
+  const unsigned OpCode = NarrowUse->getOpcode();
+  // Only Add/Sub/Mul instructions are supported.
+  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
+      OpCode != Instruction::Mul)
+    return false;
+
+  // The operand that is not defined by NarrowDef of DU. Let's call it the
+  // other operand.
+  assert((NarrowUse->getOperand(0) == NarrowDef ||
+          NarrowUse->getOperand(1) == NarrowDef) &&
+         "bad DU");
+
+  const OverflowingBinaryOperator *OBO =
+      cast<OverflowingBinaryOperator>(NarrowUse);
+  ExtendKind ExtKind = getExtendKind(NarrowDef);
+  bool CanSignExtend = ExtKind == SignExtended && OBO->hasNoSignedWrap();
+  bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap();
+  auto AnotherOpExtKind = ExtKind;
+
+  // Check that all uses are either:
+  // - the narrow def (in case we are widening the IV increment);
+  // - single-input LCSSA Phis;
+  // - comparisons of the chosen type;
+  // - extends of the chosen type (raison d'etre).
+  SmallVector<Instruction *, 4> ExtUsers;
+  SmallVector<PHINode *, 4> LCSSAPhiUsers;
+  SmallVector<ICmpInst *, 4> ICmpUsers;
+  for (Use &U : NarrowUse->uses()) {
+    Instruction *User = cast<Instruction>(U.getUser());
+    if (User == NarrowDef)
+      continue;
+    if (!L->contains(User)) {
+      auto *LCSSAPhi = cast<PHINode>(User);
+      // Make sure there is only 1 input, so that we don't have to split
+      // critical edges.
+      if (LCSSAPhi->getNumOperands() != 1)
+        return false;
+      LCSSAPhiUsers.push_back(LCSSAPhi);
+      continue;
+    }
+    if (auto *ICmp = dyn_cast<ICmpInst>(User)) {
+      auto Pred = ICmp->getPredicate();
+      // We have 3 types of predicates: signed, unsigned and equality
+      // predicates. For equality, it's legal to widen the icmp for either sign
+      // or zero extend. For sign extend, we can also do so for signed
+      // predicates; likewise for zero extend we can widen the icmp for
+      // unsigned predicates.
+      if (ExtKind == ZeroExtended && ICmpInst::isSigned(Pred))
+        return false;
+      if (ExtKind == SignExtended && ICmpInst::isUnsigned(Pred))
+        return false;
+      ICmpUsers.push_back(ICmp);
+      continue;
+    }
+    if (ExtKind == SignExtended)
+      User = dyn_cast<SExtInst>(User);
+    else
+      User = dyn_cast<ZExtInst>(User);
+    if (!User || User->getType() != WideType)
+      return false;
+    ExtUsers.push_back(User);
+  }
+  if (ExtUsers.empty()) {
+    DeadInsts.emplace_back(NarrowUse);
+    return true;
+  }
+
+  // We'll prove some facts that should be true in the context of ext users.
+  // If there are no users, we are done now. If there are some, pick their
+  // common dominator as context.
+  Instruction *Context = nullptr;
+  for (auto *Ext : ExtUsers) {
+    if (!Context || DT->dominates(Ext, Context))
+      Context = Ext;
+    else if (!DT->dominates(Context, Ext))
+      // For users that don't have a dominance relation, use the common
+      // dominator.
+      Context =
+          DT->findNearestCommonDominator(Context->getParent(), Ext->getParent())
+              ->getTerminator();
+  }
+  assert(Context && "Context not found?");
+
+  if (!CanSignExtend && !CanZeroExtend) {
+    // Because InstCombine turns 'sub nuw' to 'add' losing the no-wrap flag, we
+    // will most likely not see it. Let's try to prove it.
+    if (OpCode != Instruction::Add)
+      return false;
+    if (ExtKind != ZeroExtended)
+      return false;
+    const SCEV *LHS = SE->getSCEV(OBO->getOperand(0));
+    const SCEV *RHS = SE->getSCEV(OBO->getOperand(1));
+    // TODO: Support the case for NarrowDef = NarrowUse->getOperand(1).
+    if (NarrowUse->getOperand(0) != NarrowDef)
+      return false;
+    if (!SE->isKnownNegative(RHS))
+      return false;
+    bool ProvedSubNUW = SE->isKnownPredicateAt(
+        ICmpInst::ICMP_UGE, LHS, SE->getNegativeSCEV(RHS), Context);
+    if (!ProvedSubNUW)
+      return false;
+    // In fact, our 'add' is 'sub nuw'. We will need to widen the 2nd operand
+    // as neg(zext(neg(op))), which is basically sext(op).
+    AnotherOpExtKind = SignExtended;
+  }
+
+  // Verify that the defining operand is an AddRec.
+  const SCEV *Op1 = SE->getSCEV(WideDef);
+  const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
+  if (!AddRecOp1 || AddRecOp1->getLoop() != L)
+    return false;
+
+  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
+
+  // Generate a widening use instruction.
+  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
+                   ? WideDef
+                   : createExtendInst(NarrowUse->getOperand(0), WideType,
+                                      AnotherOpExtKind, NarrowUse);
+  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
+                   ?
WideDef + : createExtendInst(NarrowUse->getOperand(1), WideType, + AnotherOpExtKind, NarrowUse); + + auto *NarrowBO = cast<BinaryOperator>(NarrowUse); + auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, + NarrowBO->getName()); + IRBuilder<> Builder(NarrowUse); + Builder.Insert(WideBO); + WideBO->copyIRFlags(NarrowBO); + ExtendKindMap[NarrowUse] = ExtKind; + + for (Instruction *User : ExtUsers) { + assert(User->getType() == WideType && "Checked before!"); + LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " + << *WideBO << "\n"); + ++NumElimExt; + User->replaceAllUsesWith(WideBO); + DeadInsts.emplace_back(User); + } + + for (PHINode *User : LCSSAPhiUsers) { + assert(User->getNumOperands() == 1 && "Checked before!"); + Builder.SetInsertPoint(User); + auto *WidePN = + Builder.CreatePHI(WideBO->getType(), 1, User->getName() + ".wide"); + BasicBlock *LoopExitingBlock = User->getParent()->getSinglePredecessor(); + assert(LoopExitingBlock && L->contains(LoopExitingBlock) && + "Not a LCSSA Phi?"); + WidePN->addIncoming(WideBO, LoopExitingBlock); + Builder.SetInsertPoint(&*User->getParent()->getFirstInsertionPt()); + auto *TruncPN = Builder.CreateTrunc(WidePN, User->getType()); + User->replaceAllUsesWith(TruncPN); + DeadInsts.emplace_back(User); + } + + for (ICmpInst *User : ICmpUsers) { + Builder.SetInsertPoint(User); + auto ExtendedOp = [&](Value * V)->Value * { + if (V == NarrowUse) + return WideBO; + if (ExtKind == ZeroExtended) + return Builder.CreateZExt(V, WideBO->getType()); + else + return Builder.CreateSExt(V, WideBO->getType()); + }; + auto Pred = User->getPredicate(); + auto *LHS = ExtendedOp(User->getOperand(0)); + auto *RHS = ExtendedOp(User->getOperand(1)); + auto *WideCmp = + Builder.CreateICmp(Pred, LHS, RHS, User->getName() + ".wide"); + User->replaceAllUsesWith(WideCmp); + DeadInsts.emplace_back(User); + } + + return true; +} + +/// Determine whether an individual user of the narrow IV can be widened. If so, +/// return the wide clone of the user. +Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewriter) { + assert(ExtendKindMap.count(DU.NarrowDef) && + "Should already know the kind of extension used to widen NarrowDef"); + + // Stop traversing the def-use chain at inner-loop phis or post-loop phis. + if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) { + if (LI->getLoopFor(UsePhi->getParent()) != L) { + // For LCSSA phis, sink the truncate outside the loop. + // After SimplifyCFG most loop exit targets have a single predecessor. + // Otherwise fall back to a truncate within the loop. + if (UsePhi->getNumOperands() != 1) + truncateIVUse(DU, DT, LI); + else { + // Widening the PHI requires us to insert a trunc. The logical place + // for this trunc is in the same BB as the PHI. This is not possible if + // the BB is terminated by a catchswitch. 
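+        // (Sketch of the rewrite performed below, ours; IR names are
+        // illustrative:
+        //    before: %lcssa = phi i32 [ %narrow, %exiting ]
+        //    after:  %lcssa.wide = phi i64 [ %wide, %exiting ]
+        //            %t = trunc i64 %lcssa.wide to i32, replacing %lcssa.)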
+        if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
+          return nullptr;
+
+        PHINode *WidePhi =
+            PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
+                            UsePhi);
+        WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
+        IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
+        Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
+        UsePhi->replaceAllUsesWith(Trunc);
+        DeadInsts.emplace_back(UsePhi);
+        LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
+                          << *WidePhi << "\n");
+      }
+      return nullptr;
+    }
+  }
+
+  // This narrow use can be widened by a sext if it's non-negative or its
+  // narrow def was widened by a sext. Same for zext.
+  auto canWidenBySExt = [&]() {
+    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
+  };
+  auto canWidenByZExt = [&]() {
+    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
+  };
+
+  // Our raison d'etre! Eliminate sign and zero extension.
+  if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
+      (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
+    Value *NewDef = DU.WideDef;
+    if (DU.NarrowUse->getType() != WideType) {
+      unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
+      unsigned IVWidth = SE->getTypeSizeInBits(WideType);
+      if (CastWidth < IVWidth) {
+        // The cast isn't as wide as the IV, so insert a Trunc.
+        IRBuilder<> Builder(DU.NarrowUse);
+        NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
+      } else {
+        // A wider extend was hidden behind a narrower one. This may induce
+        // another round of IV widening in which the intermediate IV becomes
+        // dead. It should be very rare.
+        LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
+                          << " not wide enough to subsume " << *DU.NarrowUse
+                          << "\n");
+        DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
+        NewDef = DU.NarrowUse;
+      }
+    }
+    if (NewDef != DU.NarrowUse) {
+      LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
+                        << " replaced by " << *DU.WideDef << "\n");
+      ++NumElimExt;
+      DU.NarrowUse->replaceAllUsesWith(NewDef);
+      DeadInsts.emplace_back(DU.NarrowUse);
+    }
+    // Now that the extend is gone, we want to expose its uses for potential
+    // further simplification. We don't need to directly inform SimplifyIVUsers
+    // of the new users, because their parent IV will be processed later as a
+    // new loop phi. If we preserved IVUsers analysis, we would also want to
+    // push the uses of WideDef here.
+
+    // No further widening is needed. The deceased [sz]ext had done it for us.
+    return nullptr;
+  }
+
+  // Does this user itself evaluate to a recurrence after widening?
+  WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
+  if (!WideAddRec.first)
+    WideAddRec = getWideRecurrence(DU);
+
+  assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
+  if (!WideAddRec.first) {
+    // If the use is a loop condition, try to promote the condition instead of
+    // truncating the IV first.
+    if (widenLoopCompare(DU))
+      return nullptr;
+
+    // We are here about to generate a truncate instruction that may hurt
+    // performance because the scalar evolution expression computed earlier
+    // in WideAddRec.first does not indicate a polynomial induction expression.
+    // In that case, look at the operands of the use instruction to determine
+    // if we can still widen the use instead of truncating its operand.
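+    // (Added cross-reference, ours: widenWithVariantUse handles patterns like
+    //   %c = sub nsw i32 %b, %iv ;  %d = sext i32 %c to i64
+    // by widening both operands, so no trunc of the wide IV is needed.)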
+   if (widenWithVariantUse(DU))
+     return nullptr;
+
+   // This user does not evaluate to a recurrence after widening, so don't
+   // follow it. Instead insert a Trunc to kill off the original use,
+   // eventually isolating the original narrow IV so it can be removed.
+   truncateIVUse(DU, DT, LI);
+   return nullptr;
+ }
+ // Assume block terminators cannot evaluate to a recurrence. We can't
+ // insert a Trunc after a terminator if there happens to be a critical edge.
+ assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
+        "SCEV is not expected to evaluate a block terminator");
+
+ // Reuse the IV increment that SCEVExpander created as long as it dominates
+ // NarrowUse.
+ Instruction *WideUse = nullptr;
+ if (WideAddRec.first == WideIncExpr &&
+     Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
+   WideUse = WideInc;
+ else {
+   WideUse = cloneIVUser(DU, WideAddRec.first);
+   if (!WideUse)
+     return nullptr;
+ }
+ // Evaluation of WideAddRec ensured that the narrow expression could be
+ // extended outside the loop without overflow. This suggests that the wide use
+ // evaluates to the same expression as the extended narrow use, but doesn't
+ // absolutely guarantee it. Hence the following failsafe check. In rare cases
+ // where it fails, we simply throw away the newly created wide use.
+ if (WideAddRec.first != SE->getSCEV(WideUse)) {
+   LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
+                     << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
+                     << "\n");
+   DeadInsts.emplace_back(WideUse);
+   return nullptr;
+ }
+
+ // If we reached this point, then we are going to replace
+ // DU.NarrowUse with WideUse. Reattach DbgValue then.
+ replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
+
+ ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
+ // Returning WideUse pushes it on the worklist.
+ return WideUse;
+}
+
+/// Add eligible users of NarrowDef to NarrowIVUsers.
+void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
+  const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
+  bool NonNegativeDef =
+      SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
+                           SE->getZero(NarrowSCEV->getType()));
+  for (User *U : NarrowDef->users()) {
+    Instruction *NarrowUser = cast<Instruction>(U);
+
+    // Handle data flow merges and bizarre phi cycles.
+    if (!Widened.insert(NarrowUser).second)
+      continue;
+
+    bool NonNegativeUse = false;
+    if (!NonNegativeDef) {
+      // We might have control-dependent range information for this context.
+      if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
+        NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
+    }
+
+    NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
+                               NonNegativeDef || NonNegativeUse);
+  }
+}
+
+/// Process a single induction variable. First use the SCEVExpander to create a
+/// wide induction variable that evaluates to the same recurrence as the
+/// original narrow IV. Then use a worklist to forward traverse the narrow IV's
+/// def-use chain. After widenIVUse has processed all interesting IV users, the
+/// narrow IV will be isolated for removal by DeleteDeadPHIs.
+///
+/// It would be simpler to delete uses as they are processed, but we must avoid
+/// invalidating SCEV expressions.
+PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
+  // Is this phi an induction variable?
+  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
+  if (!AddRec)
+    return nullptr;
+
+  // Widen the induction variable expression.
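// ---------------------------------------------------------------------------
// Illustrative aside: "widening the induction variable expression" in
// miniature, with plain C++ integers standing in for SCEV recurrences. The
// narrow recurrence {Start,+,Step} over i32 is re-evaluated over i64 after
// extending Start and Step; while the narrow IV does not wrap, both compute
// the same sequence. All names here are invented for the example.
#include <cassert>
#include <cstdint>

int main() {
  int32_t Start = 10, Step = 3;
  int32_t NarrowIV = Start;                      // {10,+,3} over i32
  int64_t WideIV = static_cast<int64_t>(Start);  // sext of Start
  for (int It = 0; It < 100; ++It) {
    assert(WideIV == static_cast<int64_t>(NarrowIV)); // values agree
    NarrowIV += Step;
    WideIV += static_cast<int64_t>(Step);        // sext of Step
  }
  return 0;
}
// ---------------------------------------------------------------------------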
+  const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
+                               ? SE->getSignExtendExpr(AddRec, WideType)
+                               : SE->getZeroExtendExpr(AddRec, WideType);
+
+  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
+         "Expect the new IV expression to preserve its type");
+
+  // Can the IV be extended outside the loop without overflow?
+  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
+  if (!AddRec || AddRec->getLoop() != L)
+    return nullptr;
+
+  // An AddRec must have loop-invariant operands. Since this AddRec is
+  // materialized by a loop header phi, the expression cannot have any post-loop
+  // operands, so they must dominate the loop header.
+  assert(
+      SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
+      SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
+      "Loop header phi recurrence inputs do not dominate the loop");
+
+  // Iterate over IV uses (including transitive ones) looking for IV increments
+  // of the form 'add nsw %iv, <const>'. For each increment and each use of
+  // the increment calculate control-dependent range information based on
+  // dominating conditions inside of the loop (e.g. a range check inside of the
+  // loop). Calculated ranges are stored in the PostIncRangeInfos map.
+  //
+  // Control-dependent range information is later used to prove that a narrow
+  // definition is not negative (see pushNarrowIVUsers). It's difficult to do
+  // this on demand because when pushNarrowIVUsers needs this information some
+  // of the dominating conditions might be already widened.
+  if (UsePostIncrementRanges)
+    calculatePostIncRanges(OrigPhi);
+
+  // The rewriter provides a value for the desired IV expression. This may
+  // either find an existing phi or materialize a new one. Either way, we
+  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
+  // of the phi-SCC dominates the loop entry.
+  Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt();
+  Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt);
+  // If the wide value is not a phi node but, for example, a cast node such as
+  // bitcast, inttoptr, or ptrtoint, just skip it for now.
+  if (!(WidePhi = dyn_cast<PHINode>(ExpandInst))) {
+    // If the cast node is an inserted instruction without any user, remove it
+    // so the pass doesn't touch the function, as we cannot widen the phi.
+    if (ExpandInst->hasNUses(0) &&
+        Rewriter.isInsertedInstruction(cast<Instruction>(ExpandInst)))
+      DeadInsts.emplace_back(ExpandInst);
+    return nullptr;
+  }
+
+  // Remembering the WideIV increment generated by SCEVExpander allows
+  // widenIVUse to reuse it when widening the narrow IV's increment. We don't
+  // employ a general reuse mechanism because the call above is the only call to
+  // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
+  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
+    WideInc =
+        cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
+    WideIncExpr = SE->getSCEV(WideInc);
+    // Propagate the debug location associated with the original loop increment
+    // to the new (widened) increment.
+    auto *OrigInc =
+        cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
+    WideInc->setDebugLoc(OrigInc->getDebugLoc());
+  }
+
+  LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
+  ++NumWidened;
+
+  // Traverse the def-use chain using a worklist starting at the original IV.
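// ---------------------------------------------------------------------------
// Illustrative aside: the def-use traversal named in the comment above,
// reduced to a minimal worklist-plus-visited-set sketch in plain C++. 'Node'
// is a made-up stand-in for llvm::Instruction; the Visited set is what makes
// phi cycles and data-flow merges safe to walk (compare pushNarrowIVUsers).
#include <cassert>
#include <unordered_set>
#include <vector>

struct Node { std::vector<Node *> Users; };

int countReachable(Node *Root) {
  std::unordered_set<Node *> Visited{Root};
  std::vector<Node *> Worklist{Root};
  while (!Worklist.empty()) {
    Node *Def = Worklist.back();
    Worklist.pop_back();
    for (Node *User : Def->Users)
      if (Visited.insert(User).second) // process each user exactly once
        Worklist.push_back(User);
  }
  return static_cast<int>(Visited.size());
}

int main() {
  Node A, B, C;
  A.Users = {&B, &C};
  C.Users = {&A}; // a cycle, like a phi feeding back into the IV
  assert(countReachable(&A) == 3);
  return 0;
}
// ---------------------------------------------------------------------------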
+ assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); + + Widened.insert(OrigPhi); + pushNarrowIVUsers(OrigPhi, WidePhi); + + while (!NarrowIVUsers.empty()) { + WidenIV::NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); + + // Process a def-use edge. This may replace the use, so don't hold a + // use_iterator across it. + Instruction *WideUse = widenIVUse(DU, Rewriter); + + // Follow all def-use edges from the previous narrow use. + if (WideUse) + pushNarrowIVUsers(DU.NarrowUse, WideUse); + + // widenIVUse may have removed the def-use edge. + if (DU.NarrowDef->use_empty()) + DeadInsts.emplace_back(DU.NarrowDef); + } + + // Attach any debug information to the new PHI. + replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT); + + return WidePhi; +} + +/// Calculates control-dependent range for the given def at the given context +/// by looking at dominating conditions inside of the loop +void WidenIV::calculatePostIncRange(Instruction *NarrowDef, + Instruction *NarrowUser) { + using namespace llvm::PatternMatch; + + Value *NarrowDefLHS; + const APInt *NarrowDefRHS; + if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), + m_APInt(NarrowDefRHS))) || + !NarrowDefRHS->isNonNegative()) + return; + + auto UpdateRangeFromCondition = [&] (Value *Condition, + bool TrueDest) { + CmpInst::Predicate Pred; + Value *CmpRHS; + if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), + m_Value(CmpRHS)))) + return; + + CmpInst::Predicate P = + TrueDest ? Pred : CmpInst::getInversePredicate(Pred); + + auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); + auto CmpConstrainedLHSRange = + ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); + auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap( + *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap); + + updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); + }; + + auto UpdateRangeFromGuards = [&](Instruction *Ctx) { + if (!HasGuards) + return; + + for (Instruction &I : make_range(Ctx->getIterator().getReverse(), + Ctx->getParent()->rend())) { + Value *C = nullptr; + if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C)))) + UpdateRangeFromCondition(C, /*TrueDest=*/true); + } + }; + + UpdateRangeFromGuards(NarrowUser); + + BasicBlock *NarrowUserBB = NarrowUser->getParent(); + // If NarrowUserBB is statically unreachable asking dominator queries may + // yield surprising results. (e.g. 
the block may not have a dom tree node) + if (!DT->isReachableFromEntry(NarrowUserBB)) + return; + + for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); + L->contains(DTB->getBlock()); + DTB = DTB->getIDom()) { + auto *BB = DTB->getBlock(); + auto *TI = BB->getTerminator(); + UpdateRangeFromGuards(TI); + + auto *BI = dyn_cast<BranchInst>(TI); + if (!BI || !BI->isConditional()) + continue; + + auto *TrueSuccessor = BI->getSuccessor(0); + auto *FalseSuccessor = BI->getSuccessor(1); + + auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { + return BBE.isSingleEdge() && + DT->dominates(BBE, NarrowUser->getParent()); + }; + + if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) + UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); + + if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) + UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); + } +} + +/// Calculates PostIncRangeInfos map for the given IV +void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { + SmallPtrSet<Instruction *, 16> Visited; + SmallVector<Instruction *, 6> Worklist; + Worklist.push_back(OrigPhi); + Visited.insert(OrigPhi); + + while (!Worklist.empty()) { + Instruction *NarrowDef = Worklist.pop_back_val(); + + for (Use &U : NarrowDef->uses()) { + auto *NarrowUser = cast<Instruction>(U.getUser()); + + // Don't go looking outside the current loop. + auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; + if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) + continue; + + if (!Visited.insert(NarrowUser).second) + continue; + + Worklist.push_back(NarrowUser); + + calculatePostIncRange(NarrowDef, NarrowUser); + } + } +} + +PHINode *llvm::createWideIV(const WideIVInfo &WI, + LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, + DominatorTree *DT, SmallVectorImpl<WeakTrackingVH> &DeadInsts, + unsigned &NumElimExt, unsigned &NumWidened, + bool HasGuards, bool UsePostIncrementRanges) { + WidenIV Widener(WI, LI, SE, DT, DeadInsts, HasGuards, UsePostIncrementRanges); + PHINode *WidePHI = Widener.createWideIV(Rewriter); + NumElimExt = Widener.getNumElimExt(); + NumWidened = Widener.getNumWidened(); + return WidePHI; +} diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/SimplifyLibCalls.cpp index 7217405b70..f9a9dd237b 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -541,8 +541,8 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilderBase &B) { B.CreateMemCpy(Dst, Align(1), Src, Align(1), ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len)); NewCI->setAttributes(CI->getAttributes()); - NewCI->removeAttributes(AttributeList::ReturnIndex, - AttributeFuncs::typeIncompatible(NewCI->getType())); + NewCI->removeAttributes(AttributeList::ReturnIndex, + AttributeFuncs::typeIncompatible(NewCI->getType())); return Dst; } @@ -570,8 +570,8 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilderBase &B) { // copy for us. Make a memcpy to copy the nul byte with align = 1. 
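// ---------------------------------------------------------------------------
// Illustrative aside: the stpcpy -> memcpy fold in source terms, assuming a
// constant source of known length. Copying len+1 bytes includes the nul, and
// the call's result is dst + len, which is exactly what the transformed code
// returns. Plain C++ spot check; strcpy + Len stands in for stpcpy's result
// since stpcpy is not part of standard C++.
#include <cassert>
#include <cstring>

int main() {
  const char Src[] = "hello";
  const size_t Len = std::strlen(Src); // 5, a "compile-time" constant here
  char A[16], B[16];

  char *EndA = std::strcpy(A, Src) + Len; // stand-in for stpcpy's return
  std::memcpy(B, Src, Len + 1);           // the fold: copy the nul byte too
  char *EndB = B + Len;                   // ...and compute dst + len

  assert(std::strcmp(A, B) == 0);
  assert(*EndA == '\0' && *EndB == '\0');
  return 0;
}
// ---------------------------------------------------------------------------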
  CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1), LenV);
  NewCI->setAttributes(CI->getAttributes());
-  NewCI->removeAttributes(AttributeList::ReturnIndex,
-                          AttributeFuncs::typeIncompatible(NewCI->getType()));
+  NewCI->removeAttributes(AttributeList::ReturnIndex,
+                          AttributeFuncs::typeIncompatible(NewCI->getType()));
  return DstEnd;
 }
 
@@ -612,27 +612,27 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilderBase &B) {
    return Dst;
  }
 
-  // strncpy(a, "a", 4) -> memcpy(a, "a\0\0\0", 4)
-  if (Len > SrcLen + 1) {
-    if (Len <= 128) {
-      StringRef Str;
-      if (!getConstantStringInfo(Src, Str))
-        return nullptr;
-      std::string SrcStr = Str.str();
-      SrcStr.resize(Len, '\0');
-      Src = B.CreateGlobalString(SrcStr, "str");
-    } else {
-      return nullptr;
-    }
-  }
+  // strncpy(a, "a", 4) -> memcpy(a, "a\0\0\0", 4)
+  if (Len > SrcLen + 1) {
+    if (Len <= 128) {
+      StringRef Str;
+      if (!getConstantStringInfo(Src, Str))
+        return nullptr;
+      std::string SrcStr = Str.str();
+      SrcStr.resize(Len, '\0');
+      Src = B.CreateGlobalString(SrcStr, "str");
+    } else {
+      return nullptr;
+    }
+  }
  Type *PT = Callee->getFunctionType()->getParamType(0);
  // strncpy(x, s, c) -> memcpy(align 1 x, align 1 s, c) [s and c are constant]
  CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1),
                                   ConstantInt::get(DL.getIntPtrType(PT), Len));
  NewCI->setAttributes(CI->getAttributes());
-  NewCI->removeAttributes(AttributeList::ReturnIndex,
-                          AttributeFuncs::typeIncompatible(NewCI->getType()));
+  NewCI->removeAttributes(AttributeList::ReturnIndex,
+                          AttributeFuncs::typeIncompatible(NewCI->getType()));
  return Dst;
 }
 
@@ -1108,8 +1108,8 @@ Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilderBase &B) {
  CallInst *NewCI = B.CreateMemCpy(CI->getArgOperand(0), Align(1),
                                   CI->getArgOperand(1), Align(1), Size);
  NewCI->setAttributes(CI->getAttributes());
-  NewCI->removeAttributes(AttributeList::ReturnIndex,
-                          AttributeFuncs::typeIncompatible(NewCI->getType()));
+  NewCI->removeAttributes(AttributeList::ReturnIndex,
+                          AttributeFuncs::typeIncompatible(NewCI->getType()));
  return CI->getArgOperand(0);
 }
 
@@ -1158,12 +1158,12 @@ Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilderBase &B) {
  // mempcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n), x + n
  CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), CI->getArgOperand(1),
                                   Align(1), N);
-  // Propagate attributes, but memcpy has no return value, so make sure that
-  // any return attributes are compliant.
-  // TODO: Attach return value attributes to the 1st operand to preserve them?
+  // Propagate attributes, but memcpy has no return value, so make sure that
+  // any return attributes are compliant.
+  // TODO: Attach return value attributes to the 1st operand to preserve them?
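// ---------------------------------------------------------------------------
// Illustrative aside: the strncpy padding fold from the hunk above, checked
// in plain C++. strncpy(a, "a", 4) nul-pads to the full length, so copying a
// pre-padded constant with memcpy writes the identical four bytes.
#include <cassert>
#include <cstring>

int main() {
  char A[4], B[4];
  std::strncpy(A, "a", 4);      // copies 'a', then pads with three nuls
  std::memcpy(B, "a\0\0\0", 4); // the fold: one 4-byte constant copy
  assert(std::memcmp(A, B, 4) == 0);
  return 0;
}
// ---------------------------------------------------------------------------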
NewCI->setAttributes(CI->getAttributes()); - NewCI->removeAttributes(AttributeList::ReturnIndex, - AttributeFuncs::typeIncompatible(NewCI->getType())); + NewCI->removeAttributes(AttributeList::ReturnIndex, + AttributeFuncs::typeIncompatible(NewCI->getType())); return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N); } @@ -1177,8 +1177,8 @@ Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilderBase &B) { CallInst *NewCI = B.CreateMemMove(CI->getArgOperand(0), Align(1), CI->getArgOperand(1), Align(1), Size); NewCI->setAttributes(CI->getAttributes()); - NewCI->removeAttributes(AttributeList::ReturnIndex, - AttributeFuncs::typeIncompatible(NewCI->getType())); + NewCI->removeAttributes(AttributeList::ReturnIndex, + AttributeFuncs::typeIncompatible(NewCI->getType())); return CI->getArgOperand(0); } @@ -1239,8 +1239,8 @@ Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilderBase &B) { Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val, Size, Align(1)); NewCI->setAttributes(CI->getAttributes()); - NewCI->removeAttributes(AttributeList::ReturnIndex, - AttributeFuncs::typeIncompatible(NewCI->getType())); + NewCI->removeAttributes(AttributeList::ReturnIndex, + AttributeFuncs::typeIncompatible(NewCI->getType())); return CI->getArgOperand(0); } @@ -1653,14 +1653,14 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) { if (ExpoF->isNegative() && (!Pow->hasApproxFunc() && !Pow->hasAllowReassoc())) return nullptr; - // If we have a pow() library call (accesses memory) and we can't guarantee - // that the base is not an infinity, give up: - // pow(-Inf, 0.5) is optionally required to have a result of +Inf (not setting - // errno), but sqrt(-Inf) is required by various standards to set errno. - if (!Pow->doesNotAccessMemory() && !Pow->hasNoInfs() && - !isKnownNeverInfinity(Base, TLI)) - return nullptr; - + // If we have a pow() library call (accesses memory) and we can't guarantee + // that the base is not an infinity, give up: + // pow(-Inf, 0.5) is optionally required to have a result of +Inf (not setting + // errno), but sqrt(-Inf) is required by various standards to set errno. + if (!Pow->doesNotAccessMemory() && !Pow->hasNoInfs() && + !isKnownNeverInfinity(Base, TLI)) + return nullptr; + Sqrt = getSqrtCall(Base, Attrs, Pow->doesNotAccessMemory(), Mod, B, TLI); if (!Sqrt) return nullptr; @@ -1747,8 +1747,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) { // pow(x, n) -> x * x * x * ... const APFloat *ExpoF; - if (AllowApprox && match(Expo, m_APFloat(ExpoF)) && - !ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)) { + if (AllowApprox && match(Expo, m_APFloat(ExpoF)) && + !ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)) { // We limit to a max of 7 multiplications, thus the maximum exponent is 32. // If the exponent is an integer+0.5 we generate a call to sqrt and an // additional fmul. @@ -1774,8 +1774,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) { Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(), Pow->doesNotAccessMemory(), M, B, TLI); - if (!Sqrt) - return nullptr; + if (!Sqrt) + return nullptr; } // We will memoize intermediate products of the Addition Chain. 
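// ---------------------------------------------------------------------------
// Illustrative aside: why the infinity guard above exists, in plain C++.
// Per C99 Annex F, pow(-inf, 0.5) may return +inf without touching errno,
// while sqrt of a negative value is a domain error that sets errno to EDOM
// when math_errhandling includes MATH_ERRNO; so pow can only be replaced by
// sqrt once -inf is ruled out or errno is known not to matter. Output is
// printed rather than asserted because errno behavior is libm-dependent.
#include <cerrno>
#include <cmath>
#include <cstdio>

int main() {
  errno = 0;
  double P = std::pow(-INFINITY, 0.5); // +inf; errno typically untouched
  int PowErrno = errno;

  errno = 0;
  double S = std::sqrt(-INFINITY);     // NaN; domain error
  int SqrtErrno = errno;

  std::printf("pow  = %f (errno %d)\n", P, PowErrno);
  std::printf("sqrt = %f (errno %d)\n", S, SqrtErrno); // EDOM on most libms
  return 0;
}
// ---------------------------------------------------------------------------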
@@ -2199,7 +2199,7 @@ Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilderBase &B) { classifyArgUse(U, F, IsFloat, SinCalls, CosCalls, SinCosCalls); // It's only worthwhile if both sinpi and cospi are actually used. - if (SinCalls.empty() || CosCalls.empty()) + if (SinCalls.empty() || CosCalls.empty()) return nullptr; Value *Sin, *Cos, *SinCos; @@ -2225,7 +2225,7 @@ void LibCallSimplifier::classifyArgUse( SmallVectorImpl<CallInst *> &SinCosCalls) { CallInst *CI = dyn_cast<CallInst>(Val); - if (!CI || CI->use_empty()) + if (!CI || CI->use_empty()) return; // Don't consider calls in other functions. @@ -2522,30 +2522,30 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, if (!CI->getArgOperand(2)->getType()->isPointerTy()) return nullptr; - if (CI->use_empty()) - // sprintf(dest, "%s", str) -> strcpy(dest, str) - return emitStrCpy(CI->getArgOperand(0), CI->getArgOperand(2), B, TLI); - - uint64_t SrcLen = GetStringLength(CI->getArgOperand(2)); - if (SrcLen) { - B.CreateMemCpy( - CI->getArgOperand(0), Align(1), CI->getArgOperand(2), Align(1), - ConstantInt::get(DL.getIntPtrType(CI->getContext()), SrcLen)); - // Returns total number of characters written without null-character. - return ConstantInt::get(CI->getType(), SrcLen - 1); - } else if (Value *V = emitStpCpy(CI->getArgOperand(0), CI->getArgOperand(2), - B, TLI)) { - // sprintf(dest, "%s", str) -> stpcpy(dest, str) - dest - Value *PtrDiff = B.CreatePtrDiff(V, CI->getArgOperand(0)); - return B.CreateIntCast(PtrDiff, CI->getType(), false); - } - - bool OptForSize = CI->getFunction()->hasOptSize() || - llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI, - PGSOQueryType::IRPass); - if (OptForSize) - return nullptr; - + if (CI->use_empty()) + // sprintf(dest, "%s", str) -> strcpy(dest, str) + return emitStrCpy(CI->getArgOperand(0), CI->getArgOperand(2), B, TLI); + + uint64_t SrcLen = GetStringLength(CI->getArgOperand(2)); + if (SrcLen) { + B.CreateMemCpy( + CI->getArgOperand(0), Align(1), CI->getArgOperand(2), Align(1), + ConstantInt::get(DL.getIntPtrType(CI->getContext()), SrcLen)); + // Returns total number of characters written without null-character. 
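// ---------------------------------------------------------------------------
// Illustrative aside: the return-value contract the sprintf fold above relies
// on. sprintf(dest, "%s", str) returns the number of characters written, not
// counting the terminating nul, i.e. strlen(str); with SrcLen counting the
// nul, that is exactly SrcLen - 1. Plain C++ spot check.
#include <cassert>
#include <cstdio>
#include <cstring>

int main() {
  char Dest[32];
  const char *Str = "hi";
  int Written = std::sprintf(Dest, "%s", Str);
  assert(Written == static_cast<int>(std::strlen(Str))); // 2, nul excluded
  return 0;
}
// ---------------------------------------------------------------------------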
+ return ConstantInt::get(CI->getType(), SrcLen - 1); + } else if (Value *V = emitStpCpy(CI->getArgOperand(0), CI->getArgOperand(2), + B, TLI)) { + // sprintf(dest, "%s", str) -> stpcpy(dest, str) - dest + Value *PtrDiff = B.CreatePtrDiff(V, CI->getArgOperand(0)); + return B.CreateIntCast(PtrDiff, CI->getType(), false); + } + + bool OptForSize = CI->getFunction()->hasOptSize() || + llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI, + PGSOQueryType::IRPass); + if (OptForSize) + return nullptr; + Value *Len = emitStrLen(CI->getArgOperand(2), B, DL, TLI); if (!Len) return nullptr; @@ -3278,8 +3278,8 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, B.CreateMemCpy(CI->getArgOperand(0), Align(1), CI->getArgOperand(1), Align(1), CI->getArgOperand(2)); NewCI->setAttributes(CI->getAttributes()); - NewCI->removeAttributes(AttributeList::ReturnIndex, - AttributeFuncs::typeIncompatible(NewCI->getType())); + NewCI->removeAttributes(AttributeList::ReturnIndex, + AttributeFuncs::typeIncompatible(NewCI->getType())); return CI->getArgOperand(0); } return nullptr; @@ -3292,8 +3292,8 @@ Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, B.CreateMemMove(CI->getArgOperand(0), Align(1), CI->getArgOperand(1), Align(1), CI->getArgOperand(2)); NewCI->setAttributes(CI->getAttributes()); - NewCI->removeAttributes(AttributeList::ReturnIndex, - AttributeFuncs::typeIncompatible(NewCI->getType())); + NewCI->removeAttributes(AttributeList::ReturnIndex, + AttributeFuncs::typeIncompatible(NewCI->getType())); return CI->getArgOperand(0); } return nullptr; @@ -3308,29 +3308,29 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), Align(1)); NewCI->setAttributes(CI->getAttributes()); - NewCI->removeAttributes(AttributeList::ReturnIndex, - AttributeFuncs::typeIncompatible(NewCI->getType())); + NewCI->removeAttributes(AttributeList::ReturnIndex, + AttributeFuncs::typeIncompatible(NewCI->getType())); return CI->getArgOperand(0); } return nullptr; } -Value *FortifiedLibCallSimplifier::optimizeMemPCpyChk(CallInst *CI, - IRBuilderBase &B) { - const DataLayout &DL = CI->getModule()->getDataLayout(); - if (isFortifiedCallFoldable(CI, 3, 2)) - if (Value *Call = emitMemPCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, DL, TLI)) { - CallInst *NewCI = cast<CallInst>(Call); - NewCI->setAttributes(CI->getAttributes()); - NewCI->removeAttributes( - AttributeList::ReturnIndex, - AttributeFuncs::typeIncompatible(NewCI->getType())); - return NewCI; - } - return nullptr; -} - +Value *FortifiedLibCallSimplifier::optimizeMemPCpyChk(CallInst *CI, + IRBuilderBase &B) { + const DataLayout &DL = CI->getModule()->getDataLayout(); + if (isFortifiedCallFoldable(CI, 3, 2)) + if (Value *Call = emitMemPCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, DL, TLI)) { + CallInst *NewCI = cast<CallInst>(Call); + NewCI->setAttributes(CI->getAttributes()); + NewCI->removeAttributes( + AttributeList::ReturnIndex, + AttributeFuncs::typeIncompatible(NewCI->getType())); + return NewCI; + } + return nullptr; +} + Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, IRBuilderBase &B, LibFunc Func) { @@ -3411,7 +3411,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemCCpyChk(CallInst *CI, Value *FortifiedLibCallSimplifier::optimizeSNPrintfChk(CallInst *CI, IRBuilderBase &B) { if (isFortifiedCallFoldable(CI, 3, 1, None, 2)) { - SmallVector<Value *, 8> 
VariadicArgs(drop_begin(CI->args(), 5)); + SmallVector<Value *, 8> VariadicArgs(drop_begin(CI->args(), 5)); return emitSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4), VariadicArgs, B, TLI); } @@ -3422,7 +3422,7 @@ Value *FortifiedLibCallSimplifier::optimizeSNPrintfChk(CallInst *CI, Value *FortifiedLibCallSimplifier::optimizeSPrintfChk(CallInst *CI, IRBuilderBase &B) { if (isFortifiedCallFoldable(CI, 2, None, None, 1)) { - SmallVector<Value *, 8> VariadicArgs(drop_begin(CI->args(), 4)); + SmallVector<Value *, 8> VariadicArgs(drop_begin(CI->args(), 4)); return emitSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), VariadicArgs, B, TLI); } @@ -3520,8 +3520,8 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI, switch (Func) { case LibFunc_memcpy_chk: return optimizeMemCpyChk(CI, Builder); - case LibFunc_mempcpy_chk: - return optimizeMemPCpyChk(CI, Builder); + case LibFunc_mempcpy_chk: + return optimizeMemPCpyChk(CI, Builder); case LibFunc_memmove_chk: return optimizeMemMoveChk(CI, Builder); case LibFunc_memset_chk: diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/StripGCRelocates.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/StripGCRelocates.cpp index 8ee1612931..1fa574f04c 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/StripGCRelocates.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/StripGCRelocates.cpp @@ -13,7 +13,7 @@ // present. //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/StripGCRelocates.h" +#include "llvm/Transforms/Utils/StripGCRelocates.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" @@ -25,7 +25,7 @@ using namespace llvm; -static bool stripGCRelocates(Function &F) { +static bool stripGCRelocates(Function &F) { // Nothing to do for declarations. if (F.isDeclaration()) return false; @@ -57,32 +57,32 @@ static bool stripGCRelocates(Function &F) { return !GCRelocates.empty(); } -PreservedAnalyses StripGCRelocates::run(Function &F, - FunctionAnalysisManager &AM) { - if (!stripGCRelocates(F)) - return PreservedAnalyses::all(); - - // Removing gc.relocate preserves the CFG, but most other analysis probably - // need to re-run. - PreservedAnalyses PA; - PA.preserveSet<CFGAnalyses>(); - return PA; -} - -namespace { -struct StripGCRelocatesLegacy : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - StripGCRelocatesLegacy() : FunctionPass(ID) { - initializeStripGCRelocatesLegacyPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &Info) const override {} - - bool runOnFunction(Function &F) override { return ::stripGCRelocates(F); } -}; -char StripGCRelocatesLegacy::ID = 0; -} // namespace - -INITIALIZE_PASS(StripGCRelocatesLegacy, "strip-gc-relocates", +PreservedAnalyses StripGCRelocates::run(Function &F, + FunctionAnalysisManager &AM) { + if (!stripGCRelocates(F)) + return PreservedAnalyses::all(); + + // Removing gc.relocate preserves the CFG, but most other analysis probably + // need to re-run. 
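// ---------------------------------------------------------------------------
// Illustrative aside: the PreservedAnalyses idea behind the code above, in
// miniature and with invented names (standard C++ only, not the LLVM API).
// A pass reports which cached analyses remain valid so the manager can
// invalidate the rest; stripping gc.relocates keeps the CFG intact, so
// CFG-based results survive while value-based ones like ScalarEvolution do
// not.
#include <cassert>
#include <set>
#include <string>

struct Preserved {
  bool All = false;
  std::set<std::string> Kept;
  bool preserves(const std::string &A) const { return All || Kept.count(A); }
};

Preserved runStripPass(bool Changed) {
  Preserved PA;
  if (!Changed) { PA.All = true; return PA; } // nothing changed: keep all
  PA.Kept.insert("CFGAnalyses"); // instructions rewritten, CFG untouched
  return PA;
}

int main() {
  Preserved PA = runStripPass(/*Changed=*/true);
  assert(PA.preserves("CFGAnalyses") && !PA.preserves("ScalarEvolution"));
  return 0;
}
// ---------------------------------------------------------------------------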
+  PreservedAnalyses PA;
+  PA.preserveSet<CFGAnalyses>();
+  return PA;
+}
+
+namespace {
+struct StripGCRelocatesLegacy : public FunctionPass {
+  static char ID; // Pass identification, replacement for typeid
+  StripGCRelocatesLegacy() : FunctionPass(ID) {
+    initializeStripGCRelocatesLegacyPass(*PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &Info) const override {}
+
+  bool runOnFunction(Function &F) override { return ::stripGCRelocates(F); }
+};
+char StripGCRelocatesLegacy::ID = 0;
+} // namespace
+
+INITIALIZE_PASS(StripGCRelocatesLegacy, "strip-gc-relocates",
                "Strip gc.relocates inserted through RewriteStatepointsForGC",
                true, false)
diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
index 5b88ffa97a..10fda4df51 100644
--- a/contrib/libs/llvm12/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Utils/StripNonLineTableDebugInfo.h"
+#include "llvm/Transforms/Utils/StripNonLineTableDebugInfo.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
@@ -18,11 +18,11 @@ namespace {
 
 /// This pass strips all debug info that is not related to line tables.
 /// The result will be the same as if the program were compiled with
 /// -gline-tables-only.
-struct StripNonLineTableDebugLegacyPass : public ModulePass {
+struct StripNonLineTableDebugLegacyPass : public ModulePass {
   static char ID; // Pass identification, replacement for typeid
-  StripNonLineTableDebugLegacyPass() : ModulePass(ID) {
-    initializeStripNonLineTableDebugLegacyPassPass(
-        *PassRegistry::getPassRegistry());
+  StripNonLineTableDebugLegacyPass() : ModulePass(ID) {
+    initializeStripNonLineTableDebugLegacyPassPass(
+        *PassRegistry::getPassRegistry());
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -35,17 +35,17 @@ struct StripNonLineTableDebugLegacyPass : public ModulePass {
 };
 }
 
-char StripNonLineTableDebugLegacyPass::ID = 0;
-INITIALIZE_PASS(StripNonLineTableDebugLegacyPass,
-                "strip-nonlinetable-debuginfo",
+char StripNonLineTableDebugLegacyPass::ID = 0;
+INITIALIZE_PASS(StripNonLineTableDebugLegacyPass,
+                "strip-nonlinetable-debuginfo",
                 "Strip all debug info except linetables", false, false)
 
-ModulePass *llvm::createStripNonLineTableDebugLegacyPass() {
-  return new StripNonLineTableDebugLegacyPass();
+ModulePass *llvm::createStripNonLineTableDebugLegacyPass() {
+  return new StripNonLineTableDebugLegacyPass();
+}
+
+PreservedAnalyses
+StripNonLineTableDebugInfoPass::run(Module &M, ModuleAnalysisManager &AM) {
+  llvm::stripNonLineTableDebugInfo(M);
+  return PreservedAnalyses::all();
 }
-
-PreservedAnalyses
-StripNonLineTableDebugInfoPass::run(Module &M, ModuleAnalysisManager &AM) {
-  llvm::stripNonLineTableDebugInfo(M);
-  return PreservedAnalyses::all();
-}
diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 70187f6fce..3631733713 100644
--- a/contrib/libs/llvm12/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This pass is used to ensure that functions have at most
one return and one -// unreachable instruction in them. +// This pass is used to ensure that functions have at most one return and one +// unreachable instruction in them. // //===----------------------------------------------------------------------===// @@ -20,66 +20,66 @@ #include "llvm/Transforms/Utils.h" using namespace llvm; -char UnifyFunctionExitNodesLegacyPass::ID = 0; +char UnifyFunctionExitNodesLegacyPass::ID = 0; -UnifyFunctionExitNodesLegacyPass::UnifyFunctionExitNodesLegacyPass() - : FunctionPass(ID) { - initializeUnifyFunctionExitNodesLegacyPassPass( - *PassRegistry::getPassRegistry()); +UnifyFunctionExitNodesLegacyPass::UnifyFunctionExitNodesLegacyPass() + : FunctionPass(ID) { + initializeUnifyFunctionExitNodesLegacyPassPass( + *PassRegistry::getPassRegistry()); } -INITIALIZE_PASS(UnifyFunctionExitNodesLegacyPass, "mergereturn", +INITIALIZE_PASS(UnifyFunctionExitNodesLegacyPass, "mergereturn", "Unify function exit nodes", false, false) Pass *llvm::createUnifyFunctionExitNodesPass() { - return new UnifyFunctionExitNodesLegacyPass(); + return new UnifyFunctionExitNodesLegacyPass(); } -void UnifyFunctionExitNodesLegacyPass::getAnalysisUsage( - AnalysisUsage &AU) const { +void UnifyFunctionExitNodesLegacyPass::getAnalysisUsage( + AnalysisUsage &AU) const { // We preserve the non-critical-edgeness property AU.addPreservedID(BreakCriticalEdgesID); // This is a cluster of orthogonal Transforms AU.addPreservedID(LowerSwitchID); } -namespace { - -bool unifyUnreachableBlocks(Function &F) { - std::vector<BasicBlock *> UnreachableBlocks; - +namespace { + +bool unifyUnreachableBlocks(Function &F) { + std::vector<BasicBlock *> UnreachableBlocks; + for (BasicBlock &I : F) - if (isa<UnreachableInst>(I.getTerminator())) + if (isa<UnreachableInst>(I.getTerminator())) UnreachableBlocks.push_back(&I); - if (UnreachableBlocks.size() <= 1) - return false; + if (UnreachableBlocks.size() <= 1) + return false; + + BasicBlock *UnreachableBlock = + BasicBlock::Create(F.getContext(), "UnifiedUnreachableBlock", &F); + new UnreachableInst(F.getContext(), UnreachableBlock); - BasicBlock *UnreachableBlock = - BasicBlock::Create(F.getContext(), "UnifiedUnreachableBlock", &F); - new UnreachableInst(F.getContext(), UnreachableBlock); - - for (BasicBlock *BB : UnreachableBlocks) { - BB->getInstList().pop_back(); // Remove the unreachable inst. - BranchInst::Create(UnreachableBlock, BB); + for (BasicBlock *BB : UnreachableBlocks) { + BB->getInstList().pop_back(); // Remove the unreachable inst. + BranchInst::Create(UnreachableBlock, BB); } - return true; -} - -bool unifyReturnBlocks(Function &F) { - std::vector<BasicBlock *> ReturningBlocks; - - for (BasicBlock &I : F) - if (isa<ReturnInst>(I.getTerminator())) - ReturningBlocks.push_back(&I); - - if (ReturningBlocks.size() <= 1) + return true; +} + +bool unifyReturnBlocks(Function &F) { + std::vector<BasicBlock *> ReturningBlocks; + + for (BasicBlock &I : F) + if (isa<ReturnInst>(I.getTerminator())) + ReturningBlocks.push_back(&I); + + if (ReturningBlocks.size() <= 1) return false; - // Insert a new basic block into the function, add PHI nodes (if the function - // returns values), and convert all of the return instructions into - // unconditional branches. + // Insert a new basic block into the function, add PHI nodes (if the function + // returns values), and convert all of the return instructions into + // unconditional branches. 
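// ---------------------------------------------------------------------------
// Illustrative aside: a source-level analogy (hypothetical functions, not the
// pass itself) of what unifyReturnBlocks does to the CFG. Multiple returns
// become branches into one UnifiedReturnBlock; the PHI node plays the role of
// 'Ret' below.
#include <cassert>

int beforeMerge(int X) {
  if (X > 0)
    return X * 2; // two separate returning blocks...
  return -X;
}

int afterMerge(int X) {
  int Ret;       // the PHI collecting one value per predecessor
  if (X > 0)
    Ret = X * 2; // former returns, now unconditional branches
  else
    Ret = -X;
  return Ret;    // the single unified return block
}

int main() {
  for (int X : {-3, 0, 7})
    assert(beforeMerge(X) == afterMerge(X));
  return 0;
}
// ---------------------------------------------------------------------------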
BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), "UnifiedReturnBlock", &F); @@ -105,25 +105,25 @@ bool unifyReturnBlocks(Function &F) { BB->getInstList().pop_back(); // Remove the return insn BranchInst::Create(NewRetBlock, BB); } - + return true; } -} // namespace - -// Unify all exit nodes of the CFG by creating a new BasicBlock, and converting -// all returns to unconditional branches to this new basic block. Also, unify -// all unreachable blocks. -bool UnifyFunctionExitNodesLegacyPass::runOnFunction(Function &F) { - bool Changed = false; - Changed |= unifyUnreachableBlocks(F); - Changed |= unifyReturnBlocks(F); - return Changed; -} - -PreservedAnalyses UnifyFunctionExitNodesPass::run(Function &F, - FunctionAnalysisManager &AM) { - bool Changed = false; - Changed |= unifyUnreachableBlocks(F); - Changed |= unifyReturnBlocks(F); - return Changed ? PreservedAnalyses() : PreservedAnalyses::all(); -} +} // namespace + +// Unify all exit nodes of the CFG by creating a new BasicBlock, and converting +// all returns to unconditional branches to this new basic block. Also, unify +// all unreachable blocks. +bool UnifyFunctionExitNodesLegacyPass::runOnFunction(Function &F) { + bool Changed = false; + Changed |= unifyUnreachableBlocks(F); + Changed |= unifyReturnBlocks(F); + return Changed; +} + +PreservedAnalyses UnifyFunctionExitNodesPass::run(Function &F, + FunctionAnalysisManager &AM) { + bool Changed = false; + Changed |= unifyUnreachableBlocks(F); + Changed |= unifyReturnBlocks(F); + return Changed ? PreservedAnalyses() : PreservedAnalyses::all(); +} diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/UnifyLoopExits.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/UnifyLoopExits.cpp index 1bca6040af..0b718ed613 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/UnifyLoopExits.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/UnifyLoopExits.cpp @@ -16,8 +16,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/UnifyLoopExits.h" -#include "llvm/ADT/MapVector.h" +#include "llvm/Transforms/Utils/UnifyLoopExits.h" +#include "llvm/ADT/MapVector.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/InitializePasses.h" @@ -29,10 +29,10 @@ using namespace llvm; namespace { -struct UnifyLoopExitsLegacyPass : public FunctionPass { +struct UnifyLoopExitsLegacyPass : public FunctionPass { static char ID; - UnifyLoopExitsLegacyPass() : FunctionPass(ID) { - initializeUnifyLoopExitsLegacyPassPass(*PassRegistry::getPassRegistry()); + UnifyLoopExitsLegacyPass() : FunctionPass(ID) { + initializeUnifyLoopExitsLegacyPassPass(*PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -48,19 +48,19 @@ struct UnifyLoopExitsLegacyPass : public FunctionPass { }; } // namespace -char UnifyLoopExitsLegacyPass::ID = 0; +char UnifyLoopExitsLegacyPass::ID = 0; -FunctionPass *llvm::createUnifyLoopExitsPass() { - return new UnifyLoopExitsLegacyPass(); -} +FunctionPass *llvm::createUnifyLoopExitsPass() { + return new UnifyLoopExitsLegacyPass(); +} -INITIALIZE_PASS_BEGIN(UnifyLoopExitsLegacyPass, "unify-loop-exits", +INITIALIZE_PASS_BEGIN(UnifyLoopExitsLegacyPass, "unify-loop-exits", "Fixup each natural loop to have a single exit block", false /* Only looks at CFG */, false /* Analysis Pass */) -INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass) +INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_END(UnifyLoopExitsLegacyPass, "unify-loop-exits", +INITIALIZE_PASS_END(UnifyLoopExitsLegacyPass, "unify-loop-exits", "Fixup each natural loop to have a single exit block", false /* Only looks at CFG */, false /* Analysis Pass */) @@ -84,7 +84,7 @@ static void restoreSSA(const DominatorTree &DT, const Loop *L, const SetVector<BasicBlock *> &Incoming, BasicBlock *LoopExitBlock) { using InstVector = SmallVector<Instruction *, 8>; - using IIMap = MapVector<Instruction *, InstVector>; + using IIMap = MapVector<Instruction *, InstVector>; IIMap ExternalUsers; for (auto BB : L->blocks()) { for (auto &I : *BB) { @@ -207,7 +207,7 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) { return true; } -static bool runImpl(LoopInfo &LI, DominatorTree &DT) { +static bool runImpl(LoopInfo &LI, DominatorTree &DT) { bool Changed = false; auto Loops = LI.getLoopsInPreorder(); @@ -218,28 +218,28 @@ static bool runImpl(LoopInfo &LI, DominatorTree &DT) { } return Changed; } - -bool UnifyLoopExitsLegacyPass::runOnFunction(Function &F) { - LLVM_DEBUG(dbgs() << "===== Unifying loop exits in function " << F.getName() - << "\n"); - auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - - return runImpl(LI, DT); -} - -namespace llvm { - -PreservedAnalyses UnifyLoopExitsPass::run(Function &F, - FunctionAnalysisManager &AM) { - auto &LI = AM.getResult<LoopAnalysis>(F); - auto &DT = AM.getResult<DominatorTreeAnalysis>(F); - - if (!runImpl(LI, DT)) - return PreservedAnalyses::all(); - PreservedAnalyses PA; - PA.preserve<LoopAnalysis>(); - PA.preserve<DominatorTreeAnalysis>(); - return PA; -} -} // namespace llvm + +bool UnifyLoopExitsLegacyPass::runOnFunction(Function &F) { + LLVM_DEBUG(dbgs() << "===== Unifying loop exits in function " << F.getName() + << "\n"); + auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + + return runImpl(LI, DT); +} + +namespace llvm { + +PreservedAnalyses UnifyLoopExitsPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &LI = AM.getResult<LoopAnalysis>(F); + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + + if (!runImpl(LI, DT)) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve<LoopAnalysis>(); + PA.preserve<DominatorTreeAnalysis>(); + return PA; +} +} // namespace llvm diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp index fa87b7384d..c57cec6be6 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp @@ -13,11 +13,11 @@ #include "llvm/Transforms/Utils/UniqueInternalLinkageNames.h" #include "llvm/ADT/SmallString.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/MD5.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -30,31 +30,31 @@ static bool uniqueifyInternalLinkageNames(Module &M) { Md5.final(R); SmallString<32> Str; llvm::MD5::stringifyResult(R, Str); - // Convert MD5hash to Decimal. 
Demangler suffixes can either contain numbers - // or characters but not both. - APInt IntHash = APInt(128, Str.str(), 16); - // Prepend "__uniq" before the hash for tools like profilers to understand that - // this symbol is of internal linkage type. - std::string ModuleNameHash = (Twine(".__uniq.") + Twine(IntHash.toString(10, false))).str(); + // Convert MD5hash to Decimal. Demangler suffixes can either contain numbers + // or characters but not both. + APInt IntHash = APInt(128, Str.str(), 16); + // Prepend "__uniq" before the hash for tools like profilers to understand that + // this symbol is of internal linkage type. + std::string ModuleNameHash = (Twine(".__uniq.") + Twine(IntHash.toString(10, false))).str(); bool Changed = false; - MDBuilder MDB(M.getContext()); + MDBuilder MDB(M.getContext()); // Append the module hash to all internal linkage functions. for (auto &F : M) { if (F.hasInternalLinkage()) { F.setName(F.getName() + ModuleNameHash); - F.addFnAttr("sample-profile-suffix-elision-policy", "selected"); - // Replace linkage names in the debug metadata. - if (DISubprogram *SP = F.getSubprogram()) { - if (SP->getRawLinkageName()) { - auto *Name = MDB.createString(F.getName()); - SP->replaceRawLinkageName(Name); - if (DISubprogram *SPDecl = SP->getDeclaration()) { - if (SPDecl->getRawLinkageName()) - SPDecl->replaceRawLinkageName(Name); - } - } - } + F.addFnAttr("sample-profile-suffix-elision-policy", "selected"); + // Replace linkage names in the debug metadata. + if (DISubprogram *SP = F.getSubprogram()) { + if (SP->getRawLinkageName()) { + auto *Name = MDB.createString(F.getName()); + SP->replaceRawLinkageName(Name); + if (DISubprogram *SPDecl = SP->getDeclaration()) { + if (SPDecl->getRawLinkageName()) + SPDecl->replaceRawLinkageName(Name); + } + } + } Changed = true; } } diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/Utils.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/Utils.cpp index 6137c3092b..73c0532f3f 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/Utils.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/Utils.cpp @@ -34,17 +34,17 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) { initializeLibCallsShrinkWrapLegacyPassPass(Registry); initializeLoopSimplifyPass(Registry); initializeLowerInvokeLegacyPassPass(Registry); - initializeLowerSwitchLegacyPassPass(Registry); + initializeLowerSwitchLegacyPassPass(Registry); initializeNameAnonGlobalLegacyPassPass(Registry); initializePromoteLegacyPassPass(Registry); - initializeStripNonLineTableDebugLegacyPassPass(Registry); - initializeUnifyFunctionExitNodesLegacyPassPass(Registry); + initializeStripNonLineTableDebugLegacyPassPass(Registry); + initializeUnifyFunctionExitNodesLegacyPassPass(Registry); initializeMetaRenamerPass(Registry); - initializeStripGCRelocatesLegacyPass(Registry); + initializeStripGCRelocatesLegacyPass(Registry); initializePredicateInfoPrinterLegacyPassPass(Registry); initializeInjectTLIMappingsLegacyPass(Registry); initializeFixIrreduciblePass(Registry); - initializeUnifyLoopExitsLegacyPassPass(Registry); + initializeUnifyLoopExitsLegacyPassPass(Registry); initializeUniqueInternalLinkageNamesLegacyPassPass(Registry); } diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/VNCoercion.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/VNCoercion.cpp index aa9c96db83..61cd8595a7 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/VNCoercion.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/VNCoercion.cpp @@ -17,7 +17,7 @@ static bool isFirstClassAggregateOrScalableType(Type *Ty) { 
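// ---------------------------------------------------------------------------
// Illustrative aside: the "must-aliased value coercion" idea that the
// function below checks, shown in plain C++. A stored value can satisfy a
// load of a different type when the sizes line up, because the coercion is
// ordinary bit-reinterpretation (a bitcast). Hypothetical standalone sketch.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  float Stored = 1.0f; // the store's value (32 bits)
  uint32_t Loaded;     // a load of a different 32-bit type
  static_assert(sizeof(Stored) == sizeof(Loaded), "sizes must match");
  std::memcpy(&Loaded, &Stored, sizeof(Loaded)); // coercion = bitcast
  assert(Loaded == 0x3f800000u); // IEEE-754 bit pattern of 1.0f
  return 0;
}
// ---------------------------------------------------------------------------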
bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, const DataLayout &DL) { Type *StoredTy = StoredVal->getType(); - + if (StoredTy == LoadTy) return true; @@ -37,29 +37,29 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, if (StoreSize < DL.getTypeSizeInBits(LoadTy).getFixedSize()) return false; - bool StoredNI = DL.isNonIntegralPointerType(StoredTy->getScalarType()); - bool LoadNI = DL.isNonIntegralPointerType(LoadTy->getScalarType()); + bool StoredNI = DL.isNonIntegralPointerType(StoredTy->getScalarType()); + bool LoadNI = DL.isNonIntegralPointerType(LoadTy->getScalarType()); // Don't coerce non-integral pointers to integers or vice versa. - if (StoredNI != LoadNI) { + if (StoredNI != LoadNI) { // As a special case, allow coercion of memset used to initialize // an array w/null. Despite non-integral pointers not generally having a // specific bit pattern, we do assume null is zero. if (auto *CI = dyn_cast<Constant>(StoredVal)) return CI->isNullValue(); return false; - } else if (StoredNI && LoadNI && - StoredTy->getPointerAddressSpace() != - LoadTy->getPointerAddressSpace()) { - return false; + } else if (StoredNI && LoadNI && + StoredTy->getPointerAddressSpace() != + LoadTy->getPointerAddressSpace()) { + return false; } - - - // The implementation below uses inttoptr for vectors of unequal size; we - // can't allow this for non integral pointers. We could teach it to extract - // exact subvectors if desired. - if (StoredNI && StoreSize != DL.getTypeSizeInBits(LoadTy).getFixedSize()) - return false; - + + + // The implementation below uses inttoptr for vectors of unequal size; we + // can't allow this for non integral pointers. We could teach it to extract + // exact subvectors if desired. + if (StoredNI && StoreSize != DL.getTypeSizeInBits(LoadTy).getFixedSize()) + return false; + return true; } @@ -236,8 +236,8 @@ int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr, if (isFirstClassAggregateOrScalableType(StoredVal->getType())) return -1; - if (!canCoerceMustAliasedValueToLoad(StoredVal, LoadTy, DL)) - return -1; + if (!canCoerceMustAliasedValueToLoad(StoredVal, LoadTy, DL)) + return -1; Value *StorePtr = DepSI->getPointerOperand(); uint64_t StoreSize = @@ -340,7 +340,7 @@ int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI, if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy()) return -1; - if (!canCoerceMustAliasedValueToLoad(DepLI, LoadTy, DL)) + if (!canCoerceMustAliasedValueToLoad(DepLI, LoadTy, DL)) return -1; Value *DepPtr = DepLI->getPointerOperand(); @@ -398,7 +398,7 @@ int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, if (!Src) return -1; - GlobalVariable *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(Src)); + GlobalVariable *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(Src)); if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) return -1; diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/ValueMapper.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/ValueMapper.cpp index 43e0439534..930e0b7ee0 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/ValueMapper.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/ValueMapper.cpp @@ -819,15 +819,15 @@ void Mapper::flush() { break; case WorklistEntry::MapAppendingVar: { unsigned PrefixSize = AppendingInits.size() - E.AppendingGVNumNewMembers; - // mapAppendingVariable call can change AppendingInits if initalizer for - // the variable depends on another appending global, because of that inits - // need 
to be extracted and updated before the call.
-    SmallVector<Constant *, 8> NewInits(
-        drop_begin(AppendingInits, PrefixSize));
-    AppendingInits.resize(PrefixSize);
+    // mapAppendingVariable call can change AppendingInits if the initializer
+    // for the variable depends on another appending global; because of that,
+    // the inits need to be extracted and updated before the call.
+    SmallVector<Constant *, 8> NewInits(
+        drop_begin(AppendingInits, PrefixSize));
+    AppendingInits.resize(PrefixSize);
     mapAppendingVariable(*E.Data.AppendingGV.GV, E.Data.AppendingGV.InitPrefix,
-                         E.AppendingGVIsOldCtorDtor, makeArrayRef(NewInits));
+                         E.AppendingGVIsOldCtorDtor, makeArrayRef(NewInits));
     break;
   }
   case WorklistEntry::MapGlobalIndirectSymbol:
@@ -901,13 +901,13 @@ void Mapper::remapInstruction(Instruction *I) {
     LLVMContext &C = CB->getContext();
     AttributeList Attrs = CB->getAttributes();
     for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) {
-      for (Attribute::AttrKind TypedAttr :
-           {Attribute::ByVal, Attribute::StructRet, Attribute::ByRef}) {
-        if (Type *Ty = Attrs.getAttribute(i, TypedAttr).getValueAsType()) {
-          Attrs = Attrs.replaceAttributeType(C, i, TypedAttr,
-                                             TypeMapper->remapType(Ty));
-          break;
-        }
+      for (Attribute::AttrKind TypedAttr :
+           {Attribute::ByVal, Attribute::StructRet, Attribute::ByRef}) {
+        if (Type *Ty = Attrs.getAttribute(i, TypedAttr).getValueAsType()) {
+          Attrs = Attrs.replaceAttributeType(C, i, TypedAttr,
+                                             TypeMapper->remapType(Ty));
+          break;
+        }
       }
     }
     CB->setAttributes(Attrs);
diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/ya.make b/contrib/libs/llvm12/lib/Transforms/Utils/ya.make
index 35e2cfb7f1..c07d5d6db6 100644
--- a/contrib/libs/llvm12/lib/Transforms/Utils/ya.make
+++ b/contrib/libs/llvm12/lib/Transforms/Utils/ya.make
@@ -12,11 +12,11 @@ LICENSE(Apache-2.0 WITH LLVM-exception)
 
 LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
 
 PEERDIR(
-    contrib/libs/llvm12
-    contrib/libs/llvm12/include
-    contrib/libs/llvm12/lib/Analysis
-    contrib/libs/llvm12/lib/IR
-    contrib/libs/llvm12/lib/Support
+    contrib/libs/llvm12
+    contrib/libs/llvm12/include
+    contrib/libs/llvm12/lib/Analysis
+    contrib/libs/llvm12/lib/IR
+    contrib/libs/llvm12/lib/Support
 )
 
 ADDINCL(
@@ -63,7 +63,7 @@ SRCS(
     LCSSA.cpp
     LibCallsShrinkWrap.cpp
     Local.cpp
-    LoopPeel.cpp
+    LoopPeel.cpp
     LoopRotationUtils.cpp
     LoopSimplify.cpp
     LoopUnroll.cpp
@@ -74,7 +74,7 @@ SRCS(
     LowerInvoke.cpp
     LowerMemIntrinsics.cpp
     LowerSwitch.cpp
-    MatrixUtils.cpp
+    MatrixUtils.cpp
    Mem2Reg.cpp
    MetaRenamer.cpp
    ModuleUtils.cpp