diff options
author | orivej <orivej@yandex-team.ru> | 2022-02-10 16:45:01 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:01 +0300 |
commit | 2d37894b1b037cf24231090eda8589bbb44fb6fc (patch) | |
tree | be835aa92c6248212e705f25388ebafcf84bc7a1 /contrib/libs/llvm12/lib/Transforms/Utils/BypassSlowDivision.cpp | |
parent | 718c552901d703c502ccbefdfc3c9028d608b947 (diff) | |
download | ydb-2d37894b1b037cf24231090eda8589bbb44fb6fc.tar.gz |
Restoring authorship annotation for <orivej@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Transforms/Utils/BypassSlowDivision.cpp')
-rw-r--r-- | contrib/libs/llvm12/lib/Transforms/Utils/BypassSlowDivision.cpp | 964 |
1 files changed, 482 insertions, 482 deletions
diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/BypassSlowDivision.cpp b/contrib/libs/llvm12/lib/Transforms/Utils/BypassSlowDivision.cpp index 4299153e7b..833d042106 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/contrib/libs/llvm12/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -1,482 +1,482 @@ -//===- BypassSlowDivision.cpp - Bypass slow division ----------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains an optimization for div and rem on architectures that -// execute short instructions significantly faster than longer instructions. -// For example, on Intel Atom 32-bit divides are slow enough that during -// runtime it is profitable to check the value of the operands, and if they are -// positive and less than 256 use an unsigned 8-bit divide. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/BypassSlowDivision.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Value.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/KnownBits.h" -#include <cassert> -#include <cstdint> - -using namespace llvm; - -#define DEBUG_TYPE "bypass-slow-division" - -namespace { - - struct QuotRemPair { - Value *Quotient; - Value *Remainder; - - QuotRemPair(Value *InQuotient, Value *InRemainder) - : Quotient(InQuotient), Remainder(InRemainder) {} - }; - - /// A quotient and remainder, plus a BB from which they logically "originate". - /// If you use Quotient or Remainder in a Phi node, you should use BB as its - /// corresponding predecessor. - struct QuotRemWithBB { - BasicBlock *BB = nullptr; - Value *Quotient = nullptr; - Value *Remainder = nullptr; - }; - -using DivCacheTy = DenseMap<DivRemMapKey, QuotRemPair>; -using BypassWidthsTy = DenseMap<unsigned, unsigned>; -using VisitedSetTy = SmallPtrSet<Instruction *, 4>; - -enum ValueRange { - /// Operand definitely fits into BypassType. No runtime checks are needed. - VALRNG_KNOWN_SHORT, - /// A runtime check is required, as value range is unknown. - VALRNG_UNKNOWN, - /// Operand is unlikely to fit into BypassType. The bypassing should be - /// disabled. - VALRNG_LIKELY_LONG -}; - -class FastDivInsertionTask { - bool IsValidTask = false; - Instruction *SlowDivOrRem = nullptr; - IntegerType *BypassType = nullptr; - BasicBlock *MainBB = nullptr; - - bool isHashLikeValue(Value *V, VisitedSetTy &Visited); - ValueRange getValueRange(Value *Op, VisitedSetTy &Visited); - QuotRemWithBB createSlowBB(BasicBlock *Successor); - QuotRemWithBB createFastBB(BasicBlock *Successor); - QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS, - BasicBlock *PhiBB); - Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2); - Optional<QuotRemPair> insertFastDivAndRem(); - - bool isSignedOp() { - return SlowDivOrRem->getOpcode() == Instruction::SDiv || - SlowDivOrRem->getOpcode() == Instruction::SRem; - } - - bool isDivisionOp() { - return SlowDivOrRem->getOpcode() == Instruction::SDiv || - SlowDivOrRem->getOpcode() == Instruction::UDiv; - } - - Type *getSlowType() { return SlowDivOrRem->getType(); } - -public: - FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths); - - Value *getReplacement(DivCacheTy &Cache); -}; - -} // end anonymous namespace - -FastDivInsertionTask::FastDivInsertionTask(Instruction *I, - const BypassWidthsTy &BypassWidths) { - switch (I->getOpcode()) { - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::URem: - case Instruction::SRem: - SlowDivOrRem = I; - break; - default: - // I is not a div/rem operation. - return; - } - - // Skip division on vector types. Only optimize integer instructions. - IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType()); - if (!SlowType) - return; - - // Skip if this bitwidth is not bypassed. - auto BI = BypassWidths.find(SlowType->getBitWidth()); - if (BI == BypassWidths.end()) - return; - - // Get type for div/rem instruction with bypass bitwidth. - IntegerType *BT = IntegerType::get(I->getContext(), BI->second); - BypassType = BT; - - // The original basic block. - MainBB = I->getParent(); - - // The instruction is indeed a slow div or rem operation. - IsValidTask = true; -} - -/// Reuses previously-computed dividend or remainder from the current BB if -/// operands and operation are identical. Otherwise calls insertFastDivAndRem to -/// perform the optimization and caches the resulting dividend and remainder. -/// If no replacement can be generated, nullptr is returned. -Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) { - // First, make sure that the task is valid. - if (!IsValidTask) - return nullptr; - - // Then, look for a value in Cache. - Value *Dividend = SlowDivOrRem->getOperand(0); - Value *Divisor = SlowDivOrRem->getOperand(1); - DivRemMapKey Key(isSignedOp(), Dividend, Divisor); - auto CacheI = Cache.find(Key); - - if (CacheI == Cache.end()) { - // If previous instance does not exist, try to insert fast div. - Optional<QuotRemPair> OptResult = insertFastDivAndRem(); - // Bail out if insertFastDivAndRem has failed. - if (!OptResult) - return nullptr; - CacheI = Cache.insert({Key, *OptResult}).first; - } - - QuotRemPair &Value = CacheI->second; - return isDivisionOp() ? Value.Quotient : Value.Remainder; -} - -/// Check if a value looks like a hash. -/// -/// The routine is expected to detect values computed using the most common hash -/// algorithms. Typically, hash computations end with one of the following -/// instructions: -/// -/// 1) MUL with a constant wider than BypassType -/// 2) XOR instruction -/// -/// And even if we are wrong and the value is not a hash, it is still quite -/// unlikely that such values will fit into BypassType. -/// -/// To detect string hash algorithms like FNV we have to look through PHI-nodes. -/// It is implemented as a depth-first search for values that look neither long -/// nor hash-like. -bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) { - Instruction *I = dyn_cast<Instruction>(V); - if (!I) - return false; - - switch (I->getOpcode()) { - case Instruction::Xor: - return true; - case Instruction::Mul: { - // After Constant Hoisting pass, long constants may be represented as - // bitcast instructions. As a result, some constants may look like an - // instruction at first, and an additional check is necessary to find out if - // an operand is actually a constant. - Value *Op1 = I->getOperand(1); - ConstantInt *C = dyn_cast<ConstantInt>(Op1); - if (!C && isa<BitCastInst>(Op1)) - C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0)); - return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth(); - } - case Instruction::PHI: - // Stop IR traversal in case of a crazy input code. This limits recursion - // depth. - if (Visited.size() >= 16) - return false; - // Do not visit nodes that have been visited already. We return true because - // it means that we couldn't find any value that doesn't look hash-like. - if (!Visited.insert(I).second) - return true; - return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) { - // Ignore undef values as they probably don't affect the division - // operands. - return getValueRange(V, Visited) == VALRNG_LIKELY_LONG || - isa<UndefValue>(V); - }); - default: - return false; - } -} - -/// Check if an integer value fits into our bypass type. -ValueRange FastDivInsertionTask::getValueRange(Value *V, - VisitedSetTy &Visited) { - unsigned ShortLen = BypassType->getBitWidth(); - unsigned LongLen = V->getType()->getIntegerBitWidth(); - - assert(LongLen > ShortLen && "Value type must be wider than BypassType"); - unsigned HiBits = LongLen - ShortLen; - - const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout(); - KnownBits Known(LongLen); - - computeKnownBits(V, Known, DL); - - if (Known.countMinLeadingZeros() >= HiBits) - return VALRNG_KNOWN_SHORT; - - if (Known.countMaxLeadingZeros() < HiBits) - return VALRNG_LIKELY_LONG; - - // Long integer divisions are often used in hashtable implementations. It's - // not worth bypassing such divisions because hash values are extremely - // unlikely to have enough leading zeros. The call below tries to detect - // values that are unlikely to fit BypassType (including hashes). - if (isHashLikeValue(V, Visited)) - return VALRNG_LIKELY_LONG; - - return VALRNG_UNKNOWN; -} - -/// Add new basic block for slow div and rem operations and put it before -/// SuccessorBB. -QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) { - QuotRemWithBB DivRemPair; - DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "", - MainBB->getParent(), SuccessorBB); - IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin()); - Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc()); - - Value *Dividend = SlowDivOrRem->getOperand(0); - Value *Divisor = SlowDivOrRem->getOperand(1); - - if (isSignedOp()) { - DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor); - DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor); - } else { - DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor); - DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor); - } - - Builder.CreateBr(SuccessorBB); - return DivRemPair; -} - -/// Add new basic block for fast div and rem operations and put it before -/// SuccessorBB. -QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) { - QuotRemWithBB DivRemPair; - DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "", - MainBB->getParent(), SuccessorBB); - IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin()); - Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc()); - - Value *Dividend = SlowDivOrRem->getOperand(0); - Value *Divisor = SlowDivOrRem->getOperand(1); - Value *ShortDivisorV = - Builder.CreateCast(Instruction::Trunc, Divisor, BypassType); - Value *ShortDividendV = - Builder.CreateCast(Instruction::Trunc, Dividend, BypassType); - - // udiv/urem because this optimization only handles positive numbers. - Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV); - Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV); - DivRemPair.Quotient = - Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType()); - DivRemPair.Remainder = - Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType()); - Builder.CreateBr(SuccessorBB); - - return DivRemPair; -} - -/// Creates Phi nodes for result of Div and Rem. -QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS, - QuotRemWithBB &RHS, - BasicBlock *PhiBB) { - IRBuilder<> Builder(PhiBB, PhiBB->begin()); - Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc()); - PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2); - QuoPhi->addIncoming(LHS.Quotient, LHS.BB); - QuoPhi->addIncoming(RHS.Quotient, RHS.BB); - PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2); - RemPhi->addIncoming(LHS.Remainder, LHS.BB); - RemPhi->addIncoming(RHS.Remainder, RHS.BB); - return QuotRemPair(QuoPhi, RemPhi); -} - -/// Creates a runtime check to test whether both the divisor and dividend fit -/// into BypassType. The check is inserted at the end of MainBB. True return -/// value means that the operands fit. Either of the operands may be NULL if it -/// doesn't need a runtime check. -Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) { - assert((Op1 || Op2) && "Nothing to check"); - IRBuilder<> Builder(MainBB, MainBB->end()); - Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc()); - - Value *OrV; - if (Op1 && Op2) - OrV = Builder.CreateOr(Op1, Op2); - else - OrV = Op1 ? Op1 : Op2; - - // BitMask is inverted to check if the operands are - // larger than the bypass type - uint64_t BitMask = ~BypassType->getBitMask(); - Value *AndV = Builder.CreateAnd(OrV, BitMask); - - // Compare operand values - Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0); - return Builder.CreateICmpEQ(AndV, ZeroV); -} - -/// Substitutes the div/rem instruction with code that checks the value of the -/// operands and uses a shorter-faster div/rem instruction when possible. -Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() { - Value *Dividend = SlowDivOrRem->getOperand(0); - Value *Divisor = SlowDivOrRem->getOperand(1); - - VisitedSetTy SetL; - ValueRange DividendRange = getValueRange(Dividend, SetL); - if (DividendRange == VALRNG_LIKELY_LONG) - return None; - - VisitedSetTy SetR; - ValueRange DivisorRange = getValueRange(Divisor, SetR); - if (DivisorRange == VALRNG_LIKELY_LONG) - return None; - - bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT); - bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT); - - if (DividendShort && DivisorShort) { - // If both operands are known to be short then just replace the long - // division with a short one in-place. Since we're not introducing control - // flow in this case, narrowing the division is always a win, even if the - // divisor is a constant (and will later get replaced by a multiplication). - - IRBuilder<> Builder(SlowDivOrRem); - Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType); - Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType); - Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor); - Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor); - Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType()); - Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType()); - return QuotRemPair(ExtDiv, ExtRem); - } - - if (isa<ConstantInt>(Divisor)) { - // If the divisor is not a constant, DAGCombiner will convert it to a - // multiplication by a magic constant. It isn't clear if it is worth - // introducing control flow to get a narrower multiply. - return None; - } - - // After Constant Hoisting pass, long constants may be represented as - // bitcast instructions. As a result, some constants may look like an - // instruction at first, and an additional check is necessary to find out if - // an operand is actually a constant. - if (auto *BCI = dyn_cast<BitCastInst>(Divisor)) - if (BCI->getParent() == SlowDivOrRem->getParent() && - isa<ConstantInt>(BCI->getOperand(0))) - return None; - - IRBuilder<> Builder(MainBB, MainBB->end()); - Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc()); - - if (DividendShort && !isSignedOp()) { - // If the division is unsigned and Dividend is known to be short, then - // either - // 1) Divisor is less or equal to Dividend, and the result can be computed - // with a short division. - // 2) Divisor is greater than Dividend. In this case, no division is needed - // at all: The quotient is 0 and the remainder is equal to Dividend. - // - // So instead of checking at runtime whether Divisor fits into BypassType, - // we emit a runtime check to differentiate between these two cases. This - // lets us entirely avoid a long div. - - // Split the basic block before the div/rem. - BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem); - // Remove the unconditional branch from MainBB to SuccessorBB. - MainBB->getInstList().back().eraseFromParent(); - QuotRemWithBB Long; - Long.BB = MainBB; - Long.Quotient = ConstantInt::get(getSlowType(), 0); - Long.Remainder = Dividend; - QuotRemWithBB Fast = createFastBB(SuccessorBB); - QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB); - Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor); - Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB); - return Result; - } else { - // General case. Create both slow and fast div/rem pairs and choose one of - // them at runtime. - - // Split the basic block before the div/rem. - BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem); - // Remove the unconditional branch from MainBB to SuccessorBB. - MainBB->getInstList().back().eraseFromParent(); - QuotRemWithBB Fast = createFastBB(SuccessorBB); - QuotRemWithBB Slow = createSlowBB(SuccessorBB); - QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB); - Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend, - DivisorShort ? nullptr : Divisor); - Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB); - return Result; - } -} - -/// This optimization identifies DIV/REM instructions in a BB that can be -/// profitably bypassed and carried out with a shorter, faster divide. -bool llvm::bypassSlowDivision(BasicBlock *BB, - const BypassWidthsTy &BypassWidths) { - DivCacheTy PerBBDivCache; - - bool MadeChange = false; - Instruction *Next = &*BB->begin(); - while (Next != nullptr) { - // We may add instructions immediately after I, but we want to skip over - // them. - Instruction *I = Next; - Next = Next->getNextNode(); - - // Ignore dead code to save time and avoid bugs. - if (I->hasNUses(0)) - continue; - - FastDivInsertionTask Task(I, BypassWidths); - if (Value *Replacement = Task.getReplacement(PerBBDivCache)) { - I->replaceAllUsesWith(Replacement); - I->eraseFromParent(); - MadeChange = true; - } - } - - // Above we eagerly create divs and rems, as pairs, so that we can efficiently - // create divrem machine instructions. Now erase any unused divs / rems so we - // don't leave extra instructions sitting around. - for (auto &KV : PerBBDivCache) - for (Value *V : {KV.second.Quotient, KV.second.Remainder}) - RecursivelyDeleteTriviallyDeadInstructions(V); - - return MadeChange; -} +//===- BypassSlowDivision.cpp - Bypass slow division ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains an optimization for div and rem on architectures that +// execute short instructions significantly faster than longer instructions. +// For example, on Intel Atom 32-bit divides are slow enough that during +// runtime it is profitable to check the value of the operands, and if they are +// positive and less than 256 use an unsigned 8-bit divide. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/BypassSlowDivision.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/KnownBits.h" +#include <cassert> +#include <cstdint> + +using namespace llvm; + +#define DEBUG_TYPE "bypass-slow-division" + +namespace { + + struct QuotRemPair { + Value *Quotient; + Value *Remainder; + + QuotRemPair(Value *InQuotient, Value *InRemainder) + : Quotient(InQuotient), Remainder(InRemainder) {} + }; + + /// A quotient and remainder, plus a BB from which they logically "originate". + /// If you use Quotient or Remainder in a Phi node, you should use BB as its + /// corresponding predecessor. + struct QuotRemWithBB { + BasicBlock *BB = nullptr; + Value *Quotient = nullptr; + Value *Remainder = nullptr; + }; + +using DivCacheTy = DenseMap<DivRemMapKey, QuotRemPair>; +using BypassWidthsTy = DenseMap<unsigned, unsigned>; +using VisitedSetTy = SmallPtrSet<Instruction *, 4>; + +enum ValueRange { + /// Operand definitely fits into BypassType. No runtime checks are needed. + VALRNG_KNOWN_SHORT, + /// A runtime check is required, as value range is unknown. + VALRNG_UNKNOWN, + /// Operand is unlikely to fit into BypassType. The bypassing should be + /// disabled. + VALRNG_LIKELY_LONG +}; + +class FastDivInsertionTask { + bool IsValidTask = false; + Instruction *SlowDivOrRem = nullptr; + IntegerType *BypassType = nullptr; + BasicBlock *MainBB = nullptr; + + bool isHashLikeValue(Value *V, VisitedSetTy &Visited); + ValueRange getValueRange(Value *Op, VisitedSetTy &Visited); + QuotRemWithBB createSlowBB(BasicBlock *Successor); + QuotRemWithBB createFastBB(BasicBlock *Successor); + QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS, + BasicBlock *PhiBB); + Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2); + Optional<QuotRemPair> insertFastDivAndRem(); + + bool isSignedOp() { + return SlowDivOrRem->getOpcode() == Instruction::SDiv || + SlowDivOrRem->getOpcode() == Instruction::SRem; + } + + bool isDivisionOp() { + return SlowDivOrRem->getOpcode() == Instruction::SDiv || + SlowDivOrRem->getOpcode() == Instruction::UDiv; + } + + Type *getSlowType() { return SlowDivOrRem->getType(); } + +public: + FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths); + + Value *getReplacement(DivCacheTy &Cache); +}; + +} // end anonymous namespace + +FastDivInsertionTask::FastDivInsertionTask(Instruction *I, + const BypassWidthsTy &BypassWidths) { + switch (I->getOpcode()) { + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + SlowDivOrRem = I; + break; + default: + // I is not a div/rem operation. + return; + } + + // Skip division on vector types. Only optimize integer instructions. + IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType()); + if (!SlowType) + return; + + // Skip if this bitwidth is not bypassed. + auto BI = BypassWidths.find(SlowType->getBitWidth()); + if (BI == BypassWidths.end()) + return; + + // Get type for div/rem instruction with bypass bitwidth. + IntegerType *BT = IntegerType::get(I->getContext(), BI->second); + BypassType = BT; + + // The original basic block. + MainBB = I->getParent(); + + // The instruction is indeed a slow div or rem operation. + IsValidTask = true; +} + +/// Reuses previously-computed dividend or remainder from the current BB if +/// operands and operation are identical. Otherwise calls insertFastDivAndRem to +/// perform the optimization and caches the resulting dividend and remainder. +/// If no replacement can be generated, nullptr is returned. +Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) { + // First, make sure that the task is valid. + if (!IsValidTask) + return nullptr; + + // Then, look for a value in Cache. + Value *Dividend = SlowDivOrRem->getOperand(0); + Value *Divisor = SlowDivOrRem->getOperand(1); + DivRemMapKey Key(isSignedOp(), Dividend, Divisor); + auto CacheI = Cache.find(Key); + + if (CacheI == Cache.end()) { + // If previous instance does not exist, try to insert fast div. + Optional<QuotRemPair> OptResult = insertFastDivAndRem(); + // Bail out if insertFastDivAndRem has failed. + if (!OptResult) + return nullptr; + CacheI = Cache.insert({Key, *OptResult}).first; + } + + QuotRemPair &Value = CacheI->second; + return isDivisionOp() ? Value.Quotient : Value.Remainder; +} + +/// Check if a value looks like a hash. +/// +/// The routine is expected to detect values computed using the most common hash +/// algorithms. Typically, hash computations end with one of the following +/// instructions: +/// +/// 1) MUL with a constant wider than BypassType +/// 2) XOR instruction +/// +/// And even if we are wrong and the value is not a hash, it is still quite +/// unlikely that such values will fit into BypassType. +/// +/// To detect string hash algorithms like FNV we have to look through PHI-nodes. +/// It is implemented as a depth-first search for values that look neither long +/// nor hash-like. +bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) { + Instruction *I = dyn_cast<Instruction>(V); + if (!I) + return false; + + switch (I->getOpcode()) { + case Instruction::Xor: + return true; + case Instruction::Mul: { + // After Constant Hoisting pass, long constants may be represented as + // bitcast instructions. As a result, some constants may look like an + // instruction at first, and an additional check is necessary to find out if + // an operand is actually a constant. + Value *Op1 = I->getOperand(1); + ConstantInt *C = dyn_cast<ConstantInt>(Op1); + if (!C && isa<BitCastInst>(Op1)) + C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0)); + return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth(); + } + case Instruction::PHI: + // Stop IR traversal in case of a crazy input code. This limits recursion + // depth. + if (Visited.size() >= 16) + return false; + // Do not visit nodes that have been visited already. We return true because + // it means that we couldn't find any value that doesn't look hash-like. + if (!Visited.insert(I).second) + return true; + return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) { + // Ignore undef values as they probably don't affect the division + // operands. + return getValueRange(V, Visited) == VALRNG_LIKELY_LONG || + isa<UndefValue>(V); + }); + default: + return false; + } +} + +/// Check if an integer value fits into our bypass type. +ValueRange FastDivInsertionTask::getValueRange(Value *V, + VisitedSetTy &Visited) { + unsigned ShortLen = BypassType->getBitWidth(); + unsigned LongLen = V->getType()->getIntegerBitWidth(); + + assert(LongLen > ShortLen && "Value type must be wider than BypassType"); + unsigned HiBits = LongLen - ShortLen; + + const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout(); + KnownBits Known(LongLen); + + computeKnownBits(V, Known, DL); + + if (Known.countMinLeadingZeros() >= HiBits) + return VALRNG_KNOWN_SHORT; + + if (Known.countMaxLeadingZeros() < HiBits) + return VALRNG_LIKELY_LONG; + + // Long integer divisions are often used in hashtable implementations. It's + // not worth bypassing such divisions because hash values are extremely + // unlikely to have enough leading zeros. The call below tries to detect + // values that are unlikely to fit BypassType (including hashes). + if (isHashLikeValue(V, Visited)) + return VALRNG_LIKELY_LONG; + + return VALRNG_UNKNOWN; +} + +/// Add new basic block for slow div and rem operations and put it before +/// SuccessorBB. +QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) { + QuotRemWithBB DivRemPair; + DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "", + MainBB->getParent(), SuccessorBB); + IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin()); + Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc()); + + Value *Dividend = SlowDivOrRem->getOperand(0); + Value *Divisor = SlowDivOrRem->getOperand(1); + + if (isSignedOp()) { + DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor); + DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor); + } else { + DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor); + DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor); + } + + Builder.CreateBr(SuccessorBB); + return DivRemPair; +} + +/// Add new basic block for fast div and rem operations and put it before +/// SuccessorBB. +QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) { + QuotRemWithBB DivRemPair; + DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "", + MainBB->getParent(), SuccessorBB); + IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin()); + Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc()); + + Value *Dividend = SlowDivOrRem->getOperand(0); + Value *Divisor = SlowDivOrRem->getOperand(1); + Value *ShortDivisorV = + Builder.CreateCast(Instruction::Trunc, Divisor, BypassType); + Value *ShortDividendV = + Builder.CreateCast(Instruction::Trunc, Dividend, BypassType); + + // udiv/urem because this optimization only handles positive numbers. + Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV); + Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV); + DivRemPair.Quotient = + Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType()); + DivRemPair.Remainder = + Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType()); + Builder.CreateBr(SuccessorBB); + + return DivRemPair; +} + +/// Creates Phi nodes for result of Div and Rem. +QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS, + QuotRemWithBB &RHS, + BasicBlock *PhiBB) { + IRBuilder<> Builder(PhiBB, PhiBB->begin()); + Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc()); + PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2); + QuoPhi->addIncoming(LHS.Quotient, LHS.BB); + QuoPhi->addIncoming(RHS.Quotient, RHS.BB); + PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2); + RemPhi->addIncoming(LHS.Remainder, LHS.BB); + RemPhi->addIncoming(RHS.Remainder, RHS.BB); + return QuotRemPair(QuoPhi, RemPhi); +} + +/// Creates a runtime check to test whether both the divisor and dividend fit +/// into BypassType. The check is inserted at the end of MainBB. True return +/// value means that the operands fit. Either of the operands may be NULL if it +/// doesn't need a runtime check. +Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) { + assert((Op1 || Op2) && "Nothing to check"); + IRBuilder<> Builder(MainBB, MainBB->end()); + Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc()); + + Value *OrV; + if (Op1 && Op2) + OrV = Builder.CreateOr(Op1, Op2); + else + OrV = Op1 ? Op1 : Op2; + + // BitMask is inverted to check if the operands are + // larger than the bypass type + uint64_t BitMask = ~BypassType->getBitMask(); + Value *AndV = Builder.CreateAnd(OrV, BitMask); + + // Compare operand values + Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0); + return Builder.CreateICmpEQ(AndV, ZeroV); +} + +/// Substitutes the div/rem instruction with code that checks the value of the +/// operands and uses a shorter-faster div/rem instruction when possible. +Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() { + Value *Dividend = SlowDivOrRem->getOperand(0); + Value *Divisor = SlowDivOrRem->getOperand(1); + + VisitedSetTy SetL; + ValueRange DividendRange = getValueRange(Dividend, SetL); + if (DividendRange == VALRNG_LIKELY_LONG) + return None; + + VisitedSetTy SetR; + ValueRange DivisorRange = getValueRange(Divisor, SetR); + if (DivisorRange == VALRNG_LIKELY_LONG) + return None; + + bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT); + bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT); + + if (DividendShort && DivisorShort) { + // If both operands are known to be short then just replace the long + // division with a short one in-place. Since we're not introducing control + // flow in this case, narrowing the division is always a win, even if the + // divisor is a constant (and will later get replaced by a multiplication). + + IRBuilder<> Builder(SlowDivOrRem); + Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType); + Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType); + Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor); + Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor); + Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType()); + Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType()); + return QuotRemPair(ExtDiv, ExtRem); + } + + if (isa<ConstantInt>(Divisor)) { + // If the divisor is not a constant, DAGCombiner will convert it to a + // multiplication by a magic constant. It isn't clear if it is worth + // introducing control flow to get a narrower multiply. + return None; + } + + // After Constant Hoisting pass, long constants may be represented as + // bitcast instructions. As a result, some constants may look like an + // instruction at first, and an additional check is necessary to find out if + // an operand is actually a constant. + if (auto *BCI = dyn_cast<BitCastInst>(Divisor)) + if (BCI->getParent() == SlowDivOrRem->getParent() && + isa<ConstantInt>(BCI->getOperand(0))) + return None; + + IRBuilder<> Builder(MainBB, MainBB->end()); + Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc()); + + if (DividendShort && !isSignedOp()) { + // If the division is unsigned and Dividend is known to be short, then + // either + // 1) Divisor is less or equal to Dividend, and the result can be computed + // with a short division. + // 2) Divisor is greater than Dividend. In this case, no division is needed + // at all: The quotient is 0 and the remainder is equal to Dividend. + // + // So instead of checking at runtime whether Divisor fits into BypassType, + // we emit a runtime check to differentiate between these two cases. This + // lets us entirely avoid a long div. + + // Split the basic block before the div/rem. + BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem); + // Remove the unconditional branch from MainBB to SuccessorBB. + MainBB->getInstList().back().eraseFromParent(); + QuotRemWithBB Long; + Long.BB = MainBB; + Long.Quotient = ConstantInt::get(getSlowType(), 0); + Long.Remainder = Dividend; + QuotRemWithBB Fast = createFastBB(SuccessorBB); + QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB); + Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor); + Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB); + return Result; + } else { + // General case. Create both slow and fast div/rem pairs and choose one of + // them at runtime. + + // Split the basic block before the div/rem. + BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem); + // Remove the unconditional branch from MainBB to SuccessorBB. + MainBB->getInstList().back().eraseFromParent(); + QuotRemWithBB Fast = createFastBB(SuccessorBB); + QuotRemWithBB Slow = createSlowBB(SuccessorBB); + QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB); + Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend, + DivisorShort ? nullptr : Divisor); + Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB); + return Result; + } +} + +/// This optimization identifies DIV/REM instructions in a BB that can be +/// profitably bypassed and carried out with a shorter, faster divide. +bool llvm::bypassSlowDivision(BasicBlock *BB, + const BypassWidthsTy &BypassWidths) { + DivCacheTy PerBBDivCache; + + bool MadeChange = false; + Instruction *Next = &*BB->begin(); + while (Next != nullptr) { + // We may add instructions immediately after I, but we want to skip over + // them. + Instruction *I = Next; + Next = Next->getNextNode(); + + // Ignore dead code to save time and avoid bugs. + if (I->hasNUses(0)) + continue; + + FastDivInsertionTask Task(I, BypassWidths); + if (Value *Replacement = Task.getReplacement(PerBBDivCache)) { + I->replaceAllUsesWith(Replacement); + I->eraseFromParent(); + MadeChange = true; + } + } + + // Above we eagerly create divs and rems, as pairs, so that we can efficiently + // create divrem machine instructions. Now erase any unused divs / rems so we + // don't leave extra instructions sitting around. + for (auto &KV : PerBBDivCache) + for (Value *V : {KV.second.Quotient, KV.second.Remainder}) + RecursivelyDeleteTriviallyDeadInstructions(V); + + return MadeChange; +} |